| mat!(ascii_literal, r"a", "a", Some((0, 1))); |
| |
| // Some crazy expressions from regular-expressions.info. |
| mat!( |
| match_ranges, |
| r"(?-u)\b(?:[0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])\b", |
| "num: 255", |
| Some((5, 8)) |
| ); |
| mat!( |
| match_ranges_not, |
| r"(?-u)\b(?:[0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])\b", |
| "num: 256", |
| None |
| ); |
| mat!(match_float1, r"[-+]?[0-9]*\.?[0-9]+", "0.1", Some((0, 3))); |
| mat!(match_float2, r"[-+]?[0-9]*\.?[0-9]+", "0.1.2", Some((0, 3))); |
| mat!(match_float3, r"[-+]?[0-9]*\.?[0-9]+", "a1.2", Some((1, 4))); |
| mat!(match_float4, r"^[-+]?[0-9]*\.?[0-9]+$", "1.a", None); |
| mat!( |
| match_email, |
| r"(?i-u)\b[A-Z0-9._%+-]+@[A-Z0-9.-]+\.[A-Z]{2,4}\b", |
| "mine is [email protected] ", |
| Some((8, 26)) |
| ); |
| mat!( |
| match_email_not, |
| r"(?i-u)\b[A-Z0-9._%+-]+@[A-Z0-9.-]+\.[A-Z]{2,4}\b", |
| "mine is jam.slam@gmail ", |
| None |
| ); |
| mat!( |
| match_email_big, |
| r"[a-z0-9!#$%&'*+/=?^_`{|}~-]+(?:\.[a-z0-9!#$%&'*+/=?^_`{|}~-]+)*@(?:[a-z0-9](?:[a-z0-9-]*[a-z0-9])?\.)+[a-z0-9](?:[a-z0-9-]*[a-z0-9])?", |
| "mine is [email protected] ", |
| Some((8, 26)) |
| ); |
| mat!( |
| match_date1, |
| r"(?-u)^(19|20)\d\d[- /.](0[1-9]|1[012])[- /.](0[1-9]|[12][0-9]|3[01])$", |
| "1900-01-01", |
| Some((0, 10)) |
| ); |
| mat!( |
| match_date2, |
| r"(?-u)^(19|20)\d\d[- /.](0[1-9]|1[012])[- /.](0[1-9]|[12][0-9]|3[01])$", |
| "1900-00-01", |
| None |
| ); |
| mat!( |
| match_date3, |
| r"(?-u)^(19|20)\d\d[- /.](0[1-9]|1[012])[- /.](0[1-9]|[12][0-9]|3[01])$", |
| "1900-13-01", |
| None |
| ); |
| |
| // Do some crazy dancing with the start/end assertions. |
| matiter!(match_start_end_empty, r"^$", "", (0, 0)); |
| matiter!(match_start_end_empty_many_1, r"^$^$^$", "", (0, 0)); |
| matiter!(match_start_end_empty_many_2, r"^^^$$$", "", (0, 0)); |
| matiter!(match_start_end_empty_rev, r"$^", "", (0, 0)); |
| matiter!( |
| match_start_end_empty_rep, |
| r"(?:^$)*", |
| "a\nb\nc", |
| (0, 0), |
| (1, 1), |
| (2, 2), |
| (3, 3), |
| (4, 4), |
| (5, 5) |
| ); |
| matiter!( |
| match_start_end_empty_rep_rev, |
| r"(?:$^)*", |
| "a\nb\nc", |
| (0, 0), |
| (1, 1), |
| (2, 2), |
| (3, 3), |
| (4, 4), |
| (5, 5) |
| ); |
| |
| // Test negated character classes. |
| mat!(negclass_letters, r"[^ac]", "acx", Some((2, 3))); |
| mat!(negclass_letter_comma, r"[^a,]", "a,x", Some((2, 3))); |
| mat!(negclass_letter_space, r"[^a[:space:]]", "a x", Some((2, 3))); |
| mat!(negclass_comma, r"[^,]", ",,x", Some((2, 3))); |
| mat!(negclass_space, r"[^[:space:]]", " a", Some((1, 2))); |
| mat!(negclass_space_comma, r"[^,[:space:]]", ", a", Some((2, 3))); |
| mat!(negclass_comma_space, r"[^[:space:],]", " ,a", Some((2, 3))); |
| mat!(negclass_ascii, r"[^[:alpha:]Z]", "A1", Some((1, 2))); |
| |
| // Test that repeated empty expressions don't loop forever. |
| mat!(lazy_many_many, r"((?:.*)*?)=", "a=b", Some((0, 2))); |
| mat!(lazy_many_optional, r"((?:.?)*?)=", "a=b", Some((0, 2))); |
| mat!(lazy_one_many_many, r"((?:.*)+?)=", "a=b", Some((0, 2))); |
| mat!(lazy_one_many_optional, r"((?:.?)+?)=", "a=b", Some((0, 2))); |
| mat!(lazy_range_min_many, r"((?:.*){1,}?)=", "a=b", Some((0, 2))); |
| mat!(lazy_range_many, r"((?:.*){1,2}?)=", "a=b", Some((0, 2))); |
| mat!(greedy_many_many, r"((?:.*)*)=", "a=b", Some((0, 2))); |
| mat!(greedy_many_optional, r"((?:.?)*)=", "a=b", Some((0, 2))); |
| mat!(greedy_one_many_many, r"((?:.*)+)=", "a=b", Some((0, 2))); |
| mat!(greedy_one_many_optional, r"((?:.?)+)=", "a=b", Some((0, 2))); |
| mat!(greedy_range_min_many, r"((?:.*){1,})=", "a=b", Some((0, 2))); |
| mat!(greedy_range_many, r"((?:.*){1,2})=", "a=b", Some((0, 2))); |
| |
| // Test that we handle various flavors of empty expressions. |
| matiter!(match_empty1, r"", "", (0, 0)); |
| matiter!(match_empty2, r"", "abc", (0, 0), (1, 1), (2, 2), (3, 3)); |
| matiter!(match_empty3, r"()", "abc", (0, 0), (1, 1), (2, 2), (3, 3)); |
| matiter!(match_empty4, r"()*", "abc", (0, 0), (1, 1), (2, 2), (3, 3)); |
| matiter!(match_empty5, r"()+", "abc", (0, 0), (1, 1), (2, 2), (3, 3)); |
| matiter!(match_empty6, r"()?", "abc", (0, 0), (1, 1), (2, 2), (3, 3)); |
| matiter!(match_empty7, r"()()", "abc", (0, 0), (1, 1), (2, 2), (3, 3)); |
| matiter!(match_empty8, r"()+|z", "abc", (0, 0), (1, 1), (2, 2), (3, 3)); |
| matiter!(match_empty9, r"z|()+", "abc", (0, 0), (1, 1), (2, 2), (3, 3)); |
| matiter!(match_empty10, r"()+|b", "abc", (0, 0), (1, 1), (2, 2), (3, 3)); |
| matiter!(match_empty11, r"b|()+", "abc", (0, 0), (1, 2), (3, 3)); |
| matiter!(match_empty12, r"|b", "abc", (0, 0), (1, 1), (2, 2), (3, 3)); |
| matiter!(match_empty13, r"b|", "abc", (0, 0), (1, 2), (3, 3)); |
| matiter!(match_empty14, r"|z", "abc", (0, 0), (1, 1), (2, 2), (3, 3)); |
| matiter!(match_empty15, r"z|", "abc", (0, 0), (1, 1), (2, 2), (3, 3)); |
| matiter!(match_empty16, r"|", "abc", (0, 0), (1, 1), (2, 2), (3, 3)); |
| matiter!(match_empty17, r"||", "abc", (0, 0), (1, 1), (2, 2), (3, 3)); |
| matiter!(match_empty18, r"||z", "abc", (0, 0), (1, 1), (2, 2), (3, 3)); |
| matiter!(match_empty19, r"(?:)|b", "abc", (0, 0), (1, 1), (2, 2), (3, 3)); |
| matiter!(match_empty20, r"b|(?:)", "abc", (0, 0), (1, 2), (3, 3)); |
| matiter!(match_empty21, r"(?:|)", "abc", (0, 0), (1, 1), (2, 2), (3, 3)); |
| matiter!(match_empty22, r"(?:|)|z", "abc", (0, 0), (1, 1), (2, 2), (3, 3)); |
| matiter!(match_empty23, r"a(?:)|b", "abc", (0, 1), (1, 2)); |
| |
// Test that the DFA can handle pathological cases.
// (This should result in the DFA's cache being flushed too frequently, which
// should cause it to quit and fall back to the NFA algorithm.)
#[test]
fn dfa_handles_pathological_case() {
    /// Returns a string of `count` characters, each chosen as '1' or '0' by
    /// one boolean draw from an entropy-seeded `SmallRng`.
    fn ones_and_zeroes(count: usize) -> String {
        use rand::rngs::SmallRng;
        use rand::{Rng, SeedableRng};

        let mut rng = SmallRng::from_entropy();
        // One draw per output character, in order.
        (0..count)
            .map(|_| if rng.gen() { '1' } else { '0' })
            .collect()
    }

    let re = regex!(r"[01]*1[01]{20}$");

    // 100_000 random bits, then a '1', then exactly 20 more random bits, so
    // the haystack always ends in the shape the pattern requires.
    let mut haystack = ones_and_zeroes(100_000);
    haystack.push('1');
    haystack.push_str(&ones_and_zeroes(20));

    assert!(re.is_match(text!(&*haystack)));
}

/// Checks that a large, deeply nested alternation can be compiled.
///
/// The pattern is handed to `RegexBuilder` with no options changed and the
/// result is unwrapped, so the test passes exactly when the build succeeds.
/// Judging by the test name this guards the parser's nesting limit —
/// NOTE(review): confirm against the regex crate's default `nest_limit`.
#[test]
fn nest_limit_makes_it_parse() {
    use regex::RegexBuilder;

    RegexBuilder::new(
        // No `x` flag is set, so the line breaks and indentation inside this
        // raw string are literal pattern characters.
        // NOTE(review): the indentation below was rebuilt from the group
        // nesting depth; confirm it against the upstream test before relying
        // on the pattern byte-for-byte.
        r#"(?-u)
        2(?:
          [45]\d{3}|
          7(?:
            1[0-267]|
            2[0-289]|
            3[0-29]|
            4[01]|
            5[1-3]|
            6[013]|
            7[0178]|
            91
          )|
          8(?:
            0[125]|
            [139][1-6]|
            2[0157-9]|
            41|
            6[1-35]|
            7[1-5]|
            8[1-8]|
            90
          )|
          9(?:
            0[0-2]|
            1[0-4]|
            2[568]|
            3[3-6]|
            5[5-7]|
            6[0167]|
            7[15]|
            8[0146-9]
          )
        )\d{4}|
        3(?:
          12?[5-7]\d{2}|
          0(?:
            2(?:
              [025-79]\d|
              [348]\d{1,2}
            )|
            3(?:
              [2-4]\d|
              [56]\d?
            )
          )|
          2(?:
            1\d{2}|
            2(?:
              [12]\d|
              [35]\d{1,2}|
              4\d?
            )
          )|
          3(?:
            1\d{2}|
            2(?:
              [2356]\d|
              4\d{1,2}
            )
          )|
          4(?:
            1\d{2}|
            2(?:
              2\d{1,2}|
              [47]|
              5\d{2}
            )
          )|
          5(?:
            1\d{2}|
            29
          )|
          [67]1\d{2}|
          8(?:
            1\d{2}|
            2(?:
              2\d{2}|
              3|
              4\d
            )
          )
        )\d{3}|
        4(?:
          0(?:
            2(?:
              [09]\d|
              7
            )|
            33\d{2}
          )|
          1\d{3}|
          2(?:
            1\d{2}|
            2(?:
              [25]\d?|
              [348]\d|
              [67]\d{1,2}
            )
          )|
          3(?:
            1\d{2}(?:
              \d{2}
            )?|
            2(?:
              [045]\d|
              [236-9]\d{1,2}
            )|
            32\d{2}
          )|
          4(?:
            [18]\d{2}|
            2(?:
              [2-46]\d{2}|
              3
            )|
            5[25]\d{2}
          )|
          5(?:
            1\d{2}|
            2(?:
              3\d|
              5
            )
          )|
          6(?:
            [18]\d{2}|
            2(?:
              3(?:
                \d{2}
              )?|
              [46]\d{1,2}|
              5\d{2}|
              7\d
            )|
            5(?:
              3\d?|
              4\d|
              [57]\d{1,2}|
              6\d{2}|
              8
            )
          )|
          71\d{2}|
          8(?:
            [18]\d{2}|
            23\d{2}|
            54\d{2}
          )|
          9(?:
            [18]\d{2}|
            2[2-5]\d{2}|
            53\d{1,2}
          )
        )\d{3}|
        5(?:
          02[03489]\d{2}|
          1\d{2}|
          2(?:
            1\d{2}|
            2(?:
              2(?:
                \d{2}
              )?|
              [457]\d{2}
            )
          )|
          3(?:
            1\d{2}|
            2(?:
              [37](?:
                \d{2}
              )?|
              [569]\d{2}
            )
          )|
          4(?:
            1\d{2}|
            2[46]\d{2}
          )|
          5(?:
            1\d{2}|
            26\d{1,2}
          )|
          6(?:
            [18]\d{2}|
            2|
            53\d{2}
          )|
          7(?:
            1|
            24
          )\d{2}|
          8(?:
            1|
            26
          )\d{2}|
          91\d{2}
        )\d{3}|
        6(?:
          0(?:
            1\d{2}|
            2(?:
              3\d{2}|
              4\d{1,2}
            )
          )|
          2(?:
            2[2-5]\d{2}|
            5(?:
              [3-5]\d{2}|
              7
            )|
            8\d{2}
          )|
          3(?:
            1|
            2[3478]
          )\d{2}|
          4(?:
            1|
            2[34]
          )\d{2}|
          5(?:
            1|
            2[47]
          )\d{2}|
          6(?:
            [18]\d{2}|
            6(?:
              2(?:
                2\d|
                [34]\d{2}
              )|
              5(?:
                [24]\d{2}|
                3\d|
                5\d{1,2}
              )
            )
          )|
          72[2-5]\d{2}|
          8(?:
            1\d{2}|
            2[2-5]\d{2}
          )|
          9(?:
            1\d{2}|
            2[2-6]\d{2}
          )
        )\d{3}|
        7(?:
          (?:
            02|
            [3-589]1|
            6[12]|
            72[24]
          )\d{2}|
          21\d{3}|
          32
        )\d{3}|
        8(?:
          (?:
            4[12]|
            [5-7]2|
            1\d?
          )|
          (?:
            0|
            3[12]|
            [5-7]1|
            217
          )\d
        )\d{4}|
        9(?:
          [35]1|
          (?:
            [024]2|
            81
          )\d|
          (?:
            1|
            [24]1
          )\d{2}
        )\d{3}
        "#,
    )
    .build()
    .unwrap();
}