| use regex_automata::{ |
| dfa::{self, dense, regex::Regex, sparse, Automaton}, |
| nfa::thompson, |
| MatchKind, SyntaxConfig, |
| }; |
| use regex_syntax as syntax; |
| |
| use regex_test::{ |
| bstr::{BString, ByteSlice}, |
| CompiledRegex, Match, MatchKind as TestMatchKind, RegexTest, RegexTests, |
| SearchKind as TestSearchKind, TestResult, TestRunner, |
| }; |
| |
| use crate::{suite, Result}; |
| |
/// Exercises the full test suite against a dense DFA regex whose builder is
/// left entirely at its defaults.
#[test]
fn unminimized_default() -> Result<()> {
    let compile = dense_compiler(Regex::builder());
    let mut runner = TestRunner::new()?;
    let tests = suite()?;
    runner.test_iter(tests.iter(), compile).assert();
    Ok(())
}
| |
/// Exercises the full test suite against a dense DFA regex built with byte
/// classes turned off.
#[test]
fn unminimized_no_byte_class() -> Result<()> {
    let dense_config = dense::Config::new().byte_classes(false);
    let mut builder = Regex::builder();
    builder.dense(dense_config);
    let compile = dense_compiler(builder);

    let mut runner = TestRunner::new()?;
    let tests = suite()?;
    runner.test_iter(tests.iter(), compile).assert();
    Ok(())
}
| |
/// Exercises the full test suite against a dense DFA regex whose Thompson NFA
/// is compiled with shrinking turned off.
#[test]
fn unminimized_no_nfa_shrink() -> Result<()> {
    let nfa_config = thompson::Config::new().shrink(false);
    let mut builder = Regex::builder();
    builder.thompson(nfa_config);
    let compile = dense_compiler(builder);

    let mut runner = TestRunner::new()?;
    let tests = suite()?;
    runner.test_iter(tests.iter(), compile).assert();
    Ok(())
}
| |
/// Exercises the test suite against a dense DFA regex with minimization
/// enabled and every other option left at its default.
#[test]
fn minimized_default() -> Result<()> {
    let dense_config = dense::Config::new().minimize(true);
    let mut builder = Regex::builder();
    builder.dense(dense_config);
    let compile = dense_compiler(builder);

    let mut runner = TestRunner::new()?;
    // Anything matching "expensive" is blacklisted: those regexes tend to be
    // so big that minimizing them takes far too long.
    runner.blacklist("expensive");
    let tests = suite()?;
    runner.test_iter(tests.iter(), compile).assert();
    Ok(())
}
| |
/// Exercises the test suite against a dense DFA regex with minimization
/// enabled and byte classes turned off.
#[test]
fn minimized_no_byte_class() -> Result<()> {
    let dense_config = dense::Config::new().minimize(true).byte_classes(false);
    let mut builder = Regex::builder();
    builder.dense(dense_config);
    let compile = dense_compiler(builder);

    let mut runner = TestRunner::new()?;
    // Anything matching "expensive" is blacklisted: those regexes tend to be
    // so big that minimizing them takes far too long.
    runner.blacklist("expensive");
    let tests = suite()?;
    runner.test_iter(tests.iter(), compile).assert();
    Ok(())
}
| |
/// Exercises the full test suite against a regex whose DFAs have been
/// converted to their sparse representation, using a default builder.
#[test]
fn sparse_unminimized_default() -> Result<()> {
    let compile = sparse_compiler(Regex::builder());
    let mut runner = TestRunner::new()?;
    let tests = suite()?;
    runner.test_iter(tests.iter(), compile).assert();
    Ok(())
}
| |
/// Sanity check for dense DFA serialization: every compiled regex has its
/// forward and reverse DFAs written out as native-endian bytes, and each test
/// is then run against a regex rebuilt from those bytes.
#[test]
fn serialization_unminimized_default() -> Result<()> {
    let compile = compiler(Regex::builder(), |builder, re| {
        // The matcher closure outlives this callback, so it needs its own
        // copy of the builder.
        let builder = builder.clone();
        // Only the serialized bytes are kept; the second element of each
        // returned tuple is ignored.
        let fwd_bytes = re.forward().to_bytes_native_endian().0;
        let rev_bytes = re.reverse().to_bytes_native_endian().0;
        Ok(CompiledRegex::compiled(move |test| -> Vec<TestResult> {
            // Deserialize on every run so the search goes through DFAs that
            // borrow directly from the serialized buffers.
            let (fwd, _): (dense::DFA<&[u32]>, _) =
                dense::DFA::from_bytes(&fwd_bytes).unwrap();
            let (rev, _): (dense::DFA<&[u32]>, _) =
                dense::DFA::from_bytes(&rev_bytes).unwrap();
            let re = builder.build_from_dfas(fwd, rev);

            run_test(&re, test)
        }))
    });

    let mut runner = TestRunner::new()?;
    let tests = suite()?;
    runner.test_iter(tests.iter(), compile).assert();
    Ok(())
}
| |
/// Sanity check for sparse DFA serialization: every compiled regex has its
/// forward and reverse DFAs converted to sparse form and written out as
/// native-endian bytes, and each test is then run against a regex rebuilt
/// from those bytes.
#[test]
fn sparse_serialization_unminimized_default() -> Result<()> {
    let compile = compiler(Regex::builder(), |builder, re| {
        // The matcher closure outlives this callback, so it needs its own
        // copy of the builder.
        let builder = builder.clone();
        let fwd_sparse = re.forward().to_sparse()?;
        let fwd_bytes = fwd_sparse.to_bytes_native_endian();
        let rev_sparse = re.reverse().to_sparse()?;
        let rev_bytes = rev_sparse.to_bytes_native_endian();
        Ok(CompiledRegex::compiled(move |test| -> Vec<TestResult> {
            // Deserialize on every run so the search goes through DFAs that
            // borrow directly from the serialized buffers.
            let (fwd, _): (sparse::DFA<&[u8]>, _) =
                sparse::DFA::from_bytes(&fwd_bytes).unwrap();
            let (rev, _): (sparse::DFA<&[u8]>, _) =
                sparse::DFA::from_bytes(&rev_bytes).unwrap();
            let re = builder.build_from_dfas(fwd, rev);
            run_test(&re, test)
        }))
    });

    let mut runner = TestRunner::new()?;
    let tests = suite()?;
    runner.test_iter(tests.iter(), compile).assert();
    Ok(())
}
| |
| fn dense_compiler( |
| builder: dfa::regex::Builder, |
| ) -> impl FnMut(&RegexTest, &[BString]) -> Result<CompiledRegex> { |
| compiler(builder, |_, re| { |
| Ok(CompiledRegex::compiled(move |test| -> Vec<TestResult> { |
| run_test(&re, test) |
| })) |
| }) |
| } |
| |
| fn sparse_compiler( |
| builder: dfa::regex::Builder, |
| ) -> impl FnMut(&RegexTest, &[BString]) -> Result<CompiledRegex> { |
| compiler(builder, |builder, re| { |
| let fwd = re.forward().to_sparse()?; |
| let rev = re.reverse().to_sparse()?; |
| let re = builder.build_from_dfas(fwd, rev); |
| Ok(CompiledRegex::compiled(move |test| -> Vec<TestResult> { |
| run_test(&re, test) |
| })) |
| }) |
| } |
| |
| fn compiler( |
| mut builder: dfa::regex::Builder, |
| mut create_matcher: impl FnMut( |
| &dfa::regex::Builder, |
| Regex, |
| ) -> Result<CompiledRegex>, |
| ) -> impl FnMut(&RegexTest, &[BString]) -> Result<CompiledRegex> { |
| move |test, regexes| { |
| let regexes = regexes |
| .iter() |
| .map(|r| r.to_str().map(|s| s.to_string())) |
| .collect::<std::result::Result<Vec<String>, _>>()?; |
| |
| // Check if our regex contains things that aren't supported by DFAs. |
| // That is, Unicode word boundaries when searching non-ASCII text. |
| let mut thompson = thompson::Builder::new(); |
| thompson.configure(config_thompson(test)); |
| // TODO: Modify Hir to report facts like this, instead of needing to |
| // build an NFA to do it. |
| if let Ok(nfa) = thompson.build_many(®exes) { |
| let non_ascii = test.input().iter().any(|&b| !b.is_ascii()); |
| if nfa.has_word_boundary_unicode() && non_ascii { |
| return Ok(CompiledRegex::skip()); |
| } |
| } |
| if !configure_regex_builder(test, &mut builder) { |
| return Ok(CompiledRegex::skip()); |
| } |
| create_matcher(&builder, builder.build_many(®exes)?) |
| } |
| } |
| |
| fn run_test<A: Automaton>(re: &Regex<A>, test: &RegexTest) -> Vec<TestResult> { |
| let is_match = if re.is_match(test.input()) { |
| TestResult::matched() |
| } else { |
| TestResult::no_match() |
| }; |
| let is_match = is_match.name("is_match"); |
| |
| let find_matches = match test.search_kind() { |
| TestSearchKind::Earliest => { |
| let it = re |
| .find_earliest_iter(test.input()) |
| .take(test.match_limit().unwrap_or(std::usize::MAX)) |
| .map(|m| Match { |
| id: m.pattern().as_usize(), |
| start: m.start(), |
| end: m.end(), |
| }); |
| TestResult::matches(it).name("find_earliest_iter") |
| } |
| TestSearchKind::Leftmost => { |
| let it = re |
| .find_leftmost_iter(test.input()) |
| .take(test.match_limit().unwrap_or(std::usize::MAX)) |
| .map(|m| Match { |
| id: m.pattern().as_usize(), |
| start: m.start(), |
| end: m.end(), |
| }); |
| TestResult::matches(it).name("find_leftmost_iter") |
| } |
| TestSearchKind::Overlapping => { |
| let it = re |
| .find_overlapping_iter(test.input()) |
| .take(test.match_limit().unwrap_or(std::usize::MAX)) |
| .map(|m| Match { |
| id: m.pattern().as_usize(), |
| start: m.start(), |
| end: m.end(), |
| }); |
| TestResult::matches(it).name("find_overlapping_iter") |
| } |
| }; |
| |
| vec![is_match, find_matches] |
| } |
| |
| /// Configures the given regex builder with all relevant settings on the given |
| /// regex test. |
| /// |
| /// If the regex test has a setting that is unsupported, then this returns |
| /// false (implying the test should be skipped). |
| fn configure_regex_builder( |
| test: &RegexTest, |
| builder: &mut dfa::regex::Builder, |
| ) -> bool { |
| let match_kind = match test.match_kind() { |
| TestMatchKind::All => MatchKind::All, |
| TestMatchKind::LeftmostFirst => MatchKind::LeftmostFirst, |
| TestMatchKind::LeftmostLongest => return false, |
| }; |
| |
| let syntax_config = SyntaxConfig::new() |
| .case_insensitive(test.case_insensitive()) |
| .unicode(test.unicode()) |
| .utf8(test.utf8()); |
| let dense_config = dense::Config::new() |
| .anchored(test.anchored()) |
| .match_kind(match_kind) |
| .unicode_word_boundary(true); |
| let regex_config = Regex::config().utf8(test.utf8()); |
| |
| builder |
| .configure(regex_config) |
| .syntax(syntax_config) |
| .thompson(config_thompson(test)) |
| .dense(dense_config); |
| true |
| } |
| |
| /// Configuration of a Thompson NFA compiler from a regex test. |
| fn config_thompson(test: &RegexTest) -> thompson::Config { |
| thompson::Config::new().utf8(test.utf8()) |
| } |