| use regex::internal::ExecBuilder; |
| |
| /// Given a regex, check if all of the backends produce the same |
| /// results on a number of different inputs. |
| /// |
| /// For now this just throws quickcheck at the problem, which |
| /// is not very good because it only really tests half of the |
| /// problem space. It is pretty unlikely that a random string |
| /// will match any given regex, so this will probably just |
| /// be checking that the different backends fail in the same |
| /// way. This is still worthwhile to test, but is definitely not |
| /// the whole story. |
| /// |
| /// TODO(ethan): In order to cover the other half of the problem |
| /// space, we should generate a random matching string by inspecting |
| /// the AST of the input regex. The right way to do this probably |
| /// involves adding a custom Arbitrary instance around a couple |
| /// of newtypes. That way we can respect the quickcheck size hinting |
| /// and shrinking and whatnot. |
| pub fn backends_are_consistent(re: &str) -> Result<u64, String> { |
| let standard_backends = vec![ |
| ( |
| "bounded_backtracking_re", |
| ExecBuilder::new(re) |
| .bounded_backtracking() |
| .build() |
| .map(|exec| exec.into_regex()) |
| .map_err(|err| format!("{}", err))?, |
| ), |
| ( |
| "pikevm_re", |
| ExecBuilder::new(re) |
| .nfa() |
| .build() |
| .map(|exec| exec.into_regex()) |
| .map_err(|err| format!("{}", err))?, |
| ), |
| ( |
| "default_re", |
| ExecBuilder::new(re) |
| .build() |
| .map(|exec| exec.into_regex()) |
| .map_err(|err| format!("{}", err))?, |
| ), |
| ]; |
| |
| let utf8bytes_backends = vec![ |
| ( |
| "bounded_backtracking_utf8bytes_re", |
| ExecBuilder::new(re) |
| .bounded_backtracking() |
| .bytes(true) |
| .build() |
| .map(|exec| exec.into_regex()) |
| .map_err(|err| format!("{}", err))?, |
| ), |
| ( |
| "pikevm_utf8bytes_re", |
| ExecBuilder::new(re) |
| .nfa() |
| .bytes(true) |
| .build() |
| .map(|exec| exec.into_regex()) |
| .map_err(|err| format!("{}", err))?, |
| ), |
| ( |
| "default_utf8bytes_re", |
| ExecBuilder::new(re) |
| .bytes(true) |
| .build() |
| .map(|exec| exec.into_regex()) |
| .map_err(|err| format!("{}", err))?, |
| ), |
| ]; |
| |
| let bytes_backends = vec![ |
| ( |
| "bounded_backtracking_bytes_re", |
| ExecBuilder::new(re) |
| .bounded_backtracking() |
| .only_utf8(false) |
| .build() |
| .map(|exec| exec.into_byte_regex()) |
| .map_err(|err| format!("{}", err))?, |
| ), |
| ( |
| "pikevm_bytes_re", |
| ExecBuilder::new(re) |
| .nfa() |
| .only_utf8(false) |
| .build() |
| .map(|exec| exec.into_byte_regex()) |
| .map_err(|err| format!("{}", err))?, |
| ), |
| ( |
| "default_bytes_re", |
| ExecBuilder::new(re) |
| .only_utf8(false) |
| .build() |
| .map(|exec| exec.into_byte_regex()) |
| .map_err(|err| format!("{}", err))?, |
| ), |
| ]; |
| |
| Ok(string_checker::check_backends(&standard_backends)? |
| + string_checker::check_backends(&utf8bytes_backends)? |
| + bytes_checker::check_backends(&bytes_backends)?) |
| } |
| |
| // |
| // A consistency checker parameterized by the input type (&str or &[u8]). |
| // |
| |
macro_rules! checker {
    ($module_name:ident, $regex_type:path, $mk_input:expr) => {
        mod $module_name {
            use quickcheck;
            use quickcheck::{Arbitrary, TestResult};

            /// Compares every backend after the first against the first
            /// one and returns the sum of the per-pair quickcheck counts.
            ///
            /// An empty slice has nothing to compare and yields `Ok(0)`.
            /// The first inconsistent pair aborts the run with an error
            /// message naming both backends.
            pub fn check_backends(
                backends: &[(&str, $regex_type)],
            ) -> Result<u64, String> {
                // `split_first` avoids the out-of-range slice panic that
                // `backends[1..]` would hit on an empty slice.
                let (reference, others) = match backends.split_first() {
                    Some(parts) => parts,
                    None => return Ok(0),
                };

                let mut total_passed = 0;
                for candidate in others {
                    total_passed +=
                        quickcheck_regex_eq(reference, candidate)?;
                }

                Ok(total_passed)
            }

            /// Runs quickcheck over one pair of named regexes, turning a
            /// failing result into a message that names both backends.
            fn quickcheck_regex_eq(
                &(name1, ref re1): &(&str, $regex_type),
                &(name2, ref re2): &(&str, $regex_type),
            ) -> Result<u64, String> {
                quickcheck::QuickCheck::new()
                    .quicktest(RegexEqualityTest::new(
                        re1.clone(),
                        re2.clone(),
                    ))
                    .map_err(|err| {
                        // The space before the line continuation is
                        // significant: `\` swallows the newline and all
                        // leading whitespace on the following line.
                        format!(
                            "{}(/{}/) and {}(/{}/) are inconsistent. \
                             QuickCheck Err: {:?}",
                            name1, re1, name2, re2, err
                        )
                    })
            }

            /// A quickcheck property holding the two regexes to compare.
            struct RegexEqualityTest {
                re1: $regex_type,
                re2: $regex_type,
            }

            impl RegexEqualityTest {
                fn new(re1: $regex_type, re2: $regex_type) -> Self {
                    RegexEqualityTest { re1, re2 }
                }
            }

            impl quickcheck::Testable for RegexEqualityTest {
                /// Generates one input and checks that both regexes give
                /// the same answer for `find`, `captures`, `find_iter`,
                /// `captures_iter` and `split`, in that order.
                ///
                /// Sequences are compared in full, so a difference in the
                /// number of matches, groups or pieces is a mismatch too.
                fn result(&self, g: &mut quickcheck::Gen) -> TestResult {
                    let input = $mk_input(g);
                    let input = &input;

                    // Shared failure report; `what` names the API that
                    // disagreed.
                    let mismatch = |what: &str| {
                        TestResult::error(format!(
                            "{} mismatch input={:?}",
                            what, input
                        ))
                    };

                    if self.re1.find(input) != self.re2.find(input) {
                        return mismatch("find");
                    }

                    let cap1 = self.re1.captures(input);
                    let cap2 = self.re2.captures(input);
                    match (cap1, cap2) {
                        (None, None) => {}
                        (Some(cap1), Some(cap2)) => {
                            if !cap1.iter().eq(cap2.iter()) {
                                return mismatch("captures");
                            }
                        }
                        _ => return mismatch("captures"),
                    }

                    // `Iterator::eq` also compares lengths, which a
                    // `zip`-based comparison would silently ignore.
                    let fi1 = self.re1.find_iter(input);
                    let fi2 = self.re2.find_iter(input);
                    if !fi1.eq(fi2) {
                        return mismatch("find_iter");
                    }

                    // Walk both capture iterators in lock step so that
                    // one running out before the other is detected.
                    let mut ci1 = self.re1.captures_iter(input);
                    let mut ci2 = self.re2.captures_iter(input);
                    loop {
                        match (ci1.next(), ci2.next()) {
                            (None, None) => break,
                            (Some(cap1), Some(cap2)) => {
                                if !cap1.iter().eq(cap2.iter()) {
                                    return mismatch("captures_iter");
                                }
                            }
                            _ => return mismatch("captures_iter"),
                        }
                    }

                    let s1 = self.re1.split(input);
                    let s2 = self.re2.split(input);
                    if !s1.eq(s2) {
                        return mismatch("split");
                    }

                    TestResult::passed()
                }
            }
        } // mod
    }; // rule case
} // macro_rules!
| |
| checker!(string_checker, ::regex::Regex, |gen| String::arbitrary(gen)); |
| checker!(bytes_checker, ::regex::bytes::Regex, |gen| Vec::<u8>::arbitrary( |
| gen |
| )); |