vendor/regex-automata/tests/dfa/suite.rs - toolchain/rustc - Git at Google

 use regex_automata::{
     dfa::{self, dense, regex::Regex, sparse, Automaton},
     nfa::thompson,
     MatchKind, SyntaxConfig,
 };
 use regex_syntax as syntax;

 use regex_test::{
     bstr::{BString, ByteSlice},
     CompiledRegex, Match, MatchKind as TestMatchKind, RegexTest, RegexTests,
     SearchKind as TestSearchKind, TestResult, TestRunner,
 };

 use crate::{suite, Result};

 /// Drives the whole regex test suite through a dense DFA regex whose builder
 /// is left at its default configuration.
 #[test]
 fn unminimized_default() -> Result<()> {
     let compile = dense_compiler(Regex::builder());
     TestRunner::new()?.test_iter(suite()?.iter(), compile).assert();
     Ok(())
 }

 /// Drives the whole regex test suite through a dense DFA regex built with
 /// byte classes turned off.
 #[test]
 fn unminimized_no_byte_class() -> Result<()> {
     let dense_cfg = dense::Config::new().byte_classes(false);
     let mut re_builder = Regex::builder();
     re_builder.dense(dense_cfg);
     let compile = dense_compiler(re_builder);

     TestRunner::new()?.test_iter(suite()?.iter(), compile).assert();
     Ok(())
 }

 /// Drives the whole regex test suite through a dense DFA regex whose
 /// Thompson NFA is compiled with shrinking turned off.
 #[test]
 fn unminimized_no_nfa_shrink() -> Result<()> {
     let nfa_cfg = thompson::Config::new().shrink(false);
     let mut re_builder = Regex::builder();
     re_builder.thompson(nfa_cfg);
     let compile = dense_compiler(re_builder);

     TestRunner::new()?.test_iter(suite()?.iter(), compile).assert();
     Ok(())
 }

 /// Drives the regex test suite through a dense DFA regex with minimization
 /// enabled and every other setting left at its default.
 #[test]
 fn minimized_default() -> Result<()> {
     let mut re_builder = Regex::builder();
     re_builder.dense(dense::Config::new().minimize(true));
     let compile = dense_compiler(re_builder);

     // The "expensive" regexes produce DFAs so large that minimizing them
     // takes far too long, so they are blacklisted here.
     TestRunner::new()?
         .blacklist("expensive")
         .test_iter(suite()?.iter(), compile)
         .assert();
     Ok(())
 }

 /// Drives the regex test suite through a dense DFA regex with minimization
 /// enabled and byte classes turned off.
 #[test]
 fn minimized_no_byte_class() -> Result<()> {
     let dense_cfg = dense::Config::new().minimize(true).byte_classes(false);
     let mut re_builder = Regex::builder();
     re_builder.dense(dense_cfg);
     let compile = dense_compiler(re_builder);

     // The "expensive" regexes produce DFAs so large that minimizing them
     // takes far too long, so they are blacklisted here.
     TestRunner::new()?
         .blacklist("expensive")
         .test_iter(suite()?.iter(), compile)
         .assert();
     Ok(())
 }

 /// Drives the whole regex test suite through a regex backed by sparse DFAs,
 /// starting from a builder left at its default configuration.
 #[test]
 fn sparse_unminimized_default() -> Result<()> {
     let compile = sparse_compiler(Regex::builder());
     TestRunner::new()?.test_iter(suite()?.iter(), compile).assert();
     Ok(())
 }

 /// Sanity check for dense DFA serialization: each compiled regex has its
 /// forward and reverse DFAs written out as native-endian bytes, and every
 /// test run reads them back and searches with the deserialized DFAs.
 #[test]
 fn serialization_unminimized_default() -> Result<()> {
     let compile = compiler(Regex::builder(), |builder, re| {
         // The matcher closure outlives this call, so it needs its own copy
         // of the builder.
         let owned_builder = builder.clone();
         // The second tuple element returned by the serializer is ignored.
         let (fwd_bytes, _) = re.forward().to_bytes_native_endian();
         let (rev_bytes, _) = re.reverse().to_bytes_native_endian();
         Ok(CompiledRegex::compiled(move |test| -> Vec<TestResult> {
             let (fwd, _): (dense::DFA<&[u32]>, _) =
                 dense::DFA::from_bytes(&fwd_bytes).unwrap();
             let (rev, _): (dense::DFA<&[u32]>, _) =
                 dense::DFA::from_bytes(&rev_bytes).unwrap();
             run_test(&owned_builder.build_from_dfas(fwd, rev), test)
         }))
     });
     TestRunner::new()?.test_iter(suite()?.iter(), compile).assert();
     Ok(())
 }

 /// Sanity check for sparse DFA serialization: each compiled regex has its
 /// forward and reverse DFAs converted to sparse form and written out as
 /// native-endian bytes, and every test run reads them back and searches with
 /// the deserialized sparse DFAs.
 #[test]
 fn sparse_serialization_unminimized_default() -> Result<()> {
     let compile = compiler(Regex::builder(), |builder, re| {
         // The matcher closure outlives this call, so it needs its own copy
         // of the builder.
         let owned_builder = builder.clone();
         let fwd_sparse = re.forward().to_sparse()?;
         let fwd_bytes = fwd_sparse.to_bytes_native_endian();
         let rev_sparse = re.reverse().to_sparse()?;
         let rev_bytes = rev_sparse.to_bytes_native_endian();
         Ok(CompiledRegex::compiled(move |test| -> Vec<TestResult> {
             let (fwd, _): (sparse::DFA<&[u8]>, _) =
                 sparse::DFA::from_bytes(&fwd_bytes).unwrap();
             let (rev, _): (sparse::DFA<&[u8]>, _) =
                 sparse::DFA::from_bytes(&rev_bytes).unwrap();
             run_test(&owned_builder.build_from_dfas(fwd, rev), test)
         }))
     });
     TestRunner::new()?.test_iter(suite()?.iter(), compile).assert();
     Ok(())
 }

 /// Returns a test compiler whose matchers search with the regex exactly as
 /// the builder produced it.
 fn dense_compiler(
     builder: dfa::regex::Builder,
 ) -> impl FnMut(&RegexTest, &[BString]) -> Result<CompiledRegex> {
     compiler(builder, |_, dense_re| {
         let matcher =
             CompiledRegex::compiled(move |test| -> Vec<TestResult> {
                 run_test(&dense_re, test)
             });
         Ok(matcher)
     })
 }

 /// Returns a test compiler whose matchers search with a regex rebuilt from
 /// the sparse forms of the built regex's forward and reverse DFAs.
 fn sparse_compiler(
     builder: dfa::regex::Builder,
 ) -> impl FnMut(&RegexTest, &[BString]) -> Result<CompiledRegex> {
     compiler(builder, |builder, dense_re| {
         let sparse_fwd = dense_re.forward().to_sparse()?;
         let sparse_rev = dense_re.reverse().to_sparse()?;
         let sparse_re = builder.build_from_dfas(sparse_fwd, sparse_rev);
         let matcher =
             CompiledRegex::compiled(move |test| -> Vec<TestResult> {
                 run_test(&sparse_re, test)
             });
         Ok(matcher)
     })
 }

 /// Builds the closure the test runner calls to compile each regex test.
 ///
 /// For every test the closure decodes the patterns as UTF-8, skips tests
 /// the DFA cannot handle, configures `builder` from the test, builds the
 /// regex and hands it (with the builder) to `create_matcher`.
 fn compiler(
     mut builder: dfa::regex::Builder,
     mut create_matcher: impl FnMut(
         &dfa::regex::Builder,
         Regex,
     ) -> Result<CompiledRegex>,
 ) -> impl FnMut(&RegexTest, &[BString]) -> Result<CompiledRegex> {
     move |test, regexes| {
         // A pattern that is not valid UTF-8 is reported as an error.
         let mut patterns: Vec<String> = Vec::with_capacity(regexes.len());
         for r in regexes {
             patterns.push(r.to_str()?.to_string());
         }

         // DFAs do not support Unicode word boundaries on non-ASCII input,
         // so such tests are skipped.
         // TODO: Modify Hir to report facts like this, instead of needing to
         // build an NFA to do it.
         let mut nfa_builder = thompson::Builder::new();
         nfa_builder.configure(config_thompson(test));
         if let Ok(nfa) = nfa_builder.build_many(&patterns) {
             let all_ascii = test.input().iter().all(u8::is_ascii);
             if nfa.has_word_boundary_unicode() && !all_ascii {
                 return Ok(CompiledRegex::skip());
             }
         }

         if configure_regex_builder(test, &mut builder) {
             let re = builder.build_many(&patterns)?;
             create_matcher(&builder, re)
         } else {
             // The test uses a setting the builder cannot express.
             Ok(CompiledRegex::skip())
         }
     }
 }

 /// Runs one regex test against `re` and returns two results: the
 /// `is_match` outcome, then the matches from the iterator selected by the
 /// test's search kind (capped at the test's match limit, if any).
 fn run_test<A: Automaton>(re: &Regex<A>, test: &RegexTest) -> Vec<TestResult> {
     let matched = match re.is_match(test.input()) {
         true => TestResult::matched(),
         false => TestResult::no_match(),
     }
     .name("is_match");

     // No limit on the test means every match is taken.
     let limit = test.match_limit().unwrap_or(std::usize::MAX);
     let found = match test.search_kind() {
         TestSearchKind::Overlapping => TestResult::matches(
             re.find_overlapping_iter(test.input()).take(limit).map(|m| {
                 Match {
                     id: m.pattern().as_usize(),
                     start: m.start(),
                     end: m.end(),
                 }
             }),
         )
         .name("find_overlapping_iter"),
         TestSearchKind::Leftmost => TestResult::matches(
             re.find_leftmost_iter(test.input()).take(limit).map(|m| {
                 Match {
                     id: m.pattern().as_usize(),
                     start: m.start(),
                     end: m.end(),
                 }
             }),
         )
         .name("find_leftmost_iter"),
         TestSearchKind::Earliest => TestResult::matches(
             re.find_earliest_iter(test.input()).take(limit).map(|m| {
                 Match {
                     id: m.pattern().as_usize(),
                     start: m.start(),
                     end: m.end(),
                 }
             }),
         )
         .name("find_earliest_iter"),
     };

     vec![matched, found]
 }

 /// Applies the settings of `test` (match kind, case insensitivity, Unicode,
 /// UTF-8 and anchoring) to `builder`.
 ///
 /// Returns false, leaving the builder untouched, when the test asks for
 /// leftmost-longest matching; the caller should then skip the test.
 /// Otherwise returns true.
 fn configure_regex_builder(
     test: &RegexTest,
     builder: &mut dfa::regex::Builder,
 ) -> bool {
     let kind = match test.match_kind() {
         TestMatchKind::LeftmostLongest => return false,
         TestMatchKind::LeftmostFirst => MatchKind::LeftmostFirst,
         TestMatchKind::All => MatchKind::All,
     };
     let utf8 = test.utf8();

     builder.configure(Regex::config().utf8(utf8));
     builder.syntax(
         SyntaxConfig::new()
             .case_insensitive(test.case_insensitive())
             .unicode(test.unicode())
             .utf8(utf8),
     );
     builder.thompson(config_thompson(test));
     builder.dense(
         dense::Config::new()
             .anchored(test.anchored())
             .match_kind(kind)
             .unicode_word_boundary(true),
     );
     true
 }

 /// Builds the Thompson NFA compiler configuration for a regex test; only
 /// the test's UTF-8 setting is carried over.
 fn config_thompson(test: &RegexTest) -> thompson::Config {
     let utf8 = test.utf8();
     thompson::Config::new().utf8(utf8)
 }
	use regex_automata::{
	dfa::{self, dense, regex::Regex, sparse, Automaton},
	nfa::thompson,
	MatchKind, SyntaxConfig,
	};
	use regex_syntax as syntax;

	use regex_test::{
	bstr::{BString, ByteSlice},
	CompiledRegex, Match, MatchKind as TestMatchKind, RegexTest, RegexTests,
	SearchKind as TestSearchKind, TestResult, TestRunner,
	};

	use crate::{suite, Result};

	/// Drives the whole regex test suite through a dense DFA regex whose builder
	/// is left at its default configuration.
	#[test]
	fn unminimized_default() -> Result<()> {
	    let compile = dense_compiler(Regex::builder());
	    TestRunner::new()?.test_iter(suite()?.iter(), compile).assert();
	    Ok(())
	}

	/// Drives the whole regex test suite through a dense DFA regex built with
	/// byte classes turned off.
	#[test]
	fn unminimized_no_byte_class() -> Result<()> {
	    let dense_cfg = dense::Config::new().byte_classes(false);
	    let mut re_builder = Regex::builder();
	    re_builder.dense(dense_cfg);
	    let compile = dense_compiler(re_builder);

	    TestRunner::new()?.test_iter(suite()?.iter(), compile).assert();
	    Ok(())
	}

	/// Drives the whole regex test suite through a dense DFA regex whose
	/// Thompson NFA is compiled with shrinking turned off.
	#[test]
	fn unminimized_no_nfa_shrink() -> Result<()> {
	    let nfa_cfg = thompson::Config::new().shrink(false);
	    let mut re_builder = Regex::builder();
	    re_builder.thompson(nfa_cfg);
	    let compile = dense_compiler(re_builder);

	    TestRunner::new()?.test_iter(suite()?.iter(), compile).assert();
	    Ok(())
	}

	/// Drives the regex test suite through a dense DFA regex with minimization
	/// enabled and every other setting left at its default.
	#[test]
	fn minimized_default() -> Result<()> {
	    let mut re_builder = Regex::builder();
	    re_builder.dense(dense::Config::new().minimize(true));
	    let compile = dense_compiler(re_builder);

	    // The "expensive" regexes produce DFAs so large that minimizing them
	    // takes far too long, so they are blacklisted here.
	    TestRunner::new()?
	        .blacklist("expensive")
	        .test_iter(suite()?.iter(), compile)
	        .assert();
	    Ok(())
	}

	/// Drives the regex test suite through a dense DFA regex with minimization
	/// enabled and byte classes turned off.
	#[test]
	fn minimized_no_byte_class() -> Result<()> {
	    let dense_cfg = dense::Config::new().minimize(true).byte_classes(false);
	    let mut re_builder = Regex::builder();
	    re_builder.dense(dense_cfg);
	    let compile = dense_compiler(re_builder);

	    // The "expensive" regexes produce DFAs so large that minimizing them
	    // takes far too long, so they are blacklisted here.
	    TestRunner::new()?
	        .blacklist("expensive")
	        .test_iter(suite()?.iter(), compile)
	        .assert();
	    Ok(())
	}

	/// Drives the whole regex test suite through a regex backed by sparse DFAs,
	/// starting from a builder left at its default configuration.
	#[test]
	fn sparse_unminimized_default() -> Result<()> {
	    let compile = sparse_compiler(Regex::builder());
	    TestRunner::new()?.test_iter(suite()?.iter(), compile).assert();
	    Ok(())
	}

	/// A basic sanity test that checks we can serialize and then deserialize a
	/// regex using dense DFAs, and that the resulting regex can be used for
	/// searching correctly.
	#[test]
	fn serialization_unminimized_default() -> Result<()> {
	    let builder = Regex::builder();
	    let my_compiler = |builder| {
	        compiler(builder, |builder, re| {
	            // The matcher closure outlives this call, so it needs its own
	            // copy of the builder.
	            let builder = builder.clone();
	            // The second tuple element returned by the serializer is
	            // ignored.
	            let (fwd_bytes, _) = re.forward().to_bytes_native_endian();
	            let (rev_bytes, _) = re.reverse().to_bytes_native_endian();
	            Ok(CompiledRegex::compiled(move |test| -> Vec<TestResult> {
	                // Deserialize on every run so the search really goes
	                // through the round-tripped DFAs.
	                let fwd: dense::DFA<&[u32]> =
	                    dense::DFA::from_bytes(&fwd_bytes).unwrap().0;
	                let rev: dense::DFA<&[u32]> =
	                    dense::DFA::from_bytes(&rev_bytes).unwrap().0;
	                let re = builder.build_from_dfas(fwd, rev);

	                run_test(&re, test)
	            }))
	        })
	    };
	    TestRunner::new()?
	        .test_iter(suite()?.iter(), my_compiler(builder))
	        .assert();
	    Ok(())
	}

	/// A basic sanity test that checks we can serialize and then deserialize a
	/// regex using sparse DFAs, and that the resulting regex can be used for
	/// searching correctly.
	#[test]
	fn sparse_serialization_unminimized_default() -> Result<()> {
	    let builder = Regex::builder();
	    let my_compiler = |builder| {
	        compiler(builder, |builder, re| {
	            // The matcher closure outlives this call, so it needs its own
	            // copy of the builder.
	            let builder = builder.clone();
	            let fwd_bytes = re.forward().to_sparse()?.to_bytes_native_endian();
	            let rev_bytes = re.reverse().to_sparse()?.to_bytes_native_endian();
	            Ok(CompiledRegex::compiled(move |test| -> Vec<TestResult> {
	                // Deserialize on every run so the search really goes
	                // through the round-tripped DFAs.
	                let fwd: sparse::DFA<&[u8]> =
	                    sparse::DFA::from_bytes(&fwd_bytes).unwrap().0;
	                let rev: sparse::DFA<&[u8]> =
	                    sparse::DFA::from_bytes(&rev_bytes).unwrap().0;
	                let re = builder.build_from_dfas(fwd, rev);
	                run_test(&re, test)
	            }))
	        })
	    };
	    TestRunner::new()?
	        .test_iter(suite()?.iter(), my_compiler(builder))
	        .assert();
	    Ok(())
	}

	/// Returns a test compiler whose matchers search with the regex exactly as
	/// the builder produced it.
	fn dense_compiler(
	    builder: dfa::regex::Builder,
	) -> impl FnMut(&RegexTest, &[BString]) -> Result<CompiledRegex> {
	    compiler(builder, |_, re| {
	        Ok(CompiledRegex::compiled(move |test| -> Vec<TestResult> {
	            run_test(&re, test)
	        }))
	    })
	}

	/// Returns a test compiler whose matchers search with a regex rebuilt from
	/// the sparse forms of the built regex's forward and reverse DFAs.
	fn sparse_compiler(
	    builder: dfa::regex::Builder,
	) -> impl FnMut(&RegexTest, &[BString]) -> Result<CompiledRegex> {
	    compiler(builder, |builder, re| {
	        let fwd = re.forward().to_sparse()?;
	        let rev = re.reverse().to_sparse()?;
	        let re = builder.build_from_dfas(fwd, rev);
	        Ok(CompiledRegex::compiled(move |test| -> Vec<TestResult> {
	            run_test(&re, test)
	        }))
	    })
	}

	/// Builds the closure the test runner calls to compile each regex test.
	///
	/// For every test the closure decodes the patterns as UTF-8, skips tests
	/// the DFA cannot handle, configures `builder` from the test, builds the
	/// regex and hands it (with the builder) to `create_matcher`.
	fn compiler(
	    mut builder: dfa::regex::Builder,
	    mut create_matcher: impl FnMut(
	        &dfa::regex::Builder,
	        Regex,
	    ) -> Result<CompiledRegex>,
	) -> impl FnMut(&RegexTest, &[BString]) -> Result<CompiledRegex> {
	    move |test, regexes| {
	        // A pattern that is not valid UTF-8 is reported as an error.
	        let regexes = regexes
	            .iter()
	            .map(|r| r.to_str().map(|s| s.to_string()))
	            .collect::<std::result::Result<Vec<String>, _>>()?;

	        // Check if our regex contains things that aren't supported by DFAs.
	        // That is, Unicode word boundaries when searching non-ASCII text.
	        let mut thompson = thompson::Builder::new();
	        thompson.configure(config_thompson(test));
	        // TODO: Modify Hir to report facts like this, instead of needing to
	        // build an NFA to do it.
	        if let Ok(nfa) = thompson.build_many(&regexes) {
	            let non_ascii = test.input().iter().any(|&b| !b.is_ascii());
	            if nfa.has_word_boundary_unicode() && non_ascii {
	                return Ok(CompiledRegex::skip());
	            }
	        }
	        // Skip tests whose settings the builder cannot express.
	        if !configure_regex_builder(test, &mut builder) {
	            return Ok(CompiledRegex::skip());
	        }
	        create_matcher(&builder, builder.build_many(&regexes)?)
	    }
	}

	/// Runs one regex test against `re` and returns two results: the
	/// `is_match` outcome, then the matches from the iterator selected by the
	/// test's search kind (capped at the test's match limit, if any).
	fn run_test<A: Automaton>(re: &Regex<A>, test: &RegexTest) -> Vec<TestResult> {
	    let is_match = if re.is_match(test.input()) {
	        TestResult::matched()
	    } else {
	        TestResult::no_match()
	    };
	    let is_match = is_match.name("is_match");

	    // Each arm converts the regex's matches into the test harness's
	    // `Match` type; no limit on the test means every match is taken.
	    let find_matches = match test.search_kind() {
	        TestSearchKind::Earliest => {
	            let it = re
	                .find_earliest_iter(test.input())
	                .take(test.match_limit().unwrap_or(std::usize::MAX))
	                .map(|m| Match {
	                    id: m.pattern().as_usize(),
	                    start: m.start(),
	                    end: m.end(),
	                });
	            TestResult::matches(it).name("find_earliest_iter")
	        }
	        TestSearchKind::Leftmost => {
	            let it = re
	                .find_leftmost_iter(test.input())
	                .take(test.match_limit().unwrap_or(std::usize::MAX))
	                .map(|m| Match {
	                    id: m.pattern().as_usize(),
	                    start: m.start(),
	                    end: m.end(),
	                });
	            TestResult::matches(it).name("find_leftmost_iter")
	        }
	        TestSearchKind::Overlapping => {
	            let it = re
	                .find_overlapping_iter(test.input())
	                .take(test.match_limit().unwrap_or(std::usize::MAX))
	                .map(|m| Match {
	                    id: m.pattern().as_usize(),
	                    start: m.start(),
	                    end: m.end(),
	                });
	            TestResult::matches(it).name("find_overlapping_iter")
	        }
	    };

	    vec![is_match, find_matches]
	}

	/// Applies the settings of `test` (match kind, case insensitivity, Unicode,
	/// UTF-8 and anchoring) to `builder`.
	///
	/// Returns false, leaving the builder untouched, when the test asks for
	/// leftmost-longest matching; the caller should then skip the test.
	/// Otherwise returns true.
	fn configure_regex_builder(
	    test: &RegexTest,
	    builder: &mut dfa::regex::Builder,
	) -> bool {
	    let kind = match test.match_kind() {
	        TestMatchKind::LeftmostLongest => return false,
	        TestMatchKind::LeftmostFirst => MatchKind::LeftmostFirst,
	        TestMatchKind::All => MatchKind::All,
	    };
	    let utf8 = test.utf8();

	    builder.configure(Regex::config().utf8(utf8));
	    builder.syntax(
	        SyntaxConfig::new()
	            .case_insensitive(test.case_insensitive())
	            .unicode(test.unicode())
	            .utf8(utf8),
	    );
	    builder.thompson(config_thompson(test));
	    builder.dense(
	        dense::Config::new()
	            .anchored(test.anchored())
	            .match_kind(kind)
	            .unicode_word_boundary(true),
	    );
	    true
	}

	/// Builds the Thompson NFA compiler configuration for a regex test; only
	/// the test's UTF-8 setting is carried over.
	fn config_thompson(test: &RegexTest) -> thompson::Config {
	    let utf8 = test.utf8();
	    thompson::Config::new().utf8(utf8)
	}