Blame - tests/collection.rs - platform/external/rust/crates/regex-automata

blob: 68b03229e50b693324b113e1d29393547c3f1b02 [file] [log] [blame]

Jakub Kotur	3bceaeb	2020-12-21 17:28:16 +0100	[diff] [blame]	1	use std::collections::BTreeMap;
				2	use std::env;
				3	use std::fmt::{self, Write};
				4	use std::thread;
				5
				6	use regex;
				7	use regex_automata::{DenseDFA, ErrorKind, Regex, RegexBuilder, StateID, DFA};
				8	use serde_bytes;
				9	use toml;
				10
				11	macro_rules! load {
				12	($col:ident, $path:expr) => {
				13	$col.extend(RegexTests::load(
				14	concat!("../data/tests/", $path),
				15	include_bytes!(concat!("../data/tests/", $path)),
				16	));
				17	};
				18	}
				19
				20	lazy_static! {
				21	pub static ref SUITE: RegexTestCollection = {
				22	let mut col = RegexTestCollection::new();
				23	load!(col, "fowler/basic.toml");
				24	load!(col, "fowler/nullsubexpr.toml");
				25	load!(col, "fowler/repetition.toml");
				26	load!(col, "fowler/repetition-long.toml");
				27	load!(col, "crazy.toml");
				28	load!(col, "flags.toml");
				29	load!(col, "iter.toml");
				30	load!(col, "no-unicode.toml");
				31	load!(col, "unicode.toml");
				32	col
				33	};
				34	}
				35
				36	#[derive(Clone, Debug)]
				37	pub struct RegexTestCollection {
				38	pub by_name: BTreeMap<String, RegexTest>,
				39	}
				40
				41	#[derive(Clone, Debug, Deserialize)]
				42	pub struct RegexTests {
				43	pub tests: Vec<RegexTest>,
				44	}
				45
				46	#[derive(Clone, Debug, Deserialize)]
				47	pub struct RegexTest {
				48	pub name: String,
				49	#[serde(default)]
				50	pub options: Vec<RegexTestOption>,
				51	pub pattern: String,
				52	#[serde(with = "serde_bytes")]
				53	pub input: Vec<u8>,
				54	#[serde(rename = "matches")]
				55	pub matches: Vec<Match>,
				56	#[serde(default)]
				57	pub captures: Vec<Option<Match>>,
				58	#[serde(default)]
				59	pub fowler_line_number: Option<u64>,
				60	}
				61
				62	#[derive(Clone, Copy, Debug, Deserialize, Eq, PartialEq)]
				63	#[serde(rename_all = "kebab-case")]
				64	pub enum RegexTestOption {
				65	Anchored,
				66	CaseInsensitive,
				67	NoUnicode,
				68	Escaped,
				69	#[serde(rename = "invalid-utf8")]
				70	InvalidUTF8,
				71	}
				72
				73	#[derive(Clone, Copy, Deserialize, Eq, PartialEq)]
				74	pub struct Match {
				75	pub start: usize,
				76	pub end: usize,
				77	}
				78
				79	impl RegexTestCollection {
				80	fn new() -> RegexTestCollection {
				81	RegexTestCollection { by_name: BTreeMap::new() }
				82	}
				83
				84	fn extend(&mut self, tests: RegexTests) {
				85	for test in tests.tests {
				86	let name = test.name.clone();
				87	if self.by_name.contains_key(&name) {
				88	panic!("found duplicate test {}", name);
				89	}
				90	self.by_name.insert(name, test);
				91	}
				92	}
				93
				94	pub fn tests(&self) -> Vec<&RegexTest> {
				95	self.by_name.values().collect()
				96	}
				97	}
				98
				99	impl RegexTests {
				100	fn load(path: &str, slice: &[u8]) -> RegexTests {
				101	let mut data: RegexTests = toml::from_slice(slice)
				102	.expect(&format!("failed to load {}", path));
				103	for test in &mut data.tests {
				104	if test.options.contains(&RegexTestOption::Escaped) {
				105	test.input = unescape_bytes(&test.input);
				106	}
				107	}
				108	data
				109	}
				110	}
				111
				112	#[derive(Debug)]
				113	pub struct RegexTester {
				114	asserted: bool,
				115	results: RegexTestResults,
				116	skip_expensive: bool,
				117	whitelist: Vec<regex::Regex>,
				118	blacklist: Vec<regex::Regex>,
				119	}
				120
				121	impl Drop for RegexTester {
				122	fn drop(&mut self) {
				123	// If we haven't asserted yet, then the test is probably buggy, so
				124	// fail it. But if we're already panicking (e.g., a bug in the regex
				125	// engine), then don't double-panic, which causes an immediate abort.
				126	if !thread::panicking() && !self.asserted {
				127	panic!("must call RegexTester::assert at end of test");
				128	}
				129	}
				130	}
				131
				132	impl RegexTester {
				133	pub fn new() -> RegexTester {
				134	let mut tester = RegexTester {
				135	asserted: false,
				136	results: RegexTestResults::default(),
				137	skip_expensive: false,
				138	whitelist: vec![],
				139	blacklist: vec![],
				140	};
				141	for x in env::var("REGEX_TEST").unwrap_or("".to_string()).split(",") {
				142	let x = x.trim();
				143	if x.is_empty() {
				144	continue;
				145	}
				146	if x.starts_with("-") {
				147	tester = tester.blacklist(&x[1..]);
				148	} else {
				149	tester = tester.whitelist(x);
				150	}
				151	}
				152	tester
				153	}
				154
				155	pub fn skip_expensive(mut self) -> RegexTester {
				156	self.skip_expensive = true;
				157	self
				158	}
				159
				160	pub fn whitelist(mut self, name: &str) -> RegexTester {
				161	self.whitelist.push(regex::Regex::new(name).unwrap());
				162	self
				163	}
				164
				165	pub fn blacklist(mut self, name: &str) -> RegexTester {
				166	self.blacklist.push(regex::Regex::new(name).unwrap());
				167	self
				168	}
				169
				170	pub fn assert(&mut self) {
				171	self.asserted = true;
				172	self.results.assert();
				173	}
				174
				175	pub fn build_regex<S: StateID>(
				176	&self,
				177	mut builder: RegexBuilder,
				178	test: &RegexTest,
				179	) -> Option<Regex<DenseDFA<Vec<S>, S>>> {
				180	if self.skip(test) {
				181	return None;
				182	}
				183	self.apply_options(test, &mut builder);
				184
				185	match builder.build_with_size::<S>(&test.pattern) {
				186	Ok(re) => Some(re),
				187	Err(err) => {
				188	if let ErrorKind::Unsupported(_) = *err.kind() {
				189	None
				190	} else {
				191	panic!(
				192	"failed to build {:?} with pattern '{:?}': {}",
				193	test.name, test.pattern, err
				194	);
				195	}
				196	}
				197	}
				198	}
				199
				200	pub fn test_all<'a, I, T>(&mut self, builder: RegexBuilder, tests: I)
				201	where
				202	I: IntoIterator<IntoIter = T, Item = &'a RegexTest>,
				203	T: Iterator<Item = &'a RegexTest>,
				204	{
				205	for test in tests {
				206	let builder = builder.clone();
				207	let re: Regex = match self.build_regex(builder, test) {
				208	None => continue,
				209	Some(re) => re,
				210	};
				211	self.test(test, &re);
				212	}
				213	}
				214
				215	pub fn test<'a, D: DFA>(&mut self, test: &RegexTest, re: &Regex<D>) {
				216	self.test_is_match(test, re);
				217	self.test_find(test, re);
				218	// Some tests (namely, fowler) are designed only to detect the
				219	// first match even if there are more subsequent matches. To that
				220	// end, we only test match iteration when the number of matches
				221	// expected is not 1, or if the test name has 'iter' in it.
				222	if test.name.contains("iter") \|\| test.matches.len() != 1 {
				223	self.test_find_iter(test, re);
				224	}
				225	}
				226
				227	pub fn test_is_match<'a, D: DFA>(
				228	&mut self,
				229	test: &RegexTest,
				230	re: &Regex<D>,
				231	) {
				232	self.asserted = false;
				233
				234	let got = re.is_match(&test.input);
				235	let expected = test.matches.len() >= 1;
				236	if got == expected {
				237	self.results.succeeded.push(test.clone());
				238	return;
				239	}
				240	self.results.failed.push(RegexTestFailure {
				241	test: test.clone(),
				242	kind: RegexTestFailureKind::IsMatch,
				243	});
				244	}
				245
				246	pub fn test_find<'a, D: DFA>(&mut self, test: &RegexTest, re: &Regex<D>) {
				247	self.asserted = false;
				248
				249	let got =
				250	re.find(&test.input).map(\|(start, end)\| Match { start, end });
				251	if got == test.matches.get(0).map(\|&m\| m) {
				252	self.results.succeeded.push(test.clone());
				253	return;
				254	}
				255	self.results.failed.push(RegexTestFailure {
				256	test: test.clone(),
				257	kind: RegexTestFailureKind::Find { got },
				258	});
				259	}
				260
				261	pub fn test_find_iter<'a, D: DFA>(
				262	&mut self,
				263	test: &RegexTest,
				264	re: &Regex<D>,
				265	) {
				266	self.asserted = false;
				267
				268	let got: Vec<Match> = re
				269	.find_iter(&test.input)
				270	.map(\|(start, end)\| Match { start, end })
				271	.collect();
				272	if got == test.matches {
				273	self.results.succeeded.push(test.clone());
				274	return;
				275	}
				276	self.results.failed.push(RegexTestFailure {
				277	test: test.clone(),
				278	kind: RegexTestFailureKind::FindIter { got },
				279	});
				280	}
				281
				282	fn skip(&self, test: &RegexTest) -> bool {
				283	if self.skip_expensive {
				284	if test.name.starts_with("repetition-long") {
				285	return true;
				286	}
				287	}
				288	if !self.blacklist.is_empty() {
				289	if self.blacklist.iter().any(\|re\| re.is_match(&test.name)) {
				290	return true;
				291	}
				292	}
				293	if !self.whitelist.is_empty() {
				294	if !self.whitelist.iter().any(\|re\| re.is_match(&test.name)) {
				295	return true;
				296	}
				297	}
				298	false
				299	}
				300
				301	fn apply_options(&self, test: &RegexTest, builder: &mut RegexBuilder) {
				302	for opt in &test.options {
				303	match *opt {
				304	RegexTestOption::Anchored => {
				305	builder.anchored(true);
				306	}
				307	RegexTestOption::CaseInsensitive => {
				308	builder.case_insensitive(true);
				309	}
				310	RegexTestOption::NoUnicode => {
				311	builder.unicode(false);
				312	}
				313	RegexTestOption::Escaped => {}
				314	RegexTestOption::InvalidUTF8 => {
				315	builder.allow_invalid_utf8(true);
				316	}
				317	}
				318	}
				319	}
				320	}
				321
				322	#[derive(Clone, Debug, Default)]
				323	pub struct RegexTestResults {
				324	/// Tests that succeeded.
				325	pub succeeded: Vec<RegexTest>,
				326	/// Failed tests, indexed by group name.
				327	pub failed: Vec<RegexTestFailure>,
				328	}
				329
				330	#[derive(Clone, Debug)]
				331	pub struct RegexTestFailure {
				332	test: RegexTest,
				333	kind: RegexTestFailureKind,
				334	}
				335
				336	#[derive(Clone, Debug)]
				337	pub enum RegexTestFailureKind {
				338	IsMatch,
				339	Find { got: Option<Match> },
				340	FindIter { got: Vec<Match> },
				341	}
				342
				343	impl RegexTestResults {
				344	pub fn assert(&self) {
				345	if self.failed.is_empty() {
				346	return;
				347	}
				348	let failures = self
				349	.failed
				350	.iter()
				351	.map(\|f\| f.to_string())
				352	.collect::<Vec<String>>()
				353	.join("\n\n");
				354	panic!(
				355	"found {} failures:\n{}\n{}\n{}\n\n\
				356	Set the REGEX_TEST environment variable to filter tests, \n\
				357	e.g., REGEX_TEST=crazy-misc,-crazy-misc2 runs every test \n\
				358	whose name contains crazy-misc but not crazy-misc2\n\n",
				359	self.failed.len(),
				360	"~".repeat(79),
				361	failures.trim(),
				362	"~".repeat(79)
				363	)
				364	}
				365	}
				366
				367	impl fmt::Display for RegexTestFailure {
				368	fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
				369	write!(
				370	f,
				371	"{}: {}\n \
				372	options: {:?}\n \
				373	pattern: {}\n \
				374	pattern (escape): {}\n \
				375	input: {}\n \
				376	input (escape): {}\n \
				377	input (hex): {}",
				378	self.test.name,
				379	self.kind.fmt(&self.test)?,
				380	self.test.options,
				381	self.test.pattern,
				382	escape_default(&self.test.pattern),
				383	nice_raw_bytes(&self.test.input),
				384	escape_bytes(&self.test.input),
				385	hex_bytes(&self.test.input)
				386	)
				387	}
				388	}
				389
				390	impl RegexTestFailureKind {
				391	fn fmt(&self, test: &RegexTest) -> Result<String, fmt::Error> {
				392	let mut buf = String::new();
				393	match *self {
				394	RegexTestFailureKind::IsMatch => {
				395	if let Some(&m) = test.matches.get(0) {
				396	write!(buf, "expected match (at {}), but none found", m)?
				397	} else {
				398	write!(buf, "expected no match, but found a match")?
				399	}
				400	}
				401	RegexTestFailureKind::Find { got } => write!(
				402	buf,
				403	"expected {:?}, but found {:?}",
				404	test.matches.get(0),
				405	got
				406	)?,
				407	RegexTestFailureKind::FindIter { ref got } => write!(
				408	buf,
				409	"expected {:?}, but found {:?}",
				410	test.matches, got
				411	)?,
				412	}
				413	Ok(buf)
				414	}
				415	}
				416
				417	impl fmt::Display for Match {
				418	fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
				419	write!(f, "({}, {})", self.start, self.end)
				420	}
				421	}
				422
				423	impl fmt::Debug for Match {
				424	fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
				425	write!(f, "({}, {})", self.start, self.end)
				426	}
				427	}
				428
				429	fn nice_raw_bytes(bytes: &[u8]) -> String {
				430	use std::str;
				431
				432	match str::from_utf8(bytes) {
				433	Ok(s) => s.to_string(),
				434	Err(_) => escape_bytes(bytes),
				435	}
				436	}
				437
				438	fn escape_bytes(bytes: &[u8]) -> String {
				439	use std::ascii;
				440
				441	let escaped = bytes
				442	.iter()
				443	.flat_map(\|&b\| ascii::escape_default(b))
				444	.collect::<Vec<u8>>();
				445	String::from_utf8(escaped).unwrap()
				446	}
				447
				448	fn hex_bytes(bytes: &[u8]) -> String {
				449	bytes.iter().map(\|&b\| format!(r"\x{:02X}", b)).collect()
				450	}
				451
				452	fn escape_default(s: &str) -> String {
				453	s.chars().flat_map(\|c\| c.escape_default()).collect()
				454	}
				455
				456	fn unescape_bytes(bytes: &[u8]) -> Vec<u8> {
				457	use std::str;
				458	use unescape::unescape;
				459
				460	unescape(&str::from_utf8(bytes).expect("all input must be valid UTF-8"))
				461	}