vendor/grep-regex-0.1.9/src/strip.rs - toolchain/rustc - Git at Google

 use grep_matcher::LineTerminator;
 use regex_syntax::hir::{self, Hir, HirKind};

 use crate::error::{Error, ErrorKind};

 /// Return an HIR that is guaranteed to never match the given line terminator,
 /// if possible.
 ///
 /// If the transformation isn't possible, then an error is returned.
 ///
 /// In general, if a literal line terminator occurs anywhere in the HIR, then
 /// this will return an error. However, if the line terminator occurs within
 /// a character class with at least one other character (that isn't also a line
 /// terminator), then the line terminator is simply stripped from that class.
 ///
 /// If the given line terminator is not ASCII, then this function returns an
 /// error.
 pub fn strip_from_match(
     expr: Hir,
     line_term: LineTerminator,
 ) -> Result<Hir, Error> {
     if line_term.is_crlf() {
         let expr1 = strip_from_match_ascii(expr, b'\r')?;
         strip_from_match_ascii(expr1, b'\n')
     } else {
         let b = line_term.as_byte();
         if b > 0x7F {
             return Err(Error::new(ErrorKind::InvalidLineTerminator(b)));
         }
         strip_from_match_ascii(expr, b)
     }
 }

 /// The implementation of strip_from_match. The given byte must be ASCII. This
 /// function panics otherwise.
 fn strip_from_match_ascii(expr: Hir, byte: u8) -> Result<Hir, Error> {
     assert!(byte <= 0x7F);
     let chr = byte as char;
     assert_eq!(chr.len_utf8(), 1);

     let invalid = || Err(Error::new(ErrorKind::NotAllowed(chr.to_string())));

     Ok(match expr.into_kind() {
         HirKind::Empty => Hir::empty(),
         HirKind::Literal(hir::Literal::Unicode(c)) => {
             if c == chr {
                 return invalid();
             }
             Hir::literal(hir::Literal::Unicode(c))
         }
         HirKind::Literal(hir::Literal::Byte(b)) => {
             if b as char == chr {
                 return invalid();
             }
             Hir::literal(hir::Literal::Byte(b))
         }
         HirKind::Class(hir::Class::Unicode(mut cls)) => {
             let remove = hir::ClassUnicode::new(Some(
                 hir::ClassUnicodeRange::new(chr, chr),
             ));
             cls.difference(&remove);
             if cls.ranges().is_empty() {
                 return invalid();
             }
             Hir::class(hir::Class::Unicode(cls))
         }
         HirKind::Class(hir::Class::Bytes(mut cls)) => {
             let remove = hir::ClassBytes::new(Some(
                 hir::ClassBytesRange::new(byte, byte),
             ));
             cls.difference(&remove);
             if cls.ranges().is_empty() {
                 return invalid();
             }
             Hir::class(hir::Class::Bytes(cls))
         }
         HirKind::Anchor(x) => Hir::anchor(x),
         HirKind::WordBoundary(x) => Hir::word_boundary(x),
         HirKind::Repetition(mut x) => {
             x.hir = Box::new(strip_from_match_ascii(*x.hir, byte)?);
             Hir::repetition(x)
         }
         HirKind::Group(mut x) => {
             x.hir = Box::new(strip_from_match_ascii(*x.hir, byte)?);
             Hir::group(x)
         }
         HirKind::Concat(xs) => {
             let xs = xs
                 .into_iter()
                 .map(|e| strip_from_match_ascii(e, byte))
                 .collect::<Result<Vec<Hir>, Error>>()?;
             Hir::concat(xs)
         }
         HirKind::Alternation(xs) => {
             let xs = xs
                 .into_iter()
                 .map(|e| strip_from_match_ascii(e, byte))
                 .collect::<Result<Vec<Hir>, Error>>()?;
             Hir::alternation(xs)
         }
     })
 }

 #[cfg(test)]
 mod tests {
     use regex_syntax::Parser;

     use super::{strip_from_match, LineTerminator};
     use crate::error::Error;

     /// Parses `pattern`, strips `line_term` from it and renders the
     /// resulting HIR back to a string.
     fn roundtrip_line_term(
         pattern: &str,
         line_term: LineTerminator,
     ) -> Result<String, Error> {
         let parsed = Parser::new().parse(pattern).unwrap();
         strip_from_match(parsed, line_term).map(|hir| hir.to_string())
     }

     /// Like `roundtrip_line_term` for a single-byte terminator, but leaves
     /// the result unwrapped so failures can be asserted on.
     fn roundtrip_err(pattern: &str, byte: u8) -> Result<String, Error> {
         roundtrip_line_term(pattern, LineTerminator::byte(byte))
     }

     /// Strips a single-byte terminator, panicking if stripping fails.
     fn roundtrip(pattern: &str, byte: u8) -> String {
         roundtrip_err(pattern, byte).unwrap()
     }

     /// Strips a CRLF terminator, panicking if stripping fails.
     fn roundtrip_crlf(pattern: &str) -> String {
         roundtrip_line_term(pattern, LineTerminator::crlf()).unwrap()
     }

     #[test]
     fn various() {
         // The terminator is dropped from classes that have other members.
         assert_eq!(roundtrip(r"[a\n]", b'\n'), "[a]");
         assert_eq!(roundtrip(r"[a\n]", b'a'), "[\n]");
         assert_eq!(roundtrip_crlf(r"[a\n]"), "[a]");
         assert_eq!(roundtrip_crlf(r"[a\r]"), "[a]");
         assert_eq!(roundtrip_crlf(r"[a\r\n]"), "[a]");

         // Byte-oriented classes are handled as well.
         assert_eq!(roundtrip(r"(?-u)\s", b'a'), r"(?-u:[\x09-\x0D\x20])");
         assert_eq!(roundtrip(r"(?-u)\s", b'\n'), r"(?-u:[\x09\x0B-\x0D\x20])");

         // Every spelling of a literal line feed must be rejected.
         let rejected = [
             r"\n",
             r"abc\n",
             r"\nabc",
             r"abc\nxyz",
             r"\x0A",
             r"\u000A",
             r"\U0000000A",
             r"\u{A}",
             "\n",
         ];
         for &pattern in rejected.iter() {
             assert!(roundtrip_err(pattern, b'\n').is_err());
         }
     }
 }
	use grep_matcher::LineTerminator;
	use regex_syntax::hir::{self, Hir, HirKind};

	use crate::error::{Error, ErrorKind};

	/// Return an HIR that is guaranteed to never match the given line terminator,
	/// if possible.
	///
	/// If the transformation isn't possible, then an error is returned.
	///
	/// In general, if a literal line terminator occurs anywhere in the HIR, then
	/// this will return an error. However, if the line terminator occurs within
	/// a character class with at least one other character (that isn't also a line
	/// terminator), then the line terminator is simply stripped from that class.
	///
	/// If the given line terminator is not ASCII, then this function returns an
	/// error.
	pub fn strip_from_match(
	expr: Hir,
	line_term: LineTerminator,
	) -> Result<Hir, Error> {
	if line_term.is_crlf() {
	let expr1 = strip_from_match_ascii(expr, b'\r')?;
	strip_from_match_ascii(expr1, b'\n')
	} else {
	let b = line_term.as_byte();
	if b > 0x7F {
	return Err(Error::new(ErrorKind::InvalidLineTerminator(b)));
	}
	strip_from_match_ascii(expr, b)
	}
	}

	/// The implementation of strip_from_match. The given byte must be ASCII. This
	/// function panics otherwise.
	fn strip_from_match_ascii(expr: Hir, byte: u8) -> Result<Hir, Error> {
	assert!(byte <= 0x7F);
	let chr = byte as char;
	assert_eq!(chr.len_utf8(), 1);

	let invalid = \|\| Err(Error::new(ErrorKind::NotAllowed(chr.to_string())));

	Ok(match expr.into_kind() {
	HirKind::Empty => Hir::empty(),
	HirKind::Literal(hir::Literal::Unicode(c)) => {
	if c == chr {
	return invalid();
	}
	Hir::literal(hir::Literal::Unicode(c))
	}
	HirKind::Literal(hir::Literal::Byte(b)) => {
	if b as char == chr {
	return invalid();
	}
	Hir::literal(hir::Literal::Byte(b))
	}
	HirKind::Class(hir::Class::Unicode(mut cls)) => {
	let remove = hir::ClassUnicode::new(Some(
	hir::ClassUnicodeRange::new(chr, chr),
	));
	cls.difference(&remove);
	if cls.ranges().is_empty() {
	return invalid();
	}
	Hir::class(hir::Class::Unicode(cls))
	}
	HirKind::Class(hir::Class::Bytes(mut cls)) => {
	let remove = hir::ClassBytes::new(Some(
	hir::ClassBytesRange::new(byte, byte),
	));
	cls.difference(&remove);
	if cls.ranges().is_empty() {
	return invalid();
	}
	Hir::class(hir::Class::Bytes(cls))
	}
	HirKind::Anchor(x) => Hir::anchor(x),
	HirKind::WordBoundary(x) => Hir::word_boundary(x),
	HirKind::Repetition(mut x) => {
	x.hir = Box::new(strip_from_match_ascii(*x.hir, byte)?);
	Hir::repetition(x)
	}
	HirKind::Group(mut x) => {
	x.hir = Box::new(strip_from_match_ascii(*x.hir, byte)?);
	Hir::group(x)
	}
	HirKind::Concat(xs) => {
	let xs = xs
	.into_iter()
	.map(\|e\| strip_from_match_ascii(e, byte))
	.collect::<Result<Vec<Hir>, Error>>()?;
	Hir::concat(xs)
	}
	HirKind::Alternation(xs) => {
	let xs = xs
	.into_iter()
	.map(\|e\| strip_from_match_ascii(e, byte))
	.collect::<Result<Vec<Hir>, Error>>()?;
	Hir::alternation(xs)
	}
	})
	}

	#[cfg(test)]
	mod tests {
	    use regex_syntax::Parser;

	    use super::{strip_from_match, LineTerminator};
	    use crate::error::Error;

	    /// Strips a single-byte terminator from `pattern`; panics on failure.
	    fn roundtrip(pattern: &str, byte: u8) -> String {
	        let term = LineTerminator::byte(byte);
	        roundtrip_line_term(pattern, term).unwrap()
	    }

	    /// Strips a CRLF terminator from `pattern`; panics on failure.
	    fn roundtrip_crlf(pattern: &str) -> String {
	        let term = LineTerminator::crlf();
	        roundtrip_line_term(pattern, term).unwrap()
	    }

	    /// Strips a single-byte terminator and returns the raw result.
	    fn roundtrip_err(pattern: &str, byte: u8) -> Result<String, Error> {
	        let term = LineTerminator::byte(byte);
	        roundtrip_line_term(pattern, term)
	    }

	    /// Parses `pattern`, strips `line_term` and prints the resulting HIR.
	    fn roundtrip_line_term(
	        pattern: &str,
	        line_term: LineTerminator,
	    ) -> Result<String, Error> {
	        let parsed = Parser::new().parse(pattern).unwrap();
	        let stripped = strip_from_match(parsed, line_term)?;
	        Ok(stripped.to_string())
	    }

	    #[test]
	    fn various() {
	        // Classes with other members just lose the terminator.
	        assert_eq!(roundtrip(r"[a\n]", b'\n'), "[a]");
	        assert_eq!(roundtrip(r"[a\n]", b'a'), "[\n]");
	        assert_eq!(roundtrip_crlf(r"[a\n]"), "[a]");
	        assert_eq!(roundtrip_crlf(r"[a\r]"), "[a]");
	        assert_eq!(roundtrip_crlf(r"[a\r\n]"), "[a]");

	        // Byte classes behave the same way.
	        assert_eq!(roundtrip(r"(?-u)\s", b'a'), r"(?-u:[\x09-\x0D\x20])");
	        assert_eq!(roundtrip(r"(?-u)\s", b'\n'), r"(?-u:[\x09\x0B-\x0D\x20])");

	        // Literal terminators, however they are spelled, are rejected.
	        assert!(roundtrip_err(r"\n", b'\n').is_err());
	        assert!(roundtrip_err(r"abc\n", b'\n').is_err());
	        assert!(roundtrip_err(r"\nabc", b'\n').is_err());
	        assert!(roundtrip_err(r"abc\nxyz", b'\n').is_err());
	        assert!(roundtrip_err(r"\x0A", b'\n').is_err());
	        assert!(roundtrip_err(r"\u000A", b'\n').is_err());
	        assert!(roundtrip_err(r"\U0000000A", b'\n').is_err());
	        assert!(roundtrip_err(r"\u{A}", b'\n').is_err());
	        assert!(roundtrip_err("\n", b'\n').is_err());
	    }
	}