vendor/grep-pcre2-0.1.5/src/matcher.rs - toolchain/rustc - Git at Google

 use std::collections::HashMap;

 use grep_matcher::{Captures, Match, Matcher};
 use pcre2::bytes::{CaptureLocations, Regex, RegexBuilder};

 use crate::error::Error;

 /// A builder for configuring the compilation of a PCRE2 regex.
 ///
 /// Most options are forwarded to the wrapped PCRE2 `RegexBuilder`. The
 /// `case_smart` and `word` options are handled by this type itself when
 /// `build` is called.
 #[derive(Clone, Debug)]
 pub struct RegexMatcherBuilder {
     /// The underlying PCRE2 builder that receives the forwarded options.
     builder: RegexBuilder,
     /// When true, `build` turns on caseless matching for patterns in which
     /// no uppercase literal is found.
     case_smart: bool,
     /// When true, `build` wraps the pattern in `(?<!\w)(?:...)(?!\w)`.
     word: bool,
 }

 impl RegexMatcherBuilder {
     /// Create a new matcher builder with a default configuration.
     pub fn new() -> RegexMatcherBuilder {
         RegexMatcherBuilder {
             builder: RegexBuilder::new(),
             case_smart: false,
             word: false,
         }
     }

     /// Compile the given pattern into a PCRE matcher using the current
     /// configuration.
     ///
     /// If there was a problem compiling the pattern, then an error is
     /// returned.
     pub fn build(&self, pattern: &str) -> Result<RegexMatcher, Error> {
         let mut builder = self.builder.clone();
         if self.case_smart && !has_uppercase_literal(pattern) {
             builder.caseless(true);
         }
         let res = if self.word {
             let pattern = format!(r"(?<!\w)(?:{})(?!\w)", pattern);
             builder.build(&pattern)
         } else {
             builder.build(pattern)
         };
         res.map_err(Error::regex).map(|regex| {
             let mut names = HashMap::new();
             for (i, name) in regex.capture_names().iter().enumerate() {
                 if let Some(ref name) = *name {
                     names.insert(name.to_string(), i);
                 }
             }
             RegexMatcher { regex, names }
         })
     }

     /// Enables case insensitive matching.
     ///
     /// If the `utf` option is also set, then Unicode case folding is used
     /// to determine case insensitivity. When the `utf` option is not set,
     /// then only standard ASCII case insensitivity is considered.
     ///
     /// This option corresponds to the `i` flag.
     pub fn caseless(&mut self, yes: bool) -> &mut RegexMatcherBuilder {
         self.builder.caseless(yes);
         self
     }

     /// Whether to enable "smart case" or not.
     ///
     /// When smart case is enabled, the builder will automatically enable
     /// case insensitive matching based on how the pattern is written. Namely,
     /// case insensitive mode is enabled when both of the following things
     /// are believed to be true:
     ///
     /// 1. The pattern contains at least one literal character. For example,
     ///    `a\w` contains a literal (`a`) but `\w` does not.
     /// 2. Of the literals in the pattern, none of them are considered to be
     ///    uppercase according to Unicode. For example, `foo\pL` has no
     ///    uppercase literals but `Foo\pL` does.
     ///
     /// Note that the implementation of this is not perfect. Namely, `\p{Ll}`
     /// will prevent case insensitive matching even though it is part of a meta
     /// sequence. This bug will probably never be fixed.
     pub fn case_smart(&mut self, yes: bool) -> &mut RegexMatcherBuilder {
         self.case_smart = yes;
         self
     }

     /// Enables "dot all" matching.
     ///
     /// When enabled, the `.` metacharacter in the pattern matches any
     /// character, include `\n`. When disabled (the default), `.` will match
     /// any character except for `\n`.
     ///
     /// This option corresponds to the `s` flag.
     pub fn dotall(&mut self, yes: bool) -> &mut RegexMatcherBuilder {
         self.builder.dotall(yes);
         self
     }

     /// Enable "extended" mode in the pattern, where whitespace is ignored.
     ///
     /// This option corresponds to the `x` flag.
     pub fn extended(&mut self, yes: bool) -> &mut RegexMatcherBuilder {
         self.builder.extended(yes);
         self
     }

     /// Enable multiline matching mode.
     ///
     /// When enabled, the `^` and `$` anchors will match both at the beginning
     /// and end of a subject string, in addition to matching at the start of
     /// a line and the end of a line. When disabled, the `^` and `$` anchors
     /// will only match at the beginning and end of a subject string.
     ///
     /// This option corresponds to the `m` flag.
     pub fn multi_line(&mut self, yes: bool) -> &mut RegexMatcherBuilder {
         self.builder.multi_line(yes);
         self
     }

     /// Enable matching of CRLF as a line terminator.
     ///
     /// When enabled, anchors such as `^` and `$` will match any of the
     /// following as a line terminator: `\r`, `\n` or `\r\n`.
     ///
     /// This is disabled by default, in which case, only `\n` is recognized as
     /// a line terminator.
     pub fn crlf(&mut self, yes: bool) -> &mut RegexMatcherBuilder {
         self.builder.crlf(yes);
         self
     }

     /// Require that all matches occur on word boundaries.
     ///
     /// Enabling this option is subtly different than putting `\b` assertions
     /// on both sides of your pattern. In particular, a `\b` assertion requires
     /// that one side of it match a word character while the other match a
     /// non-word character. This option, in contrast, merely requires that
     /// one side match a non-word character.
     ///
     /// For example, `\b-2\b` will not match `foo -2 bar` since `-` is not a
     /// word character. However, `-2` with this `word` option enabled will
     /// match the `-2` in `foo -2 bar`.
     pub fn word(&mut self, yes: bool) -> &mut RegexMatcherBuilder {
         self.word = yes;
         self
     }

     /// Enable Unicode matching mode.
     ///
     /// When enabled, the following patterns become Unicode aware: `\b`, `\B`,
     /// `\d`, `\D`, `\s`, `\S`, `\w`, `\W`.
     ///
     /// When set, this implies UTF matching mode. It is not possible to enable
     /// Unicode matching mode without enabling UTF matching mode.
     ///
     /// This is disabled by default.
     pub fn ucp(&mut self, yes: bool) -> &mut RegexMatcherBuilder {
         self.builder.ucp(yes);
         self
     }

     /// Enable UTF matching mode.
     ///
     /// When enabled, characters are treated as sequences of code units that
     /// make up a single codepoint instead of as single bytes. For example,
     /// this will cause `.` to match any single UTF-8 encoded codepoint, where
     /// as when this is disabled, `.` will any single byte (except for `\n` in
     /// both cases, unless "dot all" mode is enabled).
     ///
     /// Note that when UTF matching mode is enabled, every search performed
     /// will do a UTF-8 validation check, which can impact performance. The
     /// UTF-8 check can be disabled via the `disable_utf_check` option, but it
     /// is undefined behavior to enable UTF matching mode and search invalid
     /// UTF-8.
     ///
     /// This is disabled by default.
     pub fn utf(&mut self, yes: bool) -> &mut RegexMatcherBuilder {
         self.builder.utf(yes);
         self
     }

     /// When UTF matching mode is enabled, this will disable the UTF checking
     /// that PCRE2 will normally perform automatically. If UTF matching mode
     /// is not enabled, then this has no effect.
     ///
     /// UTF checking is enabled by default when UTF matching mode is enabled.
     /// If UTF matching mode is enabled and UTF checking is enabled, then PCRE2
     /// will return an error if you attempt to search a subject string that is
     /// not valid UTF-8.
     ///
     /// # Safety
     ///
     /// It is undefined behavior to disable the UTF check in UTF matching mode
     /// and search a subject string that is not valid UTF-8. When the UTF check
     /// is disabled, callers must guarantee that the subject string is valid
     /// UTF-8.
     pub unsafe fn disable_utf_check(&mut self) -> &mut RegexMatcherBuilder {
         self.builder.disable_utf_check();
         self
     }

     /// Enable PCRE2's JIT and return an error if it's not available.
     ///
     /// This generally speeds up matching quite a bit. The downside is that it
     /// can increase the time it takes to compile a pattern.
     ///
     /// If the JIT isn't available or if JIT compilation returns an error, then
     /// regex compilation will fail with the corresponding error.
     ///
     /// This is disabled by default, and always overrides `jit_if_available`.
     pub fn jit(&mut self, yes: bool) -> &mut RegexMatcherBuilder {
         self.builder.jit(yes);
         self
     }

     /// Enable PCRE2's JIT if it's available.
     ///
     /// This generally speeds up matching quite a bit. The downside is that it
     /// can increase the time it takes to compile a pattern.
     ///
     /// If the JIT isn't available or if JIT compilation returns an error,
     /// then a debug message with the error will be emitted and the regex will
     /// otherwise silently fall back to non-JIT matching.
     ///
     /// This is disabled by default, and always overrides `jit`.
     pub fn jit_if_available(&mut self, yes: bool) -> &mut RegexMatcherBuilder {
         self.builder.jit_if_available(yes);
         self
     }

     /// Set the maximum size of PCRE2's JIT stack, in bytes. If the JIT is
     /// not enabled, then this has no effect.
     ///
     /// When `None` is given, no custom JIT stack will be created, and instead,
     /// the default JIT stack is used. When the default is used, its maximum
     /// size is 32 KB.
     ///
     /// When this is set, then a new JIT stack will be created with the given
     /// maximum size as its limit.
     ///
     /// Increasing the stack size can be useful for larger regular expressions.
     ///
     /// By default, this is set to `None`.
     pub fn max_jit_stack_size(
         &mut self,
         bytes: Option<usize>,
     ) -> &mut RegexMatcherBuilder {
         self.builder.max_jit_stack_size(bytes);
         self
     }
 }

 /// An implementation of the `Matcher` trait using PCRE2.
 ///
 /// Values are created with `RegexMatcher::new` or with
 /// `RegexMatcherBuilder::build`.
 #[derive(Clone, Debug)]
 pub struct RegexMatcher {
     /// The compiled PCRE2 regex used for every search.
     regex: Regex,
     /// Maps each named capture group to its index; filled in when the
     /// matcher is built.
     names: HashMap<String, usize>,
 }

 impl RegexMatcher {
     /// Compile `pattern` with a default-configured `RegexMatcherBuilder`.
     ///
     /// An error is returned if the pattern could not be compiled.
     pub fn new(pattern: &str) -> Result<RegexMatcher, Error> {
         let defaults = RegexMatcherBuilder::new();
         defaults.build(pattern)
     }
 }

 impl Matcher for RegexMatcher {
     type Captures = RegexCaptures;
     type Error = Error;

     /// Search `haystack` beginning at byte offset `at`.
     ///
     /// Returns the start and end offsets of the match reported by the
     /// underlying PCRE2 regex, `None` when nothing matched, or an error if
     /// the search itself failed.
     fn find_at(
         &self,
         haystack: &[u8],
         at: usize,
     ) -> Result<Option<Match>, Error> {
         Ok(self
             .regex
             .find_at(haystack, at)
             .map_err(Error::regex)?
             .map(|m| Match::new(m.start(), m.end())))
     }

     /// Create a new `RegexCaptures` backed by capture locations obtained
     /// from the underlying regex.
     fn new_captures(&self) -> Result<RegexCaptures, Error> {
         Ok(RegexCaptures::new(self.regex.capture_locations()))
     }

     /// Return the number of capture groups as reported by the underlying
     /// regex.
     fn capture_count(&self) -> usize {
         self.regex.captures_len()
     }

     /// Look up the index of the capture group called `name`, or `None` if
     /// the pattern has no group with that name.
     fn capture_index(&self, name: &str) -> Option<usize> {
         // `usize` is `Copy`, so `copied` converts `Option<&usize>` into
         // `Option<usize>` without a hand-written dereferencing closure.
         self.names.get(name).copied()
     }

     /// Call `matched` for each successive match in `haystack`.
     ///
     /// Iteration stops early when the callback returns `Ok(false)` or an
     /// error. A callback error is handed back as the inner `Err`; a failure
     /// of the regex search itself is returned as the outer `Err`.
     fn try_find_iter<F, E>(
         &self,
         haystack: &[u8],
         mut matched: F,
     ) -> Result<Result<(), E>, Error>
     where
         F: FnMut(Match) -> Result<bool, E>,
     {
         for result in self.regex.find_iter(haystack) {
             let m = result.map_err(Error::regex)?;
             match matched(Match::new(m.start(), m.end())) {
                 Ok(true) => continue,
                 Ok(false) => return Ok(Ok(())),
                 Err(err) => return Ok(Err(err)),
             }
         }
         Ok(Ok(()))
     }

     /// Search `haystack` beginning at byte offset `at`, writing the group
     /// offsets of any match into `caps`.
     ///
     /// Returns whether a match was found, or an error if the search failed.
     fn captures_at(
         &self,
         haystack: &[u8],
         at: usize,
         caps: &mut RegexCaptures,
     ) -> Result<bool, Error> {
         Ok(self
             .regex
             .captures_read_at(&mut caps.locs, haystack, at)
             .map_err(Error::regex)?
             .is_some())
     }
 }

 /// Represents the match offsets of each capturing group in a match.
 ///
 /// The first, or `0`th capture group, always corresponds to the entire match
 /// and is guaranteed to be present when a match occurs. The next capture
 /// group, at index `1`, corresponds to the first capturing group in the regex,
 /// ordered by the position at which the left opening parenthesis occurs.
 ///
 /// Note that not all capturing groups are guaranteed to be present in a match.
 /// For example, in the regex, `(?P<foo>\w)|(?P<bar>\W)`, only one of `foo`
 /// or `bar` will ever be set in any given match.
 ///
 /// In order to access a capture group by name, you'll need to first find the
 /// index of the group using the corresponding matcher's `capture_index`
 /// method, and then use that index with `RegexCaptures::get`.
 #[derive(Clone, Debug)]
 pub struct RegexCaptures {
     /// Where the locations are stored. `RegexMatcher::captures_at` writes
     /// into this value on every search.
     locs: CaptureLocations,
 }

 impl Captures for RegexCaptures {
     /// Number of capture groups, as reported by the stored locations.
     fn len(&self) -> usize {
         let locations = &self.locs;
         locations.len()
     }

     /// Offsets of capture group `i`, or `None` when the stored locations
     /// have no span for that index.
     fn get(&self, i: usize) -> Option<Match> {
         let (start, end) = self.locs.get(i)?;
         Some(Match::new(start, end))
     }
 }

 impl RegexCaptures {
     pub(crate) fn new(locs: CaptureLocations) -> RegexCaptures {
         RegexCaptures { locs }
     }
 }

 /// Determine whether the pattern contains an uppercase character which should
 /// negate the effect of the smart-case option.
 ///
 /// Ideally we would be able to check the AST in order to correctly handle
 /// things like '\p{Ll}' and '\p{Lu}' (which should be treated as explicitly
 /// cased), but PCRE doesn't expose enough details for that kind of analysis.
 /// For now, our 'good enough' solution is to simply perform a semi-naïve
 /// scan of the input pattern and ignore all characters following a '\'. The
 /// This at least lets us support the most common cases, like 'foo\w' and
 /// 'foo\S', in an intuitive manner.
 fn has_uppercase_literal(pattern: &str) -> bool {
     let mut chars = pattern.chars();
     while let Some(c) = chars.next() {
         if c == '\\' {
             chars.next();
         } else if c.is_uppercase() {
             return true;
         }
     }
     false
 }

 #[cfg(test)]
 mod tests {
     use super::*;
     use grep_matcher::{LineMatchKind, Matcher};

     // The `word` option only forbids word characters around the match,
     // whereas `\b` additionally needs a word character on one side. The
     // same haystack therefore matches with the option and not with `\b`.
     #[test]
     fn word() {
         let mut builder = RegexMatcherBuilder::new();
         builder.word(true);
         let with_option = builder.build(r"-2").unwrap();
         assert!(with_option.is_match(b"abc -2 foo").unwrap());

         let mut builder = RegexMatcherBuilder::new();
         builder.word(false);
         let with_assertions = builder.build(r"\b-2\b").unwrap();
         assert!(!with_assertions.is_match(b"abc -2 foo").unwrap());
     }

     // Enabling CRLF lets `$` match in front of `\r\n`.
     #[test]
     fn line_terminator_crlf() {
         // Multi-line matcher for `abc$`; CRLF mode is only touched when
         // requested.
         let build = |crlf: bool| {
             let mut builder = RegexMatcherBuilder::new();
             builder.multi_line(true);
             if crlf {
                 builder.crlf(true);
             }
             builder.build(r"abc$").unwrap()
         };

         // `$` matches before a plain `\n` line terminator.
         assert!(build(false).is_match(b"abc\n").unwrap());

         // Without CRLF mode, `$` does not match at a `\r\n` boundary.
         assert!(!build(false).is_match(b"abc\r\n").unwrap());

         // With CRLF mode it does.
         assert!(build(true).is_match(b"abc\r\n").unwrap());
     }

     // Smart case goes caseless only for patterns without uppercase
     // literals.
     #[test]
     fn case_smart() {
         let smart = |pattern: &str| {
             RegexMatcherBuilder::new().case_smart(true).build(pattern).unwrap()
         };

         assert!(smart(r"abc").is_match(b"ABC").unwrap());
         assert!(!smart(r"aBc").is_match(b"ABC").unwrap());
     }

     // Finding a candidate line reports a confirmed match.
     #[test]
     fn candidate_lines() {
         fn is_confirmed(m: LineMatchKind) -> bool {
             matches!(m, LineMatchKind::Confirmed(_))
         }

         let matcher = RegexMatcherBuilder::new().build(r"\wfoo\s").unwrap();
         let found = matcher.find_candidate_line(b"afoo ").unwrap();
         assert!(is_confirmed(found.unwrap()));
     }
 }
	use std::collections::HashMap;

	use grep_matcher::{Captures, Match, Matcher};
	use pcre2::bytes::{CaptureLocations, Regex, RegexBuilder};

	use crate::error::Error;

	/// A builder for configuring the compilation of a PCRE2 regex.
	///
	/// Most options are forwarded to the wrapped PCRE2 `RegexBuilder`. The
	/// `case_smart` and `word` options are handled by this type itself when
	/// `build` is called.
	#[derive(Clone, Debug)]
	pub struct RegexMatcherBuilder {
	/// The underlying PCRE2 builder that receives the forwarded options.
	builder: RegexBuilder,
	/// When true, `build` turns on caseless matching for patterns in which
	/// no uppercase literal is found.
	case_smart: bool,
	/// When true, `build` wraps the pattern in `(?<!\w)(?:...)(?!\w)`.
	word: bool,
	}

	impl RegexMatcherBuilder {
	/// Create a new matcher builder with a default configuration.
	pub fn new() -> RegexMatcherBuilder {
	RegexMatcherBuilder {
	builder: RegexBuilder::new(),
	case_smart: false,
	word: false,
	}
	}

	/// Compile the given pattern into a PCRE matcher using the current
	/// configuration.
	///
	/// If there was a problem compiling the pattern, then an error is
	/// returned.
	pub fn build(&self, pattern: &str) -> Result<RegexMatcher, Error> {
	let mut builder = self.builder.clone();
	if self.case_smart && !has_uppercase_literal(pattern) {
	builder.caseless(true);
	}
	let res = if self.word {
	let pattern = format!(r"(?<!\w)(?:{})(?!\w)", pattern);
	builder.build(&pattern)
	} else {
	builder.build(pattern)
	};
	res.map_err(Error::regex).map(\|regex\| {
	let mut names = HashMap::new();
	for (i, name) in regex.capture_names().iter().enumerate() {
	if let Some(ref name) = *name {
	names.insert(name.to_string(), i);
	}
	}
	RegexMatcher { regex, names }
	})
	}

	/// Enables case insensitive matching.
	///
	/// If the `utf` option is also set, then Unicode case folding is used
	/// to determine case insensitivity. When the `utf` option is not set,
	/// then only standard ASCII case insensitivity is considered.
	///
	/// This option corresponds to the `i` flag.
	pub fn caseless(&mut self, yes: bool) -> &mut RegexMatcherBuilder {
	self.builder.caseless(yes);
	self
	}

	/// Whether to enable "smart case" or not.
	///
	/// When smart case is enabled, the builder will automatically enable
	/// case insensitive matching based on how the pattern is written. Namely,
	/// case insensitive mode is enabled when both of the following things
	/// are believed to be true:
	///
	/// 1. The pattern contains at least one literal character. For example,
	/// `a\w` contains a literal (`a`) but `\w` does not.
	/// 2. Of the literals in the pattern, none of them are considered to be
	/// uppercase according to Unicode. For example, `foo\pL` has no
	/// uppercase literals but `Foo\pL` does.
	///
	/// Note that the implementation of this is not perfect. Namely, `\p{Ll}`
	/// will prevent case insensitive matching even though it is part of a meta
	/// sequence. This bug will probably never be fixed.
	pub fn case_smart(&mut self, yes: bool) -> &mut RegexMatcherBuilder {
	self.case_smart = yes;
	self
	}

	/// Enables "dot all" matching.
	///
	/// When enabled, the `.` metacharacter in the pattern matches any
	/// character, include `\n`. When disabled (the default), `.` will match
	/// any character except for `\n`.
	///
	/// This option corresponds to the `s` flag.
	pub fn dotall(&mut self, yes: bool) -> &mut RegexMatcherBuilder {
	self.builder.dotall(yes);
	self
	}

	/// Enable "extended" mode in the pattern, where whitespace is ignored.
	///
	/// This option corresponds to the `x` flag.
	pub fn extended(&mut self, yes: bool) -> &mut RegexMatcherBuilder {
	self.builder.extended(yes);
	self
	}

	/// Enable multiline matching mode.
	///
	/// When enabled, the `^` and `$` anchors will match both at the beginning
	/// and end of a subject string, in addition to matching at the start of
	/// a line and the end of a line. When disabled, the `^` and `$` anchors
	/// will only match at the beginning and end of a subject string.
	///
	/// This option corresponds to the `m` flag.
	pub fn multi_line(&mut self, yes: bool) -> &mut RegexMatcherBuilder {
	self.builder.multi_line(yes);
	self
	}

	/// Enable matching of CRLF as a line terminator.
	///
	/// When enabled, anchors such as `^` and `$` will match any of the
	/// following as a line terminator: `\r`, `\n` or `\r\n`.
	///
	/// This is disabled by default, in which case, only `\n` is recognized as
	/// a line terminator.
	pub fn crlf(&mut self, yes: bool) -> &mut RegexMatcherBuilder {
	self.builder.crlf(yes);
	self
	}

	/// Require that all matches occur on word boundaries.
	///
	/// Enabling this option is subtly different than putting `\b` assertions
	/// on both sides of your pattern. In particular, a `\b` assertion requires
	/// that one side of it match a word character while the other match a
	/// non-word character. This option, in contrast, merely requires that
	/// one side match a non-word character.
	///
	/// For example, `\b-2\b` will not match `foo -2 bar` since `-` is not a
	/// word character. However, `-2` with this `word` option enabled will
	/// match the `-2` in `foo -2 bar`.
	pub fn word(&mut self, yes: bool) -> &mut RegexMatcherBuilder {
	self.word = yes;
	self
	}

	/// Enable Unicode matching mode.
	///
	/// When enabled, the following patterns become Unicode aware: `\b`, `\B`,
	/// `\d`, `\D`, `\s`, `\S`, `\w`, `\W`.
	///
	/// When set, this implies UTF matching mode. It is not possible to enable
	/// Unicode matching mode without enabling UTF matching mode.
	///
	/// This is disabled by default.
	pub fn ucp(&mut self, yes: bool) -> &mut RegexMatcherBuilder {
	self.builder.ucp(yes);
	self
	}

	/// Enable UTF matching mode.
	///
	/// When enabled, characters are treated as sequences of code units that
	/// make up a single codepoint instead of as single bytes. For example,
	/// this will cause `.` to match any single UTF-8 encoded codepoint, where
	/// as when this is disabled, `.` will any single byte (except for `\n` in
	/// both cases, unless "dot all" mode is enabled).
	///
	/// Note that when UTF matching mode is enabled, every search performed
	/// will do a UTF-8 validation check, which can impact performance. The
	/// UTF-8 check can be disabled via the `disable_utf_check` option, but it
	/// is undefined behavior to enable UTF matching mode and search invalid
	/// UTF-8.
	///
	/// This is disabled by default.
	pub fn utf(&mut self, yes: bool) -> &mut RegexMatcherBuilder {
	self.builder.utf(yes);
	self
	}

	/// When UTF matching mode is enabled, this will disable the UTF checking
	/// that PCRE2 will normally perform automatically. If UTF matching mode
	/// is not enabled, then this has no effect.
	///
	/// UTF checking is enabled by default when UTF matching mode is enabled.
	/// If UTF matching mode is enabled and UTF checking is enabled, then PCRE2
	/// will return an error if you attempt to search a subject string that is
	/// not valid UTF-8.
	///
	/// # Safety
	///
	/// It is undefined behavior to disable the UTF check in UTF matching mode
	/// and search a subject string that is not valid UTF-8. When the UTF check
	/// is disabled, callers must guarantee that the subject string is valid
	/// UTF-8.
	pub unsafe fn disable_utf_check(&mut self) -> &mut RegexMatcherBuilder {
	self.builder.disable_utf_check();
	self
	}

	/// Enable PCRE2's JIT and return an error if it's not available.
	///
	/// This generally speeds up matching quite a bit. The downside is that it
	/// can increase the time it takes to compile a pattern.
	///
	/// If the JIT isn't available or if JIT compilation returns an error, then
	/// regex compilation will fail with the corresponding error.
	///
	/// This is disabled by default, and always overrides `jit_if_available`.
	pub fn jit(&mut self, yes: bool) -> &mut RegexMatcherBuilder {
	self.builder.jit(yes);
	self
	}

	/// Enable PCRE2's JIT if it's available.
	///
	/// This generally speeds up matching quite a bit. The downside is that it
	/// can increase the time it takes to compile a pattern.
	///
	/// If the JIT isn't available or if JIT compilation returns an error,
	/// then a debug message with the error will be emitted and the regex will
	/// otherwise silently fall back to non-JIT matching.
	///
	/// This is disabled by default, and always overrides `jit`.
	pub fn jit_if_available(&mut self, yes: bool) -> &mut RegexMatcherBuilder {
	self.builder.jit_if_available(yes);
	self
	}

	/// Set the maximum size of PCRE2's JIT stack, in bytes. If the JIT is
	/// not enabled, then this has no effect.
	///
	/// When `None` is given, no custom JIT stack will be created, and instead,
	/// the default JIT stack is used. When the default is used, its maximum
	/// size is 32 KB.
	///
	/// When this is set, then a new JIT stack will be created with the given
	/// maximum size as its limit.
	///
	/// Increasing the stack size can be useful for larger regular expressions.
	///
	/// By default, this is set to `None`.
	pub fn max_jit_stack_size(
	&mut self,
	bytes: Option<usize>,
	) -> &mut RegexMatcherBuilder {
	self.builder.max_jit_stack_size(bytes);
	self
	}
	}

	/// An implementation of the `Matcher` trait using PCRE2.
	///
	/// Values are created with `RegexMatcher::new` or with
	/// `RegexMatcherBuilder::build`.
	#[derive(Clone, Debug)]
	pub struct RegexMatcher {
	/// The compiled PCRE2 regex used for every search.
	regex: Regex,
	/// Maps each named capture group to its index; filled in when the
	/// matcher is built.
	names: HashMap<String, usize>,
	}

	impl RegexMatcher {
	    /// Compile `pattern` with a default-configured `RegexMatcherBuilder`.
	    ///
	    /// An error is returned if the pattern could not be compiled.
	    pub fn new(pattern: &str) -> Result<RegexMatcher, Error> {
	        let defaults = RegexMatcherBuilder::new();
	        defaults.build(pattern)
	    }
	}

	impl Matcher for RegexMatcher {
	    type Captures = RegexCaptures;
	    type Error = Error;

	    /// Search `haystack` beginning at byte offset `at`.
	    ///
	    /// Returns the start and end offsets of the match reported by the
	    /// underlying PCRE2 regex, `None` when nothing matched, or an error if
	    /// the search itself failed.
	    fn find_at(
	        &self,
	        haystack: &[u8],
	        at: usize,
	    ) -> Result<Option<Match>, Error> {
	        Ok(self
	            .regex
	            .find_at(haystack, at)
	            .map_err(Error::regex)?
	            .map(|m| Match::new(m.start(), m.end())))
	    }

	    /// Create a new `RegexCaptures` backed by capture locations obtained
	    /// from the underlying regex.
	    fn new_captures(&self) -> Result<RegexCaptures, Error> {
	        Ok(RegexCaptures::new(self.regex.capture_locations()))
	    }

	    /// Return the number of capture groups as reported by the underlying
	    /// regex.
	    fn capture_count(&self) -> usize {
	        self.regex.captures_len()
	    }

	    /// Look up the index of the capture group called `name`, or `None` if
	    /// the pattern has no group with that name.
	    fn capture_index(&self, name: &str) -> Option<usize> {
	        // `usize` is `Copy`, so `copied` converts `Option<&usize>` into
	        // `Option<usize>` without a hand-written dereferencing closure.
	        self.names.get(name).copied()
	    }

	    /// Call `matched` for each successive match in `haystack`.
	    ///
	    /// Iteration stops early when the callback returns `Ok(false)` or an
	    /// error. A callback error is handed back as the inner `Err`; a failure
	    /// of the regex search itself is returned as the outer `Err`.
	    fn try_find_iter<F, E>(
	        &self,
	        haystack: &[u8],
	        mut matched: F,
	    ) -> Result<Result<(), E>, Error>
	    where
	        F: FnMut(Match) -> Result<bool, E>,
	    {
	        for result in self.regex.find_iter(haystack) {
	            let m = result.map_err(Error::regex)?;
	            match matched(Match::new(m.start(), m.end())) {
	                Ok(true) => continue,
	                Ok(false) => return Ok(Ok(())),
	                Err(err) => return Ok(Err(err)),
	            }
	        }
	        Ok(Ok(()))
	    }

	    /// Search `haystack` beginning at byte offset `at`, writing the group
	    /// offsets of any match into `caps`.
	    ///
	    /// Returns whether a match was found, or an error if the search failed.
	    fn captures_at(
	        &self,
	        haystack: &[u8],
	        at: usize,
	        caps: &mut RegexCaptures,
	    ) -> Result<bool, Error> {
	        Ok(self
	            .regex
	            .captures_read_at(&mut caps.locs, haystack, at)
	            .map_err(Error::regex)?
	            .is_some())
	    }
	}

	/// Represents the match offsets of each capturing group in a match.
	///
	/// The first, or `0`th capture group, always corresponds to the entire match
	/// and is guaranteed to be present when a match occurs. The next capture
	/// group, at index `1`, corresponds to the first capturing group in the regex,
	/// ordered by the position at which the left opening parenthesis occurs.
	///
	/// Note that not all capturing groups are guaranteed to be present in a match.
	/// For example, in the regex, `(?P<foo>\w)|(?P<bar>\W)`, only one of `foo`
	/// or `bar` will ever be set in any given match.
	///
	/// In order to access a capture group by name, you'll need to first find the
	/// index of the group using the corresponding matcher's `capture_index`
	/// method, and then use that index with `RegexCaptures::get`.
	#[derive(Clone, Debug)]
	pub struct RegexCaptures {
	/// Where the locations are stored. `RegexMatcher::captures_at` writes
	/// into this value on every search.
	locs: CaptureLocations,
	}

	impl Captures for RegexCaptures {
	    /// Number of capture groups, as reported by the stored locations.
	    fn len(&self) -> usize {
	        self.locs.len()
	    }

	    /// Offsets of capture group `i`, or `None` when the stored locations
	    /// have no span for that index.
	    fn get(&self, i: usize) -> Option<Match> {
	        self.locs.get(i).map(|(s, e)| Match::new(s, e))
	    }
	}

	impl RegexCaptures {
	pub(crate) fn new(locs: CaptureLocations) -> RegexCaptures {
	RegexCaptures { locs }
	}
	}

	/// Determine whether the pattern contains an uppercase character which should
	/// negate the effect of the smart-case option.
	///
	/// Ideally we would be able to check the AST in order to correctly handle
	/// things like '\p{Ll}' and '\p{Lu}' (which should be treated as explicitly
	/// cased), but PCRE doesn't expose enough details for that kind of analysis.
	/// For now, our 'good enough' solution is to simply perform a semi-naïve
	/// scan of the input pattern and ignore all characters following a '\'. The
	/// This at least lets us support the most common cases, like 'foo\w' and
	/// 'foo\S', in an intuitive manner.
	fn has_uppercase_literal(pattern: &str) -> bool {
	let mut chars = pattern.chars();
	while let Some(c) = chars.next() {
	if c == '\\' {
	chars.next();
	} else if c.is_uppercase() {
	return true;
	}
	}
	false
	}

	#[cfg(test)]
	mod tests {
	    use super::*;
	    use grep_matcher::{LineMatchKind, Matcher};

	    // The `word` option only forbids word characters around the match,
	    // whereas `\b` additionally needs a word character on one side. The
	    // same haystack therefore matches with the option and not with `\b`.
	    #[test]
	    fn word() {
	        let mut builder = RegexMatcherBuilder::new();
	        builder.word(true);
	        let with_option = builder.build(r"-2").unwrap();
	        assert!(with_option.is_match(b"abc -2 foo").unwrap());

	        let mut builder = RegexMatcherBuilder::new();
	        builder.word(false);
	        let with_assertions = builder.build(r"\b-2\b").unwrap();
	        assert!(!with_assertions.is_match(b"abc -2 foo").unwrap());
	    }

	    // Enabling CRLF lets `$` match in front of `\r\n`.
	    #[test]
	    fn line_terminator_crlf() {
	        // Multi-line matcher for `abc$`; CRLF mode is only touched when
	        // requested.
	        let build = |crlf: bool| {
	            let mut builder = RegexMatcherBuilder::new();
	            builder.multi_line(true);
	            if crlf {
	                builder.crlf(true);
	            }
	            builder.build(r"abc$").unwrap()
	        };

	        // `$` matches before a plain `\n` line terminator.
	        assert!(build(false).is_match(b"abc\n").unwrap());

	        // Without CRLF mode, `$` does not match at a `\r\n` boundary.
	        assert!(!build(false).is_match(b"abc\r\n").unwrap());

	        // With CRLF mode it does.
	        assert!(build(true).is_match(b"abc\r\n").unwrap());
	    }

	    // Smart case goes caseless only for patterns without uppercase
	    // literals.
	    #[test]
	    fn case_smart() {
	        let smart = |pattern: &str| {
	            RegexMatcherBuilder::new().case_smart(true).build(pattern).unwrap()
	        };

	        assert!(smart(r"abc").is_match(b"ABC").unwrap());
	        assert!(!smart(r"aBc").is_match(b"ABC").unwrap());
	    }

	    // Finding a candidate line reports a confirmed match.
	    #[test]
	    fn candidate_lines() {
	        fn is_confirmed(m: LineMatchKind) -> bool {
	            matches!(m, LineMatchKind::Confirmed(_))
	        }

	        let matcher = RegexMatcherBuilder::new().build(r"\wfoo\s").unwrap();
	        let found = matcher.find_candidate_line(b"afoo ").unwrap();
	        assert!(is_confirmed(found.unwrap()));
	    }
	}