vendor/pcre2-0.2.3/src/bytes.rs - toolchain/rustc - Git at Google

 use std::cell::RefCell;
 use std::collections::HashMap;
 use std::fmt;
 use std::ops::Index;
 use std::sync::Arc;

 use log::debug;
 use pcre2_sys::{
     PCRE2_CASELESS, PCRE2_DOTALL, PCRE2_EXTENDED, PCRE2_MULTILINE,
     PCRE2_UCP, PCRE2_UTF, PCRE2_NO_UTF_CHECK, PCRE2_UNSET,
     PCRE2_NEWLINE_ANYCRLF,
 };
 use thread_local::CachedThreadLocal;

 use crate::error::Error;
 use crate::ffi::{Code, CompileContext, MatchConfig, MatchData};

 /// A single match of a regex within a subject string.
 ///
 /// The lifetime parameter `'s` is the lifetime of the subject string that the
 /// matched bytes are borrowed from.
 #[derive(Clone, Copy, Debug, Eq, PartialEq)]
 pub struct Match<'s> {
     subject: &'s [u8],
     start: usize,
     end: usize,
 }

 impl<'s> Match<'s> {
     /// Byte offset in the subject at which this match begins.
     #[inline]
     pub fn start(&self) -> usize {
         self.start
     }

     /// Byte offset in the subject at which this match ends.
     #[inline]
     pub fn end(&self) -> usize {
         self.end
     }

     /// The matched bytes, i.e. the stored subject sliced from `start` to
     /// `end`.
     #[inline]
     pub fn as_bytes(&self) -> &'s [u8] {
         let Match { subject, start, end } = *self;
         &subject[start..end]
     }

     /// Builds a match over `subject` covering the byte range `start..end`.
     fn new(subject: &'s [u8], start: usize, end: usize) -> Match<'s> {
         Self { subject, start, end }
     }

     /// Test helper: the `(start, end)` offsets as a tuple.
     #[cfg(test)]
     fn as_pair(&self) -> (usize, usize) {
         let Match { start, end, .. } = *self;
         (start, end)
     }
 }

 /// The full set of options a `RegexBuilder` collects before compiling.
 ///
 /// Most fields map onto a PCRE2 option bit that `RegexBuilder::build` sets
 /// when the field is `true`.
 #[derive(Clone, Debug)]
 struct Config {
     /// PCRE2_CASELESS
     caseless: bool,
     /// PCRE2_DOTALL
     dotall: bool,
     /// PCRE2_EXTENDED
     extended: bool,
     /// PCRE2_MULTILINE
     multi_line: bool,
     /// PCRE2_NEWLINE_ANYCRLF (applied through the compile context rather
     /// than the option bits)
     crlf: bool,
     /// PCRE2_UCP (`build` also turns on PCRE2_UTF whenever this is set)
     ucp: bool,
     /// PCRE2_UTF
     utf: bool,
     /// Whether UTF checking is left enabled for searches. Note the inverted
     /// sense: PCRE2_NO_UTF_CHECK is passed at match time only when this is
     /// `false`.
     utf_check: bool,
     /// use pcre2_jit_compile
     jit: JITChoice,
     /// Match-time specific configuration knobs.
     match_config: MatchConfig,
 }

 /// How `RegexBuilder::build` should treat PCRE2's JIT compiler.
 #[derive(Clone, Debug)]
 enum JITChoice {
     /// Never do JIT compilation.
     Never,
     /// Always do JIT compilation and return an error if it fails.
     Always,
     /// Attempt to do JIT compilation but fall back to non-JIT matching if it
     /// fails; the failure is only reported through a debug log message.
     Attempt,
 }

 impl Default for Config {
     /// The baseline configuration: every pattern option switched off, UTF
     /// checking left on, no JIT compilation, and default match-time settings.
     fn default() -> Config {
         let match_config = MatchConfig::default();
         Config {
             // Pattern syntax/semantics switches.
             caseless: false,
             dotall: false,
             extended: false,
             multi_line: false,
             crlf: false,
             // Unicode handling.
             ucp: false,
             utf: false,
             utf_check: true,
             // Execution strategy.
             jit: JITChoice::Never,
             match_config,
         }
     }
 }

 /// A builder for configuring the compilation of a PCRE2 regex.
 ///
 /// Options are set through the builder's `&mut self` methods, each of which
 /// returns the builder again so calls can be chained; `build` then compiles
 /// a pattern using whatever options are set at that point.
 #[derive(Clone, Debug)]
 pub struct RegexBuilder {
     /// The options accumulated so far.
     config: Config,
 }

 impl RegexBuilder {
     /// Create a new builder with a default configuration.
     ///
     /// In the default configuration every pattern option is disabled, UTF
     /// checking is enabled and the JIT is not used.
     pub fn new() -> RegexBuilder {
         RegexBuilder { config: Config::default() }
     }

     /// Compile the given pattern into a PCRE regex using the current
     /// configuration.
     ///
     /// If there was a problem compiling the pattern, then an error is
     /// returned.
     pub fn build(&self, pattern: &str) -> Result<Regex, Error> {
         let mut options = 0;
         if self.config.caseless {
             options |= PCRE2_CASELESS;
         }
         if self.config.dotall {
             options |= PCRE2_DOTALL;
         }
         if self.config.extended {
             options |= PCRE2_EXTENDED;
         }
         if self.config.multi_line {
             options |= PCRE2_MULTILINE;
         }
         if self.config.ucp {
             options |= PCRE2_UCP;
             options |= PCRE2_UTF;
         }
         if self.config.utf {
             options |= PCRE2_UTF;
         }

         let mut ctx = CompileContext::new();
         if self.config.crlf {
             ctx.set_newline(PCRE2_NEWLINE_ANYCRLF)
                 .expect("PCRE2_NEWLINE_ANYCRLF is a legal value");
         }

         let mut code = Code::new(pattern, options, ctx)?;
         match self.config.jit {
             JITChoice::Never => {} // fallthrough
             JITChoice::Always => {
                 code.jit_compile()?;
             }
             JITChoice::Attempt => {
                 if let Err(err) = code.jit_compile() {
                     debug!("JIT compilation failed: {}", err);
                 }
             }
         }
         let capture_names = code.capture_names()?;
         let mut idx = HashMap::new();
         for (i, group) in capture_names.iter().enumerate() {
             if let Some(ref name) = *group {
                 idx.insert(name.to_string(), i);
             }
         }
         Ok(Regex {
             config: Arc::new(self.config.clone()),
             pattern: pattern.to_string(),
             code: Arc::new(code),
             capture_names: Arc::new(capture_names),
             capture_names_idx: Arc::new(idx),
             match_data: CachedThreadLocal::new(),
         })
     }

     /// Enables case insensitive matching.
     ///
     /// If the `utf` option is also set, then Unicode case folding is used
     /// to determine case insensitivity. When the `utf` option is not set,
     /// then only standard ASCII case insensitivity is considered.
     ///
     /// This option corresponds to the `i` flag.
     ///
     /// This is disabled by default.
     pub fn caseless(&mut self, yes: bool) -> &mut RegexBuilder {
         self.config.caseless = yes;
         self
     }

     /// Enables "dot all" matching.
     ///
     /// When enabled, the `.` metacharacter in the pattern matches any
     /// character, including `\n`. When disabled (the default), `.` will match
     /// any character except for `\n`.
     ///
     /// This option corresponds to the `s` flag.
     pub fn dotall(&mut self, yes: bool) -> &mut RegexBuilder {
         self.config.dotall = yes;
         self
     }

     /// Enable "extended" mode in the pattern, where whitespace is ignored.
     ///
     /// This option corresponds to the `x` flag.
     ///
     /// This is disabled by default.
     pub fn extended(&mut self, yes: bool) -> &mut RegexBuilder {
         self.config.extended = yes;
         self
     }

     /// Enable multiline matching mode.
     ///
     /// When enabled, the `^` and `$` anchors will match both at the beginning
     /// and end of a subject string, in addition to matching at the start of
     /// a line and the end of a line. When disabled, the `^` and `$` anchors
     /// will only match at the beginning and end of a subject string.
     ///
     /// This option corresponds to the `m` flag.
     ///
     /// This is disabled by default.
     pub fn multi_line(&mut self, yes: bool) -> &mut RegexBuilder {
         self.config.multi_line = yes;
         self
     }

     /// Enable matching of CRLF as a line terminator.
     ///
     /// When enabled, anchors such as `^` and `$` will match any of the
     /// following as a line terminator: `\r`, `\n` or `\r\n`.
     ///
     /// This is disabled by default, in which case, only `\n` is recognized as
     /// a line terminator.
     ///
     /// When enabled, `build` selects PCRE2's `PCRE2_NEWLINE_ANYCRLF` newline
     /// convention for the pattern.
     pub fn crlf(&mut self, yes: bool) -> &mut RegexBuilder {
         self.config.crlf = yes;
         self
     }

     /// Enable Unicode matching mode.
     ///
     /// When enabled, the following patterns become Unicode aware: `\b`, `\B`,
     /// `\d`, `\D`, `\s`, `\S`, `\w`, `\W`.
     ///
     /// When set, this implies UTF matching mode. It is not possible to enable
     /// Unicode matching mode without enabling UTF matching mode: `build` sets
     /// the UTF option whenever this option is set, regardless of the `utf`
     /// setting.
     ///
     /// This is disabled by default.
     pub fn ucp(&mut self, yes: bool) -> &mut RegexBuilder {
         self.config.ucp = yes;
         self
     }

     /// Enable UTF matching mode.
     ///
     /// When enabled, characters are treated as sequences of code units that
     /// make up a single codepoint instead of as single bytes. For example,
     /// this will cause `.` to match any single UTF-8 encoded codepoint,
     /// whereas when this is disabled, `.` will match any single byte (except
     /// for `\n` in both cases, unless "dot all" mode is enabled).
     ///
     /// Note that when UTF matching mode is enabled, every search performed
     /// will do a UTF-8 validation check, which can impact performance. The
     /// UTF-8 check can be disabled via the `disable_utf_check` option, but it
     /// is undefined behavior to enable UTF matching mode and search invalid
     /// UTF-8.
     ///
     /// This is disabled by default.
     pub fn utf(&mut self, yes: bool) -> &mut RegexBuilder {
         self.config.utf = yes;
         self
     }

     /// When UTF matching mode is enabled, this will disable the UTF checking
     /// that PCRE2 will normally perform automatically. If UTF matching mode
     /// is not enabled, then this has no effect.
     ///
     /// UTF checking is enabled by default when UTF matching mode is enabled.
     /// If UTF matching mode is enabled and UTF checking is enabled, then PCRE2
     /// will return an error if you attempt to search a subject string that is
     /// not valid UTF-8.
     ///
     /// This builder offers no method to turn the check back on once it has
     /// been disabled.
     ///
     /// # Safety
     ///
     /// It is undefined behavior to disable the UTF check in UTF matching mode
     /// and search a subject string that is not valid UTF-8. When the UTF check
     /// is disabled, callers must guarantee that the subject string is valid
     /// UTF-8.
     pub unsafe fn disable_utf_check(&mut self) -> &mut RegexBuilder {
         self.config.utf_check = false;
         self
     }

     /// Require PCRE2's JIT, failing the build if it cannot be used.
     ///
     /// JIT compilation generally speeds up matching quite a bit, at the cost
     /// of making pattern compilation slower.
     ///
     /// With `yes == true`, `build` returns the corresponding error when the
     /// JIT isn't available or JIT compilation fails. With `yes == false`, the
     /// JIT is switched off entirely.
     ///
     /// This is disabled by default. `jit` and `jit_if_available` write the
     /// same setting, so whichever of the two is called last on a builder
     /// determines what `build` does.
     pub fn jit(&mut self, yes: bool) -> &mut RegexBuilder {
         self.config.jit =
             if yes { JITChoice::Always } else { JITChoice::Never };
         self
     }

     /// Use PCRE2's JIT when possible, without failing the build otherwise.
     ///
     /// JIT compilation generally speeds up matching quite a bit, at the cost
     /// of making pattern compilation slower.
     ///
     /// With `yes == true`, a JIT that is unavailable or fails to compile the
     /// pattern only produces a debug log message, and the regex falls back to
     /// non-JIT matching. With `yes == false`, the JIT is switched off
     /// entirely.
     ///
     /// This is disabled by default. `jit` and `jit_if_available` write the
     /// same setting, so whichever of the two is called last on a builder
     /// determines what `build` does.
     pub fn jit_if_available(&mut self, yes: bool) -> &mut RegexBuilder {
         self.config.jit =
             if yes { JITChoice::Attempt } else { JITChoice::Never };
         self
     }

     /// Set the maximum size of PCRE2's JIT stack, in bytes. If the JIT is
     /// not enabled, then this has no effect.
     ///
     /// When `None` is given, no custom JIT stack will be created, and instead,
     /// the default JIT stack is used. When the default is used, its maximum
     /// size is 32 KB.
     ///
     /// When this is set, then a new JIT stack will be created with the given
     /// maximum size as its limit.
     ///
     /// Increasing the stack size can be useful for larger regular expressions.
     ///
     /// By default, this is set to `None`.
     ///
     /// The value is stored in the match-time configuration handed to every
     /// match data object created for the regex.
     pub fn max_jit_stack_size(
         &mut self,
         bytes: Option<usize>,
     ) -> &mut RegexBuilder {
         self.config.match_config.max_jit_stack_size = bytes;
         self
     }
 }

 /// A compiled PCRE2 regular expression.
 ///
 /// This regex is safe to use from multiple threads simultaneously. For top
 /// performance, it is better to clone a new regex for each thread.
 ///
 /// A clone shares the compiled code, configuration and capture name tables
 /// with the original through `Arc`s; only the pattern string is copied, and
 /// the clone starts out with its own empty per-thread match data.
 pub struct Regex {
     /// The configuration used to build the regex.
     config: Arc<Config>,
     /// The original pattern string.
     pattern: String,
     /// The underlying compiled PCRE2 object.
     code: Arc<Code>,
     /// The capture group names for this regex.
     capture_names: Arc<Vec<Option<String>>>,
     /// A map from capture group name to capture group index.
     capture_names_idx: Arc<HashMap<String, usize>>,
     /// Mutable scratch data used by PCRE2 during matching.
     ///
     /// We use the same strategy as Rust's regex crate here, such that each
     /// thread gets its own match data to support using a Regex object from
     /// multiple threads simultaneously. If some match data doesn't exist for
     /// a thread, then a new one is created on demand.
     match_data: CachedThreadLocal<RefCell<MatchData>>,
 }

 impl Clone for Regex {
     /// Produces a regex that shares the compiled code, configuration and
     /// capture name tables with `self`, but owns a fresh, empty set of
     /// per-thread match data.
     fn clone(&self) -> Regex {
         let Regex {
             config,
             pattern,
             code,
             capture_names,
             capture_names_idx,
             ..
         } = self;
         Regex {
             config: Arc::clone(config),
             pattern: pattern.clone(),
             code: Arc::clone(code),
             capture_names: Arc::clone(capture_names),
             capture_names_idx: Arc::clone(capture_names_idx),
             // Scratch space is never shared between regex values.
             match_data: CachedThreadLocal::new(),
         }
     }
 }

 impl fmt::Debug for Regex {
     /// Renders the regex as `Regex("<pattern>")`, with the pattern shown in
     /// its debug-escaped form.
     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
         f.write_fmt(format_args!("Regex({:?})", self.pattern))
     }
 }

 impl Regex {
     /// Compiles a regular expression using the default configuration.
     ///
     /// Once compiled, it can be used repeatedly to search, split or replace
     /// text in a string.
     ///
     /// If an invalid expression is given, then an error is returned.
     ///
     /// To configure compilation options for the regex, use the
     /// [`RegexBuilder`](struct.RegexBuilder.html).
     ///
     /// This is shorthand for `RegexBuilder::new().build(pattern)`.
     pub fn new(pattern: &str) -> Result<Regex, Error> {
         RegexBuilder::new().build(pattern)
     }

     /// Returns true if and only if the regex matches the subject string given.
     ///
     /// This is equivalent to `is_match_at(subject, 0)`.
     ///
     /// # Errors
     ///
     /// Any error reported by the underlying search is returned to the caller.
     ///
     /// # Example
     ///
     /// Test if some text contains at least one word with exactly 13 ASCII word
     /// bytes:
     ///
     /// ```rust
     /// # fn example() -> Result<(), ::pcre2::Error> {
     /// use pcre2::bytes::Regex;
     ///
     /// let text = b"I categorically deny having triskaidekaphobia.";
     /// assert!(Regex::new(r"\b\w{13}\b")?.is_match(text)?);
     /// # Ok(()) }; example().unwrap()
     /// ```
     pub fn is_match(&self, subject: &[u8]) -> Result<bool, Error> {
         self.is_match_at(subject, 0)
     }

     /// Returns the start and end byte range of the leftmost-first match in
     /// `subject`. If no match exists, then `None` is returned.
     ///
     /// This is equivalent to `find_at(subject, 0)`.
     ///
     /// # Errors
     ///
     /// Any error reported by the underlying search is returned to the caller.
     ///
     /// # Example
     ///
     /// Find the start and end location of the first word with exactly 13
     /// ASCII word bytes:
     ///
     /// ```rust
     /// # fn example() -> Result<(), ::pcre2::Error> {
     /// use pcre2::bytes::Regex;
     ///
     /// let text = b"I categorically deny having triskaidekaphobia.";
     /// let mat = Regex::new(r"\b\w{13}\b")?.find(text)?.unwrap();
     /// assert_eq!((mat.start(), mat.end()), (2, 15));
     /// # Ok(()) }; example().unwrap()
     /// ```
     pub fn find<'s>(
         &self,
         subject: &'s [u8],
     ) -> Result<Option<Match<'s>>, Error> {
         self.find_at(subject, 0)
     }

     /// Returns an iterator over the successive non-overlapping matches in
     /// `subject`. Each match reports its start and end byte indices with
     /// respect to `subject`.
     ///
     /// The match data for the current thread is acquired once, when the
     /// iterator is created, and handed to the iterator.
     ///
     /// # Example
     ///
     /// Find the start and end location of every word with exactly 13 ASCII
     /// word bytes:
     ///
     /// ```rust
     /// # fn example() -> Result<(), ::pcre2::Error> {
     /// use pcre2::bytes::Regex;
     ///
     /// let text = b"Retroactively relinquishing remunerations is reprehensible.";
     /// for result in Regex::new(r"\b\w{13}\b")?.find_iter(text) {
     ///     let mat = result?;
     ///     println!("{:?}", mat);
     /// }
     /// # Ok(()) }; example().unwrap()
     /// ```
     pub fn find_iter<'r, 's>(&'r self, subject: &'s [u8]) -> Matches<'r, 's> {
         let match_data = self.match_data();
         Matches {
             re: self,
             match_data,
             subject,
             last_end: 0,
             last_match: None,
         }
     }

     /// Returns the capture groups of the leftmost-first match in `subject`,
     /// or `None` if there is no match. Capture group `0` always corresponds
     /// to the entire match.
     ///
     /// Every call creates a new set of capture locations; use
     /// `captures_read` with a reusable `CaptureLocations` to avoid that.
     ///
     /// # Examples
     ///
     /// Say you have some text with movie names and their release years,
     /// like "'Citizen Kane' (1941)". It'd be nice if we could search for text
     /// looking like that, while also extracting the movie name and its release
     /// year separately.
     ///
     /// ```rust
     /// # fn example() -> Result<(), ::pcre2::Error> {
     /// use pcre2::bytes::Regex;
     ///
     /// let re = Regex::new(r"'([^']+)'\s+\((\d{4})\)")?;
     /// let text = b"Not my favorite movie: 'Citizen Kane' (1941).";
     /// let caps = re.captures(text)?.unwrap();
     /// assert_eq!(&caps[1], &b"Citizen Kane"[..]);
     /// assert_eq!(&caps[2], &b"1941"[..]);
     /// assert_eq!(&caps[0], &b"'Citizen Kane' (1941)"[..]);
     /// // You can also access the groups by index using the Index notation.
     /// // Note that this will panic on an invalid index.
     /// assert_eq!(&caps[1], b"Citizen Kane");
     /// assert_eq!(&caps[2], b"1941");
     /// assert_eq!(&caps[0], b"'Citizen Kane' (1941)");
     /// # Ok(()) }; example().unwrap()
     /// ```
     ///
     /// Note that the full match is at capture group `0`. Each subsequent
     /// capture group is indexed by the order of its opening `(`.
     ///
     /// We can make this example a bit clearer by using *named* capture groups:
     ///
     /// ```rust
     /// # fn example() -> Result<(), ::pcre2::Error> {
     /// use pcre2::bytes::Regex;
     ///
     /// let re = Regex::new(r"'(?P<title>[^']+)'\s+\((?P<year>\d{4})\)")?;
     /// let text = b"Not my favorite movie: 'Citizen Kane' (1941).";
     /// let caps = re.captures(text)?.unwrap();
     /// assert_eq!(&caps["title"], &b"Citizen Kane"[..]);
     /// assert_eq!(&caps["year"], &b"1941"[..]);
     /// assert_eq!(&caps[0], &b"'Citizen Kane' (1941)"[..]);
     /// // You can also access the groups by name using the Index notation.
     /// // Note that this will panic on an invalid group name.
     /// assert_eq!(&caps["title"], b"Citizen Kane");
     /// assert_eq!(&caps["year"], b"1941");
     /// assert_eq!(&caps[0], b"'Citizen Kane' (1941)");
     /// # Ok(()) }; example().unwrap()
     /// ```
     ///
     /// Here we name the capture groups, which we can access with the `name`
     /// method or the `Index` notation with a `&str`. Note that the named
     /// capture groups are still accessible with `get` or the `Index` notation
     /// with a `usize`.
     ///
     /// The `0`th capture group is always unnamed, so it must always be
     /// accessed with `get(0)` or `[0]`.
     pub fn captures<'s>(
         &self,
         subject: &'s [u8],
     ) -> Result<Option<Captures<'s>>, Error> {
         let mut locs = self.capture_locations();
         if self.captures_read(&mut locs, subject)?.is_none() {
             return Ok(None);
         }
         // The search filled `locs`; hand it over together with the shared
         // name index.
         Ok(Some(Captures {
             subject,
             locs,
             idx: Arc::clone(&self.capture_names_idx),
         }))
     }

     /// Returns an iterator over the capture groups of every non-overlapping
     /// match in `subject`. This is operationally the same as `find_iter`,
     /// except it yields information about capturing group matches.
     ///
     /// # Example
     ///
     /// We can use this to find all movie titles and their release years in
     /// some text, where the movie is formatted like "'Title' (xxxx)":
     ///
     /// ```rust
     /// # fn example() -> Result<(), ::pcre2::Error> {
     /// use std::str;
     ///
     /// use pcre2::bytes::Regex;
     ///
     /// let re = Regex::new(r"'(?P<title>[^']+)'\s+\((?P<year>\d{4})\)")?;
     /// let text = b"'Citizen Kane' (1941), 'The Wizard of Oz' (1939), 'M' (1931).";
     /// for result in re.captures_iter(text) {
     ///     let caps = result?;
     ///     let title = str::from_utf8(&caps["title"]).unwrap();
     ///     let year = str::from_utf8(&caps["year"]).unwrap();
     ///     println!("Movie: {:?}, Released: {:?}", title, year);
     /// }
     /// // Output:
     /// // Movie: Citizen Kane, Released: 1941
     /// // Movie: The Wizard of Oz, Released: 1939
     /// // Movie: M, Released: 1931
     /// # Ok(()) }; example().unwrap()
     /// ```
     pub fn captures_iter<'r, 's>(
         &'r self,
         subject: &'s [u8],
     ) -> CaptureMatches<'r, 's> {
         // Iteration begins at the start of the subject with no prior match.
         CaptureMatches { re: self, subject, last_end: 0, last_match: None }
     }
 }

 /// Advanced or "lower level" search methods.
 impl Regex {
     /// Like `is_match`, except the search begins at byte offset `start`.
     ///
     /// The starting offset matters because the text before it still counts
     /// as context for the search. For example, the `\A` anchor can only match
     /// when `start == 0`.
     ///
     /// # Panics
     ///
     /// Panics if `start` is greater than `subject.len()`.
     pub fn is_match_at(
         &self,
         subject: &[u8],
         start: usize,
     ) -> Result<bool, Error> {
         assert!(
             start <= subject.len(),
             "start ({}) must be <= subject.len() ({})",
             start,
             subject.len()
         );

         // UTF validation is skipped only if the builder disabled it.
         let options =
             if self.config.utf_check { 0 } else { PCRE2_NO_UTF_CHECK };

         let mut scratch = self.match_data().borrow_mut();
         // SAFETY: The only unsafe PCRE2 option we potentially use here is
         // PCRE2_NO_UTF_CHECK, and that only occurs if the caller executes the
         // `disable_utf_check` method, which propagates the safety contract to
         // the caller.
         let found =
             unsafe { scratch.find(&self.code, subject, start, options) }?;
         Ok(found)
     }

     /// Returns the same as find, but starts the search at the given
     /// offset.
     ///
     /// The significance of the starting point is that it takes the surrounding
     /// context into consideration. For example, the `\A` anchor can only
     /// match when `start == 0`.
     ///
     /// The search uses the calling thread's match data.
     ///
     /// # Panics
     ///
     /// Panics if `start` is greater than `subject.len()`.
     pub fn find_at<'s>(
         &self,
         subject: &'s [u8],
         start: usize,
     ) -> Result<Option<Match<'s>>, Error> {
         self.find_at_with_match_data(self.match_data(), subject, start)
     }

     /// Like find_at, but accepts match data instead of acquiring one itself.
     ///
     /// This is useful for implementing the iterator, which permits avoiding
     /// the synchronization overhead of acquiring the match data.
     ///
     /// On success the returned `Match` refers to the *whole* subject together
     /// with the match's start and end offsets, which is the representation
     /// that `Match::as_bytes` expects.
     ///
     /// # Panics
     ///
     /// Panics if `start` is greater than `subject.len()`, or if `match_data`
     /// is already mutably borrowed.
     #[inline(always)]
     fn find_at_with_match_data<'s>(
         &self,
         match_data: &RefCell<MatchData>,
         subject: &'s [u8],
         start: usize,
     ) -> Result<Option<Match<'s>>, Error> {
         assert!(
             start <= subject.len(),
             "start ({}) must be <= subject.len() ({})",
             start,
             subject.len()
         );

         let mut options = 0;
         if !self.config.utf_check {
             options |= PCRE2_NO_UTF_CHECK;
         }

         let mut match_data = match_data.borrow_mut();
         // SAFETY: The only unsafe PCRE2 option we potentially use here is
         // PCRE2_NO_UTF_CHECK, and that only occurs if the caller executes the
         // `disable_utf_check` method, which propagates the safety contract to
         // the caller.
         if unsafe { !match_data.find(&self.code, subject, start, options)? } {
             return Ok(None);
         }
         // The first ovector pair holds the bounds of the overall match.
         let ovector = match_data.ovector();
         let (s, e) = (ovector[0], ovector[1]);
         // Pass the full subject: `s` and `e` are offsets into it, and
         // `Match::as_bytes` applies them itself. Handing over the already
         // sliced `&subject[s..e]` would make `as_bytes` slice a second time,
         // which panics or yields the wrong bytes whenever `s > 0`.
         Ok(Some(Match::new(subject, s, e)))
     }

     /// This is like `captures`, but uses
     /// [`CaptureLocations`](struct.CaptureLocations.html)
     /// instead of
     /// [`Captures`](struct.Captures.html) in order to amortize allocations.
     ///
     /// To create a `CaptureLocations` value, use the
     /// `Regex::capture_locations` method.
     ///
     /// This returns the overall match if this was successful, which is always
     /// equivalent to the `0`th capture group.
     ///
     /// This is equivalent to `captures_read_at(locs, subject, 0)`.
     pub fn captures_read<'s>(
         &self,
         locs: &mut CaptureLocations,
         subject: &'s [u8],
     ) -> Result<Option<Match<'s>>, Error> {
         self.captures_read_at(locs, subject, 0)
     }

     /// Returns the same as `captures_read`, but starts the search at the given
     /// offset and populates the capture locations given.
     ///
     /// The significance of the starting point is that it takes the surrounding
     /// context into consideration. For example, the `\A` anchor can only
     /// match when `start == 0`.
     ///
     /// The search runs on the match data owned by `locs` rather than on the
     /// calling thread's match data. On success the returned `Match` refers to
     /// the *whole* subject together with the match's start and end offsets,
     /// which is the representation that `Match::as_bytes` expects.
     ///
     /// # Panics
     ///
     /// Panics if `start` is greater than `subject.len()`.
     pub fn captures_read_at<'s>(
         &self,
         locs: &mut CaptureLocations,
         subject: &'s [u8],
         start: usize,
     ) -> Result<Option<Match<'s>>, Error> {
         assert!(
             start <= subject.len(),
             "start ({}) must be <= subject.len() ({})",
             start,
             subject.len()
         );

         let mut options = 0;
         if !self.config.utf_check {
             options |= PCRE2_NO_UTF_CHECK;
         }
         // SAFETY: The only unsafe PCRE2 option we potentially use here is
         // PCRE2_NO_UTF_CHECK, and that only occurs if the caller executes the
         // `disable_utf_check` method, which propagates the safety contract to
         // the caller.
         if unsafe { !locs.data.find(&self.code, subject, start, options)? } {
             return Ok(None);
         }
         // The first ovector pair holds the bounds of the overall match.
         let ovector = locs.data.ovector();
         let (s, e) = (ovector[0], ovector[1]);
         // Pass the full subject: `s` and `e` are offsets into it, and
         // `Match::as_bytes` applies them itself. Handing over the already
         // sliced `&subject[s..e]` would make `as_bytes` slice a second time,
         // which panics or yields the wrong bytes whenever `s > 0`.
         Ok(Some(Match::new(subject, s, e)))
     }
 }

 /// Auxiliary methods.
 impl Regex {
     /// Returns the original pattern string for this regex.
     ///
     /// This is the pattern exactly as it was given to `RegexBuilder::build`.
     pub fn as_str(&self) -> &str {
         &self.pattern
     }

     /// Returns a sequence of all capturing groups and their names, if present.
     ///
     /// The length of the slice returned is always equal to the result of
     /// `captures_len`, which is the number of capturing groups (including the
     /// capturing group for the entire pattern).
     ///
     /// Each entry in the slice is the name of the corresponding capturing
     /// group, if one exists. The first capturing group (at index `0`) is
     /// always unnamed.
     ///
     /// Capturing groups are indexed by the order of the opening parenthesis.
     ///
     /// The names were collected once, when the regex was built; this method
     /// only borrows them.
     pub fn capture_names(&self) -> &[Option<String>] {
         &self.capture_names
     }

     /// Returns the number of capturing groups in the pattern.
     ///
     /// This is always 1 more than the number of syntactic groups in the
     /// pattern, since the first group always corresponds to the entire match.
     ///
     /// # Panics
     ///
     /// Panics if the capture count cannot be obtained from the compiled
     /// pattern.
     pub fn captures_len(&self) -> usize {
         self.code.capture_count().expect("a valid capture count from PCRE2")
     }

     /// Creates an empty set of capture locations for this regex.
     ///
     /// The returned value can be reused across multiple calls to
     /// `captures_read` or `captures_read_at`. It shares this regex's compiled
     /// code and owns newly created match data.
     pub fn capture_locations(&self) -> CaptureLocations {
         let code = Arc::clone(&self.code);
         let data = self.new_match_data();
         CaptureLocations { code, data }
     }

     /// Returns the calling thread's match data for this regex, creating it
     /// first if this thread has none yet.
     fn match_data(&self) -> &RefCell<MatchData> {
         self.match_data.get_or(|| RefCell::new(self.new_match_data()))
     }

     /// Creates new match data for this regex's compiled code, configured
     /// with a copy of the regex's match-time settings.
     fn new_match_data(&self) -> MatchData {
         MatchData::new(self.config.match_config.clone(), &self.code)
     }
 }

 /// CaptureLocations is a low level representation of the raw offsets of each
 /// submatch.
 ///
 /// Primarily, this type is useful when using `Regex` APIs such as
 /// `captures_read`, which permits amortizing the allocation in which capture
 /// match locations are stored.
 ///
 /// In order to build a value of this type, you'll need to call the
 /// `capture_locations` method on the `Regex` being used to execute the search.
 /// The value returned can then be reused in subsequent searches.
 pub struct CaptureLocations {
     /// The compiled pattern these locations belong to; kept so that a clone
     /// can create match data of its own.
     code: Arc<Code>,
     /// The match data whose ovector holds the capture offsets.
     data: MatchData,
 }

 impl Clone for CaptureLocations {
     /// Creates capture locations for the same compiled pattern, backed by
     /// newly created match data that uses a copy of this value's match
     /// configuration.
     fn clone(&self) -> CaptureLocations {
         let config = self.data.config().clone();
         let data = MatchData::new(config, &self.code);
         CaptureLocations { code: Arc::clone(&self.code), data }
     }
 }

 impl fmt::Debug for CaptureLocations {
     /// Renders every raw ovector entry as a list wrapped in
     /// `CaptureLocations(...)`: `Some(offset)` for a set entry and `None` for
     /// an entry equal to `PCRE2_UNSET`.
     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
         let offsets = self.data.ovector().iter().map(|&offset| {
             if offset == PCRE2_UNSET {
                 None
             } else {
                 Some(offset)
             }
         });
         write!(f, "CaptureLocations(")?;
         f.debug_list().entries(offsets).finish()?;
         write!(f, ")")
     }
 }

 impl CaptureLocations {
     /// Returns the start and end positions of the Nth capture group.
     ///
     /// This returns `None` if `i` is not a valid capture group or if the
     /// capture group did not match anything.
     ///
     /// The positions returned are always byte indices with respect to the
     /// original subject string matched.
     #[inline]
     pub fn get(&self, i: usize) -> Option<(usize, usize)> {
         // Group `i` occupies ovector slots `2 * i` and `2 * i + 1`. Checked
         // arithmetic makes an out-of-range `i` yield `None` instead of
         // overflowing, which would panic in debug builds and wrap around to
         // some other group's slots in release builds.
         let start_slot = i.checked_mul(2)?;
         let end_slot = start_slot.checked_add(1)?;

         let ovec = self.data.ovector();
         match (ovec.get(start_slot), ovec.get(end_slot)) {
             // Both ends must be present and set for the group to count.
             (Some(&s), Some(&e))
                 if s != PCRE2_UNSET && e != PCRE2_UNSET =>
             {
                 Some((s, e))
             }
             _ => None,
         }
     }

     /// Returns the total number of capturing groups.
     ///
     /// This is always at least `1` since every regex has at least `1`
     /// capturing group that corresponds to the entire match.
     #[inline]
     pub fn len(&self) -> usize {
         // Every group takes two ovector entries: its start and its end.
         self.data.ovector().len() / 2
     }
 }

 /// Captures represents a group of captured byte strings for a single match.
 ///
 /// The 0th capture always corresponds to the entire match. Each subsequent
 /// index corresponds to the next capture group in the regex. If a capture
 /// group is named, then the matched byte string is *also* available via the
 /// `name` method. (Note that the 0th capture is always unnamed and so must be
 /// accessed with the `get` method.)
 ///
 /// Positions returned from a capture group are always byte indices.
 ///
 /// `'s` is the lifetime of the matched subject string.
 pub struct Captures<'s> {
     /// The full subject string that was searched.
     subject: &'s [u8],
     /// The offsets of every capture group for this match.
     locs: CaptureLocations,
     /// Map from capture group name to capture group index, shared with the
     /// regex that produced this value.
     idx: Arc<HashMap<String, usize>>,
 }

 impl<'s> Captures<'s> {
     /// Returns the match for the capture group at index `i`, or `None` when
     /// `i` is not a capture group of the regex or the group did not
     /// participate in the match.
     ///
     /// # Examples
     ///
     /// Get the text of the match with a default of an empty string if this
     /// group didn't participate in the match:
     ///
     /// ```rust
     /// # fn example() -> Result<(), ::pcre2::Error> {
     /// use pcre2::bytes::Regex;
     ///
     /// let re = Regex::new(r"[a-z]+(?:([0-9]+)|([A-Z]+))")?;
     /// let caps = re.captures(b"abc123")?.unwrap();
     ///
     /// let text1 = caps.get(1).map_or(&b""[..], |m| m.as_bytes());
     /// let text2 = caps.get(2).map_or(&b""[..], |m| m.as_bytes());
     /// assert_eq!(text1, &b"123"[..]);
     /// assert_eq!(text2, &b""[..]);
     /// # Ok(()) }; example().unwrap()
     /// ```
     pub fn get(&self, i: usize) -> Option<Match<'s>> {
         let (s, e) = self.locs.get(i)?;
         Some(Match::new(self.subject, s, e))
     }

     /// Returns the match for the capture group called `name`, or `None` when
     /// no group has that name or the group didn't match anything.
     pub fn name(&self, name: &str) -> Option<Match<'s>> {
         let i = *self.idx.get(name)?;
         self.get(i)
     }

     /// Returns the number of capture groups.
     ///
     /// This is always at least `1`, since every regex has at least one capture
     /// group that corresponds to the full match.
     #[inline]
     pub fn len(&self) -> usize {
         self.locs.len()
     }
 }

 impl<'s> fmt::Debug for Captures<'s> {
     /// Writes `Captures(..)`, with the contents rendered by `CapturesDebug`.
     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
         let groups = CapturesDebug(self);
         let mut tuple = f.debug_tuple("Captures");
         tuple.field(&groups);
         tuple.finish()
     }
 }

 /// Private adapter whose `Debug` impl renders the groups of a `Captures` as
 /// a map from group name (or group index, for unnamed groups) to the escaped
 /// text that the group matched.
 struct CapturesDebug<'c, 's: 'c>(&'c Captures<'s>);

 impl<'c, 's> fmt::Debug for CapturesDebug<'c, 's> {
     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
         // Renders arbitrary bytes as printable ASCII text by applying
         // `std::ascii::escape_default` to every byte.
         fn escape_bytes(bytes: &[u8]) -> String {
             use std::ascii::escape_default;

             // `escape_default` only ever yields ASCII bytes, so each one
             // converts losslessly to a `char`. Collecting straight into
             // the result avoids a temporary `Vec` and `String` per byte.
             bytes
                 .iter()
                 .flat_map(|&b| escape_default(b))
                 .map(char::from)
                 .collect()
         }

         // We'd like to show something nice here, even if it means an
         // allocation to build a reverse index.
         let slot_to_name: HashMap<&usize, &String> =
             self.0.idx.iter().map(|(a, b)| (b, a)).collect();
         let mut map = f.debug_map();
         for slot in 0..self.0.len() {
             // `None` for groups that did not participate in the match.
             let m = self.0.locs.get(slot).map(|(s, e)| {
                 escape_bytes(&self.0.subject[s..e])
             });
             // Prefer the group's name as the key; unnamed groups fall back
             // to their numeric index.
             if let Some(name) = slot_to_name.get(&slot) {
                 map.entry(&name, &m);
             } else {
                 map.entry(&slot, &m);
             }
         }
         map.finish()
     }
 }

 /// Access the bytes matched by a group through its index.
 ///
 /// `'s` is the lifetime of the matched subject string.
 ///
 /// Because of how `Index` is defined, the slice returned here borrows from
 /// the `Captures` value and therefore cannot outlive it, even though the
 /// bytes belong to the subject. Call `get()` instead when the result has
 /// to live for as long as the subject does.
 ///
 /// # Panics
 ///
 /// If there is no group at the given index.
 impl<'s> Index<usize> for Captures<'s> {
     type Output = [u8];

     fn index(&self, i: usize) -> &[u8] {
         match self.get(i) {
             Some(m) => m.as_bytes(),
             None => panic!("no group at index '{}'", i),
         }
     }
 }

 /// Access the bytes matched by a group through its name.
 ///
 /// `'s` is the lifetime of the matched subject string and `'i` is the
 /// lifetime of the group name used as the index.
 ///
 /// Because of how `Index` is defined, the slice returned here borrows from
 /// the `Captures` value and therefore cannot outlive it, even though the
 /// bytes belong to the subject. Call `name` instead when the result has to
 /// live for as long as the subject does.
 ///
 /// # Panics
 ///
 /// If there is no group named by the given value.
 impl<'s, 'i> Index<&'i str> for Captures<'s> {
     type Output = [u8];

     fn index<'a>(&'a self, name: &'i str) -> &'a [u8] {
         match self.name(name) {
             Some(m) => m.as_bytes(),
             None => panic!("no group named '{}'", name),
         }
     }
 }

 /// An iterator over all non-overlapping matches for a particular subject
 /// string.
 ///
 /// The iterator yields matches (if no error occurred while searching)
 /// corresponding to the start and end of the match. The indices are byte
 /// offsets. The iterator stops when no more matches can be found.
 ///
 /// Each item is a `Result`, so a failed search is reported to the caller
 /// as an `Err` item.
 ///
 /// `'r` is the lifetime of the compiled regular expression and `'s` is the
 /// lifetime of the subject string.
 pub struct Matches<'r, 's> {
     /// The regex used to run every search.
     re: &'r Regex,
     /// The match data handed to every search.
     match_data: &'r RefCell<MatchData>,
     /// The subject string being searched.
     subject: &'s [u8],
     /// The byte offset at which the next search starts.
     last_end: usize,
     /// The end offset of the most recently yielded match, if any. Used to
     /// reject an empty match that directly follows the previous match.
     last_match: Option<usize>,
 }

 impl<'r, 's> Iterator for Matches<'r, 's> {
     type Item = Result<Match<'s>, Error>;

     /// Advances to the next non-overlapping match.
     ///
     /// Returns `None` once no further match can be found. If a search
     /// fails, the error is yielded once and the iterator is exhausted
     /// afterwards: a failed search does not move the search position, so
     /// running it again could not make progress.
     fn next(&mut self) -> Option<Result<Match<'s>, Error>> {
         loop {
             if self.last_end > self.subject.len() {
                 return None;
             }
             let res = self.re.find_at_with_match_data(
                 self.match_data,
                 self.subject,
                 self.last_end,
             );
             let m = match res {
                 Err(err) => {
                     // Nothing was consumed, so another call would merely
                     // repeat this search from the same offset. Park the
                     // cursor past the end so that callers which skip over
                     // errors terminate instead of spinning forever.
                     self.last_end = self.subject.len() + 1;
                     return Some(Err(err));
                 }
                 Ok(None) => return None,
                 Ok(Some(m)) => m,
             };
             if m.start() == m.end() {
                 // This is an empty match. To ensure we make progress, start
                 // the next search at the smallest possible starting
                 // position of the next match following this one.
                 self.last_end = m.end() + 1;
                 // Don't accept empty matches immediately following a match.
                 // Just move on to the next match.
                 if Some(m.end()) == self.last_match {
                     continue;
                 }
             } else {
                 self.last_end = m.end();
             }
             self.last_match = Some(m.end());
             return Some(Ok(m));
         }
     }
 }

 /// An iterator that yields all non-overlapping capture groups matching a
 /// particular regular expression.
 ///
 /// The iterator stops when no more matches can be found.
 ///
 /// Each item is a `Result`, so a failed search is reported to the caller
 /// as an `Err` item.
 ///
 /// `'r` is the lifetime of the compiled regular expression and `'s` is the
 /// lifetime of the subject string.
 pub struct CaptureMatches<'r, 's> {
     /// The regex used to run every search.
     re: &'r Regex,
     /// The subject string being searched.
     subject: &'s [u8],
     /// The byte offset at which the next search starts.
     last_end: usize,
     /// The end offset of the most recently yielded match, if any. Used to
     /// reject an empty match that directly follows the previous match.
     last_match: Option<usize>,
 }

 impl<'r, 's> Iterator for CaptureMatches<'r, 's> {
     type Item = Result<Captures<'s>, Error>;

     /// Advances to the next non-overlapping match and returns its capture
     /// groups.
     ///
     /// Returns `None` once no further match can be found. If a search
     /// fails, the error is yielded once and the iterator is exhausted
     /// afterwards: a failed search does not move the search position, so
     /// running it again could not make progress.
     fn next(&mut self) -> Option<Result<Captures<'s>, Error>> {
         loop {
             if self.last_end > self.subject.len() {
                 return None;
             }
             // Fresh locations for every attempt: on success they are moved
             // into the `Captures` value handed to the caller.
             let mut locs = self.re.capture_locations();
             let res = self.re.captures_read_at(
                 &mut locs,
                 self.subject,
                 self.last_end,
             );
             let m = match res {
                 Err(err) => {
                     // Nothing was consumed, so another call would merely
                     // repeat this search from the same offset. Park the
                     // cursor past the end so that callers which skip over
                     // errors terminate instead of spinning forever.
                     self.last_end = self.subject.len() + 1;
                     return Some(Err(err));
                 }
                 Ok(None) => return None,
                 Ok(Some(m)) => m,
             };
             if m.start() == m.end() {
                 // This is an empty match. To ensure we make progress, start
                 // the next search at the smallest possible starting
                 // position of the next match following this one.
                 self.last_end = m.end() + 1;
                 // Don't accept empty matches immediately following a match.
                 // Just move on to the next match.
                 if Some(m.end()) == self.last_match {
                     continue;
                 }
             } else {
                 self.last_end = m.end();
             }
             self.last_match = Some(m.end());
             return Some(Ok(Captures {
                 subject: self.subject,
                 locs,
                 idx: Arc::clone(&self.re.capture_names_idx),
             }));
         }
     }
 }

 #[cfg(test)]
 mod tests {
     use super::{Regex, RegexBuilder};
     use crate::is_jit_available;

     /// Shorthand used by the tests to view a string as its raw bytes.
     fn b(string: &str) -> &[u8] {
         str::as_bytes(string)
     }

     /// Gathers the `(start, end)` offsets of every match that `find_iter`
     /// yields for `subject`, panicking if any search reports an error.
     fn find_iter_tuples(re: &Regex, subject: &[u8]) -> Vec<(usize, usize)> {
         re.find_iter(subject)
             .map(|result| {
                 let m = result.unwrap();
                 (m.start(), m.end())
             })
             .collect()
     }

     /// Gathers the `(start, end)` offsets of the overall match (group 0)
     /// for every item that `captures_iter` yields for `subject`, panicking
     /// if any search reports an error.
     fn cap_iter_tuples(re: &Regex, subject: &[u8]) -> Vec<(usize, usize)> {
         re.captures_iter(subject)
             .map(|result| {
                 let caps = result.unwrap();
                 let whole = caps.get(0).unwrap();
                 (whole.start(), whole.end())
             })
             .collect()
     }

     #[test]
     fn caseless() {
         // Case-insensitive matching of an ASCII letter, no other options.
         let ascii = RegexBuilder::new().caseless(true).build("a").unwrap();
         let matched = ascii.is_match(b("A")).unwrap();
         assert!(matched);

         // The same check for a non-ASCII letter, this time with UCP on.
         let greek = RegexBuilder::new()
             .caseless(true)
             .ucp(true)
             .build("β")
             .unwrap();
         let matched = greek.is_match(b("Β")).unwrap();
         assert!(matched);
     }

     #[test]
     fn crlf() {
         let re = RegexBuilder::new().crlf(true).build("a$").unwrap();
         // With CRLF mode on, `$` must match in front of the "\r\n" pair,
         // leaving just the "a" in the match.
         let found = re.find(b("a\r\n")).unwrap().unwrap();
         let span = found.as_pair();
         assert_eq!(span, (0, 1));
     }

     #[test]
     fn dotall() {
         // The same pattern is compiled twice, once per flag setting.
         let build = |enabled: bool| {
             RegexBuilder::new().dotall(enabled).build(".").unwrap()
         };

         // `.` matches a line feed only when dotall is switched on.
         let plain = build(false);
         assert!(!plain.is_match(b("\n")).unwrap());

         let dotall = build(true);
         assert!(dotall.is_match(b("\n")).unwrap());
     }

     #[test]
     fn extended() {
         // In extended mode the spaces inside the pattern must not be
         // matched literally, so "a b c" has to match "abc".
         let re = RegexBuilder::new().extended(true).build("a b c").unwrap();
         let matched = re.is_match(b("abc")).unwrap();
         assert!(matched);
     }

     #[test]
     fn multi_line() {
         let subject = b("foo\nabc\nbar");

         // With the flag off, the anchors must not match at the line
         // boundaries in the middle of the subject.
         let single = RegexBuilder::new()
             .multi_line(false)
             .build("^abc$")
             .unwrap();
         assert!(!single.is_match(subject).unwrap());

         // With the flag on, the middle line matches on its own.
         let multi = RegexBuilder::new()
             .multi_line(true)
             .build("^abc$")
             .unwrap();
         assert!(multi.is_match(subject).unwrap());
     }

     #[test]
     fn ucp() {
         // Compiles `\w` with the given UCP setting and reports whether it
         // matches a non-ASCII letter.
         let word_matches = |enabled: bool| -> bool {
             let re = RegexBuilder::new()
                 .ucp(enabled)
                 .build(r"\w")
                 .unwrap();
             re.is_match(b("β")).unwrap()
         };

         assert!(!word_matches(false));
         assert!(word_matches(true));
     }

     #[test]
     fn utf() {
         // Without UTF mode, `.` consumes a single byte of the subject.
         let bytewise = RegexBuilder::new().utf(false).build(".").unwrap();
         let m = bytewise.find(b("β")).unwrap().unwrap();
         assert_eq!(m.as_pair(), (0, 1));

         // With UTF mode, `.` consumes the whole two-byte sequence.
         let charwise = RegexBuilder::new().utf(true).build(".").unwrap();
         let m = charwise.find(b("β")).unwrap().unwrap();
         assert_eq!(m.as_pair(), (0, 2));
     }

     #[test]
     fn jit4lyfe() {
         let jit_available = is_jit_available();
         let built = RegexBuilder::new().jit(true).build(r"\w");
         if jit_available {
             let re = built.unwrap();
             assert!(re.is_match(b("a")).unwrap());
         } else {
             // Requiring the JIT when it isn't available has to be reported
             // as a build error.
             built.unwrap_err();
         }
     }

     // Counterpart to jit4lyfe: there the JIT is required and its absence is
     // an error, whereas here the JIT is merely requested. When it isn't
     // available, this test simply runs without it, and everything must
     // still work.
     #[test]
     fn jit_if_available() {
         let re = RegexBuilder::new()
             .jit_if_available(true)
             .build(r"\w")
             .unwrap();
         let matched = re.is_match(b("a")).unwrap();
         assert!(matched);
     }

     // Regression test for a segfault inside the pcre2 library:
     // https://github.com/BurntSushi/rust-pcre2/issues/10
     #[test]
     fn jit_test_lazy_alloc_subject() {
         let re = RegexBuilder::new()
             .jit_if_available(true)
             .build(r"xxxx|xxxx|xxxx")
             .unwrap();

         // An empty vector that has never allocated is the subject.
         let subject = Vec::<u8>::new();
         let matched = re.is_match(&subject).unwrap();
         assert!(!matched);
     }

     #[test]
     fn utf_with_invalid_data() {
         // With default options, a lone 0xFF byte is matched like any other.
         let bytewise = RegexBuilder::new().build(r".").unwrap();
         let m = bytewise.find(b"\xFF").unwrap().unwrap();
         assert_eq!(m.as_pair(), (0, 1));

         // In UTF mode the same subject has to be reported as an error.
         let utf = RegexBuilder::new().utf(true).build(r".").unwrap();
         let outcome = utf.find(b"\xFF");
         assert!(outcome.is_err());
     }

     #[test]
     fn capture_names() {
         let re = RegexBuilder::new()
             .build(
                 r"(?P<foo>abc)|(def)|(?P<a>ghi)|(?P<springsteen>jkl)"
             )
             .unwrap();

         // One entry per group, in group order; unnamed groups (including
         // group 0) are `None`.
         let expected = vec![
             None,
             Some("foo".to_string()),
             None,
             Some("a".to_string()),
             Some("springsteen".to_string()),
         ];
         assert_eq!(re.capture_names().to_vec(), expected);

         // The internal name-to-index map only holds the named groups.
         let idx = &re.capture_names_idx;
         assert_eq!(idx.len(), 3);
         assert_eq!(idx["foo"], 1);
         assert_eq!(idx["a"], 3);
         assert_eq!(idx["springsteen"], 4);
     }

     #[test]
     fn captures_get() {
         let re = Regex::new(r"[a-z]+(?:([0-9]+)|([A-Z]+))").unwrap();
         let caps = re.captures(b"abc123").unwrap().unwrap();

         // Group 1 (digits) took part in the match; group 2 (upper-case
         // letters) did not, so it falls back to the empty default.
         let digits = caps.get(1).map(|m| m.as_bytes()).unwrap_or(&b""[..]);
         let uppers = caps.get(2).map(|m| m.as_bytes()).unwrap_or(&b""[..]);
         assert_eq!(digits, &b"123"[..]);
         assert_eq!(uppers, &b""[..]);
     }

     #[test]
     fn find_iter_empty() {
         let re = Regex::new(r"(?m:^)").unwrap();
         // Runs `find_iter` over `subject` and compares the spans it yields
         // with the expected list of empty matches.
         let check = |subject: &[u8], expected: Vec<(usize, usize)>| {
             assert_eq!(find_iter_tuples(&re, subject), expected);
         };

         check(b"", vec![(0, 0)]);
         check(b"\n", vec![(0, 0)]);
         check(b"\n\n", vec![(0, 0), (1, 1)]);
         check(b"\na\n", vec![(0, 0), (1, 1)]);
         check(b"\na\n\n", vec![(0, 0), (1, 1), (3, 3)]);
     }

     #[test]
     fn captures_iter_empty() {
         let re = Regex::new(r"(?m:^)").unwrap();
         // Runs `captures_iter` over `subject` and compares the spans of
         // group 0 with the expected list of empty matches.
         let check = |subject: &[u8], expected: Vec<(usize, usize)>| {
             assert_eq!(cap_iter_tuples(&re, subject), expected);
         };

         check(b"", vec![(0, 0)]);
         check(b"\n", vec![(0, 0)]);
         check(b"\n\n", vec![(0, 0), (1, 1)]);
         check(b"\na\n", vec![(0, 0), (1, 1)]);
         check(b"\na\n\n", vec![(0, 0), (1, 1), (3, 3)]);
     }

     // Checks that `max_jit_stack_size` has an observable effect: a tiny
     // limit makes a demanding search fail, and a generous limit lets the
     // very same search succeed. Only meaningful when the JIT is available.
     #[test]
     fn max_jit_stack_size_does_something() {
         if !is_jit_available() {
             return;
         }

         let hundred = "\
             ABCDEFGHIJKLMNOPQRSTUVWXY\
             ABCDEFGHIJKLMNOPQRSTUVWXY\
             ABCDEFGHIJKLMNOPQRSTUVWXY\
             ABCDEFGHIJKLMNOPQRSTUVWXY\
         ";
         // `repeat` already produces an owned `String`.
         let hay = hundred.repeat(100);

         // First, try a regex that checks that we can blow the JIT stack limit.
         let re = RegexBuilder::new()
             .ucp(true)
             .jit(true)
             .max_jit_stack_size(Some(1))
             .build(r"((((\w{10})){100}))+")
             .unwrap();
         let err = match re.is_match(hay.as_bytes()) {
             // Skip this test, since for some reason we weren't able to blow
             // the stack limit.
             Ok(_) => return,
             Err(err) => err,
         };
         assert!(err.to_string().contains("JIT stack limit reached"));

         // Now bump up the JIT stack limit and check that it succeeds.
         let re = RegexBuilder::new()
             .ucp(true)
             .jit(true)
             .max_jit_stack_size(Some(1<<20))
             .build(r"((((\w{10})){100}))+")
             .unwrap();
         assert!(re.is_match(hay.as_bytes()).unwrap());
     }
 }