| use std::cell::RefCell; |
| use std::collections::HashMap; |
| use std::fmt; |
| use std::ops::Index; |
| use std::sync::Arc; |
| |
| use log::debug; |
| use pcre2_sys::{ |
| PCRE2_CASELESS, PCRE2_DOTALL, PCRE2_EXTENDED, PCRE2_MULTILINE, |
| PCRE2_UCP, PCRE2_UTF, PCRE2_NO_UTF_CHECK, PCRE2_UNSET, |
| PCRE2_NEWLINE_ANYCRLF, |
| }; |
| use thread_local::CachedThreadLocal; |
| |
| use crate::error::Error; |
| use crate::ffi::{Code, CompileContext, MatchConfig, MatchData}; |
| |
/// A single occurrence of a regex match inside a subject string.
///
/// A `Match` stores a reference to subject bytes together with the byte
/// offsets delimiting the matched text. `as_bytes` slices the stored bytes
/// with those offsets, so the offsets must be valid indices into them.
///
/// The lifetime parameter `'s` is the lifetime of the borrowed subject bytes.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub struct Match<'s> {
    subject: &'s [u8],
    start: usize,
    end: usize,
}

impl<'s> Match<'s> {
    /// The byte offset in the subject at which this match begins.
    #[inline]
    pub fn start(&self) -> usize {
        self.start
    }

    /// The byte offset in the subject at which this match ends.
    #[inline]
    pub fn end(&self) -> usize {
        self.end
    }

    /// The bytes of the subject covered by this match, i.e. the range
    /// `start..end` of the stored subject.
    #[inline]
    pub fn as_bytes(&self) -> &'s [u8] {
        let (from, to) = (self.start, self.end);
        &self.subject[from..to]
    }

    /// Builds a match over `subject` spanning the byte range `start..end`.
    fn new(subject: &'s [u8], start: usize, end: usize) -> Match<'s> {
        Self { subject, start, end }
    }

    /// Test helper returning the `(start, end)` offsets as a tuple.
    #[cfg(test)]
    fn as_pair(&self) -> (usize, usize) {
        let Match { start, end, .. } = *self;
        (start, end)
    }
}
| |
| #[derive(Clone, Debug)] |
| struct Config { |
| /// PCRE2_CASELESS |
| caseless: bool, |
| /// PCRE2_DOTALL |
| dotall: bool, |
| /// PCRE2_EXTENDED |
| extended: bool, |
| /// PCRE2_MULTILINE |
| multi_line: bool, |
| /// PCRE2_NEWLINE_ANYCRLF |
| crlf: bool, |
| /// PCRE2_UCP |
| ucp: bool, |
| /// PCRE2_UTF |
| utf: bool, |
| /// PCRE2_NO_UTF_CHECK |
| utf_check: bool, |
| /// use pcre2_jit_compile |
| jit: JITChoice, |
| /// Match-time specific configuration knobs. |
| match_config: MatchConfig, |
| } |
| |
/// The JIT compilation policy applied when a regex is built.
#[derive(Clone, Debug)]
enum JITChoice {
    /// Skip JIT compilation entirely.
    Never,
    /// Require JIT compilation; a JIT failure fails the build.
    Always,
    /// Try JIT compilation; on failure, quietly keep the non-JIT code.
    Attempt,
}
| |
| impl Default for Config { |
| fn default() -> Config { |
| Config { |
| caseless: false, |
| dotall: false, |
| extended: false, |
| multi_line: false, |
| crlf: false, |
| ucp: false, |
| utf: false, |
| utf_check: true, |
| jit: JITChoice::Never, |
| match_config: MatchConfig::default(), |
| } |
| } |
| } |
| |
| /// A builder for configuring the compilation of a PCRE2 regex. |
| #[derive(Clone, Debug)] |
| pub struct RegexBuilder { |
| config: Config, |
| } |
| |
| impl RegexBuilder { |
| /// Create a new builder with a default configuration. |
| pub fn new() -> RegexBuilder { |
| RegexBuilder { config: Config::default() } |
| } |
| |
| /// Compile the given pattern into a PCRE regex using the current |
| /// configuration. |
| /// |
| /// If there was a problem compiling the pattern, then an error is |
| /// returned. |
| pub fn build(&self, pattern: &str) -> Result<Regex, Error> { |
| let mut options = 0; |
| if self.config.caseless { |
| options |= PCRE2_CASELESS; |
| } |
| if self.config.dotall { |
| options |= PCRE2_DOTALL; |
| } |
| if self.config.extended { |
| options |= PCRE2_EXTENDED; |
| } |
| if self.config.multi_line { |
| options |= PCRE2_MULTILINE; |
| } |
| if self.config.ucp { |
| options |= PCRE2_UCP; |
| options |= PCRE2_UTF; |
| } |
| if self.config.utf { |
| options |= PCRE2_UTF; |
| } |
| |
| let mut ctx = CompileContext::new(); |
| if self.config.crlf { |
| ctx.set_newline(PCRE2_NEWLINE_ANYCRLF) |
| .expect("PCRE2_NEWLINE_ANYCRLF is a legal value"); |
| } |
| |
| let mut code = Code::new(pattern, options, ctx)?; |
| match self.config.jit { |
| JITChoice::Never => {} // fallthrough |
| JITChoice::Always => { |
| code.jit_compile()?; |
| } |
| JITChoice::Attempt => { |
| if let Err(err) = code.jit_compile() { |
| debug!("JIT compilation failed: {}", err); |
| } |
| } |
| } |
| let capture_names = code.capture_names()?; |
| let mut idx = HashMap::new(); |
| for (i, group) in capture_names.iter().enumerate() { |
| if let Some(ref name) = *group { |
| idx.insert(name.to_string(), i); |
| } |
| } |
| Ok(Regex { |
| config: Arc::new(self.config.clone()), |
| pattern: pattern.to_string(), |
| code: Arc::new(code), |
| capture_names: Arc::new(capture_names), |
| capture_names_idx: Arc::new(idx), |
| match_data: CachedThreadLocal::new(), |
| }) |
| } |
| |
| /// Enables case insensitive matching. |
| /// |
| /// If the `utf` option is also set, then Unicode case folding is used |
| /// to determine case insensitivity. When the `utf` option is not set, |
| /// then only standard ASCII case insensitivity is considered. |
| /// |
| /// This option corresponds to the `i` flag. |
| pub fn caseless(&mut self, yes: bool) -> &mut RegexBuilder { |
| self.config.caseless = yes; |
| self |
| } |
| |
| /// Enables "dot all" matching. |
| /// |
| /// When enabled, the `.` metacharacter in the pattern matches any |
| /// character, include `\n`. When disabled (the default), `.` will match |
| /// any character except for `\n`. |
| /// |
| /// This option corresponds to the `s` flag. |
| pub fn dotall(&mut self, yes: bool) -> &mut RegexBuilder { |
| self.config.dotall = yes; |
| self |
| } |
| |
| /// Enable "extended" mode in the pattern, where whitespace is ignored. |
| /// |
| /// This option corresponds to the `x` flag. |
| pub fn extended(&mut self, yes: bool) -> &mut RegexBuilder { |
| self.config.extended = yes; |
| self |
| } |
| |
| /// Enable multiline matching mode. |
| /// |
| /// When enabled, the `^` and `$` anchors will match both at the beginning |
| /// and end of a subject string, in addition to matching at the start of |
| /// a line and the end of a line. When disabled, the `^` and `$` anchors |
| /// will only match at the beginning and end of a subject string. |
| /// |
| /// This option corresponds to the `m` flag. |
| pub fn multi_line(&mut self, yes: bool) -> &mut RegexBuilder { |
| self.config.multi_line = yes; |
| self |
| } |
| |
| /// Enable matching of CRLF as a line terminator. |
| /// |
| /// When enabled, anchors such as `^` and `$` will match any of the |
| /// following as a line terminator: `\r`, `\n` or `\r\n`. |
| /// |
| /// This is disabled by default, in which case, only `\n` is recognized as |
| /// a line terminator. |
| pub fn crlf(&mut self, yes: bool) -> &mut RegexBuilder { |
| self.config.crlf = yes; |
| self |
| } |
| |
| /// Enable Unicode matching mode. |
| /// |
| /// When enabled, the following patterns become Unicode aware: `\b`, `\B`, |
| /// `\d`, `\D`, `\s`, `\S`, `\w`, `\W`. |
| /// |
| /// When set, this implies UTF matching mode. It is not possible to enable |
| /// Unicode matching mode without enabling UTF matching mode. |
| /// |
| /// This is disabled by default. |
| pub fn ucp(&mut self, yes: bool) -> &mut RegexBuilder { |
| self.config.ucp = yes; |
| self |
| } |
| |
| /// Enable UTF matching mode. |
| /// |
| /// When enabled, characters are treated as sequences of code units that |
| /// make up a single codepoint instead of as single bytes. For example, |
| /// this will cause `.` to match any single UTF-8 encoded codepoint, where |
| /// as when this is disabled, `.` will any single byte (except for `\n` in |
| /// both cases, unless "dot all" mode is enabled). |
| /// |
| /// Note that when UTF matching mode is enabled, every search performed |
| /// will do a UTF-8 validation check, which can impact performance. The |
| /// UTF-8 check can be disabled via the `disable_utf_check` option, but it |
| /// is undefined behavior to enable UTF matching mode and search invalid |
| /// UTF-8. |
| /// |
| /// This is disabled by default. |
| pub fn utf(&mut self, yes: bool) -> &mut RegexBuilder { |
| self.config.utf = yes; |
| self |
| } |
| |
| /// When UTF matching mode is enabled, this will disable the UTF checking |
| /// that PCRE2 will normally perform automatically. If UTF matching mode |
| /// is not enabled, then this has no effect. |
| /// |
| /// UTF checking is enabled by default when UTF matching mode is enabled. |
| /// If UTF matching mode is enabled and UTF checking is enabled, then PCRE2 |
| /// will return an error if you attempt to search a subject string that is |
| /// not valid UTF-8. |
| /// |
| /// # Safety |
| /// |
| /// It is undefined behavior to disable the UTF check in UTF matching mode |
| /// and search a subject string that is not valid UTF-8. When the UTF check |
| /// is disabled, callers must guarantee that the subject string is valid |
| /// UTF-8. |
| pub unsafe fn disable_utf_check(&mut self) -> &mut RegexBuilder { |
| self.config.utf_check = false; |
| self |
| } |
| |
| /// Enable PCRE2's JIT and return an error if it's not available. |
| /// |
| /// This generally speeds up matching quite a bit. The downside is that it |
| /// can increase the time it takes to compile a pattern. |
| /// |
| /// If the JIT isn't available or if JIT compilation returns an error, then |
| /// regex compilation will fail with the corresponding error. |
| /// |
| /// This is disabled by default, and always overrides `jit_if_available`. |
| pub fn jit(&mut self, yes: bool) -> &mut RegexBuilder { |
| if yes { |
| self.config.jit = JITChoice::Always; |
| } else { |
| self.config.jit = JITChoice::Never; |
| } |
| self |
| } |
| |
| /// Enable PCRE2's JIT if it's available. |
| /// |
| /// This generally speeds up matching quite a bit. The downside is that it |
| /// can increase the time it takes to compile a pattern. |
| /// |
| /// If the JIT isn't available or if JIT compilation returns an error, |
| /// then a debug message with the error will be emitted and the regex will |
| /// otherwise silently fall back to non-JIT matching. |
| /// |
| /// This is disabled by default, and always overrides `jit`. |
| pub fn jit_if_available(&mut self, yes: bool) -> &mut RegexBuilder { |
| if yes { |
| self.config.jit = JITChoice::Attempt; |
| } else { |
| self.config.jit = JITChoice::Never; |
| } |
| self |
| } |
| |
| /// Set the maximum size of PCRE2's JIT stack, in bytes. If the JIT is |
| /// not enabled, then this has no effect. |
| /// |
| /// When `None` is given, no custom JIT stack will be created, and instead, |
| /// the default JIT stack is used. When the default is used, its maximum |
| /// size is 32 KB. |
| /// |
| /// When this is set, then a new JIT stack will be created with the given |
| /// maximum size as its limit. |
| /// |
| /// Increasing the stack size can be useful for larger regular expressions. |
| /// |
| /// By default, this is set to `None`. |
| pub fn max_jit_stack_size( |
| &mut self, |
| bytes: Option<usize>, |
| ) -> &mut RegexBuilder { |
| self.config.match_config.max_jit_stack_size = bytes; |
| self |
| } |
| } |
| |
| /// A compiled PCRE2 regular expression. |
| /// |
| /// This regex is safe to use from multiple threads simultaneously. For top |
| /// performance, it is better to clone a new regex for each thread. |
| pub struct Regex { |
| /// The configuration used to build the regex. |
| config: Arc<Config>, |
| /// The original pattern string. |
| pattern: String, |
| /// The underlying compiled PCRE2 object. |
| code: Arc<Code>, |
| /// The capture group names for this regex. |
| capture_names: Arc<Vec<Option<String>>>, |
| /// A map from capture group name to capture group index. |
| capture_names_idx: Arc<HashMap<String, usize>>, |
| /// Mutable scratch data used by PCRE2 during matching. |
| /// |
| /// We use the same strategy as Rust's regex crate here, such that each |
| /// thread gets its own match data to support using a Regex object from |
| /// multiple threads simultaneously. If some match data doesn't exist for |
| /// a thread, then a new one is created on demand. |
| match_data: CachedThreadLocal<RefCell<MatchData>>, |
| } |
| |
| impl Clone for Regex { |
| fn clone(&self) -> Regex { |
| Regex { |
| config: Arc::clone(&self.config), |
| pattern: self.pattern.clone(), |
| code: Arc::clone(&self.code), |
| capture_names: Arc::clone(&self.capture_names), |
| capture_names_idx: Arc::clone(&self.capture_names_idx), |
| match_data: CachedThreadLocal::new(), |
| } |
| } |
| } |
| |
| impl fmt::Debug for Regex { |
| fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { |
| write!(f, "Regex({:?})", self.pattern) |
| } |
| } |
| |
| impl Regex { |
| /// Compiles a regular expression using the default configuration. |
| /// |
| /// Once compiled, it can be used repeatedly to search, split or replace |
| /// text in a string. |
| /// |
| /// If an invalid expression is given, then an error is returned. |
| /// |
| /// To configure compilation options for the regex, use the |
| /// [`RegexBuilder`](struct.RegexBuilder.html). |
| pub fn new(pattern: &str) -> Result<Regex, Error> { |
| RegexBuilder::new().build(pattern) |
| } |
| |
| /// Returns true if and only if the regex matches the subject string given. |
| /// |
| /// # Example |
| /// |
| /// Test if some text contains at least one word with exactly 13 ASCII word |
| /// bytes: |
| /// |
| /// ```rust |
| /// # fn example() -> Result<(), ::pcre2::Error> { |
| /// use pcre2::bytes::Regex; |
| /// |
| /// let text = b"I categorically deny having triskaidekaphobia."; |
| /// assert!(Regex::new(r"\b\w{13}\b")?.is_match(text)?); |
| /// # Ok(()) }; example().unwrap() |
| /// ``` |
| pub fn is_match(&self, subject: &[u8]) -> Result<bool, Error> { |
| self.is_match_at(subject, 0) |
| } |
| |
| /// Returns the start and end byte range of the leftmost-first match in |
| /// `subject`. If no match exists, then `None` is returned. |
| /// |
| /// # Example |
| /// |
| /// Find the start and end location of the first word with exactly 13 |
| /// ASCII word bytes: |
| /// |
| /// ```rust |
| /// # fn example() -> Result<(), ::pcre2::Error> { |
| /// use pcre2::bytes::Regex; |
| /// |
| /// let text = b"I categorically deny having triskaidekaphobia."; |
| /// let mat = Regex::new(r"\b\w{13}\b")?.find(text)?.unwrap(); |
| /// assert_eq!((mat.start(), mat.end()), (2, 15)); |
| /// # Ok(()) }; example().unwrap() |
| /// ``` |
| pub fn find<'s>( |
| &self, |
| subject: &'s [u8], |
| ) -> Result<Option<Match<'s>>, Error> { |
| self.find_at(subject, 0) |
| } |
| |
| /// Returns an iterator for each successive non-overlapping match in |
| /// `subject`, returning the start and end byte indices with respect to |
| /// `subject`. |
| /// |
| /// # Example |
| /// |
| /// Find the start and end location of every word with exactly 13 ASCII |
| /// word bytes: |
| /// |
| /// ```rust |
| /// # fn example() -> Result<(), ::pcre2::Error> { |
| /// use pcre2::bytes::Regex; |
| /// |
| /// let text = b"Retroactively relinquishing remunerations is reprehensible."; |
| /// for result in Regex::new(r"\b\w{13}\b")?.find_iter(text) { |
| /// let mat = result?; |
| /// println!("{:?}", mat); |
| /// } |
| /// # Ok(()) }; example().unwrap() |
| /// ``` |
| pub fn find_iter<'r, 's>(&'r self, subject: &'s [u8]) -> Matches<'r, 's> { |
| Matches { |
| re: self, |
| match_data: self.match_data(), |
| subject: subject, |
| last_end: 0, |
| last_match: None, |
| } |
| } |
| |
| /// Returns the capture groups corresponding to the leftmost-first |
| /// match in `subject`. Capture group `0` always corresponds to the entire |
| /// match. If no match is found, then `None` is returned. |
| /// |
| /// # Examples |
| /// |
| /// Say you have some text with movie names and their release years, |
| /// like "'Citizen Kane' (1941)". It'd be nice if we could search for text |
| /// looking like that, while also extracting the movie name and its release |
| /// year separately. |
| /// |
| /// ```rust |
| /// # fn example() -> Result<(), ::pcre2::Error> { |
| /// use pcre2::bytes::Regex; |
| /// |
| /// let re = Regex::new(r"'([^']+)'\s+\((\d{4})\)")?; |
| /// let text = b"Not my favorite movie: 'Citizen Kane' (1941)."; |
| /// let caps = re.captures(text)?.unwrap(); |
| /// assert_eq!(&caps[1], &b"Citizen Kane"[..]); |
| /// assert_eq!(&caps[2], &b"1941"[..]); |
| /// assert_eq!(&caps[0], &b"'Citizen Kane' (1941)"[..]); |
| /// // You can also access the groups by index using the Index notation. |
| /// // Note that this will panic on an invalid index. |
| /// assert_eq!(&caps[1], b"Citizen Kane"); |
| /// assert_eq!(&caps[2], b"1941"); |
| /// assert_eq!(&caps[0], b"'Citizen Kane' (1941)"); |
| /// # Ok(()) }; example().unwrap() |
| /// ``` |
| /// |
| /// Note that the full match is at capture group `0`. Each subsequent |
| /// capture group is indexed by the order of its opening `(`. |
| /// |
| /// We can make this example a bit clearer by using *named* capture groups: |
| /// |
| /// ```rust |
| /// # fn example() -> Result<(), ::pcre2::Error> { |
| /// use pcre2::bytes::Regex; |
| /// |
| /// let re = Regex::new(r"'(?P<title>[^']+)'\s+\((?P<year>\d{4})\)")?; |
| /// let text = b"Not my favorite movie: 'Citizen Kane' (1941)."; |
| /// let caps = re.captures(text)?.unwrap(); |
| /// assert_eq!(&caps["title"], &b"Citizen Kane"[..]); |
| /// assert_eq!(&caps["year"], &b"1941"[..]); |
| /// assert_eq!(&caps[0], &b"'Citizen Kane' (1941)"[..]); |
| /// // You can also access the groups by name using the Index notation. |
| /// // Note that this will panic on an invalid group name. |
| /// assert_eq!(&caps["title"], b"Citizen Kane"); |
| /// assert_eq!(&caps["year"], b"1941"); |
| /// assert_eq!(&caps[0], b"'Citizen Kane' (1941)"); |
| /// # Ok(()) }; example().unwrap() |
| /// ``` |
| /// |
| /// Here we name the capture groups, which we can access with the `name` |
| /// method or the `Index` notation with a `&str`. Note that the named |
| /// capture groups are still accessible with `get` or the `Index` notation |
| /// with a `usize`. |
| /// |
| /// The `0`th capture group is always unnamed, so it must always be |
| /// accessed with `get(0)` or `[0]`. |
| pub fn captures<'s>( |
| &self, |
| subject: &'s [u8], |
| ) -> Result<Option<Captures<'s>>, Error> { |
| let mut locs = self.capture_locations(); |
| Ok(self.captures_read(&mut locs, subject)?.map(move |_| Captures { |
| subject: subject, |
| locs: locs, |
| idx: Arc::clone(&self.capture_names_idx), |
| })) |
| } |
| |
| /// Returns an iterator over all the non-overlapping capture groups matched |
| /// in `subject`. This is operationally the same as `find_iter`, except it |
| /// yields information about capturing group matches. |
| /// |
| /// # Example |
| /// |
| /// We can use this to find all movie titles and their release years in |
| /// some text, where the movie is formatted like "'Title' (xxxx)": |
| /// |
| /// ```rust |
| /// # fn example() -> Result<(), ::pcre2::Error> { |
| /// use std::str; |
| /// |
| /// use pcre2::bytes::Regex; |
| /// |
| /// let re = Regex::new(r"'(?P<title>[^']+)'\s+\((?P<year>\d{4})\)")?; |
| /// let text = b"'Citizen Kane' (1941), 'The Wizard of Oz' (1939), 'M' (1931)."; |
| /// for result in re.captures_iter(text) { |
| /// let caps = result?; |
| /// let title = str::from_utf8(&caps["title"]).unwrap(); |
| /// let year = str::from_utf8(&caps["year"]).unwrap(); |
| /// println!("Movie: {:?}, Released: {:?}", title, year); |
| /// } |
| /// // Output: |
| /// // Movie: Citizen Kane, Released: 1941 |
| /// // Movie: The Wizard of Oz, Released: 1939 |
| /// // Movie: M, Released: 1931 |
| /// # Ok(()) }; example().unwrap() |
| /// ``` |
| pub fn captures_iter<'r, 's>( |
| &'r self, |
| subject: &'s [u8], |
| ) -> CaptureMatches<'r, 's> { |
| CaptureMatches { |
| re: self, |
| subject: subject, |
| last_end: 0, |
| last_match: None, |
| } |
| } |
| } |
| |
| /// Advanced or "lower level" search methods. |
| impl Regex { |
| /// Returns the same as is_match, but starts the search at the given |
| /// offset. |
| /// |
| /// The significance of the starting point is that it takes the surrounding |
| /// context into consideration. For example, the `\A` anchor can only |
| /// match when `start == 0`. |
| pub fn is_match_at( |
| &self, |
| subject: &[u8], |
| start: usize, |
| ) -> Result<bool, Error> { |
| assert!( |
| start <= subject.len(), |
| "start ({}) must be <= subject.len() ({})", |
| start, |
| subject.len() |
| ); |
| |
| let mut options = 0; |
| if !self.config.utf_check { |
| options |= PCRE2_NO_UTF_CHECK; |
| } |
| |
| let match_data = self.match_data(); |
| let mut match_data = match_data.borrow_mut(); |
| // SAFETY: The only unsafe PCRE2 option we potentially use here is |
| // PCRE2_NO_UTF_CHECK, and that only occurs if the caller executes the |
| // `disable_utf_check` method, which propagates the safety contract to |
| // the caller. |
| Ok(unsafe { match_data.find(&self.code, subject, start, options)? }) |
| } |
| |
| /// Returns the same as find, but starts the search at the given |
| /// offset. |
| /// |
| /// The significance of the starting point is that it takes the surrounding |
| /// context into consideration. For example, the `\A` anchor can only |
| /// match when `start == 0`. |
| pub fn find_at<'s>( |
| &self, |
| subject: &'s [u8], |
| start: usize, |
| ) -> Result<Option<Match<'s>>, Error> { |
| self.find_at_with_match_data(self.match_data(), subject, start) |
| } |
| |
| /// Like find_at, but accepts match data instead of acquiring one itself. |
| /// |
| /// This is useful for implementing the iterator, which permits avoiding |
| /// the synchronization overhead of acquiring the match data. |
| #[inline(always)] |
| fn find_at_with_match_data<'s>( |
| &self, |
| match_data: &RefCell<MatchData>, |
| subject: &'s [u8], |
| start: usize, |
| ) -> Result<Option<Match<'s>>, Error> { |
| assert!( |
| start <= subject.len(), |
| "start ({}) must be <= subject.len() ({})", |
| start, |
| subject.len() |
| ); |
| |
| let mut options = 0; |
| if !self.config.utf_check { |
| options |= PCRE2_NO_UTF_CHECK; |
| } |
| |
| let mut match_data = match_data.borrow_mut(); |
| // SAFETY: The only unsafe PCRE2 option we potentially use here is |
| // PCRE2_NO_UTF_CHECK, and that only occurs if the caller executes the |
| // `disable_utf_check` method, which propagates the safety contract to |
| // the caller. |
| if unsafe { !match_data.find(&self.code, subject, start, options)? } { |
| return Ok(None); |
| } |
| let ovector = match_data.ovector(); |
| let (s, e) = (ovector[0], ovector[1]); |
| Ok(Some(Match::new(&subject[s..e], s, e))) |
| } |
| |
| /// This is like `captures`, but uses |
| /// [`CaptureLocations`](struct.CaptureLocations.html) |
| /// instead of |
| /// [`Captures`](struct.Captures.html) in order to amortize allocations. |
| /// |
| /// To create a `CaptureLocations` value, use the |
| /// `Regex::capture_locations` method. |
| /// |
| /// This returns the overall match if this was successful, which is always |
| /// equivalent to the `0`th capture group. |
| pub fn captures_read<'s>( |
| &self, |
| locs: &mut CaptureLocations, |
| subject: &'s [u8], |
| ) -> Result<Option<Match<'s>>, Error> { |
| self.captures_read_at(locs, subject, 0) |
| } |
| |
| /// Returns the same as `captures_read`, but starts the search at the given |
| /// offset and populates the capture locations given. |
| /// |
| /// The significance of the starting point is that it takes the surrounding |
| /// context into consideration. For example, the `\A` anchor can only |
| /// match when `start == 0`. |
| pub fn captures_read_at<'s>( |
| &self, |
| locs: &mut CaptureLocations, |
| subject: &'s [u8], |
| start: usize, |
| ) -> Result<Option<Match<'s>>, Error> { |
| assert!( |
| start <= subject.len(), |
| "start ({}) must be <= subject.len() ({})", |
| start, |
| subject.len() |
| ); |
| |
| let mut options = 0; |
| if !self.config.utf_check { |
| options |= PCRE2_NO_UTF_CHECK; |
| } |
| // SAFETY: The only unsafe PCRE2 option we potentially use here is |
| // PCRE2_NO_UTF_CHECK, and that only occurs if the caller executes the |
| // `disable_utf_check` method, which propagates the safety contract to |
| // the caller. |
| if unsafe { !locs.data.find(&self.code, subject, start, options)? } { |
| return Ok(None); |
| } |
| let ovector = locs.data.ovector(); |
| let (s, e) = (ovector[0], ovector[1]); |
| Ok(Some(Match::new(&subject[s..e], s, e))) |
| } |
| } |
| |
| /// Auxiliary methods. |
| impl Regex { |
| /// Returns the original pattern string for this regex. |
| pub fn as_str(&self) -> &str { |
| &self.pattern |
| } |
| |
| /// Returns a sequence of all capturing groups and their names, if present. |
| /// |
| /// The length of the slice returned is always equal to the result of |
| /// `captures_len`, which is the number of capturing groups (including the |
| /// capturing group for the entire pattern). |
| /// |
| /// Each entry in the slice is the name of the corresponding capturing |
| /// group, if one exists. The first capturing group (at index `0`) is |
| /// always unnamed. |
| /// |
| /// Capturing groups are indexed by the order of the opening parenthesis. |
| pub fn capture_names(&self) -> &[Option<String>] { |
| &self.capture_names |
| } |
| |
| /// Returns the number of capturing groups in the pattern. |
| /// |
| /// This is always 1 more than the number of syntactic groups in the |
| /// pattern, since the first group always corresponds to the entire match. |
| pub fn captures_len(&self) -> usize { |
| self.code.capture_count().expect("a valid capture count from PCRE2") |
| } |
| |
| /// Returns an empty set of capture locations that can be reused in |
| /// multiple calls to `captures_read` or `captures_read_at`. |
| pub fn capture_locations(&self) -> CaptureLocations { |
| CaptureLocations { |
| code: Arc::clone(&self.code), |
| data: self.new_match_data(), |
| } |
| } |
| |
| fn match_data(&self) -> &RefCell<MatchData> { |
| let create = || RefCell::new(self.new_match_data()); |
| self.match_data.get_or(create) |
| } |
| |
| fn new_match_data(&self) -> MatchData { |
| MatchData::new(self.config.match_config.clone(), &self.code) |
| } |
| } |
| |
| /// CaptureLocations is a low level representation of the raw offsets of each |
| /// submatch. |
| /// |
| /// Primarily, this type is useful when using `Regex` APIs such as |
| /// `captures_read`, which permits amortizing the allocation in which capture |
| /// match locations are stored. |
| /// |
| /// In order to build a value of this type, you'll need to call the |
| /// `capture_locations` method on the `Regex` being used to execute the search. |
| /// The value returned can then be reused in subsequent searches. |
| pub struct CaptureLocations { |
| code: Arc<Code>, |
| data: MatchData, |
| } |
| |
| impl Clone for CaptureLocations { |
| fn clone(&self) -> CaptureLocations { |
| CaptureLocations { |
| code: Arc::clone(&self.code), |
| data: MatchData::new(self.data.config().clone(), &self.code), |
| } |
| } |
| } |
| |
| impl fmt::Debug for CaptureLocations { |
| fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { |
| let mut offsets: Vec<Option<usize>> = vec![]; |
| for &offset in self.data.ovector() { |
| if offset == PCRE2_UNSET { |
| offsets.push(None); |
| } else { |
| offsets.push(Some(offset)); |
| } |
| } |
| write!(f, "CaptureLocations(")?; |
| f.debug_list().entries(offsets).finish()?; |
| write!(f, ")") |
| } |
| } |
| |
| impl CaptureLocations { |
| /// Returns the start and end positions of the Nth capture group. |
| /// |
| /// This returns `None` if `i` is not a valid capture group or if the |
| /// capture group did not match anything. |
| /// |
| /// The positions returned are always byte indices with respect to the |
| /// original subject string matched. |
| #[inline] |
| pub fn get(&self, i: usize) -> Option<(usize, usize)> { |
| let ovec = self.data.ovector(); |
| let s = match ovec.get(i * 2) { |
| None => return None, |
| Some(&s) if s == PCRE2_UNSET => return None, |
| Some(&s) => s, |
| }; |
| let e = match ovec.get(i * 2 + 1) { |
| None => return None, |
| Some(&e) if e == PCRE2_UNSET => return None, |
| Some(&e) => e, |
| }; |
| Some((s, e)) |
| } |
| |
| /// Returns the total number of capturing groups. |
| /// |
| /// This is always at least `1` since every regex has at least `1` |
| /// capturing group that corresponds to the entire match. |
| #[inline] |
| pub fn len(&self) -> usize { |
| self.data.ovector().len() / 2 |
| } |
| } |
| |
| /// Captures represents a group of captured byte strings for a single match. |
| /// |
| /// The 0th capture always corresponds to the entire match. Each subsequent |
| /// index corresponds to the next capture group in the regex. If a capture |
| /// group is named, then the matched byte string is *also* available via the |
| /// `name` method. (Note that the 0th capture is always unnamed and so must be |
| /// accessed with the `get` method.) |
| /// |
| /// Positions returned from a capture group are always byte indices. |
| /// |
| /// `'s` is the lifetime of the matched subject string. |
| pub struct Captures<'s> { |
| subject: &'s [u8], |
| locs: CaptureLocations, |
| idx: Arc<HashMap<String, usize>>, |
| } |
| |
| impl<'s> Captures<'s> { |
| /// Returns the match associated with the capture group at index `i`. If |
| /// `i` does not correspond to a capture group, or if the capture group |
| /// did not participate in the match, then `None` is returned. |
| /// |
| /// # Examples |
| /// |
| /// Get the text of the match with a default of an empty string if this |
| /// group didn't participate in the match: |
| /// |
| /// ```rust |
| /// # fn example() -> Result<(), ::pcre2::Error> { |
| /// use pcre2::bytes::Regex; |
| /// |
| /// let re = Regex::new(r"[a-z]+(?:([0-9]+)|([A-Z]+))")?; |
| /// let caps = re.captures(b"abc123")?.unwrap(); |
| /// |
| /// let text1 = caps.get(1).map_or(&b""[..], |m| m.as_bytes()); |
| /// let text2 = caps.get(2).map_or(&b""[..], |m| m.as_bytes()); |
| /// assert_eq!(text1, &b"123"[..]); |
| /// assert_eq!(text2, &b""[..]); |
| /// # Ok(()) }; example().unwrap() |
| /// ``` |
| pub fn get(&self, i: usize) -> Option<Match<'s>> { |
| self.locs.get(i).map(|(s, e)| Match::new(self.subject, s, e)) |
| } |
| |
| /// Returns the match for the capture group named `name`. If `name` isn't a |
| /// valid capture group or didn't match anything, then `None` is returned. |
| pub fn name(&self, name: &str) -> Option<Match<'s>> { |
| self.idx.get(name).and_then(|&i| self.get(i)) |
| } |
| |
| /// Returns the number of captured groups. |
| /// |
| /// This is always at least `1`, since every regex has at least one capture |
| /// group that corresponds to the full match. |
| #[inline] |
| pub fn len(&self) -> usize { |
| self.locs.len() |
| } |
| } |
| |
| impl<'s> fmt::Debug for Captures<'s> { |
| fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { |
| f.debug_tuple("Captures").field(&CapturesDebug(self)).finish() |
| } |
| } |
| |
| struct CapturesDebug<'c, 's: 'c>(&'c Captures<'s>); |
| |
| impl<'c, 's> fmt::Debug for CapturesDebug<'c, 's> { |
| fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { |
| fn escape_bytes(bytes: &[u8]) -> String { |
| let mut s = String::new(); |
| for &b in bytes { |
| s.push_str(&escape_byte(b)); |
| } |
| s |
| } |
| |
| fn escape_byte(byte: u8) -> String { |
| use std::ascii::escape_default; |
| |
| let escaped: Vec<u8> = escape_default(byte).collect(); |
| String::from_utf8_lossy(&escaped).into_owned() |
| } |
| |
| // We'd like to show something nice here, even if it means an |
| // allocation to build a reverse index. |
| let slot_to_name: HashMap<&usize, &String> = |
| self.0.idx.iter().map(|(a, b)| (b, a)).collect(); |
| let mut map = f.debug_map(); |
| for slot in 0..self.0.len() { |
| let m = self.0.locs.get(slot).map(|(s, e)| { |
| escape_bytes(&self.0.subject[s..e]) |
| }); |
| if let Some(name) = slot_to_name.get(&slot) { |
| map.entry(&name, &m); |
| } else { |
| map.entry(&slot, &m); |
| } |
| } |
| map.finish() |
| } |
| } |
| |
| /// Get a group by index. |
| /// |
| /// `'s` is the lifetime of the matched subject string. |
| /// |
| /// The subject can't outlive the `Captures` object if this method is |
| /// used, because of how `Index` is defined (normally `a[i]` is part |
| /// of `a` and can't outlive it); to do that, use `get()` instead. |
| /// |
| /// # Panics |
| /// |
| /// If there is no group at the given index. |
| impl<'s> Index<usize> for Captures<'s> { |
| type Output = [u8]; |
| |
| fn index(&self, i: usize) -> &[u8] { |
| self.get(i).map(|m| m.as_bytes()) |
| .unwrap_or_else(|| panic!("no group at index '{}'", i)) |
| } |
| } |
| |
| /// Get a group by name. |
| /// |
| /// `'s` is the lifetime of the matched subject string and `'i` is the lifetime |
| /// of the group name (the index). |
| /// |
| /// The text can't outlive the `Captures` object if this method is |
| /// used, because of how `Index` is defined (normally `a[i]` is part |
| /// of `a` and can't outlive it); to do that, use `name` instead. |
| /// |
| /// # Panics |
| /// |
| /// If there is no group named by the given value. |
| impl<'s, 'i> Index<&'i str> for Captures<'s> { |
| type Output = [u8]; |
| |
| fn index<'a>(&'a self, name: &'i str) -> &'a [u8] { |
| self.name(name).map(|m| m.as_bytes()) |
| .unwrap_or_else(|| panic!("no group named '{}'", name)) |
| } |
| } |
| |
| /// An iterator over all non-overlapping matches for a particular subject |
| /// string. |
| /// |
| /// The iterator yields matches (if no error occurred while searching) |
| /// corresponding to the start and end of the match. The indices are byte |
| /// offsets. The iterator stops when no more matches can be found. |
| /// |
| /// `'r` is the lifetime of the compiled regular expression and `'s` is the |
| /// lifetime of the subject string. |
| pub struct Matches<'r, 's> { |
| re: &'r Regex, |
| match_data: &'r RefCell<MatchData>, |
| subject: &'s [u8], |
| last_end: usize, |
| last_match: Option<usize>, |
| } |
| |
| impl<'r, 's> Iterator for Matches<'r, 's> { |
| type Item = Result<Match<'s>, Error>; |
| |
| fn next(&mut self) -> Option<Result<Match<'s>, Error>> { |
| if self.last_end > self.subject.len() { |
| return None; |
| } |
| let res = self.re.find_at_with_match_data( |
| self.match_data, |
| self.subject, |
| self.last_end, |
| ); |
| let m = match res { |
| Err(err) => return Some(Err(err)), |
| Ok(None) => return None, |
| Ok(Some(m)) => m, |
| }; |
| if m.start() == m.end() { |
| // This is an empty match. To ensure we make progress, start |
| // the next search at the smallest possible starting position |
| // of the next match following this one. |
| self.last_end = m.end() + 1; |
| // Don't accept empty matches immediately following a match. |
| // Just move on to the next match. |
| if Some(m.end()) == self.last_match { |
| return self.next(); |
| } |
| } else { |
| self.last_end = m.end(); |
| } |
| self.last_match = Some(m.end()); |
| Some(Ok(m)) |
| } |
| } |
| |
| /// An iterator that yields all non-overlapping capture groups matching a |
| /// particular regular expression. |
| /// |
| /// The iterator stops when no more matches can be found. |
| /// |
| /// `'r` is the lifetime of the compiled regular expression and `'s` is the |
| /// lifetime of the subject string. |
| pub struct CaptureMatches<'r, 's> { |
| re: &'r Regex, |
| subject: &'s [u8], |
| last_end: usize, |
| last_match: Option<usize>, |
| } |
| |
| impl<'r, 's> Iterator for CaptureMatches<'r, 's> { |
| type Item = Result<Captures<'s>, Error>; |
| |
| fn next(&mut self) -> Option<Result<Captures<'s>, Error>> { |
| if self.last_end > self.subject.len() { |
| return None; |
| } |
| let mut locs = self.re.capture_locations(); |
| let res = self.re.captures_read_at( |
| &mut locs, |
| self.subject, |
| self.last_end, |
| ); |
| let m = match res { |
| Err(err) => return Some(Err(err)), |
| Ok(None) => return None, |
| Ok(Some(m)) => m, |
| }; |
| if m.start() == m.end() { |
| // This is an empty match. To ensure we make progress, start |
| // the next search at the smallest possible starting position |
| // of the next match following this one. |
| self.last_end = m.end() + 1; |
| // Don't accept empty matches immediately following a match. |
| // Just move on to the next match. |
| if Some(m.end()) == self.last_match { |
| return self.next(); |
| } |
| } else { |
| self.last_end = m.end(); |
| } |
| self.last_match = Some(m.end()); |
| Some(Ok(Captures { |
| subject: self.subject, |
| locs: locs, |
| idx: Arc::clone(&self.re.capture_names_idx), |
| })) |
| } |
| } |
| |
#[cfg(test)]
mod tests {
    use super::{Regex, RegexBuilder};
    use crate::is_jit_available;

    /// Shorthand for viewing a string as its bytes.
    fn b(string: &str) -> &[u8] {
        string.as_bytes()
    }

    /// Collects the `(start, end)` offsets of every match yielded by
    /// `find_iter`, panicking if any search reports an error.
    fn find_iter_tuples(re: &Regex, subject: &[u8]) -> Vec<(usize, usize)> {
        re.find_iter(subject)
            .map(|result| {
                let m = result.unwrap();
                (m.start(), m.end())
            })
            .collect()
    }

    /// Collects the `(start, end)` offsets of group 0 for every item yielded
    /// by `captures_iter`, panicking if any search reports an error.
    fn cap_iter_tuples(re: &Regex, subject: &[u8]) -> Vec<(usize, usize)> {
        re.captures_iter(subject)
            .map(|result| {
                let caps = result.unwrap();
                let m = caps.get(0).unwrap();
                (m.start(), m.end())
            })
            .collect()
    }

    #[test]
    fn caseless() {
        let re = RegexBuilder::new().caseless(true).build("a").unwrap();
        assert!(re.is_match(b("A")).unwrap());

        let re = RegexBuilder::new()
            .caseless(true)
            .ucp(true)
            .build("β")
            .unwrap();
        assert!(re.is_match(b("Β")).unwrap());
    }

    #[test]
    fn crlf() {
        let re = RegexBuilder::new().crlf(true).build("a$").unwrap();
        let m = re.find(b("a\r\n")).unwrap().unwrap();
        assert_eq!(m.as_pair(), (0, 1));
    }

    #[test]
    fn dotall() {
        let re = RegexBuilder::new().dotall(false).build(".").unwrap();
        assert!(!re.is_match(b("\n")).unwrap());

        let re = RegexBuilder::new().dotall(true).build(".").unwrap();
        assert!(re.is_match(b("\n")).unwrap());
    }

    #[test]
    fn extended() {
        let re = RegexBuilder::new().extended(true).build("a b c").unwrap();
        assert!(re.is_match(b("abc")).unwrap());
    }

    #[test]
    fn multi_line() {
        let re = RegexBuilder::new()
            .multi_line(false)
            .build("^abc$")
            .unwrap();
        assert!(!re.is_match(b("foo\nabc\nbar")).unwrap());

        let re = RegexBuilder::new()
            .multi_line(true)
            .build("^abc$")
            .unwrap();
        assert!(re.is_match(b("foo\nabc\nbar")).unwrap());
    }

    #[test]
    fn ucp() {
        let re = RegexBuilder::new().ucp(false).build(r"\w").unwrap();
        assert!(!re.is_match(b("β")).unwrap());

        let re = RegexBuilder::new().ucp(true).build(r"\w").unwrap();
        assert!(re.is_match(b("β")).unwrap());
    }

    #[test]
    fn utf() {
        let re = RegexBuilder::new().utf(false).build(".").unwrap();
        assert_eq!(re.find(b("β")).unwrap().unwrap().as_pair(), (0, 1));

        let re = RegexBuilder::new().utf(true).build(".").unwrap();
        assert_eq!(re.find(b("β")).unwrap().unwrap().as_pair(), (0, 2));
    }

    #[test]
    fn jit4lyfe() {
        if is_jit_available() {
            let re = RegexBuilder::new().jit(true).build(r"\w").unwrap();
            assert!(re.is_match(b("a")).unwrap());
        } else {
            // Check that if JIT isn't enabled, then we get an error if we
            // require JIT.
            RegexBuilder::new().jit(true).build(r"\w").unwrap_err();
        }
    }

    // Unlike jit4lyfe, this tests that everything works when requesting the
    // JIT only if it's available. In jit4lyfe, we require the JIT or fail.
    // If the JIT isn't available, then in this test, we simply don't use it.
    #[test]
    fn jit_if_available() {
        let re = RegexBuilder::new()
            .jit_if_available(true)
            .build(r"\w")
            .unwrap();
        assert!(re.is_match(b("a")).unwrap());
    }

    // This tests a regression that caused a segfault in the pcre2 library:
    // https://github.com/BurntSushi/rust-pcre2/issues/10
    #[test]
    fn jit_test_lazy_alloc_subject() {
        let subject: Vec<u8> = vec![];

        let re = RegexBuilder::new()
            .jit_if_available(true)
            .build(r"xxxx|xxxx|xxxx")
            .unwrap();
        assert!(!re.is_match(&subject).unwrap());
    }

    #[test]
    fn utf_with_invalid_data() {
        let re = RegexBuilder::new().build(r".").unwrap();
        assert_eq!(re.find(b"\xFF").unwrap().unwrap().as_pair(), (0, 1));

        let re = RegexBuilder::new().utf(true).build(r".").unwrap();
        assert!(re.find(b"\xFF").is_err());
    }

    #[test]
    fn capture_names() {
        let re = RegexBuilder::new()
            .build(r"(?P<foo>abc)|(def)|(?P<a>ghi)|(?P<springsteen>jkl)")
            .unwrap();
        assert_eq!(
            re.capture_names().to_vec(),
            vec![
                None,
                Some("foo".to_string()),
                None,
                Some("a".to_string()),
                Some("springsteen".to_string()),
            ]
        );

        // Test our internal map as well.
        assert_eq!(re.capture_names_idx.len(), 3);
        assert_eq!(re.capture_names_idx["foo"], 1);
        assert_eq!(re.capture_names_idx["a"], 3);
        assert_eq!(re.capture_names_idx["springsteen"], 4);
    }

    #[test]
    fn captures_get() {
        let re = Regex::new(r"[a-z]+(?:([0-9]+)|([A-Z]+))").unwrap();
        let caps = re.captures(b"abc123").unwrap().unwrap();

        let text1 = caps.get(1).map_or(&b""[..], |m| m.as_bytes());
        let text2 = caps.get(2).map_or(&b""[..], |m| m.as_bytes());
        assert_eq!(text1, &b"123"[..]);
        assert_eq!(text2, &b""[..]);
    }

    #[test]
    fn find_iter_empty() {
        let re = Regex::new(r"(?m:^)").unwrap();
        assert_eq!(find_iter_tuples(&re, b""), vec![(0, 0)]);
        assert_eq!(find_iter_tuples(&re, b"\n"), vec![(0, 0)]);
        assert_eq!(find_iter_tuples(&re, b"\n\n"), vec![(0, 0), (1, 1)]);
        assert_eq!(find_iter_tuples(&re, b"\na\n"), vec![(0, 0), (1, 1)]);
        assert_eq!(
            find_iter_tuples(&re, b"\na\n\n"),
            vec![(0, 0), (1, 1), (3, 3)]
        );
    }

    #[test]
    fn captures_iter_empty() {
        let re = Regex::new(r"(?m:^)").unwrap();
        assert_eq!(cap_iter_tuples(&re, b""), vec![(0, 0)]);
        assert_eq!(cap_iter_tuples(&re, b"\n"), vec![(0, 0)]);
        assert_eq!(cap_iter_tuples(&re, b"\n\n"), vec![(0, 0), (1, 1)]);
        assert_eq!(cap_iter_tuples(&re, b"\na\n"), vec![(0, 0), (1, 1)]);
        assert_eq!(
            cap_iter_tuples(&re, b"\na\n\n"),
            vec![(0, 0), (1, 1), (3, 3)]
        );
    }

    #[test]
    fn max_jit_stack_size_does_something() {
        if !is_jit_available() {
            return;
        }

        let hundred = "\
            ABCDEFGHIJKLMNOPQRSTUVWXY\
            ABCDEFGHIJKLMNOPQRSTUVWXY\
            ABCDEFGHIJKLMNOPQRSTUVWXY\
            ABCDEFGHIJKLMNOPQRSTUVWXY\
        ";
        // `repeat` already yields an owned `String`; no `format!` needed.
        let hay = hundred.repeat(100);

        // First, try a regex that checks that we can blow the JIT stack limit.
        let re = RegexBuilder::new()
            .ucp(true)
            .jit(true)
            .max_jit_stack_size(Some(1))
            .build(r"((((\w{10})){100}))+")
            .unwrap();
        let err = match re.is_match(hay.as_bytes()) {
            // Skip this test, since for some reason we weren't able to blow
            // the stack limit.
            Ok(_) => return,
            Err(err) => err,
        };
        assert!(err.to_string().contains("JIT stack limit reached"));

        // Now bump up the JIT stack limit and check that it succeeds.
        let re = RegexBuilder::new()
            .ucp(true)
            .jit(true)
            .max_jit_stack_size(Some(1 << 20))
            .build(r"((((\w{10})){100}))+")
            .unwrap();
        assert!(re.is_match(hay.as_bytes()).unwrap());
    }
}