| use alloc::string::String; |
| |
| use regex_automata::{meta, Input, PatternID, PatternSet, PatternSetIter}; |
| |
| use crate::{bytes::RegexSetBuilder, Error}; |
| |
| /// Match multiple, possibly overlapping, regexes in a single search. |
| /// |
| /// A regex set corresponds to the union of zero or more regular expressions. |
| /// That is, a regex set will match a haystack when at least one of its |
| /// constituent regexes matches. A regex set as its formulated here provides a |
| /// touch more power: it will also report *which* regular expressions in the |
| /// set match. Indeed, this is the key difference between regex sets and a |
| /// single `Regex` with many alternates, since only one alternate can match at |
| /// a time. |
| /// |
| /// For example, consider regular expressions to match email addresses and |
| /// domains: `[a-z]+@[a-z]+\.(com|org|net)` and `[a-z]+\.(com|org|net)`. If a |
| /// regex set is constructed from those regexes, then searching the haystack |
| /// `[email protected]` will report both regexes as matching. Of course, one |
| /// could accomplish this by compiling each regex on its own and doing two |
| /// searches over the haystack. The key advantage of using a regex set is |
| /// that it will report the matching regexes using a *single pass through the |
| /// haystack*. If one has hundreds or thousands of regexes to match repeatedly |
| /// (like a URL router for a complex web application or a user agent matcher), |
| /// then a regex set *can* realize huge performance gains. |
| /// |
| /// Unlike the top-level [`RegexSet`](crate::RegexSet), this `RegexSet` |
| /// searches haystacks with type `&[u8]` instead of `&str`. Consequently, this |
| /// `RegexSet` is permitted to match invalid UTF-8. |
| /// |
| /// # Limitations |
| /// |
| /// Regex sets are limited to answering the following two questions: |
| /// |
| /// 1. Does any regex in the set match? |
| /// 2. If so, which regexes in the set match? |
| /// |
| /// As with the main [`Regex`][crate::bytes::Regex] type, it is cheaper to ask |
| /// (1) instead of (2) since the matching engines can stop after the first |
| /// match is found. |
| /// |
| /// You cannot directly extract [`Match`][crate::bytes::Match] or |
| /// [`Captures`][crate::bytes::Captures] objects from a regex set. If you need |
| /// these operations, the recommended approach is to compile each pattern in |
| /// the set independently and scan the exact same haystack a second time with |
| /// those independently compiled patterns: |
| /// |
| /// ``` |
| /// use regex::bytes::{Regex, RegexSet}; |
| /// |
| /// let patterns = ["foo", "bar"]; |
| /// // Both patterns will match different ranges of this string. |
| /// let hay = b"barfoo"; |
| /// |
| /// // Compile a set matching any of our patterns. |
| /// let set = RegexSet::new(patterns).unwrap(); |
| /// // Compile each pattern independently. |
| /// let regexes: Vec<_> = set |
| /// .patterns() |
| /// .iter() |
| /// .map(|pat| Regex::new(pat).unwrap()) |
| /// .collect(); |
| /// |
| /// // Match against the whole set first and identify the individual |
| /// // matching patterns. |
| /// let matches: Vec<&[u8]> = set |
| /// .matches(hay) |
| /// .into_iter() |
| /// // Dereference the match index to get the corresponding |
| /// // compiled pattern. |
| /// .map(|index| ®exes[index]) |
| /// // To get match locations or any other info, we then have to search the |
| /// // exact same haystack again, using our separately-compiled pattern. |
| /// .map(|re| re.find(hay).unwrap().as_bytes()) |
| /// .collect(); |
| /// |
| /// // Matches arrive in the order the constituent patterns were declared, |
| /// // not the order they appear in the haystack. |
| /// assert_eq!(vec![&b"foo"[..], &b"bar"[..]], matches); |
| /// ``` |
| /// |
| /// # Performance |
| /// |
| /// A `RegexSet` has the same performance characteristics as `Regex`. Namely, |
| /// search takes `O(m * n)` time, where `m` is proportional to the size of the |
| /// regex set and `n` is proportional to the length of the haystack. |
| /// |
| /// # Trait implementations |
| /// |
| /// The `Default` trait is implemented for `RegexSet`. The default value |
| /// is an empty set. An empty set can also be explicitly constructed via |
| /// [`RegexSet::empty`]. |
| /// |
| /// # Example |
| /// |
| /// This shows how the above two regexes (for matching email addresses and |
| /// domains) might work: |
| /// |
| /// ``` |
| /// use regex::bytes::RegexSet; |
| /// |
| /// let set = RegexSet::new(&[ |
| /// r"[a-z]+@[a-z]+\.(com|org|net)", |
| /// r"[a-z]+\.(com|org|net)", |
| /// ]).unwrap(); |
| /// |
| /// // Ask whether any regexes in the set match. |
| /// assert!(set.is_match(b"[email protected]")); |
| /// |
| /// // Identify which regexes in the set match. |
| /// let matches: Vec<_> = set.matches(b"[email protected]").into_iter().collect(); |
| /// assert_eq!(vec![0, 1], matches); |
| /// |
| /// // Try again, but with a haystack that only matches one of the regexes. |
| /// let matches: Vec<_> = set.matches(b"example.com").into_iter().collect(); |
| /// assert_eq!(vec![1], matches); |
| /// |
| /// // Try again, but with a haystack that doesn't match any regex in the set. |
| /// let matches: Vec<_> = set.matches(b"example").into_iter().collect(); |
| /// assert!(matches.is_empty()); |
| /// ``` |
| /// |
| /// Note that it would be possible to adapt the above example to using `Regex` |
| /// with an expression like: |
| /// |
| /// ```text |
| /// (?P<email>[a-z]+@(?P<email_domain>[a-z]+[.](com|org|net)))|(?P<domain>[a-z]+[.](com|org|net)) |
| /// ``` |
| /// |
| /// After a match, one could then inspect the capture groups to figure out |
| /// which alternates matched. The problem is that it is hard to make this |
| /// approach scale when there are many regexes since the overlap between each |
| /// alternate isn't always obvious to reason about. |
| #[derive(Clone)] |
| pub struct RegexSet { |
| pub(crate) meta: meta::Regex, |
| pub(crate) patterns: alloc::sync::Arc<[String]>, |
| } |
| |
| impl RegexSet { |
| /// Create a new regex set with the given regular expressions. |
| /// |
| /// This takes an iterator of `S`, where `S` is something that can produce |
| /// a `&str`. If any of the strings in the iterator are not valid regular |
| /// expressions, then an error is returned. |
| /// |
| /// # Example |
| /// |
| /// Create a new regex set from an iterator of strings: |
| /// |
| /// ``` |
| /// use regex::bytes::RegexSet; |
| /// |
| /// let set = RegexSet::new([r"\w+", r"\d+"]).unwrap(); |
| /// assert!(set.is_match(b"foo")); |
| /// ``` |
| pub fn new<I, S>(exprs: I) -> Result<RegexSet, Error> |
| where |
| S: AsRef<str>, |
| I: IntoIterator<Item = S>, |
| { |
| RegexSetBuilder::new(exprs).build() |
| } |
| |
| /// Create a new empty regex set. |
| /// |
| /// An empty regex never matches anything. |
| /// |
| /// This is a convenience function for `RegexSet::new([])`, but doesn't |
| /// require one to specify the type of the input. |
| /// |
| /// # Example |
| /// |
| /// ``` |
| /// use regex::bytes::RegexSet; |
| /// |
| /// let set = RegexSet::empty(); |
| /// assert!(set.is_empty()); |
| /// // an empty set matches nothing |
| /// assert!(!set.is_match(b"")); |
| /// ``` |
| pub fn empty() -> RegexSet { |
| let empty: [&str; 0] = []; |
| RegexSetBuilder::new(empty).build().unwrap() |
| } |
| |
| /// Returns true if and only if one of the regexes in this set matches |
| /// the haystack given. |
| /// |
| /// This method should be preferred if you only need to test whether any |
| /// of the regexes in the set should match, but don't care about *which* |
| /// regexes matched. This is because the underlying matching engine will |
| /// quit immediately after seeing the first match instead of continuing to |
| /// find all matches. |
| /// |
| /// Note that as with searches using [`Regex`](crate::bytes::Regex), the |
| /// expression is unanchored by default. That is, if the regex does not |
| /// start with `^` or `\A`, or end with `$` or `\z`, then it is permitted |
| /// to match anywhere in the haystack. |
| /// |
| /// # Example |
| /// |
| /// Tests whether a set matches somewhere in a haystack: |
| /// |
| /// ``` |
| /// use regex::bytes::RegexSet; |
| /// |
| /// let set = RegexSet::new([r"\w+", r"\d+"]).unwrap(); |
| /// assert!(set.is_match(b"foo")); |
| /// assert!(!set.is_match("☃".as_bytes())); |
| /// ``` |
| #[inline] |
| pub fn is_match(&self, haystack: &[u8]) -> bool { |
| self.is_match_at(haystack, 0) |
| } |
| |
| /// Returns true if and only if one of the regexes in this set matches the |
| /// haystack given, with the search starting at the offset given. |
| /// |
| /// The significance of the starting point is that it takes the surrounding |
| /// context into consideration. For example, the `\A` anchor can only |
| /// match when `start == 0`. |
| /// |
| /// # Panics |
| /// |
| /// This panics when `start >= haystack.len() + 1`. |
| /// |
| /// # Example |
| /// |
| /// This example shows the significance of `start`. Namely, consider a |
| /// haystack `foobar` and a desire to execute a search starting at offset |
| /// `3`. You could search a substring explicitly, but then the look-around |
| /// assertions won't work correctly. Instead, you can use this method to |
| /// specify the start position of a search. |
| /// |
| /// ``` |
| /// use regex::bytes::RegexSet; |
| /// |
| /// let set = RegexSet::new([r"\bbar\b", r"(?m)^bar$"]).unwrap(); |
| /// let hay = b"foobar"; |
| /// // We get a match here, but it's probably not intended. |
| /// assert!(set.is_match(&hay[3..])); |
| /// // No match because the assertions take the context into account. |
| /// assert!(!set.is_match_at(hay, 3)); |
| /// ``` |
| #[inline] |
| pub fn is_match_at(&self, haystack: &[u8], start: usize) -> bool { |
| self.meta.is_match(Input::new(haystack).span(start..haystack.len())) |
| } |
| |
| /// Returns the set of regexes that match in the given haystack. |
| /// |
| /// The set returned contains the index of each regex that matches in |
| /// the given haystack. The index is in correspondence with the order of |
| /// regular expressions given to `RegexSet`'s constructor. |
| /// |
| /// The set can also be used to iterate over the matched indices. The order |
| /// of iteration is always ascending with respect to the matching indices. |
| /// |
| /// Note that as with searches using [`Regex`](crate::bytes::Regex), the |
| /// expression is unanchored by default. That is, if the regex does not |
| /// start with `^` or `\A`, or end with `$` or `\z`, then it is permitted |
| /// to match anywhere in the haystack. |
| /// |
| /// # Example |
| /// |
| /// Tests which regular expressions match the given haystack: |
| /// |
| /// ``` |
| /// use regex::bytes::RegexSet; |
| /// |
| /// let set = RegexSet::new([ |
| /// r"\w+", |
| /// r"\d+", |
| /// r"\pL+", |
| /// r"foo", |
| /// r"bar", |
| /// r"barfoo", |
| /// r"foobar", |
| /// ]).unwrap(); |
| /// let matches: Vec<_> = set.matches(b"foobar").into_iter().collect(); |
| /// assert_eq!(matches, vec![0, 2, 3, 4, 6]); |
| /// |
| /// // You can also test whether a particular regex matched: |
| /// let matches = set.matches(b"foobar"); |
| /// assert!(!matches.matched(5)); |
| /// assert!(matches.matched(6)); |
| /// ``` |
| #[inline] |
| pub fn matches(&self, haystack: &[u8]) -> SetMatches { |
| self.matches_at(haystack, 0) |
| } |
| |
| /// Returns the set of regexes that match in the given haystack. |
| /// |
| /// The set returned contains the index of each regex that matches in |
| /// the given haystack. The index is in correspondence with the order of |
| /// regular expressions given to `RegexSet`'s constructor. |
| /// |
| /// The set can also be used to iterate over the matched indices. The order |
| /// of iteration is always ascending with respect to the matching indices. |
| /// |
| /// The significance of the starting point is that it takes the surrounding |
| /// context into consideration. For example, the `\A` anchor can only |
| /// match when `start == 0`. |
| /// |
| /// # Panics |
| /// |
| /// This panics when `start >= haystack.len() + 1`. |
| /// |
| /// # Example |
| /// |
| /// Tests which regular expressions match the given haystack: |
| /// |
| /// ``` |
| /// use regex::bytes::RegexSet; |
| /// |
| /// let set = RegexSet::new([r"\bbar\b", r"(?m)^bar$"]).unwrap(); |
| /// let hay = b"foobar"; |
| /// // We get matches here, but it's probably not intended. |
| /// let matches: Vec<_> = set.matches(&hay[3..]).into_iter().collect(); |
| /// assert_eq!(matches, vec![0, 1]); |
| /// // No matches because the assertions take the context into account. |
| /// let matches: Vec<_> = set.matches_at(hay, 3).into_iter().collect(); |
| /// assert_eq!(matches, vec![]); |
| /// ``` |
| #[inline] |
| pub fn matches_at(&self, haystack: &[u8], start: usize) -> SetMatches { |
| let input = Input::new(haystack).span(start..haystack.len()); |
| let mut patset = PatternSet::new(self.meta.pattern_len()); |
| self.meta.which_overlapping_matches(&input, &mut patset); |
| SetMatches(patset) |
| } |
| |
| /// Returns the same as matches, but starts the search at the given |
| /// offset and stores the matches into the slice given. |
| /// |
| /// The significance of the starting point is that it takes the surrounding |
| /// context into consideration. For example, the `\A` anchor can only |
| /// match when `start == 0`. |
| /// |
| /// `matches` must have a length that is at least the number of regexes |
| /// in this set. |
| /// |
| /// This method returns true if and only if at least one member of |
| /// `matches` is true after executing the set against `haystack`. |
| #[doc(hidden)] |
| #[inline] |
| pub fn matches_read_at( |
| &self, |
| matches: &mut [bool], |
| haystack: &[u8], |
| start: usize, |
| ) -> bool { |
| // This is pretty dumb. We should try to fix this, but the |
| // regex-automata API doesn't provide a way to store matches in an |
| // arbitrary &mut [bool]. Thankfully, this API is is doc(hidden) and |
| // thus not public... But regex-capi currently uses it. We should |
| // fix regex-capi to use a PatternSet, maybe? Not sure... PatternSet |
| // is in regex-automata, not regex. So maybe we should just accept a |
| // 'SetMatches', which is basically just a newtype around PatternSet. |
| let mut patset = PatternSet::new(self.meta.pattern_len()); |
| let mut input = Input::new(haystack); |
| input.set_start(start); |
| self.meta.which_overlapping_matches(&input, &mut patset); |
| for pid in patset.iter() { |
| matches[pid] = true; |
| } |
| !patset.is_empty() |
| } |
| |
| /// An alias for `matches_read_at` to preserve backward compatibility. |
| /// |
| /// The `regex-capi` crate used this method, so to avoid breaking that |
| /// crate, we continue to export it as an undocumented API. |
| #[doc(hidden)] |
| #[inline] |
| pub fn read_matches_at( |
| &self, |
| matches: &mut [bool], |
| haystack: &[u8], |
| start: usize, |
| ) -> bool { |
| self.matches_read_at(matches, haystack, start) |
| } |
| |
| /// Returns the total number of regexes in this set. |
| /// |
| /// # Example |
| /// |
| /// ``` |
| /// use regex::bytes::RegexSet; |
| /// |
| /// assert_eq!(0, RegexSet::empty().len()); |
| /// assert_eq!(1, RegexSet::new([r"[0-9]"]).unwrap().len()); |
| /// assert_eq!(2, RegexSet::new([r"[0-9]", r"[a-z]"]).unwrap().len()); |
| /// ``` |
| #[inline] |
| pub fn len(&self) -> usize { |
| self.meta.pattern_len() |
| } |
| |
| /// Returns `true` if this set contains no regexes. |
| /// |
| /// # Example |
| /// |
| /// ``` |
| /// use regex::bytes::RegexSet; |
| /// |
| /// assert!(RegexSet::empty().is_empty()); |
| /// assert!(!RegexSet::new([r"[0-9]"]).unwrap().is_empty()); |
| /// ``` |
| #[inline] |
| pub fn is_empty(&self) -> bool { |
| self.meta.pattern_len() == 0 |
| } |
| |
| /// Returns the regex patterns that this regex set was constructed from. |
| /// |
| /// This function can be used to determine the pattern for a match. The |
| /// slice returned has exactly as many patterns givens to this regex set, |
| /// and the order of the slice is the same as the order of the patterns |
| /// provided to the set. |
| /// |
| /// # Example |
| /// |
| /// ``` |
| /// use regex::bytes::RegexSet; |
| /// |
| /// let set = RegexSet::new(&[ |
| /// r"\w+", |
| /// r"\d+", |
| /// r"\pL+", |
| /// r"foo", |
| /// r"bar", |
| /// r"barfoo", |
| /// r"foobar", |
| /// ]).unwrap(); |
| /// let matches: Vec<_> = set |
| /// .matches(b"foobar") |
| /// .into_iter() |
| /// .map(|index| &set.patterns()[index]) |
| /// .collect(); |
| /// assert_eq!(matches, vec![r"\w+", r"\pL+", r"foo", r"bar", r"foobar"]); |
| /// ``` |
| #[inline] |
| pub fn patterns(&self) -> &[String] { |
| &self.patterns |
| } |
| } |
| |
| impl Default for RegexSet { |
| fn default() -> Self { |
| RegexSet::empty() |
| } |
| } |
| |
| /// A set of matches returned by a regex set. |
| /// |
| /// Values of this type are constructed by [`RegexSet::matches`]. |
| #[derive(Clone, Debug)] |
| pub struct SetMatches(PatternSet); |
| |
| impl SetMatches { |
| /// Whether this set contains any matches. |
| /// |
| /// # Example |
| /// |
| /// ``` |
| /// use regex::bytes::RegexSet; |
| /// |
| /// let set = RegexSet::new(&[ |
| /// r"[a-z]+@[a-z]+\.(com|org|net)", |
| /// r"[a-z]+\.(com|org|net)", |
| /// ]).unwrap(); |
| /// let matches = set.matches(b"[email protected]"); |
| /// assert!(matches.matched_any()); |
| /// ``` |
| #[inline] |
| pub fn matched_any(&self) -> bool { |
| !self.0.is_empty() |
| } |
| |
| /// Whether the regex at the given index matched. |
| /// |
| /// The index for a regex is determined by its insertion order upon the |
| /// initial construction of a `RegexSet`, starting at `0`. |
| /// |
| /// # Panics |
| /// |
| /// If `index` is greater than or equal to the number of regexes in the |
| /// original set that produced these matches. Equivalently, when `index` |
| /// is greater than or equal to [`SetMatches::len`]. |
| /// |
| /// # Example |
| /// |
| /// ``` |
| /// use regex::bytes::RegexSet; |
| /// |
| /// let set = RegexSet::new([ |
| /// r"[a-z]+@[a-z]+\.(com|org|net)", |
| /// r"[a-z]+\.(com|org|net)", |
| /// ]).unwrap(); |
| /// let matches = set.matches(b"example.com"); |
| /// assert!(!matches.matched(0)); |
| /// assert!(matches.matched(1)); |
| /// ``` |
| #[inline] |
| pub fn matched(&self, index: usize) -> bool { |
| self.0.contains(PatternID::new_unchecked(index)) |
| } |
| |
| /// The total number of regexes in the set that created these matches. |
| /// |
| /// **WARNING:** This always returns the same value as [`RegexSet::len`]. |
| /// In particular, it does *not* return the number of elements yielded by |
| /// [`SetMatches::iter`]. The only way to determine the total number of |
| /// matched regexes is to iterate over them. |
| /// |
| /// # Example |
| /// |
| /// Notice that this method returns the total number of regexes in the |
| /// original set, and *not* the total number of regexes that matched. |
| /// |
| /// ``` |
| /// use regex::bytes::RegexSet; |
| /// |
| /// let set = RegexSet::new([ |
| /// r"[a-z]+@[a-z]+\.(com|org|net)", |
| /// r"[a-z]+\.(com|org|net)", |
| /// ]).unwrap(); |
| /// let matches = set.matches(b"example.com"); |
| /// // Total number of patterns that matched. |
| /// assert_eq!(1, matches.iter().count()); |
| /// // Total number of patterns in the set. |
| /// assert_eq!(2, matches.len()); |
| /// ``` |
| #[inline] |
| pub fn len(&self) -> usize { |
| self.0.capacity() |
| } |
| |
| /// Returns an iterator over the indices of the regexes that matched. |
| /// |
| /// This will always produces matches in ascending order, where the index |
| /// yielded corresponds to the index of the regex that matched with respect |
| /// to its position when initially building the set. |
| /// |
| /// # Example |
| /// |
| /// ``` |
| /// use regex::bytes::RegexSet; |
| /// |
| /// let set = RegexSet::new([ |
| /// r"[0-9]", |
| /// r"[a-z]", |
| /// r"[A-Z]", |
| /// r"\p{Greek}", |
| /// ]).unwrap(); |
| /// let hay = "βa1".as_bytes(); |
| /// let matches: Vec<_> = set.matches(hay).iter().collect(); |
| /// assert_eq!(matches, vec![0, 1, 3]); |
| /// ``` |
| /// |
| /// Note that `SetMatches` also implemnets the `IntoIterator` trait, so |
| /// this method is not always needed. For example: |
| /// |
| /// ``` |
| /// use regex::bytes::RegexSet; |
| /// |
| /// let set = RegexSet::new([ |
| /// r"[0-9]", |
| /// r"[a-z]", |
| /// r"[A-Z]", |
| /// r"\p{Greek}", |
| /// ]).unwrap(); |
| /// let hay = "βa1".as_bytes(); |
| /// let mut matches = vec![]; |
| /// for index in set.matches(hay) { |
| /// matches.push(index); |
| /// } |
| /// assert_eq!(matches, vec![0, 1, 3]); |
| /// ``` |
| #[inline] |
| pub fn iter(&self) -> SetMatchesIter<'_> { |
| SetMatchesIter(self.0.iter()) |
| } |
| } |
| |
| impl IntoIterator for SetMatches { |
| type IntoIter = SetMatchesIntoIter; |
| type Item = usize; |
| |
| fn into_iter(self) -> Self::IntoIter { |
| let it = 0..self.0.capacity(); |
| SetMatchesIntoIter { patset: self.0, it } |
| } |
| } |
| |
| impl<'a> IntoIterator for &'a SetMatches { |
| type IntoIter = SetMatchesIter<'a>; |
| type Item = usize; |
| |
| fn into_iter(self) -> Self::IntoIter { |
| self.iter() |
| } |
| } |
| |
| /// An owned iterator over the set of matches from a regex set. |
| /// |
| /// This will always produces matches in ascending order of index, where the |
| /// index corresponds to the index of the regex that matched with respect to |
| /// its position when initially building the set. |
| /// |
| /// This iterator is created by calling `SetMatches::into_iter` via the |
| /// `IntoIterator` trait. This is automatically done in `for` loops. |
| /// |
| /// # Example |
| /// |
| /// ``` |
| /// use regex::bytes::RegexSet; |
| /// |
| /// let set = RegexSet::new([ |
| /// r"[0-9]", |
| /// r"[a-z]", |
| /// r"[A-Z]", |
| /// r"\p{Greek}", |
| /// ]).unwrap(); |
| /// let hay = "βa1".as_bytes(); |
| /// let mut matches = vec![]; |
| /// for index in set.matches(hay) { |
| /// matches.push(index); |
| /// } |
| /// assert_eq!(matches, vec![0, 1, 3]); |
| /// ``` |
| #[derive(Debug)] |
| pub struct SetMatchesIntoIter { |
| patset: PatternSet, |
| it: core::ops::Range<usize>, |
| } |
| |
| impl Iterator for SetMatchesIntoIter { |
| type Item = usize; |
| |
| fn next(&mut self) -> Option<usize> { |
| loop { |
| let id = self.it.next()?; |
| if self.patset.contains(PatternID::new_unchecked(id)) { |
| return Some(id); |
| } |
| } |
| } |
| |
| fn size_hint(&self) -> (usize, Option<usize>) { |
| self.it.size_hint() |
| } |
| } |
| |
| impl DoubleEndedIterator for SetMatchesIntoIter { |
| fn next_back(&mut self) -> Option<usize> { |
| loop { |
| let id = self.it.next_back()?; |
| if self.patset.contains(PatternID::new_unchecked(id)) { |
| return Some(id); |
| } |
| } |
| } |
| } |
| |
| impl core::iter::FusedIterator for SetMatchesIntoIter {} |
| |
| /// A borrowed iterator over the set of matches from a regex set. |
| /// |
| /// The lifetime `'a` refers to the lifetime of the [`SetMatches`] value that |
| /// created this iterator. |
| /// |
| /// This will always produces matches in ascending order, where the index |
| /// corresponds to the index of the regex that matched with respect to its |
| /// position when initially building the set. |
| /// |
| /// This iterator is created by the [`SetMatches::iter`] method. |
| #[derive(Clone, Debug)] |
| pub struct SetMatchesIter<'a>(PatternSetIter<'a>); |
| |
| impl<'a> Iterator for SetMatchesIter<'a> { |
| type Item = usize; |
| |
| fn next(&mut self) -> Option<usize> { |
| self.0.next().map(|pid| pid.as_usize()) |
| } |
| |
| fn size_hint(&self) -> (usize, Option<usize>) { |
| self.0.size_hint() |
| } |
| } |
| |
| impl<'a> DoubleEndedIterator for SetMatchesIter<'a> { |
| fn next_back(&mut self) -> Option<usize> { |
| self.0.next_back().map(|pid| pid.as_usize()) |
| } |
| } |
| |
| impl<'a> core::iter::FusedIterator for SetMatchesIter<'a> {} |
| |
| impl core::fmt::Debug for RegexSet { |
| fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { |
| write!(f, "RegexSet({:?})", self.patterns()) |
| } |
| } |