// vendor/regex-0.1.80/src/re_set.rs - toolchain/rustc - Git at Google

 // Copyright 2014-2015 The Rust Project Developers. See the COPYRIGHT
 // file at the top-level directory of this distribution and at
 // http://rust-lang.org/COPYRIGHT.
 //
 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
 // option. This file may not be copied, modified, or distributed
 // except according to those terms.

 macro_rules! define_set {
     ($name:ident, $exec_build:expr, $text_ty:ty, $as_bytes:expr) => {
         pub mod $name {
             use std::fmt;
             use std::iter;
             use std::slice;
             use std::vec;

             use error::Error;
             use exec::{Exec, ExecBuilder};
             use re_trait::RegularExpression;

 /// Match multiple (possibly overlapping) regular expressions in a single scan.
 ///
 /// A regex set corresponds to the union of two or more regular expressions.
 /// That is, a regex set will match text where at least one of its
 /// constituent regular expressions matches. A regex set as it's formulated here
 /// provides a touch more power: it will also report *which* regular
 /// expressions in the set match. Indeed, this is the key difference between
 /// regex sets and a single `Regex` with many alternates, since only one
 /// alternate can match at a time.
 ///
 /// For example, consider regular expressions to match email addresses and
 /// domains: `[a-z]+@[a-z]+\.(com|org|net)` and `[a-z]+\.(com|org|net)`. If a
 /// regex set is constructed from those regexes, then searching the text
 /// `foo@example.com` will report both regexes as matching. Of course, one
 /// could accomplish this by compiling each regex on its own and doing two
 /// searches over the text. The key advantage of using a regex set is that it
 /// will report the matching regexes using a *single pass through the text*.
 /// If one has hundreds or thousands of regexes to match repeatedly (like a URL
 /// router for a complex web application or a user agent matcher), then a regex
 /// set can realize huge performance gains.
 ///
 /// # Example
 ///
 /// This shows how the above two regexes (for matching email addresses and
 /// domains) might work:
 ///
 /// ```rust
 /// # use regex::RegexSet;
 /// let set = RegexSet::new(&[
 ///     r"[a-z]+@[a-z]+\.(com|org|net)",
 ///     r"[a-z]+\.(com|org|net)",
 /// ]).unwrap();
 ///
 /// // Ask whether any regexes in the set match.
 /// assert!(set.is_match("foo@example.com"));
 ///
 /// // Identify which regexes in the set match.
 /// let matches: Vec<_> = set.matches("foo@example.com").into_iter().collect();
 /// assert_eq!(vec![0, 1], matches);
 ///
 /// // Try again, but with text that only matches one of the regexes.
 /// let matches: Vec<_> = set.matches("example.com").into_iter().collect();
 /// assert_eq!(vec![1], matches);
 ///
 /// // Try again, but with text that doesn't match any regex in the set.
 /// let matches: Vec<_> = set.matches("example").into_iter().collect();
 /// assert!(matches.is_empty());
 /// ```
 ///
 /// Note that it would be possible to adapt the above example to using `Regex`
 /// with an expression like:
 ///
 /// ```ignore
 /// (?P<email>[a-z]+@(?P<email_domain>[a-z]+[.](com|org|net)))|(?P<domain>[a-z]+[.](com|org|net))
 /// ```
 ///
 /// After a match, one could then inspect the capture groups to figure out
 /// which alternates matched. The problem is that it is hard to make this
 /// approach scale when there are many regexes since the overlap between each
 /// alternate isn't always obvious to reason about.
 ///
 /// # Limitations
 ///
 /// Regex sets are limited to answering the following two questions:
 ///
 /// 1. Does any regex in the set match?
 /// 2. If so, which regexes in the set match?
 ///
 /// As with the main `Regex` type, it is cheaper to ask (1) instead of (2)
 /// since the matching engines can stop after the first match is found.
 ///
 /// Other features like finding the location of successive matches or their
 /// sub-captures aren't supported. If you need this functionality, the
 /// recommended approach is to compile each regex in the set independently and
 /// selectively match them based on which regexes in the set matched.
 ///
 /// # Performance
 ///
 /// A `RegexSet` has the same performance characteristics as `Regex`. Namely,
 /// search takes `O(mn)` time, where `m` is proportional to the size of the
 /// regex set and `n` is proportional to the length of the search text.
 #[derive(Clone)]
 pub struct RegexSet(Exec);

 impl RegexSet {
     /// Build a regex set out of the supplied patterns.
     ///
     /// `exprs` is any iterable whose items can be viewed as `&str`; each item
     /// is one regular expression. An error is returned if any of them is not a
     /// valid regular expression.
     ///
     /// # Example
     ///
     /// Build a set from a slice of patterns:
     ///
     /// ```rust
     /// # use regex::RegexSet;
     /// let set = RegexSet::new(&[r"\w+", r"\d+"]).unwrap();
     /// assert!(set.is_match("foo"));
     /// ```
     pub fn new<I, S>(exprs: I) -> Result<RegexSet, Error>
     where
         I: IntoIterator<Item = S>,
         S: AsRef<str>,
     {
         Ok(RegexSet(try!($exec_build(exprs))))
     }

     /// Reports whether at least one regex in this set matches `text`.
     ///
     /// Prefer this over `matches` when you do not care *which* regexes
     /// matched: the underlying matching engine quits as soon as it sees the
     /// first match instead of continuing to find all of them.
     ///
     /// As with searches using `Regex`, the expression is unanchored by
     /// default. That is, if a regex does not start with `^` or `\A`, or end
     /// with `$` or `\z`, then it is permitted to match anywhere in the text.
     ///
     /// # Example
     ///
     /// Check whether a set matches some text:
     ///
     /// ```rust
     /// # use regex::RegexSet;
     /// let set = RegexSet::new(&[r"\w+", r"\d+"]).unwrap();
     /// assert!(set.is_match("foo"));
     /// assert!(!set.is_match("☃"));
     /// ```
     pub fn is_match(&self, text: $text_ty) -> bool {
         let haystack = $as_bytes(text);
         self.0.searcher().is_match_at(haystack, 0)
     }

     /// Finds which regexes in this set match `text`.
     ///
     /// The returned `SetMatches` records, for every regex, whether it matched.
     /// Indices correspond to the order in which the regexes were given to
     /// `RegexSet`'s constructor. The result can be queried by index or
     /// iterated to obtain the matched indices.
     ///
     /// As with searches using `Regex`, the expression is unanchored by
     /// default. That is, if a regex does not start with `^` or `\A`, or end
     /// with `$` or `\z`, then it is permitted to match anywhere in the text.
     ///
     /// # Example
     ///
     /// Find out which regular expressions match the given text:
     ///
     /// ```rust
     /// # use regex::RegexSet;
     /// let set = RegexSet::new(&[
     ///     r"\w+",
     ///     r"\d+",
     ///     r"\pL+",
     ///     r"foo",
     ///     r"bar",
     ///     r"barfoo",
     ///     r"foobar",
     /// ]).unwrap();
     /// let matches: Vec<_> = set.matches("foobar").into_iter().collect();
     /// assert_eq!(matches, vec![0, 2, 3, 4, 6]);
     ///
     /// // You can also test whether a particular regex matched:
     /// let matches = set.matches("foobar");
     /// assert!(!matches.matched(5));
     /// assert!(matches.matched(6));
     /// ```
     pub fn matches(&self, text: $text_ty) -> SetMatches {
         // One flag per regex, all starting out unmatched; the buffer is
         // handed to the searcher mutably so it can record the results.
         let mut hits = vec![false; self.len()];
         let found = self.0.searcher().many_matches_at(&mut hits, $as_bytes(text), 0);
         SetMatches { matched_any: found, matches: hits }
     }

     /// The number of regular expressions contained in this set.
     pub fn len(&self) -> usize {
         let patterns = self.0.regex_strings();
         patterns.len()
     }
 }

 /// The result of matching a regex set against some text.
 #[derive(Clone, Debug)]
 pub struct SetMatches {
     // Flag reported by `matched_any`.
     matched_any: bool,
     // One flag per regex, in the order the regexes were given to the set.
     matches: Vec<bool>,
 }

 impl SetMatches {
     /// Reports whether any regex in the set matched.
     pub fn matched_any(&self) -> bool {
         self.matched_any
     }

     /// Reports whether the regex at `regex_index` matched.
     ///
     /// Indices follow the order in which the regexes were supplied when the
     /// `RegexSet` was constructed, beginning at `0`.
     ///
     /// # Panics
     ///
     /// Panics when `regex_index` is greater than or equal to `self.len()`.
     pub fn matched(&self, regex_index: usize) -> bool {
         self.matches[regex_index]
     }

     /// The number of regexes in the set these results came from.
     pub fn len(&self) -> usize {
         self.matches.len()
     }

     /// Borrowing iterator over the indices of the regexes that matched.
     pub fn iter(&self) -> SetMatchesIter {
         SetMatchesIter(self.matches.iter().enumerate())
     }
 }

 impl IntoIterator for SetMatches {
     type IntoIter = SetMatchesIntoIter;
     type Item = usize;

     /// Consumes the results, yielding the indices of the matched regexes.
     fn into_iter(self) -> Self::IntoIter {
         let SetMatches { matches, .. } = self;
         SetMatchesIntoIter(matches.into_iter().enumerate())
     }
 }

 impl<'a> IntoIterator for &'a SetMatches {
     type IntoIter = SetMatchesIter<'a>;
     type Item = usize;

     /// Borrows the results, yielding the indices of the matched regexes.
     fn into_iter(self) -> Self::IntoIter {
         SetMatchesIter(self.matches.iter().enumerate())
     }
 }

 /// Consuming iterator over the indices of the regexes that matched.
 ///
 /// Wraps the enumerated per-regex flags and skips every entry whose flag is
 /// `false`.
 pub struct SetMatchesIntoIter(iter::Enumerate<vec::IntoIter<bool>>);

 impl Iterator for SetMatchesIntoIter {
     type Item = usize;

     /// Skips unmatched entries and returns the index of the next matched one.
     fn next(&mut self) -> Option<usize> {
         self.0.find(|&(_, hit)| hit).map(|(index, _)| index)
     }
 }

 impl DoubleEndedIterator for SetMatchesIntoIter {
     /// Like `next`, but scans from the back of the flag list.
     fn next_back(&mut self) -> Option<usize> {
         self.0.by_ref().rev().find(|&(_, hit)| hit).map(|(index, _)| index)
     }
 }

 /// Borrowing iterator over the indices of the regexes that matched.
 ///
 /// `'a` is the lifetime of the `SetMatches` value being iterated.
 #[derive(Clone)]
 pub struct SetMatchesIter<'a>(iter::Enumerate<slice::Iter<'a, bool>>);

 impl<'a> Iterator for SetMatchesIter<'a> {
     type Item = usize;

     /// Skips unmatched entries and returns the index of the next matched one.
     fn next(&mut self) -> Option<usize> {
         self.0.find(|&(_, &hit)| hit).map(|(index, _)| index)
     }
 }

 impl<'a> DoubleEndedIterator for SetMatchesIter<'a> {
     /// Like `next`, but scans from the back of the flag list.
     fn next_back(&mut self) -> Option<usize> {
         self.0.by_ref().rev().find(|&(_, &hit)| hit).map(|(index, _)| index)
     }
 }

 #[doc(hidden)]
 impl From<Exec> for RegexSet {
     /// Wraps the given `Exec` directly in a `RegexSet`.
     fn from(inner: Exec) -> RegexSet {
         RegexSet(inner)
     }
 }

 impl fmt::Debug for RegexSet {
     /// Formats the set as `RegexSet(..)` around the debug form of its
     /// pattern strings.
     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
         let patterns = self.0.regex_strings();
         write!(f, "RegexSet({:?})", patterns)
     }
 }

 /// Views a string slice as its underlying bytes.
 // Every module generated by `define_set!` gets both byte helpers but is
 // handed only one of them, hence the `dead_code` allowance.
 #[allow(dead_code)]
 fn as_bytes_str(text: &str) -> &[u8] {
     text.as_bytes()
 }
 /// Identity conversion: a byte slice is returned unchanged.
 // Every module generated by `define_set!` gets both byte helpers but is
 // handed only one of them, hence the `dead_code` allowance.
 #[allow(dead_code)]
 fn as_bytes_bytes(text: &[u8]) -> &[u8] {
     text
 }
         }
     }
 }

 // Instantiate the `unicode` module: its `RegexSet` searches `&str` text,
 // viewed as bytes through `as_bytes_str`, and is built by
 // `ExecBuilder::new_many(..)` with no further builder options set.
 define_set! {
     unicode,
     |exprs| ExecBuilder::new_many(exprs).build(),
     &str,
     as_bytes_str
 }

 // Instantiate the `bytes` module: its `RegexSet` searches `&[u8]` text
 // directly (via `as_bytes_bytes`). The builder's `only_utf8` option is
 // switched off here, presumably so that patterns may match arbitrary,
 // non-UTF-8 bytes; confirm against `ExecBuilder`.
 define_set! {
     bytes,
     |exprs| ExecBuilder::new_many(exprs).only_utf8(false).build(),
     &[u8],
     as_bytes_bytes
 }
	// Copyright 2014-2015 The Rust Project Developers. See the COPYRIGHT
	// file at the top-level directory of this distribution and at
	// http://rust-lang.org/COPYRIGHT.
	//
	// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
	// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
	// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
	// option. This file may not be copied, modified, or distributed
	// except according to those terms.

	macro_rules! define_set {
	($name:ident, $exec_build:expr, $text_ty:ty, $as_bytes:expr) => {
	pub mod $name {
	use std::fmt;
	use std::iter;
	use std::slice;
	use std::vec;

	use error::Error;
	use exec::{Exec, ExecBuilder};
	use re_trait::RegularExpression;

	/// Match multiple (possibly overlapping) regular expressions in a single scan.
	///
	/// A regex set corresponds to the union of two or more regular expressions.
	/// That is, a regex set will match text where at least one of its
	/// constituent regular expressions matches. A regex set as it's formulated here
	/// provides a touch more power: it will also report which regular
	/// expressions in the set match. Indeed, this is the key difference between
	/// regex sets and a single `Regex` with many alternates, since only one
	/// alternate can match at a time.
	///
	/// For example, consider regular expressions to match email addresses and
	/// domains: `[a-z]+@[a-z]+\.(com|org|net)` and `[a-z]+\.(com|org|net)`. If a
	/// regex set is constructed from those regexes, then searching the text
	/// `foo@example.com` will report both regexes as matching. Of course, one
	/// could accomplish this by compiling each regex on its own and doing two
	/// searches over the text. The key advantage of using a regex set is that it
	/// will report the matching regexes using a single pass through the text.
	/// If one has hundreds or thousands of regexes to match repeatedly (like a URL
	/// router for a complex web application or a user agent matcher), then a regex
	/// set can realize huge performance gains.
	///
	/// # Example
	///
	/// This shows how the above two regexes (for matching email addresses and
	/// domains) might work:
	///
	/// ```rust
	/// # use regex::RegexSet;
	/// let set = RegexSet::new(&[
	///     r"[a-z]+@[a-z]+\.(com|org|net)",
	///     r"[a-z]+\.(com|org|net)",
	/// ]).unwrap();
	///
	/// // Ask whether any regexes in the set match.
	/// assert!(set.is_match("foo@example.com"));
	///
	/// // Identify which regexes in the set match.
	/// let matches: Vec<_> = set.matches("foo@example.com").into_iter().collect();
	/// assert_eq!(vec![0, 1], matches);
	///
	/// // Try again, but with text that only matches one of the regexes.
	/// let matches: Vec<_> = set.matches("example.com").into_iter().collect();
	/// assert_eq!(vec![1], matches);
	///
	/// // Try again, but with text that doesn't match any regex in the set.
	/// let matches: Vec<_> = set.matches("example").into_iter().collect();
	/// assert!(matches.is_empty());
	/// ```
	///
	/// Note that it would be possible to adapt the above example to using `Regex`
	/// with an expression like:
	///
	/// ```ignore
	/// (?P<email>[a-z]+@(?P<email_domain>[a-z]+[.](com|org|net)))|(?P<domain>[a-z]+[.](com|org|net))
	/// ```
	///
	/// After a match, one could then inspect the capture groups to figure out
	/// which alternates matched. The problem is that it is hard to make this
	/// approach scale when there are many regexes since the overlap between each
	/// alternate isn't always obvious to reason about.
	///
	/// # Limitations
	///
	/// Regex sets are limited to answering the following two questions:
	///
	/// 1. Does any regex in the set match?
	/// 2. If so, which regexes in the set match?
	///
	/// As with the main `Regex` type, it is cheaper to ask (1) instead of (2)
	/// since the matching engines can stop after the first match is found.
	///
	/// Other features like finding the location of successive matches or their
	/// sub-captures aren't supported. If you need this functionality, the
	/// recommended approach is to compile each regex in the set independently and
	/// selectively match them based on which regexes in the set matched.
	///
	/// # Performance
	///
	/// A `RegexSet` has the same performance characteristics as `Regex`. Namely,
	/// search takes `O(mn)` time, where `m` is proportional to the size of the
	/// regex set and `n` is proportional to the length of the search text.
	#[derive(Clone)]
	pub struct RegexSet(Exec);

	impl RegexSet {
	    /// Build a regex set out of the supplied patterns.
	    ///
	    /// `exprs` is any iterable whose items can be viewed as `&str`; each item
	    /// is one regular expression. An error is returned if any of them is not a
	    /// valid regular expression.
	    ///
	    /// # Example
	    ///
	    /// Build a set from a slice of patterns:
	    ///
	    /// ```rust
	    /// # use regex::RegexSet;
	    /// let set = RegexSet::new(&[r"\w+", r"\d+"]).unwrap();
	    /// assert!(set.is_match("foo"));
	    /// ```
	    pub fn new<I, S>(exprs: I) -> Result<RegexSet, Error>
	    where
	        I: IntoIterator<Item = S>,
	        S: AsRef<str>,
	    {
	        Ok(RegexSet(try!($exec_build(exprs))))
	    }

	    /// Reports whether at least one regex in this set matches `text`.
	    ///
	    /// Prefer this over `matches` when you do not care which regexes
	    /// matched: the underlying matching engine quits as soon as it sees the
	    /// first match instead of continuing to find all of them.
	    ///
	    /// As with searches using `Regex`, the expression is unanchored by
	    /// default. That is, if a regex does not start with `^` or `\A`, or end
	    /// with `$` or `\z`, then it is permitted to match anywhere in the text.
	    ///
	    /// # Example
	    ///
	    /// Check whether a set matches some text:
	    ///
	    /// ```rust
	    /// # use regex::RegexSet;
	    /// let set = RegexSet::new(&[r"\w+", r"\d+"]).unwrap();
	    /// assert!(set.is_match("foo"));
	    /// assert!(!set.is_match("☃"));
	    /// ```
	    pub fn is_match(&self, text: $text_ty) -> bool {
	        let haystack = $as_bytes(text);
	        self.0.searcher().is_match_at(haystack, 0)
	    }

	    /// Finds which regexes in this set match `text`.
	    ///
	    /// The returned `SetMatches` records, for every regex, whether it matched.
	    /// Indices correspond to the order in which the regexes were given to
	    /// `RegexSet`'s constructor. The result can be queried by index or
	    /// iterated to obtain the matched indices.
	    ///
	    /// As with searches using `Regex`, the expression is unanchored by
	    /// default. That is, if a regex does not start with `^` or `\A`, or end
	    /// with `$` or `\z`, then it is permitted to match anywhere in the text.
	    ///
	    /// # Example
	    ///
	    /// Find out which regular expressions match the given text:
	    ///
	    /// ```rust
	    /// # use regex::RegexSet;
	    /// let set = RegexSet::new(&[
	    ///     r"\w+",
	    ///     r"\d+",
	    ///     r"\pL+",
	    ///     r"foo",
	    ///     r"bar",
	    ///     r"barfoo",
	    ///     r"foobar",
	    /// ]).unwrap();
	    /// let matches: Vec<_> = set.matches("foobar").into_iter().collect();
	    /// assert_eq!(matches, vec![0, 2, 3, 4, 6]);
	    ///
	    /// // You can also test whether a particular regex matched:
	    /// let matches = set.matches("foobar");
	    /// assert!(!matches.matched(5));
	    /// assert!(matches.matched(6));
	    /// ```
	    pub fn matches(&self, text: $text_ty) -> SetMatches {
	        // One flag per regex, all starting out unmatched; the buffer is
	        // handed to the searcher mutably so it can record the results.
	        let mut hits = vec![false; self.len()];
	        let found = self.0.searcher().many_matches_at(&mut hits, $as_bytes(text), 0);
	        SetMatches { matched_any: found, matches: hits }
	    }

	    /// The number of regular expressions contained in this set.
	    pub fn len(&self) -> usize {
	        let patterns = self.0.regex_strings();
	        patterns.len()
	    }
	}

	/// The result of matching a regex set against some text.
	#[derive(Clone, Debug)]
	pub struct SetMatches {
	    // Flag reported by `matched_any`.
	    matched_any: bool,
	    // One flag per regex, in the order the regexes were given to the set.
	    matches: Vec<bool>,
	}

	impl SetMatches {
	    /// Reports whether any regex in the set matched.
	    pub fn matched_any(&self) -> bool {
	        self.matched_any
	    }

	    /// Reports whether the regex at `regex_index` matched.
	    ///
	    /// Indices follow the order in which the regexes were supplied when the
	    /// `RegexSet` was constructed, beginning at `0`.
	    ///
	    /// # Panics
	    ///
	    /// Panics when `regex_index` is greater than or equal to `self.len()`.
	    pub fn matched(&self, regex_index: usize) -> bool {
	        self.matches[regex_index]
	    }

	    /// The number of regexes in the set these results came from.
	    pub fn len(&self) -> usize {
	        self.matches.len()
	    }

	    /// Borrowing iterator over the indices of the regexes that matched.
	    pub fn iter(&self) -> SetMatchesIter {
	        SetMatchesIter(self.matches.iter().enumerate())
	    }
	}

	impl IntoIterator for SetMatches {
	    type IntoIter = SetMatchesIntoIter;
	    type Item = usize;

	    /// Consumes the results, yielding the indices of the matched regexes.
	    fn into_iter(self) -> Self::IntoIter {
	        let SetMatches { matches, .. } = self;
	        SetMatchesIntoIter(matches.into_iter().enumerate())
	    }
	}

	impl<'a> IntoIterator for &'a SetMatches {
	    type IntoIter = SetMatchesIter<'a>;
	    type Item = usize;

	    /// Borrows the results, yielding the indices of the matched regexes.
	    fn into_iter(self) -> Self::IntoIter {
	        SetMatchesIter(self.matches.iter().enumerate())
	    }
	}

	/// Consuming iterator over the indices of the regexes that matched.
	///
	/// Wraps the enumerated per-regex flags and skips every entry whose flag is
	/// `false`.
	pub struct SetMatchesIntoIter(iter::Enumerate<vec::IntoIter<bool>>);

	impl Iterator for SetMatchesIntoIter {
	    type Item = usize;

	    /// Skips unmatched entries and returns the index of the next matched one.
	    fn next(&mut self) -> Option<usize> {
	        self.0.find(|&(_, hit)| hit).map(|(index, _)| index)
	    }
	}

	impl DoubleEndedIterator for SetMatchesIntoIter {
	    /// Like `next`, but scans from the back of the flag list.
	    fn next_back(&mut self) -> Option<usize> {
	        self.0.by_ref().rev().find(|&(_, hit)| hit).map(|(index, _)| index)
	    }
	}

	/// Borrowing iterator over the indices of the regexes that matched.
	///
	/// `'a` is the lifetime of the `SetMatches` value being iterated.
	#[derive(Clone)]
	pub struct SetMatchesIter<'a>(iter::Enumerate<slice::Iter<'a, bool>>);

	impl<'a> Iterator for SetMatchesIter<'a> {
	    type Item = usize;

	    /// Skips unmatched entries and returns the index of the next matched one.
	    fn next(&mut self) -> Option<usize> {
	        self.0.find(|&(_, &hit)| hit).map(|(index, _)| index)
	    }
	}

	impl<'a> DoubleEndedIterator for SetMatchesIter<'a> {
	    /// Like `next`, but scans from the back of the flag list.
	    fn next_back(&mut self) -> Option<usize> {
	        self.0.by_ref().rev().find(|&(_, &hit)| hit).map(|(index, _)| index)
	    }
	}

	#[doc(hidden)]
	impl From<Exec> for RegexSet {
	    /// Wraps the given `Exec` directly in a `RegexSet`.
	    fn from(inner: Exec) -> RegexSet {
	        RegexSet(inner)
	    }
	}

	impl fmt::Debug for RegexSet {
	    /// Formats the set as `RegexSet(..)` around the debug form of its
	    /// pattern strings.
	    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
	        let patterns = self.0.regex_strings();
	        write!(f, "RegexSet({:?})", patterns)
	    }
	}

	/// Views a string slice as its underlying bytes.
	// Every module generated by `define_set!` gets both byte helpers but is
	// handed only one of them, hence the `dead_code` allowance.
	#[allow(dead_code)]
	fn as_bytes_str(text: &str) -> &[u8] {
	    text.as_bytes()
	}
	/// Identity conversion: a byte slice is returned unchanged.
	// Every module generated by `define_set!` gets both byte helpers but is
	// handed only one of them, hence the `dead_code` allowance.
	#[allow(dead_code)]
	fn as_bytes_bytes(text: &[u8]) -> &[u8] {
	    text
	}
	}
	}
	}

	define_set! {
	unicode,
	\|exprs\| ExecBuilder::new_many(exprs).build(),
	&str,
	as_bytes_str
	}

	define_set! {
	bytes,
	\|exprs\| ExecBuilder::new_many(exprs).only_utf8(false).build(),
	&[u8],
	as_bytes_bytes
	}