// Copyright 2014-2015 The Rust Project Developers. See the COPYRIGHT
// file at the top-level directory of this distribution and at
// http://rust-lang.org/COPYRIGHT.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.
use std::mem;
use aho_corasick::{Automaton, AcAutomaton, FullAcAutomaton};
use memchr::{memchr, memchr2, memchr3};
use syntax;
use freqs::BYTE_FREQUENCIES;
use simd_accel::teddy128::{Teddy, is_teddy_128_available};
/// A prefix extracted from a compiled regular expression.
///
/// A regex prefix is a set of literal strings that *must* be matched at the
/// beginning of a regex in order for the entire regex to match.
///
/// There are a variety of ways to efficiently scan the search text for a
/// prefix. Currently, there are three implemented:
///
/// 1. The prefix is a single byte. Just use memchr.
/// 2. If the prefix is a set of two or more single byte prefixes, then
/// a single sparse map is created. Checking if there is a match is a lookup
/// in this map for each byte in the search text.
/// 3. In all other cases, build an Aho-Corasick automaton.
///
/// It's possible that there's room here for other substring algorithms,
/// such as Boyer-Moore for single-set prefixes greater than 1, or Rabin-Karp
/// for small sets of same-length prefixes.
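///
/// As a rough usage sketch (hedged: the `Literals` value normally comes
/// from the regex compiler; `empty()` stands in as a placeholder here):
///
/// ```rust,ignore
/// let lits = syntax::Literals::empty();
/// let searcher = LiteralSearcher::prefixes(lits);
/// if let Some((start, end)) = searcher.find(b"search text") {
///     // haystack[start..end] is an occurrence of a prefix literal.
/// }
/// ```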
#[derive(Clone, Debug)]
pub struct LiteralSearcher {
complete: bool,
lcp: SingleSearch,
lcs: SingleSearch,
matcher: Matcher,
}
#[derive(Clone, Debug)]
enum Matcher {
/// No literals. (Never advances through the input.)
Empty,
/// A set of four or more single byte literals.
Bytes(SingleByteSet),
/// A single substring. (Likely using Boyer-Moore with memchr.)
Single(SingleSearch),
/// An Aho-Corasick automaton.
AC(FullAcAutomaton<syntax::Lit>),
    /// A SIMD-accelerated multiple substring matcher.
Teddy128(Teddy),
}
impl LiteralSearcher {
/// Returns a matcher that never matches and never advances the input.
pub fn empty() -> Self {
Self::new(syntax::Literals::empty(), Matcher::Empty)
}
/// Returns a matcher for literal prefixes from the given set.
pub fn prefixes(lits: syntax::Literals) -> Self {
let matcher = Matcher::prefixes(&lits);
Self::new(lits, matcher)
}
/// Returns a matcher for literal suffixes from the given set.
pub fn suffixes(lits: syntax::Literals) -> Self {
let matcher = Matcher::suffixes(&lits);
Self::new(lits, matcher)
}
fn new(lits: syntax::Literals, matcher: Matcher) -> Self {
let complete = lits.all_complete();
LiteralSearcher {
complete: complete,
lcp: SingleSearch::new(lits.longest_common_prefix().to_vec()),
lcs: SingleSearch::new(lits.longest_common_suffix().to_vec()),
matcher: matcher,
}
}
/// Returns true if all matches comprise the entire regular expression.
///
/// This does not necessarily mean that a literal match implies a match
/// of the regular expression. For example, the regular expression `^a`
/// is comprised of a single complete literal `a`, but the regular
/// expression demands that it only match at the beginning of a string.
pub fn complete(&self) -> bool {
self.complete && self.len() > 0
}
/// Find the position of a literal in `haystack` if it exists.
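    ///
    /// A hedged sketch, assuming the searcher's only literal is `b"abc"`:
    ///
    /// ```rust,ignore
    /// assert_eq!(searcher.find(b"xxabcz"), Some((2, 5)));
    /// ```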
#[inline(always)] // reduces constant overhead
pub fn find(&self, haystack: &[u8]) -> Option<(usize, usize)> {
use self::Matcher::*;
match self.matcher {
Empty => Some((0, 0)),
Bytes(ref sset) => sset.find(haystack).map(|i| (i, i + 1)),
Single(ref s) => s.find(haystack).map(|i| (i, i + s.len())),
AC(ref aut) => aut.find(haystack).next().map(|m| (m.start, m.end)),
Teddy128(ref ted) => ted.find(haystack).map(|m| (m.start, m.end)),
}
}
/// Like find, except matches must start at index `0`.
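    ///
    /// A hedged sketch, assuming `b"abc"` is the searcher's only literal:
    ///
    /// ```rust,ignore
    /// assert_eq!(searcher.find_start(b"abcxyz"), Some((0, 3)));
    /// assert_eq!(searcher.find_start(b"xabc"), None);
    /// ```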
pub fn find_start(&self, haystack: &[u8]) -> Option<(usize, usize)> {
for lit in self.iter() {
if lit.len() > haystack.len() {
continue;
}
if lit == &haystack[0..lit.len()] {
return Some((0, lit.len()));
}
}
None
}
/// Like find, except matches must end at index `haystack.len()`.
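    ///
    /// A hedged sketch, assuming `b"abc"` is the searcher's only literal:
    ///
    /// ```rust,ignore
    /// assert_eq!(searcher.find_end(b"xyzabc"), Some((3, 6)));
    /// assert_eq!(searcher.find_end(b"abcx"), None);
    /// ```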
pub fn find_end(&self, haystack: &[u8]) -> Option<(usize, usize)> {
for lit in self.iter() {
if lit.len() > haystack.len() {
continue;
}
if lit == &haystack[haystack.len() - lit.len()..] {
return Some((haystack.len() - lit.len(), haystack.len()));
}
}
None
}
/// Returns an iterator over all literals to be matched.
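    ///
    /// A hedged sketch:
    ///
    /// ```rust,ignore
    /// for lit in searcher.iter() {
    ///     // Each `lit` is a `&[u8]` view of one literal.
    /// }
    /// ```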
pub fn iter(&self) -> LiteralIter {
match self.matcher {
Matcher::Empty => LiteralIter::Empty,
Matcher::Bytes(ref sset) => LiteralIter::Bytes(&sset.dense),
Matcher::Single(ref s) => LiteralIter::Single(&s.pat),
Matcher::AC(ref ac) => LiteralIter::AC(ac.patterns()),
Matcher::Teddy128(ref ted) => {
LiteralIter::Teddy128(ted.patterns())
}
}
}
/// Returns a matcher for the longest common prefix of this matcher.
pub fn lcp(&self) -> &SingleSearch {
&self.lcp
}
/// Returns a matcher for the longest common suffix of this matcher.
pub fn lcs(&self) -> &SingleSearch {
&self.lcs
}
/// Returns true iff this prefix is empty.
pub fn is_empty(&self) -> bool {
self.len() == 0
}
/// Returns the number of prefixes in this machine.
pub fn len(&self) -> usize {
use self::Matcher::*;
match self.matcher {
Empty => 0,
Bytes(ref sset) => sset.dense.len(),
Single(_) => 1,
AC(ref aut) => aut.len(),
Teddy128(ref ted) => ted.len(),
}
}
/// Return the approximate heap usage of literals in bytes.
pub fn approximate_size(&self) -> usize {
use self::Matcher::*;
match self.matcher {
Empty => 0,
Bytes(ref sset) => sset.approximate_size(),
Single(ref single) => single.approximate_size(),
AC(ref aut) => aut.heap_bytes(),
Teddy128(ref ted) => ted.approximate_size(),
}
}
}
impl Matcher {
fn prefixes(lits: &syntax::Literals) -> Self {
let sset = SingleByteSet::prefixes(&lits);
Matcher::new(lits, sset)
}
fn suffixes(lits: &syntax::Literals) -> Self {
let sset = SingleByteSet::suffixes(&lits);
Matcher::new(lits, sset)
}
fn new(lits: &syntax::Literals, sset: SingleByteSet) -> Self {
if lits.literals().is_empty() {
return Matcher::Empty;
}
if sset.dense.len() >= 26 {
// Avoid trying to match a large number of single bytes.
// This is *very* sensitive to a frequency analysis comparison
// between the bytes in sset and the composition of the haystack.
        // No matter the size of sset, if its members are all rare in the
        // haystack, then it would be worth using it. How to tune this... IDK.
// ---AG
return Matcher::Empty;
}
if sset.complete {
return Matcher::Bytes(sset);
}
if lits.literals().len() == 1 {
let lit = lits.literals()[0].to_vec();
return Matcher::Single(SingleSearch::new(lit));
}
let is_aho_corasick_fast = sset.dense.len() == 1 && sset.all_ascii;
if is_teddy_128_available() && !is_aho_corasick_fast {
// Only try Teddy if Aho-Corasick can't use memchr on an ASCII
// byte. Also, in its current form, Teddy doesn't scale well to
// lots of literals.
//
// We impose the ASCII restriction since an alternation of
// non-ASCII string literals in the same language is likely to all
// start with the same byte. Even worse, the corpus being searched
// probably has a similar composition, which ends up completely
// negating the benefit of memchr.
const MAX_TEDDY_LITERALS: usize = 32;
if lits.literals().len() <= MAX_TEDDY_LITERALS {
if let Some(ted) = Teddy::new(lits) {
return Matcher::Teddy128(ted);
}
}
// Fallthrough to ol' reliable Aho-Corasick...
}
let pats = lits.literals().to_owned();
Matcher::AC(AcAutomaton::new(pats).into_full())
}
}
/// An iterator over the literal strings in a matcher, yielded as byte
/// slices.
pub enum LiteralIter<'a> {
    /// No literals to yield.
    Empty,
    /// Single-byte literals, yielded one byte at a time.
    Bytes(&'a [u8]),
    /// A single literal.
    Single(&'a [u8]),
    /// The literals backing an Aho-Corasick automaton.
    AC(&'a [syntax::Lit]),
    /// The literals backing a Teddy matcher.
    Teddy128(&'a [Vec<u8>]),
}
impl<'a> Iterator for LiteralIter<'a> {
type Item = &'a [u8];
fn next(&mut self) -> Option<Self::Item> {
match *self {
LiteralIter::Empty => None,
LiteralIter::Bytes(ref mut many) => {
if many.is_empty() {
None
} else {
                    // Yield each byte as its own one-byte literal slice.
                    let next = &many[0..1];
*many = &many[1..];
Some(next)
}
}
LiteralIter::Single(ref mut one) => {
if one.is_empty() {
None
} else {
let next = &one[..];
*one = &[];
Some(next)
}
}
LiteralIter::AC(ref mut lits) => {
if lits.is_empty() {
None
} else {
let next = &lits[0];
*lits = &lits[1..];
Some(&**next)
}
}
LiteralIter::Teddy128(ref mut lits) => {
if lits.is_empty() {
None
} else {
let next = &lits[0];
*lits = &lits[1..];
Some(&**next)
}
}
}
}
}
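/// A set of single-byte literals, stored both sparsely (a 256-entry
/// membership table indexed by byte value) and densely (the member bytes
/// themselves).
///
/// A hedged sketch of the invariant, assuming `b'a'` and `b'z'` have been
/// added:
///
/// ```rust,ignore
/// assert!(sset.sparse[b'a' as usize]);
/// assert!(sset.sparse[b'z' as usize]);
/// assert_eq!(sset.dense, vec![b'a', b'z']);
/// ```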
#[derive(Clone, Debug)]
struct SingleByteSet {
    /// Membership table with one entry per possible byte value.
    sparse: Vec<bool>,
    /// The member bytes themselves, in the order they were added.
    dense: Vec<u8>,
    /// True if every literal in the originating set is exactly one byte.
    complete: bool,
    /// True if every byte in this set is ASCII.
    all_ascii: bool,
}
impl SingleByteSet {
fn new() -> SingleByteSet {
SingleByteSet {
sparse: vec![false; 256],
dense: vec![],
complete: true,
all_ascii: true,
}
}
fn prefixes(lits: &syntax::Literals) -> SingleByteSet {
let mut sset = SingleByteSet::new();
for lit in lits.literals() {
sset.complete = sset.complete && lit.len() == 1;
if let Some(&b) = lit.get(0) {
if !sset.sparse[b as usize] {
if b > 0x7F {
sset.all_ascii = false;
}
sset.dense.push(b);
sset.sparse[b as usize] = true;
}
}
}
sset
}
fn suffixes(lits: &syntax::Literals) -> SingleByteSet {
let mut sset = SingleByteSet::new();
for lit in lits.literals() {
sset.complete = sset.complete && lit.len() == 1;
            // `unwrap_or(0)` (rather than `unwrap`) keeps an empty literal
            // from panicking here; `get(0)` then returns `None` and the
            // literal is skipped.
            if let Some(&b) = lit.get(lit.len().checked_sub(1).unwrap_or(0)) {
if !sset.sparse[b as usize] {
if b > 0x7F {
sset.all_ascii = false;
}
sset.dense.push(b);
sset.sparse[b as usize] = true;
}
}
}
sset
}
/// Faster find that special cases certain sizes to use memchr.
#[inline(always)] // reduces constant overhead
fn find(&self, text: &[u8]) -> Option<usize> {
match self.dense.len() {
0 => None,
1 => memchr(self.dense[0], text),
2 => memchr2(self.dense[0], self.dense[1], text),
3 => memchr3(self.dense[0], self.dense[1], self.dense[2], text),
_ => self._find(text),
}
}
/// Generic find that works on any sized set.
fn _find(&self, haystack: &[u8]) -> Option<usize> {
for (i, &b) in haystack.iter().enumerate() {
if self.sparse[b as usize] {
return Some(i);
}
}
None
}
fn approximate_size(&self) -> usize {
(self.dense.len() * mem::size_of::<u8>())
+ (self.sparse.len() * mem::size_of::<bool>())
}
}
/// Provides an implementation of fast substring search.
///
/// This particular implementation is a Boyer-Moore variant, based on the
/// "Tuned Boyer-Moore" search from Hume & Sunday (1991). It has been tweaked
/// slightly to make better use of memchr.
///
/// memchr is so fast that we do everything we can to keep the loop in memchr
/// for as long as possible. The easiest way to do this is to intelligently
/// pick the byte to send to memchr. The best byte is the byte that occurs
/// least frequently in the haystack. Since doing frequency analysis on the
/// haystack is far too expensive, we compute a set of fixed frequencies up
/// front and hard code them in src/freqs.rs. Frequency analysis is done via
/// scripts/frequencies.py.
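///
/// A hedged usage sketch (construction normally happens inside
/// `LiteralSearcher`, since `SingleSearch::new` is private; `lits` and
/// `haystack` are assumed bindings):
///
/// ```rust,ignore
/// let searcher = LiteralSearcher::prefixes(lits);
/// if let Some(start) = searcher.lcp().find(haystack) {
///     // The longest common prefix of all literals begins at `start`.
/// }
/// ```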
///
/// TODO(burntsushi): Add some amount of shifting to this.
#[derive(Clone, Debug)]
pub struct SingleSearch {
/// The pattern.
pat: Vec<u8>,
/// The number of Unicode characters in the pattern. This is useful for
/// determining the effective length of a pattern when deciding which
/// optimizations to perform. A trailing incomplete UTF-8 sequence counts
/// as one character.
char_len: usize,
/// The rarest byte in the pattern, according to pre-computed frequency
/// analysis.
rare1: u8,
/// The offset of the rarest byte in `pat`.
rare1i: usize,
/// The second rarest byte in the pattern, according to pre-computed
/// frequency analysis. (This may be equivalent to the rarest byte.)
///
/// The second rarest byte is used as a type of guard for quickly detecting
/// a mismatch after memchr locates an instance of the rarest byte. This
/// is a hedge against pathological cases where the pre-computed frequency
/// analysis may be off. (But of course, does not prevent *all*
/// pathological cases.)
rare2: u8,
/// The offset of the second rarest byte in `pat`.
rare2i: usize,
}
impl SingleSearch {
fn new(pat: Vec<u8>) -> SingleSearch {
fn freq_rank(b: u8) -> usize { BYTE_FREQUENCIES[b as usize] as usize }
if pat.is_empty() {
return SingleSearch::empty();
}
// Find the rarest two bytes. Try to make them distinct (but it's not
// required).
let mut rare1 = pat[0];
let mut rare2 = pat[0];
for b in pat[1..].iter().cloned() {
if freq_rank(b) < freq_rank(rare1) {
rare1 = b;
}
}
        // Prefer a second byte distinct from `rare1` when one exists.
        for &b in &pat {
            if rare1 == rare2 {
                rare2 = b;
            } else if b != rare1 && freq_rank(b) < freq_rank(rare2) {
                rare2 = b;
            }
        }
// And find the offsets of their last occurrences.
let rare1i = pat.iter().rposition(|&b| b == rare1).unwrap();
let rare2i = pat.iter().rposition(|&b| b == rare2).unwrap();
let char_len = char_len_lossy(&pat);
SingleSearch {
pat: pat,
char_len: char_len,
rare1: rare1,
rare1i: rare1i,
rare2: rare2,
rare2i: rare2i,
}
}
fn empty() -> SingleSearch {
SingleSearch {
pat: vec![],
char_len: 0,
rare1: 0,
rare1i: 0,
rare2: 0,
rare2i: 0,
}
}
#[inline(always)] // reduces constant overhead
pub fn find(&self, haystack: &[u8]) -> Option<usize> {
let pat = &*self.pat;
if haystack.len() < pat.len() || pat.is_empty() {
return None;
}
        // Start at the offset of the rarest byte so candidate alignments
        // can be computed without underflow.
        let mut i = self.rare1i;
        while i < haystack.len() {
            // Skip ahead to the next occurrence of the rarest byte.
            i += match memchr(self.rare1, &haystack[i..]) {
                None => return None,
                Some(i) => i,
            };
            // Align a candidate window so the rarest byte lines up with its
            // offset in the pattern.
            let start = i - self.rare1i;
            let end = start + pat.len();
            if end > haystack.len() {
                return None;
            }
            let aligned = &haystack[start..end];
            // Check the second rarest byte as a cheap guard before comparing
            // the entire window against the pattern.
            if aligned[self.rare2i] == self.rare2 && aligned == &*self.pat {
                return Some(start);
            }
            i += 1;
}
None
}
#[inline(always)] // reduces constant overhead
pub fn is_suffix(&self, text: &[u8]) -> bool {
if text.len() < self.len() {
return false;
}
&text[text.len() - self.len()..] == &*self.pat
}
pub fn len(&self) -> usize {
self.pat.len()
}
pub fn char_len(&self) -> usize {
self.char_len
}
fn approximate_size(&self) -> usize {
self.pat.len() * mem::size_of::<u8>()
}
}
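/// Counts the Unicode characters in `bytes`, decoding lossily so that
/// invalid UTF-8 sequences are counted as replacement characters. A hedged
/// illustration:
///
/// ```rust,ignore
/// assert_eq!(char_len_lossy(b"ab"), 2);
/// assert_eq!(char_len_lossy(b"a\xFF"), 2); // 'a' plus one replacement char
/// ```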
fn char_len_lossy(bytes: &[u8]) -> usize {
String::from_utf8_lossy(bytes).chars().count()
}