vendor/regex-syntax-0.3.9/src/lib.rs - toolchain/rustc - Git at Google

 // Copyright 2014-2015 The Rust Project Developers. See the COPYRIGHT
 // file at the top-level directory of this distribution and at
 // http://rust-lang.org/COPYRIGHT.
 //
 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
 // option. This file may not be copied, modified, or distributed
 // except according to those terms.

 /*!
 This crate provides a regular expression parser and an abstract syntax for
 regular expressions. The abstract syntax is defined by the `Expr` type. The
 concrete syntax is enumerated in the
 [`regex`](../regex/index.html#syntax)
 crate documentation.

 Note that since this crate is first and foremost an implementation detail for
 the `regex` crate, it may experience more frequent breaking changes. It is
 exposed as a separate crate so that others may use it to do analysis on regular
 expressions or even build their own matching engine.

 # Example: parsing an expression

 Parsing a regular expression can be done with the `Expr::parse` function.

 ```rust
 use regex_syntax::Expr;

 assert_eq!(Expr::parse(r"ab|yz").unwrap(), Expr::Alternate(vec![
     Expr::Literal { chars: vec!['a', 'b'], casei: false },
     Expr::Literal { chars: vec!['y', 'z'], casei: false },
 ]));
 ```

 # Example: inspecting an error

 The parser in this crate provides very detailed error values. For example,
 if an invalid character class range is given:

 ```rust
 use regex_syntax::{Expr, ErrorKind};

 let err = Expr::parse(r"[z-a]").unwrap_err();
 assert_eq!(err.position(), 4);
 assert_eq!(err.kind(), &ErrorKind::InvalidClassRange {
     start: 'z',
     end: 'a',
 });
 ```

 Or unbalanced parentheses:

 ```rust
 use regex_syntax::{Expr, ErrorKind};

 let err = Expr::parse(r"ab(cd").unwrap_err();
 assert_eq!(err.position(), 2);
 assert_eq!(err.kind(), &ErrorKind::UnclosedParen);
 ```
 */

 #![deny(missing_docs)]
 #![cfg_attr(test, deny(warnings))]

 #[cfg(test)] extern crate quickcheck;
 #[cfg(test)] extern crate rand;

 mod literals;
 mod parser;
 mod unicode;

 use std::ascii;
 use std::char;
 use std::cmp::{Ordering, max, min};
 use std::fmt;
 use std::iter::IntoIterator;
 use std::ops::Deref;
 use std::result;
 use std::slice;
 use std::u8;
 use std::vec;

 use unicode::case_folding;

 use self::Expr::*;
 use self::Repeater::*;

 use parser::{Flags, Parser};

 pub use literals::{Literals, Lit};

 /// A regular expression abstract syntax tree.
 ///
 /// An `Expr` represents the abstract syntax of a regular expression.
 #[derive(Clone, Debug, PartialEq, Eq)]
 pub enum Expr {
     /// An empty regex (which never matches any text).
     Empty,
     /// A sequence of one or more literal characters to be matched.
     Literal {
         /// The characters.
         chars: Vec<char>,
         /// Whether to match case insensitively.
         casei: bool,
     },
     /// A sequence of one or more literal bytes to be matched.
     LiteralBytes {
         /// The bytes.
         bytes: Vec<u8>,
         /// Whether to match case insensitively.
         ///
         /// The interpretation of "case insensitive" in this context is
         /// ambiguous since `bytes` can be arbitrary. However, a good heuristic
         /// is to assume that the bytes are ASCII-compatible and do simple
         /// ASCII case folding.
         casei: bool,
     },
     /// Match any character.
     AnyChar,
     /// Match any character, excluding new line (`0xA`).
     AnyCharNoNL,
     /// Match any byte.
     AnyByte,
     /// Match any byte, excluding new line (`0xA`).
     AnyByteNoNL,
     /// A character class.
     Class(CharClass),
     /// A character class with byte ranges only.
     ClassBytes(ByteClass),
     /// Match the start of a line or beginning of input.
     StartLine,
     /// Match the end of a line or end of input.
     EndLine,
     /// Match the beginning of input.
     StartText,
     /// Match the end of input.
     EndText,
     /// Match a word boundary (word character on one side and a non-word
     /// character on the other).
     WordBoundary,
     /// Match a position that is not a word boundary (word or non-word
     /// characters on both sides).
     NotWordBoundary,
     /// Match an ASCII word boundary.
     WordBoundaryAscii,
     /// Match a position that is not an ASCII word boundary.
     NotWordBoundaryAscii,
     /// A group, possibly non-capturing.
     Group {
         /// The expression inside the group.
         e: Box<Expr>,
         /// The capture index (starting at `1`) only for capturing groups.
         i: Option<usize>,
         /// The capture name, only for capturing named groups.
         name: Option<String>,
     },
     /// A repeat operator (`?`, `*`, `+` or `{m,n}`).
     Repeat {
         /// The expression to be repeated. Limited to literals, `.`, classes
         /// or grouped expressions.
         e: Box<Expr>,
         /// The type of repeat operator used.
         r: Repeater,
         /// Whether the repeat is greedy (match the most) or not (match the
         /// least).
         greedy: bool,
     },
     /// A concatenation of expressions. Must be matched one after the other.
     ///
     /// N.B. A concat expression can only appear at the top-level or
     /// immediately inside a group expression.
     Concat(Vec<Expr>),
     /// An alternation of expressions. Only one must match.
     ///
     /// N.B. An alternate expression can only appear at the top-level or
     /// immediately inside a group expression.
     Alternate(Vec<Expr>),
 }

 type CaptureIndex = Option<usize>;

 type CaptureName = Option<String>;

 /// The type of a repeat operator expression.
 #[derive(Clone, Copy, Debug, PartialEq, Eq)]
 pub enum Repeater {
     /// Match zero or one (`?`).
     ZeroOrOne,
     /// Match zero or more (`*`).
     ZeroOrMore,
     /// Match one or more (`+`).
     OneOrMore,
     /// Match for at least `min` and at most `max` (`{m,n}`).
     ///
     /// When `max` is `None`, there is no upper bound on the number of matches.
     Range {
         /// Lower bound on the number of matches.
         min: u32,
         /// Optional upper bound on the number of matches.
         max: Option<u32>,
     },
 }

 impl Repeater {
     /// Returns true if and only if this repetition can match the empty string.
     fn matches_empty(&self) -> bool {
         use self::Repeater::*;
         match *self {
             ZeroOrOne => true,
             ZeroOrMore => true,
             OneOrMore => false,
             Range { min, .. } => min == 0,
         }
     }
 }

 /// A character class.
 ///
 /// A character class has a canonical format that the parser guarantees. Its
 /// canonical format is defined by the following invariants:
 ///
 /// 1. Given any Unicode scalar value, it is matched by *at most* one character
 ///    range in a canonical character class.
 /// 2. Every adjacent character range is separated by at least one Unicode
 ///    scalar value.
 /// 3. Given any pair of character ranges `r1` and `r2`, if
 ///    `r1.end < r2.start`, then `r1` comes before `r2` in a canonical
 ///    character class.
 ///
 /// In sum, any `CharClass` produced by this crate's parser is a sorted
 /// sequence of non-overlapping ranges. This makes it possible to test whether
 /// a character is matched by a class with a binary search.
 ///
 /// If the case insensitive flag was set when parsing a character class, then
 /// simple case folding is done automatically. For example, `(?i)[a-c]` is
 /// automatically translated to `[a-cA-C]`.
 #[derive(Clone, Debug, PartialEq, Eq)]
 pub struct CharClass {
     ranges: Vec<ClassRange>,
 }

 /// A single inclusive range in a character class.
 ///
 /// Since range boundaries are defined by Unicode scalar values, the boundaries
 /// can never be in the open interval `(0xD7FF, 0xE000)`. However, a range may
 /// *cover* codepoints that are not scalar values.
 ///
 /// Note that this has a few convenient impls on `PartialEq` and `PartialOrd`
 /// for testing whether a character is contained inside a given range.
 #[derive(Clone, Copy, Debug, PartialEq, PartialOrd, Eq, Ord)]
 pub struct ClassRange {
     /// The start character of the range.
     ///
     /// This must be less than or equal to `end`.
     pub start: char,

     /// The end character of the range.
     ///
     /// This must be greater than or equal to `start`.
     pub end: char,
 }

 /// A byte class for byte ranges only.
 ///
 /// A byte class has a canonical format that the parser guarantees. Its
 /// canonical format is defined by the following invariants:
 ///
 /// 1. Given any byte, it is matched by *at most* one byte range in a canonical
 ///    character class.
 /// 2. Every adjacent byte range is separated by at least one byte.
 /// 3. Given any pair of byte ranges `r1` and `r2`, if
 ///    `r1.end < r2.start`, then `r1` comes before `r2` in a canonical
 ///    character class.
 ///
 /// In sum, any `ByteClass` produced by this crate's parser is a sorted
 /// sequence of non-overlapping ranges. This makes it possible to test whether
 /// a byte is matched by a class with a binary search.
 ///
 /// If the case insensitive flag was set when parsing a character class,
 /// then simple ASCII-only case folding is done automatically. For example,
 /// `(?i)[a-c]` is automatically translated to `[a-cA-C]`.
 #[derive(Clone, Debug, PartialEq, Eq)]
 pub struct ByteClass {
     ranges: Vec<ByteRange>,
 }

 /// A single inclusive range in a byte class.
 ///
 /// Note that this has a few convenient impls on `PartialEq` and `PartialOrd`
 /// for testing whether a byte is contained inside a given range.
 #[derive(Clone, Copy, Debug, PartialEq, PartialOrd, Eq, Ord)]
 pub struct ByteRange {
     /// The start byte of the range.
     ///
     /// This must be less than or equal to `end`.
     pub start: u8,

     /// The end byte of the range.
     ///
     /// This must be greater than or equal to `end`.
     pub end: u8,
 }

 /// A builder for configuring regular expression parsing.
 ///
 /// This allows setting the default values of flags and other options, such
 /// as the maximum nesting depth.
 #[derive(Clone, Debug)]
 pub struct ExprBuilder {
     flags: Flags,
     nest_limit: usize,
 }

 impl ExprBuilder {
     /// Create a new builder for configuring expression parsing.
     ///
     /// Note that all flags are disabled by default.
     pub fn new() -> ExprBuilder {
         ExprBuilder {
             flags: Flags::default(),
             nest_limit: 200,
         }
     }

     /// Set the default value for the case insensitive (`i`) flag.
     pub fn case_insensitive(mut self, yes: bool) -> ExprBuilder {
         self.flags.casei = yes;
         self
     }

     /// Set the default value for the multi-line matching (`m`) flag.
     pub fn multi_line(mut self, yes: bool) -> ExprBuilder {
         self.flags.multi = yes;
         self
     }

     /// Set the default value for the any character (`s`) flag.
     pub fn dot_matches_new_line(mut self, yes: bool) -> ExprBuilder {
         self.flags.dotnl = yes;
         self
     }

     /// Set the default value for the greedy swap (`U`) flag.
     pub fn swap_greed(mut self, yes: bool) -> ExprBuilder {
         self.flags.swap_greed = yes;
         self
     }

     /// Set the default value for the ignore whitespace (`x`) flag.
     pub fn ignore_whitespace(mut self, yes: bool) -> ExprBuilder {
         self.flags.ignore_space = yes;
         self
     }

     /// Set the default value for the Unicode (`u`) flag.
     ///
     /// If `yes` is false, then `allow_bytes` is set to true.
     pub fn unicode(mut self, yes: bool) -> ExprBuilder {
         self.flags.unicode = yes;
         if !yes {
             self.allow_bytes(true)
         } else {
             self
         }
     }

     /// Whether the parser allows matching arbitrary bytes or not.
     ///
     /// When the `u` flag is disabled (either with this builder or in the
     /// expression itself), the parser switches to interpreting the expression
     /// as matching arbitrary bytes instead of Unicode codepoints. For example,
     /// the expression `(?u:\xFF)` matches the *codepoint* `\xFF`, which
     /// corresponds to the UTF-8 byte sequence `\xCE\xBF`. Conversely,
     /// `(?-u:\xFF)` matches the *byte* `\xFF`, which is not valid UTF-8.
     ///
     /// When `allow_bytes` is disabled (the default), an expression like
     /// `(?-u:\xFF)` will cause the parser to return an error, since it would
     /// otherwise match invalid UTF-8. When enabled, it will be allowed.
     pub fn allow_bytes(mut self, yes: bool) -> ExprBuilder {
         self.flags.allow_bytes = yes;
         self
     }

     /// Set the nesting limit for regular expression parsing.
     ///
     /// Regular expressions that nest more than this limit will result in a
     /// `StackExhausted` error.
     pub fn nest_limit(mut self, limit: usize) -> ExprBuilder {
         self.nest_limit = limit;
         self
     }

     /// Parse a string as a regular expression using the current configuraiton.
     pub fn parse(self, s: &str) -> Result<Expr> {
         Parser::parse(s, self.flags).and_then(|e| e.simplify(self.nest_limit))
     }
 }

 impl Expr {
     /// Parses a string in a regular expression syntax tree.
     ///
     /// This is a convenience method for parsing an expression using the
     /// default configuration. To tweak parsing options (such as which flags
     /// are enabled by default), use the `ExprBuilder` type.
     pub fn parse(s: &str) -> Result<Expr> {
         ExprBuilder::new().parse(s)
     }

     /// Returns true iff the expression can be repeated by a quantifier.
     fn can_repeat(&self) -> bool {
         match *self {
             Literal{..} | LiteralBytes{..}
             | AnyChar | AnyCharNoNL | AnyByte | AnyByteNoNL
             | Class(_) | ClassBytes(_)
             | StartLine | EndLine | StartText | EndText
             | WordBoundary | NotWordBoundary
             | WordBoundaryAscii | NotWordBoundaryAscii
             | Group{..}
             => true,
             _ => false,
         }
     }

     fn simplify(self, nest_limit: usize) -> Result<Expr> {
         fn combine_literals(es: &mut Vec<Expr>, e: Expr) {
             match (es.pop(), e) {
                 (None, e) => es.push(e),
                 (Some(Literal { chars: mut chars1, casei: casei1 }),
                       Literal { chars: chars2, casei: casei2 }) => {
                     if casei1 == casei2 {
                         chars1.extend(chars2);
                         es.push(Literal { chars: chars1, casei: casei1 });
                     } else {
                         es.push(Literal { chars: chars1, casei: casei1 });
                         es.push(Literal { chars: chars2, casei: casei2 });
                     }
                 }
                 (Some(LiteralBytes { bytes: mut bytes1, casei: casei1 }),
                       LiteralBytes { bytes: bytes2, casei: casei2 }) => {
                     if casei1 == casei2 {
                         bytes1.extend(bytes2);
                         es.push(LiteralBytes { bytes: bytes1, casei: casei1 });
                     } else {
                         es.push(LiteralBytes { bytes: bytes1, casei: casei1 });
                         es.push(LiteralBytes { bytes: bytes2, casei: casei2 });
                     }
                 }
                 (Some(e1), e2) => {
                     es.push(e1);
                     es.push(e2);
                 }
             }
         }
         fn simp(expr: Expr, recurse: usize, limit: usize) -> Result<Expr> {
             if recurse > limit {
                 return Err(Error {
                     pos: 0,
                     surround: "".to_owned(),
                     kind: ErrorKind::StackExhausted,
                 });
             }
             let simplify = |e| simp(e, recurse + 1, limit);
             Ok(match expr {
                 Repeat { e, r, greedy } => Repeat {
                     e: Box::new(try!(simplify(*e))),
                     r: r,
                     greedy: greedy,
                 },
                 Group { e, i, name } => {
                     let e = try!(simplify(*e));
                     if i.is_none() && name.is_none() && e.can_repeat() {
                         e
                     } else {
                         Group { e: Box::new(e), i: i, name: name }
                     }
                 }
                 Concat(es) => {
                     let mut new_es = Vec::with_capacity(es.len());
                     for e in es {
                         combine_literals(&mut new_es, try!(simplify(e)));
                     }
                     if new_es.len() == 1 {
                         new_es.pop().unwrap()
                     } else {
                         Concat(new_es)
                     }
                 }
                 Alternate(es) => {
                     let mut new_es = Vec::with_capacity(es.len());
                     for e in es {
                         new_es.push(try!(simplify(e)));
                     }
                     Alternate(new_es)
                 }
                 e => e,
             })
         }
         simp(self, 0, nest_limit)
     }

     /// Returns a set of literal prefixes extracted from this expression.
     pub fn prefixes(&self) -> Literals {
         let mut lits = Literals::empty();
         lits.union_prefixes(self);
         lits
     }

     /// Returns a set of literal suffixes extracted from this expression.
     pub fn suffixes(&self) -> Literals {
         let mut lits = Literals::empty();
         lits.union_suffixes(self);
         lits
     }

     /// Returns true if and only if the expression is required to match from
     /// the beginning of text.
     pub fn is_anchored_start(&self) -> bool {
         match *self {
             Repeat { ref e, r, .. } => {
                 !r.matches_empty() && e.is_anchored_start()
             }
             Group { ref e, .. } => e.is_anchored_start(),
             Concat(ref es) => es[0].is_anchored_start(),
             Alternate(ref es) => es.iter().all(|e| e.is_anchored_start()),
             StartText => true,
             _ => false,
         }
     }

     /// Returns true if and only if the expression has at least one matchable
     /// sub-expression that must match the beginning of text.
     pub fn has_anchored_start(&self) -> bool {
         match *self {
             Repeat { ref e, r, .. } => {
                 !r.matches_empty() && e.has_anchored_start()
             }
             Group { ref e, .. } => e.has_anchored_start(),
             Concat(ref es) => es[0].has_anchored_start(),
             Alternate(ref es) => es.iter().any(|e| e.has_anchored_start()),
             StartText => true,
             _ => false,
         }
     }

     /// Returns true if and only if the expression is required to match at the
     /// end of the text.
     pub fn is_anchored_end(&self) -> bool {
         match *self {
             Repeat { ref e, r, .. } => {
                 !r.matches_empty() && e.is_anchored_end()
             }
             Group { ref e, .. } => e.is_anchored_end(),
             Concat(ref es) => es[es.len() - 1].is_anchored_end(),
             Alternate(ref es) => es.iter().all(|e| e.is_anchored_end()),
             EndText => true,
             _ => false,
         }
     }

     /// Returns true if and only if the expression has at least one matchable
     /// sub-expression that must match the beginning of text.
     pub fn has_anchored_end(&self) -> bool {
         match *self {
             Repeat { ref e, r, .. } => {
                 !r.matches_empty() && e.has_anchored_end()
             }
             Group { ref e, .. } => e.has_anchored_end(),
             Concat(ref es) => es[es.len() - 1].has_anchored_end(),
             Alternate(ref es) => es.iter().any(|e| e.has_anchored_end()),
             EndText => true,
             _ => false,
         }
     }

     /// Returns true if and only if the expression contains sub-expressions
     /// that can match arbitrary bytes.
     pub fn has_bytes(&self) -> bool {
         match *self {
             Repeat { ref e, .. } => e.has_bytes(),
             Group { ref e, .. } => e.has_bytes(),
             Concat(ref es) => es.iter().any(|e| e.has_bytes()),
             Alternate(ref es) => es.iter().any(|e| e.has_bytes()),
             LiteralBytes{..} => true,
             AnyByte | AnyByteNoNL => true,
             ClassBytes(_) => true,
             WordBoundaryAscii | NotWordBoundaryAscii => true,
             _ => false,
         }
     }
 }

 impl Deref for CharClass {
     type Target = Vec<ClassRange>;
     fn deref(&self) -> &Vec<ClassRange> { &self.ranges }
 }

 impl IntoIterator for CharClass {
     type Item = ClassRange;
     type IntoIter = vec::IntoIter<ClassRange>;
     fn into_iter(self) -> vec::IntoIter<ClassRange> { self.ranges.into_iter() }
 }

 impl<'a> IntoIterator for &'a CharClass {
     type Item = &'a ClassRange;
     type IntoIter = slice::Iter<'a, ClassRange>;
     fn into_iter(self) -> slice::Iter<'a, ClassRange> { self.iter() }
 }

 impl CharClass {
     /// Create a new class from an existing set of ranges.
     pub fn new(ranges: Vec<ClassRange>) -> CharClass {
         CharClass { ranges: ranges }
     }

     /// Create an empty class.
     fn empty() -> CharClass {
         CharClass::new(Vec::new())
     }

     /// Returns true if `c` is matched by this character class.
     pub fn matches(&self, c: char) -> bool {
         self.binary_search_by(|range| c.partial_cmp(range).unwrap()).is_ok()
     }

     /// Removes the given character from the class if it exists.
     ///
     /// Note that this takes `O(n)` time in the number of ranges.
     pub fn remove(&mut self, c: char) {
         let mut i = match self.binary_search_by(|r| c.partial_cmp(r).unwrap()) {
             Ok(i) => i,
             Err(_) => return,
         };
         let mut r = self.ranges.remove(i);
         if r.start == c {
             r.start = inc_char(c);
             if r.start > r.end || c == char::MAX {
                 return;
             }
             self.ranges.insert(i, r);
         } else if r.end == c {
             r.end = dec_char(c);
             if r.end < r.start || c == '\x00' {
                 return;
             }
             self.ranges.insert(0, r);
         } else {
             let (mut r1, mut r2) = (r.clone(), r.clone());
             r1.end = dec_char(c);
             if r1.start <= r1.end {
                 self.ranges.insert(i, r1);
                 i += 1;
             }
             r2.start = inc_char(c);
             if r2.start <= r2.end {
                 self.ranges.insert(i, r2);
             }
         }
     }

     /// Create a new empty class from this one.
     fn to_empty(&self) -> CharClass {
         CharClass { ranges: Vec::with_capacity(self.len()) }
     }

     /// Create a byte class from this character class.
     ///
     /// Codepoints above 0xFF are removed.
     fn to_byte_class(self) -> ByteClass {
         ByteClass::new(
             self.ranges.into_iter()
                        .filter_map(|r| r.to_byte_range())
                        .collect()).canonicalize()
     }

     /// Merge two classes and canonicalize them.
     #[cfg(test)]
     fn merge(mut self, other: CharClass) -> CharClass {
         self.ranges.extend(other);
         self.canonicalize()
     }

     /// Canonicalze any sequence of ranges.
     ///
     /// This is responsible for enforcing the canonical format invariants
     /// as described on the docs for the `CharClass` type.
     fn canonicalize(mut self) -> CharClass {
         // TODO: Save some cycles here by checking if already canonicalized.
         self.ranges.sort();
         let mut ordered = self.to_empty(); // TODO: Do this in place?
         for candidate in self {
             // If the candidate overlaps with an existing range, then it must
             // be the most recent range added because we process the candidates
             // in order.
             if let Some(or) = ordered.ranges.last_mut() {
                 if or.overlapping(candidate) {
                     *or = or.merge(candidate);
                     continue;
                 }
             }
             ordered.ranges.push(candidate);
         }
         ordered
     }

     /// Negates the character class.
     ///
     /// For all `c` where `c` is a Unicode scalar value, `c` matches `self`
     /// if and only if `c` does not match `self.negate()`.
     pub fn negate(mut self) -> CharClass {
         fn range(s: char, e: char) -> ClassRange { ClassRange::new(s, e) }

         if self.is_empty() {
             // Inverting an empty range yields all of Unicode.
             return CharClass {
                 ranges: vec![ClassRange { start: '\x00', end: '\u{10ffff}' }],
             };
         }
         self = self.canonicalize();
         let mut inv = self.to_empty();
         if self[0].start > '\x00' {
             inv.ranges.push(range('\x00', dec_char(self[0].start)));
         }
         for win in self.windows(2) {
             inv.ranges.push(range(inc_char(win[0].end),
                                   dec_char(win[1].start)));
         }
         if self[self.len() - 1].end < char::MAX {
             inv.ranges.push(range(inc_char(self[self.len() - 1].end),
                                   char::MAX));
         }
         inv
     }

     /// Apply case folding to this character class.
     ///
     /// N.B. Applying case folding to a negated character class probably
     /// won't produce the expected result. e.g., `(?i)[^x]` really should
     /// match any character sans `x` and `X`, but if `[^x]` is negated
     /// before being case folded, you'll end up matching any character.
     pub fn case_fold(self) -> CharClass {
         let mut folded = self.to_empty();
         for r in self {
             // Applying case folding to a range is expensive because *every*
             // character needs to be examined. Thus, we avoid that drudgery
             // if no character in the current range is in our case folding
             // table.
             if r.needs_case_folding() {
                 folded.ranges.extend(r.case_fold());
             }
             folded.ranges.push(r);
         }
         folded.canonicalize()
     }

     /// Returns the number of characters that match this class.
     fn num_chars(&self) -> usize {
         self.ranges.iter()
             .map(|&r| 1 + (r.end as u32) - (r.start as u32))
             .fold(0, |acc, len| acc + len)
             as usize
     }
 }

 impl ClassRange {
     /// Create a new class range.
     ///
     /// If `end < start`, then the two values are swapped so that
     /// the invariant `start <= end` is preserved.
     fn new(start: char, end: char) -> ClassRange {
         if start <= end {
             ClassRange { start: start, end: end }
         } else {
             ClassRange { start: end, end: start }
         }
     }

     /// Translate this to a byte class.
     ///
     /// If the start codepoint exceeds 0xFF, then this returns `None`.
     ///
     /// If the end codepoint exceeds 0xFF, then it is set to 0xFF.
     fn to_byte_range(self) -> Option<ByteRange> {
         if self.start > '\u{FF}' {
             None
         } else {
             let s = self.start as u8;
             let e = min('\u{FF}', self.end) as u8;
             Some(ByteRange::new(s, e))
         }
     }

     /// Create a range of one character.
     fn one(c: char) -> ClassRange {
         ClassRange { start: c, end: c }
     }

     /// Returns true if and only if the two ranges are overlapping. Note that
     /// since ranges are inclusive, `a-c` and `d-f` are overlapping!
     fn overlapping(self, other: ClassRange) -> bool {
         max(self.start, other.start) <= inc_char(min(self.end, other.end))
     }

     /// Creates a new range representing the union of `self` and `other.
     fn merge(self, other: ClassRange) -> ClassRange {
         ClassRange {
             start: min(self.start, other.start),
             end: max(self.end, other.end),
         }
     }

     /// Returns true if and only if this range contains a character that is
     /// in the case folding table.
     fn needs_case_folding(self) -> bool {
         case_folding::C_plus_S_both_table
         .binary_search_by(|&(c, _)| self.partial_cmp(&c).unwrap()).is_ok()
     }

     /// Apply case folding to this range.
     ///
     /// Since case folding might add characters such that the range is no
     /// longer contiguous, this returns multiple class ranges. They are in
     /// canonical order.
     fn case_fold(self) -> Vec<ClassRange> {
         let table = &case_folding::C_plus_S_both_table;
         let (s, e) = (self.start as u32, self.end as u32 + 1);
         let mut start = self.start;
         let mut end = start;
         let mut next_case_fold = '\x00';
         let mut ranges = Vec::with_capacity(10);
         for mut c in (s..e).filter_map(char::from_u32) {
             if c >= next_case_fold {
                 c = match simple_case_fold_both_result(c) {
                     Ok(i) => {
                         for &(c1, c2) in &table[i..] {
                             if c1 != c {
                                 break;
                             }
                             if c2 != inc_char(end) {
                                 ranges.push(ClassRange::new(start, end));
                                 start = c2;
                             }
                             end = c2;
                         }
                         continue;
                     }
                     Err(i) => {
                         if i < table.len() {
                             next_case_fold = table[i].0;
                         } else {
                             next_case_fold = '\u{10FFFF}';
                         }
                         c
                     }
                 };
             }
             // The fast path. We know this character doesn't have an entry
             // in the case folding table.
             if c != inc_char(end) {
                 ranges.push(ClassRange::new(start, end));
                 start = c;
             }
             end = c;
         }
         ranges.push(ClassRange::new(start, end));
         ranges
     }
 }

 impl PartialEq<char> for ClassRange {
     #[inline]
     fn eq(&self, other: &char) -> bool {
         self.start <= *other && *other <= self.end
     }
 }

 impl PartialEq<ClassRange> for char {
     #[inline]
     fn eq(&self, other: &ClassRange) -> bool {
         other.eq(self)
     }
 }

 impl PartialOrd<char> for ClassRange {
     #[inline]
     fn partial_cmp(&self, other: &char) -> Option<Ordering> {
         Some(if self == other {
             Ordering::Equal
         } else if *other > self.end {
             Ordering::Greater
         } else {
             Ordering::Less
         })
     }
 }

 impl PartialOrd<ClassRange> for char {
     #[inline]
     fn partial_cmp(&self, other: &ClassRange) -> Option<Ordering> {
         other.partial_cmp(self).map(|o| o.reverse())
     }
 }

 impl ByteClass {
     /// Create a new class from an existing set of ranges.
     pub fn new(ranges: Vec<ByteRange>) -> ByteClass {
         ByteClass { ranges: ranges }
     }

     /// Returns true if `b` is matched by this byte class.
     pub fn matches(&self, b: u8) -> bool {
         self.binary_search_by(|range| b.partial_cmp(range).unwrap()).is_ok()
     }

     /// Removes the given byte from the class if it exists.
     ///
     /// Note that this takes `O(n)` time in the number of ranges.
     pub fn remove(&mut self, b: u8) {
         let mut i = match self.binary_search_by(|r| b.partial_cmp(r).unwrap()) {
             Ok(i) => i,
             Err(_) => return,
         };
         let mut r = self.ranges.remove(i);
         if r.start == b {
             r.start = b.saturating_add(1);
             if r.start > r.end || b == u8::MAX {
                 return;
             }
             self.ranges.insert(i, r);
         } else if r.end == b {
             r.end = b.saturating_sub(1);
             if r.end < r.start || b == b'\x00' {
                 return;
             }
             self.ranges.insert(0, r);
         } else {
             let (mut r1, mut r2) = (r.clone(), r.clone());
             r1.end = b.saturating_sub(1);
             if r1.start <= r1.end {
                 self.ranges.insert(i, r1);
                 i += 1;
             }
             r2.start = b.saturating_add(1);
             if r2.start <= r2.end {
                 self.ranges.insert(i, r2);
             }
         }
     }

     /// Create a new empty class from this one.
     fn to_empty(&self) -> ByteClass {
         ByteClass { ranges: Vec::with_capacity(self.len()) }
     }

     /// Canonicalze any sequence of ranges.
     ///
     /// This is responsible for enforcing the canonical format invariants
     /// as described on the docs for the `ByteClass` type.
     fn canonicalize(mut self) -> ByteClass {
         // TODO: Save some cycles here by checking if already canonicalized.
         self.ranges.sort();
         let mut ordered = self.to_empty(); // TODO: Do this in place?
         for candidate in self {
             // If the candidate overlaps with an existing range, then it must
             // be the most recent range added because we process the candidates
             // in order.
             if let Some(or) = ordered.ranges.last_mut() {
                 if or.overlapping(candidate) {
                     *or = or.merge(candidate);
                     continue;
                 }
             }
             ordered.ranges.push(candidate);
         }
         ordered
     }

     /// Negates the byte class.
     ///
     /// For all `b` where `b` is a byte, `b` matches `self` if and only if `b`
     /// does not match `self.negate()`.
     pub fn negate(mut self) -> ByteClass {
         fn range(s: u8, e: u8) -> ByteRange { ByteRange::new(s, e) }

         if self.is_empty() {
             // Inverting an empty range yields all bytes.
             return ByteClass {
                 ranges: vec![ByteRange { start: b'\x00', end: b'\xff' }],
             };
         }
         self = self.canonicalize();
         let mut inv = self.to_empty();
         if self[0].start > b'\x00' {
             inv.ranges.push(range(b'\x00', self[0].start.saturating_sub(1)));
         }
         for win in self.windows(2) {
             inv.ranges.push(range(win[0].end.saturating_add(1),
                                   win[1].start.saturating_sub(1)));
         }
         if self[self.len() - 1].end < u8::MAX {
             inv.ranges.push(range(self[self.len() - 1].end.saturating_add(1),
                                   u8::MAX));
         }
         inv
     }

     /// Apply case folding to this byte class.
     ///
     /// This assumes that the bytes in the ranges are ASCII compatible.
     ///
     /// N.B. Applying case folding to a negated character class probably
     /// won't produce the expected result. e.g., `(?i)[^x]` really should
     /// match any character sans `x` and `X`, but if `[^x]` is negated
     /// before being case folded, you'll end up matching any character.
     pub fn case_fold(self) -> ByteClass {
         let mut folded = self.to_empty();
         for r in self {
             folded.ranges.extend(r.case_fold());
         }
         folded.canonicalize()
     }

     /// Returns the number of bytes that match this class.
     fn num_bytes(&self) -> usize {
         self.ranges.iter()
             .map(|&r| 1 + (r.end as u32) - (r.start as u32))
             .fold(0, |acc, len| acc + len)
             as usize
     }
 }

 impl ByteRange {
     /// Create a new class range.
     ///
     /// If `end < start`, then the two values are swapped so that
     /// the invariant `start <= end` is preserved.
     fn new(start: u8, end: u8) -> ByteRange {
         if start <= end {
             ByteRange { start: start, end: end }
         } else {
             ByteRange { start: end, end: start }
         }
     }

     /// Returns true if and only if the two ranges are overlapping. Note that
     /// since ranges are inclusive, `a-c` and `d-f` are overlapping!
     fn overlapping(self, other: ByteRange) -> bool {
         max(self.start, other.start)
         <= min(self.end, other.end).saturating_add(1)
     }

     /// Returns true if and only if the intersection of self and other is non
     /// empty.
     fn is_intersect_empty(self, other: ByteRange) -> bool {
         max(self.start, other.start) > min(self.end, other.end)
     }

     /// Creates a new range representing the union of `self` and `other.
     fn merge(self, other: ByteRange) -> ByteRange {
         ByteRange {
             start: min(self.start, other.start),
             end: max(self.end, other.end),
         }
     }

     /// Apply case folding to this range.
     ///
     /// Since case folding might add bytes such that the range is no
     /// longer contiguous, this returns multiple byte ranges.
     ///
     /// This assumes that the bytes in this range are ASCII compatible.
     fn case_fold(self) -> Vec<ByteRange> {
         // So much easier than Unicode case folding!
         let mut ranges = vec![self];
         if !ByteRange::new(b'a', b'z').is_intersect_empty(self) {
             let lower = max(self.start, b'a');
             let upper = min(self.end, b'z');
             ranges.push(ByteRange::new(lower - 32, upper - 32));
         }
         if !ByteRange::new(b'A', b'Z').is_intersect_empty(self) {
             let lower = max(self.start, b'A');
             let upper = min(self.end, b'Z');
             ranges.push(ByteRange::new(lower + 32, upper + 32));
         }
         ranges
     }
 }

 impl Deref for ByteClass {
     type Target = Vec<ByteRange>;
     fn deref(&self) -> &Vec<ByteRange> { &self.ranges }
 }

 impl IntoIterator for ByteClass {
     type Item = ByteRange;
     type IntoIter = vec::IntoIter<ByteRange>;
     fn into_iter(self) -> vec::IntoIter<ByteRange> { self.ranges.into_iter() }
 }

 impl<'a> IntoIterator for &'a ByteClass {
     type Item = &'a ByteRange;
     type IntoIter = slice::Iter<'a, ByteRange>;
     fn into_iter(self) -> slice::Iter<'a, ByteRange> { self.iter() }
 }

 impl PartialEq<u8> for ByteRange {
     #[inline]
     fn eq(&self, other: &u8) -> bool {
         self.start <= *other && *other <= self.end
     }
 }

 impl PartialEq<ByteRange> for u8 {
     #[inline]
     fn eq(&self, other: &ByteRange) -> bool {
         other.eq(self)
     }
 }

 impl PartialOrd<u8> for ByteRange {
     #[inline]
     fn partial_cmp(&self, other: &u8) -> Option<Ordering> {
         Some(if self == other {
             Ordering::Equal
         } else if *other > self.end {
             Ordering::Greater
         } else {
             Ordering::Less
         })
     }
 }

 impl PartialOrd<ByteRange> for u8 {
     #[inline]
     fn partial_cmp(&self, other: &ByteRange) -> Option<Ordering> {
         other.partial_cmp(self).map(|o| o.reverse())
     }
 }

 /// This implementation of `Display` will write a regular expression from the
 /// syntax tree. It does not write the original string parsed.
 impl fmt::Display for Expr {
     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
         match *self {
             Empty => write!(f, ""),
             Literal { ref chars, casei } => {
                 if casei {
                     try!(write!(f, "(?iu:"));
                 } else {
                     try!(write!(f, "(?u:"));
                 }
                 for &c in chars {
                     try!(write!(f, "{}", quote_char(c)));
                 }
                 try!(write!(f, ")"));
                 Ok(())
             }
             LiteralBytes { ref bytes, casei } => {
                 if casei {
                     try!(write!(f, "(?i-u:"));
                 } else {
                     try!(write!(f, "(?-u:"));
                 }
                 for &b in bytes {
                     try!(write!(f, "{}", quote_byte(b)));
                 }
                 try!(write!(f, ")"));
                 Ok(())
             }
             AnyChar => write!(f, "(?su:.)"),
             AnyCharNoNL => write!(f, "(?u:.)"),
             AnyByte => write!(f, "(?s-u:.)"),
             AnyByteNoNL => write!(f, "(?-u:.)"),
             Class(ref cls) => write!(f, "{}", cls),
             ClassBytes(ref cls) => write!(f, "{}", cls),
             StartLine => write!(f, "(?m:^)"),
             EndLine => write!(f, "(?m:$)"),
             StartText => write!(f, r"^"),
             EndText => write!(f, r"$"),
             WordBoundary => write!(f, r"(?u:\b)"),
             NotWordBoundary => write!(f, r"(?u:\B)"),
             WordBoundaryAscii => write!(f, r"(?-u:\b)"),
             NotWordBoundaryAscii => write!(f, r"(?-u:\B)"),
             Group { ref e, i: None, name: None } => write!(f, "(?:{})", e),
             Group { ref e, name: None, .. } => write!(f, "({})", e),
             Group { ref e, name: Some(ref n), .. } => {
                 write!(f, "(?P<{}>{})", n, e)
             }
             Repeat { ref e, r, greedy } => {
                 match &**e {
                     &Literal { ref chars, .. } if chars.len() > 1 => {
                         try!(write!(f, "(?:{}){}", e, r))
                     }
                     _ => try!(write!(f, "{}{}", e, r)),
                 }
                 if !greedy { try!(write!(f, "?")); }
                 Ok(())
             }
             Concat(ref es) => {
                 for e in es {
                     try!(write!(f, "{}", e));
                 }
                 Ok(())
             }
             Alternate(ref es) => {
                 for (i, e) in es.iter().enumerate() {
                     if i > 0 { try!(write!(f, "|")); }
                     try!(write!(f, "{}", e));
                 }
                 Ok(())
             }
         }
     }
 }

 impl fmt::Display for Repeater {
     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
         match *self {
             ZeroOrOne => write!(f, "?"),
             ZeroOrMore => write!(f, "*"),
             OneOrMore => write!(f, "+"),
             Range { min: s, max: None } => write!(f, "{{{},}}", s),
             Range { min: s, max: Some(e) } if s == e => write!(f, "{{{}}}", s),
             Range { min: s, max: Some(e) } => write!(f, "{{{}, {}}}", s, e),
         }
     }
 }

 impl fmt::Display for CharClass {
     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
         try!(write!(f, "(?u:["));
         for range in self.iter() {
             if range.start == '-' || range.end == '-' {
                 try!(write!(f, "-"));
                 break;
             }
         }
         for range in self.iter() {
             let mut range = *range;
             if range.start == '-' {
                 range.start = ((range.start as u8) + 1) as char;
             }
             if range.end == '-' {
                 range.end = ((range.end as u8) - 1) as char;
             }
             if range.start > range.end {
                 continue;
             }
             try!(write!(f, "{}", range));
         }
         try!(write!(f, "])"));
         Ok(())
     }
 }

 impl fmt::Display for ClassRange {
     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
         write!(f, "{}-{}", quote_char(self.start), quote_char(self.end))
     }
 }

 impl fmt::Display for ByteClass {
     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
         try!(write!(f, "(?-u:["));
         for range in self.iter() {
             if range.start == b'-' || range.end == b'-' {
                 try!(write!(f, "-"));
                 break;
             }
         }
         for range in self.iter() {
             let mut range = *range;
             if range.start == b'-' {
                 range.start += 1;
             }
             if range.end == b'-' {
                 range.start -= 1;
             }
             if range.start > range.end {
                 continue;
             }
             try!(write!(f, "{}", range));
         }
         try!(write!(f, "])"));
         Ok(())
     }
 }

 impl fmt::Display for ByteRange {
     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
         write!(f, "{}-{}", quote_byte(self.start), quote_byte(self.end))
     }
 }

 /// An alias for computations that can return a `Error`.
 pub type Result<T> = ::std::result::Result<T, Error>;

 /// A parse error.
 ///
 /// This includes details about the specific type of error and a rough
 /// approximation of where it occurred.
 #[derive(Clone, Debug, PartialEq)]
 pub struct Error {
     pos: usize,
     surround: String,
     kind: ErrorKind,
 }

 /// The specific type of parse error that can occur.
 #[derive(Clone, Debug, PartialEq)]
 pub enum ErrorKind {
     /// A negation symbol is used twice in flag settings.
     /// e.g., `(?-i-s)`.
     DoubleFlagNegation,
     /// The same capture name was used more than once.
     /// e.g., `(?P<a>.)(?P<a>.)`.
     DuplicateCaptureName(String),
     /// An alternate is empty. e.g., `(|a)`.
     EmptyAlternate,
     /// A capture group name is empty. e.g., `(?P<>a)`.
     EmptyCaptureName,
     /// A negation symbol was not proceded by any flags. e.g., `(?i-)`.
     EmptyFlagNegation,
     /// A group is empty. e.g., `()`.
     EmptyGroup,
     /// An invalid number was used in a counted repetition. e.g., `a{b}`.
     InvalidBase10(String),
     /// An invalid hexadecimal number was used in an escape sequence.
     /// e.g., `\xAG`.
     InvalidBase16(String),
     /// An invalid capture name was used. e.g., `(?P<0a>b)`.
     InvalidCaptureName(String),
     /// An invalid class range was givien. Specifically, when the start of the
     /// range is greater than the end. e.g., `[z-a]`.
     InvalidClassRange {
         /// The first character specified in the range.
         start: char,
         /// The second character specified in the range.
         end: char,
     },
     /// An escape sequence was used in a character class where it is not
     /// allowed. e.g., `[a-\pN]` or `[\A]`.
     InvalidClassEscape(Expr),
     /// An invalid counted repetition min/max was given. e.g., `a{2,1}`.
     InvalidRepeatRange {
         /// The first number specified in the repetition.
         min: u32,
         /// The second number specified in the repetition.
         max: u32,
     },
     /// An invalid Unicode scalar value was used in a long hexadecimal
     /// sequence. e.g., `\x{D800}`.
     InvalidScalarValue(u32),
     /// An empty counted repetition operator. e.g., `a{}`.
     MissingBase10,
     /// A repetition operator was not applied to an expression. e.g., `*`.
     RepeaterExpectsExpr,
     /// A repetition operator was applied to an expression that cannot be
     /// repeated. e.g., `a+*` or `a|*`.
     RepeaterUnexpectedExpr(Expr),
     /// A capture group name that is never closed. e.g., `(?P<a`.
     UnclosedCaptureName(String),
     /// An unclosed hexadecimal literal. e.g., `\x{a`.
     UnclosedHex,
     /// An unclosed parenthesis. e.g., `(a`.
     UnclosedParen,
     /// An unclosed counted repetition operator. e.g., `a{2`.
     UnclosedRepeat,
     /// An unclosed named Unicode class. e.g., `\p{Yi`.
     UnclosedUnicodeName,
     /// Saw end of regex before class was closed. e.g., `[a`.
     UnexpectedClassEof,
     /// Saw end of regex before escape sequence was closed. e.g., `\`.
     UnexpectedEscapeEof,
     /// Saw end of regex before flags were closed. e.g., `(?i`.
     UnexpectedFlagEof,
     /// Saw end of regex before two hexadecimal digits were seen. e.g., `\xA`.
     UnexpectedTwoDigitHexEof,
     /// Unopened parenthesis. e.g., `)`.
     UnopenedParen,
     /// Unrecognized escape sequence. e.g., `\q`.
     UnrecognizedEscape(char),
     /// Unrecognized flag. e.g., `(?a)`.
     UnrecognizedFlag(char),
     /// Unrecognized named Unicode class. e.g., `\p{Foo}`.
     UnrecognizedUnicodeClass(String),
     /// Indicates that the regex uses too much nesting.
     ///
     /// (N.B. This error exists because traversing the Expr is recursive and
     /// an explicit heap allocated stack is not (yet?) used. Regardless, some
     /// sort of limit must be applied to avoid unbounded memory growth.
     StackExhausted,
     /// A disallowed flag was found (e.g., `u`).
     FlagNotAllowed(char),
     /// A Unicode class was used when the Unicode (`u`) flag was disabled.
     UnicodeNotAllowed,
     /// InvalidUtf8 indicates that the expression may match non-UTF-8 bytes.
     /// This never returned if the parser is permitted to allow expressions
     /// that match arbitrary bytes.
     InvalidUtf8,
     /// A character class was constructed such that it is empty.
     /// e.g., `[^\d\D]`.
     EmptyClass,
     /// Hints that destructuring should not be exhaustive.
     ///
     /// This enum may grow additional variants, so this makes sure clients
     /// don't count on exhaustive matching. (Otherwise, adding a new variant
     /// could break existing code.)
     #[doc(hidden)]
     __Nonexhaustive,
 }

 impl Error {
     /// Returns an approximate *character* offset at which the error occurred.
     ///
     /// The character offset may be equal to the number of characters in the
     /// string, in which case it should be interpreted as pointing to the end
     /// of the regex.
     pub fn position(&self) -> usize {
         self.pos
     }

     /// Returns the type of the regex parse error.
     pub fn kind(&self) -> &ErrorKind {
         &self.kind
     }
 }

 impl ErrorKind {
     fn description(&self) -> &str {
         use ErrorKind::*;
         match *self {
             DoubleFlagNegation => "double flag negation",
             DuplicateCaptureName(_) => "duplicate capture name",
             EmptyAlternate => "empty alternate",
             EmptyCaptureName => "empty capture name",
             EmptyFlagNegation => "flag negation without any flags",
             EmptyGroup => "empty group (e.g., '()')",
             InvalidBase10(_) => "invalid base 10 number",
             InvalidBase16(_) => "invalid base 16 number",
             InvalidCaptureName(_) => "invalid capture name",
             InvalidClassRange{..} => "invalid character class range",
             InvalidClassEscape(_) => "invalid escape sequence in class",
             InvalidRepeatRange{..} => "invalid counted repetition range",
             InvalidScalarValue(_) => "invalid Unicode scalar value",
             MissingBase10 => "missing count in repetition operator",
             RepeaterExpectsExpr => "repetition operator missing expression",
             RepeaterUnexpectedExpr(_) => "expression cannot be repeated",
             UnclosedCaptureName(_) => "unclosed capture group name",
             UnclosedHex => "unclosed hexadecimal literal",
             UnclosedParen => "unclosed parenthesis",
             UnclosedRepeat => "unclosed counted repetition operator",
             UnclosedUnicodeName => "unclosed Unicode class literal",
             UnexpectedClassEof => "unexpected EOF in character class",
             UnexpectedEscapeEof => "unexpected EOF in escape sequence",
             UnexpectedFlagEof => "unexpected EOF in flags",
             UnexpectedTwoDigitHexEof => "unexpected EOF in hex literal",
             UnopenedParen => "unopened parenthesis",
             UnrecognizedEscape(_) => "unrecognized escape sequence",
             UnrecognizedFlag(_) => "unrecognized flag",
             UnrecognizedUnicodeClass(_) => "unrecognized Unicode class name",
             StackExhausted => "stack exhausted, too much nesting",
             FlagNotAllowed(_) => "flag not allowed",
             UnicodeNotAllowed => "Unicode features not allowed",
             InvalidUtf8 => "matching arbitrary bytes is not allowed",
             EmptyClass => "empty character class",
             __Nonexhaustive => unreachable!(),
         }
     }
 }

 impl ::std::error::Error for Error {
     fn description(&self) -> &str {
         self.kind.description()
     }
 }

 impl fmt::Display for Error {
     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
         if let ErrorKind::StackExhausted = self.kind {
             write!(f, "Error parsing regex: {}", self.kind)
         } else {
             write!(
                 f, "Error parsing regex near '{}' at character offset {}: {}",
                 self.surround, self.pos, self.kind)
         }
     }
 }

 impl fmt::Display for ErrorKind {
     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
         use ErrorKind::*;
         match *self {
             DoubleFlagNegation =>
                 write!(f, "Only one negation symbol is allowed in flags."),
             DuplicateCaptureName(ref s) =>
                 write!(f, "Capture name '{}' is used more than once.", s),
             EmptyAlternate =>
                 write!(f, "Alternations cannot be empty."),
             EmptyCaptureName =>
                 write!(f, "Capture names cannot be empty."),
             EmptyFlagNegation =>
                 write!(f, "Flag negation requires setting at least one flag."),
             EmptyGroup =>
                 write!(f, "Empty regex groups (e.g., '()') are not allowed."),
             InvalidBase10(ref s) =>
                 write!(f, "Not a valid base 10 number: '{}'", s),
             InvalidBase16(ref s) =>
                 write!(f, "Not a valid base 16 number: '{}'", s),
             InvalidCaptureName(ref s) =>
                 write!(f, "Invalid capture name: '{}'. Capture names must \
                            consist of [_a-zA-Z0-9] and are not allowed to \
                            start with with a number.", s),
             InvalidClassRange { start, end } =>
                 write!(f, "Invalid character class range '{}-{}'. \
                            Character class ranges must start with the smaller \
                            character, but {} > {}", start, end, start, end),
             InvalidClassEscape(ref e) =>
                 write!(f, "Invalid escape sequence in character \
                            class: '{}'.", e),
             InvalidRepeatRange { min, max } =>
                 write!(f, "Invalid counted repetition range: {{{}, {}}}. \
                            Counted repetition ranges must start with the \
                            minimum, but {} > {}", min, max, min, max),
             InvalidScalarValue(c) =>
                 write!(f, "Number does not correspond to a Unicode scalar \
                            value: '{}'.", c),
             MissingBase10 =>
                 write!(f, "Missing maximum in counted reptition operator."),
             RepeaterExpectsExpr =>
                 write!(f, "Missing expression for reptition operator."),
             RepeaterUnexpectedExpr(ref e) =>
                 write!(f, "Invalid application of reptition operator to: \
                           '{}'.", e),
             UnclosedCaptureName(ref s) =>
                 write!(f, "Capture name group for '{}' is not closed. \
                            (Missing a '>'.)", s),
             UnclosedHex =>
                 write!(f, "Unclosed hexadecimal literal (missing a '}}')."),
             UnclosedParen =>
                 write!(f, "Unclosed parenthesis."),
             UnclosedRepeat =>
                 write!(f, "Unclosed counted repetition (missing a '}}')."),
             UnclosedUnicodeName =>
                 write!(f, "Unclosed Unicode literal (missing a '}}')."),
             UnexpectedClassEof =>
                 write!(f, "Character class was not closed before the end of \
                            the regex (missing a ']')."),
             UnexpectedEscapeEof =>
                 write!(f, "Started an escape sequence that didn't finish \
                            before the end of the regex."),
             UnexpectedFlagEof =>
                 write!(f, "Inline flag settings was not closed before the end \
                            of the regex (missing a ')' or ':')."),
             UnexpectedTwoDigitHexEof =>
                 write!(f, "Unexpected end of two digit hexadecimal literal."),
             UnopenedParen =>
                 write!(f, "Unopened parenthesis."),
             UnrecognizedEscape(c) =>
                 write!(f, "Unrecognized escape sequence: '\\{}'.", c),
             UnrecognizedFlag(c) =>
                 write!(f, "Unrecognized flag: '{}'. \
                            (Allowed flags: i, m, s, U, u, x.)", c),
             UnrecognizedUnicodeClass(ref s) =>
                 write!(f, "Unrecognized Unicode class name: '{}'.", s),
             StackExhausted =>
                 write!(f, "Exhausted space required to parse regex with too \
                            much nesting."),
             FlagNotAllowed(flag) =>
                 write!(f, "Use of the flag '{}' is not allowed.", flag),
             UnicodeNotAllowed =>
                 write!(f, "Unicode features are not allowed when the Unicode \
                            (u) flag is not set."),
             InvalidUtf8 =>
                 write!(f, "Matching arbitrary bytes is not allowed."),
             EmptyClass =>
                 write!(f, "Empty character classes are not allowed."),
             __Nonexhaustive => unreachable!(),
         }
     }
 }

 /// The result of binary search on the simple case folding table.
 ///
 /// Note that this binary search is done on the "both" table, such that
 /// the index returned corresponds to the *first* location of `c1` in the
 /// table. The table can then be scanned linearly starting from the position
 /// returned to find other case mappings for `c1`.
 fn simple_case_fold_both_result(c1: char) -> result::Result<usize, usize> {
     let table = &case_folding::C_plus_S_both_table;
     let i = binary_search(table, |&(c2, _)| c1 <= c2);
     if i >= table.len() || table[i].0 != c1 {
         Err(i)
     } else {
         Ok(i)
     }
 }

 /// Binary search to find first element such that `pred(T) == true`.
 ///
 /// Assumes that if `pred(xs[i]) == true` then `pred(xs[i+1]) == true`.
 ///
 /// If all elements yield `pred(T) == false`, then `xs.len()` is returned.
 fn binary_search<T, F>(xs: &[T], mut pred: F) -> usize
         where F: FnMut(&T) -> bool {
     let (mut left, mut right) = (0, xs.len());
     while left < right {
         let mid = (left + right) / 2;
         if pred(&xs[mid]) {
             right = mid;
         } else {
             left = mid + 1;
         }
     }
     left
 }

 /// Escapes all regular expression meta characters in `text`.
 ///
 /// The string returned may be safely used as a literal in a regular
 /// expression.
 pub fn quote(text: &str) -> String {
     let mut quoted = String::with_capacity(text.len());
     for c in text.chars() {
         if parser::is_punct(c) {
             quoted.push('\\');
         }
         quoted.push(c);
     }
     quoted
 }

 fn quote_char(c: char) -> String {
     let mut s = String::new();
     if parser::is_punct(c) {
         s.push('\\');
     }
     s.push(c);
     s
 }

 fn quote_byte(b: u8) -> String {
     if parser::is_punct(b as char) || b == b'\'' || b == b'"' {
         quote_char(b as char)
     } else {
         let escaped: Vec<u8> = ascii::escape_default(b).collect();
         String::from_utf8(escaped).unwrap()
     }
 }

 fn inc_char(c: char) -> char {
     match c {
         char::MAX => char::MAX,
         '\u{D7FF}' => '\u{E000}',
         c => char::from_u32(c as u32 + 1).unwrap(),
     }
 }

 fn dec_char(c: char) -> char {
     match c {
         '\x00' => '\x00',
         '\u{E000}' => '\u{D7FF}',
         c => char::from_u32(c as u32 - 1).unwrap(),
     }
 }

 /// Returns true if and only if `c` is a word character.
 #[doc(hidden)]
 pub fn is_word_char(c: char) -> bool {
     match c {
         '_' | '0' ... '9' | 'a' ... 'z' | 'A' ... 'Z'  => true,
         _ => ::unicode::regex::PERLW.binary_search_by(|&(start, end)| {
             if c >= start && c <= end {
                 Ordering::Equal
             } else if start > c {
                 Ordering::Greater
             } else {
                 Ordering::Less
             }
         }).is_ok(),
     }
 }

 /// Returns true if and only if `c` is an ASCII word byte.
 #[doc(hidden)]
 pub fn is_word_byte(b: u8) -> bool {
     match b {
         b'_' | b'0' ... b'9' | b'a' ... b'z' | b'A' ... b'Z'  => true,
         _ => false,
     }
 }

 #[cfg(test)]
 mod properties;

 #[cfg(test)]
 mod tests {
     use {CharClass, ClassRange, ByteClass, ByteRange, Expr};

     fn class(ranges: &[(char, char)]) -> CharClass {
         let ranges = ranges.iter().cloned()
                            .map(|(c1, c2)| ClassRange::new(c1, c2)).collect();
         CharClass::new(ranges)
     }

     fn bclass(ranges: &[(u8, u8)]) -> ByteClass {
         let ranges = ranges.iter().cloned()
                            .map(|(c1, c2)| ByteRange::new(c1, c2)).collect();
         ByteClass::new(ranges)
     }

     fn e(re: &str) -> Expr { Expr::parse(re).unwrap() }

     #[test]
     fn stack_exhaustion() {
         use std::iter::repeat;

         let open: String = repeat('(').take(200).collect();
         let close: String = repeat(')').take(200).collect();
         assert!(Expr::parse(&format!("{}a{}", open, close)).is_ok());

         let open: String = repeat('(').take(200 + 1).collect();
         let close: String = repeat(')').take(200 + 1).collect();
         assert!(Expr::parse(&format!("{}a{}", open, close)).is_err());
     }

     #[test]
     fn anchored_start() {
         assert!(e("^a").is_anchored_start());
         assert!(e("(^a)").is_anchored_start());
         assert!(e("^a|^b").is_anchored_start());
         assert!(e("(^a)|(^b)").is_anchored_start());
         assert!(e("(^(a|b))").is_anchored_start());

         assert!(!e("^a|b").is_anchored_start());
         assert!(!e("a|^b").is_anchored_start());
     }

     #[test]
     fn anchored_end() {
         assert!(e("a$").is_anchored_end());
         assert!(e("(a$)").is_anchored_end());
         assert!(e("a$|b$").is_anchored_end());
         assert!(e("(a$)|(b$)").is_anchored_end());
         assert!(e("((a|b)$)").is_anchored_end());

         assert!(!e("a$|b").is_anchored_end());
         assert!(!e("a|b$").is_anchored_end());
     }

     #[test]
     fn class_canon_no_change() {
         let cls = class(&[('a', 'c'), ('x', 'z')]);
         assert_eq!(cls.clone().canonicalize(), cls);
     }

     #[test]
     fn class_canon_unordered() {
         let cls = class(&[('x', 'z'), ('a', 'c')]);
         assert_eq!(cls.canonicalize(), class(&[
             ('a', 'c'), ('x', 'z'),
         ]));
     }

     #[test]
     fn class_canon_overlap() {
         let cls = class(&[('x', 'z'), ('w', 'y')]);
         assert_eq!(cls.canonicalize(), class(&[
             ('w', 'z'),
         ]));
     }

     #[test]
     fn class_canon_overlap_many() {
         let cls = class(&[
             ('c', 'f'), ('a', 'g'), ('d', 'j'), ('a', 'c'),
             ('m', 'p'), ('l', 's'),
         ]);
         assert_eq!(cls.clone().canonicalize(), class(&[
             ('a', 'j'), ('l', 's'),
         ]));
     }

     #[test]
     fn class_canon_overlap_boundary() {
         let cls = class(&[('x', 'z'), ('u', 'w')]);
         assert_eq!(cls.canonicalize(), class(&[
             ('u', 'z'),
         ]));
     }

     #[test]
     fn class_canon_extreme_edge_case() {
         let cls = class(&[('\x00', '\u{10FFFF}'), ('\x00', '\u{10FFFF}')]);
         assert_eq!(cls.canonicalize(), class(&[
             ('\x00', '\u{10FFFF}'),
         ]));
     }

     #[test]
     fn class_canon_singles() {
         let cls = class(&[('a', 'a'), ('b', 'b')]);
         assert_eq!(cls.canonicalize(), class(&[('a', 'b')]));
     }

     #[test]
     fn class_negate_single() {
         let cls = class(&[('a', 'a')]);
         assert_eq!(cls.negate(), class(&[
             ('\x00', '\x60'), ('\x62', '\u{10FFFF}'),
         ]));
     }

     #[test]
     fn class_negate_singles() {
         let cls = class(&[('a', 'a'), ('b', 'b')]);
         assert_eq!(cls.negate(), class(&[
             ('\x00', '\x60'), ('\x63', '\u{10FFFF}'),
         ]));
     }

     #[test]
     fn class_negate_multiples() {
         let cls = class(&[('a', 'c'), ('x', 'z')]);
         assert_eq!(cls.negate(), class(&[
             ('\x00', '\x60'), ('\x64', '\x77'), ('\x7b', '\u{10FFFF}'),
         ]));
     }

     #[test]
     fn class_negate_min_scalar() {
         let cls = class(&[('\x00', 'a')]);
         assert_eq!(cls.negate(), class(&[
             ('\x62', '\u{10FFFF}'),
         ]));
     }

     #[test]
     fn class_negate_max_scalar() {
         let cls = class(&[('a', '\u{10FFFF}')]);
         assert_eq!(cls.negate(), class(&[
             ('\x00', '\x60'),
         ]));
     }

     #[test]
     fn class_negate_everything() {
         let cls = class(&[('\x00', '\u{10FFFF}')]);
         assert_eq!(cls.negate(), class(&[]));
     }

     #[test]
     fn class_negate_everything_sans_one() {
         let cls = class(&[
             ('\x00', '\u{10FFFD}'), ('\u{10FFFF}', '\u{10FFFF}')
         ]);
         assert_eq!(cls.negate(), class(&[
             ('\u{10FFFE}', '\u{10FFFE}'),
         ]));
     }

     #[test]
     fn class_negate_surrogates_min() {
         let cls = class(&[('\x00', '\u{D7FF}')]);
         assert_eq!(cls.negate(), class(&[
             ('\u{E000}', '\u{10FFFF}'),
         ]));
     }

     #[test]
     fn class_negate_surrogates_min_edge() {
         let cls = class(&[('\x00', '\u{D7FE}')]);
         assert_eq!(cls.negate(), class(&[
             ('\u{D7FF}', '\u{10FFFF}'),
         ]));
     }

     #[test]
     fn class_negate_surrogates_max() {
         let cls = class(&[('\u{E000}', '\u{10FFFF}')]);
         assert_eq!(cls.negate(), class(&[
             ('\x00', '\u{D7FF}'),
         ]));
     }

     #[test]
     fn class_negate_surrogates_max_edge() {
         let cls = class(&[('\u{E001}', '\u{10FFFF}')]);
         assert_eq!(cls.negate(), class(&[
             ('\x00', '\u{E000}'),
         ]));
     }

     #[test]
     fn class_canon_overlap_many_case_fold() {
         let cls = class(&[
             ('C', 'F'), ('A', 'G'), ('D', 'J'), ('A', 'C'),
             ('M', 'P'), ('L', 'S'), ('c', 'f'),
         ]);
         assert_eq!(cls.case_fold(), class(&[
             ('A', 'J'), ('L', 'S'),
             ('a', 'j'), ('l', 's'),
             ('\u{17F}', '\u{17F}'),
         ]));

         let cls = bclass(&[
             (b'C', b'F'), (b'A', b'G'), (b'D', b'J'), (b'A', b'C'),
             (b'M', b'P'), (b'L', b'S'), (b'c', b'f'),
         ]);
         assert_eq!(cls.case_fold(), bclass(&[
             (b'A', b'J'), (b'L', b'S'),
             (b'a', b'j'), (b'l', b's'),
         ]));
     }

     #[test]
     fn class_fold_az() {
         let cls = class(&[('A', 'Z')]);
         assert_eq!(cls.case_fold(), class(&[
             ('A', 'Z'), ('a', 'z'),
             ('\u{17F}', '\u{17F}'),
             ('\u{212A}', '\u{212A}'),
         ]));
         let cls = class(&[('a', 'z')]);
         assert_eq!(cls.case_fold(), class(&[
             ('A', 'Z'), ('a', 'z'),
             ('\u{17F}', '\u{17F}'),
             ('\u{212A}', '\u{212A}'),
         ]));

         let cls = bclass(&[(b'A', b'Z')]);
         assert_eq!(cls.case_fold(), bclass(&[
             (b'A', b'Z'), (b'a', b'z'),
         ]));
         let cls = bclass(&[(b'a', b'z')]);
         assert_eq!(cls.case_fold(), bclass(&[
             (b'A', b'Z'), (b'a', b'z'),
         ]));
     }

     #[test]
     fn class_fold_a_underscore() {
         let cls = class(&[('A', 'A'), ('_', '_')]);
         assert_eq!(cls.clone().canonicalize(), class(&[
             ('A', 'A'), ('_', '_'),
         ]));
         assert_eq!(cls.case_fold(), class(&[
             ('A', 'A'), ('_', '_'), ('a', 'a'),
         ]));

         let cls = bclass(&[(b'A', b'A'), (b'_', b'_')]);
         assert_eq!(cls.clone().canonicalize(), bclass(&[
             (b'A', b'A'), (b'_', b'_'),
         ]));
         assert_eq!(cls.case_fold(), bclass(&[
             (b'A', b'A'), (b'_', b'_'), (b'a', b'a'),
         ]));
     }

     #[test]
     fn class_fold_a_equals() {
         let cls = class(&[('A', 'A'), ('=', '=')]);
         assert_eq!(cls.clone().canonicalize(), class(&[
             ('=', '='), ('A', 'A'),
         ]));
         assert_eq!(cls.case_fold(), class(&[
             ('=', '='), ('A', 'A'), ('a', 'a'),
         ]));

         let cls = bclass(&[(b'A', b'A'), (b'=', b'=')]);
         assert_eq!(cls.clone().canonicalize(), bclass(&[
             (b'=', b'='), (b'A', b'A'),
         ]));
         assert_eq!(cls.case_fold(), bclass(&[
             (b'=', b'='), (b'A', b'A'), (b'a', b'a'),
         ]));
     }

     #[test]
     fn class_fold_no_folding_needed() {
         let cls = class(&[('\x00', '\x10')]);
         assert_eq!(cls.case_fold(), class(&[
             ('\x00', '\x10'),
         ]));

         let cls = bclass(&[(b'\x00', b'\x10')]);
         assert_eq!(cls.case_fold(), bclass(&[
             (b'\x00', b'\x10'),
         ]));
     }

     #[test]
     fn class_fold_negated() {
         let cls = class(&[('x', 'x')]);
         assert_eq!(cls.clone().case_fold(), class(&[
             ('X', 'X'), ('x', 'x'),
         ]));
         assert_eq!(cls.case_fold().negate(), class(&[
             ('\x00', 'W'), ('Y', 'w'), ('y', '\u{10FFFF}'),
         ]));

         let cls = bclass(&[(b'x', b'x')]);
         assert_eq!(cls.clone().case_fold(), bclass(&[
             (b'X', b'X'), (b'x', b'x'),
         ]));
         assert_eq!(cls.case_fold().negate(), bclass(&[
             (b'\x00', b'W'), (b'Y', b'w'), (b'y', b'\xff'),
         ]));
     }

     #[test]
     fn class_fold_single_to_multiple() {
         let cls = class(&[('k', 'k')]);
         assert_eq!(cls.case_fold(), class(&[
             ('K', 'K'), ('k', 'k'), ('\u{212A}', '\u{212A}'),
         ]));

         let cls = bclass(&[(b'k', b'k')]);
         assert_eq!(cls.case_fold(), bclass(&[
             (b'K', b'K'), (b'k', b'k'),
         ]));
     }

     #[test]
     fn class_fold_at() {
         let cls = class(&[('@', '@')]);
         assert_eq!(cls.clone().canonicalize(), class(&[('@', '@')]));
         assert_eq!(cls.case_fold(), class(&[('@', '@')]));

         let cls = bclass(&[(b'@', b'@')]);
         assert_eq!(cls.clone().canonicalize(), bclass(&[(b'@', b'@')]));
         assert_eq!(cls.case_fold(), bclass(&[(b'@', b'@')]));
     }

     #[test]
     fn roundtrip_class_hypen() {
         let expr = e("[-./]");
         assert_eq!("(?u:[-\\.-/])", expr.to_string());

         let expr = e("(?-u)[-./]");
         assert_eq!("(?-u:[-\\.-/])", expr.to_string());
     }
 }