// vendor/regex-syntax-0.3.9/src/parser.rs - toolchain/rustc - Git at Google

 // Copyright 2014-2015 The Rust Project Developers. See the COPYRIGHT
 // file at the top-level directory of this distribution and at
 // http://rust-lang.org/COPYRIGHT.
 //
 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
 // option. This file may not be copied, modified, or distributed
 // except according to those terms.

 use std::cmp::{max, min};
 use std::u8;

 use unicode::regex::UNICODE_CLASSES;

 use {
     Expr, Repeater, CharClass, ClassRange,
     CaptureIndex, CaptureName,
     Error, ErrorKind, Result,
 };

 /// Parser state.
 ///
 /// Keeps the entire input in memory and maintains a cursor (char offset).
 ///
 /// It also keeps an expression stack, which is responsible for managing
 /// grouped expressions and flag state.
 #[derive(Debug)]
 pub struct Parser {
     // The whole pattern, decoded into `char`s up front by `Parser::parse`.
     chars: Vec<char>,
     // Cursor: index into `chars` of the next character to be examined.
     chari: usize,
     // Expression stack holding parsed expressions and open-group markers.
     stack: Vec<Build>,
     // Number of capturing groups opened so far. `parse_group` increments it
     // and uses the new value as the index of the group being opened.
     caps: usize,
     // Names of the named capture groups seen so far.
     names: Vec<String>, // to check for duplicates
     // Flag state currently in effect at the cursor.
     flags: Flags,
 }

 /// Flag state used in the parser.
 ///
 /// The single letter noted on each field is the character that toggles it
 /// inside an inline flag group such as `(?i)` or `(?s-i:...)`.
 #[derive(Clone, Copy, Debug)]
 pub struct Flags {
     /// i: case insensitivity. Literals are tagged with this value and
     /// character classes are case folded when it is set.
     pub casei: bool,
     /// m: multi-line mode. `^` and `$` parse as start/end of *line* instead
     /// of start/end of *text*.
     pub multi: bool,
     /// s: `.` parses as the "any" expression instead of the "any except
     /// newline" expression.
     pub dotnl: bool,
     /// U: inverts the greediness chosen for repetition operators.
     pub swap_greed: bool,
     /// x: whitespace in the pattern is skipped and `#` starts a comment that
     /// runs to the end of the line.
     pub ignore_space: bool,
     /// u: Unicode mode. When disabled, the parser produces the byte-oriented
     /// expression variants instead of the `char`-oriented ones.
     pub unicode: bool,
     /// Not actually a flag, but when disabled, every regex that may not match
     /// UTF-8 exclusively will cause the parser to return an error.
     pub allow_bytes: bool,
 }

 impl Default for Flags {
     /// Returns the default flag state: Unicode mode is the only thing
     /// switched on; every other flag (and `allow_bytes`) starts disabled.
     fn default() -> Self {
         Flags {
             // The one setting enabled out of the box.
             unicode: true,
             // Everything else is opt-in.
             allow_bytes: false,
             ignore_space: false,
             swap_greed: false,
             dotnl: false,
             multi: false,
             casei: false,
         }
     }
 }

 /// An ephemeral type for representing the expression stack.
 ///
 /// Everything on the stack is either a regular expression or a marker
 /// indicating the opening of a group (possibly non-capturing). The opening
 /// of a group copies the current flag state, which is reset on the parser
 /// state once the group closes.
 #[derive(Debug)]
 enum Build {
     // A fully parsed (sub)expression.
     Expr(Expr),
     // Marker pushed when a group is opened.
     LeftParen {
         // `Some(index)` for a capturing group, `None` for a non-capturing
         // one (e.g. `(?:...)` or `(?i:...)`).
         i: CaptureIndex,
         // `Some(name)` when the group was written as `(?P<name>...)`.
         name: CaptureName,
         // Char offset of the opening `(` in the pattern.
         chari: usize,
         // Flag state in effect before the group was opened; the parser
         // restores it when the matching `)` is reached.
         old_flags: Flags,
     },
 }

 // Primary expression parsing routines.
 impl Parser {
     /// Parses the pattern `s` into an `Expr`, starting from the given flag
     /// state.
     ///
     /// The whole pattern is decoded into `char`s before parsing begins. Any
     /// failure is reported through the returned `Result`.
     pub fn parse(s: &str, flags: Flags) -> Result<Expr> {
         let parser = Parser {
             flags: flags,
             caps: 0,
             names: vec![],
             stack: vec![],
             chari: 0,
             chars: s.chars().collect(),
         };
         parser.parse_expr()
     }

     // Top-level expression parser.
     //
     // Starts at the beginning of the input and consumes until either the end
     // of input or an error.
     fn parse_expr(mut self) -> Result<Expr> {
         while !self.eof() {
             let build_expr = match self.cur() {
                 '\\' => try!(self.parse_escape()),
                 '|' => { let e = try!(self.alternate()); self.bump(); e }
                 '?' => try!(self.parse_simple_repeat(Repeater::ZeroOrOne)),
                 '*' => try!(self.parse_simple_repeat(Repeater::ZeroOrMore)),
                 '+' => try!(self.parse_simple_repeat(Repeater::OneOrMore)),
                 '{' => try!(self.parse_counted_repeat()),
                 '[' => match self.maybe_parse_ascii() {
                     None => try!(self.parse_class()),
                     Some(cls) => Build::Expr(Expr::Class(cls)),
                 },
                 '^' => {
                     if self.flags.multi {
                         self.parse_one(Expr::StartLine)
                     } else {
                         self.parse_one(Expr::StartText)
                     }
                 }
                 '$' => {
                     if self.flags.multi {
                         self.parse_one(Expr::EndLine)
                     } else {
                         self.parse_one(Expr::EndText)
                     }
                 }
                 '.' => {
                     if self.flags.dotnl {
                         if self.flags.unicode {
                             self.parse_one(Expr::AnyChar)
                         } else {
                             if !self.flags.allow_bytes {
                                 return Err(self.err(ErrorKind::InvalidUtf8));
                             }
                             self.parse_one(Expr::AnyByte)
                         }
                     } else {
                         if self.flags.unicode {
                             self.parse_one(Expr::AnyCharNoNL)
                         } else {
                             if !self.flags.allow_bytes {
                                 return Err(self.err(ErrorKind::InvalidUtf8));
                             }
                             self.parse_one(Expr::AnyByteNoNL)
                         }
                     }
                 }
                 '(' => try!(self.parse_group()),
                 ')' => {
                     let (old_flags, e) = try!(self.close_paren());
                     self.bump();
                     self.flags = old_flags;
                     e
                 }
                 _ => {
                     let c = self.bump();
                     try!(self.lit(c))
                 }
             };
             if !build_expr.is_empty() {
                 self.stack.push(build_expr);
             }
         }
         self.finish_concat()
     }

     // Parses an escape sequence, e.g., \Ax
     //
     // Start: `\`
     // End:   `x`
     fn parse_escape(&mut self) -> Result<Build> {
         self.bump();
         if self.eof() {
             return Err(self.err(ErrorKind::UnexpectedEscapeEof));
         }
         let c = self.cur();
         if is_punct(c) {
             let c = self.bump();
             return Ok(try!(self.lit(c)));
         }
         match c {
             'a' => { self.bump(); Ok(try!(self.lit('\x07'))) }
             'f' => { self.bump(); Ok(try!(self.lit('\x0C'))) }
             't' => { self.bump(); Ok(try!(self.lit('\t'))) }
             'n' => { self.bump(); Ok(try!(self.lit('\n'))) }
             'r' => { self.bump(); Ok(try!(self.lit('\r'))) }
             'v' => { self.bump(); Ok(try!(self.lit('\x0B'))) }
             'A' => { self.bump(); Ok(Build::Expr(Expr::StartText)) }
             'z' => { self.bump(); Ok(Build::Expr(Expr::EndText)) }
             'b' => {
                 self.bump();
                 Ok(Build::Expr(if self.flags.unicode {
                     Expr::WordBoundary
                 } else {
                     Expr::WordBoundaryAscii
                 }))
             }
             'B' => {
                 self.bump();
                 Ok(Build::Expr(if self.flags.unicode {
                     Expr::NotWordBoundary
                 } else {
                     Expr::NotWordBoundaryAscii
                 }))
             }
             '0'|'1'|'2'|'3'|'4'|'5'|'6'|'7' => self.parse_octal(),
             'x' => { self.bump(); self.parse_hex() }
             'p'|'P' => {
                 self.bump();
                 self.parse_unicode_class(c == 'P')
                     .map(|cls| Build::Expr(Expr::Class(cls)))
             }
             'd'|'s'|'w'|'D'|'S'|'W' => {
                 self.bump();
                 Ok(Build::Expr(Expr::Class(self.parse_perl_class(c))))
             }
             c => Err(self.err(ErrorKind::UnrecognizedEscape(c))),
         }
     }

     // Parses the opening of a group, e.g., `(abc)`.
     //
     // Start: `(`
     // End:   `a`
     //
     // A more interesting example, `(?P<foo>abc)`.
     //
     // Start: `(`
     // End:   `a`
     //
     // Plain and named groups are capturing and receive the next capture
     // index. Anything else starting with `(?` is handed to
     // `parse_group_flags`.
     fn parse_group(&mut self) -> Result<Build> {
         let opening = self.chari;
         self.bump();
         let name: CaptureName = if self.bump_if("?P<") {
             let n = try!(self.parse_group_name());
             if self.names.contains(&n) {
                 return Err(self.err(ErrorKind::DuplicateCaptureName(n)));
             }
             self.names.push(n.clone());
             Some(n)
         } else if self.bump_if("?") {
             // This can never be capturing. It's either setting flags for
             // the current group, or it's opening a non-capturing group or
             // it's opening a group with a specific set of flags (which is
             // also non-capturing).
             // Anything else is an error.
             return self.parse_group_flags(opening);
         } else {
             None
         };
         self.caps = checkadd(self.caps, 1);
         Ok(Build::LeftParen {
             i: Some(self.caps),
             name: name,
             chari: opening,
             // No flags changed if we got here.
             old_flags: self.flags,
         })
     }

     // Parses flags (inline or grouped), e.g., `(?s-i:abc)`.
     //
     // Start: `s`
     // End:   `a`
     //
     // Another example, `(?s-i)a`.
     //
     // Start: `s`
     // End:   `a`
     //
     // `opening_chari` is the offset of the group's `(`; it is recorded in
     // the `LeftParen` marker returned for the `(?flags:` form.
     fn parse_group_flags(&mut self, opening_chari: usize) -> Result<Build> {
         let saved_flags = self.flags;
         // Value assigned to each flag letter seen; flips to `false` after
         // a `-`.
         let mut enable = true;
         // Whether a flag letter was seen since the start or the last `-`.
         let mut saw_flag = false;
         while !self.eof() {
             match self.cur() {
                 ')' => {
                     if !saw_flag {
                         // e.g., (?)
                         return Err(self.err(ErrorKind::EmptyFlagNegation));
                     }
                     // `(?flags)` only mutates the flags of the enclosing
                     // group, whose previous flags were saved when that
                     // group was opened. Nothing needs to be added to the
                     // AST, so an empty expression is returned.
                     self.bump();
                     return Ok(Build::Expr(Expr::Empty));
                 }
                 ':' => {
                     if !enable && !saw_flag {
                         // e.g., (?i-:a)
                         // Without a negation it is fine to see no flag at
                         // all: that is a plain non-capturing group,
                         // `(?:a)`.
                         return Err(self.err(ErrorKind::EmptyFlagNegation));
                     }
                     self.bump();
                     return Ok(Build::LeftParen {
                         i: None,
                         name: None,
                         chari: opening_chari,
                         old_flags: saved_flags,
                     });
                 }
                 '-' => {
                     if !enable {
                         // e.g., (?-i-s)
                         return Err(self.err(ErrorKind::DoubleFlagNegation));
                     }
                     enable = false;
                     saw_flag = false;
                 }
                 'i' => { saw_flag = true; self.flags.casei = enable }
                 'm' => { saw_flag = true; self.flags.multi = enable }
                 's' => { saw_flag = true; self.flags.dotnl = enable }
                 'U' => { saw_flag = true; self.flags.swap_greed = enable }
                 'x' => { saw_flag = true; self.flags.ignore_space = enable }
                 'u' => { saw_flag = true; self.flags.unicode = enable }
                 // e.g., (?z:a)
                 unknown => {
                     return Err(self.err(ErrorKind::UnrecognizedFlag(unknown)));
                 }
             }
             self.bump();
         }
         // e.g., (?i
         Err(self.err(ErrorKind::UnexpectedFlagEof))
     }

     // Parses a group name, e.g., `foo` in `(?P<foo>abc)`.
     //
     // Start: `f`
     // End:   `a`
     //
     // The name must be closed by `>`, be non-empty, not start with an ASCII
     // digit, and consist only of characters accepted by
     // `is_valid_capture_char`.
     fn parse_group_name(&mut self) -> Result<String> {
         let mut name = String::new();
         loop {
             if self.eof() {
                 // e.g., (?P<a
                 return Err(self.err(ErrorKind::UnclosedCaptureName(name)));
             }
             if self.peek_is('>') {
                 break;
             }
             name.push(self.bump());
         }
         if name.is_empty() {
             // e.g., (?P<>a)
             return Err(self.err(ErrorKind::EmptyCaptureName));
         }
         let starts_with_digit = name.chars().next()
                                     .map_or(false, |c| c >= '0' && c <= '9');
         if starts_with_digit || !name.chars().all(is_valid_capture_char) {
             // e.g., (?P<a#>x)
             // e.g., (?P<1a>x)
             return Err(self.err(ErrorKind::InvalidCaptureName(name)));
         }
         self.bump(); // for `>`
         Ok(name)
     }

     // Parses a counted repetition operator, e.g., `a{2,4}?z`.
     //
     // Start: `{`
     // End:   `z`
     //
     // Accepts `{n}`, `{n,}` and `{n,m}`, each optionally followed by `?`.
     // The expression being repeated is popped off the expression stack.
     fn parse_counted_repeat(&mut self) -> Result<Build> {
         let e = try!(self.pop(ErrorKind::RepeaterExpectsExpr)); // e.g., ({5}
         if !e.can_repeat() {
             // e.g., a*{5}
             return Err(self.err(ErrorKind::RepeaterUnexpectedExpr(e)));
         }
         self.bump();
         let lo = try!(self.parse_decimal(|c| c != ',' && c != '}'));
         let hi = if !self.bump_if(',') {
             // `{n}`: exactly `n` repetitions.
             Some(lo)
         } else if self.peek_is('}') {
             // `{n,}`: no upper bound.
             None
         } else {
             let upper = try!(self.parse_decimal(|c| c != '}'));
             if lo > upper {
                 // e.g., a{2,1}
                 return Err(self.err(ErrorKind::InvalidRepeatRange {
                     min: lo,
                     max: upper,
                 }));
             }
             Some(upper)
         };
         if !self.bump_if('}') {
             return Err(self.err(ErrorKind::UnclosedRepeat));
         }
         // A trailing `?` requests the opposite of the default greediness.
         let lazy = self.bump_if('?');
         Ok(Build::Expr(Expr::Repeat {
             e: Box::new(e),
             r: Repeater::Range { min: lo, max: hi },
             greedy: !lazy ^ self.flags.swap_greed,
         }))
     }

     // Parses a simple repetition operator, e.g., `a+?z`.
     //
     // Start: `+`
     // End:   `z`
     //
     // N.B. "simple" in this context means "not min/max repetition",
     // e.g., `a{1,2}`.
     //
     // The expression being repeated is popped off the expression stack and
     // `rep` is the operator that was seen.
     fn parse_simple_repeat(&mut self, rep: Repeater) -> Result<Build> {
         let operand = try!(self.pop(ErrorKind::RepeaterExpectsExpr)); // e.g., (*
         if !operand.can_repeat() {
             // e.g., a**
             return Err(self.err(ErrorKind::RepeaterUnexpectedExpr(operand)));
         }
         self.bump();
         // A trailing `?` requests the opposite of the default greediness.
         let lazy = self.bump_if('?');
         let repeat = Expr::Repeat {
             e: Box::new(operand),
             r: rep,
             greedy: !lazy ^ self.flags.swap_greed,
         };
         Ok(Build::Expr(repeat))
     }

     // Parses a decimal number, consuming input for as long as `until`
     // matches, e.g., `a{123,456}`.
     //
     // Start: `1`
     // End:   `,` (where `until` stops matching at `,`)
     //
     // Surrounding whitespace in the consumed text is ignored. Returns
     // `MissingBase10` when nothing was consumed and `InvalidBase10` when the
     // text is not a plain run of ASCII digits that fits in a `u32`.
     fn parse_decimal<B: Bumpable>(&mut self, until: B) -> Result<u32> {
         let raw = match self.bump_get(until) {
             // e.g., a{}
             None => return Err(self.err(ErrorKind::MissingBase10)),
             Some(raw) => raw,
         };
         let n = raw.trim();
         // `u32::from_str_radix` tolerates a leading `+`, which would let
         // `a{+5}` through. Only bare digits are a valid repetition count.
         if !n.chars().all(|c| c.is_digit(10)) {
             // e.g., a{xyz
             // e.g., a{+5}
             return Err(self.err(ErrorKind::InvalidBase10(n.into())));
         }
         // e.g., a{9999999999}
         u32::from_str_radix(n, 10)
             .map_err(|_| self.err(ErrorKind::InvalidBase10(n.into())))
     }

     // Parses an octal number, up to 3 digits, e.g., `a\123b`
     //
     // Start: `1`
     // End:   `b`
     //
     // In Unicode mode the value becomes a `char` literal; otherwise it is
     // converted to a single byte literal.
     fn parse_octal(&mut self) -> Result<Build> {
         use std::char;
         let mut seen = 0; // counter for limiting octal to 3 digits.
         let digits = self
             .bump_get(|c| {
                 seen += 1;
                 seen <= 3 && c >= '0' && c <= '7'
             })
             .expect("octal string"); // guaranteed at least 1 digit
         // Neither of the conversions below is expected to fail: at most
         // three digits in `0...7` give a value no larger than `0777` (`511`
         // in decimal), which always fits in a `u32`, and every number in
         // `0...511` is a valid Unicode scalar value.
         let value = u32::from_str_radix(&digits, 8)
                         .ok().expect("valid octal number");
         if self.flags.unicode {
             let c = char::from_u32(value).expect("Unicode scalar value");
             self.lit(c)
         } else {
             self.u32_to_one_byte(value)
         }
     }

     // Parses a hex number, e.g., `a\x5ab`.
     //
     // Start: `5`
     // End:   `b`
     //
     // And also, `a\x{2603}b`.
     //
     // Start: `{`
     // End:   `b`
     //
     // An opening `{` selects the braced, many-digit form; anything else is
     // parsed as exactly two hex digits.
     fn parse_hex(&mut self) -> Result<Build> {
         match self.bump_if('{') {
             true => self.parse_hex_many_digits(),
             false => self.parse_hex_two_digits(),
         }
     }

     // Parses a many-digit hex number, e.g., `a\x{2603}b`.
     //
     // Start: `2`
     // End:   `b`
     //
     // Returns `InvalidBase16` when the text before `}` is not a run of hex
     // digits that fits in a `u32`, `UnclosedHex` when the closing `}` is
     // missing, and `InvalidScalarValue` when (in Unicode mode) the number is
     // not a Unicode scalar value.
     fn parse_hex_many_digits(&mut self) -> Result<Build> {
         use std::char;

         let s = self.bump_get(|c| c != '}').unwrap_or_else(String::new);
         // `u32::from_str_radix` tolerates a leading `+`, which would let
         // `\x{+41}` through. Only bare hex digits are valid here.
         if !s.chars().all(|c| c.is_digit(16)) {
             return Err(self.err(ErrorKind::InvalidBase16(s)));
         }
         // Still fallible: the text may be empty or overflow a `u32`.
         let n = try!(u32::from_str_radix(&s, 16)
                          .map_err(|_| self.err(ErrorKind::InvalidBase16(s))));
         if !self.bump_if('}') {
             // e.g., a\x{d
             return Err(self.err(ErrorKind::UnclosedHex));
         }
         if !self.flags.unicode {
             return self.u32_to_one_byte(n);
         }
         // Build the error lazily so nothing is allocated on success.
         let c = try!(char::from_u32(n).ok_or_else(|| {
             self.err(ErrorKind::InvalidScalarValue(n))
         }));
         self.lit(c)
     }

     // Parses a two-digit hex number, e.g., `a\x5ab`.
     //
     // Start: `5`
     // End:   `b`
     //
     // Returns `UnexpectedTwoDigitHexEof` when fewer than two digits are
     // available and `InvalidBase16` when the consumed text is not made up
     // of hex digits.
     fn parse_hex_two_digits(&mut self) -> Result<Build> {
         use std::char;

         let mut i = 0;
         let s = self.bump_get(|_| { i += 1; i <= 2 })
                     .unwrap_or_else(String::new);
         if s.len() < 2 {
             // e.g., a\x
             // e.g., a\xf
             return Err(self.err(ErrorKind::UnexpectedTwoDigitHexEof));
         }
         // `u32::from_str_radix` tolerates a leading `+`, which would let
         // `\x+f` through as U+000F. Only bare hex digits are valid here.
         if !s.chars().all(|c| c.is_digit(16)) {
             return Err(self.err(ErrorKind::InvalidBase16(s)));
         }
         let n = try!(u32::from_str_radix(&s, 16)
                          .map_err(|_| self.err(ErrorKind::InvalidBase16(s))));
         if !self.flags.unicode {
             return self.u32_to_one_byte(n);
         }
         // Two hex digits are at most 0xFF, which is always a valid scalar.
         let c = char::from_u32(n).expect("Unicode scalar value");
         self.lit(c)
     }

     // Parses a character class, e.g., `[^a-zA-Z0-9]+`.
     //
     // Start: `[`
     // End:   `+`
     //
     // Ranges are accumulated into one class, which is then case folded and
     // negated as required and canonicalized. A class that ends up empty is
     // an error. Outside Unicode mode the result is a byte class.
     fn parse_class(&mut self) -> Result<Build> {
         self.bump();
         let negated = self.bump_if('^');
         let mut class = CharClass::empty();
         // Leading dashes are plain characters.
         while self.bump_if('-') {
             class.ranges.push(ClassRange::one('-'));
         }
         loop {
             if self.eof() {
                 // e.g., [a
                 return Err(self.err(ErrorKind::UnexpectedClassEof));
             }
             let c = self.cur();
             // If no ranges have been added, then `]` is the first
             // character (sans, perhaps, the `^` symbol), so it should
             // be interpreted as a `]` instead of a closing class bracket.
             if c == ']' && class.len() > 0 {
                 self.bump();
                 break;
             }
             match c {
                 '[' => {
                     if let Some(ascii) = self.maybe_parse_ascii() {
                         class.ranges.extend(ascii);
                     } else {
                         // Not `[:name:]`, so `[` is an ordinary character.
                         self.bump();
                         try!(self.parse_class_range(&mut class, '['));
                     }
                 }
                 '\\' => match try!(self.parse_escape()) {
                     Build::Expr(Expr::Class(escaped)) => {
                         class.ranges.extend(escaped);
                     }
                     Build::Expr(Expr::ClassBytes(escaped)) => {
                         for r in escaped {
                             let lo = r.start as char;
                             let hi = r.end as char;
                             class.ranges.push(ClassRange::new(lo, hi));
                         }
                     }
                     Build::Expr(Expr::Literal { chars, .. }) => {
                         try!(self.parse_class_range(&mut class, chars[0]));
                     }
                     Build::Expr(Expr::LiteralBytes { bytes, .. }) => {
                         let first = bytes[0] as char;
                         try!(self.parse_class_range(&mut class, first));
                     }
                     Build::Expr(other) => {
                         let kind = ErrorKind::InvalidClassEscape(other);
                         return Err(self.err(kind));
                     }
                     // Because `parse_escape` can never return `LeftParen`.
                     _ => unreachable!(),
                 },
                 _ => {
                     if !self.flags.unicode {
                         // Only checks that the character fits in one byte.
                         let _ = try!(self.codepoint_to_one_byte(c));
                     }
                     self.bump();
                     try!(self.parse_class_range(&mut class, c));
                 }
             }
         }
         let class = self.class_transform(negated, class).canonicalize();
         if class.is_empty() {
             return Err(self.err(ErrorKind::EmptyClass));
         }
         let expr = if self.flags.unicode {
             Expr::Class(class)
         } else {
             Expr::ClassBytes(class.to_byte_class())
         };
         Ok(Build::Expr(expr))
     }

     // Parses a single range in a character class.
     //
     // Since this is a helper for `parse_class`, its signature sticks out.
     // Namely, it requires the start character of the range and the char
     // class to mutate.
     //
     // e.g., `[a-z]`
     //
     // Start: `-` (with start == `a`)
     // End:   `]`
     //
     // When no `-` follows, `start` is added on its own. When the `-` is
     // directly followed by `]`, both `start` and `-` are added as single
     // characters and the `]` is left for the caller.
     fn parse_class_range(&mut self, class: &mut CharClass, start: char)
                         -> Result<()> {
         if !self.bump_if('-') {
             // Not a range, so just push a singleton range.
             class.ranges.push(ClassRange::one(start));
             return Ok(());
         }
         if self.eof() {
             // e.g., [a-
             return Err(self.err(ErrorKind::UnexpectedClassEof));
         }
         if self.peek_is(']') {
             // This is the end of the class, so we permit use of `-` as a
             // regular char (just like we do in the beginning).
             class.ranges.push(ClassRange::one(start));
             class.ranges.push(ClassRange::one('-'));
             return Ok(());
         }

         // We have a real range: read its upper end (possibly escaped) and
         // make sure the range is valid.
         let end = if self.cur() == '\\' {
             match try!(self.parse_escape()) {
                 Build::Expr(Expr::Literal { chars, .. }) => chars[0],
                 Build::Expr(Expr::LiteralBytes { bytes, .. }) => {
                     bytes[0] as char
                 }
                 Build::Expr(other) => {
                     let kind = ErrorKind::InvalidClassEscape(other);
                     return Err(self.err(kind));
                 }
                 // Because `parse_escape` can never return `LeftParen`.
                 _ => unreachable!(),
             }
         } else {
             let upper = self.bump();
             if !self.flags.unicode {
                 // Only checks that the character fits in one byte.
                 let _ = try!(self.codepoint_to_one_byte(upper));
             }
             upper
         };
         if end < start {
             // e.g., [z-a]
             return Err(self.err(ErrorKind::InvalidClassRange {
                 start: start,
                 end: end,
             }));
         }
         class.ranges.push(ClassRange::new(start, end));
         Ok(())
     }

     // Parses an ASCII class, e.g., `[:alnum:]+`.
     //
     // Start: `[`
     // End:   `+`
     //
     // Also supports negation, e.g., `[:^alnum:]`.
     //
     // This parsing routine is distinct from the others in that it doesn't
     // actually report any errors. Namely, if it fails, then the parser should
     // fall back to parsing a regular class.
     //
     // This method will only make progress in the parser if it succeeds.
     // Otherwise, the cursor is put back where it started.
     fn maybe_parse_ascii(&mut self) -> Option<CharClass> {
         // Attempts the parse, moving the cursor freely; the caller undoes
         // the movement on failure.
         fn attempt(p: &mut Parser) -> Option<CharClass> {
             p.bump(); // the `[`
             if !p.bump_if(':') {
                 return None;
             }
             let negate = p.bump_if('^');
             let name = match p.bump_get(|c| c != ':') {
                 Some(name) => name,
                 None => return None,
             };
             if !p.bump_if(":]") {
                 return None;
             }
             match ascii_class(&name) {
                 Some(cls) => Some(p.class_transform(negate, cls)),
                 None => None,
             }
         }

         let rewind_to = self.chari;
         let parsed = attempt(self);
         if parsed.is_none() {
             self.chari = rewind_to;
         }
         parsed
     }

     // Parses a Unicode class name, e.g., `a\pLb`.
     //
     // Start: `L`
     // End:   `b`
     //
     // And also, `a\p{Greek}b`.
     //
     // Start: `{`
     // End:   `b`
     //
     // `neg` is true when the class name is used with `\P`.
     //
     // The name is looked up first, so an unknown name is reported as
     // `UnrecognizedUnicodeClass` even when Unicode mode is off; a known name
     // with Unicode mode off is reported as `UnicodeNotAllowed`.
     fn parse_unicode_class(&mut self, neg: bool) -> Result<CharClass> {
         let name = if self.bump_if('{') {
             let braced = self.bump_get(|c| c != '}')
                              .unwrap_or_else(String::new);
             if braced.is_empty() || !self.bump_if('}') {
                 // e.g., \p{Greek
                 return Err(self.err(ErrorKind::UnclosedUnicodeName));
             }
             braced
         } else if self.eof() {
             // e.g., \p
             return Err(self.err(ErrorKind::UnexpectedEscapeEof));
         } else {
             // Single-letter form: the next character is the whole name.
             self.bump().to_string()
         };
         let cls = match unicode_class(&name) {
             Some(cls) => cls,
             None => {
                 let kind = ErrorKind::UnrecognizedUnicodeClass(name);
                 return Err(self.err(kind));
             }
         };
         if !self.flags.unicode {
             return Err(self.err(ErrorKind::UnicodeNotAllowed));
         }
         Ok(self.class_transform(neg, cls))
     }

     // Builds a perl character class, using the Unicode tables in Unicode
     // mode and the ASCII classes otherwise.
     //
     // `name` must be one of d, s, w, D, S, W. If not, this function panics.
     // The upper-case names produce the negated class.
     //
     // No parser state is changed.
     fn parse_perl_class(&mut self, name: char) -> CharClass {
         use unicode::regex::{PERLD, PERLS, PERLW};
         let negate = match name {
             'd' | 's' | 'w' => false,
             'D' | 'S' | 'W' => true,
             _ => unreachable!(),
         };
         // `name` is known to be one of the six letters from here on.
         let cls = if self.flags.unicode {
             match name {
                 'd' | 'D' => raw_class_to_expr(PERLD),
                 's' | 'S' => raw_class_to_expr(PERLS),
                 _ => raw_class_to_expr(PERLW),
             }
         } else {
             let ascii_name = match name {
                 'd' | 'D' => "digit",
                 's' | 'S' => "space",
                 _ => "word",
             };
             ascii_class(ascii_name).unwrap()
         };
         self.class_transform(negate, cls)
     }

     // Wraps `e` in a `Build` and unconditionally advances past the current
     // input character.
     //
     // This is a convenience for call sites where the character under the
     // cursor is already known to correspond to `e`.
     fn parse_one(&mut self, e: Expr) -> Build {
         let built = Build::Expr(e);
         self.bump();
         built
     }
 }

 // Auxiliary helper methods.
 impl Parser {
     // Returns an iterator over the input from the cursor onwards, honoring
     // the current `ignore_space` flag.
     fn chars(&self) -> Chars {
         let rest = &self.chars[self.chari..];
         Chars::new(rest, self.flags.ignore_space)
     }

     // Returns the current character and moves the cursor past it (along
     // with anything the `Chars` iterator skipped to reach it).
     //
     // Panics at end of input, because `cur` does.
     fn bump(&mut self) -> char {
         let current = self.cur();
         let consumed = self.chars().next_count();
         self.chari = checkadd(self.chari, consumed);
         current
     }

     // Returns the character under the cursor without consuming it.
     //
     // Panics if there is no input left.
     fn cur(&self) -> char {
         let mut rest = self.chars();
         rest.next().unwrap()
     }

     // Returns true when the `Chars` iterator yields nothing more from the
     // cursor onwards.
     fn eof(&self) -> bool {
         match self.chars().next() {
             None => true,
             Some(_) => false,
         }
     }

     // If `s` matches at the cursor, consumes the matched span and returns
     // it as a string; otherwise returns `None` and leaves the cursor alone.
     fn bump_get<B: Bumpable>(&mut self, s: B) -> Option<String> {
         let matched = s.match_end(self);
         if matched == 0 {
             return None;
         }
         let end = checkadd(self.chari, matched);
         let text: String = self.chars[self.chari..end]
                                .iter().cloned().collect();
         self.chari = end;
         Some(text)
     }

     // If `s` matches at the cursor, consumes the matched span and returns
     // true; otherwise returns false and leaves the cursor alone.
     fn bump_if<B: Bumpable>(&mut self, s: B) -> bool {
         match s.match_end(self) {
             0 => false,
             matched => {
                 self.chari = checkadd(self.chari, matched);
                 true
             }
         }
     }

     // Reports whether `s` matches at the cursor, without consuming input.
     fn peek_is<B: Bumpable>(&self, s: B) -> bool {
         let matched = s.match_end(self);
         matched != 0
     }

     // Builds an error of the given kind located at the cursor.
     fn err(&self, kind: ErrorKind) -> Error {
         let here = self.chari;
         self.errat(here, kind)
     }

     fn errat(&self, pos: usize, kind: ErrorKind) -> Error {
         Error { pos: pos, surround: self.windowat(pos), kind: kind }
     }

     // Returns the slice of the pattern from five characters before `pos` to
     // five characters after it, clamped to the bounds of the input.
     fn windowat(&self, pos: usize) -> String {
         // `max(pos, 5) - 5` avoids underflow when `pos` is less than 5.
         let lo = max(pos, 5) - 5;
         let hi = min(checkadd(pos, 5), self.chars.len());
         let mut window = String::new();
         window.extend(self.chars[lo..hi].iter().cloned());
         window
     }

     // Pops the top of the expression stack and returns it if it is an
     // expression. If the stack is empty or the top is a group marker, an
     // error of kind `expected` is returned instead.
     fn pop(&mut self, expected: ErrorKind) -> Result<Expr> {
         if let Some(Build::Expr(e)) = self.stack.pop() {
             Ok(e)
         } else {
             Err(self.err(expected))
         }
     }

     // Applies case folding to the class when the current flags call for
     // case insensitivity, and then negates it when `negate` is `true`.
     // (Case folding always happens before negation.)
     fn class_transform(&self, negate: bool, cls: CharClass) -> CharClass {
         let folded = if self.flags.casei {
             cls.case_fold()
         } else {
             cls
         };
         if negate {
             folded.negate()
         } else {
             folded
         }
     }

     // Translates a Unicode codepoint into a single UTF-8 byte, and returns
     // a `UnicodeNotAllowed` error if its encoding needs more than one byte.
     //
     // This will panic if self.flags.unicode == true.
     fn codepoint_to_one_byte(&self, c: char) -> Result<u8> {
         assert!(!self.flags.unicode);
         if c.len_utf8() > 1 {
             return Err(self.err(ErrorKind::UnicodeNotAllowed));
         }
         // A one-byte UTF-8 encoding is the codepoint value itself.
         Ok(c as u8)
     }

     // Creates a new byte literal from a single byte.
     //
     // If the given number can't fit into a single byte, then it is assumed
     // to be a Unicode codepoint and `UnicodeNotAllowed` is returned. A byte
     // above 0x7F is rejected with `InvalidUtf8` unless `allow_bytes` is set.
     //
     // This should only be called when Unicode mode is disabled; it panics
     // otherwise.
     fn u32_to_one_byte(&self, b: u32) -> Result<Build> {
         assert!(!self.flags.unicode);
         if b > u8::MAX as u32 {
             return Err(self.err(ErrorKind::UnicodeNotAllowed));
         }
         if b > 0x7F && !self.flags.allow_bytes {
             return Err(self.err(ErrorKind::InvalidUtf8));
         }
         let literal = Expr::LiteralBytes {
             bytes: vec![b as u8],
             casei: self.flags.casei,
         };
         Ok(Build::Expr(literal))
     }

     // Creates a new literal expr from a Unicode codepoint.
     //
     // In Unicode mode this is a `char` literal. Otherwise it is a byte
     // literal, which fails if the codepoint does not fit in a single byte.
     fn lit(&self, c: char) -> Result<Build> {
         let casei = self.flags.casei;
         if self.flags.unicode {
             return Ok(Build::Expr(Expr::Literal {
                 chars: vec![c],
                 casei: casei,
             }));
         }
         let byte = try!(self.codepoint_to_one_byte(c));
         Ok(Build::Expr(Expr::LiteralBytes {
             bytes: vec![byte],
             casei: casei,
         }))
     }
 }

 // A cursor over a borrowed slice of characters.
 //
 // It is consumed through its `Iterator` implementation, which either
 // yields every character verbatim or, when `ignore_space` is set, skips
 // whitespace and `#` comments.
 struct Chars<'a> {
     // The characters being traversed.
     chars: &'a [char],
     // Index into `chars` of the character that will be examined next.
     cur: usize,
     // When true, iteration skips whitespace and `#` comments.
     ignore_space: bool,
 }

 impl<'a> Iterator for Chars<'a> {
     type Item = char;

     // Yields the next significant character and moves the cursor past it.
     //
     // Without `ignore_space`, every character is significant and the
     // cursor is advanced even when the end of the input has been reached.
     //
     // With `ignore_space`:
     //
     //  * whitespace is skipped;
     //  * `#` starts a comment that runs through the next `\n` (or to the
     //    end of the input), all of which is skipped;
     //  * `\#` yields a single `#`, while any other `\` yields the `\`
     //    itself and leaves the following character for the next call.
     fn next(&mut self) -> Option<char> {
         if !self.ignore_space {
             let current = self.c();
             self.advance();
             return current;
         }
         loop {
             let ch = match self.c() {
                 Some(ch) => ch,
                 None => return None,
             };
             self.advance();
             if ch == '\\' {
                 // Only an escaped `#` is collapsed here.
                 if self.c() == Some('#') {
                     self.advance();
                     return Some('#');
                 }
                 return Some('\\');
             }
             if ch == '#' {
                 // Discard the rest of the line, including the newline.
                 loop {
                     match self.c() {
                         None => return None,
                         Some(skipped) => {
                             self.advance();
                             if skipped == '\n' {
                                 break;
                             }
                         }
                     }
                 }
                 continue;
             }
             if !ch.is_whitespace() {
                 return Some(ch);
             }
         }
     }
 }

 impl<'a> Chars<'a> {
     // Creates a cursor positioned at the start of `chars`.
     fn new(chars: &[char], ignore_space: bool) -> Chars {
         Chars { cur: 0, ignore_space: ignore_space, chars: chars }
     }

     // Returns the character under the cursor without consuming it, or
     // `None` if the cursor is past the end of the input.
     fn c(&self) -> Option<char> {
         self.chars.get(self.cur).cloned()
     }

     // Moves the cursor forward by one position, panicking on overflow.
     fn advance(&mut self) {
         let following = checkadd(self.cur, 1);
         self.cur = following;
     }

     // Consumes one item via `next` and returns the resulting cursor
     // position.
     fn next_count(&mut self) -> usize {
         let _ = self.next();
         self.cur
     }
 }

 // Auxiliary methods for manipulating the expression stack.
 impl Parser {
     // Handles an alternation operator (`|`).
     //
     // Expressions are popped off the stack and collected into a new "arm"
     // (the concatenation of everything popped) until one of the following
     // is encountered:
     //
     //  1. The bottom of the stack. The result is an alternation whose
     //     only arm is the new one.
     //  2. An opening parenthesis. It is put back on the stack and the
     //     result is an alternation whose only arm is the new one.
     //  3. An existing alternation. The new arm is appended to it and the
     //     extended alternation is the result.
     //
     // An alternation with a single arm (cases 1 and 2) is not a valid
     // final state; it is expected to gain further arms later, and
     // `finish_concat` reports an error if it never does.
     //
     // An empty arm is never allowed and is reported as an error.
     fn alternate(&mut self) -> Result<Build> {
         let mut arm = vec![];
         loop {
             let (mut arms, paren) = match self.stack.pop() {
                 Some(Build::Expr(Expr::Alternate(es))) => (es, None),
                 Some(Build::Expr(e)) => {
                     arm.push(e);
                     continue;
                 }
                 Some(paren @ Build::LeftParen{..}) => (vec![], Some(paren)),
                 None => (vec![], None),
             };
             if arm.is_empty() {
                 // e.g., |a or (|a) or a||
                 return Err(self.err(ErrorKind::EmptyAlternate));
             }
             if let Some(paren) = paren {
                 // The group is still open, so its marker stays in place.
                 self.stack.push(paren);
             }
             arms.push(rev_concat(arm));
             return Ok(Build::Expr(Expr::Alternate(arms)));
         }
     }

     // Handles a closing parenthesis (`)`).
     //
     // Expressions are popped off the stack and collected into an "arm"
     // (the concatenation of everything popped) until one of the following
     // is encountered:
     //
     //  1. The bottom of the stack. This is an unopened parenthesis and an
     //     error is reported.
     //  2. An opening parenthesis. The arm becomes the body of a `Group`
     //     expression.
     //  3. An alternation. The arm is added to it, and one more item is
     //     popped. If the stack is empty, an unopened parenthesis error is
     //     reported. Otherwise the item is assumed to be an opening
     //     parenthesis and the alternation becomes the body of a `Group`
     //     expression. (The assumption holds because an alternation either
     //     spans the entire regex or an entire group, which the `alternate`
     //     method guarantees.)
     //
     // On success, the flags saved by the opening parenthesis are returned
     // alongside the group.
     //
     // Neither empty arms nor empty groups are allowed.
     fn close_paren(&mut self) -> Result<(Flags, Build)> {
         let mut arm = vec![];
         loop {
             let top = match self.stack.pop() {
                 // e.g., )
                 None => return Err(self.err(ErrorKind::UnopenedParen)),
                 Some(top) => top,
             };
             match top {
                 Build::Expr(Expr::Alternate(mut arms)) => {
                     if arm.is_empty() {
                         // e.g., (a|)
                         return Err(self.err(ErrorKind::EmptyAlternate));
                     }
                     arms.push(rev_concat(arm));
                     return match self.stack.pop() {
                         // e.g., a|b)
                         None => Err(self.err(ErrorKind::UnopenedParen)),
                         Some(Build::Expr(_)) => unreachable!(),
                         Some(Build::LeftParen { i, name, old_flags, .. }) => {
                             let group = Expr::Group {
                                 e: Box::new(Expr::Alternate(arms)),
                                 i: i,
                                 name: name,
                             };
                             Ok((old_flags, Build::Expr(group)))
                         }
                     };
                 }
                 Build::Expr(e) => arm.push(e),
                 Build::LeftParen { i, name, old_flags, .. } => {
                     if arm.is_empty() {
                         // e.g., ()
                         return Err(self.err(ErrorKind::EmptyGroup));
                     }
                     let group = Expr::Group {
                         e: Box::new(rev_concat(arm)),
                         i: i,
                         name: name,
                     };
                     return Ok((old_flags, Build::Expr(group)));
                 }
             }
         }
     }

     // Produces the final expression once the end of the input is reached.
     //
     // Expressions are popped off the stack and collected until one of the
     // following is encountered:
     //
     //  1. The bottom of the stack. The concatenation of the popped
     //     expressions is the result. It may be empty!
     //  2. An alternation. The popped expressions form its last arm, which
     //     must not be empty, and the alternation is the result. Nothing
     //     may remain on the stack afterwards; a leftover opening
     //     parenthesis means it was never closed.
     //
     // Popping an opening parenthesis at any point is reported as an
     // unclosed parenthesis, positioned at the parenthesis itself.
     fn finish_concat(&mut self) -> Result<Expr> {
         let mut pending = vec![];
         loop {
             let mut arms = match self.stack.pop() {
                 None => return Ok(rev_concat(pending)),
                 Some(Build::LeftParen{ chari, ..}) => {
                     // e.g., a(b
                     return Err(self.errat(chari, ErrorKind::UnclosedParen));
                 }
                 Some(Build::Expr(Expr::Alternate(es))) => es,
                 Some(Build::Expr(e)) => {
                     pending.push(e);
                     continue;
                 }
             };
             if pending.is_empty() {
                 // e.g., a|
                 return Err(self.err(ErrorKind::EmptyAlternate));
             }
             arms.push(rev_concat(pending));
             // Make sure there are no opening parens remaining.
             return match self.stack.pop() {
                 None => Ok(Expr::Alternate(arms)),
                 Some(Build::LeftParen{ chari, ..}) => {
                     // e.g., (a|b
                     Err(self.errat(chari, ErrorKind::UnclosedParen))
                 }
                 e => unreachable!("{:?}", e),
             };
         }
     }
 }

 impl Build {
     // Reports whether this stack entry is the empty expression.
     //
     // Opening parentheses and all other expressions are not empty.
     fn is_empty(&self) -> bool {
         if let Build::Expr(Expr::Empty) = *self {
             true
         } else {
             false
         }
     }
 }

 // Make it ergonomic to conditionally bump the parser.
 // i.e., `bump_if('a')` or `bump_if("abc")`.
 //
 // A `Bumpable` value describes something to look for at the parser's
 // current position. The implementations in this file exist for a single
 // `char`, a `&str` and a `FnMut(char) -> bool` predicate.
 trait Bumpable {
     // Tests `self` against the upcoming input of `p`.
     //
     // The implementations in this file return the `cur` position of the
     // parser's character cursor just past the matched input, and 0 when
     // nothing matched.
     fn match_end(self, p: &Parser) -> usize;
 }

 impl Bumpable for char {
     // Matches when the next character of the input equals `self`.
     //
     // Returns the cursor position after that character, or 0 when the
     // input is exhausted or starts with a different character.
     fn match_end(self, p: &Parser) -> usize {
         let mut cursor = p.chars();
         match cursor.next() {
             Some(found) if found == self => cursor.cur,
             _ => 0,
         }
     }
 }

 impl<'a> Bumpable for &'a str {
     // Matches when the input starts with every character of `self`, in
     // order.
     //
     // Returns the cursor position after the last matched character, or 0
     // when the input ends early or differs from `self`. An empty string
     // also yields 0.
     fn match_end(self, p: &Parser) -> usize {
         let mut wanted = self.chars();
         let mut input = p.chars();
         let mut end = 0;
         loop {
             // The input is always read first, even when `self` has
             // already been exhausted.
             let found = input.next();
             let expected = match wanted.next() {
                 None => return end,
                 Some(ch) => ch,
             };
             if found != Some(expected) {
                 return 0;
             }
             end = input.cur;
         }
     }
 }

 impl<F: FnMut(char) -> bool> Bumpable for F {
     // Matches the longest run of leading characters accepted by the
     // predicate.
     //
     // Returns the cursor position after the last accepted character, or 0
     // when the very first character is rejected or the input is empty.
     fn match_end(mut self, p: &Parser) -> usize {
         let mut cursor = p.chars();
         let mut end = 0;
         loop {
             let accepted = match cursor.next() {
                 Some(ch) => self(ch),
                 None => false,
             };
             if !accepted {
                 return end;
             }
             end = cursor.cur;
         }
     }
 }

 // Builds a single expression out of a list of expressions given in
 // reverse order.
 //
 // No expressions yield `Empty` and a single expression is returned as is.
 // Only two or more expressions are reversed and wrapped in a `Concat`.
 fn rev_concat(mut exprs: Vec<Expr>) -> Expr {
     match exprs.len() {
         0 => Expr::Empty,
         1 => exprs.pop().unwrap(),
         _ => {
             exprs.reverse();
             Expr::Concat(exprs)
         }
     }
 }

 // Returns true if and only if the given character is allowed in a capture
 // name, i.e., it is an underscore, an ASCII digit or an ASCII letter.
 //
 // Note that the first char of a capture name must not be numeric; that
 // rule is not enforced here.
 fn is_valid_capture_char(c: char) -> bool {
     let is_digit = '0' <= c && c <= '9';
     let is_lower = 'a' <= c && c <= 'z';
     let is_upper = 'A' <= c && c <= 'Z';
     match c {
         '_' => true,
         _ => is_digit || is_lower || is_upper,
     }
 }

 /// Returns true if the given character has significance in a regex.
 pub fn is_punct(c: char) -> bool {
     // Every character that carries special meaning in the regex syntax.
     const SIGNIFICANT: [char; 15] = [
         '\\', '.', '+', '*', '?', '(', ')', '|',
         '[', ']', '{', '}', '^', '$', '#',
     ];
     SIGNIFICANT.contains(&c)
 }

 // Adds two offsets, panicking instead of wrapping around on overflow.
 fn checkadd(x: usize, y: usize) -> usize {
     match x.checked_add(y) {
         Some(sum) => sum,
         None => panic!("regex length overflow"),
     }
 }

 // Looks up a Unicode character class by its exact name.
 //
 // The lookup is a binary search over `UNICODE_CLASSES`, so it relies on
 // that table being sorted by name. Returns `None` for unknown names.
 fn unicode_class(name: &str) -> Option<CharClass> {
     match UNICODE_CLASSES.binary_search_by(|&(key, _)| key.cmp(name)) {
         Ok(idx) => Some(raw_class_to_expr(UNICODE_CLASSES[idx].1)),
         Err(_) => None,
     }
 }

 // Looks up an ASCII character class (e.g., `alnum`) by its exact name.
 //
 // The lookup is a binary search over `ASCII_CLASSES`, so it relies on
 // that table being sorted by name. Returns `None` for unknown names.
 fn ascii_class(name: &str) -> Option<CharClass> {
     match ASCII_CLASSES.binary_search_by(|&(key, _)| key.cmp(name)) {
         Ok(idx) => Some(raw_class_to_expr(ASCII_CLASSES[idx].1)),
         Err(_) => None,
     }
 }

 // Converts a table of `(start, end)` pairs into a `CharClass`, turning
 // each pair into one `ClassRange`.
 fn raw_class_to_expr(raw: &[(char, char)]) -> CharClass {
     let mut ranges = Vec::with_capacity(raw.len());
     for &(start, end) in raw {
         ranges.push(ClassRange { start: start, end: end });
     }
     CharClass::new(ranges)
 }

 // A character class given as a static table of `(start, end)` pairs, where
 // both ends of each pair belong to the class.
 type Class = &'static [(char, char)];
 // A static table that associates class names with their range tables.
 type NamedClasses = &'static [(&'static str, Class)];

 // The named ASCII character classes, keyed by the name that appears
 // between the colons (e.g., `alnum` for `[:alnum:]`).
 const ASCII_CLASSES: NamedClasses = &[
     // Classes must be in alphabetical order so that bsearch works.
     // [:alnum:]      alphanumeric (== [0-9A-Za-z])
     // [:alpha:]      alphabetic (== [A-Za-z])
     // [:ascii:]      ASCII (== [\x00-\x7F])
     // [:blank:]      blank (== [\t ])
     // [:cntrl:]      control (== [\x00-\x1F\x7F])
     // [:digit:]      digits (== [0-9])
     // [:graph:]      graphical (== [!-~])
     // [:lower:]      lower case (== [a-z])
     // [:print:]      printable (== [ -~] == [ [:graph:]])
     // [:punct:]      punctuation (== [!-/:-@[-`{-~])
     // [:space:]      whitespace (== [\t\n\v\f\r ])
     // [:upper:]      upper case (== [A-Z])
     // [:word:]       word characters (== [0-9A-Za-z_])
     // [:xdigit:]     hex digit (== [0-9A-Fa-f])
     // Taken from: https://golang.org/pkg/regexp/syntax/
     ("alnum", &ALNUM),
     ("alpha", &ALPHA),
     ("ascii", &ASCII),
     ("blank", &BLANK),
     ("cntrl", &CNTRL),
     ("digit", &DIGIT),
     ("graph", &GRAPH),
     ("lower", &LOWER),
     ("print", &PRINT),
     ("punct", &PUNCT),
     ("space", &SPACE),
     ("upper", &UPPER),
     ("word", &WORD),
     ("xdigit", &XDIGIT),
 ];

 // Range tables for the named ASCII classes. Each entry is a `(start, end)`
 // pair of characters, with both ends included in the class.
 //
 // NOTE(review): `BLANK` lists ' ' before '\t', whereas every other table
 // is in ascending order. Presumably `CharClass::new` does not depend on the
 // order of its input; confirm before relying on these tables elsewhere.
 const ALNUM: Class = &[('0', '9'), ('A', 'Z'), ('a', 'z')];
 const ALPHA: Class = &[('A', 'Z'), ('a', 'z')];
 const ASCII: Class = &[('\x00', '\x7F')];
 const BLANK: Class = &[(' ', ' '), ('\t', '\t')];
 const CNTRL: Class = &[('\x00', '\x1F'), ('\x7F', '\x7F')];
 const DIGIT: Class = &[('0', '9')];
 const GRAPH: Class = &[('!', '~')];
 const LOWER: Class = &[('a', 'z')];
 const PRINT: Class = &[(' ', '~')];
 const PUNCT: Class = &[('!', '/'), (':', '@'), ('[', '`'), ('{', '~')];
 // '\x0B' is vertical tab (\v) and '\x0C' is form feed (\f).
 const SPACE: Class = &[('\t', '\t'), ('\n', '\n'), ('\x0B', '\x0B'),
                        ('\x0C', '\x0C'), ('\r', '\r'), (' ', ' ')];
 const UPPER: Class = &[('A', 'Z')];
 const WORD: Class = &[('0', '9'), ('A', 'Z'), ('_', '_'), ('a', 'z')];
 const XDIGIT: Class = &[('0', '9'), ('A', 'F'), ('a', 'f')];

 #[cfg(test)]
 mod tests {
     use {
         CharClass, ClassRange, ByteClass, ByteRange,
         Expr, Repeater,
         ErrorKind,
     };
     use unicode::regex::{PERLD, PERLS, PERLW};
     use super::{LOWER, UPPER, WORD, Flags, Parser, ascii_class};

     static YI: &'static [(char, char)] = &[
         ('\u{a000}', '\u{a48c}'), ('\u{a490}', '\u{a4c6}'),
     ];

     fn p(s: &str) -> Expr { Parser::parse(s, Flags::default()).unwrap() }
     fn pf(s: &str, flags: Flags) -> Expr { Parser::parse(s, flags).unwrap() }
     fn lit(c: char) -> Expr { Expr::Literal { chars: vec![c], casei: false } }
     fn liti(c: char) -> Expr { Expr::Literal { chars: vec![c], casei: true } }
     fn b<T>(v: T) -> Box<T> { Box::new(v) }
     fn c(es: &[Expr]) -> Expr { Expr::Concat(es.to_vec()) }

     fn pb(s: &str) -> Expr {
         let flags = Flags { allow_bytes: true, .. Flags::default() };
         Parser::parse(s, flags).unwrap()
     }

     fn blit(b: u8) -> Expr {
         Expr::LiteralBytes {
             bytes: vec![b],
             casei: false,
         }
     }

     fn bliti(b: u8) -> Expr {
         Expr::LiteralBytes {
             bytes: vec![b],
             casei: true,
         }
     }

     fn class(ranges: &[(char, char)]) -> CharClass {
         let ranges = ranges.iter().cloned()
                            .map(|(c1, c2)| ClassRange::new(c1, c2)).collect();
         CharClass::new(ranges)
     }

     fn classes(classes: &[&[(char, char)]]) -> CharClass {
         let mut cls = CharClass::empty();
         for &ranges in classes {
             cls.ranges.extend(class(ranges));
         }
         cls.canonicalize()
     }

     fn bclass(ranges: &[(u8, u8)]) -> ByteClass {
         let ranges = ranges.iter().cloned()
                            .map(|(c1, c2)| ByteRange::new(c1, c2)).collect();
         ByteClass::new(ranges)
     }

     fn asciid() -> CharClass {
         ascii_class("digit").unwrap()
     }

     fn asciis() -> CharClass {
         ascii_class("space").unwrap()
     }

     fn asciiw() -> CharClass {
         ascii_class("word").unwrap()
     }

     fn asciid_bytes() -> ByteClass {
         asciid().to_byte_class()
     }

     fn asciis_bytes() -> ByteClass {
         asciis().to_byte_class()
     }

     fn asciiw_bytes() -> ByteClass {
         asciiw().to_byte_class()
     }

     #[test]
     fn empty() {
         assert_eq!(p(""), Expr::Empty);
     }

     #[test]
     fn literal() {
         assert_eq!(p("a"), lit('a'));
         assert_eq!(pb("(?-u)a"), blit(b'a'));
     }

     #[test]
     fn literal_string() {
         assert_eq!(p("ab"), Expr::Concat(vec![lit('a'), lit('b')]));
         assert_eq!(pb("(?-u)ab"), Expr::Concat(vec![blit(b'a'), blit(b'b')]));
     }

     #[test]
     fn start_literal() {
         assert_eq!(p("^a"), Expr::Concat(vec![
             Expr::StartText,
             Expr::Literal { chars: vec!['a'], casei: false },
         ]));
     }

     #[test]
     fn repeat_zero_or_one_greedy() {
         assert_eq!(p("a?"), Expr::Repeat {
             e: b(lit('a')),
             r: Repeater::ZeroOrOne,
             greedy: true,
         });
     }

     #[test]
     fn repeat_zero_or_one_greedy_concat() {
         assert_eq!(p("ab?"), Expr::Concat(vec![
             lit('a'),
             Expr::Repeat {
                 e: b(lit('b')),
                 r: Repeater::ZeroOrOne,
                 greedy: true,
             },
         ]));
     }

     #[test]
     fn repeat_zero_or_one_nongreedy() {
         assert_eq!(p("a??"), Expr::Repeat {
             e: b(lit('a')),
             r: Repeater::ZeroOrOne,
             greedy: false,
         });
     }

     #[test]
     fn repeat_one_or_more_greedy() {
         assert_eq!(p("a+"), Expr::Repeat {
             e: b(lit('a')),
             r: Repeater::OneOrMore,
             greedy: true,
         });
     }

     #[test]
     fn repeat_one_or_more_nongreedy() {
         assert_eq!(p("a+?"), Expr::Repeat {
             e: b(lit('a')),
             r: Repeater::OneOrMore,
             greedy: false,
         });
     }

     #[test]
     fn repeat_zero_or_more_greedy() {
         assert_eq!(p("a*"), Expr::Repeat {
             e: b(lit('a')),
             r: Repeater::ZeroOrMore,
             greedy: true,
         });
     }

     #[test]
     fn repeat_zero_or_more_nongreedy() {
         assert_eq!(p("a*?"), Expr::Repeat {
             e: b(lit('a')),
             r: Repeater::ZeroOrMore,
             greedy: false,
         });
     }

     #[test]
     fn repeat_counted_exact() {
         assert_eq!(p("a{5}"), Expr::Repeat {
             e: b(lit('a')),
             r: Repeater::Range { min: 5, max: Some(5) },
             greedy: true,
         });
     }

     #[test]
     fn repeat_counted_min() {
         assert_eq!(p("a{5,}"), Expr::Repeat {
             e: b(lit('a')),
             r: Repeater::Range { min: 5, max: None },
             greedy: true,
         });
     }

     #[test]
     fn repeat_counted_min_max() {
         assert_eq!(p("a{5,10}"), Expr::Repeat {
             e: b(lit('a')),
             r: Repeater::Range { min: 5, max: Some(10) },
             greedy: true,
         });
     }

     #[test]
     fn repeat_counted_exact_nongreedy() {
         assert_eq!(p("a{5}?"), Expr::Repeat {
             e: b(lit('a')),
             r: Repeater::Range { min: 5, max: Some(5) },
             greedy: false,
         });
     }

     #[test]
     fn repeat_counted_min_nongreedy() {
         assert_eq!(p("a{5,}?"), Expr::Repeat {
             e: b(lit('a')),
             r: Repeater::Range { min: 5, max: None },
             greedy: false,
         });
     }

     #[test]
     fn repeat_counted_min_max_nongreedy() {
         assert_eq!(p("a{5,10}?"), Expr::Repeat {
             e: b(lit('a')),
             r: Repeater::Range { min: 5, max: Some(10) },
             greedy: false,
         });
     }

     #[test]
     fn repeat_counted_whitespace() {
         assert_eq!(p("a{ 5   }"), Expr::Repeat {
             e: b(lit('a')),
             r: Repeater::Range { min: 5, max: Some(5) },
             greedy: true,
         });
         assert_eq!(p("a{ 5 , 10 }"), Expr::Repeat {
             e: b(lit('a')),
             r: Repeater::Range { min: 5, max: Some(10) },
             greedy: true,
         });
     }

     #[test]
     fn group_literal() {
         assert_eq!(p("(a)"), Expr::Group {
             e: b(lit('a')),
             i: Some(1),
             name: None,
         });
     }

     #[test]
     fn group_literal_concat() {
         assert_eq!(p("(ab)"), Expr::Group {
             e: b(c(&[lit('a'), lit('b')])),
             i: Some(1),
             name: None,
         });
     }

     #[test]
     fn alt_two() {
         assert_eq!(p("a|b"), Expr::Alternate(vec![lit('a'), lit('b')]));
     }

     #[test]
     fn alt_many() {
         assert_eq!(p("a|b|c"), Expr::Alternate(vec![
             lit('a'), lit('b'), lit('c'),
         ]));
     }

     #[test]
     fn alt_many_concat() {
         assert_eq!(p("ab|bc|cd"), Expr::Alternate(vec![
             c(&[lit('a'), lit('b')]),
             c(&[lit('b'), lit('c')]),
             c(&[lit('c'), lit('d')]),
         ]));
     }

     #[test]
     fn alt_group_two() {
         assert_eq!(p("(a|b)"), Expr::Group {
             e: b(Expr::Alternate(vec![lit('a'), lit('b')])),
             i: Some(1),
             name: None,
         });
     }

     #[test]
     fn alt_group_many() {
         assert_eq!(p("(a|b|c)"), Expr::Group {
             e: b(Expr::Alternate(vec![lit('a'), lit('b'), lit('c')])),
             i: Some(1),
             name: None,
         });
     }

     #[test]
     fn alt_group_many_concat() {
         assert_eq!(p("(ab|bc|cd)"), Expr::Group {
             e: b(Expr::Alternate(vec![
                 c(&[lit('a'), lit('b')]),
                 c(&[lit('b'), lit('c')]),
                 c(&[lit('c'), lit('d')]),
             ])),
             i: Some(1),
             name: None,
         });
     }

     #[test]
     fn alt_group_nested() {
         assert_eq!(p("(ab|(bc|(cd)))"), Expr::Group {
             e: b(Expr::Alternate(vec![
                 c(&[lit('a'), lit('b')]),
                 Expr::Group {
                     e: b(Expr::Alternate(vec![
                         c(&[lit('b'), lit('c')]),
                         Expr::Group {
                             e: b(c(&[lit('c'), lit('d')])),
                             i: Some(3),
                             name: None,
                         }
                     ])),
                     i: Some(2),
                     name: None,
                 },
             ])),
             i: Some(1),
             name: None,
         });
     }

     #[test]
     fn group_name() {
         assert_eq!(p("(?P<foo>a)"), Expr::Group {
             e: b(lit('a')),
             i: Some(1),
             name: Some("foo".into()),
         });
     }

     #[test]
     fn group_no_capture() {
         assert_eq!(p("(?:a)"), Expr::Group {
             e: b(lit('a')),
             i: None,
             name: None,
         });
     }

     #[test]
     fn group_flags() {
         assert_eq!(p("(?i:a)"), Expr::Group {
             e: b(liti('a')),
             i: None,
             name: None,
         });
         assert_eq!(pb("(?i-u:a)"), Expr::Group {
             e: b(bliti(b'a')),
             i: None,
             name: None,
         });
     }

     #[test]
     fn group_flags_returned() {
         assert_eq!(p("(?i:a)a"), c(&[
             Expr::Group {
                 e: b(liti('a')),
                 i: None,
                 name: None,
             },
             lit('a'),
         ]));
         assert_eq!(pb("(?i-u:a)a"), c(&[
             Expr::Group {
                 e: b(bliti(b'a')),
                 i: None,
                 name: None,
             },
             lit('a'),
         ]));
     }

     #[test]
     fn group_flags_retained() {
         assert_eq!(p("(?i)(?-i:a)a"), c(&[
             Expr::Group {
                 e: b(lit('a')),
                 i: None,
                 name: None,
             },
             liti('a'),
         ]));
         assert_eq!(pb("(?i-u)(?u-i:a)a"), c(&[
             Expr::Group {
                 e: b(lit('a')),
                 i: None,
                 name: None,
             },
             bliti(b'a'),
         ]));
     }

     #[test]
     fn flags_inline() {
         assert_eq!(p("(?i)a"), liti('a'));
     }

     #[test]
     fn flags_inline_multiple() {
         assert_eq!(p("(?is)a."), c(&[liti('a'), Expr::AnyChar]));
     }

     #[test]
     fn flags_inline_multiline() {
         assert_eq!(p("(?m)^(?-m)$"), c(&[Expr::StartLine, Expr::EndText]));
     }

     #[test]
     fn flags_inline_swap_greed() {
         assert_eq!(p("(?U)a*a*?(?i-U)a*a*?"), c(&[
             Expr::Repeat {
                 e: b(lit('a')),
                 r: Repeater::ZeroOrMore,
                 greedy: false,
             },
             Expr::Repeat {
                 e: b(lit('a')),
                 r: Repeater::ZeroOrMore,
                 greedy: true,
             },
             Expr::Repeat {
                 e: b(liti('a')),
                 r: Repeater::ZeroOrMore,
                 greedy: true,
             },
             Expr::Repeat {
                 e: b(liti('a')),
                 r: Repeater::ZeroOrMore,
                 greedy: false,
             },
         ]));
     }

     #[test]
     fn flags_inline_multiple_negate_one() {
         assert_eq!(p("(?is)a.(?i-s)a."), c(&[
             liti('a'), Expr::AnyChar, liti('a'), Expr::AnyCharNoNL,
         ]));
     }

     #[test]
     fn any_byte() {
         assert_eq!(
             pb("(?-u).(?u)."), c(&[Expr::AnyByteNoNL, Expr::AnyCharNoNL]));
         assert_eq!(
             pb("(?s)(?-u).(?u)."), c(&[Expr::AnyByte, Expr::AnyChar]));
     }

     #[test]
     fn flags_inline_negate() {
         assert_eq!(p("(?i)a(?-i)a"), c(&[liti('a'), lit('a')]));
     }

     #[test]
     fn flags_group_inline() {
         assert_eq!(p("(a(?i)a)a"), c(&[
             Expr::Group {
                 e: b(c(&[lit('a'), liti('a')])),
                 i: Some(1),
                 name: None,
             },
             lit('a'),
         ]));
     }

     #[test]
     fn flags_group_inline_retain() {
         assert_eq!(p("(?i)((?-i)a)a"), c(&[
             Expr::Group {
                 e: b(lit('a')),
                 i: Some(1),
                 name: None,
             },
             liti('a'),
         ]));
     }

     #[test]
     fn flags_default_casei() {
         let flags = Flags { casei: true, .. Flags::default() };
         assert_eq!(pf("a", flags), liti('a'));
     }

     #[test]
     fn flags_default_multi() {
         let flags = Flags { multi: true, .. Flags::default() };
         assert_eq!(pf("^", flags), Expr::StartLine);
     }

     #[test]
     fn flags_default_dotnl() {
         let flags = Flags { dotnl: true, .. Flags::default() };
         assert_eq!(pf(".", flags), Expr::AnyChar);
     }

     #[test]
     fn flags_default_swap_greed() {
         let flags = Flags { swap_greed: true, .. Flags::default() };
         assert_eq!(pf("a+", flags), Expr::Repeat {
             e: b(lit('a')),
             r: Repeater::OneOrMore,
             greedy: false,
         });
     }

     #[test]
     fn flags_default_ignore_space() {
         let flags = Flags { ignore_space: true, .. Flags::default() };
         assert_eq!(pf(" a ", flags), lit('a'));
     }

     #[test]
     fn escape_simple() {
         assert_eq!(p(r"\a\f\t\n\r\v"), c(&[
             lit('\x07'), lit('\x0C'), lit('\t'),
             lit('\n'), lit('\r'), lit('\x0B'),
         ]));
     }

     #[test]
     fn escape_boundaries() {
         assert_eq!(p(r"\A\z\b\B"), c(&[
             Expr::StartText, Expr::EndText,
             Expr::WordBoundary, Expr::NotWordBoundary,
         ]));
         assert_eq!(pb(r"(?-u)\b\B"), c(&[
             Expr::WordBoundaryAscii, Expr::NotWordBoundaryAscii,
         ]));
     }

     #[test]
     fn escape_punctuation() {
         assert_eq!(p(r"\\\.\+\*\?\(\)\|\[\]\{\}\^\$\#"), c(&[
             lit('\\'), lit('.'), lit('+'), lit('*'), lit('?'),
             lit('('), lit(')'), lit('|'), lit('['), lit(']'),
             lit('{'), lit('}'), lit('^'), lit('$'), lit('#'),
         ]));
     }

     #[test]
     fn escape_octal() {
         assert_eq!(p(r"\123"), lit('S'));
         assert_eq!(p(r"\1234"), c(&[lit('S'), lit('4')]));

         assert_eq!(pb(r"(?-u)\377"), blit(0xFF));
     }

     #[test]
     fn escape_hex2() {
         assert_eq!(p(r"\x53"), lit('S'));
         assert_eq!(p(r"\x534"), c(&[lit('S'), lit('4')]));

         assert_eq!(pb(r"(?-u)\xff"), blit(0xFF));
         assert_eq!(pb(r"(?-u)\x00"), blit(0x0));
         assert_eq!(pb(r"(?-u)[\x00]"),
                    Expr::ClassBytes(bclass(&[(b'\x00', b'\x00')])));
         assert_eq!(pb(r"(?-u)[^\x00]"),
                    Expr::ClassBytes(bclass(&[(b'\x01', b'\xFF')])));
     }

     #[test]
     fn escape_hex() {
         assert_eq!(p(r"\x{53}"), lit('S'));
         assert_eq!(p(r"\x{53}4"), c(&[lit('S'), lit('4')]));
         assert_eq!(p(r"\x{2603}"), lit('\u{2603}'));

         assert_eq!(pb(r"(?-u)\x{00FF}"), blit(0xFF));
     }

     #[test]
     fn escape_unicode_name() {
         assert_eq!(p(r"\p{Yi}"), Expr::Class(class(YI)));
     }

     #[test]
     fn escape_unicode_letter() {
         assert_eq!(p(r"\pZ"), Expr::Class(class(&[
             ('\u{20}', '\u{20}'), ('\u{a0}', '\u{a0}'),
             ('\u{1680}', '\u{1680}'), ('\u{2000}', '\u{200a}'),
             ('\u{2028}', '\u{2029}'), ('\u{202f}', '\u{202f}'),
             ('\u{205f}', '\u{205f}'), ('\u{3000}', '\u{3000}'),
         ])));
     }

     #[test]
     fn escape_unicode_name_case_fold() {
         assert_eq!(p(r"(?i)\p{Yi}"), Expr::Class(class(YI).case_fold()));
     }

     #[test]
     fn escape_unicode_letter_case_fold() {
         assert_eq!(p(r"(?i)\pZ"), Expr::Class(class(&[
             ('\u{20}', '\u{20}'), ('\u{a0}', '\u{a0}'),
             ('\u{1680}', '\u{1680}'), ('\u{2000}', '\u{200a}'),
             ('\u{2028}', '\u{2029}'), ('\u{202f}', '\u{202f}'),
             ('\u{205f}', '\u{205f}'), ('\u{3000}', '\u{3000}'),
         ]).case_fold()));
     }

     #[test]
     fn escape_unicode_name_negate() {
         assert_eq!(p(r"\P{Yi}"), Expr::Class(class(YI).negate()));
     }

     #[test]
     fn escape_unicode_letter_negate() {
         assert_eq!(p(r"\PZ"), Expr::Class(class(&[
             ('\u{20}', '\u{20}'), ('\u{a0}', '\u{a0}'),
             ('\u{1680}', '\u{1680}'), ('\u{2000}', '\u{200a}'),
             ('\u{2028}', '\u{2029}'), ('\u{202f}', '\u{202f}'),
             ('\u{205f}', '\u{205f}'), ('\u{3000}', '\u{3000}'),
         ]).negate()));
     }

     #[test]
     fn escape_unicode_name_negate_case_fold() {
         assert_eq!(p(r"(?i)\P{Yi}"),
                    Expr::Class(class(YI).negate().case_fold()));
     }

     #[test]
     fn escape_unicode_letter_negate_case_fold() {
         assert_eq!(p(r"(?i)\PZ"), Expr::Class(class(&[
             ('\u{20}', '\u{20}'), ('\u{a0}', '\u{a0}'),
             ('\u{1680}', '\u{1680}'), ('\u{2000}', '\u{200a}'),
             ('\u{2028}', '\u{2029}'), ('\u{202f}', '\u{202f}'),
             ('\u{205f}', '\u{205f}'), ('\u{3000}', '\u{3000}'),
         ]).negate().case_fold()));
     }

     #[test]
     fn escape_perl_d() {
         assert_eq!(p(r"\d"), Expr::Class(class(PERLD)));
         assert_eq!(pb(r"(?-u)\d"), Expr::Class(asciid()));
     }

     #[test]
     fn escape_perl_s() {
         assert_eq!(p(r"\s"), Expr::Class(class(PERLS)));
         assert_eq!(pb(r"(?-u)\s"), Expr::Class(asciis()));
     }

     /// `\w` is expected to be `PERLW` by default and `asciiw()` under
     /// `(?-u)`.
     #[test]
     fn escape_perl_w() {
         let unicode = Expr::Class(class(PERLW));
         assert_eq!(p(r"\w"), unicode);
         let ascii = Expr::Class(asciiw());
         assert_eq!(pb(r"(?-u)\w"), ascii);
     }

     /// `\D` is expected to be the negation of what `\d` yields, in both the
     /// default and the `(?-u)` case.
     #[test]
     fn escape_perl_d_negate() {
         let unicode = Expr::Class(class(PERLD).negate());
         assert_eq!(p(r"\D"), unicode);
         let ascii = Expr::Class(asciid().negate());
         assert_eq!(pb(r"(?-u)\D"), ascii);
     }

     /// `\S` is expected to be the negation of what `\s` yields, in both the
     /// default and the `(?-u)` case.
     #[test]
     fn escape_perl_s_negate() {
         let unicode = Expr::Class(class(PERLS).negate());
         assert_eq!(p(r"\S"), unicode);
         let ascii = Expr::Class(asciis().negate());
         assert_eq!(pb(r"(?-u)\S"), ascii);
     }

     /// `\W` is expected to be the negation of what `\w` yields, in both the
     /// default and the `(?-u)` case.
     #[test]
     fn escape_perl_w_negate() {
         let unicode = Expr::Class(class(PERLW).negate());
         assert_eq!(p(r"\W"), unicode);
         let ascii = Expr::Class(asciiw().negate());
         assert_eq!(pb(r"(?-u)\W"), ascii);
     }

     /// With the `i` flag set, `\d` is expected to be the case-folded digit
     /// class (`PERLD`, or `asciid()` under `-u`).
     #[test]
     fn escape_perl_d_case_fold() {
         let unicode = Expr::Class(class(PERLD).case_fold());
         assert_eq!(p(r"(?i)\d"), unicode);
         let ascii = Expr::Class(asciid().case_fold());
         assert_eq!(pb(r"(?i-u)\d"), ascii);
     }

     /// With the `i` flag set, `\s` is expected to be the case-folded space
     /// class (`PERLS`, or `asciis()` under `-u`).
     #[test]
     fn escape_perl_s_case_fold() {
         let unicode = Expr::Class(class(PERLS).case_fold());
         assert_eq!(p(r"(?i)\s"), unicode);
         let ascii = Expr::Class(asciis().case_fold());
         assert_eq!(pb(r"(?i-u)\s"), ascii);
     }

     /// With the `i` flag set, `\w` is expected to be the case-folded word
     /// class (`PERLW`, or `asciiw()` under `-u`).
     #[test]
     fn escape_perl_w_case_fold() {
         let unicode = Expr::Class(class(PERLW).case_fold());
         assert_eq!(p(r"(?i)\w"), unicode);
         let ascii = Expr::Class(asciiw().case_fold());
         assert_eq!(pb(r"(?i-u)\w"), ascii);
     }

     /// With the `i` flag set, `\D` is expected to be the digit class case
     /// folded first and negated second.
     #[test]
     fn escape_perl_d_case_fold_negate() {
         let unicode = class(PERLD).case_fold().negate();
         assert_eq!(p(r"(?i)\D"), Expr::Class(unicode));
         let ascii = Expr::Class(asciid().case_fold().negate());
         assert_eq!(pb(r"(?i-u)\D"), ascii);
     }

     /// With the `i` flag set, `\S` is expected to be the space class case
     /// folded first and negated second.
     #[test]
     fn escape_perl_s_case_fold_negate() {
         let unicode = class(PERLS).case_fold().negate();
         assert_eq!(p(r"(?i)\S"), Expr::Class(unicode));
         let ascii = Expr::Class(asciis().case_fold().negate());
         assert_eq!(pb(r"(?i-u)\S"), ascii);
     }

     /// With the `i` flag set, `\W` is expected to be the word class case
     /// folded first and negated second.
     #[test]
     fn escape_perl_w_case_fold_negate() {
         let unicode = class(PERLW).case_fold().negate();
         assert_eq!(p(r"(?i)\W"), Expr::Class(unicode));
         let ascii = Expr::Class(asciiw().case_fold().negate());
         assert_eq!(pb(r"(?i-u)\W"), ascii);
     }

     /// A bracketed class holding a single character is expected to parse to
     /// a one-range class, as `Expr::Class` by default and as
     /// `Expr::ClassBytes` under `(?-u)`.
     #[test]
     fn class_singleton() {
         // Builders for the expectation shared by the escaped and the literal
         // newline spellings.
         let newline = || Expr::Class(class(&[('\n', '\n')]));
         let newline_bytes = || Expr::ClassBytes(bclass(&[(b'\n', b'\n')]));

         assert_eq!(p(r"[a]"), Expr::Class(class(&[('a', 'a')])));
         assert_eq!(p(r"[\x00]"), Expr::Class(class(&[('\x00', '\x00')])));
         assert_eq!(p(r"[\n]"), newline());
         assert_eq!(p("[\n]"), newline());

         assert_eq!(pb(r"(?-u)[a]"), Expr::ClassBytes(bclass(&[(b'a', b'a')])));
         assert_eq!(pb(r"(?-u)[\x00]"), Expr::ClassBytes(bclass(&[(0, 0)])));
         let high_byte = Expr::ClassBytes(bclass(&[(0xFF, 0xFF)]));
         assert_eq!(pb(r"(?-u)[\xFF]"), high_byte);
         assert_eq!(pb("(?-u)[\n]"), newline_bytes());
         assert_eq!(pb(r"(?-u)[\n]"), newline_bytes());
     }

     /// A negated single-character class is expected to parse to the ranges on
     /// either side of that character, up to `\u{10FFFF}` by default and up
     /// to `0xFF` under `(?-u)`.
     #[test]
     fn class_singleton_negate() {
         // Builders for the expectation shared by the escaped and the literal
         // newline spellings.
         let not_newline = || Expr::Class(class(&[
             ('\x00', '\x09'), ('\x0b', '\u{10FFFF}'),
         ]));
         let not_newline_bytes = || Expr::ClassBytes(bclass(&[
             (0x00, 0x09), (0x0B, 0xFF),
         ]));

         let not_a = Expr::Class(class(&[
             ('\x00', '\x60'), ('\x62', '\u{10FFFF}'),
         ]));
         assert_eq!(p(r"[^a]"), not_a);
         let not_nul = Expr::Class(class(&[('\x01', '\u{10FFFF}')]));
         assert_eq!(p(r"[^\x00]"), not_nul);
         assert_eq!(p(r"[^\n]"), not_newline());
         assert_eq!(p("[^\n]"), not_newline());

         let not_a_bytes = Expr::ClassBytes(bclass(&[
             (0x00, 0x60), (0x62, 0xFF),
         ]));
         assert_eq!(pb(r"(?-u)[^a]"), not_a_bytes);
         let not_nul_bytes = Expr::ClassBytes(bclass(&[(0x01, 0xFF)]));
         assert_eq!(pb(r"(?-u)[^\x00]"), not_nul_bytes);
         assert_eq!(pb(r"(?-u)[^\n]"), not_newline_bytes());
         assert_eq!(pb("(?-u)[^\n]"), not_newline_bytes());
     }

     /// A bracketed class wrapping a single class escape is expected to parse
     /// to that class; under `(?-u)` the expectation is `PERLD` converted
     /// with `to_byte_class`.
     #[test]
     fn class_singleton_class() {
         let digits = Expr::Class(class(PERLD));
         assert_eq!(p(r"[\d]"), digits);
         let yi = Expr::Class(class(YI));
         assert_eq!(p(r"[\p{Yi}]"), yi);

         let digit_bytes = Expr::ClassBytes(class(PERLD).to_byte_class());
         assert_eq!(pb(r"(?-u)[\d]"), digit_bytes);
     }

     /// `[^\d]`, `[^\w]` and `[^\s]` are expected to parse to the negation of
     /// the corresponding Perl class, in char and byte form.
     #[test]
     fn class_singleton_class_negate() {
         let not_digit = Expr::Class(class(PERLD).negate());
         assert_eq!(p(r"[^\d]"), not_digit);
         let not_word = Expr::Class(class(PERLW).negate());
         assert_eq!(p(r"[^\w]"), not_word);
         let not_space = Expr::Class(class(PERLS).negate());
         assert_eq!(p(r"[^\s]"), not_space);

         let not_digit_bytes = Expr::ClassBytes(asciid_bytes().negate());
         assert_eq!(pb(r"(?-u)[^\d]"), not_digit_bytes);
         let not_word_bytes = Expr::ClassBytes(asciiw_bytes().negate());
         assert_eq!(pb(r"(?-u)[^\w]"), not_word_bytes);
         let not_space_bytes = Expr::ClassBytes(asciis_bytes().negate());
         assert_eq!(pb(r"(?-u)[^\s]"), not_space_bytes);
     }

     /// Negating an already-negated Perl class (`[^\D]` and friends) is
     /// expected to give back the plain class.
     #[test]
     fn class_singleton_class_negate_negate() {
         let digit = Expr::Class(class(PERLD));
         assert_eq!(p(r"[^\D]"), digit);
         let word = Expr::Class(class(PERLW));
         assert_eq!(p(r"[^\W]"), word);
         let space = Expr::Class(class(PERLS));
         assert_eq!(p(r"[^\S]"), space);

         let digit_bytes = Expr::ClassBytes(asciid_bytes());
         assert_eq!(pb(r"(?-u)[^\D]"), digit_bytes);
         let word_bytes = Expr::ClassBytes(asciiw_bytes());
         assert_eq!(pb(r"(?-u)[^\W]"), word_bytes);
         let space_bytes = Expr::ClassBytes(asciis_bytes());
         assert_eq!(pb(r"(?-u)[^\S]"), space_bytes);
     }

     /// With the `i` flag set, a bracketed single class escape is expected to
     /// parse to the case-folded class.
     #[test]
     fn class_singleton_class_casei() {
         let digits = Expr::Class(class(PERLD).case_fold());
         assert_eq!(p(r"(?i)[\d]"), digits);
         let yi = Expr::Class(class(YI).case_fold());
         assert_eq!(p(r"(?i)[\p{Yi}]"), yi);

         let digit_bytes = Expr::ClassBytes(asciid_bytes().case_fold());
         assert_eq!(pb(r"(?i-u)[\d]"), digit_bytes);
     }

     /// With the `i` flag set, a negated bracketed Perl class is expected to
     /// be case folded first and negated second.
     #[test]
     fn class_singleton_class_negate_casei() {
         let not_digit = Expr::Class(class(PERLD).case_fold().negate());
         assert_eq!(p(r"(?i)[^\d]"), not_digit);
         let not_word = Expr::Class(class(PERLW).case_fold().negate());
         assert_eq!(p(r"(?i)[^\w]"), not_word);
         let not_space = Expr::Class(class(PERLS).case_fold().negate());
         assert_eq!(p(r"(?i)[^\s]"), not_space);

         let not_digit_bytes = asciid_bytes().case_fold().negate();
         assert_eq!(pb(r"(?i-u)[^\d]"), Expr::ClassBytes(not_digit_bytes));
         let not_word_bytes = asciiw_bytes().case_fold().negate();
         assert_eq!(pb(r"(?i-u)[^\w]"), Expr::ClassBytes(not_word_bytes));
         let not_space_bytes = asciis_bytes().case_fold().negate();
         assert_eq!(pb(r"(?i-u)[^\s]"), Expr::ClassBytes(not_space_bytes));
     }

     /// With the `i` flag set, double negation (`[^\D]` and friends) is
     /// expected to give the case-folded plain class.
     #[test]
     fn class_singleton_class_negate_negate_casei() {
         let digit = Expr::Class(class(PERLD).case_fold());
         assert_eq!(p(r"(?i)[^\D]"), digit);
         let word = Expr::Class(class(PERLW).case_fold());
         assert_eq!(p(r"(?i)[^\W]"), word);
         let space = Expr::Class(class(PERLS).case_fold());
         assert_eq!(p(r"(?i)[^\S]"), space);

         let digit_bytes = Expr::ClassBytes(asciid_bytes().case_fold());
         assert_eq!(pb(r"(?i-u)[^\D]"), digit_bytes);
         let word_bytes = Expr::ClassBytes(asciiw_bytes().case_fold());
         assert_eq!(pb(r"(?i-u)[^\W]"), word_bytes);
         let space_bytes = Expr::ClassBytes(asciis_bytes().case_fold());
         assert_eq!(pb(r"(?i-u)[^\S]"), space_bytes);
     }

     /// Two class escapes inside one bracket are expected to parse to the
     /// combination of `PERLD` and `YI` built by `classes`.
     #[test]
     fn class_multiple_class() {
         let combined = classes(&[PERLD, YI]);
         assert_eq!(p(r"[\d\p{Yi}]"), Expr::Class(combined));
     }

     /// A negated bracket with two class escapes is expected to parse to the
     /// negation of the combined `PERLD` and `YI` class.
     #[test]
     fn class_multiple_class_negate() {
         let combined = classes(&[PERLD, YI]);
         assert_eq!(p(r"[^\d\p{Yi}]"), Expr::Class(combined.negate()));
     }

     /// `[^\W\P{Yi}]` is expected to be the negation of the merge of the two
     /// individually negated classes.
     #[test]
     fn class_multiple_class_negate_negate() {
         let merged = CharClass::empty()
             .merge(class(PERLW).negate())
             .merge(class(YI).negate());
         assert_eq!(p(r"[^\W\P{Yi}]"), Expr::Class(merged.negate()));
     }

     /// With the `i` flag set, a bracket with two class escapes is expected
     /// to parse to the case-folded combined class.
     #[test]
     fn class_multiple_class_casei() {
         let combined = classes(&[PERLD, YI]);
         assert_eq!(p(r"(?i)[\d\p{Yi}]"), Expr::Class(combined.case_fold()));
     }

     /// With the `i` flag set, a negated bracket with two class escapes is
     /// expected to be case folded first and negated second.
     #[test]
     fn class_multiple_class_negate_casei() {
         let combined = classes(&[PERLD, YI]);
         let expected = Expr::Class(combined.case_fold().negate());
         assert_eq!(p(r"(?i)[^\d\p{Yi}]"), expected);
     }

     /// With the `i` flag set, `[^\W\P{Yi}]` is expected to be the merge of
     /// the two negated classes, case folded and then negated.
     #[test]
     fn class_multiple_class_negate_negate_casei() {
         // The merged value is named `cls` so it does not shadow the `class`
         // helper.
         let cls = CharClass::empty()
             .merge(class(PERLW).negate())
             .merge(class(YI).negate());
         let expected = Expr::Class(cls.case_fold().negate());
         assert_eq!(p(r"(?i)[^\W\P{Yi}]"), expected);
     }

     /// A `-` that follows a class escape inside a bracket is expected to be
     /// taken as a literal hyphen, not as a range operator.
     #[test]
     fn class_class_hypen() {
         let trailing = Expr::Class(classes(&[
             &[('-', '-')], YI,
         ]));
         assert_eq!(p(r"[\p{Yi}-]"), trailing);

         let followed_by_a = Expr::Class(classes(&[
             &[('-', '-')], &[('a', 'a')], YI,
         ]));
         assert_eq!(p(r"[\p{Yi}-a]"), followed_by_a);
     }

     /// Pins how `[` and `]` are read inside a class: a leading `]` is a
     /// member, while in `[[]]` the class closes at the first `]` and the
     /// second one is a separate literal.
     #[test]
     fn class_brackets() {
         let close_only = Expr::Class(class(&[(']', ']')]));
         assert_eq!(p("[]]"), close_only);

         let both = Expr::Class(class(&[('[', '['), (']', ']')]));
         assert_eq!(p("[][]"), both);

         let open_then_literal = Expr::Concat(vec![
             Expr::Class(class(&[('[', '[')])),
             lit(']'),
         ]);
         assert_eq!(p("[[]]"), open_then_literal);
     }

     /// Pins how `]` and `-` combine: `[]-]` is one class of both characters,
     /// while `[-]]` is a class of `-` followed by a literal `]`.
     #[test]
     fn class_brackets_hypen() {
         let both = Expr::Class(class(&[('-', '-'), (']', ']')]));
         assert_eq!(p("[]-]"), both);

         let hyphen_then_literal = Expr::Concat(vec![
             Expr::Class(class(&[('-', '-')])),
             lit(']'),
         ]);
         assert_eq!(p("[-]]"), hyphen_then_literal);
     }

     /// Overlapping (`a-f`, `d-h`) and adjacent (`a-f`, `g-m`) ranges are
     /// expected to come back as one merged range, in char and byte form.
     #[test]
     fn class_overlapping() {
         let a_to_h = Expr::Class(class(&[('a', 'h')]));
         assert_eq!(p("[a-fd-h]"), a_to_h);
         let a_to_m = Expr::Class(class(&[('a', 'm')]));
         assert_eq!(p("[a-fg-m]"), a_to_m);

         let a_to_h_bytes = Expr::ClassBytes(bclass(&[(b'a', b'h')]));
         assert_eq!(pb("(?-u)[a-fd-h]"), a_to_h_bytes);
         let a_to_m_bytes = Expr::ClassBytes(bclass(&[(b'a', b'm')]));
         assert_eq!(pb("(?-u)[a-fg-m]"), a_to_m_bytes);
     }

     /// `[:upper:]` is expected to parse to the `UPPER` class both bare and
     /// inside an outer bracket.
     ///
     /// Under `(?-u)` the bare form is still expected as `Expr::Class`, while
     /// the bracketed form is expected as `Expr::ClassBytes`.
     #[test]
     fn ascii_classes() {
         let upper = || Expr::Class(class(UPPER));
         assert_eq!(p("[:upper:]"), upper());
         assert_eq!(p("[[:upper:]]"), upper());

         assert_eq!(pb("(?-u)[:upper:]"), upper());
         let upper_bytes = Expr::ClassBytes(class(UPPER).to_byte_class());
         assert_eq!(pb("(?-u)[[:upper:]]"), upper_bytes);
     }

     /// `[:abc:]` is not a named class, so it is expected to parse as an
     /// ordinary class holding `:` and `a` through `c`.
     #[test]
     fn ascii_classes_not() {
         let chars = Expr::Class(class(&[(':', ':'), ('a', 'c')]));
         assert_eq!(p("[:abc:]"), chars);
         let bytes = Expr::ClassBytes(bclass(&[(b':', b':'), (b'a', b'c')]));
         assert_eq!(pb("(?-u)[:abc:]"), bytes);
     }

     /// Two named classes in one bracket are expected to parse to the
     /// combination of `UPPER` and `LOWER`.
     #[test]
     fn ascii_classes_multiple() {
         let chars = Expr::Class(classes(&[UPPER, LOWER]));
         assert_eq!(p("[[:lower:][:upper:]]"), chars);

         let bytes = classes(&[UPPER, LOWER]).to_byte_class();
         assert_eq!(pb("(?-u)[[:lower:][:upper:]]"), Expr::ClassBytes(bytes));
     }

     /// `[:^upper:]` is expected to negate `UPPER`, and negating the outer
     /// bracket as well is expected to give `UPPER` back.
     #[test]
     fn ascii_classes_negate() {
         let not_upper = Expr::Class(class(UPPER).negate());
         assert_eq!(p("[[:^upper:]]"), not_upper);
         let upper = Expr::Class(class(UPPER));
         assert_eq!(p("[^[:^upper:]]"), upper);

         let not_upper_bytes = class(UPPER).to_byte_class().negate();
         assert_eq!(pb("(?-u)[[:^upper:]]"),
                    Expr::ClassBytes(not_upper_bytes));
         let upper_bytes = class(UPPER).to_byte_class();
         assert_eq!(pb("(?-u)[^[:^upper:]]"), Expr::ClassBytes(upper_bytes));
     }

     /// Two negated named classes in one bracket are expected to parse to the
     /// merge of both negations; negating the bracket negates that merge.
     #[test]
     fn ascii_classes_negate_multiple() {
         let not_lower = class(LOWER).negate();
         let not_word = class(WORD).negate();
         let merged = CharClass::empty().merge(not_lower).merge(not_word);
         let plain = Expr::Class(merged.clone());
         assert_eq!(p("[[:^lower:][:^word:]]"), plain);
         let negated = Expr::Class(merged.negate());
         assert_eq!(p("[^[:^lower:][:^word:]]"), negated);
     }

     /// With the `i` flag set, `[:upper:]` is expected to parse to the
     /// case-folded `UPPER` class.
     ///
     /// Under `(?i-u)` the bare form is still expected as `Expr::Class`,
     /// while the bracketed form is expected as `Expr::ClassBytes`.
     #[test]
     fn ascii_classes_case_fold() {
         let folded = || Expr::Class(class(UPPER).case_fold());
         assert_eq!(p("(?i)[:upper:]"), folded());
         assert_eq!(p("(?i)[[:upper:]]"), folded());

         assert_eq!(pb("(?i-u)[:upper:]"), folded());
         let folded_bytes = class(UPPER).to_byte_class().case_fold();
         assert_eq!(pb("(?i-u)[[:upper:]]"), Expr::ClassBytes(folded_bytes));
     }

     /// With the `i` flag set, `[:^upper:]` is expected to be `UPPER` case
     /// folded and then negated; a second negation gives the folded class.
     #[test]
     fn ascii_classes_negate_case_fold() {
         let not_upper = Expr::Class(class(UPPER).case_fold().negate());
         assert_eq!(p("(?i)[[:^upper:]]"), not_upper);
         let upper = Expr::Class(class(UPPER).case_fold());
         assert_eq!(p("(?i)[^[:^upper:]]"), upper);

         let not_upper_bytes =
             class(UPPER).to_byte_class().case_fold().negate();
         assert_eq!(pb("(?i-u)[[:^upper:]]"),
                    Expr::ClassBytes(not_upper_bytes));
         let upper_bytes = class(UPPER).to_byte_class().case_fold();
         assert_eq!(pb("(?i-u)[^[:^upper:]]"), Expr::ClassBytes(upper_bytes));
     }

     /// With the `i` flag set, `[^x]` is expected to be the class of `x` case
     /// folded first and negated second.
     #[test]
     fn single_class_negate_case_fold() {
         let chars = class(&[('x', 'x')]).case_fold().negate();
         assert_eq!(p("(?i)[^x]"), Expr::Class(chars));

         let bytes = class(&[('x', 'x')])
             .to_byte_class()
             .case_fold()
             .negate();
         assert_eq!(pb("(?i-u)[^x]"), Expr::ClassBytes(bytes));
     }

     /// With the `x` flag set, a pattern that is only a space is expected to
     /// parse to `Expr::Empty`.
     #[test]
     fn ignore_space_empty() {
         let parsed = p("(?x) ");
         assert_eq!(parsed, Expr::Empty);
     }

     /// With the `x` flag set, spaces between literals are expected to be
     /// dropped.
     #[test]
     fn ignore_space_literal() {
         let expected = Expr::Concat(vec![lit('a'), lit('b'), lit('c')]);
         assert_eq!(p("(?x) a b c"), expected);
     }

     /// Once `(?-x)` clears the flag, a following space is expected to be a
     /// literal again.
     #[test]
     fn ignore_space_literal_off() {
         let expected = Expr::Concat(vec![
             lit('a'),
             lit('b'),
             lit('c'),
             lit(' '),
             lit('a'),
         ]);
         assert_eq!(p("(?x) a b c(?-x) a"), expected);
     }

     /// With the `x` flag set, spaces and newlines inside a bracketed class
     /// (including around `^` and `-`) are expected to be ignored, so both
     /// patterns reduce to `a-z` and its negation.
     ///
     /// The string literals deliberately span several source lines; their
     /// embedded whitespace is part of the pattern under test.
     #[test]
     fn ignore_space_class() {
         assert_eq!(p("(?x)[a
         - z
 ]"), Expr::Class(class(&[('a', 'z')])));
         assert_eq!(p("(?x)[  ^   a
         - z
 ]"), Expr::Class(class(&[('a', 'z')]).negate()));
     }

     /// With the `x` flag set, whitespace between a backslash and its escape
     /// letter is expected to be skipped: `\ d` yields `PERLD`, and a `\`
     /// separated from `D` by a newline and indentation yields its negation.
     #[test]
     fn ignore_space_escape() {
         assert_eq!(p(r"(?x)\ d"), Expr::Class(class(PERLD)));
         // The raw string continues onto the next source line on purpose.
         assert_eq!(p(r"(?x)\
                      D"), Expr::Class(class(PERLD).negate()));
     }

     /// With the `x` flag set, `# ...` text up to the end of a line inside a
     /// group is expected to be dropped along with the surrounding
     /// whitespace, leaving two named capture groups (indices 1 and 2) that
     /// each hold a single literal.
     #[test]
     fn ignore_space_comments() {
         assert_eq!(p(r"(?x)(?P<foo>
     a # comment 1
 )(?P<bar>
     z # comment 2
 )"), Expr::Concat(vec![
         Expr::Group {
             e: Box::new(lit('a')),
             i: Some(1),
             name: Some("foo".into()),
         },
         Expr::Group {
             e: Box::new(lit('z')),
             i: Some(2),
             name: Some("bar".into()),
         },
     ]));
     }

     /// Inside a `(?-x:...)` group the `#` is expected to be a literal, while
     /// the `# hi` and `# sweet` text outside it (where `x` is set) is
     /// expected to be dropped. The flag group carries no capture index or
     /// name.
     #[test]
     fn ignore_space_comments_re_enable() {
         assert_eq!(p(r"(?x)a # hi
 (?-x:#) # sweet"), Expr::Concat(vec![
             lit('a'),
             Expr::Group {
                 e: Box::new(lit('#')),
                 i: None,
                 name: None,
             },
         ]));
     }

     /// With the `x` flag set, each backslash-escaped punctuation character,
     /// `\#` included, is expected to parse to the corresponding literal.
     #[test]
     fn ignore_space_escape_punctuation() {
         let expected = c(&[
             lit('\\'), lit('.'), lit('+'), lit('*'), lit('?'),
             lit('('), lit(')'), lit('|'), lit('['), lit(']'),
             lit('{'), lit('}'), lit('^'), lit('$'), lit('#'),
         ]);
         assert_eq!(p(r"(?x)\\\.\+\*\?\(\)\|\[\]\{\}\^\$\#"), expected);
     }

     /// With the `x` flag set, `\#` is expected to be a literal `#`, and the
     /// unescaped `# hi there` after it is expected to be dropped.
     #[test]
     fn ignore_space_escape_hash() {
         let expected = Expr::Concat(vec![lit('a'), lit('#')]);
         assert_eq!(p(r"(?x)a\# # hi there"), expected);
     }

     // Test every single possible error case.

     // Asserts that parsing `$re` fails and that the error reports position
     // `$pos` and kind `$kind`.
     //
     // The three-argument form parses with `Flags::default()`; the
     // four-argument form takes the flags explicitly.
     macro_rules! test_err {
         ($re:expr, $pos:expr, $kind:expr) => {
             test_err!($re, $pos, $kind, Flags::default());
         };
         ($re:expr, $pos:expr, $kind:expr, $flags:expr) => {{
             // `unwrap_err` panics, failing the test, if the parse succeeds.
             let err = Parser::parse($re, $flags).unwrap_err();
             assert_eq!($pos, err.pos);
             assert_eq!($kind, err.kind);
             // The error's `surround` text must be a substring of the pattern.
             assert!($re.contains(&err.surround));
         }}
     }

     /// Patterns written with `(?-u)` that could match bytes outside valid
     /// UTF-8 are expected to fail with `InvalidUtf8`.
     #[test]
     fn invalid_utf8_not_allowed() {
         // These use the three-argument `test_err!`, i.e. `Flags::default()`,
         // in which `allow_bytes` is false. The `unicode` flag is turned off
         // inside each pattern via `(?-u)` rather than through the flags.
         test_err!(r"(?-u)\xFF", 9, ErrorKind::InvalidUtf8);
         test_err!(r"(?-u).", 5, ErrorKind::InvalidUtf8);
         test_err!(r"(?-u)(?s).", 9, ErrorKind::InvalidUtf8);
         test_err!(r"(?-u)[\x00-\x80]", 15, ErrorKind::InvalidUtf8);
         test_err!(r"(?-u)\222", 9, ErrorKind::InvalidUtf8);
         test_err!(r"(?-u)\x{0080}", 13, ErrorKind::InvalidUtf8);
     }

     /// With `allow_bytes` set, a non-ASCII literal inside a `(?-u:...)`
     /// group is expected to fail with `UnicodeNotAllowed`.
     #[test]
     fn unicode_char_not_allowed() {
         let mut flags = Flags::default();
         flags.allow_bytes = true;
         test_err!("☃(?-u:☃)", 7, ErrorKind::UnicodeNotAllowed, flags);
     }

     /// With `allow_bytes` set, `\pL` inside a `(?-u:...)` group is expected
     /// to fail with `UnicodeNotAllowed`.
     #[test]
     fn unicode_class_not_allowed() {
         let mut flags = Flags::default();
         flags.allow_bytes = true;
         test_err!(r"☃(?-u:\pL)", 9, ErrorKind::UnicodeNotAllowed, flags);
     }

     /// With `allow_bytes` set and `(?-u)` active, a non-ASCII character in a
     /// bracketed class (alone or as a range endpoint) is expected to fail
     /// with `UnicodeNotAllowed`.
     #[test]
     fn unicode_class_literal_not_allowed() {
         let mut flags = Flags::default();
         flags.allow_bytes = true;
         let kind = ErrorKind::UnicodeNotAllowed;
         test_err!(r"(?-u)[☃]", 6, kind, flags);
         test_err!(r"(?-u)[☃-☃]", 6, kind, flags);
     }

     /// With `allow_bytes` set and `(?-u)` active, `\x{...}` escapes above
     /// `0xFF` are expected to fail with `UnicodeNotAllowed`.
     #[test]
     fn unicode_hex_not_allowed() {
         let mut flags = Flags::default();
         flags.allow_bytes = true;
         let kind = ErrorKind::UnicodeNotAllowed;
         test_err!(r"(?-u)\x{FFFF}", 13, kind, flags);
         test_err!(r"(?-u)\x{100}", 12, kind, flags);
     }

     /// With `allow_bytes` set and `(?-u)` active, the octal escape `\400` is
     /// expected to fail with `UnicodeNotAllowed`.
     #[test]
     fn unicode_octal_not_allowed() {
         let mut flags = Flags::default();
         flags.allow_bytes = true;
         test_err!(r"(?-u)\400", 9, ErrorKind::UnicodeNotAllowed, flags);
     }

     /// `(*` is expected to fail at position 1 with `RepeaterExpectsExpr`.
     #[test]
     fn error_repeat_no_expr_simple() {
         let kind = ErrorKind::RepeaterExpectsExpr;
         test_err!("(*", 1, kind);
     }

     /// `({5}` is expected to fail at position 1 with `RepeaterExpectsExpr`.
     #[test]
     fn error_repeat_no_expr_counted() {
         let kind = ErrorKind::RepeaterExpectsExpr;
         test_err!("({5}", 1, kind);
     }

     /// `{5}` at the very start is expected to fail at position 0 with
     /// `RepeaterExpectsExpr`.
     #[test]
     fn error_repeat_beginning_counted() {
         let kind = ErrorKind::RepeaterExpectsExpr;
         test_err!("{5}", 0, kind);
     }

     /// A `*` applied to an existing repeat (`a**`) or placed right after an
     /// alternation bar (`a|*`) is expected to fail with
     /// `RepeaterUnexpectedExpr` carrying the offending expression.
     #[test]
     fn error_repeat_illegal_exprs_simple() {
         let star = Expr::Repeat {
             e: b(lit('a')),
             r: Repeater::ZeroOrMore,
             greedy: true,
         };
         test_err!("a**", 2, ErrorKind::RepeaterUnexpectedExpr(star));

         let alt = Expr::Alternate(vec![lit('a')]);
         test_err!("a|*", 2, ErrorKind::RepeaterUnexpectedExpr(alt));
     }

     /// A `{5}` applied to an existing repeat (`a*{5}`) or placed right after
     /// an alternation bar (`a|{5}`) is expected to fail with
     /// `RepeaterUnexpectedExpr` carrying the offending expression.
     #[test]
     fn error_repeat_illegal_exprs_counted() {
         let star = Expr::Repeat {
             e: b(lit('a')),
             r: Repeater::ZeroOrMore,
             greedy: true,
         };
         test_err!("a*{5}", 2, ErrorKind::RepeaterUnexpectedExpr(star));

         let alt = Expr::Alternate(vec![lit('a')]);
         test_err!("a|{5}", 2, ErrorKind::RepeaterUnexpectedExpr(alt));
     }

     /// `a{}` is expected to fail at position 2 with `MissingBase10`.
     #[test]
     fn error_repeat_empty_number() {
         let kind = ErrorKind::MissingBase10;
         test_err!("a{}", 2, kind);
     }

     /// `a{5` with no closing brace is expected to fail at position 3 with
     /// `UnclosedRepeat`.
     #[test]
     fn error_repeat_eof() {
         let kind = ErrorKind::UnclosedRepeat;
         test_err!("a{5", 3, kind);
     }

     /// A non-numeric count that runs to the end of the input is expected to
     /// fail with `InvalidBase10` carrying the offending text.
     #[test]
     fn error_repeat_empty_number_eof() {
         let kind = ErrorKind::InvalidBase10("xyz".into());
         test_err!("a{xyz", 5, kind);
         test_err!("a{12,xyz", 8, kind);
     }

     /// The count `9999999999` is expected to be rejected with
     /// `InvalidBase10`, both as a minimum and as a maximum.
     #[test]
     fn error_repeat_invalid_number() {
         let kind = ErrorKind::InvalidBase10("9999999999".into());
         test_err!("a{9999999999}", 12, kind);
         test_err!("a{1,9999999999}", 14, kind);
     }

     /// A count with trailing non-digits (`12x`) is expected to fail with
     /// `InvalidBase10`, both as a minimum and as a maximum.
     #[test]
     fn error_repeat_invalid_number_extra() {
         let kind = ErrorKind::InvalidBase10("12x".into());
         test_err!("a{12x}", 5, kind);
         test_err!("a{1,12x}", 7, kind);
     }

     /// `a{2,1}` (minimum above maximum) is expected to fail with
     /// `InvalidRepeatRange` carrying both bounds.
     #[test]
     fn error_repeat_invalid_range() {
         let kind = ErrorKind::InvalidRepeatRange { min: 2, max: 1 };
         test_err!("a{2,1}", 5, kind);
     }

     /// `|a` is expected to fail at position 0 with `EmptyAlternate`.
     #[test]
     fn error_alternate_empty() {
         let kind = ErrorKind::EmptyAlternate;
         test_err!("|a", 0, kind);
     }

     /// `(|a)` is expected to fail at position 1 with `EmptyAlternate`.
     #[test]
     fn error_alternate_empty_with_group() {
         let kind = ErrorKind::EmptyAlternate;
         test_err!("(|a)", 1, kind);
     }

     /// `a||` is expected to fail at position 2 with `EmptyAlternate`.
     #[test]
     fn error_alternate_empty_with_alternate() {
         let kind = ErrorKind::EmptyAlternate;
         test_err!("a||", 2, kind);
     }

     /// A lone `)` is expected to fail at position 0 with `UnopenedParen`.
     #[test]
     fn error_close_paren_unopened_empty() {
         let kind = ErrorKind::UnopenedParen;
         test_err!(")", 0, kind);
     }

     /// `ab)` is expected to fail at position 2 with `UnopenedParen`.
     #[test]
     fn error_close_paren_unopened() {
         let kind = ErrorKind::UnopenedParen;
         test_err!("ab)", 2, kind);
     }

     /// `a|b)` is expected to fail at position 3 with `UnopenedParen`.
     #[test]
     fn error_close_paren_unopened_with_alt() {
         let kind = ErrorKind::UnopenedParen;
         test_err!("a|b)", 3, kind);
     }

     /// `(a|b` is expected to fail with `UnclosedParen`, reported at position
     /// 0 (the opening paren).
     #[test]
     fn error_close_paren_unclosed_with_alt() {
         let kind = ErrorKind::UnclosedParen;
         test_err!("(a|b", 0, kind);
     }

     /// `(a|)` is expected to fail at position 3 with `EmptyAlternate`.
     #[test]
     fn error_close_paren_empty_alt() {
         let kind = ErrorKind::EmptyAlternate;
         test_err!("(a|)", 3, kind);
     }

     /// `()` is expected to fail at position 1 with `EmptyGroup`.
     #[test]
     fn error_close_paren_empty_group() {
         let kind = ErrorKind::EmptyGroup;
         test_err!("()", 1, kind);
     }

     /// A named group with no body, `(?P<foo>)`, is expected to fail at
     /// position 8 with `EmptyGroup`.
     #[test]
     fn error_close_paren_empty_group_with_name() {
         let kind = ErrorKind::EmptyGroup;
         test_err!("(?P<foo>)", 8, kind);
     }

     /// `ab(xy` is expected to fail with `UnclosedParen`, reported at
     /// position 2 (the opening paren).
     #[test]
     fn error_finish_concat_unclosed() {
         let kind = ErrorKind::UnclosedParen;
         test_err!("ab(xy", 2, kind);
     }

     /// `a|` is expected to fail at position 2 with `EmptyAlternate`.
     #[test]
     fn error_finish_concat_empty_alt() {
         let kind = ErrorKind::EmptyAlternate;
         test_err!("a|", 2, kind);
     }

     /// The capture name `a#` is expected to be rejected with
     /// `InvalidCaptureName` carrying the name.
     #[test]
     fn error_group_name_invalid() {
         let kind = ErrorKind::InvalidCaptureName("a#".into());
         test_err!("(?P<a#>x)", 6, kind);
     }

     /// The capture name `1a` (leading digit) is expected to be rejected with
     /// `InvalidCaptureName` carrying the name.
     #[test]
     fn error_group_name_invalid_leading() {
         let kind = ErrorKind::InvalidCaptureName("1a".into());
         test_err!("(?P<1a>a)", 6, kind);
     }

     /// A capture name cut off by the end of the input is expected to fail
     /// with `UnclosedCaptureName` carrying the partial name.
     #[test]
     fn error_group_name_unexpected_eof() {
         let kind = ErrorKind::UnclosedCaptureName("a".into());
         test_err!("(?P<a", 5, kind);
     }

     /// `(?P<>a)` is expected to fail at position 4 with `EmptyCaptureName`.
     #[test]
     fn error_group_name_empty() {
         let kind = ErrorKind::EmptyCaptureName;
         test_err!("(?P<>a)", 4, kind);
     }

     /// The flag `z` is expected to be rejected with `UnrecognizedFlag('z')`
     /// at position 2.
     #[test]
     fn error_group_opts_unrecognized_flag() {
         let kind = ErrorKind::UnrecognizedFlag('z');
         test_err!("(?z:a)", 2, kind);
     }

     /// `(?i` is expected to fail at position 3 with `UnexpectedFlagEof`.
     #[test]
     fn error_group_opts_unexpected_eof() {
         let kind = ErrorKind::UnexpectedFlagEof;
         test_err!("(?i", 3, kind);
     }

     /// A second `-` in a flag list, `(?-i-s:a)`, is expected to fail at
     /// position 4 with `DoubleFlagNegation`.
     #[test]
     fn error_group_opts_double_negation() {
         let kind = ErrorKind::DoubleFlagNegation;
         test_err!("(?-i-s:a)", 4, kind);
     }

     /// A `-` with no flag after it, `(?i-:a)`, is expected to fail at
     /// position 4 with `EmptyFlagNegation`.
     #[test]
     fn error_group_opts_empty_negation() {
         let kind = ErrorKind::EmptyFlagNegation;
         test_err!("(?i-:a)", 4, kind);
     }

     /// `(?)` is expected to fail at position 2, reported as
     /// `EmptyFlagNegation`.
     #[test]
     fn error_group_opts_empty() {
         let kind = ErrorKind::EmptyFlagNegation;
         test_err!("(?)", 2, kind);
     }

     /// A lone backslash is expected to fail at position 1 with
     /// `UnexpectedEscapeEof`.
     #[test]
     fn error_escape_unexpected_eof() {
         let kind = ErrorKind::UnexpectedEscapeEof;
         test_err!(r"\", 1, kind);
     }

     /// `\m` is expected to fail at position 1 with
     /// `UnrecognizedEscape('m')`.
     #[test]
     fn error_escape_unrecognized() {
         let kind = ErrorKind::UnrecognizedEscape('m');
         test_err!(r"\m", 1, kind);
     }

     /// `\x` with no digits is expected to fail at position 2 with
     /// `UnexpectedTwoDigitHexEof`.
     #[test]
     fn error_escape_hex2_eof0() {
         let kind = ErrorKind::UnexpectedTwoDigitHexEof;
         test_err!(r"\x", 2, kind);
     }

     /// `\xA` with only one digit is expected to fail at position 3 with
     /// `UnexpectedTwoDigitHexEof`.
     #[test]
     fn error_escape_hex2_eof1() {
         let kind = ErrorKind::UnexpectedTwoDigitHexEof;
         test_err!(r"\xA", 3, kind);
     }

     /// `\xAG` is expected to fail with `InvalidBase16` carrying the two
     /// digits read.
     #[test]
     fn error_escape_hex2_invalid() {
         let kind = ErrorKind::InvalidBase16("AG".into());
         test_err!(r"\xAG", 4, kind);
     }

     /// `\x{` with nothing after the brace is expected to fail at position 3
     /// with `InvalidBase16` carrying an empty string.
     #[test]
     fn error_escape_hex_eof0() {
         let kind = ErrorKind::InvalidBase16("".into());
         test_err!(r"\x{", 3, kind);
     }

     /// `\x{A` with no closing brace is expected to fail at position 4 with
     /// `UnclosedHex`.
     #[test]
     fn error_escape_hex_eof1() {
         let kind = ErrorKind::UnclosedHex;
         test_err!(r"\x{A", 4, kind);
     }

     /// `\x{AG}` is expected to fail with `InvalidBase16` carrying the text
     /// between the braces.
     #[test]
     fn error_escape_hex_invalid() {
         let kind = ErrorKind::InvalidBase16("AG".into());
         test_err!(r"\x{AG}", 5, kind);
     }

     /// `\x{D800}` is expected to fail with `InvalidScalarValue(0xD800)`.
     #[test]
     fn error_escape_hex_invalid_scalar_value_surrogate() {
         let kind = ErrorKind::InvalidScalarValue(0xD800);
         test_err!(r"\x{D800}", 8, kind);
     }

     /// `\x{110000}` is expected to fail with
     /// `InvalidScalarValue(0x110000)`.
     #[test]
     fn error_escape_hex_invalid_scalar_value_high() {
         let kind = ErrorKind::InvalidScalarValue(0x110000);
         test_err!(r"\x{110000}", 10, kind);
     }

     /// `\x{9999999999}` is expected to fail with `InvalidBase16` carrying
     /// the digits.
     #[test]
     fn error_escape_hex_invalid_u32() {
         let kind = ErrorKind::InvalidBase16("9999999999".into());
         test_err!(r"\x{9999999999}", 13, kind);
     }

     /// A `\p{` with no closing brace is expected to fail with
     /// `UnclosedUnicodeName`, with or without a partial name.
     #[test]
     fn error_unicode_unclosed() {
         let kind = ErrorKind::UnclosedUnicodeName;
         test_err!(r"\p{", 3, kind);
         test_err!(r"\p{Greek", 8, kind);
     }

     /// `\p` with nothing after it is expected to fail at position 2 with
     /// `UnexpectedEscapeEof`.
     #[test]
     fn error_unicode_no_letter() {
         let kind = ErrorKind::UnexpectedEscapeEof;
         test_err!(r"\p", 2, kind);
     }

     /// `\pA` is expected to fail with `UnrecognizedUnicodeClass` carrying
     /// the letter.
     #[test]
     fn error_unicode_unknown_letter() {
         let kind = ErrorKind::UnrecognizedUnicodeClass("A".into());
         test_err!(r"\pA", 3, kind);
     }

     /// `\p{Yii}` is expected to fail with `UnrecognizedUnicodeClass`
     /// carrying the name.
     #[test]
     fn error_unicode_unknown_name() {
         let kind = ErrorKind::UnrecognizedUnicodeClass("Yii".into());
         test_err!(r"\p{Yii}", 7, kind);
     }

     /// A class opener with no members before the end of the input is
     /// expected to fail with `UnexpectedClassEof`.
     #[test]
     fn error_class_eof_empty() {
         let kind = ErrorKind::UnexpectedClassEof;
         test_err!("[", 1, kind);
         test_err!("[^", 2, kind);
     }

     /// A class with a member but no closing bracket is expected to fail
     /// with `UnexpectedClassEof`.
     #[test]
     fn error_class_eof_non_empty() {
         let kind = ErrorKind::UnexpectedClassEof;
         test_err!("[a", 2, kind);
         test_err!("[^a", 3, kind);
     }

     /// A class that ends in the middle of a range is expected to fail with
     /// `UnexpectedClassEof`.
     #[test]
     fn error_class_eof_range() {
         let kind = ErrorKind::UnexpectedClassEof;
         test_err!("[a-", 3, kind);
         test_err!("[^a-", 4, kind);
         test_err!("[---", 4, kind);
     }

     /// An unknown Unicode class inside a bracket, `[\pA]`, is expected to
     /// fail with `UnrecognizedUnicodeClass` carrying the letter.
     #[test]
     fn error_class_invalid_escape() {
         let kind = ErrorKind::UnrecognizedUnicodeClass("A".into());
         test_err!(r"[\pA]", 4, kind);
     }

     /// `\A` inside a bracket is expected to fail with `InvalidClassEscape`
     /// carrying `Expr::StartText`.
     #[test]
     fn error_class_valid_escape_not_allowed() {
         let kind = ErrorKind::InvalidClassEscape(Expr::StartText);
         test_err!(r"[\A]", 3, kind);
     }

     /// An escape that is not a single character is expected to be rejected
     /// as a range endpoint with `InvalidClassEscape` carrying the parsed
     /// expression.
     #[test]
     fn error_class_range_valid_escape_not_allowed() {
         let digit_class = Expr::Class(class(PERLD));
         test_err!(r"[a-\d]", 5, ErrorKind::InvalidClassEscape(digit_class));

         let start_text = ErrorKind::InvalidClassEscape(Expr::StartText);
         test_err!(r"[a-\A]", 5, start_text);
         test_err!(r"[\A-a]", 3, start_text);
     }

     /// `[z-a]` (start after end) is expected to fail with
     /// `InvalidClassRange` carrying both endpoints.
     #[test]
     fn error_class_invalid_range() {
         let kind = ErrorKind::InvalidClassRange { start: 'z', end: 'a' };
         test_err!("[z-a]", 4, kind);
     }

     /// `[]` and `[^]` are expected to fail with `UnexpectedClassEof`, while
     /// `[^\d\D]` is expected to fail with `EmptyClass`.
     #[test]
     fn error_class_empty_range() {
         let eof = ErrorKind::UnexpectedClassEof;
         test_err!("[]", 2, eof);
         test_err!("[^]", 3, eof);

         let empty = ErrorKind::EmptyClass;
         test_err!(r"[^\d\D]", 7, empty);
     }

     /// Reusing the capture name `a` is expected to fail with
     /// `DuplicateCaptureName` carrying the name, reported at position 14.
     #[test]
     fn error_duplicate_capture_name() {
         let kind = ErrorKind::DuplicateCaptureName("a".into());
         test_err!("(?P<a>.)(?P<a>.)", 14, kind);
     }
 }