vendor/regex-syntax-0.5.6/src/hir/translate.rs - toolchain/rustc - Git at Google

 // Copyright 2018 The Rust Project Developers. See the COPYRIGHT
 // file at the top-level directory of this distribution and at
 // http://rust-lang.org/COPYRIGHT.
 //
 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
 // option. This file may not be copied, modified, or distributed
 // except according to those terms.

 /*!
 Defines a translator that converts an `Ast` to an `Hir`.
 */

 use std::cell::{Cell, RefCell};
 use std::result;

 use ast::{self, Ast, Span, Visitor};
 use hir::{self, Error, ErrorKind, Hir};
 use unicode::{self, ClassQuery};

 type Result<T> = result::Result<T, Error>;

 /// A builder for constructing an AST->HIR translator.
 ///
 /// The builder only records options; call `build` to obtain a `Translator`
 /// that starts out with them.
 #[derive(Clone, Debug)]
 pub struct TranslatorBuilder {
     /// Whether a built translator may produce HIR that can match invalid
     /// UTF-8. Copied verbatim into the translator by `build`.
     allow_invalid_utf8: bool,
     /// The flag settings a built translator starts out with.
     flags: Flags,
 }

 impl Default for TranslatorBuilder {
     fn default() -> TranslatorBuilder {
         TranslatorBuilder::new()
     }
 }

 impl TranslatorBuilder {
     /// Create a new translator builder with a default configuration.
     ///
     /// Invalid UTF-8 is disallowed and the flags take their
     /// `Flags::default()` values.
     pub fn new() -> Self {
         TranslatorBuilder {
             flags: Flags::default(),
             allow_invalid_utf8: false,
         }
     }

     /// Build a translator using the current configuration.
     ///
     /// The translator starts with an empty stack and a copy of this
     /// builder's flags; the builder itself is left untouched and may be
     /// reused.
     pub fn build(&self) -> Translator {
         let stack = RefCell::new(Vec::new());
         let flags = Cell::new(self.flags);
         Translator {
             stack: stack,
             flags: flags,
             allow_invalid_utf8: self.allow_invalid_utf8,
         }
     }

     /// When enabled, translation will permit the construction of a regular
     /// expression that may match invalid UTF-8.
     ///
     /// When disabled (the default), the translator is guaranteed to produce
     /// an expression that will only ever match valid UTF-8 (otherwise, the
     /// translator will return an error).
     ///
     /// Note that currently, even when invalid UTF-8 is banned, the translator
     /// will permit a negated ASCII word boundary (i.e., `(?-u:\B)`) even
     /// though it can actually match at invalid UTF-8 boundaries. This bug
     /// will be fixed on the next semver release.
     pub fn allow_invalid_utf8(&mut self, yes: bool) -> &mut Self {
         self.allow_invalid_utf8 = yes;
         self
     }

     /// Enable or disable the case insensitive flag (`i`) by default.
     ///
     /// Disabling stores "unset" (`None`) rather than an explicit `false`.
     pub fn case_insensitive(&mut self, yes: bool) -> &mut Self {
         self.flags.case_insensitive = match yes {
             true => Some(true),
             false => None,
         };
         self
     }

     /// Enable or disable the multi-line matching flag (`m`) by default.
     ///
     /// Disabling stores "unset" (`None`) rather than an explicit `false`.
     pub fn multi_line(&mut self, yes: bool) -> &mut Self {
         self.flags.multi_line = match yes {
             true => Some(true),
             false => None,
         };
         self
     }

     /// Enable or disable the "dot matches any character" flag (`s`) by
     /// default.
     ///
     /// Disabling stores "unset" (`None`) rather than an explicit `false`.
     pub fn dot_matches_new_line(&mut self, yes: bool) -> &mut Self {
         self.flags.dot_matches_new_line = match yes {
             true => Some(true),
             false => None,
         };
         self
     }

     /// Enable or disable the "swap greed" flag (`U`) by default.
     ///
     /// Disabling stores "unset" (`None`) rather than an explicit `false`.
     pub fn swap_greed(&mut self, yes: bool) -> &mut Self {
         self.flags.swap_greed = match yes {
             true => Some(true),
             false => None,
         };
         self
     }

     /// Enable or disable the Unicode flag (`u`) by default.
     ///
     /// Unlike the other flags, this one is stored inverted: enabling leaves
     /// the flag unset (`None`), while disabling records an explicit
     /// `Some(false)`.
     pub fn unicode(&mut self, yes: bool) -> &mut Self {
         self.flags.unicode = match yes {
             true => None,
             false => Some(false),
         };
         self
     }
 }

 /// A translator maps abstract syntax to a high level intermediate
 /// representation.
 ///
 /// A translator may benefit from reuse. That is, a translator can translate
 /// many abstract syntax trees.
 ///
 /// A `Translator` can be configured in more detail via a
 /// [`TranslatorBuilder`](struct.TranslatorBuilder.html).
 #[derive(Clone, Debug)]
 pub struct Translator {
     /// Our call stack, but on the heap.
     ///
     /// Wrapped in a `RefCell` so that frames can be pushed and popped
     /// through a shared reference to the translator.
     stack: RefCell<Vec<HirFrame>>,
     /// The current flag settings.
     ///
     /// Wrapped in a `Cell` so that the active flags can be replaced through
     /// a shared reference to the translator.
     flags: Cell<Flags>,
     /// Whether we're allowed to produce HIR that can match arbitrary bytes.
     allow_invalid_utf8: bool,
 }

 impl Translator {
     /// Create a new translator using the default configuration.
     pub fn new() -> Translator {
         TranslatorBuilder::new().build()
     }

     /// Translate the given abstract syntax tree (AST) into a high level
     /// intermediate representation (HIR).
     ///
     /// If there was a problem doing the translation, then an HIR-specific
     /// error is returned.
     ///
     /// The original pattern string used to produce the `Ast` *must* also be
     /// provided. The translator does not use the pattern string during any
     /// correct translation, but is used for error reporting.
     ///
     /// Every call starts from the translator's configured flags and an empty
     /// stack, regardless of what earlier calls did (including calls that
     /// returned an error), so a translator can safely be reused.
     pub fn translate(&mut self, pattern: &str, ast: &Ast) -> Result<Hir> {
         // Flag directives that are not scoped by a group (e.g., a leading
         // `(?i)`) overwrite `self.flags` during the visit. Remember the
         // flags we started with so they can be put back afterwards;
         // otherwise they would silently apply to the next pattern too.
         let initial_flags = self.flags.get();
         // A previous translation that bailed out with an error may have
         // left frames behind. Drop them so that they cannot be mistaken for
         // part of this translation.
         self.stack.borrow_mut().clear();

         let result = ast::visit(ast, TranslatorI::new(self, pattern));

         self.flags.set(initial_flags);
         result
     }
 }

 /// An HirFrame is a single stack frame, represented explicitly, which is
 /// created for each item in the Ast that we traverse.
 ///
 /// Note that technically, this type doesn't represent our entire stack
 /// frame. In particular, the Ast visitor represents any state associated with
 /// traversing the Ast itself.
 #[derive(Clone, Debug)]
 enum HirFrame {
     /// An arbitrary HIR expression. These get pushed whenever we hit a base
     /// case in the Ast. They get popped after an inductive (i.e., recursive)
     /// step is complete.
     Expr(Hir),
     /// A Unicode character class. This frame is mutated as we descend into
     /// the Ast of a character class (which is itself its own mini recursive
     /// structure).
     ClassUnicode(hir::ClassUnicode),
     /// A byte-oriented character class. This frame is mutated as we descend
     /// into the Ast of a character class (which is itself its own mini
     /// recursive structure).
     ///
     /// Byte character classes are created when Unicode mode (`u`) is disabled.
     /// If `allow_invalid_utf8` is disabled (the default), then a byte
     /// character is only permitted to match ASCII text.
     ClassBytes(hir::ClassBytes),
     /// This is pushed on to the stack upon first seeing any kind of group,
     /// indicated by parentheses (including non-capturing groups). It is popped
     /// upon leaving a group.
     Group {
         /// The old active flags, if any, when this group was opened.
         ///
         /// If this group sets flags, then the new active flags are set to the
         /// result of merging the old flags with the flags introduced by this
         /// group.
         ///
         /// When this group is popped, the active flags should be restored to
         /// the flags set here. If this is `None`, the active flags are left
         /// as they are.
         ///
         /// The "active" flags correspond to whatever flags are set in the
         /// Translator.
         old_flags: Option<Flags>,
     },
     /// This is pushed whenever a non-empty concatenation is observed. After
     /// visiting every sub-expression in the concatenation, the translator's
     /// stack is popped until it sees a Concat frame. The Concat frame itself
     /// is popped (and discarded) as well.
     Concat,
     /// This is pushed whenever a non-empty alternation is observed. After
     /// visiting every sub-expression in the alternation, the translator's
     /// stack is popped until it sees an Alternation frame. The Alternation
     /// frame itself is popped (and discarded) as well.
     Alternation,
 }

 impl HirFrame {
     /// Consume this frame and return the HIR expression it holds.
     ///
     /// Panics if the frame is anything other than `HirFrame::Expr`.
     fn unwrap_expr(self) -> Hir {
         match self {
             HirFrame::Expr(hir) => hir,
             other => {
                 panic!("tried to unwrap expr from HirFrame, got: {:?}", other)
             }
         }
     }

     /// Consume this frame and return the Unicode class it holds.
     ///
     /// Panics if the frame is anything other than `HirFrame::ClassUnicode`.
     fn unwrap_class_unicode(self) -> hir::ClassUnicode {
         match self {
             HirFrame::ClassUnicode(class) => class,
             other => panic!("tried to unwrap Unicode class \
                              from HirFrame, got: {:?}", other),
         }
     }

     /// Consume this frame and return the byte class it holds.
     ///
     /// Panics if the frame is anything other than `HirFrame::ClassBytes`.
     fn unwrap_class_bytes(self) -> hir::ClassBytes {
         match self {
             HirFrame::ClassBytes(class) => class,
             other => panic!("tried to unwrap byte class \
                              from HirFrame, got: {:?}", other),
         }
     }

     /// Consume this frame and return the flags saved when its group was
     /// entered, if any were saved.
     ///
     /// Panics if the frame is anything other than `HirFrame::Group`.
     fn unwrap_group(self) -> Option<Flags> {
         match self {
             HirFrame::Group { old_flags: saved } => saved,
             other => {
                 panic!("tried to unwrap group from HirFrame, got: {:?}", other)
             }
         }
     }
 }

 impl<'t, 'p> Visitor for TranslatorI<'t, 'p> {
     type Output = Hir;
     type Err = Error;

     /// Produce the final HIR once the entire Ast has been visited.
     ///
     /// An empty stack yields the empty HIR. Otherwise the stack must hold
     /// exactly one `Expr` frame, which is returned; anything else is a bug
     /// in the translator and panics.
     fn finish(self) -> Result<Hir> {
         if self.trans().stack.borrow().is_empty() {
             // This can happen if the Ast given consists of a single set of
             // flags. e.g., `(?i)`. /shrug
             return Ok(Hir::empty());
         }
         // ... otherwise, we should have exactly one HIR on the stack.
         assert_eq!(self.trans().stack.borrow().len(), 1);
         Ok(self.pop().unwrap().unwrap_expr())
     }

     /// Called before descending into `ast`. Pushes the marker/accumulator
     /// frame that the matching `visit_post` arm expects to find.
     fn visit_pre(&mut self, ast: &Ast) -> Result<()> {
         match *ast {
             Ast::Class(ast::Class::Bracketed(_)) => {
                 // Start with an empty class; the class set items visited
                 // next will add to it.
                 if self.flags().unicode() {
                     let cls = hir::ClassUnicode::empty();
                     self.push(HirFrame::ClassUnicode(cls));
                 } else {
                     let cls = hir::ClassBytes::empty();
                     self.push(HirFrame::ClassBytes(cls));
                 }
             }
             Ast::Group(ref x) => {
                 // Always record the flags that were active when the group
                 // was opened, even if the group itself doesn't set any.
                 // A flag directive *inside* the group (e.g., the `(?i)` in
                 // `((?i)a)b`) changes the active flags, and those changes
                 // must not outlive the group.
                 let old_flags = match x.flags() {
                     Some(ast_flags) => self.set_flags(ast_flags),
                     None => self.flags(),
                 };
                 self.push(HirFrame::Group {
                     old_flags: Some(old_flags),
                 });
             }
             // Empty concatenations/alternations get no marker frame, which
             // mirrors how `visit_post` pops until it finds one.
             Ast::Concat(ref x) if x.asts.is_empty() => {}
             Ast::Concat(_) => {
                 self.push(HirFrame::Concat);
             }
             Ast::Alternation(ref x) if x.asts.is_empty() => {}
             Ast::Alternation(_) => {
                 self.push(HirFrame::Alternation);
             }
             _ => {}
         }
         Ok(())
     }

     /// Called after all children of `ast` have been visited. Pops whatever
     /// the children left on the stack and pushes the translated expression
     /// for `ast` itself.
     ///
     /// Returns an error if `ast` cannot be expressed under the active flags
     /// (e.g., an empty class, or a construct the helper methods reject).
     fn visit_post(&mut self, ast: &Ast) -> Result<()> {
         match *ast {
             Ast::Empty(_) => {
                 self.push(HirFrame::Expr(Hir::empty()));
             }
             Ast::Flags(ref x) => {
                 // A bare flag directive produces no expression; it only
                 // changes the active flags from here on.
                 self.set_flags(&x.flags);
             }
             Ast::Literal(ref x) => {
                 self.push(HirFrame::Expr(try!(self.hir_literal(x))));
             }
             Ast::Dot(span) => {
                 self.push(HirFrame::Expr(try!(self.hir_dot(span))));
             }
             Ast::Assertion(ref x) => {
                 self.push(HirFrame::Expr(try!(self.hir_assertion(x))));
             }
             Ast::Class(ast::Class::Perl(ref x)) => {
                 if self.flags().unicode() {
                     let cls = self.hir_perl_unicode_class(x);
                     let hcls = hir::Class::Unicode(cls);
                     self.push(HirFrame::Expr(Hir::class(hcls)));
                 } else {
                     let cls = self.hir_perl_byte_class(x);
                     let hcls = hir::Class::Bytes(cls);
                     self.push(HirFrame::Expr(Hir::class(hcls)));
                 }
             }
             Ast::Class(ast::Class::Unicode(ref x)) => {
                 let cls = hir::Class::Unicode(try!(self.hir_unicode_class(x)));
                 self.push(HirFrame::Expr(Hir::class(cls)));
             }
             Ast::Class(ast::Class::Bracketed(ref ast)) => {
                 // The frame pushed in `visit_pre` now holds the union of all
                 // items in the class. Finish it off with case folding and
                 // negation, and reject classes that match nothing.
                 if self.flags().unicode() {
                     let mut cls = self.pop().unwrap().unwrap_class_unicode();
                     self.unicode_fold_and_negate(ast.negated, &mut cls);
                     if cls.iter().next().is_none() {
                         return Err(self.error(
                             ast.span, ErrorKind::EmptyClassNotAllowed));
                     }
                     let expr = Hir::class(hir::Class::Unicode(cls));
                     self.push(HirFrame::Expr(expr));
                 } else {
                     let mut cls = self.pop().unwrap().unwrap_class_bytes();
                     try!(self.bytes_fold_and_negate(
                         &ast.span, ast.negated, &mut cls));
                     if cls.iter().next().is_none() {
                         return Err(self.error(
                             ast.span, ErrorKind::EmptyClassNotAllowed));
                     }

                     let expr = Hir::class(hir::Class::Bytes(cls));
                     self.push(HirFrame::Expr(expr));
                 }
             }
             Ast::Repetition(ref x) => {
                 let expr = self.pop().unwrap().unwrap_expr();
                 self.push(HirFrame::Expr(self.hir_repetition(x, expr)));
             }
             Ast::Group(ref x) => {
                 // The group's sub-expression sits on top of the Group frame
                 // pushed in `visit_pre`.
                 let expr = self.pop().unwrap().unwrap_expr();
                 if let Some(flags) = self.pop().unwrap().unwrap_group() {
                     self.trans().flags.set(flags);
                 }
                 self.push(HirFrame::Expr(self.hir_group(x, expr)));
             }
             Ast::Concat(_) => {
                 // Pop expressions until the Concat marker (which is consumed
                 // by the failed pattern match that ends the loop).
                 let mut exprs = vec![];
                 while let Some(HirFrame::Expr(expr)) = self.pop() {
                     // Empty expressions contribute nothing to a
                     // concatenation, so drop them.
                     if !expr.kind().is_empty() {
                         exprs.push(expr);
                     }
                 }
                 // They were popped last-to-first.
                 exprs.reverse();
                 self.push(HirFrame::Expr(Hir::concat(exprs)));
             }
             Ast::Alternation(_) => {
                 // Same as Concat, except that empty branches are kept.
                 let mut exprs = vec![];
                 while let Some(HirFrame::Expr(expr)) = self.pop() {
                     exprs.push(expr);
                 }
                 exprs.reverse();
                 self.push(HirFrame::Expr(Hir::alternation(exprs)));
             }
         }
         Ok(())
     }

     /// Called before descending into a class set item. Only nested bracketed
     /// classes need a fresh accumulator frame.
     fn visit_class_set_item_pre(
         &mut self,
         ast: &ast::ClassSetItem,
     ) -> Result<()> {
         match *ast {
             ast::ClassSetItem::Bracketed(_) => {
                 if self.flags().unicode() {
                     let cls = hir::ClassUnicode::empty();
                     self.push(HirFrame::ClassUnicode(cls));
                 } else {
                     let cls = hir::ClassBytes::empty();
                     self.push(HirFrame::ClassBytes(cls));
                 }
             }
             // We needn't handle the Union case here since the visitor will
             // do it for us.
             _ => {}
         }
         Ok(())
     }

     /// Called after a class set item has been visited. Adds the item to the
     /// class accumulator on top of the stack.
     ///
     /// Returns an error if the item cannot be represented under the active
     /// flags.
     fn visit_class_set_item_post(
         &mut self,
         ast: &ast::ClassSetItem,
     ) -> Result<()> {
         match *ast {
             ast::ClassSetItem::Empty(_) => {}
             ast::ClassSetItem::Literal(ref x) => {
                 if self.flags().unicode() {
                     let mut cls = self.pop().unwrap().unwrap_class_unicode();
                     cls.push(hir::ClassUnicodeRange::new(x.c, x.c));
                     self.push(HirFrame::ClassUnicode(cls));
                 } else {
                     let mut cls = self.pop().unwrap().unwrap_class_bytes();
                     let byte = try!(self.class_literal_byte(x));
                     cls.push(hir::ClassBytesRange::new(byte, byte));
                     self.push(HirFrame::ClassBytes(cls));
                 }
             }
             ast::ClassSetItem::Range(ref x) => {
                 if self.flags().unicode() {
                     let mut cls = self.pop().unwrap().unwrap_class_unicode();
                     cls.push(hir::ClassUnicodeRange::new(x.start.c, x.end.c));
                     self.push(HirFrame::ClassUnicode(cls));
                 } else {
                     let mut cls = self.pop().unwrap().unwrap_class_bytes();
                     let start = try!(self.class_literal_byte(&x.start));
                     let end = try!(self.class_literal_byte(&x.end));
                     cls.push(hir::ClassBytesRange::new(start, end));
                     self.push(HirFrame::ClassBytes(cls));
                 }
             }
             ast::ClassSetItem::Ascii(ref x) => {
                 // Build the ASCII class on its own, apply its negation to
                 // it alone, and only then merge it into the enclosing
                 // class. Negating the accumulator directly would also
                 // negate every item that came before this one, e.g. the `a`
                 // in `[a[:^digit:]]`.
                 if self.flags().unicode() {
                     let mut xcls = hir::ClassUnicode::empty();
                     for &(s, e) in ascii_class(&x.kind) {
                         xcls.push(hir::ClassUnicodeRange::new(s, e));
                     }
                     self.unicode_fold_and_negate(x.negated, &mut xcls);

                     let mut cls = self.pop().unwrap().unwrap_class_unicode();
                     cls.union(&xcls);
                     self.push(HirFrame::ClassUnicode(cls));
                 } else {
                     let mut xcls = hir::ClassBytes::empty();
                     for &(s, e) in ascii_class(&x.kind) {
                         xcls.push(hir::ClassBytesRange::new(s as u8, e as u8));
                     }
                     try!(self.bytes_fold_and_negate(
                         &x.span, x.negated, &mut xcls));

                     let mut cls = self.pop().unwrap().unwrap_class_bytes();
                     cls.union(&xcls);
                     self.push(HirFrame::ClassBytes(cls));
                 }
             }
             ast::ClassSetItem::Unicode(ref x) => {
                 // `hir_unicode_class` fails when Unicode mode is disabled,
                 // so by the time we pop, the accumulator must be a Unicode
                 // class.
                 let xcls = try!(self.hir_unicode_class(x));
                 let mut cls = self.pop().unwrap().unwrap_class_unicode();
                 cls.union(&xcls);
                 self.push(HirFrame::ClassUnicode(cls));
             }
             ast::ClassSetItem::Perl(ref x) => {
                 if self.flags().unicode() {
                     let xcls = self.hir_perl_unicode_class(x);
                     let mut cls = self.pop().unwrap().unwrap_class_unicode();
                     cls.union(&xcls);
                     self.push(HirFrame::ClassUnicode(cls));
                 } else {
                     let xcls = self.hir_perl_byte_class(x);
                     let mut cls = self.pop().unwrap().unwrap_class_bytes();
                     cls.union(&xcls);
                     self.push(HirFrame::ClassBytes(cls));
                 }
             }
             ast::ClassSetItem::Bracketed(ref ast) => {
                 // `cls1` is the nested class (pushed by the `_pre` hook);
                 // `cls2` is the enclosing class it gets merged into.
                 if self.flags().unicode() {
                     let mut cls1 = self.pop().unwrap().unwrap_class_unicode();
                     self.unicode_fold_and_negate(ast.negated, &mut cls1);

                     let mut cls2 = self.pop().unwrap().unwrap_class_unicode();
                     cls2.union(&cls1);
                     self.push(HirFrame::ClassUnicode(cls2));
                 } else {
                     let mut cls1 = self.pop().unwrap().unwrap_class_bytes();
                     try!(self.bytes_fold_and_negate(
                         &ast.span, ast.negated, &mut cls1));

                     let mut cls2 = self.pop().unwrap().unwrap_class_bytes();
                     cls2.union(&cls1);
                     self.push(HirFrame::ClassBytes(cls2));
                 }
             }
             // This is handled automatically by the visitor.
             ast::ClassSetItem::Union(_) => {}
         }
         Ok(())
     }

     /// Called before the left-hand side of a class set binary operation is
     /// visited. Pushes an empty accumulator for the left-hand side.
     fn visit_class_set_binary_op_pre(
         &mut self,
         _op: &ast::ClassSetBinaryOp,
     ) -> Result<()> {
         if self.flags().unicode() {
             let cls = hir::ClassUnicode::empty();
             self.push(HirFrame::ClassUnicode(cls));
         } else {
             let cls = hir::ClassBytes::empty();
             self.push(HirFrame::ClassBytes(cls));
         }
         Ok(())
     }

     /// Called between the two sides of a class set binary operation. Pushes
     /// an empty accumulator for the right-hand side.
     fn visit_class_set_binary_op_in(
         &mut self,
         _op: &ast::ClassSetBinaryOp,
     ) -> Result<()> {
         if self.flags().unicode() {
             let cls = hir::ClassUnicode::empty();
             self.push(HirFrame::ClassUnicode(cls));
         } else {
             let cls = hir::ClassBytes::empty();
             self.push(HirFrame::ClassBytes(cls));
         }
         Ok(())
     }

     /// Called after both sides of a class set binary operation have been
     /// visited. Combines the two operand classes according to `op` and
     /// merges the result into the enclosing class.
     fn visit_class_set_binary_op_post(
         &mut self,
         op: &ast::ClassSetBinaryOp,
     ) -> Result<()> {
         use ast::ClassSetBinaryOpKind::*;

         // Stack layout, top first: right operand, left operand, enclosing
         // class.
         if self.flags().unicode() {
             let mut rhs = self.pop().unwrap().unwrap_class_unicode();
             let mut lhs = self.pop().unwrap().unwrap_class_unicode();
             let mut cls = self.pop().unwrap().unwrap_class_unicode();
             // Fold both operands before combining them, so that the set
             // operation sees the case insensitive classes.
             if self.flags().case_insensitive() {
                 rhs.case_fold_simple();
                 lhs.case_fold_simple();
             }
             match op.kind {
                 Intersection => lhs.intersect(&rhs),
                 Difference => lhs.difference(&rhs),
                 SymmetricDifference => lhs.symmetric_difference(&rhs),
             }
             cls.union(&lhs);
             self.push(HirFrame::ClassUnicode(cls));
         } else {
             let mut rhs = self.pop().unwrap().unwrap_class_bytes();
             let mut lhs = self.pop().unwrap().unwrap_class_bytes();
             let mut cls = self.pop().unwrap().unwrap_class_bytes();
             if self.flags().case_insensitive() {
                 rhs.case_fold_simple();
                 lhs.case_fold_simple();
             }
             match op.kind {
                 Intersection => lhs.intersect(&rhs),
                 Difference => lhs.difference(&rhs),
                 SymmetricDifference => lhs.symmetric_difference(&rhs),
             }
             cls.union(&lhs);
             self.push(HirFrame::ClassBytes(cls));
         }
         Ok(())
     }
 }

 /// The internal implementation of a translator.
 ///
 /// This type is responsible for carrying around the original pattern string,
 /// which is not tied to the internal state of a translator.
 ///
 /// A TranslatorI exists for the time it takes to translate a single Ast.
 #[derive(Clone, Debug)]
 struct TranslatorI<'t, 'p> {
     /// The translator whose stack and flags this visitor operates on.
     trans: &'t Translator,
     /// The original pattern string; copied into any error that is created.
     pattern: &'p str,
 }

 impl<'t, 'p> TranslatorI<'t, 'p> {
     /// Pair the given translator with the pattern string being translated.
     fn new(trans: &'t Translator, pattern: &'p str) -> TranslatorI<'t, 'p> {
         TranslatorI {
             pattern: pattern,
             trans: trans,
         }
     }

     /// Borrow the translator this visitor is operating on.
     fn trans(&self) -> &Translator {
         self.trans
     }

     /// Place `frame` on top of the translator's explicit call stack.
     fn push(&self, frame: HirFrame) {
         let mut stack = self.trans().stack.borrow_mut();
         stack.push(frame);
     }

     /// Remove and return the top frame of the translator's call stack, or
     /// `None` when the stack is empty.
     fn pop(&self) -> Option<HirFrame> {
         self.trans.stack.borrow_mut().pop()
     }

     /// Create a new error with the given span and error type.
     fn error(&self, span: Span, kind: ErrorKind) -> Error {
         Error { kind: kind, pattern: self.pattern.to_string(), span: span }
     }

     /// Return a snapshot (by value) of the flags that are currently active.
     fn flags(&self) -> Flags {
         self.trans.flags.get()
     }

     /// Make the flags in `ast_flags` active, merging in the currently active
     /// flags via `Flags::merge`.
     ///
     /// Returns the flags that were active before the call so that the caller
     /// can restore them later.
     fn set_flags(&self, ast_flags: &ast::Flags) -> Flags {
         let previous = self.flags();
         let mut merged = Flags::from_ast(ast_flags);
         merged.merge(&previous);
         self.trans().flags.set(merged);
         previous
     }

     /// Translate a single Ast literal into an HIR expression.
     ///
     /// A raw byte literal is emitted as is. A `char` literal is emitted
     /// either directly or, when the case insensitive flag is active, via
     /// `hir_from_char_case_insensitive`.
     fn hir_literal(&self, lit: &ast::Literal) -> Result<Hir> {
         match try!(self.literal_to_char(lit)) {
             raw @ hir::Literal::Byte(_) => Ok(Hir::literal(raw)),
             hir::Literal::Unicode(c) => {
                 if self.flags().case_insensitive() {
                     self.hir_from_char_case_insensitive(lit.span, c)
                 } else {
                     self.hir_from_char(lit.span, c)
                 }
             }
         }
     }

     /// Convert an Ast literal to its scalar representation.
     ///
     /// When Unicode mode is enabled, then this always succeeds and returns a
     /// `char` (Unicode scalar value).
     ///
     /// When Unicode mode is disabled, then a raw byte is returned. If that
     /// byte is not ASCII and invalid UTF-8 is not allowed, then this returns
     /// an error.
     fn literal_to_char(&self, lit: &ast::Literal) -> Result<hir::Literal> {
         if self.flags().unicode() {
             return Ok(hir::Literal::Unicode(lit.c));
         }
         let byte = match lit.byte() {
             None => return Ok(hir::Literal::Unicode(lit.c)),
             Some(byte) => byte,
         };
         if byte <= 0x7F {
             return Ok(hir::Literal::Unicode(byte as char));
         }
         if !self.trans().allow_invalid_utf8 {
             return Err(self.error(lit.span, ErrorKind::InvalidUtf8));
         }
         Ok(hir::Literal::Byte(byte))
     }

     fn hir_from_char(&self, span: Span, c: char) -> Result<Hir> {
         if !self.flags().unicode() && c.len_utf8() > 1 {
             return Err(self.error(span, ErrorKind::UnicodeNotAllowed));
         }
         Ok(Hir::literal(hir::Literal::Unicode(c)))
     }

     /// Build a case insensitive HIR expression for `c`.
     ///
     /// If `c` has no simple case mapping, this is the same as
     /// `hir_from_char`. Otherwise the result is a one-character class
     /// (Unicode or bytes, depending on the Unicode flag) that has been
     /// closed under simple case folding.
     ///
     /// Fails with `UnicodeNotAllowed` when Unicode mode is disabled and `c`
     /// needs more than one byte in UTF-8.
     fn hir_from_char_case_insensitive(
         &self,
         span: Span,
         c: char,
     ) -> Result<Hir> {
         // Nothing to fold: fall back to a plain literal.
         if !unicode::contains_simple_case_mapping(c, c) {
             return self.hir_from_char(span, c);
         }

         if !self.flags().unicode() {
             if c.len_utf8() > 1 {
                 return Err(self.error(span, ErrorKind::UnicodeNotAllowed));
             }
             // `c` is a single UTF-8 byte at this point, so the cast is
             // lossless.
             let b = c as u8;
             let mut bytes = hir::ClassBytes::new(vec![
                 hir::ClassBytesRange::new(b, b),
             ]);
             bytes.case_fold_simple();
             return Ok(Hir::class(hir::Class::Bytes(bytes)));
         }

         let mut scalars = hir::ClassUnicode::new(vec![
             hir::ClassUnicodeRange::new(c, c),
         ]);
         scalars.case_fold_simple();
         Ok(Hir::class(hir::Class::Unicode(scalars)))
     }

     /// Translate `.` into an HIR expression.
     ///
     /// Uses `Hir::any` when the "dot matches new line" flag is active and
     /// `Hir::dot` otherwise; both are told whether Unicode mode is off.
     ///
     /// Fails with `InvalidUtf8` when Unicode mode is disabled and invalid
     /// UTF-8 is not allowed.
     fn hir_dot(&self, span: Span) -> Result<Hir> {
         let bytes = !self.flags().unicode();
         if bytes && !self.trans().allow_invalid_utf8 {
             return Err(self.error(span, ErrorKind::InvalidUtf8));
         }
         let hir = if self.flags().dot_matches_new_line() {
             Hir::any(bytes)
         } else {
             Hir::dot(bytes)
         };
         Ok(hir)
     }

     /// Translate an Ast assertion (anchor or word boundary) into HIR.
     ///
     /// `^`/`$` become line anchors in multi-line mode and text anchors
     /// otherwise. Word boundaries are Unicode-aware in Unicode mode and
     /// ASCII-only otherwise.
     ///
     /// This currently never fails.
     fn hir_assertion(&self, asst: &ast::Assertion) -> Result<Hir> {
         // What `^` and `$` mean depends only on the multi-line flag.
         let (line_start, line_end) = if self.flags().multi_line() {
             (hir::Anchor::StartLine, hir::Anchor::EndLine)
         } else {
             (hir::Anchor::StartText, hir::Anchor::EndText)
         };
         // Which word boundary flavor to use depends only on the Unicode
         // flag.
         //
         // It is possible for negated ASCII word boundaries to match at
         // invalid UTF-8 boundaries, even when searching valid UTF-8.
         //
         // TODO(ag): When regex goes to 1.0, return an
         // `ErrorKind::InvalidUtf8` error (at `asst.span`) for a negated
         // ASCII word boundary if `allow_invalid_utf8` is disabled. Before
         // then, it is too steep of a breaking change.
         let (boundary, not_boundary) = if self.flags().unicode() {
             (hir::WordBoundary::Unicode, hir::WordBoundary::UnicodeNegate)
         } else {
             (hir::WordBoundary::Ascii, hir::WordBoundary::AsciiNegate)
         };

         let hir = match asst.kind {
             ast::AssertionKind::StartLine => Hir::anchor(line_start),
             ast::AssertionKind::EndLine => Hir::anchor(line_end),
             ast::AssertionKind::StartText => {
                 Hir::anchor(hir::Anchor::StartText)
             }
             ast::AssertionKind::EndText => {
                 Hir::anchor(hir::Anchor::EndText)
             }
             ast::AssertionKind::WordBoundary => {
                 Hir::word_boundary(boundary)
             }
             ast::AssertionKind::NotWordBoundary => {
                 Hir::word_boundary(not_boundary)
             }
         };
         Ok(hir)
     }

     /// Wrap the already translated `expr` in an HIR group that mirrors the
     /// kind (capture index, capture name or non-capturing) of the Ast group.
     fn hir_group(&self, group: &ast::Group, expr: Hir) -> Hir {
         Hir::group(hir::Group {
             kind: match group.kind {
                 ast::GroupKind::NonCapturing(_) => {
                     hir::GroupKind::NonCapturing
                 }
                 ast::GroupKind::CaptureIndex(index) => {
                     hir::GroupKind::CaptureIndex(index)
                 }
                 ast::GroupKind::CaptureName(ref named) => {
                     hir::GroupKind::CaptureName {
                         name: named.name.clone(),
                         index: named.index,
                     }
                 }
             },
             hir: Box::new(expr),
         })
     }

     /// Wrap the already translated `expr` in an HIR repetition that mirrors
     /// the operator of the Ast repetition.
     ///
     /// The greediness written in the pattern is inverted when the "swap
     /// greed" flag is active.
     fn hir_repetition(&self, rep: &ast::Repetition, expr: Hir) -> Hir {
         let kind = match rep.op.kind {
             ast::RepetitionKind::ZeroOrOne => hir::RepetitionKind::ZeroOrOne,
             ast::RepetitionKind::ZeroOrMore => hir::RepetitionKind::ZeroOrMore,
             ast::RepetitionKind::OneOrMore => hir::RepetitionKind::OneOrMore,
             ast::RepetitionKind::Range(ref range) => {
                 hir::RepetitionKind::Range(match *range {
                     ast::RepetitionRange::Exactly(m) => {
                         hir::RepetitionRange::Exactly(m)
                     }
                     ast::RepetitionRange::AtLeast(m) => {
                         hir::RepetitionRange::AtLeast(m)
                     }
                     ast::RepetitionRange::Bounded(m, n) => {
                         hir::RepetitionRange::Bounded(m, n)
                     }
                 })
             }
         };
         // Swapping flips the greediness; otherwise it is taken as written.
         let greedy = rep.greedy != self.flags().swap_greed();
         Hir::repetition(hir::Repetition {
             kind: kind,
             greedy: greedy,
             hir: Box::new(expr),
         })
     }

     fn hir_unicode_class(
         &self,
         ast_class: &ast::ClassUnicode,
     ) -> Result<hir::ClassUnicode> {
         use ast::ClassUnicodeKind::*;

         if !self.flags().unicode() {
             return Err(self.error(
                 ast_class.span,
                 ErrorKind::UnicodeNotAllowed,
             ));
         }
         let query = match ast_class.kind {
             OneLetter(name) => ClassQuery::OneLetter(name),
             Named(ref name) => ClassQuery::Binary(name),
             NamedValue { ref name, ref value, .. } => {
                 ClassQuery::ByValue {
                     property_name: name,
                     property_value: value,
                 }
             }
         };
         match unicode::class(query) {
             Ok(mut class) => {
                 self.unicode_fold_and_negate(ast_class.negated, &mut class);
                 Ok(class)
             }
             Err(unicode::Error::PropertyNotFound) => {
                 Err(self.error(
                     ast_class.span,
                     ErrorKind::UnicodePropertyNotFound,
                 ))
             }
             Err(unicode::Error::PropertyValueNotFound) => {
                 Err(self.error(
                     ast_class.span,
                     ErrorKind::UnicodePropertyValueNotFound,
                 ))
             }
         }
     }

     /// Translate a Perl class (`\d`, `\s`, `\w` or a negation thereof) into
     /// an HIR Unicode class.
     ///
     /// Panics if Unicode mode is not enabled.
     fn hir_perl_unicode_class(
         &self,
         ast_class: &ast::ClassPerl,
     ) -> hir::ClassUnicode {
         use ast::ClassPerlKind::*;
         use unicode_tables::perl_word::PERL_WORD;

         assert!(self.flags().unicode());
         let mut class = match ast_class.kind {
             Digit => {
                 unicode::class(ClassQuery::Binary("Decimal_Number")).unwrap()
             }
             Space => {
                 unicode::class(ClassQuery::Binary("Whitespace")).unwrap()
             }
             Word => unicode::hir_class(PERL_WORD),
         };
         // We needn't apply case folding here because the Perl Unicode classes
         // are already closed under Unicode simple case folding.
         if ast_class.negated {
             class.negate();
         }
         class
     }

     /// Build the ASCII-only byte class for a Perl class (digit, space or
     /// word), negating it when the AST class is negated.
     ///
     /// # Panics
     ///
     /// Panics if the Unicode flag is enabled.
     fn hir_perl_byte_class(
         &self,
         ast_class: &ast::ClassPerl,
     ) -> hir::ClassBytes {
         use ast::ClassPerlKind::*;

         assert!(!self.flags().unicode());
         // Each Perl class corresponds to the ASCII class of the same name.
         let ascii_kind = match ast_class.kind {
             Digit => ast::ClassAsciiKind::Digit,
             Space => ast::ClassAsciiKind::Space,
             Word => ast::ClassAsciiKind::Word,
         };
         let mut class = hir_ascii_class_bytes(&ascii_kind);
         // No case folding is required: the Perl ASCII classes are already
         // closed under ASCII case folding.
         if ast_class.negated {
             class.negate();
         }
         class
     }

     /// Apply case folding (when the case insensitive flag is set) and then
     /// negation (when `negated` is true) to the given Unicode class.
     fn unicode_fold_and_negate(
         &self,
         negated: bool,
         class: &mut hir::ClassUnicode,
     ) {
         // The order is significant: fold first, negate second. Take
         // `(?i)[^x]` as an example. Negating first and folding afterwards
         // would produce a class that matches every Unicode scalar value.
         let fold = self.flags().case_insensitive();
         if fold {
             class.case_fold_simple();
         }
         if negated {
             class.negate();
         }
     }

     fn bytes_fold_and_negate(
         &self,
         span: &Span,
         negated: bool,
         class: &mut hir::ClassBytes,
     ) -> Result<()> {
         // Note that we must apply case folding before negation!
         // Consider `(?i)[^x]`. If we applied negation field, then
         // the result would be the character class that matched any
         // Unicode scalar value.
         if self.flags().case_insensitive() {
             class.case_fold_simple();
         }
         if negated {
             class.negate();
         }
         if !self.trans().allow_invalid_utf8 && !class.is_all_ascii() {
             return Err(self.error(span.clone(), ErrorKind::InvalidUtf8));
         }
         Ok(())
     }

     /// Return a scalar byte value suitable for use as a literal in a byte
     /// character class.
     fn class_literal_byte(&self, ast: &ast::Literal) -> Result<u8> {
         match try!(self.literal_to_char(ast)) {
             hir::Literal::Byte(byte) => Ok(byte),
             hir::Literal::Unicode(ch) => {
                 if ch <= 0x7F as char {
                     Ok(ch as u8)
                 } else {
                     // We can't feasibly support Unicode in
                     // byte oriented classes. Byte classes don't
                     // do Unicode case folding.
                     Err(self.error(ast.span, ErrorKind::UnicodeNotAllowed))
                 }
             }
         }
     }
 }

 /// A translator's representation of a regular expression's flags at any given
 /// moment in time.
 ///
 /// Each flag can be in one of three states: absent, present but disabled or
 /// present but enabled.
 ///
 /// These states are stored as `None`, `Some(false)` and `Some(true)`
 /// respectively. The accessor methods on `Flags` resolve an absent flag to
 /// its default.
 #[derive(Clone, Copy, Debug, Default)]
 struct Flags {
     // Absent is treated as disabled by `Flags::case_insensitive`.
     case_insensitive: Option<bool>,
     // Absent is treated as disabled by `Flags::multi_line`.
     multi_line: Option<bool>,
     // Absent is treated as disabled by `Flags::dot_matches_new_line`.
     dot_matches_new_line: Option<bool>,
     // Absent is treated as disabled by `Flags::swap_greed`.
     swap_greed: Option<bool>,
     // Absent is treated as *enabled* by `Flags::unicode`.
     unicode: Option<bool>,
     // Note that `ignore_whitespace` is omitted here because it is handled
     // entirely in the parser.
 }

 impl Flags {
     /// Build a set of flags from the AST of a flag group.
     ///
     /// Flags listed before a negation item are recorded as enabled, flags
     /// listed after it as disabled. Flags that are not mentioned stay
     /// absent. `IgnoreWhitespace` is skipped.
     fn from_ast(ast: &ast::Flags) -> Flags {
         let mut flags = Flags::default();
         // Becomes `false` once the negation marker has been seen.
         let mut enable = true;
         for item in &ast.items {
             let flag = match item.kind {
                 ast::FlagsItemKind::Negation => {
                     enable = false;
                     continue;
                 }
                 ast::FlagsItemKind::Flag(ref flag) => flag,
             };
             match *flag {
                 ast::Flag::CaseInsensitive => {
                     flags.case_insensitive = Some(enable);
                 }
                 ast::Flag::MultiLine => {
                     flags.multi_line = Some(enable);
                 }
                 ast::Flag::DotMatchesNewLine => {
                     flags.dot_matches_new_line = Some(enable);
                 }
                 ast::Flag::SwapGreed => {
                     flags.swap_greed = Some(enable);
                 }
                 ast::Flag::Unicode => {
                     flags.unicode = Some(enable);
                 }
                 ast::Flag::IgnoreWhitespace => {}
             }
         }
         flags
     }

     /// Fill in every flag that is absent in `self` with the corresponding
     /// state from `previous`. Flags already present in `self` are kept.
     fn merge(&mut self, previous: &Flags) {
         self.case_insensitive =
             self.case_insensitive.or(previous.case_insensitive);
         self.multi_line = self.multi_line.or(previous.multi_line);
         self.dot_matches_new_line =
             self.dot_matches_new_line.or(previous.dot_matches_new_line);
         self.swap_greed = self.swap_greed.or(previous.swap_greed);
         self.unicode = self.unicode.or(previous.unicode);
     }

     /// Whether case insensitive matching is on (off when absent).
     fn case_insensitive(&self) -> bool {
         self.case_insensitive == Some(true)
     }

     /// Whether multi-line mode is on (off when absent).
     fn multi_line(&self) -> bool {
         self.multi_line == Some(true)
     }

     /// Whether "dot matches new line" mode is on (off when absent).
     fn dot_matches_new_line(&self) -> bool {
         self.dot_matches_new_line == Some(true)
     }

     /// Whether greediness is swapped (off when absent).
     fn swap_greed(&self) -> bool {
         self.swap_greed == Some(true)
     }

     /// Whether Unicode mode is on. Unlike the other flags, this one is on
     /// when absent and only off when explicitly disabled.
     fn unicode(&self) -> bool {
         self.unicode != Some(false)
     }
 }

 /// Build a byte class from the ranges that `ascii_class` returns for `kind`,
 /// converting each range endpoint to a byte.
 fn hir_ascii_class_bytes(kind: &ast::ClassAsciiKind) -> hir::ClassBytes {
     let mut ranges = Vec::new();
     for &(start, end) in ascii_class(kind) {
         ranges.push(hir::ClassBytesRange::new(start as u8, end as u8));
     }
     hir::ClassBytes::new(ranges)
 }

 /// Return the inclusive character ranges that make up the given ASCII
 /// character class.
 ///
 /// Every range is written as `(start, end)` with `start <= end`, and every
 /// endpoint is an ASCII character, so callers may convert the endpoints to
 /// bytes.
 fn ascii_class(kind: &ast::ClassAsciiKind) -> &'static [(char, char)] {
     use ast::ClassAsciiKind::*;

     // TODO: Get rid of these consts, which appear necessary for older
     // versions of Rust.
     type T = &'static [(char, char)];
     match *kind {
         Alnum => {
             const X: T = &[('0', '9'), ('A', 'Z'), ('a', 'z')];
             X
         }
         Alpha => {
             const X: T = &[('A', 'Z'), ('a', 'z')];
             X
         }
         Ascii => {
             const X: T = &[('\x00', '\x7F')];
             X
         }
         Blank => {
             // Exactly tab and space, as two separate singleton ranges. A
             // single `(' ', '\t')` range would cover everything between
             // the two once its endpoints are put in order, which wrongly
             // includes `\n`, `\r` and a number of control characters.
             const X: T = &[('\t', '\t'), (' ', ' ')];
             X
         }
         Cntrl => {
             const X: T = &[('\x00', '\x1F'), ('\x7F', '\x7F')];
             X
         }
         Digit => {
             const X: T = &[('0', '9')];
             X
         }
         Graph => {
             const X: T = &[('!', '~')];
             X
         }
         Lower => {
             const X: T = &[('a', 'z')];
             X
         }
         Print => {
             const X: T = &[(' ', '~')];
             X
         }
         Punct => {
             const X: T = &[('!', '/'), (':', '@'), ('[', '`'), ('{', '~')];
             X
         }
         Space => {
             const X: T = &[
                 ('\t', '\t'), ('\n', '\n'), ('\x0B', '\x0B'), ('\x0C', '\x0C'),
                 ('\r', '\r'), (' ', ' '),
             ];
             X
         }
         Upper => {
             const X: T = &[('A', 'Z')];
             X
         }
         Word => {
             const X: T = &[('0', '9'), ('A', 'Z'), ('_', '_'), ('a', 'z')];
             X
         }
         Xdigit => {
             const X: T = &[('0', '9'), ('A', 'F'), ('a', 'f')];
             X
         }
     }
 }

 #[cfg(test)]
 mod tests {
     use ast::{self, Ast, Position, Span};
     use ast::parse::ParserBuilder;
     use hir::{self, Hir, HirKind};
     use unicode::{self, ClassQuery};

     use super::{TranslatorBuilder, ascii_class};

     // We create these errors to compare with real hir::Errors in the tests.
     // We define equality between TestError and hir::Error to disregard the
     // pattern string in hir::Error, which is annoying to provide in tests.
     #[derive(Clone, Debug)]
     struct TestError {
         // Where in the pattern the error is expected to be reported.
         span: Span,
         // The kind of error that is expected.
         kind: hir::ErrorKind,
     }

     // A test error equals a real error when span and kind agree. The
     // pattern stored in the real error is deliberately not compared.
     impl PartialEq<hir::Error> for TestError {
         fn eq(&self, other: &hir::Error) -> bool {
             (&self.span, &self.kind) == (&other.span, &other.kind)
         }
     }

     // The mirror image of the impl above, so that a real error can appear
     // on the left hand side of `assert_eq!`.
     impl PartialEq<TestError> for hir::Error {
         fn eq(&self, other: &TestError) -> bool {
             (&self.span, &self.kind) == (&other.span, &other.kind)
         }
     }

     // Parse `pattern` into an AST with octal syntax enabled, panicking if
     // the pattern does not parse.
     fn parse(pattern: &str) -> Ast {
         let parsed = ParserBuilder::new().octal(true).build().parse(pattern);
         parsed.unwrap()
     }

     // Translate `pattern` with invalid UTF-8 disallowed, panicking if
     // either parsing or translation fails.
     fn t(pattern: &str) -> Hir {
         let ast = parse(pattern);
         let result = TranslatorBuilder::new()
             .allow_invalid_utf8(false)
             .build()
             .translate(pattern, &ast);
         result.unwrap()
     }

     // Translate `pattern` with invalid UTF-8 disallowed and return the
     // translation error, panicking if translation succeeds.
     fn t_err(pattern: &str) -> hir::Error {
         let ast = parse(pattern);
         let result = TranslatorBuilder::new()
             .allow_invalid_utf8(false)
             .build()
             .translate(pattern, &ast);
         result.unwrap_err()
     }

     // Translate `pattern` with invalid UTF-8 allowed, panicking if either
     // parsing or translation fails.
     fn t_bytes(pattern: &str) -> Hir {
         let ast = parse(pattern);
         let result = TranslatorBuilder::new()
             .allow_invalid_utf8(true)
             .build()
             .translate(pattern, &ast);
         result.unwrap()
     }

     // Build the expression for a string of Unicode literals: the empty
     // expression for "", otherwise a concatenation of one literal per char.
     fn hir_lit(s: &str) -> Hir {
         if s.is_empty() {
             return Hir::empty();
         }
         let mut lits = Vec::new();
         for ch in s.chars() {
             lits.push(Hir::literal(hir::Literal::Unicode(ch)));
         }
         Hir::concat(lits)
     }

     // Build the expression for a string of byte literals: the empty
     // expression for no bytes, a lone literal for one byte and a
     // concatenation of literals otherwise.
     fn hir_blit(s: &[u8]) -> Hir {
         if s.is_empty() {
             return Hir::empty();
         }
         if s.len() == 1 {
             return Hir::literal(hir::Literal::Byte(s[0]));
         }
         let mut lits = Vec::with_capacity(s.len());
         for &byte in s {
             lits.push(Hir::literal(hir::Literal::Byte(byte)));
         }
         Hir::concat(lits)
     }

     // Wrap `expr` in an unnamed capturing group with capture index `i`.
     fn hir_group(i: u32, expr: Hir) -> Hir {
         let group = hir::Group {
             kind: hir::GroupKind::CaptureIndex(i),
             hir: Box::new(expr),
         };
         Hir::group(group)
     }

     // Wrap `expr` in a named capturing group with capture index `i`.
     fn hir_group_name(i: u32, name: &str, expr: Hir) -> Hir {
         let kind = hir::GroupKind::CaptureName {
             name: String::from(name),
             index: i,
         };
         Hir::group(hir::Group { kind: kind, hir: Box::new(expr) })
     }

     // Wrap `expr` in a non-capturing group.
     fn hir_group_nocap(expr: Hir) -> Hir {
         let group = hir::Group {
             kind: hir::GroupKind::NonCapturing,
             hir: Box::new(expr),
         };
         Hir::group(group)
     }

     // Build a zero-or-one repetition of `expr`.
     fn hir_quest(greedy: bool, expr: Hir) -> Hir {
         let rep = hir::Repetition {
             kind: hir::RepetitionKind::ZeroOrOne,
             greedy: greedy,
             hir: Box::new(expr),
         };
         Hir::repetition(rep)
     }

     // Build a zero-or-more repetition of `expr`.
     fn hir_star(greedy: bool, expr: Hir) -> Hir {
         let rep = hir::Repetition {
             kind: hir::RepetitionKind::ZeroOrMore,
             greedy: greedy,
             hir: Box::new(expr),
         };
         Hir::repetition(rep)
     }

     // Build a one-or-more repetition of `expr`.
     fn hir_plus(greedy: bool, expr: Hir) -> Hir {
         let rep = hir::Repetition {
             kind: hir::RepetitionKind::OneOrMore,
             greedy: greedy,
             hir: Box::new(expr),
         };
         Hir::repetition(rep)
     }

     // Build a counted repetition of `expr` described by `range`.
     fn hir_range(greedy: bool, range: hir::RepetitionRange, expr: Hir) -> Hir {
         let rep = hir::Repetition {
             kind: hir::RepetitionKind::Range(range),
             greedy: greedy,
             hir: Box::new(expr),
         };
         Hir::repetition(rep)
     }

     // Short alias for `Hir::alternation`, used to keep expected values in
     // the tests compact.
     fn hir_alt(alts: Vec<Hir>) -> Hir {
         Hir::alternation(alts)
     }

     // Short alias for `Hir::concat`, used to keep expected values in the
     // tests compact.
     fn hir_cat(exprs: Vec<Hir>) -> Hir {
         Hir::concat(exprs)
     }

     // Build a Unicode class expression from a Unicode table query. Panics
     // if the query fails.
     fn hir_uclass_query(query: ClassQuery) -> Hir {
         Hir::class(hir::Class::Unicode(unicode::class(query).unwrap()))
     }

     // Build the Unicode class expression backed by the `PERL_WORD` table.
     fn hir_uclass_perl_word() -> Hir {
         use unicode_tables::perl_word::PERL_WORD;
         Hir::class(hir::Class::Unicode(unicode::hir_class(PERL_WORD)))
     }

     // Build a Unicode class expression from inclusive `(start, end)`
     // character ranges.
     fn hir_uclass(ranges: &[(char, char)]) -> Hir {
         let mut class_ranges = Vec::with_capacity(ranges.len());
         for &(start, end) in ranges {
             class_ranges.push(hir::ClassUnicodeRange::new(start, end));
         }
         let class = hir::ClassUnicode::new(class_ranges);
         Hir::class(hir::Class::Unicode(class))
     }

     // Build a byte class expression from inclusive `(start, end)` byte
     // ranges.
     fn hir_bclass(ranges: &[(u8, u8)]) -> Hir {
         let mut class_ranges = Vec::with_capacity(ranges.len());
         for &(start, end) in ranges {
             class_ranges.push(hir::ClassBytesRange::new(start, end));
         }
         let class = hir::ClassBytes::new(class_ranges);
         Hir::class(hir::Class::Bytes(class))
     }

     // Build a byte class expression from inclusive character ranges.
     // Panics if any endpoint is not ASCII.
     fn hir_bclass_from_char(ranges: &[(char, char)]) -> Hir {
         let mut class_ranges = Vec::with_capacity(ranges.len());
         for &(s, e) in ranges {
             assert!(s as u32 <= 0x7F);
             assert!(e as u32 <= 0x7F);
             class_ranges.push(hir::ClassBytesRange::new(s as u8, e as u8));
         }
         let class = hir::ClassBytes::new(class_ranges);
         Hir::class(hir::Class::Bytes(class))
     }

     // Apply simple case folding to a class expression. Panics if `expr` is
     // not a class.
     fn hir_case_fold(expr: Hir) -> Hir {
         let mut cls = match expr.into_kind() {
             HirKind::Class(cls) => cls,
             _ => panic!("cannot case fold non-class Hir expr"),
         };
         cls.case_fold_simple();
         Hir::class(cls)
     }

     // Negate a class expression. Panics if `expr` is not a class.
     fn hir_negate(expr: Hir) -> Hir {
         let mut cls = match expr.into_kind() {
             HirKind::Class(cls) => cls,
             _ => panic!("cannot negate non-class Hir expr"),
         };
         cls.negate();
         Hir::class(cls)
     }

     // Union two class expressions of the same flavor (both Unicode or both
     // bytes). Panics on anything else.
     fn hir_union(expr1: Hir, expr2: Hir) -> Hir {
         // First make sure both operands are classes at all...
         let (lhs, rhs) = match (expr1.into_kind(), expr2.into_kind()) {
             (HirKind::Class(lhs), HirKind::Class(rhs)) => (lhs, rhs),
             _ => panic!("cannot union non-class Hir exprs"),
         };
         // ...then that they are of the same flavor.
         match (lhs, rhs) {
             (hir::Class::Unicode(mut c1), hir::Class::Unicode(c2)) => {
                 c1.union(&c2);
                 Hir::class(hir::Class::Unicode(c1))
             }
             (hir::Class::Bytes(mut c1), hir::Class::Bytes(c2)) => {
                 c1.union(&c2);
                 Hir::class(hir::Class::Bytes(c1))
             }
             _ => panic!("cannot union non-class Hir exprs"),
         }
     }

     // Subtract the second class expression from the first. Both must be of
     // the same flavor (both Unicode or both bytes). Panics on anything
     // else.
     fn hir_difference(expr1: Hir, expr2: Hir) -> Hir {
         // First make sure both operands are classes at all...
         let (lhs, rhs) = match (expr1.into_kind(), expr2.into_kind()) {
             (HirKind::Class(lhs), HirKind::Class(rhs)) => (lhs, rhs),
             _ => panic!("cannot difference non-class Hir exprs"),
         };
         // ...then that they are of the same flavor.
         match (lhs, rhs) {
             (hir::Class::Unicode(mut c1), hir::Class::Unicode(c2)) => {
                 c1.difference(&c2);
                 Hir::class(hir::Class::Unicode(c1))
             }
             (hir::Class::Bytes(mut c1), hir::Class::Bytes(c2)) => {
                 c1.difference(&c2);
                 Hir::class(hir::Class::Bytes(c1))
             }
             _ => panic!("cannot difference non-class Hir exprs"),
         }
     }

     // Short alias for `Hir::anchor`, used to keep expected values in the
     // tests compact.
     fn hir_anchor(anchor: hir::Anchor) -> Hir {
         Hir::anchor(anchor)
     }

     // Short alias for `Hir::word_boundary`, used to keep expected values in
     // the tests compact.
     fn hir_word(wb: hir::WordBoundary) -> Hir {
         Hir::word_boundary(wb)
     }

     #[test]
     fn empty() {
         // Shorthand for the empty expression, which shows up everywhere in
         // this test.
         let e = Hir::empty;

         // Patterns that are empty at the top level.
         assert_eq!(t(""), e());
         assert_eq!(t("(?i)"), e());

         // Empty groups of every kind.
         assert_eq!(t("()"), hir_group(1, e()));
         assert_eq!(t("(?:)"), hir_group_nocap(e()));
         assert_eq!(t("(?P<wat>)"), hir_group_name(1, "wat", e()));

         // Alternations with empty branches at the top level.
         assert_eq!(t("|"), hir_alt(vec![e(), e()]));
         assert_eq!(
             t("()|()"),
             hir_alt(vec![hir_group(1, e()), hir_group(2, e())])
         );

         // Alternations with empty branches inside a group.
         assert_eq!(
             t("(|b)"),
             hir_group(1, hir_alt(vec![e(), hir_lit("b")]))
         );
         assert_eq!(
             t("(a|)"),
             hir_group(1, hir_alt(vec![hir_lit("a"), e()]))
         );
         assert_eq!(
             t("(a||c)"),
             hir_group(1, hir_alt(vec![hir_lit("a"), e(), hir_lit("c")]))
         );
         assert_eq!(
             t("(||)"),
             hir_group(1, hir_alt(vec![e(), e(), e()]))
         );
     }

     #[test]
     fn literal() {
         let pos = Position::new;
         let a = hir_lit("a");

         // Unicode literals, with invalid UTF-8 disallowed.
         assert_eq!(t("a"), a);
         assert_eq!(t("(?-u)a"), a);
         assert_eq!(t("☃"), hir_lit("☃"));
         assert_eq!(t("abcd"), hir_lit("abcd"));

         // With invalid UTF-8 allowed, ASCII is still reported as a Unicode
         // literal, while a non-ASCII byte becomes a byte literal.
         assert_eq!(t_bytes("(?-u)a"), a);
         assert_eq!(t_bytes("(?-u)\x61"), a);
         assert_eq!(t_bytes(r"(?-u)\x61"), a);
         assert_eq!(t_bytes(r"(?-u)\xFF"), hir_blit(b"\xFF"));

         // Non-ASCII literals are rejected when Unicode is disabled and
         // invalid UTF-8 is disallowed.
         let snowman_err = TestError {
             kind: hir::ErrorKind::UnicodeNotAllowed,
             span: Span::new(pos(5, 1, 6), pos(8, 1, 7)),
         };
         assert_eq!(t_err("(?-u)☃"), snowman_err);

         let byte_err = TestError {
             kind: hir::ErrorKind::InvalidUtf8,
             span: Span::new(pos(5, 1, 6), pos(9, 1, 10)),
         };
         assert_eq!(t_err(r"(?-u)\xFF"), byte_err);
     }

     #[test]
     fn literal_case_insensitive() {
         // Two-member classes made of an upper/lower case pair, in the
         // Unicode and the byte flavor respectively.
         let upair = |upper: char, lower: char| {
             hir_uclass(&[(upper, upper), (lower, lower)])
         };
         let bpair = |upper: u8, lower: u8| {
             hir_bclass(&[(upper, upper), (lower, lower)])
         };

         // Unicode mode.
         assert_eq!(t("(?i)a"), upair('A', 'a'));
         assert_eq!(t("(?i:a)"), hir_group_nocap(upair('A', 'a')));
         assert_eq!(
             t("a(?i)a(?-i)a"),
             hir_cat(vec![hir_lit("a"), upair('A', 'a'), hir_lit("a")])
         );
         assert_eq!(
             t("(?i)ab@c"),
             hir_cat(vec![
                 upair('A', 'a'),
                 upair('B', 'b'),
                 hir_lit("@"),
                 upair('C', 'c'),
             ])
         );
         assert_eq!(
             t("(?i)β"),
             hir_uclass(&[('Β', 'Β'), ('β', 'β'), ('ϐ', 'ϐ')])
         );

         // Unicode disabled, invalid UTF-8 disallowed.
         assert_eq!(t("(?i-u)a"), bpair(b'A', b'a'));
         assert_eq!(
             t("(?-u)a(?i)a(?-i)a"),
             hir_cat(vec![hir_lit("a"), bpair(b'A', b'a'), hir_lit("a")])
         );
         assert_eq!(
             t("(?i-u)ab@c"),
             hir_cat(vec![
                 bpair(b'A', b'a'),
                 bpair(b'B', b'b'),
                 hir_lit("@"),
                 bpair(b'C', b'c'),
             ])
         );

         // Unicode disabled, invalid UTF-8 allowed.
         assert_eq!(t_bytes("(?i-u)a"), bpair(b'A', b'a'));
         assert_eq!(t_bytes("(?i-u)\x61"), bpair(b'A', b'a'));
         assert_eq!(t_bytes(r"(?i-u)\x61"), bpair(b'A', b'a'));
         assert_eq!(t_bytes(r"(?i-u)\xFF"), hir_blit(b"\xFF"));

         // A non-ASCII literal is an error once Unicode is disabled.
         let beta_err = TestError {
             kind: hir::ErrorKind::UnicodeNotAllowed,
             span: Span::new(Position::new(6, 1, 7), Position::new(8, 1, 8)),
         };
         assert_eq!(t_err("(?i-u)β"), beta_err);
     }

     #[test]
     fn dot() {
         let pos = Position::new;

         // Unicode mode: everything except `\n`, or everything with `s`.
         let unicode_no_newline =
             hir_uclass(&[('\0', '\t'), ('\x0B', '\u{10FFFF}')]);
         let unicode_any = hir_uclass(&[('\0', '\u{10FFFF}')]);
         assert_eq!(t("."), unicode_no_newline);
         assert_eq!(t("(?s)."), unicode_any);

         // Byte mode: the same, but over all byte values.
         let bytes_no_newline =
             hir_bclass(&[(b'\0', b'\t'), (b'\x0B', b'\xFF')]);
         let bytes_any = hir_bclass(&[(b'\0', b'\xFF')]);
         assert_eq!(t_bytes("(?-u)."), bytes_no_newline);
         assert_eq!(t_bytes("(?s-u)."), bytes_any);

         // If invalid UTF-8 isn't allowed, then non-Unicode `.` isn't
         // allowed either.
         let plain_err = TestError {
             kind: hir::ErrorKind::InvalidUtf8,
             span: Span::new(pos(5, 1, 6), pos(6, 1, 7)),
         };
         assert_eq!(t_err("(?-u)."), plain_err);

         let dotall_err = TestError {
             kind: hir::ErrorKind::InvalidUtf8,
             span: Span::new(pos(6, 1, 7), pos(7, 1, 8)),
         };
         assert_eq!(t_err("(?s-u)."), dotall_err);
     }

     #[test]
     fn assertions() {
         let start_text = || hir_anchor(hir::Anchor::StartText);
         let end_text = || hir_anchor(hir::Anchor::EndText);

         // Without multi-line mode, `^`/`$` behave like `\A`/`\z`.
         assert_eq!(t("^"), start_text());
         assert_eq!(t("$"), end_text());
         assert_eq!(t(r"\A"), start_text());
         assert_eq!(t(r"\z"), end_text());

         // Multi-line mode only changes `^` and `$`.
         assert_eq!(t("(?m)^"), hir_anchor(hir::Anchor::StartLine));
         assert_eq!(t("(?m)$"), hir_anchor(hir::Anchor::EndLine));
         assert_eq!(t(r"(?m)\A"), start_text());
         assert_eq!(t(r"(?m)\z"), end_text());

         // Word boundaries, Unicode and ASCII.
         assert_eq!(t(r"\b"), hir_word(hir::WordBoundary::Unicode));
         assert_eq!(t(r"\B"), hir_word(hir::WordBoundary::UnicodeNegate));
         assert_eq!(t(r"(?-u)\b"), hir_word(hir::WordBoundary::Ascii));
         assert_eq!(
             t_bytes(r"(?-u)\B"),
             hir_word(hir::WordBoundary::AsciiNegate)
         );

         // TODO(ag): Enable this test when regex goes to 1.0.
         // assert_eq!(t_err(r"(?-u)\B"), TestError {
             // kind: hir::ErrorKind::InvalidUtf8,
             // span: Span::new(Position::new(5, 1, 6), Position::new(7, 1, 8)),
         // });
     }

     #[test]
     fn group() {
         let a = || hir_lit("a");
         let b = || hir_lit("b");
         let c = || hir_lit("c");

         // Capture indices are handed out in order of appearance.
         assert_eq!(t("(a)"), hir_group(1, a()));
         assert_eq!(
             t("(a)(b)"),
             hir_cat(vec![hir_group(1, a()), hir_group(2, b())])
         );
         assert_eq!(
             t("(a)|(b)"),
             hir_alt(vec![hir_group(1, a()), hir_group(2, b())])
         );

         // Named groups.
         assert_eq!(t("(?P<foo>)"), hir_group_name(1, "foo", Hir::empty()));
         assert_eq!(t("(?P<foo>a)"), hir_group_name(1, "foo", a()));
         assert_eq!(
             t("(?P<foo>a)(?P<bar>b)"),
             hir_cat(vec![
                 hir_group_name(1, "foo", a()),
                 hir_group_name(2, "bar", b()),
             ])
         );

         // Non-capturing groups do not use up a capture index.
         assert_eq!(t("(?:)"), hir_group_nocap(Hir::empty()));
         assert_eq!(t("(?:a)"), hir_group_nocap(a()));
         assert_eq!(
             t("(?:a)(b)"),
             hir_cat(vec![hir_group_nocap(a()), hir_group(1, b())])
         );
         assert_eq!(
             t("(a)(?:b)(c)"),
             hir_cat(vec![
                 hir_group(1, a()),
                 hir_group_nocap(b()),
                 hir_group(2, c()),
             ])
         );

         // Named and unnamed groups share one index sequence.
         assert_eq!(
             t("(a)(?P<foo>b)(c)"),
             hir_cat(vec![
                 hir_group(1, a()),
                 hir_group_name(2, "foo", b()),
                 hir_group(3, c()),
             ])
         );
     }

     #[test]
     fn flags() {
         // The literal `a`, and `a` under case insensitive matching in
         // Unicode and byte mode respectively.
         let a = || hir_lit("a");
         let a_ufold = || hir_uclass(&[('A', 'A'), ('a', 'a')]);
         let a_bfold = || hir_bclass(&[(b'A', b'A'), (b'a', b'a')]);

         // Flags set on a group do not leak out of that group.
         assert_eq!(
             t("(?i:a)a"),
             hir_cat(vec![hir_group_nocap(a_ufold()), a()])
         );
         assert_eq!(
             t("(?i-u:a)β"),
             hir_cat(vec![hir_group_nocap(a_bfold()), hir_lit("β")])
         );
         assert_eq!(
             t("(?i)(?-i:a)a"),
             hir_cat(vec![hir_group_nocap(a()), a_ufold()])
         );

         // Several flags at once, and flags changing mid-pattern.
         assert_eq!(
             t("(?im)a^"),
             hir_cat(vec![a_ufold(), hir_anchor(hir::Anchor::StartLine)])
         );
         assert_eq!(
             t("(?im)a^(?i-m)a^"),
             hir_cat(vec![
                 a_ufold(),
                 hir_anchor(hir::Anchor::StartLine),
                 a_ufold(),
                 hir_anchor(hir::Anchor::StartText),
             ])
         );

         // The swap-greed flag inverts the greediness of each repetition.
         assert_eq!(
             t("(?U)a*a*?(?-U)a*a*?"),
             hir_cat(vec![
                 hir_star(false, a()),
                 hir_star(true, a()),
                 hir_star(true, a()),
                 hir_star(false, a()),
             ])
         );

         // Flags set inside a group are dropped when the group ends.
         assert_eq!(
             t("(?:a(?i)a)a"),
             hir_cat(vec![
                 hir_group_nocap(hir_cat(vec![a(), a_ufold()])),
                 a(),
             ])
         );
         assert_eq!(
             t("(?i)(?:a(?-i)a)a"),
             hir_cat(vec![
                 hir_group_nocap(hir_cat(vec![a_ufold(), a()])),
                 a_ufold(),
             ])
         );
     }

     #[test]
     fn escape() {
         // Every escaped meta character translates to the character itself.
         let pattern = r"\\\.\+\*\?\(\)\|\[\]\{\}\^\$\#";
         let expected = hir_lit(r"\.+*?()|[]{}^$#");
         assert_eq!(t(pattern), expected);
     }

     #[test]
     fn repetition() {
         let a = || hir_lit("a");
         let b = || hir_lit("b");
         // A counted repetition of the literal `a`.
         let counted = |greedy: bool, range: hir::RepetitionRange| {
             hir_range(greedy, range, hir_lit("a"))
         };

         // Greedy operators.
         assert_eq!(t("a?"), hir_quest(true, a()));
         assert_eq!(t("a*"), hir_star(true, a()));
         assert_eq!(t("a+"), hir_plus(true, a()));

         // Their non-greedy counterparts.
         assert_eq!(t("a??"), hir_quest(false, a()));
         assert_eq!(t("a*?"), hir_star(false, a()));
         assert_eq!(t("a+?"), hir_plus(false, a()));

         // Greedy counted repetitions.
         assert_eq!(
             t("a{1}"),
             counted(true, hir::RepetitionRange::Exactly(1))
         );
         assert_eq!(
             t("a{1,}"),
             counted(true, hir::RepetitionRange::AtLeast(1))
         );
         assert_eq!(
             t("a{1,2}"),
             counted(true, hir::RepetitionRange::Bounded(1, 2))
         );

         // Non-greedy counted repetitions.
         assert_eq!(
             t("a{1}?"),
             counted(false, hir::RepetitionRange::Exactly(1))
         );
         assert_eq!(
             t("a{1,}?"),
             counted(false, hir::RepetitionRange::AtLeast(1))
         );
         assert_eq!(
             t("a{1,2}?"),
             counted(false, hir::RepetitionRange::Bounded(1, 2))
         );

         // A repetition binds tighter than concatenation and alternation.
         assert_eq!(t("ab?"), hir_cat(vec![a(), hir_quest(true, b())]));
         assert_eq!(
             t("(ab)?"),
             hir_quest(true, hir_group(1, hir_cat(vec![a(), b()])))
         );
         assert_eq!(t("a|b?"), hir_alt(vec![a(), hir_quest(true, b())]));
     }

     #[test]
     fn cat_alt() {
         // Turn each string into a literal expression, in order.
         let lits = |strs: &[&str]| -> Vec<Hir> {
             let mut exprs = Vec::new();
             for &s in strs {
                 exprs.push(hir_lit(s));
             }
             exprs
         };

         assert_eq!(t("(ab)"), hir_group(1, hir_cat(lits(&["a", "b"]))));

         // Alternations at the top level.
         assert_eq!(t("a|b"), hir_alt(lits(&["a", "b"])));
         assert_eq!(t("a|b|c"), hir_alt(lits(&["a", "b", "c"])));
         assert_eq!(t("ab|bc|cd"), hir_alt(lits(&["ab", "bc", "cd"])));

         // The same alternations wrapped in a capturing group.
         assert_eq!(t("(a|b)"), hir_group(1, hir_alt(lits(&["a", "b"]))));
         assert_eq!(
             t("(a|b|c)"),
             hir_group(1, hir_alt(lits(&["a", "b", "c"])))
         );
         assert_eq!(
             t("(ab|bc|cd)"),
             hir_group(1, hir_alt(lits(&["ab", "bc", "cd"])))
         );

         // Nested groups, built from the inside out.
         let innermost = hir_group(3, hir_lit("cd"));
         let middle = hir_group(2, hir_alt(vec![hir_lit("bc"), innermost]));
         assert_eq!(
             t("(ab|(bc|(cd)))"),
             hir_group(1, hir_alt(vec![hir_lit("ab"), middle]))
         );
     }

     #[test]
     fn class_ascii() {
         let pos = Position::new;

         // In Unicode mode, every ASCII class translates to a Unicode class
         // with exactly the ranges listed by `ascii_class`.
         let table = [
             ("[[:alnum:]]", ast::ClassAsciiKind::Alnum),
             ("[[:alpha:]]", ast::ClassAsciiKind::Alpha),
             ("[[:ascii:]]", ast::ClassAsciiKind::Ascii),
             ("[[:blank:]]", ast::ClassAsciiKind::Blank),
             ("[[:cntrl:]]", ast::ClassAsciiKind::Cntrl),
             ("[[:digit:]]", ast::ClassAsciiKind::Digit),
             ("[[:graph:]]", ast::ClassAsciiKind::Graph),
             ("[[:lower:]]", ast::ClassAsciiKind::Lower),
             ("[[:print:]]", ast::ClassAsciiKind::Print),
             ("[[:punct:]]", ast::ClassAsciiKind::Punct),
             ("[[:space:]]", ast::ClassAsciiKind::Space),
             ("[[:upper:]]", ast::ClassAsciiKind::Upper),
             ("[[:word:]]", ast::ClassAsciiKind::Word),
             ("[[:xdigit:]]", ast::ClassAsciiKind::Xdigit),
         ];
         for &(pattern, ref kind) in table.iter() {
             assert_eq!(t(pattern), hir_uclass(ascii_class(kind)));
         }

         let lower = ascii_class(&ast::ClassAsciiKind::Lower);

         // Negation and case folding in Unicode mode.
         assert_eq!(t("[[:^lower:]]"), hir_negate(hir_uclass(lower)));
         assert_eq!(
             t("(?i)[[:lower:]]"),
             hir_uclass(&[
                 ('A', 'Z'),
                 ('a', 'z'),
                 ('\u{17F}', '\u{17F}'),
                 ('\u{212A}', '\u{212A}'),
             ])
         );

         // With Unicode disabled, the result is a byte class instead.
         assert_eq!(t("(?-u)[[:lower:]]"), hir_bclass_from_char(lower));
         assert_eq!(
             t("(?i-u)[[:lower:]]"),
             hir_case_fold(hir_bclass_from_char(lower))
         );

         // A negated byte class is rejected when invalid UTF-8 is
         // disallowed.
         let negated_err = TestError {
             kind: hir::ErrorKind::InvalidUtf8,
             span: Span::new(pos(6, 1, 7), pos(16, 1, 17)),
         };
         assert_eq!(t_err("(?-u)[[:^lower:]]"), negated_err);

         let negated_fold_err = TestError {
             kind: hir::ErrorKind::InvalidUtf8,
             span: Span::new(pos(7, 1, 8), pos(17, 1, 18)),
         };
         assert_eq!(t_err("(?i-u)[[:^lower:]]"), negated_fold_err);
     }

     /// Checks translation of the Perl classes `\d`, `\s`, `\w` and their
     /// negations, in Unicode mode and in ASCII-only (`(?-u)`) mode, with and
     /// without the case insensitive flag.
     #[test]
     fn class_perl() {
         // Builders for the expected HIR. They are closures so that every
         // assertion receives a freshly constructed value.
         let uni_digit = || hir_uclass_query(ClassQuery::Binary("digit"));
         let uni_space = || hir_uclass_query(ClassQuery::Binary("space"));
         let uni_word = || hir_uclass_perl_word();
         let ascii_digit = || {
             hir_bclass_from_char(ascii_class(&ast::ClassAsciiKind::Digit))
         };
         let ascii_space = || {
             hir_bclass_from_char(ascii_class(&ast::ClassAsciiKind::Space))
         };
         let ascii_word = || {
             hir_bclass_from_char(ascii_class(&ast::ClassAsciiKind::Word))
         };

         // Unicode. The `i` flag is expected to leave the class unchanged.
         assert_eq!(t(r"\d"), uni_digit());
         assert_eq!(t(r"\s"), uni_space());
         assert_eq!(t(r"\w"), uni_word());
         assert_eq!(t(r"(?i)\d"), uni_digit());
         assert_eq!(t(r"(?i)\s"), uni_space());
         assert_eq!(t(r"(?i)\w"), uni_word());

         // Unicode, negated
         assert_eq!(t(r"\D"), hir_negate(uni_digit()));
         assert_eq!(t(r"\S"), hir_negate(uni_space()));
         assert_eq!(t(r"\W"), hir_negate(uni_word()));
         assert_eq!(t(r"(?i)\D"), hir_negate(uni_digit()));
         assert_eq!(t(r"(?i)\S"), hir_negate(uni_space()));
         assert_eq!(t(r"(?i)\W"), hir_negate(uni_word()));

         // ASCII only
         assert_eq!(t(r"(?-u)\d"), ascii_digit());
         assert_eq!(t(r"(?-u)\s"), ascii_space());
         assert_eq!(t(r"(?-u)\w"), ascii_word());
         assert_eq!(t(r"(?i-u)\d"), ascii_digit());
         assert_eq!(t(r"(?i-u)\s"), ascii_space());
         assert_eq!(t(r"(?i-u)\w"), ascii_word());

         // ASCII only, negated
         assert_eq!(t(r"(?-u)\D"), hir_negate(ascii_digit()));
         assert_eq!(t(r"(?-u)\S"), hir_negate(ascii_space()));
         assert_eq!(t(r"(?-u)\W"), hir_negate(ascii_word()));
         assert_eq!(t(r"(?i-u)\D"), hir_negate(ascii_digit()));
         assert_eq!(t(r"(?i-u)\S"), hir_negate(ascii_space()));
         assert_eq!(t(r"(?i-u)\W"), hir_negate(ascii_word()));
     }

     /// Checks translation of `\p{..}`/`\P{..}` Unicode classes, including
     /// loose name matching, the `gc` property prefix, negation, case folding
     /// and the errors reported for disallowed or unknown properties.
     #[test]
     fn class_unicode() {
         // Expected class for a single-name Unicode query.
         let binary = |name: &'static str| {
             hir_uclass_query(ClassQuery::Binary(name))
         };
         // Expected error. Every error in this test lies on line 1, where the
         // column is always the offset plus one.
         let failure = |kind: hir::ErrorKind, start: usize, end: usize| {
             TestError {
                 kind: kind,
                 span: Span::new(
                     Position::new(start, 1, start + 1),
                     Position::new(end, 1, end + 1)),
             }
         };

         // Non-negated classes: (pattern, name of the expected query).
         for &(pat, name) in &[
             (r"\pZ", "Z"),
             (r"\pz", "Z"),
             (r"\p{Separator}", "Z"),
             (r"\p{se      PaRa ToR}", "Z"),
             (r"\p{gc:Separator}", "Z"),
             (r"\p{gc=Separator}", "Z"),
             (r"\p{Other}", "Other"),
             (r"\pC", "Other"),
             (r"\p{Greek}", "Greek"),
             (r"\p{any}", "Any"),
             (r"\p{assigned}", "Assigned"),
             (r"\p{ascii}", "ASCII"),
             (r"\p{gc:any}", "Any"),
             (r"\p{gc:assigned}", "Assigned"),
             (r"\p{gc:ascii}", "ASCII"),
         ] {
             assert_eq!(t(pat), binary(name), "pattern: {}", pat);
         }

         // Negated classes.
         for &pat in &[r"\PZ", r"\P{separator}", r"\P{gc!=separator}"] {
             assert_eq!(t(pat), hir_negate(binary("Z")), "pattern: {}", pat);
         }

         // Case folding is applied before negation.
         assert_eq!(t(r"(?i)\p{Greek}"), hir_case_fold(binary("Greek")));
         assert_eq!(
             t(r"(?i)\P{Greek}"),
             hir_negate(hir_case_fold(binary("Greek"))));

         // Unicode classes are rejected when Unicode mode is disabled.
         assert_eq!(
             t_err(r"(?-u)\pZ"),
             failure(hir::ErrorKind::UnicodeNotAllowed, 5, 8));
         assert_eq!(
             t_err(r"(?-u)\p{Separator}"),
             failure(hir::ErrorKind::UnicodeNotAllowed, 5, 18));

         // Unknown property names.
         assert_eq!(
             t_err(r"\pE"),
             failure(hir::ErrorKind::UnicodePropertyNotFound, 0, 3));
         assert_eq!(
             t_err(r"\p{Foo}"),
             failure(hir::ErrorKind::UnicodePropertyNotFound, 0, 7));

         // Known property names with unknown values.
         assert_eq!(
             t_err(r"\p{gc:Foo}"),
             failure(hir::ErrorKind::UnicodePropertyValueNotFound, 0, 10));
         assert_eq!(
             t_err(r"\p{sc:Foo}"),
             failure(hir::ErrorKind::UnicodePropertyValueNotFound, 0, 10));
         assert_eq!(
             t_err(r"\p{scx:Foo}"),
             failure(hir::ErrorKind::UnicodePropertyValueNotFound, 0, 11));
         assert_eq!(
             t_err(r"\p{age:Foo}"),
             failure(hir::ErrorKind::UnicodePropertyValueNotFound, 0, 11));
     }

     /// Checks translation of bracketed classes: literals, ranges, embedded
     /// Perl/Unicode classes, negation, case folding, byte classes, unusual
     /// escapes and the errors for invalid UTF-8 and empty classes.
     #[test]
     fn class_bracketed() {
         // Small builders for the expected values used repeatedly below.
         let only = |c: char| hir_uclass(&[(c, c)]);
         let range = |lo: char, hi: char| hir_uclass(&[(lo, hi)]);
         let byte = |b: u8| hir_bclass(&[(b, b)]);
         let query = |name: &'static str| {
             hir_uclass_query(ClassQuery::Binary(name))
         };
         // Every error in this test lies on line 1, where the column is
         // always the offset plus one.
         let failure = |kind: hir::ErrorKind, start: usize, end: usize| {
             TestError {
                 kind: kind,
                 span: Span::new(
                     Position::new(start, 1, start + 1),
                     Position::new(end, 1, end + 1)),
             }
         };

         assert_eq!(t("[a]"), only('a'));
         assert_eq!(t("[^[a]]"), hir_negate(only('a')));
         assert_eq!(t("[a-z]"), range('a', 'z'));
         assert_eq!(t("[a-fd-h]"), range('a', 'h'));
         assert_eq!(t("[a-fg-m]"), range('a', 'm'));
         assert_eq!(t(r"[\x00]"), only('\0'));
         assert_eq!(t(r"[\n]"), only('\n'));
         assert_eq!(t("[\n]"), only('\n'));

         // Embedded classes, including double negations that cancel out.
         assert_eq!(t(r"[\d]"), query("digit"));
         assert_eq!(t(r"[\pZ]"), query("separator"));
         assert_eq!(t(r"[\p{separator}]"), query("separator"));
         assert_eq!(t(r"[^\D]"), query("digit"));
         assert_eq!(t(r"[^\PZ]"), query("separator"));
         assert_eq!(t(r"[^\P{separator}]"), query("separator"));
         assert_eq!(t(r"(?i)[^\D]"), query("digit"));
         assert_eq!(t(r"(?i)[^\P{greek}]"), hir_case_fold(query("greek")));

         // Byte classes.
         assert_eq!(t("(?-u)[a]"), byte(b'a'));
         assert_eq!(t(r"(?-u)[\x00]"), byte(b'\0'));
         assert_eq!(t_bytes(r"(?-u)[\xFF]"), byte(b'\xFF'));

         // Case insensitive classes.
         assert_eq!(t("(?i)[a]"), hir_uclass(&[('A', 'A'), ('a', 'a')]));
         assert_eq!(
             t("(?i)[k]"),
             hir_uclass(&[('K', 'K'), ('k', 'k'), ('\u{212A}', '\u{212A}')]));
         assert_eq!(
             t("(?i)[β]"),
             hir_uclass(&[('Β', 'Β'), ('β', 'β'), ('ϐ', 'ϐ')]));
         assert_eq!(
             t("(?i-u)[k]"),
             hir_bclass(&[(b'K', b'K'), (b'k', b'k')]));

         // Negated classes.
         assert_eq!(t("[^a]"), hir_negate(only('a')));
         assert_eq!(t(r"[^\x00]"), hir_negate(only('\0')));
         assert_eq!(t_bytes("(?-u)[^a]"), hir_negate(byte(b'a')));
         assert_eq!(t(r"[^\d]"), hir_negate(query("digit")));
         assert_eq!(t(r"[^\pZ]"), hir_negate(query("separator")));
         assert_eq!(t(r"[^\p{separator}]"), hir_negate(query("separator")));
         assert_eq!(
             t(r"(?i)[^\p{greek}]"),
             hir_negate(hir_case_fold(query("greek"))));
         assert_eq!(
             t(r"(?i)[\P{greek}]"),
             hir_negate(hir_case_fold(query("greek"))));

         // Test some weird cases.
         assert_eq!(t(r"[\[]"), only('['));

         assert_eq!(t(r"[&]"), only('&'));
         assert_eq!(t(r"[\&]"), only('&'));
         assert_eq!(t(r"[\&\&]"), only('&'));
         assert_eq!(t(r"[\x00-&]"), range('\0', '&'));
         assert_eq!(t(r"[&-\xFF]"), range('&', '\u{FF}'));

         assert_eq!(t(r"[~]"), only('~'));
         assert_eq!(t(r"[\~]"), only('~'));
         assert_eq!(t(r"[\~\~]"), only('~'));
         assert_eq!(t(r"[\x00-~]"), range('\0', '~'));
         assert_eq!(t(r"[~-\xFF]"), range('~', '\u{FF}'));

         assert_eq!(t(r"[-]"), only('-'));
         assert_eq!(t(r"[\-]"), only('-'));
         assert_eq!(t(r"[\-\-]"), only('-'));
         assert_eq!(t(r"[\x00-\-]"), range('\0', '-'));
         assert_eq!(t(r"[\--\xFF]"), range('-', '\u{FF}'));

         // Error cases.
         assert_eq!(
             t_err("(?-u)[^a]"),
             failure(hir::ErrorKind::InvalidUtf8, 5, 9));
         assert_eq!(
             t_err(r"[^\s\S]"),
             failure(hir::ErrorKind::EmptyClassNotAllowed, 0, 7));
         assert_eq!(
             t_err(r"(?-u)[^\s\S]"),
             failure(hir::ErrorKind::EmptyClassNotAllowed, 5, 12));
     }

     /// Checks that the items of a bracketed class are unioned, including
     /// nested classes, and that case folding and negation apply to the
     /// whole union.
     #[test]
     fn class_bracketed_union() {
         // Expected classes for the individual items used below.
         let age3 = || hir_uclass_query(ClassQuery::ByValue {
             property_name: "age",
             property_value: "3.0",
         });
         let separator = || hir_uclass_query(ClassQuery::Binary("separator"));
         let greek = || hir_uclass_query(ClassQuery::Binary("greek"));
         let cyrillic = || hir_uclass_query(ClassQuery::Binary("cyrillic"));
         // Expected unions shared by several assertions.
         let greek_sep = || hir_union(greek(), separator());
         let age_greek_sep = || hir_union(age3(), greek_sep());

         assert_eq!(t("[a-zA-Z]"), hir_uclass(&[('A', 'Z'), ('a', 'z')]));
         assert_eq!(
             t(r"[a\pZb]"),
             hir_union(hir_uclass(&[('a', 'b')]), separator()));
         assert_eq!(t(r"[\pZ\p{Greek}]"), greek_sep());
         assert_eq!(t(r"[\p{age:3.0}\pZ\p{Greek}]"), age_greek_sep());
         assert_eq!(
             t(r"[[[\p{age:3.0}\pZ]\p{Greek}][\p{Cyrillic}]]"),
             hir_union(age3(), hir_union(cyrillic(), greek_sep())));

         // Flags and negation wrap the complete union.
         assert_eq!(
             t(r"(?i)[\p{age:3.0}\pZ\p{Greek}]"),
             hir_case_fold(age_greek_sep()));
         assert_eq!(
             t(r"[^\p{age:3.0}\pZ\p{Greek}]"),
             hir_negate(age_greek_sep()));
         assert_eq!(
             t(r"(?i)[^\p{age:3.0}\pZ\p{Greek}]"),
             hir_negate(hir_case_fold(age_greek_sep())));
     }

     /// Checks bracketed classes that contain a negated nested class, with
     /// and without outer negation and case folding, plus the error raised
     /// when the result is an empty class.
     #[test]
     fn class_bracketed_nested() {
         // The class containing only `c`, and the case insensitive variant
         // that is expected after a double negation.
         let c_only = || hir_uclass(&[('c', 'c')]);
         let c_any_case = || hir_uclass(&[('C', 'C'), ('c', 'c')]);
         // Both errors lie on line 1, where the column is the offset plus one.
         let empty_class = |start: usize, end: usize| {
             TestError {
                 kind: hir::ErrorKind::EmptyClassNotAllowed,
                 span: Span::new(
                     Position::new(start, 1, start + 1),
                     Position::new(end, 1, end + 1)),
             }
         };

         assert_eq!(t(r"[a[^c]]"), hir_negate(c_only()));
         assert_eq!(t(r"[a-b[^c]]"), hir_negate(c_only()));
         assert_eq!(t(r"[a-c[^c]]"), hir_negate(hir_uclass(&[])));

         assert_eq!(t(r"[^a[^c]]"), c_only());
         assert_eq!(t(r"[^a-b[^c]]"), c_only());

         assert_eq!(
             t(r"(?i)[a[^c]]"),
             hir_negate(hir_case_fold(c_only())));
         assert_eq!(
             t(r"(?i)[a-b[^c]]"),
             hir_negate(hir_case_fold(c_only())));

         assert_eq!(t(r"(?i)[^a[^c]]"), c_any_case());
         assert_eq!(t(r"(?i)[^a-b[^c]]"), c_any_case());

         assert_eq!(t_err(r"[^a-c[^c]]"), empty_class(0, 10));
         assert_eq!(t_err(r"(?i)[^a-c[^c]]"), empty_class(4, 14));
     }

     /// Checks the `&&` intersection operator inside bracketed classes for
     /// Unicode and byte classes, with and without case folding, along with
     /// escaping corner cases and operator precedence.
     #[test]
     fn class_bracketed_intersect() {
         // Unicode classes: (pattern, start of range, end of range).
         for &(pat, lo, hi) in &[
             ("[abc&&b-c]", 'b', 'c'),
             ("[abc&&[b-c]]", 'b', 'c'),
             ("[[abc]&&[b-c]]", 'b', 'c'),
             ("[a-z&&b-y&&c-x]", 'c', 'x'),
             ("[c-da-b&&a-d]", 'a', 'd'),
             ("[a-d&&c-da-b]", 'a', 'd'),
             (r"[a-z&&a-c]", 'a', 'c'),
             (r"[[a-z&&a-c]]", 'a', 'c'),
         ] {
             assert_eq!(t(pat), hir_uclass(&[(lo, hi)]), "pattern: {}", pat);
         }
         assert_eq!(
             t(r"[^[a-z&&a-c]]"),
             hir_negate(hir_uclass(&[('a', 'c')])));

         // Byte classes.
         for &(pat, lo, hi) in &[
             ("(?-u)[abc&&b-c]", b'b', b'c'),
             ("(?-u)[abc&&[b-c]]", b'b', b'c'),
             ("(?-u)[[abc]&&[b-c]]", b'b', b'c'),
             ("(?-u)[a-z&&b-y&&c-x]", b'c', b'x'),
             ("(?-u)[c-da-b&&a-d]", b'a', b'd'),
             ("(?-u)[a-d&&c-da-b]", b'a', b'd'),
         ] {
             assert_eq!(t(pat), hir_bclass(&[(lo, hi)]), "pattern: {}", pat);
         }

         // Case insensitive Unicode classes.
         for &(pat, lo, hi) in &[
             ("(?i)[abc&&b-c]", 'b', 'c'),
             ("(?i)[abc&&[b-c]]", 'b', 'c'),
             ("(?i)[[abc]&&[b-c]]", 'b', 'c'),
             ("(?i)[a-z&&b-y&&c-x]", 'c', 'x'),
             ("(?i)[c-da-b&&a-d]", 'a', 'd'),
             ("(?i)[a-d&&c-da-b]", 'a', 'd'),
         ] {
             assert_eq!(
                 t(pat),
                 hir_case_fold(hir_uclass(&[(lo, hi)])),
                 "pattern: {}", pat);
         }

         // Case insensitive byte classes.
         for &(pat, lo, hi) in &[
             ("(?i-u)[abc&&b-c]", b'b', b'c'),
             ("(?i-u)[abc&&[b-c]]", b'b', b'c'),
             ("(?i-u)[[abc]&&[b-c]]", b'b', b'c'),
             ("(?i-u)[a-z&&b-y&&c-x]", b'c', b'x'),
             ("(?i-u)[c-da-b&&a-d]", b'a', b'd'),
             ("(?i-u)[a-d&&c-da-b]", b'a', b'd'),
         ] {
             assert_eq!(
                 t(pat),
                 hir_case_fold(hir_bclass(&[(lo, hi)])),
                 "pattern: {}", pat);
         }

         // Single-character results: (pattern, the only expected character).
         //
         // In `[a^]`, `^` does not need to be escaped, so it makes sense that
         // `^` is also allowed to be unescaped after `&&`. On the other hand,
         // `]` needs to be escaped after `&&` since it's not at the start of
         // the class.
         for &(pat, c) in &[
             (r"[\^&&^]", '^'),
             (r"[]&&\]]", ']'),
             (r"[-&&-]", '-'),
             (r"[\&&&&]", '&'),
             (r"[\&&&\&]", '&'),
         ] {
             assert_eq!(t(pat), hir_uclass(&[(c, c)]), "pattern: {}", pat);
         }

         // Test precedence.
         assert_eq!(
             t(r"[a-w&&[^c-g]z]"),
             hir_uclass(&[('a', 'b'), ('h', 'w')]));
     }

     /// Checks how negation interacts with `&&` intersection in bracketed
     /// classes, for both Unicode and byte oriented classes.
     #[test]
     fn class_bracketed_intersect_negate() {
         // Expected-value builders; each call yields a fresh HIR.
         let uni_digit = || hir_uclass_query(ClassQuery::Binary("digit"));
         let ascii_digit = || {
             hir_bclass_from_char(ascii_class(&ast::ClassAsciiKind::Digit))
         };
         let ascii_word = || {
             hir_bclass_from_char(ascii_class(&ast::ClassAsciiKind::Word))
         };

         // Unicode classes.
         assert_eq!(t(r"[^\w&&\d]"), hir_negate(uni_digit()));
         assert_eq!(
             t(r"[^[a-z&&a-c]]"),
             hir_negate(hir_uclass(&[('a', 'c')])));
         assert_eq!(t(r"[^[\w&&\d]]"), hir_negate(uni_digit()));
         assert_eq!(t(r"[^[^\w&&\d]]"), uni_digit());
         assert_eq!(
             t(r"[[[^\w]&&[^\d]]]"),
             hir_negate(hir_uclass_perl_word()));

         // Byte classes.
         assert_eq!(t_bytes(r"(?-u)[^\w&&\d]"), hir_negate(ascii_digit()));
         assert_eq!(
             t_bytes(r"(?-u)[^[a-z&&a-c]]"),
             hir_negate(hir_bclass(&[(b'a', b'c')])));
         assert_eq!(t_bytes(r"(?-u)[^[\w&&\d]]"), hir_negate(ascii_digit()));
         assert_eq!(t_bytes(r"(?-u)[^[^\w&&\d]]"), ascii_digit());
         assert_eq!(
             t_bytes(r"(?-u)[[[^\w]&&[^\d]]]"),
             hir_negate(ascii_word()));
     }

     /// Checks the `--` difference operator inside bracketed classes.
     #[test]
     fn class_bracketed_difference() {
         // Unicode: letters with the ASCII range removed.
         let actual = t(r"[\pL--[:ascii:]]");
         let letters = hir_uclass_query(ClassQuery::Binary("letter"));
         let ascii = hir_uclass(&[('\0', '\x7F')]);
         assert_eq!(actual, hir_difference(letters, ascii));

         // Bytes: alphabetic minus lowercase leaves the uppercase range.
         let actual = t(r"(?-u)[[:alpha:]--[:lower:]]");
         let uppercase = hir_bclass(&[(b'A', b'Z')]);
         assert_eq!(actual, uppercase);
     }

     /// Checks the `~~` symmetric difference operator inside bracketed
     /// classes.
     #[test]
     fn class_bracketed_symmetric_difference() {
         // Code points on which the `sc` and `scx` Greek classes disagree.
         let actual = t(r"[\p{sc:Greek}~~\p{scx:Greek}]");
         let expected = hir_uclass(&[
             ('\u{0342}', '\u{0342}'),
             ('\u{0345}', '\u{0345}'),
             ('\u{1DC0}', '\u{1DC1}'),
         ]);
         assert_eq!(actual, expected);

         // Overlapping ranges keep only the non-shared ends.
         let actual = t(r"[a-g~~c-j]");
         let expected = hir_uclass(&[('a', 'b'), ('h', 'j')]);
         assert_eq!(actual, expected);

         // Same ranges as above, as a byte class.
         let actual = t(r"(?-u)[a-g~~c-j]");
         let expected = hir_bclass(&[(b'a', b'b'), (b'h', b'j')]);
         assert_eq!(actual, expected);
     }

     /// Checks translation of patterns written with the `x` flag, where
     /// whitespace and `#` comments appear inside escapes, Unicode class
     /// names and counted repetitions.
     ///
     /// The multi-line patterns are raw strings, so their line breaks and
     /// leading whitespace are part of the pattern text.
     #[test]
     fn ignore_whitespace() {
         // Octal and hex escapes: `\12 3` is expected to yield the literal
         // "\n3", and each spelling of hex 53 is expected to yield "S".
         assert_eq!(t(r"(?x)\12 3"), hir_lit("\n3"));
         assert_eq!(t(r"(?x)\x { 53 }"), hir_lit("S"));
         assert_eq!(t(r"(?x)\x # comment
 { # comment
     53 # comment
 } #comment"), hir_lit("S"));

         // Hex escapes without braces.
         assert_eq!(t(r"(?x)\x 53"), hir_lit("S"));
         assert_eq!(t(r"(?x)\x # comment
         53 # comment"), hir_lit("S"));
         assert_eq!(t(r"(?x)\x5 3"), hir_lit("S"));

         // A Unicode class whose braces and name are spread over lines.
         assert_eq!(t(r"(?x)\p # comment
 { # comment
     Separator # comment
 } # comment"), hir_uclass_query(ClassQuery::Binary("separator")));

         // A counted repetition spread over lines; expected to be the
         // bounded range 5 to 10 applied to the literal `a`.
         assert_eq!(t(r"(?x)a # comment
 { # comment
     5 # comment
     , # comment
     10 # comment
 } # comment"),
             hir_range(
                 true, hir::RepetitionRange::Bounded(5, 10), hir_lit("a")));

         // An escaped space is kept as a literal space, while the trailing
         // space and comment are dropped.
         assert_eq!(t(r"(?x)a\  # hi there"), hir_lit("a "));
     }

     /// Checks the `is_always_utf8` analysis on expressions translated with
     /// `t_bytes`.
     #[test]
     fn analysis_is_always_utf8() {
         // Positive examples.
         for &pat in &[
             r"a",
             r"ab",
             r"(?-u)a",
             r"(?-u)ab",
             r"\xFF",
             r"\xFF\xFF",
             r"[^a]",
             r"[^a][^a]",
             r"\b",
             r"\B",
             r"(?-u)\b",
         ] {
             assert!(t_bytes(pat).is_always_utf8(), "pattern: {}", pat);
         }

         // Negative examples.
         for &pat in &[
             r"(?-u)\xFF",
             r"(?-u)\xFF\xFF",
             r"(?-u)[^a]",
             r"(?-u)[^a][^a]",
             r"(?-u)\B",
         ] {
             assert!(!t_bytes(pat).is_always_utf8(), "pattern: {}", pat);
         }
     }

     /// Checks the `is_all_assertions` analysis.
     #[test]
     fn analysis_is_all_assertions() {
         // Positive examples.
         for &pat in &[
             r"\b",
             r"\B",
             r"^",
             r"$",
             r"\A",
             r"\z",
             r"$^\z\A\b\B",
             r"$|^|\z|\A|\b|\B",
             r"^$|$^",
             r"((\b)+())*^",
         ] {
             assert!(t(pat).is_all_assertions(), "pattern: {}", pat);
         }

         // Negative examples.
         let with_literal = t(r"^a");
         assert!(!with_literal.is_all_assertions());
     }

     /// Checks the `is_anchored_start` and `is_anchored_end` analyses.
     ///
     /// Positive and negative examples come in start/end pairs, so that each
     /// shape of expression is exercised for both properties.
     #[test]
     fn analysis_is_anchored() {
         // Positive examples.
         assert!(t(r"^").is_anchored_start());
         assert!(t(r"$").is_anchored_end());

         assert!(t(r"^^").is_anchored_start());
         assert!(t(r"$$").is_anchored_end());

         assert!(t(r"^$").is_anchored_start());
         assert!(t(r"^$").is_anchored_end());

         assert!(t(r"^foo").is_anchored_start());
         assert!(t(r"foo$").is_anchored_end());

         // Every branch of an alternation is anchored.
         assert!(t(r"^foo|^bar").is_anchored_start());
         assert!(t(r"foo$|bar$").is_anchored_end());

         assert!(t(r"^(foo|bar)").is_anchored_start());
         assert!(t(r"(foo|bar)$").is_anchored_end());

         // Repetitions that must match at least once.
         assert!(t(r"^+").is_anchored_start());
         assert!(t(r"$+").is_anchored_end());
         assert!(t(r"^++").is_anchored_start());
         assert!(t(r"$++").is_anchored_end());
         assert!(t(r"(^)+").is_anchored_start());
         assert!(t(r"($)+").is_anchored_end());

         // The anchor sits next to other assertions rather than at the very
         // edge of the expression.
         assert!(t(r"$^").is_anchored_start());
         assert!(t(r"$^").is_anchored_end());
         assert!(t(r"$^|^$").is_anchored_start());
         assert!(t(r"$^|^$").is_anchored_end());

         assert!(t(r"\b^").is_anchored_start());
         assert!(t(r"$\b").is_anchored_end());
         assert!(t(r"^(?m:^)").is_anchored_start());
         assert!(t(r"(?m:$)$").is_anchored_end());
         assert!(t(r"(?m:^)^").is_anchored_start());
         assert!(t(r"$(?m:$)").is_anchored_end());

         // Negative examples.
         assert!(!t(r"(?m)^").is_anchored_start());
         assert!(!t(r"(?m)$").is_anchored_end());
         assert!(!t(r"(?m:^$)|$^").is_anchored_start());
         assert!(!t(r"(?m:^$)|$^").is_anchored_end());
         assert!(!t(r"$^|(?m:^$)").is_anchored_start());
         assert!(!t(r"$^|(?m:^$)").is_anchored_end());

         // A literal on the relevant side of the anchor.
         assert!(!t(r"a^").is_anchored_start());
         assert!(!t(r"$a").is_anchored_start());

         // The same expressions are not anchored at the end either. This
         // pair used to repeat the two start assertions above, which left
         // the end property unchecked for these expressions.
         assert!(!t(r"a^").is_anchored_end());
         assert!(!t(r"$a").is_anchored_end());

         // Only one branch of the alternation is anchored.
         assert!(!t(r"^foo|bar").is_anchored_start());
         assert!(!t(r"foo|bar$").is_anchored_end());

         // Repetitions that may match zero times.
         assert!(!t(r"^*").is_anchored_start());
         assert!(!t(r"$*").is_anchored_end());
         assert!(!t(r"^*+").is_anchored_start());
         assert!(!t(r"$*+").is_anchored_end());
         assert!(!t(r"^+*").is_anchored_start());
         assert!(!t(r"$+*").is_anchored_end());
         assert!(!t(r"(^)*").is_anchored_start());
         assert!(!t(r"($)*").is_anchored_end());
     }

     /// Checks the `is_any_anchored_start` and `is_any_anchored_end`
     /// analyses.
     #[test]
     fn analysis_is_any_anchored() {
         // Positive examples.
         for &pat in &[r"^", r"\A"] {
             assert!(t(pat).is_any_anchored_start(), "pattern: {}", pat);
         }
         for &pat in &[r"$", r"\z"] {
             assert!(t(pat).is_any_anchored_end(), "pattern: {}", pat);
         }

         // Negative examples.
         for &pat in &[r"(?m)^", r"$"] {
             assert!(!t(pat).is_any_anchored_start(), "pattern: {}", pat);
         }
         for &pat in &[r"(?m)$", r"^"] {
             assert!(!t(pat).is_any_anchored_end(), "pattern: {}", pat);
         }
     }

     /// Checks the `is_match_empty` analysis.
     #[test]
     fn analysis_is_match_empty() {
         // Positive examples.
         for &pat in &[
             r"",
             r"()",
             r"()*",
             r"()+",
             r"()?",
             r"a*",
             r"a?",
             r"a{0}",
             r"a{0,}",
             r"a{0,1}",
             r"a{0,10}",
             r"\pL*",
             r"a*|b",
             r"b|a*",
             r"a*a?(abcd)*",
             r"^",
             r"$",
             r"(?m)^",
             r"(?m)$",
             r"\A",
             r"\z",
             r"\B",
         ] {
             assert!(t(pat).is_match_empty(), "pattern: {}", pat);
         }
         // This pattern is translated with `t_bytes` rather than `t`.
         assert!(t_bytes(r"(?-u)\B").is_match_empty());

         // Negative examples.
         for &pat in &[
             r"a+",
             r"a{1}",
             r"a{1,}",
             r"a{1,2}",
             r"a{1,10}",
             r"b|a",
             r"a*a+(abcd)*",
             r"\b",
             r"(?-u)\b",
         ] {
             assert!(!t(pat).is_match_empty(), "pattern: {}", pat);
         }
     }
 }