| // Copyright 2018 The Rust Project Developers. See the COPYRIGHT |
| // file at the top-level directory of this distribution and at |
| // http://rust-lang.org/COPYRIGHT. |
| // |
| // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or |
| // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license |
| // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your |
| // option. This file may not be copied, modified, or distributed |
| // except according to those terms. |
| |
| /*! |
| Defines a translator that converts an `Ast` to an `Hir`. |
| */ |
| |
| use std::cell::{Cell, RefCell}; |
| use std::result; |
| |
| use ast::{self, Ast, Span, Visitor}; |
| use hir::{self, Error, ErrorKind, Hir}; |
| use unicode::{self, ClassQuery}; |
| |
/// Shorthand for a `result::Result` whose error type is the `Error` imported
/// from the `hir` module.
type Result<T> = result::Result<T, Error>;
| |
/// A builder for constructing an AST->HIR translator.
///
/// The builder holds the settings that are copied into each `Translator` it
/// builds: whether invalid UTF-8 may be matched and the default flags.
#[derive(Clone, Debug)]
pub struct TranslatorBuilder {
    /// Whether built translators may produce HIR that can match invalid
    /// UTF-8. `new` initializes this to `false`.
    allow_invalid_utf8: bool,
    /// The flag settings handed to each built translator as its initial
    /// active flags.
    flags: Flags,
}
| |
| impl Default for TranslatorBuilder { |
| fn default() -> TranslatorBuilder { |
| TranslatorBuilder::new() |
| } |
| } |
| |
| impl TranslatorBuilder { |
| /// Create a new translator builder with a default c onfiguration. |
| pub fn new() -> TranslatorBuilder { |
| TranslatorBuilder { |
| allow_invalid_utf8: false, |
| flags: Flags::default(), |
| } |
| } |
| |
| /// Build a translator using the current configuration. |
| pub fn build(&self) -> Translator { |
| Translator { |
| stack: RefCell::new(vec![]), |
| flags: Cell::new(self.flags), |
| allow_invalid_utf8: self.allow_invalid_utf8, |
| } |
| } |
| |
| /// When enabled, translation will permit the construction of a regular |
| /// expression that may match invalid UTF-8. |
| /// |
| /// When disabled (the default), the translator is guaranteed to produce |
| /// an expression that will only ever match valid UTF-8 (otherwise, the |
| /// translator will return an error). |
| /// |
| /// Note that currently, even when invalid UTF-8 is banned, the translator |
| /// will permit a negated ASCII word boundary (i.e., `(?-u:\B)`) even |
| /// though it can actually match at invalid UTF-8 boundaries. This bug |
| /// will be fixed on the next semver release. |
| pub fn allow_invalid_utf8( |
| &mut self, |
| yes: bool, |
| ) -> &mut TranslatorBuilder { |
| self.allow_invalid_utf8 = yes; |
| self |
| } |
| |
| /// Enable or disable the case insensitive flag (`i`) by default. |
| pub fn case_insensitive(&mut self, yes: bool) -> &mut TranslatorBuilder { |
| self.flags.case_insensitive = if yes { Some(true) } else { None }; |
| self |
| } |
| |
| /// Enable or disable the multi-line matching flag (`m`) by default. |
| pub fn multi_line(&mut self, yes: bool) -> &mut TranslatorBuilder { |
| self.flags.multi_line = if yes { Some(true) } else { None }; |
| self |
| } |
| |
| /// Enable or disable the "dot matches any character" flag (`s`) by |
| /// default. |
| pub fn dot_matches_new_line( |
| &mut self, |
| yes: bool, |
| ) -> &mut TranslatorBuilder { |
| self.flags.dot_matches_new_line = if yes { Some(true) } else { None }; |
| self |
| } |
| |
| /// Enable or disable the "swap greed" flag (`U`) by default. |
| pub fn swap_greed(&mut self, yes: bool) -> &mut TranslatorBuilder { |
| self.flags.swap_greed = if yes { Some(true) } else { None }; |
| self |
| } |
| |
| /// Enable or disable the Unicode flag (`u`) by default. |
| pub fn unicode(&mut self, yes: bool) -> &mut TranslatorBuilder { |
| self.flags.unicode = if yes { None } else { Some(false) }; |
| self |
| } |
| } |
| |
/// A translator maps abstract syntax to a high level intermediate
/// representation.
///
/// A translator may benefit from reuse. That is, a translator can translate
/// many abstract syntax trees.
///
/// A `Translator` can be configured in more detail via a
/// [`TranslatorBuilder`](struct.TranslatorBuilder.html).
#[derive(Clone, Debug)]
pub struct Translator {
    /// Our call stack, but on the heap.
    ///
    /// Wrapped in a `RefCell` so that frames can be pushed and popped
    /// through a shared reference to the translator.
    stack: RefCell<Vec<HirFrame>>,
    /// The current flag settings.
    ///
    /// Wrapped in a `Cell` so that the active flags can be replaced through
    /// a shared reference to the translator.
    flags: Cell<Flags>,
    /// Whether we're allowed to produce HIR that can match arbitrary bytes.
    allow_invalid_utf8: bool,
}
| |
| impl Translator { |
| /// Create a new translator using the default configuration. |
| pub fn new() -> Translator { |
| TranslatorBuilder::new().build() |
| } |
| |
| /// Translate the given abstract syntax tree (AST) into a high level |
| /// intermediate representation (HIR). |
| /// |
| /// If there was a problem doing the translation, then an HIR-specific |
| /// error is returned. |
| /// |
| /// The original pattern string used to produce the `Ast` *must* also be |
| /// provided. The translator does not use the pattern string during any |
| /// correct translation, but is used for error reporting. |
| pub fn translate(&mut self, pattern: &str, ast: &Ast) -> Result<Hir> { |
| ast::visit(ast, TranslatorI::new(self, pattern)) |
| } |
| } |
| |
/// An HirFrame is a single stack frame, represented explicitly, which is
/// created for each item in the Ast that we traverse.
///
/// Note that technically, this type doesn't represent our entire stack
/// frame. In particular, the Ast visitor represents any state associated with
/// traversing the Ast itself.
#[derive(Clone, Debug)]
enum HirFrame {
    /// An arbitrary HIR expression. These get pushed whenever we hit a base
    /// case in the Ast. They get popped after an inductive (i.e., recursive)
    /// step is complete.
    Expr(Hir),
    /// A Unicode character class. This frame is mutated as we descend into
    /// the Ast of a character class (which is itself its own mini recursive
    /// structure).
    ClassUnicode(hir::ClassUnicode),
    /// A byte-oriented character class. This frame is mutated as we descend
    /// into the Ast of a character class (which is itself its own mini
    /// recursive structure).
    ///
    /// Byte character classes are created when Unicode mode (`u`) is disabled.
    /// If `allow_invalid_utf8` is disabled (the default), then a byte
    /// character class is only permitted to match ASCII text.
    ClassBytes(hir::ClassBytes),
    /// This is pushed on to the stack upon first seeing any kind of group,
    /// indicated by parentheses (including non-capturing groups). It is popped
    /// upon leaving a group.
    Group {
        /// The flags that were active when this group was opened, if they
        /// were recorded.
        ///
        /// If this group sets flags, then the new active flags are set to the
        /// result of merging the old flags with the flags introduced by this
        /// group.
        ///
        /// When this group is popped and flags are recorded here, the active
        /// flags are restored to them.
        ///
        /// The "active" flags correspond to whatever flags are set in the
        /// Translator.
        old_flags: Option<Flags>,
    },
    /// This is pushed whenever a concatenation is observed. After visiting
    /// every sub-expression in the concatenation, the translator's stack is
    /// popped until it sees a Concat frame.
    Concat,
    /// This is pushed whenever an alternation is observed. After visiting
    /// every sub-expression in the alternation, the translator's stack is
    /// popped until it sees an Alternation frame.
    Alternation,
}
| |
| impl HirFrame { |
| /// Assert that the current stack frame is an Hir expression and return it. |
| fn unwrap_expr(self) -> Hir { |
| match self { |
| HirFrame::Expr(expr) => expr, |
| _ => panic!("tried to unwrap expr from HirFrame, got: {:?}", self) |
| } |
| } |
| |
| /// Assert that the current stack frame is a Unicode class expression and |
| /// return it. |
| fn unwrap_class_unicode(self) -> hir::ClassUnicode { |
| match self { |
| HirFrame::ClassUnicode(cls) => cls, |
| _ => panic!("tried to unwrap Unicode class \ |
| from HirFrame, got: {:?}", self) |
| } |
| } |
| |
| /// Assert that the current stack frame is a byte class expression and |
| /// return it. |
| fn unwrap_class_bytes(self) -> hir::ClassBytes { |
| match self { |
| HirFrame::ClassBytes(cls) => cls, |
| _ => panic!("tried to unwrap byte class \ |
| from HirFrame, got: {:?}", self) |
| } |
| } |
| |
| /// Assert that the current stack frame is a group indicator and return |
| /// its corresponding flags (the flags that were active at the time the |
| /// group was entered) if they exist. |
| fn unwrap_group(self) -> Option<Flags> { |
| match self { |
| HirFrame::Group { old_flags } => old_flags, |
| _ => panic!("tried to unwrap group from HirFrame, got: {:?}", self) |
| } |
| } |
| } |
| |
| impl<'t, 'p> Visitor for TranslatorI<'t, 'p> { |
| type Output = Hir; |
| type Err = Error; |
| |
| fn finish(self) -> Result<Hir> { |
| if self.trans().stack.borrow().is_empty() { |
| // This can happen if the Ast given consists of a single set of |
| // flags. e.g., `(?i)`. /shrug |
| return Ok(Hir::empty()); |
| } |
| // ... otherwise, we should have exactly one HIR on the stack. |
| assert_eq!(self.trans().stack.borrow().len(), 1); |
| Ok(self.pop().unwrap().unwrap_expr()) |
| } |
| |
| fn visit_pre(&mut self, ast: &Ast) -> Result<()> { |
| match *ast { |
| Ast::Class(ast::Class::Bracketed(_)) => { |
| if self.flags().unicode() { |
| let cls = hir::ClassUnicode::empty(); |
| self.push(HirFrame::ClassUnicode(cls)); |
| } else { |
| let cls = hir::ClassBytes::empty(); |
| self.push(HirFrame::ClassBytes(cls)); |
| } |
| } |
| Ast::Group(ref x) => { |
| let old_flags = x.flags().map(|ast| self.set_flags(ast)); |
| self.push(HirFrame::Group { |
| old_flags: old_flags, |
| }); |
| } |
| Ast::Concat(ref x) if x.asts.is_empty() => {} |
| Ast::Concat(_) => { |
| self.push(HirFrame::Concat); |
| } |
| Ast::Alternation(ref x) if x.asts.is_empty() => {} |
| Ast::Alternation(_) => { |
| self.push(HirFrame::Alternation); |
| } |
| _ => {} |
| } |
| Ok(()) |
| } |
| |
| fn visit_post(&mut self, ast: &Ast) -> Result<()> { |
| match *ast { |
| Ast::Empty(_) => { |
| self.push(HirFrame::Expr(Hir::empty())); |
| } |
| Ast::Flags(ref x) => { |
| self.set_flags(&x.flags); |
| } |
| Ast::Literal(ref x) => { |
| self.push(HirFrame::Expr(try!(self.hir_literal(x)))); |
| } |
| Ast::Dot(span) => { |
| self.push(HirFrame::Expr(try!(self.hir_dot(span)))); |
| } |
| Ast::Assertion(ref x) => { |
| self.push(HirFrame::Expr(try!(self.hir_assertion(x)))); |
| } |
| Ast::Class(ast::Class::Perl(ref x)) => { |
| if self.flags().unicode() { |
| let cls = self.hir_perl_unicode_class(x); |
| let hcls = hir::Class::Unicode(cls); |
| self.push(HirFrame::Expr(Hir::class(hcls))); |
| } else { |
| let cls = self.hir_perl_byte_class(x); |
| let hcls = hir::Class::Bytes(cls); |
| self.push(HirFrame::Expr(Hir::class(hcls))); |
| } |
| } |
| Ast::Class(ast::Class::Unicode(ref x)) => { |
| let cls = hir::Class::Unicode(try!(self.hir_unicode_class(x))); |
| self.push(HirFrame::Expr(Hir::class(cls))); |
| } |
| Ast::Class(ast::Class::Bracketed(ref ast)) => { |
| if self.flags().unicode() { |
| let mut cls = self.pop().unwrap().unwrap_class_unicode(); |
| self.unicode_fold_and_negate(ast.negated, &mut cls); |
| if cls.iter().next().is_none() { |
| return Err(self.error( |
| ast.span, ErrorKind::EmptyClassNotAllowed)); |
| } |
| let expr = Hir::class(hir::Class::Unicode(cls)); |
| self.push(HirFrame::Expr(expr)); |
| } else { |
| let mut cls = self.pop().unwrap().unwrap_class_bytes(); |
| try!(self.bytes_fold_and_negate( |
| &ast.span, ast.negated, &mut cls)); |
| if cls.iter().next().is_none() { |
| return Err(self.error( |
| ast.span, ErrorKind::EmptyClassNotAllowed)); |
| } |
| |
| let expr = Hir::class(hir::Class::Bytes(cls)); |
| self.push(HirFrame::Expr(expr)); |
| } |
| } |
| Ast::Repetition(ref x) => { |
| let expr = self.pop().unwrap().unwrap_expr(); |
| self.push(HirFrame::Expr(self.hir_repetition(x, expr))); |
| } |
| Ast::Group(ref x) => { |
| let expr = self.pop().unwrap().unwrap_expr(); |
| if let Some(flags) = self.pop().unwrap().unwrap_group() { |
| self.trans().flags.set(flags); |
| } |
| self.push(HirFrame::Expr(self.hir_group(x, expr))); |
| } |
| Ast::Concat(_) => { |
| let mut exprs = vec![]; |
| while let Some(HirFrame::Expr(expr)) = self.pop() { |
| if !expr.kind().is_empty() { |
| exprs.push(expr); |
| } |
| } |
| exprs.reverse(); |
| self.push(HirFrame::Expr(Hir::concat(exprs))); |
| } |
| Ast::Alternation(_) => { |
| let mut exprs = vec![]; |
| while let Some(HirFrame::Expr(expr)) = self.pop() { |
| exprs.push(expr); |
| } |
| exprs.reverse(); |
| self.push(HirFrame::Expr(Hir::alternation(exprs))); |
| } |
| } |
| Ok(()) |
| } |
| |
| fn visit_class_set_item_pre( |
| &mut self, |
| ast: &ast::ClassSetItem, |
| ) -> Result<()> { |
| match *ast { |
| ast::ClassSetItem::Bracketed(_) => { |
| if self.flags().unicode() { |
| let cls = hir::ClassUnicode::empty(); |
| self.push(HirFrame::ClassUnicode(cls)); |
| } else { |
| let cls = hir::ClassBytes::empty(); |
| self.push(HirFrame::ClassBytes(cls)); |
| } |
| } |
| // We needn't handle the Union case here since the visitor will |
| // do it for us. |
| _ => {} |
| } |
| Ok(()) |
| } |
| |
| fn visit_class_set_item_post( |
| &mut self, |
| ast: &ast::ClassSetItem, |
| ) -> Result<()> { |
| match *ast { |
| ast::ClassSetItem::Empty(_) => {} |
| ast::ClassSetItem::Literal(ref x) => { |
| if self.flags().unicode() { |
| let mut cls = self.pop().unwrap().unwrap_class_unicode(); |
| cls.push(hir::ClassUnicodeRange::new(x.c, x.c)); |
| self.push(HirFrame::ClassUnicode(cls)); |
| } else { |
| let mut cls = self.pop().unwrap().unwrap_class_bytes(); |
| let byte = try!(self.class_literal_byte(x)); |
| cls.push(hir::ClassBytesRange::new(byte, byte)); |
| self.push(HirFrame::ClassBytes(cls)); |
| } |
| } |
| ast::ClassSetItem::Range(ref x) => { |
| if self.flags().unicode() { |
| let mut cls = self.pop().unwrap().unwrap_class_unicode(); |
| cls.push(hir::ClassUnicodeRange::new(x.start.c, x.end.c)); |
| self.push(HirFrame::ClassUnicode(cls)); |
| } else { |
| let mut cls = self.pop().unwrap().unwrap_class_bytes(); |
| let start = try!(self.class_literal_byte(&x.start)); |
| let end = try!(self.class_literal_byte(&x.end)); |
| cls.push(hir::ClassBytesRange::new(start, end)); |
| self.push(HirFrame::ClassBytes(cls)); |
| } |
| } |
| ast::ClassSetItem::Ascii(ref x) => { |
| if self.flags().unicode() { |
| let mut cls = self.pop().unwrap().unwrap_class_unicode(); |
| for &(s, e) in ascii_class(&x.kind) { |
| cls.push(hir::ClassUnicodeRange::new(s, e)); |
| } |
| self.unicode_fold_and_negate(x.negated, &mut cls); |
| self.push(HirFrame::ClassUnicode(cls)); |
| } else { |
| let mut cls = self.pop().unwrap().unwrap_class_bytes(); |
| for &(s, e) in ascii_class(&x.kind) { |
| cls.push(hir::ClassBytesRange::new(s as u8, e as u8)); |
| } |
| try!(self.bytes_fold_and_negate( |
| &x.span, x.negated, &mut cls)); |
| self.push(HirFrame::ClassBytes(cls)); |
| } |
| } |
| ast::ClassSetItem::Unicode(ref x) => { |
| let xcls = try!(self.hir_unicode_class(x)); |
| let mut cls = self.pop().unwrap().unwrap_class_unicode(); |
| cls.union(&xcls); |
| self.push(HirFrame::ClassUnicode(cls)); |
| } |
| ast::ClassSetItem::Perl(ref x) => { |
| if self.flags().unicode() { |
| let xcls = self.hir_perl_unicode_class(x); |
| let mut cls = self.pop().unwrap().unwrap_class_unicode(); |
| cls.union(&xcls); |
| self.push(HirFrame::ClassUnicode(cls)); |
| } else { |
| let xcls = self.hir_perl_byte_class(x); |
| let mut cls = self.pop().unwrap().unwrap_class_bytes(); |
| cls.union(&xcls); |
| self.push(HirFrame::ClassBytes(cls)); |
| } |
| } |
| ast::ClassSetItem::Bracketed(ref ast) => { |
| if self.flags().unicode() { |
| let mut cls1 = self.pop().unwrap().unwrap_class_unicode(); |
| self.unicode_fold_and_negate(ast.negated, &mut cls1); |
| |
| let mut cls2 = self.pop().unwrap().unwrap_class_unicode(); |
| cls2.union(&cls1); |
| self.push(HirFrame::ClassUnicode(cls2)); |
| } else { |
| let mut cls1 = self.pop().unwrap().unwrap_class_bytes(); |
| try!(self.bytes_fold_and_negate( |
| &ast.span, ast.negated, &mut cls1)); |
| |
| let mut cls2 = self.pop().unwrap().unwrap_class_bytes(); |
| cls2.union(&cls1); |
| self.push(HirFrame::ClassBytes(cls2)); |
| } |
| } |
| // This is handled automatically by the visitor. |
| ast::ClassSetItem::Union(_) => {} |
| } |
| Ok(()) |
| } |
| |
| fn visit_class_set_binary_op_pre( |
| &mut self, |
| _op: &ast::ClassSetBinaryOp, |
| ) -> Result<()> { |
| if self.flags().unicode() { |
| let cls = hir::ClassUnicode::empty(); |
| self.push(HirFrame::ClassUnicode(cls)); |
| } else { |
| let cls = hir::ClassBytes::empty(); |
| self.push(HirFrame::ClassBytes(cls)); |
| } |
| Ok(()) |
| } |
| |
| fn visit_class_set_binary_op_in( |
| &mut self, |
| _op: &ast::ClassSetBinaryOp, |
| ) -> Result<()> { |
| if self.flags().unicode() { |
| let cls = hir::ClassUnicode::empty(); |
| self.push(HirFrame::ClassUnicode(cls)); |
| } else { |
| let cls = hir::ClassBytes::empty(); |
| self.push(HirFrame::ClassBytes(cls)); |
| } |
| Ok(()) |
| } |
| |
| fn visit_class_set_binary_op_post( |
| &mut self, |
| op: &ast::ClassSetBinaryOp, |
| ) -> Result<()> { |
| use ast::ClassSetBinaryOpKind::*; |
| |
| if self.flags().unicode() { |
| let mut rhs = self.pop().unwrap().unwrap_class_unicode(); |
| let mut lhs = self.pop().unwrap().unwrap_class_unicode(); |
| let mut cls = self.pop().unwrap().unwrap_class_unicode(); |
| if self.flags().case_insensitive() { |
| rhs.case_fold_simple(); |
| lhs.case_fold_simple(); |
| } |
| match op.kind { |
| Intersection => lhs.intersect(&rhs), |
| Difference => lhs.difference(&rhs), |
| SymmetricDifference => lhs.symmetric_difference(&rhs), |
| } |
| cls.union(&lhs); |
| self.push(HirFrame::ClassUnicode(cls)); |
| } else { |
| let mut rhs = self.pop().unwrap().unwrap_class_bytes(); |
| let mut lhs = self.pop().unwrap().unwrap_class_bytes(); |
| let mut cls = self.pop().unwrap().unwrap_class_bytes(); |
| if self.flags().case_insensitive() { |
| rhs.case_fold_simple(); |
| lhs.case_fold_simple(); |
| } |
| match op.kind { |
| Intersection => lhs.intersect(&rhs), |
| Difference => lhs.difference(&rhs), |
| SymmetricDifference => lhs.symmetric_difference(&rhs), |
| } |
| cls.union(&lhs); |
| self.push(HirFrame::ClassBytes(cls)); |
| } |
| Ok(()) |
| } |
| } |
| |
/// The internal implementation of a translator.
///
/// This type is responsible for carrying around the original pattern string,
/// which is not tied to the internal state of a translator.
///
/// A TranslatorI exists for the time it takes to translate a single Ast.
#[derive(Clone, Debug)]
struct TranslatorI<'t, 'p> {
    /// The translator whose frame stack and active flags are used during the
    /// translation.
    trans: &'t Translator,
    /// The original pattern string; copied into every error that is created.
    pattern: &'p str,
}
| |
| impl<'t, 'p> TranslatorI<'t, 'p> { |
| /// Build a new internal translator. |
| fn new(trans: &'t Translator, pattern: &'p str) -> TranslatorI<'t, 'p> { |
| TranslatorI { trans: trans, pattern: pattern } |
| } |
| |
| /// Return a reference to the underlying translator. |
| fn trans(&self) -> &Translator { |
| &self.trans |
| } |
| |
| /// Push the given frame on to the call stack. |
| fn push(&self, frame: HirFrame) { |
| self.trans().stack.borrow_mut().push(frame); |
| } |
| |
| /// Pop the top of the call stack. If the call stack is empty, return None. |
| fn pop(&self) -> Option<HirFrame> { |
| self.trans().stack.borrow_mut().pop() |
| } |
| |
| /// Create a new error with the given span and error type. |
| fn error(&self, span: Span, kind: ErrorKind) -> Error { |
| Error { kind: kind, pattern: self.pattern.to_string(), span: span } |
| } |
| |
| /// Return a copy of the active flags. |
| fn flags(&self) -> Flags { |
| self.trans().flags.get() |
| } |
| |
| /// Set the flags of this translator from the flags set in the given AST. |
| /// Then, return the old flags. |
| fn set_flags(&self, ast_flags: &ast::Flags) -> Flags { |
| let old_flags = self.flags(); |
| let mut new_flags = Flags::from_ast(ast_flags); |
| new_flags.merge(&old_flags); |
| self.trans().flags.set(new_flags); |
| old_flags |
| } |
| |
| fn hir_literal(&self, lit: &ast::Literal) -> Result<Hir> { |
| let ch = match try!(self.literal_to_char(lit)) { |
| byte @ hir::Literal::Byte(_) => return Ok(Hir::literal(byte)), |
| hir::Literal::Unicode(ch) => ch, |
| }; |
| if self.flags().case_insensitive() { |
| self.hir_from_char_case_insensitive(lit.span, ch) |
| } else { |
| self.hir_from_char(lit.span, ch) |
| } |
| } |
| |
| /// Convert an Ast literal to its scalar representation. |
| /// |
| /// When Unicode mode is enabled, then this always succeeds and returns a |
| /// `char` (Unicode scalar value). |
| /// |
| /// When Unicode mode is disabled, then a raw byte is returned. If that |
| /// byte is not ASCII and invalid UTF-8 is not allowed, then this returns |
| /// an error. |
| fn literal_to_char(&self, lit: &ast::Literal) -> Result<hir::Literal> { |
| if self.flags().unicode() { |
| return Ok(hir::Literal::Unicode(lit.c)); |
| } |
| let byte = match lit.byte() { |
| None => return Ok(hir::Literal::Unicode(lit.c)), |
| Some(byte) => byte, |
| }; |
| if byte <= 0x7F { |
| return Ok(hir::Literal::Unicode(byte as char)); |
| } |
| if !self.trans().allow_invalid_utf8 { |
| return Err(self.error(lit.span, ErrorKind::InvalidUtf8)); |
| } |
| Ok(hir::Literal::Byte(byte)) |
| } |
| |
| fn hir_from_char(&self, span: Span, c: char) -> Result<Hir> { |
| if !self.flags().unicode() && c.len_utf8() > 1 { |
| return Err(self.error(span, ErrorKind::UnicodeNotAllowed)); |
| } |
| Ok(Hir::literal(hir::Literal::Unicode(c))) |
| } |
| |
| fn hir_from_char_case_insensitive( |
| &self, |
| span: Span, |
| c: char, |
| ) -> Result<Hir> { |
| // If case folding won't do anything, then don't bother trying. |
| if !unicode::contains_simple_case_mapping(c, c) { |
| return self.hir_from_char(span, c); |
| } |
| if self.flags().unicode() { |
| let mut cls = hir::ClassUnicode::new(vec![ |
| hir::ClassUnicodeRange::new(c, c), |
| ]); |
| cls.case_fold_simple(); |
| Ok(Hir::class(hir::Class::Unicode(cls))) |
| } else { |
| if c.len_utf8() > 1 { |
| return Err(self.error(span, ErrorKind::UnicodeNotAllowed)); |
| } |
| let mut cls = hir::ClassBytes::new(vec![ |
| hir::ClassBytesRange::new(c as u8, c as u8), |
| ]); |
| cls.case_fold_simple(); |
| Ok(Hir::class(hir::Class::Bytes(cls))) |
| } |
| } |
| |
| fn hir_dot(&self, span: Span) -> Result<Hir> { |
| let unicode = self.flags().unicode(); |
| if !unicode && !self.trans().allow_invalid_utf8 { |
| return Err(self.error(span, ErrorKind::InvalidUtf8)); |
| } |
| Ok(if self.flags().dot_matches_new_line() { |
| Hir::any(!unicode) |
| } else { |
| Hir::dot(!unicode) |
| }) |
| } |
| |
| fn hir_assertion(&self, asst: &ast::Assertion) -> Result<Hir> { |
| let unicode = self.flags().unicode(); |
| let multi_line = self.flags().multi_line(); |
| Ok(match asst.kind { |
| ast::AssertionKind::StartLine => { |
| Hir::anchor(if multi_line { |
| hir::Anchor::StartLine |
| } else { |
| hir::Anchor::StartText |
| }) |
| } |
| ast::AssertionKind::EndLine => { |
| Hir::anchor(if multi_line { |
| hir::Anchor::EndLine |
| } else { |
| hir::Anchor::EndText |
| }) |
| } |
| ast::AssertionKind::StartText => { |
| Hir::anchor(hir::Anchor::StartText) |
| } |
| ast::AssertionKind::EndText => { |
| Hir::anchor(hir::Anchor::EndText) |
| } |
| ast::AssertionKind::WordBoundary => { |
| Hir::word_boundary(if unicode { |
| hir::WordBoundary::Unicode |
| } else { |
| hir::WordBoundary::Ascii |
| }) |
| } |
| ast::AssertionKind::NotWordBoundary => { |
| Hir::word_boundary(if unicode { |
| hir::WordBoundary::UnicodeNegate |
| } else { |
| // It is possible for negated ASCII word boundaries to |
| // match at invalid UTF-8 boundaries, even when searching |
| // valid UTF-8. |
| // |
| // TODO(ag): Enable this error when regex goes to 1.0. |
| // Otherwise, it is too steep of a breaking change. |
| // if !self.trans().allow_invalid_utf8 { |
| // return Err(self.error( |
| // asst.span, ErrorKind::InvalidUtf8)); |
| // } |
| hir::WordBoundary::AsciiNegate |
| }) |
| } |
| }) |
| } |
| |
| fn hir_group(&self, group: &ast::Group, expr: Hir) -> Hir { |
| let kind = match group.kind { |
| ast::GroupKind::CaptureIndex(idx) => { |
| hir::GroupKind::CaptureIndex(idx) |
| } |
| ast::GroupKind::CaptureName(ref capname) => { |
| hir::GroupKind::CaptureName { |
| name: capname.name.clone(), |
| index: capname.index, |
| } |
| } |
| ast::GroupKind::NonCapturing(_) => hir::GroupKind::NonCapturing, |
| }; |
| Hir::group(hir::Group { |
| kind: kind, |
| hir: Box::new(expr), |
| }) |
| } |
| |
| fn hir_repetition(&self, rep: &ast::Repetition, expr: Hir) -> Hir { |
| let kind = match rep.op.kind { |
| ast::RepetitionKind::ZeroOrOne => hir::RepetitionKind::ZeroOrOne, |
| ast::RepetitionKind::ZeroOrMore => hir::RepetitionKind::ZeroOrMore, |
| ast::RepetitionKind::OneOrMore => hir::RepetitionKind::OneOrMore, |
| ast::RepetitionKind::Range(ast::RepetitionRange::Exactly(m)) => { |
| hir::RepetitionKind::Range(hir::RepetitionRange::Exactly(m)) |
| } |
| ast::RepetitionKind::Range(ast::RepetitionRange::AtLeast(m)) => { |
| hir::RepetitionKind::Range(hir::RepetitionRange::AtLeast(m)) |
| } |
| ast::RepetitionKind::Range(ast::RepetitionRange::Bounded(m,n)) => { |
| hir::RepetitionKind::Range(hir::RepetitionRange::Bounded(m, n)) |
| } |
| }; |
| let greedy = |
| if self.flags().swap_greed() { |
| !rep.greedy |
| } else { |
| rep.greedy |
| }; |
| Hir::repetition(hir::Repetition { |
| kind: kind, |
| greedy: greedy, |
| hir: Box::new(expr), |
| }) |
| } |
| |
| fn hir_unicode_class( |
| &self, |
| ast_class: &ast::ClassUnicode, |
| ) -> Result<hir::ClassUnicode> { |
| use ast::ClassUnicodeKind::*; |
| |
| if !self.flags().unicode() { |
| return Err(self.error( |
| ast_class.span, |
| ErrorKind::UnicodeNotAllowed, |
| )); |
| } |
| let query = match ast_class.kind { |
| OneLetter(name) => ClassQuery::OneLetter(name), |
| Named(ref name) => ClassQuery::Binary(name), |
| NamedValue { ref name, ref value, .. } => { |
| ClassQuery::ByValue { |
| property_name: name, |
| property_value: value, |
| } |
| } |
| }; |
| match unicode::class(query) { |
| Ok(mut class) => { |
| self.unicode_fold_and_negate(ast_class.negated, &mut class); |
| Ok(class) |
| } |
| Err(unicode::Error::PropertyNotFound) => { |
| Err(self.error( |
| ast_class.span, |
| ErrorKind::UnicodePropertyNotFound, |
| )) |
| } |
| Err(unicode::Error::PropertyValueNotFound) => { |
| Err(self.error( |
| ast_class.span, |
| ErrorKind::UnicodePropertyValueNotFound, |
| )) |
| } |
| } |
| } |
| |
| fn hir_perl_unicode_class( |
| &self, |
| ast_class: &ast::ClassPerl, |
| ) -> hir::ClassUnicode { |
| use ast::ClassPerlKind::*; |
| use unicode_tables::perl_word::PERL_WORD; |
| |
| assert!(self.flags().unicode()); |
| let mut class = match ast_class.kind { |
| Digit => { |
| let query = ClassQuery::Binary("Decimal_Number"); |
| unicode::class(query).unwrap() |
| } |
| Space => { |
| let query = ClassQuery::Binary("Whitespace"); |
| unicode::class(query).unwrap() |
| } |
| Word => unicode::hir_class(PERL_WORD), |
| }; |
| // We needn't apply case folding here because the Perl Unicode classes |
| // are already closed under Unicode simple case folding. |
| if ast_class.negated { |
| class.negate(); |
| } |
| class |
| } |
| |
| fn hir_perl_byte_class( |
| &self, |
| ast_class: &ast::ClassPerl, |
| ) -> hir::ClassBytes { |
| use ast::ClassPerlKind::*; |
| |
| assert!(!self.flags().unicode()); |
| let mut class = match ast_class.kind { |
| Digit => hir_ascii_class_bytes(&ast::ClassAsciiKind::Digit), |
| Space => hir_ascii_class_bytes(&ast::ClassAsciiKind::Space), |
| Word => hir_ascii_class_bytes(&ast::ClassAsciiKind::Word), |
| }; |
| // We needn't apply case folding here because the Perl ASCII classes |
| // are already closed (under ASCII case folding). |
| if ast_class.negated { |
| class.negate(); |
| } |
| class |
| } |
| |
| fn unicode_fold_and_negate( |
| &self, |
| negated: bool, |
| class: &mut hir::ClassUnicode, |
| ) { |
| // Note that we must apply case folding before negation! |
| // Consider `(?i)[^x]`. If we applied negation field, then |
| // the result would be the character class that matched any |
| // Unicode scalar value. |
| if self.flags().case_insensitive() { |
| class.case_fold_simple(); |
| } |
| if negated { |
| class.negate(); |
| } |
| } |
| |
| fn bytes_fold_and_negate( |
| &self, |
| span: &Span, |
| negated: bool, |
| class: &mut hir::ClassBytes, |
| ) -> Result<()> { |
| // Note that we must apply case folding before negation! |
| // Consider `(?i)[^x]`. If we applied negation field, then |
| // the result would be the character class that matched any |
| // Unicode scalar value. |
| if self.flags().case_insensitive() { |
| class.case_fold_simple(); |
| } |
| if negated { |
| class.negate(); |
| } |
| if !self.trans().allow_invalid_utf8 && !class.is_all_ascii() { |
| return Err(self.error(span.clone(), ErrorKind::InvalidUtf8)); |
| } |
| Ok(()) |
| } |
| |
| /// Return a scalar byte value suitable for use as a literal in a byte |
| /// character class. |
| fn class_literal_byte(&self, ast: &ast::Literal) -> Result<u8> { |
| match try!(self.literal_to_char(ast)) { |
| hir::Literal::Byte(byte) => Ok(byte), |
| hir::Literal::Unicode(ch) => { |
| if ch <= 0x7F as char { |
| Ok(ch as u8) |
| } else { |
| // We can't feasibly support Unicode in |
| // byte oriented classes. Byte classes don't |
| // do Unicode case folding. |
| Err(self.error(ast.span, ErrorKind::UnicodeNotAllowed)) |
| } |
| } |
| } |
| } |
| } |
| |
/// A translator's representation of a regular expression's flags at any given
/// moment in time.
///
/// Each flag can be in one of three states: absent (`None`), present but
/// disabled (`Some(false)`) or present and enabled (`Some(true)`).
#[derive(Clone, Copy, Debug, Default)]
struct Flags {
    /// The `i` flag. Treated as disabled when absent.
    case_insensitive: Option<bool>,
    /// The `m` flag. Treated as disabled when absent.
    multi_line: Option<bool>,
    /// The `s` flag. Treated as disabled when absent.
    dot_matches_new_line: Option<bool>,
    /// The `U` flag. Treated as disabled when absent.
    swap_greed: Option<bool>,
    /// The `u` flag. Treated as enabled when absent.
    unicode: Option<bool>,
    // Note that `ignore_whitespace` is omitted here because it is handled
    // entirely in the parser.
}
| |
| impl Flags { |
| fn from_ast(ast: &ast::Flags) -> Flags { |
| let mut flags = Flags::default(); |
| let mut enable = true; |
| for item in &ast.items { |
| match item.kind { |
| ast::FlagsItemKind::Negation => { |
| enable = false; |
| } |
| ast::FlagsItemKind::Flag(ast::Flag::CaseInsensitive) => { |
| flags.case_insensitive = Some(enable); |
| } |
| ast::FlagsItemKind::Flag(ast::Flag::MultiLine) => { |
| flags.multi_line = Some(enable); |
| } |
| ast::FlagsItemKind::Flag(ast::Flag::DotMatchesNewLine) => { |
| flags.dot_matches_new_line = Some(enable); |
| } |
| ast::FlagsItemKind::Flag(ast::Flag::SwapGreed) => { |
| flags.swap_greed = Some(enable); |
| } |
| ast::FlagsItemKind::Flag(ast::Flag::Unicode) => { |
| flags.unicode = Some(enable); |
| } |
| ast::FlagsItemKind::Flag(ast::Flag::IgnoreWhitespace) => {} |
| } |
| } |
| flags |
| } |
| |
| fn merge(&mut self, previous: &Flags) { |
| if self.case_insensitive.is_none() { |
| self.case_insensitive = previous.case_insensitive; |
| } |
| if self.multi_line.is_none() { |
| self.multi_line = previous.multi_line; |
| } |
| if self.dot_matches_new_line.is_none() { |
| self.dot_matches_new_line = previous.dot_matches_new_line; |
| } |
| if self.swap_greed.is_none() { |
| self.swap_greed = previous.swap_greed; |
| } |
| if self.unicode.is_none() { |
| self.unicode = previous.unicode; |
| } |
| } |
| |
| fn case_insensitive(&self) -> bool { |
| self.case_insensitive.unwrap_or(false) |
| } |
| |
| fn multi_line(&self) -> bool { |
| self.multi_line.unwrap_or(false) |
| } |
| |
| fn dot_matches_new_line(&self) -> bool { |
| self.dot_matches_new_line.unwrap_or(false) |
| } |
| |
| fn swap_greed(&self) -> bool { |
| self.swap_greed.unwrap_or(false) |
| } |
| |
| fn unicode(&self) -> bool { |
| self.unicode.unwrap_or(true) |
| } |
| } |
| |
| fn hir_ascii_class_bytes(kind: &ast::ClassAsciiKind) -> hir::ClassBytes { |
| let ranges: Vec<_> = ascii_class(kind).iter().cloned().map(|(s, e)| { |
| hir::ClassBytesRange::new(s as u8, e as u8) |
| }).collect(); |
| hir::ClassBytes::new(ranges) |
| } |
| |
| fn ascii_class(kind: &ast::ClassAsciiKind) -> &'static [(char, char)] { |
| use ast::ClassAsciiKind::*; |
| |
| // TODO: Get rid of these consts, which appear necessary for older |
| // versions of Rust. |
| type T = &'static [(char, char)]; |
| match *kind { |
| Alnum => { |
| const X: T = &[('0', '9'), ('A', 'Z'), ('a', 'z')]; |
| X |
| } |
| Alpha => { |
| const X: T = &[('A', 'Z'), ('a', 'z')]; |
| X |
| } |
| Ascii => { |
| const X: T = &[('\x00', '\x7F')]; |
| X |
| } |
| Blank => { |
| const X: T = &[(' ', '\t')]; |
| X |
| } |
| Cntrl => { |
| const X: T = &[('\x00', '\x1F'), ('\x7F', '\x7F')]; |
| X |
| } |
| Digit => { |
| const X: T = &[('0', '9')]; |
| X |
| } |
| Graph => { |
| const X: T = &[('!', '~')]; |
| X |
| } |
| Lower => { |
| const X: T = &[('a', 'z')]; |
| X |
| } |
| Print => { |
| const X: T = &[(' ', '~')]; |
| X |
| } |
| Punct => { |
| const X: T = &[('!', '/'), (':', '@'), ('[', '`'), ('{', '~')]; |
| X |
| } |
| Space => { |
| const X: T = &[ |
| ('\t', '\t'), ('\n', '\n'), ('\x0B', '\x0B'), ('\x0C', '\x0C'), |
| ('\r', '\r'), (' ', ' '), |
| ]; |
| X |
| } |
| Upper => { |
| const X: T = &[('A', 'Z')]; |
| X |
| } |
| Word => { |
| const X: T = &[('0', '9'), ('A', 'Z'), ('_', '_'), ('a', 'z')]; |
| X |
| } |
| Xdigit => { |
| const X: T = &[('0', '9'), ('A', 'F'), ('a', 'f')]; |
| X |
| } |
| } |
| } |
| |
| #[cfg(test)] |
| mod tests { |
| use ast::{self, Ast, Position, Span}; |
| use ast::parse::ParserBuilder; |
| use hir::{self, Hir, HirKind}; |
| use unicode::{self, ClassQuery}; |
| |
| use super::{TranslatorBuilder, ascii_class}; |
| |
| // We create these errors to compare with real hir::Errors in the tests. |
| // We define equality between TestError and hir::Error to disregard the |
| // pattern string in hir::Error, which is annoying to provide in tests. |
| #[derive(Clone, Debug)] |
| struct TestError { |
| span: Span, |
| kind: hir::ErrorKind, |
| } |
| |
| impl PartialEq<hir::Error> for TestError { |
| fn eq(&self, other: &hir::Error) -> bool { |
| self.span == other.span && self.kind == other.kind |
| } |
| } |
| |
| impl PartialEq<TestError> for hir::Error { |
| fn eq(&self, other: &TestError) -> bool { |
| self.span == other.span && self.kind == other.kind |
| } |
| } |
| |
| fn parse(pattern: &str) -> Ast { |
| ParserBuilder::new().octal(true).build().parse(pattern).unwrap() |
| } |
| |
| fn t(pattern: &str) -> Hir { |
| TranslatorBuilder::new() |
| .allow_invalid_utf8(false) |
| .build() |
| .translate(pattern, &parse(pattern)) |
| .unwrap() |
| } |
| |
| fn t_err(pattern: &str) -> hir::Error { |
| TranslatorBuilder::new() |
| .allow_invalid_utf8(false) |
| .build() |
| .translate(pattern, &parse(pattern)) |
| .unwrap_err() |
| } |
| |
| fn t_bytes(pattern: &str) -> Hir { |
| TranslatorBuilder::new() |
| .allow_invalid_utf8(true) |
| .build() |
| .translate(pattern, &parse(pattern)) |
| .unwrap() |
| } |
| |
| fn hir_lit(s: &str) -> Hir { |
| match s.len() { |
| 0 => Hir::empty(), |
| _ => { |
| let lits = s |
| .chars() |
| .map(hir::Literal::Unicode) |
| .map(Hir::literal) |
| .collect(); |
| Hir::concat(lits) |
| } |
| } |
| } |
| |
| fn hir_blit(s: &[u8]) -> Hir { |
| match s.len() { |
| 0 => Hir::empty(), |
| 1 => Hir::literal(hir::Literal::Byte(s[0])), |
| _ => { |
| let lits = s |
| .iter() |
| .cloned() |
| .map(hir::Literal::Byte) |
| .map(Hir::literal) |
| .collect(); |
| Hir::concat(lits) |
| } |
| } |
| } |
| |
| fn hir_group(i: u32, expr: Hir) -> Hir { |
| Hir::group(hir::Group { |
| kind: hir::GroupKind::CaptureIndex(i), |
| hir: Box::new(expr), |
| }) |
| } |
| |
| fn hir_group_name(i: u32, name: &str, expr: Hir) -> Hir { |
| Hir::group(hir::Group { |
| kind: hir::GroupKind::CaptureName { |
| name: name.to_string(), |
| index: i, |
| }, |
| hir: Box::new(expr), |
| }) |
| } |
| |
| fn hir_group_nocap(expr: Hir) -> Hir { |
| Hir::group(hir::Group { |
| kind: hir::GroupKind::NonCapturing, |
| hir: Box::new(expr), |
| }) |
| } |
| |
| fn hir_quest(greedy: bool, expr: Hir) -> Hir { |
| Hir::repetition(hir::Repetition { |
| kind: hir::RepetitionKind::ZeroOrOne, |
| greedy: greedy, |
| hir: Box::new(expr), |
| }) |
| } |
| |
| fn hir_star(greedy: bool, expr: Hir) -> Hir { |
| Hir::repetition(hir::Repetition { |
| kind: hir::RepetitionKind::ZeroOrMore, |
| greedy: greedy, |
| hir: Box::new(expr), |
| }) |
| } |
| |
| fn hir_plus(greedy: bool, expr: Hir) -> Hir { |
| Hir::repetition(hir::Repetition { |
| kind: hir::RepetitionKind::OneOrMore, |
| greedy: greedy, |
| hir: Box::new(expr), |
| }) |
| } |
| |
| fn hir_range(greedy: bool, range: hir::RepetitionRange, expr: Hir) -> Hir { |
| Hir::repetition(hir::Repetition { |
| kind: hir::RepetitionKind::Range(range), |
| greedy: greedy, |
| hir: Box::new(expr), |
| }) |
| } |
| |
| fn hir_alt(alts: Vec<Hir>) -> Hir { |
| Hir::alternation(alts) |
| } |
| |
| fn hir_cat(exprs: Vec<Hir>) -> Hir { |
| Hir::concat(exprs) |
| } |
| |
| fn hir_uclass_query(query: ClassQuery) -> Hir { |
| Hir::class(hir::Class::Unicode(unicode::class(query).unwrap())) |
| } |
| |
| fn hir_uclass_perl_word() -> Hir { |
| use unicode_tables::perl_word::PERL_WORD; |
| Hir::class(hir::Class::Unicode(unicode::hir_class(PERL_WORD))) |
| } |
| |
| fn hir_uclass(ranges: &[(char, char)]) -> Hir { |
| let ranges: Vec<hir::ClassUnicodeRange> = ranges |
| .iter() |
| .map(|&(s, e)| hir::ClassUnicodeRange::new(s, e)) |
| .collect(); |
| Hir::class(hir::Class::Unicode(hir::ClassUnicode::new(ranges))) |
| } |
| |
| fn hir_bclass(ranges: &[(u8, u8)]) -> Hir { |
| let ranges: Vec<hir::ClassBytesRange> = ranges |
| .iter() |
| .map(|&(s, e)| hir::ClassBytesRange::new(s, e)) |
| .collect(); |
| Hir::class(hir::Class::Bytes(hir::ClassBytes::new(ranges))) |
| } |
| |
| fn hir_bclass_from_char(ranges: &[(char, char)]) -> Hir { |
| let ranges: Vec<hir::ClassBytesRange> = ranges |
| .iter() |
| .map(|&(s, e)| { |
| assert!(s as u32 <= 0x7F); |
| assert!(e as u32 <= 0x7F); |
| hir::ClassBytesRange::new(s as u8, e as u8) |
| }) |
| .collect(); |
| Hir::class(hir::Class::Bytes(hir::ClassBytes::new(ranges))) |
| } |
| |
| fn hir_case_fold(expr: Hir) -> Hir { |
| match expr.into_kind() { |
| HirKind::Class(mut cls) => { |
| cls.case_fold_simple(); |
| Hir::class(cls) |
| } |
| _ => panic!("cannot case fold non-class Hir expr"), |
| } |
| } |
| |
| fn hir_negate(expr: Hir) -> Hir { |
| match expr.into_kind() { |
| HirKind::Class(mut cls) => { |
| cls.negate(); |
| Hir::class(cls) |
| } |
| _ => panic!("cannot negate non-class Hir expr"), |
| } |
| } |
| |
| fn hir_union(expr1: Hir, expr2: Hir) -> Hir { |
| use hir::Class::{Bytes, Unicode}; |
| |
| match (expr1.into_kind(), expr2.into_kind()) { |
| ( |
| HirKind::Class(Unicode(mut c1)), |
| HirKind::Class(Unicode(c2)), |
| ) => { |
| c1.union(&c2); |
| Hir::class(hir::Class::Unicode(c1)) |
| } |
| ( |
| HirKind::Class(Bytes(mut c1)), |
| HirKind::Class(Bytes(c2)), |
| ) => { |
| c1.union(&c2); |
| Hir::class(hir::Class::Bytes(c1)) |
| } |
| _ => panic!("cannot union non-class Hir exprs"), |
| } |
| } |
| |
| fn hir_difference(expr1: Hir, expr2: Hir) -> Hir { |
| use hir::Class::{Bytes, Unicode}; |
| |
| match (expr1.into_kind(), expr2.into_kind()) { |
| ( |
| HirKind::Class(Unicode(mut c1)), |
| HirKind::Class(Unicode(c2)), |
| ) => { |
| c1.difference(&c2); |
| Hir::class(hir::Class::Unicode(c1)) |
| } |
| ( |
| HirKind::Class(Bytes(mut c1)), |
| HirKind::Class(Bytes(c2)), |
| ) => { |
| c1.difference(&c2); |
| Hir::class(hir::Class::Bytes(c1)) |
| } |
| _ => panic!("cannot difference non-class Hir exprs"), |
| } |
| } |
| |
| fn hir_anchor(anchor: hir::Anchor) -> Hir { |
| Hir::anchor(anchor) |
| } |
| |
| fn hir_word(wb: hir::WordBoundary) -> Hir { |
| Hir::word_boundary(wb) |
| } |
| |
#[test]
fn empty() {
    // Shorthand for the empty expression, which appears in every case.
    let e = Hir::empty;
    let cases = vec![
        ("", e()),
        ("(?i)", e()),
        ("()", hir_group(1, e())),
        ("(?:)", hir_group_nocap(e())),
        ("(?P<wat>)", hir_group_name(1, "wat", e())),
        ("|", hir_alt(vec![e(), e()])),
        ("()|()", hir_alt(vec![hir_group(1, e()), hir_group(2, e())])),
        ("(|b)", hir_group(1, hir_alt(vec![e(), hir_lit("b")]))),
        ("(a|)", hir_group(1, hir_alt(vec![hir_lit("a"), e()]))),
        (
            "(a||c)",
            hir_group(1, hir_alt(vec![hir_lit("a"), e(), hir_lit("c")])),
        ),
        ("(||)", hir_group(1, hir_alt(vec![e(), e(), e()]))),
    ];
    for (pattern, expected) in cases {
        assert_eq!(t(pattern), expected);
    }
}
| |
#[test]
fn literal() {
    // Builds the expected error; every span in this test is on line 1.
    let err = |kind, (s_off, s_col), (e_off, e_col)| TestError {
        kind: kind,
        span: Span::new(
            Position::new(s_off, 1, s_col),
            Position::new(e_off, 1, e_col),
        ),
    };

    let utf8_cases = vec![
        ("a", hir_lit("a")),
        ("(?-u)a", hir_lit("a")),
        ("☃", hir_lit("☃")),
        ("abcd", hir_lit("abcd")),
    ];
    for (pattern, expected) in utf8_cases {
        assert_eq!(t(pattern), expected);
    }

    let byte_cases = vec![
        ("(?-u)a", hir_lit("a")),
        ("(?-u)\x61", hir_lit("a")),
        (r"(?-u)\x61", hir_lit("a")),
        (r"(?-u)\xFF", hir_blit(b"\xFF")),
    ];
    for (pattern, expected) in byte_cases {
        assert_eq!(t_bytes(pattern), expected);
    }

    let error_cases = vec![
        ("(?-u)☃", err(hir::ErrorKind::UnicodeNotAllowed, (5, 6), (8, 7))),
        (r"(?-u)\xFF", err(hir::ErrorKind::InvalidUtf8, (5, 6), (9, 10))),
    ];
    for (pattern, expected) in error_cases {
        assert_eq!(t_err(pattern), expected);
    }
}
| |
#[test]
fn literal_case_insensitive() {
    // The two-member classes that a case-folded `a` is expected to become.
    let ua = || hir_uclass(&[('A', 'A'), ('a', 'a')]);
    let ba = || hir_bclass(&[(b'A', b'A'), (b'a', b'a')]);

    let cases = vec![
        ("(?i)a", ua()),
        ("(?i:a)", hir_group_nocap(ua())),
        (
            "a(?i)a(?-i)a",
            hir_cat(vec![hir_lit("a"), ua(), hir_lit("a")]),
        ),
        (
            "(?i)ab@c",
            hir_cat(vec![
                ua(),
                hir_uclass(&[('B', 'B'), ('b', 'b')]),
                hir_lit("@"),
                hir_uclass(&[('C', 'C'), ('c', 'c')]),
            ]),
        ),
        ("(?i)β", hir_uclass(&[('Β', 'Β'), ('β', 'β'), ('ϐ', 'ϐ')])),
        ("(?i-u)a", ba()),
        (
            "(?-u)a(?i)a(?-i)a",
            hir_cat(vec![hir_lit("a"), ba(), hir_lit("a")]),
        ),
        (
            "(?i-u)ab@c",
            hir_cat(vec![
                ba(),
                hir_bclass(&[(b'B', b'B'), (b'b', b'b')]),
                hir_lit("@"),
                hir_bclass(&[(b'C', b'C'), (b'c', b'c')]),
            ]),
        ),
    ];
    for (pattern, expected) in cases {
        assert_eq!(t(pattern), expected);
    }

    let byte_cases = vec![
        ("(?i-u)a", ba()),
        ("(?i-u)\x61", ba()),
        (r"(?i-u)\x61", ba()),
        (r"(?i-u)\xFF", hir_blit(b"\xFF")),
    ];
    for (pattern, expected) in byte_cases {
        assert_eq!(t_bytes(pattern), expected);
    }

    let expected_err = TestError {
        kind: hir::ErrorKind::UnicodeNotAllowed,
        span: Span::new(Position::new(6, 1, 7), Position::new(8, 1, 8)),
    };
    assert_eq!(t_err("(?i-u)β"), expected_err);
}
| |
#[test]
fn dot() {
    // Builds the expected error; every span in this test is on line 1.
    let err = |kind, (s_off, s_col), (e_off, e_col)| TestError {
        kind: kind,
        span: Span::new(
            Position::new(s_off, 1, s_col),
            Position::new(e_off, 1, e_col),
        ),
    };

    let unicode_cases = vec![
        (".", hir_uclass(&[('\0', '\t'), ('\x0B', '\u{10FFFF}')])),
        ("(?s).", hir_uclass(&[('\0', '\u{10FFFF}')])),
    ];
    for (pattern, expected) in unicode_cases {
        assert_eq!(t(pattern), expected);
    }

    let byte_cases = vec![
        ("(?-u).", hir_bclass(&[(b'\0', b'\t'), (b'\x0B', b'\xFF')])),
        ("(?s-u).", hir_bclass(&[(b'\0', b'\xFF')])),
    ];
    for (pattern, expected) in byte_cases {
        assert_eq!(t_bytes(pattern), expected);
    }

    // If invalid UTF-8 isn't allowed, then non-Unicode `.` isn't allowed.
    let error_cases = vec![
        ("(?-u).", err(hir::ErrorKind::InvalidUtf8, (5, 6), (6, 7))),
        ("(?s-u).", err(hir::ErrorKind::InvalidUtf8, (6, 7), (7, 8))),
    ];
    for (pattern, expected) in error_cases {
        assert_eq!(t_err(pattern), expected);
    }
}
| |
#[test]
fn assertions() {
    let cases = vec![
        ("^", hir_anchor(hir::Anchor::StartText)),
        ("$", hir_anchor(hir::Anchor::EndText)),
        (r"\A", hir_anchor(hir::Anchor::StartText)),
        (r"\z", hir_anchor(hir::Anchor::EndText)),
        ("(?m)^", hir_anchor(hir::Anchor::StartLine)),
        ("(?m)$", hir_anchor(hir::Anchor::EndLine)),
        (r"(?m)\A", hir_anchor(hir::Anchor::StartText)),
        (r"(?m)\z", hir_anchor(hir::Anchor::EndText)),
        (r"\b", hir_word(hir::WordBoundary::Unicode)),
        (r"\B", hir_word(hir::WordBoundary::UnicodeNegate)),
        (r"(?-u)\b", hir_word(hir::WordBoundary::Ascii)),
    ];
    for (pattern, expected) in cases {
        assert_eq!(t(pattern), expected);
    }

    // The negated ASCII word boundary is checked with invalid UTF-8 allowed.
    let ascii_negate = hir_word(hir::WordBoundary::AsciiNegate);
    assert_eq!(t_bytes(r"(?-u)\B"), ascii_negate);

    // TODO(ag): Enable this tests when regex goes to 1.0.
    // assert_eq!(t_err(r"(?-u)\B"), TestError {
    //     kind: hir::ErrorKind::InvalidUtf8,
    //     span: Span::new(Position::new(5, 1, 6), Position::new(7, 1, 8)),
    // });
}
| |
#[test]
fn group() {
    let cases = vec![
        ("(a)", hir_group(1, hir_lit("a"))),
        (
            "(a)(b)",
            hir_cat(vec![
                hir_group(1, hir_lit("a")),
                hir_group(2, hir_lit("b")),
            ]),
        ),
        (
            "(a)|(b)",
            hir_alt(vec![
                hir_group(1, hir_lit("a")),
                hir_group(2, hir_lit("b")),
            ]),
        ),
        ("(?P<foo>)", hir_group_name(1, "foo", Hir::empty())),
        ("(?P<foo>a)", hir_group_name(1, "foo", hir_lit("a"))),
        (
            "(?P<foo>a)(?P<bar>b)",
            hir_cat(vec![
                hir_group_name(1, "foo", hir_lit("a")),
                hir_group_name(2, "bar", hir_lit("b")),
            ]),
        ),
        ("(?:)", hir_group_nocap(Hir::empty())),
        ("(?:a)", hir_group_nocap(hir_lit("a"))),
        (
            "(?:a)(b)",
            hir_cat(vec![
                hir_group_nocap(hir_lit("a")),
                hir_group(1, hir_lit("b")),
            ]),
        ),
        (
            "(a)(?:b)(c)",
            hir_cat(vec![
                hir_group(1, hir_lit("a")),
                hir_group_nocap(hir_lit("b")),
                hir_group(2, hir_lit("c")),
            ]),
        ),
        (
            "(a)(?P<foo>b)(c)",
            hir_cat(vec![
                hir_group(1, hir_lit("a")),
                hir_group_name(2, "foo", hir_lit("b")),
                hir_group(3, hir_lit("c")),
            ]),
        ),
    ];
    for (pattern, expected) in cases {
        assert_eq!(t(pattern), expected);
    }
}
| |
#[test]
fn flags() {
    // The class a case-folded Unicode `a` is expected to become.
    let ua = || hir_uclass(&[('A', 'A'), ('a', 'a')]);
    let a = || hir_lit("a");

    let cases = vec![
        ("(?i:a)a", hir_cat(vec![hir_group_nocap(ua()), a()])),
        (
            "(?i-u:a)β",
            hir_cat(vec![
                hir_group_nocap(hir_bclass(&[(b'A', b'A'), (b'a', b'a')])),
                hir_lit("β"),
            ]),
        ),
        ("(?i)(?-i:a)a", hir_cat(vec![hir_group_nocap(a()), ua()])),
        (
            "(?im)a^",
            hir_cat(vec![ua(), hir_anchor(hir::Anchor::StartLine)]),
        ),
        (
            "(?im)a^(?i-m)a^",
            hir_cat(vec![
                ua(),
                hir_anchor(hir::Anchor::StartLine),
                ua(),
                hir_anchor(hir::Anchor::StartText),
            ]),
        ),
        (
            "(?U)a*a*?(?-U)a*a*?",
            hir_cat(vec![
                hir_star(false, a()),
                hir_star(true, a()),
                hir_star(true, a()),
                hir_star(false, a()),
            ]),
        ),
        (
            "(?:a(?i)a)a",
            hir_cat(vec![hir_group_nocap(hir_cat(vec![a(), ua()])), a()]),
        ),
        (
            "(?i)(?:a(?-i)a)a",
            hir_cat(vec![hir_group_nocap(hir_cat(vec![ua(), a()])), ua()]),
        ),
    ];
    for (pattern, expected) in cases {
        assert_eq!(t(pattern), expected);
    }
}
| |
#[test]
fn escape() {
    // Each escaped meta character is expected to come out as the plain
    // character.
    let pattern = r"\\\.\+\*\?\(\)\|\[\]\{\}\^\$\#";
    let expected = hir_lit(r"\.+*?()|[]{}^$#");
    assert_eq!(t(pattern), expected);
}
| |
#[test]
fn repetition() {
    let a = || hir_lit("a");
    let b = || hir_lit("b");

    let cases = vec![
        ("a?", hir_quest(true, a())),
        ("a*", hir_star(true, a())),
        ("a+", hir_plus(true, a())),
        ("a??", hir_quest(false, a())),
        ("a*?", hir_star(false, a())),
        ("a+?", hir_plus(false, a())),
        ("a{1}", hir_range(true, hir::RepetitionRange::Exactly(1), a())),
        ("a{1,}", hir_range(true, hir::RepetitionRange::AtLeast(1), a())),
        (
            "a{1,2}",
            hir_range(true, hir::RepetitionRange::Bounded(1, 2), a()),
        ),
        ("a{1}?", hir_range(false, hir::RepetitionRange::Exactly(1), a())),
        (
            "a{1,}?",
            hir_range(false, hir::RepetitionRange::AtLeast(1), a()),
        ),
        (
            "a{1,2}?",
            hir_range(false, hir::RepetitionRange::Bounded(1, 2), a()),
        ),
        ("ab?", hir_cat(vec![a(), hir_quest(true, b())])),
        (
            "(ab)?",
            hir_quest(true, hir_group(1, hir_cat(vec![a(), b()]))),
        ),
        ("a|b?", hir_alt(vec![a(), hir_quest(true, b())])),
    ];
    for (pattern, expected) in cases {
        assert_eq!(t(pattern), expected);
    }
}
| |
#[test]
fn cat_alt() {
    let cases = vec![
        (
            "(ab)",
            hir_group(1, hir_cat(vec![hir_lit("a"), hir_lit("b")])),
        ),
        ("a|b", hir_alt(vec![hir_lit("a"), hir_lit("b")])),
        (
            "a|b|c",
            hir_alt(vec![hir_lit("a"), hir_lit("b"), hir_lit("c")]),
        ),
        (
            "ab|bc|cd",
            hir_alt(vec![hir_lit("ab"), hir_lit("bc"), hir_lit("cd")]),
        ),
        (
            "(a|b)",
            hir_group(1, hir_alt(vec![hir_lit("a"), hir_lit("b")])),
        ),
        (
            "(a|b|c)",
            hir_group(
                1,
                hir_alt(vec![hir_lit("a"), hir_lit("b"), hir_lit("c")]),
            ),
        ),
        (
            "(ab|bc|cd)",
            hir_group(
                1,
                hir_alt(vec![hir_lit("ab"), hir_lit("bc"), hir_lit("cd")]),
            ),
        ),
        (
            "(ab|(bc|(cd)))",
            hir_group(
                1,
                hir_alt(vec![
                    hir_lit("ab"),
                    hir_group(
                        2,
                        hir_alt(vec![
                            hir_lit("bc"),
                            hir_group(3, hir_lit("cd")),
                        ]),
                    ),
                ]),
            ),
        ),
    ];
    for (pattern, expected) in cases {
        assert_eq!(t(pattern), expected);
    }
}
| |
#[test]
fn class_ascii() {
    // Builds the expected error; every span in this test is on line 1.
    let err = |kind, (s_off, s_col), (e_off, e_col)| TestError {
        kind: kind,
        span: Span::new(
            Position::new(s_off, 1, s_col),
            Position::new(e_off, 1, e_col),
        ),
    };

    // Each named class is expected to equal the Unicode class built from
    // the ranges `ascii_class` returns for it.
    let named = vec![
        ("[[:alnum:]]", ast::ClassAsciiKind::Alnum),
        ("[[:alpha:]]", ast::ClassAsciiKind::Alpha),
        ("[[:ascii:]]", ast::ClassAsciiKind::Ascii),
        ("[[:blank:]]", ast::ClassAsciiKind::Blank),
        ("[[:cntrl:]]", ast::ClassAsciiKind::Cntrl),
        ("[[:digit:]]", ast::ClassAsciiKind::Digit),
        ("[[:graph:]]", ast::ClassAsciiKind::Graph),
        ("[[:lower:]]", ast::ClassAsciiKind::Lower),
        ("[[:print:]]", ast::ClassAsciiKind::Print),
        ("[[:punct:]]", ast::ClassAsciiKind::Punct),
        ("[[:space:]]", ast::ClassAsciiKind::Space),
        ("[[:upper:]]", ast::ClassAsciiKind::Upper),
        ("[[:word:]]", ast::ClassAsciiKind::Word),
        ("[[:xdigit:]]", ast::ClassAsciiKind::Xdigit),
    ];
    for (pattern, kind) in named {
        assert_eq!(t(pattern), hir_uclass(ascii_class(&kind)));
    }

    let lower = ascii_class(&ast::ClassAsciiKind::Lower);
    let lower_cases = vec![
        ("[[:^lower:]]", hir_negate(hir_uclass(lower))),
        (
            "(?i)[[:lower:]]",
            hir_uclass(&[
                ('A', 'Z'), ('a', 'z'),
                ('\u{17F}', '\u{17F}'),
                ('\u{212A}', '\u{212A}'),
            ]),
        ),
        ("(?-u)[[:lower:]]", hir_bclass_from_char(lower)),
        (
            "(?i-u)[[:lower:]]",
            hir_case_fold(hir_bclass_from_char(lower)),
        ),
    ];
    for (pattern, expected) in lower_cases {
        assert_eq!(t(pattern), expected);
    }

    let error_cases = vec![
        (
            "(?-u)[[:^lower:]]",
            err(hir::ErrorKind::InvalidUtf8, (6, 7), (16, 17)),
        ),
        (
            "(?i-u)[[:^lower:]]",
            err(hir::ErrorKind::InvalidUtf8, (7, 8), (17, 18)),
        ),
    ];
    for (pattern, expected) in error_cases {
        assert_eq!(t_err(pattern), expected);
    }
}
| |
#[test]
fn class_perl() {
    let digit = || hir_uclass_query(ClassQuery::Binary("digit"));
    let space = || hir_uclass_query(ClassQuery::Binary("space"));
    let word = hir_uclass_perl_word;
    // Byte classes built from the `ascii_class` ranges.
    let adigit =
        || hir_bclass_from_char(ascii_class(&ast::ClassAsciiKind::Digit));
    let aspace =
        || hir_bclass_from_char(ascii_class(&ast::ClassAsciiKind::Space));
    let aword =
        || hir_bclass_from_char(ascii_class(&ast::ClassAsciiKind::Word));

    let cases = vec![
        // Unicode
        (r"\d", digit()),
        (r"\s", space()),
        (r"\w", word()),
        (r"(?i)\d", digit()),
        (r"(?i)\s", space()),
        (r"(?i)\w", word()),
        // Unicode, negated
        (r"\D", hir_negate(digit())),
        (r"\S", hir_negate(space())),
        (r"\W", hir_negate(word())),
        (r"(?i)\D", hir_negate(digit())),
        (r"(?i)\S", hir_negate(space())),
        (r"(?i)\W", hir_negate(word())),
        // ASCII only
        (r"(?-u)\d", adigit()),
        (r"(?-u)\s", aspace()),
        (r"(?-u)\w", aword()),
        (r"(?i-u)\d", adigit()),
        (r"(?i-u)\s", aspace()),
        (r"(?i-u)\w", aword()),
        // ASCII only, negated
        (r"(?-u)\D", hir_negate(adigit())),
        (r"(?-u)\S", hir_negate(aspace())),
        (r"(?-u)\W", hir_negate(aword())),
        (r"(?i-u)\D", hir_negate(adigit())),
        (r"(?i-u)\S", hir_negate(aspace())),
        (r"(?i-u)\W", hir_negate(aword())),
    ];
    for (pattern, expected) in cases {
        assert_eq!(t(pattern), expected);
    }
}
| |
#[test]
fn class_unicode() {
    let z = || hir_uclass_query(ClassQuery::Binary("Z"));
    let other = || hir_uclass_query(ClassQuery::Binary("Other"));
    let greek = || hir_uclass_query(ClassQuery::Binary("Greek"));
    let any = || hir_uclass_query(ClassQuery::Binary("Any"));
    let assigned = || hir_uclass_query(ClassQuery::Binary("Assigned"));
    let ascii = || hir_uclass_query(ClassQuery::Binary("ASCII"));
    // Builds the expected error; every span in this test is on line 1.
    let err = |kind, (s_off, s_col), (e_off, e_col)| TestError {
        kind: kind,
        span: Span::new(
            Position::new(s_off, 1, s_col),
            Position::new(e_off, 1, e_col),
        ),
    };

    let cases = vec![
        (r"\pZ", z()),
        (r"\pz", z()),
        (r"\p{Separator}", z()),
        (r"\p{se PaRa ToR}", z()),
        (r"\p{gc:Separator}", z()),
        (r"\p{gc=Separator}", z()),
        (r"\p{Other}", other()),
        (r"\pC", other()),
        (r"\PZ", hir_negate(z())),
        (r"\P{separator}", hir_negate(z())),
        (r"\P{gc!=separator}", hir_negate(z())),
        (r"\p{Greek}", greek()),
        (r"(?i)\p{Greek}", hir_case_fold(greek())),
        (r"(?i)\P{Greek}", hir_negate(hir_case_fold(greek()))),
        (r"\p{any}", any()),
        (r"\p{assigned}", assigned()),
        (r"\p{ascii}", ascii()),
        (r"\p{gc:any}", any()),
        (r"\p{gc:assigned}", assigned()),
        (r"\p{gc:ascii}", ascii()),
    ];
    for (pattern, expected) in cases {
        assert_eq!(t(pattern), expected);
    }

    let error_cases = vec![
        (
            r"(?-u)\pZ",
            err(hir::ErrorKind::UnicodeNotAllowed, (5, 6), (8, 9)),
        ),
        (
            r"(?-u)\p{Separator}",
            err(hir::ErrorKind::UnicodeNotAllowed, (5, 6), (18, 19)),
        ),
        (
            r"\pE",
            err(hir::ErrorKind::UnicodePropertyNotFound, (0, 1), (3, 4)),
        ),
        (
            r"\p{Foo}",
            err(hir::ErrorKind::UnicodePropertyNotFound, (0, 1), (7, 8)),
        ),
        (
            r"\p{gc:Foo}",
            err(
                hir::ErrorKind::UnicodePropertyValueNotFound,
                (0, 1),
                (10, 11),
            ),
        ),
        (
            r"\p{sc:Foo}",
            err(
                hir::ErrorKind::UnicodePropertyValueNotFound,
                (0, 1),
                (10, 11),
            ),
        ),
        (
            r"\p{scx:Foo}",
            err(
                hir::ErrorKind::UnicodePropertyValueNotFound,
                (0, 1),
                (11, 12),
            ),
        ),
        (
            r"\p{age:Foo}",
            err(
                hir::ErrorKind::UnicodePropertyValueNotFound,
                (0, 1),
                (11, 12),
            ),
        ),
    ];
    for (pattern, expected) in error_cases {
        assert_eq!(t_err(pattern), expected);
    }
}
| |
#[test]
fn class_bracketed() {
    let digit = || hir_uclass_query(ClassQuery::Binary("digit"));
    let sep = || hir_uclass_query(ClassQuery::Binary("separator"));
    let greek_folded =
        || hir_case_fold(hir_uclass_query(ClassQuery::Binary("greek")));
    // A Unicode class holding exactly one character.
    let one = |c: char| hir_uclass(&[(c, c)]);
    // Builds the expected error; every span in this test is on line 1.
    let err = |kind, (s_off, s_col), (e_off, e_col)| TestError {
        kind: kind,
        span: Span::new(
            Position::new(s_off, 1, s_col),
            Position::new(e_off, 1, e_col),
        ),
    };

    let cases = vec![
        ("[a]", one('a')),
        ("[^[a]]", hir_negate(one('a'))),
        ("[a-z]", hir_uclass(&[('a', 'z')])),
        ("[a-fd-h]", hir_uclass(&[('a', 'h')])),
        ("[a-fg-m]", hir_uclass(&[('a', 'm')])),
        (r"[\x00]", one('\0')),
        (r"[\n]", one('\n')),
        ("[\n]", one('\n')),
        (r"[\d]", digit()),
        (r"[\pZ]", sep()),
        (r"[\p{separator}]", sep()),
        (r"[^\D]", digit()),
        (r"[^\PZ]", sep()),
        (r"[^\P{separator}]", sep()),
        (r"(?i)[^\D]", digit()),
        (r"(?i)[^\P{greek}]", greek_folded()),
        ("(?-u)[a]", hir_bclass(&[(b'a', b'a')])),
        (r"(?-u)[\x00]", hir_bclass(&[(b'\0', b'\0')])),
    ];
    for (pattern, expected) in cases {
        assert_eq!(t(pattern), expected);
    }
    assert_eq!(t_bytes(r"(?-u)[\xFF]"), hir_bclass(&[(b'\xFF', b'\xFF')]));

    let folded_and_negated = vec![
        ("(?i)[a]", hir_uclass(&[('A', 'A'), ('a', 'a')])),
        (
            "(?i)[k]",
            hir_uclass(&[('K', 'K'), ('k', 'k'), ('\u{212A}', '\u{212A}')]),
        ),
        ("(?i)[β]", hir_uclass(&[('Β', 'Β'), ('β', 'β'), ('ϐ', 'ϐ')])),
        ("(?i-u)[k]", hir_bclass(&[(b'K', b'K'), (b'k', b'k')])),
        ("[^a]", hir_negate(one('a'))),
        (r"[^\x00]", hir_negate(one('\0'))),
    ];
    for (pattern, expected) in folded_and_negated {
        assert_eq!(t(pattern), expected);
    }
    assert_eq!(
        t_bytes("(?-u)[^a]"),
        hir_negate(hir_bclass(&[(b'a', b'a')]))
    );

    let more = vec![
        (r"[^\d]", hir_negate(digit())),
        (r"[^\pZ]", hir_negate(sep())),
        (r"[^\p{separator}]", hir_negate(sep())),
        (r"(?i)[^\p{greek}]", hir_negate(greek_folded())),
        (r"(?i)[\P{greek}]", hir_negate(greek_folded())),
        // Test some weird cases.
        (r"[\[]", one('[')),
        (r"[&]", one('&')),
        (r"[\&]", one('&')),
        (r"[\&\&]", one('&')),
        (r"[\x00-&]", hir_uclass(&[('\0', '&')])),
        (r"[&-\xFF]", hir_uclass(&[('&', '\u{FF}')])),
        (r"[~]", one('~')),
        (r"[\~]", one('~')),
        (r"[\~\~]", one('~')),
        (r"[\x00-~]", hir_uclass(&[('\0', '~')])),
        (r"[~-\xFF]", hir_uclass(&[('~', '\u{FF}')])),
        (r"[-]", one('-')),
        (r"[\-]", one('-')),
        (r"[\-\-]", one('-')),
        (r"[\x00-\-]", hir_uclass(&[('\0', '-')])),
        (r"[\--\xFF]", hir_uclass(&[('-', '\u{FF}')])),
    ];
    for (pattern, expected) in more {
        assert_eq!(t(pattern), expected);
    }

    let error_cases = vec![
        ("(?-u)[^a]", err(hir::ErrorKind::InvalidUtf8, (5, 6), (9, 10))),
        (
            r"[^\s\S]",
            err(hir::ErrorKind::EmptyClassNotAllowed, (0, 1), (7, 8)),
        ),
        (
            r"(?-u)[^\s\S]",
            err(hir::ErrorKind::EmptyClassNotAllowed, (5, 6), (12, 13)),
        ),
    ];
    for (pattern, expected) in error_cases {
        assert_eq!(t_err(pattern), expected);
    }
}
| |
#[test]
fn class_bracketed_union() {
    let sep = || hir_uclass_query(ClassQuery::Binary("separator"));
    let greek = || hir_uclass_query(ClassQuery::Binary("greek"));
    let cyrillic = || hir_uclass_query(ClassQuery::Binary("cyrillic"));
    let age3 = || {
        hir_uclass_query(ClassQuery::ByValue {
            property_name: "age",
            property_value: "3.0",
        })
    };
    // The union that the three patterns at the end share as their base.
    let age_greek_sep = || hir_union(age3(), hir_union(greek(), sep()));

    let cases = vec![
        ("[a-zA-Z]", hir_uclass(&[('A', 'Z'), ('a', 'z')])),
        (r"[a\pZb]", hir_union(hir_uclass(&[('a', 'b')]), sep())),
        (r"[\pZ\p{Greek}]", hir_union(greek(), sep())),
        (r"[\p{age:3.0}\pZ\p{Greek}]", age_greek_sep()),
        (
            r"[[[\p{age:3.0}\pZ]\p{Greek}][\p{Cyrillic}]]",
            hir_union(
                age3(),
                hir_union(cyrillic(), hir_union(greek(), sep())),
            ),
        ),
        (
            r"(?i)[\p{age:3.0}\pZ\p{Greek}]",
            hir_case_fold(age_greek_sep()),
        ),
        (r"[^\p{age:3.0}\pZ\p{Greek}]", hir_negate(age_greek_sep())),
        (
            r"(?i)[^\p{age:3.0}\pZ\p{Greek}]",
            hir_negate(hir_case_fold(age_greek_sep())),
        ),
    ];
    for (pattern, expected) in cases {
        assert_eq!(t(pattern), expected);
    }
}
| |
| #[test] |
| fn class_bracketed_nested() { |
|     // Expected values shared by several patterns; rebuilt on every use. |
|     let not_c = || hir_negate(hir_uclass(&[('c', 'c')])); |
|     let only_c = || hir_uclass(&[('c', 'c')]); |
|     let not_c_folded = |
|         || hir_negate(hir_case_fold(hir_uclass(&[('c', 'c')]))); |
|     let c_either_case = || hir_uclass(&[('C', 'C'), ('c', 'c')]); |
|     let empty_class_at = |start, end| TestError { |
|         kind: hir::ErrorKind::EmptyClassNotAllowed, |
|         span: Span::new(start, end), |
|     }; |
| |
|     // A nested negated class unioned with items it already covers. |
|     for &pat in [r"[a[^c]]", r"[a-b[^c]]"].iter() { |
|         assert_eq!(t(pat), not_c(), "pattern: {}", pat); |
|     } |
|     // Here the expectation is the negation of an empty class. |
|     assert_eq!(t(r"[a-c[^c]]"), hir_negate(hir_uclass(&[]))); |
| |
|     // Negating the outer class as well. |
|     for &pat in [r"[^a[^c]]", r"[^a-b[^c]]"].iter() { |
|         assert_eq!(t(pat), only_c(), "pattern: {}", pat); |
|     } |
| |
|     // Case-insensitive variants of the two groups above. |
|     for &pat in [r"(?i)[a[^c]]", r"(?i)[a-b[^c]]"].iter() { |
|         assert_eq!(t(pat), not_c_folded(), "pattern: {}", pat); |
|     } |
|     for &pat in [r"(?i)[^a[^c]]", r"(?i)[^a-b[^c]]"].iter() { |
|         assert_eq!(t(pat), c_either_case(), "pattern: {}", pat); |
|     } |
| |
|     // These classes are rejected; the reported span covers the outer |
|     // bracketed class. |
|     assert_eq!( |
|         t_err(r"[^a-c[^c]]"), |
|         empty_class_at(Position::new(0, 1, 1), Position::new(10, 1, 11))); |
|     assert_eq!( |
|         t_err(r"(?i)[^a-c[^c]]"), |
|         empty_class_at(Position::new(4, 1, 5), Position::new(14, 1, 15))); |
| } |
| |
| #[test] |
| fn class_bracketed_intersect() { |
|     // Each table pairs a pattern with the single range it must reduce to. |
|     let unicode_cases: &[(&str, (char, char))] = &[ |
|         ("[abc&&b-c]", ('b', 'c')), |
|         ("[abc&&[b-c]]", ('b', 'c')), |
|         ("[[abc]&&[b-c]]", ('b', 'c')), |
|         ("[a-z&&b-y&&c-x]", ('c', 'x')), |
|         ("[c-da-b&&a-d]", ('a', 'd')), |
|         ("[a-d&&c-da-b]", ('a', 'd')), |
|         (r"[a-z&&a-c]", ('a', 'c')), |
|         (r"[[a-z&&a-c]]", ('a', 'c')), |
|     ]; |
|     for &(pat, range) in unicode_cases { |
|         assert_eq!(t(pat), hir_uclass(&[range]), "pattern: {}", pat); |
|     } |
|     assert_eq!(t(r"[^[a-z&&a-c]]"), hir_negate(hir_uclass(&[('a', 'c')]))); |
| |
|     // With Unicode mode disabled the result is a byte class. |
|     let byte_cases: &[(&str, (u8, u8))] = &[ |
|         ("(?-u)[abc&&b-c]", (b'b', b'c')), |
|         ("(?-u)[abc&&[b-c]]", (b'b', b'c')), |
|         ("(?-u)[[abc]&&[b-c]]", (b'b', b'c')), |
|         ("(?-u)[a-z&&b-y&&c-x]", (b'c', b'x')), |
|         ("(?-u)[c-da-b&&a-d]", (b'a', b'd')), |
|         ("(?-u)[a-d&&c-da-b]", (b'a', b'd')), |
|     ]; |
|     for &(pat, range) in byte_cases { |
|         assert_eq!(t(pat), hir_bclass(&[range]), "pattern: {}", pat); |
|     } |
| |
|     // Case-insensitive, Unicode. |
|     let folded_unicode_cases: &[(&str, (char, char))] = &[ |
|         ("(?i)[abc&&b-c]", ('b', 'c')), |
|         ("(?i)[abc&&[b-c]]", ('b', 'c')), |
|         ("(?i)[[abc]&&[b-c]]", ('b', 'c')), |
|         ("(?i)[a-z&&b-y&&c-x]", ('c', 'x')), |
|         ("(?i)[c-da-b&&a-d]", ('a', 'd')), |
|         ("(?i)[a-d&&c-da-b]", ('a', 'd')), |
|     ]; |
|     for &(pat, range) in folded_unicode_cases { |
|         assert_eq!( |
|             t(pat), |
|             hir_case_fold(hir_uclass(&[range])), |
|             "pattern: {}", pat); |
|     } |
| |
|     // Case-insensitive, bytes. |
|     let folded_byte_cases: &[(&str, (u8, u8))] = &[ |
|         ("(?i-u)[abc&&b-c]", (b'b', b'c')), |
|         ("(?i-u)[abc&&[b-c]]", (b'b', b'c')), |
|         ("(?i-u)[[abc]&&[b-c]]", (b'b', b'c')), |
|         ("(?i-u)[a-z&&b-y&&c-x]", (b'c', b'x')), |
|         ("(?i-u)[c-da-b&&a-d]", (b'a', b'd')), |
|         ("(?i-u)[a-d&&c-da-b]", (b'a', b'd')), |
|     ]; |
|     for &(pat, range) in folded_byte_cases { |
|         assert_eq!( |
|             t(pat), |
|             hir_case_fold(hir_bclass(&[range])), |
|             "pattern: {}", pat); |
|     } |
| |
|     // Punctuation around `&&`; each pattern must yield exactly the one |
|     // listed character. |
|     let single_char_cases: &[(&str, char)] = &[ |
|         // In `[a^]`, `^` does not need to be escaped, so it makes sense |
|         // that `^` is also allowed to be unescaped after `&&`. |
|         (r"[\^&&^]", '^'), |
|         // `]` needs to be escaped after `&&` since it's not at start of |
|         // class. |
|         (r"[]&&\]]", ']'), |
|         (r"[-&&-]", '-'), |
|         (r"[\&&&&]", '&'), |
|         (r"[\&&&\&]", '&'), |
|     ]; |
|     for &(pat, c) in single_char_cases { |
|         assert_eq!(t(pat), hir_uclass(&[(c, c)]), "pattern: {}", pat); |
|     } |
| |
|     // Test precedence. |
|     assert_eq!( |
|         t(r"[a-w&&[^c-g]z]"), |
|         hir_uclass(&[('a', 'b'), ('h', 'w')])); |
| } |
| |
| #[test] |
| fn class_bracketed_intersect_negate() { |
|     // Builders for the two "digit" expectations used repeatedly below. |
|     let unicode_digit = || hir_uclass_query(ClassQuery::Binary("digit")); |
|     let ascii_digit = || { |
|         hir_bclass_from_char(ascii_class(&ast::ClassAsciiKind::Digit)) |
|     }; |
| |
|     // Unicode classes. |
|     assert_eq!(t(r"[^\w&&\d]"), hir_negate(unicode_digit())); |
|     assert_eq!(t(r"[^[a-z&&a-c]]"), hir_negate(hir_uclass(&[('a', 'c')]))); |
|     assert_eq!(t(r"[^[\w&&\d]]"), hir_negate(unicode_digit())); |
|     assert_eq!(t(r"[^[^\w&&\d]]"), unicode_digit()); |
|     assert_eq!(t(r"[[[^\w]&&[^\d]]]"), hir_negate(hir_uclass_perl_word())); |
| |
|     // The same patterns with Unicode mode disabled, translated through |
|     // `t_bytes`. |
|     assert_eq!(t_bytes(r"(?-u)[^\w&&\d]"), hir_negate(ascii_digit())); |
|     assert_eq!( |
|         t_bytes(r"(?-u)[^[a-z&&a-c]]"), |
|         hir_negate(hir_bclass(&[(b'a', b'c')]))); |
|     assert_eq!(t_bytes(r"(?-u)[^[\w&&\d]]"), hir_negate(ascii_digit())); |
|     assert_eq!(t_bytes(r"(?-u)[^[^\w&&\d]]"), ascii_digit()); |
|     assert_eq!( |
|         t_bytes(r"(?-u)[[[^\w]&&[^\d]]]"), |
|         hir_negate(hir_bclass_from_char(ascii_class( |
|             &ast::ClassAsciiKind::Word)))); |
| } |
| |
| #[test] |
| fn class_bracketed_difference() { |
|     // Unicode: the "letter" class with the range \0-\x7F removed. |
|     let actual = t(r"[\pL--[:ascii:]]"); |
|     let expected = hir_difference( |
|         hir_uclass_query(ClassQuery::Binary("letter")), |
|         hir_uclass(&[('\0', '\x7F')])); |
|     assert_eq!(actual, expected); |
| |
|     // Bytes: `[:alpha:]` minus `[:lower:]` leaves only A-Z. |
|     let actual = t(r"(?-u)[[:alpha:]--[:lower:]]"); |
|     let expected = hir_bclass(&[(b'A', b'Z')]); |
|     assert_eq!(actual, expected); |
| } |
| |
| #[test] |
| fn class_bracketed_symmetric_difference() { |
|     // Symmetric difference of the `sc:Greek` and `scx:Greek` classes. |
|     let actual = t(r"[\p{sc:Greek}~~\p{scx:Greek}]"); |
|     let expected = hir_uclass(&[ |
|         ('\u{0342}', '\u{0342}'), |
|         ('\u{0345}', '\u{0345}'), |
|         ('\u{1DC0}', '\u{1DC1}'), |
|     ]); |
|     assert_eq!(actual, expected); |
| |
|     // Overlapping ranges: only the non-shared ends remain. |
|     let actual = t(r"[a-g~~c-j]"); |
|     let expected = hir_uclass(&[('a', 'b'), ('h', 'j')]); |
|     assert_eq!(actual, expected); |
| |
|     // The same ranges as a byte class. |
|     let actual = t(r"(?-u)[a-g~~c-j]"); |
|     let expected = hir_bclass(&[(b'a', b'b'), (b'h', b'j')]); |
|     assert_eq!(actual, expected); |
| } |
| |
| #[test] |
| fn ignore_whitespace() { |
|     // Every pattern here enables the `x` flag. The expectations show that |
|     // unescaped whitespace and `# ...` comments inside an escape or a |
|     // repetition contribute nothing to the translated expression. |
|     assert_eq!(t(r"(?x)\12 3"), hir_lit("\n3")); |
| |
|     // Several spellings of a `\x` escape; all must translate to "S". |
|     let capital_s_patterns: &[&str] = &[ |
|         r"(?x)\x { 53 }", |
|         r"(?x)\x # comment |
| { # comment |
| 53 # comment |
| } #comment", |
|         r"(?x)\x 53", |
|         r"(?x)\x # comment |
| 53 # comment", |
|         r"(?x)\x5 3", |
|     ]; |
|     for &pat in capital_s_patterns { |
|         assert_eq!(t(pat), hir_lit("S"), "pattern: {:?}", pat); |
|     } |
| |
|     // A `\p{...}` class spread over commented lines. |
|     let commented_property = r"(?x)\p # comment |
| { # comment |
| Separator # comment |
| } # comment"; |
|     assert_eq!( |
|         t(commented_property), |
|         hir_uclass_query(ClassQuery::Binary("separator"))); |
| |
|     // A counted repetition spread over commented lines. |
|     let commented_repetition = r"(?x)a # comment |
| { # comment |
| 5 # comment |
| , # comment |
| 10 # comment |
| } # comment"; |
|     assert_eq!( |
|         t(commented_repetition), |
|         hir_range(true, hir::RepetitionRange::Bounded(5, 10), hir_lit("a"))); |
| |
|     // An escaped space is kept as a literal space. |
|     assert_eq!(t(r"(?x)a\ # hi there"), hir_lit("a ")); |
| } |
| |
| #[test] |
| fn analysis_is_always_utf8() { |
|     // Positive examples: `is_always_utf8` must report true. |
|     let always_utf8: &[&str] = &[ |
|         r"a", |
|         r"ab", |
|         r"(?-u)a", |
|         r"(?-u)ab", |
|         r"\xFF", |
|         r"\xFF\xFF", |
|         r"[^a]", |
|         r"[^a][^a]", |
|         r"\b", |
|         r"\B", |
|         r"(?-u)\b", |
|     ]; |
|     for &pat in always_utf8 { |
|         assert!(t_bytes(pat).is_always_utf8(), "pattern: {}", pat); |
|     } |
| |
|     // Negative examples: `is_always_utf8` must report false. |
|     let not_always_utf8: &[&str] = &[ |
|         r"(?-u)\xFF", |
|         r"(?-u)\xFF\xFF", |
|         r"(?-u)[^a]", |
|         r"(?-u)[^a][^a]", |
|         r"(?-u)\B", |
|     ]; |
|     for &pat in not_always_utf8 { |
|         assert!(!t_bytes(pat).is_always_utf8(), "pattern: {}", pat); |
|     } |
| } |
| |
| #[test] |
| fn analysis_is_all_assertions() { |
|     // Positive examples: `is_all_assertions` must report true. |
|     let only_assertions: &[&str] = &[ |
|         r"\b", |
|         r"\B", |
|         r"^", |
|         r"$", |
|         r"\A", |
|         r"\z", |
|         r"$^\z\A\b\B", |
|         r"$|^|\z|\A|\b|\B", |
|         r"^$|$^", |
|         r"((\b)+())*^", |
|     ]; |
|     for &pat in only_assertions { |
|         assert!(t(pat).is_all_assertions(), "pattern: {}", pat); |
|     } |
| |
|     // Negative examples: a literal next to an assertion. |
|     let not_only_assertions: &[&str] = &[r"^a"]; |
|     for &pat in not_only_assertions { |
|         assert!(!t(pat).is_all_assertions(), "pattern: {}", pat); |
|     } |
| } |
| |
| #[test] |
| fn analysis_is_anchored() { |
|     // Checks `is_anchored_start` / `is_anchored_end` on translated |
|     // patterns. Assertions come in start/end pairs so both sides of the |
|     // analysis are covered for each shape of pattern. |
| |
|     // Positive examples. |
|     assert!(t(r"^").is_anchored_start()); |
|     assert!(t(r"$").is_anchored_end()); |
| |
|     assert!(t(r"^^").is_anchored_start()); |
|     assert!(t(r"$$").is_anchored_end()); |
| |
|     assert!(t(r"^$").is_anchored_start()); |
|     assert!(t(r"^$").is_anchored_end()); |
| |
|     assert!(t(r"^foo").is_anchored_start()); |
|     assert!(t(r"foo$").is_anchored_end()); |
| |
|     // Every alternate must carry the anchor. |
|     assert!(t(r"^foo|^bar").is_anchored_start()); |
|     assert!(t(r"foo$|bar$").is_anchored_end()); |
| |
|     assert!(t(r"^(foo|bar)").is_anchored_start()); |
|     assert!(t(r"(foo|bar)$").is_anchored_end()); |
| |
|     // `+` repetitions of an anchor still count as anchored. |
|     assert!(t(r"^+").is_anchored_start()); |
|     assert!(t(r"$+").is_anchored_end()); |
|     assert!(t(r"^++").is_anchored_start()); |
|     assert!(t(r"$++").is_anchored_end()); |
|     assert!(t(r"(^)+").is_anchored_start()); |
|     assert!(t(r"($)+").is_anchored_end()); |
| |
|     // An anchor may be preceded/followed by other assertions. |
|     assert!(t(r"$^").is_anchored_start()); |
|     assert!(t(r"$^").is_anchored_end()); |
|     assert!(t(r"$^|^$").is_anchored_start()); |
|     assert!(t(r"$^|^$").is_anchored_end()); |
| |
|     assert!(t(r"\b^").is_anchored_start()); |
|     assert!(t(r"$\b").is_anchored_end()); |
|     assert!(t(r"^(?m:^)").is_anchored_start()); |
|     assert!(t(r"(?m:$)$").is_anchored_end()); |
|     assert!(t(r"(?m:^)^").is_anchored_start()); |
|     assert!(t(r"$(?m:$)").is_anchored_end()); |
| |
|     // Negative examples. |
|     assert!(!t(r"(?m)^").is_anchored_start()); |
|     assert!(!t(r"(?m)$").is_anchored_end()); |
|     assert!(!t(r"(?m:^$)|$^").is_anchored_start()); |
|     assert!(!t(r"(?m:^$)|$^").is_anchored_end()); |
|     assert!(!t(r"$^|(?m:^$)").is_anchored_start()); |
|     assert!(!t(r"$^|(?m:^$)").is_anchored_end()); |
| |
|     // A literal on the relevant side of the anchor defeats it. |
|     assert!(!t(r"a^").is_anchored_start()); |
|     assert!(!t(r"$a").is_anchored_start()); |
| |
|     // The same two patterns must not be reported as end-anchored either. |
|     // (This pair previously repeated the start checks above verbatim.) |
|     assert!(!t(r"a^").is_anchored_end()); |
|     assert!(!t(r"$a").is_anchored_end()); |
| |
|     assert!(!t(r"^foo|bar").is_anchored_start()); |
|     assert!(!t(r"foo|bar$").is_anchored_end()); |
| |
|     // `*` repetitions of an anchor are not anchored. |
|     assert!(!t(r"^*").is_anchored_start()); |
|     assert!(!t(r"$*").is_anchored_end()); |
|     assert!(!t(r"^*+").is_anchored_start()); |
|     assert!(!t(r"$*+").is_anchored_end()); |
|     assert!(!t(r"^+*").is_anchored_start()); |
|     assert!(!t(r"$+*").is_anchored_end()); |
|     assert!(!t(r"(^)*").is_anchored_start()); |
|     assert!(!t(r"($)*").is_anchored_end()); |
| } |
| |
| #[test] |
| fn analysis_is_any_anchored() { |
|     // Translate a pattern and query one side of the analysis. |
|     let any_start = |pat: &str| t(pat).is_any_anchored_start(); |
|     let any_end = |pat: &str| t(pat).is_any_anchored_end(); |
| |
|     // Positive examples. |
|     assert!(any_start(r"^")); |
|     assert!(any_end(r"$")); |
|     assert!(any_start(r"\A")); |
|     assert!(any_end(r"\z")); |
| |
|     // Negative examples: multi-line anchors, and each anchor queried for |
|     // the opposite side. |
|     assert!(!any_start(r"(?m)^")); |
|     assert!(!any_end(r"(?m)$")); |
|     assert!(!any_start(r"$")); |
|     assert!(!any_end(r"^")); |
| } |
| |
| #[test] |
| fn analysis_is_match_empty() { |
|     // Positive examples: `is_match_empty` must report true. |
|     let matches_empty: &[&str] = &[ |
|         r"", |
|         r"()", |
|         r"()*", |
|         r"()+", |
|         r"()?", |
|         r"a*", |
|         r"a?", |
|         r"a{0}", |
|         r"a{0,}", |
|         r"a{0,1}", |
|         r"a{0,10}", |
|         r"\pL*", |
|         r"a*|b", |
|         r"b|a*", |
|         r"a*a?(abcd)*", |
|         r"^", |
|         r"$", |
|         r"(?m)^", |
|         r"(?m)$", |
|         r"\A", |
|         r"\z", |
|         r"\B", |
|     ]; |
|     for &pat in matches_empty { |
|         assert!(t(pat).is_match_empty(), "pattern: {}", pat); |
|     } |
|     // This one goes through `t_bytes` rather than `t`. |
|     assert!(t_bytes(r"(?-u)\B").is_match_empty()); |
| |
|     // Negative examples: `is_match_empty` must report false. |
|     let never_matches_empty: &[&str] = &[ |
|         r"a+", |
|         r"a{1}", |
|         r"a{1,}", |
|         r"a{1,2}", |
|         r"a{1,10}", |
|         r"b|a", |
|         r"a*a+(abcd)*", |
|         r"\b", |
|         r"(?-u)\b", |
|     ]; |
|     for &pat in never_matches_empty { |
|         assert!(!t(pat).is_match_empty(), "pattern: {}", pat); |
|     } |
| } |
| } |