vendor/regex-syntax-0.7.2/src/ast/mod.rs - toolchain/rustc - Git at Google

 /*!
 Defines an abstract syntax for regular expressions.
 */

 use core::cmp::Ordering;

 use alloc::{boxed::Box, string::String, vec, vec::Vec};

 pub use crate::ast::visitor::{visit, Visitor};

 pub mod parse;
 pub mod print;
 mod visitor;

 /// An error that occurred while parsing a regular expression into an abstract
 /// syntax tree.
 ///
 /// Note that not all ASTs represents a valid regular expression. For example,
 /// an AST is constructed without error for `\p{Quux}`, but `Quux` is not a
 /// valid Unicode property name. That particular error is reported when
 /// translating an AST to the high-level intermediate representation (`HIR`).
 #[derive(Clone, Debug, Eq, PartialEq)]
 pub struct Error {
     /// The kind of error.
     kind: ErrorKind,
     /// The original pattern that the parser generated the error from. Every
     /// span in an error is a valid range into this string.
     pattern: String,
     /// The span of this error.
     span: Span,
 }

 impl Error {
     /// Return the type of this error.
     pub fn kind(&self) -> &ErrorKind {
         &self.kind
     }

     /// The original pattern string in which this error occurred.
     ///
     /// Every span reported by this error is reported in terms of this string.
     pub fn pattern(&self) -> &str {
         &self.pattern
     }

     /// Return the span at which this error occurred.
     pub fn span(&self) -> &Span {
         &self.span
     }

     /// Return an auxiliary span. This span exists only for some errors that
     /// benefit from being able to point to two locations in the original
     /// regular expression. For example, "duplicate" errors will have the
     /// main error position set to the duplicate occurrence while its
     /// auxiliary span will be set to the initial occurrence.
     pub fn auxiliary_span(&self) -> Option<&Span> {
         use self::ErrorKind::*;
         match self.kind {
             FlagDuplicate { ref original } => Some(original),
             FlagRepeatedNegation { ref original, .. } => Some(original),
             GroupNameDuplicate { ref original, .. } => Some(original),
             _ => None,
         }
     }
 }

 /// The type of an error that occurred while building an AST.
 ///
 /// This error type is marked as `non_exhaustive`. This means that adding a
 /// new variant is not considered a breaking change.
 #[non_exhaustive]
 #[derive(Clone, Debug, Eq, PartialEq)]
 pub enum ErrorKind {
     /// The capturing group limit was exceeded.
     ///
     /// Note that this represents a limit on the total number of capturing
     /// groups in a regex and not necessarily the number of nested capturing
     /// groups. That is, the nest limit can be low and it is still possible for
     /// this error to occur.
     CaptureLimitExceeded,
     /// An invalid escape sequence was found in a character class set.
     ClassEscapeInvalid,
     /// An invalid character class range was found. An invalid range is any
     /// range where the start is greater than the end.
     ClassRangeInvalid,
     /// An invalid range boundary was found in a character class. Range
     /// boundaries must be a single literal codepoint, but this error indicates
     /// that something else was found, such as a nested class.
     ClassRangeLiteral,
     /// An opening `[` was found with no corresponding closing `]`.
     ClassUnclosed,
     /// Note that this error variant is no longer used. Namely, a decimal
     /// number can only appear as a repetition quantifier. When the number
     /// in a repetition quantifier is empty, then it gets its own specialized
     /// error, `RepetitionCountDecimalEmpty`.
     DecimalEmpty,
     /// An invalid decimal number was given where one was expected.
     DecimalInvalid,
     /// A bracketed hex literal was empty.
     EscapeHexEmpty,
     /// A bracketed hex literal did not correspond to a Unicode scalar value.
     EscapeHexInvalid,
     /// An invalid hexadecimal digit was found.
     EscapeHexInvalidDigit,
     /// EOF was found before an escape sequence was completed.
     EscapeUnexpectedEof,
     /// An unrecognized escape sequence.
     EscapeUnrecognized,
     /// A dangling negation was used when setting flags, e.g., `i-`.
     FlagDanglingNegation,
     /// A flag was used twice, e.g., `i-i`.
     FlagDuplicate {
         /// The position of the original flag. The error position
         /// points to the duplicate flag.
         original: Span,
     },
     /// The negation operator was used twice, e.g., `-i-s`.
     FlagRepeatedNegation {
         /// The position of the original negation operator. The error position
         /// points to the duplicate negation operator.
         original: Span,
     },
     /// Expected a flag but got EOF, e.g., `(?`.
     FlagUnexpectedEof,
     /// Unrecognized flag, e.g., `a`.
     FlagUnrecognized,
     /// A duplicate capture name was found.
     GroupNameDuplicate {
         /// The position of the initial occurrence of the capture name. The
         /// error position itself points to the duplicate occurrence.
         original: Span,
     },
     /// A capture group name is empty, e.g., `(?P<>abc)`.
     GroupNameEmpty,
     /// An invalid character was seen for a capture group name. This includes
     /// errors where the first character is a digit (even though subsequent
     /// characters are allowed to be digits).
     GroupNameInvalid,
     /// A closing `>` could not be found for a capture group name.
     GroupNameUnexpectedEof,
     /// An unclosed group, e.g., `(ab`.
     ///
     /// The span of this error corresponds to the unclosed parenthesis.
     GroupUnclosed,
     /// An unopened group, e.g., `ab)`.
     GroupUnopened,
     /// The nest limit was exceeded. The limit stored here is the limit
     /// configured in the parser.
     NestLimitExceeded(u32),
     /// The range provided in a counted repetition operator is invalid. The
     /// range is invalid if the start is greater than the end.
     RepetitionCountInvalid,
     /// An opening `{` was not followed by a valid decimal value.
     /// For example, `x{}` or `x{]}` would fail.
     RepetitionCountDecimalEmpty,
     /// An opening `{` was found with no corresponding closing `}`.
     RepetitionCountUnclosed,
     /// A repetition operator was applied to a missing sub-expression. This
     /// occurs, for example, in the regex consisting of just a `*` or even
     /// `(?i)*`. It is, however, possible to create a repetition operating on
     /// an empty sub-expression. For example, `()*` is still considered valid.
     RepetitionMissing,
     /// The Unicode class is not valid. This typically occurs when a `\p` is
     /// followed by something other than a `{`.
     UnicodeClassInvalid,
     /// When octal support is disabled, this error is produced when an octal
     /// escape is used. The octal escape is assumed to be an invocation of
     /// a backreference, which is the common case.
     UnsupportedBackreference,
     /// When syntax similar to PCRE's look-around is used, this error is
     /// returned. Some example syntaxes that are rejected include, but are
     /// not necessarily limited to, `(?=re)`, `(?!re)`, `(?<=re)` and
     /// `(?<!re)`. Note that all of these syntaxes are otherwise invalid; this
     /// error is used to improve the user experience.
     UnsupportedLookAround,
 }

 #[cfg(feature = "std")]
 impl std::error::Error for Error {}

 impl core::fmt::Display for Error {
     fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
         crate::error::Formatter::from(self).fmt(f)
     }
 }

 impl core::fmt::Display for ErrorKind {
     fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
         use self::ErrorKind::*;
         match *self {
             CaptureLimitExceeded => write!(
                 f,
                 "exceeded the maximum number of \
                  capturing groups ({})",
                 u32::MAX
             ),
             ClassEscapeInvalid => {
                 write!(f, "invalid escape sequence found in character class")
             }
             ClassRangeInvalid => write!(
                 f,
                 "invalid character class range, \
                  the start must be <= the end"
             ),
             ClassRangeLiteral => {
                 write!(f, "invalid range boundary, must be a literal")
             }
             ClassUnclosed => write!(f, "unclosed character class"),
             DecimalEmpty => write!(f, "decimal literal empty"),
             DecimalInvalid => write!(f, "decimal literal invalid"),
             EscapeHexEmpty => write!(f, "hexadecimal literal empty"),
             EscapeHexInvalid => {
                 write!(f, "hexadecimal literal is not a Unicode scalar value")
             }
             EscapeHexInvalidDigit => write!(f, "invalid hexadecimal digit"),
             EscapeUnexpectedEof => write!(
                 f,
                 "incomplete escape sequence, \
                  reached end of pattern prematurely"
             ),
             EscapeUnrecognized => write!(f, "unrecognized escape sequence"),
             FlagDanglingNegation => {
                 write!(f, "dangling flag negation operator")
             }
             FlagDuplicate { .. } => write!(f, "duplicate flag"),
             FlagRepeatedNegation { .. } => {
                 write!(f, "flag negation operator repeated")
             }
             FlagUnexpectedEof => {
                 write!(f, "expected flag but got end of regex")
             }
             FlagUnrecognized => write!(f, "unrecognized flag"),
             GroupNameDuplicate { .. } => {
                 write!(f, "duplicate capture group name")
             }
             GroupNameEmpty => write!(f, "empty capture group name"),
             GroupNameInvalid => write!(f, "invalid capture group character"),
             GroupNameUnexpectedEof => write!(f, "unclosed capture group name"),
             GroupUnclosed => write!(f, "unclosed group"),
             GroupUnopened => write!(f, "unopened group"),
             NestLimitExceeded(limit) => write!(
                 f,
                 "exceed the maximum number of \
                  nested parentheses/brackets ({})",
                 limit
             ),
             RepetitionCountInvalid => write!(
                 f,
                 "invalid repetition count range, \
                  the start must be <= the end"
             ),
             RepetitionCountDecimalEmpty => {
                 write!(f, "repetition quantifier expects a valid decimal")
             }
             RepetitionCountUnclosed => {
                 write!(f, "unclosed counted repetition")
             }
             RepetitionMissing => {
                 write!(f, "repetition operator missing expression")
             }
             UnicodeClassInvalid => {
                 write!(f, "invalid Unicode character class")
             }
             UnsupportedBackreference => {
                 write!(f, "backreferences are not supported")
             }
             UnsupportedLookAround => write!(
                 f,
                 "look-around, including look-ahead and look-behind, \
                  is not supported"
             ),
         }
     }
 }

 /// Span represents the position information of a single AST item.
 ///
 /// All span positions are absolute byte offsets that can be used on the
 /// original regular expression that was parsed.
 #[derive(Clone, Copy, Eq, PartialEq)]
 pub struct Span {
     /// The start byte offset.
     pub start: Position,
     /// The end byte offset.
     pub end: Position,
 }

 impl core::fmt::Debug for Span {
     fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
         write!(f, "Span({:?}, {:?})", self.start, self.end)
     }
 }

 impl Ord for Span {
     fn cmp(&self, other: &Span) -> Ordering {
         (&self.start, &self.end).cmp(&(&other.start, &other.end))
     }
 }

 impl PartialOrd for Span {
     fn partial_cmp(&self, other: &Span) -> Option<Ordering> {
         Some(self.cmp(other))
     }
 }

 /// A single position in a regular expression.
 ///
 /// A position encodes one half of a span, and include the byte offset, line
 /// number and column number.
 #[derive(Clone, Copy, Eq, PartialEq)]
 pub struct Position {
     /// The absolute offset of this position, starting at `0` from the
     /// beginning of the regular expression pattern string.
     pub offset: usize,
     /// The line number, starting at `1`.
     pub line: usize,
     /// The approximate column number, starting at `1`.
     pub column: usize,
 }

 impl core::fmt::Debug for Position {
     fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
         write!(
             f,
             "Position(o: {:?}, l: {:?}, c: {:?})",
             self.offset, self.line, self.column
         )
     }
 }

 impl Ord for Position {
     fn cmp(&self, other: &Position) -> Ordering {
         self.offset.cmp(&other.offset)
     }
 }

 impl PartialOrd for Position {
     fn partial_cmp(&self, other: &Position) -> Option<Ordering> {
         Some(self.cmp(other))
     }
 }

 impl Span {
     /// Create a new span with the given positions.
     pub fn new(start: Position, end: Position) -> Span {
         Span { start, end }
     }

     /// Create a new span using the given position as the start and end.
     pub fn splat(pos: Position) -> Span {
         Span::new(pos, pos)
     }

     /// Create a new span by replacing the starting the position with the one
     /// given.
     pub fn with_start(self, pos: Position) -> Span {
         Span { start: pos, ..self }
     }

     /// Create a new span by replacing the ending the position with the one
     /// given.
     pub fn with_end(self, pos: Position) -> Span {
         Span { end: pos, ..self }
     }

     /// Returns true if and only if this span occurs on a single line.
     pub fn is_one_line(&self) -> bool {
         self.start.line == self.end.line
     }

     /// Returns true if and only if this span is empty. That is, it points to
     /// a single position in the concrete syntax of a regular expression.
     pub fn is_empty(&self) -> bool {
         self.start.offset == self.end.offset
     }
 }

 impl Position {
     /// Create a new position with the given information.
     ///
     /// `offset` is the absolute offset of the position, starting at `0` from
     /// the beginning of the regular expression pattern string.
     ///
     /// `line` is the line number, starting at `1`.
     ///
     /// `column` is the approximate column number, starting at `1`.
     pub fn new(offset: usize, line: usize, column: usize) -> Position {
         Position { offset, line, column }
     }
 }

 /// An abstract syntax tree for a singular expression along with comments
 /// found.
 ///
 /// Comments are not stored in the tree itself to avoid complexity. Each
 /// comment contains a span of precisely where it occurred in the original
 /// regular expression.
 #[derive(Clone, Debug, Eq, PartialEq)]
 pub struct WithComments {
     /// The actual ast.
     pub ast: Ast,
     /// All comments found in the original regular expression.
     pub comments: Vec<Comment>,
 }

 /// A comment from a regular expression with an associated span.
 ///
 /// A regular expression can only contain comments when the `x` flag is
 /// enabled.
 #[derive(Clone, Debug, Eq, PartialEq)]
 pub struct Comment {
     /// The span of this comment, including the beginning `#` and ending `\n`.
     pub span: Span,
     /// The comment text, starting with the first character following the `#`
     /// and ending with the last character preceding the `\n`.
     pub comment: String,
 }

 /// An abstract syntax tree for a single regular expression.
 ///
 /// An `Ast`'s `fmt::Display` implementation uses constant stack space and heap
 /// space proportional to the size of the `Ast`.
 ///
 /// This type defines its own destructor that uses constant stack space and
 /// heap space proportional to the size of the `Ast`.
 #[derive(Clone, Debug, Eq, PartialEq)]
 pub enum Ast {
     /// An empty regex that matches everything.
     Empty(Span),
     /// A set of flags, e.g., `(?is)`.
     Flags(SetFlags),
     /// A single character literal, which includes escape sequences.
     Literal(Literal),
     /// The "any character" class.
     Dot(Span),
     /// A single zero-width assertion.
     Assertion(Assertion),
     /// A single character class. This includes all forms of character classes
     /// except for `.`. e.g., `\d`, `\pN`, `[a-z]` and `[[:alpha:]]`.
     Class(Class),
     /// A repetition operator applied to an arbitrary regular expression.
     Repetition(Repetition),
     /// A grouped regular expression.
     Group(Group),
     /// An alternation of regular expressions.
     Alternation(Alternation),
     /// A concatenation of regular expressions.
     Concat(Concat),
 }

 impl Ast {
     /// Return the span of this abstract syntax tree.
     pub fn span(&self) -> &Span {
         match *self {
             Ast::Empty(ref span) => span,
             Ast::Flags(ref x) => &x.span,
             Ast::Literal(ref x) => &x.span,
             Ast::Dot(ref span) => span,
             Ast::Assertion(ref x) => &x.span,
             Ast::Class(ref x) => x.span(),
             Ast::Repetition(ref x) => &x.span,
             Ast::Group(ref x) => &x.span,
             Ast::Alternation(ref x) => &x.span,
             Ast::Concat(ref x) => &x.span,
         }
     }

     /// Return true if and only if this Ast is empty.
     pub fn is_empty(&self) -> bool {
         match *self {
             Ast::Empty(_) => true,
             _ => false,
         }
     }

     /// Returns true if and only if this AST has any (including possibly empty)
     /// subexpressions.
     fn has_subexprs(&self) -> bool {
         match *self {
             Ast::Empty(_)
             | Ast::Flags(_)
             | Ast::Literal(_)
             | Ast::Dot(_)
             | Ast::Assertion(_) => false,
             Ast::Class(_)
             | Ast::Repetition(_)
             | Ast::Group(_)
             | Ast::Alternation(_)
             | Ast::Concat(_) => true,
         }
     }
 }

 /// Print a display representation of this Ast.
 ///
 /// This does not preserve any of the original whitespace formatting that may
 /// have originally been present in the concrete syntax from which this Ast
 /// was generated.
 ///
 /// This implementation uses constant stack space and heap space proportional
 /// to the size of the `Ast`.
 impl core::fmt::Display for Ast {
     fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
         use crate::ast::print::Printer;
         Printer::new().print(self, f)
     }
 }

 /// An alternation of regular expressions.
 #[derive(Clone, Debug, Eq, PartialEq)]
 pub struct Alternation {
     /// The span of this alternation.
     pub span: Span,
     /// The alternate regular expressions.
     pub asts: Vec<Ast>,
 }

 impl Alternation {
     /// Return this alternation as an AST.
     ///
     /// If this alternation contains zero ASTs, then Ast::Empty is
     /// returned. If this alternation contains exactly 1 AST, then the
     /// corresponding AST is returned. Otherwise, Ast::Alternation is returned.
     pub fn into_ast(mut self) -> Ast {
         match self.asts.len() {
             0 => Ast::Empty(self.span),
             1 => self.asts.pop().unwrap(),
             _ => Ast::Alternation(self),
         }
     }
 }

 /// A concatenation of regular expressions.
 #[derive(Clone, Debug, Eq, PartialEq)]
 pub struct Concat {
     /// The span of this concatenation.
     pub span: Span,
     /// The concatenation regular expressions.
     pub asts: Vec<Ast>,
 }

 impl Concat {
     /// Return this concatenation as an AST.
     ///
     /// If this concatenation contains zero ASTs, then Ast::Empty is
     /// returned. If this concatenation contains exactly 1 AST, then the
     /// corresponding AST is returned. Otherwise, Ast::Concat is returned.
     pub fn into_ast(mut self) -> Ast {
         match self.asts.len() {
             0 => Ast::Empty(self.span),
             1 => self.asts.pop().unwrap(),
             _ => Ast::Concat(self),
         }
     }
 }

 /// A single literal expression.
 ///
 /// A literal corresponds to a single Unicode scalar value. Literals may be
 /// represented in their literal form, e.g., `a` or in their escaped form,
 /// e.g., `\x61`.
 #[derive(Clone, Debug, Eq, PartialEq)]
 pub struct Literal {
     /// The span of this literal.
     pub span: Span,
     /// The kind of this literal.
     pub kind: LiteralKind,
     /// The Unicode scalar value corresponding to this literal.
     pub c: char,
 }

 impl Literal {
     /// If this literal was written as a `\x` hex escape, then this returns
     /// the corresponding byte value. Otherwise, this returns `None`.
     pub fn byte(&self) -> Option<u8> {
         match self.kind {
             LiteralKind::HexFixed(HexLiteralKind::X) => {
                 u8::try_from(self.c).ok()
             }
             _ => None,
         }
     }
 }

 /// The kind of a single literal expression.
 #[derive(Clone, Debug, Eq, PartialEq)]
 pub enum LiteralKind {
     /// The literal is written verbatim, e.g., `a` or `☃`.
     Verbatim,
     /// The literal is written as an escape because it is otherwise a special
     /// regex meta character, e.g., `\*` or `\[`.
     Meta,
     /// The literal is written as an escape despite the fact that the escape is
     /// unnecessary, e.g., `\%` or `\/`.
     Superfluous,
     /// The literal is written as an octal escape, e.g., `\141`.
     Octal,
     /// The literal is written as a hex code with a fixed number of digits
     /// depending on the type of the escape, e.g., `\x61` or or `\u0061` or
     /// `\U00000061`.
     HexFixed(HexLiteralKind),
     /// The literal is written as a hex code with a bracketed number of
     /// digits. The only restriction is that the bracketed hex code must refer
     /// to a valid Unicode scalar value.
     HexBrace(HexLiteralKind),
     /// The literal is written as a specially recognized escape, e.g., `\f`
     /// or `\n`.
     Special(SpecialLiteralKind),
 }

 /// The type of a special literal.
 ///
 /// A special literal is a special escape sequence recognized by the regex
 /// parser, e.g., `\f` or `\n`.
 #[derive(Clone, Debug, Eq, PartialEq)]
 pub enum SpecialLiteralKind {
     /// Bell, spelled `\a` (`\x07`).
     Bell,
     /// Form feed, spelled `\f` (`\x0C`).
     FormFeed,
     /// Tab, spelled `\t` (`\x09`).
     Tab,
     /// Line feed, spelled `\n` (`\x0A`).
     LineFeed,
     /// Carriage return, spelled `\r` (`\x0D`).
     CarriageReturn,
     /// Vertical tab, spelled `\v` (`\x0B`).
     VerticalTab,
     /// Space, spelled `\ ` (`\x20`). Note that this can only appear when
     /// parsing in verbose mode.
     Space,
 }

 /// The type of a Unicode hex literal.
 ///
 /// Note that all variants behave the same when used with brackets. They only
 /// differ when used without brackets in the number of hex digits that must
 /// follow.
 #[derive(Clone, Debug, Eq, PartialEq)]
 pub enum HexLiteralKind {
     /// A `\x` prefix. When used without brackets, this form is limited to
     /// two digits.
     X,
     /// A `\u` prefix. When used without brackets, this form is limited to
     /// four digits.
     UnicodeShort,
     /// A `\U` prefix. When used without brackets, this form is limited to
     /// eight digits.
     UnicodeLong,
 }

 impl HexLiteralKind {
     /// The number of digits that must be used with this literal form when
     /// used without brackets. When used with brackets, there is no
     /// restriction on the number of digits.
     pub fn digits(&self) -> u32 {
         match *self {
             HexLiteralKind::X => 2,
             HexLiteralKind::UnicodeShort => 4,
             HexLiteralKind::UnicodeLong => 8,
         }
     }
 }

 /// A single character class expression.
 #[derive(Clone, Debug, Eq, PartialEq)]
 pub enum Class {
     /// A Unicode character class, e.g., `\pL` or `\p{Greek}`.
     Unicode(ClassUnicode),
     /// A perl character class, e.g., `\d` or `\W`.
     Perl(ClassPerl),
     /// A bracketed character class set, which may contain zero or more
     /// character ranges and/or zero or more nested classes. e.g.,
     /// `[a-zA-Z\pL]`.
     Bracketed(ClassBracketed),
 }

 impl Class {
     /// Return the span of this character class.
     pub fn span(&self) -> &Span {
         match *self {
             Class::Perl(ref x) => &x.span,
             Class::Unicode(ref x) => &x.span,
             Class::Bracketed(ref x) => &x.span,
         }
     }
 }

 /// A Perl character class.
 #[derive(Clone, Debug, Eq, PartialEq)]
 pub struct ClassPerl {
     /// The span of this class.
     pub span: Span,
     /// The kind of Perl class.
     pub kind: ClassPerlKind,
     /// Whether the class is negated or not. e.g., `\d` is not negated but
     /// `\D` is.
     pub negated: bool,
 }

 /// The available Perl character classes.
 #[derive(Clone, Debug, Eq, PartialEq)]
 pub enum ClassPerlKind {
     /// Decimal numbers.
     Digit,
     /// Whitespace.
     Space,
     /// Word characters.
     Word,
 }

 /// An ASCII character class.
 #[derive(Clone, Debug, Eq, PartialEq)]
 pub struct ClassAscii {
     /// The span of this class.
     pub span: Span,
     /// The kind of ASCII class.
     pub kind: ClassAsciiKind,
     /// Whether the class is negated or not. e.g., `[[:alpha:]]` is not negated
     /// but `[[:^alpha:]]` is.
     pub negated: bool,
 }

 /// The available ASCII character classes.
 #[derive(Clone, Debug, Eq, PartialEq)]
 pub enum ClassAsciiKind {
     /// `[0-9A-Za-z]`
     Alnum,
     /// `[A-Za-z]`
     Alpha,
     /// `[\x00-\x7F]`
     Ascii,
     /// `[ \t]`
     Blank,
     /// `[\x00-\x1F\x7F]`
     Cntrl,
     /// `[0-9]`
     Digit,
     /// `[!-~]`
     Graph,
     /// `[a-z]`
     Lower,
     /// `[ -~]`
     Print,
     /// `[!-/:-@\[-`{-~]`
     Punct,
     /// `[\t\n\v\f\r ]`
     Space,
     /// `[A-Z]`
     Upper,
     /// `[0-9A-Za-z_]`
     Word,
     /// `[0-9A-Fa-f]`
     Xdigit,
 }

 impl ClassAsciiKind {
     /// Return the corresponding ClassAsciiKind variant for the given name.
     ///
     /// The name given should correspond to the lowercase version of the
     /// variant name. e.g., `cntrl` is the name for `ClassAsciiKind::Cntrl`.
     ///
     /// If no variant with the corresponding name exists, then `None` is
     /// returned.
     pub fn from_name(name: &str) -> Option<ClassAsciiKind> {
         use self::ClassAsciiKind::*;
         match name {
             "alnum" => Some(Alnum),
             "alpha" => Some(Alpha),
             "ascii" => Some(Ascii),
             "blank" => Some(Blank),
             "cntrl" => Some(Cntrl),
             "digit" => Some(Digit),
             "graph" => Some(Graph),
             "lower" => Some(Lower),
             "print" => Some(Print),
             "punct" => Some(Punct),
             "space" => Some(Space),
             "upper" => Some(Upper),
             "word" => Some(Word),
             "xdigit" => Some(Xdigit),
             _ => None,
         }
     }
 }

 /// A Unicode character class.
 #[derive(Clone, Debug, Eq, PartialEq)]
 pub struct ClassUnicode {
     /// The span of this class.
     pub span: Span,
     /// Whether this class is negated or not.
     ///
     /// Note: be careful when using this attribute. This specifically refers
     /// to whether the class is written as `\p` or `\P`, where the latter
     /// is `negated = true`. However, it also possible to write something like
     /// `\P{scx!=Katakana}` which is actually equivalent to
     /// `\p{scx=Katakana}` and is therefore not actually negated even though
     /// `negated = true` here. To test whether this class is truly negated
     /// or not, use the `is_negated` method.
     pub negated: bool,
     /// The kind of Unicode class.
     pub kind: ClassUnicodeKind,
 }

 impl ClassUnicode {
     /// Returns true if this class has been negated.
     ///
     /// Note that this takes the Unicode op into account, if it's present.
     /// e.g., `is_negated` for `\P{scx!=Katakana}` will return `false`.
     pub fn is_negated(&self) -> bool {
         match self.kind {
             ClassUnicodeKind::NamedValue {
                 op: ClassUnicodeOpKind::NotEqual,
                 ..
             } => !self.negated,
             _ => self.negated,
         }
     }
 }

 /// The available forms of Unicode character classes.
 #[derive(Clone, Debug, Eq, PartialEq)]
 pub enum ClassUnicodeKind {
     /// A one letter abbreviated class, e.g., `\pN`.
     OneLetter(char),
     /// A binary property, general category or script. The string may be
     /// empty.
     Named(String),
     /// A property name and an associated value.
     NamedValue {
         /// The type of Unicode op used to associate `name` with `value`.
         op: ClassUnicodeOpKind,
         /// The property name (which may be empty).
         name: String,
         /// The property value (which may be empty).
         value: String,
     },
 }

 /// The type of op used in a Unicode character class.
 #[derive(Clone, Debug, Eq, PartialEq)]
 pub enum ClassUnicodeOpKind {
     /// A property set to a specific value, e.g., `\p{scx=Katakana}`.
     Equal,
     /// A property set to a specific value using a colon, e.g.,
     /// `\p{scx:Katakana}`.
     Colon,
     /// A property that isn't a particular value, e.g., `\p{scx!=Katakana}`.
     NotEqual,
 }

 impl ClassUnicodeOpKind {
     /// Whether the op is an equality op or not.
     pub fn is_equal(&self) -> bool {
         match *self {
             ClassUnicodeOpKind::Equal | ClassUnicodeOpKind::Colon => true,
             _ => false,
         }
     }
 }

 /// A bracketed character class, e.g., `[a-z0-9]`.
 #[derive(Clone, Debug, Eq, PartialEq)]
 pub struct ClassBracketed {
     /// The span of this class.
     pub span: Span,
     /// Whether this class is negated or not. e.g., `[a]` is not negated but
     /// `[^a]` is.
     pub negated: bool,
     /// The type of this set. A set is either a normal union of things, e.g.,
     /// `[abc]` or a result of applying set operations, e.g., `[\pL--c]`.
     pub kind: ClassSet,
 }

 /// A character class set.
 ///
 /// This type corresponds to the internal structure of a bracketed character
 /// class. That is, every bracketed character is one of two types: a union of
 /// items (literals, ranges, other bracketed classes) or a tree of binary set
 /// operations.
 #[derive(Clone, Debug, Eq, PartialEq)]
 pub enum ClassSet {
     /// An item, which can be a single literal, range, nested character class
     /// or a union of items.
     Item(ClassSetItem),
     /// A single binary operation (i.e., &&, -- or ~~).
     BinaryOp(ClassSetBinaryOp),
 }

 impl ClassSet {
     /// Build a set from a union.
     pub fn union(ast: ClassSetUnion) -> ClassSet {
         ClassSet::Item(ClassSetItem::Union(ast))
     }

     /// Return the span of this character class set.
     pub fn span(&self) -> &Span {
         match *self {
             ClassSet::Item(ref x) => x.span(),
             ClassSet::BinaryOp(ref x) => &x.span,
         }
     }

     /// Return true if and only if this class set is empty.
     fn is_empty(&self) -> bool {
         match *self {
             ClassSet::Item(ClassSetItem::Empty(_)) => true,
             _ => false,
         }
     }
 }

 /// A single component of a character class set.
 #[derive(Clone, Debug, Eq, PartialEq)]
 pub enum ClassSetItem {
     /// An empty item.
     ///
     /// Note that a bracketed character class cannot contain a single empty
     /// item. Empty items can appear when using one of the binary operators.
     /// For example, `[&&]` is the intersection of two empty classes.
     Empty(Span),
     /// A single literal.
     Literal(Literal),
     /// A range between two literals.
     Range(ClassSetRange),
     /// An ASCII character class, e.g., `[:alnum:]` or `[:punct:]`.
     Ascii(ClassAscii),
     /// A Unicode character class, e.g., `\pL` or `\p{Greek}`.
     Unicode(ClassUnicode),
     /// A perl character class, e.g., `\d` or `\W`.
     Perl(ClassPerl),
     /// A bracketed character class set, which may contain zero or more
     /// character ranges and/or zero or more nested classes. e.g.,
     /// `[a-zA-Z\pL]`.
     Bracketed(Box<ClassBracketed>),
     /// A union of items.
     Union(ClassSetUnion),
 }

 impl ClassSetItem {
     /// Return the span of this character class set item.
     pub fn span(&self) -> &Span {
         match *self {
             ClassSetItem::Empty(ref span) => span,
             ClassSetItem::Literal(ref x) => &x.span,
             ClassSetItem::Range(ref x) => &x.span,
             ClassSetItem::Ascii(ref x) => &x.span,
             ClassSetItem::Perl(ref x) => &x.span,
             ClassSetItem::Unicode(ref x) => &x.span,
             ClassSetItem::Bracketed(ref x) => &x.span,
             ClassSetItem::Union(ref x) => &x.span,
         }
     }
 }

 /// A single character class range in a set.
 #[derive(Clone, Debug, Eq, PartialEq)]
 pub struct ClassSetRange {
     /// The span of this range.
     pub span: Span,
     /// The start of this range.
     pub start: Literal,
     /// The end of this range.
     pub end: Literal,
 }

 impl ClassSetRange {
     /// Returns true if and only if this character class range is valid.
     ///
     /// The only case where a range is invalid is if its start is greater than
     /// its end.
     pub fn is_valid(&self) -> bool {
         self.start.c <= self.end.c
     }
 }

 /// A union of items inside a character class set.
 #[derive(Clone, Debug, Eq, PartialEq)]
 pub struct ClassSetUnion {
     /// The span of the items in this operation. e.g., the `a-z0-9` in
     /// `[^a-z0-9]`
     pub span: Span,
     /// The sequence of items that make up this union.
     pub items: Vec<ClassSetItem>,
 }

 impl ClassSetUnion {
     /// Push a new item in this union.
     ///
     /// The ending position of this union's span is updated to the ending
     /// position of the span of the item given. If the union is empty, then
     /// the starting position of this union is set to the starting position
     /// of this item.
     ///
     /// In other words, if you only use this method to add items to a union
     /// and you set the spans on each item correctly, then you should never
     /// need to adjust the span of the union directly.
     pub fn push(&mut self, item: ClassSetItem) {
         if self.items.is_empty() {
             self.span.start = item.span().start;
         }
         self.span.end = item.span().end;
         self.items.push(item);
     }

     /// Return this union as a character class set item.
     ///
     /// If this union contains zero items, then an empty union is
     /// returned. If this concatenation contains exactly 1 item, then the
     /// corresponding item is returned. Otherwise, ClassSetItem::Union is
     /// returned.
     pub fn into_item(mut self) -> ClassSetItem {
         match self.items.len() {
             0 => ClassSetItem::Empty(self.span),
             1 => self.items.pop().unwrap(),
             _ => ClassSetItem::Union(self),
         }
     }
 }

 /// A Unicode character class set operation.
 #[derive(Clone, Debug, Eq, PartialEq)]
 pub struct ClassSetBinaryOp {
     /// The span of this operation. e.g., the `a-z--[h-p]` in `[a-z--h-p]`.
     pub span: Span,
     /// The type of this set operation.
     pub kind: ClassSetBinaryOpKind,
     /// The left hand side of the operation.
     pub lhs: Box<ClassSet>,
     /// The right hand side of the operation.
     pub rhs: Box<ClassSet>,
 }

 /// The type of a Unicode character class set operation.
 ///
 /// Note that this doesn't explicitly represent union since there is no
 /// explicit union operator. Concatenation inside a character class corresponds
 /// to the union operation.
 #[derive(Clone, Copy, Debug, Eq, PartialEq)]
 pub enum ClassSetBinaryOpKind {
     /// The intersection of two sets, e.g., `\pN&&[a-z]`.
     Intersection,
     /// The difference of two sets, e.g., `\pN--[0-9]`.
     Difference,
     /// The symmetric difference of two sets. The symmetric difference is the
     /// set of elements belonging to one but not both sets.
     /// e.g., `[\pL~~[:ascii:]]`.
     SymmetricDifference,
 }

 /// A single zero-width assertion.
 #[derive(Clone, Debug, Eq, PartialEq)]
 pub struct Assertion {
     /// The span of this assertion.
     pub span: Span,
     /// The assertion kind, e.g., `\b` or `^`.
     pub kind: AssertionKind,
 }

 /// An assertion kind.
 #[derive(Clone, Debug, Eq, PartialEq)]
 pub enum AssertionKind {
     /// `^`
     StartLine,
     /// `$`
     EndLine,
     /// `\A`
     StartText,
     /// `\z`
     EndText,
     /// `\b`
     WordBoundary,
     /// `\B`
     NotWordBoundary,
 }

 /// A repetition operation applied to a regular expression.
 #[derive(Clone, Debug, Eq, PartialEq)]
 pub struct Repetition {
     /// The span of this operation.
     pub span: Span,
     /// The actual operation.
     pub op: RepetitionOp,
     /// Whether this operation was applied greedily or not.
     pub greedy: bool,
     /// The regular expression under repetition.
     pub ast: Box<Ast>,
 }

 /// The repetition operator itself.
 #[derive(Clone, Debug, Eq, PartialEq)]
 pub struct RepetitionOp {
     /// The span of this operator. This includes things like `+`, `*?` and
     /// `{m,n}`.
     pub span: Span,
     /// The type of operation.
     pub kind: RepetitionKind,
 }

 /// The kind of a repetition operator.
 #[derive(Clone, Debug, Eq, PartialEq)]
 pub enum RepetitionKind {
     /// `?`
     ZeroOrOne,
     /// `*`
     ZeroOrMore,
     /// `+`
     OneOrMore,
     /// `{m,n}`
     Range(RepetitionRange),
 }

 /// A range repetition operator.
 #[derive(Clone, Debug, Eq, PartialEq)]
 pub enum RepetitionRange {
     /// `{m}`
     Exactly(u32),
     /// `{m,}`
     AtLeast(u32),
     /// `{m,n}`
     Bounded(u32, u32),
 }

 impl RepetitionRange {
     /// Returns true if and only if this repetition range is valid.
     ///
     /// The only case where a repetition range is invalid is if it is bounded
     /// and its start is greater than its end.
     pub fn is_valid(&self) -> bool {
         match *self {
             RepetitionRange::Bounded(s, e) if s > e => false,
             _ => true,
         }
     }
 }

 /// A grouped regular expression.
 ///
 /// This includes both capturing and non-capturing groups. This does **not**
 /// include flag-only groups like `(?is)`, but does contain any group that
 /// contains a sub-expression, e.g., `(a)`, `(?P<name>a)`, `(?:a)` and
 /// `(?is:a)`.
 #[derive(Clone, Debug, Eq, PartialEq)]
 pub struct Group {
     /// The span of this group.
     pub span: Span,
     /// The kind of this group.
     pub kind: GroupKind,
     /// The regular expression in this group.
     pub ast: Box<Ast>,
 }

 impl Group {
     /// If this group is non-capturing, then this returns the (possibly empty)
     /// set of flags. Otherwise, `None` is returned.
     pub fn flags(&self) -> Option<&Flags> {
         match self.kind {
             GroupKind::NonCapturing(ref flags) => Some(flags),
             _ => None,
         }
     }

     /// Returns true if and only if this group is capturing.
     pub fn is_capturing(&self) -> bool {
         match self.kind {
             GroupKind::CaptureIndex(_) | GroupKind::CaptureName { .. } => true,
             GroupKind::NonCapturing(_) => false,
         }
     }

     /// Returns the capture index of this group, if this is a capturing group.
     ///
     /// This returns a capture index precisely when `is_capturing` is `true`.
     pub fn capture_index(&self) -> Option<u32> {
         match self.kind {
             GroupKind::CaptureIndex(i) => Some(i),
             GroupKind::CaptureName { ref name, .. } => Some(name.index),
             GroupKind::NonCapturing(_) => None,
         }
     }
 }

 /// The kind of a group.
 #[derive(Clone, Debug, Eq, PartialEq)]
 pub enum GroupKind {
     /// `(a)`
     CaptureIndex(u32),
     /// `(?<name>a)` or `(?P<name>a)`
     CaptureName {
         /// True if the `?P<` syntax is used and false if the `?<` syntax is used.
         starts_with_p: bool,
         /// The capture name.
         name: CaptureName,
     },
     /// `(?:a)` and `(?i:a)`
     NonCapturing(Flags),
 }

 /// A capture name.
 ///
 /// This corresponds to the name itself between the angle brackets in, e.g.,
 /// `(?P<foo>expr)`.
 #[derive(Clone, Debug, Eq, PartialEq)]
 pub struct CaptureName {
     /// The span of this capture name.
     pub span: Span,
     /// The capture name.
     pub name: String,
     /// The capture index.
     pub index: u32,
 }

 /// A group of flags that is not applied to a particular regular expression.
 #[derive(Clone, Debug, Eq, PartialEq)]
 pub struct SetFlags {
     /// The span of these flags, including the grouping parentheses.
     pub span: Span,
     /// The actual sequence of flags.
     pub flags: Flags,
 }

 /// A group of flags.
 ///
 /// This corresponds only to the sequence of flags themselves, e.g., `is-u`.
 #[derive(Clone, Debug, Eq, PartialEq)]
 pub struct Flags {
     /// The span of this group of flags.
     pub span: Span,
     /// A sequence of flag items. Each item is either a flag or a negation
     /// operator.
     pub items: Vec<FlagsItem>,
 }

 impl Flags {
     /// Add the given item to this sequence of flags.
     ///
     /// If the item was added successfully, then `None` is returned. If the
     /// given item is a duplicate, then `Some(i)` is returned, where
     /// `items[i].kind == item.kind`.
     pub fn add_item(&mut self, item: FlagsItem) -> Option<usize> {
         for (i, x) in self.items.iter().enumerate() {
             if x.kind == item.kind {
                 return Some(i);
             }
         }
         self.items.push(item);
         None
     }

     /// Returns the state of the given flag in this set.
     ///
     /// If the given flag is in the set but is negated, then `Some(false)` is
     /// returned.
     ///
     /// If the given flag is in the set and is not negated, then `Some(true)`
     /// is returned.
     ///
     /// Otherwise, `None` is returned.
     pub fn flag_state(&self, flag: Flag) -> Option<bool> {
         let mut negated = false;
         for x in &self.items {
             match x.kind {
                 FlagsItemKind::Negation => {
                     negated = true;
                 }
                 FlagsItemKind::Flag(ref xflag) if xflag == &flag => {
                     return Some(!negated);
                 }
                 _ => {}
             }
         }
         None
     }
 }

 /// A single item in a group of flags.
 #[derive(Clone, Debug, Eq, PartialEq)]
 pub struct FlagsItem {
     /// The span of this item.
     pub span: Span,
     /// The kind of this item.
     pub kind: FlagsItemKind,
 }

 /// The kind of an item in a group of flags.
 #[derive(Clone, Debug, Eq, PartialEq)]
 pub enum FlagsItemKind {
     /// A negation operator applied to all subsequent flags in the enclosing
     /// group.
     Negation,
     /// A single flag in a group.
     Flag(Flag),
 }

 impl FlagsItemKind {
     /// Returns true if and only if this item is a negation operator.
     pub fn is_negation(&self) -> bool {
         match *self {
             FlagsItemKind::Negation => true,
             _ => false,
         }
     }
 }

 /// A single flag.
 #[derive(Clone, Copy, Debug, Eq, PartialEq)]
 pub enum Flag {
     /// `i`
     CaseInsensitive,
     /// `m`
     MultiLine,
     /// `s`
     DotMatchesNewLine,
     /// `U`
     SwapGreed,
     /// `u`
     Unicode,
     /// `R`
     CRLF,
     /// `x`
     IgnoreWhitespace,
 }

 /// A custom `Drop` impl is used for `Ast` such that it uses constant stack
 /// space but heap space proportional to the depth of the `Ast`.
 impl Drop for Ast {
     fn drop(&mut self) {
         use core::mem;

         match *self {
             Ast::Empty(_)
             | Ast::Flags(_)
             | Ast::Literal(_)
             | Ast::Dot(_)
             | Ast::Assertion(_)
             // Classes are recursive, so they get their own Drop impl.
             | Ast::Class(_) => return,
             Ast::Repetition(ref x) if !x.ast.has_subexprs() => return,
             Ast::Group(ref x) if !x.ast.has_subexprs() => return,
             Ast::Alternation(ref x) if x.asts.is_empty() => return,
             Ast::Concat(ref x) if x.asts.is_empty() => return,
             _ => {}
         }

         let empty_span = || Span::splat(Position::new(0, 0, 0));
         let empty_ast = || Ast::Empty(empty_span());
         let mut stack = vec![mem::replace(self, empty_ast())];
         while let Some(mut ast) = stack.pop() {
             match ast {
                 Ast::Empty(_)
                 | Ast::Flags(_)
                 | Ast::Literal(_)
                 | Ast::Dot(_)
                 | Ast::Assertion(_)
                 // Classes are recursive, so they get their own Drop impl.
                 | Ast::Class(_) => {}
                 Ast::Repetition(ref mut x) => {
                     stack.push(mem::replace(&mut x.ast, empty_ast()));
                 }
                 Ast::Group(ref mut x) => {
                     stack.push(mem::replace(&mut x.ast, empty_ast()));
                 }
                 Ast::Alternation(ref mut x) => {
                     stack.extend(x.asts.drain(..));
                 }
                 Ast::Concat(ref mut x) => {
                     stack.extend(x.asts.drain(..));
                 }
             }
         }
     }
 }

 /// A custom `Drop` impl is used for `ClassSet` such that it uses constant
 /// stack space but heap space proportional to the depth of the `ClassSet`.
 impl Drop for ClassSet {
     fn drop(&mut self) {
         use core::mem;

         match *self {
             ClassSet::Item(ref item) => match *item {
                 ClassSetItem::Empty(_)
                 | ClassSetItem::Literal(_)
                 | ClassSetItem::Range(_)
                 | ClassSetItem::Ascii(_)
                 | ClassSetItem::Unicode(_)
                 | ClassSetItem::Perl(_) => return,
                 ClassSetItem::Bracketed(ref x) => {
                     if x.kind.is_empty() {
                         return;
                     }
                 }
                 ClassSetItem::Union(ref x) => {
                     if x.items.is_empty() {
                         return;
                     }
                 }
             },
             ClassSet::BinaryOp(ref op) => {
                 if op.lhs.is_empty() && op.rhs.is_empty() {
                     return;
                 }
             }
         }

         let empty_span = || Span::splat(Position::new(0, 0, 0));
         let empty_set = || ClassSet::Item(ClassSetItem::Empty(empty_span()));
         let mut stack = vec![mem::replace(self, empty_set())];
         while let Some(mut set) = stack.pop() {
             match set {
                 ClassSet::Item(ref mut item) => match *item {
                     ClassSetItem::Empty(_)
                     | ClassSetItem::Literal(_)
                     | ClassSetItem::Range(_)
                     | ClassSetItem::Ascii(_)
                     | ClassSetItem::Unicode(_)
                     | ClassSetItem::Perl(_) => {}
                     ClassSetItem::Bracketed(ref mut x) => {
                         stack.push(mem::replace(&mut x.kind, empty_set()));
                     }
                     ClassSetItem::Union(ref mut x) => {
                         stack.extend(x.items.drain(..).map(ClassSet::Item));
                     }
                 },
                 ClassSet::BinaryOp(ref mut op) => {
                     stack.push(mem::replace(&mut op.lhs, empty_set()));
                     stack.push(mem::replace(&mut op.rhs, empty_set()));
                 }
             }
         }
     }
 }

 #[cfg(test)]
 mod tests {
     use super::*;

     // We use a thread with an explicit stack size to test that our destructor
     // for Ast can handle arbitrarily sized expressions in constant stack
     // space. In case we run on a platform without threads (WASM?), we limit
     // this test to Windows/Unix.
     #[test]
     #[cfg(any(unix, windows))]
     fn no_stack_overflow_on_drop() {
         use std::thread;

         let run = || {
             let span = || Span::splat(Position::new(0, 0, 0));
             let mut ast = Ast::Empty(span());
             for i in 0..200 {
                 ast = Ast::Group(Group {
                     span: span(),
                     kind: GroupKind::CaptureIndex(i),
                     ast: Box::new(ast),
                 });
             }
             assert!(!ast.is_empty());
         };

         // We run our test on a thread with a small stack size so we can
         // force the issue more easily.
         //
         // NOTE(2023-03-21): It turns out that some platforms (like FreeBSD)
         // will just barf with very small stack sizes. So we bump this up a bit
         // to give more room to breath. When I did this, I confirmed that if
         // I remove the custom `Drop` impl for `Ast`, then this test does
         // indeed still fail with a stack overflow. (At the time of writing, I
         // had to bump it all the way up to 32K before the test would pass even
         // without the custom `Drop` impl. So 16K seems like a safe number
         // here.)
         //
         // See: https://github.com/rust-lang/regex/issues/967
         thread::Builder::new()
             .stack_size(16 << 10)
             .spawn(run)
             .unwrap()
             .join()
             .unwrap();
     }
 }