vendor/regex-automata/src/util/syntax.rs - toolchain/rustc - Git at Google

 use regex_syntax::ParserBuilder;

 /// A common set of configuration options that apply to the syntax of a regex.
 ///
 /// This represents a group of configuration options that specifically apply
 /// to how the concrete syntax of a regular expression is interpreted. In
 /// particular, they are generally forwarded to the
 /// [`ParserBuilder`](https://docs.rs/regex-syntax/*/regex_syntax/struct.ParserBuilder.html)
 /// in the
 /// [`regex-syntax`](https://docs.rs/regex-syntax)
 /// crate when building a regex from its concrete syntax directly.
 ///
 /// These options are defined as a group since they apply to every regex engine
 /// in this crate. Instead of re-defining them on every engine's builder, they
 /// are instead provided here as one cohesive unit.
 ///
 /// Every field is private: values are written through the by-value builder
 /// methods of the same name and read back through the matching `get_*`
 /// accessor. The type is `Copy`, so each builder call yields an updated copy.
 #[derive(Clone, Copy, Debug)]
 pub struct SyntaxConfig {
     // Default state of the `i` flag; `false` in `SyntaxConfig::new`.
     case_insensitive: bool,
     // Default state of the `m` flag; `false` in `SyntaxConfig::new`.
     multi_line: bool,
     // Default state of the `s` flag; `false` in `SyntaxConfig::new`.
     dot_matches_new_line: bool,
     // Default state of the `U` flag; `false` in `SyntaxConfig::new`.
     swap_greed: bool,
     // Default state of the `x` (verbose) flag; `false` in `SyntaxConfig::new`.
     ignore_whitespace: bool,
     // Default state of the `u` flag; `true` in `SyntaxConfig::new`.
     unicode: bool,
     // Whether only valid UTF-8 may be matched; `true` in `SyntaxConfig::new`.
     // `apply` forwards the negation of this as `allow_invalid_utf8`.
     utf8: bool,
     // Maximum nesting depth handed to the parser; `250` in `SyntaxConfig::new`.
     nest_limit: u32,
     // Whether octal escapes are accepted; `false` in `SyntaxConfig::new`.
     octal: bool,
 }

 impl SyntaxConfig {
     /// Create a syntax configuration populated with the default settings.
     ///
     /// Unicode mode and UTF-8 mode start out enabled, the nest limit starts
     /// at `250`, and every other option starts out disabled.
     pub fn new() -> SyntaxConfig {
         // These values mirror the defaults used by regex-syntax.
         SyntaxConfig {
             // Options that are on unless the caller switches them off.
             unicode: true,
             utf8: true,
             // Options that are off unless the caller switches them on.
             case_insensitive: false,
             multi_line: false,
             dot_matches_new_line: false,
             swap_greed: false,
             ignore_whitespace: false,
             octal: false,
             // Maximum permitted depth of the parsed syntax tree.
             nest_limit: 250,
         }
     }

     /// Set whether the case insensitive flag is on by default.
     ///
     /// With Unicode mode enabled, case insensitivity is Unicode-aware: the
     /// "simple" case folding rules specified by Unicode are applied.
     ///
     /// This is disabled by default. It can also be switched on selectively
     /// inside the regular expression itself with the `i` flag.
     pub fn case_insensitive(self, yes: bool) -> SyntaxConfig {
         SyntaxConfig { case_insensitive: yes, ..self }
     }

     /// Set whether the multi-line matching flag is on by default.
     ///
     /// With this enabled, the `^` and `$` look-around assertions match
     /// immediately after and immediately before a new line character,
     /// respectively. The `\A` and `\z` look-around assertions are not
     /// affected by this setting: they always correspond to the beginning
     /// and the end of the input.
     ///
     /// This is disabled by default. It can also be switched on selectively
     /// inside the regular expression itself with the `m` flag.
     pub fn multi_line(self, yes: bool) -> SyntaxConfig {
         SyntaxConfig { multi_line: yes, ..self }
     }

     /// Set whether the "dot matches any character" flag is on by default.
     ///
     /// With this enabled, `.` matches any character. With it disabled, `.`
     /// matches any character except for a new line character.
     ///
     /// What `.` matches also depends on the "unicode" setting. When Unicode
     /// is enabled (the default), `.` matches any UTF-8 encoding of any
     /// Unicode scalar value (sans a new line, depending on whether this
     /// "dot matches new line" option is enabled). When Unicode mode is
     /// disabled, `.` matches any byte instead. Consequently, with Unicode
     /// mode disabled, `.` can only be used when the "allow invalid UTF-8"
     /// option is enabled, since `.` could otherwise match invalid UTF-8.
     ///
     /// This is disabled by default. It can also be switched on selectively
     /// inside the regular expression itself with the `s` flag.
     pub fn dot_matches_new_line(self, yes: bool) -> SyntaxConfig {
         SyntaxConfig { dot_matches_new_line: yes, ..self }
     }

     /// Set whether the "swap greed" flag is on by default.
     ///
     /// With this enabled, `.*` (for example) becomes ungreedy and `.*?`
     /// becomes greedy.
     ///
     /// This is disabled by default. It can also be switched on selectively
     /// inside the regular expression itself with the `U` flag.
     pub fn swap_greed(self, yes: bool) -> SyntaxConfig {
         SyntaxConfig { swap_greed: yes, ..self }
     }

     /// Set whether verbose mode is on by default.
     ///
     /// Verbose mode permits insignificant whitespace in many places in the
     /// regular expression, as well as comments. A comment starts at `#` and
     /// runs until the end of the line.
     ///
     /// This is disabled by default. Regardless of this setting, verbose
     /// mode can be switched on selectively inside the regular expression
     /// with the `x` flag.
     pub fn ignore_whitespace(self, yes: bool) -> SyntaxConfig {
         SyntaxConfig { ignore_whitespace: yes, ..self }
     }

     /// Set whether the Unicode flag (`u`) is on by default.
     ///
     /// This is **enabled** by default. It can also be switched off
     /// selectively inside the regular expression itself via the `u` flag.
     ///
     /// Unless "allow invalid UTF-8" is enabled (it is disabled by default),
     /// a regular expression fails to parse when Unicode mode is disabled
     /// and a sub-expression could possibly match invalid UTF-8.
     ///
     /// **WARNING**: Unicode mode can greatly increase the size of the
     /// compiled DFA, with a noticeable impact on both memory usage and
     /// compilation time. The effect is most pronounced for regexes that
     /// contain character classes such as `\w`, whose meaning depends on
     /// whether Unicode is enabled. If Unicode is not necessary, you are
     /// encouraged to disable it.
     pub fn unicode(self, yes: bool) -> SyntaxConfig {
         SyntaxConfig { unicode: yes, ..self }
     }

     /// Set whether the resulting regex must only ever match valid UTF-8.
     ///
     /// When enabled (the default), the builder is guaranteed to produce a
     /// regex that will only ever match valid UTF-8 (otherwise, the builder
     /// will return an error). When disabled, the builder permits regular
     /// expressions that may match invalid UTF-8.
     ///
     /// For example, when [`SyntaxConfig::unicode`] is disabled, expressions
     /// like `[^a]` may match invalid UTF-8 because they can match any single
     /// byte that is not `a`. Such sub-expressions are rejected by default to
     /// avoid returning offsets that split a UTF-8 encoded codepoint. Where
     /// matching at arbitrary locations is desired, disable this option to
     /// permit all such sub-expressions.
     pub fn utf8(self, yes: bool) -> SyntaxConfig {
         SyntaxConfig { utf8: yes, ..self }
     }

     /// Set the nesting limit used by the regular expression parser.
     ///
     /// The nesting limit bounds how deep the abstract syntax tree may be. If
     /// the AST exceeds the given limit (e.g., with too many nested groups),
     /// the parser returns an error.
     ///
     /// The limit is a heuristic for preventing stack overflow when a finite
     /// automaton is built from a regular expression's abstract syntax tree,
     /// since construction currently uses recursion. In the future, the
     /// implementation may stop using recursion and this option will no
     /// longer be necessary.
     ///
     /// The limit is not checked until the entire AST is parsed. Callers who
     /// want to bound the amount of heap space used should therefore impose
     /// a limit on the length, in bytes, of the concrete pattern string. This
     /// is viable because the parser limits itself to heap space proportional
     /// to the length of the pattern string.
     ///
     /// A nest limit of `0` produces a nest limit error for most patterns but
     /// not all. For example, a nest limit of `0` permits `a` but not `ab`,
     /// since `ab` requires a concatenation AST item, which results in a nest
     /// depth of `1`. In general, a nest limit does not manifest in an
     /// obvious way in the concrete syntax, so it should not be used in a
     /// granular way.
     pub fn nest_limit(self, limit: u32) -> SyntaxConfig {
         SyntaxConfig { nest_limit: limit, ..self }
     }

     /// Set whether octal syntax is supported.
     ///
     /// Octal syntax is a little-known way of uttering Unicode codepoints in
     /// a regular expression. For example, `a`, `\x61`, `\u0061` and `\141`
     /// are all equivalent regular expressions, where the last example shows
     /// octal syntax.
     ///
     /// Supporting octal syntax is not a problem in and of itself, but it
     /// makes good error messages harder. In PCRE based regex engines, syntax
     /// like `\1` invokes a backreference, which is explicitly unsupported in
     /// Rust's regex engine, yet many users expect it to be supported.
     /// Therefore, when octal support is disabled, the error message will
     /// explicitly mention that backreferences aren't supported.
     ///
     /// Octal syntax is disabled by default.
     pub fn octal(self, yes: bool) -> SyntaxConfig {
         SyntaxConfig { octal: yes, ..self }
     }

     /// Report whether "unicode" mode is enabled in this configuration.
     pub fn get_unicode(&self) -> bool {
         self.unicode
     }

     /// Report whether "case insensitive" mode is enabled in this
     /// configuration.
     pub fn get_case_insensitive(&self) -> bool {
         self.case_insensitive
     }

     /// Report whether "multi line" mode is enabled in this configuration.
     pub fn get_multi_line(&self) -> bool {
         self.multi_line
     }

     /// Report whether "dot matches new line" mode is enabled in this
     /// configuration.
     pub fn get_dot_matches_new_line(&self) -> bool {
         self.dot_matches_new_line
     }

     /// Report whether "swap greed" mode is enabled in this configuration.
     pub fn get_swap_greed(&self) -> bool {
         self.swap_greed
     }

     /// Report whether "ignore whitespace" mode is enabled in this
     /// configuration.
     pub fn get_ignore_whitespace(&self) -> bool {
         self.ignore_whitespace
     }

     /// Report whether UTF-8 mode is enabled in this configuration.
     pub fn get_utf8(&self) -> bool {
         self.utf8
     }

     /// Report the "nest limit" stored in this configuration.
     pub fn get_nest_limit(&self) -> u32 {
         self.nest_limit
     }

     /// Report whether "octal" mode is enabled in this configuration.
     pub fn get_octal(&self) -> bool {
         self.octal
     }

     /// Forward every option in this configuration to the given parser
     /// builder.
     pub(crate) fn apply(&self, builder: &mut ParserBuilder) {
         // Destructure exhaustively so that a newly added field cannot be
         // silently left out of the forwarding below.
         let SyntaxConfig {
             case_insensitive,
             multi_line,
             dot_matches_new_line,
             swap_greed,
             ignore_whitespace,
             unicode,
             utf8,
             nest_limit,
             octal,
         } = *self;
         builder
             .unicode(unicode)
             .case_insensitive(case_insensitive)
             .multi_line(multi_line)
             .dot_matches_new_line(dot_matches_new_line)
             .swap_greed(swap_greed)
             .ignore_whitespace(ignore_whitespace)
             // The parser option has the opposite polarity of `utf8`.
             .allow_invalid_utf8(!utf8)
             .nest_limit(nest_limit)
             .octal(octal);
     }
 }

 impl Default for SyntaxConfig {
     fn default() -> SyntaxConfig {
         SyntaxConfig::new()
     }
 }
	use regex_syntax::ParserBuilder;

	/// A common set of configuration options that apply to the syntax of a regex.
	///
	/// This represents a group of configuration options that specifically apply
	/// to how the concrete syntax of a regular expression is interpreted. In
	/// particular, they are generally forwarded to the
	/// [`ParserBuilder`](https://docs.rs/regex-syntax/*/regex_syntax/struct.ParserBuilder.html)
	/// in the
	/// [`regex-syntax`](https://docs.rs/regex-syntax)
	/// crate when building a regex from its concrete syntax directly.
	///
	/// These options are defined as a group since they apply to every regex engine
	/// in this crate. Instead of re-defining them on every engine's builder, they
	/// are instead provided here as one cohesive unit.
	///
	/// Every field is private: values are written through the by-value builder
	/// methods of the same name and read back through the matching `get_*`
	/// accessor. The type is `Copy`, so each builder call yields an updated copy.
	#[derive(Clone, Copy, Debug)]
	pub struct SyntaxConfig {
	// Default state of the `i` flag; `false` in `SyntaxConfig::new`.
	case_insensitive: bool,
	// Default state of the `m` flag; `false` in `SyntaxConfig::new`.
	multi_line: bool,
	// Default state of the `s` flag; `false` in `SyntaxConfig::new`.
	dot_matches_new_line: bool,
	// Default state of the `U` flag; `false` in `SyntaxConfig::new`.
	swap_greed: bool,
	// Default state of the `x` (verbose) flag; `false` in `SyntaxConfig::new`.
	ignore_whitespace: bool,
	// Default state of the `u` flag; `true` in `SyntaxConfig::new`.
	unicode: bool,
	// Whether only valid UTF-8 may be matched; `true` in `SyntaxConfig::new`.
	// `apply` forwards the negation of this as `allow_invalid_utf8`.
	utf8: bool,
	// Maximum nesting depth handed to the parser; `250` in `SyntaxConfig::new`.
	nest_limit: u32,
	// Whether octal escapes are accepted; `false` in `SyntaxConfig::new`.
	octal: bool,
	}

	impl SyntaxConfig {
	/// Return a new default syntax configuration.
	pub fn new() -> SyntaxConfig {
	// These defaults match the ones used in regex-syntax.
	SyntaxConfig {
	case_insensitive: false,
	multi_line: false,
	dot_matches_new_line: false,
	swap_greed: false,
	ignore_whitespace: false,
	unicode: true,
	utf8: true,
	nest_limit: 250,
	octal: false,
	}
	}

	/// Enable or disable the case insensitive flag by default.
	///
	/// When Unicode mode is enabled, case insensitivity is Unicode-aware.
	/// Specifically, it will apply the "simple" case folding rules as
	/// specified by Unicode.
	///
	/// By default this is disabled. It may alternatively be selectively
	/// enabled in the regular expression itself via the `i` flag.
	pub fn case_insensitive(mut self, yes: bool) -> SyntaxConfig {
	self.case_insensitive = yes;
	self
	}

	/// Enable or disable the multi-line matching flag by default.
	///
	/// When this is enabled, the `^` and `$` look-around assertions will
	/// match immediately after and immediately before a new line character,
	/// respectively. Note that the `\A` and `\z` look-around assertions are
	/// unaffected by this setting and always correspond to matching at the
	/// beginning and end of the input.
	///
	/// By default this is disabled. It may alternatively be selectively
	/// enabled in the regular expression itself via the `m` flag.
	pub fn multi_line(mut self, yes: bool) -> SyntaxConfig {
	self.multi_line = yes;
	self
	}

	/// Enable or disable the "dot matches any character" flag by default.
	///
	/// When this is enabled, `.` will match any character. When it's disabled,
	/// then `.` will match any character except for a new line character.
	///
	/// Note that `.` is impacted by whether the "unicode" setting is enabled
	/// or not. When Unicode is enabled (the defualt), `.` will match any UTF-8
	/// encoding of any Unicode scalar value (sans a new line, depending on
	/// whether this "dot matches new line" option is enabled). When Unicode
	/// mode is disabled, `.` will match any byte instead. Because of this,
	/// when Unicode mode is disabled, `.` can only be used when the "allow
	/// invalid UTF-8" option is enabled, since `.` could otherwise match
	/// invalid UTF-8.
	///
	/// By default this is disabled. It may alternatively be selectively
	/// enabled in the regular expression itself via the `s` flag.
	pub fn dot_matches_new_line(mut self, yes: bool) -> SyntaxConfig {
	self.dot_matches_new_line = yes;
	self
	}

	/// Enable or disable the "swap greed" flag by default.
	///
	/// When this is enabled, `.` (for example) will become ungreedy and `.?`
	/// will become greedy.
	///
	/// By default this is disabled. It may alternatively be selectively
	/// enabled in the regular expression itself via the `U` flag.
	pub fn swap_greed(mut self, yes: bool) -> SyntaxConfig {
	self.swap_greed = yes;
	self
	}

	/// Enable verbose mode in the regular expression.
	///
	/// When enabled, verbose mode permits insigificant whitespace in many
	/// places in the regular expression, as well as comments. Comments are
	/// started using `#` and continue until the end of the line.
	///
	/// By default, this is disabled. It may be selectively enabled in the
	/// regular expression by using the `x` flag regardless of this setting.
	pub fn ignore_whitespace(mut self, yes: bool) -> SyntaxConfig {
	self.ignore_whitespace = yes;
	self
	}

	/// Enable or disable the Unicode flag (`u`) by default.
	///
	/// By default this is enabled. It may alternatively be selectively
	/// disabled in the regular expression itself via the `u` flag.
	///
	/// Note that unless "allow invalid UTF-8" is enabled (it's disabled by
	/// default), a regular expression will fail to parse if Unicode mode is
	/// disabled and a sub-expression could possibly match invalid UTF-8.
	///
	/// WARNING: Unicode mode can greatly increase the size of the compiled
	/// DFA, which can noticeably impact both memory usage and compilation
	/// time. This is especially noticeable if your regex contains character
	/// classes like `\w` that are impacted by whether Unicode is enabled or
	/// not. If Unicode is not necessary, you are encouraged to disable it.
	pub fn unicode(mut self, yes: bool) -> SyntaxConfig {
	self.unicode = yes;
	self
	}

	/// When disabled, the builder will permit the construction of a regular
	/// expression that may match invalid UTF-8.
	///
	/// For example, when [`SyntaxConfig::unicode`] is disabled, then
	/// expressions like `[^a]` may match invalid UTF-8 since they can match
	/// any single byte that is not `a`. By default, these sub-expressions
	/// are disallowed to avoid returning offsets that split a UTF-8
	/// encoded codepoint. However, in cases where matching at arbitrary
	/// locations is desired, this option can be disabled to permit all such
	/// sub-expressions.
	///
	/// When enabled (the default), the builder is guaranteed to produce a
	/// regex that will only ever match valid UTF-8 (otherwise, the builder
	/// will return an error).
	pub fn utf8(mut self, yes: bool) -> SyntaxConfig {
	self.utf8 = yes;
	self
	}

	/// Set the nesting limit used for the regular expression parser.
	///
	/// The nesting limit controls how deep the abstract syntax tree is allowed
	/// to be. If the AST exceeds the given limit (e.g., with too many nested
	/// groups), then an error is returned by the parser.
	///
	/// The purpose of this limit is to act as a heuristic to prevent stack
	/// overflow when building a finite automaton from a regular expression's
	/// abstract syntax tree. In particular, construction currently uses
	/// recursion. In the future, the implementation may stop using recursion
	/// and this option will no longer be necessary.
	///
	/// This limit is not checked until the entire AST is parsed. Therefore,
	/// if callers want to put a limit on the amount of heap space used, then
	/// they should impose a limit on the length, in bytes, of the concrete
	/// pattern string. In particular, this is viable since the parser will
	/// limit itself to heap space proportional to the lenth of the pattern
	/// string.
	///
	/// Note that a nest limit of `0` will return a nest limit error for most
	/// patterns but not all. For example, a nest limit of `0` permits `a` but
	/// not `ab`, since `ab` requires a concatenation AST item, which results
	/// in a nest depth of `1`. In general, a nest limit is not something that
	/// manifests in an obvious way in the concrete syntax, therefore, it
	/// should not be used in a granular way.
	pub fn nest_limit(mut self, limit: u32) -> SyntaxConfig {
	self.nest_limit = limit;
	self
	}

	/// Whether to support octal syntax or not.
	///
	/// Octal syntax is a little-known way of uttering Unicode codepoints in
	/// a regular expression. For example, `a`, `\x61`, `\u0061` and
	/// `\141` are all equivalent regular expressions, where the last example
	/// shows octal syntax.
	///
	/// While supporting octal syntax isn't in and of itself a problem, it does
	/// make good error messages harder. That is, in PCRE based regex engines,
	/// syntax like `\1` invokes a backreference, which is explicitly
	/// unsupported in Rust's regex engine. However, many users expect it to
	/// be supported. Therefore, when octal support is disabled, the error
	/// message will explicitly mention that backreferences aren't supported.
	///
	/// Octal syntax is disabled by default.
	pub fn octal(mut self, yes: bool) -> SyntaxConfig {
	self.octal = yes;
	self
	}

	/// Returns whether "unicode" mode is enabled.
	pub fn get_unicode(&self) -> bool {
	self.unicode
	}

	/// Returns whether "case insensitive" mode is enabled.
	pub fn get_case_insensitive(&self) -> bool {
	self.case_insensitive
	}

	/// Returns whether "multi line" mode is enabled.
	pub fn get_multi_line(&self) -> bool {
	self.multi_line
	}

	/// Returns whether "dot matches new line" mode is enabled.
	pub fn get_dot_matches_new_line(&self) -> bool {
	self.dot_matches_new_line
	}

	/// Returns whether "swap greed" mode is enabled.
	pub fn get_swap_greed(&self) -> bool {
	self.swap_greed
	}

	/// Returns whether "ignore whitespace" mode is enabled.
	pub fn get_ignore_whitespace(&self) -> bool {
	self.ignore_whitespace
	}

	/// Returns whether UTF-8 mode is enabled.
	pub fn get_utf8(&self) -> bool {
	self.utf8
	}

	/// Returns the "nest limit" setting.
	pub fn get_nest_limit(&self) -> u32 {
	self.nest_limit
	}

	/// Returns whether "octal" mode is enabled.
	pub fn get_octal(&self) -> bool {
	self.octal
	}

	/// Applies this configuration to the given parser.
	pub(crate) fn apply(&self, builder: &mut ParserBuilder) {
	builder
	.unicode(self.unicode)
	.case_insensitive(self.case_insensitive)
	.multi_line(self.multi_line)
	.dot_matches_new_line(self.dot_matches_new_line)
	.swap_greed(self.swap_greed)
	.ignore_whitespace(self.ignore_whitespace)
	.allow_invalid_utf8(!self.utf8)
	.nest_limit(self.nest_limit)
	.octal(self.octal);
	}
	}

	impl Default for SyntaxConfig {
	fn default() -> SyntaxConfig {
	SyntaxConfig::new()
	}
	}