Blame - vendor/regex-syntax-0.6.22/src/parser.rs - toolchain/rustc

blob: 00f1391642cc47c2960df163417a032632c99ad9 [file] [log] [blame]

Chris Wailes	32f7835	2021-07-20 14:04:55 -0700	[diff] [blame]	1	use ast;
				2	use hir;
				3
				4	use Result;
				5
				6	/// A builder for a regular expression parser.
				7	///
				8	/// This builder permits modifying configuration options for the parser.
				9	///
				10	/// This type combines the builder options for both the
				11	/// [AST `ParserBuilder`](ast/parse/struct.ParserBuilder.html)
				12	/// and the
				13	/// [HIR `TranslatorBuilder`](hir/translate/struct.TranslatorBuilder.html).
				14	#[derive(Clone, Debug, Default)]
				15	pub struct ParserBuilder {
				16	ast: ast::parse::ParserBuilder,
				17	hir: hir::translate::TranslatorBuilder,
				18	}
				19
				20	impl ParserBuilder {
				21	/// Create a new parser builder with a default configuration.
				22	pub fn new() -> ParserBuilder {
				23	ParserBuilder::default()
				24	}
				25
				26	/// Build a parser from this configuration with the given pattern.
				27	pub fn build(&self) -> Parser {
				28	Parser { ast: self.ast.build(), hir: self.hir.build() }
				29	}
				30
				31	/// Set the nesting limit for this parser.
				32	///
				33	/// The nesting limit controls how deep the abstract syntax tree is allowed
				34	/// to be. If the AST exceeds the given limit (e.g., with too many nested
				35	/// groups), then an error is returned by the parser.
				36	///
				37	/// The purpose of this limit is to act as a heuristic to prevent stack
				38	/// overflow for consumers that do structural induction on an `Ast` using
				39	/// explicit recursion. While this crate never does this (instead using
				40	/// constant stack space and moving the call stack to the heap), other
				41	/// crates may.
				42	///
				43	/// This limit is not checked until the entire Ast is parsed. Therefore,
				44	/// if callers want to put a limit on the amount of heap space used, then
				45	/// they should impose a limit on the length, in bytes, of the concrete
				46	/// pattern string. In particular, this is viable since this parser
				47	/// implementation will limit itself to heap space proportional to the
				48	/// lenth of the pattern string.
				49	///
				50	/// Note that a nest limit of `0` will return a nest limit error for most
				51	/// patterns but not all. For example, a nest limit of `0` permits `a` but
				52	/// not `ab`, since `ab` requires a concatenation, which results in a nest
				53	/// depth of `1`. In general, a nest limit is not something that manifests
				54	/// in an obvious way in the concrete syntax, therefore, it should not be
				55	/// used in a granular way.
				56	pub fn nest_limit(&mut self, limit: u32) -> &mut ParserBuilder {
				57	self.ast.nest_limit(limit);
				58	self
				59	}
				60
				61	/// Whether to support octal syntax or not.
				62	///
				63	/// Octal syntax is a little-known way of uttering Unicode codepoints in
				64	/// a regular expression. For example, `a`, `\x61`, `\u0061` and
				65	/// `\141` are all equivalent regular expressions, where the last example
				66	/// shows octal syntax.
				67	///
				68	/// While supporting octal syntax isn't in and of itself a problem, it does
				69	/// make good error messages harder. That is, in PCRE based regex engines,
				70	/// syntax like `\0` invokes a backreference, which is explicitly
				71	/// unsupported in Rust's regex engine. However, many users expect it to
				72	/// be supported. Therefore, when octal support is disabled, the error
				73	/// message will explicitly mention that backreferences aren't supported.
				74	///
				75	/// Octal syntax is disabled by default.
				76	pub fn octal(&mut self, yes: bool) -> &mut ParserBuilder {
				77	self.ast.octal(yes);
				78	self
				79	}
				80
				81	/// When enabled, the parser will permit the construction of a regular
				82	/// expression that may match invalid UTF-8.
				83	///
				84	/// When disabled (the default), the parser is guaranteed to produce
				85	/// an expression that will only ever match valid UTF-8 (otherwise, the
				86	/// parser will return an error).
				87	///
				88	/// Perhaps surprisingly, when invalid UTF-8 isn't allowed, a negated ASCII
				89	/// word boundary (uttered as `(?-u:\B)` in the concrete syntax) will cause
				90	/// the parser to return an error. Namely, a negated ASCII word boundary
				91	/// can result in matching positions that aren't valid UTF-8 boundaries.
				92	pub fn allow_invalid_utf8(&mut self, yes: bool) -> &mut ParserBuilder {
				93	self.hir.allow_invalid_utf8(yes);
				94	self
				95	}
				96
				97	/// Enable verbose mode in the regular expression.
				98	///
				99	/// When enabled, verbose mode permits insigificant whitespace in many
				100	/// places in the regular expression, as well as comments. Comments are
				101	/// started using `#` and continue until the end of the line.
				102	///
				103	/// By default, this is disabled. It may be selectively enabled in the
				104	/// regular expression by using the `x` flag regardless of this setting.
				105	pub fn ignore_whitespace(&mut self, yes: bool) -> &mut ParserBuilder {
				106	self.ast.ignore_whitespace(yes);
				107	self
				108	}
				109
				110	/// Enable or disable the case insensitive flag by default.
				111	///
				112	/// By default this is disabled. It may alternatively be selectively
				113	/// enabled in the regular expression itself via the `i` flag.
				114	pub fn case_insensitive(&mut self, yes: bool) -> &mut ParserBuilder {
				115	self.hir.case_insensitive(yes);
				116	self
				117	}
				118
				119	/// Enable or disable the multi-line matching flag by default.
				120	///
				121	/// By default this is disabled. It may alternatively be selectively
				122	/// enabled in the regular expression itself via the `m` flag.
				123	pub fn multi_line(&mut self, yes: bool) -> &mut ParserBuilder {
				124	self.hir.multi_line(yes);
				125	self
				126	}
				127
				128	/// Enable or disable the "dot matches any character" flag by default.
				129	///
				130	/// By default this is disabled. It may alternatively be selectively
				131	/// enabled in the regular expression itself via the `s` flag.
				132	pub fn dot_matches_new_line(&mut self, yes: bool) -> &mut ParserBuilder {
				133	self.hir.dot_matches_new_line(yes);
				134	self
				135	}
				136
				137	/// Enable or disable the "swap greed" flag by default.
				138	///
				139	/// By default this is disabled. It may alternatively be selectively
				140	/// enabled in the regular expression itself via the `U` flag.
				141	pub fn swap_greed(&mut self, yes: bool) -> &mut ParserBuilder {
				142	self.hir.swap_greed(yes);
				143	self
				144	}
				145
				146	/// Enable or disable the Unicode flag (`u`) by default.
				147	///
				148	/// By default this is enabled. It may alternatively be selectively
				149	/// disabled in the regular expression itself via the `u` flag.
				150	///
				151	/// Note that unless `allow_invalid_utf8` is enabled (it's disabled by
				152	/// default), a regular expression will fail to parse if Unicode mode is
				153	/// disabled and a sub-expression could possibly match invalid UTF-8.
				154	pub fn unicode(&mut self, yes: bool) -> &mut ParserBuilder {
				155	self.hir.unicode(yes);
				156	self
				157	}
				158	}
				159
				160	/// A convenience parser for regular expressions.
				161	///
				162	/// This parser takes as input a regular expression pattern string (the
				163	/// "concrete syntax") and returns a high-level intermediate representation
				164	/// (the HIR) suitable for most types of analysis. In particular, this parser
				165	/// hides the intermediate state of producing an AST (the "abstract syntax").
				166	/// The AST is itself far more complex than the HIR, so this parser serves as a
				167	/// convenience for never having to deal with it at all.
				168	///
				169	/// If callers have more fine grained use cases that need an AST, then please
				170	/// see the [`ast::parse`](ast/parse/index.html) module.
				171	///
				172	/// A `Parser` can be configured in more detail via a
				173	/// [`ParserBuilder`](struct.ParserBuilder.html).
				174	#[derive(Clone, Debug)]
				175	pub struct Parser {
				176	ast: ast::parse::Parser,
				177	hir: hir::translate::Translator,
				178	}
				179
				180	impl Parser {
				181	/// Create a new parser with a default configuration.
				182	///
				183	/// The parser can be run with `parse` method. The parse method returns
				184	/// a high level intermediate representation of the given regular
				185	/// expression.
				186	///
				187	/// To set configuration options on the parser, use
				188	/// [`ParserBuilder`](struct.ParserBuilder.html).
				189	pub fn new() -> Parser {
				190	ParserBuilder::new().build()
				191	}
				192
				193	/// Parse the regular expression into a high level intermediate
				194	/// representation.
				195	pub fn parse(&mut self, pattern: &str) -> Result<hir::Hir> {
				196	let ast = self.ast.parse(pattern)?;
				197	let hir = self.hir.translate(pattern, &ast)?;
				198	Ok(hir)
				199	}
				200	}