| use alloc::{ |
| format, |
| string::{String, ToString}, |
| vec, |
| vec::Vec, |
| }; |
| |
| use crate::{ast, hir}; |
| |
| /// This error type encompasses any error that can be returned by this crate. |
| /// |
| /// This error type is marked as `non_exhaustive`. This means that adding a |
| /// new variant is not considered a breaking change. |
| #[non_exhaustive] |
| #[derive(Clone, Debug, Eq, PartialEq)] |
| pub enum Error { |
| /// An error that occurred while translating concrete syntax into abstract |
| /// syntax (AST). |
| Parse(ast::Error), |
| /// An error that occurred while translating abstract syntax into a high |
| /// level intermediate representation (HIR). |
| Translate(hir::Error), |
| } |
| |
| impl From<ast::Error> for Error { |
| fn from(err: ast::Error) -> Error { |
| Error::Parse(err) |
| } |
| } |
| |
| impl From<hir::Error> for Error { |
| fn from(err: hir::Error) -> Error { |
| Error::Translate(err) |
| } |
| } |
| |
// Only available when the `std` feature is enabled. All trait methods keep
// their default implementations.
#[cfg(feature = "std")]
impl std::error::Error for Error {}
| |
| impl core::fmt::Display for Error { |
| fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { |
| match *self { |
| Error::Parse(ref x) => x.fmt(f), |
| Error::Translate(ref x) => x.fmt(f), |
| } |
| } |
| } |
| |
| /// A helper type for formatting nice error messages. |
| /// |
| /// This type is responsible for reporting regex parse errors in a nice human |
| /// readable format. Most of its complexity is from interspersing notational |
| /// markers pointing out the position where an error occurred. |
| #[derive(Debug)] |
| pub struct Formatter<'e, E> { |
| /// The original regex pattern in which the error occurred. |
| pattern: &'e str, |
| /// The error kind. It must impl fmt::Display. |
| err: &'e E, |
| /// The primary span of the error. |
| span: &'e ast::Span, |
| /// An auxiliary and optional span, in case the error needs to point to |
| /// two locations (e.g., when reporting a duplicate capture group name). |
| aux_span: Option<&'e ast::Span>, |
| } |
| |
| impl<'e> From<&'e ast::Error> for Formatter<'e, ast::ErrorKind> { |
| fn from(err: &'e ast::Error) -> Self { |
| Formatter { |
| pattern: err.pattern(), |
| err: err.kind(), |
| span: err.span(), |
| aux_span: err.auxiliary_span(), |
| } |
| } |
| } |
| |
| impl<'e> From<&'e hir::Error> for Formatter<'e, hir::ErrorKind> { |
| fn from(err: &'e hir::Error) -> Self { |
| Formatter { |
| pattern: err.pattern(), |
| err: err.kind(), |
| span: err.span(), |
| aux_span: None, |
| } |
| } |
| } |
| |
| impl<'e, E: core::fmt::Display> core::fmt::Display for Formatter<'e, E> { |
| fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { |
| let spans = Spans::from_formatter(self); |
| if self.pattern.contains('\n') { |
| let divider = repeat_char('~', 79); |
| |
| writeln!(f, "regex parse error:")?; |
| writeln!(f, "{}", divider)?; |
| let notated = spans.notate(); |
| write!(f, "{}", notated)?; |
| writeln!(f, "{}", divider)?; |
| // If we have error spans that cover multiple lines, then we just |
| // note the line numbers. |
| if !spans.multi_line.is_empty() { |
| let mut notes = vec![]; |
| for span in &spans.multi_line { |
| notes.push(format!( |
| "on line {} (column {}) through line {} (column {})", |
| span.start.line, |
| span.start.column, |
| span.end.line, |
| span.end.column - 1 |
| )); |
| } |
| writeln!(f, "{}", notes.join("\n"))?; |
| } |
| write!(f, "error: {}", self.err)?; |
| } else { |
| writeln!(f, "regex parse error:")?; |
| let notated = Spans::from_formatter(self).notate(); |
| write!(f, "{}", notated)?; |
| write!(f, "error: {}", self.err)?; |
| } |
| Ok(()) |
| } |
| } |
| |
| /// This type represents an arbitrary number of error spans in a way that makes |
| /// it convenient to notate the regex pattern. ("Notate" means "point out |
| /// exactly where the error occurred in the regex pattern.") |
| /// |
| /// Technically, we can only ever have two spans given our current error |
| /// structure. However, after toiling with a specific algorithm for handling |
| /// two spans, it became obvious that an algorithm to handle an arbitrary |
| /// number of spans was actually much simpler. |
| struct Spans<'p> { |
| /// The original regex pattern string. |
| pattern: &'p str, |
| /// The total width that should be used for line numbers. The width is |
| /// used for left padding the line numbers for alignment. |
| /// |
| /// A value of `0` means line numbers should not be displayed. That is, |
| /// the pattern is itself only one line. |
| line_number_width: usize, |
| /// All error spans that occur on a single line. This sequence always has |
| /// length equivalent to the number of lines in `pattern`, where the index |
| /// of the sequence represents a line number, starting at `0`. The spans |
| /// in each line are sorted in ascending order. |
| by_line: Vec<Vec<ast::Span>>, |
| /// All error spans that occur over one or more lines. That is, the start |
| /// and end position of the span have different line numbers. The spans are |
| /// sorted in ascending order. |
| multi_line: Vec<ast::Span>, |
| } |
| |
| impl<'p> Spans<'p> { |
| /// Build a sequence of spans from a formatter. |
| fn from_formatter<'e, E: core::fmt::Display>( |
| fmter: &'p Formatter<'e, E>, |
| ) -> Spans<'p> { |
| let mut line_count = fmter.pattern.lines().count(); |
| // If the pattern ends with a `\n` literal, then our line count is |
| // off by one, since a span can occur immediately after the last `\n`, |
| // which is consider to be an additional line. |
| if fmter.pattern.ends_with('\n') { |
| line_count += 1; |
| } |
| let line_number_width = |
| if line_count <= 1 { 0 } else { line_count.to_string().len() }; |
| let mut spans = Spans { |
| pattern: &fmter.pattern, |
| line_number_width, |
| by_line: vec![vec![]; line_count], |
| multi_line: vec![], |
| }; |
| spans.add(fmter.span.clone()); |
| if let Some(span) = fmter.aux_span { |
| spans.add(span.clone()); |
| } |
| spans |
| } |
| |
| /// Add the given span to this sequence, putting it in the right place. |
| fn add(&mut self, span: ast::Span) { |
| // This is grossly inefficient since we sort after each add, but right |
| // now, we only ever add two spans at most. |
| if span.is_one_line() { |
| let i = span.start.line - 1; // because lines are 1-indexed |
| self.by_line[i].push(span); |
| self.by_line[i].sort(); |
| } else { |
| self.multi_line.push(span); |
| self.multi_line.sort(); |
| } |
| } |
| |
| /// Notate the pattern string with carents (`^`) pointing at each span |
| /// location. This only applies to spans that occur within a single line. |
| fn notate(&self) -> String { |
| let mut notated = String::new(); |
| for (i, line) in self.pattern.lines().enumerate() { |
| if self.line_number_width > 0 { |
| notated.push_str(&self.left_pad_line_number(i + 1)); |
| notated.push_str(": "); |
| } else { |
| notated.push_str(" "); |
| } |
| notated.push_str(line); |
| notated.push('\n'); |
| if let Some(notes) = self.notate_line(i) { |
| notated.push_str(¬es); |
| notated.push('\n'); |
| } |
| } |
| notated |
| } |
| |
| /// Return notes for the line indexed at `i` (zero-based). If there are no |
| /// spans for the given line, then `None` is returned. Otherwise, an |
| /// appropriately space padded string with correctly positioned `^` is |
| /// returned, accounting for line numbers. |
| fn notate_line(&self, i: usize) -> Option<String> { |
| let spans = &self.by_line[i]; |
| if spans.is_empty() { |
| return None; |
| } |
| let mut notes = String::new(); |
| for _ in 0..self.line_number_padding() { |
| notes.push(' '); |
| } |
| let mut pos = 0; |
| for span in spans { |
| for _ in pos..(span.start.column - 1) { |
| notes.push(' '); |
| pos += 1; |
| } |
| let note_len = span.end.column.saturating_sub(span.start.column); |
| for _ in 0..core::cmp::max(1, note_len) { |
| notes.push('^'); |
| pos += 1; |
| } |
| } |
| Some(notes) |
| } |
| |
| /// Left pad the given line number with spaces such that it is aligned with |
| /// other line numbers. |
| fn left_pad_line_number(&self, n: usize) -> String { |
| let n = n.to_string(); |
| let pad = self.line_number_width.checked_sub(n.len()).unwrap(); |
| let mut result = repeat_char(' ', pad); |
| result.push_str(&n); |
| result |
| } |
| |
| /// Return the line number padding beginning at the start of each line of |
| /// the pattern. |
| /// |
| /// If the pattern is only one line, then this returns a fixed padding |
| /// for visual indentation. |
| fn line_number_padding(&self) -> usize { |
| if self.line_number_width == 0 { |
| 4 |
| } else { |
| 2 + self.line_number_width |
| } |
| } |
| } |
| |
/// Build a string consisting of `c` repeated `count` times. A `count` of
/// zero yields the empty string.
fn repeat_char(c: char, count: usize) -> String {
    (0..count).map(|_| c).collect()
}
| |
#[cfg(test)]
mod tests {
    use alloc::string::ToString;

    use crate::ast::parse::Parser;

    /// Parse `pattern`, require that parsing fails, and compare the rendered
    /// error against `expected_msg`. Leading and trailing whitespace of the
    /// expectation is trimmed so it can be written as a multi-line raw
    /// string.
    fn assert_panic_message(pattern: &str, expected_msg: &str) {
        match Parser::new().parse(pattern) {
            Ok(_) => panic!("regex should not have parsed"),
            Err(err) => assert_eq!(err.to_string(), expected_msg.trim()),
        }
    }

    // See: https://github.com/rust-lang/regex/issues/464
    #[test]
    fn regression_464() {
        let err = Parser::new().parse("a{\n").unwrap_err();
        // This test checks that the error formatter doesn't panic.
        assert!(!err.to_string().is_empty());
    }

    // See: https://github.com/rust-lang/regex/issues/545
    #[test]
    fn repetition_quantifier_expects_a_valid_decimal() {
        // Whitespace inside the raw string is significant: the pattern line
        // is indented by four spaces and the caret sits below the position
        // right after the `{`.
        // NOTE(review): the caret column depends on the parser's reported
        // span; confirm against the parser.
        assert_panic_message(
            r"\\u{[^}]*}",
            r#"
regex parse error:
    \\u{[^}]*}
        ^
error: repetition quantifier expects a valid decimal
"#,
        );
    }
}