Importing rustc-1.56.0
Change-Id: I98941481270706fa55f8fb2cb91686ae3bd30f38
diff --git a/compiler/rustc_lexer/src/lib.rs b/compiler/rustc_lexer/src/lib.rs
index 4cb2a6c..b64a891c 100644
--- a/compiler/rustc_lexer/src/lib.rs
+++ b/compiler/rustc_lexer/src/lib.rs
@@ -273,24 +273,14 @@
/// a formal definition of valid identifier name.
pub fn is_id_start(c: char) -> bool {
// This is XID_Start OR '_' (which formally is not a XID_Start).
- // We also add fast-path for ascii idents
- ('a'..='z').contains(&c)
- || ('A'..='Z').contains(&c)
- || c == '_'
- || (c > '\x7f' && unicode_xid::UnicodeXID::is_xid_start(c))
+ c == '_' || unicode_xid::UnicodeXID::is_xid_start(c) // ASCII goes through unicode_xid too
}
/// True if `c` is valid as a non-first character of an identifier.
/// See [Rust language reference](https://doc.rust-lang.org/reference/identifiers.html) for
/// a formal definition of valid identifier name.
pub fn is_id_continue(c: char) -> bool {
- // This is exactly XID_Continue.
- // We also add fast-path for ascii idents
- ('a'..='z').contains(&c)
- || ('A'..='Z').contains(&c)
- || ('0'..='9').contains(&c)
- || c == '_'
- || (c > '\x7f' && unicode_xid::UnicodeXID::is_xid_continue(c))
+ unicode_xid::UnicodeXID::is_xid_continue(c) // exactly XID_Continue, no ASCII special case
}
/// The passed string is lexically an identifier.
@@ -499,7 +489,7 @@
// Start is already eaten, eat the rest of identifier.
self.eat_while(is_id_continue);
// Known prefixes must have been handled earlier. So if
- // we see a prefix here, it is definitely a unknown prefix.
+ // we see a prefix here, it is definitely an unknown prefix.
match self.first() {
'#' | '"' | '\'' => UnknownPrefix,
_ => Ident,
diff --git a/compiler/rustc_lexer/src/unescape.rs b/compiler/rustc_lexer/src/unescape.rs
index b4dd0fc..b970c9e 100644
--- a/compiler/rustc_lexer/src/unescape.rs
+++ b/compiler/rustc_lexer/src/unescape.rs
@@ -7,7 +7,7 @@
#[cfg(test)]
mod tests;
-/// Errors that can occur during string unescaping.
+/// Errors and warnings that can occur during string unescaping.
#[derive(Debug, PartialEq, Eq)]
pub enum EscapeError {
/// Expected 1 char, but 0 were found.
@@ -56,6 +56,24 @@
NonAsciiCharInByte,
/// Non-ascii character in byte string literal.
NonAsciiCharInByteString,
+
+ /// After a line ending with '\', the next line contains whitespace
+ /// characters that are not skipped.
+ UnskippedWhitespaceWarning,
+
+ /// After a line ending with '\', multiple lines are skipped.
+ MultipleSkippedLinesWarning,
+}
+
+impl EscapeError {
+ /// Tells hard errors apart from warnings: yields `false` only for the
+ /// two `*Warning` variants and `true` for every other variant.
+ pub fn is_fatal(&self) -> bool {
+ !matches!(
+ self,
+ EscapeError::UnskippedWhitespaceWarning | EscapeError::MultipleSkippedLinesWarning
+ )
+ }
}
/// Takes a contents of a literal (without quotes) and produces a
@@ -283,7 +301,7 @@
// if unescaped '\' character is followed by '\n'.
// For details see [Rust language reference]
// (https://doc.rust-lang.org/reference/tokens.html#string-literals).
- skip_ascii_whitespace(&mut chars);
+ skip_ascii_whitespace(&mut chars, start, callback);
continue;
}
_ => scan_escape(first_char, &mut chars, mode),
@@ -297,13 +315,30 @@
callback(start..end, unescaped_char);
}
- fn skip_ascii_whitespace(chars: &mut Chars<'_>) {
- let str = chars.as_str();
- let first_non_space = str
+ fn skip_ascii_whitespace<F>(chars: &mut Chars<'_>, start: usize, callback: &mut F)
+ where
+ F: FnMut(Range<usize>, Result<char, EscapeError>),
+ {
+ let tail = chars.as_str();
+ let first_non_space = tail
.bytes()
.position(|b| b != b' ' && b != b'\t' && b != b'\n' && b != b'\r')
- .unwrap_or(str.len());
- *chars = str[first_non_space..].chars()
+ .unwrap_or(tail.len());
+ if tail[1..first_non_space].contains('\n') {
+ // The +1 accounts for the escaping backslash, which precedes `tail`.
+ let end = start + first_non_space + 1;
+ callback(start..end, Err(EscapeError::MultipleSkippedLinesWarning));
+ }
+ let tail = &tail[first_non_space..];
+ if let Some(c) = tail.chars().next() {
+ if c.is_whitespace() {
+ // Widen the span so the diagnostic covers the unskipped character itself;
+ // the +1 accounts for the leading \ that started the escape.
+ let end = start + first_non_space + c.len_utf8() + 1;
+ callback(start..end, Err(EscapeError::UnskippedWhitespaceWarning));
+ }
+ }
+ *chars = tail.chars();
}
}
diff --git a/compiler/rustc_lexer/src/unescape/tests.rs b/compiler/rustc_lexer/src/unescape/tests.rs
index f2b751a..fa61554 100644
--- a/compiler/rustc_lexer/src/unescape/tests.rs
+++ b/compiler/rustc_lexer/src/unescape/tests.rs
@@ -99,6 +99,30 @@
}
#[test]
+fn test_unescape_str_warn() {
+ fn check(literal: &str, expected: &[(Range<usize>, Result<char, EscapeError>)]) {
+ let mut unescaped = Vec::with_capacity(literal.len());
+ unescape_literal(literal, Mode::Str, &mut |range, res| unescaped.push((range, res)));
+ assert_eq!(unescaped, expected);
+ }
+
+ // An escaped newline that ends the literal, with or without trailing blanks, yields nothing.
+ check("\\\n", &[]);
+ check("\\\n ", &[]);
+
+ check(
+ "\\\n \u{a0} x", // NBSP is not ASCII whitespace, so skipping stops there
+ &[
+ (0..5, Err(EscapeError::UnskippedWhitespaceWarning)),
+ (3..5, Ok('\u{a0}')),
+ (5..6, Ok(' ')),
+ (6..7, Ok('x')),
+ ],
+ );
+ check("\\\n  \n  x", &[(0..7, Err(EscapeError::MultipleSkippedLinesWarning)), (7..8, Ok('x'))]);
+}
+
+#[test]
fn test_unescape_str_good() {
fn check(literal_text: &str, expected: &str) {
let mut buf = Ok(String::with_capacity(literal_text.len()));