Importing rustc-1.56.0

Change-Id: I98941481270706fa55f8fb2cb91686ae3bd30f38

commit: bcf972c0208490b0eb3ce3c170c2db486ba945b3 [log] [tgz]
author: Chris Wailes <[email protected]> Thu Oct 21 11:03:28 2021 -0700
committer: Chris Wailes <[email protected]> Thu Oct 21 11:09:35 2021 -0700
tree: b1c1dbb1b4ab51d68cdf0d952fd8b6df20856cbd
parent: 54272acac043c1dedfb7db7420545b31ec1ac51f [diff]
diff --git a/compiler/rustc_lexer/src/lib.rs b/compiler/rustc_lexer/src/lib.rs
index 4cb2a6c..b64a891c 100644
--- a/compiler/rustc_lexer/src/lib.rs
+++ b/compiler/rustc_lexer/src/lib.rs

@@ -273,24 +273,14 @@
 /// a formal definition of valid identifier name.
 pub fn is_id_start(c: char) -> bool {
     // This is XID_Start OR '_' (which formally is not a XID_Start).
-    // We also add fast-path for ascii idents
-    ('a'..='z').contains(&c)
-        || ('A'..='Z').contains(&c)
-        || c == '_'
-        || (c > '\x7f' && unicode_xid::UnicodeXID::is_xid_start(c))
+    c == '_' || unicode_xid::UnicodeXID::is_xid_start(c)
 }
 
 /// True if `c` is valid as a non-first character of an identifier.
 /// See [Rust language reference](https://doc.rust-lang.org/reference/identifiers.html) for
 /// a formal definition of valid identifier name.
 pub fn is_id_continue(c: char) -> bool {
-    // This is exactly XID_Continue.
-    // We also add fast-path for ascii idents
-    ('a'..='z').contains(&c)
-        || ('A'..='Z').contains(&c)
-        || ('0'..='9').contains(&c)
-        || c == '_'
-        || (c > '\x7f' && unicode_xid::UnicodeXID::is_xid_continue(c))
+    unicode_xid::UnicodeXID::is_xid_continue(c)
 }
 
 /// The passed string is lexically an identifier.
@@ -499,7 +489,7 @@
         // Start is already eaten, eat the rest of identifier.
         self.eat_while(is_id_continue);
         // Known prefixes must have been handled earlier. So if
-        // we see a prefix here, it is definitely a unknown prefix.
+        // we see a prefix here, it is definitely an unknown prefix.
         match self.first() {
             '#' | '"' | '\'' => UnknownPrefix,
             _ => Ident,

diff --git a/compiler/rustc_lexer/src/unescape.rs b/compiler/rustc_lexer/src/unescape.rs
index b4dd0fc..b970c9e 100644
--- a/compiler/rustc_lexer/src/unescape.rs
+++ b/compiler/rustc_lexer/src/unescape.rs

@@ -7,7 +7,7 @@
 #[cfg(test)]
 mod tests;
 
-/// Errors that can occur during string unescaping.
+/// Errors and warnings that can occur during string unescaping.
 #[derive(Debug, PartialEq, Eq)]
 pub enum EscapeError {
     /// Expected 1 char, but 0 were found.
@@ -56,6 +56,24 @@
     NonAsciiCharInByte,
     /// Non-ascii character in byte string literal.
     NonAsciiCharInByteString,
+
+    /// After a line ending with '\', the next line contains whitespace
+    /// characters that are not skipped.
+    UnskippedWhitespaceWarning,
+
+    /// After a line ending with '\', multiple lines are skipped.
+    MultipleSkippedLinesWarning,
+}
+
+impl EscapeError {
+    /// Returns true for actual errors, as opposed to warnings.
+    pub fn is_fatal(&self) -> bool {
+        match self {
+            EscapeError::UnskippedWhitespaceWarning => false,
+            EscapeError::MultipleSkippedLinesWarning => false,
+            _ => true,
+        }
+    }
 }
 
 /// Takes a contents of a literal (without quotes) and produces a
@@ -283,7 +301,7 @@
                         // if unescaped '\' character is followed by '\n'.
                         // For details see [Rust language reference]
                         // (https://doc.rust-lang.org/reference/tokens.html#string-literals).
-                        skip_ascii_whitespace(&mut chars);
+                        skip_ascii_whitespace(&mut chars, start, callback);
                         continue;
                     }
                     _ => scan_escape(first_char, &mut chars, mode),
@@ -297,13 +315,30 @@
         callback(start..end, unescaped_char);
     }
 
-    fn skip_ascii_whitespace(chars: &mut Chars<'_>) {
-        let str = chars.as_str();
-        let first_non_space = str
+    fn skip_ascii_whitespace<F>(chars: &mut Chars<'_>, start: usize, callback: &mut F)
+    where
+        F: FnMut(Range<usize>, Result<char, EscapeError>),
+    {
+        let tail = chars.as_str();
+        let first_non_space = tail
             .bytes()
             .position(|b| b != b' ' && b != b'\t' && b != b'\n' && b != b'\r')
-            .unwrap_or(str.len());
-        *chars = str[first_non_space..].chars()
+            .unwrap_or(tail.len());
+        if tail[1..first_non_space].contains('\n') {
+            // The +1 accounts for the escaping slash.
+            let end = start + first_non_space + 1;
+            callback(start..end, Err(EscapeError::MultipleSkippedLinesWarning));
+        }
+        let tail = &tail[first_non_space..];
+        if let Some(c) = tail.chars().nth(0) {
+            // For error reporting, we would like the span to contain the character that was not
+            // skipped.  The +1 is necessary to account for the leading \ that started the escape.
+            let end = start + first_non_space + c.len_utf8() + 1;
+            if c.is_whitespace() {
+                callback(start..end, Err(EscapeError::UnskippedWhitespaceWarning));
+            }
+        }
+        *chars = tail.chars();
     }
 }
 

diff --git a/compiler/rustc_lexer/src/unescape/tests.rs b/compiler/rustc_lexer/src/unescape/tests.rs
index f2b751a..fa61554 100644
--- a/compiler/rustc_lexer/src/unescape/tests.rs
+++ b/compiler/rustc_lexer/src/unescape/tests.rs

@@ -99,6 +99,30 @@
 }
 
 #[test]
+fn test_unescape_str_warn() {
+    fn check(literal: &str, expected: &[(Range<usize>, Result<char, EscapeError>)]) {
+        let mut unescaped = Vec::with_capacity(literal.len());
+        unescape_literal(literal, Mode::Str, &mut |range, res| unescaped.push((range, res)));
+        assert_eq!(unescaped, expected);
+    }
+
+    // Check we can handle escaped newlines at the end of a file.
+    check("\\\n", &[]);
+    check("\\\n ", &[]);
+
+    check(
+        "\\\n \u{a0} x",
+        &[
+            (0..5, Err(EscapeError::UnskippedWhitespaceWarning)),
+            (3..5, Ok('\u{a0}')),
+            (5..6, Ok(' ')),
+            (6..7, Ok('x')),
+        ],
+    );
+    check("\\\n  \n  x", &[(0..7, Err(EscapeError::MultipleSkippedLinesWarning)), (7..8, Ok('x'))]);
+}
+
+#[test]
 fn test_unescape_str_good() {
     fn check(literal_text: &str, expected: &str) {
         let mut buf = Ok(String::with_capacity(literal_text.len()));
commit	bcf972c0208490b0eb3ce3c170c2db486ba945b3	[log] [tgz]
author	Chris Wailes <[email protected]>	Thu Oct 21 11:03:28 2021 -0700
committer	Chris Wailes <[email protected]>	Thu Oct 21 11:09:35 2021 -0700
tree	b1c1dbb1b4ab51d68cdf0d952fd8b6df20856cbd
parent	54272acac043c1dedfb7db7420545b31ec1ac51f [diff]