use expect_test::{Expect, expect};

use super::*;

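/// Helper: prepends an `r` prefix to `s`, bumps the cursor past it, and
/// lexes the remainder as a raw double-quoted string, comparing the result
/// (number of hashes on success, or the lexing error) against `expected`.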
fn check_raw_str(s: &str, expected: Result<u8, RawStrError>) {
    let s = &format!("r{}", s);
    let mut cursor = Cursor::new(s);
    cursor.bump();
    let res = cursor.raw_double_quoted_string(0);
    assert_eq!(res, expected);
}

#[test]
fn test_naked_raw_str() {
    check_raw_str(r#""abc""#, Ok(0));
}
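
// Illustrative extra case, not part of the upstream suite: a raw string
// delimited by a single hash should report one hash.
#[test]
fn test_one_hash() {
    check_raw_str(r##"#"abc"#"##, Ok(1));
}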

#[test]
fn test_raw_no_start() {
    check_raw_str(r##""abc"#"##, Ok(0));
}

#[test]
fn test_too_many_terminators() {
    // The lexer stops at the matching number of hashes; the stray trailing
    // `#` is an error reported later, in the parser.
    check_raw_str(r###"#"abc"##"###, Ok(1));
}

#[test]
fn test_unterminated() {
    check_raw_str(
        r#"#"abc"#,
        Err(RawStrError::NoTerminator { expected: 1, found: 0, possible_terminator_offset: None }),
    );
    check_raw_str(
        r###"##"abc"#"###,
        Err(RawStrError::NoTerminator {
            expected: 2,
            found: 1,
            possible_terminator_offset: Some(7),
        }),
    );
    // We're looking for `"#`, not just any `#`.
    check_raw_str(
        r###"##"abc#"###,
        Err(RawStrError::NoTerminator { expected: 2, found: 0, possible_terminator_offset: None }),
    )
}

#[test]
fn test_invalid_start() {
    check_raw_str(r##"#~"abc"#"##, Err(RawStrError::InvalidStarter { bad_char: '~' }));
}
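
// Illustrative extra case, not part of the upstream suite: any non-quote
// character after the opening hashes is an invalid starter, including a
// space.
#[test]
fn test_invalid_start_space() {
    check_raw_str(r##"# "abc"#"##, Err(RawStrError::InvalidStarter { bad_char: ' ' }));
}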

#[test]
fn test_unterminated_no_pound() {
    // https://github.com/rust-lang/rust/issues/70677
    check_raw_str(
        r#"""#,
        Err(RawStrError::NoTerminator { expected: 0, found: 0, possible_terminator_offset: None }),
    );
}

#[test]
fn test_too_many_hashes() {
    let max_count = u8::MAX;
    let hashes1 = "#".repeat(max_count as usize);
    let hashes2 = "#".repeat(max_count as usize + 1);
    let middle = "\"abc\"";
    let s1 = [&hashes1, middle, &hashes1].join("");
    let s2 = [&hashes2, middle, &hashes2].join("");

    // Valid number of hashes (255 = 2^8 - 1 = u8::MAX).
    check_raw_str(&s1, Ok(255));

    // One more hash sign (256 = 2^8) becomes too many.
    check_raw_str(&s2, Err(RawStrError::TooManyDelimiters { found: u32::from(max_count) + 1 }));
}

#[test]
fn test_valid_shebang() {
    // https://github.com/rust-lang/rust/issues/70528
    let input = "#!/usr/bin/rustrun\nlet x = 5;";
    assert_eq!(strip_shebang(input), Some(18));
}
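
// Illustrative extra case, not part of the upstream suite: an `env`-style
// shebang is stripped the same way; the returned length excludes the
// trailing newline ("#!/usr/bin/env bash" is 19 bytes).
#[test]
fn test_valid_shebang_env() {
    let input = "#!/usr/bin/env bash\nlet x = 5;";
    assert_eq!(strip_shebang(input), Some(19));
}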

#[test]
fn test_invalid_shebang_valid_rust_syntax() {
    // https://github.com/rust-lang/rust/issues/70528
    let input = "#! [bad_attribute]";
    assert_eq!(strip_shebang(input), None);
}

#[test]
fn test_shebang_second_line() {
    // Shebangs are interpreted by the kernel, so they must be on the first line.
| let input = "\n#!/bin/bash"; |
| assert_eq!(strip_shebang(input), None); |
| } |
| |
| #[test] |
| fn test_shebang_space() { |
| let input = "#! /bin/bash"; |
| assert_eq!(strip_shebang(input), Some(input.len())); |
| } |
| |
| #[test] |
| fn test_shebang_empty_shebang() { |
| let input = "#! \n[attribute(foo)]"; |
| assert_eq!(strip_shebang(input), None); |
| } |
| |
| #[test] |
| fn test_invalid_shebang_comment() { |
| let input = "#!//bin/ami/a/comment\n["; |
| assert_eq!(strip_shebang(input), None) |
| } |
| |
| #[test] |
| fn test_invalid_shebang_another_comment() { |
| let input = "#!/*bin/ami/a/comment*/\n[attribute"; |
| assert_eq!(strip_shebang(input), None) |
| } |

#[test]
fn test_shebang_valid_rust_after() {
    let input = "#!/*bin/ami/a/comment*/\npub fn main() {}";
    assert_eq!(strip_shebang(input), Some(23))
}

#[test]
fn test_shebang_followed_by_attrib() {
    let input = "#!/bin/rust-scripts\n#![allow_unused(true)]";
    assert_eq!(strip_shebang(input), Some(19));
}

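/// Helper: lexes `src`, formats each token with `Debug` (one per line), and
/// compares the result against the expected snapshot.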
fn check_lexing(src: &str, expect: Expect) {
    let actual: String = tokenize(src).map(|token| format!("{:?}\n", token)).collect();
    expect.assert_eq(&actual)
}

#[test]
fn smoke_test() {
    check_lexing("/* my source file */ fn main() { println!(\"zebra\"); }\n", expect![[r#"
        Token { kind: BlockComment { doc_style: None, terminated: true }, len: 20 }
        Token { kind: Whitespace, len: 1 }
        Token { kind: Ident, len: 2 }
        Token { kind: Whitespace, len: 1 }
        Token { kind: Ident, len: 4 }
        Token { kind: OpenParen, len: 1 }
        Token { kind: CloseParen, len: 1 }
        Token { kind: Whitespace, len: 1 }
        Token { kind: OpenBrace, len: 1 }
        Token { kind: Whitespace, len: 1 }
        Token { kind: Ident, len: 7 }
        Token { kind: Bang, len: 1 }
        Token { kind: OpenParen, len: 1 }
        Token { kind: Literal { kind: Str { terminated: true }, suffix_start: 7 }, len: 7 }
        Token { kind: CloseParen, len: 1 }
        Token { kind: Semi, len: 1 }
        Token { kind: Whitespace, len: 1 }
        Token { kind: CloseBrace, len: 1 }
        Token { kind: Whitespace, len: 1 }
    "#]])
}

#[test]
fn comment_flavors() {
    check_lexing(
        r"
// line
//// line as well
/// outer doc line
//! inner doc line
/* block */
/**/
/*** also block */
/** outer doc block */
/*! inner doc block */
",
        expect![[r#"
            Token { kind: Whitespace, len: 1 }
            Token { kind: LineComment { doc_style: None }, len: 7 }
            Token { kind: Whitespace, len: 1 }
            Token { kind: LineComment { doc_style: None }, len: 17 }
            Token { kind: Whitespace, len: 1 }
            Token { kind: LineComment { doc_style: Some(Outer) }, len: 18 }
            Token { kind: Whitespace, len: 1 }
            Token { kind: LineComment { doc_style: Some(Inner) }, len: 18 }
            Token { kind: Whitespace, len: 1 }
            Token { kind: BlockComment { doc_style: None, terminated: true }, len: 11 }
            Token { kind: Whitespace, len: 1 }
            Token { kind: BlockComment { doc_style: None, terminated: true }, len: 4 }
            Token { kind: Whitespace, len: 1 }
            Token { kind: BlockComment { doc_style: None, terminated: true }, len: 18 }
            Token { kind: Whitespace, len: 1 }
            Token { kind: BlockComment { doc_style: Some(Outer), terminated: true }, len: 22 }
            Token { kind: Whitespace, len: 1 }
            Token { kind: BlockComment { doc_style: Some(Inner), terminated: true }, len: 22 }
            Token { kind: Whitespace, len: 1 }
        "#]],
    )
}

#[test]
fn nested_block_comments() {
    check_lexing("/* /* */ */'a'", expect![[r#"
        Token { kind: BlockComment { doc_style: None, terminated: true }, len: 11 }
        Token { kind: Literal { kind: Char { terminated: true }, suffix_start: 3 }, len: 3 }
    "#]])
}
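
// Illustrative extra case, not part of the upstream suite: an unterminated
// block comment still produces a single BlockComment token, flagged as
// `terminated: false`.
#[test]
fn unterminated_block_comment() {
    check_lexing("/* open", expect![[r#"
        Token { kind: BlockComment { doc_style: None, terminated: false }, len: 7 }
    "#]])
}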

#[test]
fn characters() {
    check_lexing("'a' ' ' '\\n'", expect![[r#"
        Token { kind: Literal { kind: Char { terminated: true }, suffix_start: 3 }, len: 3 }
        Token { kind: Whitespace, len: 1 }
        Token { kind: Literal { kind: Char { terminated: true }, suffix_start: 3 }, len: 3 }
        Token { kind: Whitespace, len: 1 }
        Token { kind: Literal { kind: Char { terminated: true }, suffix_start: 4 }, len: 4 }
    "#]]);
}

#[test]
fn lifetime() {
    check_lexing("'abc", expect![[r#"
        Token { kind: Lifetime { starts_with_number: false }, len: 4 }
    "#]]);
}
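
// Illustrative extra case, not part of the upstream suite: a lifetime whose
// name starts with a digit is still lexed as one token, with
// `starts_with_number` set so the parser can reject it later.
#[test]
fn lifetime_starting_with_number() {
    check_lexing("'1abc", expect![[r#"
        Token { kind: Lifetime { starts_with_number: true }, len: 5 }
    "#]]);
}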

#[test]
fn raw_string() {
    check_lexing("r###\"\"#a\\b\x00c\"\"###", expect![[r#"
        Token { kind: Literal { kind: RawStr { n_hashes: Some(3) }, suffix_start: 17 }, len: 17 }
    "#]])
}

#[test]
fn literal_suffixes() {
    check_lexing(
        r####"
'a'
b'a'
"a"
b"a"
1234
0b101
0xABC
1.0
1.0e10
2us
r###"raw"###suffix
br###"raw"###suffix
"####,
        expect![[r#"
            Token { kind: Whitespace, len: 1 }
            Token { kind: Literal { kind: Char { terminated: true }, suffix_start: 3 }, len: 3 }
            Token { kind: Whitespace, len: 1 }
            Token { kind: Literal { kind: Byte { terminated: true }, suffix_start: 4 }, len: 4 }
            Token { kind: Whitespace, len: 1 }
            Token { kind: Literal { kind: Str { terminated: true }, suffix_start: 3 }, len: 3 }
            Token { kind: Whitespace, len: 1 }
            Token { kind: Literal { kind: ByteStr { terminated: true }, suffix_start: 4 }, len: 4 }
            Token { kind: Whitespace, len: 1 }
            Token { kind: Literal { kind: Int { base: Decimal, empty_int: false }, suffix_start: 4 }, len: 4 }
            Token { kind: Whitespace, len: 1 }
            Token { kind: Literal { kind: Int { base: Binary, empty_int: false }, suffix_start: 5 }, len: 5 }
            Token { kind: Whitespace, len: 1 }
            Token { kind: Literal { kind: Int { base: Hexadecimal, empty_int: false }, suffix_start: 5 }, len: 5 }
            Token { kind: Whitespace, len: 1 }
            Token { kind: Literal { kind: Float { base: Decimal, empty_exponent: false }, suffix_start: 3 }, len: 3 }
            Token { kind: Whitespace, len: 1 }
            Token { kind: Literal { kind: Float { base: Decimal, empty_exponent: false }, suffix_start: 6 }, len: 6 }
            Token { kind: Whitespace, len: 1 }
            Token { kind: Literal { kind: Int { base: Decimal, empty_int: false }, suffix_start: 1 }, len: 3 }
            Token { kind: Whitespace, len: 1 }
            Token { kind: Literal { kind: RawStr { n_hashes: Some(3) }, suffix_start: 12 }, len: 18 }
            Token { kind: Whitespace, len: 1 }
            Token { kind: Literal { kind: RawByteStr { n_hashes: Some(3) }, suffix_start: 13 }, len: 19 }
            Token { kind: Whitespace, len: 1 }
        "#]],
    )
}