//===--- TokenTest.cpp ----------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "clang-pseudo/Token.h"
#include "clang/Basic/LangOptions.h"
#include "clang/Basic/TokenKinds.h"
#include "gmock/gmock.h"
#include "gtest/gtest.h"

namespace clang {
namespace pseudo {
namespace {

using testing::AllOf;
using testing::ElementsAre;
using testing::ElementsAreArray;
using testing::Not;
MATCHER_P2(token, Text, Kind, "") {
  return arg.Kind == Kind && arg.text() == Text;
}

MATCHER_P(hasFlag, Flag, "") { return arg.flag(Flag); }

MATCHER_P2(lineIndent, Line, Indent, "") {
  return arg.Line == (unsigned)Line && arg.Indent == (unsigned)Indent;
}

MATCHER_P(originalIndex, index, "") {
  return arg.OriginalIndex == (Token::Index)index;
}
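
// These compose via gmock, e.g.
//   EXPECT_THAT(S.tokens(), ElementsAre(AllOf(token(";", tok::semi),
//                                             originalIndex(3))));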

TEST(TokenTest, Lex) {
  LangOptions Opts;
  std::string Code = R"cpp(
    #include <stdio.h>
    int main() {
      return 42; // the answer
    }
  )cpp";
  TokenStream Raw = lex(Code, Opts);
  ASSERT_TRUE(Raw.isFinalized());
  EXPECT_THAT(Raw.tokens(),
              ElementsAreArray({
                  // Lexing of directives is weird, especially <angled> strings:
                  // there is no header-name token at this stage, so the include
                  // target lexes as loose punctuation and identifiers.
                  token("#", tok::hash),
                  token("include", tok::raw_identifier),
                  token("<", tok::less),
                  token("stdio", tok::raw_identifier),
                  token(".", tok::period),
                  token("h", tok::raw_identifier),
                  token(">", tok::greater),

                  token("int", tok::raw_identifier),
                  token("main", tok::raw_identifier),
                  token("(", tok::l_paren),
                  token(")", tok::r_paren),
                  token("{", tok::l_brace),
                  token("return", tok::raw_identifier),
                  token("42", tok::numeric_constant),
                  token(";", tok::semi),
                  token("// the answer", tok::comment),
                  token("}", tok::r_brace),
              }));

  TokenStream Cooked = cook(Raw, Opts);
  ASSERT_TRUE(Cooked.isFinalized());
  EXPECT_THAT(Cooked.tokens(),
              ElementsAreArray({
                  // Cooked identifier kinds in directives are not meaningful.
                  token("#", tok::hash),
                  token("include", tok::identifier),
                  token("<", tok::less),
                  token("stdio", tok::identifier),
                  token(".", tok::period),
                  token("h", tok::identifier),
                  token(">", tok::greater),

                  token("int", tok::kw_int),
                  token("main", tok::identifier),
                  token("(", tok::l_paren),
                  token(")", tok::r_paren),
                  token("{", tok::l_brace),
                  token("return", tok::kw_return),
                  token("42", tok::numeric_constant),
                  token(";", tok::semi),
                  token("// the answer", tok::comment),
                  token("}", tok::r_brace),
              }));
  // Check that raw tokens point back into the original source code.
  EXPECT_EQ(Raw.tokens().front().text().begin(), &Code[Code.find('#')]);
}

TEST(TokenTest, LineContinuation) {
  LangOptions Opts;
  std::string Code = R"cpp(
one_\
token
two \
tokens
  )cpp";
  TokenStream Raw = lex(Code, Opts);
  EXPECT_THAT(
      Raw.tokens(),
      ElementsAre(AllOf(token("one_\\\ntoken", tok::raw_identifier),
                        hasFlag(LexFlags::StartsPPLine),
                        hasFlag(LexFlags::NeedsCleaning), lineIndent(1, 0),
                        originalIndex(0)),
                  AllOf(token("two", tok::raw_identifier),
                        hasFlag(LexFlags::StartsPPLine),
                        Not(hasFlag(LexFlags::NeedsCleaning)),
                        originalIndex(1)),
                  AllOf(token("\\\ntokens", tok::raw_identifier),
                        Not(hasFlag(LexFlags::StartsPPLine)),
                        hasFlag(LexFlags::NeedsCleaning), originalIndex(2))));

  TokenStream Cooked = cook(Raw, Opts);
  EXPECT_THAT(
      Cooked.tokens(),
      ElementsAre(AllOf(token("one_token", tok::identifier), lineIndent(1, 0),
                        originalIndex(0)),
                  AllOf(token("two", tok::identifier), originalIndex(1)),
                  AllOf(token("tokens", tok::identifier), originalIndex(2))));
}

TEST(TokenTest, EncodedCharacters) {
  LangOptions Opts;
  Opts.Trigraphs = true;
  Opts.Digraphs = true;
  Opts.C99 = true; // UCNs
  Opts.CXXOperatorNames = true;
  std::string Code = R"(and <: ??! '??=' \u00E9)";
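  // One spelling of each encoding: alternative token ("and"), digraph ("<:"),
  // trigraphs ("??!" -> |, "??=" -> #), and a UCN ("\u00E9" -> é).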
  TokenStream Raw = lex(Code, Opts);
  EXPECT_THAT(
      Raw.tokens(),
      ElementsAre( // "and" is not recognized as && until cook().
          AllOf(token("and", tok::raw_identifier),
                Not(hasFlag(LexFlags::NeedsCleaning))),
          // Digraphs are just different spellings of tokens.
          AllOf(token("<:", tok::l_square),
                Not(hasFlag(LexFlags::NeedsCleaning))),
          // Trigraphs are interpreted, but their text still needs cleaning.
          AllOf(token(R"(??!)", tok::pipe), hasFlag(LexFlags::NeedsCleaning)),
          // Trigraphs must be substituted inside constants too.
          AllOf(token(R"('??=')", tok::char_constant),
                hasFlag(LexFlags::NeedsCleaning)),
          // UCNs need substitution.
          AllOf(token(R"(\u00E9)", tok::raw_identifier),
                hasFlag(LexFlags::NeedsCleaning))));

  TokenStream Cooked = cook(Raw, Opts);
  EXPECT_THAT(
      Cooked.tokens(),
      ElementsAre(token("and", tok::ampamp), // alternate spelling recognized
                  token("<:", tok::l_square),
                  token("|", tok::pipe),            // trigraph substituted
                  token("'#'", tok::char_constant), // trigraph substituted
                  token("é", tok::identifier)));    // UCN substituted
}

TEST(TokenTest, Indentation) {
  LangOptions Opts;
  std::string Code = R"cpp(   hello world
no_indent \
  line_was_continued
  )cpp";
  TokenStream Raw = lex(Code, Opts);
  EXPECT_THAT(Raw.tokens(), ElementsAreArray({
                                lineIndent(0, 3), // hello
                                lineIndent(0, 3), // world
                                lineIndent(1, 0), // no_indent
                                lineIndent(2, 2), // line_was_continued
                            }));
}

TEST(TokenTest, SplitGreaterGreater) {
  LangOptions Opts;
  std::string Code = R"cpp(
    >> // split
    // >> with an escaped newline in the middle, split
    >\
>
    >>= // not split
  )cpp";
  TokenStream Cook = cook(lex(Code, Opts), Opts);
  TokenStream Split = stripComments(Cook);
  EXPECT_THAT(Split.tokens(),
              ElementsAre(AllOf(token(">", tok::greater), originalIndex(0)),
                          AllOf(token(">", tok::greater), originalIndex(0)),
                          // Tokens 1 and 2 are comments.
                          AllOf(token(">", tok::greater), originalIndex(3)),
                          AllOf(token(">", tok::greater), originalIndex(3)),
                          AllOf(token(">>=", tok::greatergreaterequal),
                                originalIndex(4))));
}

TEST(TokenTest, DropComments) {
  LangOptions Opts;
  std::string Code = R"cpp(
    // comment
    int /*abc*/;
  )cpp";
  TokenStream Cooked = cook(lex(Code, Opts), Opts);
  TokenStream Stripped = stripComments(Cooked);
  EXPECT_THAT(
      Cooked.tokens(),
      ElementsAre(AllOf(token("// comment", tok::comment), originalIndex(0)),
                  AllOf(token("int", tok::kw_int), originalIndex(1)),
                  AllOf(token("/*abc*/", tok::comment), originalIndex(2)),
                  AllOf(token(";", tok::semi), originalIndex(3))));

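  // After stripping, the comments are gone, but surviving tokens keep their
  // original indices so they can be mapped back to the cooked stream.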
  EXPECT_THAT(Stripped.tokens(),
              ElementsAre(AllOf(token("int", tok::kw_int), originalIndex(1)),
                          AllOf(token(";", tok::semi), originalIndex(3))));
}

} // namespace
} // namespace pseudo
} // namespace clang