| //===-- ClangHighlighter.cpp ----------------------------------------------===// |
| // |
| // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| // See https://llvm.org/LICENSE.txt for license information. |
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| // |
| //===----------------------------------------------------------------------===// |
| |
| #include "ClangHighlighter.h" |
| |
| #include "lldb/Host/FileSystem.h" |
| #include "lldb/Target/Language.h" |
| #include "lldb/Utility/AnsiTerminal.h" |
| #include "lldb/Utility/StreamString.h" |
| |
| #include "clang/Basic/FileManager.h" |
| #include "clang/Basic/SourceManager.h" |
| #include "clang/Lex/Lexer.h" |
| #include "llvm/ADT/StringSet.h" |
| #include "llvm/Support/MemoryBuffer.h" |
| |
| using namespace lldb_private; |
| |
| bool ClangHighlighter::isKeyword(llvm::StringRef token) const { |
| return keywords.find(token) != keywords.end(); |
| } |
| |
| ClangHighlighter::ClangHighlighter() { |
| #define KEYWORD(X, N) keywords.insert(#X); |
| #include "clang/Basic/TokenKinds.def" |
| } |
| |
| /// Determines which style should be applied to the given token. |
| /// \param highlighter |
| /// The current highlighter that should use the style. |
| /// \param token |
| /// The current token. |
| /// \param tok_str |
| /// The string in the source code the token represents. |
| /// \param options |
| /// The style we use for coloring the source code. |
| /// \param in_pp_directive |
| /// If we are currently in a preprocessor directive. NOTE: This is |
| /// passed by reference and will be updated if the current token starts |
| /// or ends a preprocessor directive. |
| /// \return |
| /// The ColorStyle that should be applied to the token. |
| static HighlightStyle::ColorStyle |
| determineClangStyle(const ClangHighlighter &highlighter, |
| const clang::Token &token, llvm::StringRef tok_str, |
| const HighlightStyle &options, bool &in_pp_directive) { |
| using namespace clang; |
| |
| if (token.is(tok::comment)) { |
| // If we were in a preprocessor directive before, we now left it. |
| in_pp_directive = false; |
| return options.comment; |
| } else if (in_pp_directive || token.getKind() == tok::hash) { |
| // Let's assume that the rest of the line is a PP directive. |
| in_pp_directive = true; |
| // Preprocessor directives are hard to match, so we have to hack this in. |
| return options.pp_directive; |
| } else if (tok::isStringLiteral(token.getKind())) |
| return options.string_literal; |
| else if (tok::isLiteral(token.getKind())) |
| return options.scalar_literal; |
| else if (highlighter.isKeyword(tok_str)) |
| return options.keyword; |
| else |
| switch (token.getKind()) { |
| case tok::raw_identifier: |
| case tok::identifier: |
| return options.identifier; |
| case tok::l_brace: |
| case tok::r_brace: |
| return options.braces; |
| case tok::l_square: |
| case tok::r_square: |
| return options.square_brackets; |
| case tok::l_paren: |
| case tok::r_paren: |
| return options.parentheses; |
| case tok::comma: |
| return options.comma; |
| case tok::coloncolon: |
| case tok::colon: |
| return options.colon; |
| |
| case tok::amp: |
| case tok::ampamp: |
| case tok::ampequal: |
| case tok::star: |
| case tok::starequal: |
| case tok::plus: |
| case tok::plusplus: |
| case tok::plusequal: |
| case tok::minus: |
| case tok::arrow: |
| case tok::minusminus: |
| case tok::minusequal: |
| case tok::tilde: |
| case tok::exclaim: |
| case tok::exclaimequal: |
| case tok::slash: |
| case tok::slashequal: |
| case tok::percent: |
| case tok::percentequal: |
| case tok::less: |
| case tok::lessless: |
| case tok::lessequal: |
| case tok::lesslessequal: |
| case tok::spaceship: |
| case tok::greater: |
| case tok::greatergreater: |
| case tok::greaterequal: |
| case tok::greatergreaterequal: |
| case tok::caret: |
| case tok::caretequal: |
| case tok::pipe: |
| case tok::pipepipe: |
| case tok::pipeequal: |
| case tok::question: |
| case tok::equal: |
| case tok::equalequal: |
| return options.operators; |
| default: |
| break; |
| } |
| return HighlightStyle::ColorStyle(); |
| } |
| |
| void ClangHighlighter::Highlight(const HighlightStyle &options, |
| llvm::StringRef line, |
| llvm::Optional<size_t> cursor_pos, |
| llvm::StringRef previous_lines, |
| Stream &result) const { |
| using namespace clang; |
| |
| FileSystemOptions file_opts; |
| FileManager file_mgr(file_opts, |
| FileSystem::Instance().GetVirtualFileSystem()); |
| |
| // The line might end in a backslash which would cause Clang to drop the |
| // backslash and the terminating new line. This makes sense when parsing C++, |
| // but when highlighting we care about preserving the backslash/newline. To |
| // not lose this information we remove the new line here so that Clang knows |
| // this is just a single line we are highlighting. We add back the newline |
| // after tokenizing. |
| llvm::StringRef line_ending = ""; |
| // There are a few legal line endings Clang recognizes and we need to |
| // temporarily remove from the string. |
| if (line.consume_back("\r\n")) |
| line_ending = "\r\n"; |
| else if (line.consume_back("\n")) |
| line_ending = "\n"; |
| else if (line.consume_back("\r")) |
| line_ending = "\r"; |
| |
| unsigned line_number = previous_lines.count('\n') + 1U; |
| |
| // Let's build the actual source code Clang needs and setup some utility |
| // objects. |
| std::string full_source = previous_lines.str() + line.str(); |
| llvm::IntrusiveRefCntPtr<DiagnosticIDs> diag_ids(new DiagnosticIDs()); |
| llvm::IntrusiveRefCntPtr<DiagnosticOptions> diags_opts( |
| new DiagnosticOptions()); |
| DiagnosticsEngine diags(diag_ids, diags_opts); |
| clang::SourceManager SM(diags, file_mgr); |
| auto buf = llvm::MemoryBuffer::getMemBuffer(full_source); |
| |
| FileID FID = SM.createFileID(clang::SourceManager::Unowned, buf.get()); |
| |
| // Let's just enable the latest ObjC and C++ which should get most tokens |
| // right. |
| LangOptions Opts; |
| Opts.ObjC = true; |
| // FIXME: This should probably set CPlusPlus, CPlusPlus11, ... too |
| Opts.CPlusPlus17 = true; |
| Opts.LineComment = true; |
| |
| Lexer lex(FID, buf.get(), SM, Opts); |
| // The lexer should keep whitespace around. |
| lex.SetKeepWhitespaceMode(true); |
| |
| // Keeps track if we have entered a PP directive. |
| bool in_pp_directive = false; |
| |
| // True once we actually lexed the user provided line. |
| bool found_user_line = false; |
| |
| // True if we already highlighted the token under the cursor, false otherwise. |
| bool highlighted_cursor = false; |
| Token token; |
| bool exit = false; |
| while (!exit) { |
| // Returns true if this is the last token we get from the lexer. |
| exit = lex.LexFromRawLexer(token); |
| |
| bool invalid = false; |
| unsigned current_line_number = |
| SM.getSpellingLineNumber(token.getLocation(), &invalid); |
| if (current_line_number != line_number) |
| continue; |
| found_user_line = true; |
| |
| // We don't need to print any tokens without a spelling line number. |
| if (invalid) |
| continue; |
| |
| // Same as above but with the column number. |
| invalid = false; |
| unsigned start = SM.getSpellingColumnNumber(token.getLocation(), &invalid); |
| if (invalid) |
| continue; |
| // Column numbers start at 1, but indexes in our string start at 0. |
| --start; |
| |
| // Annotations don't have a length, so let's skip them. |
| if (token.isAnnotation()) |
| continue; |
| |
| // Extract the token string from our source code. |
| llvm::StringRef tok_str = line.substr(start, token.getLength()); |
| |
| // If the token is just an empty string, we can skip all the work below. |
| if (tok_str.empty()) |
| continue; |
| |
| // If the cursor is inside this token, we have to apply the 'selected' |
| // highlight style before applying the actual token color. |
| llvm::StringRef to_print = tok_str; |
| StreamString storage; |
| auto end = start + token.getLength(); |
| if (cursor_pos && end > *cursor_pos && !highlighted_cursor) { |
| highlighted_cursor = true; |
| options.selected.Apply(storage, tok_str); |
| to_print = storage.GetString(); |
| } |
| |
| // See how we are supposed to highlight this token. |
| HighlightStyle::ColorStyle color = |
| determineClangStyle(*this, token, tok_str, options, in_pp_directive); |
| |
| color.Apply(result, to_print); |
| } |
| |
| // Add the line ending we trimmed before tokenizing. |
| result << line_ending; |
| |
| // If we went over the whole file but couldn't find our own file, then |
| // somehow our setup was wrong. When we're in release mode we just give the |
| // user the normal line and pretend we don't know how to highlight it. In |
| // debug mode we bail out with an assert as this should never happen. |
| if (!found_user_line) { |
| result << line; |
| assert(false && "We couldn't find the user line in the input file?"); |
| } |
| } |