| //===--- IncludeCleaner.cpp - Unused/Missing Headers Analysis ---*- C++ -*-===// |
| // |
| // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| // See https://llvm.org/LICENSE.txt for license information. |
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| // |
| //===----------------------------------------------------------------------===// |
| |
| #include "IncludeCleaner.h" |
| #include "Diagnostics.h" |
| #include "Headers.h" |
| #include "ParsedAST.h" |
| #include "Preamble.h" |
| #include "Protocol.h" |
| #include "SourceCode.h" |
| #include "clang-include-cleaner/Analysis.h" |
| #include "clang-include-cleaner/IncludeSpeller.h" |
| #include "clang-include-cleaner/Record.h" |
| #include "clang-include-cleaner/Types.h" |
| #include "support/Logger.h" |
| #include "support/Path.h" |
| #include "support/Trace.h" |
| #include "clang/AST/ASTContext.h" |
| #include "clang/Basic/Diagnostic.h" |
| #include "clang/Basic/LLVM.h" |
| #include "clang/Basic/SourceLocation.h" |
| #include "clang/Basic/SourceManager.h" |
| #include "clang/Format/Format.h" |
| #include "clang/Lex/DirectoryLookup.h" |
| #include "clang/Lex/HeaderSearch.h" |
| #include "clang/Lex/Preprocessor.h" |
| #include "clang/Tooling/Core/Replacement.h" |
| #include "clang/Tooling/Inclusions/HeaderIncludes.h" |
| #include "clang/Tooling/Inclusions/StandardLibrary.h" |
| #include "clang/Tooling/Syntax/Tokens.h" |
| #include "llvm/ADT/ArrayRef.h" |
| #include "llvm/ADT/DenseSet.h" |
| #include "llvm/ADT/GenericUniformityImpl.h" |
| #include "llvm/ADT/STLExtras.h" |
| #include "llvm/ADT/SmallString.h" |
| #include "llvm/ADT/SmallVector.h" |
| #include "llvm/ADT/StringRef.h" |
| #include "llvm/Support/Error.h" |
| #include "llvm/Support/ErrorHandling.h" |
| #include "llvm/Support/FormatVariadic.h" |
| #include "llvm/Support/Path.h" |
| #include "llvm/Support/Regex.h" |
| #include <cassert> |
| #include <iterator> |
| #include <map> |
| #include <optional> |
| #include <string> |
| #include <utility> |
| #include <vector> |
| |
| namespace clang::clangd { |
| namespace { |
| |
| bool isIgnored(llvm::StringRef HeaderPath, HeaderFilter IgnoreHeaders) { |
| // Convert the path to Unix slashes and try to match against the filter. |
| llvm::SmallString<64> NormalizedPath(HeaderPath); |
| llvm::sys::path::native(NormalizedPath, llvm::sys::path::Style::posix); |
| for (auto &Filter : IgnoreHeaders) { |
| if (Filter(NormalizedPath)) |
| return true; |
| } |
| return false; |
| } |
| |
| bool mayConsiderUnused(const Inclusion &Inc, ParsedAST &AST, |
| const include_cleaner::PragmaIncludes *PI, |
| bool AnalyzeAngledIncludes) { |
| assert(Inc.HeaderID); |
| auto HID = static_cast<IncludeStructure::HeaderID>(*Inc.HeaderID); |
| auto FE = AST.getSourceManager().getFileManager().getFileRef( |
| AST.getIncludeStructure().getRealPath(HID)); |
| assert(FE); |
| if (FE->getDir() == AST.getPreprocessor() |
| .getHeaderSearchInfo() |
| .getModuleMap() |
| .getBuiltinDir()) |
| return false; |
| if (PI && PI->shouldKeep(*FE)) |
| return false; |
| // FIXME(kirillbobyrev): We currently do not support the umbrella headers. |
| // System headers are likely to be standard library headers. |
| // Until we have good support for umbrella headers, don't warn about them |
| // (unless analysis is explicitly enabled). |
| if (Inc.Written.front() == '<') { |
| if (tooling::stdlib::Header::named(Inc.Written)) |
| return true; |
| if (!AnalyzeAngledIncludes) |
| return false; |
| } |
| if (PI) { |
| // Check if main file is the public interface for a private header. If so we |
| // shouldn't diagnose it as unused. |
| if (auto PHeader = PI->getPublic(*FE); !PHeader.empty()) { |
| PHeader = PHeader.trim("<>\""); |
| // Since most private -> public mappings happen in a verbatim way, we |
| // check textually here. This might go wrong in presence of symlinks or |
| // header mappings. But that's not different than rest of the places. |
| if (AST.tuPath().ends_with(PHeader)) |
| return false; |
| } |
| } |
| // Headers without include guards have side effects and are not |
| // self-contained, skip them. |
| if (!AST.getPreprocessor().getHeaderSearchInfo().isFileMultipleIncludeGuarded( |
| *FE)) { |
| dlog("{0} doesn't have header guard and will not be considered unused", |
| FE->getName()); |
| return false; |
| } |
| return true; |
| } |
| |
| std::vector<Diag> generateMissingIncludeDiagnostics( |
| ParsedAST &AST, llvm::ArrayRef<MissingIncludeDiagInfo> MissingIncludes, |
| llvm::StringRef Code, HeaderFilter IgnoreHeaders, const ThreadsafeFS &TFS) { |
| std::vector<Diag> Result; |
| const SourceManager &SM = AST.getSourceManager(); |
| const FileEntry *MainFile = SM.getFileEntryForID(SM.getMainFileID()); |
| |
| auto FileStyle = getFormatStyleForFile(AST.tuPath(), Code, TFS, false); |
| |
| tooling::HeaderIncludes HeaderIncludes(AST.tuPath(), Code, |
| FileStyle.IncludeStyle); |
| for (const auto &SymbolWithMissingInclude : MissingIncludes) { |
| llvm::StringRef ResolvedPath = |
| SymbolWithMissingInclude.Providers.front().resolvedPath(); |
| if (isIgnored(ResolvedPath, IgnoreHeaders)) { |
| dlog("IncludeCleaner: not diagnosing missing include {0}, filtered by " |
| "config", |
| ResolvedPath); |
| continue; |
| } |
| |
| std::string Spelling = include_cleaner::spellHeader( |
| {SymbolWithMissingInclude.Providers.front(), |
| AST.getPreprocessor().getHeaderSearchInfo(), MainFile}); |
| |
| llvm::StringRef HeaderRef{Spelling}; |
| bool Angled = HeaderRef.starts_with("<"); |
| // We might suggest insertion of an existing include in edge cases, e.g., |
| // include is present in a PP-disabled region, or spelling of the header |
| // turns out to be the same as one of the unresolved includes in the |
| // main file. |
| std::optional<tooling::Replacement> Replacement = HeaderIncludes.insert( |
| HeaderRef.trim("\"<>"), Angled, tooling::IncludeDirective::Include); |
| if (!Replacement.has_value()) |
| continue; |
| |
| Diag &D = Result.emplace_back(); |
| D.Message = |
| llvm::formatv("No header providing \"{0}\" is directly included", |
| SymbolWithMissingInclude.Symbol.name()); |
| D.Name = "missing-includes"; |
| D.Source = Diag::DiagSource::Clangd; |
| D.File = AST.tuPath(); |
| D.InsideMainFile = true; |
| // We avoid the "warning" severity here in favor of LSP's "information". |
| // |
| // Users treat most warnings on code being edited as high-priority. |
| // They don't think of include cleanups the same way: they want to edit |
| // lines with existing violations without fixing them. |
| // Diagnostics at the same level tend to be visually indistinguishable, |
| // and a few missing includes can cause many diagnostics. |
| // Marking these as "information" leaves them visible, but less intrusive. |
| // |
| // (These concerns don't apply to unused #include warnings: these are fewer, |
| // they appear on infrequently-edited lines with few other warnings, and |
| // the 'Unneccesary' tag often result in a different rendering) |
| // |
| // Usually clang's "note" severity usually has special semantics, being |
| // translated into LSP RelatedInformation of a parent diagnostic. |
| // But not here: these aren't processed by clangd's DiagnosticConsumer. |
| D.Severity = DiagnosticsEngine::Note; |
| D.Range = clangd::Range{ |
| offsetToPosition(Code, |
| SymbolWithMissingInclude.SymRefRange.beginOffset()), |
| offsetToPosition(Code, |
| SymbolWithMissingInclude.SymRefRange.endOffset())}; |
| auto &F = D.Fixes.emplace_back(); |
| F.Message = "#include " + Spelling; |
| TextEdit Edit = replacementToEdit(Code, *Replacement); |
| F.Edits.emplace_back(std::move(Edit)); |
| } |
| return Result; |
| } |
| |
| std::vector<Diag> generateUnusedIncludeDiagnostics( |
| PathRef FileName, llvm::ArrayRef<const Inclusion *> UnusedIncludes, |
| llvm::StringRef Code, HeaderFilter IgnoreHeaders) { |
| std::vector<Diag> Result; |
| for (const auto *Inc : UnusedIncludes) { |
| if (isIgnored(Inc->Resolved, IgnoreHeaders)) |
| continue; |
| Diag &D = Result.emplace_back(); |
| D.Message = |
| llvm::formatv("included header {0} is not used directly", |
| llvm::sys::path::filename( |
| Inc->Written.substr(1, Inc->Written.size() - 2), |
| llvm::sys::path::Style::posix)); |
| D.Name = "unused-includes"; |
| D.Source = Diag::DiagSource::Clangd; |
| D.File = FileName; |
| D.InsideMainFile = true; |
| D.Severity = DiagnosticsEngine::Warning; |
| D.Tags.push_back(Unnecessary); |
| D.Range = rangeTillEOL(Code, Inc->HashOffset); |
| // FIXME(kirillbobyrev): Removing inclusion might break the code if the |
| // used headers are only reachable transitively through this one. Suggest |
| // including them directly instead. |
| // FIXME(kirillbobyrev): Add fix suggestion for adding IWYU pragmas |
| // (keep/export) remove the warning once we support IWYU pragmas. |
| auto &F = D.Fixes.emplace_back(); |
| F.Message = "remove #include directive"; |
| F.Edits.emplace_back(); |
| F.Edits.back().range.start.line = Inc->HashLine; |
| F.Edits.back().range.end.line = Inc->HashLine + 1; |
| } |
| return Result; |
| } |
| |
| std::optional<Fix> |
| removeAllUnusedIncludes(llvm::ArrayRef<Diag> UnusedIncludes) { |
| if (UnusedIncludes.empty()) |
| return std::nullopt; |
| |
| Fix RemoveAll; |
| RemoveAll.Message = "remove all unused includes"; |
| for (const auto &Diag : UnusedIncludes) { |
| assert(Diag.Fixes.size() == 1 && "Expected exactly one fix."); |
| RemoveAll.Edits.insert(RemoveAll.Edits.end(), |
| Diag.Fixes.front().Edits.begin(), |
| Diag.Fixes.front().Edits.end()); |
| } |
| return RemoveAll; |
| } |
| |
| std::optional<Fix> |
| addAllMissingIncludes(llvm::ArrayRef<Diag> MissingIncludeDiags) { |
| if (MissingIncludeDiags.empty()) |
| return std::nullopt; |
| |
| Fix AddAllMissing; |
| AddAllMissing.Message = "add all missing includes"; |
| // A map to deduplicate the edits with the same new text. |
| // newText (#include "my_missing_header.h") -> TextEdit. |
| std::map<std::string, TextEdit> Edits; |
| for (const auto &Diag : MissingIncludeDiags) { |
| assert(Diag.Fixes.size() == 1 && "Expected exactly one fix."); |
| for (const auto &Edit : Diag.Fixes.front().Edits) { |
| Edits.try_emplace(Edit.newText, Edit); |
| } |
| } |
| for (auto &It : Edits) |
| AddAllMissing.Edits.push_back(std::move(It.second)); |
| return AddAllMissing; |
| } |
| Fix fixAll(const Fix &RemoveAllUnused, const Fix &AddAllMissing) { |
| Fix FixAll; |
| FixAll.Message = "fix all includes"; |
| |
| for (const auto &F : RemoveAllUnused.Edits) |
| FixAll.Edits.push_back(F); |
| for (const auto &F : AddAllMissing.Edits) |
| FixAll.Edits.push_back(F); |
| return FixAll; |
| } |
| |
| std::vector<const Inclusion *> |
| getUnused(ParsedAST &AST, |
| const llvm::DenseSet<IncludeStructure::HeaderID> &ReferencedFiles, |
| bool AnalyzeAngledIncludes) { |
| trace::Span Tracer("IncludeCleaner::getUnused"); |
| std::vector<const Inclusion *> Unused; |
| for (const Inclusion &MFI : AST.getIncludeStructure().MainFileIncludes) { |
| if (!MFI.HeaderID) |
| continue; |
| auto IncludeID = static_cast<IncludeStructure::HeaderID>(*MFI.HeaderID); |
| if (ReferencedFiles.contains(IncludeID)) |
| continue; |
| if (!mayConsiderUnused(MFI, AST, &AST.getPragmaIncludes(), |
| AnalyzeAngledIncludes)) { |
| dlog("{0} was not used, but is not eligible to be diagnosed as unused", |
| MFI.Written); |
| continue; |
| } |
| Unused.push_back(&MFI); |
| } |
| return Unused; |
| } |
| |
| } // namespace |
| |
| std::vector<include_cleaner::SymbolReference> |
| collectMacroReferences(ParsedAST &AST) { |
| const auto &SM = AST.getSourceManager(); |
| auto &PP = AST.getPreprocessor(); |
| std::vector<include_cleaner::SymbolReference> Macros; |
| for (const auto &[_, Refs] : AST.getMacros().MacroRefs) { |
| for (const auto &Ref : Refs) { |
| auto Loc = SM.getComposedLoc(SM.getMainFileID(), Ref.StartOffset); |
| const auto *Tok = AST.getTokens().spelledTokenContaining(Loc); |
| if (!Tok) |
| continue; |
| auto Macro = locateMacroAt(*Tok, PP); |
| if (!Macro) |
| continue; |
| auto DefLoc = Macro->NameLoc; |
| if (!DefLoc.isValid()) |
| continue; |
| Macros.push_back( |
| {include_cleaner::Macro{/*Name=*/PP.getIdentifierInfo(Tok->text(SM)), |
| DefLoc}, |
| Tok->location(), |
| Ref.InConditionalDirective ? include_cleaner::RefType::Ambiguous |
| : include_cleaner::RefType::Explicit}); |
| } |
| } |
| |
| return Macros; |
| } |
| |
| include_cleaner::Includes convertIncludes(const ParsedAST &AST) { |
| auto &SM = AST.getSourceManager(); |
| |
| include_cleaner::Includes ConvertedIncludes; |
| // We satisfy Includes's contract that search dirs and included files have |
| // matching path styles: both ultimately use FileManager::getCanonicalName(). |
| for (const auto &Dir : AST.getIncludeStructure().SearchPathsCanonical) |
| ConvertedIncludes.addSearchDirectory(Dir); |
| |
| for (const Inclusion &Inc : AST.getIncludeStructure().MainFileIncludes) { |
| include_cleaner::Include TransformedInc; |
| llvm::StringRef WrittenRef = llvm::StringRef(Inc.Written); |
| TransformedInc.Spelled = WrittenRef.trim("\"<>"); |
| TransformedInc.HashLocation = |
| SM.getComposedLoc(SM.getMainFileID(), Inc.HashOffset); |
| TransformedInc.Line = Inc.HashLine + 1; |
| TransformedInc.Angled = WrittenRef.starts_with("<"); |
| // Inc.Resolved is canonicalized with clangd::getCanonicalPath(), |
| // which is based on FileManager::getCanonicalName(ParentDir). |
| auto FE = SM.getFileManager().getFileRef(Inc.Resolved); |
| if (!FE) { |
| elog("IncludeCleaner: Failed to get an entry for resolved path {0}: {1}", |
| Inc.Resolved, FE.takeError()); |
| continue; |
| } |
| TransformedInc.Resolved = *FE; |
| ConvertedIncludes.add(std::move(TransformedInc)); |
| } |
| return ConvertedIncludes; |
| } |
| |
| IncludeCleanerFindings |
| computeIncludeCleanerFindings(ParsedAST &AST, bool AnalyzeAngledIncludes) { |
| // Interaction is only polished for C/CPP. |
| if (AST.getLangOpts().ObjC) |
| return {}; |
| const auto &SM = AST.getSourceManager(); |
| include_cleaner::Includes ConvertedIncludes = convertIncludes(AST); |
| const FileEntry *MainFile = SM.getFileEntryForID(SM.getMainFileID()); |
| auto PreamblePatch = PreamblePatch::getPatchEntry(AST.tuPath(), SM); |
| |
| std::vector<include_cleaner::SymbolReference> Macros = |
| collectMacroReferences(AST); |
| std::vector<MissingIncludeDiagInfo> MissingIncludes; |
| llvm::DenseSet<IncludeStructure::HeaderID> Used; |
| trace::Span Tracer("include_cleaner::walkUsed"); |
| OptionalDirectoryEntryRef ResourceDir = AST.getPreprocessor() |
| .getHeaderSearchInfo() |
| .getModuleMap() |
| .getBuiltinDir(); |
| include_cleaner::walkUsed( |
| AST.getLocalTopLevelDecls(), /*MacroRefs=*/Macros, |
| &AST.getPragmaIncludes(), AST.getPreprocessor(), |
| [&](const include_cleaner::SymbolReference &Ref, |
| llvm::ArrayRef<include_cleaner::Header> Providers) { |
| bool Satisfied = false; |
| for (const auto &H : Providers) { |
| if (H.kind() == include_cleaner::Header::Physical && |
| (H.physical() == MainFile || H.physical() == PreamblePatch || |
| H.physical().getDir() == ResourceDir)) { |
| Satisfied = true; |
| continue; |
| } |
| for (auto *Inc : ConvertedIncludes.match(H)) { |
| Satisfied = true; |
| auto HeaderID = |
| AST.getIncludeStructure().getID(&Inc->Resolved->getFileEntry()); |
| assert(HeaderID.has_value() && |
| "ConvertedIncludes only contains resolved includes."); |
| Used.insert(*HeaderID); |
| } |
| } |
| |
| if (Satisfied || Providers.empty() || |
| Ref.RT != include_cleaner::RefType::Explicit) |
| return; |
| |
| // Check if we have any headers with the same spelling, in edge cases |
| // like `#include_next "foo.h"`, the user can't ever include the |
| // physical foo.h, but can have a spelling that refers to it. |
| // We postpone this check because spelling a header for every usage is |
| // expensive. |
| std::string Spelling = include_cleaner::spellHeader( |
| {Providers.front(), AST.getPreprocessor().getHeaderSearchInfo(), |
| MainFile}); |
| for (auto *Inc : |
| ConvertedIncludes.match(include_cleaner::Header{Spelling})) { |
| Satisfied = true; |
| auto HeaderID = |
| AST.getIncludeStructure().getID(&Inc->Resolved->getFileEntry()); |
| assert(HeaderID.has_value() && |
| "ConvertedIncludes only contains resolved includes."); |
| Used.insert(*HeaderID); |
| } |
| if (Satisfied) |
| return; |
| |
| // We actually always want to map usages to their spellings, but |
| // spelling locations can point into preamble section. Using these |
| // offsets could lead into crashes in presence of stale preambles. Hence |
| // we use "getFileLoc" instead to make sure it always points into main |
| // file. |
| // FIXME: Use presumed locations to map such usages back to patched |
| // locations safely. |
| auto Loc = SM.getFileLoc(Ref.RefLocation); |
| // File locations can be outside of the main file if macro is expanded |
| // through an #include. |
| while (SM.getFileID(Loc) != SM.getMainFileID()) |
| Loc = SM.getIncludeLoc(SM.getFileID(Loc)); |
| auto TouchingTokens = |
| syntax::spelledTokensTouching(Loc, AST.getTokens()); |
| assert(!TouchingTokens.empty()); |
| // Loc points to the start offset of the ref token, here we use the last |
| // element of the TouchingTokens, e.g. avoid getting the "::" for |
| // "ns::^abc". |
| MissingIncludeDiagInfo DiagInfo{ |
| Ref.Target, TouchingTokens.back().range(SM), Providers}; |
| MissingIncludes.push_back(std::move(DiagInfo)); |
| }); |
| // Put possibly equal diagnostics together for deduplication. |
| // The duplicates might be from macro arguments that get expanded multiple |
| // times. |
| llvm::stable_sort(MissingIncludes, [](const MissingIncludeDiagInfo &LHS, |
| const MissingIncludeDiagInfo &RHS) { |
| // First sort by reference location. |
| if (LHS.SymRefRange != RHS.SymRefRange) { |
| // We can get away just by comparing the offsets as all the ranges are in |
| // main file. |
| return LHS.SymRefRange.beginOffset() < RHS.SymRefRange.beginOffset(); |
| } |
| // For the same location, break ties using the symbol. Note that this won't |
| // be stable across runs. |
| using MapInfo = llvm::DenseMapInfo<include_cleaner::Symbol>; |
| return MapInfo::getHashValue(LHS.Symbol) < |
| MapInfo::getHashValue(RHS.Symbol); |
| }); |
| MissingIncludes.erase(llvm::unique(MissingIncludes), MissingIncludes.end()); |
| std::vector<const Inclusion *> UnusedIncludes = |
| getUnused(AST, Used, AnalyzeAngledIncludes); |
| return {std::move(UnusedIncludes), std::move(MissingIncludes)}; |
| } |
| |
| bool isPreferredProvider(const Inclusion &Inc, |
| const include_cleaner::Includes &Includes, |
| llvm::ArrayRef<include_cleaner::Header> Providers) { |
| for (const auto &H : Providers) { |
| auto Matches = Includes.match(H); |
| for (const include_cleaner::Include *Match : Matches) |
| if (Match->Line == unsigned(Inc.HashLine + 1)) |
| return true; // this header is (equal) best |
| if (!Matches.empty()) |
| return false; // another header is better |
| } |
| return false; // no header provides the symbol |
| } |
| |
| std::vector<Diag> |
| issueIncludeCleanerDiagnostics(ParsedAST &AST, llvm::StringRef Code, |
| const IncludeCleanerFindings &Findings, |
| const ThreadsafeFS &TFS, |
| HeaderFilter IgnoreHeaders) { |
| trace::Span Tracer("IncludeCleaner::issueIncludeCleanerDiagnostics"); |
| std::vector<Diag> UnusedIncludes = generateUnusedIncludeDiagnostics( |
| AST.tuPath(), Findings.UnusedIncludes, Code, IgnoreHeaders); |
| std::optional<Fix> RemoveAllUnused = removeAllUnusedIncludes(UnusedIncludes); |
| |
| std::vector<Diag> MissingIncludeDiags = generateMissingIncludeDiagnostics( |
| AST, Findings.MissingIncludes, Code, IgnoreHeaders, TFS); |
| std::optional<Fix> AddAllMissing = addAllMissingIncludes(MissingIncludeDiags); |
| |
| std::optional<Fix> FixAll; |
| if (RemoveAllUnused && AddAllMissing) |
| FixAll = fixAll(*RemoveAllUnused, *AddAllMissing); |
| |
| auto AddBatchFix = [](const std::optional<Fix> &F, clang::clangd::Diag *Out) { |
| if (!F) |
| return; |
| Out->Fixes.push_back(*F); |
| }; |
| for (auto &Diag : MissingIncludeDiags) { |
| AddBatchFix(MissingIncludeDiags.size() > 1 ? AddAllMissing : std::nullopt, |
| &Diag); |
| AddBatchFix(FixAll, &Diag); |
| } |
| for (auto &Diag : UnusedIncludes) { |
| AddBatchFix(UnusedIncludes.size() > 1 ? RemoveAllUnused : std::nullopt, |
| &Diag); |
| AddBatchFix(FixAll, &Diag); |
| } |
| |
| auto Result = std::move(MissingIncludeDiags); |
| llvm::move(UnusedIncludes, std::back_inserter(Result)); |
| return Result; |
| } |
| |
| } // namespace clang::clangd |