clang-tools  14.0.0git
IncludeCleaner.cpp
Go to the documentation of this file.
1 //===--- IncludeCleaner.cpp - Unused/Missing Headers Analysis ---*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "IncludeCleaner.h"
10 #include "Config.h"
11 #include "Headers.h"
12 #include "ParsedAST.h"
13 #include "Protocol.h"
14 #include "SourceCode.h"
15 #include "support/Logger.h"
16 #include "support/Trace.h"
17 #include "clang/AST/ExprCXX.h"
18 #include "clang/AST/RecursiveASTVisitor.h"
19 #include "clang/Basic/SourceLocation.h"
20 #include "clang/Basic/SourceManager.h"
21 #include "clang/Lex/HeaderSearch.h"
22 #include "clang/Lex/Preprocessor.h"
23 #include "clang/Tooling/Syntax/Tokens.h"
24 #include "llvm/Support/FormatVariadic.h"
25 #include "llvm/Support/Path.h"
26 
27 namespace clang {
28 namespace clangd {
29 namespace {
30 
31 /// Crawler traverses the AST and feeds in the locations of (sometimes
32 /// implicitly) used symbols into \p Result.
33 class ReferencedLocationCrawler
34  : public RecursiveASTVisitor<ReferencedLocationCrawler> {
35 public:
36  ReferencedLocationCrawler(ReferencedLocations &Result) : Result(Result) {}
37 
38  bool VisitDeclRefExpr(DeclRefExpr *DRE) {
39  add(DRE->getDecl());
40  add(DRE->getFoundDecl());
41  return true;
42  }
43 
44  bool VisitMemberExpr(MemberExpr *ME) {
45  add(ME->getMemberDecl());
46  add(ME->getFoundDecl().getDecl());
47  return true;
48  }
49 
50  bool VisitTagType(TagType *TT) {
51  add(TT->getDecl());
52  return true;
53  }
54 
55  bool VisitFunctionDecl(FunctionDecl *FD) {
56  // Function definition will require redeclarations to be included.
57  if (FD->isThisDeclarationADefinition())
58  add(FD);
59  return true;
60  }
61 
62  bool VisitCXXConstructExpr(CXXConstructExpr *CCE) {
63  add(CCE->getConstructor());
64  return true;
65  }
66 
67  bool VisitTemplateSpecializationType(TemplateSpecializationType *TST) {
68  if (isNew(TST)) {
69  add(TST->getTemplateName().getAsTemplateDecl()); // Primary template.
70  add(TST->getAsCXXRecordDecl()); // Specialization
71  }
72  return true;
73  }
74 
75  bool VisitTypedefType(TypedefType *TT) {
76  add(TT->getDecl());
77  return true;
78  }
79 
80  // Consider types of any subexpression used, even if the type is not named.
81  // This is helpful in getFoo().bar(), where Foo must be complete.
82  // FIXME(kirillbobyrev): Should we tweak this? It may not be desirable to
83  // consider types "used" when they are not directly spelled in code.
84  bool VisitExpr(Expr *E) {
85  TraverseType(E->getType());
86  return true;
87  }
88 
89  bool TraverseType(QualType T) {
90  if (isNew(T.getTypePtrOrNull())) // don't care about quals
91  Base::TraverseType(T);
92  return true;
93  }
94 
95  bool VisitUsingDecl(UsingDecl *D) {
96  for (const auto *Shadow : D->shadows())
97  add(Shadow->getTargetDecl());
98  return true;
99  }
100 
101  // Enums may be usefully forward-declared as *complete* types by specifying
102  // an underlying type. In this case, the definition should see the declaration
103  // so they can be checked for compatibility.
104  bool VisitEnumDecl(EnumDecl *D) {
105  if (D->isThisDeclarationADefinition() && D->getIntegerTypeSourceInfo())
106  add(D);
107  return true;
108  }
109 
110  // When the overload is not resolved yet, mark all candidates as used.
111  bool VisitOverloadExpr(OverloadExpr *E) {
112  for (const auto *ResolutionDecl : E->decls())
113  add(ResolutionDecl);
114  return true;
115  }
116 
117 private:
119 
120  void add(const Decl *D) {
121  if (!D || !isNew(D->getCanonicalDecl()))
122  return;
123  for (const Decl *Redecl : D->redecls())
124  Result.insert(Redecl->getLocation());
125  }
126 
127  bool isNew(const void *P) { return P && Visited.insert(P).second; }
128 
129  ReferencedLocations &Result;
130  llvm::DenseSet<const void *> Visited;
131 };
132 
133 // Given a set of referenced FileIDs, determines all the potentially-referenced
134 // files and macros by traversing expansion/spelling locations of macro IDs.
135 // This is used to map the referenced SourceLocations onto real files.
136 struct ReferencedFiles {
137  ReferencedFiles(const SourceManager &SM) : SM(SM) {}
138  llvm::DenseSet<FileID> Files;
139  llvm::DenseSet<FileID> Macros;
140  const SourceManager &SM;
141 
142  void add(SourceLocation Loc) { add(SM.getFileID(Loc), Loc); }
143 
144  void add(FileID FID, SourceLocation Loc) {
145  if (FID.isInvalid())
146  return;
147  assert(SM.isInFileID(Loc, FID));
148  if (Loc.isFileID()) {
149  Files.insert(FID);
150  return;
151  }
152  // Don't process the same macro FID twice.
153  if (!Macros.insert(FID).second)
154  return;
155  const auto &Exp = SM.getSLocEntry(FID).getExpansion();
156  add(Exp.getSpellingLoc());
157  add(Exp.getExpansionLocStart());
158  add(Exp.getExpansionLocEnd());
159  }
160 };
161 
162 // Returns the range starting at '#' and ending at EOL. Escaped newlines are not
163 // handled.
164 clangd::Range getDiagnosticRange(llvm::StringRef Code, unsigned HashOffset) {
165  clangd::Range Result;
166  Result.end = Result.start = offsetToPosition(Code, HashOffset);
167 
168  // Span the warning until the EOL or EOF.
169  Result.end.character +=
170  lspLength(Code.drop_front(HashOffset).take_until([](char C) {
171  return C == '\n' || C == '\r';
172  }));
173  return Result;
174 }
175 
176 // Finds locations of macros referenced from within the main file. That includes
177 // references that were not yet expanded, e.g `BAR` in `#define FOO BAR`.
178 void findReferencedMacros(ParsedAST &AST, ReferencedLocations &Result) {
179  trace::Span Tracer("IncludeCleaner::findReferencedMacros");
180  auto &SM = AST.getSourceManager();
181  auto &PP = AST.getPreprocessor();
182  // FIXME(kirillbobyrev): The macros from the main file are collected in
183  // ParsedAST's MainFileMacros. However, we can't use it here because it
184  // doesn't handle macro references that were not expanded, e.g. in macro
185  // definitions or preprocessor-disabled sections.
186  //
187  // Extending MainFileMacros to collect missing references and switching to
188  // this mechanism (as opposed to iterating through all tokens) will improve
189  // the performance of findReferencedMacros and also improve other features
190  // relying on MainFileMacros.
191  for (const syntax::Token &Tok :
192  AST.getTokens().spelledTokens(SM.getMainFileID())) {
193  auto Macro = locateMacroAt(Tok, PP);
194  if (!Macro)
195  continue;
196  auto Loc = Macro->Info->getDefinitionLoc();
197  if (Loc.isValid())
198  Result.insert(Loc);
199  }
200 }
201 
202 bool mayConsiderUnused(const Inclusion &Inc, ParsedAST &AST) {
203  // FIXME(kirillbobyrev): We currently do not support the umbrella headers.
204  // Standard Library headers are typically umbrella headers, and system
205  // headers are likely to be the Standard Library headers. Until we have a
206  // good support for umbrella headers and Standard Library headers, don't warn
207  // about them.
208  if (Inc.Written.front() == '<')
209  return false;
210  // Headers without include guards have side effects and are not
211  // self-contained, skip them.
212  assert(Inc.HeaderID);
213  auto FE = AST.getSourceManager().getFileManager().getFile(
215  static_cast<IncludeStructure::HeaderID>(*Inc.HeaderID)));
216  assert(FE);
217  if (!AST.getPreprocessor().getHeaderSearchInfo().isFileMultipleIncludeGuarded(
218  *FE)) {
219  dlog("{0} doesn't have header guard and will not be considered unused",
220  (*FE)->getName());
221  return false;
222  }
223  return true;
224 }
225 
226 // In case symbols are coming from non self-contained header, we need to find
227 // its first includer that is self-contained. This is the header users can
228 // include, so it will be responsible for bringing the symbols from given
229 // header into the scope.
230 FileID headerResponsible(FileID ID, const SourceManager &SM,
231  const IncludeStructure &Includes) {
232  // Unroll the chain of non self-contained headers until we find the one that
233  // can be included.
234  for (const FileEntry *FE = SM.getFileEntryForID(ID); ID != SM.getMainFileID();
235  FE = SM.getFileEntryForID(ID)) {
236  // If FE is nullptr, we consider it to be the responsible header.
237  if (!FE)
238  break;
239  auto HID = Includes.getID(FE);
240  assert(HID && "We're iterating over headers already existing in "
241  "IncludeStructure");
242  if (Includes.isSelfContained(*HID))
243  break;
244  // The header is not self-contained: put the responsibility for its symbols
245  // on its includer.
246  ID = SM.getFileID(SM.getIncludeLoc(ID));
247  }
248  return ID;
249 }
250 
251 } // namespace
252 
254  trace::Span Tracer("IncludeCleaner::findReferencedLocations");
255  ReferencedLocations Result;
256  ReferencedLocationCrawler Crawler(Result);
257  Crawler.TraverseAST(AST.getASTContext());
258  findReferencedMacros(AST, Result);
259  return Result;
260 }
261 
262 llvm::DenseSet<FileID>
263 findReferencedFiles(const llvm::DenseSet<SourceLocation> &Locs,
264  const IncludeStructure &Includes, const SourceManager &SM) {
265  std::vector<SourceLocation> Sorted{Locs.begin(), Locs.end()};
266  llvm::sort(Sorted); // Group by FileID.
267  ReferencedFiles Files(SM);
268  for (auto It = Sorted.begin(); It < Sorted.end();) {
269  FileID FID = SM.getFileID(*It);
270  Files.add(FID, *It);
271  // Cheaply skip over all the other locations from the same FileID.
272  // This avoids lots of redundant Loc->File lookups for the same file.
273  do
274  ++It;
275  while (It != Sorted.end() && SM.isInFileID(*It, FID));
276  }
277  // If a header is not self-contained, we consider its symbols a logical part
278  // of the including file. Therefore, mark the parents of all used
279  // non-self-contained FileIDs as used. Perform this on FileIDs rather than
280  // HeaderIDs, as each inclusion of a non-self-contained file is distinct.
281  llvm::DenseSet<FileID> Result;
282  for (FileID ID : Files.Files)
283  Result.insert(headerResponsible(ID, SM, Includes));
284  return Result;
285 }
286 
287 std::vector<const Inclusion *>
289  const llvm::DenseSet<IncludeStructure::HeaderID> &ReferencedFiles) {
290  trace::Span Tracer("IncludeCleaner::getUnused");
291  std::vector<const Inclusion *> Unused;
292  for (const Inclusion &MFI : AST.getIncludeStructure().MainFileIncludes) {
293  if (!MFI.HeaderID)
294  continue;
295  auto IncludeID = static_cast<IncludeStructure::HeaderID>(*MFI.HeaderID);
296  bool Used = ReferencedFiles.contains(IncludeID);
297  if (!Used && !mayConsiderUnused(MFI, AST)) {
298  dlog("{0} was not used, but is not eligible to be diagnosed as unused",
299  MFI.Written);
300  continue;
301  }
302  if (!Used)
303  Unused.push_back(&MFI);
304  dlog("{0} is {1}", MFI.Written, Used ? "USED" : "UNUSED");
305  }
306  return Unused;
307 }
308 
309 #ifndef NDEBUG
310 // Is FID a <built-in>, <scratch space> etc?
311 static bool isSpecialBuffer(FileID FID, const SourceManager &SM) {
312  const SrcMgr::FileInfo &FI = SM.getSLocEntry(FID).getFile();
313  return FI.getName().startswith("<");
314 }
315 #endif
316 
317 llvm::DenseSet<IncludeStructure::HeaderID>
318 translateToHeaderIDs(const llvm::DenseSet<FileID> &Files,
319  const IncludeStructure &Includes,
320  const SourceManager &SM) {
321  trace::Span Tracer("IncludeCleaner::translateToHeaderIDs");
322  llvm::DenseSet<IncludeStructure::HeaderID> TranslatedHeaderIDs;
323  TranslatedHeaderIDs.reserve(Files.size());
324  for (FileID FID : Files) {
325  const FileEntry *FE = SM.getFileEntryForID(FID);
326  if (!FE) {
327  assert(isSpecialBuffer(FID, SM));
328  continue;
329  }
330  const auto File = Includes.getID(FE);
331  assert(File);
332  TranslatedHeaderIDs.insert(*File);
333  }
334  return TranslatedHeaderIDs;
335 }
336 
337 std::vector<const Inclusion *> computeUnusedIncludes(ParsedAST &AST) {
338  const auto &SM = AST.getSourceManager();
339 
340  auto Refs = findReferencedLocations(AST);
341  auto ReferencedFileIDs = findReferencedFiles(Refs, AST.getIncludeStructure(),
342  AST.getSourceManager());
343  auto ReferencedHeaders =
344  translateToHeaderIDs(ReferencedFileIDs, AST.getIncludeStructure(), SM);
345  return getUnused(AST, ReferencedHeaders);
346 }
347 
349  llvm::StringRef Code) {
350  const Config &Cfg = Config::current();
351  if (Cfg.Diagnostics.UnusedIncludes != Config::UnusedIncludesPolicy::Strict ||
352  Cfg.Diagnostics.SuppressAll ||
353  Cfg.Diagnostics.Suppress.contains("unused-includes"))
354  return {};
355  trace::Span Tracer("IncludeCleaner::issueUnusedIncludesDiagnostics");
356  std::vector<Diag> Result;
357  std::string FileName =
358  AST.getSourceManager()
359  .getFileEntryForID(AST.getSourceManager().getMainFileID())
360  ->getName()
361  .str();
362  for (const auto *Inc : computeUnusedIncludes(AST)) {
363  Diag D;
364  D.Message =
365  llvm::formatv("included header {0} is not used",
366  llvm::sys::path::filename(
367  Inc->Written.substr(1, Inc->Written.size() - 2),
368  llvm::sys::path::Style::posix));
369  D.Name = "unused-includes";
370  D.Source = Diag::DiagSource::Clangd;
371  D.File = FileName;
372  D.Severity = DiagnosticsEngine::Warning;
373  D.Tags.push_back(Unnecessary);
374  D.Range = getDiagnosticRange(Code, Inc->HashOffset);
375  // FIXME(kirillbobyrev): Removing inclusion might break the code if the
376  // used headers are only reachable transitively through this one. Suggest
377  // including them directly instead.
378  // FIXME(kirillbobyrev): Add fix suggestion for adding IWYU pragmas
379  // (keep/export) remove the warning once we support IWYU pragmas.
380  D.Fixes.emplace_back();
381  D.Fixes.back().Message = "remove #include directive";
382  D.Fixes.back().Edits.emplace_back();
383  D.Fixes.back().Edits.back().range.start.line = Inc->HashLine;
384  D.Fixes.back().Edits.back().range.end.line = Inc->HashLine + 1;
385  D.InsideMainFile = true;
386  Result.push_back(std::move(D));
387  }
388  return Result;
389 }
390 
391 } // namespace clangd
392 } // namespace clang
dlog
#define dlog(...)
Definition: Logger.h:102
Range
CharSourceRange Range
SourceRange for the file name.
Definition: IncludeOrderCheck.cpp:38
Loc
SourceLocation Loc
Definition: KernelNameRestrictionCheck.cpp:45
Base
std::unique_ptr< GlobalCompilationDatabase > Base
Definition: GlobalCompilationDatabaseTests.cpp:89
Headers.h
IncludeCleaner.h
RecursiveASTVisitor
clang::clangd::isSpecialBuffer
static bool isSpecialBuffer(FileID FID, const SourceManager &SM)
Definition: IncludeCleaner.cpp:311
clang::clangd::locateMacroAt
llvm::Optional< DefinedMacro > locateMacroAt(const syntax::Token &SpelledTok, Preprocessor &PP)
Gets the macro referenced by SpelledTok.
Definition: SourceCode.cpp:976
E
const Expr * E
Definition: AvoidBindCheck.cpp:88
clang::clangd::translateToHeaderIDs
llvm::DenseSet< IncludeStructure::HeaderID > translateToHeaderIDs(const llvm::DenseSet< FileID > &Files, const IncludeStructure &Includes, const SourceManager &SM)
Maps FileIDs to the internal IncludeStructure representation (HeaderIDs).
Definition: IncludeCleaner.cpp:318
Refs
RefSlab Refs
Definition: SymbolCollectorTests.cpp:311
Macros
llvm::DenseSet< FileID > Macros
Definition: IncludeCleaner.cpp:139
Tracer
std::unique_ptr< trace::EventTracer > Tracer
Definition: TraceTests.cpp:164
clang::clangd::IncludeStructure::getRealPath
StringRef getRealPath(HeaderID ID) const
Definition: Headers.h:138
clang::clangd::IncludeStructure::getID
llvm::Optional< HeaderID > getID(const FileEntry *Entry) const
Definition: Headers.cpp:169
clang::clangd::ParsedAST::getIncludeStructure
const IncludeStructure & getIncludeStructure() const
Definition: ParsedAST.cpp:609
clang::clangd::ParsedAST::getASTContext
ASTContext & getASTContext()
Note that the returned ast will not contain decls from the preamble that were not deserialized during...
Definition: ParsedAST.cpp:554
clang::clangd::lspLength
size_t lspLength(llvm::StringRef Code)
Definition: SourceCode.cpp:151
clang::clangd::issueUnusedIncludesDiagnostics
std::vector< Diag > issueUnusedIncludesDiagnostics(ParsedAST &AST, llvm::StringRef Code)
Definition: IncludeCleaner.cpp:348
Trace.h
clang::clangd::Config
Settings that express user/project preferences and control clangd behavior.
Definition: Config.h:43
clang::clangd::IncludeStructure::HeaderID
HeaderID
Definition: Headers.h:133
clang::clangd::IncludeStructure::MainFileIncludes
std::vector< Inclusion > MainFileIncludes
Definition: Headers.h:160
clang::clangd::HighlightingKind::Macro
@ Macro
Protocol.h
clang::clangd::IncludeStructure
Definition: Headers.h:119
Code
std::string Code
Definition: FindTargetTests.cpp:67
ns1::ns2::D
@ D
Definition: CategoricalFeature.h:3
Decl
const FunctionDecl * Decl
Definition: AvoidBindCheck.cpp:100
clang::clangd::Diag
A top-level diagnostic that may have Notes and Fixes.
Definition: Diagnostics.h:97
clang::clangd::Config::Suppress
llvm::StringSet Suppress
Definition: Config.h:93
Logger.h
FileName
StringRef FileName
Definition: KernelNameRestrictionCheck.cpp:46
clang::clangd::offsetToPosition
Position offsetToPosition(llvm::StringRef Code, size_t Offset)
Turn an offset in Code into a [line, column] pair.
Definition: SourceCode.cpp:204
clang::tidy::readability::Unused
@ Unused
Definition: MakeMemberFunctionConstCheck.cpp:52
clang::clangd::Config::SuppressAll
bool SuppressAll
Definition: Config.h:92
Files
llvm::DenseSet< FileID > Files
Definition: IncludeCleaner.cpp:138
clang::clangd::ParsedAST::getPreprocessor
Preprocessor & getPreprocessor()
Definition: ParsedAST.cpp:560
clang::clangd::Config::UnusedIncludes
UnusedIncludesPolicy UnusedIncludes
Definition: Config.h:102
clang::tidy::bugprone::PP
static Preprocessor * PP
Definition: BadSignalToKillThreadCheck.cpp:29
SourceCode.h
Config.h
clang::clangd::computeUnusedIncludes
std::vector< const Inclusion * > computeUnusedIncludes(ParsedAST &AST)
Definition: IncludeCleaner.cpp:337
clang::clangd::Inclusion
Definition: Headers.h:59
clang::clangd::findReferencedLocations
ReferencedLocations findReferencedLocations(ParsedAST &AST)
Finds locations of all symbols used in the main file.
Definition: IncludeCleaner.cpp:253
ID
static char ID
Definition: Logger.cpp:74
clang::clangd::getUnused
std::vector< const Inclusion * > getUnused(ParsedAST &AST, const llvm::DenseSet< IncludeStructure::HeaderID > &ReferencedFiles)
Retrieves headers that are referenced from the main file but not used.
Definition: IncludeCleaner.cpp:288
C
const Criteria C
Definition: FunctionCognitiveComplexityCheck.cpp:93
clang
===-- Representation.cpp - ClangDoc Representation -----------*- C++ -*-===//
Definition: ApplyReplacements.h:27
clang::clangd::ParsedAST::getTokens
const syntax::TokenBuffer & getTokens() const
Tokens recorded while parsing the main file.
Definition: ParsedAST.h:108
clang::clangd::Config::current
static const Config & current()
Returns the Config of the current Context, or an empty configuration.
Definition: Config.cpp:17
clang::clangd::ReferencedLocations
llvm::DenseSet< SourceLocation > ReferencedLocations
Definition: IncludeCleaner.h:33
SM
const SourceManager & SM
Definition: IncludeCleaner.cpp:140
clang::clangd::ParsedAST
Stores and provides access to parsed AST.
Definition: ParsedAST.h:49
clang::clangd::Config::Diagnostics
struct clang::clangd::Config::@4 Diagnostics
Controls warnings and errors when parsing code.
clang::clangd::findReferencedFiles
llvm::DenseSet< FileID > findReferencedFiles(const llvm::DenseSet< SourceLocation > &Locs, const IncludeStructure &Includes, const SourceManager &SM)
Retrieves IDs of all files containing SourceLocations from Locs.
Definition: IncludeCleaner.cpp:263
clang::clangd::Unnecessary
@ Unnecessary
Unused or unnecessary code.
Definition: Protocol.h:825
clang::clangd::ParsedAST::getSourceManager
SourceManager & getSourceManager()
Definition: ParsedAST.h:75
Warning
constexpr static llvm::SourceMgr::DiagKind Warning
Definition: ConfigCompile.cpp:511
clang::clangd::IncludeStructure::isSelfContained
bool isSelfContained(HeaderID ID) const
Definition: Headers.h:143
ParsedAST.h
clang::clangd::trace::Span
Records an event whose duration is the lifetime of the Span object.
Definition: Trace.h:143