clang-tools  11.0.0git
SymbolCollector.cpp
Go to the documentation of this file.
1 //===--- SymbolCollector.cpp -------------------------------------*- C++-*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "SymbolCollector.h"
10 #include "AST.h"
11 #include "CanonicalIncludes.h"
12 #include "CodeComplete.h"
13 #include "CodeCompletionStrings.h"
14 #include "ExpectedTypes.h"
15 #include "SourceCode.h"
16 #include "SymbolLocation.h"
17 #include "URI.h"
18 #include "index/SymbolID.h"
19 #include "support/Logger.h"
20 #include "clang/AST/Decl.h"
21 #include "clang/AST/DeclBase.h"
22 #include "clang/AST/DeclCXX.h"
23 #include "clang/AST/DeclTemplate.h"
24 #include "clang/Basic/SourceLocation.h"
25 #include "clang/Basic/SourceManager.h"
26 #include "clang/Basic/Specifiers.h"
27 #include "clang/Index/IndexSymbol.h"
28 #include "clang/Index/IndexingAction.h"
29 #include "clang/Index/USRGeneration.h"
30 #include "clang/Lex/Preprocessor.h"
31 #include "clang/Tooling/Syntax/Tokens.h"
32 #include "llvm/Support/Casting.h"
33 #include "llvm/Support/FileSystem.h"
34 #include "llvm/Support/MemoryBuffer.h"
35 #include "llvm/Support/Path.h"
36 
37 namespace clang {
38 namespace clangd {
39 namespace {
40 
41 /// If \p ND is a template specialization, returns the described template.
42 /// Otherwise, returns \p ND.
43 const NamedDecl &getTemplateOrThis(const NamedDecl &ND) {
44  if (auto T = ND.getDescribedTemplate())
45  return *T;
46  return ND;
47 }
48 
49 // Returns a URI of \p Path. Firstly, this makes the \p Path absolute using the
50 // current working directory of the given SourceManager if the Path is not an
51 // absolute path. If failed, this resolves relative paths against \p FallbackDir
52 // to get an absolute path. Then, this tries creating an URI for the absolute
53 // path with schemes specified in \p Opts. This returns an URI with the first
54 // working scheme, if there is any; otherwise, this returns None.
55 //
56 // The Path can be a path relative to the build directory, or retrieved from
57 // the SourceManager.
58 std::string toURI(const SourceManager &SM, llvm::StringRef Path,
59  const SymbolCollector::Options &Opts) {
60  llvm::SmallString<128> AbsolutePath(Path);
61  if (auto File = SM.getFileManager().getFile(Path)) {
62  if (auto CanonPath = getCanonicalPath(*File, SM)) {
63  AbsolutePath = *CanonPath;
64  }
65  }
66  // We don't perform is_absolute check in an else branch because makeAbsolute
67  // might return a relative path on some InMemoryFileSystems.
68  if (!llvm::sys::path::is_absolute(AbsolutePath) && !Opts.FallbackDir.empty())
69  llvm::sys::fs::make_absolute(Opts.FallbackDir, AbsolutePath);
70  llvm::sys::path::remove_dots(AbsolutePath, /*remove_dot_dot=*/true);
71  return URI::create(AbsolutePath).toString();
72 }
73 
74 // Checks whether the decl is a private symbol in a header generated by
75 // protobuf compiler.
76 // FIXME: make filtering extensible when there are more use cases for symbol
77 // filters.
78 bool isPrivateProtoDecl(const NamedDecl &ND) {
79  const auto &SM = ND.getASTContext().getSourceManager();
80  if (!isProtoFile(nameLocation(ND, SM), SM))
81  return false;
82 
83  // ND without identifier can be operators.
84  if (ND.getIdentifier() == nullptr)
85  return false;
86  auto Name = ND.getIdentifier()->getName();
87  if (!Name.contains('_'))
88  return false;
89  // Nested proto entities (e.g. Message::Nested) have top-level decls
90  // that shouldn't be used (Message_Nested). Ignore them completely.
91  // The nested entities are dangling type aliases, we may want to reconsider
92  // including them in the future.
93  // For enum constants, SOME_ENUM_CONSTANT is not private and should be
94  // indexed. Outer_INNER is private. This heuristic relies on naming style, it
95  // will include OUTER_INNER and exclude some_enum_constant.
96  // FIXME: the heuristic relies on naming style (i.e. no underscore in
97  // user-defined names) and can be improved.
98  return (ND.getKind() != Decl::EnumConstant) || llvm::any_of(Name, islower);
99 }
100 
101 // We only collect #include paths for symbols that are suitable for global code
102 // completion, except for namespaces since #include path for a namespace is hard
103 // to define.
104 bool shouldCollectIncludePath(index::SymbolKind Kind) {
105  using SK = index::SymbolKind;
106  switch (Kind) {
107  case SK::Macro:
108  case SK::Enum:
109  case SK::Struct:
110  case SK::Class:
111  case SK::Union:
112  case SK::TypeAlias:
113  case SK::Using:
114  case SK::Function:
115  case SK::Variable:
116  case SK::EnumConstant:
117  return true;
118  default:
119  return false;
120  }
121 }
122 
123 // Return the symbol range of the token at \p TokLoc.
124 std::pair<SymbolLocation::Position, SymbolLocation::Position>
125 getTokenRange(SourceLocation TokLoc, const SourceManager &SM,
126  const LangOptions &LangOpts) {
127  auto CreatePosition = [&SM](SourceLocation Loc) {
128  auto LSPLoc = sourceLocToPosition(SM, Loc);
129  SymbolLocation::Position Pos;
130  Pos.setLine(LSPLoc.line);
131  Pos.setColumn(LSPLoc.character);
132  return Pos;
133  };
134 
135  auto TokenLength = clang::Lexer::MeasureTokenLength(TokLoc, SM, LangOpts);
136  return {CreatePosition(TokLoc),
137  CreatePosition(TokLoc.getLocWithOffset(TokenLength))};
138 }
139 
140 // Return the symbol location of the token at \p TokLoc.
141 llvm::Optional<SymbolLocation>
142 getTokenLocation(SourceLocation TokLoc, const SourceManager &SM,
143  const SymbolCollector::Options &Opts,
144  const clang::LangOptions &LangOpts,
145  std::string &FileURIStorage) {
146  auto Path = SM.getFilename(TokLoc);
147  if (Path.empty())
148  return None;
149  FileURIStorage = toURI(SM, Path, Opts);
150  SymbolLocation Result;
151  Result.FileURI = FileURIStorage.c_str();
152  auto Range = getTokenRange(TokLoc, SM, LangOpts);
153  Result.Start = Range.first;
154  Result.End = Range.second;
155 
156  return Result;
157 }
158 
159 // Checks whether \p ND is a definition of a TagDecl (class/struct/enum/union)
160 // in a header file, in which case clangd would prefer to use ND as a canonical
161 // declaration.
162 // FIXME: handle symbol types that are not TagDecl (e.g. functions), if using
163 // the first seen declaration as canonical declaration is not a good enough
164 // heuristic.
165 bool isPreferredDeclaration(const NamedDecl &ND, index::SymbolRoleSet Roles) {
166  const auto &SM = ND.getASTContext().getSourceManager();
167  return (Roles & static_cast<unsigned>(index::SymbolRole::Definition)) &&
168  isa<TagDecl>(&ND) && !isInsideMainFile(ND.getLocation(), SM);
169 }
170 
171 RefKind toRefKind(index::SymbolRoleSet Roles, bool Spelled = false) {
172  RefKind Result = RefKind::Unknown;
173  if (Roles & static_cast<unsigned>(index::SymbolRole::Declaration))
174  Result |= RefKind::Declaration;
175  if (Roles & static_cast<unsigned>(index::SymbolRole::Definition))
176  Result |= RefKind::Definition;
177  if (Roles & static_cast<unsigned>(index::SymbolRole::Reference))
178  Result |= RefKind::Reference;
179  if (Spelled)
180  Result |= RefKind::Spelled;
181  return Result;
182 }
183 
184 bool shouldIndexRelation(const index::SymbolRelation &R) {
185  // We currently only index BaseOf relations, for type hierarchy subtypes.
186  return R.Roles & static_cast<unsigned>(index::SymbolRole::RelationBaseOf);
187 }
188 
189 } // namespace
190 
191 SymbolCollector::SymbolCollector(Options Opts) : Opts(std::move(Opts)) {}
192 
193 void SymbolCollector::initialize(ASTContext &Ctx) {
194  ASTCtx = &Ctx;
195  CompletionAllocator = std::make_shared<GlobalCodeCompletionAllocator>();
196  CompletionTUInfo =
197  std::make_unique<CodeCompletionTUInfo>(CompletionAllocator);
198 }
199 
200 bool SymbolCollector::shouldCollectSymbol(const NamedDecl &ND,
201  const ASTContext &ASTCtx,
202  const Options &Opts,
203  bool IsMainFileOnly) {
204  // Skip anonymous declarations, e.g (anonymous enum/class/struct).
205  if (ND.getDeclName().isEmpty())
206  return false;
207 
208  // Skip main-file symbols if we are not collecting them.
209  if (IsMainFileOnly && !Opts.CollectMainFileSymbols)
210  return false;
211 
212  // Skip symbols in anonymous namespaces in header files.
213  if (!IsMainFileOnly && ND.isInAnonymousNamespace())
214  return false;
215 
216  // We want most things but not "local" symbols such as symbols inside
217  // FunctionDecl, BlockDecl, ObjCMethodDecl and OMPDeclareReductionDecl.
218  // FIXME: Need a matcher for ExportDecl in order to include symbols declared
219  // within an export.
220  const auto *DeclCtx = ND.getDeclContext();
221  switch (DeclCtx->getDeclKind()) {
222  case Decl::TranslationUnit:
223  case Decl::Namespace:
224  case Decl::LinkageSpec:
225  case Decl::Enum:
226  case Decl::ObjCProtocol:
227  case Decl::ObjCInterface:
228  case Decl::ObjCCategory:
229  case Decl::ObjCCategoryImpl:
230  case Decl::ObjCImplementation:
231  break;
232  default:
233  // Record has a few derivations (e.g. CXXRecord, Class specialization), it's
234  // easier to cast.
235  if (!isa<RecordDecl>(DeclCtx))
236  return false;
237  }
238 
239  // Avoid indexing internal symbols in protobuf generated headers.
240  if (isPrivateProtoDecl(ND))
241  return false;
242  return true;
243 }
244 
245 // Always return true to continue indexing.
247  const Decl *D, index::SymbolRoleSet Roles,
248  llvm::ArrayRef<index::SymbolRelation> Relations, SourceLocation Loc,
249  index::IndexDataConsumer::ASTNodeInfo ASTNode) {
250  assert(ASTCtx && PP.get() && "ASTContext and Preprocessor must be set.");
251  assert(CompletionAllocator && CompletionTUInfo);
252  assert(ASTNode.OrigD);
253  // Indexing API puts canonical decl into D, which might not have a valid
254  // source location for implicit/built-in decls. Fallback to original decl in
255  // such cases.
256  if (D->getLocation().isInvalid())
257  D = ASTNode.OrigD;
258  // If OrigD is an declaration associated with a friend declaration and it's
259  // not a definition, skip it. Note that OrigD is the occurrence that the
260  // collector is currently visiting.
261  if ((ASTNode.OrigD->getFriendObjectKind() !=
262  Decl::FriendObjectKind::FOK_None) &&
263  !(Roles & static_cast<unsigned>(index::SymbolRole::Definition)))
264  return true;
265  // A declaration created for a friend declaration should not be used as the
266  // canonical declaration in the index. Use OrigD instead, unless we've already
267  // picked a replacement for D
268  if (D->getFriendObjectKind() != Decl::FriendObjectKind::FOK_None)
269  D = CanonicalDecls.try_emplace(D, ASTNode.OrigD).first->second;
270  const NamedDecl *ND = dyn_cast<NamedDecl>(D);
271  if (!ND)
272  return true;
273 
274  // Mark D as referenced if this is a reference coming from the main file.
275  // D may not be an interesting symbol, but it's cheaper to check at the end.
276  auto &SM = ASTCtx->getSourceManager();
277  if (Opts.CountReferences &&
278  (Roles & static_cast<unsigned>(index::SymbolRole::Reference)) &&
279  SM.getFileID(SM.getSpellingLoc(Loc)) == SM.getMainFileID())
280  ReferencedDecls.insert(ND);
281 
282  auto ID = getSymbolID(ND);
283  if (!ID)
284  return true;
285 
286  // ND is the canonical (i.e. first) declaration. If it's in the main file
287  // (which is not a header), then no public declaration was visible, so assume
288  // it's main-file only.
289  bool IsMainFileOnly =
290  SM.isWrittenInMainFile(SM.getExpansionLoc(ND->getBeginLoc())) &&
291  !isHeaderFile(SM.getFileEntryForID(SM.getMainFileID())->getName(),
292  ASTCtx->getLangOpts());
293  // In C, printf is a redecl of an implicit builtin! So check OrigD instead.
294  if (ASTNode.OrigD->isImplicit() ||
295  !shouldCollectSymbol(*ND, *ASTCtx, Opts, IsMainFileOnly))
296  return true;
297 
298  // Note: we need to process relations for all decl occurrences, including
299  // refs, because the indexing code only populates relations for specific
300  // occurrences. For example, RelationBaseOf is only populated for the
301  // occurrence inside the base-specifier.
302  processRelations(*ND, *ID, Relations);
303 
304  bool CollectRef = static_cast<bool>(Opts.RefFilter & toRefKind(Roles));
305  bool IsOnlyRef =
306  !(Roles & (static_cast<unsigned>(index::SymbolRole::Declaration) |
307  static_cast<unsigned>(index::SymbolRole::Definition)));
308 
309  if (IsOnlyRef && !CollectRef)
310  return true;
311 
312  // Do not store references to main-file symbols.
313  // Unlike other fields, e.g. Symbols (which use spelling locations), we use
314  // file locations for references (as it aligns the behavior of clangd's
315  // AST-based xref).
316  // FIXME: we should try to use the file locations for other fields.
317  if (CollectRef && !IsMainFileOnly && !isa<NamespaceDecl>(ND) &&
318  (Opts.RefsInHeaders ||
319  SM.getFileID(SM.getFileLoc(Loc)) == SM.getMainFileID()))
320  DeclRefs[ND].emplace_back(SM.getFileLoc(Loc), Roles);
321  // Don't continue indexing if this is a mere reference.
322  if (IsOnlyRef)
323  return true;
324 
325  // FIXME: ObjCPropertyDecl are not properly indexed here:
326  // - ObjCPropertyDecl may have an OrigD of ObjCPropertyImplDecl, which is
327  // not a NamedDecl.
328  auto *OriginalDecl = dyn_cast<NamedDecl>(ASTNode.OrigD);
329  if (!OriginalDecl)
330  return true;
331 
332  const Symbol *BasicSymbol = Symbols.find(*ID);
333  if (!BasicSymbol) // Regardless of role, ND is the canonical declaration.
334  BasicSymbol = addDeclaration(*ND, std::move(*ID), IsMainFileOnly);
335  else if (isPreferredDeclaration(*OriginalDecl, Roles))
336  // If OriginalDecl is preferred, replace the existing canonical
337  // declaration (e.g. a class forward declaration). There should be at most
338  // one duplicate as we expect to see only one preferred declaration per
339  // TU, because in practice they are definitions.
340  BasicSymbol = addDeclaration(*OriginalDecl, std::move(*ID), IsMainFileOnly);
341 
342  if (Roles & static_cast<unsigned>(index::SymbolRole::Definition))
343  addDefinition(*OriginalDecl, *BasicSymbol);
344 
345  return true;
346 }
347 
// Inserts one Ref into Refs for every range listed in
// MacroRefsToIndex.MacroRefs, keyed by the entry's ID and pointing at the URI
// of the main file.
// Asserts that the preprocessor is set and that the main file has a file
// entry.
// NOTE(review): the embedded line numbers jump from 358 to 363 and from 364 to
// 366, so the statements that fill in the remaining fields of R are not
// visible in this listing - confirm against the real source.
348 void SymbolCollector::handleMacros(const MainFileMacros &MacroRefsToIndex) {
349  assert(PP.get());
350  const auto &SM = PP->getSourceManager();
351  const auto *MainFileEntry = SM.getFileEntryForID(SM.getMainFileID());
352  assert(MainFileEntry);
353 
    // Computed once; every Ref below points into this string via c_str().
354  const auto MainFileURI = toURI(SM, MainFileEntry->getName(), Opts);
355  // Add macro references.
356  for (const auto &IDToRefs : MacroRefsToIndex.MacroRefs) {
357  for (const auto &Range : IDToRefs.second) {
358  Ref R;
363  R.Location.FileURI = MainFileURI.c_str();
364  // FIXME: Add correct RefKind information to MainFileMacros.
366  Refs.insert(IDToRefs.first, R);
367  }
368  }
369 }
370 
// Handles a single macro occurrence: may record a reference in MacroRefs,
// may mark the macro as referenced, and for a declaration/definition that is
// not yet in Symbols inserts a new Symbol for it.
// Returns true on every path except one: when Opts.CollectMacro is on, the
// macro is defined in the main file and Opts.CollectMainFileSymbols is off,
// it returns false.
// NOTE(review): the embedded line numbers skip 429-430 and 447, so the body
// of the `if (!IsMainFileSymbol)` block and one statement after the signature
// assignment are not visible in this listing.
371 bool SymbolCollector::handleMacroOccurrence(const IdentifierInfo *Name,
372  const MacroInfo *MI,
373  index::SymbolRoleSet Roles,
374  SourceLocation Loc) {
375  assert(PP.get());
376 
377  const auto &SM = PP->getSourceManager();
378  auto DefLoc = MI->getDefinitionLoc();
379  auto SpellingLoc = SM.getSpellingLoc(Loc);
    // "Main-file symbol" is decided by where the definition expands, not by
    // where this occurrence is.
380  bool IsMainFileSymbol = SM.isInMainFile(SM.getExpansionLoc(DefLoc));
381 
382  // Builtin macros don't have useful locations and aren't needed in completion.
383  if (MI->isBuiltinMacro())
384  return true;
385 
386  // Also avoid storing predefined macros like __DBL_MIN__.
387  if (SM.isWrittenInBuiltinFile(DefLoc))
388  return true;
389 
390  auto ID = getSymbolID(Name->getName(), MI, SM);
391  if (!ID)
392  return true;
393 
394  // Do not store references to main-file macros.
    // A reference is kept only if its roles pass Opts.RefFilter and it is
    // either spelled in the main file or Opts.RefsInHeaders is set.
395  if ((static_cast<unsigned>(Opts.RefFilter) & Roles) && !IsMainFileSymbol &&
396  (Opts.RefsInHeaders || SM.getFileID(SpellingLoc) == SM.getMainFileID()))
397  MacroRefs[*ID].push_back({Loc, Roles});
398 
399  // Collect symbols.
400  if (!Opts.CollectMacro)
401  return true;
402 
403  // Skip main-file macros if we are not collecting them.
    // NOTE(review): this is the only path that returns false.
404  if (IsMainFileSymbol && !Opts.CollectMainFileSymbols)
405  return false;
406 
407  // Mark the macro as referenced if this is a reference coming from the main
408  // file. The macro may not be an interesting symbol, but it's cheaper to check
409  // at the end.
410  if (Opts.CountReferences &&
411  (Roles & static_cast<unsigned>(index::SymbolRole::Reference)) &&
412  SM.getFileID(SpellingLoc) == SM.getMainFileID())
413  ReferencedMacros.insert(Name);
414 
415  // Don't continue indexing if this is a mere reference.
416  // FIXME: remove macro with ID if it is undefined.
417  if (!(Roles & static_cast<unsigned>(index::SymbolRole::Declaration) ||
418  Roles & static_cast<unsigned>(index::SymbolRole::Definition)))
419  return true;
420 
421  // Only collect one instance in case there are multiple.
422  if (Symbols.find(*ID) != nullptr)
423  return true;
424 
425  Symbol S;
426  S.ID = std::move(*ID);
427  S.Name = Name->getName();
428  if (!IsMainFileSymbol) {
431  }
432  S.SymInfo = index::getSymbolInfoForMacro(*MI);
433  std::string FileURI;
434  // FIXME: use the result to filter out symbols.
435  shouldIndexFile(SM.getFileID(Loc));
    // FileURI is the backing storage for the URI referenced by DeclLoc.
436  if (auto DeclLoc =
437  getTokenLocation(DefLoc, SM, Opts, PP->getLangOpts(), FileURI))
438  S.CanonicalDeclaration = *DeclLoc;
439 
    // The signature is derived from the macro's code completion string.
440  CodeCompletionResult SymbolCompletion(Name);
441  const auto *CCS = SymbolCompletion.CreateCodeCompletionStringForMacro(
442  *PP, *CompletionAllocator, *CompletionTUInfo);
443  std::string Signature;
444  std::string SnippetSuffix;
445  getSignature(*CCS, &Signature, &SnippetSuffix);
446  S.Signature = Signature;
448 
449  IndexedMacros.insert(Name);
450  setIncludeLocation(S, DefLoc);
451  Symbols.insert(S);
452  return true;
453 }
454 
455 void SymbolCollector::processRelations(
456  const NamedDecl &ND, const SymbolID &ID,
457  ArrayRef<index::SymbolRelation> Relations) {
458  // Store subtype relations.
459  if (!dyn_cast<TagDecl>(&ND))
460  return;
461 
462  for (const auto &R : Relations) {
463  if (!shouldIndexRelation(R))
464  continue;
465 
466  const Decl *Object = R.RelatedSymbol;
467 
468  auto ObjectID = getSymbolID(Object);
469  if (!ObjectID)
470  continue;
471 
472  // Record the relation.
473  // TODO: There may be cases where the object decl is not indexed for some
474  // reason. Those cases should probably be removed in due course, but for
475  // now there are two possible ways to handle it:
476  // (A) Avoid storing the relation in such cases.
477  // (B) Store it anyways. Clients will likely lookup() the SymbolID
478  // in the index and find nothing, but that's a situation they
479  // probably need to handle for other reasons anyways.
480  // We currently do (B) because it's simpler.
481  this->Relations.insert(Relation{ID, RelationKind::BaseOf, *ObjectID});
482  }
483 }
484 
485 void SymbolCollector::setIncludeLocation(const Symbol &S, SourceLocation Loc) {
486  if (Opts.CollectIncludePath)
487  if (shouldCollectIncludePath(S.SymInfo.Kind))
488  // Use the expansion location to get the #include header since this is
489  // where the symbol is exposed.
490  IncludeFiles[S.ID] =
491  PP->getSourceManager().getDecomposedExpansionLoc(Loc).first;
492 }
493 
495  // At the end of the TU, add 1 to the refcount of all referenced symbols.
496  auto IncRef = [this](const SymbolID &ID) {
497  if (const auto *S = Symbols.find(ID)) {
498  Symbol Inc = *S;
499  ++Inc.References;
500  Symbols.insert(Inc);
501  }
502  };
503  for (const NamedDecl *ND : ReferencedDecls) {
504  if (auto ID = getSymbolID(ND)) {
505  IncRef(*ID);
506  }
507  }
508  if (Opts.CollectMacro) {
509  assert(PP);
510  // First, drop header guards. We can't identify these until EOF.
511  for (const IdentifierInfo *II : IndexedMacros) {
512  if (const auto *MI = PP->getMacroDefinition(II).getMacroInfo())
513  if (auto ID = getSymbolID(II->getName(), MI, PP->getSourceManager()))
514  if (MI->isUsedForHeaderGuard())
515  Symbols.erase(*ID);
516  }
517  // Now increment refcounts.
518  for (const IdentifierInfo *II : ReferencedMacros) {
519  if (const auto *MI = PP->getMacroDefinition(II).getMacroInfo())
520  if (auto ID = getSymbolID(II->getName(), MI, PP->getSourceManager()))
521  IncRef(*ID);
522  }
523  }
524  // Fill in IncludeHeaders.
525  // We delay this until end of TU so header guards are all resolved.
526  // Symbols in slabs aren' mutable, so insert() has to walk all the strings
527  // :-(
528  llvm::SmallString<256> QName;
529  for (const auto &Entry : IncludeFiles)
530  if (const Symbol *S = Symbols.find(Entry.first)) {
531  QName = S->Scope;
532  QName.append(S->Name);
533  if (auto Header = getIncludeHeader(QName, Entry.second)) {
534  Symbol NewSym = *S;
535  NewSym.IncludeHeaders.push_back({*Header, 1});
536  Symbols.insert(NewSym);
537  }
538  }
539 
540  const auto &SM = ASTCtx->getSourceManager();
541  llvm::DenseMap<FileID, std::string> URICache;
542  auto GetURI = [&](FileID FID) -> llvm::Optional<std::string> {
543  auto Found = URICache.find(FID);
544  if (Found == URICache.end()) {
545  if (auto *FileEntry = SM.getFileEntryForID(FID)) {
546  auto FileURI = toURI(SM, FileEntry->getName(), Opts);
547  Found = URICache.insert({FID, FileURI}).first;
548  } else {
549  // Ignore cases where we can not find a corresponding file entry for
550  // given location, e.g. symbols formed via macro concatenation.
551  return None;
552  }
553  }
554  return Found->second;
555  };
556  auto CollectRef =
557  [&](SymbolID ID,
558  const std::pair<SourceLocation, index::SymbolRoleSet> &LocAndRole,
559  bool Spelled = false) {
560  auto FileID = SM.getFileID(LocAndRole.first);
561  // FIXME: use the result to filter out references.
562  shouldIndexFile(FileID);
563  if (auto FileURI = GetURI(FileID)) {
564  auto Range =
565  getTokenRange(LocAndRole.first, SM, ASTCtx->getLangOpts());
566  Ref R;
567  R.Location.Start = Range.first;
568  R.Location.End = Range.second;
569  R.Location.FileURI = FileURI->c_str();
570  R.Kind = toRefKind(LocAndRole.second, Spelled);
571  Refs.insert(ID, R);
572  }
573  };
574  // Populate Refs slab from MacroRefs.
575  // FIXME: All MacroRefs are marked as Spelled now, but this should be checked.
576  for (const auto &IDAndRefs : MacroRefs)
577  for (const auto &LocAndRole : IDAndRefs.second)
578  CollectRef(IDAndRefs.first, LocAndRole, /*Spelled=*/true);
579  // Populate Refs slab from DeclRefs.
580  llvm::DenseMap<FileID, std::vector<syntax::Token>> FilesToTokensCache;
581  for (auto &DeclAndRef : DeclRefs) {
582  if (auto ID = getSymbolID(DeclAndRef.first)) {
583  for (auto &LocAndRole : DeclAndRef.second) {
584  const auto FileID = SM.getFileID(LocAndRole.first);
585  // FIXME: It's better to use TokenBuffer by passing spelled tokens from
586  // the caller of SymbolCollector.
587  if (!FilesToTokensCache.count(FileID))
588  FilesToTokensCache[FileID] =
589  syntax::tokenize(FileID, SM, ASTCtx->getLangOpts());
590  llvm::ArrayRef<syntax::Token> Tokens = FilesToTokensCache[FileID];
591  // Check if the referenced symbol is spelled exactly the same way the
592  // corresponding NamedDecl is. If it is, mark this reference as spelled.
593  const auto *IdentifierToken =
594  spelledIdentifierTouching(LocAndRole.first, Tokens);
595  DeclarationName Name = DeclAndRef.first->getDeclName();
596  const auto NameKind = Name.getNameKind();
597  bool IsTargetKind = NameKind == DeclarationName::Identifier ||
598  NameKind == DeclarationName::CXXConstructorName;
599  bool Spelled = IdentifierToken && IsTargetKind &&
600  Name.getAsString() == IdentifierToken->text(SM);
601  CollectRef(*ID, LocAndRole, Spelled);
602  }
603  }
604  }
605 
606  ReferencedDecls.clear();
607  ReferencedMacros.clear();
608  DeclRefs.clear();
609  FilesToIndexCache.clear();
610  HeaderIsSelfContainedCache.clear();
611  IncludeFiles.clear();
612 }
613 
// Builds a Symbol for \p ND under \p ID (name, scope, template specialization
// args, symbol info, canonical declaration location, origin, documentation,
// signature, type), inserts it into Symbols and returns the pointer obtained
// from Symbols.find(S.ID).
// NOTE(review): the embedded line numbers skip 630, 632, 634, 647, 661, 672,
// 674 and 677. The statements guarded by several of the `if`s below, and the
// opening of the block closed at line 666, are therefore not visible in this
// listing - confirm against the real source before relying on the control
// flow shown here.
614 const Symbol *SymbolCollector::addDeclaration(const NamedDecl &ND, SymbolID ID,
615  bool IsMainFileOnly) {
616  auto &Ctx = ND.getASTContext();
617  auto &SM = Ctx.getSourceManager();
618 
619  Symbol S;
620  S.ID = std::move(ID);
    // QName and TemplateSpecializationArgs are locals that back the values
    // assigned into S below.
621  std::string QName = printQualifiedName(ND);
622  // FIXME: this returns foo:bar: for objective-C methods, we prefer only foo:
623  // for consistency with CodeCompletionString and a clean name/signature split.
624  std::tie(S.Scope, S.Name) = splitQualifiedName(QName);
625  std::string TemplateSpecializationArgs = printTemplateSpecializationArgs(ND);
626  S.TemplateSpecializationArgs = TemplateSpecializationArgs;
627 
628  // We collect main-file symbols, but do not use them for code completion.
629  if (!IsMainFileOnly && isIndexedForCodeCompletion(ND, Ctx))
631  if (isImplementationDetail(&ND))
633  if (!IsMainFileOnly)
635  S.SymInfo = index::getSymbolInfo(&ND);
636  std::string FileURI;
637  auto Loc = nameLocation(ND, SM);
638  assert(Loc.isValid() && "Invalid source location for NamedDecl");
639  // FIXME: use the result to filter out symbols.
640  shouldIndexFile(SM.getFileID(Loc));
    // FileURI is the backing storage for the URI referenced by DeclLoc.
641  if (auto DeclLoc =
642  getTokenLocation(Loc, SM, Opts, ASTCtx->getLangOpts(), FileURI))
643  S.CanonicalDeclaration = *DeclLoc;
644 
645  S.Origin = Opts.Origin;
646  if (ND.getAvailability() == AR_Deprecated)
648 
649  // Add completion info.
650  // FIXME: we may want to choose a different redecl, or combine from several.
651  assert(ASTCtx && PP.get() && "ASTContext and Preprocessor must be set.");
652  // We use the primary template, as clang does during code completion.
653  CodeCompletionResult SymbolCompletion(&getTemplateOrThis(ND), 0);
654  const auto *CCS = SymbolCompletion.CreateCodeCompletionString(
655  *ASTCtx, *PP, CodeCompletionContext::CCC_Symbol, *CompletionAllocator,
656  *CompletionTUInfo,
657  /*IncludeBriefComments*/ false);
658  std::string Documentation =
659  formatDocumentation(*CCS, getDocComment(Ctx, SymbolCompletion,
660  /*CommentsFromHeaders=*/true));
    // Early-exit path: the symbol is inserted here, with documentation only
    // when Opts.StoreAllDocumentation is set.
662  if (Opts.StoreAllDocumentation)
663  S.Documentation = Documentation;
664  Symbols.insert(S);
665  return Symbols.find(S.ID);
666  }
667  S.Documentation = Documentation;
668  std::string Signature;
669  std::string SnippetSuffix;
670  getSignature(*CCS, &Signature, &SnippetSuffix);
671  S.Signature = Signature;
673  std::string ReturnType = getReturnType(*CCS);
675 
676  llvm::Optional<OpaqueType> TypeStorage;
678  TypeStorage = OpaqueType::fromCompletionResult(*ASTCtx, SymbolCompletion);
679  if (TypeStorage)
680  S.Type = TypeStorage->raw();
681  }
682 
683  Symbols.insert(S);
684  setIncludeLocation(S, ND.getLocation());
685  return Symbols.find(S.ID);
686 }
687 
688 void SymbolCollector::addDefinition(const NamedDecl &ND,
689  const Symbol &DeclSym) {
690  if (DeclSym.Definition)
691  return;
692  // If we saw some forward declaration, we end up copying the symbol.
693  // This is not ideal, but avoids duplicating the "is this a definition" check
694  // in clang::index. We should only see one definition.
695  Symbol S = DeclSym;
696  std::string FileURI;
697  const auto &SM = ND.getASTContext().getSourceManager();
698  auto Loc = nameLocation(ND, SM);
699  // FIXME: use the result to filter out symbols.
700  shouldIndexFile(SM.getFileID(Loc));
701  if (auto DefLoc =
702  getTokenLocation(Loc, SM, Opts, ASTCtx->getLangOpts(), FileURI))
703  S.Definition = *DefLoc;
704  Symbols.insert(S);
705 }
706 
707 /// Gets a canonical include (URI of the header or <header> or "header") for
708 /// header of \p FID (which should usually be the *expansion* file).
709 /// Returns None if includes should not be inserted for this file.
710 llvm::Optional<std::string>
711 SymbolCollector::getIncludeHeader(llvm::StringRef QName, FileID FID) {
712  const SourceManager &SM = ASTCtx->getSourceManager();
713  const FileEntry *FE = SM.getFileEntryForID(FID);
714  if (!FE || FE->getName().empty())
715  return llvm::None;
716  llvm::StringRef Filename = FE->getName();
717  // If a file is mapped by canonical headers, use that mapping, regardless
718  // of whether it's an otherwise-good header (header guards etc).
719  if (Opts.Includes) {
720  llvm::StringRef Canonical = Opts.Includes->mapHeader(Filename, QName);
721  // If we had a mapping, always use it.
722  if (Canonical.startswith("<") || Canonical.startswith("\""))
723  return Canonical.str();
724  if (Canonical != Filename)
725  return toURI(SM, Canonical, Opts);
726  }
727  if (!isSelfContainedHeader(FID)) {
728  // A .inc or .def file is often included into a real header to define
729  // symbols (e.g. LLVM tablegen files).
730  if (Filename.endswith(".inc") || Filename.endswith(".def"))
731  return getIncludeHeader(QName, SM.getFileID(SM.getIncludeLoc(FID)));
732  // Conservatively refuse to insert #includes to files without guards.
733  return llvm::None;
734  }
735  // Standard case: just insert the file itself.
736  return toURI(SM, Filename, Opts);
737 }
738 
739 bool SymbolCollector::isSelfContainedHeader(FileID FID) {
740  // The real computation (which will be memoized).
741  auto Compute = [&] {
742  const SourceManager &SM = ASTCtx->getSourceManager();
743  const FileEntry *FE = SM.getFileEntryForID(FID);
744  if (!FE)
745  return false;
746  // FIXME: Should files that have been #import'd be considered
747  // self-contained? That's really a property of the includer,
748  // not of the file.
749  if (!PP->getHeaderSearchInfo().isFileMultipleIncludeGuarded(FE) &&
750  !PP->getHeaderSearchInfo().hasFileBeenImported(FE))
751  return false;
752  // This pattern indicates that a header can't be used without
753  // particular preprocessor state, usually set up by another header.
754  if (isDontIncludeMeHeader(SM.getBufferData(FID)))
755  return false;
756  return true;
757  };
758 
759  auto R = HeaderIsSelfContainedCache.try_emplace(FID, false);
760  if (R.second)
761  R.first->second = Compute();
762  return R.first->second;
763 }
764 
765 // Is Line an #if or #ifdef directive?
766 static bool isIf(llvm::StringRef Line) {
767  Line = Line.ltrim();
768  if (!Line.consume_front("#"))
769  return false;
770  Line = Line.ltrim();
771  return Line.startswith("if");
772 }
773 // Is Line an #error directive mentioning includes?
774 static bool isErrorAboutInclude(llvm::StringRef Line) {
775  Line = Line.ltrim();
776  if (!Line.consume_front("#"))
777  return false;
778  Line = Line.ltrim();
779  if (!Line.startswith("error"))
780  return false;
781  return Line.contains_lower("includ"); // Matches "include" or "including".
782 }
783 
784 bool SymbolCollector::isDontIncludeMeHeader(llvm::StringRef Content) {
785  llvm::StringRef Line;
786  // Only sniff up to 100 lines or 10KB.
787  Content = Content.take_front(100 * 100);
788  for (unsigned I = 0; I < 100 && !Content.empty(); ++I) {
789  std::tie(Line, Content) = Content.split('\n');
790  if (isIf(Line) && isErrorAboutInclude(Content.split('\n').first))
791  return true;
792  }
793  return false;
794 }
795 
797  if (!Opts.FileFilter)
798  return true;
799  auto I = FilesToIndexCache.try_emplace(FID);
800  if (I.second)
801  I.first->second = Opts.FileFilter(ASTCtx->getSourceManager(), FID);
802  return I.first->second;
803 }
804 
805 } // namespace clangd
806 } // namespace clang
SourceLocation Loc
'#' location in the include directive
bool handleMacroOccurrence(const IdentifierInfo *Name, const MacroInfo *MI, index::SymbolRoleSet Roles, SourceLocation Loc) override
bool isProtoFile(SourceLocation Loc, const SourceManager &SM)
Returns true if the given location is in a generated protobuf file.
const FunctionDecl * Decl
std::string printQualifiedName(const NamedDecl &ND)
Returns the qualified name of ND.
Definition: AST.cpp:168
SourceLocation nameLocation(const clang::Decl &D, const SourceManager &SM)
Find the source location of the identifier for D.
Definition: AST.cpp:161
Position start
The range's start position.
Definition: Protocol.h:175
Represents a relation between two symbols.
Definition: Relation.h:29
llvm::Optional< SymbolID > getSymbolID(const Decl *D)
Gets the symbol ID for a declaration, if possible.
Definition: AST.cpp:285
const Symbol * find(const SymbolID &ID)
Returns the symbol with an ID, if it exists. Valid until insert/remove.
Definition: Symbol.h:212
clang::find_all_symbols::SymbolInfo::SymbolKind SymbolKind
Definition: SymbolInfo.cpp:21
std::pair< StringRef, StringRef > splitQualifiedName(StringRef QName)
Definition: SourceCode.cpp:489
bool isInsideMainFile(SourceLocation Loc, const SourceManager &SM)
Returns true iff Loc is inside the main file.
Definition: SourceCode.cpp:421
Represents a symbol occurrence in the source file.
Definition: Ref.h:87
Symbol is visible to other files (not e.g. a static helper function).
Definition: Symbol.h:125
void insert(const Symbol &S)
Adds a symbol, overwriting any existing one with the same ID.
Definition: Symbol.cpp:50
def make_absolute(f, directory)
llvm::StringRef Scope
The containing namespace. e.g. "" (global), "ns::" (top-level namespace).
Definition: Symbol.h:44
std::string getDocComment(const ASTContext &Ctx, const CodeCompletionResult &Result, bool CommentsFromHeaders)
Gets a minimally formatted documentation comment of Result, with comment markers stripped.
Documents should not be synced at all.
std::string printTemplateSpecializationArgs(const NamedDecl &ND)
Prints template arguments of a decl as written in the source code, including enclosing '<' and '>'...
Definition: AST.cpp:248
void erase(const SymbolID &ID)
Removes the symbol with an ID, if it exists.
Definition: Symbol.h:209
void initialize(ASTContext &Ctx) override
unsigned References
The number of translation units that reference this symbol from their main file.
Definition: Symbol.h:59
SymbolID ID
The ID of the symbol.
Definition: Symbol.h:38
index::SymbolInfo SymInfo
The symbol information, like symbol kind.
Definition: Symbol.h:40
std::string getReturnType(const CodeCompletionString &CCS)
Gets detail to be used as the detail field in an LSP completion item.
BindArgumentKind Kind
llvm::DenseMap< SymbolID, std::vector< Range > > MacroRefs
Definition: CollectMacros.h:28
Symbol is an implementation detail.
Definition: Symbol.h:123
SymbolLocation Definition
The location of the symbol's definition, if one was found.
Definition: Symbol.h:47
Context Ctx
std::vector< SymbolDetails > getSymbolInfo(ParsedAST &AST, Position Pos)
Get info about symbols at Pos.
Definition: XRefs.cpp:1123
std::string QName
std::string Filename
Filename as a string.
Whether or not this symbol is meant to be used for the code completion.
Definition: Symbol.h:119
bool isIndexedForCodeCompletion(const NamedDecl &ND, ASTContext &ASTCtx)
llvm::SmallVector< IncludeHeaderWithReferences, 1 > IncludeHeaders
One Symbol can potentially be included via different headers.
Definition: Symbol.h:111
SymbolFlag Flags
Definition: Symbol.h:128
static bool isErrorAboutInclude(llvm::StringRef Line)
llvm::StringRef Signature
A brief description of the symbol that can be appended in the completion candidate list...
Definition: Symbol.h:65
SymbolLocation Location
The source location where the symbol is named.
Definition: Ref.h:89
std::string Signature
llvm::StringRef Documentation
Documentation including comment for the symbol declaration.
Definition: Symbol.h:76
std::string Path
A typedef to represent a file path.
Definition: Path.h:20
static constexpr llvm::StringLiteral Name
SymbolLocation CanonicalDeclaration
The location of the preferred declaration of the symbol.
Definition: Symbol.h:56
RefKind
Describes the kind of a cross-reference.
Definition: Ref.h:30
std::string formatDocumentation(const CodeCompletionString &CCS, llvm::StringRef DocComment)
Assembles formatted documentation for a completion result.
std::string ReturnType
Position Pos
Definition: SourceCode.cpp:649
void handleMacros(const MainFileMacros &MacroRefsToIndex)
bool shouldIndexFile(FileID FID)
Returns true if we are interested in references and declarations from FID.
std::string SnippetSuffix
Position sourceLocToPosition(const SourceManager &SM, SourceLocation Loc)
Turn a SourceLocation into a [line, column] pair.
Definition: SourceCode.cpp:220
bool CollectMainFileSymbols
Collect symbols local to main-files, such as static functions and symbols inside an anonymous namespa...
bool handleDeclOccurrence(const Decl *D, index::SymbolRoleSet Roles, ArrayRef< index::SymbolRelation > Relations, SourceLocation Loc, index::IndexDataConsumer::ASTNodeInfo ASTNode) override
int line
Line position in a document (zero-based).
Definition: Protocol.h:146
int character
Character offset on a line in a document (zero-based).
Definition: Protocol.h:151
static bool shouldCollectSymbol(const NamedDecl &ND, const ASTContext &ASTCtx, const Options &Opts, bool IsMainFileSymbol)
Returns true if ND should be collected.
static llvm::Expected< URI > create(llvm::StringRef AbsolutePath, llvm::StringRef Scheme)
Creates a URI for a file in the given scheme.
Definition: URI.cpp:196
The class presents a C++ symbol, e.g.
Definition: Symbol.h:36
Position Start
The symbol range, using half-open range [Start, End).
//===-- Representation.cpp - ClangDoc Representation -----------*- C++ -*-===//
llvm::StringRef Name
The unqualified name of the symbol, e.g. "bar" (for ns::bar).
Definition: Symbol.h:42
CharSourceRange Range
SourceRange for the file name.
llvm::Optional< std::string > getCanonicalPath(const FileEntry *F, const SourceManager &SourceMgr)
Get the canonical path of F.
Definition: SourceCode.cpp:512
void getSignature(const CodeCompletionString &CCS, std::string *Signature, std::string *Snippet, std::string *RequiredQualifiers, bool CompletingPattern)
Formats the signature for an item, as a display string and snippet.
void insert(const SymbolID &ID, const Ref &S)
Adds a ref to the slab. Deep copy: Strings will be owned by the slab.
Definition: Ref.cpp:36
bool isHeaderFile(llvm::StringRef FileName, llvm::Optional< LangOptions > LangOpts)
Infers whether this is a header from the FileName and LangOpts (if present).
bool isImplementationDetail(const Decl *D)
Returns true if the declaration is considered implementation detail based on heuristics.
Definition: AST.cpp:156
llvm::StringRef CompletionSnippetSuffix
What to insert when completing this symbol, after the symbol name.
Definition: Symbol.h:74
Indicates if the symbol is deprecated.
Definition: Symbol.h:121
static bool isIf(llvm::StringRef Line)
Position end
The range's end position.
Definition: Protocol.h:178
llvm::StringRef Type
Raw representation of the OpaqueType of the symbol, used for scoring purposes.
Definition: Symbol.h:85
SymbolOrigin Origin
Where this symbol came from. Usually an index provides a constant value.
Definition: Symbol.h:61
llvm::StringRef TemplateSpecializationArgs
Argument list in human-readable format, will be displayed to help disambiguate between different spec...
Definition: Symbol.h:69
llvm::StringRef ReturnType
Type when this symbol is used in an expression.
Definition: Symbol.h:80
RefKind Kind
Definition: Ref.h:90
static llvm::Optional< OpaqueType > fromCompletionResult(ASTContext &Ctx, const CodeCompletionResult &R)
Create a type from a code completion result.