clang-tools  15.0.0git
SymbolIndexManager.cpp
Go to the documentation of this file.
1 //===-- SymbolIndexManager.cpp - Managing multiple SymbolIndices-*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "SymbolIndexManager.h"
11 #include "llvm/ADT/STLExtras.h"
12 #include "llvm/ADT/SmallVector.h"
13 #include "llvm/ADT/StringMap.h"
14 #include "llvm/Support/Debug.h"
15 #include "llvm/Support/Path.h"
16 
17 #define DEBUG_TYPE "clang-include-fixer"
18 
19 namespace clang {
20 namespace include_fixer {
21 
23 using find_all_symbols::SymbolAndSignals;
24 
25 // Calculate a score based on whether we think the given header is closely
26 // related to the given source file.
27 static double similarityScore(llvm::StringRef FileName,
28  llvm::StringRef Header) {
29  // Compute the maximum number of common path segments between Header and
30  // a suffix of FileName.
31  // We do not do a full longest common substring computation, as Header
32  // specifies the path we would directly #include, so we assume it is rooted
33  // relatively to a subproject of the repository.
34  int MaxSegments = 1;
35  for (auto FileI = llvm::sys::path::begin(FileName),
36  FileE = llvm::sys::path::end(FileName);
37  FileI != FileE; ++FileI) {
38  int Segments = 0;
39  for (auto HeaderI = llvm::sys::path::begin(Header),
40  HeaderE = llvm::sys::path::end(Header), I = FileI;
41  HeaderI != HeaderE && *I == *HeaderI && I != FileE; ++I, ++HeaderI) {
42  ++Segments;
43  }
44  MaxSegments = std::max(Segments, MaxSegments);
45  }
46  return MaxSegments;
47 }
48 
49 static void rank(std::vector<SymbolAndSignals> &Symbols,
50  llvm::StringRef FileName) {
51  llvm::StringMap<double> Score;
52  for (const auto &Symbol : Symbols) {
53  // Calculate a score from the similarity of the header the symbol is in
54  // with the current file and the popularity of the symbol.
55  double NewScore = similarityScore(FileName, Symbol.Symbol.getFilePath()) *
56  (1.0 + std::log2(1 + Symbol.Signals.Seen));
57  double &S = Score[Symbol.Symbol.getFilePath()];
58  S = std::max(S, NewScore);
59  }
60  // Sort by the gathered scores. Use file name as a tie breaker so we can
61  // deduplicate.
62  llvm::sort(Symbols.begin(), Symbols.end(),
63  [&](const SymbolAndSignals &A, const SymbolAndSignals &B) {
64  auto AS = Score[A.Symbol.getFilePath()];
65  auto BS = Score[B.Symbol.getFilePath()];
66  if (AS != BS)
67  return AS > BS;
68  return A.Symbol.getFilePath() < B.Symbol.getFilePath();
69  });
70 }
71 
72 std::vector<find_all_symbols::SymbolInfo>
73 SymbolIndexManager::search(llvm::StringRef Identifier,
74  bool IsNestedSearch,
75  llvm::StringRef FileName) const {
76  // The identifier may be fully qualified, so split it and get all the context
77  // names.
78  llvm::SmallVector<llvm::StringRef, 8> Names;
79  Identifier.split(Names, "::");
80 
81  bool IsFullyQualified = false;
82  if (Identifier.startswith("::")) {
83  Names.erase(Names.begin()); // Drop first (empty) element.
84  IsFullyQualified = true;
85  }
86 
87  // As long as we don't find a result keep stripping name parts from the end.
88  // This is to support nested classes which aren't recorded in the database.
89  // Eventually we will either hit a class (namespaces aren't in the database
90  // either) and can report that result.
91  bool TookPrefix = false;
92  std::vector<SymbolAndSignals> MatchedSymbols;
93  do {
94  std::vector<SymbolAndSignals> Symbols;
95  for (const auto &DB : SymbolIndices) {
96  auto Res = DB.get()->search(Names.back());
97  Symbols.insert(Symbols.end(), Res.begin(), Res.end());
98  }
99 
100  LLVM_DEBUG(llvm::dbgs() << "Searching " << Names.back() << "... got "
101  << Symbols.size() << " results...\n");
102 
103  for (auto &SymAndSig : Symbols) {
104  const SymbolInfo &Symbol = SymAndSig.Symbol;
105  // Match the identifier name without qualifier.
106  bool IsMatched = true;
107  auto SymbolContext = Symbol.getContexts().begin();
108  auto IdentiferContext = Names.rbegin() + 1; // Skip identifier name.
109  // Match the remaining context names.
110  while (IdentiferContext != Names.rend() &&
111  SymbolContext != Symbol.getContexts().end()) {
112  if (SymbolContext->second == *IdentiferContext) {
113  ++IdentiferContext;
114  ++SymbolContext;
115  } else if (SymbolContext->first ==
117  // Skip non-scoped enum context.
118  ++SymbolContext;
119  } else {
120  IsMatched = false;
121  break;
122  }
123  }
124 
125  // If the name was qualified we only want to add results if we evaluated
126  // all contexts.
127  if (IsFullyQualified)
128  IsMatched &= (SymbolContext == Symbol.getContexts().end());
129 
130  // FIXME: Support full match. At this point, we only find symbols in
131  // database which end with the same contexts with the identifier.
132  if (IsMatched && IdentiferContext == Names.rend()) {
133  // If we're in a situation where we took a prefix but the thing we
134  // found couldn't possibly have a nested member ignore it.
135  if (TookPrefix &&
136  (Symbol.getSymbolKind() == SymbolInfo::SymbolKind::Function ||
137  Symbol.getSymbolKind() == SymbolInfo::SymbolKind::Variable ||
138  Symbol.getSymbolKind() ==
140  Symbol.getSymbolKind() == SymbolInfo::SymbolKind::Macro))
141  continue;
142 
143  MatchedSymbols.push_back(std::move(SymAndSig));
144  }
145  }
146  Names.pop_back();
147  TookPrefix = true;
148  } while (MatchedSymbols.empty() && !Names.empty() && IsNestedSearch);
149 
150  rank(MatchedSymbols, FileName);
151  // Strip signals, they are no longer needed.
152  std::vector<SymbolInfo> Res;
153  Res.reserve(MatchedSymbols.size());
154  for (auto &SymAndSig : MatchedSymbols)
155  Res.push_back(std::move(SymAndSig.Symbol));
156  return Res;
157 }
158 
159 } // namespace include_fixer
160 } // namespace clang
clang::find_all_symbols::SymbolAndSignals
Definition: SymbolInfo.h:126
clang::find_all_symbols::SymbolInfo
Describes a named symbol from a header.
Definition: SymbolInfo.h:27
clang::find_all_symbols::SymbolInfo::SymbolKind::EnumConstantDecl
@ EnumConstantDecl
clang::find_all_symbols::SymbolInfo::SymbolKind::Variable
@ Variable
clang::include_fixer::similarityScore
static double similarityScore(llvm::StringRef FileName, llvm::StringRef Header)
Definition: SymbolIndexManager.cpp:27
SymbolIndexManager.h
ns1::ns2::A
@ A
Definition: CategoricalFeature.h:3
clang::find_all_symbols::SymbolInfo::SymbolKind::Function
@ Function
clang::include_fixer::rank
static void rank(std::vector< SymbolAndSignals > &Symbols, llvm::StringRef FileName)
Definition: SymbolIndexManager.cpp:49
clang::include_fixer::SymbolIndexManager::search
std::vector< find_all_symbols::SymbolInfo > search(llvm::StringRef Identifier, bool IsNestedSearch=true, llvm::StringRef FileName="") const
Search for header files to be included for an identifier.
Definition: SymbolIndexManager.cpp:73
clang::find_all_symbols::SymbolInfo::SymbolKind::Macro
@ Macro
FileName
StringRef FileName
Definition: KernelNameRestrictionCheck.cpp:46
Score
llvm::Optional< float > Score
Definition: FuzzyMatchTests.cpp:47
SymbolInfo.h
clang
===– Representation.cpp - ClangDoc Representation --------—*- C++ -*-===//
Definition: ApplyReplacements.h:27
SymbolInfo
clang::find_all_symbols::SymbolInfo SymbolInfo
Definition: FindAllSymbolsMain.cpp:38
clang::find_all_symbols::SymbolInfo::ContextType::EnumDecl
@ EnumDecl
ns1::ns2::B
@ B
Definition: CategoricalFeature.h:3