clang-tools  14.0.0git
Quality.h
Go to the documentation of this file.
1 //===--- Quality.h - Ranking alternatives for ambiguous queries --*- C++-*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// Some operations such as code completion produce a set of candidates.
10 /// Usually the user can choose between them, but we should put the best options
11 /// at the top (they're easier to select, and more likely to be seen).
12 ///
13 /// This file defines building blocks for ranking candidates.
14 /// It's used by the features directly and also in the implementation of
15 /// indexes, as indexes also need to heuristically limit their results.
16 ///
17 /// The facilities here are:
18 /// - retrieving scoring signals from e.g. indexes, AST, CodeCompletionString
19 /// These are structured in a way that they can be debugged, and are fairly
20 /// consistent regardless of the source.
21 /// - compute scores from scoring signals. These are suitable for sorting.
22 /// - sorting utilities like the TopN container.
23 /// These could be split up further to isolate dependencies if we care.
24 ///
25 //===----------------------------------------------------------------------===//
26 
27 #ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_QUALITY_H
28 #define LLVM_CLANG_TOOLS_EXTRA_CLANGD_QUALITY_H
29 
30 #include "ExpectedTypes.h"
31 #include "FileDistance.h"
32 #include "TUScheduler.h"
33 #include "clang/Sema/CodeCompleteConsumer.h"
34 #include "llvm/ADT/ArrayRef.h"
35 #include "llvm/ADT/StringRef.h"
36 #include "llvm/ADT/StringSet.h"
37 #include <algorithm>
38 #include <functional>
39 #include <vector>
40 
41 namespace llvm {
42 class raw_ostream;
43 } // namespace llvm
44 
45 namespace clang {
46 class CodeCompletionResult;
47 
48 namespace clangd {
49 
50 struct Symbol;
51 class URIDistance;
52 
53 // Signals structs are designed to be aggregated from 0 or more sources.
54 // A default instance has neutral signals, and sources are merged into it.
55 // They can be dumped for debugging, and evaluate()d into a score.
56 
57 /// Attributes of a symbol that affect how much we like it.
59  bool Deprecated = false;
60  bool ReservedName = false; // __foo, _Foo are usually implementation details.
61  // FIXME: make these findable once user types _.
62  bool ImplementationDetail = false;
63  unsigned References = 0;
64 
66  Unknown = 0,
76  } Category = Unknown;
77 
78  void merge(const CodeCompletionResult &SemaCCResult);
79  void merge(const Symbol &IndexResult);
80 
81  // Condense these signals down to a single number, higher is better.
82  float evaluateHeuristics() const;
83 };
84 llvm::raw_ostream &operator<<(llvm::raw_ostream &,
85  const SymbolQualitySignals &);
86 
87 /// Attributes of a symbol-query pair that affect how much we like it.
89  /// The name of the symbol (for ContextWords). Must be explicitly assigned.
90  llvm::StringRef Name;
91  /// 0-1+ fuzzy-match score for unqualified name. Must be explicitly assigned.
92  float NameMatch = 1;
93  /// Lowercase words relevant to the context (e.g. near the completion point).
94  llvm::StringSet<>* ContextWords = nullptr;
95  bool Forbidden = false; // Unavailable (e.g const) or inaccessible (private).
96  /// Whether fix-its need to be applied for that completion or not.
97  bool NeedsFixIts = false;
98  bool InBaseClass = false; // A member from base class of the accessed class.
99 
101  /// These are used to calculate proximity between the index symbol and the
102  /// query.
103  llvm::StringRef SymbolURI;
104  /// FIXME: unify with index proximity score - signals should be
105  /// source-independent.
106  /// Proximity between best declaration and the query. [0-1], 1 is closest.
108 
109  // Scope proximity is only considered (both index and sema) when this is set.
111  llvm::Optional<llvm::StringRef> SymbolScope;
112  // A symbol from sema should be accessible from the current scope.
113  bool SemaSaysInScope = false;
114 
115  // An approximate measure of where we expect the symbol to be used.
121  } Scope = GlobalScope;
122 
123  enum QueryType {
126  } Query = Generic;
127 
128  CodeCompletionContext::Kind Context = CodeCompletionContext::CCC_Other;
129 
130  // Whether symbol is an instance member of a class.
131  bool IsInstanceMember = false;
132 
133  // Whether clang provided a preferred type in the completion context.
134  bool HadContextType = false;
135  // Whether a source completion item or a symbol had type information.
136  bool HadSymbolType = false;
137  // Whether the item matches the type expected in the completion context.
138  bool TypeMatchesPreferred = false;
139 
140  /// Length of the unqualified partial name of Symbol typed in
141  /// CompletionPrefix.
142  unsigned FilterLength = 0;
143 
144  const ASTSignals *MainFileSignals = nullptr;
145  /// Number of references to the candidate in the main file.
146  unsigned MainFileRefs = 0;
147  /// Number of unique symbols in the main file which belong to the
148  /// candidate's namespace. This indicates how relevant the namespace is in
149  /// the current file.
150  unsigned ScopeRefsInFile = 0;
151 
152  /// Set of derived signals computed by calculateDerivedSignals(). Must not be
153  /// set explicitly.
154  struct DerivedSignals {
155  /// Whether Name contains some word from context.
156  bool NameMatchesContext = false;
157  /// Min distance between SymbolURI and all the headers included by the TU.
159  /// Min distance between SymbolScope and all the available scopes.
161  };
162 
164 
165  void merge(const CodeCompletionResult &SemaResult);
166  void merge(const Symbol &IndexResult);
167  void computeASTSignals(const CodeCompletionResult &SemaResult);
168 
169  // Condense these signals down to a single number, higher is better.
170  float evaluateHeuristics() const;
171 };
172 llvm::raw_ostream &operator<<(llvm::raw_ostream &,
173  const SymbolRelevanceSignals &);
174 
175 /// Combine symbol quality and relevance into a single score.
176 float evaluateSymbolAndRelevance(float SymbolQuality, float SymbolRelevance);
177 
/// Same semantics as CodeComplete::Score. Quality score and Relevance score
/// have been removed since DecisionForest cannot assign individual scores to
/// Quality and Relevance signals.
struct DecisionForestScores {
  /// Overall score; defaults to zero.
  float Total = 0.f;
  /// NOTE(review): presumably the score with the name-match contribution left
  /// out, mirroring CodeComplete::Score - confirm against Quality.cpp.
  float ExcludingName = 0.f;
};
185 
188  const SymbolRelevanceSignals &Relevance, float Base);
189 
/// TopN<T> is a lossy container that preserves only the "best" N elements.
///
/// "Best" is defined by Compare, which must return true when its first
/// argument ranks above its second (std::greater<T> by default). Elements are
/// kept in a heap whose front is the worst retained element, so deciding
/// whether a new candidate displaces anything is a single comparison.
template <typename T, typename Compare = std::greater<T>> class TopN {
public:
  using value_type = T;

  /// \param N       Maximum number of elements to retain (may be zero).
  /// \param Greater Ordering predicate; Greater(A, B) means A beats B.
  TopN(size_t N, Compare Greater = Compare())
      : N(N), Greater(std::move(Greater)) {}

  // Adds a candidate to the set.
  // Returns true if a candidate was dropped to get back under N.
  // Once the container is full every push reports a drop: either V replaces
  // the current worst element, or V itself is discarded.
  bool push(value_type &&V) {
    bool Dropped = false;
    if (Heap.size() >= N) {
      Dropped = true;
      // The N > 0 guard keeps Heap.front() from being read on an empty heap.
      if (N > 0 && Greater(V, Heap.front())) {
        // Move the worst element to the back, overwrite it, then re-heapify.
        std::pop_heap(Heap.begin(), Heap.end(), Greater);
        Heap.back() = std::move(V);
        std::push_heap(Heap.begin(), Heap.end(), Greater);
      }
    } else {
      Heap.push_back(std::move(V));
      std::push_heap(Heap.begin(), Heap.end(), Greater);
    }
    assert(Heap.size() <= N);
    assert(std::is_heap(Heap.begin(), Heap.end(), Greater));
    return Dropped;
  }

  // Returns candidates from best to worst.
  // Rvalue-qualified: this consumes the container (call as
  // std::move(Top).items()), handing the storage to the caller.
  std::vector<value_type> items() && {
    std::sort_heap(Heap.begin(), Heap.end(), Greater);
    assert(Heap.size() <= N);
    // Heap is a member, not a local, so the explicit move is required here.
    return std::move(Heap);
  }

private:
  const size_t N;
  std::vector<value_type> Heap; // Min-heap, comparator is Greater.
  Compare Greater;
};
229 
230 /// Returns a string that sorts in the same order as (-Score, Tiebreak), for
231 /// LSP. (The highest score compares smallest so it sorts at the top).
232 std::string sortText(float Score, llvm::StringRef Tiebreak = "");
233 
235  uint32_t NumberOfParameters = 0;
237  CodeCompleteConsumer::OverloadCandidate::CandidateKind Kind =
238  CodeCompleteConsumer::OverloadCandidate::CandidateKind::CK_Function;
239 };
240 llvm::raw_ostream &operator<<(llvm::raw_ostream &,
241  const SignatureQualitySignals &);
242 
243 } // namespace clangd
244 } // namespace clang
245 
246 #endif // LLVM_CLANG_TOOLS_EXTRA_CLANGD_QUALITY_H
clang::clangd::SymbolRelevanceSignals::merge
void merge(const CodeCompletionResult &SemaResult)
Definition: Quality.cpp:331
clang::clangd::SymbolQualitySignals::Deprecated
bool Deprecated
Definition: Quality.h:59
clang::clangd::SymbolQualitySignals::ImplementationDetail
bool ImplementationDetail
Definition: Quality.h:62
Base
std::unique_ptr< GlobalCompilationDatabase > Base
Definition: GlobalCompilationDatabaseTests.cpp:90
llvm
Some operations such as code completion produce a set of candidates.
Definition: YAMLGenerator.cpp:28
clang::clangd::SymbolRelevanceSignals::Scope
enum clang::clangd::SymbolRelevanceSignals::AccessibleScope Scope
clang::clangd::SymbolRelevanceSignals::Query
enum clang::clangd::SymbolRelevanceSignals::QueryType Query
clang::clangd::TopN::items
std::vector< value_type > items() &&
Definition: Quality.h:218
clang::clangd::SymbolRelevanceSignals::HadSymbolType
bool HadSymbolType
Definition: Quality.h:136
clang::clangd::SymbolRelevanceSignals::FilterLength
unsigned FilterLength
Length of the unqualified partial name of Symbol typed in CompletionPrefix.
Definition: Quality.h:142
clang::clangd::SignatureQualitySignals
Definition: Quality.h:234
IndexResult
const Symbol * IndexResult
Definition: CodeComplete.cpp:172
clang::clangd::evaluateSymbolAndRelevance
float evaluateSymbolAndRelevance(float SymbolQuality, float SymbolRelevance)
Combine symbol quality and relevance into a single score.
Definition: Quality.cpp:536
clang::clangd::SymbolRelevanceSignals::SymbolURI
llvm::StringRef SymbolURI
These are used to calculate proximity between the index symbol and the query.
Definition: Quality.h:103
clang::clangd::TopN::TopN
TopN(size_t N, Compare Greater=Compare())
Definition: Quality.h:194
clang::clangd::SymbolQualitySignals::Variable
@ Variable
Definition: Quality.h:67
clang::clangd::DecisionForestScores
Same semantics as CodeComplete::Score.
Definition: Quality.h:181
clang::clangd::SymbolQualitySignals::Unknown
@ Unknown
Definition: Quality.h:66
clang::clangd::SymbolRelevanceSignals::FunctionScope
@ FunctionScope
Definition: Quality.h:117
Kind
BindArgumentKind Kind
Definition: AvoidBindCheck.cpp:59
clang::clangd::SymbolRelevanceSignals::DerivedSignals::FileProximityDistance
unsigned FileProximityDistance
Min distance between SymbolURI and all the headers included by the TU.
Definition: Quality.h:158
clang::clangd::SymbolRelevanceSignals::evaluateHeuristics
float evaluateHeuristics() const
Definition: Quality.cpp:401
clang::clangd::SymbolQualitySignals::ReservedName
bool ReservedName
Definition: Quality.h:60
clang::clangd::DecisionForestScores::ExcludingName
float ExcludingName
Definition: Quality.h:183
clang::clangd::ScopeDistance
Support lookups like FileDistance, but the lookup keys are symbol scopes.
Definition: FileDistance.h:117
clang::clangd::SymbolQualitySignals
Attributes of a symbol that affect how much we like it.
Definition: Quality.h:58
clang::clangd::SymbolRelevanceSignals::computeASTSignals
void computeASTSignals(const CodeCompletionResult &SemaResult)
Definition: Quality.cpp:307
clang::clangd::SymbolRelevanceSignals::ScopeRefsInFile
unsigned ScopeRefsInFile
Number of unique symbols in the main file which belongs to candidate's namespace.
Definition: Quality.h:150
clang::clangd::SymbolRelevanceSignals::Forbidden
bool Forbidden
Definition: Quality.h:95
clang::clangd::SignatureQualitySignals::NumberOfOptionalParameters
uint32_t NumberOfOptionalParameters
Definition: Quality.h:236
clang::clangd::SymbolRelevanceSignals::AccessibleScope
AccessibleScope
Definition: Quality.h:116
clang::clangd::TopN::push
bool push(value_type &&V)
Definition: Quality.h:199
FileDistance.h
clang::clangd::SymbolQualitySignals::Constructor
@ Constructor
Definition: Quality.h:71
clang::clangd::TopN
TopN<T> is a lossy container that preserves only the "best" N elements.
Definition: Quality.h:191
clang::clangd::SymbolQualitySignals::References
unsigned References
Definition: Quality.h:63
Quality
SignatureQualitySignals Quality
Definition: CodeComplete.cpp:878
clang::clangd::TopN::value_type
T value_type
Definition: Quality.h:193
clang::clangd::SymbolQualitySignals::Destructor
@ Destructor
Definition: Quality.h:72
SemaResult
const CodeCompletionResult * SemaResult
Definition: CodeComplete.cpp:171
clang::clangd::Symbol
The class presents a C++ symbol, e.g.
Definition: Symbol.h:36
clang::clangd::SymbolRelevanceSignals::HadContextType
bool HadContextType
Definition: Quality.h:134
clang::clangd::SignatureQualitySignals::NumberOfParameters
uint32_t NumberOfParameters
Definition: Quality.h:235
ExpectedTypes.h
clang::clangd::evaluateDecisionForest
DecisionForestScores evaluateDecisionForest(const SymbolQualitySignals &Quality, const SymbolRelevanceSignals &Relevance, float Base)
Definition: Quality.cpp:541
clang::clangd::SymbolRelevanceSignals::SemaFileProximityScore
float SemaFileProximityScore
FIXME: unify with index proximity score - signals should be source-independent.
Definition: Quality.h:107
clang::clangd::SymbolQualitySignals::Function
@ Function
Definition: Quality.h:70
clang::clangd::SymbolRelevanceSignals::FileProximityMatch
URIDistance * FileProximityMatch
Definition: Quality.h:100
clang::clangd::operator<<
llvm::raw_ostream & operator<<(llvm::raw_ostream &OS, const CodeCompletion &C)
Definition: CodeComplete.cpp:2126
clang::clangd::SymbolRelevanceSignals::TypeMatchesPreferred
bool TypeMatchesPreferred
Definition: Quality.h:138
clang::clangd::SymbolRelevanceSignals::ScopeProximityMatch
ScopeDistance * ScopeProximityMatch
Definition: Quality.h:110
clang::clangd::SymbolRelevanceSignals::Name
llvm::StringRef Name
The name of the symbol (for ContextWords). Must be explicitly assigned.
Definition: Quality.h:90
clang::clangd::SymbolRelevanceSignals::DerivedSignals
Set of derived signals computed by calculateDerivedSignals().
Definition: Quality.h:154
clang::clangd::URIDistance
Definition: FileDistance.h:93
clang::clangd::SymbolRelevanceSignals::DerivedSignals::NameMatchesContext
bool NameMatchesContext
Whether Name contains some word from context.
Definition: Quality.h:156
clang::clangd::SymbolQualitySignals::Namespace
@ Namespace
Definition: Quality.h:73
clang::clangd::ASTSignals
Signals derived from a valid AST of a file.
Definition: ASTSignals.h:26
clang::clangd::SymbolRelevanceSignals::IsInstanceMember
bool IsInstanceMember
Definition: Quality.h:131
clang::clangd::SymbolRelevanceSignals::MainFileSignals
const ASTSignals * MainFileSignals
Definition: Quality.h:144
clang::clangd::SymbolRelevanceSignals::SemaSaysInScope
bool SemaSaysInScope
Definition: Quality.h:113
Score
llvm::Optional< float > Score
Definition: FuzzyMatchTests.cpp:48
clang::clangd::SymbolQualitySignals::Keyword
@ Keyword
Definition: Quality.h:74
clang::clangd::SymbolRelevanceSignals::NeedsFixIts
bool NeedsFixIts
Whether fixits needs to be applied for that completion or not.
Definition: Quality.h:97
clang::clangd::sortText
std::string sortText(float Score, llvm::StringRef Name)
Returns a string that sorts in the same order as (-Score, Tiebreak), for LSP.
Definition: Quality.cpp:613
clang::clangd::SymbolQualitySignals::Macro
@ Macro
Definition: Quality.h:68
clang::clangd::SymbolRelevanceSignals::MainFileRefs
unsigned MainFileRefs
Number of references to the candidate in the main file.
Definition: Quality.h:146
clang::clangd::SymbolRelevanceSignals::ClassScope
@ ClassScope
Definition: Quality.h:118
clang::clangd::SymbolQualitySignals::Operator
@ Operator
Definition: Quality.h:75
clang::clangd::SymbolRelevanceSignals::calculateDerivedSignals
DerivedSignals calculateDerivedSignals() const
Definition: Quality.cpp:387
clang::clangd::SymbolQualitySignals::Type
@ Type
Definition: Quality.h:69
clang::clangd::SymbolRelevanceSignals::SymbolScope
llvm::Optional< llvm::StringRef > SymbolScope
Definition: Quality.h:111
clang
===– Representation.cpp - ClangDoc Representation --------—*- C++ -*-===//
Definition: ApplyReplacements.h:27
TUScheduler.h
clang::clangd::SymbolQualitySignals::SymbolCategory
SymbolCategory
Definition: Quality.h:65
clang::clangd::SymbolQualitySignals::evaluateHeuristics
float evaluateHeuristics() const
Definition: Quality.cpp:204
clang::clangd::SymbolRelevanceSignals::ContextWords
llvm::StringSet * ContextWords
Lowercase words relevant to the context (e.g. near the completion point).
Definition: Quality.h:94
clang::clangd::SignatureQualitySignals::Kind
CodeCompleteConsumer::OverloadCandidate::CandidateKind Kind
Definition: Quality.h:237
clang::clangd::SymbolQualitySignals::Category
enum clang::clangd::SymbolQualitySignals::SymbolCategory Category
clang::clangd::SymbolRelevanceSignals::DerivedSignals::ScopeProximityDistance
unsigned ScopeProximityDistance
Min distance between SymbolScope and all the available scopes.
Definition: Quality.h:160
clang::clangd::SymbolRelevanceSignals::Generic
@ Generic
Definition: Quality.h:125
clang::clangd::SymbolRelevanceSignals::NameMatch
float NameMatch
0-1+ fuzzy-match score for unqualified name. Must be explicitly assigned.
Definition: Quality.h:92
clang::clangd::DecisionForestScores::Total
float Total
Definition: Quality.h:182
clang::clangd::SymbolRelevanceSignals::CodeComplete
@ CodeComplete
Definition: Quality.h:124
clang::clangd::SymbolRelevanceSignals::InBaseClass
bool InBaseClass
Definition: Quality.h:98
clang::clangd::SymbolRelevanceSignals::GlobalScope
@ GlobalScope
Definition: Quality.h:120
clang::clangd::SymbolRelevanceSignals
Attributes of a symbol-query pair that affect how much we like it.
Definition: Quality.h:88
clang::clangd::Context
A context is an immutable container for per-request data that must be propagated through layers that ...
Definition: Context.h:69
clang::clangd::SymbolRelevanceSignals::QueryType
QueryType
Definition: Quality.h:123
clang::clangd::FileDistance::Unreachable
static constexpr unsigned Unreachable
Definition: FileDistance.h:74
clang::clangd::SymbolRelevanceSignals::FileScope
@ FileScope
Definition: Quality.h:119
clang::clangd::SymbolQualitySignals::merge
void merge(const CodeCompletionResult &SemaCCResult)
Definition: Quality.cpp:184