clang-tools 20.0.0git
Quality.h
Go to the documentation of this file.
1//===--- Quality.h - Ranking alternatives for ambiguous queries --*- C++-*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// Some operations such as code completion produce a set of candidates.
10/// Usually the user can choose between them, but we should put the best options
11/// at the top (they're easier to select, and more likely to be seen).
12///
13/// This file defines building blocks for ranking candidates.
14/// It's used by the features directly and also in the implementation of
15/// indexes, as indexes also need to heuristically limit their results.
16///
17/// The facilities here are:
18/// - retrieving scoring signals from e.g. indexes, AST, CodeCompletionString
19/// These are structured in a way that they can be debugged, and are fairly
20/// consistent regardless of the source.
21/// - compute scores from scoring signals. These are suitable for sorting.
22/// - sorting utilities like the TopN container.
23/// These could be split up further to isolate dependencies if we care.
24///
25//===----------------------------------------------------------------------===//
26
27#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_QUALITY_H
28#define LLVM_CLANG_TOOLS_EXTRA_CLANGD_QUALITY_H
29
30#include "FileDistance.h"
31#include "clang/Sema/CodeCompleteConsumer.h"
32#include "llvm/ADT/StringRef.h"
33#include "llvm/ADT/StringSet.h"
34#include <algorithm>
35#include <functional>
36#include <optional>
37#include <vector>
38
39namespace llvm {
40class raw_ostream;
41} // namespace llvm
42
43namespace clang {
44class CodeCompletionResult;
45
46namespace clangd {
47struct ASTSignals;
48struct Symbol;
49class URIDistance;
50
51// Signals structs are designed to be aggregated from 0 or more sources.
52// A default instance has neutral signals, and sources are merged into it.
53// They can be dumped for debugging, and evaluate()d into a score.
54
55/// Attributes of a symbol that affect how much we like it.
57 bool Deprecated = false;
58 bool ReservedName = false; // __foo, _Foo are usually implementation details.
59 // FIXME: make these findable once user types _.
61 unsigned References = 0;
62
75
76 void merge(const CodeCompletionResult &SemaCCResult);
77 void merge(const Symbol &IndexResult);
78
79 // Condense these signals down to a single number, higher is better.
80 float evaluateHeuristics() const;
81};
82llvm::raw_ostream &operator<<(llvm::raw_ostream &,
83 const SymbolQualitySignals &);
84
85/// Attributes of a symbol-query pair that affect how much we like it.
87 /// The name of the symbol (for ContextWords). Must be explicitly assigned.
88 llvm::StringRef Name;
89 /// 0-1+ fuzzy-match score for unqualified name. Must be explicitly assigned.
90 float NameMatch = 1;
91 /// Lowercase words relevant to the context (e.g. near the completion point).
92 llvm::StringSet<>* ContextWords = nullptr;
93 bool Forbidden = false; // Unavailable (e.g const) or inaccessible (private).
94 /// Whether fixits needs to be applied for that completion or not.
95 bool NeedsFixIts = false;
96 bool InBaseClass = false; // A member from base class of the accessed class.
97
99 /// These are used to calculate proximity between the index symbol and the
100 /// query.
101 llvm::StringRef SymbolURI;
102 /// FIXME: unify with index proximity score - signals should be
103 /// source-independent.
104 /// Proximity between best declaration and the query. [0-1], 1 is closest.
106
107 // Scope proximity is only considered (both index and sema) when this is set.
109 std::optional<llvm::StringRef> SymbolScope;
110 // A symbol from sema should be accessible from the current scope.
111 bool SemaSaysInScope = false;
112
113 // An approximate measure of where we expect the symbol to be used.
120
125
126 CodeCompletionContext::Kind Context = CodeCompletionContext::CCC_Other;
127
128 // Whether symbol is an instance member of a class.
129 bool IsInstanceMember = false;
130
131 // Whether clang provided a preferred type in the completion context.
132 bool HadContextType = false;
133 // Whether a source completion item or a symbol had a type information.
134 bool HadSymbolType = false;
135 // Whether the item matches the type expected in the completion context.
137
138 /// Length of the unqualified partial name of Symbol typed in
139 /// CompletionPrefix.
140 unsigned FilterLength = 0;
141
142 const ASTSignals *MainFileSignals = nullptr;
143 /// Number of references to the candidate in the main file.
144 unsigned MainFileRefs = 0;
145 /// Number of unique symbols in the main file which belongs to candidate's
146 /// namespace. This indicates how relevant the namespace is in the current
147 /// file.
148 unsigned ScopeRefsInFile = 0;
149
150 /// Set of derived signals computed by calculateDerivedSignals(). Must not be
151 /// set explicitly.
153 /// Whether Name contains some word from context.
154 bool NameMatchesContext = false;
155 /// Min distance between SymbolURI and all the headers included by the TU.
157 /// Min distance between SymbolScope and all the available scopes.
159 };
160
162
163 void merge(const CodeCompletionResult &SemaResult);
164 void merge(const Symbol &IndexResult);
165 void computeASTSignals(const CodeCompletionResult &SemaResult);
166
167 // Condense these signals down to a single number, higher is better.
168 float evaluateHeuristics() const;
169};
170llvm::raw_ostream &operator<<(llvm::raw_ostream &,
171 const SymbolRelevanceSignals &);
172
173/// Combine symbol quality and relevance into a single score.
174float evaluateSymbolAndRelevance(float SymbolQuality, float SymbolRelevance);
175
176/// Same semantics as CodeComplete::Score. Quality score and Relevance score
177/// have been removed since DecisionForest cannot assign individual scores to
178/// Quality and Relevance signals.
180 float Total = 0.f;
181 float ExcludingName = 0.f;
182};
183
186 const SymbolRelevanceSignals &Relevance, float Base);
187
188/// TopN<T> is a lossy container that preserves only the "best" N elements.
189template <typename T, typename Compare = std::greater<T>> class TopN {
190public:
191 using value_type = T;
192 TopN(size_t N, Compare Greater = Compare())
193 : N(N), Greater(std::move(Greater)) {}
194
195 // Adds a candidate to the set.
196 // Returns true if a candidate was dropped to get back under N.
197 bool push(value_type &&V) {
198 bool Dropped = false;
199 if (Heap.size() >= N) {
200 Dropped = true;
201 if (N > 0 && Greater(V, Heap.front())) {
202 std::pop_heap(Heap.begin(), Heap.end(), Greater);
203 Heap.back() = std::move(V);
204 std::push_heap(Heap.begin(), Heap.end(), Greater);
205 }
206 } else {
207 Heap.push_back(std::move(V));
208 std::push_heap(Heap.begin(), Heap.end(), Greater);
209 }
210 assert(Heap.size() <= N);
211 assert(std::is_heap(Heap.begin(), Heap.end(), Greater));
212 return Dropped;
213 }
214
215 // Returns candidates from best to worst.
216 std::vector<value_type> items() && {
217 std::sort_heap(Heap.begin(), Heap.end(), Greater);
218 assert(Heap.size() <= N);
219 return std::move(Heap);
220 }
221
222private:
223 const size_t N;
224 std::vector<value_type> Heap; // Min-heap, comparator is Greater.
225 Compare Greater;
226};
227
228/// Returns a string that sorts in the same order as (-Score, Tiebreak), for
229/// LSP. (The highest score compares smallest so it sorts at the top).
230std::string sortText(float Score, llvm::StringRef Tiebreak = "");
231
233 uint32_t NumberOfParameters = 0;
235 CodeCompleteConsumer::OverloadCandidate::CandidateKind Kind =
236 CodeCompleteConsumer::OverloadCandidate::CandidateKind::CK_Function;
237};
238llvm::raw_ostream &operator<<(llvm::raw_ostream &,
240
241} // namespace clangd
242} // namespace clang
243
244#endif // LLVM_CLANG_TOOLS_EXTRA_CLANGD_QUALITY_H
const Symbol * IndexResult
SignatureQualitySignals Quality
const CodeCompletionResult * SemaResult
std::optional< float > Score
A context is an immutable container for per-request data that must be propagated through layers that ...
Definition: Context.h:69
static constexpr unsigned Unreachable
Definition: FileDistance.h:69
Support lookups like FileDistance, but the lookup keys are symbol scopes.
Definition: FileDistance.h:112
TopN<T> is a lossy container that preserves only the "best" N elements.
Definition: Quality.h:189
std::vector< value_type > items() &&
Definition: Quality.h:216
TopN(size_t N, Compare Greater=Compare())
Definition: Quality.h:192
bool push(value_type &&V)
Definition: Quality.h:197
std::string sortText(float Score, llvm::StringRef Name)
Returns a string that sorts in the same order as (-Score, Tiebreak), for LSP.
Definition: Quality.cpp:549
DecisionForestScores evaluateDecisionForest(const SymbolQualitySignals &Quality, const SymbolRelevanceSignals &Relevance, float Base)
llvm::raw_ostream & operator<<(llvm::raw_ostream &OS, const CodeCompletion &C)
float evaluateSymbolAndRelevance(float SymbolQuality, float SymbolRelevance)
Combine symbol quality and relevance into a single score.
Definition: Quality.cpp:531
===– Representation.cpp - ClangDoc Representation --------—*- C++ -*-===//
Some operations such as code completion produce a set of candidates.
Signals derived from a valid AST of a file.
Definition: ASTSignals.h:27
Same semantics as CodeComplete::Score.
Definition: Quality.h:179
CodeCompleteConsumer::OverloadCandidate::CandidateKind Kind
Definition: Quality.h:235
Attributes of a symbol that affect how much we like it.
Definition: Quality.h:56
enum clang::clangd::SymbolQualitySignals::SymbolCategory Category
void merge(const CodeCompletionResult &SemaCCResult)
Definition: Quality.cpp:175
Set of derived signals computed by calculateDerivedSignals().
Definition: Quality.h:152
bool NameMatchesContext
Whether Name contains some word from context.
Definition: Quality.h:154
unsigned FileProximityDistance
Min distance between SymbolURI and all the headers included by the TU.
Definition: Quality.h:156
unsigned ScopeProximityDistance
Min distance between SymbolScope and all the available scopes.
Definition: Quality.h:158
Attributes of a symbol-query pair that affect how much we like it.
Definition: Quality.h:86
float NameMatch
0-1+ fuzzy-match score for unqualified name. Must be explicitly assigned.
Definition: Quality.h:90
bool NeedsFixIts
Whether fixits needs to be applied for that completion or not.
Definition: Quality.h:95
DerivedSignals calculateDerivedSignals() const
Definition: Quality.cpp:382
unsigned MainFileRefs
Number of references to the candidate in the main file.
Definition: Quality.h:144
llvm::StringRef Name
The name of the symbol (for ContextWords). Must be explicitly assigned.
Definition: Quality.h:88
void computeASTSignals(const CodeCompletionResult &SemaResult)
Definition: Quality.cpp:300
llvm::StringRef SymbolURI
These are used to calculate proximity between the index symbol and the query.
Definition: Quality.h:101
unsigned ScopeRefsInFile
Number of unique symbols in the main file which belongs to candidate's namespace.
Definition: Quality.h:148
const ASTSignals * MainFileSignals
Definition: Quality.h:142
llvm::StringSet * ContextWords
Lowercase words relevant to the context (e.g. near the completion point).
Definition: Quality.h:92
unsigned FilterLength
Length of the unqualified partial name of Symbol typed in CompletionPrefix.
Definition: Quality.h:140
void merge(const CodeCompletionResult &SemaResult)
Definition: Quality.cpp:326
float SemaFileProximityScore
FIXME: unify with index proximity score - signals should be source-independent.
Definition: Quality.h:105
enum clang::clangd::SymbolRelevanceSignals::QueryType Query
enum clang::clangd::SymbolRelevanceSignals::AccessibleScope Scope
std::optional< llvm::StringRef > SymbolScope
Definition: Quality.h:109
The class presents a C++ symbol, e.g.
Definition: Symbol.h:39