clang-tools 17.0.0git
FileDistance.cpp
Go to the documentation of this file.
1//===--- FileDistance.cpp - File contents container -------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// The FileDistance structure allows calculating the minimum distance to paths
10// in a single tree.
11// We simply walk up the path's ancestors until we find a node whose cost is
12// known, and add the cost of walking back down. Initialization ensures this
13// gives the correct path to the roots.
14// We cache the results, so that the runtime is O(|A|), where A is the set of
15// all distinct ancestors of visited paths.
16//
17// Example after initialization with /=2, /bar=0, DownCost = 1:
18// / = 2
19// /bar = 0
20//
21// After querying /foo/bar and /bar/foo:
22// / = 2
23// /bar = 0
24// /bar/foo = 1
25// /foo = 3
26// /foo/bar = 4
27//
28// URIDistance creates FileDistance lazily for each URI scheme encountered. In
29// practice this is a small constant factor.
30//
31//===-------------------------------------------------------------------------//
32
33#include "FileDistance.h"
34#include "URI.h"
35#include "support/Logger.h"
36#include "llvm/ADT/STLExtras.h"
37#include "llvm/Support/Path.h"
38#include <queue>
39
40namespace clang {
41namespace clangd {
42
43// Convert a path into the canonical form.
44// Canonical form is either "/", or "/segment" * N:
45// C:\foo\bar --> /c:/foo/bar
46// /foo/ --> /foo
47// a/b/c --> /a/b/c
48static llvm::SmallString<128> canonicalize(llvm::StringRef Path) {
49 llvm::SmallString<128> Result = Path.rtrim('/');
50 native(Result, llvm::sys::path::Style::posix);
51 if (Result.empty() || Result.front() != '/')
52 Result.insert(Result.begin(), '/');
53 return Result;
54}
55
56constexpr const unsigned FileDistance::Unreachable;
57const llvm::hash_code FileDistance::RootHash =
58 llvm::hash_value(llvm::StringRef("/"));
59
60FileDistance::FileDistance(llvm::StringMap<SourceParams> Sources,
61 const FileDistanceOptions &Opts)
62 : Opts(Opts) {
63 llvm::DenseMap<llvm::hash_code, llvm::SmallVector<llvm::hash_code>> DownEdges;
64 // Compute the best distance following only up edges.
65 // Keep track of down edges, in case we can use them to improve on this.
66 for (const auto &S : Sources) {
67 auto Canonical = canonicalize(S.getKey());
68 dlog("Source {0} = {1}, MaxUp = {2}", Canonical, S.second.Cost,
69 S.second.MaxUpTraversals);
70 // Walk up to ancestors of this source, assigning cost.
71 llvm::StringRef Rest = Canonical;
72 llvm::hash_code Hash = llvm::hash_value(Rest);
73 for (unsigned I = 0; !Rest.empty(); ++I) {
74 Rest = parent_path(Rest, llvm::sys::path::Style::posix);
75 auto NextHash = llvm::hash_value(Rest);
76 auto &Down = DownEdges[NextHash];
77 if (!llvm::is_contained(Down, Hash))
78 Down.push_back(Hash);
79 // We can't just break after MaxUpTraversals, must still set DownEdges.
80 if (I > S.getValue().MaxUpTraversals) {
81 if (Cache.contains(Hash))
82 break;
83 } else {
84 unsigned Cost = S.getValue().Cost + I * Opts.UpCost;
85 auto R = Cache.try_emplace(Hash, Cost);
86 if (!R.second) {
87 if (Cost < R.first->second) {
88 R.first->second = Cost;
89 } else {
90 // If we're not the best way to get to this path, stop assigning.
91 break;
92 }
93 }
94 }
95 Hash = NextHash;
96 }
97 }
98 // Now propagate scores parent -> child if that's an improvement.
99 // BFS ensures we propagate down chains (must visit parents before children).
100 std::queue<llvm::hash_code> Next;
101 for (auto Child : DownEdges.lookup(llvm::hash_value(llvm::StringRef(""))))
102 Next.push(Child);
103 while (!Next.empty()) {
104 auto Parent = Next.front();
105 Next.pop();
106 auto ParentCost = Cache.lookup(Parent);
107 for (auto Child : DownEdges.lookup(Parent)) {
108 if (Parent != RootHash || Opts.AllowDownTraversalFromRoot) {
109 auto &ChildCost =
110 Cache.try_emplace(Child, Unreachable).first->getSecond();
111 if (ParentCost + Opts.DownCost < ChildCost)
112 ChildCost = ParentCost + Opts.DownCost;
113 }
114 Next.push(Child);
115 }
116 }
117}
118
119unsigned FileDistance::distance(llvm::StringRef Path) {
120 auto Canonical = canonicalize(Path);
121 unsigned Cost = Unreachable;
122 llvm::SmallVector<llvm::hash_code> Ancestors;
123 // Walk up ancestors until we find a path we know the distance for.
124 for (llvm::StringRef Rest = Canonical; !Rest.empty();
125 Rest = parent_path(Rest, llvm::sys::path::Style::posix)) {
126 auto Hash = llvm::hash_value(Rest);
127 if (Hash == RootHash && !Ancestors.empty() &&
129 Cost = Unreachable;
130 break;
131 }
132 auto It = Cache.find(Hash);
133 if (It != Cache.end()) {
134 Cost = It->second;
135 break;
136 }
137 Ancestors.push_back(Hash);
138 }
139 // Now we know the costs for (known node, queried node].
140 // Fill these in, walking down the directory tree.
141 for (llvm::hash_code Hash : llvm::reverse(Ancestors)) {
142 if (Cost != Unreachable)
143 Cost += Opts.DownCost;
144 Cache.try_emplace(Hash, Cost);
145 }
146 dlog("distance({0} = {1})", Path, Cost);
147 return Cost;
148}
149
150unsigned URIDistance::distance(llvm::StringRef URI) {
151 auto R = Cache.try_emplace(llvm::hash_value(URI), FileDistance::Unreachable);
152 if (!R.second)
153 return R.first->getSecond();
154 if (auto U = clangd::URI::parse(URI)) {
155 dlog("distance({0} = {1})", URI, U->body());
156 R.first->second = forScheme(U->scheme()).distance(U->body());
157 } else {
158 log("URIDistance::distance() of unparseable {0}: {1}", URI, U.takeError());
159 }
160 return R.first->second;
161}
162
163FileDistance &URIDistance::forScheme(llvm::StringRef Scheme) {
164 auto &Delegate = ByScheme[Scheme];
165 if (!Delegate) {
166 llvm::StringMap<SourceParams> SchemeSources;
167 for (const auto &Source : Sources) {
168 if (auto U = clangd::URI::create(Source.getKey(), Scheme))
169 SchemeSources.try_emplace(U->body(), Source.getValue());
170 else
171 llvm::consumeError(U.takeError());
172 }
173 dlog("FileDistance for scheme {0}: {1}/{2} sources", Scheme,
174 SchemeSources.size(), Sources.size());
175 Delegate.reset(new FileDistance(std::move(SchemeSources), Opts));
176 }
177 return *Delegate;
178}
179
180static std::pair<std::string, int> scopeToPath(llvm::StringRef Scope) {
181 llvm::SmallVector<llvm::StringRef> Split;
182 Scope.split(Split, "::", /*MaxSplit=*/-1, /*KeepEmpty=*/false);
183 return {"/" + llvm::join(Split, "/"), Split.size()};
184}
185
186static FileDistance
187createScopeFileDistance(llvm::ArrayRef<std::string> QueryScopes) {
189 Opts.UpCost = 2;
190 Opts.DownCost = 4;
191 Opts.AllowDownTraversalFromRoot = false;
192
193 llvm::StringMap<SourceParams> Sources;
194 llvm::StringRef Preferred =
195 QueryScopes.empty() ? "" : QueryScopes.front().c_str();
196 for (llvm::StringRef S : QueryScopes) {
197 SourceParams Param;
198 // Penalize the global scope even it's preferred, as all projects can define
199 // symbols in it, and there is pattern where using-namespace is used in
200 // place of enclosing namespaces (e.g. in implementation files).
201 if (S == Preferred)
202 Param.Cost = S == "" ? 4 : 0;
203 else if (Preferred.startswith(S) && !S.empty())
204 continue; // just rely on up-traversals.
205 else
206 Param.Cost = S == "" ? 6 : 2;
207 auto Path = scopeToPath(S);
208 // The global namespace is not 'near' its children.
209 Param.MaxUpTraversals = std::max(Path.second - 1, 0);
210 Sources[Path.first] = std::move(Param);
211 }
212 return FileDistance(std::move(Sources), Opts);
213}
214
215ScopeDistance::ScopeDistance(llvm::ArrayRef<std::string> QueryScopes)
217
218unsigned ScopeDistance::distance(llvm::StringRef SymbolScope) {
219 return Distance.distance(scopeToPath(SymbolScope).first);
220}
221
222} // namespace clangd
223} // namespace clang
std::vector< std::string > QueryScopes
const Node * Parent
#define dlog(...)
Definition: Logger.h:101
static constexpr unsigned Unreachable
Definition: FileDistance.h:69
FileDistance(llvm::StringMap< SourceParams > Sources, const FileDistanceOptions &Opts={})
unsigned distance(llvm::StringRef Path)
static const llvm::hash_code RootHash
Definition: FileDistance.h:70
unsigned distance(llvm::StringRef SymbolScope)
ScopeDistance(llvm::ArrayRef< std::string > QueryScopes)
QueryScopes[0] is the preferred scope.
unsigned distance(llvm::StringRef URI)
A URI describes the location of a source file.
Definition: URI.h:28
static llvm::Expected< URI > create(llvm::StringRef AbsolutePath, llvm::StringRef Scheme)
Creates a URI for a file in the given scheme.
Definition: URI.cpp:209
llvm::StringRef body() const
Returns decoded body e.g. "/D41946".
Definition: URI.h:37
static llvm::Expected< URI > parse(llvm::StringRef Uri)
Parse a URI string "<scheme>:[//<authority>/]<path>".
Definition: URI.cpp:177
static FileDistance createScopeFileDistance(llvm::ArrayRef< std::string > QueryScopes)
std::string Path
A typedef to represent a file path.
Definition: Path.h:26
void log(const char *Fmt, Ts &&... Vals)
Definition: Logger.h:67
static llvm::SmallString< 128 > canonicalize(llvm::StringRef Path)
static std::pair< std::string, int > scopeToPath(llvm::StringRef Scope)
===– Representation.cpp - ClangDoc Representation --------—*- C++ -*-===//