clang-tools  14.0.0git
FileDistance.cpp
Go to the documentation of this file.
1 //===--- FileDistance.cpp - File contents container -------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // The FileDistance structure allows calculating the minimum distance to paths
10 // in a single tree.
11 // We simply walk up the path's ancestors until we find a node whose cost is
12 // known, and add the cost of walking back down. Initialization ensures this
13 // gives the correct path to the roots.
14 // We cache the results, so that the runtime is O(|A|), where A is the set of
15 // all distinct ancestors of visited paths.
16 //
17 // Example after initialization with /=2, /bar=0, DownCost = 1:
18 // / = 2
19 // /bar = 0
20 //
21 // After querying /foo/bar and /bar/foo:
22 // / = 2
23 // /bar = 0
24 // /bar/foo = 1
25 // /foo = 3
26 // /foo/bar = 4
27 //
28 // URIDistance creates FileDistance lazily for each URI scheme encountered. In
29 // practice this is a small constant factor.
30 //
31 //===-------------------------------------------------------------------------//
32 
33 #include "FileDistance.h"
34 #include "support/Logger.h"
35 #include "llvm/ADT/STLExtras.h"
36 #include <queue>
37 
38 namespace clang {
39 namespace clangd {
40 
41 // Convert a path into the canonical form.
42 // Canonical form is either "/", or "/segment" * N:
43 // C:\foo\bar --> /c:/foo/bar
44 // /foo/ --> /foo
45 // a/b/c --> /a/b/c
46 static llvm::SmallString<128> canonicalize(llvm::StringRef Path) {
47  llvm::SmallString<128> Result = Path.rtrim('/');
48  native(Result, llvm::sys::path::Style::posix);
49  if (Result.empty() || Result.front() != '/')
50  Result.insert(Result.begin(), '/');
51  return Result;
52 }
53 
54 constexpr const unsigned FileDistance::Unreachable;
55 const llvm::hash_code FileDistance::RootHash =
56  llvm::hash_value(llvm::StringRef("/"));
57 
58 FileDistance::FileDistance(llvm::StringMap<SourceParams> Sources,
59  const FileDistanceOptions &Opts)
60  : Opts(Opts) {
61  llvm::DenseMap<llvm::hash_code, llvm::SmallVector<llvm::hash_code>> DownEdges;
62  // Compute the best distance following only up edges.
63  // Keep track of down edges, in case we can use them to improve on this.
64  for (const auto &S : Sources) {
65  auto Canonical = canonicalize(S.getKey());
66  dlog("Source {0} = {1}, MaxUp = {2}", Canonical, S.second.Cost,
67  S.second.MaxUpTraversals);
68  // Walk up to ancestors of this source, assigning cost.
69  llvm::StringRef Rest = Canonical;
70  llvm::hash_code Hash = llvm::hash_value(Rest);
71  for (unsigned I = 0; !Rest.empty(); ++I) {
72  Rest = parent_path(Rest, llvm::sys::path::Style::posix);
73  auto NextHash = llvm::hash_value(Rest);
74  auto &Down = DownEdges[NextHash];
75  if (!llvm::is_contained(Down, Hash))
76  Down.push_back(Hash);
77  // We can't just break after MaxUpTraversals, must still set DownEdges.
78  if (I > S.getValue().MaxUpTraversals) {
79  if (Cache.find(Hash) != Cache.end())
80  break;
81  } else {
82  unsigned Cost = S.getValue().Cost + I * Opts.UpCost;
83  auto R = Cache.try_emplace(Hash, Cost);
84  if (!R.second) {
85  if (Cost < R.first->second) {
86  R.first->second = Cost;
87  } else {
88  // If we're not the best way to get to this path, stop assigning.
89  break;
90  }
91  }
92  }
93  Hash = NextHash;
94  }
95  }
96  // Now propagate scores parent -> child if that's an improvement.
97  // BFS ensures we propagate down chains (must visit parents before children).
98  std::queue<llvm::hash_code> Next;
99  for (auto Child : DownEdges.lookup(llvm::hash_value(llvm::StringRef(""))))
100  Next.push(Child);
101  while (!Next.empty()) {
102  auto Parent = Next.front();
103  Next.pop();
104  auto ParentCost = Cache.lookup(Parent);
105  for (auto Child : DownEdges.lookup(Parent)) {
106  if (Parent != RootHash || Opts.AllowDownTraversalFromRoot) {
107  auto &ChildCost =
108  Cache.try_emplace(Child, Unreachable).first->getSecond();
109  if (ParentCost + Opts.DownCost < ChildCost)
110  ChildCost = ParentCost + Opts.DownCost;
111  }
112  Next.push(Child);
113  }
114  }
115 }
116 
117 unsigned FileDistance::distance(llvm::StringRef Path) {
118  auto Canonical = canonicalize(Path);
119  unsigned Cost = Unreachable;
120  llvm::SmallVector<llvm::hash_code> Ancestors;
121  // Walk up ancestors until we find a path we know the distance for.
122  for (llvm::StringRef Rest = Canonical; !Rest.empty();
123  Rest = parent_path(Rest, llvm::sys::path::Style::posix)) {
124  auto Hash = llvm::hash_value(Rest);
125  if (Hash == RootHash && !Ancestors.empty() &&
127  Cost = Unreachable;
128  break;
129  }
130  auto It = Cache.find(Hash);
131  if (It != Cache.end()) {
132  Cost = It->second;
133  break;
134  }
135  Ancestors.push_back(Hash);
136  }
137  // Now we know the costs for (known node, queried node].
138  // Fill these in, walking down the directory tree.
139  for (llvm::hash_code Hash : llvm::reverse(Ancestors)) {
140  if (Cost != Unreachable)
141  Cost += Opts.DownCost;
142  Cache.try_emplace(Hash, Cost);
143  }
144  dlog("distance({0} = {1})", Path, Cost);
145  return Cost;
146 }
147 
148 unsigned URIDistance::distance(llvm::StringRef URI) {
149  auto R = Cache.try_emplace(llvm::hash_value(URI), FileDistance::Unreachable);
150  if (!R.second)
151  return R.first->getSecond();
152  if (auto U = clangd::URI::parse(URI)) {
153  dlog("distance({0} = {1})", URI, U->body());
154  R.first->second = forScheme(U->scheme()).distance(U->body());
155  } else {
156  log("URIDistance::distance() of unparseable {0}: {1}", URI, U.takeError());
157  }
158  return R.first->second;
159 }
160 
161 FileDistance &URIDistance::forScheme(llvm::StringRef Scheme) {
162  auto &Delegate = ByScheme[Scheme];
163  if (!Delegate) {
164  llvm::StringMap<SourceParams> SchemeSources;
165  for (const auto &Source : Sources) {
166  if (auto U = clangd::URI::create(Source.getKey(), Scheme))
167  SchemeSources.try_emplace(U->body(), Source.getValue());
168  else
169  llvm::consumeError(U.takeError());
170  }
171  dlog("FileDistance for scheme {0}: {1}/{2} sources", Scheme,
172  SchemeSources.size(), Sources.size());
173  Delegate.reset(new FileDistance(std::move(SchemeSources), Opts));
174  }
175  return *Delegate;
176 }
177 
178 static std::pair<std::string, int> scopeToPath(llvm::StringRef Scope) {
179  llvm::SmallVector<llvm::StringRef> Split;
180  Scope.split(Split, "::", /*MaxSplit=*/-1, /*KeepEmpty=*/false);
181  return {"/" + llvm::join(Split, "/"), Split.size()};
182 }
183 
184 static FileDistance
185 createScopeFileDistance(llvm::ArrayRef<std::string> QueryScopes) {
186  FileDistanceOptions Opts;
187  Opts.UpCost = 2;
188  Opts.DownCost = 4;
189  Opts.AllowDownTraversalFromRoot = false;
190 
191  llvm::StringMap<SourceParams> Sources;
192  llvm::StringRef Preferred =
193  QueryScopes.empty() ? "" : QueryScopes.front().c_str();
194  for (llvm::StringRef S : QueryScopes) {
195  SourceParams Param;
196  // Penalize the global scope even it's preferred, as all projects can define
197  // symbols in it, and there is pattern where using-namespace is used in
198  // place of enclosing namespaces (e.g. in implementation files).
199  if (S == Preferred)
200  Param.Cost = S == "" ? 4 : 0;
201  else if (Preferred.startswith(S) && !S.empty())
202  continue; // just rely on up-traversals.
203  else
204  Param.Cost = S == "" ? 6 : 2;
205  auto Path = scopeToPath(S);
206  // The global namespace is not 'near' its children.
207  Param.MaxUpTraversals = std::max(Path.second - 1, 0);
208  Sources[Path.first] = std::move(Param);
209  }
210  return FileDistance(std::move(Sources), Opts);
211 }
212 
213 ScopeDistance::ScopeDistance(llvm::ArrayRef<std::string> QueryScopes)
214  : Distance(createScopeFileDistance(QueryScopes)) {}
215 
216 unsigned ScopeDistance::distance(llvm::StringRef SymbolScope) {
217  return Distance.distance(scopeToPath(SymbolScope).first);
218 }
219 
220 } // namespace clangd
221 } // namespace clang
dlog
#define dlog(...)
Definition: Logger.h:102
clang::clangd::FileDistanceOptions
Definition: FileDistance.h:56
clang::clangd::URI::body
llvm::StringRef body() const
Returns decoded body e.g. "/D41946".
Definition: URI.h:37
clang::clangd::Path
std::string Path
A typedef to represent a file path.
Definition: Path.h:26
clang::clangd::URI::create
static llvm::Expected< URI > create(llvm::StringRef AbsolutePath, llvm::StringRef Scheme)
Creates a URI for a file in the given scheme.
Definition: URI.cpp:211
clang::tidy::cppcoreguidelines::join
static std::string join(ArrayRef< SpecialMemberFunctionsCheck::SpecialMemberFunctionKind > SMFS, llvm::StringRef AndOr)
Definition: SpecialMemberFunctionsCheck.cpp:78
clang::clangd::scopeToPath
static std::pair< std::string, int > scopeToPath(llvm::StringRef Scope)
Definition: FileDistance.cpp:178
clang::clangd::URI::parse
static llvm::Expected< URI > parse(llvm::StringRef Uri)
Parse a URI string "<scheme>:[//<authority>/]<path>".
Definition: URI.cpp:179
clang::clangd::FileDistance::distance
unsigned distance(llvm::StringRef Path)
Definition: FileDistance.cpp:117
clang::clangd::canonicalize
static llvm::SmallString< 128 > canonicalize(llvm::StringRef Path)
Definition: FileDistance.cpp:46
FileDistance.h
clang::clangd::hash_value
llvm::hash_code hash_value(const SymbolID &ID)
Definition: SymbolID.cpp:49
clang::clangd::SourceParams::MaxUpTraversals
unsigned MaxUpTraversals
Definition: FileDistance.h:67
Logger.h
clang::clangd::ScopeDistance::ScopeDistance
ScopeDistance(llvm::ArrayRef< std::string > QueryScopes)
QueryScopes[0] is the preferred scope.
Definition: FileDistance.cpp:213
clang::clangd::log
void log(const char *Fmt, Ts &&... Vals)
Definition: Logger.h:68
Parent
const Node * Parent
Definition: ExtractFunction.cpp:152
clang::clangd::FileDistance::FileDistance
FileDistance(llvm::StringMap< SourceParams > Sources, const FileDistanceOptions &Opts={})
Definition: FileDistance.cpp:58
clang::clangd::FileDistance::RootHash
static const llvm::hash_code RootHash
Definition: FileDistance.h:75
clang::clangd::FileDistanceOptions::DownCost
unsigned DownCost
Definition: FileDistance.h:58
clang
===– Representation.cpp - ClangDoc Representation --------—*- C++ -*-===//
Definition: ApplyReplacements.h:27
clang::clangd::SourceParams::Cost
unsigned Cost
Definition: FileDistance.h:65
clang::clangd::SourceParams
Definition: FileDistance.h:63
clang::clangd::FileDistanceOptions::AllowDownTraversalFromRoot
bool AllowDownTraversalFromRoot
Definition: FileDistance.h:60
clang::clangd::createScopeFileDistance
static FileDistance createScopeFileDistance(llvm::ArrayRef< std::string > QueryScopes)
Definition: FileDistance.cpp:185
clang::clangd::URIDistance::distance
unsigned distance(llvm::StringRef URI)
Definition: FileDistance.cpp:148
clang::clangd::URI
A URI describes the location of a source file.
Definition: URI.h:28
clang::clangd::FileDistance
Definition: FileDistance.h:72
clang::clangd::ScopeDistance::distance
unsigned distance(llvm::StringRef SymbolScope)
Definition: FileDistance.cpp:216
clang::clangd::FileDistance::Unreachable
static constexpr unsigned Unreachable
Definition: FileDistance.h:74