clang-tools  14.0.0git
FileDistance.h
Go to the documentation of this file.
1 //===--- FileDistance.h - File proximity scoring -----------------*- C++-*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This library measures the distance between file paths.
10 // It's used for ranking symbols, e.g. in code completion.
11 // |foo/bar.h -> foo/bar.h| = 0.
12 // |foo/bar.h -> foo/baz.h| < |foo/bar.h -> baz.h|.
13 // This is an edit-distance, where edits go up or down the directory tree.
14 // It's not symmetrical, the costs of going up and down may not match.
15 //
16 // Dealing with multiple sources:
17 // In practice we care about the distance from a source file, but files near
18 // its main-header and #included files are considered "close".
19 // So we start with a set of (anchor, cost) pairs, and call the distance to a
20 // path the minimum of `cost + |source -> path|`.
21 //
22 // We allow each source to limit the number of up-traversals paths may start
23 // with. Up-traversals may reach things that are not "semantically near".
24 //
25 // Symbol URI schemes:
26 // Symbol locations may be represented by URIs rather than file paths directly.
27 // In this case we want to perform distance computations in URI space rather
28 // than in file-space, without performing redundant conversions.
29 // Therefore we have a lookup structure that accepts URIs, so that intermediate
30 // calculations for the same scheme can be reused.
31 //
32 // Caveats:
33 // Assuming up and down traversals each have uniform costs is simplistic.
34 // Often there are "semantic roots" whose children are almost unrelated.
35 // (e.g. /usr/include/, or / in an umbrella repository). We ignore this.
36 //
37 //===----------------------------------------------------------------------===//
38 
39 #ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_FILEDISTANCE_H
40 #define LLVM_CLANG_TOOLS_EXTRA_CLANGD_FILEDISTANCE_H
41 
42 #include "URI.h"
43 #include "llvm/ADT/DenseMap.h"
44 #include "llvm/ADT/DenseMapInfo.h"
45 #include "llvm/ADT/SmallString.h"
46 #include "llvm/ADT/StringMap.h"
47 #include "llvm/ADT/StringRef.h"
48 #include "llvm/Support/Allocator.h"
49 #include "llvm/Support/Path.h"
50 #include "llvm/Support/StringSaver.h"
51 #include <memory>
52 
53 namespace clang {
54 namespace clangd {
55 
57  unsigned UpCost = 2; // |foo/bar.h -> foo|
58  unsigned DownCost = 1; // |foo -> foo/bar.h|
59  unsigned IncludeCost = 2; // |foo.cc -> included_header.h|
60  bool AllowDownTraversalFromRoot = true; // | / -> /a |
61 };
62 
63 struct SourceParams {
64  // Base cost for paths starting at this source.
65  unsigned Cost = 0;
66  // Limits the number of upwards traversals allowed from this source.
67  unsigned MaxUpTraversals = std::numeric_limits<unsigned>::max();
68 };
69 
70 // Supports lookups to find the minimum distance to a file from any source.
71 // This object should be reused, it memoizes intermediate computations.
72 class FileDistance {
73 public:
74  static constexpr unsigned Unreachable = std::numeric_limits<unsigned>::max();
75  static const llvm::hash_code RootHash;
76 
77  FileDistance(llvm::StringMap<SourceParams> Sources,
78  const FileDistanceOptions &Opts = {});
79 
80  // Computes the minimum distance from any source to the file path.
81  unsigned distance(llvm::StringRef Path);
82 
83 private:
84  // Costs computed so far. Always contains sources and their ancestors.
85  // We store hash codes only. Collisions are rare and consequences aren't dire.
86  llvm::DenseMap<llvm::hash_code, unsigned> Cache;
88 };
89 
90 // Supports lookups like FileDistance, but the lookup keys are URIs.
91 // We convert each of the sources to the scheme of the URI and do a FileDistance
92 // comparison on the bodies.
93 class URIDistance {
94 public:
95  // \p Sources must contain absolute paths, not URIs.
96  URIDistance(llvm::StringMap<SourceParams> Sources,
97  const FileDistanceOptions &Opts = {})
98  : Sources(Sources), Opts(Opts) {}
99 
100  // Computes the minimum distance from any source to the URI.
101  // Only sources that can be mapped into the URI's scheme are considered.
102  unsigned distance(llvm::StringRef URI);
103 
104 private:
105  // Returns the FileDistance for a URI scheme, creating it if needed.
106  FileDistance &forScheme(llvm::StringRef Scheme);
107 
108  // We cache the results using the original strings so we can skip URI parsing.
109  llvm::DenseMap<llvm::hash_code, unsigned> Cache;
110  llvm::StringMap<SourceParams> Sources;
111  llvm::StringMap<std::unique_ptr<FileDistance>> ByScheme;
112  FileDistanceOptions Opts;
113 };
114 
115 /// Support lookups like FileDistance, but the lookup keys are symbol scopes.
116 /// For example, a scope "na::nb::" is converted to "/na/nb".
118 public:
119  /// QueryScopes[0] is the preferred scope.
120  ScopeDistance(llvm::ArrayRef<std::string> QueryScopes);
121 
122  unsigned distance(llvm::StringRef SymbolScope);
123 
124 private:
125  FileDistance Distance;
126 };
127 
128 } // namespace clangd
129 } // namespace clang
130 
131 #endif // LLVM_CLANG_TOOLS_EXTRA_CLANGD_FILEDISTANCE_H
clang::clangd::FileDistanceOptions
Definition: FileDistance.h:56
clang::clangd::Path
std::string Path
A typedef to represent a file path.
Definition: Path.h:26
clang::clangd::URIDistance::URIDistance
URIDistance(llvm::StringMap< SourceParams > Sources, const FileDistanceOptions &Opts={})
Definition: FileDistance.h:96
clang::clangd::FileDistanceOptions::UpCost
unsigned UpCost
Definition: FileDistance.h:57
clang::clangd::ScopeDistance
Support lookups like FileDistance, but the lookup keys are symbol scopes.
Definition: FileDistance.h:117
clang::clangd::FileDistance::distance
unsigned distance(llvm::StringRef Path)
Definition: FileDistance.cpp:117
clang::clangd::FileDistanceOptions::IncludeCost
unsigned IncludeCost
Definition: FileDistance.h:59
clang::clangd::SourceParams::MaxUpTraversals
unsigned MaxUpTraversals
Definition: FileDistance.h:67
clang::clangd::ScopeDistance::ScopeDistance
ScopeDistance(llvm::ArrayRef< std::string > QueryScopes)
QueryScopes[0] is the preferred scope.
Definition: FileDistance.cpp:213
clang::clangd::FileDistance::FileDistance
FileDistance(llvm::StringMap< SourceParams > Sources, const FileDistanceOptions &Opts={})
Definition: FileDistance.cpp:58
clang::clangd::URIDistance
Definition: FileDistance.h:93
clang::clangd::FileDistance::RootHash
static const llvm::hash_code RootHash
Definition: FileDistance.h:75
clang::clangd::FileDistanceOptions::DownCost
unsigned DownCost
Definition: FileDistance.h:58
clang
===– Representation.cpp - ClangDoc Representation --------—*- C++ -*-===//
Definition: ApplyReplacements.h:27
clang::clangd::SourceParams::Cost
unsigned Cost
Definition: FileDistance.h:65
clang::clangd::SourceParams
Definition: FileDistance.h:63
clang::clangd::FileDistanceOptions::AllowDownTraversalFromRoot
bool AllowDownTraversalFromRoot
Definition: FileDistance.h:60
URI.h
clang::clangd::URIDistance::distance
unsigned distance(llvm::StringRef URI)
Definition: FileDistance.cpp:148
clang::clangd::FileDistance
Definition: FileDistance.h:72
clang::clangd::ScopeDistance::distance
unsigned distance(llvm::StringRef SymbolScope)
Definition: FileDistance.cpp:216
clang::clangd::FileDistance::Unreachable
static constexpr unsigned Unreachable
Definition: FileDistance.h:74