clang-tools  14.0.0git
URI.cpp
Go to the documentation of this file.
1 //===---- URI.cpp - File URIs with schemes -----------------------*- C++-*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "URI.h"
10 #include "support/Logger.h"
11 #include "llvm/ADT/StringExtras.h"
12 #include "llvm/ADT/Twine.h"
13 #include "llvm/Support/Error.h"
14 #include "llvm/Support/Format.h"
15 #include "llvm/Support/FormatVariadic.h"
16 #include "llvm/Support/Path.h"
17 #include <algorithm>
18 
19 LLVM_INSTANTIATE_REGISTRY(clang::clangd::URISchemeRegistry)
20 
21 namespace clang {
22 namespace clangd {
23 namespace {
24 
25 bool isWindowsPath(llvm::StringRef Path) {
26  return Path.size() > 1 && llvm::isAlpha(Path[0]) && Path[1] == ':';
27 }
28 
29 bool isNetworkPath(llvm::StringRef Path) {
30  return Path.size() > 2 && Path[0] == Path[1] &&
31  llvm::sys::path::is_separator(Path[0]);
32 }
33 
34 /// This manages file paths in the file system. All paths in the scheme
35 /// are absolute (with leading '/').
36 /// Note that this scheme is hardcoded into the library and not registered in
37 /// registry.
38 class FileSystemScheme : public URIScheme {
39 public:
40  llvm::Expected<std::string>
41  getAbsolutePath(llvm::StringRef Authority, llvm::StringRef Body,
42  llvm::StringRef /*HintPath*/) const override {
43  if (!Body.startswith("/"))
44  return error("File scheme: expect body to be an absolute path starting "
45  "with '/': {0}",
46  Body);
47  llvm::SmallString<128> Path;
48  if (!Authority.empty()) {
49  // Windows UNC paths e.g. file://server/share => \\server\share
50  ("//" + Authority).toVector(Path);
51  } else if (isWindowsPath(Body.substr(1))) {
52  // Windows paths e.g. file:///X:/path => X:\path
53  Body.consume_front("/");
54  }
55  Path.append(Body);
56  llvm::sys::path::native(Path);
57  return std::string(Path);
58  }
59 
60  llvm::Expected<URI>
61  uriFromAbsolutePath(llvm::StringRef AbsolutePath) const override {
62  std::string Body;
63  llvm::StringRef Authority;
64  llvm::StringRef Root = llvm::sys::path::root_name(AbsolutePath);
65  if (isNetworkPath(Root)) {
66  // Windows UNC paths e.g. \\server\share => file://server/share
67  Authority = Root.drop_front(2);
68  AbsolutePath.consume_front(Root);
69  } else if (isWindowsPath(Root)) {
70  // Windows paths e.g. X:\path => file:///X:/path
71  Body = "/";
72  }
73  Body += llvm::sys::path::convert_to_slash(AbsolutePath);
74  return URI("file", Authority, Body);
75  }
76 };
77 
78 llvm::Expected<std::unique_ptr<URIScheme>>
79 findSchemeByName(llvm::StringRef Scheme) {
80  if (Scheme == "file")
81  return std::make_unique<FileSystemScheme>();
82 
83  for (const auto &URIScheme : URISchemeRegistry::entries()) {
84  if (URIScheme.getName() != Scheme)
85  continue;
86  return URIScheme.instantiate();
87  }
88  return error("Can't find scheme: {0}", Scheme);
89 }
90 
/// Returns true if \p C must be percent-encoded when a URI is serialized.
bool shouldEscape(unsigned char C) {
  // Unreserved characters: ASCII letters and digits.
  const bool IsLower = 'a' <= C && C <= 'z';
  const bool IsUpper = 'A' <= C && C <= 'Z';
  const bool IsDigit = '0' <= C && C <= '9';
  if (IsLower || IsUpper || IsDigit)
    return false;
  // Punctuation that is emitted verbatim:
  //  - '-', '_', '.', '~' are unreserved.
  //  - '/' is only reserved when parsing.
  //  - ':' is only reserved for relative URI paths, which clangd doesn't
  //    produce.
  return C != '-' && C != '_' && C != '.' && C != '~' && C != '/' && C != ':';
}
108 
109 /// Encodes a string according to percent-encoding.
110 /// - Unreserved characters are not escaped.
111 /// - Reserved characters always escaped with exceptions like '/'.
112 /// - All other characters are escaped.
113 void percentEncode(llvm::StringRef Content, std::string &Out) {
114  for (unsigned char C : Content)
115  if (shouldEscape(C)) {
116  Out.push_back('%');
117  Out.push_back(llvm::hexdigit(C / 16));
118  Out.push_back(llvm::hexdigit(C % 16));
119  } else {
120  Out.push_back(C);
121  }
122 }
123 
124 /// Decodes a string according to percent-encoding.
125 std::string percentDecode(llvm::StringRef Content) {
126  std::string Result;
127  for (auto I = Content.begin(), E = Content.end(); I != E; ++I) {
128  if (*I != '%') {
129  Result += *I;
130  continue;
131  }
132  if (*I == '%' && I + 2 < Content.end() && llvm::isHexDigit(*(I + 1)) &&
133  llvm::isHexDigit(*(I + 2))) {
134  Result.push_back(llvm::hexFromNibbles(*(I + 1), *(I + 2)));
135  I += 2;
136  } else
137  Result.push_back(*I);
138  }
139  return Result;
140 }
141 
142 bool isValidScheme(llvm::StringRef Scheme) {
143  if (Scheme.empty())
144  return false;
145  if (!llvm::isAlpha(Scheme[0]))
146  return false;
147  return std::all_of(Scheme.begin() + 1, Scheme.end(), [](char C) {
148  return llvm::isAlnum(C) || C == '+' || C == '.' || C == '-';
149  });
150 }
151 
152 } // namespace
153 
154 URI::URI(llvm::StringRef Scheme, llvm::StringRef Authority,
155  llvm::StringRef Body)
156  : Scheme(Scheme), Authority(Authority), Body(Body) {
157  assert(!Scheme.empty());
158  assert((Authority.empty() || Body.startswith("/")) &&
159  "URI body must start with '/' when authority is present.");
160 }
161 
162 std::string URI::toString() const {
163  std::string Result;
164  percentEncode(Scheme, Result);
165  Result.push_back(':');
166  if (Authority.empty() && Body.empty())
167  return Result;
168  // If authority if empty, we only print body if it starts with "/"; otherwise,
169  // the URI is invalid.
170  if (!Authority.empty() || llvm::StringRef(Body).startswith("/"))
171  {
172  Result.append("//");
173  percentEncode(Authority, Result);
174  }
175  percentEncode(Body, Result);
176  return Result;
177 }
178 
179 llvm::Expected<URI> URI::parse(llvm::StringRef OrigUri) {
180  URI U;
181  llvm::StringRef Uri = OrigUri;
182 
183  auto Pos = Uri.find(':');
184  if (Pos == llvm::StringRef::npos)
185  return error("Scheme must be provided in URI: {0}", OrigUri);
186  auto SchemeStr = Uri.substr(0, Pos);
187  U.Scheme = percentDecode(SchemeStr);
188  if (!isValidScheme(U.Scheme))
189  return error("Invalid scheme: {0} (decoded: {1})", SchemeStr, U.Scheme);
190  Uri = Uri.substr(Pos + 1);
191  if (Uri.consume_front("//")) {
192  Pos = Uri.find('/');
193  U.Authority = percentDecode(Uri.substr(0, Pos));
194  Uri = Uri.substr(Pos);
195  }
196  U.Body = percentDecode(Uri);
197  return U;
198 }
199 
200 llvm::Expected<std::string> URI::resolve(llvm::StringRef FileURI,
201  llvm::StringRef HintPath) {
202  auto Uri = URI::parse(FileURI);
203  if (!Uri)
204  return Uri.takeError();
205  auto Path = URI::resolve(*Uri, HintPath);
206  if (!Path)
207  return Path.takeError();
208  return *Path;
209 }
210 
211 llvm::Expected<URI> URI::create(llvm::StringRef AbsolutePath,
212  llvm::StringRef Scheme) {
213  if (!llvm::sys::path::is_absolute(AbsolutePath))
214  return error("Not a valid absolute path: {0}", AbsolutePath);
215  auto S = findSchemeByName(Scheme);
216  if (!S)
217  return S.takeError();
218  return S->get()->uriFromAbsolutePath(AbsolutePath);
219 }
220 
221 URI URI::create(llvm::StringRef AbsolutePath) {
222  if (!llvm::sys::path::is_absolute(AbsolutePath))
223  llvm_unreachable(
224  ("Not a valid absolute path: " + AbsolutePath).str().c_str());
225  for (auto &Entry : URISchemeRegistry::entries()) {
226  auto URI = Entry.instantiate()->uriFromAbsolutePath(AbsolutePath);
227  // For some paths, conversion to different URI schemes is impossible. These
228  // should be just skipped.
229  if (!URI) {
230  // Ignore the error.
231  llvm::consumeError(URI.takeError());
232  continue;
233  }
234  return std::move(*URI);
235  }
236  // Fallback to file: scheme which should work for any paths.
237  return URI::createFile(AbsolutePath);
238 }
239 
240 URI URI::createFile(llvm::StringRef AbsolutePath) {
241  auto U = FileSystemScheme().uriFromAbsolutePath(AbsolutePath);
242  if (!U)
243  llvm_unreachable(llvm::toString(U.takeError()).c_str());
244  return std::move(*U);
245 }
246 
247 llvm::Expected<std::string> URI::resolve(const URI &Uri,
248  llvm::StringRef HintPath) {
249  auto S = findSchemeByName(Uri.Scheme);
250  if (!S)
251  return S.takeError();
252  return S->get()->getAbsolutePath(Uri.Authority, Uri.Body, HintPath);
253 }
254 
255 llvm::Expected<std::string> URI::resolvePath(llvm::StringRef AbsPath,
256  llvm::StringRef HintPath) {
257  if (!llvm::sys::path::is_absolute(AbsPath))
258  llvm_unreachable(("Not a valid absolute path: " + AbsPath).str().c_str());
259  for (auto &Entry : URISchemeRegistry::entries()) {
260  auto S = Entry.instantiate();
261  auto U = S->uriFromAbsolutePath(AbsPath);
262  // For some paths, conversion to different URI schemes is impossible. These
263  // should be just skipped.
264  if (!U) {
265  // Ignore the error.
266  llvm::consumeError(U.takeError());
267  continue;
268  }
269  return S->getAbsolutePath(U->Authority, U->Body, HintPath);
270  }
271  // Fallback to file: scheme which doesn't do any canonicalization.
272  return std::string(AbsPath);
273 }
274 
275 llvm::Expected<std::string> URI::includeSpelling(const URI &Uri) {
276  auto S = findSchemeByName(Uri.Scheme);
277  if (!S)
278  return S.takeError();
279  return S->get()->getIncludeSpelling(Uri);
280 }
281 
282 } // namespace clangd
283 } // namespace clang
E
const Expr * E
Definition: AvoidBindCheck.cpp:88
clang::clangd::Path
std::string Path
A typedef to represent a file path.
Definition: Path.h:26
clang::clangd::error
llvm::Error error(std::error_code EC, const char *Fmt, Ts &&... Vals)
Definition: Logger.h:80
Root
ASTNode Root
Definition: DumpAST.cpp:332
clang::clangd::URI::create
static llvm::Expected< URI > create(llvm::StringRef AbsolutePath, llvm::StringRef Scheme)
Creates a URI for a file in the given scheme.
Definition: URI.cpp:211
clang::clangd::URI::resolvePath
static llvm::Expected< std::string > resolvePath(llvm::StringRef AbsPath, llvm::StringRef HintPath="")
Resolves AbsPath into a canonical path of its URI, by converting AbsPath to URI and resolving the URI...
Definition: URI.cpp:255
clang::clangd::URISchemeRegistry
llvm::Registry< URIScheme > URISchemeRegistry
By default, a "file" scheme is supported where URI paths are always absolute in the file system.
Definition: URI.h:131
clang::clangd::URI::parse
static llvm::Expected< URI > parse(llvm::StringRef Uri)
Parse a URI string "<scheme>:[//<authority>/]<path>".
Definition: URI.cpp:179
clang::clangd::URI::resolve
static llvm::Expected< std::string > resolve(const URI &U, llvm::StringRef HintPath="")
Resolves the absolute path of U.
Definition: URI.cpp:247
Logger.h
clang::clangd::URI::includeSpelling
static llvm::Expected< std::string > includeSpelling(const URI &U)
Gets the preferred spelling of this file for #include, if there is one, e.g.
Definition: URI.cpp:275
Entry
Definition: Modularize.cpp:428
clang::clangd::URI::toString
std::string toString() const
Returns a string URI with all components percent-encoded.
Definition: URI.cpp:162
C
const Criteria C
Definition: FunctionCognitiveComplexityCheck.cpp:93
clang
===– Representation.cpp - ClangDoc Representation --------—*- C++ -*-===//
Definition: ApplyReplacements.h:27
clang::tidy::cppcoreguidelines::toString
static llvm::StringRef toString(SpecialMemberFunctionsCheck::SpecialMemberFunctionKind K)
Definition: SpecialMemberFunctionsCheck.cpp:55
Pos
Position Pos
Definition: SourceCode.cpp:657
URI.h
clang::clangd::URI::createFile
static URI createFile(llvm::StringRef AbsolutePath)
This creates a file:// URI for AbsolutePath. The path must be absolute.
Definition: URI.cpp:240
Out
CompiledFragmentImpl & Out
Definition: ConfigCompile.cpp:100
clang::clangd::URI
A URI describes the location of a source file.
Definition: URI.h:28