clang-tools  16.0.0git
URI.cpp
Go to the documentation of this file.
1 //===---- URI.cpp - File URIs with schemes -----------------------*- C++-*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "URI.h"
10 #include "support/Logger.h"
11 #include "llvm/ADT/StringExtras.h"
12 #include "llvm/ADT/Twine.h"
13 #include "llvm/Support/Error.h"
14 #include "llvm/Support/Path.h"
15 #include <algorithm>
16 
17 LLVM_INSTANTIATE_REGISTRY(clang::clangd::URISchemeRegistry)
18 
19 namespace clang {
20 namespace clangd {
21 namespace {
22 
23 bool isWindowsPath(llvm::StringRef Path) {
24  return Path.size() > 1 && llvm::isAlpha(Path[0]) && Path[1] == ':';
25 }
26 
27 bool isNetworkPath(llvm::StringRef Path) {
28  return Path.size() > 2 && Path[0] == Path[1] &&
29  llvm::sys::path::is_separator(Path[0]);
30 }
31 
32 /// This manages file paths in the file system. All paths in the scheme
33 /// are absolute (with leading '/').
34 /// Note that this scheme is hardcoded into the library and not registered in
35 /// registry.
36 class FileSystemScheme : public URIScheme {
37 public:
38  llvm::Expected<std::string>
39  getAbsolutePath(llvm::StringRef Authority, llvm::StringRef Body,
40  llvm::StringRef /*HintPath*/) const override {
41  if (!Body.startswith("/"))
42  return error("File scheme: expect body to be an absolute path starting "
43  "with '/': {0}",
44  Body);
45  llvm::SmallString<128> Path;
46  if (!Authority.empty()) {
47  // Windows UNC paths e.g. file://server/share => \\server\share
48  ("//" + Authority).toVector(Path);
49  } else if (isWindowsPath(Body.substr(1))) {
50  // Windows paths e.g. file:///X:/path => X:\path
51  Body.consume_front("/");
52  }
53  Path.append(Body);
54  llvm::sys::path::native(Path);
55  return std::string(Path);
56  }
57 
58  llvm::Expected<URI>
59  uriFromAbsolutePath(llvm::StringRef AbsolutePath) const override {
60  std::string Body;
61  llvm::StringRef Authority;
62  llvm::StringRef Root = llvm::sys::path::root_name(AbsolutePath);
63  if (isNetworkPath(Root)) {
64  // Windows UNC paths e.g. \\server\share => file://server/share
65  Authority = Root.drop_front(2);
66  AbsolutePath.consume_front(Root);
67  } else if (isWindowsPath(Root)) {
68  // Windows paths e.g. X:\path => file:///X:/path
69  Body = "/";
70  }
71  Body += llvm::sys::path::convert_to_slash(AbsolutePath);
72  return URI("file", Authority, Body);
73  }
74 };
75 
76 llvm::Expected<std::unique_ptr<URIScheme>>
77 findSchemeByName(llvm::StringRef Scheme) {
78  if (Scheme == "file")
79  return std::make_unique<FileSystemScheme>();
80 
81  for (const auto &URIScheme : URISchemeRegistry::entries()) {
82  if (URIScheme.getName() != Scheme)
83  continue;
84  return URIScheme.instantiate();
85  }
86  return error("Can't find scheme: {0}", Scheme);
87 }
88 
/// Returns true if \p C must be percent-encoded when a URI is printed.
/// Letters, digits and the punctuation '-', '_', '.', '~', '/' and ':' are
/// emitted verbatim; every other byte is escaped.
bool shouldEscape(unsigned char C) {
  // Unreserved alphanumeric characters.
  const bool IsLower = C >= 'a' && C <= 'z';
  const bool IsUpper = C >= 'A' && C <= 'Z';
  const bool IsDigit = C >= '0' && C <= '9';
  if (IsLower || IsUpper || IsDigit)
    return false;

  // Unreserved punctuation, plus two reserved characters that are safe here:
  //  - '/' is only reserved when parsing.
  //  - ':' is only reserved for relative URI paths, which clangd doesn't
  //    produce.
  const bool IsVerbatimPunct = C == '-' || C == '_' || C == '.' || C == '~' ||
                               C == '/' || C == ':';
  return !IsVerbatimPunct;
}
106 
107 /// Encodes a string according to percent-encoding.
108 /// - Unreserved characters are not escaped.
109 /// - Reserved characters always escaped with exceptions like '/'.
110 /// - All other characters are escaped.
111 void percentEncode(llvm::StringRef Content, std::string &Out) {
112  for (unsigned char C : Content)
113  if (shouldEscape(C)) {
114  Out.push_back('%');
115  Out.push_back(llvm::hexdigit(C / 16));
116  Out.push_back(llvm::hexdigit(C % 16));
117  } else {
118  Out.push_back(C);
119  }
120 }
121 
122 /// Decodes a string according to percent-encoding.
123 std::string percentDecode(llvm::StringRef Content) {
124  std::string Result;
125  for (auto I = Content.begin(), E = Content.end(); I != E; ++I) {
126  if (*I != '%') {
127  Result += *I;
128  continue;
129  }
130  if (*I == '%' && I + 2 < Content.end() && llvm::isHexDigit(*(I + 1)) &&
131  llvm::isHexDigit(*(I + 2))) {
132  Result.push_back(llvm::hexFromNibbles(*(I + 1), *(I + 2)));
133  I += 2;
134  } else
135  Result.push_back(*I);
136  }
137  return Result;
138 }
139 
140 bool isValidScheme(llvm::StringRef Scheme) {
141  if (Scheme.empty())
142  return false;
143  if (!llvm::isAlpha(Scheme[0]))
144  return false;
145  return llvm::all_of(llvm::drop_begin(Scheme), [](char C) {
146  return llvm::isAlnum(C) || C == '+' || C == '.' || C == '-';
147  });
148 }
149 
150 } // namespace
151 
152 URI::URI(llvm::StringRef Scheme, llvm::StringRef Authority,
153  llvm::StringRef Body)
154  : Scheme(Scheme), Authority(Authority), Body(Body) {
155  assert(!Scheme.empty());
156  assert((Authority.empty() || Body.startswith("/")) &&
157  "URI body must start with '/' when authority is present.");
158 }
159 
160 std::string URI::toString() const {
161  std::string Result;
162  percentEncode(Scheme, Result);
163  Result.push_back(':');
164  if (Authority.empty() && Body.empty())
165  return Result;
166  // If authority if empty, we only print body if it starts with "/"; otherwise,
167  // the URI is invalid.
168  if (!Authority.empty() || llvm::StringRef(Body).startswith("/"))
169  {
170  Result.append("//");
171  percentEncode(Authority, Result);
172  }
173  percentEncode(Body, Result);
174  return Result;
175 }
176 
177 llvm::Expected<URI> URI::parse(llvm::StringRef OrigUri) {
178  URI U;
179  llvm::StringRef Uri = OrigUri;
180 
181  auto Pos = Uri.find(':');
182  if (Pos == llvm::StringRef::npos)
183  return error("Scheme must be provided in URI: {0}", OrigUri);
184  auto SchemeStr = Uri.substr(0, Pos);
185  U.Scheme = percentDecode(SchemeStr);
186  if (!isValidScheme(U.Scheme))
187  return error("Invalid scheme: {0} (decoded: {1})", SchemeStr, U.Scheme);
188  Uri = Uri.substr(Pos + 1);
189  if (Uri.consume_front("//")) {
190  Pos = Uri.find('/');
191  U.Authority = percentDecode(Uri.substr(0, Pos));
192  Uri = Uri.substr(Pos);
193  }
194  U.Body = percentDecode(Uri);
195  return U;
196 }
197 
198 llvm::Expected<std::string> URI::resolve(llvm::StringRef FileURI,
199  llvm::StringRef HintPath) {
200  auto Uri = URI::parse(FileURI);
201  if (!Uri)
202  return Uri.takeError();
203  auto Path = URI::resolve(*Uri, HintPath);
204  if (!Path)
205  return Path.takeError();
206  return *Path;
207 }
208 
209 llvm::Expected<URI> URI::create(llvm::StringRef AbsolutePath,
210  llvm::StringRef Scheme) {
211  if (!llvm::sys::path::is_absolute(AbsolutePath))
212  return error("Not a valid absolute path: {0}", AbsolutePath);
213  auto S = findSchemeByName(Scheme);
214  if (!S)
215  return S.takeError();
216  return S->get()->uriFromAbsolutePath(AbsolutePath);
217 }
218 
219 URI URI::create(llvm::StringRef AbsolutePath) {
220  if (!llvm::sys::path::is_absolute(AbsolutePath))
221  llvm_unreachable(
222  ("Not a valid absolute path: " + AbsolutePath).str().c_str());
223  for (auto &Entry : URISchemeRegistry::entries()) {
224  auto URI = Entry.instantiate()->uriFromAbsolutePath(AbsolutePath);
225  // For some paths, conversion to different URI schemes is impossible. These
226  // should be just skipped.
227  if (!URI) {
228  // Ignore the error.
229  llvm::consumeError(URI.takeError());
230  continue;
231  }
232  return std::move(*URI);
233  }
234  // Fallback to file: scheme which should work for any paths.
235  return URI::createFile(AbsolutePath);
236 }
237 
238 URI URI::createFile(llvm::StringRef AbsolutePath) {
239  auto U = FileSystemScheme().uriFromAbsolutePath(AbsolutePath);
240  if (!U)
241  llvm_unreachable(llvm::toString(U.takeError()).c_str());
242  return std::move(*U);
243 }
244 
245 llvm::Expected<std::string> URI::resolve(const URI &Uri,
246  llvm::StringRef HintPath) {
247  auto S = findSchemeByName(Uri.Scheme);
248  if (!S)
249  return S.takeError();
250  return S->get()->getAbsolutePath(Uri.Authority, Uri.Body, HintPath);
251 }
252 
253 llvm::Expected<std::string> URI::resolvePath(llvm::StringRef AbsPath,
254  llvm::StringRef HintPath) {
255  if (!llvm::sys::path::is_absolute(AbsPath))
256  llvm_unreachable(("Not a valid absolute path: " + AbsPath).str().c_str());
257  for (auto &Entry : URISchemeRegistry::entries()) {
258  auto S = Entry.instantiate();
259  auto U = S->uriFromAbsolutePath(AbsPath);
260  // For some paths, conversion to different URI schemes is impossible. These
261  // should be just skipped.
262  if (!U) {
263  // Ignore the error.
264  llvm::consumeError(U.takeError());
265  continue;
266  }
267  return S->getAbsolutePath(U->Authority, U->Body, HintPath);
268  }
269  // Fallback to file: scheme which doesn't do any canonicalization.
270  return std::string(AbsPath);
271 }
272 
273 llvm::Expected<std::string> URI::includeSpelling(const URI &Uri) {
274  auto S = findSchemeByName(Uri.Scheme);
275  if (!S)
276  return S.takeError();
277  return S->get()->getIncludeSpelling(Uri);
278 }
279 
280 } // namespace clangd
281 } // namespace clang
E
const Expr * E
Definition: AvoidBindCheck.cpp:88
clang::clangd::Path
std::string Path
A typedef to represent a file path.
Definition: Path.h:26
clang::clangd::error
llvm::Error error(std::error_code EC, const char *Fmt, Ts &&... Vals)
Definition: Logger.h:79
Root
ASTNode Root
Definition: DumpAST.cpp:332
clang::clangd::URI::create
static llvm::Expected< URI > create(llvm::StringRef AbsolutePath, llvm::StringRef Scheme)
Creates a URI for a file in the given scheme.
Definition: URI.cpp:209
clang::clangd::URI::resolvePath
static llvm::Expected< std::string > resolvePath(llvm::StringRef AbsPath, llvm::StringRef HintPath="")
Resolves AbsPath into a canonical path of its URI, by converting AbsPath to URI and resolving the URI...
Definition: URI.cpp:253
clang::clangd::URISchemeRegistry
llvm::Registry< URIScheme > URISchemeRegistry
By default, a "file" scheme is supported where URI paths are always absolute in the file system.
Definition: URI.h:131
clang::clangd::URI::parse
static llvm::Expected< URI > parse(llvm::StringRef Uri)
Parse a URI string "<scheme>:[//<authority>/]<path>".
Definition: URI.cpp:177
Pos
size_t Pos
Definition: NoLintDirectiveHandler.cpp:97
clang::clangd::URI::resolve
static llvm::Expected< std::string > resolve(const URI &U, llvm::StringRef HintPath="")
Resolves the absolute path of U.
Definition: URI.cpp:245
Logger.h
clang::clangd::URI::includeSpelling
static llvm::Expected< std::string > includeSpelling(const URI &U)
Gets the preferred spelling of this file for #include, if there is one, e.g.
Definition: URI.cpp:273
Entry
Definition: Modularize.cpp:427
clang::clangd::URI::toString
std::string toString() const
Returns a string URI with all components percent-encoded.
Definition: URI.cpp:160
C
const Criteria C
Definition: FunctionCognitiveComplexityCheck.cpp:93
clang
===– Representation.cpp - ClangDoc Representation --------—*- C++ -*-===//
Definition: ApplyReplacements.h:27
clang::tidy::cppcoreguidelines::toString
static llvm::StringRef toString(SpecialMemberFunctionsCheck::SpecialMemberFunctionKind K)
Definition: SpecialMemberFunctionsCheck.cpp:55
URI.h
clang::clangd::URI::createFile
static URI createFile(llvm::StringRef AbsolutePath)
This creates a file:// URI for AbsolutePath. The path must be absolute.
Definition: URI.cpp:238
Out
CompiledFragmentImpl & Out
Definition: ConfigCompile.cpp:99
clang::clangd::URI
A URI describes the location of a source file.
Definition: URI.h:28