clang-tools 20.0.0git
URI.cpp
Go to the documentation of this file.
1//===---- URI.cpp - File URIs with schemes -----------------------*- C++-*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "URI.h"
10#include "support/Logger.h"
11#include "llvm/ADT/StringExtras.h"
12#include "llvm/ADT/Twine.h"
13#include "llvm/Support/Error.h"
14#include "llvm/Support/Path.h"
15#include <algorithm>
16
17LLVM_INSTANTIATE_REGISTRY(clang::clangd::URISchemeRegistry)
18
19namespace clang {
20namespace clangd {
21namespace {
22
23bool isWindowsPath(llvm::StringRef Path) {
24 return Path.size() > 1 && llvm::isAlpha(Path[0]) && Path[1] == ':';
25}
26
27bool isNetworkPath(llvm::StringRef Path) {
28 return Path.size() > 2 && Path[0] == Path[1] &&
29 llvm::sys::path::is_separator(Path[0]);
30}
31
32/// This manages file paths in the file system. All paths in the scheme
33/// are absolute (with leading '/').
34/// Note that this scheme is hardcoded into the library and not registered in
35/// registry.
36class FileSystemScheme : public URIScheme {
37public:
38 llvm::Expected<std::string>
39 getAbsolutePath(llvm::StringRef Authority, llvm::StringRef Body,
40 llvm::StringRef /*HintPath*/) const override {
41 if (!Body.starts_with("/"))
42 return error("File scheme: expect body to be an absolute path starting "
43 "with '/': {0}",
44 Body);
45 llvm::SmallString<128> Path;
46 if (!Authority.empty()) {
47 // Windows UNC paths e.g. file://server/share => \\server\share
48 ("//" + Authority).toVector(Path);
49 } else if (isWindowsPath(Body.substr(1))) {
50 // Windows paths e.g. file:///X:/path => X:\path
51 Body.consume_front("/");
52 }
53 Path.append(Body);
54 llvm::sys::path::native(Path);
55 return std::string(Path);
56 }
57
58 llvm::Expected<URI>
59 uriFromAbsolutePath(llvm::StringRef AbsolutePath) const override {
60 std::string Body;
61 llvm::StringRef Authority;
62 llvm::StringRef Root = llvm::sys::path::root_name(AbsolutePath);
63 if (isNetworkPath(Root)) {
64 // Windows UNC paths e.g. \\server\share => file://server/share
65 Authority = Root.drop_front(2);
66 AbsolutePath.consume_front(Root);
67 } else if (isWindowsPath(Root)) {
68 // Windows paths e.g. X:\path => file:///X:/path
69 Body = "/";
70 }
71 Body += llvm::sys::path::convert_to_slash(AbsolutePath);
72 return URI("file", Authority, Body);
73 }
74};
75
76llvm::Expected<std::unique_ptr<URIScheme>>
77findSchemeByName(llvm::StringRef Scheme) {
78 if (Scheme == "file")
79 return std::make_unique<FileSystemScheme>();
80
81 for (const auto &URIScheme : URISchemeRegistry::entries()) {
82 if (URIScheme.getName() != Scheme)
83 continue;
84 return URIScheme.instantiate();
85 }
86 return error("Can't find scheme: {0}", Scheme);
87}
88
/// Returns true if \p C has to be percent-encoded when a URI is printed.
/// Letters, digits and the punctuation '-', '_', '.', '~', '/' and ':' are
/// emitted verbatim; every other byte is escaped.
bool shouldEscape(unsigned char C) {
  // Unreserved characters.
  const bool IsLetter = ('a' <= C && C <= 'z') || ('A' <= C && C <= 'Z');
  const bool IsDigit = '0' <= C && C <= '9';
  const bool IsUnreservedMark = C == '-' || C == '_' || C == '.' || C == '~';
  // '/' is only reserved when parsing.
  // ':' is only reserved for relative URI paths, which clangd doesn't produce.
  const bool IsPathDelimiter = C == '/' || C == ':';
  return !(IsLetter || IsDigit || IsUnreservedMark || IsPathDelimiter);
}
106
107/// Encodes a string according to percent-encoding.
108/// - Unreserved characters are not escaped.
109/// - Reserved characters always escaped with exceptions like '/'.
110/// - All other characters are escaped.
111void percentEncode(llvm::StringRef Content, std::string &Out) {
112 for (unsigned char C : Content)
113 if (shouldEscape(C)) {
114 Out.push_back('%');
115 Out.push_back(llvm::hexdigit(C / 16));
116 Out.push_back(llvm::hexdigit(C % 16));
117 } else {
118 Out.push_back(C);
119 }
120}
121
122/// Decodes a string according to percent-encoding.
123std::string percentDecode(llvm::StringRef Content) {
124 std::string Result;
125 for (auto I = Content.begin(), E = Content.end(); I != E; ++I) {
126 if (*I != '%') {
127 Result += *I;
128 continue;
129 }
130 if (*I == '%' && I + 2 < Content.end() && llvm::isHexDigit(*(I + 1)) &&
131 llvm::isHexDigit(*(I + 2))) {
132 Result.push_back(llvm::hexFromNibbles(*(I + 1), *(I + 2)));
133 I += 2;
134 } else
135 Result.push_back(*I);
136 }
137 return Result;
138}
139
140bool isValidScheme(llvm::StringRef Scheme) {
141 if (Scheme.empty())
142 return false;
143 if (!llvm::isAlpha(Scheme[0]))
144 return false;
145 return llvm::all_of(llvm::drop_begin(Scheme), [](char C) {
146 return llvm::isAlnum(C) || C == '+' || C == '.' || C == '-';
147 });
148}
149
150} // namespace
151
152URI::URI(llvm::StringRef Scheme, llvm::StringRef Authority,
153 llvm::StringRef Body)
154 : Scheme(Scheme), Authority(Authority), Body(Body) {
155 assert(!Scheme.empty());
156 assert((Authority.empty() || Body.starts_with("/")) &&
157 "URI body must start with '/' when authority is present.");
158}
159
160std::string URI::toString() const {
161 std::string Result;
162 percentEncode(Scheme, Result);
163 Result.push_back(':');
164 if (Authority.empty() && Body.empty())
165 return Result;
166 // If authority if empty, we only print body if it starts with "/"; otherwise,
167 // the URI is invalid.
168 if (!Authority.empty() || llvm::StringRef(Body).starts_with("/")) {
169 Result.append("//");
170 percentEncode(Authority, Result);
171 }
172 percentEncode(Body, Result);
173 return Result;
174}
175
176llvm::Expected<URI> URI::parse(llvm::StringRef OrigUri) {
177 URI U;
178 llvm::StringRef Uri = OrigUri;
179
180 auto Pos = Uri.find(':');
181 if (Pos == llvm::StringRef::npos)
182 return error("Scheme must be provided in URI: {0}", OrigUri);
183 auto SchemeStr = Uri.substr(0, Pos);
184 U.Scheme = percentDecode(SchemeStr);
185 if (!isValidScheme(U.Scheme))
186 return error("Invalid scheme: {0} (decoded: {1})", SchemeStr, U.Scheme);
187 Uri = Uri.substr(Pos + 1);
188 if (Uri.consume_front("//")) {
189 Pos = Uri.find('/');
190 U.Authority = percentDecode(Uri.substr(0, Pos));
191 Uri = Uri.substr(Pos);
192 }
193 U.Body = percentDecode(Uri);
194 return U;
195}
196
197llvm::Expected<std::string> URI::resolve(llvm::StringRef FileURI,
198 llvm::StringRef HintPath) {
199 auto Uri = URI::parse(FileURI);
200 if (!Uri)
201 return Uri.takeError();
202 auto Path = URI::resolve(*Uri, HintPath);
203 if (!Path)
204 return Path.takeError();
205 return *Path;
206}
207
208llvm::Expected<URI> URI::create(llvm::StringRef AbsolutePath,
209 llvm::StringRef Scheme) {
210 if (!llvm::sys::path::is_absolute(AbsolutePath))
211 return error("Not a valid absolute path: {0}", AbsolutePath);
212 auto S = findSchemeByName(Scheme);
213 if (!S)
214 return S.takeError();
215 return S->get()->uriFromAbsolutePath(AbsolutePath);
216}
217
218URI URI::create(llvm::StringRef AbsolutePath) {
219 if (!llvm::sys::path::is_absolute(AbsolutePath))
220 llvm_unreachable(
221 ("Not a valid absolute path: " + AbsolutePath).str().c_str());
222 for (auto &Entry : URISchemeRegistry::entries()) {
223 auto URI = Entry.instantiate()->uriFromAbsolutePath(AbsolutePath);
224 // For some paths, conversion to different URI schemes is impossible. These
225 // should be just skipped.
226 if (!URI) {
227 // Ignore the error.
228 llvm::consumeError(URI.takeError());
229 continue;
230 }
231 return std::move(*URI);
232 }
233 // Fallback to file: scheme which should work for any paths.
234 return URI::createFile(AbsolutePath);
235}
236
237URI URI::createFile(llvm::StringRef AbsolutePath) {
238 auto U = FileSystemScheme().uriFromAbsolutePath(AbsolutePath);
239 if (!U)
240 llvm_unreachable(llvm::toString(U.takeError()).c_str());
241 return std::move(*U);
242}
243
244llvm::Expected<std::string> URI::resolve(const URI &Uri,
245 llvm::StringRef HintPath) {
246 auto S = findSchemeByName(Uri.Scheme);
247 if (!S)
248 return S.takeError();
249 return S->get()->getAbsolutePath(Uri.Authority, Uri.Body, HintPath);
250}
251
252llvm::Expected<std::string> URI::resolvePath(llvm::StringRef AbsPath,
253 llvm::StringRef HintPath) {
254 if (!llvm::sys::path::is_absolute(AbsPath))
255 llvm_unreachable(("Not a valid absolute path: " + AbsPath).str().c_str());
256 for (auto &Entry : URISchemeRegistry::entries()) {
257 auto S = Entry.instantiate();
258 auto U = S->uriFromAbsolutePath(AbsPath);
259 // For some paths, conversion to different URI schemes is impossible. These
260 // should be just skipped.
261 if (!U) {
262 // Ignore the error.
263 llvm::consumeError(U.takeError());
264 continue;
265 }
266 return S->getAbsolutePath(U->Authority, U->Body, HintPath);
267 }
268 // Fallback to file: scheme which doesn't do any canonicalization.
269 return std::string(AbsPath);
270}
271
272llvm::Expected<std::string> URI::includeSpelling(const URI &Uri) {
273 auto S = findSchemeByName(Uri.Scheme);
274 if (!S)
275 return S.takeError();
276 return S->get()->getIncludeSpelling(Uri);
277}
278
279} // namespace clangd
280} // namespace clang
const Expr * E
CompiledFragmentImpl & Out
ASTNode Root
Definition: DumpAST.cpp:342
const Criteria C
size_t Pos
A URI describes the location of a source file.
Definition: URI.h:28
static llvm::Expected< std::string > includeSpelling(const URI &U)
Gets the preferred spelling of this file for #include, if there is one, e.g.
Definition: URI.cpp:272
static llvm::Expected< std::string > resolvePath(llvm::StringRef AbsPath, llvm::StringRef HintPath="")
Resolves AbsPath into a canonical path of its URI, by converting AbsPath to URI and resolving the URI...
Definition: URI.cpp:252
static llvm::Expected< URI > create(llvm::StringRef AbsolutePath, llvm::StringRef Scheme)
Creates a URI for a file in the given scheme.
Definition: URI.cpp:208
static URI createFile(llvm::StringRef AbsolutePath)
This creates a file:// URI for AbsolutePath. The path must be absolute.
Definition: URI.cpp:237
std::string toString() const
Returns a string URI with all components percent-encoded.
Definition: URI.cpp:160
static llvm::Expected< std::string > resolve(const URI &U, llvm::StringRef HintPath="")
Resolves the absolute path of U.
Definition: URI.cpp:244
static llvm::Expected< URI > parse(llvm::StringRef Uri)
Parse a URI string "<scheme>:[//<authority>/]<path>".
Definition: URI.cpp:176
std::string Path
A typedef to represent a file path.
Definition: Path.h:26
llvm::Registry< URIScheme > URISchemeRegistry
By default, a "file" scheme is supported where URI paths are always absolute in the file system.
Definition: URI.h:131
llvm::Error error(std::error_code EC, const char *Fmt, Ts &&... Vals)
Definition: Logger.h:79
===– Representation.cpp - ClangDoc Representation --------—*- C++ -*-===//