clang-tools 20.0.0git
StdLib.cpp
Go to the documentation of this file.
1//===-- StdLib.cpp ----------------------------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8#include "StdLib.h"
9#include <fstream>
10#include <memory>
11#include <optional>
12#include <string>
13#include <vector>
14
15#include "Compiler.h"
16#include "Config.h"
17#include "SymbolCollector.h"
18#include "index/IndexAction.h"
19#include "support/Logger.h"
21#include "support/Trace.h"
22#include "clang/Basic/LangOptions.h"
23#include "clang/Frontend/CompilerInvocation.h"
24#include "clang/Lex/PreprocessorOptions.h"
25#include "clang/Tooling/Inclusions/StandardLibrary.h"
26#include "llvm/ADT/IntrusiveRefCntPtr.h"
27#include "llvm/ADT/StringRef.h"
28#include "llvm/Support/MemoryBuffer.h"
29#include "llvm/Support/Path.h"
30
31namespace clang {
32namespace clangd {
33namespace {
34
// Language family whose standard library is being indexed.
// Deliberately an unscoped enum: its values are used directly as array
// indices (see StdLibSet::Best).
enum Lang {
  C,
  CXX,
};
36
37Lang langFromOpts(const LangOptions &LO) { return LO.CPlusPlus ? CXX : C; }
38llvm::StringLiteral mandatoryHeader(Lang L) {
39 switch (L) {
40 case C:
41 return "stdio.h";
42 case CXX:
43 return "vector";
44 }
45 llvm_unreachable("unhandled Lang");
46}
47
48LangStandard::Kind standardFromOpts(const LangOptions &LO) {
49 if (LO.CPlusPlus) {
50 if (LO.CPlusPlus23)
51 return LangStandard::lang_cxx23;
52 if (LO.CPlusPlus20)
53 return LangStandard::lang_cxx20;
54 if (LO.CPlusPlus17)
55 return LangStandard::lang_cxx17;
56 if (LO.CPlusPlus14)
57 return LangStandard::lang_cxx14;
58 if (LO.CPlusPlus11)
59 return LangStandard::lang_cxx11;
60 return LangStandard::lang_cxx98;
61 }
62 if (LO.C23)
63 return LangStandard::lang_c23;
64 // C17 has no new features, so treat {C11,C17} as C17.
65 if (LO.C11)
66 return LangStandard::lang_c17;
67 return LangStandard::lang_c99;
68}
69
70std::string buildUmbrella(llvm::StringLiteral Mandatory,
71 llvm::ArrayRef<tooling::stdlib::Header> Headers) {
72 std::string Result;
73 llvm::raw_string_ostream OS(Result);
74
75 // We __has_include guard all our #includes to avoid errors when using older
76 // stdlib version that don't have headers for the newest language standards.
77 // But make sure we get *some* error if things are totally broken.
78 OS << llvm::formatv(
79 "#if !__has_include(<{0}>)\n"
80 "#error Mandatory header <{0}> not found in standard library!\n"
81 "#endif\n",
82 Mandatory);
83
84 for (auto Header : Headers) {
85 OS << llvm::formatv("#if __has_include({0})\n"
86 "#include {0}\n"
87 "#endif\n",
88 Header);
89 }
90 return Result;
91}
92
93} // namespace
94
95llvm::StringRef getStdlibUmbrellaHeader(const LangOptions &LO) {
96 // The umbrella header is the same for all versions of each language.
97 // Headers that are unsupported in old lang versions are usually guarded by
98 // #if. Some headers may be not present in old stdlib versions, the umbrella
99 // header guards with __has_include for this purpose.
100 Lang L = langFromOpts(LO);
101 switch (L) {
102 case CXX:
103 static std::string *UmbrellaCXX = new std::string(buildUmbrella(
104 mandatoryHeader(L),
105 tooling::stdlib::Header::all(tooling::stdlib::Lang::CXX)));
106 return *UmbrellaCXX;
107 case C:
108 static std::string *UmbrellaC = new std::string(
109 buildUmbrella(mandatoryHeader(L),
110 tooling::stdlib::Header::all(tooling::stdlib::Lang::C)));
111 return *UmbrellaC;
112 }
113 llvm_unreachable("invalid Lang in langFromOpts");
114}
115
116namespace {
117
118// Including the standard library leaks unwanted transitively included symbols.
119//
120// We want to drop these, they're a bit tricky to identify:
121// - we don't want to limit to symbols on our list, as our list has only
122// top-level symbols (and there may be legitimate stdlib extensions).
123// - we can't limit to only symbols defined in known stdlib headers, as stdlib
124// internal structure is murky
125// - we can't strictly require symbols to come from a particular path, e.g.
126// libstdc++ is mostly under /usr/include/c++/10/...
127// but std::ctype_base is under /usr/include/<platform>/c++/10/...
128// We require the symbol to come from a header that is *either* from
129// the standard library path (as identified by the location of <vector>), or
130// another header that defines a symbol from our stdlib list.
131SymbolSlab filter(SymbolSlab Slab, const StdLibLocation &Loc) {
132 SymbolSlab::Builder Result;
133
134 static auto &StandardHeaders = *[] {
135 auto *Set = new llvm::DenseSet<llvm::StringRef>();
136 for (auto Header : tooling::stdlib::Header::all(tooling::stdlib::Lang::CXX))
137 Set->insert(Header.name());
138 for (auto Header : tooling::stdlib::Header::all(tooling::stdlib::Lang::C))
139 Set->insert(Header.name());
140 return Set;
141 }();
142
143 // Form prefixes like file:///usr/include/c++/10/
144 // These can be trivially prefix-compared with URIs in the indexed symbols.
145 llvm::SmallVector<std::string> StdLibURIPrefixes;
146 for (const auto &Path : Loc.Paths) {
147 StdLibURIPrefixes.push_back(URI::create(Path).toString());
148 if (StdLibURIPrefixes.back().back() != '/')
149 StdLibURIPrefixes.back().push_back('/');
150 }
151 // For each header URI, is it *either* prefixed by StdLibURIPrefixes *or*
152 // owner of a symbol whose insertable header is in StandardHeaders?
153 // Pointer key because strings in a SymbolSlab are interned.
154 llvm::DenseMap<const char *, bool> GoodHeader;
155 for (const Symbol &S : Slab) {
156 if (!S.IncludeHeaders.empty() &&
157 StandardHeaders.contains(S.IncludeHeaders.front().IncludeHeader)) {
158 GoodHeader[S.CanonicalDeclaration.FileURI] = true;
159 GoodHeader[S.Definition.FileURI] = true;
160 continue;
161 }
162 for (const char *URI :
163 {S.CanonicalDeclaration.FileURI, S.Definition.FileURI}) {
164 auto R = GoodHeader.try_emplace(URI, false);
165 if (R.second) {
166 R.first->second = llvm::any_of(
167 StdLibURIPrefixes,
168 [&, URIStr(llvm::StringRef(URI))](const std::string &Prefix) {
169 return URIStr.starts_with(Prefix);
170 });
171 }
172 }
173 }
174#ifndef NDEBUG
175 for (const auto &Good : GoodHeader)
176 if (Good.second && *Good.first)
177 dlog("Stdlib header: {0}", Good.first);
178#endif
179 // Empty URIs aren't considered good. (Definition can be blank).
180 auto IsGoodHeader = [&](const char *C) { return *C && GoodHeader.lookup(C); };
181
182 for (const Symbol &S : Slab) {
183 if (!(IsGoodHeader(S.CanonicalDeclaration.FileURI) ||
184 IsGoodHeader(S.Definition.FileURI))) {
185 dlog("Ignoring wrong-header symbol {0}{1} in {2}", S.Scope, S.Name,
186 S.CanonicalDeclaration.FileURI);
187 continue;
188 }
189 Result.insert(S);
190 }
191
192 return std::move(Result).build();
193}
194
195} // namespace
196
197SymbolSlab indexStandardLibrary(llvm::StringRef HeaderSources,
198 std::unique_ptr<CompilerInvocation> CI,
199 const StdLibLocation &Loc,
200 const ThreadsafeFS &TFS) {
201 if (CI->getFrontendOpts().Inputs.size() != 1 ||
202 !CI->getPreprocessorOpts().ImplicitPCHInclude.empty()) {
203 elog("Indexing standard library failed: bad CompilerInvocation");
204 assert(false && "indexing stdlib with a dubious CompilerInvocation!");
205 return SymbolSlab();
206 }
207 const FrontendInputFile &Input = CI->getFrontendOpts().Inputs.front();
208 trace::Span Tracer("StandardLibraryIndex");
209 LangStandard::Kind LangStd = standardFromOpts(CI->getLangOpts());
210 log("Indexing {0} standard library in the context of {1}",
211 LangStandard::getLangStandardForKind(LangStd).getName(), Input.getFile());
212
215 // CompilerInvocation is taken from elsewhere, and may map a dirty buffer.
216 CI->getPreprocessorOpts().clearRemappedFiles();
218 std::move(CI), /*Preamble=*/nullptr,
219 llvm::MemoryBuffer::getMemBuffer(HeaderSources, Input.getFile()),
220 TFS.view(/*CWD=*/std::nullopt), IgnoreDiags);
221 if (!Clang) {
222 elog("Standard Library Index: Couldn't build compiler instance");
223 return Symbols;
224 }
225
226 SymbolCollector::Options IndexOpts;
227 IndexOpts.Origin = SymbolOrigin::StdLib;
228 IndexOpts.CollectMainFileSymbols = false;
229 IndexOpts.CollectMainFileRefs = false;
230 IndexOpts.CollectMacro = true;
231 IndexOpts.StoreAllDocumentation = true;
232 // Sadly we can't use IndexOpts.FileFilter to restrict indexing scope.
233 // Files from outside the StdLibLocation may define true std symbols anyway.
234 // We end up "blessing" such headers, and can only do that by indexing
235 // everything first.
236
237 // Refs, relations, include graph in the stdlib mostly aren't useful.
239 IndexOpts, [&](SymbolSlab S) { Symbols = std::move(S); }, nullptr,
240 nullptr, nullptr);
241
242 if (!Action->BeginSourceFile(*Clang, Input)) {
243 elog("Standard Library Index: BeginSourceFile() failed");
244 return Symbols;
245 }
246
247 if (llvm::Error Err = Action->Execute()) {
248 elog("Standard Library Index: Execute failed: {0}", std::move(Err));
249 return Symbols;
250 }
251
252 Action->EndSourceFile();
253
254 unsigned SymbolsBeforeFilter = Symbols.size();
255 Symbols = filter(std::move(Symbols), Loc);
256 bool Errors = Clang->hasDiagnostics() &&
257 Clang->getDiagnostics().hasUncompilableErrorOccurred();
258 log("Indexed {0} standard library{3}: {1} symbols, {2} filtered",
259 LangStandard::getLangStandardForKind(LangStd).getName(), Symbols.size(),
260 SymbolsBeforeFilter - Symbols.size(),
261 Errors ? " (incomplete due to errors)" : "");
262 SPAN_ATTACH(Tracer, "symbols", int(Symbols.size()));
263 return Symbols;
264}
265
266SymbolSlab indexStandardLibrary(std::unique_ptr<CompilerInvocation> Invocation,
267 const StdLibLocation &Loc,
268 const ThreadsafeFS &TFS) {
269 llvm::StringRef Header = getStdlibUmbrellaHeader(Invocation->getLangOpts());
270 return indexStandardLibrary(Header, std::move(Invocation), Loc, TFS);
271}
272
273bool StdLibSet::isBest(const LangOptions &LO) const {
274 return standardFromOpts(LO) >=
275 Best[langFromOpts(LO)].load(std::memory_order_acquire);
276}
277
278std::optional<StdLibLocation> StdLibSet::add(const LangOptions &LO,
279 const HeaderSearch &HS) {
280 Lang L = langFromOpts(LO);
281 int OldVersion = Best[L].load(std::memory_order_acquire);
282 int NewVersion = standardFromOpts(LO);
283 dlog("Index stdlib? {0}",
284 LangStandard::getLangStandardForKind(standardFromOpts(LO)).getName());
285
286 if (!Config::current().Index.StandardLibrary) {
287 dlog("No: disabled in config");
288 return std::nullopt;
289 }
290
291 if (NewVersion <= OldVersion) {
292 dlog("No: have {0}, {1}>={2}",
293 LangStandard::getLangStandardForKind(
294 static_cast<LangStandard::Kind>(NewVersion))
295 .getName(),
296 OldVersion, NewVersion);
297 return std::nullopt;
298 }
299
300 // We'd like to index a standard library here if there is one.
301 // Check for the existence of <vector> on the search path.
302 // We could cache this, but we only get here repeatedly when there's no
303 // stdlib, and even then only once per preamble build.
304 llvm::StringLiteral ProbeHeader = mandatoryHeader(L);
305 llvm::SmallString<256> Path; // Scratch space.
306 llvm::SmallVector<std::string> SearchPaths;
307 auto RecordHeaderPath = [&](llvm::StringRef HeaderPath) {
308 llvm::StringRef DirPath = llvm::sys::path::parent_path(HeaderPath);
309 if (!HS.getFileMgr().getVirtualFileSystem().getRealPath(DirPath, Path))
310 SearchPaths.emplace_back(Path);
311 };
312 for (const auto &DL :
313 llvm::make_range(HS.search_dir_begin(), HS.search_dir_end())) {
314 switch (DL.getLookupType()) {
315 case DirectoryLookup::LT_NormalDir: {
316 Path = DL.getDirRef()->getName();
317 llvm::sys::path::append(Path, ProbeHeader);
318 llvm::vfs::Status Stat;
319 if (!HS.getFileMgr().getNoncachedStatValue(Path, Stat) &&
320 Stat.isRegularFile())
321 RecordHeaderPath(Path);
322 break;
323 }
324 case DirectoryLookup::LT_Framework:
325 // stdlib can't be a framework (framework includes must have a slash)
326 continue;
327 case DirectoryLookup::LT_HeaderMap:
328 llvm::StringRef Target =
329 DL.getHeaderMap()->lookupFilename(ProbeHeader, Path);
330 if (!Target.empty())
331 RecordHeaderPath(Target);
332 break;
333 }
334 }
335 if (SearchPaths.empty())
336 return std::nullopt;
337
338 dlog("Found standard library in {0}", llvm::join(SearchPaths, ", "));
339
340 while (!Best[L].compare_exchange_weak(OldVersion, NewVersion,
341 std::memory_order_acq_rel))
342 if (OldVersion >= NewVersion) {
343 dlog("No: lost the race");
344 return std::nullopt; // Another thread won the race while we were
345 // checking.
346 }
347
348 dlog("Yes, index stdlib!");
349 return StdLibLocation{std::move(SearchPaths)};
350}
351
352} // namespace clangd
353} // namespace clang
int Errors
llvm::raw_ostream & OS
const Criteria C
IgnoringDiagConsumer IgnoreDiags
std::unique_ptr< CompilerInstance > Clang
SourceLocation Loc
#define dlog(...)
Definition: Logger.h:101
FieldAction Action
std::string Lang
std::unique_ptr< CompilerInvocation > CI
#define SPAN_ATTACH(S, Name, Expr)
Attach a key-value pair to a Span event.
Definition: Trace.h:164
std::optional< StdLibLocation > add(const LangOptions &, const HeaderSearch &)
Definition: StdLib.cpp:278
bool isBest(const LangOptions &) const
Definition: StdLib.cpp:273
An immutable symbol container that stores a set of symbols.
Definition: Symbol.h:201
Wrapper for vfs::FileSystem for use in multithreaded programs like clangd.
Definition: ThreadsafeFS.h:26
llvm::IntrusiveRefCntPtr< llvm::vfs::FileSystem > view(std::nullopt_t CWD) const
Obtain a vfs::FileSystem with an arbitrary initial working directory.
Definition: ThreadsafeFS.h:32
static llvm::Expected< URI > create(llvm::StringRef AbsolutePath, llvm::StringRef Scheme)
Creates a URI for a file in the given scheme.
Definition: URI.cpp:208
Records an event whose duration is the lifetime of the Span object.
Definition: Trace.h:143
std::string Path
A typedef to represent a file path.
Definition: Path.h:26
std::unique_ptr< CompilerInstance > prepareCompilerInstance(std::unique_ptr< clang::CompilerInvocation > CI, const PrecompiledPreamble *Preamble, std::unique_ptr< llvm::MemoryBuffer > Buffer, llvm::IntrusiveRefCntPtr< llvm::vfs::FileSystem > VFS, DiagnosticConsumer &DiagsClient)
Definition: Compiler.cpp:129
llvm::StringRef getStdlibUmbrellaHeader(const LangOptions &LO)
Definition: StdLib.cpp:95
std::unique_ptr< FrontendAction > createStaticIndexingAction(SymbolCollector::Options Opts, std::function< void(SymbolSlab)> SymbolsCallback, std::function< void(RefSlab)> RefsCallback, std::function< void(RelationSlab)> RelationsCallback, std::function< void(IncludeGraph)> IncludeGraphCallback)
void log(const char *Fmt, Ts &&... Vals)
Definition: Logger.h:67
void elog(const char *Fmt, Ts &&... Vals)
Definition: Logger.h:61
SymbolSlab indexStandardLibrary(llvm::StringRef HeaderSources, std::unique_ptr< CompilerInvocation > CI, const StdLibLocation &Loc, const ThreadsafeFS &TFS)
Definition: StdLib.cpp:197
===– Representation.cpp - ClangDoc Representation --------—*- C++ -*-===//
static const Config & current()
Returns the Config of the current Context, or an empty configuration.
Definition: Config.cpp:17
bool CollectMainFileSymbols
Collect symbols local to main-files, such as static functions, symbols inside an anonymous namespace,...
bool StoreAllDocumentation
If set to true, SymbolCollector will collect doc for all symbols.
bool CollectMainFileRefs
Collect references to main-file symbols.