clang-tools 20.0.0git
ModulesBuilder.cpp
Go to the documentation of this file.
1//===----------------- ModulesBuilder.cpp ------------------------*- C++-*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "ModulesBuilder.h"
10#include "Compiler.h"
11#include "support/Logger.h"
12#include "clang/Frontend/FrontendAction.h"
13#include "clang/Frontend/FrontendActions.h"
14#include "clang/Serialization/ASTReader.h"
15#include "clang/Serialization/InMemoryModuleCache.h"
16#include "llvm/ADT/ScopeExit.h"
17#include <queue>
18
19namespace clang {
20namespace clangd {
21
22namespace {
23
24// Create a path to store module files. Generally it should be:
25//
26// {TEMP_DIRS}/clangd/module_files/{hashed-file-name}-%%-%%-%%-%%-%%-%%/.
27//
28// {TEMP_DIRS} is the temporary directory for the system, e.g., "/var/tmp"
29// or "C:/TEMP".
30//
31// '%%' means random value to make the generated path unique.
32//
33// \param MainFile is used to get the root of the project from global
34// compilation database.
35//
36// TODO: Move these module fils out of the temporary directory if the module
37// files are persistent.
38llvm::SmallString<256> getUniqueModuleFilesPath(PathRef MainFile) {
39 llvm::SmallString<128> HashedPrefix = llvm::sys::path::filename(MainFile);
40 // There might be multiple files with the same name in a project. So appending
41 // the hash value of the full path to make sure they won't conflict.
42 HashedPrefix += std::to_string(llvm::hash_value(MainFile));
43
44 llvm::SmallString<256> ResultPattern;
45
46 llvm::sys::path::system_temp_directory(/*erasedOnReboot=*/true,
47 ResultPattern);
48
49 llvm::sys::path::append(ResultPattern, "clangd");
50 llvm::sys::path::append(ResultPattern, "module_files");
51
52 llvm::sys::path::append(ResultPattern, HashedPrefix);
53
54 ResultPattern.append("-%%-%%-%%-%%-%%-%%");
55
56 llvm::SmallString<256> Result;
57 llvm::sys::fs::createUniquePath(ResultPattern, Result,
58 /*MakeAbsolute=*/false);
59
60 llvm::sys::fs::create_directories(Result);
61 return Result;
62}
63
64// Get a unique module file path under \param ModuleFilesPrefix.
65std::string getModuleFilePath(llvm::StringRef ModuleName,
66 PathRef ModuleFilesPrefix) {
67 llvm::SmallString<256> ModuleFilePath(ModuleFilesPrefix);
68 auto [PrimaryModuleName, PartitionName] = ModuleName.split(':');
69 llvm::sys::path::append(ModuleFilePath, PrimaryModuleName);
70 if (!PartitionName.empty()) {
71 ModuleFilePath.append("-");
72 ModuleFilePath.append(PartitionName);
73 }
74
75 ModuleFilePath.append(".pcm");
76 return std::string(ModuleFilePath);
77}
78
79// FailedPrerequisiteModules - stands for the PrerequisiteModules which has
80// errors happened during the building process.
81class FailedPrerequisiteModules : public PrerequisiteModules {
82public:
83 ~FailedPrerequisiteModules() override = default;
84
85 // We shouldn't adjust the compilation commands based on
86 // FailedPrerequisiteModules.
87 void adjustHeaderSearchOptions(HeaderSearchOptions &Options) const override {
88 }
89
90 // FailedPrerequisiteModules can never be reused.
91 bool
92 canReuse(const CompilerInvocation &CI,
93 llvm::IntrusiveRefCntPtr<llvm::vfs::FileSystem>) const override {
94 return false;
95 }
96};
97
98struct ModuleFile {
99 ModuleFile(StringRef ModuleName, PathRef ModuleFilePath)
100 : ModuleName(ModuleName.str()), ModuleFilePath(ModuleFilePath.str()) {}
101
102 ModuleFile() = delete;
103
104 ModuleFile(const ModuleFile &) = delete;
105 ModuleFile operator=(const ModuleFile &) = delete;
106
107 // The move constructor is needed for llvm::SmallVector.
108 ModuleFile(ModuleFile &&Other)
109 : ModuleName(std::move(Other.ModuleName)),
110 ModuleFilePath(std::move(Other.ModuleFilePath)) {
111 Other.ModuleName.clear();
112 Other.ModuleFilePath.clear();
113 }
114
115 ModuleFile &operator=(ModuleFile &&Other) {
116 if (this == &Other)
117 return *this;
118
119 this->~ModuleFile();
120 new (this) ModuleFile(std::move(Other));
121 return *this;
122 }
123
124 ~ModuleFile() {
125 if (!ModuleFilePath.empty())
126 llvm::sys::fs::remove(ModuleFilePath);
127 }
128
129 StringRef getModuleName() const { return ModuleName; }
130
131 StringRef getModuleFilePath() const { return ModuleFilePath; }
132
133private:
134 std::string ModuleName;
135 std::string ModuleFilePath;
136};
137
138// ReusablePrerequisiteModules - stands for PrerequisiteModules for which all
139// the required modules are built successfully. All the module files
140// are owned by the modules builder.
141class ReusablePrerequisiteModules : public PrerequisiteModules {
142public:
143 ReusablePrerequisiteModules() = default;
144
145 ReusablePrerequisiteModules(const ReusablePrerequisiteModules &Other) =
146 default;
147 ReusablePrerequisiteModules &
148 operator=(const ReusablePrerequisiteModules &) = default;
149 ReusablePrerequisiteModules(ReusablePrerequisiteModules &&) = delete;
150 ReusablePrerequisiteModules
151 operator=(ReusablePrerequisiteModules &&) = delete;
152
153 ~ReusablePrerequisiteModules() override = default;
154
155 void adjustHeaderSearchOptions(HeaderSearchOptions &Options) const override {
156 // Appending all built module files.
157 for (const auto &RequiredModule : RequiredModules)
158 Options.PrebuiltModuleFiles.insert_or_assign(
159 RequiredModule->getModuleName().str(),
160 RequiredModule->getModuleFilePath().str());
161 }
162
163 bool canReuse(const CompilerInvocation &CI,
164 llvm::IntrusiveRefCntPtr<llvm::vfs::FileSystem>) const override;
165
166 bool isModuleUnitBuilt(llvm::StringRef ModuleName) const {
167 return BuiltModuleNames.contains(ModuleName);
168 }
169
170 void addModuleFile(std::shared_ptr<const ModuleFile> ModuleFile) {
171 BuiltModuleNames.insert(ModuleFile->getModuleName());
172 RequiredModules.emplace_back(std::move(ModuleFile));
173 }
174
175private:
176 llvm::SmallVector<std::shared_ptr<const ModuleFile>, 8> RequiredModules;
177 // A helper class to speedup the query if a module is built.
178 llvm::StringSet<> BuiltModuleNames;
179};
180
181bool IsModuleFileUpToDate(PathRef ModuleFilePath,
182 const PrerequisiteModules &RequisiteModules,
183 llvm::IntrusiveRefCntPtr<llvm::vfs::FileSystem> VFS) {
184 auto HSOpts = std::make_shared<HeaderSearchOptions>();
185 RequisiteModules.adjustHeaderSearchOptions(*HSOpts);
186 HSOpts->ForceCheckCXX20ModulesInputFiles = true;
187 HSOpts->ValidateASTInputFilesContent = true;
188
190 IntrusiveRefCntPtr<DiagnosticsEngine> Diags =
191 CompilerInstance::createDiagnostics(*VFS, new DiagnosticOptions,
193 /*ShouldOwnClient=*/false);
194
195 LangOptions LangOpts;
196 LangOpts.SkipODRCheckInGMF = true;
197
198 FileManager FileMgr(FileSystemOptions(), VFS);
199
200 SourceManager SourceMgr(*Diags, FileMgr);
201
202 HeaderSearch HeaderInfo(std::move(HSOpts), SourceMgr, *Diags, LangOpts,
203 /*Target=*/nullptr);
204
205 TrivialModuleLoader ModuleLoader;
206 Preprocessor PP(std::make_shared<PreprocessorOptions>(), *Diags, LangOpts,
207 SourceMgr, HeaderInfo, ModuleLoader);
208
209 IntrusiveRefCntPtr<InMemoryModuleCache> ModuleCache = new InMemoryModuleCache;
210 PCHContainerOperations PCHOperations;
211 ASTReader Reader(PP, *ModuleCache, /*ASTContext=*/nullptr,
212 PCHOperations.getRawReader(), {});
213
214 // We don't need any listener here. By default it will use a validator
215 // listener.
216 Reader.setListener(nullptr);
217
218 if (Reader.ReadAST(ModuleFilePath, serialization::MK_MainFile,
219 SourceLocation(),
220 ASTReader::ARR_None) != ASTReader::Success)
221 return false;
222
223 bool UpToDate = true;
224 Reader.getModuleManager().visit([&](serialization::ModuleFile &MF) -> bool {
225 Reader.visitInputFiles(
226 MF, /*IncludeSystem=*/false, /*Complain=*/false,
227 [&](const serialization::InputFile &IF, bool isSystem) {
228 if (!IF.getFile() || IF.isOutOfDate())
229 UpToDate = false;
230 });
231 return !UpToDate;
232 });
233 return UpToDate;
234}
235
236bool IsModuleFilesUpToDate(
237 llvm::SmallVector<PathRef> ModuleFilePaths,
238 const PrerequisiteModules &RequisiteModules,
239 llvm::IntrusiveRefCntPtr<llvm::vfs::FileSystem> VFS) {
240 return llvm::all_of(
241 ModuleFilePaths, [&RequisiteModules, VFS](auto ModuleFilePath) {
242 return IsModuleFileUpToDate(ModuleFilePath, RequisiteModules, VFS);
243 });
244}
245
246/// Build a module file for module with `ModuleName`. The information of built
247/// module file are stored in \param BuiltModuleFiles.
248llvm::Expected<ModuleFile>
249buildModuleFile(llvm::StringRef ModuleName, PathRef ModuleUnitFileName,
250 const GlobalCompilationDatabase &CDB, const ThreadsafeFS &TFS,
251 const ReusablePrerequisiteModules &BuiltModuleFiles) {
252 // Try cheap operation earlier to boil-out cheaply if there are problems.
253 auto Cmd = CDB.getCompileCommand(ModuleUnitFileName);
254 if (!Cmd)
255 return llvm::createStringError(
256 llvm::formatv("No compile command for {0}", ModuleUnitFileName));
257
258 llvm::SmallString<256> ModuleFilesPrefix =
259 getUniqueModuleFilesPath(ModuleUnitFileName);
260
261 Cmd->Output = getModuleFilePath(ModuleName, ModuleFilesPrefix);
262
263 ParseInputs Inputs;
264 Inputs.TFS = &TFS;
265 Inputs.CompileCommand = std::move(*Cmd);
266
267 IgnoreDiagnostics IgnoreDiags;
268 auto CI = buildCompilerInvocation(Inputs, IgnoreDiags);
269 if (!CI)
270 return llvm::createStringError("Failed to build compiler invocation");
271
272 auto FS = Inputs.TFS->view(Inputs.CompileCommand.Directory);
273 auto Buf = FS->getBufferForFile(Inputs.CompileCommand.Filename);
274 if (!Buf)
275 return llvm::createStringError("Failed to create buffer");
276
277 // In clang's driver, we will suppress the check for ODR violation in GMF.
278 // See the implementation of RenderModulesOptions in Clang.cpp.
279 CI->getLangOpts().SkipODRCheckInGMF = true;
280
281 // Hash the contents of input files and store the hash value to the BMI files.
282 // So that we can check if the files are still valid when we want to reuse the
283 // BMI files.
284 CI->getHeaderSearchOpts().ValidateASTInputFilesContent = true;
285
286 BuiltModuleFiles.adjustHeaderSearchOptions(CI->getHeaderSearchOpts());
287
288 CI->getFrontendOpts().OutputFile = Inputs.CompileCommand.Output;
289 auto Clang =
290 prepareCompilerInstance(std::move(CI), /*Preamble=*/nullptr,
291 std::move(*Buf), std::move(FS), IgnoreDiags);
292 if (!Clang)
293 return llvm::createStringError("Failed to prepare compiler instance");
294
295 GenerateReducedModuleInterfaceAction Action;
296 Clang->ExecuteAction(Action);
297
298 if (Clang->getDiagnostics().hasErrorOccurred())
299 return llvm::createStringError("Compilation failed");
300
301 return ModuleFile{ModuleName, Inputs.CompileCommand.Output};
302}
303
304bool ReusablePrerequisiteModules::canReuse(
305 const CompilerInvocation &CI,
306 llvm::IntrusiveRefCntPtr<llvm::vfs::FileSystem> VFS) const {
307 if (RequiredModules.empty())
308 return true;
309
310 llvm::SmallVector<llvm::StringRef> BMIPaths;
311 for (auto &MF : RequiredModules)
312 BMIPaths.push_back(MF->getModuleFilePath());
313 return IsModuleFilesUpToDate(BMIPaths, *this, VFS);
314}
315
316class ModuleFileCache {
317public:
318 ModuleFileCache(const GlobalCompilationDatabase &CDB) : CDB(CDB) {}
319 const GlobalCompilationDatabase &getCDB() const { return CDB; }
320
321 std::shared_ptr<const ModuleFile> getModule(StringRef ModuleName);
322
323 void add(StringRef ModuleName, std::shared_ptr<const ModuleFile> ModuleFile) {
324 std::lock_guard<std::mutex> Lock(ModuleFilesMutex);
325
326 ModuleFiles[ModuleName] = ModuleFile;
327 }
328
329 void remove(StringRef ModuleName);
330
331private:
332 const GlobalCompilationDatabase &CDB;
333
334 llvm::StringMap<std::weak_ptr<const ModuleFile>> ModuleFiles;
335 // Mutex to guard accesses to ModuleFiles.
336 std::mutex ModuleFilesMutex;
337};
338
339std::shared_ptr<const ModuleFile>
340ModuleFileCache::getModule(StringRef ModuleName) {
341 std::lock_guard<std::mutex> Lock(ModuleFilesMutex);
342
343 auto Iter = ModuleFiles.find(ModuleName);
344 if (Iter == ModuleFiles.end())
345 return nullptr;
346
347 if (auto Res = Iter->second.lock())
348 return Res;
349
350 ModuleFiles.erase(Iter);
351 return nullptr;
352}
353
354void ModuleFileCache::remove(StringRef ModuleName) {
355 std::lock_guard<std::mutex> Lock(ModuleFilesMutex);
356
357 ModuleFiles.erase(ModuleName);
358}
359
360/// Collect the directly and indirectly required module names for \param
361/// ModuleName in topological order. The \param ModuleName is guaranteed to
362/// be the last element in \param ModuleNames.
363llvm::SmallVector<StringRef> getAllRequiredModules(ProjectModules &MDB,
364 StringRef ModuleName) {
365 llvm::SmallVector<llvm::StringRef> ModuleNames;
366 llvm::StringSet<> ModuleNamesSet;
367
368 auto VisitDeps = [&](StringRef ModuleName, auto Visitor) -> void {
369 ModuleNamesSet.insert(ModuleName);
370
371 for (StringRef RequiredModuleName :
372 MDB.getRequiredModules(MDB.getSourceForModuleName(ModuleName)))
373 if (ModuleNamesSet.insert(RequiredModuleName).second)
374 Visitor(RequiredModuleName, Visitor);
375
376 ModuleNames.push_back(ModuleName);
377 };
378 VisitDeps(ModuleName, VisitDeps);
379
380 return ModuleNames;
381}
382
383} // namespace
384
386public:
388
389 const GlobalCompilationDatabase &getCDB() const { return Cache.getCDB(); }
390
391 llvm::Error
392 getOrBuildModuleFile(StringRef ModuleName, const ThreadsafeFS &TFS,
393 ProjectModules &MDB,
394 ReusablePrerequisiteModules &BuiltModuleFiles);
395
396private:
397 ModuleFileCache Cache;
398};
399
401 StringRef ModuleName, const ThreadsafeFS &TFS, ProjectModules &MDB,
402 ReusablePrerequisiteModules &BuiltModuleFiles) {
403 if (BuiltModuleFiles.isModuleUnitBuilt(ModuleName))
404 return llvm::Error::success();
405
406 PathRef ModuleUnitFileName = MDB.getSourceForModuleName(ModuleName);
407 /// It is possible that we're meeting third party modules (modules whose
408 /// source are not in the project. e.g, the std module may be a third-party
409 /// module for most project) or something wrong with the implementation of
410 /// ProjectModules.
411 /// FIXME: How should we treat third party modules here? If we want to ignore
412 /// third party modules, we should return true instead of false here.
413 /// Currently we simply bail out.
414 if (ModuleUnitFileName.empty())
415 return llvm::createStringError(
416 llvm::formatv("Don't get the module unit for module {0}", ModuleName));
417
418 // Get Required modules in topological order.
419 auto ReqModuleNames = getAllRequiredModules(MDB, ModuleName);
420 for (llvm::StringRef ReqModuleName : ReqModuleNames) {
421 if (BuiltModuleFiles.isModuleUnitBuilt(ModuleName))
422 continue;
423
424 if (auto Cached = Cache.getModule(ReqModuleName)) {
425 if (IsModuleFileUpToDate(Cached->getModuleFilePath(), BuiltModuleFiles,
426 TFS.view(std::nullopt))) {
427 log("Reusing module {0} from {1}", ModuleName,
428 Cached->getModuleFilePath());
429 BuiltModuleFiles.addModuleFile(std::move(Cached));
430 continue;
431 }
432 Cache.remove(ReqModuleName);
433 }
434
435 llvm::Expected<ModuleFile> MF = buildModuleFile(
436 ModuleName, ModuleUnitFileName, getCDB(), TFS, BuiltModuleFiles);
437 if (llvm::Error Err = MF.takeError())
438 return Err;
439
440 log("Built module {0} to {1}", ModuleName, MF->getModuleFilePath());
441 auto BuiltModuleFile = std::make_shared<const ModuleFile>(std::move(*MF));
442 Cache.add(ModuleName, BuiltModuleFile);
443 BuiltModuleFiles.addModuleFile(std::move(BuiltModuleFile));
444 }
445
446 return llvm::Error::success();
447}
448
449std::unique_ptr<PrerequisiteModules>
451 const ThreadsafeFS &TFS) {
452 std::unique_ptr<ProjectModules> MDB = Impl->getCDB().getProjectModules(File);
453 if (!MDB) {
454 elog("Failed to get Project Modules information for {0}", File);
455 return std::make_unique<FailedPrerequisiteModules>();
456 }
457
458 std::vector<std::string> RequiredModuleNames = MDB->getRequiredModules(File);
459 if (RequiredModuleNames.empty())
460 return std::make_unique<ReusablePrerequisiteModules>();
461
462 auto RequiredModules = std::make_unique<ReusablePrerequisiteModules>();
463 for (llvm::StringRef RequiredModuleName : RequiredModuleNames) {
464 // Return early if there is any error.
465 if (llvm::Error Err = Impl->getOrBuildModuleFile(
466 RequiredModuleName, TFS, *MDB.get(), *RequiredModules.get())) {
467 elog("Failed to build module {0}; due to {1}", RequiredModuleName,
468 toString(std::move(Err)));
469 return std::make_unique<FailedPrerequisiteModules>();
470 }
471 }
472
473 return std::move(RequiredModules);
474}
475
477 Impl = std::make_unique<ModulesBuilderImpl>(CDB);
478}
479
481
482} // namespace clangd
483} // namespace clang
IgnoringDiagConsumer IgnoreDiags
std::unique_ptr< CompilerInstance > Clang
std::string MainFile
FieldAction Action
std::unique_ptr< CompilerInvocation > CI
Provides compilation arguments used for parsing C and C++ files.
virtual std::optional< tooling::CompileCommand > getCompileCommand(PathRef File) const =0
If there are any known-good commands for building this file, returns one.
const GlobalCompilationDatabase & getCDB() const
ModulesBuilderImpl(const GlobalCompilationDatabase &CDB)
llvm::Error getOrBuildModuleFile(StringRef ModuleName, const ThreadsafeFS &TFS, ProjectModules &MDB, ReusablePrerequisiteModules &BuiltModuleFiles)
std::unique_ptr< PrerequisiteModules > buildPrerequisiteModulesFor(PathRef File, const ThreadsafeFS &TFS)
ModulesBuilder(const GlobalCompilationDatabase &CDB)
An interface to query the modules information in the project.
virtual PathRef getSourceForModuleName(llvm::StringRef ModuleName, PathRef RequiredSrcFile=PathRef())=0
Wrapper for vfs::FileSystem for use in multithreaded programs like clangd.
Definition: ThreadsafeFS.h:26
llvm::IntrusiveRefCntPtr< llvm::vfs::FileSystem > view(std::nullopt_t CWD) const
Obtain a vfs::FileSystem with an arbitrary initial working directory.
Definition: ThreadsafeFS.h:32
std::unique_ptr< CompilerInvocation > buildCompilerInvocation(const ParseInputs &Inputs, clang::DiagnosticConsumer &D, std::vector< std::string > *CC1Args)
Builds compiler invocation that could be used to build AST or preamble.
Definition: Compiler.cpp:95
static const char * toString(OffsetEncoding OE)
Definition: Protocol.cpp:1595
std::unique_ptr< CompilerInstance > prepareCompilerInstance(std::unique_ptr< clang::CompilerInvocation > CI, const PrecompiledPreamble *Preamble, std::unique_ptr< llvm::MemoryBuffer > Buffer, llvm::IntrusiveRefCntPtr< llvm::vfs::FileSystem > VFS, DiagnosticConsumer &DiagsClient)
Definition: Compiler.cpp:129
void log(const char *Fmt, Ts &&... Vals)
Definition: Logger.h:67
llvm::StringRef PathRef
A typedef to represent a ref to file path.
Definition: Path.h:29
void elog(const char *Fmt, Ts &&... Vals)
Definition: Logger.h:61
===– Representation.cpp - ClangDoc Representation --------—*- C++ -*-===//
tooling::CompileCommand CompileCommand
Definition: Compiler.h:50
const ThreadsafeFS * TFS
Definition: Compiler.h:51