clang 20.0.0git
ModuleDepCollector.h
Go to the documentation of this file.
1//===- ModuleDepCollector.h - Callbacks to collect deps ---------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#ifndef LLVM_CLANG_TOOLING_DEPENDENCYSCANNING_MODULEDEPCOLLECTOR_H
10#define LLVM_CLANG_TOOLING_DEPENDENCYSCANNING_MODULEDEPCOLLECTOR_H
11
12#include "clang/Basic/LLVM.h"
13#include "clang/Basic/Module.h"
21#include "llvm/ADT/DenseMap.h"
22#include "llvm/ADT/Hashing.h"
23#include "llvm/ADT/StringSet.h"
24#include "llvm/Support/raw_ostream.h"
25#include <optional>
26#include <string>
27#include <unordered_map>
28#include <variant>
29
30namespace clang {
31namespace tooling {
32namespace dependencies {
33
34class DependencyActionController;
35class DependencyConsumer;
36
37/// Modular dependency that has already been built prior to the dependency scan.
39 std::string ModuleName;
40 std::string PCMFile;
41 std::string ModuleMapFile;
42
43 explicit PrebuiltModuleDep(const Module *M)
44 : ModuleName(M->getTopLevelModuleName()),
45 PCMFile(M->getASTFile()->getName()),
46 ModuleMapFile(M->PresumedModuleMapFile) {}
47};
48
/// This is used to identify a specific module.
///
/// Two IDs are equal only when both the name and the context hash match;
/// ordering compares the name first and falls back to the context hash.
struct ModuleID {
  /// The name of the module. This may include `:` for C++20 module partitions,
  /// or a header-name for C++20 header units.
  std::string ModuleName;

  /// The context hash of a module represents the compiler options that affect
  /// the resulting command-line invocation.
  ///
  /// Modules with the same name and ContextHash but different invocations could
  /// cause non-deterministic build results.
  ///
  /// Modules with the same name but a different \c ContextHash should be
  /// treated as separate modules for the purpose of a build.
  std::string ContextHash;

  /// \returns true when both \c ModuleName and \c ContextHash are equal.
  bool operator==(const ModuleID &Other) const {
    return std::tie(ModuleName, ContextHash) ==
           std::tie(Other.ModuleName, Other.ContextHash);
  }

  /// Negation of \c operator==. Spelled out explicitly because C++17 does not
  /// derive it from \c operator==.
  bool operator!=(const ModuleID &Other) const { return !(*this == Other); }

  /// Lexicographic ordering on the (ModuleName, ContextHash) pair.
  bool operator<(const ModuleID &Other) const {
    return std::tie(ModuleName, ContextHash) <
           std::tie(Other.ModuleName, Other.ContextHash);
  }
};
75
76/// P1689ModuleInfo - Represents the needed information of standard C++20
77/// modules for P1689 format.
79 /// The name of the module. This may include `:` for partitions.
80 std::string ModuleName;
81
82 /// Optional. The source path to the module.
83 std::string SourcePath;
84
85 /// If this module is a standard c++ interface unit.
87
88 enum class ModuleType {
90 // To be supported
91 // AngleHeaderUnit,
92 // QuoteHeaderUnit
93 };
95};
96
/// An output from a module compilation, such as the path of the module file.
enum class ModuleOutputKind {
  /// The module file (.pcm). Required.
  ModuleFile,
  /// The path of the dependency file (.d), if any.
  DependencyFile,
  /// The null-separated list of names to use as the targets in the dependency
  /// file, if any. Defaults to the value of \c ModuleFile, as in the driver.
  DependencyTargets,
  /// The path of the serialized diagnostic file (.dia), if any.
  DiagnosticSerializationFile,
};
109
111 /// The identifier of the module.
113
114 /// Whether this is a "system" module.
116
117 /// The path to the modulemap file which defines this module.
118 ///
119 /// This can be used to explicitly build this module. This file will
120 /// additionally appear in \c FileDeps as a dependency.
122
123 /// A collection of absolute paths to module map files that this module needs
124 /// to know about. The ordering is significant.
125 std::vector<std::string> ModuleMapFileDeps;
126
127 /// A collection of prebuilt modular dependencies this module directly depends
128 /// on, not including transitive dependencies.
129 std::vector<PrebuiltModuleDep> PrebuiltModuleDeps;
130
131 /// A list of module identifiers this module directly depends on, not
132 /// including transitive dependencies.
133 ///
134 /// This may include modules with a different context hash when it can be
135 /// determined that the differences are benign for this compilation.
136 std::vector<ModuleID> ClangModuleDeps;
137
138 /// The set of libraries or frameworks to link against when
139 /// an entity from this module is used.
141
142 /// Invokes \c Cb for all file dependencies of this module. Each provided
143 /// \c StringRef is only valid within the individual callback invocation.
144 void forEachFileDep(llvm::function_ref<void(StringRef)> Cb) const;
145
146 /// Get (or compute) the compiler invocation that can be used to build this
147 /// module. Does not include argv[0].
148 const std::vector<std::string> &getBuildArguments();
149
150private:
151 friend class ModuleDepCollector;
153
154 /// The base directory for relative paths in \c FileDeps.
155 std::string FileDepsBaseDir;
156
157 /// A collection of paths to files that this module directly depends on, not
158 /// including transitive dependencies.
159 std::vector<std::string> FileDeps;
160
161 std::variant<std::monostate, CowCompilerInvocation, std::vector<std::string>>
162 BuildInfo;
163};
164
165using PrebuiltModuleVFSMapT = llvm::StringMap<llvm::StringSet<>>;
166
168
169/// Callback that records textual includes and direct modular includes/imports
170/// during preprocessing. At the end of the main file, it also collects
171/// transitive modular dependencies and passes everything to the
172/// \c DependencyConsumer of the parent \c ModuleDepCollector.
173class ModuleDepCollectorPP final : public PPCallbacks {
174public:
176
179 SourceLocation Loc) override;
180 void InclusionDirective(SourceLocation HashLoc, const Token &IncludeTok,
181 StringRef FileName, bool IsAngled,
182 CharSourceRange FilenameRange,
183 OptionalFileEntryRef File, StringRef SearchPath,
184 StringRef RelativePath, const Module *SuggestedModule,
185 bool ModuleImported,
188 const Module *Imported) override;
189
190 void EndOfMainFile() override;
191
192private:
193 /// The parent dependency collector.
195
196 void handleImport(const Module *Imported);
197
198 /// Adds direct modular dependencies that have already been built to the
199 /// ModuleDeps instance.
200 void
201 addAllSubmodulePrebuiltDeps(const Module *M, ModuleDeps &MD,
202 llvm::DenseSet<const Module *> &SeenSubmodules);
203 void addModulePrebuiltDeps(const Module *M, ModuleDeps &MD,
204 llvm::DenseSet<const Module *> &SeenSubmodules);
205
206 /// Traverses the previously collected direct modular dependencies to discover
207 /// transitive modular dependencies and fills the parent \c ModuleDepCollector
208 /// with both.
209 /// Returns the ID or nothing if the dependency is spurious and is ignored.
210 std::optional<ModuleID> handleTopLevelModule(const Module *M);
211 void addAllSubmoduleDeps(const Module *M, ModuleDeps &MD,
212 llvm::DenseSet<const Module *> &AddedModules);
213 void addModuleDep(const Module *M, ModuleDeps &MD,
214 llvm::DenseSet<const Module *> &AddedModules);
215
216 /// Traverses the affecting modules and updates \c MD with references to the
217 /// parent \c ModuleDepCollector info.
218 void addAllAffectingClangModules(const Module *M, ModuleDeps &MD,
219 llvm::DenseSet<const Module *> &AddedModules);
220 void addAffectingClangModule(const Module *M, ModuleDeps &MD,
221 llvm::DenseSet<const Module *> &AddedModules);
222};
223
224/// Collects modular and non-modular dependencies of the main file by attaching
225/// \c ModuleDepCollectorPP to the preprocessor.
227public:
228 ModuleDepCollector(std::unique_ptr<DependencyOutputOptions> Opts,
229 CompilerInstance &ScanInstance, DependencyConsumer &C,
230 DependencyActionController &Controller,
231 CompilerInvocation OriginalCI,
232 PrebuiltModuleVFSMapT PrebuiltModuleVFSMap,
233 ScanningOptimizations OptimizeArgs, bool EagerLoadModules,
234 bool IsStdModuleP1689Format);
235
236 void attachToPreprocessor(Preprocessor &PP) override;
237 void attachToASTReader(ASTReader &R) override;
238
239 /// Apply any changes implied by the discovered dependencies to the given
240 /// invocation, (e.g. disable implicit modules, add explicit module paths).
242
243private:
245
246 /// The compiler instance for scanning the current translation unit.
247 CompilerInstance &ScanInstance;
248 /// The consumer of collected dependency information.
249 DependencyConsumer &Consumer;
250 /// Callbacks for computing dependency information.
251 DependencyActionController &Controller;
252 /// Mapping from prebuilt AST files to their sorted list of VFS overlay files.
253 PrebuiltModuleVFSMapT PrebuiltModuleVFSMap;
254 /// Path to the main source file.
255 std::string MainFile;
256 /// Hash identifying the compilation conditions of the current TU.
257 std::string ContextHash;
258 /// Non-modular file dependencies. This includes the main source file and
259 /// textually included header files.
260 std::vector<std::string> FileDeps;
261 /// Direct and transitive modular dependencies of the main source file.
262 llvm::MapVector<const Module *, std::unique_ptr<ModuleDeps>> ModularDeps;
263 /// Secondary mapping for \c ModularDeps allowing lookup by ModuleID without
264 /// a preprocessor. Storage owned by \c ModularDeps.
265 llvm::DenseMap<ModuleID, ModuleDeps *> ModuleDepsByID;
266 /// Direct modular dependencies that have already been built.
267 llvm::MapVector<const Module *, PrebuiltModuleDep> DirectPrebuiltModularDeps;
268 /// Working set of direct modular dependencies.
269 llvm::SetVector<const Module *> DirectModularDeps;
270 /// Options that control the dependency output generation.
271 std::unique_ptr<DependencyOutputOptions> Opts;
272 /// A Clang invocation that's based on the original TU invocation and that has
273 /// been partially transformed into one that can perform explicit build of
274 /// a discovered modular dependency. Note that this still needs to be adjusted
275 /// for each individual module.
276 CowCompilerInvocation CommonInvocation;
277 /// Whether to optimize the modules' command-line arguments.
278 ScanningOptimizations OptimizeArgs;
279 /// Whether to set up command-lines to load PCM files eagerly.
280 bool EagerLoadModules;
281 /// If we're generating dependency output in P1689 format
282 /// for standard C++ modules.
283 bool IsStdModuleP1689Format;
284
285 std::optional<P1689ModuleInfo> ProvidedStdCXXModule;
286 std::vector<P1689ModuleInfo> RequiredStdCXXModules;
287
288 /// Checks whether the module is known as being prebuilt.
289 bool isPrebuiltModule(const Module *M);
290
291 /// Adds \p Path to \c FileDeps, making it absolute if necessary.
292 void addFileDep(StringRef Path);
293 /// Adds \p Path to \c MD.FileDeps, making it absolute if necessary.
294 void addFileDep(ModuleDeps &MD, StringRef Path);
295
296 /// Get a Clang invocation adjusted to build the given modular dependency.
297 /// This excludes paths that are yet-to-be-provided by the build system.
298 CowCompilerInvocation getInvocationAdjustedForModuleBuildWithoutOutputs(
299 const ModuleDeps &Deps,
300 llvm::function_ref<void(CowCompilerInvocation &)> Optimize) const;
301
302 /// Collect module map files for given modules.
303 llvm::DenseSet<const FileEntry *>
304 collectModuleMapFiles(ArrayRef<ModuleID> ClangModuleDeps) const;
305
306 /// Add module map files to the invocation, if needed.
307 void addModuleMapFiles(CompilerInvocation &CI,
308 ArrayRef<ModuleID> ClangModuleDeps) const;
309 /// Add module files (pcm) to the invocation, if needed.
310 void addModuleFiles(CompilerInvocation &CI,
311 ArrayRef<ModuleID> ClangModuleDeps) const;
312 void addModuleFiles(CowCompilerInvocation &CI,
313 ArrayRef<ModuleID> ClangModuleDeps) const;
314
315 /// Add paths that require looking up outputs to the given dependencies.
316 void addOutputPaths(CowCompilerInvocation &CI, ModuleDeps &Deps);
317
318 /// Compute the context hash for \p Deps, and create the mapping
319 /// \c ModuleDepsByID[Deps.ID] = &Deps.
320 void associateWithContextHash(const CowCompilerInvocation &CI,
321 ModuleDeps &Deps);
322};
323
324/// Resets codegen options that don't affect modules/PCH.
326 const LangOptions &LangOpts,
327 CodeGenOptions &CGOpts);
328
329} // end namespace dependencies
330} // end namespace tooling
331} // end namespace clang
332
333namespace llvm {
335 return hash_combine(ID.ModuleName, ID.ContextHash);
336}
337
338template <> struct DenseMapInfo<clang::tooling::dependencies::ModuleID> {
340 static inline ModuleID getEmptyKey() { return ModuleID{"", ""}; }
341 static inline ModuleID getTombstoneKey() {
342 return ModuleID{"~", "~"}; // ~ is not a valid module name or context hash
343 }
344 static unsigned getHashValue(const ModuleID &ID) { return hash_value(ID); }
345 static bool isEqual(const ModuleID &LHS, const ModuleID &RHS) {
346 return LHS == RHS;
347 }
348};
349} // namespace llvm
350
351#endif // LLVM_CLANG_TOOLING_DEPENDENCYSCANNING_MODULEDEPCOLLECTOR_H
static char ID
Definition: Arena.cpp:183
IndirectLocalPath & Path
Forward-declares and imports various common LLVM datatypes that clang wants to use unqualified.
llvm::MachO::FileType FileType
Definition: MachO.h:46
Defines the clang::Module class, which describes a module in the source code.
Defines the PPCallbacks interface.
static std::string getName(const CallEvent &Call)
SourceLocation Loc
Definition: SemaObjC.cpp:759
Defines the SourceManager interface.
Reads an AST files chain containing the contents of a translation unit.
Definition: ASTReader.h:383
Represents a character-granular source range.
CodeGenOptions - Track various options which control how the code is optimized and passed to the back...
CompilerInstance - Helper class for managing a single instance of the Clang compiler.
Helper class for holding the data necessary to invoke the compiler.
Same as CompilerInvocation, but with copy-on-write optimization.
An interface for collecting the dependencies of a compilation.
Definition: Utils.h:63
An opaque identifier used by SourceManager which refers to a source file (MemoryBuffer) along with it...
Record the location of an inclusion directive, such as an #include or #import statement.
Keeps track of the various options that can be enabled, which controls the dialect of C or C++ that i...
Definition: LangOptions.h:499
Describes a module or submodule.
Definition: Module.h:115
This interface provides a way to observe the actions of the preprocessor as it does its thing.
Definition: PPCallbacks.h:36
Engages in a tight little dance with the lexer to efficiently preprocess tokens.
Definition: Preprocessor.h:138
Encodes a location in the source.
Token - This structure provides full information about a lexed token.
Definition: Token.h:36
The base class of the type hierarchy.
Definition: Type.h:1828
Dependency scanner callbacks that are used during scanning to influence the behaviour of the scan - f...
Callback that records textual includes and direct modular includes/imports during preprocessing.
void EndOfMainFile() override
Callback invoked when the end of the main file is reached.
void LexedFileChanged(FileID FID, LexedFileChangeReason Reason, SrcMgr::CharacteristicKind FileType, FileID PrevFID, SourceLocation Loc) override
Callback invoked whenever the Lexer moves to a different file for lexing.
void moduleImport(SourceLocation ImportLoc, ModuleIdPath Path, const Module *Imported) override
Callback invoked whenever there was an explicit module-import syntax.
Collects modular and non-modular dependencies of the main file by attaching ModuleDepCollectorPP to t...
void applyDiscoveredDependencies(CompilerInvocation &CI)
Apply any changes implied by the discovered dependencies to the given invocation, (e....
void attachToPreprocessor(Preprocessor &PP) override
CharacteristicKind
Indicates whether a file or directory holds normal user code, system code, or system code which is im...
Definition: SourceManager.h:81
ModuleOutputKind
An output from a module compilation, such as the path of the module file.
@ DiagnosticSerializationFile
The path of the serialized diagnostic file (.dia), if any.
@ DependencyFile
The path of the dependency file (.d), if any.
@ DependencyTargets
The null-separated list of names to use as the targets in the dependency file, if any.
@ ModuleFile
The module file (.pcm). Required.
void resetBenignCodeGenOptions(frontend::ActionKind ProgramAction, const LangOptions &LangOpts, CodeGenOptions &CGOpts)
Resets codegen options that don't affect modules/PCH.
llvm::StringMap< llvm::StringSet<> > PrebuiltModuleVFSMapT
The JSON file list parser is used to communicate input to InstallAPI.
@ Other
Other implicit parameter.
Diagnostic wrappers for TextAPI types for error reporting.
Definition: Dominators.h:30
hash_code hash_value(const clang::tooling::dependencies::ModuleID &ID)
std::string ClangModuleMapFile
The path to the modulemap file which defines this module.
std::vector< std::string > ModuleMapFileDeps
A collection of absolute paths to module map files that this module needs to know about.
llvm::SmallVector< Module::LinkLibrary, 2 > LinkLibraries
The set of libraries or frameworks to link against when an entity from this module is used.
void forEachFileDep(llvm::function_ref< void(StringRef)> Cb) const
Invokes Cb for all file dependencies of this module.
std::vector< PrebuiltModuleDep > PrebuiltModuleDeps
A collection of prebuilt modular dependencies this module directly depends on, not including transiti...
std::vector< ModuleID > ClangModuleDeps
A list of module identifiers this module directly depends on, not including transitive dependencies.
ModuleID ID
The identifier of the module.
const std::vector< std::string > & getBuildArguments()
Get (or compute) the compiler invocation that can be used to build this module.
bool IsSystem
Whether this is a "system" module.
This is used to identify a specific module.
std::string ContextHash
The context hash of a module represents the compiler options that affect the resulting command-line i...
std::string ModuleName
The name of the module.
bool operator==(const ModuleID &Other) const
bool operator<(const ModuleID &Other) const
P1689ModuleInfo - Represents the needed information of standard C++20 modules for P1689 format.
std::string SourcePath
Optional. The source path to the module.
std::string ModuleName
The name of the module. This may include : for partitions.
bool IsStdCXXModuleInterface
If this module is a standard c++ interface unit.
Modular dependency that has already been built prior to the dependency scan.
static bool isEqual(const ModuleID &LHS, const ModuleID &RHS)