clang 23.0.0git
ModuleDepCollector.h
Go to the documentation of this file.
1//===----------------------------------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#ifndef LLVM_CLANG_DEPENDENCYSCANNING_MODULEDEPCOLLECTOR_H
10#define LLVM_CLANG_DEPENDENCYSCANNING_MODULEDEPCOLLECTOR_H
11
12#include "clang/Basic/LLVM.h"
13#include "clang/Basic/Module.h"
21#include "llvm/ADT/DenseMap.h"
22#include "llvm/ADT/Hashing.h"
23#include "llvm/ADT/StringSet.h"
24#include "llvm/Support/raw_ostream.h"
25#include <optional>
26#include <string>
27#include <unordered_map>
28#include <variant>
29
30namespace clang {
31namespace dependencies {
32
36
37/// Modular dependency that has already been built prior to the dependency scan.
39 std::string ModuleName;
40 std::string PCMFile;
41 std::string ModuleMapFile;
42
/// Builds the record from \p M: the top-level module name, the name of the
/// module's AST file, and its presumed module map file.
/// NOTE(review): both \p M and the result of \c M->getASTFile() are
/// dereferenced without a check, so this presumably requires a module that
/// has an AST file — confirm against callers.
43 explicit PrebuiltModuleDep(const Module *M)
44 : ModuleName(M->getTopLevelModuleName()),
45 PCMFile(M->getASTFile()->getName()),
46 ModuleMapFile(M->PresumedModuleMapFile) {}
47};
48
49/// Attributes loaded from AST files of prebuilt modules collected prior to
50/// ModuleDepCollector creation.
51using PrebuiltModulesAttrsMap = llvm::StringMap<PrebuiltModuleASTAttrs>;
53public:
54 /// When a module is discovered to not be in stable directories, traverse &
55 /// update all modules that depend on it.
56 void
58
59 /// Read-only access to whether the module is made up of dependencies in
60 /// stable directories.
/// Returns the current value of the flag, which is initialised to \c true.
61 bool isInStableDir() const { return IsInStableDirs; }
62
63 /// Read-only access to vfs map files.
/// The returned reference aliases the member set; a later setVFS() call
/// replaces its contents.
64 const llvm::StringSet<> &getVFS() const { return VFSMap; }
65
66 /// Update the VFSMap to the one discovered from serializing the AST file.
/// \p VFS is taken by rvalue reference and moved into the member, replacing
/// whatever set was stored before.
67 void setVFS(llvm::StringSet<> &&VFS) { VFSMap = std::move(VFS); }
68
69 /// Add a direct dependent module file, so it can be updated if the current
70 /// module is from stable directories.
/// NOTE(review): the dependents are kept in a \c std::set<StringRef>, so only
/// a reference to \p ModuleFile is stored, not a copy of the string —
/// presumably the caller must keep the underlying storage alive; confirm
/// against call sites. Inserting the same value twice has no further effect.
71 void addDependent(StringRef ModuleFile) {
72 ModuleFileDependents.insert(ModuleFile);
73 }
74
75 /// Update whether the prebuilt module resolves entirely in stable
76 /// directories.
/// One-way switch: while the flag is still \c true it is overwritten with
/// \p V (so passing \c true leaves it unchanged); once it has become \c false
/// every later call returns early without touching it.
/// \param V New value for the flag; defaults to \c false.
77 void setInStableDir(bool V = false) {
78 // Cannot reset attribute once it's false.
79 if (!IsInStableDirs)
80 return;
81 IsInStableDirs = V;
82 }
83
84private:
85 llvm::StringSet<> VFSMap;
86 bool IsInStableDirs = true;
87 std::set<StringRef> ModuleFileDependents;
88};
89
90/// This is used to identify a specific module.
91struct ModuleID {
92 /// The name of the module. This may include `:` for C++20 module partitions,
93 /// or a header-name for C++20 header units.
94 std::string ModuleName;
95
96 /// The context hash of a module represents the compiler options that affect
97 /// the resulting command-line invocation.
98 ///
99 /// Modules with the same name and ContextHash but different invocations could
100 /// cause non-deterministic build results.
101 ///
102 /// Modules with the same name but a different \c ContextHash should be
103 /// treated as separate modules for the purpose of a build.
104 std::string ContextHash;
105
/// Two IDs are equal only when both \c ModuleName and \c ContextHash match.
106 bool operator==(const ModuleID &Other) const {
107 return std::tie(ModuleName, ContextHash) ==
108 std::tie(Other.ModuleName, Other.ContextHash);
109 }
110
/// Lexicographic ordering: compares \c ModuleName first and falls back to
/// \c ContextHash only when the names are equal.
111 bool operator<(const ModuleID &Other) const {
112 return std::tie(ModuleName, ContextHash) <
113 std::tie(Other.ModuleName, Other.ContextHash);
114 }
115};
116
117/// P1689ModuleInfo - Represents the needed information of standard C++20
118/// modules for P1689 format.
120 /// The name of the module. This may include `:` for partitions.
121 std::string ModuleName;
122
123 /// Optional. The source path to the module.
124 std::string SourcePath;
125
126 /// If this module is a standard c++ interface unit.
128
129 enum class ModuleType {
131 // To be supported
132 // AngleHeaderUnit,
133 // QuoteHeaderUnit
134 };
136};
137
138/// An output from a module compilation, such as the path of the module file.
140 /// The module file (.pcm). Required.
142 /// The path of the dependency file (.d), if any.
144 /// The null-separated list of names to use as the targets in the dependency
145 /// file, if any. Defaults to the value of \c ModuleFile, as in the driver.
147 /// The path of the serialized diagnostic file (.dia), if any.
149};
150
152 /// The identifier of the module.
154
155 /// Whether this is a "system" module.
157
158 /// Whether this module is fully composed of file & module inputs from
159 /// locations likely to stay the same across the active development and build
160 /// cycle. For example, when all those input paths only resolve in Sysroot.
161 ///
162 /// External paths, as opposed to virtual file paths, are always used
163 /// for computing this value.
165
166 /// The path to the modulemap file which defines this module.
167 ///
168 /// This can be used to explicitly build this module. This file will
169 /// additionally appear in \c FileDeps as a dependency.
171
172 /// A collection of absolute paths to module map files that this module needs
173 /// to know about. The ordering is significant.
174 std::vector<std::string> ModuleMapFileDeps;
175
176 /// A collection of prebuilt modular dependencies this module directly depends
177 /// on, not including transitive dependencies.
178 std::vector<PrebuiltModuleDep> PrebuiltModuleDeps;
179
180 /// A list of module identifiers this module directly depends on, not
181 /// including transitive dependencies.
182 ///
183 /// This may include modules with a different context hash when it can be
184 /// determined that the differences are benign for this compilation.
185 std::vector<ModuleID> ClangModuleDeps;
186
187 /// The set of libraries or frameworks to link against when
188 /// an entity from this module is used.
190
191 /// Invokes \c Cb for all file dependencies of this module. Each provided
192 /// \c StringRef is only valid within the individual callback invocation.
193 void forEachFileDep(llvm::function_ref<void(StringRef)> Cb) const;
194
195 /// Get (or compute) the compiler invocation that can be used to build this
196 /// module. Does not include argv[0].
197 const std::vector<std::string> &getBuildArguments() const;
198
199private:
200 friend class ModuleDepCollector;
202
203 /// The absolute directory path that is the base for relative paths
204 /// in \c FileDeps.
205 std::string FileDepsBaseDir;
206
207 /// A collection of paths to files that this module directly depends on, not
208 /// including transitive dependencies.
209 std::vector<std::string> FileDeps;
210
211 mutable std::variant<std::monostate, CowCompilerInvocation,
212 std::vector<std::string>>
213 BuildInfo;
214};
215
217
218/// Callback that records textual includes and direct modular includes/imports
219/// during preprocessing. At the end of the main file, it also collects
220/// transitive modular dependencies and passes everything to the
221/// \c DependencyConsumer of the parent \c ModuleDepCollector.
222class ModuleDepCollectorPP final : public PPCallbacks {
223public:
// NOTE(review): this listing skips some source lines of the class (the start
// of the LexedFileChanged declaration, the last parameter of
// InclusionDirective, and the member documented as "The parent dependency
// collector."). Confirm the full signatures in the real header before
// editing.
225
228 SourceLocation Loc) override;
/// Callback invoked whenever an inclusion directive of any kind is processed.
229 void InclusionDirective(SourceLocation HashLoc, const Token &IncludeTok,
230 StringRef FileName, bool IsAngled,
231 CharSourceRange FilenameRange,
232 OptionalFileEntryRef File, StringRef SearchPath,
233 StringRef RelativePath, const Module *SuggestedModule,
234 bool ModuleImported,
236 void moduleImport(SourceLocation ImportLoc, ModuleIdPath Path,
237 const Module *Imported) override;
238
/// Callback invoked when the end of the main file is reached.
239 void EndOfMainFile() override;
240
241private:
242 /// The parent dependency collector.
244
245 void handleImport(const Module *Imported);
246
247 /// Adds direct modular dependencies that have already been built to the
248 /// ModuleDeps instance.
249 void
250 addAllSubmodulePrebuiltDeps(const Module *M, ModuleDeps &MD,
251 llvm::DenseSet<const Module *> &SeenSubmodules);
252 void addModulePrebuiltDeps(const Module *M, ModuleDeps &MD,
253 llvm::DenseSet<const Module *> &SeenSubmodules);
254
255 /// Traverses the previously collected direct modular dependencies to discover
256 /// transitive modular dependencies and fills the parent \c ModuleDepCollector
257 /// with both.
258 /// Returns the ID or nothing if the dependency is spurious and is ignored.
259 std::optional<ModuleID> handleTopLevelModule(const Module *M);
260 void addAllSubmoduleDeps(const Module *M, ModuleDeps &MD,
261 llvm::DenseSet<const Module *> &AddedModules);
262 void addModuleDep(const Module *M, ModuleDeps &MD,
263 llvm::DenseSet<const Module *> &AddedModules);
264
265 /// Traverses the affecting modules and updates \c MD with references to the
266 /// parent \c ModuleDepCollector info.
267 void
268 addAllAffectingClangModules(const Module *M, ModuleDeps &MD,
269 llvm::DenseSet<const Module *> &AddedModules);
270 void addAffectingClangModule(const Module *M, ModuleDeps &MD,
271 llvm::DenseSet<const Module *> &AddedModules);
272
273 /// Add discovered module dependency for the given module.
274 void addOneModuleDep(const Module *M, const ModuleID ID, ModuleDeps &MD);
275};
276
277/// Collects modular and non-modular dependencies of the main file by attaching
278/// \c ModuleDepCollectorPP to the preprocessor.
280public:
282 std::unique_ptr<DependencyOutputOptions> Opts,
283 CompilerInstance &ScanInstance, DependencyConsumer &C,
284 DependencyActionController &Controller,
285 CompilerInvocation OriginalCI,
286 const PrebuiltModulesAttrsMap PrebuiltModulesASTMap,
287 const ArrayRef<StringRef> StableDirs);
288
289 void attachToPreprocessor(Preprocessor &PP) override;
290 void attachToASTReader(ASTReader &R) override;
291
/// Returns the preprocessor callback created by attachToPreprocessor(), or
/// \c nullptr if none has been created yet. The pointer is non-owning: the
/// Preprocessor instance the callback was added to owns it.
292 PPCallbacks *getPPCallbacks() { return CollectorPPPtr; }
293
294 /// Apply any changes implied by the discovered dependencies to the given
295 /// invocation, (e.g. disable implicit modules, add explicit module paths).
297
298private:
300
301 /// The parent dependency scanning service.
303 /// The compiler instance for scanning the current translation unit.
304 CompilerInstance &ScanInstance;
305 /// The consumer of collected dependency information.
306 DependencyConsumer &Consumer;
307 /// Callbacks for computing dependency information.
308 DependencyActionController &Controller;
309 /// Mapping from prebuilt AST filepaths to their attributes referenced during
310 /// dependency collecting.
311 const PrebuiltModulesAttrsMap PrebuiltModulesASTMap;
312 /// Directory paths known to be stable through an active development and build
313 /// cycle.
314 const ArrayRef<StringRef> StableDirs;
315 /// Path to the main source file.
316 std::string MainFile;
317 /// Non-modular file dependencies. This includes the main source file and
318 /// textually included header files.
319 std::vector<std::string> FileDeps;
320 /// Direct and transitive modular dependencies of the main source file.
321 llvm::MapVector<const Module *, std::unique_ptr<ModuleDeps>> ModularDeps;
322 /// Secondary mapping for \c ModularDeps allowing lookup by ModuleID without
323 /// a preprocessor. Storage owned by \c ModularDeps.
324 llvm::DenseMap<ModuleID, ModuleDeps *> ModuleDepsByID;
325 /// Direct modular dependencies that have already been built.
326 llvm::MapVector<const Module *, PrebuiltModuleDep> DirectPrebuiltModularDeps;
327 /// Working set of direct modular dependencies.
328 llvm::SetVector<const Module *> DirectModularDeps;
329 /// Working set of direct modular dependencies, as they were imported.
331 /// All direct and transitive visible modules.
332 llvm::StringSet<> VisibleModules;
333
334 /// Options that control the dependency output generation.
335 std::unique_ptr<DependencyOutputOptions> Opts;
336 /// A Clang invocation that's based on the original TU invocation and that has
337 /// been partially transformed into one that can perform explicit build of
338 /// a discovered modular dependency. Note that this still needs to be adjusted
339 /// for each individual module.
340 CowCompilerInvocation CommonInvocation;
341
342 std::optional<P1689ModuleInfo> ProvidedStdCXXModule;
343 std::vector<P1689ModuleInfo> RequiredStdCXXModules;
344
345 /// A pointer to the preprocessor callback so we can invoke it directly
346 /// if needed. The callback is created and added to a Preprocessor instance by
347 /// attachToPreprocessor and the Preprocessor instance owns it.
348 ModuleDepCollectorPP *CollectorPPPtr = nullptr;
349
350 /// Checks whether the module is known as being prebuilt.
351 bool isPrebuiltModule(const Module *M);
352
353 /// Computes all visible modules resolved from direct imports.
354 void addVisibleModules();
355
356 /// Adds \p Path to \c FileDeps, making it absolute if necessary.
357 void addFileDep(StringRef Path);
358 /// Adds \p Path to \c MD.FileDeps, making it absolute if necessary.
359 void addFileDep(ModuleDeps &MD, StringRef Path);
360
361 /// Get a Clang invocation adjusted to build the given modular dependency.
362 /// This excludes paths that are yet-to-be-provided by the build system.
363 CowCompilerInvocation getInvocationAdjustedForModuleBuildWithoutOutputs(
364 const ModuleDeps &Deps,
365 llvm::function_ref<void(CowCompilerInvocation &)> Optimize) const;
366
367 /// Collect module map files for given modules.
368 llvm::DenseSet<const FileEntry *>
369 collectModuleMapFiles(ArrayRef<ModuleID> ClangModuleDeps) const;
370
371 /// Add module map files to the invocation, if needed.
372 void addModuleMapFiles(CompilerInvocation &CI,
373 ArrayRef<ModuleID> ClangModuleDeps) const;
374 /// Add module files (pcm) to the invocation, if needed.
375 void addModuleFiles(CompilerInvocation &CI,
376 ArrayRef<ModuleID> ClangModuleDeps) const;
377 void addModuleFiles(CowCompilerInvocation &CI,
378 ArrayRef<ModuleID> ClangModuleDeps) const;
379
380 /// Add paths that require looking up outputs to the given dependencies.
381 void addOutputPaths(CowCompilerInvocation &CI, ModuleDeps &Deps);
382
383 /// Compute the context hash for \p Deps, and create the mapping
384 /// \c ModuleDepsByID[Deps.ID] = &Deps.
385 void associateWithContextHash(const CowCompilerInvocation &CI, bool IgnoreCWD,
386 ModuleDeps &Deps);
387};
388
389/// Resets codegen options that don't affect modules/PCH.
391 const LangOptions &LangOpts,
392 CodeGenOptions &CGOpts);
393
394/// Determine if \c Input can be resolved within a stable directory.
395///
396/// \param Directories Paths known to be in a stable location. e.g. Sysroot.
397/// \param Input Path to evaluate.
398bool isPathInStableDir(const ArrayRef<StringRef> Directories,
399 const StringRef Input);
400
401/// Determine if options collected from a module's
402/// compilation can safely be considered as stable.
403///
404/// \param Directories Paths known to be in a stable location. e.g. Sysroot.
405/// \param HSOpts Header search options derived from the compiler invocation.
406bool areOptionsInStableDir(const ArrayRef<StringRef> Directories,
407 const HeaderSearchOptions &HSOpts);
408
409} // end namespace dependencies
410} // end namespace clang
411
412namespace llvm {
/// Hashes a ModuleID by combining its \c ModuleName and \c ContextHash, the
/// same two fields that \c ModuleID::operator== compares.
413inline hash_code hash_value(const clang::dependencies::ModuleID &ID) {
414 return hash_combine(ID.ModuleName, ID.ContextHash);
415}
416
/// DenseMap key traits for ModuleID: two reserved sentinel keys, hashing via
/// hash_value(), and equality via \c ModuleID::operator==.
/// NOTE(review): the unqualified \c ModuleID used below is not declared in the
/// visible lines of this listing (its line 418 is absent); presumably an alias
/// for \c clang::dependencies::ModuleID is declared there — confirm in the
/// real header.
417template <> struct DenseMapInfo<clang::dependencies::ModuleID> {
// Sentinel for an empty slot: both fields are the empty string.
// NOTE(review): a real key equal to this value would presumably collide with
// the sentinel — confirm no module ID is ever fully empty.
419 static inline ModuleID getEmptyKey() { return ModuleID{"", ""}; }
// Sentinel for an erased slot: both fields are "~".
420 static inline ModuleID getTombstoneKey() {
421 return ModuleID{"~", "~"}; // ~ is not a valid module name or context hash
422 }
423 static unsigned getHashValue(const ModuleID &ID) { return hash_value(ID); }
424 static bool isEqual(const ModuleID &LHS, const ModuleID &RHS) {
425 return LHS == RHS;
426 }
427};
428} // namespace llvm
429
430#endif // LLVM_CLANG_DEPENDENCYSCANNING_MODULEDEPCOLLECTOR_H
#define V(N, I)
Forward-declares and imports various common LLVM datatypes that clang wants to use unqualified.
llvm::MachO::FileType FileType
Definition MachO.h:46
Defines the clang::Module class, which describes a module in the source code.
Defines the PPCallbacks interface.
Defines the SourceManager interface.
Reads an AST files chain containing the contents of a translation unit.
Definition ASTReader.h:430
Represents a byte-granular source range.
CodeGenOptions - Track various options which control how the code is optimized and passed to the back...
CompilerInstance - Helper class for managing a single instance of the Clang compiler.
Helper class for holding the data necessary to invoke the compiler.
Same as CompilerInvocation, but with copy-on-write optimization.
An interface for collecting the dependencies of a compilation.
Definition Utils.h:63
An opaque identifier used by SourceManager which refers to a source file (MemoryBuffer) along with it...
HeaderSearchOptions - Helper class for storing options related to the initialization of the HeaderSea...
Keeps track of the various options that can be enabled, which controls the dialect of C or C++ that i...
Describes a module or submodule.
Definition Module.h:144
This interface provides a way to observe the actions of the preprocessor as it does its thing.
Definition PPCallbacks.h:37
Engages in a tight little dance with the lexer to efficiently preprocess tokens.
Encodes a location in the source.
Token - This structure provides full information about a lexed token.
Definition Token.h:36
Dependency scanner callbacks that are used during scanning to influence the behaviour of the scan - f...
The dependency scanning service contains shared configuration and state that is used by the individua...
Callback that records textual includes and direct modular includes/imports during preprocessing.
void EndOfMainFile() override
Callback invoked when the end of the main file is reached.
void InclusionDirective(SourceLocation HashLoc, const Token &IncludeTok, StringRef FileName, bool IsAngled, CharSourceRange FilenameRange, OptionalFileEntryRef File, StringRef SearchPath, StringRef RelativePath, const Module *SuggestedModule, bool ModuleImported, SrcMgr::CharacteristicKind FileType) override
Callback invoked whenever an inclusion directive of any kind (#include, #import, etc....
void LexedFileChanged(FileID FID, LexedFileChangeReason Reason, SrcMgr::CharacteristicKind FileType, FileID PrevFID, SourceLocation Loc) override
Callback invoked whenever the Lexer moves to a different file for lexing.
void moduleImport(SourceLocation ImportLoc, ModuleIdPath Path, const Module *Imported) override
Callback invoked whenever there was an explicit module-import syntax.
Collects modular and non-modular dependencies of the main file by attaching ModuleDepCollectorPP to t...
ModuleDepCollector(DependencyScanningService &Service, std::unique_ptr< DependencyOutputOptions > Opts, CompilerInstance &ScanInstance, DependencyConsumer &C, DependencyActionController &Controller, CompilerInvocation OriginalCI, const PrebuiltModulesAttrsMap PrebuiltModulesASTMap, const ArrayRef< StringRef > StableDirs)
void applyDiscoveredDependencies(CompilerInvocation &CI)
Apply any changes implied by the discovered dependencies to the given invocation, (e....
void attachToPreprocessor(Preprocessor &PP) override
void attachToASTReader(ASTReader &R) override
void updateDependentsNotInStableDirs(PrebuiltModulesAttrsMap &PrebuiltModulesMap)
When a module is discovered to not be in stable directories, traverse & update all modules that depen...
void setVFS(llvm::StringSet<> &&VFS)
Update the VFSMap to the one discovered from serializing the AST file.
bool isInStableDir() const
Read-only access to whether the module is made up of dependencies in stable directories.
void addDependent(StringRef ModuleFile)
Add a direct dependent module file, so it can be updated if the current module is from stable directo...
void setInStableDir(bool V=false)
Update whether the prebuilt module resolves entirely in a stable directories.
const llvm::StringSet & getVFS() const
Read-only access to vfs map files.
CharacteristicKind
Indicates whether a file or directory holds normal user code, system code, or system code which is im...
bool areOptionsInStableDir(const ArrayRef< StringRef > Directories, const HeaderSearchOptions &HSOpts)
Determine if options collected from a module's compilation can safely be considered as stable.
@ VFS
Remove unused -ivfsoverlay arguments.
@ IgnoreCWD
Ignore the compiler's working directory if it is safe.
ModuleOutputKind
An output from a module compilation, such as the path of the module file.
@ DiagnosticSerializationFile
The path of the serialized diagnostic file (.dia), if any.
@ DependencyFile
The path of the dependency file (.d), if any.
@ DependencyTargets
The null-separated list of names to use as the targets in the dependency file, if any.
@ ModuleFile
The module file (.pcm). Required.
llvm::StringMap< PrebuiltModuleASTAttrs > PrebuiltModulesAttrsMap
Attributes loaded from AST files of prebuilt modules collected prior to ModuleDepCollector creation.
bool isPathInStableDir(const ArrayRef< StringRef > Directories, const StringRef Input)
Determine if Input can be resolved within a stable directory.
void resetBenignCodeGenOptions(frontend::ActionKind ProgramAction, const LangOptions &LangOpts, CodeGenOptions &CGOpts)
Resets codegen options that don't affect modules/PCH.
The JSON file list parser is used to communicate input to InstallAPI.
CustomizableOptional< FileEntryRef > OptionalFileEntryRef
Definition FileEntry.h:208
ArrayRef< IdentifierLoc > ModuleIdPath
A sequence of identifier/location pairs used to describe a particular module or submodule,...
@ Other
Other implicit parameter.
Definition Decl.h:1746
Diagnostic wrappers for TextAPI types for error reporting.
Definition Dominators.h:30
hash_code hash_value(const clang::dependencies::ModuleID &ID)
std::vector< std::string > ModuleMapFileDeps
A collection of absolute paths to module map files that this module needs to know about.
bool IsInStableDirectories
Whether this module is fully composed of file & module inputs from locations likely to stay the same ...
ModuleID ID
The identifier of the module.
void forEachFileDep(llvm::function_ref< void(StringRef)> Cb) const
Invokes Cb for all file dependencies of this module.
std::vector< PrebuiltModuleDep > PrebuiltModuleDeps
A collection of prebuilt modular dependencies this module directly depends on, not including transiti...
llvm::SmallVector< Module::LinkLibrary, 2 > LinkLibraries
The set of libraries or frameworks to link against when an entity from this module is used.
std::vector< ModuleID > ClangModuleDeps
A list of module identifiers this module directly depends on, not including transitive dependencies.
const std::vector< std::string > & getBuildArguments() const
Get (or compute) the compiler invocation that can be used to build this module.
std::string ClangModuleMapFile
The path to the modulemap file which defines this module.
bool IsSystem
Whether this is a "system" module.
This is used to identify a specific module.
bool operator==(const ModuleID &Other) const
std::string ModuleName
The name of the module.
std::string ContextHash
The context hash of a module represents the compiler options that affect the resulting command-line i...
bool operator<(const ModuleID &Other) const
P1689ModuleInfo - Represents the needed information of standard C++20 modules for P1689 format.
std::string SourcePath
Optional. The source path to the module.
bool IsStdCXXModuleInterface
If this module is a standard c++ interface unit.
std::string ModuleName
The name of the module. This may include : for partitions.
static bool isEqual(const ModuleID &LHS, const ModuleID &RHS)