clang 23.0.0git
ModuleDepCollector.h
Go to the documentation of this file.
1//===----------------------------------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#ifndef LLVM_CLANG_DEPENDENCYSCANNING_MODULEDEPCOLLECTOR_H
10#define LLVM_CLANG_DEPENDENCYSCANNING_MODULEDEPCOLLECTOR_H
11
12#include "clang/Basic/LLVM.h"
13#include "clang/Basic/Module.h"
21#include "llvm/ADT/DenseMap.h"
22#include "llvm/ADT/Hashing.h"
23#include "llvm/ADT/StringSet.h"
24#include "llvm/Support/raw_ostream.h"
25#include <optional>
26#include <string>
27#include <unordered_map>
28#include <variant>
29
30namespace clang {
31namespace dependencies {
32
36
37/// Modular dependency that has already been built prior to the dependency scan.
39 std::string ModuleName;
40 std::string PCMFile;
41 std::string ModuleMapFile;
42
44 : ModuleName(MF->ModuleName), PCMFile(MF->FileName.str()),
45 ModuleMapFile(MF->ModuleMapPath) {}
46};
47
48/// Attributes loaded from AST files of prebuilt modules collected prior to
49/// ModuleDepCollector creation.
50using PrebuiltModulesAttrsMap = llvm::StringMap<PrebuiltModuleASTAttrs>;
52public:
53 /// When a module is discovered to not be in stable directories, traverse &
54 /// update all modules that depend on it.
55 void
57
58 /// Read-only access to whether the module is made up of dependencies in
59 /// stable directories.
 ///
 /// \returns the current value of the IsInStableDirs flag.
60 bool isInStableDir() const { return IsInStableDirs; }
61
62 /// Read-only access to the VFS map files.
 ///
 /// \returns a const reference to the internal VFSMap string set.
63 const llvm::StringSet<> &getVFS() const { return VFSMap; }
64
65 /// Update the VFSMap to the one discovered from serializing the AST file.
 ///
 /// \param VFS Replacement set; it is moved into VFSMap, replacing the
 /// previously stored contents.
66 void setVFS(llvm::StringSet<> &&VFS) { VFSMap = std::move(VFS); }
67
68 /// Add a direct dependent module file, so it can be updated if the current
69 /// module is from stable directories.
 ///
 /// NOTE(review): only the StringRef itself is inserted into
 /// ModuleFileDependents (a std::set<StringRef>), not a copy of the
 /// characters, so the referenced string presumably has to outlive this
 /// object -- confirm against callers.
70 void addDependent(StringRef ModuleFile) {
71 ModuleFileDependents.insert(ModuleFile);
72 }
73
74 /// Update whether the prebuilt module resolves entirely in stable
75 /// directories.
 ///
 /// The flag is initialized to true and can only ever be cleared: once it is
 /// false, later calls (including with \p V == true) leave it unchanged.
 ///
 /// \param V New value for the flag; defaults to false.
76 void setInStableDir(bool V = false) {
77 // Sticky: once the attribute is false it is never set back to true.
78 if (!IsInStableDirs)
79 return;
80 IsInStableDirs = V;
81 }
82
83private:
84 llvm::StringSet<> VFSMap;
85 bool IsInStableDirs = true;
86 std::set<StringRef> ModuleFileDependents;
87};
88
89/// Identifies a specific module by its name together with its context hash.
///
/// Equality and ordering below compare exactly these two fields, as does the
/// hash_value overload for ModuleID declared in namespace llvm.
90struct ModuleID {
91 /// The name of the module. This may include `:` for C++20 module partitions,
92 /// or a header-name for C++20 header units.
93 std::string ModuleName;
94
95 /// The context hash of a module represents the compiler options that affect
96 /// the resulting command-line invocation.
97 ///
98 /// Modules with the same name and ContextHash but different invocations could
99 /// cause non-deterministic build results.
100 ///
101 /// Modules with the same name but a different \c ContextHash should be
102 /// treated as separate modules for the purpose of a build.
103 std::string ContextHash;
104
 /// Two IDs are equal when both ModuleName and ContextHash are equal.
105 bool operator==(const ModuleID &Other) const {
106 return std::tie(ModuleName, ContextHash) ==
107 std::tie(Other.ModuleName, Other.ContextHash);
108 }
109
 /// Lexicographic ordering: compares ModuleName first and falls back to
 /// ContextHash when the names are equal.
110 bool operator<(const ModuleID &Other) const {
111 return std::tie(ModuleName, ContextHash) <
112 std::tie(Other.ModuleName, Other.ContextHash);
113 }
114};
115
116/// P1689ModuleInfo - Represents the needed information of standard C++20
117/// modules for P1689 format.
119 /// The name of the module. This may include `:` for partitions.
120 std::string ModuleName;
121
122 /// Optional. The source path to the module.
123 std::string SourcePath;
124
125 /// If this module is a standard C++ interface unit.
127
128 enum class ModuleType {
130 // To be supported
131 // AngleHeaderUnit,
132 // QuoteHeaderUnit
133 };
135};
136
137/// An output from a module compilation, such as the path of the module file.
139 /// The module file (.pcm). Required.
141 /// The path of the dependency file (.d), if any.
143 /// The null-separated list of names to use as the targets in the dependency
144 /// file, if any. Defaults to the value of \c ModuleFile, as in the driver.
146 /// The path of the serialized diagnostic file (.dia), if any.
148};
149
151 /// The identifier of the module.
153
154 /// Whether this is a "system" module.
156
157 /// Whether this module is fully composed of file & module inputs from
158 /// locations likely to stay the same across the active development and build
159 /// cycle. For example, when all those input paths only resolve in Sysroot.
160 ///
161 /// External paths, as opposed to virtual file paths, are always used
162 /// for computing this value.
164
165 /// Whether current working directory is ignored.
167
168 /// The path to the modulemap file which defines this module.
169 ///
170 /// This can be used to explicitly build this module. This file will
171 /// additionally appear in \c FileDeps as a dependency.
173
174 /// A collection of absolute paths to module map files that this module needs
175 /// to know about. The ordering is significant.
176 std::vector<std::string> ModuleMapFileDeps;
177
178 /// A collection of prebuilt modular dependencies this module directly depends
179 /// on, not including transitive dependencies.
180 std::vector<PrebuiltModuleDep> PrebuiltModuleDeps;
181
182 /// A list of module identifiers this module directly depends on, not
183 /// including transitive dependencies.
184 ///
185 /// This may include modules with a different context hash when it can be
186 /// determined that the differences are benign for this compilation.
187 std::vector<ModuleID> ClangModuleDeps;
188
189 /// The set of libraries or frameworks to link against when
190 /// an entity from this module is used.
192
193 /// Invokes \c Cb for all file dependencies of this module. Each provided
194 /// \c StringRef is only valid within the individual callback invocation.
195 void forEachFileDep(llvm::function_ref<void(StringRef)> Cb) const;
196
197 /// Get (or compute) the compiler invocation that can be used to build this
198 /// module. Does not include argv[0].
199 const std::vector<std::string> &getBuildArguments() const;
200
201private:
202 friend class ModuleDepCollector;
204
205 /// The absolute directory path that is the base for relative paths
206 /// in \c FileDeps.
207 std::string FileDepsBaseDir;
208
209 /// A collection of paths to files that this module directly depends on, not
210 /// including transitive dependencies.
211 std::vector<std::string> FileDeps;
212
213 mutable std::variant<std::monostate, CowCompilerInvocation,
214 std::vector<std::string>>
215 BuildInfo;
216};
217
219
220/// Callback that records textual includes and direct modular includes/imports
221/// during preprocessing. At the end of the main file, it also collects
222/// transitive modular dependencies and passes everything to the
223/// \c DependencyConsumer of the parent \c ModuleDepCollector.
224class ModuleDepCollectorPP final : public PPCallbacks {
225public:
227
230 SourceLocation Loc) override;
231 void InclusionDirective(SourceLocation HashLoc, const Token &IncludeTok,
232 StringRef FileName, bool IsAngled,
233 CharSourceRange FilenameRange,
234 OptionalFileEntryRef File, StringRef SearchPath,
235 StringRef RelativePath, const Module *SuggestedModule,
236 bool ModuleImported,
238 void moduleImport(SourceLocation ImportLoc, ModuleIdPath Path,
239 const Module *Imported) override;
240
241 void EndOfMainFile() override;
242
243private:
244 /// The parent dependency collector.
246
247 void handleImport(const Module *Imported);
248
249 /// Returns the ID or nothing if the dependency is spurious and is ignored.
250 std::optional<ModuleID> handleTopLevelModule(serialization::ModuleFile *MF);
251
252 /// Adds direct module dependencies to the ModuleDeps instance. This includes
253 /// prebuilt modules and implicitly-built modules.
254 void addAllModuleDeps(serialization::ModuleFile &MF, ModuleDeps &MD);
255};
256
257/// Collects modular and non-modular dependencies of the main file by attaching
258/// \c ModuleDepCollectorPP to the preprocessor.
260public:
262 std::unique_ptr<DependencyOutputOptions> Opts,
263 CompilerInstance &ScanInstance, DependencyConsumer &C,
264 DependencyActionController &Controller,
265 CompilerInvocation OriginalCI,
266 const PrebuiltModulesAttrsMap PrebuiltModulesASTMap,
267 const ArrayRef<StringRef> StableDirs);
268
269 void attachToPreprocessor(Preprocessor &PP) override;
270 void attachToASTReader(ASTReader &R) override;
271
/// Returns the stored \c CollectorPPPtr, a raw pointer that is initialized to
/// nullptr. Per that member's documentation, the callback is created by
/// attachToPreprocessor and owned by the Preprocessor instance.
272 PPCallbacks *getPPCallbacks() { return CollectorPPPtr; }
273
274 /// Apply any changes implied by the discovered dependencies to the given
275 /// invocation (e.g. disable implicit modules, add explicit module paths).
277
278private:
280
281 /// The parent dependency scanning service.
283 /// The compiler instance for scanning the current translation unit.
284 CompilerInstance &ScanInstance;
285 /// The consumer of collected dependency information.
286 DependencyConsumer &Consumer;
287 /// Callbacks for computing dependency information.
288 DependencyActionController &Controller;
289 /// Mapping from prebuilt AST filepaths to their attributes referenced during
290 /// dependency collecting.
291 const PrebuiltModulesAttrsMap PrebuiltModulesASTMap;
292 /// Directory paths known to be stable through an active development and build
293 /// cycle.
294 const ArrayRef<StringRef> StableDirs;
295 /// Path to the main source file.
296 std::string MainFile;
297 /// Non-modular file dependencies. This includes the main source file and
298 /// textually included header files.
299 std::vector<std::string> FileDeps;
300 /// Direct and transitive modular dependencies of the main source file.
301 llvm::MapVector<serialization::ModuleFile *, std::unique_ptr<ModuleDeps>>
302 ModularDeps;
303 /// Secondary mapping for \c ModularDeps allowing lookup by ModuleID without
304 /// a preprocessor. Storage owned by \c ModularDeps.
305 llvm::DenseMap<ModuleID, ModuleDeps *> ModuleDepsByID;
306 /// Direct modular dependencies that have already been built.
307 llvm::MapVector<serialization::ModuleFile *, PrebuiltModuleDep>
308 DirectPrebuiltModularDeps;
309 /// Working set of direct modular dependencies.
310 llvm::SetVector<serialization::ModuleFile *> DirectModularDeps;
311 /// Working set of direct modular dependencies, as they were imported.
313 /// All direct and transitive visible modules.
314 llvm::StringSet<> VisibleModules;
315
316 /// Options that control the dependency output generation.
317 std::unique_ptr<DependencyOutputOptions> Opts;
318 /// A Clang invocation that's based on the original TU invocation and that has
319 /// been partially transformed into one that can perform explicit build of
320 /// a discovered modular dependency. Note that this still needs to be adjusted
321 /// for each individual module.
322 CowCompilerInvocation CommonInvocation;
323
324 std::optional<P1689ModuleInfo> ProvidedStdCXXModule;
325 std::vector<P1689ModuleInfo> RequiredStdCXXModules;
326
327 /// A pointer to the preprocessor callback so we can invoke it directly
328 /// if needed. The callback is created and added to a Preprocessor instance by
329 /// attachToPreprocessor and the Preprocessor instance owns it.
330 ModuleDepCollectorPP *CollectorPPPtr = nullptr;
331
332 /// Checks whether the module is known as being prebuilt.
333 bool isPrebuiltModule(const serialization::ModuleFile *MF);
334
335 /// Computes all visible modules resolved from direct imports.
336 void addVisibleModules();
337
338 /// Adds \p Path to \c FileDeps, making it absolute if necessary.
339 void addFileDep(StringRef Path);
340 /// Adds \p Path to \c MD.FileDeps, making it absolute if necessary.
341 void addFileDep(ModuleDeps &MD, StringRef Path);
342
343 /// Get a Clang invocation adjusted to build the given modular dependency.
344 /// This excludes paths that are yet-to-be-provided by the build system.
345 CowCompilerInvocation getInvocationAdjustedForModuleBuildWithoutOutputs(
346 const ModuleDeps &Deps,
347 llvm::function_ref<void(CowCompilerInvocation &)> Optimize) const;
348
349 /// Collect module map files for given modules.
350 llvm::DenseSet<const FileEntry *>
351 collectModuleMapFiles(ArrayRef<ModuleID> ClangModuleDeps) const;
352
353 /// Add module map files to the invocation, if needed.
354 void addModuleMapFiles(CompilerInvocation &CI,
355 ArrayRef<ModuleID> ClangModuleDeps) const;
356 /// Add module files (pcm) to the invocation, if needed.
357 void addModuleFiles(CompilerInvocation &CI,
358 ArrayRef<ModuleID> ClangModuleDeps) const;
359 void addModuleFiles(CowCompilerInvocation &CI,
360 ArrayRef<ModuleID> ClangModuleDeps) const;
361
362 /// Add paths that require looking up outputs to the given dependencies.
363 void addOutputPaths(CowCompilerInvocation &CI, ModuleDeps &Deps);
364
365 /// Compute the context hash for \p Deps, and create the mapping
366 /// \c ModuleDepsByID[Deps.ID] = &Deps.
367 void associateWithContextHash(const CowCompilerInvocation &CI,
368 ModuleDeps &Deps);
369};
370
371/// Resets codegen options that don't affect modules/PCH.
373 const LangOptions &LangOpts,
374 CodeGenOptions &CGOpts);
375
376/// Determine if \c Input can be resolved within a stable directory.
377///
378/// \param Directories Paths known to be in a stable location. e.g. Sysroot.
379/// \param Input Path to evaluate.
380bool isPathInStableDir(const ArrayRef<StringRef> Directories,
381 const StringRef Input);
382
383/// Determine if options collected from a module's
384/// compilation can safely be considered as stable.
385///
386/// \param Directories Paths known to be in a stable location. e.g. Sysroot.
387/// \param HSOpts Header search options derived from the compiler invocation.
388bool areOptionsInStableDir(const ArrayRef<StringRef> Directories,
389 const HeaderSearchOptions &HSOpts);
390
391} // end namespace dependencies
392} // end namespace clang
393
394namespace llvm {
/// Hashes a ModuleID by combining its ModuleName and ContextHash, the same two
/// fields that ModuleID::operator== compares.
395inline hash_code hash_value(const clang::dependencies::ModuleID &ID) {
396 return hash_combine(ID.ModuleName, ID.ContextHash);
397}
398
/// DenseMapInfo specialization that lets ModuleID be used as a DenseMap key
/// (ModuleDepCollector::ModuleDepsByID is keyed this way).
399template <> struct DenseMapInfo<clang::dependencies::ModuleID> {
 /// Empty-key sentinel: a ModuleID whose name and context hash are both "".
401 static inline ModuleID getEmptyKey() { return ModuleID{"", ""}; }
 /// Tombstone sentinel: a ModuleID whose name and context hash are both "~".
402 static inline ModuleID getTombstoneKey() {
403 return ModuleID{"~", "~"}; // ~ is not a valid module name or context hash
404 }
 /// Forwards to the hash_value overload for ModuleID.
405 static unsigned getHashValue(const ModuleID &ID) { return hash_value(ID); }
 /// Key equality is ModuleID::operator== (name and context hash both match).
406 static bool isEqual(const ModuleID &LHS, const ModuleID &RHS) {
407 return LHS == RHS;
408 }
409};
410} // namespace llvm
411
412#endif // LLVM_CLANG_DEPENDENCYSCANNING_MODULEDEPCOLLECTOR_H
#define V(N, I)
Forward-declares and imports various common LLVM datatypes that clang wants to use unqualified.
llvm::MachO::FileType FileType
Definition MachO.h:46
Defines the clang::Module class, which describes a module in the source code.
Defines the PPCallbacks interface.
Defines the SourceManager interface.
Reads an AST files chain containing the contents of a translation unit.
Definition ASTReader.h:428
Represents a byte-granular source range.
CodeGenOptions - Track various options which control how the code is optimized and passed to the back...
CompilerInstance - Helper class for managing a single instance of the Clang compiler.
Helper class for holding the data necessary to invoke the compiler.
Same as CompilerInvocation, but with copy-on-write optimization.
An interface for collecting the dependencies of a compilation.
Definition Utils.h:63
An opaque identifier used by SourceManager which refers to a source file (MemoryBuffer) along with it...
HeaderSearchOptions - Helper class for storing options related to the initialization of the HeaderSea...
Keeps track of the various options that can be enabled, which controls the dialect of C or C++ that i...
Describes a module or submodule.
Definition Module.h:237
This interface provides a way to observe the actions of the preprocessor as it does its thing.
Definition PPCallbacks.h:37
Engages in a tight little dance with the lexer to efficiently preprocess tokens.
Encodes a location in the source.
Token - This structure provides full information about a lexed token.
Definition Token.h:36
Dependency scanner callbacks that are used during scanning to influence the behaviour of the scan - f...
The dependency scanning service contains shared configuration and state that is used by the individua...
Callback that records textual includes and direct modular includes/imports during preprocessing.
void EndOfMainFile() override
Callback invoked when the end of the main file is reached.
void InclusionDirective(SourceLocation HashLoc, const Token &IncludeTok, StringRef FileName, bool IsAngled, CharSourceRange FilenameRange, OptionalFileEntryRef File, StringRef SearchPath, StringRef RelativePath, const Module *SuggestedModule, bool ModuleImported, SrcMgr::CharacteristicKind FileType) override
Callback invoked whenever an inclusion directive of any kind (#include, #import, etc....
void LexedFileChanged(FileID FID, LexedFileChangeReason Reason, SrcMgr::CharacteristicKind FileType, FileID PrevFID, SourceLocation Loc) override
Callback invoked whenever the Lexer moves to a different file for lexing.
void moduleImport(SourceLocation ImportLoc, ModuleIdPath Path, const Module *Imported) override
Callback invoked whenever there was an explicit module-import syntax.
Collects modular and non-modular dependencies of the main file by attaching ModuleDepCollectorPP to t...
ModuleDepCollector(DependencyScanningService &Service, std::unique_ptr< DependencyOutputOptions > Opts, CompilerInstance &ScanInstance, DependencyConsumer &C, DependencyActionController &Controller, CompilerInvocation OriginalCI, const PrebuiltModulesAttrsMap PrebuiltModulesASTMap, const ArrayRef< StringRef > StableDirs)
void applyDiscoveredDependencies(CompilerInvocation &CI)
Apply any changes implied by the discovered dependencies to the given invocation, (e....
void attachToPreprocessor(Preprocessor &PP) override
void attachToASTReader(ASTReader &R) override
void updateDependentsNotInStableDirs(PrebuiltModulesAttrsMap &PrebuiltModulesMap)
When a module is discovered to not be in stable directories, traverse & update all modules that depen...
void setVFS(llvm::StringSet<> &&VFS)
Update the VFSMap to the one discovered from serializing the AST file.
bool isInStableDir() const
Read-only access to whether the module is made up of dependencies in stable directories.
void addDependent(StringRef ModuleFile)
Add a direct dependent module file, so it can be updated if the current module is from stable directo...
void setInStableDir(bool V=false)
Update whether the prebuilt module resolves entirely in stable directories.
const llvm::StringSet & getVFS() const
Read-only access to vfs map files.
Information about a module that has been loaded by the ASTReader.
Definition ModuleFile.h:158
CharacteristicKind
Indicates whether a file or directory holds normal user code, system code, or system code which is im...
bool areOptionsInStableDir(const ArrayRef< StringRef > Directories, const HeaderSearchOptions &HSOpts)
Determine if options collected from a module's compilation can safely be considered as stable.
@ VFS
Remove unused -ivfsoverlay arguments.
ModuleOutputKind
An output from a module compilation, such as the path of the module file.
@ DiagnosticSerializationFile
The path of the serialized diagnostic file (.dia), if any.
@ DependencyFile
The path of the dependency file (.d), if any.
@ DependencyTargets
The null-separated list of names to use as the targets in the dependency file, if any.
@ ModuleFile
The module file (.pcm). Required.
llvm::StringMap< PrebuiltModuleASTAttrs > PrebuiltModulesAttrsMap
Attributes loaded from AST files of prebuilt modules collected prior to ModuleDepCollector creation.
bool isPathInStableDir(const ArrayRef< StringRef > Directories, const StringRef Input)
Determine if Input can be resolved within a stable directory.
void resetBenignCodeGenOptions(frontend::ActionKind ProgramAction, const LangOptions &LangOpts, CodeGenOptions &CGOpts)
Resets codegen options that don't affect modules/PCH.
The JSON file list parser is used to communicate input to InstallAPI.
CustomizableOptional< FileEntryRef > OptionalFileEntryRef
Definition FileEntry.h:208
ArrayRef< IdentifierLoc > ModuleIdPath
A sequence of identifier/location pairs used to describe a particular module or submodule,...
@ Other
Other implicit parameter.
Definition Decl.h:1763
Diagnostic wrappers for TextAPI types for error reporting.
Definition Dominators.h:30
hash_code hash_value(const clang::dependencies::ModuleID &ID)
std::vector< std::string > ModuleMapFileDeps
A collection of absolute paths to module map files that this module needs to know about.
bool IsInStableDirectories
Whether this module is fully composed of file & module inputs from locations likely to stay the same ...
ModuleID ID
The identifier of the module.
bool IgnoreCWD
Whether current working directory is ignored.
void forEachFileDep(llvm::function_ref< void(StringRef)> Cb) const
Invokes Cb for all file dependencies of this module.
std::vector< PrebuiltModuleDep > PrebuiltModuleDeps
A collection of prebuilt modular dependencies this module directly depends on, not including transiti...
llvm::SmallVector< Module::LinkLibrary, 2 > LinkLibraries
The set of libraries or frameworks to link against when an entity from this module is used.
std::vector< ModuleID > ClangModuleDeps
A list of module identifiers this module directly depends on, not including transitive dependencies.
const std::vector< std::string > & getBuildArguments() const
Get (or compute) the compiler invocation that can be used to build this module.
std::string ClangModuleMapFile
The path to the modulemap file which defines this module.
bool IsSystem
Whether this is a "system" module.
This is used to identify a specific module.
bool operator==(const ModuleID &Other) const
std::string ModuleName
The name of the module.
std::string ContextHash
The context hash of a module represents the compiler options that affect the resulting command-line i...
bool operator<(const ModuleID &Other) const
P1689ModuleInfo - Represents the needed information of standard C++20 modules for P1689 format.
std::string SourcePath
Optional. The source path to the module.
bool IsStdCXXModuleInterface
If this module is a standard c++ interface unit.
std::string ModuleName
The name of the module. This may include : for partitions.
PrebuiltModuleDep(const serialization::ModuleFile *MF)
static bool isEqual(const ModuleID &LHS, const ModuleID &RHS)