clang-tools 17.0.0git
ModularizeUtilities.cpp
Go to the documentation of this file.
1//===--- extra/modularize/ModularizeUtilities.cpp -------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements a class for loading and validating a module map or
10// header list by checking that all headers in the corresponding directories
11// are accounted for.
12//
13//===----------------------------------------------------------------------===//
14
15#include "clang/Basic/SourceManager.h"
16#include "clang/Driver/Options.h"
17#include "clang/Frontend/CompilerInstance.h"
18#include "clang/Frontend/FrontendActions.h"
19#include "CoverageChecker.h"
20#include "llvm/ADT/SmallString.h"
21#include "llvm/Support/FileUtilities.h"
22#include "llvm/Support/MemoryBuffer.h"
23#include "llvm/Support/Path.h"
24#include "llvm/Support/raw_ostream.h"
25#include "ModularizeUtilities.h"
26
27using namespace clang;
28using namespace llvm;
29using namespace Modularize;
30
31namespace {
32// Subclass TargetOptions so we can construct it inline with
33// the minimal option, the triple.
34class ModuleMapTargetOptions : public clang::TargetOptions {
35public:
36 ModuleMapTargetOptions() { Triple = llvm::sys::getDefaultTargetTriple(); }
37};
38} // namespace
39
40// ModularizeUtilities class implementation.
41
42// Constructor.
43ModularizeUtilities::ModularizeUtilities(std::vector<std::string> &InputPaths,
44 llvm::StringRef Prefix,
45 llvm::StringRef ProblemFilesListPath)
46 : InputFilePaths(InputPaths), HeaderPrefix(Prefix),
47 ProblemFilesPath(ProblemFilesListPath), HasModuleMap(false),
48 MissingHeaderCount(0),
49 // Init clang stuff needed for loading the module map and preprocessing.
50 LangOpts(new LangOptions()), DiagIDs(new DiagnosticIDs()),
51 DiagnosticOpts(new DiagnosticOptions()),
52 DC(llvm::errs(), DiagnosticOpts.get()),
54 new DiagnosticsEngine(DiagIDs, DiagnosticOpts.get(), &DC, false)),
55 TargetOpts(new ModuleMapTargetOptions()),
56 Target(TargetInfo::CreateTargetInfo(*Diagnostics, TargetOpts)),
57 FileMgr(new FileManager(FileSystemOpts)),
58 SourceMgr(new SourceManager(*Diagnostics, *FileMgr, false)),
59 HeaderInfo(new HeaderSearch(std::make_shared<HeaderSearchOptions>(),
60 *SourceMgr, *Diagnostics, *LangOpts,
61 Target.get())) {}
62
63// Create instance of ModularizeUtilities, to simplify setting up
64// subordinate objects.
66 std::vector<std::string> &InputPaths, llvm::StringRef Prefix,
67 llvm::StringRef ProblemFilesListPath) {
68
69 return new ModularizeUtilities(InputPaths, Prefix, ProblemFilesListPath);
70}
71
72// Load all header lists and dependencies.
74 // For each input file.
75 for (auto I = InputFilePaths.begin(), E = InputFilePaths.end(); I != E; ++I) {
76 llvm::StringRef InputPath = *I;
77 // If it's a module map.
78 if (InputPath.endswith(".modulemap")) {
79 // Load the module map.
80 if (std::error_code EC = loadModuleMap(InputPath))
81 return EC;
82 }
83 else {
84 // Else we assume it's a header list and load it.
85 if (std::error_code EC = loadSingleHeaderListsAndDependencies(InputPath)) {
86 errs() << "modularize: error: Unable to get header list '" << InputPath
87 << "': " << EC.message() << '\n';
88 return EC;
89 }
90 }
91 }
92 // If we have a problem files list.
93 if (ProblemFilesPath.size() != 0) {
94 // Load problem files list.
95 if (std::error_code EC = loadProblemHeaderList(ProblemFilesPath)) {
96 errs() << "modularize: error: Unable to get problem header list '" << ProblemFilesPath
97 << "': " << EC.message() << '\n';
98 return EC;
99 }
100 }
101 return std::error_code();
102}
103
104// Do coverage checks.
105// For each loaded module map, do header coverage check.
106// Starting from the directory of the module.map file,
107// Find all header files, optionally looking only at files
108// covered by the include path options, and compare against
109// the headers referenced by the module.map file.
110// Display warnings for unaccounted-for header files.
111// Returns 0 if there were no errors or warnings, 1 if there
112// were warnings, 2 if any other problem, such as a bad
113// module map path argument was specified.
115 std::vector<std::string> &IncludePaths,
116 llvm::ArrayRef<std::string> CommandLine) {
117 int ModuleMapCount = ModuleMaps.size();
118 int ModuleMapIndex;
119 std::error_code EC;
120 for (ModuleMapIndex = 0; ModuleMapIndex < ModuleMapCount; ++ModuleMapIndex) {
121 std::unique_ptr<clang::ModuleMap> &ModMap = ModuleMaps[ModuleMapIndex];
123 InputFilePaths[ModuleMapIndex], IncludePaths, CommandLine,
124 ModMap.get());
125 std::error_code LocalEC = Checker->doChecks();
126 if (LocalEC.value() > 0)
127 EC = LocalEC;
128 }
129 return EC;
130}
131
132// Load single header list and dependencies.
134 llvm::StringRef InputPath) {
135
136 // By default, use the path component of the list file name.
137 SmallString<256> HeaderDirectory(InputPath);
138 llvm::sys::path::remove_filename(HeaderDirectory);
139 SmallString<256> CurrentDirectory;
140 llvm::sys::fs::current_path(CurrentDirectory);
141
142 // Get the prefix if we have one.
143 if (HeaderPrefix.size() != 0)
144 HeaderDirectory = HeaderPrefix;
145
146 // Read the header list file into a buffer.
147 ErrorOr<std::unique_ptr<MemoryBuffer>> listBuffer =
148 MemoryBuffer::getFile(InputPath);
149 if (std::error_code EC = listBuffer.getError())
150 return EC;
151
152 // Parse the header list into strings.
153 SmallVector<StringRef, 32> Strings;
154 listBuffer.get()->getBuffer().split(Strings, "\n", -1, false);
155
156 // Collect the header file names from the string list.
158 E = Strings.end();
159 I != E; ++I) {
160 StringRef Line = I->trim();
161 // Ignore comments and empty lines.
162 if (Line.empty() || (Line[0] == '#'))
163 continue;
164 std::pair<StringRef, StringRef> TargetAndDependents = Line.split(':');
165 SmallString<256> HeaderFileName;
166 // Prepend header file name prefix if it's not absolute.
167 if (llvm::sys::path::is_absolute(TargetAndDependents.first))
168 llvm::sys::path::native(TargetAndDependents.first, HeaderFileName);
169 else {
170 if (HeaderDirectory.size() != 0)
171 HeaderFileName = HeaderDirectory;
172 else
173 HeaderFileName = CurrentDirectory;
174 llvm::sys::path::append(HeaderFileName, TargetAndDependents.first);
175 llvm::sys::path::native(HeaderFileName);
176 }
177 // Handle optional dependencies.
178 DependentsVector Dependents;
179 SmallVector<StringRef, 4> DependentsList;
180 TargetAndDependents.second.split(DependentsList, " ", -1, false);
181 int Count = DependentsList.size();
182 for (int Index = 0; Index < Count; ++Index) {
183 SmallString<256> Dependent;
184 if (llvm::sys::path::is_absolute(DependentsList[Index]))
185 Dependent = DependentsList[Index];
186 else {
187 if (HeaderDirectory.size() != 0)
188 Dependent = HeaderDirectory;
189 else
190 Dependent = CurrentDirectory;
191 llvm::sys::path::append(Dependent, DependentsList[Index]);
192 }
193 llvm::sys::path::native(Dependent);
194 Dependents.push_back(getCanonicalPath(Dependent.str()));
195 }
196 // Get canonical form.
197 HeaderFileName = getCanonicalPath(HeaderFileName);
198 // Save the resulting header file path and dependencies.
199 HeaderFileNames.push_back(std::string(HeaderFileName.str()));
200 Dependencies[HeaderFileName.str()] = Dependents;
201 }
202 return std::error_code();
203}
204
205// Load problem header list.
207 llvm::StringRef InputPath) {
208
209 // By default, use the path component of the list file name.
210 SmallString<256> HeaderDirectory(InputPath);
211 llvm::sys::path::remove_filename(HeaderDirectory);
212 SmallString<256> CurrentDirectory;
213 llvm::sys::fs::current_path(CurrentDirectory);
214
215 // Get the prefix if we have one.
216 if (HeaderPrefix.size() != 0)
217 HeaderDirectory = HeaderPrefix;
218
219 // Read the header list file into a buffer.
220 ErrorOr<std::unique_ptr<MemoryBuffer>> listBuffer =
221 MemoryBuffer::getFile(InputPath);
222 if (std::error_code EC = listBuffer.getError())
223 return EC;
224
225 // Parse the header list into strings.
226 SmallVector<StringRef, 32> Strings;
227 listBuffer.get()->getBuffer().split(Strings, "\n", -1, false);
228
229 // Collect the header file names from the string list.
231 E = Strings.end();
232 I != E; ++I) {
233 StringRef Line = I->trim();
234 // Ignore comments and empty lines.
235 if (Line.empty() || (Line[0] == '#'))
236 continue;
237 SmallString<256> HeaderFileName;
238 // Prepend header file name prefix if it's not absolute.
239 if (llvm::sys::path::is_absolute(Line))
240 llvm::sys::path::native(Line, HeaderFileName);
241 else {
242 if (HeaderDirectory.size() != 0)
243 HeaderFileName = HeaderDirectory;
244 else
245 HeaderFileName = CurrentDirectory;
246 llvm::sys::path::append(HeaderFileName, Line);
247 llvm::sys::path::native(HeaderFileName);
248 }
249 // Get canonical form.
250 HeaderFileName = getCanonicalPath(HeaderFileName);
251 // Save the resulting header file path.
252 ProblemFileNames.push_back(std::string(HeaderFileName.str()));
253 }
254 return std::error_code();
255}
256
257// Load single module map and extract header file list.
259 llvm::StringRef InputPath) {
260 // Get file entry for module.modulemap file.
261 auto ModuleMapEntryOrErr =
262 SourceMgr->getFileManager().getFile(InputPath);
263
264 // return error if not found.
265 if (!ModuleMapEntryOrErr) {
266 llvm::errs() << "error: File \"" << InputPath << "\" not found.\n";
267 return ModuleMapEntryOrErr.getError();
268 }
269 const FileEntry *ModuleMapEntry = *ModuleMapEntryOrErr;
270
271 // Because the module map parser uses a ForwardingDiagnosticConsumer,
272 // which doesn't forward the BeginSourceFile call, we do it explicitly here.
273 DC.BeginSourceFile(*LangOpts, nullptr);
274
275 // Figure out the home directory for the module map file.
276 const DirectoryEntry *Dir = ModuleMapEntry->getDir();
277 StringRef DirName(Dir->getName());
278 if (llvm::sys::path::filename(DirName) == "Modules") {
279 DirName = llvm::sys::path::parent_path(DirName);
280 if (DirName.endswith(".framework")) {
281 if (auto DirEntry = FileMgr->getDirectory(DirName))
282 Dir = *DirEntry;
283 else
284 Dir = nullptr;
285 }
286 // FIXME: This assert can fail if there's a race between the above check
287 // and the removal of the directory.
288 assert(Dir && "parent must exist");
289 }
290
291 std::unique_ptr<ModuleMap> ModMap;
292 ModMap.reset(new ModuleMap(*SourceMgr, *Diagnostics, *LangOpts,
293 Target.get(), *HeaderInfo));
294
295 // Parse module.modulemap file into module map.
296 if (ModMap->parseModuleMapFile(ModuleMapEntry, false, Dir)) {
297 return std::error_code(1, std::generic_category());
298 }
299
300 // Do matching end call.
301 DC.EndSourceFile();
302
303 // Reset missing header count.
305
306 if (!collectModuleMapHeaders(ModMap.get()))
307 return std::error_code(1, std::generic_category());
308
309 // Save module map.
310 ModuleMaps.push_back(std::move(ModMap));
311
312 // Indicate we are using module maps.
313 HasModuleMap = true;
314
315 // Return code of 1 for missing headers.
317 return std::error_code(1, std::generic_category());
318
319 return std::error_code();
320}
321
322// Collect module map headers.
323// Walks the modules and collects referenced headers into
324// HeaderFileNames.
325bool ModularizeUtilities::collectModuleMapHeaders(clang::ModuleMap *ModMap) {
326 for (ModuleMap::module_iterator I = ModMap->module_begin(),
327 E = ModMap->module_end();
328 I != E; ++I) {
329 if (!collectModuleHeaders(*I->second))
330 return false;
331 }
332 return true;
333}
334
335// Collect referenced headers from one module.
336// Collects the headers referenced in the given module into
337// HeaderFileNames.
338bool ModularizeUtilities::collectModuleHeaders(const clang::Module &Mod) {
339
340 // Ignore explicit modules because they often have dependencies
341 // we can't know.
342 if (Mod.IsExplicit)
343 return true;
344
345 // Treat headers in umbrella directory as dependencies.
346 DependentsVector UmbrellaDependents;
347
348 // Recursively do submodules.
349 for (auto MI = Mod.submodule_begin(), MIEnd = Mod.submodule_end();
350 MI != MIEnd; ++MI)
352
353 if (const FileEntry *UmbrellaHeader = Mod.getUmbrellaHeader().Entry) {
354 std::string HeaderPath = getCanonicalPath(UmbrellaHeader->getName());
355 // Collect umbrella header.
356 HeaderFileNames.push_back(HeaderPath);
357
358 // FUTURE: When needed, umbrella header header collection goes here.
359 }
360 else if (const DirectoryEntry *UmbrellaDir = Mod.getUmbrellaDir().Entry) {
361 // If there normal headers, assume these are umbrellas and skip collection.
362 if (Mod.Headers->size() == 0) {
363 // Collect headers in umbrella directory.
364 if (!collectUmbrellaHeaders(UmbrellaDir->getName(), UmbrellaDependents))
365 return false;
366 }
367 }
368
369 // We ignore HK_Private, HK_Textual, HK_PrivateTextual, and HK_Excluded,
370 // assuming they are marked as such either because of unsuitability for
371 // modules or because they are meant to be included by another header,
372 // and thus should be ignored by modularize.
373
374 int NormalHeaderCount = Mod.Headers[clang::Module::HK_Normal].size();
375
376 for (int Index = 0; Index < NormalHeaderCount; ++Index) {
377 DependentsVector NormalDependents;
378 // Collect normal header.
379 const clang::Module::Header &Header(
380 Mod.Headers[clang::Module::HK_Normal][Index]);
381 std::string HeaderPath = getCanonicalPath(Header.Entry->getName());
382 HeaderFileNames.push_back(HeaderPath);
383 }
384
385 int MissingCountThisModule = Mod.MissingHeaders.size();
386
387 for (int Index = 0; Index < MissingCountThisModule; ++Index) {
388 std::string MissingFile = Mod.MissingHeaders[Index].FileName;
389 SourceLocation Loc = Mod.MissingHeaders[Index].FileNameLoc;
390 errs() << Loc.printToString(*SourceMgr)
391 << ": error : Header not found: " << MissingFile << "\n";
392 }
393
394 MissingHeaderCount += MissingCountThisModule;
395
396 return true;
397}
398
399// Collect headers from an umbrella directory.
400bool ModularizeUtilities::collectUmbrellaHeaders(StringRef UmbrellaDirName,
401 DependentsVector &Dependents) {
402 // Initialize directory name.
403 SmallString<256> Directory(UmbrellaDirName);
404 // Walk the directory.
405 std::error_code EC;
406 for (llvm::sys::fs::directory_iterator I(Directory.str(), EC), E; I != E;
407 I.increment(EC)) {
408 if (EC)
409 return false;
410 std::string File(I->path());
411 llvm::ErrorOr<llvm::sys::fs::basic_file_status> Status = I->status();
412 if (!Status)
413 return false;
414 llvm::sys::fs::file_type Type = Status->type();
415 // If the file is a directory, ignore the name and recurse.
416 if (Type == llvm::sys::fs::file_type::directory_file) {
417 if (!collectUmbrellaHeaders(File, Dependents))
418 return false;
419 continue;
420 }
421 // If the file does not have a common header extension, ignore it.
422 if (!isHeader(File))
423 continue;
424 // Save header name.
425 std::string HeaderPath = getCanonicalPath(File);
426 Dependents.push_back(HeaderPath);
427 }
428 return true;
429}
430
431// Replace .. embedded in path for purposes of having
432// a canonical path.
433static std::string replaceDotDot(StringRef Path) {
434 SmallString<128> Buffer;
435 llvm::sys::path::const_iterator B = llvm::sys::path::begin(Path),
436 E = llvm::sys::path::end(Path);
437 while (B != E) {
438 if (B->compare(".") == 0) {
439 }
440 else if (B->compare("..") == 0)
441 llvm::sys::path::remove_filename(Buffer);
442 else
443 llvm::sys::path::append(Buffer, *B);
444 ++B;
445 }
446 if (Path.endswith("/") || Path.endswith("\\"))
447 Buffer.append(1, Path.back());
448 return Buffer.c_str();
449}
450
451// Convert header path to canonical form.
452// The canonical form is basically just use forward slashes, and remove "./".
453// \param FilePath The file path, relative to the module map directory.
454// \returns The file path in canonical form.
455std::string ModularizeUtilities::getCanonicalPath(StringRef FilePath) {
456 std::string Tmp(replaceDotDot(FilePath));
457 std::replace(Tmp.begin(), Tmp.end(), '\\', '/');
458 StringRef Tmp2(Tmp);
459 if (Tmp2.startswith("./"))
460 Tmp = std::string(Tmp2.substr(2));
461 return Tmp;
462}
463
464// Check for header file extension.
465// If the file extension is .h, .inc, or missing, it's
466// assumed to be a header.
467// \param FileName The file name. Must not be a directory.
468// \returns true if it has a header extension or no extension.
470 StringRef Extension = llvm::sys::path::extension(FileName);
471 if (Extension.size() == 0)
472 return true;
473 if (Extension.equals_insensitive(".h"))
474 return true;
475 if (Extension.equals_insensitive(".inc"))
476 return true;
477 return false;
478}
479
480// Get directory path component from file path.
481// \returns the component of the given path, which will be
482// relative if the given path is relative, absolute if the
483// given path is absolute, or "." if the path has no leading
484// path component.
486 SmallString<256> Directory(Path);
487 sys::path::remove_filename(Directory);
488 if (Directory.size() == 0)
489 return ".";
490 return std::string(Directory.str());
491}
492
493// Add unique problem file.
494// Also standardizes the path.
496 FilePath = getCanonicalPath(FilePath);
497 // Don't add if already present.
498 for(auto &TestFilePath : ProblemFileNames) {
499 if (TestFilePath == FilePath)
500 return;
501 }
502 ProblemFileNames.push_back(FilePath);
503}
504
505// Add file with no compile errors.
506// Also standardizes the path.
508 FilePath = getCanonicalPath(FilePath);
509 GoodFileNames.push_back(FilePath);
510}
511
512// List problem files.
514 errs() << "\nThese are the files with possible errors:\n\n";
515 for (auto &ProblemFile : ProblemFileNames) {
516 errs() << ProblemFile << "\n";
517 }
518}
519
520// List files with no problems.
522 errs() << "\nThese are the files with no detected errors:\n\n";
523 for (auto &GoodFile : HeaderFileNames) {
524 bool Good = true;
525 for (auto &ProblemFile : ProblemFileNames) {
526 if (ProblemFile == GoodFile) {
527 Good = false;
528 break;
529 }
530 }
531 if (Good)
532 errs() << GoodFile << "\n";
533 }
534}
535
536// List files with problem files commented out.
538 errs() <<
539 "\nThese are the combined files, with problem files preceded by #:\n\n";
540 for (auto &File : HeaderFileNames) {
541 bool Good = true;
542 for (auto &ProblemFile : ProblemFileNames) {
543 if (ProblemFile == File) {
544 Good = false;
545 break;
546 }
547 }
548 errs() << (Good ? "" : "#") << File << "\n";
549 }
550}
const Expr * E
Definitions for CoverageChecker.
NodeType Type
StringRef FileName
SourceLocation Loc
static std::string replaceDotDot(StringRef Path)
ModularizeUtilities class definition.
static cl::list< std::string > IncludePaths("I", cl::desc("Include path for coverage check."), cl::value_desc("path"))
static cl::opt< std::string > HeaderPrefix("prefix", cl::init(""), cl::desc("Prepend header file paths with this prefix." " If not specified," " the files are considered to be relative to the header list file."))
llvm::SmallVector< std::string, 4 > DependentsVector
Definition: Modularize.h:31
std::vector< HeaderHandle > Path
std::vector< llvm::StringRef > Strings
std::vector< llvm::StringRef > CommandLine
llvm::StringRef Directory
WantDiagnostics Diagnostics
static std::unique_ptr< CoverageChecker > createCoverageChecker(llvm::StringRef ModuleMapPath, std::vector< std::string > &IncludePaths, llvm::ArrayRef< std::string > CommandLine, clang::ModuleMap *ModuleMap)
Create instance of CoverageChecker.
Modularize utilities class.
llvm::SmallVector< std::string, 32 > GoodFileNames
List of header files with no problems during the first pass, that is, no compile errors.
ModularizeUtilities(std::vector< std::string > &InputPaths, llvm::StringRef Prefix, llvm::StringRef ProblemFilesListPath)
Constructor.
static std::string getCanonicalPath(llvm::StringRef FilePath)
Convert header path to canonical form.
std::vector< std::string > InputFilePaths
The input file paths.
std::error_code loadAllHeaderListsAndDependencies()
Load header list and dependencies.
bool collectModuleMapHeaders(clang::ModuleMap *ModMap)
Collect module Map headers.
void displayProblemFiles()
List problem files.
std::error_code loadModuleMap(llvm::StringRef InputPath)
Load single module map and extract header file list.
clang::TextDiagnosticPrinter DC
Diagnostic consumer.
DependencyMap Dependencies
Map of top-level header file dependencies.
static ModularizeUtilities * createModularizeUtilities(std::vector< std::string > &InputPaths, llvm::StringRef Prefix, llvm::StringRef ProblemFilesListPath)
Create instance of ModularizeUtilities.
llvm::IntrusiveRefCntPtr< clang::SourceManager > SourceMgr
Source manager.
std::error_code loadProblemHeaderList(llvm::StringRef InputPath)
Load problem header list.
bool collectModuleHeaders(const clang::Module &Mod)
Collect referenced headers from one module.
llvm::SmallVector< std::string, 32 > ProblemFileNames
List of header files with problems.
std::unique_ptr< clang::HeaderSearch > HeaderInfo
Header search manager.
llvm::IntrusiveRefCntPtr< clang::TargetInfo > Target
Target information.
llvm::StringRef HeaderPrefix
The header prefix.
std::vector< std::unique_ptr< clang::ModuleMap > > ModuleMaps
void displayCombinedFiles()
List files with problem files commented out.
void addNoCompileErrorsFile(std::string FilePath)
Add file with no compile errors.
void displayGoodFiles()
List files with no problems.
std::shared_ptr< clang::LangOptions > LangOpts
Options controlling the language variant.
llvm::IntrusiveRefCntPtr< clang::FileManager > FileMgr
File system manager.
bool collectUmbrellaHeaders(llvm::StringRef UmbrellaDirName, DependentsVector &Dependents)
Collect headers from an umbrella directory.
llvm::IntrusiveRefCntPtr< clang::DiagnosticsEngine > Diagnostics
Diagnostic engine.
int MissingHeaderCount
Missing header count.
static bool isHeader(llvm::StringRef FileName)
Check for header file extension.
llvm::StringRef ProblemFilesPath
The path of problem files list file.
std::error_code loadSingleHeaderListsAndDependencies(llvm::StringRef InputPath)
Load single header list and dependencies.
llvm::SmallVector< std::string, 32 > HeaderFileNames
List of top-level header files.
std::error_code doCoverageCheck(std::vector< std::string > &IncludePaths, llvm::ArrayRef< std::string > CommandLine)
Do coverage checks.
bool HasModuleMap
True if we have module maps.
void addUniqueProblemFile(std::string FilePath)
Add unique problem file.
static std::string getDirectoryFromPath(llvm::StringRef Path)
Get directory path component from file path.
===– Representation.cpp - ClangDoc Representation --------—*- C++ -*-===//
Some operations such as code completion produce a set of candidates.