clang 23.0.0git
GlobalModuleIndex.cpp
Go to the documentation of this file.
1//===--- GlobalModuleIndex.cpp - Global Module Index ------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the GlobalModuleIndex class.
10//
11//===----------------------------------------------------------------------===//
12
14#include "ASTReaderInternals.h"
19#include "llvm/ADT/DenseMap.h"
20#include "llvm/ADT/MapVector.h"
21#include "llvm/ADT/SmallString.h"
22#include "llvm/ADT/StringRef.h"
23#include "llvm/Bitstream/BitstreamReader.h"
24#include "llvm/Bitstream/BitstreamWriter.h"
25#include "llvm/Support/DJB.h"
26#include "llvm/Support/FileSystem.h"
27#include "llvm/Support/IOSandbox.h"
28#include "llvm/Support/LockFileManager.h"
29#include "llvm/Support/MemoryBuffer.h"
30#include "llvm/Support/OnDiskHashTable.h"
31#include "llvm/Support/Path.h"
32#include "llvm/Support/TimeProfiler.h"
33#include "llvm/Support/raw_ostream.h"
34#include <cstdio>
35using namespace clang;
36using namespace serialization;
37
38//----------------------------------------------------------------------------//
39// Shared constants
40//----------------------------------------------------------------------------//
41namespace {
42 enum {
43 /// The block containing the index.
44 GLOBAL_INDEX_BLOCK_ID = llvm::bitc::FIRST_APPLICATION_BLOCKID
45 };
46
47 /// Describes the record types in the index.
48 enum IndexRecordTypes {
49 /// Contains version information and potentially other metadata,
50 /// used to determine if we can read this global index file.
51 INDEX_METADATA,
52 /// Describes a module, including its file name and dependencies.
53 MODULE,
54 /// The index for identifiers.
55 IDENTIFIER_INDEX
56 };
57}
58
/// File name under which the global index is stored.
static constexpr const char *IndexFileName = "modules.idx";

/// Version number of the global index file format.
static constexpr unsigned CurrentVersion = 1;
64
65//----------------------------------------------------------------------------//
66// Global module index reader.
67//----------------------------------------------------------------------------//
68
69namespace {
70
71/// Trait used to read the identifier index from the on-disk hash
72/// table.
73class IdentifierIndexReaderTrait {
74public:
75 typedef StringRef external_key_type;
76 typedef StringRef internal_key_type;
77 typedef SmallVector<unsigned, 2> data_type;
78 typedef unsigned hash_value_type;
79 typedef unsigned offset_type;
80
81 static bool EqualKey(const internal_key_type& a, const internal_key_type& b) {
82 return a == b;
83 }
84
85 static hash_value_type ComputeHash(const internal_key_type& a) {
86 return llvm::djbHash(a);
87 }
88
89 static std::pair<unsigned, unsigned>
90 ReadKeyDataLength(const unsigned char*& d) {
91 using namespace llvm::support;
92 unsigned KeyLen = endian::readNext<uint16_t, llvm::endianness::little>(d);
93 unsigned DataLen = endian::readNext<uint16_t, llvm::endianness::little>(d);
94 return std::make_pair(KeyLen, DataLen);
95 }
96
97 static const internal_key_type&
98 GetInternalKey(const external_key_type& x) { return x; }
99
100 static const external_key_type&
101 GetExternalKey(const internal_key_type& x) { return x; }
102
103 static internal_key_type ReadKey(const unsigned char* d, unsigned n) {
104 return StringRef((const char *)d, n);
105 }
106
107 static data_type ReadData(const internal_key_type& k,
108 const unsigned char* d,
109 unsigned DataLen) {
110 using namespace llvm::support;
111
112 data_type Result;
113 while (DataLen > 0) {
114 unsigned ID = endian::readNext<uint32_t, llvm::endianness::little>(d);
115 Result.push_back(ID);
116 DataLen -= 4;
117 }
118
119 return Result;
120 }
121};
122
123typedef llvm::OnDiskIterableChainedHashTable<IdentifierIndexReaderTrait>
124 IdentifierIndexTable;
125
126}
127
128GlobalModuleIndex::GlobalModuleIndex(
129 std::unique_ptr<llvm::MemoryBuffer> IndexBuffer,
130 llvm::BitstreamCursor Cursor)
131 : Buffer(std::move(IndexBuffer)), IdentifierIndex(), NumIdentifierLookups(),
132 NumIdentifierLookupHits() {
133 auto Fail = [&](llvm::Error &&Err) {
134 report_fatal_error("Module index '" + Buffer->getBufferIdentifier() +
135 "' failed: " + toString(std::move(Err)));
136 };
137
138 llvm::TimeTraceScope TimeScope("Module LoadIndex");
139 // Read the global index.
140 bool InGlobalIndexBlock = false;
141 bool Done = false;
142 while (!Done) {
143 llvm::BitstreamEntry Entry;
144 if (Expected<llvm::BitstreamEntry> Res = Cursor.advance())
145 Entry = Res.get();
146 else
147 Fail(Res.takeError());
148
149 switch (Entry.Kind) {
150 case llvm::BitstreamEntry::Error:
151 return;
152
153 case llvm::BitstreamEntry::EndBlock:
154 if (InGlobalIndexBlock) {
155 InGlobalIndexBlock = false;
156 Done = true;
157 continue;
158 }
159 return;
160
161
162 case llvm::BitstreamEntry::Record:
163 // Entries in the global index block are handled below.
164 if (InGlobalIndexBlock)
165 break;
166
167 return;
168
169 case llvm::BitstreamEntry::SubBlock:
170 if (!InGlobalIndexBlock && Entry.ID == GLOBAL_INDEX_BLOCK_ID) {
171 if (llvm::Error Err = Cursor.EnterSubBlock(GLOBAL_INDEX_BLOCK_ID))
172 Fail(std::move(Err));
173 InGlobalIndexBlock = true;
174 } else if (llvm::Error Err = Cursor.SkipBlock())
175 Fail(std::move(Err));
176 continue;
177 }
178
180 StringRef Blob;
181 Expected<unsigned> MaybeIndexRecord =
182 Cursor.readRecord(Entry.ID, Record, &Blob);
183 if (!MaybeIndexRecord)
184 Fail(MaybeIndexRecord.takeError());
185 IndexRecordTypes IndexRecord =
186 static_cast<IndexRecordTypes>(MaybeIndexRecord.get());
187 switch (IndexRecord) {
188 case INDEX_METADATA:
189 // Make sure that the version matches.
190 if (Record.size() < 1 || Record[0] != CurrentVersion)
191 return;
192 break;
193
194 case MODULE: {
195 unsigned Idx = 0;
196 unsigned ID = Record[Idx++];
197
198 // Make room for this module's information.
199 if (ID == Modules.size())
200 Modules.push_back(ModuleInfo());
201 else
202 Modules.resize(ID + 1);
203
204 // Size/modification time for this module file at the time the
205 // global index was built.
206 Modules[ID].Size = Record[Idx++];
207 Modules[ID].ModTime = Record[Idx++];
208
209 // File name.
210 unsigned NameLen = Record[Idx++];
211 Modules[ID].FileName.assign(Record.begin() + Idx,
212 Record.begin() + Idx + NameLen);
213 Idx += NameLen;
214
215 // Dependencies
216 unsigned NumDeps = Record[Idx++];
217 Modules[ID].Dependencies.insert(Modules[ID].Dependencies.end(),
218 Record.begin() + Idx,
219 Record.begin() + Idx + NumDeps);
220 Idx += NumDeps;
221
222 // Make sure we're at the end of the record.
223 assert(Idx == Record.size() && "More module info?");
224
225 // Record this module as an unresolved module.
226 // FIXME: this doesn't work correctly for module names containing path
227 // separators.
228 StringRef ModuleName = llvm::sys::path::stem(Modules[ID].FileName);
229 // Remove the -<hash of ModuleMapPath>
230 ModuleName = ModuleName.rsplit('-').first;
231 UnresolvedModules[ModuleName] = ID;
232 break;
233 }
234
235 case IDENTIFIER_INDEX:
236 // Wire up the identifier index.
237 if (Record[0]) {
238 IdentifierIndex = IdentifierIndexTable::Create(
239 (const unsigned char *)Blob.data() + Record[0],
240 (const unsigned char *)Blob.data() + sizeof(uint32_t),
241 (const unsigned char *)Blob.data(), IdentifierIndexReaderTrait());
242 }
243 break;
244 }
245 }
246}
247
249 delete static_cast<IdentifierIndexTable *>(IdentifierIndex);
250}
251
252std::pair<GlobalModuleIndex *, llvm::Error>
254 // This is a compiler-internal input/output, let's bypass the sandbox.
255 auto BypassSandbox = llvm::sys::sandbox::scopedDisable();
256
257 // Load the index file, if it's there.
258 llvm::SmallString<128> IndexPath;
259 IndexPath += Path;
260 llvm::sys::path::append(IndexPath, IndexFileName);
261
262 llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> BufferOrErr =
263 llvm::MemoryBuffer::getFile(IndexPath.c_str());
264 if (!BufferOrErr)
265 return std::make_pair(nullptr,
266 llvm::errorCodeToError(BufferOrErr.getError()));
267 std::unique_ptr<llvm::MemoryBuffer> Buffer = std::move(BufferOrErr.get());
268
269 /// The main bitstream cursor for the main block.
270 llvm::BitstreamCursor Cursor(*Buffer);
271
272 // Sniff for the signature.
273 for (unsigned char C : {'B', 'C', 'G', 'I'}) {
274 if (Expected<llvm::SimpleBitstreamCursor::word_t> Res = Cursor.Read(8)) {
275 if (Res.get() != C)
276 return std::make_pair(
277 nullptr, llvm::createStringError(std::errc::illegal_byte_sequence,
278 "expected signature BCGI"));
279 } else
280 return std::make_pair(nullptr, Res.takeError());
281 }
282
283 return std::make_pair(new GlobalModuleIndex(std::move(Buffer), std::move(Cursor)),
284 llvm::Error::success());
285}
286
288 ModuleFile *File,
289 SmallVectorImpl<ModuleFile *> &Dependencies) {
290 // Look for information about this module file.
291 llvm::DenseMap<ModuleFile *, unsigned>::iterator Known
292 = ModulesByFile.find(File);
293 if (Known == ModulesByFile.end())
294 return;
295
296 // Record dependencies.
297 Dependencies.clear();
298 ArrayRef<unsigned> StoredDependencies = Modules[Known->second].Dependencies;
299 for (unsigned I = 0, N = StoredDependencies.size(); I != N; ++I) {
300 if (ModuleFile *MF = Modules[I].File)
301 Dependencies.push_back(MF);
302 }
303}
304
305bool GlobalModuleIndex::lookupIdentifier(StringRef Name, HitSet &Hits) {
306 Hits.clear();
307
308 // If there's no identifier index, there is nothing we can do.
309 if (!IdentifierIndex)
310 return false;
311
312 // Look into the identifier index.
313 ++NumIdentifierLookups;
314 IdentifierIndexTable &Table
315 = *static_cast<IdentifierIndexTable *>(IdentifierIndex);
316 IdentifierIndexTable::iterator Known = Table.find(Name);
317 if (Known == Table.end()) {
318 return false;
319 }
320
321 SmallVector<unsigned, 2> ModuleIDs = *Known;
322 for (unsigned I = 0, N = ModuleIDs.size(); I != N; ++I) {
323 if (ModuleFile *MF = Modules[ModuleIDs[I]].File)
324 Hits.insert(MF);
325 }
326
327 ++NumIdentifierLookupHits;
328 return true;
329}
330
332 // Look for the module in the global module index based on the module name.
333 StringRef Name = File->ModuleName;
334 llvm::StringMap<unsigned>::iterator Known = UnresolvedModules.find(Name);
335 if (Known == UnresolvedModules.end()) {
336 return true;
337 }
338
339 // Rectify this module with the global module index.
340 ModuleInfo &Info = Modules[Known->second];
341
342 // If the size and modification time match what we expected, record this
343 // module file.
344 bool Failed = true;
345 if (File->File.getSize() == Info.Size &&
346 File->File.getModificationTime() == Info.ModTime) {
347 Info.File = File;
348 ModulesByFile[File] = Known->second;
349
350 Failed = false;
351 }
352
353 // One way or another, we have resolved this module file.
354 UnresolvedModules.erase(Known);
355 return Failed;
356}
357
359 std::fprintf(stderr, "*** Global Module Index Statistics:\n");
360 if (NumIdentifierLookups) {
361 fprintf(stderr, " %u / %u identifier lookups succeeded (%f%%)\n",
362 NumIdentifierLookupHits, NumIdentifierLookups,
363 (double)NumIdentifierLookupHits*100.0/NumIdentifierLookups);
364 }
365 std::fprintf(stderr, "\n");
366}
367
368LLVM_DUMP_METHOD void GlobalModuleIndex::dump() {
369 llvm::errs() << "*** Global Module Index Dump:\n";
370 llvm::errs() << "Module files:\n";
371 for (auto &MI : Modules) {
372 llvm::errs() << "** " << MI.FileName << "\n";
373 if (MI.File)
374 MI.File->dump();
375 else
376 llvm::errs() << "\n";
377 }
378 llvm::errs() << "\n";
379}
380
381//----------------------------------------------------------------------------//
382// Global module index writer.
383//----------------------------------------------------------------------------//
384
385namespace {
386 /// Provides information about a specific module file.
387 struct ModuleFileInfo {
388 /// The numberic ID for this module file.
389 unsigned ID;
390
391 /// The set of modules on which this module depends. Each entry is
392 /// a module ID.
393 SmallVector<unsigned, 4> Dependencies;
394 ASTFileSignature Signature;
395 };
396
397 struct ImportedModuleFileInfo {
398 off_t StoredSize;
399 time_t StoredModTime;
400 ASTFileSignature StoredSignature;
401 ImportedModuleFileInfo(off_t Size, time_t ModTime, ASTFileSignature Sig)
402 : StoredSize(Size), StoredModTime(ModTime), StoredSignature(Sig) {}
403 };
404
405 /// Builder that generates the global module index file.
406 class GlobalModuleIndexBuilder {
407 FileManager &FileMgr;
408 const PCHContainerReader &PCHContainerRdr;
409
410 /// Mapping from files to module file information.
411 using ModuleFilesMap = llvm::MapVector<FileEntryRef, ModuleFileInfo>;
412
413 /// Information about each of the known module files.
414 ModuleFilesMap ModuleFiles;
415
416 /// Mapping from the imported module file to the imported
417 /// information.
418 using ImportedModuleFilesMap =
419 std::multimap<FileEntryRef, ImportedModuleFileInfo>;
420
421 /// Information about each importing of a module file.
422 ImportedModuleFilesMap ImportedModuleFiles;
423
424 /// Mapping from identifiers to the list of module file IDs that
425 /// consider this identifier to be interesting.
426 typedef llvm::StringMap<SmallVector<unsigned, 2> > InterestingIdentifierMap;
427
428 /// A mapping from all interesting identifiers to the set of module
429 /// files in which those identifiers are considered interesting.
430 InterestingIdentifierMap InterestingIdentifiers;
431
432 /// Write the block-info block for the global module index file.
433 void emitBlockInfoBlock(llvm::BitstreamWriter &Stream);
434
435 /// Retrieve the module file information for the given file.
436 ModuleFileInfo &getModuleFileInfo(FileEntryRef File) {
437 auto [It, Inserted] = ModuleFiles.try_emplace(File);
438 if (Inserted) {
439 unsigned NewID = ModuleFiles.size();
440 ModuleFileInfo &Info = It->second;
441 Info.ID = NewID;
442 }
443 return It->second;
444 }
445
446 public:
447 explicit GlobalModuleIndexBuilder(
448 FileManager &FileMgr, const PCHContainerReader &PCHContainerRdr)
449 : FileMgr(FileMgr), PCHContainerRdr(PCHContainerRdr) {}
450
451 /// Load the contents of the given module file into the builder.
452 llvm::Error loadModuleFile(FileEntryRef File);
453
454 /// Write the index to the given bitstream.
455 /// \returns true if an error occurred, false otherwise.
456 bool writeIndex(llvm::BitstreamWriter &Stream);
457 };
458}
459
460static void emitBlockID(unsigned ID, const char *Name,
461 llvm::BitstreamWriter &Stream,
463 Record.clear();
464 Record.push_back(ID);
465 Stream.EmitRecord(llvm::bitc::BLOCKINFO_CODE_SETBID, Record);
466
467 // Emit the block name if present.
468 if (!Name || Name[0] == 0) return;
469 Record.clear();
470 while (*Name)
471 Record.push_back(*Name++);
472 Stream.EmitRecord(llvm::bitc::BLOCKINFO_CODE_BLOCKNAME, Record);
473}
474
475static void emitRecordID(unsigned ID, const char *Name,
476 llvm::BitstreamWriter &Stream,
478 Record.clear();
479 Record.push_back(ID);
480 while (*Name)
481 Record.push_back(*Name++);
482 Stream.EmitRecord(llvm::bitc::BLOCKINFO_CODE_SETRECORDNAME, Record);
483}
484
485void
486GlobalModuleIndexBuilder::emitBlockInfoBlock(llvm::BitstreamWriter &Stream) {
488 Stream.EnterBlockInfoBlock();
489
490#define BLOCK(X) emitBlockID(X ## _ID, #X, Stream, Record)
491#define RECORD(X) emitRecordID(X, #X, Stream, Record)
492 BLOCK(GLOBAL_INDEX_BLOCK);
493 RECORD(INDEX_METADATA);
494 RECORD(MODULE);
495 RECORD(IDENTIFIER_INDEX);
496#undef RECORD
497#undef BLOCK
498
499 Stream.ExitBlock();
500}
501
502namespace {
503 class InterestingASTIdentifierLookupTrait
505
506 public:
507 /// The identifier and whether it is "interesting".
508 typedef std::pair<StringRef, bool> data_type;
509
510 data_type ReadData(const internal_key_type& k,
511 const unsigned char* d,
512 unsigned DataLen) {
513 // The first bit indicates whether this identifier is interesting.
514 // That's all we care about.
515 using namespace llvm::support;
516 IdentifierID RawID =
517 endian::readNext<IdentifierID, llvm::endianness::little>(d);
518 bool IsInteresting = RawID & 0x01;
519 return std::make_pair(k, IsInteresting);
520 }
521 };
522}
523
524llvm::Error GlobalModuleIndexBuilder::loadModuleFile(FileEntryRef File) {
525 // Open the module file.
526
527 auto Buffer = FileMgr.getBufferForFile(File, /*isVolatile=*/true);
528 if (!Buffer)
529 return llvm::createStringError(Buffer.getError(),
530 "failed getting buffer for module file");
531
532 // Initialize the input stream
533 llvm::BitstreamCursor InStream(PCHContainerRdr.ExtractPCH(**Buffer));
534
535 // Sniff for the signature.
536 for (unsigned char C : {'C', 'P', 'C', 'H'})
537 if (Expected<llvm::SimpleBitstreamCursor::word_t> Res = InStream.Read(8)) {
538 if (Res.get() != C)
539 return llvm::createStringError(std::errc::illegal_byte_sequence,
540 "expected signature CPCH");
541 } else
542 return Res.takeError();
543
544 // Record this module file and assign it a unique ID (if it doesn't have
545 // one already).
546 unsigned ID = getModuleFileInfo(File).ID;
547
548 // Search for the blocks and records we care about.
549 enum { Other, ControlBlock, ASTBlock, DiagnosticOptionsBlock } State = Other;
550 bool Done = false;
551 while (!Done) {
552 Expected<llvm::BitstreamEntry> MaybeEntry = InStream.advance();
553 if (!MaybeEntry)
554 return MaybeEntry.takeError();
555 llvm::BitstreamEntry Entry = MaybeEntry.get();
556
557 switch (Entry.Kind) {
558 case llvm::BitstreamEntry::Error:
559 Done = true;
560 continue;
561
562 case llvm::BitstreamEntry::Record:
563 // In the 'other' state, just skip the record. We don't care.
564 if (State == Other) {
565 if (llvm::Expected<unsigned> Skipped = InStream.skipRecord(Entry.ID))
566 continue;
567 else
568 return Skipped.takeError();
569 }
570
571 // Handle potentially-interesting records below.
572 break;
573
574 case llvm::BitstreamEntry::SubBlock:
575 if (Entry.ID == CONTROL_BLOCK_ID) {
576 if (llvm::Error Err = InStream.EnterSubBlock(CONTROL_BLOCK_ID))
577 return Err;
578
579 // Found the control block.
580 State = ControlBlock;
581 continue;
582 }
583
584 if (Entry.ID == AST_BLOCK_ID) {
585 if (llvm::Error Err = InStream.EnterSubBlock(AST_BLOCK_ID))
586 return Err;
587
588 // Found the AST block.
589 State = ASTBlock;
590 continue;
591 }
592
593 if (Entry.ID == UNHASHED_CONTROL_BLOCK_ID) {
594 if (llvm::Error Err = InStream.EnterSubBlock(UNHASHED_CONTROL_BLOCK_ID))
595 return Err;
596
597 // Found the Diagnostic Options block.
598 State = DiagnosticOptionsBlock;
599 continue;
600 }
601
602 if (llvm::Error Err = InStream.SkipBlock())
603 return Err;
604
605 continue;
606
607 case llvm::BitstreamEntry::EndBlock:
608 State = Other;
609 continue;
610 }
611
612 // Read the given record.
614 StringRef Blob;
615 Expected<unsigned> MaybeCode = InStream.readRecord(Entry.ID, Record, &Blob);
616 if (!MaybeCode)
617 return MaybeCode.takeError();
618 unsigned Code = MaybeCode.get();
619
620 // Handle module dependencies.
621 if (State == ControlBlock && Code == IMPORT) {
622 unsigned Idx = 0;
623 // Read information about the AST file.
624
625 // Skip the imported kind
626 ++Idx;
627
628 // Skip the import location
629 ++Idx;
630
631 // Skip the module name (currently this is only used for prebuilt
632 // modules while here we are only dealing with cached).
633 Blob = Blob.substr(Record[Idx++]);
634
635 // Skip if it is standard C++ module
636 ++Idx;
637
638 // Load stored size/modification time.
639 off_t StoredSize = (off_t)Record[Idx++];
640 time_t StoredModTime = (time_t)Record[Idx++];
641 (void)Record[Idx++]; // ImplicitModuleSuffixLength
642
643 // Skip the stored signature.
644 // FIXME: we could read the signature out of the import and validate it.
645 StringRef SignatureBytes = Blob.substr(0, ASTFileSignature::size);
646 auto StoredSignature = ASTFileSignature::create(SignatureBytes.begin(),
647 SignatureBytes.end());
648 Blob = Blob.substr(ASTFileSignature::size);
649
650 // Retrieve the imported file name.
651 unsigned Length = Record[Idx++];
652 StringRef ImportedFile = Blob.substr(0, Length);
653 Blob = Blob.substr(Length);
654
655 // Find the imported module file.
656 auto DependsOnFile =
657 FileMgr.getOptionalFileRef(ImportedFile, /*OpenFile=*/false,
658 /*CacheFailure=*/false);
659
660 if (!DependsOnFile)
661 return llvm::createStringError(std::errc::bad_file_descriptor,
662 "imported file \"%s\" not found",
663 std::string(ImportedFile).c_str());
664
665 // Save the information in ImportedModuleFileInfo so we can verify after
666 // loading all pcms.
667 ImportedModuleFiles.insert(std::make_pair(
668 *DependsOnFile, ImportedModuleFileInfo(StoredSize, StoredModTime,
669 StoredSignature)));
670
671 // Record the dependency.
672 unsigned DependsOnID = getModuleFileInfo(*DependsOnFile).ID;
673 getModuleFileInfo(File).Dependencies.push_back(DependsOnID);
674
675 continue;
676 }
677
678 // Handle the identifier table
679 if (State == ASTBlock && Code == IDENTIFIER_TABLE && Record[0] > 0) {
680 typedef llvm::OnDiskIterableChainedHashTable<
681 InterestingASTIdentifierLookupTrait> InterestingIdentifierTable;
682 std::unique_ptr<InterestingIdentifierTable> Table(
683 InterestingIdentifierTable::Create(
684 (const unsigned char *)Blob.data() + Record[0],
685 (const unsigned char *)Blob.data() + sizeof(uint32_t),
686 (const unsigned char *)Blob.data()));
687 for (InterestingIdentifierTable::data_iterator D = Table->data_begin(),
688 DEnd = Table->data_end();
689 D != DEnd; ++D) {
690 std::pair<StringRef, bool> Ident = *D;
691 if (Ident.second)
692 InterestingIdentifiers[Ident.first].push_back(ID);
693 else
694 (void)InterestingIdentifiers[Ident.first];
695 }
696 }
697
698 // Get Signature.
699 if (State == DiagnosticOptionsBlock && Code == SIGNATURE) {
700 auto Signature = ASTFileSignature::create(Blob.begin(), Blob.end());
701 assert(Signature != ASTFileSignature::createDummy() &&
702 "Dummy AST file signature not backpatched in ASTWriter.");
703 getModuleFileInfo(File).Signature = Signature;
704 }
705
706 // We don't care about this record.
707 }
708
709 return llvm::Error::success();
710}
711
712namespace {
713
714/// Trait used to generate the identifier index as an on-disk hash
715/// table.
716class IdentifierIndexWriterTrait {
717public:
718 typedef StringRef key_type;
719 typedef StringRef key_type_ref;
720 typedef SmallVector<unsigned, 2> data_type;
721 typedef const SmallVector<unsigned, 2> &data_type_ref;
722 typedef unsigned hash_value_type;
723 typedef unsigned offset_type;
724
725 static hash_value_type ComputeHash(key_type_ref Key) {
726 return llvm::djbHash(Key);
727 }
728
729 std::pair<unsigned,unsigned>
730 EmitKeyDataLength(raw_ostream& Out, key_type_ref Key, data_type_ref Data) {
731 using namespace llvm::support;
732 endian::Writer LE(Out, llvm::endianness::little);
733 unsigned KeyLen = Key.size();
734 unsigned DataLen = Data.size() * 4;
735 LE.write<uint16_t>(KeyLen);
736 LE.write<uint16_t>(DataLen);
737 return std::make_pair(KeyLen, DataLen);
738 }
739
740 void EmitKey(raw_ostream& Out, key_type_ref Key, unsigned KeyLen) {
741 Out.write(Key.data(), KeyLen);
742 }
743
744 void EmitData(raw_ostream& Out, key_type_ref Key, data_type_ref Data,
745 unsigned DataLen) {
746 using namespace llvm::support;
747 for (unsigned I = 0, N = Data.size(); I != N; ++I)
748 endian::write<uint32_t>(Out, Data[I], llvm::endianness::little);
749 }
750};
751
752}
753
754bool GlobalModuleIndexBuilder::writeIndex(llvm::BitstreamWriter &Stream) {
755 for (auto MapEntry : ImportedModuleFiles) {
756 auto File = MapEntry.first;
757 ImportedModuleFileInfo &Info = MapEntry.second;
758 if (getModuleFileInfo(File).Signature) {
759 if (getModuleFileInfo(File).Signature != Info.StoredSignature)
760 // Verify Signature.
761 return true;
762 } else if (Info.StoredSize != File.getSize() ||
763 Info.StoredModTime != File.getModificationTime())
764 // Verify Size and ModTime.
765 return true;
766 }
767
768 using namespace llvm;
769 llvm::TimeTraceScope TimeScope("Module WriteIndex");
770
771 // Emit the file header.
772 Stream.Emit((unsigned)'B', 8);
773 Stream.Emit((unsigned)'C', 8);
774 Stream.Emit((unsigned)'G', 8);
775 Stream.Emit((unsigned)'I', 8);
776
777 // Write the block-info block, which describes the records in this bitcode
778 // file.
779 emitBlockInfoBlock(Stream);
780
781 Stream.EnterSubblock(GLOBAL_INDEX_BLOCK_ID, 3);
782
783 // Write the metadata.
785 Record.push_back(CurrentVersion);
786 Stream.EmitRecord(INDEX_METADATA, Record);
787
788 // Write the set of known module files.
789 for (ModuleFilesMap::iterator M = ModuleFiles.begin(),
790 MEnd = ModuleFiles.end();
791 M != MEnd; ++M) {
792 Record.clear();
793 Record.push_back(M->second.ID);
794 Record.push_back(M->first.getSize());
795 Record.push_back(M->first.getModificationTime());
796
797 // File name
798 StringRef Name(M->first.getName());
799 Record.push_back(Name.size());
800 Record.append(Name.begin(), Name.end());
801
802 // Dependencies
803 Record.push_back(M->second.Dependencies.size());
804 Record.append(M->second.Dependencies.begin(), M->second.Dependencies.end());
805 Stream.EmitRecord(MODULE, Record);
806 }
807
808 // Write the identifier -> module file mapping.
809 {
810 llvm::OnDiskChainedHashTableGenerator<IdentifierIndexWriterTrait> Generator;
811 IdentifierIndexWriterTrait Trait;
812
813 // Populate the hash table.
814 for (InterestingIdentifierMap::iterator I = InterestingIdentifiers.begin(),
815 IEnd = InterestingIdentifiers.end();
816 I != IEnd; ++I) {
817 Generator.insert(I->first(), I->second, Trait);
818 }
819
820 // Create the on-disk hash table in a buffer.
822 uint32_t BucketOffset;
823 {
824 using namespace llvm::support;
825 llvm::raw_svector_ostream Out(IdentifierTable);
826 // Make sure that no bucket is at offset 0
827 endian::write<uint32_t>(Out, 0, llvm::endianness::little);
828 BucketOffset = Generator.Emit(Out, Trait);
829 }
830
831 // Create a blob abbreviation
832 auto Abbrev = std::make_shared<BitCodeAbbrev>();
833 Abbrev->Add(BitCodeAbbrevOp(IDENTIFIER_INDEX));
834 Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32));
835 Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob));
836 unsigned IDTableAbbrev = Stream.EmitAbbrev(std::move(Abbrev));
837
838 // Write the identifier table
839 uint64_t Record[] = {IDENTIFIER_INDEX, BucketOffset};
840 Stream.EmitRecordWithBlob(IDTableAbbrev, Record, IdentifierTable);
841 }
842
843 Stream.ExitBlock();
844 return false;
845}
846
847llvm::Error
849 const PCHContainerReader &PCHContainerRdr,
850 StringRef Path) {
851 // This is a compiler-internal input/output, let's bypass the sandbox.
852 auto BypassSandbox = llvm::sys::sandbox::scopedDisable();
853
854 llvm::SmallString<128> IndexPath;
855 IndexPath += Path;
856 llvm::sys::path::append(IndexPath, IndexFileName);
857
858 // Coordinate building the global index file with other processes that might
859 // try to do the same.
860 llvm::LockFileManager Lock(IndexPath);
861 bool Owned;
862 if (llvm::Error Err = Lock.tryLock().moveInto(Owned)) {
863 llvm::consumeError(std::move(Err));
864 return llvm::createStringError(std::errc::io_error, "LFS error");
865 }
866 if (!Owned) {
867 // Someone else is responsible for building the index. We don't care
868 // when they finish, so we're done.
869 return llvm::createStringError(std::errc::device_or_resource_busy,
870 "someone else is building the index");
871 }
872
873 // We're responsible for building the index ourselves.
874
875 // The module index builder.
876 GlobalModuleIndexBuilder Builder(FileMgr, PCHContainerRdr);
877
878 // Load each of the module files.
879 std::error_code EC;
880 for (llvm::sys::fs::directory_iterator D(Path, EC), DEnd;
881 D != DEnd && !EC;
882 D.increment(EC)) {
883 // If this isn't a module file, we don't care.
884 if (llvm::sys::path::extension(D->path()) != ".pcm") {
885 // ... unless it's a .pcm.lock file, which indicates that someone is
886 // in the process of rebuilding a module. They'll rebuild the index
887 // at the end of that translation unit, so we don't have to.
888 if (llvm::sys::path::extension(D->path()) == ".pcm.lock")
889 return llvm::createStringError(std::errc::device_or_resource_busy,
890 "someone else is building the index");
891
892 continue;
893 }
894
895 // If we can't find the module file, skip it.
896 auto ModuleFile = FileMgr.getOptionalFileRef(D->path());
897 if (!ModuleFile)
898 continue;
899
900 // Load this module file.
901 if (llvm::Error Err = Builder.loadModuleFile(*ModuleFile))
902 return Err;
903 }
904
905 // The output buffer, into which the global index will be written.
906 SmallString<16> OutputBuffer;
907 {
908 llvm::BitstreamWriter OutputStream(OutputBuffer);
909 if (Builder.writeIndex(OutputStream))
910 return llvm::createStringError(std::errc::io_error,
911 "failed writing index");
912 }
913
914 return llvm::writeToOutput(IndexPath, [&OutputBuffer](llvm::raw_ostream &OS) {
915 OS << OutputBuffer;
916 return llvm::Error::success();
917 });
918}
919
920namespace {
921 class GlobalIndexIdentifierIterator : public IdentifierIterator {
922 /// The current position within the identifier lookup table.
923 IdentifierIndexTable::key_iterator Current;
924
925 /// The end position within the identifier lookup table.
926 IdentifierIndexTable::key_iterator End;
927
928 public:
929 explicit GlobalIndexIdentifierIterator(IdentifierIndexTable &Idx) {
930 Current = Idx.key_begin();
931 End = Idx.key_end();
932 }
933
934 StringRef Next() override {
935 if (Current == End)
936 return StringRef();
937
938 StringRef Result = *Current;
939 ++Current;
940 return Result;
941 }
942 };
943}
944
946 IdentifierIndexTable &Table =
947 *static_cast<IdentifierIndexTable *>(IdentifierIndex);
948 return new GlobalIndexIdentifierIterator(Table);
949}
#define RECORD(CLASS, BASE)
Defines the clang::FileManager interface and associated types.
FormatToken * Next
The next token in the unwrapped line.
static void emitRecordID(unsigned ID, const char *Name, llvm::BitstreamWriter &Stream, SmallVectorImpl< uint64_t > &Record)
static const unsigned CurrentVersion
The global index file version.
static const char *const IndexFileName
The name of the global index file.
static void emitBlockID(unsigned ID, const char *Name, llvm::BitstreamWriter &Stream, SmallVectorImpl< uint64_t > &Record)
llvm::MachO::Record Record
Definition MachO.h:31
static std::string toString(const clang::SanitizerSet &Sanitizers)
Produce a string containing comma-separated names of sanitizers in Sanitizers set.
#define IMPORT(DERIVED, BASE)
Definition Template.h:630
#define BLOCK(DERIVED, BASE)
Definition Template.h:646
__device__ __2f16 b
A reference to a FileEntry that includes the name of the file as it was accessed by the FileManager's...
Definition FileEntry.h:57
Implements support for file system lookup, file system caching, and directory search management.
Definition FileManager.h:53
bool loadedModuleFile(ModuleFile *File)
Note that the given module file has been loaded.
void printStats()
Print statistics to standard error.
llvm::SmallPtrSet< ModuleFile *, 4 > HitSet
A set of module files in which we found a result.
bool lookupIdentifier(llvm::StringRef Name, HitSet &Hits)
Look for all of the module files with information about the given identifier, e.g....
void getModuleDependencies(ModuleFile *File, llvm::SmallVectorImpl< ModuleFile * > &Dependencies)
Retrieve the set of module files on which the given module file directly depends.
IdentifierIterator * createIdentifierIterator() const
Returns an iterator for identifiers stored in the index table.
static std::pair< GlobalModuleIndex *, llvm::Error > readIndex(llvm::StringRef Path)
Read a global index file for the given directory.
void dump()
Print debugging view to standard error.
static llvm::Error writeIndex(FileManager &FileMgr, const PCHContainerReader &PCHContainerRdr, llvm::StringRef Path)
Write a global index into the given.
An iterator that walks over all of the known identifiers in the lookup table.
Implements an efficient mapping from strings to IdentifierInfo nodes.
This abstract interface provides operations for unwrapping containers for serialized ASTs (precompile...
virtual llvm::StringRef ExtractPCH(llvm::MemoryBufferRef Buffer) const =0
Returns the serialized AST inside the PCH container Buffer.
Base class for the trait describing the on-disk hash table for the identifiers in an AST file.
@ ModuleFileInfo
Dump information about a module file.
bool LE(InterpState &S, CodePtr OpPC)
Definition Interp.h:1326
@ AST_BLOCK_ID
The AST block, which acts as a container around the full AST block.
@ CONTROL_BLOCK_ID
The control block, which contains all of the information that needs to be validated prior to committi...
@ UNHASHED_CONTROL_BLOCK_ID
A block with unhashed content.
@ SIGNATURE
Record code for the signature that identifiers this AST file.
@ IDENTIFIER_TABLE
Record code for the identifier table.
unsigned ComputeHash(Selector Sel)
uint64_t IdentifierID
An ID number that refers to an identifier in an AST file.
Definition ASTBitCodes.h:63
std::shared_ptr< MatchComputation< T > > Generator
Definition RewriteRule.h:65
The JSON file list parser is used to communicate input to InstallAPI.
@ Other
Other implicit parameter.
Definition Decl.h:1746
unsigned long uint64_t
unsigned int uint32_t
__LIBC_ATTRS FILE * stderr
The signature of a module, which is a hash of the AST content.
Definition Module.h:165
static constexpr size_t size
Definition Module.h:168
static ASTFileSignature create(std::array< uint8_t, 20 > Bytes)
Definition Module.h:188
static ASTFileSignature createDummy()
Definition Module.h:198