clang 20.0.0git
GlobalModuleIndex.cpp
Go to the documentation of this file.
1//===--- GlobalModuleIndex.cpp - Global Module Index ------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the GlobalModuleIndex class.
10//
11//===----------------------------------------------------------------------===//
12
14#include "ASTReaderInternals.h"
19#include "llvm/ADT/DenseMap.h"
20#include "llvm/ADT/MapVector.h"
21#include "llvm/ADT/SmallString.h"
22#include "llvm/ADT/StringRef.h"
23#include "llvm/Bitstream/BitstreamReader.h"
24#include "llvm/Bitstream/BitstreamWriter.h"
25#include "llvm/Support/DJB.h"
26#include "llvm/Support/FileSystem.h"
27#include "llvm/Support/LockFileManager.h"
28#include "llvm/Support/MemoryBuffer.h"
29#include "llvm/Support/OnDiskHashTable.h"
30#include "llvm/Support/Path.h"
31#include "llvm/Support/TimeProfiler.h"
32#include "llvm/Support/raw_ostream.h"
33#include <cstdio>
34using namespace clang;
35using namespace serialization;
36
37//----------------------------------------------------------------------------//
38// Shared constants
39//----------------------------------------------------------------------------//
40namespace {
41 enum {
42 /// The block containing the index.
43 GLOBAL_INDEX_BLOCK_ID = llvm::bitc::FIRST_APPLICATION_BLOCKID
44 };
45
46 /// Describes the record types in the index.
47 enum IndexRecordTypes {
48 /// Contains version information and potentially other metadata,
49 /// used to determine if we can read this global index file.
50 INDEX_METADATA,
51 /// Describes a module, including its file name and dependencies.
52 MODULE,
53 /// The index for identifiers.
54 IDENTIFIER_INDEX
55 };
56}
57
/// File name under which the global index is stored.
static constexpr const char *IndexFileName = "modules.idx";

/// Version number of the global index file format.
static constexpr unsigned CurrentVersion = 1;
63
64//----------------------------------------------------------------------------//
65// Global module index reader.
66//----------------------------------------------------------------------------//
67
68namespace {
69
70/// Trait used to read the identifier index from the on-disk hash
71/// table.
72class IdentifierIndexReaderTrait {
73public:
74 typedef StringRef external_key_type;
75 typedef StringRef internal_key_type;
76 typedef SmallVector<unsigned, 2> data_type;
77 typedef unsigned hash_value_type;
78 typedef unsigned offset_type;
79
80 static bool EqualKey(const internal_key_type& a, const internal_key_type& b) {
81 return a == b;
82 }
83
84 static hash_value_type ComputeHash(const internal_key_type& a) {
85 return llvm::djbHash(a);
86 }
87
88 static std::pair<unsigned, unsigned>
89 ReadKeyDataLength(const unsigned char*& d) {
90 using namespace llvm::support;
91 unsigned KeyLen = endian::readNext<uint16_t, llvm::endianness::little>(d);
92 unsigned DataLen = endian::readNext<uint16_t, llvm::endianness::little>(d);
93 return std::make_pair(KeyLen, DataLen);
94 }
95
96 static const internal_key_type&
97 GetInternalKey(const external_key_type& x) { return x; }
98
99 static const external_key_type&
100 GetExternalKey(const internal_key_type& x) { return x; }
101
102 static internal_key_type ReadKey(const unsigned char* d, unsigned n) {
103 return StringRef((const char *)d, n);
104 }
105
106 static data_type ReadData(const internal_key_type& k,
107 const unsigned char* d,
108 unsigned DataLen) {
109 using namespace llvm::support;
110
111 data_type Result;
112 while (DataLen > 0) {
113 unsigned ID = endian::readNext<uint32_t, llvm::endianness::little>(d);
114 Result.push_back(ID);
115 DataLen -= 4;
116 }
117
118 return Result;
119 }
120};
121
122typedef llvm::OnDiskIterableChainedHashTable<IdentifierIndexReaderTrait>
123 IdentifierIndexTable;
124
125}
126
127GlobalModuleIndex::GlobalModuleIndex(
128 std::unique_ptr<llvm::MemoryBuffer> IndexBuffer,
129 llvm::BitstreamCursor Cursor)
130 : Buffer(std::move(IndexBuffer)), IdentifierIndex(), NumIdentifierLookups(),
131 NumIdentifierLookupHits() {
132 auto Fail = [&](llvm::Error &&Err) {
133 report_fatal_error("Module index '" + Buffer->getBufferIdentifier() +
134 "' failed: " + toString(std::move(Err)));
135 };
136
137 llvm::TimeTraceScope TimeScope("Module LoadIndex");
138 // Read the global index.
139 bool InGlobalIndexBlock = false;
140 bool Done = false;
141 while (!Done) {
142 llvm::BitstreamEntry Entry;
143 if (Expected<llvm::BitstreamEntry> Res = Cursor.advance())
144 Entry = Res.get();
145 else
146 Fail(Res.takeError());
147
148 switch (Entry.Kind) {
149 case llvm::BitstreamEntry::Error:
150 return;
151
152 case llvm::BitstreamEntry::EndBlock:
153 if (InGlobalIndexBlock) {
154 InGlobalIndexBlock = false;
155 Done = true;
156 continue;
157 }
158 return;
159
160
161 case llvm::BitstreamEntry::Record:
162 // Entries in the global index block are handled below.
163 if (InGlobalIndexBlock)
164 break;
165
166 return;
167
168 case llvm::BitstreamEntry::SubBlock:
169 if (!InGlobalIndexBlock && Entry.ID == GLOBAL_INDEX_BLOCK_ID) {
170 if (llvm::Error Err = Cursor.EnterSubBlock(GLOBAL_INDEX_BLOCK_ID))
171 Fail(std::move(Err));
172 InGlobalIndexBlock = true;
173 } else if (llvm::Error Err = Cursor.SkipBlock())
174 Fail(std::move(Err));
175 continue;
176 }
177
179 StringRef Blob;
180 Expected<unsigned> MaybeIndexRecord =
181 Cursor.readRecord(Entry.ID, Record, &Blob);
182 if (!MaybeIndexRecord)
183 Fail(MaybeIndexRecord.takeError());
184 IndexRecordTypes IndexRecord =
185 static_cast<IndexRecordTypes>(MaybeIndexRecord.get());
186 switch (IndexRecord) {
187 case INDEX_METADATA:
188 // Make sure that the version matches.
189 if (Record.size() < 1 || Record[0] != CurrentVersion)
190 return;
191 break;
192
193 case MODULE: {
194 unsigned Idx = 0;
195 unsigned ID = Record[Idx++];
196
197 // Make room for this module's information.
198 if (ID == Modules.size())
199 Modules.push_back(ModuleInfo());
200 else
201 Modules.resize(ID + 1);
202
203 // Size/modification time for this module file at the time the
204 // global index was built.
205 Modules[ID].Size = Record[Idx++];
206 Modules[ID].ModTime = Record[Idx++];
207
208 // File name.
209 unsigned NameLen = Record[Idx++];
210 Modules[ID].FileName.assign(Record.begin() + Idx,
211 Record.begin() + Idx + NameLen);
212 Idx += NameLen;
213
214 // Dependencies
215 unsigned NumDeps = Record[Idx++];
216 Modules[ID].Dependencies.insert(Modules[ID].Dependencies.end(),
217 Record.begin() + Idx,
218 Record.begin() + Idx + NumDeps);
219 Idx += NumDeps;
220
221 // Make sure we're at the end of the record.
222 assert(Idx == Record.size() && "More module info?");
223
224 // Record this module as an unresolved module.
225 // FIXME: this doesn't work correctly for module names containing path
226 // separators.
227 StringRef ModuleName = llvm::sys::path::stem(Modules[ID].FileName);
228 // Remove the -<hash of ModuleMapPath>
229 ModuleName = ModuleName.rsplit('-').first;
230 UnresolvedModules[ModuleName] = ID;
231 break;
232 }
233
234 case IDENTIFIER_INDEX:
235 // Wire up the identifier index.
236 if (Record[0]) {
237 IdentifierIndex = IdentifierIndexTable::Create(
238 (const unsigned char *)Blob.data() + Record[0],
239 (const unsigned char *)Blob.data() + sizeof(uint32_t),
240 (const unsigned char *)Blob.data(), IdentifierIndexReaderTrait());
241 }
242 break;
243 }
244 }
245}
246
248 delete static_cast<IdentifierIndexTable *>(IdentifierIndex);
249}
250
251std::pair<GlobalModuleIndex *, llvm::Error>
253 // Load the index file, if it's there.
254 llvm::SmallString<128> IndexPath;
255 IndexPath += Path;
256 llvm::sys::path::append(IndexPath, IndexFileName);
257
258 llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> BufferOrErr =
259 llvm::MemoryBuffer::getFile(IndexPath.c_str());
260 if (!BufferOrErr)
261 return std::make_pair(nullptr,
262 llvm::errorCodeToError(BufferOrErr.getError()));
263 std::unique_ptr<llvm::MemoryBuffer> Buffer = std::move(BufferOrErr.get());
264
265 /// The main bitstream cursor for the main block.
266 llvm::BitstreamCursor Cursor(*Buffer);
267
268 // Sniff for the signature.
269 for (unsigned char C : {'B', 'C', 'G', 'I'}) {
270 if (Expected<llvm::SimpleBitstreamCursor::word_t> Res = Cursor.Read(8)) {
271 if (Res.get() != C)
272 return std::make_pair(
273 nullptr, llvm::createStringError(std::errc::illegal_byte_sequence,
274 "expected signature BCGI"));
275 } else
276 return std::make_pair(nullptr, Res.takeError());
277 }
278
279 return std::make_pair(new GlobalModuleIndex(std::move(Buffer), std::move(Cursor)),
280 llvm::Error::success());
281}
282
285 SmallVectorImpl<ModuleFile *> &Dependencies) {
286 // Look for information about this module file.
287 llvm::DenseMap<ModuleFile *, unsigned>::iterator Known
288 = ModulesByFile.find(File);
289 if (Known == ModulesByFile.end())
290 return;
291
292 // Record dependencies.
293 Dependencies.clear();
294 ArrayRef<unsigned> StoredDependencies = Modules[Known->second].Dependencies;
295 for (unsigned I = 0, N = StoredDependencies.size(); I != N; ++I) {
296 if (ModuleFile *MF = Modules[I].File)
297 Dependencies.push_back(MF);
298 }
299}
300
301bool GlobalModuleIndex::lookupIdentifier(StringRef Name, HitSet &Hits) {
302 Hits.clear();
303
304 // If there's no identifier index, there is nothing we can do.
305 if (!IdentifierIndex)
306 return false;
307
308 // Look into the identifier index.
309 ++NumIdentifierLookups;
310 IdentifierIndexTable &Table
311 = *static_cast<IdentifierIndexTable *>(IdentifierIndex);
312 IdentifierIndexTable::iterator Known = Table.find(Name);
313 if (Known == Table.end()) {
314 return false;
315 }
316
317 SmallVector<unsigned, 2> ModuleIDs = *Known;
318 for (unsigned I = 0, N = ModuleIDs.size(); I != N; ++I) {
319 if (ModuleFile *MF = Modules[ModuleIDs[I]].File)
320 Hits.insert(MF);
321 }
322
323 ++NumIdentifierLookupHits;
324 return true;
325}
326
328 // Look for the module in the global module index based on the module name.
329 StringRef Name = File->ModuleName;
330 llvm::StringMap<unsigned>::iterator Known = UnresolvedModules.find(Name);
331 if (Known == UnresolvedModules.end()) {
332 return true;
333 }
334
335 // Rectify this module with the global module index.
336 ModuleInfo &Info = Modules[Known->second];
337
338 // If the size and modification time match what we expected, record this
339 // module file.
340 bool Failed = true;
341 if (File->File.getSize() == Info.Size &&
342 File->File.getModificationTime() == Info.ModTime) {
343 Info.File = File;
344 ModulesByFile[File] = Known->second;
345
346 Failed = false;
347 }
348
349 // One way or another, we have resolved this module file.
350 UnresolvedModules.erase(Known);
351 return Failed;
352}
353
355 std::fprintf(stderr, "*** Global Module Index Statistics:\n");
356 if (NumIdentifierLookups) {
357 fprintf(stderr, " %u / %u identifier lookups succeeded (%f%%)\n",
358 NumIdentifierLookupHits, NumIdentifierLookups,
359 (double)NumIdentifierLookupHits*100.0/NumIdentifierLookups);
360 }
361 std::fprintf(stderr, "\n");
362}
363
364LLVM_DUMP_METHOD void GlobalModuleIndex::dump() {
365 llvm::errs() << "*** Global Module Index Dump:\n";
366 llvm::errs() << "Module files:\n";
367 for (auto &MI : Modules) {
368 llvm::errs() << "** " << MI.FileName << "\n";
369 if (MI.File)
370 MI.File->dump();
371 else
372 llvm::errs() << "\n";
373 }
374 llvm::errs() << "\n";
375}
376
377//----------------------------------------------------------------------------//
378// Global module index writer.
379//----------------------------------------------------------------------------//
380
381namespace {
382 /// Provides information about a specific module file.
383 struct ModuleFileInfo {
384 /// The numberic ID for this module file.
385 unsigned ID;
386
387 /// The set of modules on which this module depends. Each entry is
388 /// a module ID.
389 SmallVector<unsigned, 4> Dependencies;
390 ASTFileSignature Signature;
391 };
392
393 struct ImportedModuleFileInfo {
394 off_t StoredSize;
395 time_t StoredModTime;
396 ASTFileSignature StoredSignature;
397 ImportedModuleFileInfo(off_t Size, time_t ModTime, ASTFileSignature Sig)
398 : StoredSize(Size), StoredModTime(ModTime), StoredSignature(Sig) {}
399 };
400
401 /// Builder that generates the global module index file.
402 class GlobalModuleIndexBuilder {
403 FileManager &FileMgr;
404 const PCHContainerReader &PCHContainerRdr;
405
406 /// Mapping from files to module file information.
407 using ModuleFilesMap = llvm::MapVector<FileEntryRef, ModuleFileInfo>;
408
409 /// Information about each of the known module files.
410 ModuleFilesMap ModuleFiles;
411
412 /// Mapping from the imported module file to the imported
413 /// information.
414 using ImportedModuleFilesMap =
415 std::multimap<FileEntryRef, ImportedModuleFileInfo>;
416
417 /// Information about each importing of a module file.
418 ImportedModuleFilesMap ImportedModuleFiles;
419
420 /// Mapping from identifiers to the list of module file IDs that
421 /// consider this identifier to be interesting.
422 typedef llvm::StringMap<SmallVector<unsigned, 2> > InterestingIdentifierMap;
423
424 /// A mapping from all interesting identifiers to the set of module
425 /// files in which those identifiers are considered interesting.
426 InterestingIdentifierMap InterestingIdentifiers;
427
428 /// Write the block-info block for the global module index file.
429 void emitBlockInfoBlock(llvm::BitstreamWriter &Stream);
430
431 /// Retrieve the module file information for the given file.
432 ModuleFileInfo &getModuleFileInfo(FileEntryRef File) {
433 auto Known = ModuleFiles.find(File);
434 if (Known != ModuleFiles.end())
435 return Known->second;
436
437 unsigned NewID = ModuleFiles.size();
438 ModuleFileInfo &Info = ModuleFiles[File];
439 Info.ID = NewID;
440 return Info;
441 }
442
443 public:
444 explicit GlobalModuleIndexBuilder(
445 FileManager &FileMgr, const PCHContainerReader &PCHContainerRdr)
446 : FileMgr(FileMgr), PCHContainerRdr(PCHContainerRdr) {}
447
448 /// Load the contents of the given module file into the builder.
449 llvm::Error loadModuleFile(FileEntryRef File);
450
451 /// Write the index to the given bitstream.
452 /// \returns true if an error occurred, false otherwise.
453 bool writeIndex(llvm::BitstreamWriter &Stream);
454 };
455}
456
457static void emitBlockID(unsigned ID, const char *Name,
458 llvm::BitstreamWriter &Stream,
460 Record.clear();
461 Record.push_back(ID);
462 Stream.EmitRecord(llvm::bitc::BLOCKINFO_CODE_SETBID, Record);
463
464 // Emit the block name if present.
465 if (!Name || Name[0] == 0) return;
466 Record.clear();
467 while (*Name)
468 Record.push_back(*Name++);
469 Stream.EmitRecord(llvm::bitc::BLOCKINFO_CODE_BLOCKNAME, Record);
470}
471
472static void emitRecordID(unsigned ID, const char *Name,
473 llvm::BitstreamWriter &Stream,
475 Record.clear();
476 Record.push_back(ID);
477 while (*Name)
478 Record.push_back(*Name++);
479 Stream.EmitRecord(llvm::bitc::BLOCKINFO_CODE_SETRECORDNAME, Record);
480}
481
482void
483GlobalModuleIndexBuilder::emitBlockInfoBlock(llvm::BitstreamWriter &Stream) {
485 Stream.EnterBlockInfoBlock();
486
487#define BLOCK(X) emitBlockID(X ## _ID, #X, Stream, Record)
488#define RECORD(X) emitRecordID(X, #X, Stream, Record)
489 BLOCK(GLOBAL_INDEX_BLOCK);
490 RECORD(INDEX_METADATA);
491 RECORD(MODULE);
492 RECORD(IDENTIFIER_INDEX);
493#undef RECORD
494#undef BLOCK
495
496 Stream.ExitBlock();
497}
498
499namespace {
500 class InterestingASTIdentifierLookupTrait
502
503 public:
504 /// The identifier and whether it is "interesting".
505 typedef std::pair<StringRef, bool> data_type;
506
507 data_type ReadData(const internal_key_type& k,
508 const unsigned char* d,
509 unsigned DataLen) {
510 // The first bit indicates whether this identifier is interesting.
511 // That's all we care about.
512 using namespace llvm::support;
513 IdentifierID RawID =
514 endian::readNext<IdentifierID, llvm::endianness::little>(d);
515 bool IsInteresting = RawID & 0x01;
516 return std::make_pair(k, IsInteresting);
517 }
518 };
519}
520
521llvm::Error GlobalModuleIndexBuilder::loadModuleFile(FileEntryRef File) {
522 // Open the module file.
523
524 auto Buffer = FileMgr.getBufferForFile(File, /*isVolatile=*/true);
525 if (!Buffer)
526 return llvm::createStringError(Buffer.getError(),
527 "failed getting buffer for module file");
528
529 // Initialize the input stream
530 llvm::BitstreamCursor InStream(PCHContainerRdr.ExtractPCH(**Buffer));
531
532 // Sniff for the signature.
533 for (unsigned char C : {'C', 'P', 'C', 'H'})
534 if (Expected<llvm::SimpleBitstreamCursor::word_t> Res = InStream.Read(8)) {
535 if (Res.get() != C)
536 return llvm::createStringError(std::errc::illegal_byte_sequence,
537 "expected signature CPCH");
538 } else
539 return Res.takeError();
540
541 // Record this module file and assign it a unique ID (if it doesn't have
542 // one already).
543 unsigned ID = getModuleFileInfo(File).ID;
544
545 // Search for the blocks and records we care about.
546 enum { Other, ControlBlock, ASTBlock, DiagnosticOptionsBlock } State = Other;
547 bool Done = false;
548 while (!Done) {
549 Expected<llvm::BitstreamEntry> MaybeEntry = InStream.advance();
550 if (!MaybeEntry)
551 return MaybeEntry.takeError();
552 llvm::BitstreamEntry Entry = MaybeEntry.get();
553
554 switch (Entry.Kind) {
555 case llvm::BitstreamEntry::Error:
556 Done = true;
557 continue;
558
559 case llvm::BitstreamEntry::Record:
560 // In the 'other' state, just skip the record. We don't care.
561 if (State == Other) {
562 if (llvm::Expected<unsigned> Skipped = InStream.skipRecord(Entry.ID))
563 continue;
564 else
565 return Skipped.takeError();
566 }
567
568 // Handle potentially-interesting records below.
569 break;
570
571 case llvm::BitstreamEntry::SubBlock:
572 if (Entry.ID == CONTROL_BLOCK_ID) {
573 if (llvm::Error Err = InStream.EnterSubBlock(CONTROL_BLOCK_ID))
574 return Err;
575
576 // Found the control block.
577 State = ControlBlock;
578 continue;
579 }
580
581 if (Entry.ID == AST_BLOCK_ID) {
582 if (llvm::Error Err = InStream.EnterSubBlock(AST_BLOCK_ID))
583 return Err;
584
585 // Found the AST block.
586 State = ASTBlock;
587 continue;
588 }
589
590 if (Entry.ID == UNHASHED_CONTROL_BLOCK_ID) {
591 if (llvm::Error Err = InStream.EnterSubBlock(UNHASHED_CONTROL_BLOCK_ID))
592 return Err;
593
594 // Found the Diagnostic Options block.
595 State = DiagnosticOptionsBlock;
596 continue;
597 }
598
599 if (llvm::Error Err = InStream.SkipBlock())
600 return Err;
601
602 continue;
603
604 case llvm::BitstreamEntry::EndBlock:
605 State = Other;
606 continue;
607 }
608
609 // Read the given record.
611 StringRef Blob;
612 Expected<unsigned> MaybeCode = InStream.readRecord(Entry.ID, Record, &Blob);
613 if (!MaybeCode)
614 return MaybeCode.takeError();
615 unsigned Code = MaybeCode.get();
616
617 // Handle module dependencies.
618 if (State == ControlBlock && Code == IMPORTS) {
619 // Load each of the imported PCH files.
620 unsigned Idx = 0, N = Record.size();
621 while (Idx < N) {
622 // Read information about the AST file.
623
624 // Skip the imported kind
625 ++Idx;
626
627 // Skip if it is standard C++ module
628 ++Idx;
629
630 // Skip the import location
631 ++Idx;
632
633 // Load stored size/modification time.
634 off_t StoredSize = (off_t)Record[Idx++];
635 time_t StoredModTime = (time_t)Record[Idx++];
636
637 // Skip the stored signature.
638 // FIXME: we could read the signature out of the import and validate it.
639 auto FirstSignatureByte = Record.begin() + Idx;
641 FirstSignatureByte, FirstSignatureByte + ASTFileSignature::size);
643
644 // Skip the module name (currently this is only used for prebuilt
645 // modules while here we are only dealing with cached).
646 Idx += Record[Idx] + 1;
647
648 // Retrieve the imported file name.
649 unsigned Length = Record[Idx++];
650 SmallString<128> ImportedFile(Record.begin() + Idx,
651 Record.begin() + Idx + Length);
652 Idx += Length;
653
654 // Find the imported module file.
655 auto DependsOnFile =
656 FileMgr.getOptionalFileRef(ImportedFile, /*OpenFile=*/false,
657 /*CacheFailure=*/false);
658
659 if (!DependsOnFile)
660 return llvm::createStringError(std::errc::bad_file_descriptor,
661 "imported file \"%s\" not found",
662 ImportedFile.c_str());
663
664 // Save the information in ImportedModuleFileInfo so we can verify after
665 // loading all pcms.
666 ImportedModuleFiles.insert(std::make_pair(
667 *DependsOnFile, ImportedModuleFileInfo(StoredSize, StoredModTime,
668 StoredSignature)));
669
670 // Record the dependency.
671 unsigned DependsOnID = getModuleFileInfo(*DependsOnFile).ID;
672 getModuleFileInfo(File).Dependencies.push_back(DependsOnID);
673 }
674
675 continue;
676 }
677
678 // Handle the identifier table
679 if (State == ASTBlock && Code == IDENTIFIER_TABLE && Record[0] > 0) {
680 typedef llvm::OnDiskIterableChainedHashTable<
681 InterestingASTIdentifierLookupTrait> InterestingIdentifierTable;
682 std::unique_ptr<InterestingIdentifierTable> Table(
683 InterestingIdentifierTable::Create(
684 (const unsigned char *)Blob.data() + Record[0],
685 (const unsigned char *)Blob.data() + sizeof(uint32_t),
686 (const unsigned char *)Blob.data()));
687 for (InterestingIdentifierTable::data_iterator D = Table->data_begin(),
688 DEnd = Table->data_end();
689 D != DEnd; ++D) {
690 std::pair<StringRef, bool> Ident = *D;
691 if (Ident.second)
692 InterestingIdentifiers[Ident.first].push_back(ID);
693 else
694 (void)InterestingIdentifiers[Ident.first];
695 }
696 }
697
698 // Get Signature.
699 if (State == DiagnosticOptionsBlock && Code == SIGNATURE) {
700 auto Signature = ASTFileSignature::create(Blob.begin(), Blob.end());
701 assert(Signature != ASTFileSignature::createDummy() &&
702 "Dummy AST file signature not backpatched in ASTWriter.");
703 getModuleFileInfo(File).Signature = Signature;
704 }
705
706 // We don't care about this record.
707 }
708
709 return llvm::Error::success();
710}
711
712namespace {
713
714/// Trait used to generate the identifier index as an on-disk hash
715/// table.
716class IdentifierIndexWriterTrait {
717public:
718 typedef StringRef key_type;
719 typedef StringRef key_type_ref;
720 typedef SmallVector<unsigned, 2> data_type;
721 typedef const SmallVector<unsigned, 2> &data_type_ref;
722 typedef unsigned hash_value_type;
723 typedef unsigned offset_type;
724
725 static hash_value_type ComputeHash(key_type_ref Key) {
726 return llvm::djbHash(Key);
727 }
728
729 std::pair<unsigned,unsigned>
730 EmitKeyDataLength(raw_ostream& Out, key_type_ref Key, data_type_ref Data) {
731 using namespace llvm::support;
732 endian::Writer LE(Out, llvm::endianness::little);
733 unsigned KeyLen = Key.size();
734 unsigned DataLen = Data.size() * 4;
735 LE.write<uint16_t>(KeyLen);
736 LE.write<uint16_t>(DataLen);
737 return std::make_pair(KeyLen, DataLen);
738 }
739
740 void EmitKey(raw_ostream& Out, key_type_ref Key, unsigned KeyLen) {
741 Out.write(Key.data(), KeyLen);
742 }
743
744 void EmitData(raw_ostream& Out, key_type_ref Key, data_type_ref Data,
745 unsigned DataLen) {
746 using namespace llvm::support;
747 for (unsigned I = 0, N = Data.size(); I != N; ++I)
748 endian::write<uint32_t>(Out, Data[I], llvm::endianness::little);
749 }
750};
751
752}
753
754bool GlobalModuleIndexBuilder::writeIndex(llvm::BitstreamWriter &Stream) {
755 for (auto MapEntry : ImportedModuleFiles) {
756 auto File = MapEntry.first;
757 ImportedModuleFileInfo &Info = MapEntry.second;
758 if (getModuleFileInfo(File).Signature) {
759 if (getModuleFileInfo(File).Signature != Info.StoredSignature)
760 // Verify Signature.
761 return true;
762 } else if (Info.StoredSize != File.getSize() ||
763 Info.StoredModTime != File.getModificationTime())
764 // Verify Size and ModTime.
765 return true;
766 }
767
768 using namespace llvm;
769 llvm::TimeTraceScope TimeScope("Module WriteIndex");
770
771 // Emit the file header.
772 Stream.Emit((unsigned)'B', 8);
773 Stream.Emit((unsigned)'C', 8);
774 Stream.Emit((unsigned)'G', 8);
775 Stream.Emit((unsigned)'I', 8);
776
777 // Write the block-info block, which describes the records in this bitcode
778 // file.
779 emitBlockInfoBlock(Stream);
780
781 Stream.EnterSubblock(GLOBAL_INDEX_BLOCK_ID, 3);
782
783 // Write the metadata.
785 Record.push_back(CurrentVersion);
786 Stream.EmitRecord(INDEX_METADATA, Record);
787
788 // Write the set of known module files.
789 for (ModuleFilesMap::iterator M = ModuleFiles.begin(),
790 MEnd = ModuleFiles.end();
791 M != MEnd; ++M) {
792 Record.clear();
793 Record.push_back(M->second.ID);
794 Record.push_back(M->first.getSize());
795 Record.push_back(M->first.getModificationTime());
796
797 // File name
798 StringRef Name(M->first.getName());
799 Record.push_back(Name.size());
800 Record.append(Name.begin(), Name.end());
801
802 // Dependencies
803 Record.push_back(M->second.Dependencies.size());
804 Record.append(M->second.Dependencies.begin(), M->second.Dependencies.end());
805 Stream.EmitRecord(MODULE, Record);
806 }
807
808 // Write the identifier -> module file mapping.
809 {
810 llvm::OnDiskChainedHashTableGenerator<IdentifierIndexWriterTrait> Generator;
811 IdentifierIndexWriterTrait Trait;
812
813 // Populate the hash table.
814 for (InterestingIdentifierMap::iterator I = InterestingIdentifiers.begin(),
815 IEnd = InterestingIdentifiers.end();
816 I != IEnd; ++I) {
817 Generator.insert(I->first(), I->second, Trait);
818 }
819
820 // Create the on-disk hash table in a buffer.
822 uint32_t BucketOffset;
823 {
824 using namespace llvm::support;
825 llvm::raw_svector_ostream Out(IdentifierTable);
826 // Make sure that no bucket is at offset 0
827 endian::write<uint32_t>(Out, 0, llvm::endianness::little);
828 BucketOffset = Generator.Emit(Out, Trait);
829 }
830
831 // Create a blob abbreviation
832 auto Abbrev = std::make_shared<BitCodeAbbrev>();
833 Abbrev->Add(BitCodeAbbrevOp(IDENTIFIER_INDEX));
834 Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32));
835 Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob));
836 unsigned IDTableAbbrev = Stream.EmitAbbrev(std::move(Abbrev));
837
838 // Write the identifier table
839 uint64_t Record[] = {IDENTIFIER_INDEX, BucketOffset};
840 Stream.EmitRecordWithBlob(IDTableAbbrev, Record, IdentifierTable);
841 }
842
843 Stream.ExitBlock();
844 return false;
845}
846
847llvm::Error
849 const PCHContainerReader &PCHContainerRdr,
850 StringRef Path) {
851 llvm::SmallString<128> IndexPath;
852 IndexPath += Path;
853 llvm::sys::path::append(IndexPath, IndexFileName);
854
855 // Coordinate building the global index file with other processes that might
856 // try to do the same.
857 llvm::LockFileManager Locked(IndexPath);
858 switch (Locked) {
859 case llvm::LockFileManager::LFS_Error:
860 return llvm::createStringError(std::errc::io_error, "LFS error");
861
862 case llvm::LockFileManager::LFS_Owned:
863 // We're responsible for building the index ourselves. Do so below.
864 break;
865
866 case llvm::LockFileManager::LFS_Shared:
867 // Someone else is responsible for building the index. We don't care
868 // when they finish, so we're done.
869 return llvm::createStringError(std::errc::device_or_resource_busy,
870 "someone else is building the index");
871 }
872
873 // The module index builder.
874 GlobalModuleIndexBuilder Builder(FileMgr, PCHContainerRdr);
875
876 // Load each of the module files.
877 std::error_code EC;
878 for (llvm::sys::fs::directory_iterator D(Path, EC), DEnd;
879 D != DEnd && !EC;
880 D.increment(EC)) {
881 // If this isn't a module file, we don't care.
882 if (llvm::sys::path::extension(D->path()) != ".pcm") {
883 // ... unless it's a .pcm.lock file, which indicates that someone is
884 // in the process of rebuilding a module. They'll rebuild the index
885 // at the end of that translation unit, so we don't have to.
886 if (llvm::sys::path::extension(D->path()) == ".pcm.lock")
887 return llvm::createStringError(std::errc::device_or_resource_busy,
888 "someone else is building the index");
889
890 continue;
891 }
892
893 // If we can't find the module file, skip it.
894 auto ModuleFile = FileMgr.getOptionalFileRef(D->path());
895 if (!ModuleFile)
896 continue;
897
898 // Load this module file.
899 if (llvm::Error Err = Builder.loadModuleFile(*ModuleFile))
900 return Err;
901 }
902
903 // The output buffer, into which the global index will be written.
904 SmallString<16> OutputBuffer;
905 {
906 llvm::BitstreamWriter OutputStream(OutputBuffer);
907 if (Builder.writeIndex(OutputStream))
908 return llvm::createStringError(std::errc::io_error,
909 "failed writing index");
910 }
911
912 return llvm::writeToOutput(IndexPath, [&OutputBuffer](llvm::raw_ostream &OS) {
913 OS << OutputBuffer;
914 return llvm::Error::success();
915 });
916}
917
918namespace {
919 class GlobalIndexIdentifierIterator : public IdentifierIterator {
920 /// The current position within the identifier lookup table.
921 IdentifierIndexTable::key_iterator Current;
922
923 /// The end position within the identifier lookup table.
924 IdentifierIndexTable::key_iterator End;
925
926 public:
927 explicit GlobalIndexIdentifierIterator(IdentifierIndexTable &Idx) {
928 Current = Idx.key_begin();
929 End = Idx.key_end();
930 }
931
932 StringRef Next() override {
933 if (Current == End)
934 return StringRef();
935
936 StringRef Result = *Current;
937 ++Current;
938 return Result;
939 }
940 };
941}
942
944 IdentifierIndexTable &Table =
945 *static_cast<IdentifierIndexTable *>(IdentifierIndex);
946 return new GlobalIndexIdentifierIterator(Table);
947}
#define RECORD(X)
static char ID
Definition: Arena.cpp:183
const Decl * D
IndirectLocalPath & Path
Defines the clang::FileManager interface and associated types.
static void emitRecordID(unsigned ID, const char *Name, llvm::BitstreamWriter &Stream, SmallVectorImpl< uint64_t > &Record)
static const unsigned CurrentVersion
The global index file version.
static const char *const IndexFileName
The name of the global index file.
static void emitBlockID(unsigned ID, const char *Name, llvm::BitstreamWriter &Stream, SmallVectorImpl< uint64_t > &Record)
llvm::MachO::Record Record
Definition: MachO.h:31
static std::string toString(const clang::SanitizerSet &Sanitizers)
Produce a string containing comma-separated names of sanitizers in Sanitizers set.
unsigned NameLen
const char * Data
#define BLOCK(DERIVED, BASE)
Definition: Template.h:621
__device__ __2f16 b
A reference to a FileEntry that includes the name of the file as it was accessed by the FileManager's...
Definition: FileEntry.h:57
Implements support for file system lookup, file system caching, and directory search management.
Definition: FileManager.h:53
OptionalFileEntryRef getOptionalFileRef(StringRef Filename, bool OpenFile=false, bool CacheFailure=true)
Get a FileEntryRef if it exists, without doing anything on error.
Definition: FileManager.h:240
llvm::ErrorOr< std::unique_ptr< llvm::MemoryBuffer > > getBufferForFile(FileEntryRef Entry, bool isVolatile=false, bool RequiresNullTerminator=true, std::optional< int64_t > MaybeLimit=std::nullopt)
Open the specified file as a MemoryBuffer, returning a new MemoryBuffer if successful,...
A global index for a set of module files, providing information about the identifiers within those mo...
bool loadedModuleFile(ModuleFile *File)
Note that the given module file has been loaded.
void printStats()
Print statistics to standard error.
bool lookupIdentifier(llvm::StringRef Name, HitSet &Hits)
Look for all of the module files with information about the given identifier, e.g....
void getModuleDependencies(ModuleFile *File, llvm::SmallVectorImpl< ModuleFile * > &Dependencies)
Retrieve the set of module files on which the given module file directly depends.
IdentifierIterator * createIdentifierIterator() const
Returns an iterator for identifiers stored in the index table.
static std::pair< GlobalModuleIndex *, llvm::Error > readIndex(llvm::StringRef Path)
Read a global index file for the given directory.
void dump()
Print debugging view to standard error.
static llvm::Error writeIndex(FileManager &FileMgr, const PCHContainerReader &PCHContainerRdr, llvm::StringRef Path)
Write a global index into the given.
An iterator that walks over all of the known identifiers in the lookup table.
Implements an efficient mapping from strings to IdentifierInfo nodes.
This abstract interface provides operations for unwrapping containers for serialized ASTs (precompile...
virtual llvm::StringRef ExtractPCH(llvm::MemoryBufferRef Buffer) const =0
Returns the serialized AST inside the PCH container Buffer.
Information about a module that has been loaded by the ASTReader.
Definition: ModuleFile.h:124
Base class for the trait describing the on-disk hash table for the identifiers in an AST file.
@ ModuleFileInfo
Dump information about a module file.
bool LE(InterpState &S, CodePtr OpPC)
Definition: Interp.h:1103
@ AST_BLOCK_ID
The AST block, which acts as a container around the full AST block.
Definition: ASTBitCodes.h:296
@ CONTROL_BLOCK_ID
The control block, which contains all of the information that needs to be validated prior to committi...
Definition: ASTBitCodes.h:322
@ UNHASHED_CONTROL_BLOCK_ID
A block with unhashed content.
Definition: ASTBitCodes.h:344
@ IMPORTS
Record code for the list of other AST files imported by this AST file.
Definition: ASTBitCodes.h:355
@ SIGNATURE
Record code for the signature that identifiers this AST file.
Definition: ASTBitCodes.h:408
uint64_t IdentifierID
An ID number that refers to an identifier in an AST file.
Definition: ASTBitCodes.h:63
@ IDENTIFIER_TABLE
Record code for the identifier table.
Definition: ASTBitCodes.h:501
unsigned ComputeHash(Selector Sel)
Definition: ASTCommon.cpp:294
std::shared_ptr< MatchComputation< T > > Generator
Definition: RewriteRule.h:65
The JSON file list parser is used to communicate input to InstallAPI.
@ Other
Other implicit parameter.
unsigned long uint64_t
unsigned int uint32_t
Diagnostic wrappers for TextAPI types for error reporting.
Definition: Dominators.h:30
The signature of a module, which is a hash of the AST content.
Definition: Module.h:57
static constexpr size_t size
Definition: Module.h:60
static ASTFileSignature create(std::array< uint8_t, 20 > Bytes)
Definition: Module.h:75
static ASTFileSignature createDummy()
Definition: Module.h:85