clang 22.0.0git
GlobalModuleIndex.cpp
Go to the documentation of this file.
1//===--- GlobalModuleIndex.cpp - Global Module Index ------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the GlobalModuleIndex class.
10//
11//===----------------------------------------------------------------------===//
12
14#include "ASTReaderInternals.h"
19#include "llvm/ADT/DenseMap.h"
20#include "llvm/ADT/MapVector.h"
21#include "llvm/ADT/SmallString.h"
22#include "llvm/ADT/StringRef.h"
23#include "llvm/Bitstream/BitstreamReader.h"
24#include "llvm/Bitstream/BitstreamWriter.h"
25#include "llvm/Support/DJB.h"
26#include "llvm/Support/FileSystem.h"
27#include "llvm/Support/IOSandbox.h"
28#include "llvm/Support/LockFileManager.h"
29#include "llvm/Support/MemoryBuffer.h"
30#include "llvm/Support/OnDiskHashTable.h"
31#include "llvm/Support/Path.h"
32#include "llvm/Support/TimeProfiler.h"
33#include "llvm/Support/raw_ostream.h"
34#include <cstdio>
35using namespace clang;
36using namespace serialization;
37
38//----------------------------------------------------------------------------//
39// Shared constants
40//----------------------------------------------------------------------------//
41namespace {
42 enum {
43 /// The block containing the index.
44 GLOBAL_INDEX_BLOCK_ID = llvm::bitc::FIRST_APPLICATION_BLOCKID
45 };
46
47 /// Describes the record types in the index.
48 enum IndexRecordTypes {
49 /// Contains version information and potentially other metadata,
50 /// used to determine if we can read this global index file.
51 INDEX_METADATA,
52 /// Describes a module, including its file name and dependencies.
53 MODULE,
54 /// The index for identifiers.
55 IDENTIFIER_INDEX
56 };
57}
58
/// Name of the on-disk global index file.
static constexpr const char *IndexFileName = "modules.idx";

/// Version number of the global index file format.
static constexpr unsigned CurrentVersion = 1;
64
65//----------------------------------------------------------------------------//
66// Global module index reader.
67//----------------------------------------------------------------------------//
68
69namespace {
70
71/// Trait used to read the identifier index from the on-disk hash
72/// table.
73class IdentifierIndexReaderTrait {
74public:
75 typedef StringRef external_key_type;
76 typedef StringRef internal_key_type;
77 typedef SmallVector<unsigned, 2> data_type;
78 typedef unsigned hash_value_type;
79 typedef unsigned offset_type;
80
81 static bool EqualKey(const internal_key_type& a, const internal_key_type& b) {
82 return a == b;
83 }
84
85 static hash_value_type ComputeHash(const internal_key_type& a) {
86 return llvm::djbHash(a);
87 }
88
89 static std::pair<unsigned, unsigned>
90 ReadKeyDataLength(const unsigned char*& d) {
91 using namespace llvm::support;
92 unsigned KeyLen = endian::readNext<uint16_t, llvm::endianness::little>(d);
93 unsigned DataLen = endian::readNext<uint16_t, llvm::endianness::little>(d);
94 return std::make_pair(KeyLen, DataLen);
95 }
96
97 static const internal_key_type&
98 GetInternalKey(const external_key_type& x) { return x; }
99
100 static const external_key_type&
101 GetExternalKey(const internal_key_type& x) { return x; }
102
103 static internal_key_type ReadKey(const unsigned char* d, unsigned n) {
104 return StringRef((const char *)d, n);
105 }
106
107 static data_type ReadData(const internal_key_type& k,
108 const unsigned char* d,
109 unsigned DataLen) {
110 using namespace llvm::support;
111
112 data_type Result;
113 while (DataLen > 0) {
114 unsigned ID = endian::readNext<uint32_t, llvm::endianness::little>(d);
115 Result.push_back(ID);
116 DataLen -= 4;
117 }
118
119 return Result;
120 }
121};
122
123typedef llvm::OnDiskIterableChainedHashTable<IdentifierIndexReaderTrait>
124 IdentifierIndexTable;
125
126}
127
128GlobalModuleIndex::GlobalModuleIndex(
129 std::unique_ptr<llvm::MemoryBuffer> IndexBuffer,
130 llvm::BitstreamCursor Cursor)
131 : Buffer(std::move(IndexBuffer)), IdentifierIndex(), NumIdentifierLookups(),
132 NumIdentifierLookupHits() {
133 auto Fail = [&](llvm::Error &&Err) {
134 report_fatal_error("Module index '" + Buffer->getBufferIdentifier() +
135 "' failed: " + toString(std::move(Err)));
136 };
137
138 llvm::TimeTraceScope TimeScope("Module LoadIndex");
139 // Read the global index.
140 bool InGlobalIndexBlock = false;
141 bool Done = false;
142 while (!Done) {
143 llvm::BitstreamEntry Entry;
144 if (Expected<llvm::BitstreamEntry> Res = Cursor.advance())
145 Entry = Res.get();
146 else
147 Fail(Res.takeError());
148
149 switch (Entry.Kind) {
150 case llvm::BitstreamEntry::Error:
151 return;
152
153 case llvm::BitstreamEntry::EndBlock:
154 if (InGlobalIndexBlock) {
155 InGlobalIndexBlock = false;
156 Done = true;
157 continue;
158 }
159 return;
160
161
162 case llvm::BitstreamEntry::Record:
163 // Entries in the global index block are handled below.
164 if (InGlobalIndexBlock)
165 break;
166
167 return;
168
169 case llvm::BitstreamEntry::SubBlock:
170 if (!InGlobalIndexBlock && Entry.ID == GLOBAL_INDEX_BLOCK_ID) {
171 if (llvm::Error Err = Cursor.EnterSubBlock(GLOBAL_INDEX_BLOCK_ID))
172 Fail(std::move(Err));
173 InGlobalIndexBlock = true;
174 } else if (llvm::Error Err = Cursor.SkipBlock())
175 Fail(std::move(Err));
176 continue;
177 }
178
180 StringRef Blob;
181 Expected<unsigned> MaybeIndexRecord =
182 Cursor.readRecord(Entry.ID, Record, &Blob);
183 if (!MaybeIndexRecord)
184 Fail(MaybeIndexRecord.takeError());
185 IndexRecordTypes IndexRecord =
186 static_cast<IndexRecordTypes>(MaybeIndexRecord.get());
187 switch (IndexRecord) {
188 case INDEX_METADATA:
189 // Make sure that the version matches.
190 if (Record.size() < 1 || Record[0] != CurrentVersion)
191 return;
192 break;
193
194 case MODULE: {
195 unsigned Idx = 0;
196 unsigned ID = Record[Idx++];
197
198 // Make room for this module's information.
199 if (ID == Modules.size())
200 Modules.push_back(ModuleInfo());
201 else
202 Modules.resize(ID + 1);
203
204 // Size/modification time for this module file at the time the
205 // global index was built.
206 Modules[ID].Size = Record[Idx++];
207 Modules[ID].ModTime = Record[Idx++];
208
209 // File name.
210 unsigned NameLen = Record[Idx++];
211 Modules[ID].FileName.assign(Record.begin() + Idx,
212 Record.begin() + Idx + NameLen);
213 Idx += NameLen;
214
215 // Dependencies
216 unsigned NumDeps = Record[Idx++];
217 Modules[ID].Dependencies.insert(Modules[ID].Dependencies.end(),
218 Record.begin() + Idx,
219 Record.begin() + Idx + NumDeps);
220 Idx += NumDeps;
221
222 // Make sure we're at the end of the record.
223 assert(Idx == Record.size() && "More module info?");
224
225 // Record this module as an unresolved module.
226 // FIXME: this doesn't work correctly for module names containing path
227 // separators.
228 StringRef ModuleName = llvm::sys::path::stem(Modules[ID].FileName);
229 // Remove the -<hash of ModuleMapPath>
230 ModuleName = ModuleName.rsplit('-').first;
231 UnresolvedModules[ModuleName] = ID;
232 break;
233 }
234
235 case IDENTIFIER_INDEX:
236 // Wire up the identifier index.
237 if (Record[0]) {
238 IdentifierIndex = IdentifierIndexTable::Create(
239 (const unsigned char *)Blob.data() + Record[0],
240 (const unsigned char *)Blob.data() + sizeof(uint32_t),
241 (const unsigned char *)Blob.data(), IdentifierIndexReaderTrait());
242 }
243 break;
244 }
245 }
246}
247
249 delete static_cast<IdentifierIndexTable *>(IdentifierIndex);
250}
251
252std::pair<GlobalModuleIndex *, llvm::Error>
254 // This is a compiler-internal input/output, let's bypass the sandbox.
255 auto BypassSandbox = llvm::sys::sandbox::scopedDisable();
256
257 // Load the index file, if it's there.
258 llvm::SmallString<128> IndexPath;
259 IndexPath += Path;
260 llvm::sys::path::append(IndexPath, IndexFileName);
261
262 llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> BufferOrErr =
263 llvm::MemoryBuffer::getFile(IndexPath.c_str());
264 if (!BufferOrErr)
265 return std::make_pair(nullptr,
266 llvm::errorCodeToError(BufferOrErr.getError()));
267 std::unique_ptr<llvm::MemoryBuffer> Buffer = std::move(BufferOrErr.get());
268
269 /// The main bitstream cursor for the main block.
270 llvm::BitstreamCursor Cursor(*Buffer);
271
272 // Sniff for the signature.
273 for (unsigned char C : {'B', 'C', 'G', 'I'}) {
274 if (Expected<llvm::SimpleBitstreamCursor::word_t> Res = Cursor.Read(8)) {
275 if (Res.get() != C)
276 return std::make_pair(
277 nullptr, llvm::createStringError(std::errc::illegal_byte_sequence,
278 "expected signature BCGI"));
279 } else
280 return std::make_pair(nullptr, Res.takeError());
281 }
282
283 return std::make_pair(new GlobalModuleIndex(std::move(Buffer), std::move(Cursor)),
284 llvm::Error::success());
285}
286
288 ModuleFile *File,
289 SmallVectorImpl<ModuleFile *> &Dependencies) {
290 // Look for information about this module file.
291 llvm::DenseMap<ModuleFile *, unsigned>::iterator Known
292 = ModulesByFile.find(File);
293 if (Known == ModulesByFile.end())
294 return;
295
296 // Record dependencies.
297 Dependencies.clear();
298 ArrayRef<unsigned> StoredDependencies = Modules[Known->second].Dependencies;
299 for (unsigned I = 0, N = StoredDependencies.size(); I != N; ++I) {
300 if (ModuleFile *MF = Modules[I].File)
301 Dependencies.push_back(MF);
302 }
303}
304
305bool GlobalModuleIndex::lookupIdentifier(StringRef Name, HitSet &Hits) {
306 Hits.clear();
307
308 // If there's no identifier index, there is nothing we can do.
309 if (!IdentifierIndex)
310 return false;
311
312 // Look into the identifier index.
313 ++NumIdentifierLookups;
314 IdentifierIndexTable &Table
315 = *static_cast<IdentifierIndexTable *>(IdentifierIndex);
316 IdentifierIndexTable::iterator Known = Table.find(Name);
317 if (Known == Table.end()) {
318 return false;
319 }
320
321 SmallVector<unsigned, 2> ModuleIDs = *Known;
322 for (unsigned I = 0, N = ModuleIDs.size(); I != N; ++I) {
323 if (ModuleFile *MF = Modules[ModuleIDs[I]].File)
324 Hits.insert(MF);
325 }
326
327 ++NumIdentifierLookupHits;
328 return true;
329}
330
332 // Look for the module in the global module index based on the module name.
333 StringRef Name = File->ModuleName;
334 llvm::StringMap<unsigned>::iterator Known = UnresolvedModules.find(Name);
335 if (Known == UnresolvedModules.end()) {
336 return true;
337 }
338
339 // Rectify this module with the global module index.
340 ModuleInfo &Info = Modules[Known->second];
341
342 // If the size and modification time match what we expected, record this
343 // module file.
344 bool Failed = true;
345 if (File->File.getSize() == Info.Size &&
346 File->File.getModificationTime() == Info.ModTime) {
347 Info.File = File;
348 ModulesByFile[File] = Known->second;
349
350 Failed = false;
351 }
352
353 // One way or another, we have resolved this module file.
354 UnresolvedModules.erase(Known);
355 return Failed;
356}
357
359 std::fprintf(stderr, "*** Global Module Index Statistics:\n");
360 if (NumIdentifierLookups) {
361 fprintf(stderr, " %u / %u identifier lookups succeeded (%f%%)\n",
362 NumIdentifierLookupHits, NumIdentifierLookups,
363 (double)NumIdentifierLookupHits*100.0/NumIdentifierLookups);
364 }
365 std::fprintf(stderr, "\n");
366}
367
368LLVM_DUMP_METHOD void GlobalModuleIndex::dump() {
369 llvm::errs() << "*** Global Module Index Dump:\n";
370 llvm::errs() << "Module files:\n";
371 for (auto &MI : Modules) {
372 llvm::errs() << "** " << MI.FileName << "\n";
373 if (MI.File)
374 MI.File->dump();
375 else
376 llvm::errs() << "\n";
377 }
378 llvm::errs() << "\n";
379}
380
381//----------------------------------------------------------------------------//
382// Global module index writer.
383//----------------------------------------------------------------------------//
384
385namespace {
386 /// Provides information about a specific module file.
387 struct ModuleFileInfo {
388 /// The numberic ID for this module file.
389 unsigned ID;
390
391 /// The set of modules on which this module depends. Each entry is
392 /// a module ID.
393 SmallVector<unsigned, 4> Dependencies;
394 ASTFileSignature Signature;
395 };
396
397 struct ImportedModuleFileInfo {
398 off_t StoredSize;
399 time_t StoredModTime;
400 ASTFileSignature StoredSignature;
401 ImportedModuleFileInfo(off_t Size, time_t ModTime, ASTFileSignature Sig)
402 : StoredSize(Size), StoredModTime(ModTime), StoredSignature(Sig) {}
403 };
404
405 /// Builder that generates the global module index file.
406 class GlobalModuleIndexBuilder {
407 FileManager &FileMgr;
408 const PCHContainerReader &PCHContainerRdr;
409
410 /// Mapping from files to module file information.
411 using ModuleFilesMap = llvm::MapVector<FileEntryRef, ModuleFileInfo>;
412
413 /// Information about each of the known module files.
414 ModuleFilesMap ModuleFiles;
415
416 /// Mapping from the imported module file to the imported
417 /// information.
418 using ImportedModuleFilesMap =
419 std::multimap<FileEntryRef, ImportedModuleFileInfo>;
420
421 /// Information about each importing of a module file.
422 ImportedModuleFilesMap ImportedModuleFiles;
423
424 /// Mapping from identifiers to the list of module file IDs that
425 /// consider this identifier to be interesting.
426 typedef llvm::StringMap<SmallVector<unsigned, 2> > InterestingIdentifierMap;
427
428 /// A mapping from all interesting identifiers to the set of module
429 /// files in which those identifiers are considered interesting.
430 InterestingIdentifierMap InterestingIdentifiers;
431
432 /// Write the block-info block for the global module index file.
433 void emitBlockInfoBlock(llvm::BitstreamWriter &Stream);
434
435 /// Retrieve the module file information for the given file.
436 ModuleFileInfo &getModuleFileInfo(FileEntryRef File) {
437 auto [It, Inserted] = ModuleFiles.try_emplace(File);
438 if (Inserted) {
439 unsigned NewID = ModuleFiles.size();
440 ModuleFileInfo &Info = It->second;
441 Info.ID = NewID;
442 }
443 return It->second;
444 }
445
446 public:
447 explicit GlobalModuleIndexBuilder(
448 FileManager &FileMgr, const PCHContainerReader &PCHContainerRdr)
449 : FileMgr(FileMgr), PCHContainerRdr(PCHContainerRdr) {}
450
451 /// Load the contents of the given module file into the builder.
452 llvm::Error loadModuleFile(FileEntryRef File);
453
454 /// Write the index to the given bitstream.
455 /// \returns true if an error occurred, false otherwise.
456 bool writeIndex(llvm::BitstreamWriter &Stream);
457 };
458}
459
460static void emitBlockID(unsigned ID, const char *Name,
461 llvm::BitstreamWriter &Stream,
463 Record.clear();
464 Record.push_back(ID);
465 Stream.EmitRecord(llvm::bitc::BLOCKINFO_CODE_SETBID, Record);
466
467 // Emit the block name if present.
468 if (!Name || Name[0] == 0) return;
469 Record.clear();
470 while (*Name)
471 Record.push_back(*Name++);
472 Stream.EmitRecord(llvm::bitc::BLOCKINFO_CODE_BLOCKNAME, Record);
473}
474
475static void emitRecordID(unsigned ID, const char *Name,
476 llvm::BitstreamWriter &Stream,
478 Record.clear();
479 Record.push_back(ID);
480 while (*Name)
481 Record.push_back(*Name++);
482 Stream.EmitRecord(llvm::bitc::BLOCKINFO_CODE_SETRECORDNAME, Record);
483}
484
485void
486GlobalModuleIndexBuilder::emitBlockInfoBlock(llvm::BitstreamWriter &Stream) {
488 Stream.EnterBlockInfoBlock();
489
490#define BLOCK(X) emitBlockID(X ## _ID, #X, Stream, Record)
491#define RECORD(X) emitRecordID(X, #X, Stream, Record)
492 BLOCK(GLOBAL_INDEX_BLOCK);
493 RECORD(INDEX_METADATA);
494 RECORD(MODULE);
495 RECORD(IDENTIFIER_INDEX);
496#undef RECORD
497#undef BLOCK
498
499 Stream.ExitBlock();
500}
501
502namespace {
503 class InterestingASTIdentifierLookupTrait
505
506 public:
507 /// The identifier and whether it is "interesting".
508 typedef std::pair<StringRef, bool> data_type;
509
510 data_type ReadData(const internal_key_type& k,
511 const unsigned char* d,
512 unsigned DataLen) {
513 // The first bit indicates whether this identifier is interesting.
514 // That's all we care about.
515 using namespace llvm::support;
516 IdentifierID RawID =
517 endian::readNext<IdentifierID, llvm::endianness::little>(d);
518 bool IsInteresting = RawID & 0x01;
519 return std::make_pair(k, IsInteresting);
520 }
521 };
522}
523
524llvm::Error GlobalModuleIndexBuilder::loadModuleFile(FileEntryRef File) {
525 // Open the module file.
526
527 auto Buffer = FileMgr.getBufferForFile(File, /*isVolatile=*/true);
528 if (!Buffer)
529 return llvm::createStringError(Buffer.getError(),
530 "failed getting buffer for module file");
531
532 // Initialize the input stream
533 llvm::BitstreamCursor InStream(PCHContainerRdr.ExtractPCH(**Buffer));
534
535 // Sniff for the signature.
536 for (unsigned char C : {'C', 'P', 'C', 'H'})
537 if (Expected<llvm::SimpleBitstreamCursor::word_t> Res = InStream.Read(8)) {
538 if (Res.get() != C)
539 return llvm::createStringError(std::errc::illegal_byte_sequence,
540 "expected signature CPCH");
541 } else
542 return Res.takeError();
543
544 // Record this module file and assign it a unique ID (if it doesn't have
545 // one already).
546 unsigned ID = getModuleFileInfo(File).ID;
547
548 // Search for the blocks and records we care about.
549 enum { Other, ControlBlock, ASTBlock, DiagnosticOptionsBlock } State = Other;
550 bool Done = false;
551 while (!Done) {
552 Expected<llvm::BitstreamEntry> MaybeEntry = InStream.advance();
553 if (!MaybeEntry)
554 return MaybeEntry.takeError();
555 llvm::BitstreamEntry Entry = MaybeEntry.get();
556
557 switch (Entry.Kind) {
558 case llvm::BitstreamEntry::Error:
559 Done = true;
560 continue;
561
562 case llvm::BitstreamEntry::Record:
563 // In the 'other' state, just skip the record. We don't care.
564 if (State == Other) {
565 if (llvm::Expected<unsigned> Skipped = InStream.skipRecord(Entry.ID))
566 continue;
567 else
568 return Skipped.takeError();
569 }
570
571 // Handle potentially-interesting records below.
572 break;
573
574 case llvm::BitstreamEntry::SubBlock:
575 if (Entry.ID == CONTROL_BLOCK_ID) {
576 if (llvm::Error Err = InStream.EnterSubBlock(CONTROL_BLOCK_ID))
577 return Err;
578
579 // Found the control block.
580 State = ControlBlock;
581 continue;
582 }
583
584 if (Entry.ID == AST_BLOCK_ID) {
585 if (llvm::Error Err = InStream.EnterSubBlock(AST_BLOCK_ID))
586 return Err;
587
588 // Found the AST block.
589 State = ASTBlock;
590 continue;
591 }
592
593 if (Entry.ID == UNHASHED_CONTROL_BLOCK_ID) {
594 if (llvm::Error Err = InStream.EnterSubBlock(UNHASHED_CONTROL_BLOCK_ID))
595 return Err;
596
597 // Found the Diagnostic Options block.
598 State = DiagnosticOptionsBlock;
599 continue;
600 }
601
602 if (llvm::Error Err = InStream.SkipBlock())
603 return Err;
604
605 continue;
606
607 case llvm::BitstreamEntry::EndBlock:
608 State = Other;
609 continue;
610 }
611
612 // Read the given record.
614 StringRef Blob;
615 Expected<unsigned> MaybeCode = InStream.readRecord(Entry.ID, Record, &Blob);
616 if (!MaybeCode)
617 return MaybeCode.takeError();
618 unsigned Code = MaybeCode.get();
619
620 // Handle module dependencies.
621 if (State == ControlBlock && Code == IMPORT) {
622 unsigned Idx = 0;
623 // Read information about the AST file.
624
625 // Skip the imported kind
626 ++Idx;
627
628 // Skip the import location
629 ++Idx;
630
631 // Skip the module name (currently this is only used for prebuilt
632 // modules while here we are only dealing with cached).
633 Blob = Blob.substr(Record[Idx++]);
634
635 // Skip if it is standard C++ module
636 ++Idx;
637
638 // Load stored size/modification time.
639 off_t StoredSize = (off_t)Record[Idx++];
640 time_t StoredModTime = (time_t)Record[Idx++];
641
642 // Skip the stored signature.
643 // FIXME: we could read the signature out of the import and validate it.
644 StringRef SignatureBytes = Blob.substr(0, ASTFileSignature::size);
645 auto StoredSignature = ASTFileSignature::create(SignatureBytes.begin(),
646 SignatureBytes.end());
647 Blob = Blob.substr(ASTFileSignature::size);
648
649 // Retrieve the imported file name.
650 unsigned Length = Record[Idx++];
651 StringRef ImportedFile = Blob.substr(0, Length);
652 Blob = Blob.substr(Length);
653
654 // Find the imported module file.
655 auto DependsOnFile =
656 FileMgr.getOptionalFileRef(ImportedFile, /*OpenFile=*/false,
657 /*CacheFailure=*/false);
658
659 if (!DependsOnFile)
660 return llvm::createStringError(std::errc::bad_file_descriptor,
661 "imported file \"%s\" not found",
662 std::string(ImportedFile).c_str());
663
664 // Save the information in ImportedModuleFileInfo so we can verify after
665 // loading all pcms.
666 ImportedModuleFiles.insert(std::make_pair(
667 *DependsOnFile, ImportedModuleFileInfo(StoredSize, StoredModTime,
668 StoredSignature)));
669
670 // Record the dependency.
671 unsigned DependsOnID = getModuleFileInfo(*DependsOnFile).ID;
672 getModuleFileInfo(File).Dependencies.push_back(DependsOnID);
673
674 continue;
675 }
676
677 // Handle the identifier table
678 if (State == ASTBlock && Code == IDENTIFIER_TABLE && Record[0] > 0) {
679 typedef llvm::OnDiskIterableChainedHashTable<
680 InterestingASTIdentifierLookupTrait> InterestingIdentifierTable;
681 std::unique_ptr<InterestingIdentifierTable> Table(
682 InterestingIdentifierTable::Create(
683 (const unsigned char *)Blob.data() + Record[0],
684 (const unsigned char *)Blob.data() + sizeof(uint32_t),
685 (const unsigned char *)Blob.data()));
686 for (InterestingIdentifierTable::data_iterator D = Table->data_begin(),
687 DEnd = Table->data_end();
688 D != DEnd; ++D) {
689 std::pair<StringRef, bool> Ident = *D;
690 if (Ident.second)
691 InterestingIdentifiers[Ident.first].push_back(ID);
692 else
693 (void)InterestingIdentifiers[Ident.first];
694 }
695 }
696
697 // Get Signature.
698 if (State == DiagnosticOptionsBlock && Code == SIGNATURE) {
699 auto Signature = ASTFileSignature::create(Blob.begin(), Blob.end());
700 assert(Signature != ASTFileSignature::createDummy() &&
701 "Dummy AST file signature not backpatched in ASTWriter.");
702 getModuleFileInfo(File).Signature = Signature;
703 }
704
705 // We don't care about this record.
706 }
707
708 return llvm::Error::success();
709}
710
711namespace {
712
713/// Trait used to generate the identifier index as an on-disk hash
714/// table.
715class IdentifierIndexWriterTrait {
716public:
717 typedef StringRef key_type;
718 typedef StringRef key_type_ref;
719 typedef SmallVector<unsigned, 2> data_type;
720 typedef const SmallVector<unsigned, 2> &data_type_ref;
721 typedef unsigned hash_value_type;
722 typedef unsigned offset_type;
723
724 static hash_value_type ComputeHash(key_type_ref Key) {
725 return llvm::djbHash(Key);
726 }
727
728 std::pair<unsigned,unsigned>
729 EmitKeyDataLength(raw_ostream& Out, key_type_ref Key, data_type_ref Data) {
730 using namespace llvm::support;
731 endian::Writer LE(Out, llvm::endianness::little);
732 unsigned KeyLen = Key.size();
733 unsigned DataLen = Data.size() * 4;
734 LE.write<uint16_t>(KeyLen);
735 LE.write<uint16_t>(DataLen);
736 return std::make_pair(KeyLen, DataLen);
737 }
738
739 void EmitKey(raw_ostream& Out, key_type_ref Key, unsigned KeyLen) {
740 Out.write(Key.data(), KeyLen);
741 }
742
743 void EmitData(raw_ostream& Out, key_type_ref Key, data_type_ref Data,
744 unsigned DataLen) {
745 using namespace llvm::support;
746 for (unsigned I = 0, N = Data.size(); I != N; ++I)
747 endian::write<uint32_t>(Out, Data[I], llvm::endianness::little);
748 }
749};
750
751}
752
753bool GlobalModuleIndexBuilder::writeIndex(llvm::BitstreamWriter &Stream) {
754 for (auto MapEntry : ImportedModuleFiles) {
755 auto File = MapEntry.first;
756 ImportedModuleFileInfo &Info = MapEntry.second;
757 if (getModuleFileInfo(File).Signature) {
758 if (getModuleFileInfo(File).Signature != Info.StoredSignature)
759 // Verify Signature.
760 return true;
761 } else if (Info.StoredSize != File.getSize() ||
762 Info.StoredModTime != File.getModificationTime())
763 // Verify Size and ModTime.
764 return true;
765 }
766
767 using namespace llvm;
768 llvm::TimeTraceScope TimeScope("Module WriteIndex");
769
770 // Emit the file header.
771 Stream.Emit((unsigned)'B', 8);
772 Stream.Emit((unsigned)'C', 8);
773 Stream.Emit((unsigned)'G', 8);
774 Stream.Emit((unsigned)'I', 8);
775
776 // Write the block-info block, which describes the records in this bitcode
777 // file.
778 emitBlockInfoBlock(Stream);
779
780 Stream.EnterSubblock(GLOBAL_INDEX_BLOCK_ID, 3);
781
782 // Write the metadata.
784 Record.push_back(CurrentVersion);
785 Stream.EmitRecord(INDEX_METADATA, Record);
786
787 // Write the set of known module files.
788 for (ModuleFilesMap::iterator M = ModuleFiles.begin(),
789 MEnd = ModuleFiles.end();
790 M != MEnd; ++M) {
791 Record.clear();
792 Record.push_back(M->second.ID);
793 Record.push_back(M->first.getSize());
794 Record.push_back(M->first.getModificationTime());
795
796 // File name
797 StringRef Name(M->first.getName());
798 Record.push_back(Name.size());
799 Record.append(Name.begin(), Name.end());
800
801 // Dependencies
802 Record.push_back(M->second.Dependencies.size());
803 Record.append(M->second.Dependencies.begin(), M->second.Dependencies.end());
804 Stream.EmitRecord(MODULE, Record);
805 }
806
807 // Write the identifier -> module file mapping.
808 {
809 llvm::OnDiskChainedHashTableGenerator<IdentifierIndexWriterTrait> Generator;
810 IdentifierIndexWriterTrait Trait;
811
812 // Populate the hash table.
813 for (InterestingIdentifierMap::iterator I = InterestingIdentifiers.begin(),
814 IEnd = InterestingIdentifiers.end();
815 I != IEnd; ++I) {
816 Generator.insert(I->first(), I->second, Trait);
817 }
818
819 // Create the on-disk hash table in a buffer.
821 uint32_t BucketOffset;
822 {
823 using namespace llvm::support;
824 llvm::raw_svector_ostream Out(IdentifierTable);
825 // Make sure that no bucket is at offset 0
826 endian::write<uint32_t>(Out, 0, llvm::endianness::little);
827 BucketOffset = Generator.Emit(Out, Trait);
828 }
829
830 // Create a blob abbreviation
831 auto Abbrev = std::make_shared<BitCodeAbbrev>();
832 Abbrev->Add(BitCodeAbbrevOp(IDENTIFIER_INDEX));
833 Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32));
834 Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob));
835 unsigned IDTableAbbrev = Stream.EmitAbbrev(std::move(Abbrev));
836
837 // Write the identifier table
838 uint64_t Record[] = {IDENTIFIER_INDEX, BucketOffset};
839 Stream.EmitRecordWithBlob(IDTableAbbrev, Record, IdentifierTable);
840 }
841
842 Stream.ExitBlock();
843 return false;
844}
845
846llvm::Error
848 const PCHContainerReader &PCHContainerRdr,
849 StringRef Path) {
850 // This is a compiler-internal input/output, let's bypass the sandbox.
851 auto BypassSandbox = llvm::sys::sandbox::scopedDisable();
852
853 llvm::SmallString<128> IndexPath;
854 IndexPath += Path;
855 llvm::sys::path::append(IndexPath, IndexFileName);
856
857 // Coordinate building the global index file with other processes that might
858 // try to do the same.
859 llvm::LockFileManager Lock(IndexPath);
860 bool Owned;
861 if (llvm::Error Err = Lock.tryLock().moveInto(Owned)) {
862 llvm::consumeError(std::move(Err));
863 return llvm::createStringError(std::errc::io_error, "LFS error");
864 }
865 if (!Owned) {
866 // Someone else is responsible for building the index. We don't care
867 // when they finish, so we're done.
868 return llvm::createStringError(std::errc::device_or_resource_busy,
869 "someone else is building the index");
870 }
871
872 // We're responsible for building the index ourselves.
873
874 // The module index builder.
875 GlobalModuleIndexBuilder Builder(FileMgr, PCHContainerRdr);
876
877 // Load each of the module files.
878 std::error_code EC;
879 for (llvm::sys::fs::directory_iterator D(Path, EC), DEnd;
880 D != DEnd && !EC;
881 D.increment(EC)) {
882 // If this isn't a module file, we don't care.
883 if (llvm::sys::path::extension(D->path()) != ".pcm") {
884 // ... unless it's a .pcm.lock file, which indicates that someone is
885 // in the process of rebuilding a module. They'll rebuild the index
886 // at the end of that translation unit, so we don't have to.
887 if (llvm::sys::path::extension(D->path()) == ".pcm.lock")
888 return llvm::createStringError(std::errc::device_or_resource_busy,
889 "someone else is building the index");
890
891 continue;
892 }
893
894 // If we can't find the module file, skip it.
895 auto ModuleFile = FileMgr.getOptionalFileRef(D->path());
896 if (!ModuleFile)
897 continue;
898
899 // Load this module file.
900 if (llvm::Error Err = Builder.loadModuleFile(*ModuleFile))
901 return Err;
902 }
903
904 // The output buffer, into which the global index will be written.
905 SmallString<16> OutputBuffer;
906 {
907 llvm::BitstreamWriter OutputStream(OutputBuffer);
908 if (Builder.writeIndex(OutputStream))
909 return llvm::createStringError(std::errc::io_error,
910 "failed writing index");
911 }
912
913 return llvm::writeToOutput(IndexPath, [&OutputBuffer](llvm::raw_ostream &OS) {
914 OS << OutputBuffer;
915 return llvm::Error::success();
916 });
917}
918
919namespace {
920 class GlobalIndexIdentifierIterator : public IdentifierIterator {
921 /// The current position within the identifier lookup table.
922 IdentifierIndexTable::key_iterator Current;
923
924 /// The end position within the identifier lookup table.
925 IdentifierIndexTable::key_iterator End;
926
927 public:
928 explicit GlobalIndexIdentifierIterator(IdentifierIndexTable &Idx) {
929 Current = Idx.key_begin();
930 End = Idx.key_end();
931 }
932
933 StringRef Next() override {
934 if (Current == End)
935 return StringRef();
936
937 StringRef Result = *Current;
938 ++Current;
939 return Result;
940 }
941 };
942}
943
945 IdentifierIndexTable &Table =
946 *static_cast<IdentifierIndexTable *>(IdentifierIndex);
947 return new GlobalIndexIdentifierIterator(Table);
948}
#define RECORD(CLASS, BASE)
Defines the clang::FileManager interface and associated types.
FormatToken * Next
The next token in the unwrapped line.
static void emitRecordID(unsigned ID, const char *Name, llvm::BitstreamWriter &Stream, SmallVectorImpl< uint64_t > &Record)
static const unsigned CurrentVersion
The global index file version.
static const char *const IndexFileName
The name of the global index file.
static void emitBlockID(unsigned ID, const char *Name, llvm::BitstreamWriter &Stream, SmallVectorImpl< uint64_t > &Record)
llvm::MachO::Record Record
Definition MachO.h:31
static std::string toString(const clang::SanitizerSet &Sanitizers)
Produce a string containing comma-separated names of sanitizers in Sanitizers set.
#define IMPORT(DERIVED, BASE)
Definition Template.h:628
#define BLOCK(DERIVED, BASE)
Definition Template.h:644
__device__ __2f16 b
A reference to a FileEntry that includes the name of the file as it was accessed by the FileManager's...
Definition FileEntry.h:57
Implements support for file system lookup, file system caching, and directory search management.
Definition FileManager.h:53
bool loadedModuleFile(ModuleFile *File)
Note that the given module file has been loaded.
void printStats()
Print statistics to standard error.
llvm::SmallPtrSet< ModuleFile *, 4 > HitSet
A set of module files in which we found a result.
bool lookupIdentifier(llvm::StringRef Name, HitSet &Hits)
Look for all of the module files with information about the given identifier, e.g....
void getModuleDependencies(ModuleFile *File, llvm::SmallVectorImpl< ModuleFile * > &Dependencies)
Retrieve the set of module files on which the given module file directly depends.
IdentifierIterator * createIdentifierIterator() const
Returns an iterator for identifiers stored in the index table.
static std::pair< GlobalModuleIndex *, llvm::Error > readIndex(llvm::StringRef Path)
Read a global index file for the given directory.
void dump()
Print debugging view to standard error.
static llvm::Error writeIndex(FileManager &FileMgr, const PCHContainerReader &PCHContainerRdr, llvm::StringRef Path)
Write a global index into the given directory.
An iterator that walks over all of the known identifiers in the lookup table.
Implements an efficient mapping from strings to IdentifierInfo nodes.
This abstract interface provides operations for unwrapping containers for serialized ASTs (precompile...
virtual llvm::StringRef ExtractPCH(llvm::MemoryBufferRef Buffer) const =0
Returns the serialized AST inside the PCH container Buffer.
Base class for the trait describing the on-disk hash table for the identifiers in an AST file.
@ ModuleFileInfo
Dump information about a module file.
bool LE(InterpState &S, CodePtr OpPC)
Definition Interp.h:1249
@ AST_BLOCK_ID
The AST block, which acts as a container around the full AST block.
@ CONTROL_BLOCK_ID
The control block, which contains all of the information that needs to be validated prior to committi...
@ UNHASHED_CONTROL_BLOCK_ID
A block with unhashed content.
@ SIGNATURE
Record code for the signature that identifies this AST file.
@ IDENTIFIER_TABLE
Record code for the identifier table.
unsigned ComputeHash(Selector Sel)
uint64_t IdentifierID
An ID number that refers to an identifier in an AST file.
Definition ASTBitCodes.h:63
std::shared_ptr< MatchComputation< T > > Generator
Definition RewriteRule.h:65
The JSON file list parser is used to communicate input to InstallAPI.
@ Other
Other implicit parameter.
Definition Decl.h:1746
unsigned long uint64_t
unsigned int uint32_t
__LIBC_ATTRS FILE * stderr
The signature of a module, which is a hash of the AST content.
Definition Module.h:58
static constexpr size_t size
Definition Module.h:61
static ASTFileSignature create(std::array< uint8_t, 20 > Bytes)
Definition Module.h:81
static ASTFileSignature createDummy()
Definition Module.h:91