19#include "llvm/ADT/DenseMap.h"
20#include "llvm/ADT/MapVector.h"
21#include "llvm/ADT/SmallString.h"
22#include "llvm/ADT/StringRef.h"
23#include "llvm/Bitstream/BitstreamReader.h"
24#include "llvm/Bitstream/BitstreamWriter.h"
25#include "llvm/Support/DJB.h"
26#include "llvm/Support/FileSystem.h"
27#include "llvm/Support/LockFileManager.h"
28#include "llvm/Support/MemoryBuffer.h"
29#include "llvm/Support/OnDiskHashTable.h"
30#include "llvm/Support/Path.h"
31#include "llvm/Support/TimeProfiler.h"
32#include "llvm/Support/raw_ostream.h"
35using namespace serialization;
43 GLOBAL_INDEX_BLOCK_ID = llvm::bitc::FIRST_APPLICATION_BLOCKID
47 enum IndexRecordTypes {
72class IdentifierIndexReaderTrait {
74 typedef StringRef external_key_type;
75 typedef StringRef internal_key_type;
77 typedef unsigned hash_value_type;
78 typedef unsigned offset_type;
80 static bool EqualKey(
const internal_key_type& a,
const internal_key_type&
b) {
84 static hash_value_type
ComputeHash(
const internal_key_type& a) {
85 return llvm::djbHash(a);
88 static std::pair<unsigned, unsigned>
89 ReadKeyDataLength(
const unsigned char*& d) {
90 using namespace llvm::support;
91 unsigned KeyLen = endian::readNext<uint16_t, llvm::endianness::little>(d);
92 unsigned DataLen = endian::readNext<uint16_t, llvm::endianness::little>(d);
93 return std::make_pair(KeyLen, DataLen);
// Identity conversion from external to internal key: both key types are
// typedef'd to StringRef in this trait, so the argument is returned as-is
// (by reference, no copy).
96 static const internal_key_type&
97 GetInternalKey(
const external_key_type& x) {
return x; }
// Identity conversion from internal to external key: both key types are
// typedef'd to StringRef in this trait, so the argument is returned as-is
// (by reference, no copy).
99 static const external_key_type&
100 GetExternalKey(
const internal_key_type& x) {
return x; }
102 static internal_key_type ReadKey(
const unsigned char* d,
unsigned n) {
103 return StringRef((
const char *)d, n);
106 static data_type ReadData(
const internal_key_type& k,
107 const unsigned char* d,
109 using namespace llvm::support;
112 while (DataLen > 0) {
113 unsigned ID = endian::readNext<uint32_t, llvm::endianness::little>(d);
114 Result.push_back(ID);
122typedef llvm::OnDiskIterableChainedHashTable<IdentifierIndexReaderTrait>
123 IdentifierIndexTable;
127GlobalModuleIndex::GlobalModuleIndex(
128 std::unique_ptr<llvm::MemoryBuffer> IndexBuffer,
129 llvm::BitstreamCursor Cursor)
130 : Buffer(
std::move(IndexBuffer)), IdentifierIndex(), NumIdentifierLookups(),
131 NumIdentifierLookupHits() {
132 auto Fail = [&](llvm::Error &&Err) {
133 report_fatal_error(
"Module index '" + Buffer->getBufferIdentifier() +
134 "' failed: " +
toString(std::move(Err)));
137 llvm::TimeTraceScope TimeScope(
"Module LoadIndex");
139 bool InGlobalIndexBlock =
false;
142 llvm::BitstreamEntry Entry;
146 Fail(Res.takeError());
148 switch (Entry.Kind) {
149 case llvm::BitstreamEntry::Error:
152 case llvm::BitstreamEntry::EndBlock:
153 if (InGlobalIndexBlock) {
154 InGlobalIndexBlock =
false;
161 case llvm::BitstreamEntry::Record:
163 if (InGlobalIndexBlock)
168 case llvm::BitstreamEntry::SubBlock:
169 if (!InGlobalIndexBlock && Entry.ID == GLOBAL_INDEX_BLOCK_ID) {
170 if (llvm::Error Err =
Cursor.EnterSubBlock(GLOBAL_INDEX_BLOCK_ID))
171 Fail(std::move(Err));
172 InGlobalIndexBlock =
true;
173 }
else if (llvm::Error Err =
Cursor.SkipBlock())
174 Fail(std::move(Err));
182 if (!MaybeIndexRecord)
183 Fail(MaybeIndexRecord.takeError());
184 IndexRecordTypes IndexRecord =
185 static_cast<IndexRecordTypes
>(MaybeIndexRecord.get());
186 switch (IndexRecord) {
198 if (ID == Modules.size())
199 Modules.push_back(ModuleInfo());
201 Modules.resize(ID + 1);
206 Modules[
ID].ModTime =
Record[Idx++];
210 Modules[
ID].FileName.assign(
Record.begin() + Idx,
215 unsigned NumDeps =
Record[Idx++];
216 Modules[
ID].Dependencies.insert(Modules[ID].Dependencies.end(),
218 Record.begin() + Idx + NumDeps);
222 assert(Idx ==
Record.size() &&
"More module info?");
227 StringRef ModuleName = llvm::sys::path::stem(Modules[ID].
FileName);
229 ModuleName = ModuleName.rsplit(
'-').first;
230 UnresolvedModules[ModuleName] =
ID;
234 case IDENTIFIER_INDEX:
237 IdentifierIndex = IdentifierIndexTable::Create(
238 (
const unsigned char *)Blob.data() +
Record[0],
239 (
const unsigned char *)Blob.data() +
sizeof(uint32_t),
240 (
const unsigned char *)Blob.data(), IdentifierIndexReaderTrait());
248 delete static_cast<IdentifierIndexTable *
>(IdentifierIndex);
251std::pair<GlobalModuleIndex *, llvm::Error>
258 llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> BufferOrErr =
259 llvm::MemoryBuffer::getFile(IndexPath.c_str());
261 return std::make_pair(
nullptr,
262 llvm::errorCodeToError(BufferOrErr.getError()));
263 std::unique_ptr<llvm::MemoryBuffer> Buffer = std::move(BufferOrErr.get());
266 llvm::BitstreamCursor Cursor(*Buffer);
269 for (
unsigned char C : {
'B',
'C',
'G',
'I'}) {
272 return std::make_pair(
273 nullptr, llvm::createStringError(std::errc::illegal_byte_sequence,
274 "expected signature BCGI"));
276 return std::make_pair(
nullptr, Res.takeError());
279 return std::make_pair(
new GlobalModuleIndex(std::move(Buffer), std::move(Cursor)),
280 llvm::Error::success());
287 llvm::DenseMap<ModuleFile *, unsigned>::iterator Known
288 = ModulesByFile.find(
File);
289 if (Known == ModulesByFile.end())
293 Dependencies.clear();
295 for (
unsigned I = 0, N = StoredDependencies.size(); I != N; ++I) {
297 Dependencies.push_back(MF);
305 if (!IdentifierIndex)
309 ++NumIdentifierLookups;
310 IdentifierIndexTable &Table
311 = *
static_cast<IdentifierIndexTable *
>(IdentifierIndex);
312 IdentifierIndexTable::iterator Known = Table.find(Name);
313 if (Known == Table.end()) {
318 for (
unsigned I = 0, N = ModuleIDs.size(); I != N; ++I) {
323 ++NumIdentifierLookupHits;
329 StringRef Name =
File->ModuleName;
330 llvm::StringMap<unsigned>::iterator Known = UnresolvedModules.find(Name);
331 if (Known == UnresolvedModules.end()) {
336 ModuleInfo &Info = Modules[Known->second];
341 if (
File->File.getSize() == Info.Size &&
342 File->File.getModificationTime() == Info.ModTime) {
344 ModulesByFile[
File] = Known->second;
350 UnresolvedModules.erase(Known);
355 std::fprintf(stderr,
"*** Global Module Index Statistics:\n");
356 if (NumIdentifierLookups) {
357 fprintf(stderr,
" %u / %u identifier lookups succeeded (%f%%)\n",
358 NumIdentifierLookupHits, NumIdentifierLookups,
359 (
double)NumIdentifierLookupHits*100.0/NumIdentifierLookups);
361 std::fprintf(stderr,
"\n");
365 llvm::errs() <<
"*** Global Module Index Dump:\n";
366 llvm::errs() <<
"Module files:\n";
367 for (
auto &MI : Modules) {
368 llvm::errs() <<
"** " << MI.FileName <<
"\n";
372 llvm::errs() <<
"\n";
374 llvm::errs() <<
"\n";
383 struct ModuleFileInfo {
393 struct ImportedModuleFileInfo {
395 time_t StoredModTime;
398 : StoredSize(Size), StoredModTime(ModTime), StoredSignature(Sig) {}
402 class GlobalModuleIndexBuilder {
407 using ModuleFilesMap = llvm::MapVector<FileEntryRef, ModuleFileInfo>;
410 ModuleFilesMap ModuleFiles;
414 using ImportedModuleFilesMap =
415 std::multimap<FileEntryRef, ImportedModuleFileInfo>;
418 ImportedModuleFilesMap ImportedModuleFiles;
422 typedef llvm::StringMap<SmallVector<unsigned, 2> > InterestingIdentifierMap;
426 InterestingIdentifierMap InterestingIdentifiers;
429 void emitBlockInfoBlock(llvm::BitstreamWriter &Stream);
433 auto Known = ModuleFiles.find(
File);
434 if (Known != ModuleFiles.end())
435 return Known->second;
437 unsigned NewID = ModuleFiles.size();
444 explicit GlobalModuleIndexBuilder(
446 : FileMgr(FileMgr), PCHContainerRdr(PCHContainerRdr) {}
453 bool writeIndex(llvm::BitstreamWriter &Stream);
458 llvm::BitstreamWriter &Stream,
462 Stream.EmitRecord(llvm::bitc::BLOCKINFO_CODE_SETBID,
Record);
465 if (!Name || Name[0] == 0)
return;
468 Record.push_back(*Name++);
469 Stream.EmitRecord(llvm::bitc::BLOCKINFO_CODE_BLOCKNAME,
Record);
473 llvm::BitstreamWriter &Stream,
478 Record.push_back(*Name++);
479 Stream.EmitRecord(llvm::bitc::BLOCKINFO_CODE_SETRECORDNAME,
Record);
483GlobalModuleIndexBuilder::emitBlockInfoBlock(llvm::BitstreamWriter &Stream) {
485 Stream.EnterBlockInfoBlock();
487#define BLOCK(X) emitBlockID(X ## _ID, #X, Stream, Record)
488#define RECORD(X) emitRecordID(X, #X, Stream, Record)
489 BLOCK(GLOBAL_INDEX_BLOCK);
500 class InterestingASTIdentifierLookupTrait
505 typedef std::pair<StringRef, bool> data_type;
507 data_type ReadData(
const internal_key_type& k,
508 const unsigned char* d,
512 using namespace llvm::support;
514 endian::readNext<IdentifierID, llvm::endianness::little>(d);
515 bool IsInteresting = RawID & 0x01;
516 return std::make_pair(k, IsInteresting);
526 return llvm::createStringError(Buffer.getError(),
527 "failed getting buffer for module file");
530 llvm::BitstreamCursor InStream(PCHContainerRdr.
ExtractPCH(**Buffer));
533 for (
unsigned char C : {
'C',
'P',
'C',
'H'})
536 return llvm::createStringError(std::errc::illegal_byte_sequence,
537 "expected signature CPCH");
539 return Res.takeError();
543 unsigned ID = getModuleFileInfo(
File).ID;
546 enum {
Other, ControlBlock, ASTBlock, DiagnosticOptionsBlock } State =
Other;
551 return MaybeEntry.takeError();
552 llvm::BitstreamEntry Entry = MaybeEntry.get();
554 switch (Entry.Kind) {
555 case llvm::BitstreamEntry::Error:
559 case llvm::BitstreamEntry::Record:
561 if (State ==
Other) {
565 return Skipped.takeError();
571 case llvm::BitstreamEntry::SubBlock:
577 State = ControlBlock;
582 if (llvm::Error Err = InStream.EnterSubBlock(
AST_BLOCK_ID))
595 State = DiagnosticOptionsBlock;
599 if (llvm::Error Err = InStream.SkipBlock())
604 case llvm::BitstreamEntry::EndBlock:
614 return MaybeCode.takeError();
615 unsigned Code = MaybeCode.get();
618 if (State == ControlBlock && Code ==
IMPORTS) {
620 unsigned Idx = 0, N =
Record.size();
634 off_t StoredSize = (off_t)
Record[Idx++];
635 time_t StoredModTime = (time_t)
Record[Idx++];
639 auto FirstSignatureByte =
Record.begin() + Idx;
649 unsigned Length =
Record[Idx++];
651 Record.begin() + Idx + Length);
660 return llvm::createStringError(std::errc::bad_file_descriptor,
661 "imported file \"%s\" not found",
662 ImportedFile.c_str());
666 ImportedModuleFiles.insert(std::make_pair(
667 *DependsOnFile, ImportedModuleFileInfo(StoredSize, StoredModTime,
671 unsigned DependsOnID = getModuleFileInfo(*DependsOnFile).ID;
672 getModuleFileInfo(
File).Dependencies.push_back(DependsOnID);
680 typedef llvm::OnDiskIterableChainedHashTable<
681 InterestingASTIdentifierLookupTrait> InterestingIdentifierTable;
682 std::unique_ptr<InterestingIdentifierTable> Table(
683 InterestingIdentifierTable::Create(
684 (
const unsigned char *)Blob.data() +
Record[0],
685 (
const unsigned char *)Blob.data() +
sizeof(uint32_t),
686 (
const unsigned char *)Blob.data()));
687 for (InterestingIdentifierTable::data_iterator
D = Table->data_begin(),
688 DEnd = Table->data_end();
690 std::pair<StringRef, bool> Ident = *
D;
692 InterestingIdentifiers[Ident.first].push_back(ID);
694 (
void)InterestingIdentifiers[Ident.first];
699 if (State == DiagnosticOptionsBlock && Code ==
SIGNATURE) {
702 "Dummy AST file signature not backpatched in ASTWriter.");
703 getModuleFileInfo(
File).Signature = Signature;
709 return llvm::Error::success();
716class IdentifierIndexWriterTrait {
718 typedef StringRef key_type;
719 typedef StringRef key_type_ref;
722 typedef unsigned hash_value_type;
723 typedef unsigned offset_type;
725 static hash_value_type
ComputeHash(key_type_ref Key) {
726 return llvm::djbHash(Key);
729 std::pair<unsigned,unsigned>
730 EmitKeyDataLength(raw_ostream& Out, key_type_ref Key, data_type_ref
Data) {
731 using namespace llvm::support;
732 endian::Writer
LE(Out, llvm::endianness::little);
733 unsigned KeyLen = Key.size();
734 unsigned DataLen =
Data.size() * 4;
735 LE.write<uint16_t>(KeyLen);
736 LE.write<uint16_t>(DataLen);
737 return std::make_pair(KeyLen, DataLen);
740 void EmitKey(raw_ostream& Out, key_type_ref Key,
unsigned KeyLen) {
741 Out.write(Key.data(), KeyLen);
744 void EmitData(raw_ostream& Out, key_type_ref Key, data_type_ref
Data,
746 using namespace llvm::support;
747 for (
unsigned I = 0, N =
Data.size(); I != N; ++I)
748 endian::write<uint32_t>(Out,
Data[I], llvm::endianness::little);
754bool GlobalModuleIndexBuilder::writeIndex(llvm::BitstreamWriter &Stream) {
755 for (
auto MapEntry : ImportedModuleFiles) {
756 auto File = MapEntry.first;
757 ImportedModuleFileInfo &Info = MapEntry.second;
758 if (getModuleFileInfo(
File).Signature) {
759 if (getModuleFileInfo(
File).Signature != Info.StoredSignature)
762 }
else if (Info.StoredSize !=
File.getSize() ||
763 Info.StoredModTime !=
File.getModificationTime())
768 using namespace llvm;
769 llvm::TimeTraceScope TimeScope(
"Module WriteIndex");
772 Stream.Emit((
unsigned)
'B', 8);
773 Stream.Emit((
unsigned)
'C', 8);
774 Stream.Emit((
unsigned)
'G', 8);
775 Stream.Emit((
unsigned)
'I', 8);
779 emitBlockInfoBlock(Stream);
781 Stream.EnterSubblock(GLOBAL_INDEX_BLOCK_ID, 3);
786 Stream.EmitRecord(INDEX_METADATA,
Record);
789 for (ModuleFilesMap::iterator M = ModuleFiles.begin(),
790 MEnd = ModuleFiles.end();
793 Record.push_back(M->second.ID);
794 Record.push_back(M->first.getSize());
795 Record.push_back(M->first.getModificationTime());
798 StringRef Name(M->first.getName());
799 Record.push_back(Name.size());
800 Record.append(Name.begin(), Name.end());
803 Record.push_back(M->second.Dependencies.size());
804 Record.append(M->second.Dependencies.begin(), M->second.Dependencies.end());
805 Stream.EmitRecord(MODULE,
Record);
810 llvm::OnDiskChainedHashTableGenerator<IdentifierIndexWriterTrait>
Generator;
811 IdentifierIndexWriterTrait Trait;
814 for (InterestingIdentifierMap::iterator I = InterestingIdentifiers.begin(),
815 IEnd = InterestingIdentifiers.end();
817 Generator.insert(I->first(), I->second, Trait);
824 using namespace llvm::support;
827 endian::write<uint32_t>(Out, 0, llvm::endianness::little);
828 BucketOffset =
Generator.Emit(Out, Trait);
832 auto Abbrev = std::make_shared<BitCodeAbbrev>();
833 Abbrev->Add(BitCodeAbbrevOp(IDENTIFIER_INDEX));
834 Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32));
835 Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob));
836 unsigned IDTableAbbrev = Stream.EmitAbbrev(std::move(Abbrev));
857 llvm::LockFileManager Locked(IndexPath);
859 case llvm::LockFileManager::LFS_Error:
860 return llvm::createStringError(std::errc::io_error,
"LFS error");
862 case llvm::LockFileManager::LFS_Owned:
866 case llvm::LockFileManager::LFS_Shared:
869 return llvm::createStringError(std::errc::device_or_resource_busy,
870 "someone else is building the index");
874 GlobalModuleIndexBuilder Builder(FileMgr, PCHContainerRdr);
878 for (llvm::sys::fs::directory_iterator
D(
Path, EC), DEnd;
882 if (llvm::sys::path::extension(
D->path()) !=
".pcm") {
886 if (llvm::sys::path::extension(
D->path()) ==
".pcm.lock")
887 return llvm::createStringError(std::errc::device_or_resource_busy,
888 "someone else is building the index");
899 if (llvm::Error Err = Builder.loadModuleFile(*
ModuleFile))
906 llvm::BitstreamWriter OutputStream(OutputBuffer);
907 if (Builder.writeIndex(OutputStream))
908 return llvm::createStringError(std::errc::io_error,
909 "failed writing index");
912 return llvm::writeToOutput(IndexPath, [&OutputBuffer](llvm::raw_ostream &OS) {
914 return llvm::Error::success();
921 IdentifierIndexTable::key_iterator Current;
924 IdentifierIndexTable::key_iterator End;
927 explicit GlobalIndexIdentifierIterator(IdentifierIndexTable &Idx) {
928 Current = Idx.key_begin();
932 StringRef Next()
override {
936 StringRef Result = *Current;
944 IdentifierIndexTable &Table =
945 *
static_cast<IdentifierIndexTable *
>(IdentifierIndex);
946 return new GlobalIndexIdentifierIterator(Table);
Defines the clang::FileManager interface and associated types.
static void emitRecordID(unsigned ID, const char *Name, llvm::BitstreamWriter &Stream, SmallVectorImpl< uint64_t > &Record)
static const unsigned CurrentVersion
The global index file version.
static const char *const IndexFileName
The name of the global index file.
static void emitBlockID(unsigned ID, const char *Name, llvm::BitstreamWriter &Stream, SmallVectorImpl< uint64_t > &Record)
llvm::MachO::Record Record
static std::string toString(const clang::SanitizerSet &Sanitizers)
Produce a string containing comma-separated names of sanitizers in Sanitizers set.
#define BLOCK(DERIVED, BASE)
A reference to a FileEntry that includes the name of the file as it was accessed by the FileManager's...
Implements support for file system lookup, file system caching, and directory search management.
OptionalFileEntryRef getOptionalFileRef(StringRef Filename, bool OpenFile=false, bool CacheFailure=true)
Get a FileEntryRef if it exists, without doing anything on error.
llvm::ErrorOr< std::unique_ptr< llvm::MemoryBuffer > > getBufferForFile(FileEntryRef Entry, bool isVolatile=false, bool RequiresNullTerminator=true, std::optional< int64_t > MaybeLimit=std::nullopt)
Open the specified file as a MemoryBuffer, returning a new MemoryBuffer if successful,...
A global index for a set of module files, providing information about the identifiers within those mo...
bool loadedModuleFile(ModuleFile *File)
Note that the given module file has been loaded.
void printStats()
Print statistics to standard error.
bool lookupIdentifier(llvm::StringRef Name, HitSet &Hits)
Look for all of the module files with information about the given identifier, e.g....
void getModuleDependencies(ModuleFile *File, llvm::SmallVectorImpl< ModuleFile * > &Dependencies)
Retrieve the set of module files on which the given module file directly depends.
IdentifierIterator * createIdentifierIterator() const
Returns an iterator for identifiers stored in the index table.
static std::pair< GlobalModuleIndex *, llvm::Error > readIndex(llvm::StringRef Path)
Read a global index file for the given directory.
void dump()
Print debugging view to standard error.
static llvm::Error writeIndex(FileManager &FileMgr, const PCHContainerReader &PCHContainerRdr, llvm::StringRef Path)
Write a global index into the given directory.
An iterator that walks over all of the known identifiers in the lookup table.
Implements an efficient mapping from strings to IdentifierInfo nodes.
This abstract interface provides operations for unwrapping containers for serialized ASTs (precompile...
virtual llvm::StringRef ExtractPCH(llvm::MemoryBufferRef Buffer) const =0
Returns the serialized AST inside the PCH container Buffer.
Information about a module that has been loaded by the ASTReader.
Base class for the trait describing the on-disk hash table for the identifiers in an AST file.
@ ModuleFileInfo
Dump information about a module file.
bool LE(InterpState &S, CodePtr OpPC)
@ AST_BLOCK_ID
The AST block, which acts as a container around the full AST block.
@ CONTROL_BLOCK_ID
The control block, which contains all of the information that needs to be validated prior to committi...
@ UNHASHED_CONTROL_BLOCK_ID
A block with unhashed content.
@ IMPORTS
Record code for the list of other AST files imported by this AST file.
@ SIGNATURE
Record code for the signature that identifies this AST file.
uint64_t IdentifierID
An ID number that refers to an identifier in an AST file.
@ IDENTIFIER_TABLE
Record code for the identifier table.
unsigned ComputeHash(Selector Sel)
The JSON file list parser is used to communicate input to InstallAPI.
@ Other
Other implicit parameter.
Diagnostic wrappers for TextAPI types for error reporting.
The signature of a module, which is a hash of the AST content.
static constexpr size_t size
static ASTFileSignature create(std::array< uint8_t, 20 > Bytes)
static ASTFileSignature createDummy()