19#include "llvm/ADT/DenseMap.h"
20#include "llvm/ADT/MapVector.h"
21#include "llvm/ADT/SmallString.h"
22#include "llvm/ADT/StringRef.h"
23#include "llvm/Bitstream/BitstreamReader.h"
24#include "llvm/Bitstream/BitstreamWriter.h"
25#include "llvm/Support/DJB.h"
26#include "llvm/Support/FileSystem.h"
27#include "llvm/Support/LockFileManager.h"
28#include "llvm/Support/MemoryBuffer.h"
29#include "llvm/Support/OnDiskHashTable.h"
30#include "llvm/Support/Path.h"
31#include "llvm/Support/TimeProfiler.h"
32#include "llvm/Support/raw_ostream.h"
35using namespace serialization;
43 GLOBAL_INDEX_BLOCK_ID = llvm::bitc::FIRST_APPLICATION_BLOCKID
47 enum IndexRecordTypes {
/// Trait describing how entries of the on-disk identifier index are decoded.
/// It is the template argument of the llvm::OnDiskIterableChainedHashTable
/// instantiation named IdentifierIndexTable.
72class IdentifierIndexReaderTrait {
// Lookup keys and stored keys are both plain StringRefs.
74 typedef StringRef external_key_type;
75 typedef StringRef internal_key_type;
77 typedef unsigned hash_value_type;
78 typedef unsigned offset_type;
// Key comparison hook; its body is not visible in this excerpt.
80 static bool EqualKey(
const internal_key_type& a,
const internal_key_type&
b) {
// Hashes a key with llvm::djbHash, the same function that
// IdentifierIndexWriterTrait::ComputeHash applies on the writing side.
84 static hash_value_type
ComputeHash(
const internal_key_type& a) {
85 return llvm::djbHash(a);
// Decodes the entry header at d: a little-endian uint16_t key length followed
// by a little-endian uint16_t data length, returned as {KeyLen, DataLen}.
// d is taken by reference and handed to endian::readNext for each field.
88 static std::pair<unsigned, unsigned>
89 ReadKeyDataLength(
const unsigned char*& d) {
90 using namespace llvm::support;
91 unsigned KeyLen = endian::readNext<uint16_t, llvm::endianness::little>(d);
92 unsigned DataLen = endian::readNext<uint16_t, llvm::endianness::little>(d);
93 return std::make_pair(KeyLen, DataLen);
// External and internal keys are the same type, so both conversions below
// return their argument unchanged.
96 static const internal_key_type&
97 GetInternalKey(
const external_key_type& x) {
return x; }
99 static const external_key_type&
100 GetExternalKey(
const internal_key_type& x) {
return x; }
// Presents the n bytes starting at d as a StringRef key.
102 static internal_key_type ReadKey(
const unsigned char* d,
unsigned n) {
103 return StringRef((
const char *)d, n);
// Decodes the data portion of an entry: 32-bit little-endian IDs are read from
// d and appended to Result for as long as DataLen is non-zero.
// NOTE(review): the declarations of DataLen and Result, and the statement that
// updates DataLen inside the loop, are not visible in this excerpt.
106 static data_type ReadData(
const internal_key_type& k,
107 const unsigned char* d,
109 using namespace llvm::support;
112 while (DataLen > 0) {
113 unsigned ID = endian::readNext<uint32_t, llvm::endianness::little>(d);
114 Result.push_back(ID);
/// The on-disk hash table type used for the identifier index: an
/// llvm::OnDiskIterableChainedHashTable instantiated with
/// IdentifierIndexReaderTrait.
122typedef llvm::OnDiskIterableChainedHashTable<IdentifierIndexReaderTrait>
123 IdentifierIndexTable;
/// Builds the in-memory index from a serialized index buffer.
///
/// IndexBuffer is moved into Buffer; IdentifierIndex and both lookup counters
/// are value-initialized. The bitstream is then walked through Cursor, filling
/// Modules / UnresolvedModules from module records and creating the identifier
/// hash table from the IDENTIFIER_INDEX record.
/// NOTE(review): many lines of this definition are missing from this excerpt;
/// the comments below describe only the statements that are visible.
127GlobalModuleIndex::GlobalModuleIndex(
128 std::unique_ptr<llvm::MemoryBuffer> IndexBuffer,
129 llvm::BitstreamCursor Cursor)
130 : Buffer(
std::move(IndexBuffer)), IdentifierIndex(), NumIdentifierLookups(),
131 NumIdentifierLookupHits() {
// Every bitstream error is routed through Fail, which calls
// report_fatal_error with a message naming the index buffer.
132 auto Fail = [&](llvm::Error &&Err) {
133 report_fatal_error(
"Module index '" + Buffer->getBufferIdentifier() +
134 "' failed: " +
toString(std::move(Err)));
137 llvm::TimeTraceScope TimeScope(
"Module LoadIndex");
// Tracks whether the cursor is currently inside the global index block.
139 bool InGlobalIndexBlock =
false;
142 llvm::BitstreamEntry Entry;
146 Fail(Res.takeError());
148 switch (Entry.Kind) {
149 case llvm::BitstreamEntry::Error:
// Leaving a block while inside the global index block clears the flag.
152 case llvm::BitstreamEntry::EndBlock:
153 if (InGlobalIndexBlock) {
154 InGlobalIndexBlock =
false;
161 case llvm::BitstreamEntry::Record:
163 if (InGlobalIndexBlock)
// Enter the global index block when it is found and we are not already in
// it; any other sub-block is skipped. Both operations fail fatally on error.
168 case llvm::BitstreamEntry::SubBlock:
169 if (!InGlobalIndexBlock && Entry.ID == GLOBAL_INDEX_BLOCK_ID) {
170 if (llvm::Error Err =
Cursor.EnterSubBlock(GLOBAL_INDEX_BLOCK_ID))
171 Fail(std::move(Err));
172 InGlobalIndexBlock =
true;
173 }
else if (llvm::Error Err =
Cursor.SkipBlock())
174 Fail(std::move(Err));
// The record code is cast to IndexRecordTypes and dispatched on.
182 if (!MaybeIndexRecord)
183 Fail(MaybeIndexRecord.takeError());
184 IndexRecordTypes IndexRecord =
185 static_cast<IndexRecordTypes
>(MaybeIndexRecord.get());
186 switch (IndexRecord) {
// Grow Modules so that slot ID exists: push_back when ID is exactly the next
// slot; resize(ID + 1) presumably covers the remaining case (its guard is not
// visible in this excerpt).
198 if (ID == Modules.size())
199 Modules.push_back(ModuleInfo());
201 Modules.resize(ID + 1);
// Fields are consumed from Record in order, advancing Idx.
206 Modules[
ID].ModTime =
Record[Idx++];
210 Modules[
ID].FileName.assign(
Record.begin() + Idx,
// Dependency list: a count followed by that many entries.
215 unsigned NumDeps =
Record[Idx++];
216 Modules[
ID].Dependencies.insert(Modules[ID].Dependencies.end(),
218 Record.begin() + Idx + NumDeps);
// The whole record must have been consumed at this point.
222 assert(Idx ==
Record.size() &&
"More module info?");
// UnresolvedModules is keyed by the file name's stem with the last '-' and
// everything after it removed (the stem is kept whole if it has no '-').
227 StringRef ModuleName = llvm::sys::path::stem(Modules[ID].
FileName);
229 ModuleName = ModuleName.rsplit(
'-').first;
230 UnresolvedModules[ModuleName] =
ID;
// Create the identifier table over the record's blob: the first argument is
// the blob start plus the offset stored in Record[0], the second is the blob
// start plus sizeof(uint32_t), and the third is the blob start itself.
234 case IDENTIFIER_INDEX:
237 IdentifierIndex = IdentifierIndexTable::Create(
238 (
const unsigned char *)Blob.data() +
Record[0],
239 (
const unsigned char *)Blob.data() +
sizeof(uint32_t),
240 (
const unsigned char *)Blob.data(), IdentifierIndexReaderTrait());
248 delete static_cast<IdentifierIndexTable *
>(IdentifierIndex);
/// Loads an index file and wraps it in a newly allocated GlobalModuleIndex.
///
/// Returns {index, llvm::Error::success()} on success, or {nullptr, error}
/// when the file cannot be read, the four signature bytes are not "BCGI", or
/// reading a signature byte fails. The index is created with a plain `new`,
/// so the returned raw pointer is not owned by anything in this function.
/// NOTE(review): the line carrying the function name and parameters is missing
/// from this excerpt; presumably this is GlobalModuleIndex::readIndex.
251std::pair<GlobalModuleIndex *, llvm::Error>
258 llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> BufferOrErr =
259 llvm::MemoryBuffer::getFile(IndexPath.c_str());
// File could not be read: convert the error code into an llvm::Error.
261 return std::make_pair(
nullptr,
262 llvm::errorCodeToError(BufferOrErr.getError()));
263 std::unique_ptr<llvm::MemoryBuffer> Buffer = std::move(BufferOrErr.get());
266 llvm::BitstreamCursor Cursor(*Buffer);
// Check the file signature one byte at a time.
269 for (
unsigned char C : {
'B',
'C',
'G',
'I'}) {
272 return std::make_pair(
273 nullptr, llvm::createStringError(std::errc::illegal_byte_sequence,
274 "expected signature BCGI"));
276 return std::make_pair(
nullptr, Res.takeError());
// Buffer and Cursor are both moved into the new index object.
279 return std::make_pair(
new GlobalModuleIndex(std::move(Buffer), std::move(Cursor)),
280 llvm::Error::success());
287 llvm::DenseMap<ModuleFile *, unsigned>::iterator Known
288 = ModulesByFile.find(
File);
289 if (Known == ModulesByFile.end())
293 Dependencies.clear();
295 for (
unsigned I = 0, N = StoredDependencies.size(); I != N; ++I) {
297 Dependencies.push_back(MF);
305 if (!IdentifierIndex)
309 ++NumIdentifierLookups;
310 IdentifierIndexTable &Table
311 = *
static_cast<IdentifierIndexTable *
>(IdentifierIndex);
312 IdentifierIndexTable::iterator Known = Table.find(Name);
313 if (Known == Table.end()) {
318 for (
unsigned I = 0, N = ModuleIDs.size(); I != N; ++I) {
323 ++NumIdentifierLookupHits;
329 StringRef Name =
File->ModuleName;
330 llvm::StringMap<unsigned>::iterator Known = UnresolvedModules.find(Name);
331 if (Known == UnresolvedModules.end()) {
336 ModuleInfo &Info = Modules[Known->second];
341 if (
File->File.getSize() == Info.Size &&
342 File->File.getModificationTime() == Info.ModTime) {
344 ModulesByFile[
File] = Known->second;
350 UnresolvedModules.erase(Known);
355 std::fprintf(stderr,
"*** Global Module Index Statistics:\n");
356 if (NumIdentifierLookups) {
357 fprintf(stderr,
" %u / %u identifier lookups succeeded (%f%%)\n",
358 NumIdentifierLookupHits, NumIdentifierLookups,
359 (
double)NumIdentifierLookupHits*100.0/NumIdentifierLookups);
361 std::fprintf(stderr,
"\n");
365 llvm::errs() <<
"*** Global Module Index Dump:\n";
366 llvm::errs() <<
"Module files:\n";
367 for (
auto &MI : Modules) {
368 llvm::errs() <<
"** " << MI.FileName <<
"\n";
372 llvm::errs() <<
"\n";
374 llvm::errs() <<
"\n";
383 struct ModuleFileInfo {
393 struct ImportedModuleFileInfo {
395 time_t StoredModTime;
398 : StoredSize(Size), StoredModTime(ModTime), StoredSignature(Sig) {}
/// Collects per-module-file information, import records and interesting
/// identifiers, and serializes them through writeIndex.
402 class GlobalModuleIndexBuilder {
// Module files known to the builder, keyed by file entry.
407 using ModuleFilesMap = llvm::MapVector<FileEntryRef, ModuleFileInfo>;
410 ModuleFilesMap ModuleFiles;
// Import records keyed by the imported file; a multimap, so the same file may
// carry several records.
414 using ImportedModuleFilesMap =
415 std::multimap<FileEntryRef, ImportedModuleFileInfo>;
418 ImportedModuleFiles ImportedModuleFiles;
// Maps an identifier to a list of unsigned values (module IDs are pushed into
// it when module files are loaded).
422 typedef llvm::StringMap<SmallVector<unsigned, 2> > InterestingIdentifierMap;
426 InterestingIdentifierMap InterestingIdentifiers;
429 void emitBlockInfoBlock(llvm::BitstreamWriter &Stream);
// Lookup-or-create for a file's entry in ModuleFiles: an existing entry is
// returned directly; otherwise the new ID is the current number of entries.
// NOTE(review): the function header and the insertion of the new entry are not
// visible in this excerpt.
433 auto Known = ModuleFiles.find(
File);
434 if (Known != ModuleFiles.end())
435 return Known->second;
437 unsigned NewID = ModuleFiles.size();
444 explicit GlobalModuleIndexBuilder(
446 : FileMgr(FileMgr), PCHContainerRdr(PCHContainerRdr) {}
// Writes the index to Stream; the visible caller treats a true return as
// failure ("failed writing index").
453 bool writeIndex(llvm::BitstreamWriter &Stream);
458 llvm::BitstreamWriter &Stream,
462 Stream.EmitRecord(llvm::bitc::BLOCKINFO_CODE_SETBID,
Record);
465 if (!Name || Name[0] == 0)
return;
468 Record.push_back(*Name++);
469 Stream.EmitRecord(llvm::bitc::BLOCKINFO_CODE_BLOCKNAME,
Record);
473 llvm::BitstreamWriter &Stream,
478 Record.push_back(*Name++);
479 Stream.EmitRecord(llvm::bitc::BLOCKINFO_CODE_SETRECORDNAME,
Record);
/// Emits the block-info block into Stream.
/// NOTE(review): the return-type line, the declaration of Record and the end
/// of the function are not visible in this excerpt.
483GlobalModuleIndexBuilder::emitBlockInfoBlock(llvm::BitstreamWriter &Stream) {
485 Stream.EnterBlockInfoBlock();
// BLOCK(X) passes the enumerator X_ID and the string "X" to emitBlockID;
// RECORD(X) passes the enumerator X and the string "X" to emitRecordID.
487#define BLOCK(X) emitBlockID(X ## _ID, #X, Stream, Record)
488#define RECORD(X) emitRecordID(X, #X, Stream, Record)
489 BLOCK(GLOBAL_INDEX_BLOCK);
/// Lookup trait whose data for each entry is the pair
/// (identifier key, "is interesting" flag).
/// NOTE(review): the base-class clause and the remaining ReadData parameters
/// are not visible in this excerpt.
500 class InterestingASTIdentifierLookupTrait
505 typedef std::pair<StringRef, bool> data_type;
// Reads one 32-bit little-endian value from d; its lowest bit is the
// "interesting" flag. The key k is returned alongside the flag.
507 data_type ReadData(
const internal_key_type& k,
508 const unsigned char* d,
512 using namespace llvm::support;
513 unsigned RawID = endian::readNext<uint32_t, llvm::endianness::little>(d);
514 bool IsInteresting = RawID & 0x01;
515 return std::make_pair(k, IsInteresting);
525 return llvm::createStringError(Buffer.getError(),
526 "failed getting buffer for module file");
529 llvm::BitstreamCursor InStream(PCHContainerRdr.
ExtractPCH(**Buffer));
532 for (
unsigned char C : {
'C',
'P',
'C',
'H'})
535 return llvm::createStringError(std::errc::illegal_byte_sequence,
536 "expected signature CPCH");
538 return Res.takeError();
542 unsigned ID = getModuleFileInfo(
File).ID;
545 enum {
Other, ControlBlock, ASTBlock, DiagnosticOptionsBlock } State =
Other;
550 return MaybeEntry.takeError();
551 llvm::BitstreamEntry Entry = MaybeEntry.get();
553 switch (Entry.Kind) {
554 case llvm::BitstreamEntry::Error:
558 case llvm::BitstreamEntry::Record:
560 if (State ==
Other) {
564 return Skipped.takeError();
570 case llvm::BitstreamEntry::SubBlock:
576 State = ControlBlock;
581 if (llvm::Error Err = InStream.EnterSubBlock(
AST_BLOCK_ID))
594 State = DiagnosticOptionsBlock;
598 if (llvm::Error Err = InStream.SkipBlock())
603 case llvm::BitstreamEntry::EndBlock:
613 return MaybeCode.takeError();
614 unsigned Code = MaybeCode.get();
617 if (State == ControlBlock && Code ==
IMPORTS) {
619 unsigned Idx = 0, N =
Record.size();
633 off_t StoredSize = (off_t)
Record[Idx++];
634 time_t StoredModTime = (time_t)
Record[Idx++];
638 auto FirstSignatureByte =
Record.begin() + Idx;
648 unsigned Length =
Record[Idx++];
650 Record.begin() + Idx + Length);
659 return llvm::createStringError(std::errc::bad_file_descriptor,
660 "imported file \"%s\" not found",
661 ImportedFile.c_str());
665 ImportedModuleFiles.insert(std::make_pair(
666 *DependsOnFile, ImportedModuleFileInfo(StoredSize, StoredModTime,
670 unsigned DependsOnID = getModuleFileInfo(*DependsOnFile).ID;
671 getModuleFileInfo(
File).Dependencies.push_back(DependsOnID);
679 typedef llvm::OnDiskIterableChainedHashTable<
680 InterestingASTIdentifierLookupTrait> InterestingIdentifierTable;
681 std::unique_ptr<InterestingIdentifierTable> Table(
682 InterestingIdentifierTable::Create(
683 (
const unsigned char *)Blob.data() +
Record[0],
684 (
const unsigned char *)Blob.data() +
sizeof(uint32_t),
685 (
const unsigned char *)Blob.data()));
686 for (InterestingIdentifierTable::data_iterator D = Table->data_begin(),
687 DEnd = Table->data_end();
689 std::pair<StringRef, bool> Ident = *D;
691 InterestingIdentifiers[Ident.first].push_back(ID);
693 (
void)InterestingIdentifiers[Ident.first];
698 if (State == DiagnosticOptionsBlock && Code ==
SIGNATURE) {
701 "Dummy AST file signature not backpatched in ASTWriter.");
702 getModuleFileInfo(
File).Signature = Signature;
708 return llvm::Error::success();
/// Trait describing how identifier index entries are written; it is the
/// template argument of the llvm::OnDiskChainedHashTableGenerator used by
/// GlobalModuleIndexBuilder::writeIndex.
715class IdentifierIndexWriterTrait {
717 typedef StringRef key_type;
718 typedef StringRef key_type_ref;
721 typedef unsigned hash_value_type;
722 typedef unsigned offset_type;
// Hashes a key with llvm::djbHash, the same function that
// IdentifierIndexReaderTrait::ComputeHash applies on the reading side.
724 static hash_value_type
ComputeHash(key_type_ref Key) {
725 return llvm::djbHash(Key);
// Writes the entry header to Out as two little-endian uint16_t values (key
// length in bytes, then data length as 4 bytes per element of Data) and
// returns the pair {KeyLen, DataLen}.
// NOTE(review): both lengths are narrowed to uint16_t when written while the
// un-narrowed values are returned; nothing visible here guards against a
// length above 65535 — confirm that callers cannot reach it.
728 std::pair<unsigned,unsigned>
729 EmitKeyDataLength(raw_ostream& Out, key_type_ref Key, data_type_ref
Data) {
730 using namespace llvm::support;
731 endian::Writer
LE(Out, llvm::endianness::little);
732 unsigned KeyLen = Key.size();
733 unsigned DataLen =
Data.size() * 4;
734 LE.write<uint16_t>(KeyLen);
735 LE.write<uint16_t>(DataLen);
736 return std::make_pair(KeyLen, DataLen);
// Writes the first KeyLen bytes of the key to Out.
739 void EmitKey(raw_ostream& Out, key_type_ref Key,
unsigned KeyLen) {
740 Out.write(Key.data(), KeyLen);
// Writes every element of Data to Out as a little-endian uint32_t, in order.
743 void EmitData(raw_ostream& Out, key_type_ref Key, data_type_ref
Data,
745 using namespace llvm::support;
746 for (
unsigned I = 0, N =
Data.size(); I != N; ++I)
747 endian::write<uint32_t>(Out,
Data[I], llvm::endianness::little);
/// Serializes the collected module and identifier data into Stream.
///
/// Returns a bool; the visible caller treats true as failure ("failed writing
/// index").
/// NOTE(review): several lines, including the end of the function and its
/// return statements, are not visible in this excerpt.
753bool GlobalModuleIndexBuilder::writeIndex(llvm::BitstreamWriter &Stream) {
// Consistency check over the recorded imports. If the imported file has a
// signature it is compared with the stored signature; otherwise the stored
// size and modification time are compared with the file's current values.
// The action taken on a mismatch is not visible in this excerpt.
// NOTE(review): the loop variable is declared `auto`, so each map entry is
// copied per iteration; Info then refers to that copy.
754 for (
auto MapEntry : ImportedModuleFiles) {
755 auto File = MapEntry.first;
756 ImportedModuleFileInfo &Info = MapEntry.second;
757 if (getModuleFileInfo(
File).Signature) {
758 if (getModuleFileInfo(
File).Signature != Info.StoredSignature)
761 }
else if (Info.StoredSize !=
File.getSize() ||
762 Info.StoredModTime !=
File.getModificationTime())
767 using namespace llvm;
768 llvm::TimeTraceScope TimeScope(
"Module WriteIndex");
// File signature: the bytes 'B', 'C', 'G', 'I', 8 bits each.
771 Stream.Emit((
unsigned)
'B', 8);
772 Stream.Emit((
unsigned)
'C', 8);
773 Stream.Emit((
unsigned)
'G', 8);
774 Stream.Emit((
unsigned)
'I', 8);
778 emitBlockInfoBlock(Stream);
780 Stream.EnterSubblock(GLOBAL_INDEX_BLOCK_ID, 3);
785 Stream.EmitRecord(INDEX_METADATA,
Record);
// One MODULE record per module file. Fields in order: ID, file size,
// modification time, name length, name characters, dependency count,
// dependency IDs.
788 for (ModuleFilesMap::iterator M = ModuleFiles.begin(),
789 MEnd = ModuleFiles.end();
792 Record.push_back(M->second.ID);
793 Record.push_back(M->first.getSize());
794 Record.push_back(M->first.getModificationTime());
797 StringRef Name(M->first.getName());
798 Record.push_back(Name.size());
799 Record.append(Name.begin(), Name.end());
802 Record.push_back(M->second.Dependencies.size());
803 Record.append(M->second.Dependencies.begin(), M->second.Dependencies.end());
804 Stream.EmitRecord(MODULE,
Record);
// Build the on-disk hash table: each interesting identifier is inserted with
// its associated list of IDs.
809 llvm::OnDiskChainedHashTableGenerator<IdentifierIndexWriterTrait>
Generator;
810 IdentifierIndexWriterTrait Trait;
813 for (InterestingIdentifierMap::iterator I = InterestingIdentifiers.begin(),
814 IEnd = InterestingIdentifiers.end();
816 Generator.insert(I->first(), I->second, Trait);
// A 32-bit zero is written to Out before the table itself; the value returned
// by Generator.Emit is kept in BucketOffset.
821 uint32_t BucketOffset;
823 using namespace llvm::support;
826 endian::write<uint32_t>(Out, 0, llvm::endianness::little);
827 BucketOffset =
Generator.Emit(Out, Trait);
// Abbreviation for the identifier table record: the literal IDENTIFIER_INDEX
// code, one 32-bit fixed-width field, then a blob.
831 auto Abbrev = std::make_shared<BitCodeAbbrev>();
832 Abbrev->Add(BitCodeAbbrevOp(IDENTIFIER_INDEX));
833 Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32));
834 Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob));
835 unsigned IDTableAbbrev = Stream.EmitAbbrev(std::move(Abbrev));
// Body of the routine that builds and writes an index file under a lock.
// NOTE(review): the function signature and several statements are missing from
// this excerpt; presumably this is the static GlobalModuleIndex::writeIndex.
//
// IndexPath is guarded by a LockFileManager. The lock-state cases visible here
// return an io_error ("LFS error") or a device_or_resource_busy error
// ("someone else is building the index").
856 llvm::LockFileManager Locked(IndexPath);
858 case llvm::LockFileManager::LFS_Error:
859 return llvm::createStringError(std::errc::io_error,
"LFS error");
861 case llvm::LockFileManager::LFS_Owned:
865 case llvm::LockFileManager::LFS_Shared:
868 return llvm::createStringError(std::errc::device_or_resource_busy,
869 "someone else is building the index");
873 GlobalModuleIndexBuilder Builder(FileMgr, PCHContainerRdr);
// Walk the directory entries under Path, filtering on the ".pcm" extension.
877 for (llvm::sys::fs::directory_iterator D(Path, EC), DEnd;
881 if (llvm::sys::path::extension(D->path()) !=
".pcm") {
// NOTE(review): llvm::sys::path::extension returns only the final extension
// (from the last '.'), so a name ending in ".pcm.lock" yields ".lock". As
// written this comparison looks like it can never be true — confirm, and
// consider testing the file name's suffix instead.
885 if (llvm::sys::path::extension(D->path()) ==
".pcm.lock")
886 return llvm::createStringError(std::errc::device_or_resource_busy,
887 "someone else is building the index");
// Each module file found is handed to the builder.
898 if (llvm::Error Err = Builder.loadModuleFile(*
ModuleFile))
// The index is serialized into OutputBuffer first; a true result from the
// builder's writeIndex is reported as an io_error.
905 llvm::BitstreamWriter OutputStream(OutputBuffer);
906 if (Builder.writeIndex(OutputStream))
907 return llvm::createStringError(std::errc::io_error,
908 "failed writing index");
// The final file is produced through llvm::writeToOutput; the callback
// captures OutputBuffer by reference.
911 return llvm::writeToOutput(IndexPath, [&OutputBuffer](llvm::raw_ostream &OS) {
913 return llvm::Error::success();
920 IdentifierIndexTable::key_iterator Current;
923 IdentifierIndexTable::key_iterator End;
// Positions Current at the first key of the given identifier table.
// NOTE(review): the initialization of End and the closing brace are not
// visible in this excerpt.
926 explicit GlobalIndexIdentifierIterator(IdentifierIndexTable &Idx) {
927 Current = Idx.key_begin();
// Overrides the base iterator's Next(); reads the key that Current refers to.
// NOTE(review): the end-of-iteration check, the advance of Current and the
// return statement are not visible in this excerpt.
931 StringRef Next()
override {
935 StringRef Result = *Current;
943 IdentifierIndexTable &Table =
944 *
static_cast<IdentifierIndexTable *
>(IdentifierIndex);
945 return new GlobalIndexIdentifierIterator(Table);
Defines the clang::FileManager interface and associated types.
static void emitRecordID(unsigned ID, const char *Name, llvm::BitstreamWriter &Stream, SmallVectorImpl< uint64_t > &Record)
static const unsigned CurrentVersion
The global index file version.
static const char *const IndexFileName
The name of the global index file.
static void emitBlockID(unsigned ID, const char *Name, llvm::BitstreamWriter &Stream, SmallVectorImpl< uint64_t > &Record)
llvm::MachO::Record Record
static std::string toString(const clang::SanitizerSet &Sanitizers)
Produce a string containing comma-separated names of sanitizers in Sanitizers set.
#define BLOCK(DERIVED, BASE)
A reference to a FileEntry that includes the name of the file as it was accessed by the FileManager's...
Implements support for file system lookup, file system caching, and directory search management.
OptionalFileEntryRef getOptionalFileRef(StringRef Filename, bool OpenFile=false, bool CacheFailure=true)
Get a FileEntryRef if it exists, without doing anything on error.
llvm::ErrorOr< std::unique_ptr< llvm::MemoryBuffer > > getBufferForFile(FileEntryRef Entry, bool isVolatile=false, bool RequiresNullTerminator=true)
Open the specified file as a MemoryBuffer, returning a new MemoryBuffer if successful,...
A global index for a set of module files, providing information about the identifiers within those mo...
bool loadedModuleFile(ModuleFile *File)
Note that the given module file has been loaded.
void printStats()
Print statistics to standard error.
bool lookupIdentifier(llvm::StringRef Name, HitSet &Hits)
Look for all of the module files with information about the given identifier, e.g....
void getModuleDependencies(ModuleFile *File, llvm::SmallVectorImpl< ModuleFile * > &Dependencies)
Retrieve the set of module files on which the given module file directly depends.
IdentifierIterator * createIdentifierIterator() const
Returns an iterator for identifiers stored in the index table.
static std::pair< GlobalModuleIndex *, llvm::Error > readIndex(llvm::StringRef Path)
Read a global index file for the given directory.
void dump()
Print debugging view to standard error.
static llvm::Error writeIndex(FileManager &FileMgr, const PCHContainerReader &PCHContainerRdr, llvm::StringRef Path)
Write a global index into the given directory.
An iterator that walks over all of the known identifiers in the lookup table.
Implements an efficient mapping from strings to IdentifierInfo nodes.
This abstract interface provides operations for unwrapping containers for serialized ASTs (precompile...
virtual llvm::StringRef ExtractPCH(llvm::MemoryBufferRef Buffer) const =0
Returns the serialized AST inside the PCH container Buffer.
Information about a module that has been loaded by the ASTReader.
Base class for the trait describing the on-disk hash table for the identifiers in an AST file.
@ ModuleFileInfo
Dump information about a module file.
bool LE(InterpState &S, CodePtr OpPC)
@ AST_BLOCK_ID
The AST block, which acts as a container around the full AST block.
@ CONTROL_BLOCK_ID
The control block, which contains all of the information that needs to be validated prior to committi...
@ UNHASHED_CONTROL_BLOCK_ID
A block with unhashed content.
@ IMPORTS
Record code for the list of other AST files imported by this AST file.
@ SIGNATURE
Record code for the signature that identifies this AST file.
@ IDENTIFIER_TABLE
Record code for the identifier table.
unsigned ComputeHash(Selector Sel)
The JSON file list parser is used to communicate input to InstallAPI.
@ Other
Other implicit parameter.
Diagnostic wrappers for TextAPI types for error reporting.
The signature of a module, which is a hash of the AST content.
static constexpr size_t size
static ASTFileSignature create(std::array< uint8_t, 20 > Bytes)
static ASTFileSignature createDummy()