19#include "llvm/ADT/DenseMap.h"
20#include "llvm/ADT/MapVector.h"
21#include "llvm/ADT/SmallString.h"
22#include "llvm/ADT/StringRef.h"
23#include "llvm/Bitstream/BitstreamReader.h"
24#include "llvm/Bitstream/BitstreamWriter.h"
25#include "llvm/Support/DJB.h"
26#include "llvm/Support/FileSystem.h"
27#include "llvm/Support/IOSandbox.h"
28#include "llvm/Support/LockFileManager.h"
29#include "llvm/Support/MemoryBuffer.h"
30#include "llvm/Support/OnDiskHashTable.h"
31#include "llvm/Support/Path.h"
32#include "llvm/Support/TimeProfiler.h"
33#include "llvm/Support/raw_ostream.h"
44 GLOBAL_INDEX_BLOCK_ID = llvm::bitc::FIRST_APPLICATION_BLOCKID
48 enum IndexRecordTypes {
73class IdentifierIndexReaderTrait {
75 typedef StringRef external_key_type;
76 typedef StringRef internal_key_type;
78 typedef unsigned hash_value_type;
79 typedef unsigned offset_type;
81 static bool EqualKey(
const internal_key_type& a,
const internal_key_type&
b) {
85 static hash_value_type
ComputeHash(
const internal_key_type& a) {
86 return llvm::djbHash(a);
89 static std::pair<unsigned, unsigned>
90 ReadKeyDataLength(
const unsigned char*& d) {
91 using namespace llvm::support;
92 unsigned KeyLen = endian::readNext<uint16_t, llvm::endianness::little>(d);
93 unsigned DataLen = endian::readNext<uint16_t, llvm::endianness::little>(d);
94 return std::make_pair(KeyLen, DataLen);
// Converts an external (caller-facing) key into the internal key form.
// Both key types are typedef'd to StringRef in this trait, so the argument
// is handed back unchanged.
97 static const internal_key_type&
98 GetInternalKey(
const external_key_type& x) {
return x; }
// Converts an internal key back into the external (caller-facing) key form.
// Both key types are typedef'd to StringRef in this trait, so the argument
// is handed back unchanged.
100 static const external_key_type&
101 GetExternalKey(
const internal_key_type& x) {
return x; }
103 static internal_key_type ReadKey(
const unsigned char* d,
unsigned n) {
104 return StringRef((
const char *)d, n);
107 static data_type ReadData(
const internal_key_type& k,
108 const unsigned char* d,
110 using namespace llvm::support;
113 while (DataLen > 0) {
114 unsigned ID = endian::readNext<uint32_t, llvm::endianness::little>(d);
115 Result.push_back(ID);
// Hash-table type instantiated with IdentifierIndexReaderTrait. The
// GlobalModuleIndex constructor builds one with IdentifierIndexTable::Create
// and stores it in IdentifierIndex; later uses cast IdentifierIndex back to
// this type before querying or deleting it.
123typedef llvm::OnDiskIterableChainedHashTable<IdentifierIndexReaderTrait>
124 IdentifierIndexTable;
128GlobalModuleIndex::GlobalModuleIndex(
129 std::unique_ptr<llvm::MemoryBuffer> IndexBuffer,
130 llvm::BitstreamCursor Cursor)
131 : Buffer(
std::move(IndexBuffer)), IdentifierIndex(), NumIdentifierLookups(),
132 NumIdentifierLookupHits() {
133 auto Fail = [&](llvm::Error &&Err) {
134 report_fatal_error(
"Module index '" + Buffer->getBufferIdentifier() +
135 "' failed: " +
toString(std::move(Err)));
138 llvm::TimeTraceScope TimeScope(
"Module LoadIndex");
140 bool InGlobalIndexBlock =
false;
143 llvm::BitstreamEntry Entry;
147 Fail(Res.takeError());
149 switch (Entry.Kind) {
150 case llvm::BitstreamEntry::Error:
153 case llvm::BitstreamEntry::EndBlock:
154 if (InGlobalIndexBlock) {
155 InGlobalIndexBlock =
false;
162 case llvm::BitstreamEntry::Record:
164 if (InGlobalIndexBlock)
169 case llvm::BitstreamEntry::SubBlock:
170 if (!InGlobalIndexBlock && Entry.ID == GLOBAL_INDEX_BLOCK_ID) {
171 if (llvm::Error Err =
Cursor.EnterSubBlock(GLOBAL_INDEX_BLOCK_ID))
172 Fail(std::move(Err));
173 InGlobalIndexBlock =
true;
174 }
else if (llvm::Error Err =
Cursor.SkipBlock())
175 Fail(std::move(Err));
183 if (!MaybeIndexRecord)
184 Fail(MaybeIndexRecord.takeError());
185 IndexRecordTypes IndexRecord =
186 static_cast<IndexRecordTypes
>(MaybeIndexRecord.get());
187 switch (IndexRecord) {
196 unsigned ID =
Record[Idx++];
199 if (ID == Modules.size())
200 Modules.push_back(ModuleInfo());
202 Modules.resize(ID + 1);
206 Modules[ID].Size =
Record[Idx++];
207 Modules[ID].ModTime =
Record[Idx++];
210 unsigned NameLen =
Record[Idx++];
211 Modules[ID].FileName.assign(
Record.begin() + Idx,
212 Record.begin() + Idx + NameLen);
216 unsigned NumDeps =
Record[Idx++];
217 Modules[ID].Dependencies.insert(Modules[ID].Dependencies.end(),
219 Record.begin() + Idx + NumDeps);
223 assert(Idx ==
Record.size() &&
"More module info?");
228 StringRef ModuleName = llvm::sys::path::stem(Modules[ID].
FileName);
230 ModuleName = ModuleName.rsplit(
'-').first;
231 UnresolvedModules[ModuleName] = ID;
235 case IDENTIFIER_INDEX:
238 IdentifierIndex = IdentifierIndexTable::Create(
239 (
const unsigned char *)Blob.data() +
Record[0],
240 (
const unsigned char *)Blob.data() +
sizeof(uint32_t),
241 (
const unsigned char *)Blob.data(), IdentifierIndexReaderTrait());
249 delete static_cast<IdentifierIndexTable *
>(IdentifierIndex);
252std::pair<GlobalModuleIndex *, llvm::Error>
255 auto BypassSandbox = llvm::sys::sandbox::scopedDisable();
262 llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> BufferOrErr =
263 llvm::MemoryBuffer::getFile(IndexPath.c_str());
265 return std::make_pair(
nullptr,
266 llvm::errorCodeToError(BufferOrErr.getError()));
267 std::unique_ptr<llvm::MemoryBuffer> Buffer = std::move(BufferOrErr.get());
270 llvm::BitstreamCursor Cursor(*Buffer);
273 for (
unsigned char C : {
'B',
'C',
'G',
'I'}) {
276 return std::make_pair(
277 nullptr, llvm::createStringError(std::errc::illegal_byte_sequence,
278 "expected signature BCGI"));
280 return std::make_pair(
nullptr, Res.takeError());
283 return std::make_pair(
new GlobalModuleIndex(std::move(Buffer), std::move(Cursor)),
284 llvm::Error::success());
291 llvm::DenseMap<ModuleFile *, unsigned>::iterator Known
292 = ModulesByFile.find(
File);
293 if (Known == ModulesByFile.end())
297 Dependencies.clear();
299 for (
unsigned I = 0, N = StoredDependencies.size(); I != N; ++I) {
300 if (ModuleFile *MF = Modules[I].
File)
301 Dependencies.push_back(MF);
309 if (!IdentifierIndex)
313 ++NumIdentifierLookups;
314 IdentifierIndexTable &Table
315 = *
static_cast<IdentifierIndexTable *
>(IdentifierIndex);
316 IdentifierIndexTable::iterator Known = Table.find(Name);
317 if (Known == Table.end()) {
322 for (
unsigned I = 0, N = ModuleIDs.size(); I != N; ++I) {
323 if (ModuleFile *MF = Modules[ModuleIDs[I]].
File)
327 ++NumIdentifierLookupHits;
333 StringRef Name =
File->ModuleName;
334 llvm::StringMap<unsigned>::iterator Known = UnresolvedModules.find(Name);
335 if (Known == UnresolvedModules.end()) {
340 ModuleInfo &Info = Modules[Known->second];
345 if (
File->File.getSize() == Info.Size &&
346 File->File.getModificationTime() == Info.ModTime) {
348 ModulesByFile[
File] = Known->second;
354 UnresolvedModules.erase(Known);
359 std::fprintf(
stderr,
"*** Global Module Index Statistics:\n");
360 if (NumIdentifierLookups) {
361 fprintf(
stderr,
" %u / %u identifier lookups succeeded (%f%%)\n",
362 NumIdentifierLookupHits, NumIdentifierLookups,
363 (
double)NumIdentifierLookupHits*100.0/NumIdentifierLookups);
365 std::fprintf(
stderr,
"\n");
369 llvm::errs() <<
"*** Global Module Index Dump:\n";
370 llvm::errs() <<
"Module files:\n";
371 for (
auto &MI : Modules) {
372 llvm::errs() <<
"** " << MI.FileName <<
"\n";
376 llvm::errs() <<
"\n";
378 llvm::errs() <<
"\n";
387 struct ModuleFileInfo {
397 struct ImportedModuleFileInfo {
399 time_t StoredModTime;
402 : StoredSize(Size), StoredModTime(ModTime), StoredSignature(Sig) {}
406 class GlobalModuleIndexBuilder {
407 FileManager &FileMgr;
408 const PCHContainerReader &PCHContainerRdr;
411 using ModuleFilesMap = llvm::MapVector<FileEntryRef, ModuleFileInfo>;
414 ModuleFilesMap ModuleFiles;
418 using ImportedModuleFilesMap =
419 std::multimap<FileEntryRef, ImportedModuleFileInfo>;
422 ImportedModuleFilesMap ImportedModuleFiles;
426 typedef llvm::StringMap<SmallVector<unsigned, 2> > InterestingIdentifierMap;
430 InterestingIdentifierMap InterestingIdentifiers;
433 void emitBlockInfoBlock(llvm::BitstreamWriter &Stream);
437 auto [It, Inserted] = ModuleFiles.try_emplace(
File);
439 unsigned NewID = ModuleFiles.size();
// Constructs a builder bound to the given file manager and PCH container
// reader. Both arguments are stored in reference members and the body is
// empty, so the referenced objects are borrowed rather than copied.
447 explicit GlobalModuleIndexBuilder(
448 FileManager &FileMgr,
const PCHContainerReader &PCHContainerRdr)
449 : FileMgr(FileMgr), PCHContainerRdr(PCHContainerRdr) {}
452 llvm::Error loadModuleFile(FileEntryRef
File);
456 bool writeIndex(llvm::BitstreamWriter &Stream);
461 llvm::BitstreamWriter &Stream,
465 Stream.EmitRecord(llvm::bitc::BLOCKINFO_CODE_SETBID,
Record);
468 if (!Name || Name[0] == 0)
return;
471 Record.push_back(*Name++);
472 Stream.EmitRecord(llvm::bitc::BLOCKINFO_CODE_BLOCKNAME,
Record);
476 llvm::BitstreamWriter &Stream,
481 Record.push_back(*Name++);
482 Stream.EmitRecord(llvm::bitc::BLOCKINFO_CODE_SETRECORDNAME,
Record);
486GlobalModuleIndexBuilder::emitBlockInfoBlock(llvm::BitstreamWriter &Stream) {
488 Stream.EnterBlockInfoBlock();
// Shorthand for registering names in the block-info block.
// BLOCK(X) passes the ID `X_ID` (token-pasted) and the stringized name "X"
// to emitBlockID; RECORD(X) passes the ID `X` and the stringized name "X"
// to emitRecordID. Both rely on `Stream` and `Record` being in scope at the
// point of use.
490#define BLOCK(X) emitBlockID(X ## _ID, #X, Stream, Record)
491#define RECORD(X) emitRecordID(X, #X, Stream, Record)
492 BLOCK(GLOBAL_INDEX_BLOCK);
503 class InterestingASTIdentifierLookupTrait
508 typedef std::pair<StringRef, bool> data_type;
510 data_type ReadData(
const internal_key_type& k,
511 const unsigned char* d,
515 using namespace llvm::support;
517 endian::readNext<IdentifierID, llvm::endianness::little>(d);
518 bool IsInteresting = RawID & 0x01;
519 return std::make_pair(k, IsInteresting);
527 auto Buffer =
FileMgr.getBufferForFile(
File,
true);
529 return llvm::createStringError(Buffer.getError(),
530 "failed getting buffer for module file");
533 llvm::BitstreamCursor InStream(PCHContainerRdr.
ExtractPCH(**Buffer));
536 for (
unsigned char C : {
'C',
'P',
'C',
'H'})
539 return llvm::createStringError(std::errc::illegal_byte_sequence,
540 "expected signature CPCH");
542 return Res.takeError();
546 unsigned ID = getModuleFileInfo(
File).ID;
549 enum {
Other, ControlBlock, ASTBlock, DiagnosticOptionsBlock } State =
Other;
554 return MaybeEntry.takeError();
555 llvm::BitstreamEntry Entry = MaybeEntry.get();
557 switch (Entry.Kind) {
558 case llvm::BitstreamEntry::Error:
562 case llvm::BitstreamEntry::Record:
564 if (State ==
Other) {
568 return Skipped.takeError();
574 case llvm::BitstreamEntry::SubBlock:
580 State = ControlBlock;
585 if (llvm::Error Err = InStream.EnterSubBlock(
AST_BLOCK_ID))
598 State = DiagnosticOptionsBlock;
602 if (llvm::Error Err = InStream.SkipBlock())
607 case llvm::BitstreamEntry::EndBlock:
617 return MaybeCode.takeError();
618 unsigned Code = MaybeCode.get();
621 if (State == ControlBlock && Code ==
IMPORT) {
633 Blob = Blob.substr(
Record[Idx++]);
639 off_t StoredSize = (off_t)
Record[Idx++];
640 time_t StoredModTime = (time_t)
Record[Idx++];
647 SignatureBytes.end());
651 unsigned Length =
Record[Idx++];
652 StringRef ImportedFile = Blob.substr(0, Length);
653 Blob = Blob.substr(Length);
657 FileMgr.getOptionalFileRef(ImportedFile,
false,
661 return llvm::createStringError(std::errc::bad_file_descriptor,
662 "imported file \"%s\" not found",
663 std::string(ImportedFile).c_str());
667 ImportedModuleFiles.insert(std::make_pair(
668 *DependsOnFile, ImportedModuleFileInfo(StoredSize, StoredModTime,
672 unsigned DependsOnID = getModuleFileInfo(*DependsOnFile).ID;
673 getModuleFileInfo(
File).Dependencies.push_back(DependsOnID);
680 typedef llvm::OnDiskIterableChainedHashTable<
681 InterestingASTIdentifierLookupTrait> InterestingIdentifierTable;
682 std::unique_ptr<InterestingIdentifierTable> Table(
683 InterestingIdentifierTable::Create(
684 (
const unsigned char *)Blob.data() +
Record[0],
685 (
const unsigned char *)Blob.data() +
sizeof(uint32_t),
686 (
const unsigned char *)Blob.data()));
687 for (InterestingIdentifierTable::data_iterator D = Table->data_begin(),
688 DEnd = Table->data_end();
690 std::pair<StringRef, bool> Ident = *D;
692 InterestingIdentifiers[Ident.first].push_back(ID);
694 (
void)InterestingIdentifiers[Ident.first];
699 if (State == DiagnosticOptionsBlock && Code ==
SIGNATURE) {
702 "Dummy AST file signature not backpatched in ASTWriter.");
703 getModuleFileInfo(
File).Signature = Signature;
709 return llvm::Error::success();
716class IdentifierIndexWriterTrait {
// Type vocabulary for the table generator.
// Keys are StringRef values, passed around by value (key_type_ref is also a
// plain StringRef). The payload for each key is a small vector of unsigned
// values, passed by const reference; the builder fills these vectors with
// module file IDs for each interesting identifier. Hash values and offsets
// are plain `unsigned`.
718 typedef StringRef key_type;
719 typedef StringRef key_type_ref;
720 typedef SmallVector<unsigned, 2> data_type;
721 typedef const SmallVector<unsigned, 2> &data_type_ref;
722 typedef unsigned hash_value_type;
723 typedef unsigned offset_type;
725 static hash_value_type
ComputeHash(key_type_ref Key) {
726 return llvm::djbHash(Key);
729 std::pair<unsigned,unsigned>
730 EmitKeyDataLength(raw_ostream& Out, key_type_ref Key, data_type_ref
Data) {
731 using namespace llvm::support;
732 endian::Writer
LE(Out, llvm::endianness::little);
733 unsigned KeyLen = Key.size();
734 unsigned DataLen =
Data.size() * 4;
735 LE.write<uint16_t>(KeyLen);
736 LE.write<uint16_t>(DataLen);
737 return std::make_pair(KeyLen, DataLen);
740 void EmitKey(raw_ostream& Out, key_type_ref Key,
unsigned KeyLen) {
741 Out.write(Key.data(), KeyLen);
744 void EmitData(raw_ostream& Out, key_type_ref Key, data_type_ref
Data,
746 using namespace llvm::support;
747 for (
unsigned I = 0, N =
Data.size(); I != N; ++I)
748 endian::write<uint32_t>(Out,
Data[I], llvm::endianness::little);
754bool GlobalModuleIndexBuilder::writeIndex(llvm::BitstreamWriter &Stream) {
755 for (
auto MapEntry : ImportedModuleFiles) {
756 auto File = MapEntry.first;
757 ImportedModuleFileInfo &Info = MapEntry.second;
758 if (getModuleFileInfo(
File).Signature) {
759 if (getModuleFileInfo(
File).Signature != Info.StoredSignature)
762 }
else if (Info.StoredSize !=
File.getSize() ||
763 Info.StoredModTime !=
File.getModificationTime())
768 using namespace llvm;
769 llvm::TimeTraceScope TimeScope(
"Module WriteIndex");
772 Stream.Emit((
unsigned)
'B', 8);
773 Stream.Emit((
unsigned)
'C', 8);
774 Stream.Emit((
unsigned)
'G', 8);
775 Stream.Emit((
unsigned)
'I', 8);
779 emitBlockInfoBlock(Stream);
781 Stream.EnterSubblock(GLOBAL_INDEX_BLOCK_ID, 3);
786 Stream.EmitRecord(INDEX_METADATA,
Record);
789 for (ModuleFilesMap::iterator M = ModuleFiles.begin(),
790 MEnd = ModuleFiles.end();
793 Record.push_back(M->second.ID);
794 Record.push_back(M->first.getSize());
795 Record.push_back(M->first.getModificationTime());
798 StringRef Name(M->first.getName());
799 Record.push_back(Name.size());
800 Record.append(Name.begin(), Name.end());
803 Record.push_back(M->second.Dependencies.size());
804 Record.append(M->second.Dependencies.begin(), M->second.Dependencies.end());
805 Stream.EmitRecord(MODULE,
Record);
810 llvm::OnDiskChainedHashTableGenerator<IdentifierIndexWriterTrait>
Generator;
811 IdentifierIndexWriterTrait Trait;
814 for (InterestingIdentifierMap::iterator I = InterestingIdentifiers.begin(),
815 IEnd = InterestingIdentifiers.end();
817 Generator.insert(I->first(), I->second, Trait);
824 using namespace llvm::support;
827 endian::write<uint32_t>(Out, 0, llvm::endianness::little);
828 BucketOffset =
Generator.Emit(Out, Trait);
832 auto Abbrev = std::make_shared<BitCodeAbbrev>();
833 Abbrev->Add(BitCodeAbbrevOp(IDENTIFIER_INDEX));
834 Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32));
835 Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob));
836 unsigned IDTableAbbrev = Stream.EmitAbbrev(std::move(Abbrev));
852 auto BypassSandbox = llvm::sys::sandbox::scopedDisable();
860 llvm::LockFileManager Lock(IndexPath);
862 if (llvm::Error Err = Lock.tryLock().moveInto(Owned)) {
863 llvm::consumeError(std::move(Err));
864 return llvm::createStringError(std::errc::io_error,
"LFS error");
869 return llvm::createStringError(std::errc::device_or_resource_busy,
870 "someone else is building the index");
876 GlobalModuleIndexBuilder Builder(
FileMgr, PCHContainerRdr);
880 for (llvm::sys::fs::directory_iterator D(Path, EC), DEnd;
884 if (llvm::sys::path::extension(D->path()) !=
".pcm") {
888 if (llvm::sys::path::extension(D->path()) ==
".pcm.lock")
889 return llvm::createStringError(std::errc::device_or_resource_busy,
890 "someone else is building the index");
896 auto ModuleFile =
FileMgr.getOptionalFileRef(D->path());
901 if (llvm::Error Err = Builder.loadModuleFile(*ModuleFile))
908 llvm::BitstreamWriter OutputStream(OutputBuffer);
909 if (Builder.writeIndex(OutputStream))
910 return llvm::createStringError(std::errc::io_error,
911 "failed writing index");
914 return llvm::writeToOutput(IndexPath, [&OutputBuffer](llvm::raw_ostream &OS) {
916 return llvm::Error::success();
923 IdentifierIndexTable::key_iterator Current;
926 IdentifierIndexTable::key_iterator End;
929 explicit GlobalIndexIdentifierIterator(IdentifierIndexTable &Idx) {
930 Current = Idx.key_begin();
934 StringRef
Next()
override {
938 StringRef Result = *Current;
946 IdentifierIndexTable &Table =
947 *
static_cast<IdentifierIndexTable *
>(IdentifierIndex);
948 return new GlobalIndexIdentifierIterator(Table);
#define RECORD(CLASS, BASE)
Defines the clang::FileManager interface and associated types.
static void emitRecordID(unsigned ID, const char *Name, llvm::BitstreamWriter &Stream, SmallVectorImpl< uint64_t > &Record)
static const unsigned CurrentVersion
The global index file version.
static const char *const IndexFileName
The name of the global index file.
static void emitBlockID(unsigned ID, const char *Name, llvm::BitstreamWriter &Stream, SmallVectorImpl< uint64_t > &Record)
llvm::MachO::Record Record
static std::string toString(const clang::SanitizerSet &Sanitizers)
Produce a string containing comma-separated names of sanitizers in Sanitizers set.
#define IMPORT(DERIVED, BASE)
#define BLOCK(DERIVED, BASE)
A reference to a FileEntry that includes the name of the file as it was accessed by the FileManager's...
Implements support for file system lookup, file system caching, and directory search management.
bool loadedModuleFile(ModuleFile *File)
Note that the given module file has been loaded.
void printStats()
Print statistics to standard error.
llvm::SmallPtrSet< ModuleFile *, 4 > HitSet
A set of module files in which we found a result.
bool lookupIdentifier(llvm::StringRef Name, HitSet &Hits)
Look for all of the module files with information about the given identifier, e.g....
void getModuleDependencies(ModuleFile *File, llvm::SmallVectorImpl< ModuleFile * > &Dependencies)
Retrieve the set of module files on which the given module file directly depends.
IdentifierIterator * createIdentifierIterator() const
Returns an iterator for identifiers stored in the index table.
static std::pair< GlobalModuleIndex *, llvm::Error > readIndex(llvm::StringRef Path)
Read a global index file for the given directory.
void dump()
Print debugging view to standard error.
static llvm::Error writeIndex(FileManager &FileMgr, const PCHContainerReader &PCHContainerRdr, llvm::StringRef Path)
Write a global index into the given directory.
An iterator that walks over all of the known identifiers in the lookup table.
Implements an efficient mapping from strings to IdentifierInfo nodes.
This abstract interface provides operations for unwrapping containers for serialized ASTs (precompile...
virtual llvm::StringRef ExtractPCH(llvm::MemoryBufferRef Buffer) const =0
Returns the serialized AST inside the PCH container Buffer.
Base class for the trait describing the on-disk hash table for the identifiers in an AST file.
@ ModuleFileInfo
Dump information about a module file.
bool LE(InterpState &S, CodePtr OpPC)
@ AST_BLOCK_ID
The AST block, which acts as a container around the full AST block.
@ CONTROL_BLOCK_ID
The control block, which contains all of the information that needs to be validated prior to committi...
@ UNHASHED_CONTROL_BLOCK_ID
A block with unhashed content.
@ SIGNATURE
Record code for the signature that identifies this AST file.
@ IDENTIFIER_TABLE
Record code for the identifier table.
unsigned ComputeHash(Selector Sel)
uint64_t IdentifierID
An ID number that refers to an identifier in an AST file.
The JSON file list parser is used to communicate input to InstallAPI.
@ Other
Other implicit parameter.
__LIBC_ATTRS FILE * stderr
The signature of a module, which is a hash of the AST content.
static constexpr size_t size
static ASTFileSignature create(std::array< uint8_t, 20 > Bytes)
static ASTFileSignature createDummy()