19#include "llvm/ADT/DenseMap.h"
20#include "llvm/ADT/MapVector.h"
21#include "llvm/ADT/SmallString.h"
22#include "llvm/ADT/StringRef.h"
23#include "llvm/Bitstream/BitstreamReader.h"
24#include "llvm/Bitstream/BitstreamWriter.h"
25#include "llvm/Support/DJB.h"
26#include "llvm/Support/FileSystem.h"
27#include "llvm/Support/IOSandbox.h"
28#include "llvm/Support/LockFileManager.h"
29#include "llvm/Support/MemoryBuffer.h"
30#include "llvm/Support/OnDiskHashTable.h"
31#include "llvm/Support/Path.h"
32#include "llvm/Support/TimeProfiler.h"
33#include "llvm/Support/raw_ostream.h"
44 GLOBAL_INDEX_BLOCK_ID = llvm::bitc::FIRST_APPLICATION_BLOCKID
48 enum IndexRecordTypes {
// Reader-side trait for the identifier hash table (see the
// OnDiskIterableChainedHashTable typedef that follows this class).
// Keys are StringRefs; ReadData decodes a list of 32-bit IDs.
// Several lines of this class (bodies, closing braces, the data_type typedef)
// are not visible in this excerpt.
73class IdentifierIndexReaderTrait {
75 typedef StringRef external_key_type;
76 typedef StringRef internal_key_type;
78 typedef unsigned hash_value_type;
79 typedef unsigned offset_type;
// Key equality predicate; its body is not visible here.
81 static bool EqualKey(
const internal_key_type& a,
const internal_key_type&
b) {
// Hash a key with llvm::djbHash. The writer-side trait's ComputeHash uses the
// same function.
85 static hash_value_type
ComputeHash(
const internal_key_type& a) {
86 return llvm::djbHash(a);
// Read the per-entry header from d: two little-endian uint16_t values,
// key length first, then data length. d is taken by reference and is
// consumed by the two readNext calls.
89 static std::pair<unsigned, unsigned>
90 ReadKeyDataLength(
const unsigned char*& d) {
91 using namespace llvm::support;
92 unsigned KeyLen = endian::readNext<uint16_t, llvm::endianness::little>(d);
93 unsigned DataLen = endian::readNext<uint16_t, llvm::endianness::little>(d);
94 return std::make_pair(KeyLen, DataLen);
// Internal and external key types are both StringRef, so the two
// conversions below return their argument unchanged.
97 static const internal_key_type&
98 GetInternalKey(
const external_key_type& x) {
return x; }
100 static const external_key_type&
101 GetExternalKey(
const internal_key_type& x) {
return x; }
// Wrap the n bytes starting at d in a StringRef.
103 static internal_key_type ReadKey(
const unsigned char* d,
unsigned n) {
104 return StringRef((
const char *)d, n);
// Decode the entry payload as a sequence of little-endian uint32_t IDs and
// append each to Result. The remaining parameter(s), the declaration of
// Result, and the statement that decreases DataLen are on lines not shown.
107 static data_type ReadData(
const internal_key_type& k,
108 const unsigned char* d,
110 using namespace llvm::support;
113 while (DataLen > 0) {
114 unsigned ID = endian::readNext<uint32_t, llvm::endianness::little>(d);
115 Result.push_back(ID);
123typedef llvm::OnDiskIterableChainedHashTable<IdentifierIndexReaderTrait>
124 IdentifierIndexTable;
// Construct the index by walking the bitstream behind Cursor.
// Buffer takes over IndexBuffer (moved in); the identifier table pointer and
// both lookup counters start value-initialized. Stream errors go through the
// local Fail lambda. Many lines of this constructor are not visible in this
// excerpt, so the comments below describe only what is shown.
128GlobalModuleIndex::GlobalModuleIndex(
129 std::unique_ptr<llvm::MemoryBuffer> IndexBuffer,
130 llvm::BitstreamCursor Cursor)
131 : Buffer(
std::move(IndexBuffer)), IdentifierIndex(), NumIdentifierLookups(),
132 NumIdentifierLookupHits() {
// Fail calls report_fatal_error with the buffer identifier and the error text.
133 auto Fail = [&](llvm::Error &&Err) {
134 report_fatal_error(
"Module index '" + Buffer->getBufferIdentifier() +
135 "' failed: " +
toString(std::move(Err)));
138 llvm::TimeTraceScope TimeScope(
"Module LoadIndex");
// Tracks whether the cursor is currently inside GLOBAL_INDEX_BLOCK_ID.
140 bool InGlobalIndexBlock =
false;
143 llvm::BitstreamEntry Entry;
147 Fail(Res.takeError());
// Dispatch on the kind of the entry just read.
149 switch (Entry.Kind) {
150 case llvm::BitstreamEntry::Error:
153 case llvm::BitstreamEntry::EndBlock:
154 if (InGlobalIndexBlock) {
155 InGlobalIndexBlock =
false;
162 case llvm::BitstreamEntry::Record:
164 if (InGlobalIndexBlock)
// Enter the global index block when it is encountered from outside;
// every other sub-block is skipped.
169 case llvm::BitstreamEntry::SubBlock:
170 if (!InGlobalIndexBlock && Entry.ID == GLOBAL_INDEX_BLOCK_ID) {
171 if (llvm::Error Err =
Cursor.EnterSubBlock(GLOBAL_INDEX_BLOCK_ID))
172 Fail(std::move(Err));
173 InGlobalIndexBlock =
true;
174 }
else if (llvm::Error Err =
Cursor.SkipBlock())
175 Fail(std::move(Err));
183 if (!MaybeIndexRecord)
184 Fail(MaybeIndexRecord.takeError());
185 IndexRecordTypes IndexRecord =
186 static_cast<IndexRecordTypes
>(MaybeIndexRecord.get());
187 switch (IndexRecord) {
// Module record, consumed sequentially through Idx: module ID, size,
// modification time, length-prefixed file name, length-prefixed dependency
// list. This mirrors the order written by GlobalModuleIndexBuilder::writeIndex.
196 unsigned ID =
Record[Idx++];
// Grow Modules so index ID is valid. The condition guarding the resize is on
// a line not shown.
199 if (ID == Modules.size())
200 Modules.push_back(ModuleInfo());
202 Modules.resize(ID + 1);
206 Modules[ID].Size =
Record[Idx++];
207 Modules[ID].ModTime =
Record[Idx++];
// NOTE(review): NameLen and NumDeps are taken from the record and used to
// form iterator ranges; no check against Record.size() is visible before
// use (only the assert after both reads). Confirm whether the index file is
// treated as trusted input.
210 unsigned NameLen =
Record[Idx++];
211 Modules[ID].FileName.assign(
Record.begin() + Idx,
212 Record.begin() + Idx + NameLen);
216 unsigned NumDeps =
Record[Idx++];
217 Modules[ID].Dependencies.insert(Modules[ID].Dependencies.end(),
219 Record.begin() + Idx + NumDeps);
223 assert(Idx ==
Record.size() &&
"More module info?");
// Register the module under the part of its file stem that precedes the last
// '-'. loadedModuleFile later looks modules up in UnresolvedModules by name.
228 StringRef ModuleName = llvm::sys::path::stem(Modules[ID].
FileName);
230 ModuleName = ModuleName.rsplit(
'-').first;
231 UnresolvedModules[ModuleName] = ID;
// Identifier table record. Create receives three pointers into Blob:
// Blob + Record[0], Blob + sizeof(uint32_t), and Blob itself (presumably the
// bucket array, the payload start, and the base; confirm against
// OnDiskHashTable.h). The writer emits one uint32_t before the table data
// and records the value returned by Generator.Emit as BucketOffset.
235 case IDENTIFIER_INDEX:
238 IdentifierIndex = IdentifierIndexTable::Create(
239 (
const unsigned char *)Blob.data() +
Record[0],
240 (
const unsigned char *)Blob.data() +
sizeof(uint32_t),
241 (
const unsigned char *)Blob.data(), IdentifierIndexReaderTrait());
249 delete static_cast<IdentifierIndexTable *
>(IdentifierIndex);
// Read the index file at IndexPath and return {index, error}.
// Every failure path shown returns nullptr paired with an Error; success
// returns a newly allocated GlobalModuleIndex paired with Error::success().
// The function name/parameter line and the construction of IndexPath are not
// visible in this excerpt.
252std::pair<GlobalModuleIndex *, llvm::Error>
255 auto BypassSandbox = llvm::sys::sandbox::scopedDisable();
262 llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> BufferOrErr =
263 llvm::MemoryBuffer::getFile(IndexPath.c_str());
// Could not open/read the file: surface the error code as an llvm::Error.
265 return std::make_pair(
nullptr,
266 llvm::errorCodeToError(BufferOrErr.getError()));
267 std::unique_ptr<llvm::MemoryBuffer> Buffer = std::move(BufferOrErr.get());
270 llvm::BitstreamCursor Cursor(*Buffer);
// The file must start with the four signature bytes 'B','C','G','I', the same
// bytes GlobalModuleIndexBuilder::writeIndex emits first. The per-byte read
// and comparison are on lines not shown.
273 for (
unsigned char C : {
'B',
'C',
'G',
'I'}) {
276 return std::make_pair(
277 nullptr, llvm::createStringError(std::errc::illegal_byte_sequence,
278 "expected signature BCGI"));
280 return std::make_pair(
nullptr, Res.takeError());
// NOTE(review): the index is allocated with `new` and returned as a raw
// pointer; ownership presumably passes to the caller. Verify at call sites.
283 return std::make_pair(
new GlobalModuleIndex(std::move(Buffer), std::move(Cursor)),
284 llvm::Error::success());
// Body fragment of the routine that fills Dependencies for File (the
// signature and the declaration of StoredDependencies are not visible here).
// Look up the module ID recorded for File; the action taken when File is
// unknown is on a line not shown.
291 llvm::DenseMap<ModuleFile *, unsigned>::iterator Known
292 = ModulesByFile.find(
File);
293 if (Known == ModulesByFile.end())
// Output vector is reset before being repopulated.
297 Dependencies.clear();
// NOTE(review): the loop bound comes from StoredDependencies.size(), but the
// body indexes Modules[I] with the loop counter itself rather than with an
// element of StoredDependencies. lookupIdentifier, by contrast, uses
// Modules[ModuleIDs[I]]. If StoredDependencies holds module IDs (as the
// Dependencies lists read in the constructor do), this probably should be
// Modules[StoredDependencies[I]]. Confirm against the omitted declaration.
299 for (
unsigned I = 0, N = StoredDependencies.size(); I != N; ++I) {
// Only modules whose ModuleFile pointer is non-null are reported.
300 if (ModuleFile *MF = Modules[I].
File)
301 Dependencies.push_back(MF);
// Body fragment of the identifier lookup (signature, return statements, and
// the statement that records each hit are on lines not shown).
// Nothing to search if no identifier table was loaded.
309 if (!IdentifierIndex)
// Count every lookup that reaches the table.
313 ++NumIdentifierLookups;
// IdentifierIndex is stored type-erased; cast it back to the table type.
314 IdentifierIndexTable &Table
315 = *
static_cast<IdentifierIndexTable *
>(IdentifierIndex);
316 IdentifierIndexTable::iterator Known = Table.find(Name);
317 if (Known == Table.end()) {
// Walk the module IDs stored for Name; only modules with a non-null
// ModuleFile pointer are considered.
322 for (
unsigned I = 0, N = ModuleIDs.size(); I != N; ++I) {
323 if (ModuleFile *MF = Modules[ModuleIDs[I]].
File)
// Count the lookup as a hit (feeds the ratio printed by printStats).
327 ++NumIdentifierLookupHits;
// Body fragment of loadedModuleFile (signature and return statements are on
// lines not shown). Tries to bind a just-loaded ModuleFile to the module
// entry that the index recorded under the same name.
333 StringRef Name =
File->ModuleName;
334 llvm::StringMap<unsigned>::iterator Known = UnresolvedModules.find(Name);
335 if (Known == UnresolvedModules.end()) {
340 ModuleInfo &Info = Modules[Known->second];
// The index entry is accepted only if both the file size and the
// modification time match what the index recorded.
345 if (
File->File.getSize() == Info.Size &&
346 File->File.getModificationTime() == Info.ModTime) {
348 ModulesByFile[
File] = Known->second;
// The name is removed from the unresolved set. Whether this happens on both
// the match and mismatch paths cannot be determined from the visible lines.
354 UnresolvedModules.erase(Known);
// Body fragment of printStats: writes the lookup statistics to stderr.
359 std::fprintf(
stderr,
"*** Global Module Index Statistics:\n");
// The ratio line is printed only when at least one lookup happened, which
// also keeps the division below from dividing by zero.
360 if (NumIdentifierLookups) {
// NOTE(review): this call is unqualified fprintf while the neighbouring calls
// use std::fprintf; harmless if both are declared, but inconsistent.
361 fprintf(
stderr,
" %u / %u identifier lookups succeeded (%f%%)\n",
362 NumIdentifierLookupHits, NumIdentifierLookups,
363 (
double)NumIdentifierLookupHits*100.0/NumIdentifierLookups);
365 std::fprintf(
stderr,
"\n");
// Body fragment of dump: prints a header and then one "** <file name>" line
// per entry of Modules to llvm::errs(). Additional per-module output between
// the visible statements is on lines not shown.
369 llvm::errs() <<
"*** Global Module Index Dump:\n";
370 llvm::errs() <<
"Module files:\n";
371 for (
auto &MI : Modules) {
372 llvm::errs() <<
"** " << MI.FileName <<
"\n";
376 llvm::errs() <<
"\n";
378 llvm::errs() <<
"\n";
387 struct ModuleFileInfo {
397 struct ImportedModuleFileInfo {
399 time_t StoredModTime;
402 : StoredSize(Size), StoredModTime(ModTime), StoredSignature(Sig) {}
// Accumulates information from module files (loadModuleFile) and serializes
// it as a global index (writeIndex). Access specifiers, the header of the
// module-info accessor, and the closing brace are not visible in this excerpt.
406 class GlobalModuleIndexBuilder {
407 FileManager &FileMgr;
408 const PCHContainerReader &PCHContainerRdr;
// Module file -> its ModuleFileInfo, kept in a MapVector.
411 using ModuleFilesMap = llvm::MapVector<FileEntryRef, ModuleFileInfo>;
414 ModuleFilesMap ModuleFiles;
// Imported file -> what an importer recorded about it. A multimap, so the
// same file can carry several recorded entries.
418 using ImportedModuleFilesMap =
419 std::multimap<FileEntryRef, ImportedModuleFileInfo>;
422 ImportedModuleFilesMap ImportedModuleFiles;
// Identifier name -> list of unsigned IDs. loadModuleFile appends the ID of
// the module file being loaded.
426 typedef llvm::StringMap<SmallVector<unsigned, 2> > InterestingIdentifierMap;
430 InterestingIdentifierMap InterestingIdentifiers;
433 void emitBlockInfoBlock(llvm::BitstreamWriter &Stream);
// Fragment of the accessor used elsewhere as getModuleFileInfo(File):
// inserts an entry for File if absent. NewID is read from ModuleFiles.size()
// after the try_emplace; how it is assigned to the entry is on lines not shown.
437 auto [It, Inserted] = ModuleFiles.try_emplace(
File);
439 unsigned NewID = ModuleFiles.size();
447 explicit GlobalModuleIndexBuilder(
448 FileManager &FileMgr,
const PCHContainerReader &PCHContainerRdr)
449 : FileMgr(FileMgr), PCHContainerRdr(PCHContainerRdr) {}
// Reads one module file and records its imports, identifiers and signature.
452 llvm::Error loadModuleFile(FileEntryRef
File);
// Serializes the collected data to Stream. The caller in
// GlobalModuleIndex::writeIndex treats a true result as failure.
456 bool writeIndex(llvm::BitstreamWriter &Stream);
461 llvm::BitstreamWriter &Stream,
465 Stream.EmitRecord(llvm::bitc::BLOCKINFO_CODE_SETBID,
Record);
468 if (!Name || Name[0] == 0)
return;
471 Record.push_back(*Name++);
472 Stream.EmitRecord(llvm::bitc::BLOCKINFO_CODE_BLOCKNAME,
Record);
476 llvm::BitstreamWriter &Stream,
481 Record.push_back(*Name++);
482 Stream.EmitRecord(llvm::bitc::BLOCKINFO_CODE_SETRECORDNAME,
Record);
// Emits the block-info block that names the index's block and records.
// The return type, the declaration of Record, the RECORD(...) uses, and the
// end of the function are on lines not shown.
486GlobalModuleIndexBuilder::emitBlockInfoBlock(llvm::BitstreamWriter &Stream) {
488 Stream.EnterBlockInfoBlock();
// BLOCK(X) passes the enumerator X_ID together with the stringized X, and
// RECORD(X) passes X with its own stringized name, so emitted names track
// the enumerator spellings.
490#define BLOCK(X) emitBlockID(X ## _ID, #X, Stream, Record)
491#define RECORD(X) emitRecordID(X, #X, Stream, Record)
492 BLOCK(GLOBAL_INDEX_BLOCK);
// Lookup trait used by loadModuleFile to scan a module file's identifier
// table. Its base class, other members, and closing brace are not visible in
// this excerpt.
503 class InterestingASTIdentifierLookupTrait
// Each entry decodes to (identifier key, is-interesting flag).
508 typedef std::pair<StringRef, bool> data_type;
510 data_type ReadData(
const internal_key_type& k,
511 const unsigned char* d,
515 using namespace llvm::support;
// Reads one little-endian IdentifierID from d (the declaration receiving it,
// RawID, starts on a line not shown). Only its lowest bit is used here.
517 endian::readNext<IdentifierID, llvm::endianness::little>(d);
518 bool IsInteresting = RawID & 0x01;
519 return std::make_pair(k, IsInteresting);
// Body fragment of GlobalModuleIndexBuilder::loadModuleFile(FileEntryRef File)
// (the signature and many interior lines are not visible here).
// Opens the module file, checks its signature, then walks the bitstream
// collecting imports, interesting identifiers and the file signature.
527 auto Buffer =
FileMgr.getBufferForFile(
File,
true);
529 return llvm::createStringError(Buffer.getError(),
530 "failed getting buffer for module file");
// The cursor reads the AST payload that the container reader extracts from
// the file buffer.
533 llvm::BitstreamCursor InStream(PCHContainerRdr.
ExtractPCH(**Buffer));
// The payload must begin with the bytes 'C','P','C','H'; the per-byte read
// and comparison are on lines not shown.
536 for (
unsigned char C : {
'C',
'P',
'C',
'H'})
539 return llvm::createStringError(std::errc::illegal_byte_sequence,
540 "expected signature CPCH");
542 return Res.takeError();
// ID assigned to this module file (created on first request).
546 unsigned ID = getModuleFileInfo(
File).ID;
// State records which kind of block the cursor is currently inside.
549 enum {
Other, ControlBlock, ASTBlock, DiagnosticOptionsBlock } State =
Other;
554 return MaybeEntry.takeError();
555 llvm::BitstreamEntry Entry = MaybeEntry.get();
557 switch (Entry.Kind) {
558 case llvm::BitstreamEntry::Error:
562 case llvm::BitstreamEntry::Record:
564 if (State ==
Other) {
568 return Skipped.takeError();
// Sub-blocks: the visible assignments show State switching to ControlBlock
// and DiagnosticOptionsBlock, the AST block being entered, and a SkipBlock
// call. The conditions selecting each path are on lines not shown.
574 case llvm::BitstreamEntry::SubBlock:
580 State = ControlBlock;
585 if (llvm::Error Err = InStream.EnterSubBlock(
AST_BLOCK_ID))
598 State = DiagnosticOptionsBlock;
602 if (llvm::Error Err = InStream.SkipBlock())
607 case llvm::BitstreamEntry::EndBlock:
617 return MaybeCode.takeError();
618 unsigned Code = MaybeCode.get();
// IMPORT record in the control block: fields are consumed from Record via
// Idx, and string data is consumed from the front of Blob.
621 if (State == ControlBlock && Code ==
IMPORT) {
// Drop a leading run of Blob whose length is given by the record.
633 Blob = Blob.substr(
Record[Idx++]);
// Size and modification time the importer stored for the imported file.
639 off_t StoredSize = (off_t)
Record[Idx++];
640 time_t StoredModTime = (time_t)
Record[Idx++];
646 SignatureBytes.end());
// Length-prefixed file name of the imported module, taken from Blob.
650 unsigned Length =
Record[Idx++];
651 StringRef ImportedFile = Blob.substr(0, Length);
652 Blob = Blob.substr(Length);
656 FileMgr.getOptionalFileRef(ImportedFile,
false,
660 return llvm::createStringError(std::errc::bad_file_descriptor,
661 "imported file \"%s\" not found",
662 std::string(ImportedFile).c_str());
// Remember what this importer recorded about the file; writeIndex later
// compares these stored values against the file's current state.
666 ImportedModuleFiles.insert(std::make_pair(
667 *DependsOnFile, ImportedModuleFileInfo(StoredSize, StoredModTime,
// Record the dependency edge from File to the imported file, by ID.
671 unsigned DependsOnID = getModuleFileInfo(*DependsOnFile).ID;
672 getModuleFileInfo(
File).Dependencies.push_back(DependsOnID);
// Identifier table: built from three pointers into Blob, using the same
// layout as the reader in the GlobalModuleIndex constructor. The condition
// guarding this section is on a line not shown.
679 typedef llvm::OnDiskIterableChainedHashTable<
680 InterestingASTIdentifierLookupTrait> InterestingIdentifierTable;
681 std::unique_ptr<InterestingIdentifierTable> Table(
682 InterestingIdentifierTable::Create(
683 (
const unsigned char *)Blob.data() +
Record[0],
684 (
const unsigned char *)Blob.data() +
sizeof(uint32_t),
685 (
const unsigned char *)Blob.data()));
686 for (InterestingIdentifierTable::data_iterator D = Table->data_begin(),
687 DEnd = Table->data_end();
689 std::pair<StringRef, bool> Ident = *D;
// Two outcomes per identifier: either this module's ID is appended to the
// identifier's list, or the map entry is merely touched so that the key
// exists with whatever list it already has. The selecting condition is not
// shown (presumably Ident.second, the is-interesting flag; confirm).
691 InterestingIdentifiers[Ident.first].push_back(ID);
693 (
void)InterestingIdentifiers[Ident.first];
// SIGNATURE record in the diagnostic-options state: store the module file's
// signature. The decoding of Signature and the assertion whose message
// appears below are on lines not shown.
698 if (State == DiagnosticOptionsBlock && Code ==
SIGNATURE) {
701 "Dummy AST file signature not backpatched in ASTWriter.");
702 getModuleFileInfo(
File).Signature = Signature;
708 return llvm::Error::success();
// Writer-side trait used with OnDiskChainedHashTableGenerator to emit the
// identifier table that IdentifierIndexReaderTrait reads back.
// Closing braces and a few other lines of this class are not visible in this
// excerpt.
715class IdentifierIndexWriterTrait {
717 typedef StringRef key_type;
718 typedef StringRef key_type_ref;
719 typedef SmallVector<unsigned, 2> data_type;
720 typedef const SmallVector<unsigned, 2> &data_type_ref;
721 typedef unsigned hash_value_type;
722 typedef unsigned offset_type;
// Same hash function as the reader trait (llvm::djbHash).
724 static hash_value_type
ComputeHash(key_type_ref Key) {
725 return llvm::djbHash(Key);
// Writes the entry header: key length then data length, each as a
// little-endian uint16_t, matching the two uint16_t reads in
// IdentifierIndexReaderTrait::ReadKeyDataLength. Data length is 4 bytes per
// stored ID.
// NOTE(review): KeyLen and DataLen are `unsigned` but are written as
// uint16_t, so a key longer than 65535 bytes or a list of more than 16383
// IDs would be truncated on disk while the untruncated values are returned.
// Confirm whether such sizes are possible or guarded elsewhere.
728 std::pair<unsigned,unsigned>
729 EmitKeyDataLength(raw_ostream& Out, key_type_ref Key, data_type_ref
Data) {
730 using namespace llvm::support;
731 endian::Writer
LE(Out, llvm::endianness::little);
732 unsigned KeyLen = Key.size();
733 unsigned DataLen =
Data.size() * 4;
734 LE.write<uint16_t>(KeyLen);
735 LE.write<uint16_t>(DataLen);
736 return std::make_pair(KeyLen, DataLen);
// Writes the first KeyLen bytes of the key.
739 void EmitKey(raw_ostream& Out, key_type_ref Key,
unsigned KeyLen) {
740 Out.write(Key.data(), KeyLen);
// Writes every ID in Data as a little-endian uint32_t, in order. The final
// parameter is on a line not shown.
743 void EmitData(raw_ostream& Out, key_type_ref Key, data_type_ref
Data,
745 using namespace llvm::support;
746 for (
unsigned I = 0, N =
Data.size(); I != N; ++I)
747 endian::write<uint32_t>(Out,
Data[I], llvm::endianness::little);
// Serialize the collected module and identifier data to Stream.
// The caller in GlobalModuleIndex::writeIndex treats a true return as
// failure. Many interior lines, including all return statements, are not
// visible in this excerpt.
753bool GlobalModuleIndexBuilder::writeIndex(llvm::BitstreamWriter &Stream) {
// Validation pass over everything importers recorded. If the module has a
// signature, compare it with the stored signature; otherwise compare the
// stored size and modification time with the file's current values. The
// action taken on a mismatch is on lines not shown.
// NOTE(review): `auto MapEntry` copies each multimap element, so Info refers
// to that per-iteration copy. Binding by `const auto &` would avoid the copy.
754 for (
auto MapEntry : ImportedModuleFiles) {
755 auto File = MapEntry.first;
756 ImportedModuleFileInfo &Info = MapEntry.second;
757 if (getModuleFileInfo(
File).Signature) {
758 if (getModuleFileInfo(
File).Signature != Info.StoredSignature)
761 }
else if (Info.StoredSize !=
File.getSize() ||
762 Info.StoredModTime !=
File.getModificationTime())
767 using namespace llvm;
768 llvm::TimeTraceScope TimeScope(
"Module WriteIndex");
// File signature: the four bytes 'B','C','G','I', 8 bits each. This is the
// same sequence the reader checks for ("expected signature BCGI").
771 Stream.Emit((
unsigned)
'B', 8);
772 Stream.Emit((
unsigned)
'C', 8);
773 Stream.Emit((
unsigned)
'G', 8);
774 Stream.Emit((
unsigned)
'I', 8);
778 emitBlockInfoBlock(Stream);
// Open the global index block (second argument 3; presumably the
// abbreviation-ID width, confirm against BitstreamWriter).
780 Stream.EnterSubblock(GLOBAL_INDEX_BLOCK_ID, 3);
785 Stream.EmitRecord(INDEX_METADATA,
Record);
// One MODULE record per module file: ID, size, modification time,
// length-prefixed name, length-prefixed dependency list. This is the order
// in which the GlobalModuleIndex constructor reads them back.
788 for (ModuleFilesMap::iterator M = ModuleFiles.begin(),
789 MEnd = ModuleFiles.end();
792 Record.push_back(M->second.ID);
793 Record.push_back(M->first.getSize());
794 Record.push_back(M->first.getModificationTime());
797 StringRef Name(M->first.getName());
798 Record.push_back(Name.size());
799 Record.append(Name.begin(), Name.end());
802 Record.push_back(M->second.Dependencies.size());
803 Record.append(M->second.Dependencies.begin(), M->second.Dependencies.end());
804 Stream.EmitRecord(MODULE,
Record);
// Build the on-disk hash table from every collected identifier.
809 llvm::OnDiskChainedHashTableGenerator<IdentifierIndexWriterTrait>
Generator;
810 IdentifierIndexWriterTrait Trait;
813 for (InterestingIdentifierMap::iterator I = InterestingIdentifiers.begin(),
814 IEnd = InterestingIdentifiers.end();
816 Generator.insert(I->first(), I->second, Trait);
823 using namespace llvm::support;
// A zero uint32_t is written ahead of the table data. The reader passes
// Blob + sizeof(uint32_t) to Create, consistent with this leading word.
// Its purpose is not stated in the visible lines.
826 endian::write<uint32_t>(Out, 0, llvm::endianness::little);
827 BucketOffset =
Generator.Emit(Out, Trait);
// Abbreviation for the identifier table record: literal IDENTIFIER_INDEX
// code, one 32-bit fixed field, then a blob.
831 auto Abbrev = std::make_shared<BitCodeAbbrev>();
832 Abbrev->Add(BitCodeAbbrevOp(IDENTIFIER_INDEX));
833 Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32));
834 Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob));
835 unsigned IDTableAbbrev = Stream.EmitAbbrev(std::move(Abbrev));
// Body fragment of the static GlobalModuleIndex::writeIndex(FileMgr,
// PCHContainerRdr, Path): locks the index path, loads every module file in
// the directory into a builder, and writes the result to IndexPath.
// The signature and many interior lines are not visible in this excerpt.
851 auto BypassSandbox = llvm::sys::sandbox::scopedDisable();
859 llvm::LockFileManager Lock(IndexPath);
// NOTE(review): the error from tryLock() is consumed and replaced with the
// fixed message "LFS error", so the underlying cause is not reported.
861 if (llvm::Error Err = Lock.tryLock().moveInto(Owned)) {
862 llvm::consumeError(std::move(Err));
863 return llvm::createStringError(std::errc::io_error,
"LFS error");
// Early return reporting that another party is building the index; its
// condition is on a line not shown.
868 return llvm::createStringError(std::errc::device_or_resource_busy,
869 "someone else is building the index");
875 GlobalModuleIndexBuilder Builder(
FileMgr, PCHContainerRdr);
// Walk the directory entries of Path.
879 for (llvm::sys::fs::directory_iterator D(Path, EC), DEnd;
// Entries whose extension is not ".pcm" are handled separately here.
// NOTE(review): the inner test compares the result of
// llvm::sys::path::extension with ".pcm.lock". If extension() yields only
// the final suffix (e.g. ".lock" for "foo.pcm.lock"), this comparison can
// never be true and the busy check is dead. Confirm against Path.h.
883 if (llvm::sys::path::extension(D->path()) !=
".pcm") {
887 if (llvm::sys::path::extension(D->path()) ==
".pcm.lock")
888 return llvm::createStringError(std::errc::device_or_resource_busy,
889 "someone else is building the index");
895 auto ModuleFile =
FileMgr.getOptionalFileRef(D->path());
900 if (llvm::Error Err = Builder.loadModuleFile(*ModuleFile))
// Serialize into OutputBuffer; a true result from the builder is reported as
// an I/O error.
907 llvm::BitstreamWriter OutputStream(OutputBuffer);
908 if (Builder.writeIndex(OutputStream))
909 return llvm::createStringError(std::errc::io_error,
910 "failed writing index");
// The final file is produced through llvm::writeToOutput; the callback
// captures OutputBuffer by reference (the write itself is on a line not
// shown) and reports success.
913 return llvm::writeToOutput(IndexPath, [&OutputBuffer](llvm::raw_ostream &OS) {
915 return llvm::Error::success();
// Fragment of the iterator class over the keys of the identifier table (the
// class header and several lines are not visible here).
// Current is the iteration position; End is the second stored key iterator
// (its assignment is on a line not shown).
922 IdentifierIndexTable::key_iterator Current;
925 IdentifierIndexTable::key_iterator End;
928 explicit GlobalIndexIdentifierIterator(IdentifierIndexTable &Idx) {
929 Current = Idx.key_begin();
// Overrides the base iterator's Next(). The visible line reads the key at
// Current; the end check and the advance are on lines not shown.
933 StringRef
Next()
override {
937 StringRef Result = *Current;
// Body of createIdentifierIterator (signature not visible here): casts the
// type-erased IdentifierIndex back to the table type and returns a newly
// allocated iterator over it.
// NOTE(review): IdentifierIndex is dereferenced with no null check visible,
// whereas lookupIdentifier guards with `if (!IdentifierIndex)`. The iterator
// is also returned as a raw `new` pointer, so ownership presumably passes to
// the caller. Confirm both.
945 IdentifierIndexTable &Table =
946 *
static_cast<IdentifierIndexTable *
>(IdentifierIndex);
947 return new GlobalIndexIdentifierIterator(Table);
#define RECORD(CLASS, BASE)
Defines the clang::FileManager interface and associated types.
static void emitRecordID(unsigned ID, const char *Name, llvm::BitstreamWriter &Stream, SmallVectorImpl< uint64_t > &Record)
static const unsigned CurrentVersion
The global index file version.
static const char *const IndexFileName
The name of the global index file.
static void emitBlockID(unsigned ID, const char *Name, llvm::BitstreamWriter &Stream, SmallVectorImpl< uint64_t > &Record)
llvm::MachO::Record Record
static std::string toString(const clang::SanitizerSet &Sanitizers)
Produce a string containing comma-separated names of sanitizers in Sanitizers set.
#define IMPORT(DERIVED, BASE)
#define BLOCK(DERIVED, BASE)
A reference to a FileEntry that includes the name of the file as it was accessed by the FileManager's...
Implements support for file system lookup, file system caching, and directory search management.
bool loadedModuleFile(ModuleFile *File)
Note that the given module file has been loaded.
void printStats()
Print statistics to standard error.
llvm::SmallPtrSet< ModuleFile *, 4 > HitSet
A set of module files in which we found a result.
bool lookupIdentifier(llvm::StringRef Name, HitSet &Hits)
Look for all of the module files with information about the given identifier, e.g....
void getModuleDependencies(ModuleFile *File, llvm::SmallVectorImpl< ModuleFile * > &Dependencies)
Retrieve the set of module files on which the given module file directly depends.
IdentifierIterator * createIdentifierIterator() const
Returns an iterator for identifiers stored in the index table.
static std::pair< GlobalModuleIndex *, llvm::Error > readIndex(llvm::StringRef Path)
Read a global index file for the given directory.
void dump()
Print debugging view to standard error.
static llvm::Error writeIndex(FileManager &FileMgr, const PCHContainerReader &PCHContainerRdr, llvm::StringRef Path)
Write a global index into the given directory.
An iterator that walks over all of the known identifiers in the lookup table.
Implements an efficient mapping from strings to IdentifierInfo nodes.
This abstract interface provides operations for unwrapping containers for serialized ASTs (precompile...
virtual llvm::StringRef ExtractPCH(llvm::MemoryBufferRef Buffer) const =0
Returns the serialized AST inside the PCH container Buffer.
Base class for the trait describing the on-disk hash table for the identifiers in an AST file.
@ ModuleFileInfo
Dump information about a module file.
bool LE(InterpState &S, CodePtr OpPC)
@ AST_BLOCK_ID
The AST block, which acts as a container around the full AST block.
@ CONTROL_BLOCK_ID
The control block, which contains all of the information that needs to be validated prior to committi...
@ UNHASHED_CONTROL_BLOCK_ID
A block with unhashed content.
@ SIGNATURE
Record code for the signature that identifies this AST file.
@ IDENTIFIER_TABLE
Record code for the identifier table.
unsigned ComputeHash(Selector Sel)
uint64_t IdentifierID
An ID number that refers to an identifier in an AST file.
The JSON file list parser is used to communicate input to InstallAPI.
@ Other
Other implicit parameter.
__LIBC_ATTRS FILE * stderr
The signature of a module, which is a hash of the AST content.
static constexpr size_t size
static ASTFileSignature create(std::array< uint8_t, 20 > Bytes)
static ASTFileSignature createDummy()