//===--- GlobalModuleIndex.cpp - Global Module Index ------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the GlobalModuleIndex class.
//
//===----------------------------------------------------------------------===//

#include "clang/Serialization/GlobalModuleIndex.h"
#include "ASTReaderInternals.h"
#include "clang/Basic/FileManager.h"
#include "clang/Lex/HeaderSearch.h"
#include "clang/Serialization/ASTBitCodes.h"
#include "clang/Serialization/ModuleFile.h"
#include "clang/Serialization/PCHContainerOperations.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Bitstream/BitstreamReader.h"
#include "llvm/Bitstream/BitstreamWriter.h"
#include "llvm/Support/DJB.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/FileUtilities.h"
#include "llvm/Support/LockFileManager.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/OnDiskHashTable.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/TimeProfiler.h"
#include <cstdio>
using namespace clang;
using namespace serialization;

//----------------------------------------------------------------------------//
// Shared constants
4098944Sobrien//----------------------------------------------------------------------------// 4119370Spstnamespace { 4298944Sobrien enum { 4398944Sobrien /// The block containing the index. 4498944Sobrien GLOBAL_INDEX_BLOCK_ID = llvm::bitc::FIRST_APPLICATION_BLOCKID 45130803Smarcel }; 46130803Smarcel 47130803Smarcel /// Describes the record types in the index. 4819370Spst enum IndexRecordTypes { 4919370Spst /// Contains version information and potentially other metadata, 5019370Spst /// used to determine if we can read this global index file. 5198944Sobrien INDEX_METADATA, 5219370Spst /// Describes a module, including its file name and dependencies. 5398944Sobrien MODULE, 5419370Spst /// The index for identifiers. 5598944Sobrien IDENTIFIER_INDEX 5619370Spst }; 5798944Sobrien} 5819370Spst 5998944Sobrien/// The name of the global index file. 6019370Spststatic const char * const IndexFileName = "modules.idx"; 6198944Sobrien 6219370Spst/// The global index file version. 6398944Sobrienstatic const unsigned CurrentVersion = 1; 6446283Sdfr 6598944Sobrien//----------------------------------------------------------------------------// 6698944Sobrien// Global module index reader. 6798944Sobrien//----------------------------------------------------------------------------// 6898944Sobrien 69130803Smarcelnamespace { 7098944Sobrien 7198944Sobrien/// Trait used to read the identifier index from the on-disk hash 72130803Smarcel/// table. 
7398944Sobrienclass IdentifierIndexReaderTrait { 7498944Sobrienpublic: 7598944Sobrien typedef StringRef external_key_type; 7698944Sobrien typedef StringRef internal_key_type; 7798944Sobrien typedef SmallVector<unsigned, 2> data_type; 7898944Sobrien typedef unsigned hash_value_type; 7998944Sobrien typedef unsigned offset_type; 80130803Smarcel 81130803Smarcel static bool EqualKey(const internal_key_type& a, const internal_key_type& b) { 8298944Sobrien return a == b; 8398944Sobrien } 8446283Sdfr 8546283Sdfr static hash_value_type ComputeHash(const internal_key_type& a) { 8646283Sdfr return llvm::djbHash(a); 8798944Sobrien } 8898944Sobrien 8998944Sobrien static std::pair<unsigned, unsigned> 9098944Sobrien ReadKeyDataLength(const unsigned char*& d) { 9146283Sdfr using namespace llvm::support; 9298944Sobrien unsigned KeyLen = endian::readNext<uint16_t, little, unaligned>(d); 9346283Sdfr unsigned DataLen = endian::readNext<uint16_t, little, unaligned>(d); 9498944Sobrien return std::make_pair(KeyLen, DataLen); 9546283Sdfr } 9698944Sobrien 9798944Sobrien static const internal_key_type& 9898944Sobrien GetInternalKey(const external_key_type& x) { return x; } 9946283Sdfr 10098944Sobrien static const external_key_type& 10146283Sdfr GetExternalKey(const internal_key_type& x) { return x; } 10298944Sobrien 10398944Sobrien static internal_key_type ReadKey(const unsigned char* d, unsigned n) { 10419370Spst return StringRef((const char *)d, n); 10598944Sobrien } 10698944Sobrien 10719370Spst static data_type ReadData(const internal_key_type& k, 10819370Spst const unsigned char* d, 10998944Sobrien unsigned DataLen) { 11019370Spst using namespace llvm::support; 11198944Sobrien 11298944Sobrien data_type Result; 11398944Sobrien while (DataLen > 0) { 11498944Sobrien unsigned ID = endian::readNext<uint32_t, little, unaligned>(d); 11598944Sobrien Result.push_back(ID); 11698944Sobrien DataLen -= 4; 11798944Sobrien } 11846283Sdfr 11998944Sobrien return Result; 12098944Sobrien } 
12198944Sobrien}; 12298944Sobrien 12398944Sobrientypedef llvm::OnDiskIterableChainedHashTable<IdentifierIndexReaderTrait> 12498944Sobrien IdentifierIndexTable; 12598944Sobrien 12698944Sobrien} 12798944Sobrien 12898944SobrienGlobalModuleIndex::GlobalModuleIndex( 12998944Sobrien std::unique_ptr<llvm::MemoryBuffer> IndexBuffer, 13098944Sobrien llvm::BitstreamCursor Cursor) 13198944Sobrien : Buffer(std::move(IndexBuffer)), IdentifierIndex(), NumIdentifierLookups(), 13298944Sobrien NumIdentifierLookupHits() { 13398944Sobrien auto Fail = [&](llvm::Error &&Err) { 13498944Sobrien report_fatal_error("Module index '" + Buffer->getBufferIdentifier() + 13598944Sobrien "' failed: " + toString(std::move(Err))); 13698944Sobrien }; 13798944Sobrien 13898944Sobrien llvm::TimeTraceScope TimeScope("Module LoadIndex"); 13998944Sobrien // Read the global index. 14098944Sobrien bool InGlobalIndexBlock = false; 14198944Sobrien bool Done = false; 14298944Sobrien while (!Done) { 14398944Sobrien llvm::BitstreamEntry Entry; 14498944Sobrien if (Expected<llvm::BitstreamEntry> Res = Cursor.advance()) 14598944Sobrien Entry = Res.get(); 14698944Sobrien else 14798944Sobrien Fail(Res.takeError()); 14898944Sobrien 14998944Sobrien switch (Entry.Kind) { 15046283Sdfr case llvm::BitstreamEntry::Error: 15146283Sdfr return; 15246283Sdfr 15346283Sdfr case llvm::BitstreamEntry::EndBlock: 15446283Sdfr if (InGlobalIndexBlock) { 15546283Sdfr InGlobalIndexBlock = false; 15646283Sdfr Done = true; 15746283Sdfr continue; 15846283Sdfr } 15946283Sdfr return; 16046283Sdfr 16146283Sdfr 16246283Sdfr case llvm::BitstreamEntry::Record: 16346283Sdfr // Entries in the global index block are handled below. 
16446283Sdfr if (InGlobalIndexBlock) 16546283Sdfr break; 16646283Sdfr 16746283Sdfr return; 16846283Sdfr 16946283Sdfr case llvm::BitstreamEntry::SubBlock: 17046283Sdfr if (!InGlobalIndexBlock && Entry.ID == GLOBAL_INDEX_BLOCK_ID) { 17146283Sdfr if (llvm::Error Err = Cursor.EnterSubBlock(GLOBAL_INDEX_BLOCK_ID)) 17246283Sdfr Fail(std::move(Err)); 17346283Sdfr InGlobalIndexBlock = true; 17419370Spst } else if (llvm::Error Err = Cursor.SkipBlock()) 17519370Spst Fail(std::move(Err)); 17619370Spst continue; 17719370Spst } 17819370Spst 17919370Spst SmallVector<uint64_t, 64> Record; 18019370Spst StringRef Blob; 18119370Spst Expected<unsigned> MaybeIndexRecord = 18219370Spst Cursor.readRecord(Entry.ID, Record, &Blob); 18319370Spst if (!MaybeIndexRecord) 18498944Sobrien Fail(MaybeIndexRecord.takeError()); 18598944Sobrien IndexRecordTypes IndexRecord = 18698944Sobrien static_cast<IndexRecordTypes>(MaybeIndexRecord.get()); 18798944Sobrien switch (IndexRecord) { 18898944Sobrien case INDEX_METADATA: 189130803Smarcel // Make sure that the version matches. 19098944Sobrien if (Record.size() < 1 || Record[0] != CurrentVersion) 19198944Sobrien return; 19298944Sobrien break; 19398944Sobrien 19498944Sobrien case MODULE: { 19598944Sobrien unsigned Idx = 0; 19698944Sobrien unsigned ID = Record[Idx++]; 19798944Sobrien 19898944Sobrien // Make room for this module's information. 19998944Sobrien if (ID == Modules.size()) 20098944Sobrien Modules.push_back(ModuleInfo()); 201130803Smarcel else 20298944Sobrien Modules.resize(ID + 1); 20398944Sobrien 20498944Sobrien // Size/modification time for this module file at the time the 20598944Sobrien // global index was built. 20698944Sobrien Modules[ID].Size = Record[Idx++]; 20798944Sobrien Modules[ID].ModTime = Record[Idx++]; 20898944Sobrien 20998944Sobrien // File name. 
21098944Sobrien unsigned NameLen = Record[Idx++]; 21198944Sobrien Modules[ID].FileName.assign(Record.begin() + Idx, 21298944Sobrien Record.begin() + Idx + NameLen); 21398944Sobrien Idx += NameLen; 21498944Sobrien 21598944Sobrien // Dependencies 21698944Sobrien unsigned NumDeps = Record[Idx++]; 21798944Sobrien Modules[ID].Dependencies.insert(Modules[ID].Dependencies.end(), 21898944Sobrien Record.begin() + Idx, 21919370Spst Record.begin() + Idx + NumDeps); 22019370Spst Idx += NumDeps; 22119370Spst 22219370Spst // Make sure we're at the end of the record. 22319370Spst assert(Idx == Record.size() && "More module info?"); 22419370Spst 22519370Spst // Record this module as an unresolved module. 22619370Spst // FIXME: this doesn't work correctly for module names containing path 22719370Spst // separators. 22819370Spst StringRef ModuleName = llvm::sys::path::stem(Modules[ID].FileName); 22919370Spst // Remove the -<hash of ModuleMapPath> 23019370Spst ModuleName = ModuleName.rsplit('-').first; 23119370Spst UnresolvedModules[ModuleName] = ID; 23219370Spst break; 23319370Spst } 23419370Spst 23519370Spst case IDENTIFIER_INDEX: 23619370Spst // Wire up the identifier index. 23719370Spst if (Record[0]) { 23819370Spst IdentifierIndex = IdentifierIndexTable::Create( 23919370Spst (const unsigned char *)Blob.data() + Record[0], 24019370Spst (const unsigned char *)Blob.data() + sizeof(uint32_t), 24198944Sobrien (const unsigned char *)Blob.data(), IdentifierIndexReaderTrait()); 24219370Spst } 24398944Sobrien break; 24498944Sobrien } 24519370Spst } 24619370Spst} 24719370Spst 24819370SpstGlobalModuleIndex::~GlobalModuleIndex() { 24919370Spst delete static_cast<IdentifierIndexTable *>(IdentifierIndex); 25019370Spst} 25119370Spst 25219370Spststd::pair<GlobalModuleIndex *, llvm::Error> 25319370SpstGlobalModuleIndex::readIndex(StringRef Path) { 25419370Spst // Load the index file, if it's there. 
25519370Spst llvm::SmallString<128> IndexPath; 25619370Spst IndexPath += Path; 25719370Spst llvm::sys::path::append(IndexPath, IndexFileName); 25819370Spst 25919370Spst llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> BufferOrErr = 26019370Spst llvm::MemoryBuffer::getFile(IndexPath.c_str()); 26146283Sdfr if (!BufferOrErr) 26219370Spst return std::make_pair(nullptr, 26319370Spst llvm::errorCodeToError(BufferOrErr.getError())); 26419370Spst std::unique_ptr<llvm::MemoryBuffer> Buffer = std::move(BufferOrErr.get()); 26546283Sdfr 26619370Spst /// The main bitstream cursor for the main block. 26719370Spst llvm::BitstreamCursor Cursor(*Buffer); 26819370Spst 26919370Spst // Sniff for the signature. 27019370Spst for (unsigned char C : {'B', 'C', 'G', 'I'}) { 27119370Spst if (Expected<llvm::SimpleBitstreamCursor::word_t> Res = Cursor.Read(8)) { 27219370Spst if (Res.get() != C) 27319370Spst return std::make_pair( 27419370Spst nullptr, llvm::createStringError(std::errc::illegal_byte_sequence, 27519370Spst "expected signature BCGI")); 27619370Spst } else 27719370Spst return std::make_pair(nullptr, Res.takeError()); 27819370Spst } 27919370Spst 28019370Spst return std::make_pair(new GlobalModuleIndex(std::move(Buffer), Cursor), 28119370Spst llvm::Error::success()); 28219370Spst} 28319370Spst 28419370Spstvoid 285130803SmarcelGlobalModuleIndex::getKnownModules(SmallVectorImpl<ModuleFile *> &ModuleFiles) { 28619370Spst ModuleFiles.clear(); 28719370Spst for (unsigned I = 0, N = Modules.size(); I != N; ++I) { 28819370Spst if (ModuleFile *MF = Modules[I].File) 28919370Spst ModuleFiles.push_back(MF); 29019370Spst } 29119370Spst} 29219370Spst 29319370Spstvoid GlobalModuleIndex::getModuleDependencies( 29419370Spst ModuleFile *File, 29519370Spst SmallVectorImpl<ModuleFile *> &Dependencies) { 29619370Spst // Look for information about this module file. 
297130803Smarcel llvm::DenseMap<ModuleFile *, unsigned>::iterator Known 29819370Spst = ModulesByFile.find(File); 29919370Spst if (Known == ModulesByFile.end()) 30019370Spst return; 30119370Spst 30219370Spst // Record dependencies. 30319370Spst Dependencies.clear(); 30419370Spst ArrayRef<unsigned> StoredDependencies = Modules[Known->second].Dependencies; 30519370Spst for (unsigned I = 0, N = StoredDependencies.size(); I != N; ++I) { 30619370Spst if (ModuleFile *MF = Modules[I].File) 30746283Sdfr Dependencies.push_back(MF); 30846283Sdfr } 30919370Spst} 31046283Sdfr 31146283Sdfrbool GlobalModuleIndex::lookupIdentifier(StringRef Name, HitSet &Hits) { 31246283Sdfr Hits.clear(); 31346283Sdfr 31446283Sdfr // If there's no identifier index, there is nothing we can do. 31546283Sdfr if (!IdentifierIndex) 31646283Sdfr return false; 31719370Spst 31898944Sobrien // Look into the identifier index. 31998944Sobrien ++NumIdentifierLookups; 32098944Sobrien IdentifierIndexTable &Table 32198944Sobrien = *static_cast<IdentifierIndexTable *>(IdentifierIndex); 32298944Sobrien IdentifierIndexTable::iterator Known = Table.find(Name); 32398944Sobrien if (Known == Table.end()) { 32446283Sdfr return false; 32546283Sdfr } 32646283Sdfr 32746283Sdfr SmallVector<unsigned, 2> ModuleIDs = *Known; 32846283Sdfr for (unsigned I = 0, N = ModuleIDs.size(); I != N; ++I) { 329130803Smarcel if (ModuleFile *MF = Modules[ModuleIDs[I]].File) 330130803Smarcel Hits.insert(MF); 331130803Smarcel } 33246283Sdfr 333130803Smarcel ++NumIdentifierLookupHits; 334130803Smarcel return true; 33546283Sdfr} 336130803Smarcel 337130803Smarcelbool GlobalModuleIndex::loadedModuleFile(ModuleFile *File) { 338130803Smarcel // Look for the module in the global module index based on the module name. 
33946283Sdfr StringRef Name = File->ModuleName; 34046283Sdfr llvm::StringMap<unsigned>::iterator Known = UnresolvedModules.find(Name); 34198944Sobrien if (Known == UnresolvedModules.end()) { 34298944Sobrien return true; 34398944Sobrien } 344130803Smarcel 34598944Sobrien // Rectify this module with the global module index. 34698944Sobrien ModuleInfo &Info = Modules[Known->second]; 34798944Sobrien 34898944Sobrien // If the size and modification time match what we expected, record this 34946283Sdfr // module file. 35098944Sobrien bool Failed = true; 35119370Spst if (File->File->getSize() == Info.Size && 35246283Sdfr File->File->getModificationTime() == Info.ModTime) { 353130803Smarcel Info.File = File; 354130803Smarcel ModulesByFile[File] = Known->second; 35546283Sdfr 356130803Smarcel Failed = false; 35746283Sdfr } 358130803Smarcel 35946283Sdfr // One way or another, we have resolved this module file. 36046283Sdfr UnresolvedModules.erase(Known); 361130803Smarcel return Failed; 362130803Smarcel} 36346283Sdfr 364130803Smarcelvoid GlobalModuleIndex::printStats() { 365130803Smarcel std::fprintf(stderr, "*** Global Module Index Statistics:\n"); 366130803Smarcel if (NumIdentifierLookups) { 36746283Sdfr fprintf(stderr, " %u / %u identifier lookups succeeded (%f%%)\n", 368130803Smarcel NumIdentifierLookupHits, NumIdentifierLookups, 369130803Smarcel (double)NumIdentifierLookupHits*100.0/NumIdentifierLookups); 370130803Smarcel } 371130803Smarcel std::fprintf(stderr, "\n"); 372130803Smarcel} 373130803Smarcel 37446283SdfrLLVM_DUMP_METHOD void GlobalModuleIndex::dump() { 375130803Smarcel llvm::errs() << "*** Global Module Index Dump:\n"; 376130803Smarcel llvm::errs() << "Module files:\n"; 37746283Sdfr for (auto &MI : Modules) { 378130803Smarcel llvm::errs() << "** " << MI.FileName << "\n"; 379130803Smarcel if (MI.File) 380130803Smarcel MI.File->dump(); 381130803Smarcel else 38246283Sdfr llvm::errs() << "\n"; 383130803Smarcel } 384130803Smarcel llvm::errs() << "\n"; 38546283Sdfr} 
38646283Sdfr 38798944Sobrien//----------------------------------------------------------------------------// 38898944Sobrien// Global module index writer. 38946283Sdfr//----------------------------------------------------------------------------// 39098944Sobrien 39146283Sdfrnamespace { 39246283Sdfr /// Provides information about a specific module file. 39398944Sobrien struct ModuleFileInfo { 39446283Sdfr /// The numberic ID for this module file. 39598944Sobrien unsigned ID; 39698944Sobrien 39798944Sobrien /// The set of modules on which this module depends. Each entry is 39846283Sdfr /// a module ID. 39946283Sdfr SmallVector<unsigned, 4> Dependencies; 40046283Sdfr ASTFileSignature Signature; 40146283Sdfr }; 40246283Sdfr 40346283Sdfr struct ImportedModuleFileInfo { 40446283Sdfr off_t StoredSize; 40546283Sdfr time_t StoredModTime; 40646283Sdfr ASTFileSignature StoredSignature; 40746283Sdfr ImportedModuleFileInfo(off_t Size, time_t ModTime, ASTFileSignature Sig) 40846283Sdfr : StoredSize(Size), StoredModTime(ModTime), StoredSignature(Sig) {} 40946283Sdfr }; 41046283Sdfr 41146283Sdfr /// Builder that generates the global module index file. 41246283Sdfr class GlobalModuleIndexBuilder { 41346283Sdfr FileManager &FileMgr; 41446283Sdfr const PCHContainerReader &PCHContainerRdr; 41546283Sdfr 41646283Sdfr /// Mapping from files to module file information. 41746283Sdfr typedef llvm::MapVector<const FileEntry *, ModuleFileInfo> ModuleFilesMap; 41846283Sdfr 41946283Sdfr /// Information about each of the known module files. 42046283Sdfr ModuleFilesMap ModuleFiles; 42146283Sdfr 42246283Sdfr /// Mapping from the imported module file to the imported 42346283Sdfr /// information. 42446283Sdfr typedef std::multimap<const FileEntry *, ImportedModuleFileInfo> 42546283Sdfr ImportedModuleFilesMap; 42646283Sdfr 42746283Sdfr /// Information about each importing of a module file. 
42846283Sdfr ImportedModuleFilesMap ImportedModuleFiles; 42946283Sdfr 43046283Sdfr /// Mapping from identifiers to the list of module file IDs that 43146283Sdfr /// consider this identifier to be interesting. 43246283Sdfr typedef llvm::StringMap<SmallVector<unsigned, 2> > InterestingIdentifierMap; 43346283Sdfr 43498944Sobrien /// A mapping from all interesting identifiers to the set of module 43598944Sobrien /// files in which those identifiers are considered interesting. 43698944Sobrien InterestingIdentifierMap InterestingIdentifiers; 43798944Sobrien 43898944Sobrien /// Write the block-info block for the global module index file. 43998944Sobrien void emitBlockInfoBlock(llvm::BitstreamWriter &Stream); 44098944Sobrien 44146283Sdfr /// Retrieve the module file information for the given file. 44246283Sdfr ModuleFileInfo &getModuleFileInfo(const FileEntry *File) { 44398944Sobrien llvm::MapVector<const FileEntry *, ModuleFileInfo>::iterator Known 444130803Smarcel = ModuleFiles.find(File); 44598944Sobrien if (Known != ModuleFiles.end()) 44646283Sdfr return Known->second; 44746283Sdfr 44846283Sdfr unsigned NewID = ModuleFiles.size(); 44946283Sdfr ModuleFileInfo &Info = ModuleFiles[File]; 45046283Sdfr Info.ID = NewID; 45198944Sobrien return Info; 45246283Sdfr } 45346283Sdfr 45446283Sdfr public: 45546283Sdfr explicit GlobalModuleIndexBuilder( 45698944Sobrien FileManager &FileMgr, const PCHContainerReader &PCHContainerRdr) 45746283Sdfr : FileMgr(FileMgr), PCHContainerRdr(PCHContainerRdr) {} 45898944Sobrien 45998944Sobrien /// Load the contents of the given module file into the builder. 46098944Sobrien llvm::Error loadModuleFile(const FileEntry *File); 46198944Sobrien 46246283Sdfr /// Write the index to the given bitstream. 46346283Sdfr /// \returns true if an error occurred, false otherwise. 
46446283Sdfr bool writeIndex(llvm::BitstreamWriter &Stream); 46546283Sdfr }; 46646283Sdfr} 46746283Sdfr 46846283Sdfrstatic void emitBlockID(unsigned ID, const char *Name, 46946283Sdfr llvm::BitstreamWriter &Stream, 47046283Sdfr SmallVectorImpl<uint64_t> &Record) { 47146283Sdfr Record.clear(); 47246283Sdfr Record.push_back(ID); 47346283Sdfr Stream.EmitRecord(llvm::bitc::BLOCKINFO_CODE_SETBID, Record); 47446283Sdfr 47546283Sdfr // Emit the block name if present. 47646283Sdfr if (!Name || Name[0] == 0) return; 47746283Sdfr Record.clear(); 478130803Smarcel while (*Name) 479130803Smarcel Record.push_back(*Name++); 480130803Smarcel Stream.EmitRecord(llvm::bitc::BLOCKINFO_CODE_BLOCKNAME, Record); 481130803Smarcel} 482130803Smarcel 483130803Smarcelstatic void emitRecordID(unsigned ID, const char *Name, 484130803Smarcel llvm::BitstreamWriter &Stream, 485130803Smarcel SmallVectorImpl<uint64_t> &Record) { 48646283Sdfr Record.clear(); 48746283Sdfr Record.push_back(ID); 48819370Spst while (*Name) 48919370Spst Record.push_back(*Name++); 49098944Sobrien Stream.EmitRecord(llvm::bitc::BLOCKINFO_CODE_SETRECORDNAME, Record); 49119370Spst} 49219370Spst 49319370Spstvoid 49419370SpstGlobalModuleIndexBuilder::emitBlockInfoBlock(llvm::BitstreamWriter &Stream) { 49598944Sobrien SmallVector<uint64_t, 64> Record; 49698944Sobrien Stream.EnterBlockInfoBlock(); 49798944Sobrien 49898944Sobrien#define BLOCK(X) emitBlockID(X ## _ID, #X, Stream, Record) 499130803Smarcel#define RECORD(X) emitRecordID(X, #X, Stream, Record) 50098944Sobrien BLOCK(GLOBAL_INDEX_BLOCK); 50198944Sobrien RECORD(INDEX_METADATA); 50298944Sobrien RECORD(MODULE); 50398944Sobrien RECORD(IDENTIFIER_INDEX); 50498944Sobrien#undef RECORD 50546283Sdfr#undef BLOCK 50646283Sdfr 50798944Sobrien Stream.ExitBlock(); 50846283Sdfr} 509130803Smarcel 510130803Smarcelnamespace { 511130803Smarcel class InterestingASTIdentifierLookupTrait 512130803Smarcel : public serialization::reader::ASTIdentifierLookupTraitBase { 513130803Smarcel 
514130803Smarcel public: 515130803Smarcel /// The identifier and whether it is "interesting". 516130803Smarcel typedef std::pair<StringRef, bool> data_type; 517130803Smarcel 51846283Sdfr data_type ReadData(const internal_key_type& k, 51946283Sdfr const unsigned char* d, 52046283Sdfr unsigned DataLen) { 521130803Smarcel // The first bit indicates whether this identifier is interesting. 52246283Sdfr // That's all we care about. 52346283Sdfr using namespace llvm::support; 52446283Sdfr unsigned RawID = endian::readNext<uint32_t, little, unaligned>(d); 52546283Sdfr bool IsInteresting = RawID & 0x01; 52619370Spst return std::make_pair(k, IsInteresting); 52719370Spst } 52819370Spst }; 52919370Spst} 53019370Spst 53119370Spstllvm::Error GlobalModuleIndexBuilder::loadModuleFile(const FileEntry *File) { 53219370Spst // Open the module file. 53319370Spst 53419370Spst auto Buffer = FileMgr.getBufferForFile(File, /*isVolatile=*/true); 53598944Sobrien if (!Buffer) 53619370Spst return llvm::createStringError(Buffer.getError(), 53746283Sdfr "failed getting buffer for module file"); 53898944Sobrien 53919370Spst // Initialize the input stream 54019370Spst llvm::BitstreamCursor InStream(PCHContainerRdr.ExtractPCH(**Buffer)); 54198944Sobrien 54219370Spst // Sniff for the signature. 54398944Sobrien for (unsigned char C : {'C', 'P', 'C', 'H'}) 54498944Sobrien if (Expected<llvm::SimpleBitstreamCursor::word_t> Res = InStream.Read(8)) { 54598944Sobrien if (Res.get() != C) 54698944Sobrien return llvm::createStringError(std::errc::illegal_byte_sequence, 54798944Sobrien "expected signature CPCH"); 54898944Sobrien } else 54998944Sobrien return Res.takeError(); 55098944Sobrien 55198944Sobrien // Record this module file and assign it a unique ID (if it doesn't have 55298944Sobrien // one already). 55398944Sobrien unsigned ID = getModuleFileInfo(File).ID; 55498944Sobrien 555130803Smarcel // Search for the blocks and records we care about. 
55698944Sobrien enum { Other, ControlBlock, ASTBlock, DiagnosticOptionsBlock } State = Other; 55798944Sobrien bool Done = false; 55898944Sobrien while (!Done) { 55998944Sobrien Expected<llvm::BitstreamEntry> MaybeEntry = InStream.advance(); 56098944Sobrien if (!MaybeEntry) 56198944Sobrien return MaybeEntry.takeError(); 56298944Sobrien llvm::BitstreamEntry Entry = MaybeEntry.get(); 56398944Sobrien 56446283Sdfr switch (Entry.Kind) { 56546283Sdfr case llvm::BitstreamEntry::Error: 56698944Sobrien Done = true; 56746283Sdfr continue; 56846283Sdfr 56946283Sdfr case llvm::BitstreamEntry::Record: 57046283Sdfr // In the 'other' state, just skip the record. We don't care. 57146283Sdfr if (State == Other) { 572130803Smarcel if (llvm::Expected<unsigned> Skipped = InStream.skipRecord(Entry.ID)) 57346283Sdfr continue; 57419370Spst else 57519370Spst return Skipped.takeError(); 57619370Spst } 57719370Spst 57819370Spst // Handle potentially-interesting records below. 57919370Spst break; 58046283Sdfr 581130803Smarcel case llvm::BitstreamEntry::SubBlock: 58246283Sdfr if (Entry.ID == CONTROL_BLOCK_ID) { 58346283Sdfr if (llvm::Error Err = InStream.EnterSubBlock(CONTROL_BLOCK_ID)) 584130803Smarcel return Err; 585130803Smarcel 58646283Sdfr // Found the control block. 587130803Smarcel State = ControlBlock; 588130803Smarcel continue; 58946283Sdfr } 59046283Sdfr 591130803Smarcel if (Entry.ID == AST_BLOCK_ID) { 592130803Smarcel if (llvm::Error Err = InStream.EnterSubBlock(AST_BLOCK_ID)) 59346283Sdfr return Err; 59446283Sdfr 59546283Sdfr // Found the AST block. 59646283Sdfr State = ASTBlock; 59746283Sdfr continue; 59846283Sdfr } 59946283Sdfr 60019370Spst if (Entry.ID == UNHASHED_CONTROL_BLOCK_ID) { 60119370Spst if (llvm::Error Err = InStream.EnterSubBlock(UNHASHED_CONTROL_BLOCK_ID)) 60219370Spst return Err; 60346283Sdfr 60446283Sdfr // Found the Diagnostic Options block. 
60598944Sobrien State = DiagnosticOptionsBlock; 60698944Sobrien continue; 607130803Smarcel } 60898944Sobrien 60998944Sobrien if (llvm::Error Err = InStream.SkipBlock()) 610130803Smarcel return Err; 611130803Smarcel 61246283Sdfr continue; 61398944Sobrien 61498944Sobrien case llvm::BitstreamEntry::EndBlock: 61598944Sobrien State = Other; 61698944Sobrien continue; 61746283Sdfr } 61898944Sobrien 61998944Sobrien // Read the given record. 62046283Sdfr SmallVector<uint64_t, 64> Record; 62198944Sobrien StringRef Blob; 622130803Smarcel Expected<unsigned> MaybeCode = InStream.readRecord(Entry.ID, Record, &Blob); 62398944Sobrien if (!MaybeCode) 62498944Sobrien return MaybeCode.takeError(); 62598944Sobrien unsigned Code = MaybeCode.get(); 626130803Smarcel 62746283Sdfr // Handle module dependencies. 62846283Sdfr if (State == ControlBlock && Code == IMPORTS) { 629130803Smarcel // Load each of the imported PCH files. 630130803Smarcel unsigned Idx = 0, N = Record.size(); 631130803Smarcel while (Idx < N) { 632130803Smarcel // Read information about the AST file. 633130803Smarcel 634130803Smarcel // Skip the imported kind 635130803Smarcel ++Idx; 636130803Smarcel 63798944Sobrien // Skip the import location 63846283Sdfr ++Idx; 63946283Sdfr 64019370Spst // Load stored size/modification time. 64119370Spst off_t StoredSize = (off_t)Record[Idx++]; 64246283Sdfr time_t StoredModTime = (time_t)Record[Idx++]; 64319370Spst 64419370Spst // Skip the stored signature. 64519370Spst // FIXME: we could read the signature out of the import and validate it. 64619370Spst auto FirstSignatureByte = Record.begin() + Idx; 64719370Spst ASTFileSignature StoredSignature = ASTFileSignature::create( 64898944Sobrien FirstSignatureByte, FirstSignatureByte + ASTFileSignature::size); 64919370Spst Idx += ASTFileSignature::size; 65019370Spst 65119370Spst // Skip the module name (currently this is only used for prebuilt 65219370Spst // modules while here we are only dealing with cached). 
653130803Smarcel Idx += Record[Idx] + 1; 65498944Sobrien 65519370Spst // Retrieve the imported file name. 656130803Smarcel unsigned Length = Record[Idx++]; 65719370Spst SmallString<128> ImportedFile(Record.begin() + Idx, 65819370Spst Record.begin() + Idx + Length); 65919370Spst Idx += Length; 66019370Spst 66119370Spst // Find the imported module file. 66219370Spst auto DependsOnFile 66319370Spst = FileMgr.getFile(ImportedFile, /*OpenFile=*/false, 664130803Smarcel /*CacheFailure=*/false); 665130803Smarcel 666130803Smarcel if (!DependsOnFile) 667130803Smarcel return llvm::createStringError(std::errc::bad_file_descriptor, 668130803Smarcel "imported file \"%s\" not found", 669130803Smarcel ImportedFile.c_str()); 670130803Smarcel 671130803Smarcel // Save the information in ImportedModuleFileInfo so we can verify after 672130803Smarcel // loading all pcms. 673130803Smarcel ImportedModuleFiles.insert(std::make_pair( 674130803Smarcel *DependsOnFile, ImportedModuleFileInfo(StoredSize, StoredModTime, 675130803Smarcel StoredSignature))); 676130803Smarcel 677130803Smarcel // Record the dependency. 
678130803Smarcel unsigned DependsOnID = getModuleFileInfo(*DependsOnFile).ID; 679130803Smarcel getModuleFileInfo(File).Dependencies.push_back(DependsOnID); 680130803Smarcel } 681130803Smarcel 682130803Smarcel continue; 683130803Smarcel } 684130803Smarcel 685130803Smarcel // Handle the identifier table 686130803Smarcel if (State == ASTBlock && Code == IDENTIFIER_TABLE && Record[0] > 0) { 687130803Smarcel typedef llvm::OnDiskIterableChainedHashTable< 688130803Smarcel InterestingASTIdentifierLookupTrait> InterestingIdentifierTable; 689130803Smarcel std::unique_ptr<InterestingIdentifierTable> Table( 690130803Smarcel InterestingIdentifierTable::Create( 691130803Smarcel (const unsigned char *)Blob.data() + Record[0], 692130803Smarcel (const unsigned char *)Blob.data() + sizeof(uint32_t), 693130803Smarcel (const unsigned char *)Blob.data())); 694130803Smarcel for (InterestingIdentifierTable::data_iterator D = Table->data_begin(), 695130803Smarcel DEnd = Table->data_end(); 696130803Smarcel D != DEnd; ++D) { 697130803Smarcel std::pair<StringRef, bool> Ident = *D; 698130803Smarcel if (Ident.second) 699130803Smarcel InterestingIdentifiers[Ident.first].push_back(ID); 700130803Smarcel else 701130803Smarcel (void)InterestingIdentifiers[Ident.first]; 702130803Smarcel } 703130803Smarcel } 704130803Smarcel 705130803Smarcel // Get Signature. 706130803Smarcel if (State == DiagnosticOptionsBlock && Code == SIGNATURE) 707130803Smarcel getModuleFileInfo(File).Signature = ASTFileSignature::create( 708130803Smarcel Record.begin(), Record.begin() + ASTFileSignature::size); 709130803Smarcel 710130803Smarcel // We don't care about this record. 711130803Smarcel } 712130803Smarcel 713130803Smarcel return llvm::Error::success(); 714130803Smarcel} 715130803Smarcel 716130803Smarcelnamespace { 717130803Smarcel 718130803Smarcel/// Trait used to generate the identifier index as an on-disk hash 71919370Spst/// table. 
72019370Spstclass IdentifierIndexWriterTrait { 72119370Spstpublic: 72219370Spst typedef StringRef key_type; 72398944Sobrien typedef StringRef key_type_ref; 72419370Spst typedef SmallVector<unsigned, 2> data_type; 72598944Sobrien typedef const SmallVector<unsigned, 2> &data_type_ref; 72698944Sobrien typedef unsigned hash_value_type; 72798944Sobrien typedef unsigned offset_type; 72819370Spst 72919370Spst static hash_value_type ComputeHash(key_type_ref Key) { 73019370Spst return llvm::djbHash(Key); 73119370Spst } 73298944Sobrien 73319370Spst std::pair<unsigned,unsigned> 73419370Spst EmitKeyDataLength(raw_ostream& Out, key_type_ref Key, data_type_ref Data) { 73519370Spst using namespace llvm::support; 73619370Spst endian::Writer LE(Out, little); 73719370Spst unsigned KeyLen = Key.size(); 73819370Spst unsigned DataLen = Data.size() * 4; 73919370Spst LE.write<uint16_t>(KeyLen); 74019370Spst LE.write<uint16_t>(DataLen); 74198944Sobrien return std::make_pair(KeyLen, DataLen); 74219370Spst } 74319370Spst 74498944Sobrien void EmitKey(raw_ostream& Out, key_type_ref Key, unsigned KeyLen) { 74598944Sobrien Out.write(Key.data(), KeyLen); 74646283Sdfr } 74719370Spst 74846283Sdfr void EmitData(raw_ostream& Out, key_type_ref Key, data_type_ref Data, 74919370Spst unsigned DataLen) { 75019370Spst using namespace llvm::support; 75146283Sdfr for (unsigned I = 0, N = Data.size(); I != N; ++I) 75246283Sdfr endian::write<uint32_t>(Out, Data[I], little); 75346283Sdfr } 75446283Sdfr}; 75519370Spst 75698944Sobrien} 75798944Sobrien 75846283Sdfrbool GlobalModuleIndexBuilder::writeIndex(llvm::BitstreamWriter &Stream) { 75946283Sdfr for (auto MapEntry : ImportedModuleFiles) { 76019370Spst auto *File = MapEntry.first; 76119370Spst ImportedModuleFileInfo &Info = MapEntry.second; 76219370Spst if (getModuleFileInfo(File).Signature) { 76319370Spst if (getModuleFileInfo(File).Signature != Info.StoredSignature) 76446283Sdfr // Verify Signature. 
76546283Sdfr return true; 76646283Sdfr } else if (Info.StoredSize != File->getSize() || 76746283Sdfr Info.StoredModTime != File->getModificationTime()) 76846283Sdfr // Verify Size and ModTime. 76946283Sdfr return true; 77019370Spst } 77146283Sdfr 77246283Sdfr using namespace llvm; 77346283Sdfr llvm::TimeTraceScope TimeScope("Module WriteIndex"); 77446283Sdfr 77519370Spst // Emit the file header. 776130803Smarcel Stream.Emit((unsigned)'B', 8); 77746283Sdfr Stream.Emit((unsigned)'C', 8); 77846283Sdfr Stream.Emit((unsigned)'G', 8); 779130803Smarcel Stream.Emit((unsigned)'I', 8); 780130803Smarcel 78146283Sdfr // Write the block-info block, which describes the records in this bitcode 78219370Spst // file. 78319370Spst emitBlockInfoBlock(Stream); 78419370Spst 78519370Spst Stream.EnterSubblock(GLOBAL_INDEX_BLOCK_ID, 3); 78698944Sobrien 78798944Sobrien // Write the metadata. 78898944Sobrien SmallVector<uint64_t, 2> Record; 78919370Spst Record.push_back(CurrentVersion); 79019370Spst Stream.EmitRecord(INDEX_METADATA, Record); 79119370Spst 79219370Spst // Write the set of known module files. 79319370Spst for (ModuleFilesMap::iterator M = ModuleFiles.begin(), 79419370Spst MEnd = ModuleFiles.end(); 79519370Spst M != MEnd; ++M) { 79619370Spst Record.clear(); 79719370Spst Record.push_back(M->second.ID); 79819370Spst Record.push_back(M->first->getSize()); 79919370Spst Record.push_back(M->first->getModificationTime()); 800130803Smarcel 801130803Smarcel // File name 802130803Smarcel StringRef Name(M->first->getName()); 80319370Spst Record.push_back(Name.size()); 80419370Spst Record.append(Name.begin(), Name.end()); 80519370Spst 80619370Spst // Dependencies 80719370Spst Record.push_back(M->second.Dependencies.size()); 80819370Spst Record.append(M->second.Dependencies.begin(), M->second.Dependencies.end()); 80919370Spst Stream.EmitRecord(MODULE, Record); 81019370Spst } 81119370Spst 81219370Spst // Write the identifier -> module file mapping. 
81319370Spst { 81419370Spst llvm::OnDiskChainedHashTableGenerator<IdentifierIndexWriterTrait> Generator; 81519370Spst IdentifierIndexWriterTrait Trait; 81619370Spst 81719370Spst // Populate the hash table. 81819370Spst for (InterestingIdentifierMap::iterator I = InterestingIdentifiers.begin(), 819130803Smarcel IEnd = InterestingIdentifiers.end(); 820130803Smarcel I != IEnd; ++I) { 821130803Smarcel Generator.insert(I->first(), I->second, Trait); 822130803Smarcel } 823130803Smarcel 824130803Smarcel // Create the on-disk hash table in a buffer. 825130803Smarcel SmallString<4096> IdentifierTable; 826130803Smarcel uint32_t BucketOffset; 827130803Smarcel { 828130803Smarcel using namespace llvm::support; 829130803Smarcel llvm::raw_svector_ostream Out(IdentifierTable); 830130803Smarcel // Make sure that no bucket is at offset 0 831130803Smarcel endian::write<uint32_t>(Out, 0, little); 832130803Smarcel BucketOffset = Generator.Emit(Out, Trait); 833130803Smarcel } 834130803Smarcel 835130803Smarcel // Create a blob abbreviation 836130803Smarcel auto Abbrev = std::make_shared<BitCodeAbbrev>(); 837130803Smarcel Abbrev->Add(BitCodeAbbrevOp(IDENTIFIER_INDEX)); 838130803Smarcel Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32)); 839130803Smarcel Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); 840130803Smarcel unsigned IDTableAbbrev = Stream.EmitAbbrev(std::move(Abbrev)); 841130803Smarcel 842130803Smarcel // Write the identifier table 84319370Spst uint64_t Record[] = {IDENTIFIER_INDEX, BucketOffset}; 84419370Spst Stream.EmitRecordWithBlob(IDTableAbbrev, Record, IdentifierTable); 84519370Spst } 84619370Spst 84719370Spst Stream.ExitBlock(); 84898944Sobrien return false; 84998944Sobrien} 85098944Sobrien 85198944Sobrienllvm::Error 85298944SobrienGlobalModuleIndex::writeIndex(FileManager &FileMgr, 85398944Sobrien const PCHContainerReader &PCHContainerRdr, 85498944Sobrien StringRef Path) { 85519370Spst llvm::SmallString<128> IndexPath; 85646283Sdfr IndexPath += Path; 
85719370Spst llvm::sys::path::append(IndexPath, IndexFileName); 85819370Spst 85919370Spst // Coordinate building the global index file with other processes that might 86019370Spst // try to do the same. 86198944Sobrien llvm::LockFileManager Locked(IndexPath); 86219370Spst switch (Locked) { 86319370Spst case llvm::LockFileManager::LFS_Error: 86419370Spst return llvm::createStringError(std::errc::io_error, "LFS error"); 865130803Smarcel 86619370Spst case llvm::LockFileManager::LFS_Owned: 86798944Sobrien // We're responsible for building the index ourselves. Do so below. 86898944Sobrien break; 86998944Sobrien 87098944Sobrien case llvm::LockFileManager::LFS_Shared: 87198944Sobrien // Someone else is responsible for building the index. We don't care 87298944Sobrien // when they finish, so we're done. 87398944Sobrien return llvm::createStringError(std::errc::device_or_resource_busy, 87498944Sobrien "someone else is building the index"); 87598944Sobrien } 87698944Sobrien 87798944Sobrien // The module index builder. 87898944Sobrien GlobalModuleIndexBuilder Builder(FileMgr, PCHContainerRdr); 87998944Sobrien 88098944Sobrien // Load each of the module files. 88198944Sobrien std::error_code EC; 88219370Spst for (llvm::sys::fs::directory_iterator D(Path, EC), DEnd; 88319370Spst D != DEnd && !EC; 88419370Spst D.increment(EC)) { 88519370Spst // If this isn't a module file, we don't care. 88619370Spst if (llvm::sys::path::extension(D->path()) != ".pcm") { 88719370Spst // ... unless it's a .pcm.lock file, which indicates that someone is 88819370Spst // in the process of rebuilding a module. They'll rebuild the index 88998944Sobrien // at the end of that translation unit, so we don't have to. 
89019370Spst if (llvm::sys::path::extension(D->path()) == ".pcm.lock") 89119370Spst return llvm::createStringError(std::errc::device_or_resource_busy, 89219370Spst "someone else is building the index"); 89319370Spst 89419370Spst continue; 89519370Spst } 89619370Spst 89719370Spst // If we can't find the module file, skip it. 89846283Sdfr auto ModuleFile = FileMgr.getFile(D->path()); 89919370Spst if (!ModuleFile) 90019370Spst continue; 90119370Spst 90246283Sdfr // Load this module file. 90346283Sdfr if (llvm::Error Err = Builder.loadModuleFile(*ModuleFile)) 90446283Sdfr return Err; 90546283Sdfr } 90646283Sdfr 90746283Sdfr // The output buffer, into which the global index will be written. 90846283Sdfr SmallString<16> OutputBuffer; 90946283Sdfr { 910130803Smarcel llvm::BitstreamWriter OutputStream(OutputBuffer); 911130803Smarcel if (Builder.writeIndex(OutputStream)) 91219370Spst return llvm::createStringError(std::errc::io_error, 91319370Spst "failed writing index"); 91419370Spst } 91598944Sobrien 91619370Spst return llvm::writeFileAtomically((IndexPath + "-%%%%%%%%").str(), IndexPath, 91746283Sdfr OutputBuffer); 91819370Spst} 91946283Sdfr 92046283Sdfrnamespace { 92146283Sdfr class GlobalIndexIdentifierIterator : public IdentifierIterator { 92246283Sdfr /// The current position within the identifier lookup table. 92319370Spst IdentifierIndexTable::key_iterator Current; 92419370Spst 92598944Sobrien /// The end position within the identifier lookup table. 
92698944Sobrien IdentifierIndexTable::key_iterator End; 92798944Sobrien 92898944Sobrien public: 92998944Sobrien explicit GlobalIndexIdentifierIterator(IdentifierIndexTable &Idx) { 93098944Sobrien Current = Idx.key_begin(); 93198944Sobrien End = Idx.key_end(); 93298944Sobrien } 93398944Sobrien 93498944Sobrien StringRef Next() override { 93598944Sobrien if (Current == End) 93698944Sobrien return StringRef(); 93798944Sobrien 93898944Sobrien StringRef Result = *Current; 93998944Sobrien ++Current; 94098944Sobrien return Result; 94198944Sobrien } 94298944Sobrien }; 94398944Sobrien} 94498944Sobrien 94598944SobrienIdentifierIterator *GlobalModuleIndex::createIdentifierIterator() const { 94698944Sobrien IdentifierIndexTable &Table = 94798944Sobrien *static_cast<IdentifierIndexTable *>(IdentifierIndex); 94898944Sobrien return new GlobalIndexIdentifierIterator(Table); 94998944Sobrien} 95098944Sobrien