1193326Sed//===--- PTHLexer.cpp - Lex from a token stream ---------------------------===// 2193326Sed// 3193326Sed// The LLVM Compiler Infrastructure 4193326Sed// 5193326Sed// This file is distributed under the University of Illinois Open Source 6193326Sed// License. See LICENSE.TXT for details. 7193326Sed// 8193326Sed//===----------------------------------------------------------------------===// 9193326Sed// 10193326Sed// This file implements the PTHLexer interface. 11193326Sed// 12193326Sed//===----------------------------------------------------------------------===// 13193326Sed 14249423Sdim#include "clang/Lex/PTHLexer.h" 15193326Sed#include "clang/Basic/FileManager.h" 16218893Sdim#include "clang/Basic/FileSystemStatCache.h" 17193326Sed#include "clang/Basic/IdentifierTable.h" 18193326Sed#include "clang/Basic/OnDiskHashTable.h" 19249423Sdim#include "clang/Basic/TokenKinds.h" 20199482Srdivacky#include "clang/Lex/LexDiagnostic.h" 21249423Sdim#include "clang/Lex/PTHManager.h" 22193326Sed#include "clang/Lex/Preprocessor.h" 23193326Sed#include "clang/Lex/Token.h" 24198398Srdivacky#include "llvm/ADT/OwningPtr.h" 25198398Srdivacky#include "llvm/ADT/StringExtras.h" 26193326Sed#include "llvm/ADT/StringMap.h" 27193326Sed#include "llvm/Support/MemoryBuffer.h" 28218893Sdim#include "llvm/Support/system_error.h" 29193326Sedusing namespace clang; 30193326Sedusing namespace clang::io; 31193326Sed 32193326Sed#define DISK_TOKEN_SIZE (1+1+2+4+4) 33193326Sed 34193326Sed//===----------------------------------------------------------------------===// 35193326Sed// PTHLexer methods. 36193326Sed//===----------------------------------------------------------------------===// 37193326Sed 38193326SedPTHLexer::PTHLexer(Preprocessor &PP, FileID FID, const unsigned char *D, 39193326Sed const unsigned char *ppcond, PTHManager &PM) 40193326Sed : PreprocessorLexer(&PP, FID), TokBuf(D), CurPtr(D), LastHashTokPtr(0), 41193326Sed PPCond(ppcond), CurPPCondPtr(ppcond), PTHMgr(PM) { 42198092Srdivacky 43193326Sed FileStartLoc = PP.getSourceManager().getLocForStartOfFile(FID); 44193326Sed} 45193326Sed 46263508Sdimbool PTHLexer::Lex(Token& Tok) { 47193326Sed //===--------------------------------------==// 48193326Sed // Read the raw token data. 49193326Sed //===--------------------------------------==// 50198092Srdivacky 51193326Sed // Shadow CurPtr into an automatic variable. 52198092Srdivacky const unsigned char *CurPtrShadow = CurPtr; 53193326Sed 54193326Sed // Read in the data for the token. 55193326Sed unsigned Word0 = ReadLE32(CurPtrShadow); 56193326Sed uint32_t IdentifierID = ReadLE32(CurPtrShadow); 57193326Sed uint32_t FileOffset = ReadLE32(CurPtrShadow); 58198092Srdivacky 59193326Sed tok::TokenKind TKind = (tok::TokenKind) (Word0 & 0xFF); 60193326Sed Token::TokenFlags TFlags = (Token::TokenFlags) ((Word0 >> 8) & 0xFF); 61193326Sed uint32_t Len = Word0 >> 16; 62193326Sed 63193326Sed CurPtr = CurPtrShadow; 64198092Srdivacky 65193326Sed //===--------------------------------------==// 66193326Sed // Construct the token itself. 67193326Sed //===--------------------------------------==// 68198092Srdivacky 69193326Sed Tok.startToken(); 70193326Sed Tok.setKind(TKind); 71193326Sed Tok.setFlag(TFlags); 72193326Sed assert(!LexingRawMode); 73226633Sdim Tok.setLocation(FileStartLoc.getLocWithOffset(FileOffset)); 74193326Sed Tok.setLength(Len); 75193326Sed 76193326Sed // Handle identifiers. 77193326Sed if (Tok.isLiteral()) { 78193326Sed Tok.setLiteralData((const char*) (PTHMgr.SpellingBase + IdentifierID)); 79193326Sed } 80193326Sed else if (IdentifierID) { 81193326Sed MIOpt.ReadToken(); 82193326Sed IdentifierInfo *II = PTHMgr.GetIdentifierInfo(IdentifierID-1); 83198092Srdivacky 84193326Sed Tok.setIdentifierInfo(II); 85198092Srdivacky 86193326Sed // Change the kind of this identifier to the appropriate token kind, e.g. 87193326Sed // turning "for" into a keyword. 88193326Sed Tok.setKind(II->getTokenID()); 89198092Srdivacky 90193326Sed if (II->isHandleIdentifierCase()) 91263508Sdim return PP->HandleIdentifier(Tok); 92263508Sdim 93263508Sdim return true; 94193326Sed } 95198092Srdivacky 96193326Sed //===--------------------------------------==// 97193326Sed // Process the token. 98193326Sed //===--------------------------------------==// 99193326Sed if (TKind == tok::eof) { 100193326Sed // Save the end-of-file token. 101193326Sed EofToken = Tok; 102198092Srdivacky 103193326Sed assert(!ParsingPreprocessorDirective); 104193326Sed assert(!LexingRawMode); 105198092Srdivacky 106263508Sdim return LexEndOfFile(Tok); 107193326Sed } 108198092Srdivacky 109193326Sed if (TKind == tok::hash && Tok.isAtStartOfLine()) { 110193326Sed LastHashTokPtr = CurPtr - DISK_TOKEN_SIZE; 111193326Sed assert(!LexingRawMode); 112193326Sed PP->HandleDirective(Tok); 113198092Srdivacky 114263508Sdim return false; 115193326Sed } 116198092Srdivacky 117221345Sdim if (TKind == tok::eod) { 118193326Sed assert(ParsingPreprocessorDirective); 119193326Sed ParsingPreprocessorDirective = false; 120263508Sdim return true; 121193326Sed } 122193326Sed 123193326Sed MIOpt.ReadToken(); 124263508Sdim return true; 125193326Sed} 126193326Sed 127212904Sdimbool PTHLexer::LexEndOfFile(Token &Result) { 128212904Sdim // If we hit the end of the file while parsing a preprocessor directive, 129212904Sdim // end the preprocessor directive first. The next token returned will 130212904Sdim // then be the end of file. 131212904Sdim if (ParsingPreprocessorDirective) { 132212904Sdim ParsingPreprocessorDirective = false; // Done parsing the "line". 133212904Sdim return true; // Have a token. 134212904Sdim } 135212904Sdim 136212904Sdim assert(!LexingRawMode); 137212904Sdim 138212904Sdim // If we are in a #if directive, emit an error. 139212904Sdim while (!ConditionalStack.empty()) { 140226633Sdim if (PP->getCodeCompletionFileLoc() != FileStartLoc) 141212904Sdim PP->Diag(ConditionalStack.back().IfLoc, 142212904Sdim diag::err_pp_unterminated_conditional); 143212904Sdim ConditionalStack.pop_back(); 144212904Sdim } 145212904Sdim 146212904Sdim // Finally, let the preprocessor handle this. 147212904Sdim return PP->HandleEndOfFile(Result); 148212904Sdim} 149212904Sdim 150193326Sed// FIXME: We can just grab the last token instead of storing a copy 151193326Sed// into EofToken. 152193326Sedvoid PTHLexer::getEOF(Token& Tok) { 153193326Sed assert(EofToken.is(tok::eof)); 154193326Sed Tok = EofToken; 155193326Sed} 156193326Sed 157193326Sedvoid PTHLexer::DiscardToEndOfLine() { 158193326Sed assert(ParsingPreprocessorDirective && ParsingFilename == false && 159193326Sed "Must be in a preprocessing directive!"); 160193326Sed 161193326Sed // We assume that if the preprocessor wishes to discard to the end of 162193326Sed // the line that it also means to end the current preprocessor directive. 163193326Sed ParsingPreprocessorDirective = false; 164198092Srdivacky 165193326Sed // Skip tokens by only peeking at their token kind and the flags. 166193326Sed // We don't need to actually reconstruct full tokens from the token buffer. 167193326Sed // This saves some copies and it also reduces IdentifierInfo* lookup. 168193326Sed const unsigned char* p = CurPtr; 169193326Sed while (1) { 170193326Sed // Read the token kind. Are we at the end of the file? 171193326Sed tok::TokenKind x = (tok::TokenKind) (uint8_t) *p; 172193326Sed if (x == tok::eof) break; 173198092Srdivacky 174193326Sed // Read the token flags. Are we at the start of the next line? 175193326Sed Token::TokenFlags y = (Token::TokenFlags) (uint8_t) p[1]; 176193326Sed if (y & Token::StartOfLine) break; 177193326Sed 178193326Sed // Skip to the next token. 179193326Sed p += DISK_TOKEN_SIZE; 180193326Sed } 181198092Srdivacky 182193326Sed CurPtr = p; 183193326Sed} 184193326Sed 185193326Sed/// SkipBlock - Used by Preprocessor to skip the current conditional block. 186193326Sedbool PTHLexer::SkipBlock() { 187193326Sed assert(CurPPCondPtr && "No cached PP conditional information."); 188193326Sed assert(LastHashTokPtr && "No known '#' token."); 189198092Srdivacky 190193326Sed const unsigned char* HashEntryI = 0; 191193326Sed uint32_t TableIdx; 192198092Srdivacky 193193326Sed do { 194193326Sed // Read the token offset from the side-table. 195243830Sdim uint32_t Offset = ReadLE32(CurPPCondPtr); 196198092Srdivacky 197198092Srdivacky // Read the target table index from the side-table. 198193326Sed TableIdx = ReadLE32(CurPPCondPtr); 199198092Srdivacky 200193326Sed // Compute the actual memory address of the '#' token data for this entry. 201193326Sed HashEntryI = TokBuf + Offset; 202193326Sed 203193326Sed // Optmization: "Sibling jumping". #if...#else...#endif blocks can 204193326Sed // contain nested blocks. In the side-table we can jump over these 205193326Sed // nested blocks instead of doing a linear search if the next "sibling" 206193326Sed // entry is not at a location greater than LastHashTokPtr. 207193326Sed if (HashEntryI < LastHashTokPtr && TableIdx) { 208193326Sed // In the side-table we are still at an entry for a '#' token that 209193326Sed // is earlier than the last one we saw. Check if the location we would 210193326Sed // stride gets us closer. 211193326Sed const unsigned char* NextPPCondPtr = 212193326Sed PPCond + TableIdx*(sizeof(uint32_t)*2); 213193326Sed assert(NextPPCondPtr >= CurPPCondPtr); 214193326Sed // Read where we should jump to. 215243830Sdim const unsigned char* HashEntryJ = TokBuf + ReadLE32(NextPPCondPtr); 216198092Srdivacky 217193326Sed if (HashEntryJ <= LastHashTokPtr) { 218193326Sed // Jump directly to the next entry in the side table. 219193326Sed HashEntryI = HashEntryJ; 220193326Sed TableIdx = ReadLE32(NextPPCondPtr); 221193326Sed CurPPCondPtr = NextPPCondPtr; 222193326Sed } 223193326Sed } 224193326Sed } 225198092Srdivacky while (HashEntryI < LastHashTokPtr); 226193326Sed assert(HashEntryI == LastHashTokPtr && "No PP-cond entry found for '#'"); 227193326Sed assert(TableIdx && "No jumping from #endifs."); 228198092Srdivacky 229193326Sed // Update our side-table iterator. 230193326Sed const unsigned char* NextPPCondPtr = PPCond + TableIdx*(sizeof(uint32_t)*2); 231193326Sed assert(NextPPCondPtr >= CurPPCondPtr); 232193326Sed CurPPCondPtr = NextPPCondPtr; 233198092Srdivacky 234193326Sed // Read where we should jump to. 235193326Sed HashEntryI = TokBuf + ReadLE32(NextPPCondPtr); 236193326Sed uint32_t NextIdx = ReadLE32(NextPPCondPtr); 237198092Srdivacky 238193326Sed // By construction NextIdx will be zero if this is a #endif. This is useful 239193326Sed // to know to obviate lexing another token. 240193326Sed bool isEndif = NextIdx == 0; 241198092Srdivacky 242193326Sed // This case can occur when we see something like this: 243193326Sed // 244193326Sed // #if ... 245193326Sed // /* a comment or nothing */ 246193326Sed // #elif 247193326Sed // 248193326Sed // If we are skipping the first #if block it will be the case that CurPtr 249193326Sed // already points 'elif'. Just return. 250198092Srdivacky 251193326Sed if (CurPtr > HashEntryI) { 252193326Sed assert(CurPtr == HashEntryI + DISK_TOKEN_SIZE); 253193326Sed // Did we reach a #endif? If so, go ahead and consume that token as well. 254193326Sed if (isEndif) 255193326Sed CurPtr += DISK_TOKEN_SIZE*2; 256193326Sed else 257193326Sed LastHashTokPtr = HashEntryI; 258198092Srdivacky 259193326Sed return isEndif; 260193326Sed } 261193326Sed 262193326Sed // Otherwise, we need to advance. Update CurPtr to point to the '#' token. 263193326Sed CurPtr = HashEntryI; 264198092Srdivacky 265193326Sed // Update the location of the last observed '#'. This is useful if we 266193326Sed // are skipping multiple blocks. 267193326Sed LastHashTokPtr = CurPtr; 268193326Sed 269193326Sed // Skip the '#' token. 270193326Sed assert(((tok::TokenKind)*CurPtr) == tok::hash); 271193326Sed CurPtr += DISK_TOKEN_SIZE; 272198092Srdivacky 273193326Sed // Did we reach a #endif? If so, go ahead and consume that token as well. 274193326Sed if (isEndif) { CurPtr += DISK_TOKEN_SIZE*2; } 275193326Sed 276193326Sed return isEndif; 277193326Sed} 278193326Sed 279193326SedSourceLocation PTHLexer::getSourceLocation() { 280193326Sed // getSourceLocation is not on the hot path. It is used to get the location 281193326Sed // of the next token when transitioning back to this lexer when done 282193326Sed // handling a #included file. Just read the necessary data from the token 283193326Sed // data buffer to construct the SourceLocation object. 284193326Sed // NOTE: This is a virtual function; hence it is defined out-of-line. 285193326Sed const unsigned char *OffsetPtr = CurPtr + (DISK_TOKEN_SIZE - 4); 286193326Sed uint32_t Offset = ReadLE32(OffsetPtr); 287226633Sdim return FileStartLoc.getLocWithOffset(Offset); 288193326Sed} 289193326Sed 290193326Sed//===----------------------------------------------------------------------===// 291193326Sed// PTH file lookup: map from strings to file data. 292193326Sed//===----------------------------------------------------------------------===// 293193326Sed 294193326Sed/// PTHFileLookup - This internal data structure is used by the PTHManager 295193326Sed/// to map from FileEntry objects managed by FileManager to offsets within 296193326Sed/// the PTH file. 297193326Sednamespace { 298199990Srdivackyclass PTHFileData { 299193326Sed const uint32_t TokenOff; 300193326Sed const uint32_t PPCondOff; 301193326Sedpublic: 302193326Sed PTHFileData(uint32_t tokenOff, uint32_t ppCondOff) 303193326Sed : TokenOff(tokenOff), PPCondOff(ppCondOff) {} 304198092Srdivacky 305198092Srdivacky uint32_t getTokenOffset() const { return TokenOff; } 306198092Srdivacky uint32_t getPPCondOffset() const { return PPCondOff; } 307193326Sed}; 308198092Srdivacky 309198092Srdivacky 310199990Srdivackyclass PTHFileLookupCommonTrait { 311193326Sedpublic: 312193326Sed typedef std::pair<unsigned char, const char*> internal_key_type; 313193326Sed 314193326Sed static unsigned ComputeHash(internal_key_type x) { 315198398Srdivacky return llvm::HashString(x.second); 316193326Sed } 317198092Srdivacky 318193326Sed static std::pair<unsigned, unsigned> 319193326Sed ReadKeyDataLength(const unsigned char*& d) { 320193326Sed unsigned keyLen = (unsigned) ReadUnalignedLE16(d); 321193326Sed unsigned dataLen = (unsigned) *(d++); 322193326Sed return std::make_pair(keyLen, dataLen); 323193326Sed } 324198092Srdivacky 325193326Sed static internal_key_type ReadKey(const unsigned char* d, unsigned) { 326193326Sed unsigned char k = *(d++); // Read the entry kind. 327193326Sed return std::make_pair(k, (const char*) d); 328193326Sed } 329193326Sed}; 330198092Srdivacky 331199990Srdivackyclass PTHFileLookupTrait : public PTHFileLookupCommonTrait { 332193326Sedpublic: 333193326Sed typedef const FileEntry* external_key_type; 334193326Sed typedef PTHFileData data_type; 335198092Srdivacky 336193326Sed static internal_key_type GetInternalKey(const FileEntry* FE) { 337193326Sed return std::make_pair((unsigned char) 0x1, FE->getName()); 338193326Sed } 339193326Sed 340193326Sed static bool EqualKey(internal_key_type a, internal_key_type b) { 341193326Sed return a.first == b.first && strcmp(a.second, b.second) == 0; 342198092Srdivacky } 343198092Srdivacky 344198092Srdivacky static PTHFileData ReadData(const internal_key_type& k, 345198092Srdivacky const unsigned char* d, unsigned) { 346193326Sed assert(k.first == 0x1 && "Only file lookups can match!"); 347193326Sed uint32_t x = ::ReadUnalignedLE32(d); 348193326Sed uint32_t y = ::ReadUnalignedLE32(d); 349198092Srdivacky return PTHFileData(x, y); 350193326Sed } 351193326Sed}; 352193326Sed 353199990Srdivackyclass PTHStringLookupTrait { 354193326Sedpublic: 355198092Srdivacky typedef uint32_t 356193326Sed data_type; 357193326Sed 358193326Sed typedef const std::pair<const char*, unsigned> 359193326Sed external_key_type; 360193326Sed 361193326Sed typedef external_key_type internal_key_type; 362198092Srdivacky 363193326Sed static bool EqualKey(const internal_key_type& a, 364193326Sed const internal_key_type& b) { 365193326Sed return (a.second == b.second) ? memcmp(a.first, b.first, a.second) == 0 366193326Sed : false; 367193326Sed } 368198092Srdivacky 369193326Sed static unsigned ComputeHash(const internal_key_type& a) { 370226633Sdim return llvm::HashString(StringRef(a.first, a.second)); 371193326Sed } 372198092Srdivacky 373193326Sed // This hopefully will just get inlined and removed by the optimizer. 374193326Sed static const internal_key_type& 375193326Sed GetInternalKey(const external_key_type& x) { return x; } 376198092Srdivacky 377193326Sed static std::pair<unsigned, unsigned> 378193326Sed ReadKeyDataLength(const unsigned char*& d) { 379193326Sed return std::make_pair((unsigned) ReadUnalignedLE16(d), sizeof(uint32_t)); 380193326Sed } 381198092Srdivacky 382193326Sed static std::pair<const char*, unsigned> 383193326Sed ReadKey(const unsigned char* d, unsigned n) { 384193326Sed assert(n >= 2 && d[n-1] == '\0'); 385193326Sed return std::make_pair((const char*) d, n-1); 386193326Sed } 387198092Srdivacky 388193326Sed static uint32_t ReadData(const internal_key_type& k, const unsigned char* d, 389193326Sed unsigned) { 390193326Sed return ::ReadUnalignedLE32(d); 391193326Sed } 392193326Sed}; 393193326Sed 394198092Srdivacky} // end anonymous namespace 395198092Srdivacky 396193326Sedtypedef OnDiskChainedHashTable<PTHFileLookupTrait> PTHFileLookup; 397193326Sedtypedef OnDiskChainedHashTable<PTHStringLookupTrait> PTHStringIdLookup; 398193326Sed 399193326Sed//===----------------------------------------------------------------------===// 400193326Sed// PTHManager methods. 401193326Sed//===----------------------------------------------------------------------===// 402193326Sed 403193326SedPTHManager::PTHManager(const llvm::MemoryBuffer* buf, void* fileLookup, 404193326Sed const unsigned char* idDataTable, 405198092Srdivacky IdentifierInfo** perIDCache, 406193326Sed void* stringIdLookup, unsigned numIds, 407193326Sed const unsigned char* spellingBase, 408193326Sed const char* originalSourceFile) 409193326Sed: Buf(buf), PerIDCache(perIDCache), FileLookup(fileLookup), 410193326Sed IdDataTable(idDataTable), StringIdLookup(stringIdLookup), 411193326Sed NumIds(numIds), PP(0), SpellingBase(spellingBase), 412193326Sed OriginalSourceFile(originalSourceFile) {} 413193326Sed 414193326SedPTHManager::~PTHManager() { 415193326Sed delete Buf; 416193326Sed delete (PTHFileLookup*) FileLookup; 417193326Sed delete (PTHStringIdLookup*) StringIdLookup; 418193326Sed free(PerIDCache); 419193326Sed} 420193326Sed 421226633Sdimstatic void InvalidPTH(DiagnosticsEngine &Diags, const char *Msg) { 422226633Sdim Diags.Report(Diags.getCustomDiagID(DiagnosticsEngine::Error, Msg)); 423193326Sed} 424193326Sed 425226633SdimPTHManager *PTHManager::Create(const std::string &file, 426226633Sdim DiagnosticsEngine &Diags) { 427193326Sed // Memory map the PTH file. 428234353Sdim OwningPtr<llvm::MemoryBuffer> File; 429198092Srdivacky 430218893Sdim if (llvm::MemoryBuffer::getFile(file, File)) { 431218893Sdim // FIXME: Add ec.message() to this diag. 432199482Srdivacky Diags.Report(diag::err_invalid_pth_file) << file; 433193326Sed return 0; 434193326Sed } 435198092Srdivacky 436193326Sed // Get the buffer ranges and check if there are at least three 32-bit 437193326Sed // words at the end of the file. 438243830Sdim const unsigned char *BufBeg = (const unsigned char*)File->getBufferStart(); 439243830Sdim const unsigned char *BufEnd = (const unsigned char*)File->getBufferEnd(); 440193326Sed 441193326Sed // Check the prologue of the file. 442239462Sdim if ((BufEnd - BufBeg) < (signed)(sizeof("cfe-pth") + 4 + 4) || 443239462Sdim memcmp(BufBeg, "cfe-pth", sizeof("cfe-pth")) != 0) { 444199482Srdivacky Diags.Report(diag::err_invalid_pth_file) << file; 445193326Sed return 0; 446193326Sed } 447198092Srdivacky 448193326Sed // Read the PTH version. 449239462Sdim const unsigned char *p = BufBeg + (sizeof("cfe-pth")); 450193326Sed unsigned Version = ReadLE32(p); 451198092Srdivacky 452199482Srdivacky if (Version < PTHManager::Version) { 453199482Srdivacky InvalidPTH(Diags, 454198092Srdivacky Version < PTHManager::Version 455193326Sed ? "PTH file uses an older PTH format that is no longer supported" 456193326Sed : "PTH file uses a newer PTH format that cannot be read"); 457193326Sed return 0; 458193326Sed } 459193326Sed 460198092Srdivacky // Compute the address of the index table at the end of the PTH file. 461193326Sed const unsigned char *PrologueOffset = p; 462198092Srdivacky 463193326Sed if (PrologueOffset >= BufEnd) { 464199482Srdivacky Diags.Report(diag::err_invalid_pth_file) << file; 465193326Sed return 0; 466193326Sed } 467198092Srdivacky 468193326Sed // Construct the file lookup table. This will be used for mapping from 469193326Sed // FileEntry*'s to cached tokens. 470193326Sed const unsigned char* FileTableOffset = PrologueOffset + sizeof(uint32_t)*2; 471193326Sed const unsigned char* FileTable = BufBeg + ReadLE32(FileTableOffset); 472198092Srdivacky 473193326Sed if (!(FileTable > BufBeg && FileTable < BufEnd)) { 474199482Srdivacky Diags.Report(diag::err_invalid_pth_file) << file; 475193326Sed return 0; // FIXME: Proper error diagnostic? 476193326Sed } 477198092Srdivacky 478234353Sdim OwningPtr<PTHFileLookup> FL(PTHFileLookup::Create(FileTable, BufBeg)); 479198092Srdivacky 480193326Sed // Warn if the PTH file is empty. We still want to create a PTHManager 481193326Sed // as the PTH could be used with -include-pth. 482193326Sed if (FL->isEmpty()) 483199482Srdivacky InvalidPTH(Diags, "PTH file contains no cached source data"); 484198092Srdivacky 485193326Sed // Get the location of the table mapping from persistent ids to the 486193326Sed // data needed to reconstruct identifiers. 487193326Sed const unsigned char* IDTableOffset = PrologueOffset + sizeof(uint32_t)*0; 488193326Sed const unsigned char* IData = BufBeg + ReadLE32(IDTableOffset); 489198092Srdivacky 490193326Sed if (!(IData >= BufBeg && IData < BufEnd)) { 491199482Srdivacky Diags.Report(diag::err_invalid_pth_file) << file; 492193326Sed return 0; 493193326Sed } 494198092Srdivacky 495193326Sed // Get the location of the hashtable mapping between strings and 496193326Sed // persistent IDs. 497193326Sed const unsigned char* StringIdTableOffset = PrologueOffset + sizeof(uint32_t)*1; 498193326Sed const unsigned char* StringIdTable = BufBeg + ReadLE32(StringIdTableOffset); 499193326Sed if (!(StringIdTable >= BufBeg && StringIdTable < BufEnd)) { 500199482Srdivacky Diags.Report(diag::err_invalid_pth_file) << file; 501193326Sed return 0; 502193326Sed } 503193326Sed 504234353Sdim OwningPtr<PTHStringIdLookup> SL(PTHStringIdLookup::Create(StringIdTable, 505193326Sed BufBeg)); 506198092Srdivacky 507193326Sed // Get the location of the spelling cache. 508193326Sed const unsigned char* spellingBaseOffset = PrologueOffset + sizeof(uint32_t)*3; 509193326Sed const unsigned char* spellingBase = BufBeg + ReadLE32(spellingBaseOffset); 510193326Sed if (!(spellingBase >= BufBeg && spellingBase < BufEnd)) { 511199482Srdivacky Diags.Report(diag::err_invalid_pth_file) << file; 512193326Sed return 0; 513193326Sed } 514198092Srdivacky 515193326Sed // Get the number of IdentifierInfos and pre-allocate the identifier cache. 516193326Sed uint32_t NumIds = ReadLE32(IData); 517198092Srdivacky 518221345Sdim // Pre-allocate the persistent ID -> IdentifierInfo* cache. We use calloc() 519193326Sed // so that we in the best case only zero out memory once when the OS returns 520193326Sed // us new pages. 521193326Sed IdentifierInfo** PerIDCache = 0; 522198092Srdivacky 523193326Sed if (NumIds) { 524198092Srdivacky PerIDCache = (IdentifierInfo**)calloc(NumIds, sizeof(*PerIDCache)); 525193326Sed if (!PerIDCache) { 526199482Srdivacky InvalidPTH(Diags, "Could not allocate memory for processing PTH file"); 527193326Sed return 0; 528193326Sed } 529193326Sed } 530193326Sed 531193326Sed // Compute the address of the original source file. 532193326Sed const unsigned char* originalSourceBase = PrologueOffset + sizeof(uint32_t)*4; 533193326Sed unsigned len = ReadUnalignedLE16(originalSourceBase); 534198092Srdivacky if (!len) originalSourceBase = 0; 535198092Srdivacky 536193326Sed // Create the new PTHManager. 537193326Sed return new PTHManager(File.take(), FL.take(), IData, PerIDCache, 538193326Sed SL.take(), NumIds, spellingBase, 539193326Sed (const char*) originalSourceBase); 540193326Sed} 541193326Sed 542193326SedIdentifierInfo* PTHManager::LazilyCreateIdentifierInfo(unsigned PersistentID) { 543193326Sed // Look in the PTH file for the string data for the IdentifierInfo object. 544193326Sed const unsigned char* TableEntry = IdDataTable + sizeof(uint32_t)*PersistentID; 545193326Sed const unsigned char* IDData = 546193326Sed (const unsigned char*)Buf->getBufferStart() + ReadLE32(TableEntry); 547193326Sed assert(IDData < (const unsigned char*)Buf->getBufferEnd()); 548198092Srdivacky 549193326Sed // Allocate the object. 550193326Sed std::pair<IdentifierInfo,const unsigned char*> *Mem = 551193326Sed Alloc.Allocate<std::pair<IdentifierInfo,const unsigned char*> >(); 552193326Sed 553193326Sed Mem->second = IDData; 554193326Sed assert(IDData[0] != '\0'); 555193326Sed IdentifierInfo *II = new ((void*) Mem) IdentifierInfo(); 556198092Srdivacky 557193326Sed // Store the new IdentifierInfo in the cache. 558193326Sed PerIDCache[PersistentID] = II; 559198398Srdivacky assert(II->getNameStart() && II->getNameStart()[0] != '\0'); 560193326Sed return II; 561193326Sed} 562193326Sed 563226633SdimIdentifierInfo* PTHManager::get(StringRef Name) { 564193326Sed PTHStringIdLookup& SL = *((PTHStringIdLookup*)StringIdLookup); 565193326Sed // Double check our assumption that the last character isn't '\0'. 566226633Sdim assert(Name.empty() || Name.back() != '\0'); 567205219Srdivacky PTHStringIdLookup::iterator I = SL.find(std::make_pair(Name.data(), 568205219Srdivacky Name.size())); 569193326Sed if (I == SL.end()) // No identifier found? 570193326Sed return 0; 571193326Sed 572193326Sed // Match found. Return the identifier! 573193326Sed assert(*I > 0); 574193326Sed return GetIdentifierInfo(*I-1); 575193326Sed} 576193326Sed 577193326SedPTHLexer *PTHManager::CreateLexer(FileID FID) { 578193326Sed const FileEntry *FE = PP->getSourceManager().getFileEntryForID(FID); 579193326Sed if (!FE) 580193326Sed return 0; 581198092Srdivacky 582193326Sed // Lookup the FileEntry object in our file lookup data structure. It will 583193326Sed // return a variant that indicates whether or not there is an offset within 584193326Sed // the PTH file that contains cached tokens. 585193326Sed PTHFileLookup& PFL = *((PTHFileLookup*)FileLookup); 586193326Sed PTHFileLookup::iterator I = PFL.find(FE); 587198092Srdivacky 588193326Sed if (I == PFL.end()) // No tokens available? 589193326Sed return 0; 590198092Srdivacky 591198092Srdivacky const PTHFileData& FileData = *I; 592198092Srdivacky 593193326Sed const unsigned char *BufStart = (const unsigned char *)Buf->getBufferStart(); 594193326Sed // Compute the offset of the token data within the buffer. 595193326Sed const unsigned char* data = BufStart + FileData.getTokenOffset(); 596193326Sed 597193326Sed // Get the location of pp-conditional table. 598193326Sed const unsigned char* ppcond = BufStart + FileData.getPPCondOffset(); 599193326Sed uint32_t Len = ReadLE32(ppcond); 600193326Sed if (Len == 0) ppcond = 0; 601198092Srdivacky 602193326Sed assert(PP && "No preprocessor set yet!"); 603198092Srdivacky return new PTHLexer(*PP, FID, data, ppcond, *this); 604193326Sed} 605193326Sed 606193326Sed//===----------------------------------------------------------------------===// 607193326Sed// 'stat' caching. 608193326Sed//===----------------------------------------------------------------------===// 609193326Sed 610193326Sednamespace { 611199990Srdivackyclass PTHStatData { 612193326Sedpublic: 613263508Sdim const bool HasData; 614263508Sdim uint64_t Size; 615263508Sdim time_t ModTime; 616263508Sdim llvm::sys::fs::UniqueID UniqueID; 617263508Sdim bool IsDirectory; 618198092Srdivacky 619263508Sdim PTHStatData(uint64_t Size, time_t ModTime, llvm::sys::fs::UniqueID UniqueID, 620263508Sdim bool IsDirectory) 621263508Sdim : HasData(true), Size(Size), ModTime(ModTime), UniqueID(UniqueID), 622263508Sdim IsDirectory(IsDirectory) {} 623198092Srdivacky 624263508Sdim PTHStatData() : HasData(false) {} 625193326Sed}; 626198092Srdivacky 627199990Srdivackyclass PTHStatLookupTrait : public PTHFileLookupCommonTrait { 628193326Sedpublic: 629193326Sed typedef const char* external_key_type; // const char* 630193326Sed typedef PTHStatData data_type; 631198092Srdivacky 632193326Sed static internal_key_type GetInternalKey(const char *path) { 633193326Sed // The key 'kind' doesn't matter here because it is ignored in EqualKey. 634193326Sed return std::make_pair((unsigned char) 0x0, path); 635193326Sed } 636193326Sed 637193326Sed static bool EqualKey(internal_key_type a, internal_key_type b) { 638193326Sed // When doing 'stat' lookups we don't care about the kind of 'a' and 'b', 639193326Sed // just the paths. 640193326Sed return strcmp(a.second, b.second) == 0; 641198092Srdivacky } 642198092Srdivacky 643193326Sed static data_type ReadData(const internal_key_type& k, const unsigned char* d, 644198092Srdivacky unsigned) { 645198092Srdivacky 646193326Sed if (k.first /* File or Directory */) { 647263508Sdim bool IsDirectory = true; 648263508Sdim if (k.first == 0x1 /* File */) { 649263508Sdim IsDirectory = false; 650263508Sdim d += 4 * 2; // Skip the first 2 words. 651263508Sdim } 652263508Sdim 653263508Sdim uint64_t File = ReadUnalignedLE64(d); 654263508Sdim uint64_t Device = ReadUnalignedLE64(d); 655263508Sdim llvm::sys::fs::UniqueID UniqueID(File, Device); 656263508Sdim time_t ModTime = ReadUnalignedLE64(d); 657263508Sdim uint64_t Size = ReadUnalignedLE64(d); 658263508Sdim return data_type(Size, ModTime, UniqueID, IsDirectory); 659193326Sed } 660193326Sed 661193326Sed // Negative stat. Don't read anything. 662193326Sed return data_type(); 663193326Sed } 664193326Sed}; 665193326Sed 666218893Sdimclass PTHStatCache : public FileSystemStatCache { 667193326Sed typedef OnDiskChainedHashTable<PTHStatLookupTrait> CacheTy; 668193326Sed CacheTy Cache; 669193326Sed 670198092Srdivackypublic: 671193326Sed PTHStatCache(PTHFileLookup &FL) : 672193326Sed Cache(FL.getNumBuckets(), FL.getNumEntries(), FL.getBuckets(), 673193326Sed FL.getBase()) {} 674193326Sed 675193326Sed ~PTHStatCache() {} 676198092Srdivacky 677263508Sdim LookupResult getStat(const char *Path, FileData &Data, bool isFile, 678263508Sdim int *FileDescriptor) { 679193326Sed // Do the lookup for the file's data in the PTH file. 680218893Sdim CacheTy::iterator I = Cache.find(Path); 681193326Sed 682193326Sed // If we don't get a hit in the PTH file just forward to 'stat'. 683205219Srdivacky if (I == Cache.end()) 684263508Sdim return statChained(Path, Data, isFile, FileDescriptor); 685198092Srdivacky 686263508Sdim const PTHStatData &D = *I; 687198092Srdivacky 688263508Sdim if (!D.HasData) 689218893Sdim return CacheMissing; 690193326Sed 691263508Sdim Data.Size = D.Size; 692263508Sdim Data.ModTime = D.ModTime; 693263508Sdim Data.UniqueID = D.UniqueID; 694263508Sdim Data.IsDirectory = D.IsDirectory; 695263508Sdim Data.IsNamedPipe = false; 696263508Sdim Data.InPCH = true; 697263508Sdim 698218893Sdim return CacheExists; 699193326Sed } 700193326Sed}; 701193326Sed} // end anonymous namespace 702193326Sed 703218893SdimFileSystemStatCache *PTHManager::createStatCache() { 704193326Sed return new PTHStatCache(*((PTHFileLookup*) FileLookup)); 705193326Sed} 706