// PTHLexer.cpp revision 198398
11556Srgrimes//===--- PTHLexer.cpp - Lex from a token stream ---------------------------===// 21556Srgrimes// 31556Srgrimes// The LLVM Compiler Infrastructure 41556Srgrimes// 51556Srgrimes// This file is distributed under the University of Illinois Open Source 61556Srgrimes// License. See LICENSE.TXT for details. 71556Srgrimes// 81556Srgrimes//===----------------------------------------------------------------------===// 91556Srgrimes// 101556Srgrimes// This file implements the PTHLexer interface. 111556Srgrimes// 121556Srgrimes//===----------------------------------------------------------------------===// 131556Srgrimes 141556Srgrimes#include "clang/Basic/TokenKinds.h" 151556Srgrimes#include "clang/Basic/FileManager.h" 161556Srgrimes#include "clang/Basic/IdentifierTable.h" 171556Srgrimes#include "clang/Basic/OnDiskHashTable.h" 181556Srgrimes#include "clang/Lex/PTHLexer.h" 191556Srgrimes#include "clang/Lex/Preprocessor.h" 201556Srgrimes#include "clang/Lex/PTHManager.h" 211556Srgrimes#include "clang/Lex/Token.h" 221556Srgrimes#include "clang/Lex/Preprocessor.h" 231556Srgrimes#include "llvm/ADT/OwningPtr.h" 241556Srgrimes#include "llvm/ADT/StringExtras.h" 251556Srgrimes#include "llvm/ADT/StringMap.h" 261556Srgrimes#include "llvm/Support/MemoryBuffer.h" 271556Srgrimes#include <sys/stat.h> 281556Srgrimesusing namespace clang; 291556Srgrimesusing namespace clang::io; 301556Srgrimes 311556Srgrimes#define DISK_TOKEN_SIZE (1+1+2+4+4) 321556Srgrimes 331556Srgrimes//===----------------------------------------------------------------------===// 341556Srgrimes// PTHLexer methods. 
351556Srgrimes//===----------------------------------------------------------------------===// 361556Srgrimes 371556SrgrimesPTHLexer::PTHLexer(Preprocessor &PP, FileID FID, const unsigned char *D, 381556Srgrimes const unsigned char *ppcond, PTHManager &PM) 3936049Scharnier : PreprocessorLexer(&PP, FID), TokBuf(D), CurPtr(D), LastHashTokPtr(0), 4036049Scharnier PPCond(ppcond), CurPPCondPtr(ppcond), PTHMgr(PM) { 4136049Scharnier 4236049Scharnier FileStartLoc = PP.getSourceManager().getLocForStartOfFile(FID); 4350471Speter} 441556Srgrimes 451556Srgrimesvoid PTHLexer::Lex(Token& Tok) { 461556SrgrimesLexNextToken: 471556Srgrimes 481556Srgrimes //===--------------------------------------==// 491556Srgrimes // Read the raw token data. 501556Srgrimes //===--------------------------------------==// 511556Srgrimes 521556Srgrimes // Shadow CurPtr into an automatic variable. 531556Srgrimes const unsigned char *CurPtrShadow = CurPtr; 541556Srgrimes 551556Srgrimes // Read in the data for the token. 561556Srgrimes unsigned Word0 = ReadLE32(CurPtrShadow); 571556Srgrimes uint32_t IdentifierID = ReadLE32(CurPtrShadow); 581556Srgrimes uint32_t FileOffset = ReadLE32(CurPtrShadow); 591556Srgrimes 601556Srgrimes tok::TokenKind TKind = (tok::TokenKind) (Word0 & 0xFF); 611556Srgrimes Token::TokenFlags TFlags = (Token::TokenFlags) ((Word0 >> 8) & 0xFF); 621556Srgrimes uint32_t Len = Word0 >> 16; 631556Srgrimes 641556Srgrimes CurPtr = CurPtrShadow; 651556Srgrimes 661556Srgrimes //===--------------------------------------==// 671556Srgrimes // Construct the token itself. 681556Srgrimes //===--------------------------------------==// 691556Srgrimes 701556Srgrimes Tok.startToken(); 711556Srgrimes Tok.setKind(TKind); 721556Srgrimes Tok.setFlag(TFlags); 7346684Skris assert(!LexingRawMode); 741556Srgrimes Tok.setLocation(FileStartLoc.getFileLocWithOffset(FileOffset)); 751556Srgrimes Tok.setLength(Len); 761556Srgrimes 771556Srgrimes // Handle identifiers. 
781556Srgrimes if (Tok.isLiteral()) { 791556Srgrimes Tok.setLiteralData((const char*) (PTHMgr.SpellingBase + IdentifierID)); 801556Srgrimes } 811556Srgrimes else if (IdentifierID) { 821556Srgrimes MIOpt.ReadToken(); 831556Srgrimes IdentifierInfo *II = PTHMgr.GetIdentifierInfo(IdentifierID-1); 841556Srgrimes 851556Srgrimes Tok.setIdentifierInfo(II); 861556Srgrimes 871556Srgrimes // Change the kind of this identifier to the appropriate token kind, e.g. 881556Srgrimes // turning "for" into a keyword. 891556Srgrimes Tok.setKind(II->getTokenID()); 901556Srgrimes 911556Srgrimes if (II->isHandleIdentifierCase()) 921556Srgrimes PP->HandleIdentifier(Tok); 931556Srgrimes return; 9476017Skris } 951556Srgrimes 961556Srgrimes //===--------------------------------------==// 971556Srgrimes // Process the token. 981556Srgrimes //===--------------------------------------==// 991556Srgrimes if (TKind == tok::eof) { 1001556Srgrimes // Save the end-of-file token. 1011556Srgrimes EofToken = Tok; 1021556Srgrimes 1031556Srgrimes Preprocessor *PPCache = PP; 1041556Srgrimes 1051556Srgrimes assert(!ParsingPreprocessorDirective); 1061556Srgrimes assert(!LexingRawMode); 1071556Srgrimes 1081556Srgrimes // FIXME: Issue diagnostics similar to Lexer. 
1091556Srgrimes if (PP->HandleEndOfFile(Tok, false)) 1101556Srgrimes return; 1111556Srgrimes 1121556Srgrimes assert(PPCache && "Raw buffer::LexEndOfFile should return a token"); 1131556Srgrimes return PPCache->Lex(Tok); 1141556Srgrimes } 1151556Srgrimes 1161556Srgrimes if (TKind == tok::hash && Tok.isAtStartOfLine()) { 1171556Srgrimes LastHashTokPtr = CurPtr - DISK_TOKEN_SIZE; 1181556Srgrimes assert(!LexingRawMode); 1191556Srgrimes PP->HandleDirective(Tok); 1201556Srgrimes 1211556Srgrimes if (PP->isCurrentLexer(this)) 1221556Srgrimes goto LexNextToken; 1231556Srgrimes 1241556Srgrimes return PP->Lex(Tok); 1251556Srgrimes } 12676017Skris 1271556Srgrimes if (TKind == tok::eom) { 1281556Srgrimes assert(ParsingPreprocessorDirective); 1291556Srgrimes ParsingPreprocessorDirective = false; 1301556Srgrimes return; 1311556Srgrimes } 1321556Srgrimes 1331556Srgrimes MIOpt.ReadToken(); 13476017Skris} 1351556Srgrimes 1361556Srgrimes// FIXME: We can just grab the last token instead of storing a copy 1371556Srgrimes// into EofToken. 1381556Srgrimesvoid PTHLexer::getEOF(Token& Tok) { 1391556Srgrimes assert(EofToken.is(tok::eof)); 1401556Srgrimes Tok = EofToken; 1411556Srgrimes} 1421556Srgrimes 1431556Srgrimesvoid PTHLexer::DiscardToEndOfLine() { 1441556Srgrimes assert(ParsingPreprocessorDirective && ParsingFilename == false && 1451556Srgrimes "Must be in a preprocessing directive!"); 1461556Srgrimes 1471556Srgrimes // We assume that if the preprocessor wishes to discard to the end of 1481556Srgrimes // the line that it also means to end the current preprocessor directive. 1491556Srgrimes ParsingPreprocessorDirective = false; 1501556Srgrimes 1511556Srgrimes // Skip tokens by only peeking at their token kind and the flags. 1521556Srgrimes // We don't need to actually reconstruct full tokens from the token buffer. 15376017Skris // This saves some copies and it also reduces IdentifierInfo* lookup. 
1541556Srgrimes const unsigned char* p = CurPtr; 15576351Skris while (1) { 1561556Srgrimes // Read the token kind. Are we at the end of the file? 1571556Srgrimes tok::TokenKind x = (tok::TokenKind) (uint8_t) *p; 15876351Skris if (x == tok::eof) break; 1591556Srgrimes 16076351Skris // Read the token flags. Are we at the start of the next line? 1611556Srgrimes Token::TokenFlags y = (Token::TokenFlags) (uint8_t) p[1]; 1621556Srgrimes if (y & Token::StartOfLine) break; 1631556Srgrimes 1641556Srgrimes // Skip to the next token. 1651556Srgrimes p += DISK_TOKEN_SIZE; 1661556Srgrimes } 1671556Srgrimes 1681556Srgrimes CurPtr = p; 1691556Srgrimes} 17076017Skris 1711556Srgrimes/// SkipBlock - Used by Preprocessor to skip the current conditional block. 1721556Srgrimesbool PTHLexer::SkipBlock() { 1731556Srgrimes assert(CurPPCondPtr && "No cached PP conditional information."); 1741556Srgrimes assert(LastHashTokPtr && "No known '#' token."); 1751556Srgrimes 1761556Srgrimes const unsigned char* HashEntryI = 0; 1771556Srgrimes uint32_t Offset; 1781556Srgrimes uint32_t TableIdx; 1791556Srgrimes 18076017Skris do { 1811556Srgrimes // Read the token offset from the side-table. 1821556Srgrimes Offset = ReadLE32(CurPPCondPtr); 1831556Srgrimes 1841556Srgrimes // Read the target table index from the side-table. 1851556Srgrimes TableIdx = ReadLE32(CurPPCondPtr); 1861556Srgrimes 1871556Srgrimes // Compute the actual memory address of the '#' token data for this entry. 18876351Skris HashEntryI = TokBuf + Offset; 1891556Srgrimes 1901556Srgrimes // Optmization: "Sibling jumping". #if...#else...#endif blocks can 1911556Srgrimes // contain nested blocks. In the side-table we can jump over these 1921556Srgrimes // nested blocks instead of doing a linear search if the next "sibling" 1931556Srgrimes // entry is not at a location greater than LastHashTokPtr. 
1941556Srgrimes if (HashEntryI < LastHashTokPtr && TableIdx) { 1951556Srgrimes // In the side-table we are still at an entry for a '#' token that 1961556Srgrimes // is earlier than the last one we saw. Check if the location we would 1971556Srgrimes // stride gets us closer. 1981556Srgrimes const unsigned char* NextPPCondPtr = 1991556Srgrimes PPCond + TableIdx*(sizeof(uint32_t)*2); 2001556Srgrimes assert(NextPPCondPtr >= CurPPCondPtr); 2011556Srgrimes // Read where we should jump to. 2021556Srgrimes uint32_t TmpOffset = ReadLE32(NextPPCondPtr); 2031556Srgrimes const unsigned char* HashEntryJ = TokBuf + TmpOffset; 2041556Srgrimes 20576017Skris if (HashEntryJ <= LastHashTokPtr) { 2061556Srgrimes // Jump directly to the next entry in the side table. 2071556Srgrimes HashEntryI = HashEntryJ; 2081556Srgrimes Offset = TmpOffset; 2091556Srgrimes TableIdx = ReadLE32(NextPPCondPtr); 2101556Srgrimes CurPPCondPtr = NextPPCondPtr; 2111556Srgrimes } 2121556Srgrimes } 2131556Srgrimes } 2141556Srgrimes while (HashEntryI < LastHashTokPtr); 2151556Srgrimes assert(HashEntryI == LastHashTokPtr && "No PP-cond entry found for '#'"); 2161556Srgrimes assert(TableIdx && "No jumping from #endifs."); 2171556Srgrimes 2181556Srgrimes // Update our side-table iterator. 2198855Srgrimes const unsigned char* NextPPCondPtr = PPCond + TableIdx*(sizeof(uint32_t)*2); 2201556Srgrimes assert(NextPPCondPtr >= CurPPCondPtr); 2211556Srgrimes CurPPCondPtr = NextPPCondPtr; 2221556Srgrimes 2231556Srgrimes // Read where we should jump to. 2241556Srgrimes HashEntryI = TokBuf + ReadLE32(NextPPCondPtr); 2251556Srgrimes uint32_t NextIdx = ReadLE32(NextPPCondPtr); 2261556Srgrimes 2271556Srgrimes // By construction NextIdx will be zero if this is a #endif. This is useful 2281556Srgrimes // to know to obviate lexing another token. 2291556Srgrimes bool isEndif = NextIdx == 0; 2301556Srgrimes 2311556Srgrimes // This case can occur when we see something like this: 2321556Srgrimes // 2331556Srgrimes // #if ... 
2341556Srgrimes // /* a comment or nothing */ 2351556Srgrimes // #elif 2361556Srgrimes // 2371556Srgrimes // If we are skipping the first #if block it will be the case that CurPtr 2381556Srgrimes // already points 'elif'. Just return. 2391556Srgrimes 2401556Srgrimes if (CurPtr > HashEntryI) { 2411556Srgrimes assert(CurPtr == HashEntryI + DISK_TOKEN_SIZE); 2421556Srgrimes // Did we reach a #endif? If so, go ahead and consume that token as well. 2431556Srgrimes if (isEndif) 24476017Skris CurPtr += DISK_TOKEN_SIZE*2; 2451556Srgrimes else 2461556Srgrimes LastHashTokPtr = HashEntryI; 2471556Srgrimes 2481556Srgrimes return isEndif; 2491556Srgrimes } 2501556Srgrimes 2511556Srgrimes // Otherwise, we need to advance. Update CurPtr to point to the '#' token. 2521556Srgrimes CurPtr = HashEntryI; 2531556Srgrimes 2541556Srgrimes // Update the location of the last observed '#'. This is useful if we 2551556Srgrimes // are skipping multiple blocks. 2561556Srgrimes LastHashTokPtr = CurPtr; 2571556Srgrimes 2581556Srgrimes // Skip the '#' token. 2591556Srgrimes assert(((tok::TokenKind)*CurPtr) == tok::hash); 2601556Srgrimes CurPtr += DISK_TOKEN_SIZE; 2611556Srgrimes 2621556Srgrimes // Did we reach a #endif? If so, go ahead and consume that token as well. 2631556Srgrimes if (isEndif) { CurPtr += DISK_TOKEN_SIZE*2; } 2641556Srgrimes 2651556Srgrimes return isEndif; 26676351Skris} 2671556Srgrimes 2681556SrgrimesSourceLocation PTHLexer::getSourceLocation() { 26976017Skris // getSourceLocation is not on the hot path. It is used to get the location 2701556Srgrimes // of the next token when transitioning back to this lexer when done 2711556Srgrimes // handling a #included file. Just read the necessary data from the token 2721556Srgrimes // data buffer to construct the SourceLocation object. 2731556Srgrimes // NOTE: This is a virtual function; hence it is defined out-of-line. 
2741556Srgrimes const unsigned char *OffsetPtr = CurPtr + (DISK_TOKEN_SIZE - 4); 2751556Srgrimes uint32_t Offset = ReadLE32(OffsetPtr); 2761556Srgrimes return FileStartLoc.getFileLocWithOffset(Offset); 2771556Srgrimes} 2781556Srgrimes 2791556Srgrimes//===----------------------------------------------------------------------===// 2801556Srgrimes// PTH file lookup: map from strings to file data. 2811556Srgrimes//===----------------------------------------------------------------------===// 2821556Srgrimes 2831556Srgrimes/// PTHFileLookup - This internal data structure is used by the PTHManager 2841556Srgrimes/// to map from FileEntry objects managed by FileManager to offsets within 2851556Srgrimes/// the PTH file. 28676017Skrisnamespace { 2871556Srgrimesclass VISIBILITY_HIDDEN PTHFileData { 2881556Srgrimes const uint32_t TokenOff; 2891556Srgrimes const uint32_t PPCondOff; 2901556Srgrimespublic: 2911556Srgrimes PTHFileData(uint32_t tokenOff, uint32_t ppCondOff) 2921556Srgrimes : TokenOff(tokenOff), PPCondOff(ppCondOff) {} 2931556Srgrimes 2941556Srgrimes uint32_t getTokenOffset() const { return TokenOff; } 2951556Srgrimes uint32_t getPPCondOffset() const { return PPCondOff; } 2961556Srgrimes}; 2971556Srgrimes 2981556Srgrimes 2991556Srgrimesclass VISIBILITY_HIDDEN PTHFileLookupCommonTrait { 3001556Srgrimespublic: 3011556Srgrimes typedef std::pair<unsigned char, const char*> internal_key_type; 3021556Srgrimes 3031556Srgrimes static unsigned ComputeHash(internal_key_type x) { 3041556Srgrimes return llvm::HashString(x.second); 3051556Srgrimes } 3061556Srgrimes 3071556Srgrimes static std::pair<unsigned, unsigned> 3081556Srgrimes ReadKeyDataLength(const unsigned char*& d) { 3091556Srgrimes unsigned keyLen = (unsigned) ReadUnalignedLE16(d); 3101556Srgrimes unsigned dataLen = (unsigned) *(d++); 3111556Srgrimes return std::make_pair(keyLen, dataLen); 3128855Srgrimes } 3131556Srgrimes 3141556Srgrimes static internal_key_type ReadKey(const unsigned char* d, unsigned) { 
3151556Srgrimes unsigned char k = *(d++); // Read the entry kind. 3161556Srgrimes return std::make_pair(k, (const char*) d); 3171556Srgrimes } 3181556Srgrimes}; 3191556Srgrimes 32076017Skrisclass VISIBILITY_HIDDEN PTHFileLookupTrait : public PTHFileLookupCommonTrait { 3211556Srgrimespublic: 3221556Srgrimes typedef const FileEntry* external_key_type; 3231556Srgrimes typedef PTHFileData data_type; 3241556Srgrimes 3251556Srgrimes static internal_key_type GetInternalKey(const FileEntry* FE) { 32676351Skris return std::make_pair((unsigned char) 0x1, FE->getName()); 32776351Skris } 32876351Skris 32976351Skris static bool EqualKey(internal_key_type a, internal_key_type b) { 33076351Skris return a.first == b.first && strcmp(a.second, b.second) == 0; 33176351Skris } 33276351Skris 33376351Skris static PTHFileData ReadData(const internal_key_type& k, 33476351Skris const unsigned char* d, unsigned) { 33576351Skris assert(k.first == 0x1 && "Only file lookups can match!"); 33676351Skris uint32_t x = ::ReadUnalignedLE32(d); 33776351Skris uint32_t y = ::ReadUnalignedLE32(d); 33876351Skris return PTHFileData(x, y); 33976351Skris } 34076351Skris}; 3411556Srgrimes 3421556Srgrimesclass VISIBILITY_HIDDEN PTHStringLookupTrait { 3431556Srgrimespublic: 3441556Srgrimes typedef uint32_t 3451556Srgrimes data_type; 3461556Srgrimes 34746684Skris typedef const std::pair<const char*, unsigned> 3481556Srgrimes external_key_type; 3491556Srgrimes 3501556Srgrimes typedef external_key_type internal_key_type; 3511556Srgrimes 3521556Srgrimes static bool EqualKey(const internal_key_type& a, 3531556Srgrimes const internal_key_type& b) { 3541556Srgrimes return (a.second == b.second) ? 
memcmp(a.first, b.first, a.second) == 0 3551556Srgrimes : false; 3561556Srgrimes } 3571556Srgrimes 3581556Srgrimes static unsigned ComputeHash(const internal_key_type& a) { 3591556Srgrimes return llvm::HashString(llvm::StringRef(a.first, a.second)); 3601556Srgrimes } 3611556Srgrimes 3621556Srgrimes // This hopefully will just get inlined and removed by the optimizer. 3631556Srgrimes static const internal_key_type& 3641556Srgrimes GetInternalKey(const external_key_type& x) { return x; } 36576017Skris 3661556Srgrimes static std::pair<unsigned, unsigned> 3671556Srgrimes ReadKeyDataLength(const unsigned char*& d) { 3681556Srgrimes return std::make_pair((unsigned) ReadUnalignedLE16(d), sizeof(uint32_t)); 3691556Srgrimes } 3701556Srgrimes 3711556Srgrimes static std::pair<const char*, unsigned> 3721556Srgrimes ReadKey(const unsigned char* d, unsigned n) { 3731556Srgrimes assert(n >= 2 && d[n-1] == '\0'); 3741556Srgrimes return std::make_pair((const char*) d, n-1); 3751556Srgrimes } 3761556Srgrimes 3771556Srgrimes static uint32_t ReadData(const internal_key_type& k, const unsigned char* d, 3781556Srgrimes unsigned) { 3791556Srgrimes return ::ReadUnalignedLE32(d); 3801556Srgrimes } 3818855Srgrimes}; 3821556Srgrimes 3831556Srgrimes} // end anonymous namespace 3841556Srgrimes 3851556Srgrimestypedef OnDiskChainedHashTable<PTHFileLookupTrait> PTHFileLookup; 3861556Srgrimestypedef OnDiskChainedHashTable<PTHStringLookupTrait> PTHStringIdLookup; 3871556Srgrimes 3881556Srgrimes//===----------------------------------------------------------------------===// 3891556Srgrimes// PTHManager methods. 
3901556Srgrimes//===----------------------------------------------------------------------===// 3911556Srgrimes 3921556SrgrimesPTHManager::PTHManager(const llvm::MemoryBuffer* buf, void* fileLookup, 3931556Srgrimes const unsigned char* idDataTable, 3941556Srgrimes IdentifierInfo** perIDCache, 3951556Srgrimes void* stringIdLookup, unsigned numIds, 3961556Srgrimes const unsigned char* spellingBase, 3971556Srgrimes const char* originalSourceFile) 3981556Srgrimes: Buf(buf), PerIDCache(perIDCache), FileLookup(fileLookup), 3991556Srgrimes IdDataTable(idDataTable), StringIdLookup(stringIdLookup), 4001556Srgrimes NumIds(numIds), PP(0), SpellingBase(spellingBase), 4011556Srgrimes OriginalSourceFile(originalSourceFile) {} 4021556Srgrimes 4031556SrgrimesPTHManager::~PTHManager() { 4041556Srgrimes delete Buf; 4051556Srgrimes delete (PTHFileLookup*) FileLookup; 4061556Srgrimes delete (PTHStringIdLookup*) StringIdLookup; 4071556Srgrimes free(PerIDCache); 4081556Srgrimes} 4091556Srgrimes 4101556Srgrimesstatic void InvalidPTH(Diagnostic *Diags, Diagnostic::Level level, 4111556Srgrimes const char* Msg = 0) { 4121556Srgrimes if (!Diags) return; 4131556Srgrimes if (!Msg) Msg = "Invalid or corrupted PTH file"; 4141556Srgrimes unsigned DiagID = Diags->getCustomDiagID(level, Msg); 4151556Srgrimes Diags->Report(FullSourceLoc(), DiagID); 4161556Srgrimes} 4171556Srgrimes 4181556SrgrimesPTHManager* PTHManager::Create(const std::string& file, Diagnostic* Diags, 4191556Srgrimes Diagnostic::Level level) { 4201556Srgrimes // Memory map the PTH file. 
4211556Srgrimes llvm::OwningPtr<llvm::MemoryBuffer> 4221556Srgrimes File(llvm::MemoryBuffer::getFile(file.c_str())); 4231556Srgrimes 4241556Srgrimes if (!File) { 4251556Srgrimes if (Diags) { 4261556Srgrimes unsigned DiagID = Diags->getCustomDiagID(level, 4271556Srgrimes "PTH file %0 could not be read"); 4281556Srgrimes Diags->Report(FullSourceLoc(), DiagID) << file; 4291556Srgrimes } 4301556Srgrimes 4311556Srgrimes return 0; 4321556Srgrimes } 4331556Srgrimes 4341556Srgrimes // Get the buffer ranges and check if there are at least three 32-bit 4351556Srgrimes // words at the end of the file. 4361556Srgrimes const unsigned char* BufBeg = (unsigned char*)File->getBufferStart(); 4371556Srgrimes const unsigned char* BufEnd = (unsigned char*)File->getBufferEnd(); 4381556Srgrimes 4391556Srgrimes // Check the prologue of the file. 4401556Srgrimes if ((BufEnd - BufBeg) < (signed) (sizeof("cfe-pth") + 3 + 4) || 4411556Srgrimes memcmp(BufBeg, "cfe-pth", sizeof("cfe-pth") - 1) != 0) { 4421556Srgrimes InvalidPTH(Diags, level); 44376017Skris return 0; 4441556Srgrimes } 4451556Srgrimes 4461556Srgrimes // Read the PTH version. 44776017Skris const unsigned char *p = BufBeg + (sizeof("cfe-pth") - 1); 4481556Srgrimes unsigned Version = ReadLE32(p); 44976017Skris 4501556Srgrimes if (Version != PTHManager::Version) { 4511556Srgrimes InvalidPTH(Diags, level, 4521556Srgrimes Version < PTHManager::Version 4531556Srgrimes ? "PTH file uses an older PTH format that is no longer supported" 4541556Srgrimes : "PTH file uses a newer PTH format that cannot be read"); 45576017Skris return 0; 4561556Srgrimes } 45776017Skris 4581556Srgrimes // Compute the address of the index table at the end of the PTH file. 4591556Srgrimes const unsigned char *PrologueOffset = p; 4601556Srgrimes 4611556Srgrimes if (PrologueOffset >= BufEnd) { 4621556Srgrimes InvalidPTH(Diags, level); 4631556Srgrimes return 0; 46476017Skris } 4651556Srgrimes 46676017Skris // Construct the file lookup table. 
This will be used for mapping from 4671556Srgrimes // FileEntry*'s to cached tokens. 4681556Srgrimes const unsigned char* FileTableOffset = PrologueOffset + sizeof(uint32_t)*2; 4691556Srgrimes const unsigned char* FileTable = BufBeg + ReadLE32(FileTableOffset); 4701556Srgrimes 4711556Srgrimes if (!(FileTable > BufBeg && FileTable < BufEnd)) { 4721556Srgrimes InvalidPTH(Diags, level); 4731556Srgrimes return 0; // FIXME: Proper error diagnostic? 4741556Srgrimes } 4751556Srgrimes 4761556Srgrimes llvm::OwningPtr<PTHFileLookup> FL(PTHFileLookup::Create(FileTable, BufBeg)); 4771556Srgrimes 4781556Srgrimes // Warn if the PTH file is empty. We still want to create a PTHManager 4791556Srgrimes // as the PTH could be used with -include-pth. 4801556Srgrimes if (FL->isEmpty()) 4811556Srgrimes InvalidPTH(Diags, level, "PTH file contains no cached source data"); 4821556Srgrimes 4831556Srgrimes // Get the location of the table mapping from persistent ids to the 4841556Srgrimes // data needed to reconstruct identifiers. 4851556Srgrimes const unsigned char* IDTableOffset = PrologueOffset + sizeof(uint32_t)*0; 4861556Srgrimes const unsigned char* IData = BufBeg + ReadLE32(IDTableOffset); 4871556Srgrimes 4881556Srgrimes if (!(IData >= BufBeg && IData < BufEnd)) { 4891556Srgrimes InvalidPTH(Diags, level); 4901556Srgrimes return 0; 4911556Srgrimes } 4921556Srgrimes 4931556Srgrimes // Get the location of the hashtable mapping between strings and 4941556Srgrimes // persistent IDs. 
4951556Srgrimes const unsigned char* StringIdTableOffset = PrologueOffset + sizeof(uint32_t)*1; 4961556Srgrimes const unsigned char* StringIdTable = BufBeg + ReadLE32(StringIdTableOffset); 4971556Srgrimes if (!(StringIdTable >= BufBeg && StringIdTable < BufEnd)) { 4981556Srgrimes InvalidPTH(Diags, level); 4991556Srgrimes return 0; 5001556Srgrimes } 5011556Srgrimes 5021556Srgrimes llvm::OwningPtr<PTHStringIdLookup> SL(PTHStringIdLookup::Create(StringIdTable, 5031556Srgrimes BufBeg)); 5041556Srgrimes 5051556Srgrimes // Get the location of the spelling cache. 5061556Srgrimes const unsigned char* spellingBaseOffset = PrologueOffset + sizeof(uint32_t)*3; 5071556Srgrimes const unsigned char* spellingBase = BufBeg + ReadLE32(spellingBaseOffset); 5081556Srgrimes if (!(spellingBase >= BufBeg && spellingBase < BufEnd)) { 5091556Srgrimes InvalidPTH(Diags, level); 5101556Srgrimes return 0; 5111556Srgrimes } 5121556Srgrimes 5131556Srgrimes // Get the number of IdentifierInfos and pre-allocate the identifier cache. 5141556Srgrimes uint32_t NumIds = ReadLE32(IData); 5151556Srgrimes 5161556Srgrimes // Pre-allocate the peristent ID -> IdentifierInfo* cache. We use calloc() 5171556Srgrimes // so that we in the best case only zero out memory once when the OS returns 5181556Srgrimes // us new pages. 5191556Srgrimes IdentifierInfo** PerIDCache = 0; 5201556Srgrimes 5211556Srgrimes if (NumIds) { 5221556Srgrimes PerIDCache = (IdentifierInfo**)calloc(NumIds, sizeof(*PerIDCache)); 5231556Srgrimes if (!PerIDCache) { 5241556Srgrimes InvalidPTH(Diags, level, 5251556Srgrimes "Could not allocate memory for processing PTH file"); 5261556Srgrimes return 0; 52736784Simp } 52876017Skris } 5291556Srgrimes 5301556Srgrimes // Compute the address of the original source file. 
5311556Srgrimes const unsigned char* originalSourceBase = PrologueOffset + sizeof(uint32_t)*4; 5321556Srgrimes unsigned len = ReadUnalignedLE16(originalSourceBase); 5331556Srgrimes if (!len) originalSourceBase = 0; 53446684Skris 5351556Srgrimes // Create the new PTHManager. 5361556Srgrimes return new PTHManager(File.take(), FL.take(), IData, PerIDCache, 5371556Srgrimes SL.take(), NumIds, spellingBase, 5381556Srgrimes (const char*) originalSourceBase); 5391556Srgrimes} 5401556Srgrimes 5411556SrgrimesIdentifierInfo* PTHManager::LazilyCreateIdentifierInfo(unsigned PersistentID) { 5421556Srgrimes // Look in the PTH file for the string data for the IdentifierInfo object. 5431556Srgrimes const unsigned char* TableEntry = IdDataTable + sizeof(uint32_t)*PersistentID; 5441556Srgrimes const unsigned char* IDData = 5451556Srgrimes (const unsigned char*)Buf->getBufferStart() + ReadLE32(TableEntry); 5461556Srgrimes assert(IDData < (const unsigned char*)Buf->getBufferEnd()); 5471556Srgrimes 5481556Srgrimes // Allocate the object. 5491556Srgrimes std::pair<IdentifierInfo,const unsigned char*> *Mem = 5501556Srgrimes Alloc.Allocate<std::pair<IdentifierInfo,const unsigned char*> >(); 5511556Srgrimes 5521556Srgrimes Mem->second = IDData; 5531556Srgrimes assert(IDData[0] != '\0'); 5541556Srgrimes IdentifierInfo *II = new ((void*) Mem) IdentifierInfo(); 5551556Srgrimes 5561556Srgrimes // Store the new IdentifierInfo in the cache. 55776351Skris PerIDCache[PersistentID] = II; 5581556Srgrimes assert(II->getNameStart() && II->getNameStart()[0] != '\0'); 5591556Srgrimes return II; 5601556Srgrimes} 5611556Srgrimes 562IdentifierInfo* PTHManager::get(const char *NameStart, const char *NameEnd) { 563 PTHStringIdLookup& SL = *((PTHStringIdLookup*)StringIdLookup); 564 // Double check our assumption that the last character isn't '\0'. 
565 assert(NameEnd==NameStart || NameStart[NameEnd-NameStart-1] != '\0'); 566 PTHStringIdLookup::iterator I = SL.find(std::make_pair(NameStart, 567 NameEnd - NameStart)); 568 if (I == SL.end()) // No identifier found? 569 return 0; 570 571 // Match found. Return the identifier! 572 assert(*I > 0); 573 return GetIdentifierInfo(*I-1); 574} 575 576PTHLexer *PTHManager::CreateLexer(FileID FID) { 577 const FileEntry *FE = PP->getSourceManager().getFileEntryForID(FID); 578 if (!FE) 579 return 0; 580 581 // Lookup the FileEntry object in our file lookup data structure. It will 582 // return a variant that indicates whether or not there is an offset within 583 // the PTH file that contains cached tokens. 584 PTHFileLookup& PFL = *((PTHFileLookup*)FileLookup); 585 PTHFileLookup::iterator I = PFL.find(FE); 586 587 if (I == PFL.end()) // No tokens available? 588 return 0; 589 590 const PTHFileData& FileData = *I; 591 592 const unsigned char *BufStart = (const unsigned char *)Buf->getBufferStart(); 593 // Compute the offset of the token data within the buffer. 594 const unsigned char* data = BufStart + FileData.getTokenOffset(); 595 596 // Get the location of pp-conditional table. 597 const unsigned char* ppcond = BufStart + FileData.getPPCondOffset(); 598 uint32_t Len = ReadLE32(ppcond); 599 if (Len == 0) ppcond = 0; 600 601 assert(PP && "No preprocessor set yet!"); 602 return new PTHLexer(*PP, FID, data, ppcond, *this); 603} 604 605//===----------------------------------------------------------------------===// 606// 'stat' caching. 
607//===----------------------------------------------------------------------===// 608 609namespace { 610class VISIBILITY_HIDDEN PTHStatData { 611public: 612 const bool hasStat; 613 const ino_t ino; 614 const dev_t dev; 615 const mode_t mode; 616 const time_t mtime; 617 const off_t size; 618 619 PTHStatData(ino_t i, dev_t d, mode_t mo, time_t m, off_t s) 620 : hasStat(true), ino(i), dev(d), mode(mo), mtime(m), size(s) {} 621 622 PTHStatData() 623 : hasStat(false), ino(0), dev(0), mode(0), mtime(0), size(0) {} 624}; 625 626class VISIBILITY_HIDDEN PTHStatLookupTrait : public PTHFileLookupCommonTrait { 627public: 628 typedef const char* external_key_type; // const char* 629 typedef PTHStatData data_type; 630 631 static internal_key_type GetInternalKey(const char *path) { 632 // The key 'kind' doesn't matter here because it is ignored in EqualKey. 633 return std::make_pair((unsigned char) 0x0, path); 634 } 635 636 static bool EqualKey(internal_key_type a, internal_key_type b) { 637 // When doing 'stat' lookups we don't care about the kind of 'a' and 'b', 638 // just the paths. 639 return strcmp(a.second, b.second) == 0; 640 } 641 642 static data_type ReadData(const internal_key_type& k, const unsigned char* d, 643 unsigned) { 644 645 if (k.first /* File or Directory */) { 646 if (k.first == 0x1 /* File */) d += 4 * 2; // Skip the first 2 words. 647 ino_t ino = (ino_t) ReadUnalignedLE32(d); 648 dev_t dev = (dev_t) ReadUnalignedLE32(d); 649 mode_t mode = (mode_t) ReadUnalignedLE16(d); 650 time_t mtime = (time_t) ReadUnalignedLE64(d); 651 return data_type(ino, dev, mode, mtime, (off_t) ReadUnalignedLE64(d)); 652 } 653 654 // Negative stat. Don't read anything. 
655 return data_type(); 656 } 657}; 658 659class VISIBILITY_HIDDEN PTHStatCache : public StatSysCallCache { 660 typedef OnDiskChainedHashTable<PTHStatLookupTrait> CacheTy; 661 CacheTy Cache; 662 663public: 664 PTHStatCache(PTHFileLookup &FL) : 665 Cache(FL.getNumBuckets(), FL.getNumEntries(), FL.getBuckets(), 666 FL.getBase()) {} 667 668 ~PTHStatCache() {} 669 670 int stat(const char *path, struct stat *buf) { 671 // Do the lookup for the file's data in the PTH file. 672 CacheTy::iterator I = Cache.find(path); 673 674 // If we don't get a hit in the PTH file just forward to 'stat'. 675 if (I == Cache.end()) 676 return StatSysCallCache::stat(path, buf); 677 678 const PTHStatData& Data = *I; 679 680 if (!Data.hasStat) 681 return 1; 682 683 buf->st_ino = Data.ino; 684 buf->st_dev = Data.dev; 685 buf->st_mtime = Data.mtime; 686 buf->st_mode = Data.mode; 687 buf->st_size = Data.size; 688 return 0; 689 } 690}; 691} // end anonymous namespace 692 693StatSysCallCache *PTHManager::createStatCache() { 694 return new PTHStatCache(*((PTHFileLookup*) FileLookup)); 695} 696