// PTHLexer.cpp revision 198398
//===--- PTHLexer.cpp - Lex from a token stream ---------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the PTHLexer interface.
//
//===----------------------------------------------------------------------===//
131556Srgrimes
#include "clang/Basic/TokenKinds.h"
#include "clang/Basic/FileManager.h"
#include "clang/Basic/IdentifierTable.h"
#include "clang/Basic/OnDiskHashTable.h"
#include "clang/Lex/PTHLexer.h"
#include "clang/Lex/Preprocessor.h"
#include "clang/Lex/PTHManager.h"
#include "clang/Lex/Token.h"
#include "llvm/ADT/OwningPtr.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/Support/MemoryBuffer.h"
#include <cstddef>
#include <cstdlib>
#include <cstring>
#include <sys/stat.h>
using namespace clang;
using namespace clang::io;
301556Srgrimes
// Size in bytes of one serialized token record in the PTH token stream.
// PTHLexer::Lex decodes a record as three little-endian 32-bit words:
//   kind (1 byte) + flags (1 byte) + length (2 bytes)
//   + identifier ID or spelling offset (4 bytes)
//   + file offset (4 bytes).
#define DISK_TOKEN_SIZE (1+1+2+4+4)
321556Srgrimes
//===----------------------------------------------------------------------===//
// PTHLexer methods.
//===----------------------------------------------------------------------===//
361556Srgrimes
371556SrgrimesPTHLexer::PTHLexer(Preprocessor &PP, FileID FID, const unsigned char *D,
381556Srgrimes                   const unsigned char *ppcond, PTHManager &PM)
3936049Scharnier  : PreprocessorLexer(&PP, FID), TokBuf(D), CurPtr(D), LastHashTokPtr(0),
4036049Scharnier    PPCond(ppcond), CurPPCondPtr(ppcond), PTHMgr(PM) {
4136049Scharnier
4236049Scharnier  FileStartLoc = PP.getSourceManager().getLocForStartOfFile(FID);
4350471Speter}
441556Srgrimes
451556Srgrimesvoid PTHLexer::Lex(Token& Tok) {
461556SrgrimesLexNextToken:
471556Srgrimes
481556Srgrimes  //===--------------------------------------==//
491556Srgrimes  // Read the raw token data.
501556Srgrimes  //===--------------------------------------==//
511556Srgrimes
521556Srgrimes  // Shadow CurPtr into an automatic variable.
531556Srgrimes  const unsigned char *CurPtrShadow = CurPtr;
541556Srgrimes
551556Srgrimes  // Read in the data for the token.
561556Srgrimes  unsigned Word0 = ReadLE32(CurPtrShadow);
571556Srgrimes  uint32_t IdentifierID = ReadLE32(CurPtrShadow);
581556Srgrimes  uint32_t FileOffset = ReadLE32(CurPtrShadow);
591556Srgrimes
601556Srgrimes  tok::TokenKind TKind = (tok::TokenKind) (Word0 & 0xFF);
611556Srgrimes  Token::TokenFlags TFlags = (Token::TokenFlags) ((Word0 >> 8) & 0xFF);
621556Srgrimes  uint32_t Len = Word0 >> 16;
631556Srgrimes
641556Srgrimes  CurPtr = CurPtrShadow;
651556Srgrimes
661556Srgrimes  //===--------------------------------------==//
671556Srgrimes  // Construct the token itself.
681556Srgrimes  //===--------------------------------------==//
691556Srgrimes
701556Srgrimes  Tok.startToken();
711556Srgrimes  Tok.setKind(TKind);
721556Srgrimes  Tok.setFlag(TFlags);
7346684Skris  assert(!LexingRawMode);
741556Srgrimes  Tok.setLocation(FileStartLoc.getFileLocWithOffset(FileOffset));
751556Srgrimes  Tok.setLength(Len);
761556Srgrimes
771556Srgrimes  // Handle identifiers.
781556Srgrimes  if (Tok.isLiteral()) {
791556Srgrimes    Tok.setLiteralData((const char*) (PTHMgr.SpellingBase + IdentifierID));
801556Srgrimes  }
811556Srgrimes  else if (IdentifierID) {
821556Srgrimes    MIOpt.ReadToken();
831556Srgrimes    IdentifierInfo *II = PTHMgr.GetIdentifierInfo(IdentifierID-1);
841556Srgrimes
851556Srgrimes    Tok.setIdentifierInfo(II);
861556Srgrimes
871556Srgrimes    // Change the kind of this identifier to the appropriate token kind, e.g.
881556Srgrimes    // turning "for" into a keyword.
891556Srgrimes    Tok.setKind(II->getTokenID());
901556Srgrimes
911556Srgrimes    if (II->isHandleIdentifierCase())
921556Srgrimes      PP->HandleIdentifier(Tok);
931556Srgrimes    return;
9476017Skris  }
951556Srgrimes
961556Srgrimes  //===--------------------------------------==//
971556Srgrimes  // Process the token.
981556Srgrimes  //===--------------------------------------==//
991556Srgrimes  if (TKind == tok::eof) {
1001556Srgrimes    // Save the end-of-file token.
1011556Srgrimes    EofToken = Tok;
1021556Srgrimes
1031556Srgrimes    Preprocessor *PPCache = PP;
1041556Srgrimes
1051556Srgrimes    assert(!ParsingPreprocessorDirective);
1061556Srgrimes    assert(!LexingRawMode);
1071556Srgrimes
1081556Srgrimes    // FIXME: Issue diagnostics similar to Lexer.
1091556Srgrimes    if (PP->HandleEndOfFile(Tok, false))
1101556Srgrimes      return;
1111556Srgrimes
1121556Srgrimes    assert(PPCache && "Raw buffer::LexEndOfFile should return a token");
1131556Srgrimes    return PPCache->Lex(Tok);
1141556Srgrimes  }
1151556Srgrimes
1161556Srgrimes  if (TKind == tok::hash && Tok.isAtStartOfLine()) {
1171556Srgrimes    LastHashTokPtr = CurPtr - DISK_TOKEN_SIZE;
1181556Srgrimes    assert(!LexingRawMode);
1191556Srgrimes    PP->HandleDirective(Tok);
1201556Srgrimes
1211556Srgrimes    if (PP->isCurrentLexer(this))
1221556Srgrimes      goto LexNextToken;
1231556Srgrimes
1241556Srgrimes    return PP->Lex(Tok);
1251556Srgrimes  }
12676017Skris
1271556Srgrimes  if (TKind == tok::eom) {
1281556Srgrimes    assert(ParsingPreprocessorDirective);
1291556Srgrimes    ParsingPreprocessorDirective = false;
1301556Srgrimes    return;
1311556Srgrimes  }
1321556Srgrimes
1331556Srgrimes  MIOpt.ReadToken();
13476017Skris}
1351556Srgrimes
1361556Srgrimes// FIXME: We can just grab the last token instead of storing a copy
1371556Srgrimes// into EofToken.
1381556Srgrimesvoid PTHLexer::getEOF(Token& Tok) {
1391556Srgrimes  assert(EofToken.is(tok::eof));
1401556Srgrimes  Tok = EofToken;
1411556Srgrimes}
1421556Srgrimes
1431556Srgrimesvoid PTHLexer::DiscardToEndOfLine() {
1441556Srgrimes  assert(ParsingPreprocessorDirective && ParsingFilename == false &&
1451556Srgrimes         "Must be in a preprocessing directive!");
1461556Srgrimes
1471556Srgrimes  // We assume that if the preprocessor wishes to discard to the end of
1481556Srgrimes  // the line that it also means to end the current preprocessor directive.
1491556Srgrimes  ParsingPreprocessorDirective = false;
1501556Srgrimes
1511556Srgrimes  // Skip tokens by only peeking at their token kind and the flags.
1521556Srgrimes  // We don't need to actually reconstruct full tokens from the token buffer.
15376017Skris  // This saves some copies and it also reduces IdentifierInfo* lookup.
1541556Srgrimes  const unsigned char* p = CurPtr;
15576351Skris  while (1) {
1561556Srgrimes    // Read the token kind.  Are we at the end of the file?
1571556Srgrimes    tok::TokenKind x = (tok::TokenKind) (uint8_t) *p;
15876351Skris    if (x == tok::eof) break;
1591556Srgrimes
16076351Skris    // Read the token flags.  Are we at the start of the next line?
1611556Srgrimes    Token::TokenFlags y = (Token::TokenFlags) (uint8_t) p[1];
1621556Srgrimes    if (y & Token::StartOfLine) break;
1631556Srgrimes
1641556Srgrimes    // Skip to the next token.
1651556Srgrimes    p += DISK_TOKEN_SIZE;
1661556Srgrimes  }
1671556Srgrimes
1681556Srgrimes  CurPtr = p;
1691556Srgrimes}
17076017Skris
1711556Srgrimes/// SkipBlock - Used by Preprocessor to skip the current conditional block.
1721556Srgrimesbool PTHLexer::SkipBlock() {
1731556Srgrimes  assert(CurPPCondPtr && "No cached PP conditional information.");
1741556Srgrimes  assert(LastHashTokPtr && "No known '#' token.");
1751556Srgrimes
1761556Srgrimes  const unsigned char* HashEntryI = 0;
1771556Srgrimes  uint32_t Offset;
1781556Srgrimes  uint32_t TableIdx;
1791556Srgrimes
18076017Skris  do {
1811556Srgrimes    // Read the token offset from the side-table.
1821556Srgrimes    Offset = ReadLE32(CurPPCondPtr);
1831556Srgrimes
1841556Srgrimes    // Read the target table index from the side-table.
1851556Srgrimes    TableIdx = ReadLE32(CurPPCondPtr);
1861556Srgrimes
1871556Srgrimes    // Compute the actual memory address of the '#' token data for this entry.
18876351Skris    HashEntryI = TokBuf + Offset;
1891556Srgrimes
1901556Srgrimes    // Optmization: "Sibling jumping".  #if...#else...#endif blocks can
1911556Srgrimes    //  contain nested blocks.  In the side-table we can jump over these
1921556Srgrimes    //  nested blocks instead of doing a linear search if the next "sibling"
1931556Srgrimes    //  entry is not at a location greater than LastHashTokPtr.
1941556Srgrimes    if (HashEntryI < LastHashTokPtr && TableIdx) {
1951556Srgrimes      // In the side-table we are still at an entry for a '#' token that
1961556Srgrimes      // is earlier than the last one we saw.  Check if the location we would
1971556Srgrimes      // stride gets us closer.
1981556Srgrimes      const unsigned char* NextPPCondPtr =
1991556Srgrimes        PPCond + TableIdx*(sizeof(uint32_t)*2);
2001556Srgrimes      assert(NextPPCondPtr >= CurPPCondPtr);
2011556Srgrimes      // Read where we should jump to.
2021556Srgrimes      uint32_t TmpOffset = ReadLE32(NextPPCondPtr);
2031556Srgrimes      const unsigned char* HashEntryJ = TokBuf + TmpOffset;
2041556Srgrimes
20576017Skris      if (HashEntryJ <= LastHashTokPtr) {
2061556Srgrimes        // Jump directly to the next entry in the side table.
2071556Srgrimes        HashEntryI = HashEntryJ;
2081556Srgrimes        Offset = TmpOffset;
2091556Srgrimes        TableIdx = ReadLE32(NextPPCondPtr);
2101556Srgrimes        CurPPCondPtr = NextPPCondPtr;
2111556Srgrimes      }
2121556Srgrimes    }
2131556Srgrimes  }
2141556Srgrimes  while (HashEntryI < LastHashTokPtr);
2151556Srgrimes  assert(HashEntryI == LastHashTokPtr && "No PP-cond entry found for '#'");
2161556Srgrimes  assert(TableIdx && "No jumping from #endifs.");
2171556Srgrimes
2181556Srgrimes  // Update our side-table iterator.
2198855Srgrimes  const unsigned char* NextPPCondPtr = PPCond + TableIdx*(sizeof(uint32_t)*2);
2201556Srgrimes  assert(NextPPCondPtr >= CurPPCondPtr);
2211556Srgrimes  CurPPCondPtr = NextPPCondPtr;
2221556Srgrimes
2231556Srgrimes  // Read where we should jump to.
2241556Srgrimes  HashEntryI = TokBuf + ReadLE32(NextPPCondPtr);
2251556Srgrimes  uint32_t NextIdx = ReadLE32(NextPPCondPtr);
2261556Srgrimes
2271556Srgrimes  // By construction NextIdx will be zero if this is a #endif.  This is useful
2281556Srgrimes  // to know to obviate lexing another token.
2291556Srgrimes  bool isEndif = NextIdx == 0;
2301556Srgrimes
2311556Srgrimes  // This case can occur when we see something like this:
2321556Srgrimes  //
2331556Srgrimes  //  #if ...
2341556Srgrimes  //   /* a comment or nothing */
2351556Srgrimes  //  #elif
2361556Srgrimes  //
2371556Srgrimes  // If we are skipping the first #if block it will be the case that CurPtr
2381556Srgrimes  // already points 'elif'.  Just return.
2391556Srgrimes
2401556Srgrimes  if (CurPtr > HashEntryI) {
2411556Srgrimes    assert(CurPtr == HashEntryI + DISK_TOKEN_SIZE);
2421556Srgrimes    // Did we reach a #endif?  If so, go ahead and consume that token as well.
2431556Srgrimes    if (isEndif)
24476017Skris      CurPtr += DISK_TOKEN_SIZE*2;
2451556Srgrimes    else
2461556Srgrimes      LastHashTokPtr = HashEntryI;
2471556Srgrimes
2481556Srgrimes    return isEndif;
2491556Srgrimes  }
2501556Srgrimes
2511556Srgrimes  // Otherwise, we need to advance.  Update CurPtr to point to the '#' token.
2521556Srgrimes  CurPtr = HashEntryI;
2531556Srgrimes
2541556Srgrimes  // Update the location of the last observed '#'.  This is useful if we
2551556Srgrimes  // are skipping multiple blocks.
2561556Srgrimes  LastHashTokPtr = CurPtr;
2571556Srgrimes
2581556Srgrimes  // Skip the '#' token.
2591556Srgrimes  assert(((tok::TokenKind)*CurPtr) == tok::hash);
2601556Srgrimes  CurPtr += DISK_TOKEN_SIZE;
2611556Srgrimes
2621556Srgrimes  // Did we reach a #endif?  If so, go ahead and consume that token as well.
2631556Srgrimes  if (isEndif) { CurPtr += DISK_TOKEN_SIZE*2; }
2641556Srgrimes
2651556Srgrimes  return isEndif;
26676351Skris}
2671556Srgrimes
2681556SrgrimesSourceLocation PTHLexer::getSourceLocation() {
26976017Skris  // getSourceLocation is not on the hot path.  It is used to get the location
2701556Srgrimes  // of the next token when transitioning back to this lexer when done
2711556Srgrimes  // handling a #included file.  Just read the necessary data from the token
2721556Srgrimes  // data buffer to construct the SourceLocation object.
2731556Srgrimes  // NOTE: This is a virtual function; hence it is defined out-of-line.
2741556Srgrimes  const unsigned char *OffsetPtr = CurPtr + (DISK_TOKEN_SIZE - 4);
2751556Srgrimes  uint32_t Offset = ReadLE32(OffsetPtr);
2761556Srgrimes  return FileStartLoc.getFileLocWithOffset(Offset);
2771556Srgrimes}
2781556Srgrimes
//===----------------------------------------------------------------------===//
// PTH file lookup: map from strings to file data.
//===----------------------------------------------------------------------===//
2821556Srgrimes
2831556Srgrimes/// PTHFileLookup - This internal data structure is used by the PTHManager
2841556Srgrimes///  to map from FileEntry objects managed by FileManager to offsets within
2851556Srgrimes///  the PTH file.
28676017Skrisnamespace {
2871556Srgrimesclass VISIBILITY_HIDDEN PTHFileData {
2881556Srgrimes  const uint32_t TokenOff;
2891556Srgrimes  const uint32_t PPCondOff;
2901556Srgrimespublic:
2911556Srgrimes  PTHFileData(uint32_t tokenOff, uint32_t ppCondOff)
2921556Srgrimes    : TokenOff(tokenOff), PPCondOff(ppCondOff) {}
2931556Srgrimes
2941556Srgrimes  uint32_t getTokenOffset() const { return TokenOff; }
2951556Srgrimes  uint32_t getPPCondOffset() const { return PPCondOff; }
2961556Srgrimes};
2971556Srgrimes
2981556Srgrimes
2991556Srgrimesclass VISIBILITY_HIDDEN PTHFileLookupCommonTrait {
3001556Srgrimespublic:
3011556Srgrimes  typedef std::pair<unsigned char, const char*> internal_key_type;
3021556Srgrimes
3031556Srgrimes  static unsigned ComputeHash(internal_key_type x) {
3041556Srgrimes    return llvm::HashString(x.second);
3051556Srgrimes  }
3061556Srgrimes
3071556Srgrimes  static std::pair<unsigned, unsigned>
3081556Srgrimes  ReadKeyDataLength(const unsigned char*& d) {
3091556Srgrimes    unsigned keyLen = (unsigned) ReadUnalignedLE16(d);
3101556Srgrimes    unsigned dataLen = (unsigned) *(d++);
3111556Srgrimes    return std::make_pair(keyLen, dataLen);
3128855Srgrimes  }
3131556Srgrimes
3141556Srgrimes  static internal_key_type ReadKey(const unsigned char* d, unsigned) {
3151556Srgrimes    unsigned char k = *(d++); // Read the entry kind.
3161556Srgrimes    return std::make_pair(k, (const char*) d);
3171556Srgrimes  }
3181556Srgrimes};
3191556Srgrimes
32076017Skrisclass VISIBILITY_HIDDEN PTHFileLookupTrait : public PTHFileLookupCommonTrait {
3211556Srgrimespublic:
3221556Srgrimes  typedef const FileEntry* external_key_type;
3231556Srgrimes  typedef PTHFileData      data_type;
3241556Srgrimes
3251556Srgrimes  static internal_key_type GetInternalKey(const FileEntry* FE) {
32676351Skris    return std::make_pair((unsigned char) 0x1, FE->getName());
32776351Skris  }
32876351Skris
32976351Skris  static bool EqualKey(internal_key_type a, internal_key_type b) {
33076351Skris    return a.first == b.first && strcmp(a.second, b.second) == 0;
33176351Skris  }
33276351Skris
33376351Skris  static PTHFileData ReadData(const internal_key_type& k,
33476351Skris                              const unsigned char* d, unsigned) {
33576351Skris    assert(k.first == 0x1 && "Only file lookups can match!");
33676351Skris    uint32_t x = ::ReadUnalignedLE32(d);
33776351Skris    uint32_t y = ::ReadUnalignedLE32(d);
33876351Skris    return PTHFileData(x, y);
33976351Skris  }
34076351Skris};
3411556Srgrimes
3421556Srgrimesclass VISIBILITY_HIDDEN PTHStringLookupTrait {
3431556Srgrimespublic:
3441556Srgrimes  typedef uint32_t
3451556Srgrimes          data_type;
3461556Srgrimes
34746684Skris  typedef const std::pair<const char*, unsigned>
3481556Srgrimes          external_key_type;
3491556Srgrimes
3501556Srgrimes  typedef external_key_type internal_key_type;
3511556Srgrimes
3521556Srgrimes  static bool EqualKey(const internal_key_type& a,
3531556Srgrimes                       const internal_key_type& b) {
3541556Srgrimes    return (a.second == b.second) ? memcmp(a.first, b.first, a.second) == 0
3551556Srgrimes                                  : false;
3561556Srgrimes  }
3571556Srgrimes
3581556Srgrimes  static unsigned ComputeHash(const internal_key_type& a) {
3591556Srgrimes    return llvm::HashString(llvm::StringRef(a.first, a.second));
3601556Srgrimes  }
3611556Srgrimes
3621556Srgrimes  // This hopefully will just get inlined and removed by the optimizer.
3631556Srgrimes  static const internal_key_type&
3641556Srgrimes  GetInternalKey(const external_key_type& x) { return x; }
36576017Skris
3661556Srgrimes  static std::pair<unsigned, unsigned>
3671556Srgrimes  ReadKeyDataLength(const unsigned char*& d) {
3681556Srgrimes    return std::make_pair((unsigned) ReadUnalignedLE16(d), sizeof(uint32_t));
3691556Srgrimes  }
3701556Srgrimes
3711556Srgrimes  static std::pair<const char*, unsigned>
3721556Srgrimes  ReadKey(const unsigned char* d, unsigned n) {
3731556Srgrimes      assert(n >= 2 && d[n-1] == '\0');
3741556Srgrimes      return std::make_pair((const char*) d, n-1);
3751556Srgrimes    }
3761556Srgrimes
3771556Srgrimes  static uint32_t ReadData(const internal_key_type& k, const unsigned char* d,
3781556Srgrimes                           unsigned) {
3791556Srgrimes    return ::ReadUnalignedLE32(d);
3801556Srgrimes  }
3818855Srgrimes};
3821556Srgrimes
3831556Srgrimes} // end anonymous namespace
3841556Srgrimes
3851556Srgrimestypedef OnDiskChainedHashTable<PTHFileLookupTrait>   PTHFileLookup;
3861556Srgrimestypedef OnDiskChainedHashTable<PTHStringLookupTrait> PTHStringIdLookup;
3871556Srgrimes
//===----------------------------------------------------------------------===//
// PTHManager methods.
//===----------------------------------------------------------------------===//
3911556Srgrimes
3921556SrgrimesPTHManager::PTHManager(const llvm::MemoryBuffer* buf, void* fileLookup,
3931556Srgrimes                       const unsigned char* idDataTable,
3941556Srgrimes                       IdentifierInfo** perIDCache,
3951556Srgrimes                       void* stringIdLookup, unsigned numIds,
3961556Srgrimes                       const unsigned char* spellingBase,
3971556Srgrimes                       const char* originalSourceFile)
3981556Srgrimes: Buf(buf), PerIDCache(perIDCache), FileLookup(fileLookup),
3991556Srgrimes  IdDataTable(idDataTable), StringIdLookup(stringIdLookup),
4001556Srgrimes  NumIds(numIds), PP(0), SpellingBase(spellingBase),
4011556Srgrimes  OriginalSourceFile(originalSourceFile) {}
4021556Srgrimes
4031556SrgrimesPTHManager::~PTHManager() {
4041556Srgrimes  delete Buf;
4051556Srgrimes  delete (PTHFileLookup*) FileLookup;
4061556Srgrimes  delete (PTHStringIdLookup*) StringIdLookup;
4071556Srgrimes  free(PerIDCache);
4081556Srgrimes}
4091556Srgrimes
4101556Srgrimesstatic void InvalidPTH(Diagnostic *Diags, Diagnostic::Level level,
4111556Srgrimes                       const char* Msg = 0) {
4121556Srgrimes  if (!Diags) return;
4131556Srgrimes  if (!Msg) Msg = "Invalid or corrupted PTH file";
4141556Srgrimes  unsigned DiagID = Diags->getCustomDiagID(level, Msg);
4151556Srgrimes  Diags->Report(FullSourceLoc(), DiagID);
4161556Srgrimes}
4171556Srgrimes
4181556SrgrimesPTHManager* PTHManager::Create(const std::string& file, Diagnostic* Diags,
4191556Srgrimes                               Diagnostic::Level level) {
4201556Srgrimes  // Memory map the PTH file.
4211556Srgrimes  llvm::OwningPtr<llvm::MemoryBuffer>
4221556Srgrimes  File(llvm::MemoryBuffer::getFile(file.c_str()));
4231556Srgrimes
4241556Srgrimes  if (!File) {
4251556Srgrimes    if (Diags) {
4261556Srgrimes      unsigned DiagID = Diags->getCustomDiagID(level,
4271556Srgrimes                                               "PTH file %0 could not be read");
4281556Srgrimes      Diags->Report(FullSourceLoc(), DiagID) << file;
4291556Srgrimes    }
4301556Srgrimes
4311556Srgrimes    return 0;
4321556Srgrimes  }
4331556Srgrimes
4341556Srgrimes  // Get the buffer ranges and check if there are at least three 32-bit
4351556Srgrimes  // words at the end of the file.
4361556Srgrimes  const unsigned char* BufBeg = (unsigned char*)File->getBufferStart();
4371556Srgrimes  const unsigned char* BufEnd = (unsigned char*)File->getBufferEnd();
4381556Srgrimes
4391556Srgrimes  // Check the prologue of the file.
4401556Srgrimes  if ((BufEnd - BufBeg) < (signed) (sizeof("cfe-pth") + 3 + 4) ||
4411556Srgrimes      memcmp(BufBeg, "cfe-pth", sizeof("cfe-pth") - 1) != 0) {
4421556Srgrimes    InvalidPTH(Diags, level);
44376017Skris    return 0;
4441556Srgrimes  }
4451556Srgrimes
4461556Srgrimes  // Read the PTH version.
44776017Skris  const unsigned char *p = BufBeg + (sizeof("cfe-pth") - 1);
4481556Srgrimes  unsigned Version = ReadLE32(p);
44976017Skris
4501556Srgrimes  if (Version != PTHManager::Version) {
4511556Srgrimes    InvalidPTH(Diags, level,
4521556Srgrimes        Version < PTHManager::Version
4531556Srgrimes        ? "PTH file uses an older PTH format that is no longer supported"
4541556Srgrimes        : "PTH file uses a newer PTH format that cannot be read");
45576017Skris    return 0;
4561556Srgrimes  }
45776017Skris
4581556Srgrimes  // Compute the address of the index table at the end of the PTH file.
4591556Srgrimes  const unsigned char *PrologueOffset = p;
4601556Srgrimes
4611556Srgrimes  if (PrologueOffset >= BufEnd) {
4621556Srgrimes    InvalidPTH(Diags, level);
4631556Srgrimes    return 0;
46476017Skris  }
4651556Srgrimes
46676017Skris  // Construct the file lookup table.  This will be used for mapping from
4671556Srgrimes  // FileEntry*'s to cached tokens.
4681556Srgrimes  const unsigned char* FileTableOffset = PrologueOffset + sizeof(uint32_t)*2;
4691556Srgrimes  const unsigned char* FileTable = BufBeg + ReadLE32(FileTableOffset);
4701556Srgrimes
4711556Srgrimes  if (!(FileTable > BufBeg && FileTable < BufEnd)) {
4721556Srgrimes    InvalidPTH(Diags, level);
4731556Srgrimes    return 0; // FIXME: Proper error diagnostic?
4741556Srgrimes  }
4751556Srgrimes
4761556Srgrimes  llvm::OwningPtr<PTHFileLookup> FL(PTHFileLookup::Create(FileTable, BufBeg));
4771556Srgrimes
4781556Srgrimes  // Warn if the PTH file is empty.  We still want to create a PTHManager
4791556Srgrimes  // as the PTH could be used with -include-pth.
4801556Srgrimes  if (FL->isEmpty())
4811556Srgrimes    InvalidPTH(Diags, level, "PTH file contains no cached source data");
4821556Srgrimes
4831556Srgrimes  // Get the location of the table mapping from persistent ids to the
4841556Srgrimes  // data needed to reconstruct identifiers.
4851556Srgrimes  const unsigned char* IDTableOffset = PrologueOffset + sizeof(uint32_t)*0;
4861556Srgrimes  const unsigned char* IData = BufBeg + ReadLE32(IDTableOffset);
4871556Srgrimes
4881556Srgrimes  if (!(IData >= BufBeg && IData < BufEnd)) {
4891556Srgrimes    InvalidPTH(Diags, level);
4901556Srgrimes    return 0;
4911556Srgrimes  }
4921556Srgrimes
4931556Srgrimes  // Get the location of the hashtable mapping between strings and
4941556Srgrimes  // persistent IDs.
4951556Srgrimes  const unsigned char* StringIdTableOffset = PrologueOffset + sizeof(uint32_t)*1;
4961556Srgrimes  const unsigned char* StringIdTable = BufBeg + ReadLE32(StringIdTableOffset);
4971556Srgrimes  if (!(StringIdTable >= BufBeg && StringIdTable < BufEnd)) {
4981556Srgrimes    InvalidPTH(Diags, level);
4991556Srgrimes    return 0;
5001556Srgrimes  }
5011556Srgrimes
5021556Srgrimes  llvm::OwningPtr<PTHStringIdLookup> SL(PTHStringIdLookup::Create(StringIdTable,
5031556Srgrimes                                                                  BufBeg));
5041556Srgrimes
5051556Srgrimes  // Get the location of the spelling cache.
5061556Srgrimes  const unsigned char* spellingBaseOffset = PrologueOffset + sizeof(uint32_t)*3;
5071556Srgrimes  const unsigned char* spellingBase = BufBeg + ReadLE32(spellingBaseOffset);
5081556Srgrimes  if (!(spellingBase >= BufBeg && spellingBase < BufEnd)) {
5091556Srgrimes    InvalidPTH(Diags, level);
5101556Srgrimes    return 0;
5111556Srgrimes  }
5121556Srgrimes
5131556Srgrimes  // Get the number of IdentifierInfos and pre-allocate the identifier cache.
5141556Srgrimes  uint32_t NumIds = ReadLE32(IData);
5151556Srgrimes
5161556Srgrimes  // Pre-allocate the peristent ID -> IdentifierInfo* cache.  We use calloc()
5171556Srgrimes  // so that we in the best case only zero out memory once when the OS returns
5181556Srgrimes  // us new pages.
5191556Srgrimes  IdentifierInfo** PerIDCache = 0;
5201556Srgrimes
5211556Srgrimes  if (NumIds) {
5221556Srgrimes    PerIDCache = (IdentifierInfo**)calloc(NumIds, sizeof(*PerIDCache));
5231556Srgrimes    if (!PerIDCache) {
5241556Srgrimes      InvalidPTH(Diags, level,
5251556Srgrimes                 "Could not allocate memory for processing PTH file");
5261556Srgrimes      return 0;
52736784Simp    }
52876017Skris  }
5291556Srgrimes
5301556Srgrimes  // Compute the address of the original source file.
5311556Srgrimes  const unsigned char* originalSourceBase = PrologueOffset + sizeof(uint32_t)*4;
5321556Srgrimes  unsigned len = ReadUnalignedLE16(originalSourceBase);
5331556Srgrimes  if (!len) originalSourceBase = 0;
53446684Skris
5351556Srgrimes  // Create the new PTHManager.
5361556Srgrimes  return new PTHManager(File.take(), FL.take(), IData, PerIDCache,
5371556Srgrimes                        SL.take(), NumIds, spellingBase,
5381556Srgrimes                        (const char*) originalSourceBase);
5391556Srgrimes}
5401556Srgrimes
5411556SrgrimesIdentifierInfo* PTHManager::LazilyCreateIdentifierInfo(unsigned PersistentID) {
5421556Srgrimes  // Look in the PTH file for the string data for the IdentifierInfo object.
5431556Srgrimes  const unsigned char* TableEntry = IdDataTable + sizeof(uint32_t)*PersistentID;
5441556Srgrimes  const unsigned char* IDData =
5451556Srgrimes    (const unsigned char*)Buf->getBufferStart() + ReadLE32(TableEntry);
5461556Srgrimes  assert(IDData < (const unsigned char*)Buf->getBufferEnd());
5471556Srgrimes
5481556Srgrimes  // Allocate the object.
5491556Srgrimes  std::pair<IdentifierInfo,const unsigned char*> *Mem =
5501556Srgrimes    Alloc.Allocate<std::pair<IdentifierInfo,const unsigned char*> >();
5511556Srgrimes
5521556Srgrimes  Mem->second = IDData;
5531556Srgrimes  assert(IDData[0] != '\0');
5541556Srgrimes  IdentifierInfo *II = new ((void*) Mem) IdentifierInfo();
5551556Srgrimes
5561556Srgrimes  // Store the new IdentifierInfo in the cache.
55776351Skris  PerIDCache[PersistentID] = II;
5581556Srgrimes  assert(II->getNameStart() && II->getNameStart()[0] != '\0');
5591556Srgrimes  return II;
5601556Srgrimes}
5611556Srgrimes
562IdentifierInfo* PTHManager::get(const char *NameStart, const char *NameEnd) {
563  PTHStringIdLookup& SL = *((PTHStringIdLookup*)StringIdLookup);
564  // Double check our assumption that the last character isn't '\0'.
565  assert(NameEnd==NameStart || NameStart[NameEnd-NameStart-1] != '\0');
566  PTHStringIdLookup::iterator I = SL.find(std::make_pair(NameStart,
567                                                         NameEnd - NameStart));
568  if (I == SL.end()) // No identifier found?
569    return 0;
570
571  // Match found.  Return the identifier!
572  assert(*I > 0);
573  return GetIdentifierInfo(*I-1);
574}
575
576PTHLexer *PTHManager::CreateLexer(FileID FID) {
577  const FileEntry *FE = PP->getSourceManager().getFileEntryForID(FID);
578  if (!FE)
579    return 0;
580
581  // Lookup the FileEntry object in our file lookup data structure.  It will
582  // return a variant that indicates whether or not there is an offset within
583  // the PTH file that contains cached tokens.
584  PTHFileLookup& PFL = *((PTHFileLookup*)FileLookup);
585  PTHFileLookup::iterator I = PFL.find(FE);
586
587  if (I == PFL.end()) // No tokens available?
588    return 0;
589
590  const PTHFileData& FileData = *I;
591
592  const unsigned char *BufStart = (const unsigned char *)Buf->getBufferStart();
593  // Compute the offset of the token data within the buffer.
594  const unsigned char* data = BufStart + FileData.getTokenOffset();
595
596  // Get the location of pp-conditional table.
597  const unsigned char* ppcond = BufStart + FileData.getPPCondOffset();
598  uint32_t Len = ReadLE32(ppcond);
599  if (Len == 0) ppcond = 0;
600
601  assert(PP && "No preprocessor set yet!");
602  return new PTHLexer(*PP, FID, data, ppcond, *this);
603}
604
//===----------------------------------------------------------------------===//
// 'stat' caching.
//===----------------------------------------------------------------------===//
608
609namespace {
610class VISIBILITY_HIDDEN PTHStatData {
611public:
612  const bool hasStat;
613  const ino_t ino;
614  const dev_t dev;
615  const mode_t mode;
616  const time_t mtime;
617  const off_t size;
618
619  PTHStatData(ino_t i, dev_t d, mode_t mo, time_t m, off_t s)
620  : hasStat(true), ino(i), dev(d), mode(mo), mtime(m), size(s) {}
621
622  PTHStatData()
623    : hasStat(false), ino(0), dev(0), mode(0), mtime(0), size(0) {}
624};
625
626class VISIBILITY_HIDDEN PTHStatLookupTrait : public PTHFileLookupCommonTrait {
627public:
628  typedef const char* external_key_type;  // const char*
629  typedef PTHStatData data_type;
630
631  static internal_key_type GetInternalKey(const char *path) {
632    // The key 'kind' doesn't matter here because it is ignored in EqualKey.
633    return std::make_pair((unsigned char) 0x0, path);
634  }
635
636  static bool EqualKey(internal_key_type a, internal_key_type b) {
637    // When doing 'stat' lookups we don't care about the kind of 'a' and 'b',
638    // just the paths.
639    return strcmp(a.second, b.second) == 0;
640  }
641
642  static data_type ReadData(const internal_key_type& k, const unsigned char* d,
643                            unsigned) {
644
645    if (k.first /* File or Directory */) {
646      if (k.first == 0x1 /* File */) d += 4 * 2; // Skip the first 2 words.
647      ino_t ino = (ino_t) ReadUnalignedLE32(d);
648      dev_t dev = (dev_t) ReadUnalignedLE32(d);
649      mode_t mode = (mode_t) ReadUnalignedLE16(d);
650      time_t mtime = (time_t) ReadUnalignedLE64(d);
651      return data_type(ino, dev, mode, mtime, (off_t) ReadUnalignedLE64(d));
652    }
653
654    // Negative stat.  Don't read anything.
655    return data_type();
656  }
657};
658
659class VISIBILITY_HIDDEN PTHStatCache : public StatSysCallCache {
660  typedef OnDiskChainedHashTable<PTHStatLookupTrait> CacheTy;
661  CacheTy Cache;
662
663public:
664  PTHStatCache(PTHFileLookup &FL) :
665    Cache(FL.getNumBuckets(), FL.getNumEntries(), FL.getBuckets(),
666          FL.getBase()) {}
667
668  ~PTHStatCache() {}
669
670  int stat(const char *path, struct stat *buf) {
671    // Do the lookup for the file's data in the PTH file.
672    CacheTy::iterator I = Cache.find(path);
673
674    // If we don't get a hit in the PTH file just forward to 'stat'.
675    if (I == Cache.end())
676      return StatSysCallCache::stat(path, buf);
677
678    const PTHStatData& Data = *I;
679
680    if (!Data.hasStat)
681      return 1;
682
683    buf->st_ino = Data.ino;
684    buf->st_dev = Data.dev;
685    buf->st_mtime = Data.mtime;
686    buf->st_mode = Data.mode;
687    buf->st_size = Data.size;
688    return 0;
689  }
690};
691} // end anonymous namespace
692
693StatSysCallCache *PTHManager::createStatCache() {
694  return new PTHStatCache(*((PTHFileLookup*) FileLookup));
695}
696