1193326Sed//===--- PTHLexer.cpp - Lex from a token stream ---------------------------===//
2193326Sed//
3193326Sed//                     The LLVM Compiler Infrastructure
4193326Sed//
5193326Sed// This file is distributed under the University of Illinois Open Source
6193326Sed// License. See LICENSE.TXT for details.
7193326Sed//
8193326Sed//===----------------------------------------------------------------------===//
9193326Sed//
10193326Sed// This file implements the PTHLexer interface.
11193326Sed//
12193326Sed//===----------------------------------------------------------------------===//
13193326Sed
14249423Sdim#include "clang/Lex/PTHLexer.h"
15193326Sed#include "clang/Basic/FileManager.h"
16218893Sdim#include "clang/Basic/FileSystemStatCache.h"
17193326Sed#include "clang/Basic/IdentifierTable.h"
18193326Sed#include "clang/Basic/OnDiskHashTable.h"
19249423Sdim#include "clang/Basic/TokenKinds.h"
20199482Srdivacky#include "clang/Lex/LexDiagnostic.h"
21249423Sdim#include "clang/Lex/PTHManager.h"
22193326Sed#include "clang/Lex/Preprocessor.h"
23193326Sed#include "clang/Lex/Token.h"
24198398Srdivacky#include "llvm/ADT/OwningPtr.h"
25198398Srdivacky#include "llvm/ADT/StringExtras.h"
26193326Sed#include "llvm/ADT/StringMap.h"
27193326Sed#include "llvm/Support/MemoryBuffer.h"
28218893Sdim#include "llvm/Support/system_error.h"
29193326Sedusing namespace clang;
30193326Sedusing namespace clang::io;
31193326Sed
// Size in bytes of one token record in the PTH token buffer, as decoded by
// PTHLexer::Lex: token kind (1), token flags (1), token length (2),
// identifier ID / spelling offset (4), and file offset (4).
32193326Sed#define DISK_TOKEN_SIZE (1+1+2+4+4)
33193326Sed
34193326Sed//===----------------------------------------------------------------------===//
35193326Sed// PTHLexer methods.
36193326Sed//===----------------------------------------------------------------------===//
37193326Sed
38193326SedPTHLexer::PTHLexer(Preprocessor &PP, FileID FID, const unsigned char *D,
39193326Sed                   const unsigned char *ppcond, PTHManager &PM)
40193326Sed  : PreprocessorLexer(&PP, FID), TokBuf(D), CurPtr(D), LastHashTokPtr(0),
41193326Sed    PPCond(ppcond), CurPPCondPtr(ppcond), PTHMgr(PM) {
42198092Srdivacky
43193326Sed  FileStartLoc = PP.getSourceManager().getLocForStartOfFile(FID);
44193326Sed}
45193326Sed
46263508Sdimbool PTHLexer::Lex(Token& Tok) {
47193326Sed  //===--------------------------------------==//
48193326Sed  // Read the raw token data.
49193326Sed  //===--------------------------------------==//
50198092Srdivacky
51193326Sed  // Shadow CurPtr into an automatic variable.
52198092Srdivacky  const unsigned char *CurPtrShadow = CurPtr;
53193326Sed
54193326Sed  // Read in the data for the token.
55193326Sed  unsigned Word0 = ReadLE32(CurPtrShadow);
56193326Sed  uint32_t IdentifierID = ReadLE32(CurPtrShadow);
57193326Sed  uint32_t FileOffset = ReadLE32(CurPtrShadow);
58198092Srdivacky
59193326Sed  tok::TokenKind TKind = (tok::TokenKind) (Word0 & 0xFF);
60193326Sed  Token::TokenFlags TFlags = (Token::TokenFlags) ((Word0 >> 8) & 0xFF);
61193326Sed  uint32_t Len = Word0 >> 16;
62193326Sed
63193326Sed  CurPtr = CurPtrShadow;
64198092Srdivacky
65193326Sed  //===--------------------------------------==//
66193326Sed  // Construct the token itself.
67193326Sed  //===--------------------------------------==//
68198092Srdivacky
69193326Sed  Tok.startToken();
70193326Sed  Tok.setKind(TKind);
71193326Sed  Tok.setFlag(TFlags);
72193326Sed  assert(!LexingRawMode);
73226633Sdim  Tok.setLocation(FileStartLoc.getLocWithOffset(FileOffset));
74193326Sed  Tok.setLength(Len);
75193326Sed
76193326Sed  // Handle identifiers.
77193326Sed  if (Tok.isLiteral()) {
78193326Sed    Tok.setLiteralData((const char*) (PTHMgr.SpellingBase + IdentifierID));
79193326Sed  }
80193326Sed  else if (IdentifierID) {
81193326Sed    MIOpt.ReadToken();
82193326Sed    IdentifierInfo *II = PTHMgr.GetIdentifierInfo(IdentifierID-1);
83198092Srdivacky
84193326Sed    Tok.setIdentifierInfo(II);
85198092Srdivacky
86193326Sed    // Change the kind of this identifier to the appropriate token kind, e.g.
87193326Sed    // turning "for" into a keyword.
88193326Sed    Tok.setKind(II->getTokenID());
89198092Srdivacky
90193326Sed    if (II->isHandleIdentifierCase())
91263508Sdim      return PP->HandleIdentifier(Tok);
92263508Sdim
93263508Sdim    return true;
94193326Sed  }
95198092Srdivacky
96193326Sed  //===--------------------------------------==//
97193326Sed  // Process the token.
98193326Sed  //===--------------------------------------==//
99193326Sed  if (TKind == tok::eof) {
100193326Sed    // Save the end-of-file token.
101193326Sed    EofToken = Tok;
102198092Srdivacky
103193326Sed    assert(!ParsingPreprocessorDirective);
104193326Sed    assert(!LexingRawMode);
105198092Srdivacky
106263508Sdim    return LexEndOfFile(Tok);
107193326Sed  }
108198092Srdivacky
109193326Sed  if (TKind == tok::hash && Tok.isAtStartOfLine()) {
110193326Sed    LastHashTokPtr = CurPtr - DISK_TOKEN_SIZE;
111193326Sed    assert(!LexingRawMode);
112193326Sed    PP->HandleDirective(Tok);
113198092Srdivacky
114263508Sdim    return false;
115193326Sed  }
116198092Srdivacky
117221345Sdim  if (TKind == tok::eod) {
118193326Sed    assert(ParsingPreprocessorDirective);
119193326Sed    ParsingPreprocessorDirective = false;
120263508Sdim    return true;
121193326Sed  }
122193326Sed
123193326Sed  MIOpt.ReadToken();
124263508Sdim  return true;
125193326Sed}
126193326Sed
127212904Sdimbool PTHLexer::LexEndOfFile(Token &Result) {
128212904Sdim  // If we hit the end of the file while parsing a preprocessor directive,
129212904Sdim  // end the preprocessor directive first.  The next token returned will
130212904Sdim  // then be the end of file.
131212904Sdim  if (ParsingPreprocessorDirective) {
132212904Sdim    ParsingPreprocessorDirective = false; // Done parsing the "line".
133212904Sdim    return true;  // Have a token.
134212904Sdim  }
135212904Sdim
136212904Sdim  assert(!LexingRawMode);
137212904Sdim
138212904Sdim  // If we are in a #if directive, emit an error.
139212904Sdim  while (!ConditionalStack.empty()) {
140226633Sdim    if (PP->getCodeCompletionFileLoc() != FileStartLoc)
141212904Sdim      PP->Diag(ConditionalStack.back().IfLoc,
142212904Sdim               diag::err_pp_unterminated_conditional);
143212904Sdim    ConditionalStack.pop_back();
144212904Sdim  }
145212904Sdim
146212904Sdim  // Finally, let the preprocessor handle this.
147212904Sdim  return PP->HandleEndOfFile(Result);
148212904Sdim}
149212904Sdim
150193326Sed// FIXME: We can just grab the last token instead of storing a copy
151193326Sed// into EofToken.
152193326Sedvoid PTHLexer::getEOF(Token& Tok) {
153193326Sed  assert(EofToken.is(tok::eof));
154193326Sed  Tok = EofToken;
155193326Sed}
156193326Sed
157193326Sedvoid PTHLexer::DiscardToEndOfLine() {
158193326Sed  assert(ParsingPreprocessorDirective && ParsingFilename == false &&
159193326Sed         "Must be in a preprocessing directive!");
160193326Sed
161193326Sed  // We assume that if the preprocessor wishes to discard to the end of
162193326Sed  // the line that it also means to end the current preprocessor directive.
163193326Sed  ParsingPreprocessorDirective = false;
164198092Srdivacky
165193326Sed  // Skip tokens by only peeking at their token kind and the flags.
166193326Sed  // We don't need to actually reconstruct full tokens from the token buffer.
167193326Sed  // This saves some copies and it also reduces IdentifierInfo* lookup.
168193326Sed  const unsigned char* p = CurPtr;
169193326Sed  while (1) {
170193326Sed    // Read the token kind.  Are we at the end of the file?
171193326Sed    tok::TokenKind x = (tok::TokenKind) (uint8_t) *p;
172193326Sed    if (x == tok::eof) break;
173198092Srdivacky
174193326Sed    // Read the token flags.  Are we at the start of the next line?
175193326Sed    Token::TokenFlags y = (Token::TokenFlags) (uint8_t) p[1];
176193326Sed    if (y & Token::StartOfLine) break;
177193326Sed
178193326Sed    // Skip to the next token.
179193326Sed    p += DISK_TOKEN_SIZE;
180193326Sed  }
181198092Srdivacky
182193326Sed  CurPtr = p;
183193326Sed}
184193326Sed
185193326Sed/// SkipBlock - Used by Preprocessor to skip the current conditional block.
186193326Sedbool PTHLexer::SkipBlock() {
187193326Sed  assert(CurPPCondPtr && "No cached PP conditional information.");
188193326Sed  assert(LastHashTokPtr && "No known '#' token.");
189198092Srdivacky
190193326Sed  const unsigned char* HashEntryI = 0;
191193326Sed  uint32_t TableIdx;
192198092Srdivacky
193193326Sed  do {
194193326Sed    // Read the token offset from the side-table.
195243830Sdim    uint32_t Offset = ReadLE32(CurPPCondPtr);
196198092Srdivacky
197198092Srdivacky    // Read the target table index from the side-table.
198193326Sed    TableIdx = ReadLE32(CurPPCondPtr);
199198092Srdivacky
200193326Sed    // Compute the actual memory address of the '#' token data for this entry.
201193326Sed    HashEntryI = TokBuf + Offset;
202193326Sed
203193326Sed    // Optmization: "Sibling jumping".  #if...#else...#endif blocks can
204193326Sed    //  contain nested blocks.  In the side-table we can jump over these
205193326Sed    //  nested blocks instead of doing a linear search if the next "sibling"
206193326Sed    //  entry is not at a location greater than LastHashTokPtr.
207193326Sed    if (HashEntryI < LastHashTokPtr && TableIdx) {
208193326Sed      // In the side-table we are still at an entry for a '#' token that
209193326Sed      // is earlier than the last one we saw.  Check if the location we would
210193326Sed      // stride gets us closer.
211193326Sed      const unsigned char* NextPPCondPtr =
212193326Sed        PPCond + TableIdx*(sizeof(uint32_t)*2);
213193326Sed      assert(NextPPCondPtr >= CurPPCondPtr);
214193326Sed      // Read where we should jump to.
215243830Sdim      const unsigned char* HashEntryJ = TokBuf + ReadLE32(NextPPCondPtr);
216198092Srdivacky
217193326Sed      if (HashEntryJ <= LastHashTokPtr) {
218193326Sed        // Jump directly to the next entry in the side table.
219193326Sed        HashEntryI = HashEntryJ;
220193326Sed        TableIdx = ReadLE32(NextPPCondPtr);
221193326Sed        CurPPCondPtr = NextPPCondPtr;
222193326Sed      }
223193326Sed    }
224193326Sed  }
225198092Srdivacky  while (HashEntryI < LastHashTokPtr);
226193326Sed  assert(HashEntryI == LastHashTokPtr && "No PP-cond entry found for '#'");
227193326Sed  assert(TableIdx && "No jumping from #endifs.");
228198092Srdivacky
229193326Sed  // Update our side-table iterator.
230193326Sed  const unsigned char* NextPPCondPtr = PPCond + TableIdx*(sizeof(uint32_t)*2);
231193326Sed  assert(NextPPCondPtr >= CurPPCondPtr);
232193326Sed  CurPPCondPtr = NextPPCondPtr;
233198092Srdivacky
234193326Sed  // Read where we should jump to.
235193326Sed  HashEntryI = TokBuf + ReadLE32(NextPPCondPtr);
236193326Sed  uint32_t NextIdx = ReadLE32(NextPPCondPtr);
237198092Srdivacky
238193326Sed  // By construction NextIdx will be zero if this is a #endif.  This is useful
239193326Sed  // to know to obviate lexing another token.
240193326Sed  bool isEndif = NextIdx == 0;
241198092Srdivacky
242193326Sed  // This case can occur when we see something like this:
243193326Sed  //
244193326Sed  //  #if ...
245193326Sed  //   /* a comment or nothing */
246193326Sed  //  #elif
247193326Sed  //
248193326Sed  // If we are skipping the first #if block it will be the case that CurPtr
249193326Sed  // already points 'elif'.  Just return.
250198092Srdivacky
251193326Sed  if (CurPtr > HashEntryI) {
252193326Sed    assert(CurPtr == HashEntryI + DISK_TOKEN_SIZE);
253193326Sed    // Did we reach a #endif?  If so, go ahead and consume that token as well.
254193326Sed    if (isEndif)
255193326Sed      CurPtr += DISK_TOKEN_SIZE*2;
256193326Sed    else
257193326Sed      LastHashTokPtr = HashEntryI;
258198092Srdivacky
259193326Sed    return isEndif;
260193326Sed  }
261193326Sed
262193326Sed  // Otherwise, we need to advance.  Update CurPtr to point to the '#' token.
263193326Sed  CurPtr = HashEntryI;
264198092Srdivacky
265193326Sed  // Update the location of the last observed '#'.  This is useful if we
266193326Sed  // are skipping multiple blocks.
267193326Sed  LastHashTokPtr = CurPtr;
268193326Sed
269193326Sed  // Skip the '#' token.
270193326Sed  assert(((tok::TokenKind)*CurPtr) == tok::hash);
271193326Sed  CurPtr += DISK_TOKEN_SIZE;
272198092Srdivacky
273193326Sed  // Did we reach a #endif?  If so, go ahead and consume that token as well.
274193326Sed  if (isEndif) { CurPtr += DISK_TOKEN_SIZE*2; }
275193326Sed
276193326Sed  return isEndif;
277193326Sed}
278193326Sed
279193326SedSourceLocation PTHLexer::getSourceLocation() {
280193326Sed  // getSourceLocation is not on the hot path.  It is used to get the location
281193326Sed  // of the next token when transitioning back to this lexer when done
282193326Sed  // handling a #included file.  Just read the necessary data from the token
283193326Sed  // data buffer to construct the SourceLocation object.
284193326Sed  // NOTE: This is a virtual function; hence it is defined out-of-line.
285193326Sed  const unsigned char *OffsetPtr = CurPtr + (DISK_TOKEN_SIZE - 4);
286193326Sed  uint32_t Offset = ReadLE32(OffsetPtr);
287226633Sdim  return FileStartLoc.getLocWithOffset(Offset);
288193326Sed}
289193326Sed
290193326Sed//===----------------------------------------------------------------------===//
291193326Sed// PTH file lookup: map from strings to file data.
292193326Sed//===----------------------------------------------------------------------===//
293193326Sed
294193326Sed/// PTHFileLookup - This internal data structure is used by the PTHManager
295193326Sed///  to map from FileEntry objects managed by FileManager to offsets within
296193326Sed///  the PTH file.
297193326Sednamespace {
/// PTHFileData - Immutable pair of offsets describing one cached file: where
/// its token data starts and where its preprocessor-conditional table starts.
class PTHFileData {
public:
  PTHFileData(uint32_t tokenOff, uint32_t ppCondOff)
    : TokenOff(tokenOff),
      PPCondOff(ppCondOff) {}

  /// Offset of the file's token data.
  uint32_t getTokenOffset() const {
    return TokenOff;
  }

  /// Offset of the file's preprocessor-conditional table.
  uint32_t getPPCondOffset() const {
    return PPCondOff;
  }

private:
  const uint32_t TokenOff;
  const uint32_t PPCondOff;
};
308198092Srdivacky
309198092Srdivacky
310199990Srdivackyclass PTHFileLookupCommonTrait {
311193326Sedpublic:
312193326Sed  typedef std::pair<unsigned char, const char*> internal_key_type;
313193326Sed
314193326Sed  static unsigned ComputeHash(internal_key_type x) {
315198398Srdivacky    return llvm::HashString(x.second);
316193326Sed  }
317198092Srdivacky
318193326Sed  static std::pair<unsigned, unsigned>
319193326Sed  ReadKeyDataLength(const unsigned char*& d) {
320193326Sed    unsigned keyLen = (unsigned) ReadUnalignedLE16(d);
321193326Sed    unsigned dataLen = (unsigned) *(d++);
322193326Sed    return std::make_pair(keyLen, dataLen);
323193326Sed  }
324198092Srdivacky
325193326Sed  static internal_key_type ReadKey(const unsigned char* d, unsigned) {
326193326Sed    unsigned char k = *(d++); // Read the entry kind.
327193326Sed    return std::make_pair(k, (const char*) d);
328193326Sed  }
329193326Sed};
330198092Srdivacky
331199990Srdivackyclass PTHFileLookupTrait : public PTHFileLookupCommonTrait {
332193326Sedpublic:
333193326Sed  typedef const FileEntry* external_key_type;
334193326Sed  typedef PTHFileData      data_type;
335198092Srdivacky
336193326Sed  static internal_key_type GetInternalKey(const FileEntry* FE) {
337193326Sed    return std::make_pair((unsigned char) 0x1, FE->getName());
338193326Sed  }
339193326Sed
340193326Sed  static bool EqualKey(internal_key_type a, internal_key_type b) {
341193326Sed    return a.first == b.first && strcmp(a.second, b.second) == 0;
342198092Srdivacky  }
343198092Srdivacky
344198092Srdivacky  static PTHFileData ReadData(const internal_key_type& k,
345198092Srdivacky                              const unsigned char* d, unsigned) {
346193326Sed    assert(k.first == 0x1 && "Only file lookups can match!");
347193326Sed    uint32_t x = ::ReadUnalignedLE32(d);
348193326Sed    uint32_t y = ::ReadUnalignedLE32(d);
349198092Srdivacky    return PTHFileData(x, y);
350193326Sed  }
351193326Sed};
352193326Sed
353199990Srdivackyclass PTHStringLookupTrait {
354193326Sedpublic:
355198092Srdivacky  typedef uint32_t
356193326Sed          data_type;
357193326Sed
358193326Sed  typedef const std::pair<const char*, unsigned>
359193326Sed          external_key_type;
360193326Sed
361193326Sed  typedef external_key_type internal_key_type;
362198092Srdivacky
363193326Sed  static bool EqualKey(const internal_key_type& a,
364193326Sed                       const internal_key_type& b) {
365193326Sed    return (a.second == b.second) ? memcmp(a.first, b.first, a.second) == 0
366193326Sed                                  : false;
367193326Sed  }
368198092Srdivacky
369193326Sed  static unsigned ComputeHash(const internal_key_type& a) {
370226633Sdim    return llvm::HashString(StringRef(a.first, a.second));
371193326Sed  }
372198092Srdivacky
373193326Sed  // This hopefully will just get inlined and removed by the optimizer.
374193326Sed  static const internal_key_type&
375193326Sed  GetInternalKey(const external_key_type& x) { return x; }
376198092Srdivacky
377193326Sed  static std::pair<unsigned, unsigned>
378193326Sed  ReadKeyDataLength(const unsigned char*& d) {
379193326Sed    return std::make_pair((unsigned) ReadUnalignedLE16(d), sizeof(uint32_t));
380193326Sed  }
381198092Srdivacky
382193326Sed  static std::pair<const char*, unsigned>
383193326Sed  ReadKey(const unsigned char* d, unsigned n) {
384193326Sed      assert(n >= 2 && d[n-1] == '\0');
385193326Sed      return std::make_pair((const char*) d, n-1);
386193326Sed    }
387198092Srdivacky
388193326Sed  static uint32_t ReadData(const internal_key_type& k, const unsigned char* d,
389193326Sed                           unsigned) {
390193326Sed    return ::ReadUnalignedLE32(d);
391193326Sed  }
392193326Sed};
393193326Sed
394198092Srdivacky} // end anonymous namespace
395198092Srdivacky
// Concrete on-disk hash table types used by PTHManager.  The manager stores
// pointers to these as void* (FileLookup and StringIdLookup) and casts them
// back to these types wherever the tables are used.
396193326Sedtypedef OnDiskChainedHashTable<PTHFileLookupTrait>   PTHFileLookup;
397193326Sedtypedef OnDiskChainedHashTable<PTHStringLookupTrait> PTHStringIdLookup;
398193326Sed
399193326Sed//===----------------------------------------------------------------------===//
400193326Sed// PTHManager methods.
401193326Sed//===----------------------------------------------------------------------===//
402193326Sed
403193326SedPTHManager::PTHManager(const llvm::MemoryBuffer* buf, void* fileLookup,
404193326Sed                       const unsigned char* idDataTable,
405198092Srdivacky                       IdentifierInfo** perIDCache,
406193326Sed                       void* stringIdLookup, unsigned numIds,
407193326Sed                       const unsigned char* spellingBase,
408193326Sed                       const char* originalSourceFile)
409193326Sed: Buf(buf), PerIDCache(perIDCache), FileLookup(fileLookup),
410193326Sed  IdDataTable(idDataTable), StringIdLookup(stringIdLookup),
411193326Sed  NumIds(numIds), PP(0), SpellingBase(spellingBase),
412193326Sed  OriginalSourceFile(originalSourceFile) {}
413193326Sed
414193326SedPTHManager::~PTHManager() {
415193326Sed  delete Buf;
416193326Sed  delete (PTHFileLookup*) FileLookup;
417193326Sed  delete (PTHStringIdLookup*) StringIdLookup;
418193326Sed  free(PerIDCache);
419193326Sed}
420193326Sed
421226633Sdimstatic void InvalidPTH(DiagnosticsEngine &Diags, const char *Msg) {
422226633Sdim  Diags.Report(Diags.getCustomDiagID(DiagnosticsEngine::Error, Msg));
423193326Sed}
424193326Sed
425226633SdimPTHManager *PTHManager::Create(const std::string &file,
426226633Sdim                               DiagnosticsEngine &Diags) {
427193326Sed  // Memory map the PTH file.
428234353Sdim  OwningPtr<llvm::MemoryBuffer> File;
429198092Srdivacky
430218893Sdim  if (llvm::MemoryBuffer::getFile(file, File)) {
431218893Sdim    // FIXME: Add ec.message() to this diag.
432199482Srdivacky    Diags.Report(diag::err_invalid_pth_file) << file;
433193326Sed    return 0;
434193326Sed  }
435198092Srdivacky
436193326Sed  // Get the buffer ranges and check if there are at least three 32-bit
437193326Sed  // words at the end of the file.
438243830Sdim  const unsigned char *BufBeg = (const unsigned char*)File->getBufferStart();
439243830Sdim  const unsigned char *BufEnd = (const unsigned char*)File->getBufferEnd();
440193326Sed
441193326Sed  // Check the prologue of the file.
442239462Sdim  if ((BufEnd - BufBeg) < (signed)(sizeof("cfe-pth") + 4 + 4) ||
443239462Sdim      memcmp(BufBeg, "cfe-pth", sizeof("cfe-pth")) != 0) {
444199482Srdivacky    Diags.Report(diag::err_invalid_pth_file) << file;
445193326Sed    return 0;
446193326Sed  }
447198092Srdivacky
448193326Sed  // Read the PTH version.
449239462Sdim  const unsigned char *p = BufBeg + (sizeof("cfe-pth"));
450193326Sed  unsigned Version = ReadLE32(p);
451198092Srdivacky
452199482Srdivacky  if (Version < PTHManager::Version) {
453199482Srdivacky    InvalidPTH(Diags,
454198092Srdivacky        Version < PTHManager::Version
455193326Sed        ? "PTH file uses an older PTH format that is no longer supported"
456193326Sed        : "PTH file uses a newer PTH format that cannot be read");
457193326Sed    return 0;
458193326Sed  }
459193326Sed
460198092Srdivacky  // Compute the address of the index table at the end of the PTH file.
461193326Sed  const unsigned char *PrologueOffset = p;
462198092Srdivacky
463193326Sed  if (PrologueOffset >= BufEnd) {
464199482Srdivacky    Diags.Report(diag::err_invalid_pth_file) << file;
465193326Sed    return 0;
466193326Sed  }
467198092Srdivacky
468193326Sed  // Construct the file lookup table.  This will be used for mapping from
469193326Sed  // FileEntry*'s to cached tokens.
470193326Sed  const unsigned char* FileTableOffset = PrologueOffset + sizeof(uint32_t)*2;
471193326Sed  const unsigned char* FileTable = BufBeg + ReadLE32(FileTableOffset);
472198092Srdivacky
473193326Sed  if (!(FileTable > BufBeg && FileTable < BufEnd)) {
474199482Srdivacky    Diags.Report(diag::err_invalid_pth_file) << file;
475193326Sed    return 0; // FIXME: Proper error diagnostic?
476193326Sed  }
477198092Srdivacky
478234353Sdim  OwningPtr<PTHFileLookup> FL(PTHFileLookup::Create(FileTable, BufBeg));
479198092Srdivacky
480193326Sed  // Warn if the PTH file is empty.  We still want to create a PTHManager
481193326Sed  // as the PTH could be used with -include-pth.
482193326Sed  if (FL->isEmpty())
483199482Srdivacky    InvalidPTH(Diags, "PTH file contains no cached source data");
484198092Srdivacky
485193326Sed  // Get the location of the table mapping from persistent ids to the
486193326Sed  // data needed to reconstruct identifiers.
487193326Sed  const unsigned char* IDTableOffset = PrologueOffset + sizeof(uint32_t)*0;
488193326Sed  const unsigned char* IData = BufBeg + ReadLE32(IDTableOffset);
489198092Srdivacky
490193326Sed  if (!(IData >= BufBeg && IData < BufEnd)) {
491199482Srdivacky    Diags.Report(diag::err_invalid_pth_file) << file;
492193326Sed    return 0;
493193326Sed  }
494198092Srdivacky
495193326Sed  // Get the location of the hashtable mapping between strings and
496193326Sed  // persistent IDs.
497193326Sed  const unsigned char* StringIdTableOffset = PrologueOffset + sizeof(uint32_t)*1;
498193326Sed  const unsigned char* StringIdTable = BufBeg + ReadLE32(StringIdTableOffset);
499193326Sed  if (!(StringIdTable >= BufBeg && StringIdTable < BufEnd)) {
500199482Srdivacky    Diags.Report(diag::err_invalid_pth_file) << file;
501193326Sed    return 0;
502193326Sed  }
503193326Sed
504234353Sdim  OwningPtr<PTHStringIdLookup> SL(PTHStringIdLookup::Create(StringIdTable,
505193326Sed                                                                  BufBeg));
506198092Srdivacky
507193326Sed  // Get the location of the spelling cache.
508193326Sed  const unsigned char* spellingBaseOffset = PrologueOffset + sizeof(uint32_t)*3;
509193326Sed  const unsigned char* spellingBase = BufBeg + ReadLE32(spellingBaseOffset);
510193326Sed  if (!(spellingBase >= BufBeg && spellingBase < BufEnd)) {
511199482Srdivacky    Diags.Report(diag::err_invalid_pth_file) << file;
512193326Sed    return 0;
513193326Sed  }
514198092Srdivacky
515193326Sed  // Get the number of IdentifierInfos and pre-allocate the identifier cache.
516193326Sed  uint32_t NumIds = ReadLE32(IData);
517198092Srdivacky
518221345Sdim  // Pre-allocate the persistent ID -> IdentifierInfo* cache.  We use calloc()
519193326Sed  // so that we in the best case only zero out memory once when the OS returns
520193326Sed  // us new pages.
521193326Sed  IdentifierInfo** PerIDCache = 0;
522198092Srdivacky
523193326Sed  if (NumIds) {
524198092Srdivacky    PerIDCache = (IdentifierInfo**)calloc(NumIds, sizeof(*PerIDCache));
525193326Sed    if (!PerIDCache) {
526199482Srdivacky      InvalidPTH(Diags, "Could not allocate memory for processing PTH file");
527193326Sed      return 0;
528193326Sed    }
529193326Sed  }
530193326Sed
531193326Sed  // Compute the address of the original source file.
532193326Sed  const unsigned char* originalSourceBase = PrologueOffset + sizeof(uint32_t)*4;
533193326Sed  unsigned len = ReadUnalignedLE16(originalSourceBase);
534198092Srdivacky  if (!len) originalSourceBase = 0;
535198092Srdivacky
536193326Sed  // Create the new PTHManager.
537193326Sed  return new PTHManager(File.take(), FL.take(), IData, PerIDCache,
538193326Sed                        SL.take(), NumIds, spellingBase,
539193326Sed                        (const char*) originalSourceBase);
540193326Sed}
541193326Sed
542193326SedIdentifierInfo* PTHManager::LazilyCreateIdentifierInfo(unsigned PersistentID) {
543193326Sed  // Look in the PTH file for the string data for the IdentifierInfo object.
544193326Sed  const unsigned char* TableEntry = IdDataTable + sizeof(uint32_t)*PersistentID;
545193326Sed  const unsigned char* IDData =
546193326Sed    (const unsigned char*)Buf->getBufferStart() + ReadLE32(TableEntry);
547193326Sed  assert(IDData < (const unsigned char*)Buf->getBufferEnd());
548198092Srdivacky
549193326Sed  // Allocate the object.
550193326Sed  std::pair<IdentifierInfo,const unsigned char*> *Mem =
551193326Sed    Alloc.Allocate<std::pair<IdentifierInfo,const unsigned char*> >();
552193326Sed
553193326Sed  Mem->second = IDData;
554193326Sed  assert(IDData[0] != '\0');
555193326Sed  IdentifierInfo *II = new ((void*) Mem) IdentifierInfo();
556198092Srdivacky
557193326Sed  // Store the new IdentifierInfo in the cache.
558193326Sed  PerIDCache[PersistentID] = II;
559198398Srdivacky  assert(II->getNameStart() && II->getNameStart()[0] != '\0');
560193326Sed  return II;
561193326Sed}
562193326Sed
563226633SdimIdentifierInfo* PTHManager::get(StringRef Name) {
564193326Sed  PTHStringIdLookup& SL = *((PTHStringIdLookup*)StringIdLookup);
565193326Sed  // Double check our assumption that the last character isn't '\0'.
566226633Sdim  assert(Name.empty() || Name.back() != '\0');
567205219Srdivacky  PTHStringIdLookup::iterator I = SL.find(std::make_pair(Name.data(),
568205219Srdivacky                                                         Name.size()));
569193326Sed  if (I == SL.end()) // No identifier found?
570193326Sed    return 0;
571193326Sed
572193326Sed  // Match found.  Return the identifier!
573193326Sed  assert(*I > 0);
574193326Sed  return GetIdentifierInfo(*I-1);
575193326Sed}
576193326Sed
577193326SedPTHLexer *PTHManager::CreateLexer(FileID FID) {
578193326Sed  const FileEntry *FE = PP->getSourceManager().getFileEntryForID(FID);
579193326Sed  if (!FE)
580193326Sed    return 0;
581198092Srdivacky
582193326Sed  // Lookup the FileEntry object in our file lookup data structure.  It will
583193326Sed  // return a variant that indicates whether or not there is an offset within
584193326Sed  // the PTH file that contains cached tokens.
585193326Sed  PTHFileLookup& PFL = *((PTHFileLookup*)FileLookup);
586193326Sed  PTHFileLookup::iterator I = PFL.find(FE);
587198092Srdivacky
588193326Sed  if (I == PFL.end()) // No tokens available?
589193326Sed    return 0;
590198092Srdivacky
591198092Srdivacky  const PTHFileData& FileData = *I;
592198092Srdivacky
593193326Sed  const unsigned char *BufStart = (const unsigned char *)Buf->getBufferStart();
594193326Sed  // Compute the offset of the token data within the buffer.
595193326Sed  const unsigned char* data = BufStart + FileData.getTokenOffset();
596193326Sed
597193326Sed  // Get the location of pp-conditional table.
598193326Sed  const unsigned char* ppcond = BufStart + FileData.getPPCondOffset();
599193326Sed  uint32_t Len = ReadLE32(ppcond);
600193326Sed  if (Len == 0) ppcond = 0;
601198092Srdivacky
602193326Sed  assert(PP && "No preprocessor set yet!");
603198092Srdivacky  return new PTHLexer(*PP, FID, data, ppcond, *this);
604193326Sed}
605193326Sed
606193326Sed//===----------------------------------------------------------------------===//
607193326Sed// 'stat' caching.
608193326Sed//===----------------------------------------------------------------------===//
609193326Sed
610193326Sednamespace {
611199990Srdivackyclass PTHStatData {
612193326Sedpublic:
613263508Sdim  const bool HasData;
614263508Sdim  uint64_t Size;
615263508Sdim  time_t ModTime;
616263508Sdim  llvm::sys::fs::UniqueID UniqueID;
617263508Sdim  bool IsDirectory;
618198092Srdivacky
619263508Sdim  PTHStatData(uint64_t Size, time_t ModTime, llvm::sys::fs::UniqueID UniqueID,
620263508Sdim              bool IsDirectory)
621263508Sdim      : HasData(true), Size(Size), ModTime(ModTime), UniqueID(UniqueID),
622263508Sdim        IsDirectory(IsDirectory) {}
623198092Srdivacky
624263508Sdim  PTHStatData() : HasData(false) {}
625193326Sed};
626198092Srdivacky
627199990Srdivackyclass PTHStatLookupTrait : public PTHFileLookupCommonTrait {
628193326Sedpublic:
629193326Sed  typedef const char* external_key_type;  // const char*
630193326Sed  typedef PTHStatData data_type;
631198092Srdivacky
632193326Sed  static internal_key_type GetInternalKey(const char *path) {
633193326Sed    // The key 'kind' doesn't matter here because it is ignored in EqualKey.
634193326Sed    return std::make_pair((unsigned char) 0x0, path);
635193326Sed  }
636193326Sed
637193326Sed  static bool EqualKey(internal_key_type a, internal_key_type b) {
638193326Sed    // When doing 'stat' lookups we don't care about the kind of 'a' and 'b',
639193326Sed    // just the paths.
640193326Sed    return strcmp(a.second, b.second) == 0;
641198092Srdivacky  }
642198092Srdivacky
643193326Sed  static data_type ReadData(const internal_key_type& k, const unsigned char* d,
644198092Srdivacky                            unsigned) {
645198092Srdivacky
646193326Sed    if (k.first /* File or Directory */) {
647263508Sdim      bool IsDirectory = true;
648263508Sdim      if (k.first == 0x1 /* File */) {
649263508Sdim        IsDirectory = false;
650263508Sdim        d += 4 * 2; // Skip the first 2 words.
651263508Sdim      }
652263508Sdim
653263508Sdim      uint64_t File = ReadUnalignedLE64(d);
654263508Sdim      uint64_t Device = ReadUnalignedLE64(d);
655263508Sdim      llvm::sys::fs::UniqueID UniqueID(File, Device);
656263508Sdim      time_t ModTime = ReadUnalignedLE64(d);
657263508Sdim      uint64_t Size = ReadUnalignedLE64(d);
658263508Sdim      return data_type(Size, ModTime, UniqueID, IsDirectory);
659193326Sed    }
660193326Sed
661193326Sed    // Negative stat.  Don't read anything.
662193326Sed    return data_type();
663193326Sed  }
664193326Sed};
665193326Sed
666218893Sdimclass PTHStatCache : public FileSystemStatCache {
667193326Sed  typedef OnDiskChainedHashTable<PTHStatLookupTrait> CacheTy;
668193326Sed  CacheTy Cache;
669193326Sed
670198092Srdivackypublic:
671193326Sed  PTHStatCache(PTHFileLookup &FL) :
672193326Sed    Cache(FL.getNumBuckets(), FL.getNumEntries(), FL.getBuckets(),
673193326Sed          FL.getBase()) {}
674193326Sed
675193326Sed  ~PTHStatCache() {}
676198092Srdivacky
677263508Sdim  LookupResult getStat(const char *Path, FileData &Data, bool isFile,
678263508Sdim                       int *FileDescriptor) {
679193326Sed    // Do the lookup for the file's data in the PTH file.
680218893Sdim    CacheTy::iterator I = Cache.find(Path);
681193326Sed
682193326Sed    // If we don't get a hit in the PTH file just forward to 'stat'.
683205219Srdivacky    if (I == Cache.end())
684263508Sdim      return statChained(Path, Data, isFile, FileDescriptor);
685198092Srdivacky
686263508Sdim    const PTHStatData &D = *I;
687198092Srdivacky
688263508Sdim    if (!D.HasData)
689218893Sdim      return CacheMissing;
690193326Sed
691263508Sdim    Data.Size = D.Size;
692263508Sdim    Data.ModTime = D.ModTime;
693263508Sdim    Data.UniqueID = D.UniqueID;
694263508Sdim    Data.IsDirectory = D.IsDirectory;
695263508Sdim    Data.IsNamedPipe = false;
696263508Sdim    Data.InPCH = true;
697263508Sdim
698218893Sdim    return CacheExists;
699193326Sed  }
700193326Sed};
701193326Sed} // end anonymous namespace
702193326Sed
703218893SdimFileSystemStatCache *PTHManager::createStatCache() {
704193326Sed  return new PTHStatCache(*((PTHFileLookup*) FileLookup));
705193326Sed}
706