PPLexerChange.cpp revision 239462
1//===--- PPLexerChange.cpp - Handle changing lexers in the preprocessor ---===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file implements pieces of the Preprocessor interface that manage the
11// current lexer stack.
12//
13//===----------------------------------------------------------------------===//
14
15#include "clang/Lex/Preprocessor.h"
16#include "clang/Lex/HeaderSearch.h"
17#include "clang/Lex/MacroInfo.h"
18#include "clang/Lex/LexDiagnostic.h"
19#include "clang/Basic/FileManager.h"
20#include "clang/Basic/SourceManager.h"
21#include "llvm/Support/FileSystem.h"
22#include "llvm/Support/MemoryBuffer.h"
23#include "llvm/Support/PathV2.h"
24#include "llvm/ADT/StringSwitch.h"
25using namespace clang;
26
27PPCallbacks::~PPCallbacks() {}
28
29//===----------------------------------------------------------------------===//
30// Miscellaneous Methods.
31//===----------------------------------------------------------------------===//
32
33/// isInPrimaryFile - Return true if we're in the top-level file, not in a
34/// \#include.  This looks through macro expansions and active _Pragma lexers.
35bool Preprocessor::isInPrimaryFile() const {
36  if (IsFileLexer())
37    return IncludeMacroStack.empty();
38
39  // If there are any stacked lexers, we're in a #include.
40  assert(IsFileLexer(IncludeMacroStack[0]) &&
41         "Top level include stack isn't our primary lexer?");
42  for (unsigned i = 1, e = IncludeMacroStack.size(); i != e; ++i)
43    if (IsFileLexer(IncludeMacroStack[i]))
44      return false;
45  return true;
46}
47
48/// getCurrentLexer - Return the current file lexer being lexed from.  Note
49/// that this ignores any potentially active macro expansions and _Pragma
50/// expansions going on at the time.
51PreprocessorLexer *Preprocessor::getCurrentFileLexer() const {
52  if (IsFileLexer())
53    return CurPPLexer;
54
55  // Look for a stacked lexer.
56  for (unsigned i = IncludeMacroStack.size(); i != 0; --i) {
57    const IncludeStackInfo& ISI = IncludeMacroStack[i-1];
58    if (IsFileLexer(ISI))
59      return ISI.ThePPLexer;
60  }
61  return 0;
62}
63
64
65//===----------------------------------------------------------------------===//
66// Methods for Entering and Callbacks for leaving various contexts
67//===----------------------------------------------------------------------===//
68
69/// EnterSourceFile - Add a source file to the top of the include stack and
70/// start lexing tokens from it instead of the current buffer.
71void Preprocessor::EnterSourceFile(FileID FID, const DirectoryLookup *CurDir,
72                                   SourceLocation Loc) {
73  assert(CurTokenLexer == 0 && "Cannot #include a file inside a macro!");
74  ++NumEnteredSourceFiles;
75
76  if (MaxIncludeStackDepth < IncludeMacroStack.size())
77    MaxIncludeStackDepth = IncludeMacroStack.size();
78
79  if (PTH) {
80    if (PTHLexer *PL = PTH->CreateLexer(FID)) {
81      EnterSourceFileWithPTH(PL, CurDir);
82      return;
83    }
84  }
85
86  // Get the MemoryBuffer for this FID, if it fails, we fail.
87  bool Invalid = false;
88  const llvm::MemoryBuffer *InputFile =
89    getSourceManager().getBuffer(FID, Loc, &Invalid);
90  if (Invalid) {
91    SourceLocation FileStart = SourceMgr.getLocForStartOfFile(FID);
92    Diag(Loc, diag::err_pp_error_opening_file)
93      << std::string(SourceMgr.getBufferName(FileStart)) << "";
94    return;
95  }
96
97  if (isCodeCompletionEnabled() &&
98      SourceMgr.getFileEntryForID(FID) == CodeCompletionFile) {
99    CodeCompletionFileLoc = SourceMgr.getLocForStartOfFile(FID);
100    CodeCompletionLoc =
101        CodeCompletionFileLoc.getLocWithOffset(CodeCompletionOffset);
102  }
103
104  EnterSourceFileWithLexer(new Lexer(FID, InputFile, *this), CurDir);
105  return;
106}
107
108/// EnterSourceFileWithLexer - Add a source file to the top of the include stack
109///  and start lexing tokens from it instead of the current buffer.
110void Preprocessor::EnterSourceFileWithLexer(Lexer *TheLexer,
111                                            const DirectoryLookup *CurDir) {
112
113  // Add the current lexer to the include stack.
114  if (CurPPLexer || CurTokenLexer)
115    PushIncludeMacroStack();
116
117  CurLexer.reset(TheLexer);
118  CurPPLexer = TheLexer;
119  CurDirLookup = CurDir;
120  if (CurLexerKind != CLK_LexAfterModuleImport)
121    CurLexerKind = CLK_Lexer;
122
123  // Notify the client, if desired, that we are in a new source file.
124  if (Callbacks && !CurLexer->Is_PragmaLexer) {
125    SrcMgr::CharacteristicKind FileType =
126       SourceMgr.getFileCharacteristic(CurLexer->getFileLoc());
127
128    Callbacks->FileChanged(CurLexer->getFileLoc(),
129                           PPCallbacks::EnterFile, FileType);
130  }
131}
132
133/// EnterSourceFileWithPTH - Add a source file to the top of the include stack
134/// and start getting tokens from it using the PTH cache.
135void Preprocessor::EnterSourceFileWithPTH(PTHLexer *PL,
136                                          const DirectoryLookup *CurDir) {
137
138  if (CurPPLexer || CurTokenLexer)
139    PushIncludeMacroStack();
140
141  CurDirLookup = CurDir;
142  CurPTHLexer.reset(PL);
143  CurPPLexer = CurPTHLexer.get();
144  if (CurLexerKind != CLK_LexAfterModuleImport)
145    CurLexerKind = CLK_PTHLexer;
146
147  // Notify the client, if desired, that we are in a new source file.
148  if (Callbacks) {
149    FileID FID = CurPPLexer->getFileID();
150    SourceLocation EnterLoc = SourceMgr.getLocForStartOfFile(FID);
151    SrcMgr::CharacteristicKind FileType =
152      SourceMgr.getFileCharacteristic(EnterLoc);
153    Callbacks->FileChanged(EnterLoc, PPCallbacks::EnterFile, FileType);
154  }
155}
156
157/// EnterMacro - Add a Macro to the top of the include stack and start lexing
158/// tokens from it instead of the current buffer.
159void Preprocessor::EnterMacro(Token &Tok, SourceLocation ILEnd,
160                              MacroArgs *Args) {
161  PushIncludeMacroStack();
162  CurDirLookup = 0;
163
164  if (NumCachedTokenLexers == 0) {
165    CurTokenLexer.reset(new TokenLexer(Tok, ILEnd, Args, *this));
166  } else {
167    CurTokenLexer.reset(TokenLexerCache[--NumCachedTokenLexers]);
168    CurTokenLexer->Init(Tok, ILEnd, Args);
169  }
170  if (CurLexerKind != CLK_LexAfterModuleImport)
171    CurLexerKind = CLK_TokenLexer;
172}
173
174/// EnterTokenStream - Add a "macro" context to the top of the include stack,
175/// which will cause the lexer to start returning the specified tokens.
176///
177/// If DisableMacroExpansion is true, tokens lexed from the token stream will
178/// not be subject to further macro expansion.  Otherwise, these tokens will
179/// be re-macro-expanded when/if expansion is enabled.
180///
181/// If OwnsTokens is false, this method assumes that the specified stream of
182/// tokens has a permanent owner somewhere, so they do not need to be copied.
183/// If it is true, it assumes the array of tokens is allocated with new[] and
184/// must be freed.
185///
186void Preprocessor::EnterTokenStream(const Token *Toks, unsigned NumToks,
187                                    bool DisableMacroExpansion,
188                                    bool OwnsTokens) {
189  // Save our current state.
190  PushIncludeMacroStack();
191  CurDirLookup = 0;
192
193  // Create a macro expander to expand from the specified token stream.
194  if (NumCachedTokenLexers == 0) {
195    CurTokenLexer.reset(new TokenLexer(Toks, NumToks, DisableMacroExpansion,
196                                       OwnsTokens, *this));
197  } else {
198    CurTokenLexer.reset(TokenLexerCache[--NumCachedTokenLexers]);
199    CurTokenLexer->Init(Toks, NumToks, DisableMacroExpansion, OwnsTokens);
200  }
201  if (CurLexerKind != CLK_LexAfterModuleImport)
202    CurLexerKind = CLK_TokenLexer;
203}
204
205/// \brief Compute the relative path that names the given file relative to
206/// the given directory.
207static void computeRelativePath(FileManager &FM, const DirectoryEntry *Dir,
208                                const FileEntry *File,
209                                SmallString<128> &Result) {
210  Result.clear();
211
212  StringRef FilePath = File->getDir()->getName();
213  StringRef Path = FilePath;
214  while (!Path.empty()) {
215    if (const DirectoryEntry *CurDir = FM.getDirectory(Path)) {
216      if (CurDir == Dir) {
217        Result = FilePath.substr(Path.size());
218        llvm::sys::path::append(Result,
219                                llvm::sys::path::filename(File->getName()));
220        return;
221      }
222    }
223
224    Path = llvm::sys::path::parent_path(Path);
225  }
226
227  Result = File->getName();
228}
229
230/// HandleEndOfFile - This callback is invoked when the lexer hits the end of
231/// the current file.  This either returns the EOF token or pops a level off
232/// the include stack and keeps going.
233bool Preprocessor::HandleEndOfFile(Token &Result, bool isEndOfMacro) {
234  assert(!CurTokenLexer &&
235         "Ending a file when currently in a macro!");
236
237  // See if this file had a controlling macro.
238  if (CurPPLexer) {  // Not ending a macro, ignore it.
239    if (const IdentifierInfo *ControllingMacro =
240          CurPPLexer->MIOpt.GetControllingMacroAtEndOfFile()) {
241      // Okay, this has a controlling macro, remember in HeaderFileInfo.
242      if (const FileEntry *FE =
243            SourceMgr.getFileEntryForID(CurPPLexer->getFileID()))
244        HeaderInfo.SetFileControllingMacro(FE, ControllingMacro);
245    }
246  }
247
248  // Complain about reaching a true EOF within arc_cf_code_audited.
249  // We don't want to complain about reaching the end of a macro
250  // instantiation or a _Pragma.
251  if (PragmaARCCFCodeAuditedLoc.isValid() &&
252      !isEndOfMacro && !(CurLexer && CurLexer->Is_PragmaLexer)) {
253    Diag(PragmaARCCFCodeAuditedLoc, diag::err_pp_eof_in_arc_cf_code_audited);
254
255    // Recover by leaving immediately.
256    PragmaARCCFCodeAuditedLoc = SourceLocation();
257  }
258
259  // If this is a #include'd file, pop it off the include stack and continue
260  // lexing the #includer file.
261  if (!IncludeMacroStack.empty()) {
262
263    // If we lexed the code-completion file, act as if we reached EOF.
264    if (isCodeCompletionEnabled() && CurPPLexer &&
265        SourceMgr.getLocForStartOfFile(CurPPLexer->getFileID()) ==
266            CodeCompletionFileLoc) {
267      if (CurLexer) {
268        Result.startToken();
269        CurLexer->FormTokenWithChars(Result, CurLexer->BufferEnd, tok::eof);
270        CurLexer.reset();
271      } else {
272        assert(CurPTHLexer && "Got EOF but no current lexer set!");
273        CurPTHLexer->getEOF(Result);
274        CurPTHLexer.reset();
275      }
276
277      CurPPLexer = 0;
278      return true;
279    }
280
281    if (!isEndOfMacro && CurPPLexer &&
282        SourceMgr.getIncludeLoc(CurPPLexer->getFileID()).isValid()) {
283      // Notify SourceManager to record the number of FileIDs that were created
284      // during lexing of the #include'd file.
285      unsigned NumFIDs =
286          SourceMgr.local_sloc_entry_size() -
287          CurPPLexer->getInitialNumSLocEntries() + 1/*#include'd file*/;
288      SourceMgr.setNumCreatedFIDsForFileID(CurPPLexer->getFileID(), NumFIDs);
289    }
290
291    FileID ExitedFID;
292    if (Callbacks && !isEndOfMacro && CurPPLexer)
293      ExitedFID = CurPPLexer->getFileID();
294
295    // We're done with the #included file.
296    RemoveTopOfLexerStack();
297
298    // Notify the client, if desired, that we are in a new source file.
299    if (Callbacks && !isEndOfMacro && CurPPLexer) {
300      SrcMgr::CharacteristicKind FileType =
301        SourceMgr.getFileCharacteristic(CurPPLexer->getSourceLocation());
302      Callbacks->FileChanged(CurPPLexer->getSourceLocation(),
303                             PPCallbacks::ExitFile, FileType, ExitedFID);
304    }
305
306    // Client should lex another token.
307    return false;
308  }
309
310  // If the file ends with a newline, form the EOF token on the newline itself,
311  // rather than "on the line following it", which doesn't exist.  This makes
312  // diagnostics relating to the end of file include the last file that the user
313  // actually typed, which is goodness.
314  if (CurLexer) {
315    const char *EndPos = CurLexer->BufferEnd;
316    if (EndPos != CurLexer->BufferStart &&
317        (EndPos[-1] == '\n' || EndPos[-1] == '\r')) {
318      --EndPos;
319
320      // Handle \n\r and \r\n:
321      if (EndPos != CurLexer->BufferStart &&
322          (EndPos[-1] == '\n' || EndPos[-1] == '\r') &&
323          EndPos[-1] != EndPos[0])
324        --EndPos;
325    }
326
327    Result.startToken();
328    CurLexer->BufferPtr = EndPos;
329    CurLexer->FormTokenWithChars(Result, EndPos, tok::eof);
330
331    if (!isIncrementalProcessingEnabled())
332      // We're done with lexing.
333      CurLexer.reset();
334  } else {
335    assert(CurPTHLexer && "Got EOF but no current lexer set!");
336    CurPTHLexer->getEOF(Result);
337    CurPTHLexer.reset();
338  }
339
340  if (!isIncrementalProcessingEnabled())
341    CurPPLexer = 0;
342
343  // This is the end of the top-level file. 'WarnUnusedMacroLocs' has collected
344  // all macro locations that we need to warn because they are not used.
345  for (WarnUnusedMacroLocsTy::iterator
346         I=WarnUnusedMacroLocs.begin(), E=WarnUnusedMacroLocs.end(); I!=E; ++I)
347    Diag(*I, diag::pp_macro_not_used);
348
349  // If we are building a module that has an umbrella header, make sure that
350  // each of the headers within the directory covered by the umbrella header
351  // was actually included by the umbrella header.
352  if (Module *Mod = getCurrentModule()) {
353    if (Mod->getUmbrellaHeader()) {
354      SourceLocation StartLoc
355        = SourceMgr.getLocForStartOfFile(SourceMgr.getMainFileID());
356
357      if (getDiagnostics().getDiagnosticLevel(
358            diag::warn_uncovered_module_header,
359            StartLoc) != DiagnosticsEngine::Ignored) {
360        ModuleMap &ModMap = getHeaderSearchInfo().getModuleMap();
361        typedef llvm::sys::fs::recursive_directory_iterator
362          recursive_directory_iterator;
363        const DirectoryEntry *Dir = Mod->getUmbrellaDir();
364        llvm::error_code EC;
365        for (recursive_directory_iterator Entry(Dir->getName(), EC), End;
366             Entry != End && !EC; Entry.increment(EC)) {
367          using llvm::StringSwitch;
368
369          // Check whether this entry has an extension typically associated with
370          // headers.
371          if (!StringSwitch<bool>(llvm::sys::path::extension(Entry->path()))
372                 .Cases(".h", ".H", ".hh", ".hpp", true)
373                 .Default(false))
374            continue;
375
376          if (const FileEntry *Header = getFileManager().getFile(Entry->path()))
377            if (!getSourceManager().hasFileInfo(Header)) {
378              if (!ModMap.isHeaderInUnavailableModule(Header)) {
379                // Find the relative path that would access this header.
380                SmallString<128> RelativePath;
381                computeRelativePath(FileMgr, Dir, Header, RelativePath);
382                Diag(StartLoc, diag::warn_uncovered_module_header)
383                  << RelativePath;
384              }
385            }
386        }
387      }
388    }
389  }
390
391  return true;
392}
393
394/// HandleEndOfTokenLexer - This callback is invoked when the current TokenLexer
395/// hits the end of its token stream.
396bool Preprocessor::HandleEndOfTokenLexer(Token &Result) {
397  assert(CurTokenLexer && !CurPPLexer &&
398         "Ending a macro when currently in a #include file!");
399
400  if (!MacroExpandingLexersStack.empty() &&
401      MacroExpandingLexersStack.back().first == CurTokenLexer.get())
402    removeCachedMacroExpandedTokensOfLastLexer();
403
404  // Delete or cache the now-dead macro expander.
405  if (NumCachedTokenLexers == TokenLexerCacheSize)
406    CurTokenLexer.reset();
407  else
408    TokenLexerCache[NumCachedTokenLexers++] = CurTokenLexer.take();
409
410  // Handle this like a #include file being popped off the stack.
411  return HandleEndOfFile(Result, true);
412}
413
414/// RemoveTopOfLexerStack - Pop the current lexer/macro exp off the top of the
415/// lexer stack.  This should only be used in situations where the current
416/// state of the top-of-stack lexer is unknown.
417void Preprocessor::RemoveTopOfLexerStack() {
418  assert(!IncludeMacroStack.empty() && "Ran out of stack entries to load");
419
420  if (CurTokenLexer) {
421    // Delete or cache the now-dead macro expander.
422    if (NumCachedTokenLexers == TokenLexerCacheSize)
423      CurTokenLexer.reset();
424    else
425      TokenLexerCache[NumCachedTokenLexers++] = CurTokenLexer.take();
426  }
427
428  PopIncludeMacroStack();
429}
430
431/// HandleMicrosoftCommentPaste - When the macro expander pastes together a
432/// comment (/##/) in microsoft mode, this method handles updating the current
433/// state, returning the token on the next source line.
434void Preprocessor::HandleMicrosoftCommentPaste(Token &Tok) {
435  assert(CurTokenLexer && !CurPPLexer &&
436         "Pasted comment can only be formed from macro");
437
438  // We handle this by scanning for the closest real lexer, switching it to
439  // raw mode and preprocessor mode.  This will cause it to return \n as an
440  // explicit EOD token.
441  PreprocessorLexer *FoundLexer = 0;
442  bool LexerWasInPPMode = false;
443  for (unsigned i = 0, e = IncludeMacroStack.size(); i != e; ++i) {
444    IncludeStackInfo &ISI = *(IncludeMacroStack.end()-i-1);
445    if (ISI.ThePPLexer == 0) continue;  // Scan for a real lexer.
446
447    // Once we find a real lexer, mark it as raw mode (disabling macro
448    // expansions) and preprocessor mode (return EOD).  We know that the lexer
449    // was *not* in raw mode before, because the macro that the comment came
450    // from was expanded.  However, it could have already been in preprocessor
451    // mode (#if COMMENT) in which case we have to return it to that mode and
452    // return EOD.
453    FoundLexer = ISI.ThePPLexer;
454    FoundLexer->LexingRawMode = true;
455    LexerWasInPPMode = FoundLexer->ParsingPreprocessorDirective;
456    FoundLexer->ParsingPreprocessorDirective = true;
457    break;
458  }
459
460  // Okay, we either found and switched over the lexer, or we didn't find a
461  // lexer.  In either case, finish off the macro the comment came from, getting
462  // the next token.
463  if (!HandleEndOfTokenLexer(Tok)) Lex(Tok);
464
465  // Discarding comments as long as we don't have EOF or EOD.  This 'comments
466  // out' the rest of the line, including any tokens that came from other macros
467  // that were active, as in:
468  //  #define submacro a COMMENT b
469  //    submacro c
470  // which should lex to 'a' only: 'b' and 'c' should be removed.
471  while (Tok.isNot(tok::eod) && Tok.isNot(tok::eof))
472    Lex(Tok);
473
474  // If we got an eod token, then we successfully found the end of the line.
475  if (Tok.is(tok::eod)) {
476    assert(FoundLexer && "Can't get end of line without an active lexer");
477    // Restore the lexer back to normal mode instead of raw mode.
478    FoundLexer->LexingRawMode = false;
479
480    // If the lexer was already in preprocessor mode, just return the EOD token
481    // to finish the preprocessor line.
482    if (LexerWasInPPMode) return;
483
484    // Otherwise, switch out of PP mode and return the next lexed token.
485    FoundLexer->ParsingPreprocessorDirective = false;
486    return Lex(Tok);
487  }
488
489  // If we got an EOF token, then we reached the end of the token stream but
490  // didn't find an explicit \n.  This can only happen if there was no lexer
491  // active (an active lexer would return EOD at EOF if there was no \n in
492  // preprocessor directive mode), so just return EOF as our token.
493  assert(!FoundLexer && "Lexer should return EOD before EOF in PP mode");
494}
495