1//===- Preprocessor.cpp - C Language Family Preprocessor Implementation ---===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9//  This file implements the Preprocessor interface.
10//
11//===----------------------------------------------------------------------===//
12//
13// Options to support:
14//   -H       - Print the name of each header file used.
15//   -d[DNI] - Dump various things.
16//   -fworking-directory - #line's with preprocessor's working dir.
17//   -fpreprocessed
18//   -dependency-file,-M,-MM,-MF,-MG,-MP,-MT,-MQ,-MD,-MMD
19//   -W*
20//   -w
21//
22// Messages to emit:
23//   "Multiple include guards may be useful for:\n"
24//
25//===----------------------------------------------------------------------===//
26
27#include "clang/Lex/Preprocessor.h"
28#include "clang/Basic/Builtins.h"
29#include "clang/Basic/FileManager.h"
30#include "clang/Basic/FileSystemStatCache.h"
31#include "clang/Basic/IdentifierTable.h"
32#include "clang/Basic/LLVM.h"
33#include "clang/Basic/LangOptions.h"
34#include "clang/Basic/Module.h"
35#include "clang/Basic/SourceLocation.h"
36#include "clang/Basic/SourceManager.h"
37#include "clang/Basic/TargetInfo.h"
38#include "clang/Lex/CodeCompletionHandler.h"
39#include "clang/Lex/ExternalPreprocessorSource.h"
40#include "clang/Lex/HeaderSearch.h"
41#include "clang/Lex/LexDiagnostic.h"
42#include "clang/Lex/Lexer.h"
43#include "clang/Lex/LiteralSupport.h"
44#include "clang/Lex/MacroArgs.h"
45#include "clang/Lex/MacroInfo.h"
46#include "clang/Lex/ModuleLoader.h"
47#include "clang/Lex/Pragma.h"
48#include "clang/Lex/PreprocessingRecord.h"
49#include "clang/Lex/PreprocessorLexer.h"
50#include "clang/Lex/PreprocessorOptions.h"
51#include "clang/Lex/ScratchBuffer.h"
52#include "clang/Lex/Token.h"
53#include "clang/Lex/TokenLexer.h"
54#include "llvm/ADT/APInt.h"
55#include "llvm/ADT/ArrayRef.h"
56#include "llvm/ADT/DenseMap.h"
57#include "llvm/ADT/STLExtras.h"
58#include "llvm/ADT/SmallString.h"
59#include "llvm/ADT/SmallVector.h"
60#include "llvm/ADT/StringRef.h"
61#include "llvm/ADT/StringSwitch.h"
62#include "llvm/Support/Capacity.h"
63#include "llvm/Support/ErrorHandling.h"
64#include "llvm/Support/MemoryBuffer.h"
65#include "llvm/Support/raw_ostream.h"
66#include <algorithm>
67#include <cassert>
68#include <memory>
69#include <string>
70#include <utility>
71#include <vector>
72
73using namespace clang;
74
75LLVM_INSTANTIATE_REGISTRY(PragmaHandlerRegistry)
76
// The destructor is defined out of line in this file; it is compiler-defaulted
// and performs no custom cleanup.
ExternalPreprocessorSource::~ExternalPreprocessorSource() = default;
78
79Preprocessor::Preprocessor(std::shared_ptr<PreprocessorOptions> PPOpts,
80                           DiagnosticsEngine &diags, LangOptions &opts,
81                           SourceManager &SM, HeaderSearch &Headers,
82                           ModuleLoader &TheModuleLoader,
83                           IdentifierInfoLookup *IILookup, bool OwnsHeaders,
84                           TranslationUnitKind TUKind)
85    : PPOpts(std::move(PPOpts)), Diags(&diags), LangOpts(opts),
86      FileMgr(Headers.getFileMgr()), SourceMgr(SM),
87      ScratchBuf(new ScratchBuffer(SourceMgr)), HeaderInfo(Headers),
88      TheModuleLoader(TheModuleLoader), ExternalSource(nullptr),
89      // As the language options may have not been loaded yet (when
90      // deserializing an ASTUnit), adding keywords to the identifier table is
91      // deferred to Preprocessor::Initialize().
92      Identifiers(IILookup), PragmaHandlers(new PragmaNamespace(StringRef())),
93      TUKind(TUKind), SkipMainFilePreamble(0, true),
94      CurSubmoduleState(&NullSubmoduleState) {
95  OwnsHeaderSearch = OwnsHeaders;
96
97  // Default to discarding comments.
98  KeepComments = false;
99  KeepMacroComments = false;
100  SuppressIncludeNotFoundError = false;
101
102  // Macro expansion is enabled.
103  DisableMacroExpansion = false;
104  MacroExpansionInDirectivesOverride = false;
105  InMacroArgs = false;
106  ArgMacro = nullptr;
107  InMacroArgPreExpansion = false;
108  NumCachedTokenLexers = 0;
109  PragmasEnabled = true;
110  ParsingIfOrElifDirective = false;
111  PreprocessedOutput = false;
112
113  // We haven't read anything from the external source.
114  ReadMacrosFromExternalSource = false;
115
116  BuiltinInfo = std::make_unique<Builtin::Context>();
117
118  // "Poison" __VA_ARGS__, __VA_OPT__ which can only appear in the expansion of
119  // a macro. They get unpoisoned where it is allowed.
120  (Ident__VA_ARGS__ = getIdentifierInfo("__VA_ARGS__"))->setIsPoisoned();
121  SetPoisonReason(Ident__VA_ARGS__,diag::ext_pp_bad_vaargs_use);
122  if (getLangOpts().CPlusPlus2a) {
123    (Ident__VA_OPT__ = getIdentifierInfo("__VA_OPT__"))->setIsPoisoned();
124    SetPoisonReason(Ident__VA_OPT__,diag::ext_pp_bad_vaopt_use);
125  } else {
126    Ident__VA_OPT__ = nullptr;
127  }
128
129  // Initialize the pragma handlers.
130  RegisterBuiltinPragmas();
131
132  // Initialize builtin macros like __LINE__ and friends.
133  RegisterBuiltinMacros();
134
135  if(LangOpts.Borland) {
136    Ident__exception_info        = getIdentifierInfo("_exception_info");
137    Ident___exception_info       = getIdentifierInfo("__exception_info");
138    Ident_GetExceptionInfo       = getIdentifierInfo("GetExceptionInformation");
139    Ident__exception_code        = getIdentifierInfo("_exception_code");
140    Ident___exception_code       = getIdentifierInfo("__exception_code");
141    Ident_GetExceptionCode       = getIdentifierInfo("GetExceptionCode");
142    Ident__abnormal_termination  = getIdentifierInfo("_abnormal_termination");
143    Ident___abnormal_termination = getIdentifierInfo("__abnormal_termination");
144    Ident_AbnormalTermination    = getIdentifierInfo("AbnormalTermination");
145  } else {
146    Ident__exception_info = Ident__exception_code = nullptr;
147    Ident__abnormal_termination = Ident___exception_info = nullptr;
148    Ident___exception_code = Ident___abnormal_termination = nullptr;
149    Ident_GetExceptionInfo = Ident_GetExceptionCode = nullptr;
150    Ident_AbnormalTermination = nullptr;
151  }
152
153  // If using a PCH where a #pragma hdrstop is expected, start skipping tokens.
154  if (usingPCHWithPragmaHdrStop())
155    SkippingUntilPragmaHdrStop = true;
156
157  // If using a PCH with a through header, start skipping tokens.
158  if (!this->PPOpts->PCHThroughHeader.empty() &&
159      !this->PPOpts->ImplicitPCHInclude.empty())
160    SkippingUntilPCHThroughHeader = true;
161
162  if (this->PPOpts->GeneratePreamble)
163    PreambleConditionalStack.startRecording();
164
165  ExcludedConditionalDirectiveSkipMappings =
166      this->PPOpts->ExcludedConditionalDirectiveSkipMappings;
167  if (ExcludedConditionalDirectiveSkipMappings)
168    ExcludedConditionalDirectiveSkipMappings->clear();
169}
170
171Preprocessor::~Preprocessor() {
172  assert(BacktrackPositions.empty() && "EnableBacktrack/Backtrack imbalance!");
173
174  IncludeMacroStack.clear();
175
176  // Destroy any macro definitions.
177  while (MacroInfoChain *I = MIChainHead) {
178    MIChainHead = I->Next;
179    I->~MacroInfoChain();
180  }
181
182  // Free any cached macro expanders.
183  // This populates MacroArgCache, so all TokenLexers need to be destroyed
184  // before the code below that frees up the MacroArgCache list.
185  std::fill(TokenLexerCache, TokenLexerCache + NumCachedTokenLexers, nullptr);
186  CurTokenLexer.reset();
187
188  // Free any cached MacroArgs.
189  for (MacroArgs *ArgList = MacroArgCache; ArgList;)
190    ArgList = ArgList->deallocate();
191
192  // Delete the header search info, if we own it.
193  if (OwnsHeaderSearch)
194    delete &HeaderInfo;
195}
196
197void Preprocessor::Initialize(const TargetInfo &Target,
198                              const TargetInfo *AuxTarget) {
199  assert((!this->Target || this->Target == &Target) &&
200         "Invalid override of target information");
201  this->Target = &Target;
202
203  assert((!this->AuxTarget || this->AuxTarget == AuxTarget) &&
204         "Invalid override of aux target information.");
205  this->AuxTarget = AuxTarget;
206
207  // Initialize information about built-ins.
208  BuiltinInfo->InitializeTarget(Target, AuxTarget);
209  HeaderInfo.setTarget(Target);
210
211  // Populate the identifier table with info about keywords for the current language.
212  Identifiers.AddKeywords(LangOpts);
213}
214
215void Preprocessor::InitializeForModelFile() {
216  NumEnteredSourceFiles = 0;
217
218  // Reset pragmas
219  PragmaHandlersBackup = std::move(PragmaHandlers);
220  PragmaHandlers = std::make_unique<PragmaNamespace>(StringRef());
221  RegisterBuiltinPragmas();
222
223  // Reset PredefinesFileID
224  PredefinesFileID = FileID();
225}
226
227void Preprocessor::FinalizeForModelFile() {
228  NumEnteredSourceFiles = 1;
229
230  PragmaHandlers = std::move(PragmaHandlersBackup);
231}
232
233void Preprocessor::DumpToken(const Token &Tok, bool DumpFlags) const {
234  llvm::errs() << tok::getTokenName(Tok.getKind()) << " '"
235               << getSpelling(Tok) << "'";
236
237  if (!DumpFlags) return;
238
239  llvm::errs() << "\t";
240  if (Tok.isAtStartOfLine())
241    llvm::errs() << " [StartOfLine]";
242  if (Tok.hasLeadingSpace())
243    llvm::errs() << " [LeadingSpace]";
244  if (Tok.isExpandDisabled())
245    llvm::errs() << " [ExpandDisabled]";
246  if (Tok.needsCleaning()) {
247    const char *Start = SourceMgr.getCharacterData(Tok.getLocation());
248    llvm::errs() << " [UnClean='" << StringRef(Start, Tok.getLength())
249                 << "']";
250  }
251
252  llvm::errs() << "\tLoc=<";
253  DumpLocation(Tok.getLocation());
254  llvm::errs() << ">";
255}
256
257void Preprocessor::DumpLocation(SourceLocation Loc) const {
258  Loc.print(llvm::errs(), SourceMgr);
259}
260
261void Preprocessor::DumpMacro(const MacroInfo &MI) const {
262  llvm::errs() << "MACRO: ";
263  for (unsigned i = 0, e = MI.getNumTokens(); i != e; ++i) {
264    DumpToken(MI.getReplacementToken(i));
265    llvm::errs() << "  ";
266  }
267  llvm::errs() << "\n";
268}
269
270void Preprocessor::PrintStats() {
271  llvm::errs() << "\n*** Preprocessor Stats:\n";
272  llvm::errs() << NumDirectives << " directives found:\n";
273  llvm::errs() << "  " << NumDefined << " #define.\n";
274  llvm::errs() << "  " << NumUndefined << " #undef.\n";
275  llvm::errs() << "  #include/#include_next/#import:\n";
276  llvm::errs() << "    " << NumEnteredSourceFiles << " source files entered.\n";
277  llvm::errs() << "    " << MaxIncludeStackDepth << " max include stack depth\n";
278  llvm::errs() << "  " << NumIf << " #if/#ifndef/#ifdef.\n";
279  llvm::errs() << "  " << NumElse << " #else/#elif.\n";
280  llvm::errs() << "  " << NumEndif << " #endif.\n";
281  llvm::errs() << "  " << NumPragma << " #pragma.\n";
282  llvm::errs() << NumSkipped << " #if/#ifndef#ifdef regions skipped\n";
283
284  llvm::errs() << NumMacroExpanded << "/" << NumFnMacroExpanded << "/"
285             << NumBuiltinMacroExpanded << " obj/fn/builtin macros expanded, "
286             << NumFastMacroExpanded << " on the fast path.\n";
287  llvm::errs() << (NumFastTokenPaste+NumTokenPaste)
288             << " token paste (##) operations performed, "
289             << NumFastTokenPaste << " on the fast path.\n";
290
291  llvm::errs() << "\nPreprocessor Memory: " << getTotalMemory() << "B total";
292
293  llvm::errs() << "\n  BumpPtr: " << BP.getTotalMemory();
294  llvm::errs() << "\n  Macro Expanded Tokens: "
295               << llvm::capacity_in_bytes(MacroExpandedTokens);
296  llvm::errs() << "\n  Predefines Buffer: " << Predefines.capacity();
297  // FIXME: List information for all submodules.
298  llvm::errs() << "\n  Macros: "
299               << llvm::capacity_in_bytes(CurSubmoduleState->Macros);
300  llvm::errs() << "\n  #pragma push_macro Info: "
301               << llvm::capacity_in_bytes(PragmaPushMacroInfo);
302  llvm::errs() << "\n  Poison Reasons: "
303               << llvm::capacity_in_bytes(PoisonReasons);
304  llvm::errs() << "\n  Comment Handlers: "
305               << llvm::capacity_in_bytes(CommentHandlers) << "\n";
306}
307
308Preprocessor::macro_iterator
309Preprocessor::macro_begin(bool IncludeExternalMacros) const {
310  if (IncludeExternalMacros && ExternalSource &&
311      !ReadMacrosFromExternalSource) {
312    ReadMacrosFromExternalSource = true;
313    ExternalSource->ReadDefinedMacros();
314  }
315
316  // Make sure we cover all macros in visible modules.
317  for (const ModuleMacro &Macro : ModuleMacros)
318    CurSubmoduleState->Macros.insert(std::make_pair(Macro.II, MacroState()));
319
320  return CurSubmoduleState->Macros.begin();
321}
322
323size_t Preprocessor::getTotalMemory() const {
324  return BP.getTotalMemory()
325    + llvm::capacity_in_bytes(MacroExpandedTokens)
326    + Predefines.capacity() /* Predefines buffer. */
327    // FIXME: Include sizes from all submodules, and include MacroInfo sizes,
328    // and ModuleMacros.
329    + llvm::capacity_in_bytes(CurSubmoduleState->Macros)
330    + llvm::capacity_in_bytes(PragmaPushMacroInfo)
331    + llvm::capacity_in_bytes(PoisonReasons)
332    + llvm::capacity_in_bytes(CommentHandlers);
333}
334
335Preprocessor::macro_iterator
336Preprocessor::macro_end(bool IncludeExternalMacros) const {
337  if (IncludeExternalMacros && ExternalSource &&
338      !ReadMacrosFromExternalSource) {
339    ReadMacrosFromExternalSource = true;
340    ExternalSource->ReadDefinedMacros();
341  }
342
343  return CurSubmoduleState->Macros.end();
344}
345
346/// Compares macro tokens with a specified token value sequence.
347static bool MacroDefinitionEquals(const MacroInfo *MI,
348                                  ArrayRef<TokenValue> Tokens) {
349  return Tokens.size() == MI->getNumTokens() &&
350      std::equal(Tokens.begin(), Tokens.end(), MI->tokens_begin());
351}
352
353StringRef Preprocessor::getLastMacroWithSpelling(
354                                    SourceLocation Loc,
355                                    ArrayRef<TokenValue> Tokens) const {
356  SourceLocation BestLocation;
357  StringRef BestSpelling;
358  for (Preprocessor::macro_iterator I = macro_begin(), E = macro_end();
359       I != E; ++I) {
360    const MacroDirective::DefInfo
361      Def = I->second.findDirectiveAtLoc(Loc, SourceMgr);
362    if (!Def || !Def.getMacroInfo())
363      continue;
364    if (!Def.getMacroInfo()->isObjectLike())
365      continue;
366    if (!MacroDefinitionEquals(Def.getMacroInfo(), Tokens))
367      continue;
368    SourceLocation Location = Def.getLocation();
369    // Choose the macro defined latest.
370    if (BestLocation.isInvalid() ||
371        (Location.isValid() &&
372         SourceMgr.isBeforeInTranslationUnit(BestLocation, Location))) {
373      BestLocation = Location;
374      BestSpelling = I->first->getName();
375    }
376  }
377  return BestSpelling;
378}
379
380void Preprocessor::recomputeCurLexerKind() {
381  if (CurLexer)
382    CurLexerKind = CLK_Lexer;
383  else if (CurTokenLexer)
384    CurLexerKind = CLK_TokenLexer;
385  else
386    CurLexerKind = CLK_CachingLexer;
387}
388
389bool Preprocessor::SetCodeCompletionPoint(const FileEntry *File,
390                                          unsigned CompleteLine,
391                                          unsigned CompleteColumn) {
392  assert(File);
393  assert(CompleteLine && CompleteColumn && "Starts from 1:1");
394  assert(!CodeCompletionFile && "Already set");
395
396  using llvm::MemoryBuffer;
397
398  // Load the actual file's contents.
399  bool Invalid = false;
400  const MemoryBuffer *Buffer = SourceMgr.getMemoryBufferForFile(File, &Invalid);
401  if (Invalid)
402    return true;
403
404  // Find the byte position of the truncation point.
405  const char *Position = Buffer->getBufferStart();
406  for (unsigned Line = 1; Line < CompleteLine; ++Line) {
407    for (; *Position; ++Position) {
408      if (*Position != '\r' && *Position != '\n')
409        continue;
410
411      // Eat \r\n or \n\r as a single line.
412      if ((Position[1] == '\r' || Position[1] == '\n') &&
413          Position[0] != Position[1])
414        ++Position;
415      ++Position;
416      break;
417    }
418  }
419
420  Position += CompleteColumn - 1;
421
422  // If pointing inside the preamble, adjust the position at the beginning of
423  // the file after the preamble.
424  if (SkipMainFilePreamble.first &&
425      SourceMgr.getFileEntryForID(SourceMgr.getMainFileID()) == File) {
426    if (Position - Buffer->getBufferStart() < SkipMainFilePreamble.first)
427      Position = Buffer->getBufferStart() + SkipMainFilePreamble.first;
428  }
429
430  if (Position > Buffer->getBufferEnd())
431    Position = Buffer->getBufferEnd();
432
433  CodeCompletionFile = File;
434  CodeCompletionOffset = Position - Buffer->getBufferStart();
435
436  auto NewBuffer = llvm::WritableMemoryBuffer::getNewUninitMemBuffer(
437      Buffer->getBufferSize() + 1, Buffer->getBufferIdentifier());
438  char *NewBuf = NewBuffer->getBufferStart();
439  char *NewPos = std::copy(Buffer->getBufferStart(), Position, NewBuf);
440  *NewPos = '\0';
441  std::copy(Position, Buffer->getBufferEnd(), NewPos+1);
442  SourceMgr.overrideFileContents(File, std::move(NewBuffer));
443
444  return false;
445}
446
447void Preprocessor::CodeCompleteIncludedFile(llvm::StringRef Dir,
448                                            bool IsAngled) {
449  if (CodeComplete)
450    CodeComplete->CodeCompleteIncludedFile(Dir, IsAngled);
451  setCodeCompletionReached();
452}
453
454void Preprocessor::CodeCompleteNaturalLanguage() {
455  if (CodeComplete)
456    CodeComplete->CodeCompleteNaturalLanguage();
457  setCodeCompletionReached();
458}
459
460/// getSpelling - This method is used to get the spelling of a token into a
461/// SmallVector. Note that the returned StringRef may not point to the
462/// supplied buffer if a copy can be avoided.
463StringRef Preprocessor::getSpelling(const Token &Tok,
464                                          SmallVectorImpl<char> &Buffer,
465                                          bool *Invalid) const {
466  // NOTE: this has to be checked *before* testing for an IdentifierInfo.
467  if (Tok.isNot(tok::raw_identifier) && !Tok.hasUCN()) {
468    // Try the fast path.
469    if (const IdentifierInfo *II = Tok.getIdentifierInfo())
470      return II->getName();
471  }
472
473  // Resize the buffer if we need to copy into it.
474  if (Tok.needsCleaning())
475    Buffer.resize(Tok.getLength());
476
477  const char *Ptr = Buffer.data();
478  unsigned Len = getSpelling(Tok, Ptr, Invalid);
479  return StringRef(Ptr, Len);
480}
481
482/// CreateString - Plop the specified string into a scratch buffer and return a
483/// location for it.  If specified, the source location provides a source
484/// location for the token.
485void Preprocessor::CreateString(StringRef Str, Token &Tok,
486                                SourceLocation ExpansionLocStart,
487                                SourceLocation ExpansionLocEnd) {
488  Tok.setLength(Str.size());
489
490  const char *DestPtr;
491  SourceLocation Loc = ScratchBuf->getToken(Str.data(), Str.size(), DestPtr);
492
493  if (ExpansionLocStart.isValid())
494    Loc = SourceMgr.createExpansionLoc(Loc, ExpansionLocStart,
495                                       ExpansionLocEnd, Str.size());
496  Tok.setLocation(Loc);
497
498  // If this is a raw identifier or a literal token, set the pointer data.
499  if (Tok.is(tok::raw_identifier))
500    Tok.setRawIdentifierData(DestPtr);
501  else if (Tok.isLiteral())
502    Tok.setLiteralData(DestPtr);
503}
504
505SourceLocation Preprocessor::SplitToken(SourceLocation Loc, unsigned Length) {
506  auto &SM = getSourceManager();
507  SourceLocation SpellingLoc = SM.getSpellingLoc(Loc);
508  std::pair<FileID, unsigned> LocInfo = SM.getDecomposedLoc(SpellingLoc);
509  bool Invalid = false;
510  StringRef Buffer = SM.getBufferData(LocInfo.first, &Invalid);
511  if (Invalid)
512    return SourceLocation();
513
514  // FIXME: We could consider re-using spelling for tokens we see repeatedly.
515  const char *DestPtr;
516  SourceLocation Spelling =
517      ScratchBuf->getToken(Buffer.data() + LocInfo.second, Length, DestPtr);
518  return SM.createTokenSplitLoc(Spelling, Loc, Loc.getLocWithOffset(Length));
519}
520
521Module *Preprocessor::getCurrentModule() {
522  if (!getLangOpts().isCompilingModule())
523    return nullptr;
524
525  return getHeaderSearchInfo().lookupModule(getLangOpts().CurrentModule);
526}
527
528//===----------------------------------------------------------------------===//
529// Preprocessor Initialization Methods
530//===----------------------------------------------------------------------===//
531
532/// EnterMainSourceFile - Enter the specified FileID as the main source file,
533/// which implicitly adds the builtin defines etc.
534void Preprocessor::EnterMainSourceFile() {
535  // We do not allow the preprocessor to reenter the main file.  Doing so will
536  // cause FileID's to accumulate information from both runs (e.g. #line
537  // information) and predefined macros aren't guaranteed to be set properly.
538  assert(NumEnteredSourceFiles == 0 && "Cannot reenter the main file!");
539  FileID MainFileID = SourceMgr.getMainFileID();
540
541  // If MainFileID is loaded it means we loaded an AST file, no need to enter
542  // a main file.
543  if (!SourceMgr.isLoadedFileID(MainFileID)) {
544    // Enter the main file source buffer.
545    EnterSourceFile(MainFileID, nullptr, SourceLocation());
546
547    // If we've been asked to skip bytes in the main file (e.g., as part of a
548    // precompiled preamble), do so now.
549    if (SkipMainFilePreamble.first > 0)
550      CurLexer->SetByteOffset(SkipMainFilePreamble.first,
551                              SkipMainFilePreamble.second);
552
553    // Tell the header info that the main file was entered.  If the file is later
554    // #imported, it won't be re-entered.
555    if (const FileEntry *FE = SourceMgr.getFileEntryForID(MainFileID))
556      HeaderInfo.IncrementIncludeCount(FE);
557  }
558
559  // Preprocess Predefines to populate the initial preprocessor state.
560  std::unique_ptr<llvm::MemoryBuffer> SB =
561    llvm::MemoryBuffer::getMemBufferCopy(Predefines, "<built-in>");
562  assert(SB && "Cannot create predefined source buffer");
563  FileID FID = SourceMgr.createFileID(std::move(SB));
564  assert(FID.isValid() && "Could not create FileID for predefines?");
565  setPredefinesFileID(FID);
566
567  // Start parsing the predefines.
568  EnterSourceFile(FID, nullptr, SourceLocation());
569
570  if (!PPOpts->PCHThroughHeader.empty()) {
571    // Lookup and save the FileID for the through header. If it isn't found
572    // in the search path, it's a fatal error.
573    const DirectoryLookup *CurDir;
574    Optional<FileEntryRef> File = LookupFile(
575        SourceLocation(), PPOpts->PCHThroughHeader,
576        /*isAngled=*/false, /*FromDir=*/nullptr, /*FromFile=*/nullptr, CurDir,
577        /*SearchPath=*/nullptr, /*RelativePath=*/nullptr,
578        /*SuggestedModule=*/nullptr, /*IsMapped=*/nullptr,
579        /*IsFrameworkFound=*/nullptr);
580    if (!File) {
581      Diag(SourceLocation(), diag::err_pp_through_header_not_found)
582          << PPOpts->PCHThroughHeader;
583      return;
584    }
585    setPCHThroughHeaderFileID(
586        SourceMgr.createFileID(*File, SourceLocation(), SrcMgr::C_User));
587  }
588
589  // Skip tokens from the Predefines and if needed the main file.
590  if ((usingPCHWithThroughHeader() && SkippingUntilPCHThroughHeader) ||
591      (usingPCHWithPragmaHdrStop() && SkippingUntilPragmaHdrStop))
592    SkipTokensWhileUsingPCH();
593}
594
595void Preprocessor::setPCHThroughHeaderFileID(FileID FID) {
596  assert(PCHThroughHeaderFileID.isInvalid() &&
597         "PCHThroughHeaderFileID already set!");
598  PCHThroughHeaderFileID = FID;
599}
600
601bool Preprocessor::isPCHThroughHeader(const FileEntry *FE) {
602  assert(PCHThroughHeaderFileID.isValid() &&
603         "Invalid PCH through header FileID");
604  return FE == SourceMgr.getFileEntryForID(PCHThroughHeaderFileID);
605}
606
607bool Preprocessor::creatingPCHWithThroughHeader() {
608  return TUKind == TU_Prefix && !PPOpts->PCHThroughHeader.empty() &&
609         PCHThroughHeaderFileID.isValid();
610}
611
612bool Preprocessor::usingPCHWithThroughHeader() {
613  return TUKind != TU_Prefix && !PPOpts->PCHThroughHeader.empty() &&
614         PCHThroughHeaderFileID.isValid();
615}
616
617bool Preprocessor::creatingPCHWithPragmaHdrStop() {
618  return TUKind == TU_Prefix && PPOpts->PCHWithHdrStop;
619}
620
621bool Preprocessor::usingPCHWithPragmaHdrStop() {
622  return TUKind != TU_Prefix && PPOpts->PCHWithHdrStop;
623}
624
625/// Skip tokens until after the #include of the through header or
626/// until after a #pragma hdrstop is seen. Tokens in the predefines file
627/// and the main file may be skipped. If the end of the predefines file
628/// is reached, skipping continues into the main file. If the end of the
629/// main file is reached, it's a fatal error.
630void Preprocessor::SkipTokensWhileUsingPCH() {
631  bool ReachedMainFileEOF = false;
632  bool UsingPCHThroughHeader = SkippingUntilPCHThroughHeader;
633  bool UsingPragmaHdrStop = SkippingUntilPragmaHdrStop;
634  Token Tok;
635  while (true) {
636    bool InPredefines =
637        (CurLexer && CurLexer->getFileID() == getPredefinesFileID());
638    switch (CurLexerKind) {
639    case CLK_Lexer:
640      CurLexer->Lex(Tok);
641     break;
642    case CLK_TokenLexer:
643      CurTokenLexer->Lex(Tok);
644      break;
645    case CLK_CachingLexer:
646      CachingLex(Tok);
647      break;
648    case CLK_LexAfterModuleImport:
649      LexAfterModuleImport(Tok);
650      break;
651    }
652    if (Tok.is(tok::eof) && !InPredefines) {
653      ReachedMainFileEOF = true;
654      break;
655    }
656    if (UsingPCHThroughHeader && !SkippingUntilPCHThroughHeader)
657      break;
658    if (UsingPragmaHdrStop && !SkippingUntilPragmaHdrStop)
659      break;
660  }
661  if (ReachedMainFileEOF) {
662    if (UsingPCHThroughHeader)
663      Diag(SourceLocation(), diag::err_pp_through_header_not_seen)
664          << PPOpts->PCHThroughHeader << 1;
665    else if (!PPOpts->PCHWithHdrStopCreate)
666      Diag(SourceLocation(), diag::err_pp_pragma_hdrstop_not_seen);
667  }
668}
669
670void Preprocessor::replayPreambleConditionalStack() {
671  // Restore the conditional stack from the preamble, if there is one.
672  if (PreambleConditionalStack.isReplaying()) {
673    assert(CurPPLexer &&
674           "CurPPLexer is null when calling replayPreambleConditionalStack.");
675    CurPPLexer->setConditionalLevels(PreambleConditionalStack.getStack());
676    PreambleConditionalStack.doneReplaying();
677    if (PreambleConditionalStack.reachedEOFWhileSkipping())
678      SkipExcludedConditionalBlock(
679          PreambleConditionalStack.SkipInfo->HashTokenLoc,
680          PreambleConditionalStack.SkipInfo->IfTokenLoc,
681          PreambleConditionalStack.SkipInfo->FoundNonSkipPortion,
682          PreambleConditionalStack.SkipInfo->FoundElse,
683          PreambleConditionalStack.SkipInfo->ElseLoc);
684  }
685}
686
687void Preprocessor::EndSourceFile() {
688  // Notify the client that we reached the end of the source file.
689  if (Callbacks)
690    Callbacks->EndOfMainFile();
691}
692
693//===----------------------------------------------------------------------===//
694// Lexer Event Handling.
695//===----------------------------------------------------------------------===//
696
697/// LookUpIdentifierInfo - Given a tok::raw_identifier token, look up the
698/// identifier information for the token and install it into the token,
699/// updating the token kind accordingly.
700IdentifierInfo *Preprocessor::LookUpIdentifierInfo(Token &Identifier) const {
701  assert(!Identifier.getRawIdentifier().empty() && "No raw identifier data!");
702
703  // Look up this token, see if it is a macro, or if it is a language keyword.
704  IdentifierInfo *II;
705  if (!Identifier.needsCleaning() && !Identifier.hasUCN()) {
706    // No cleaning needed, just use the characters from the lexed buffer.
707    II = getIdentifierInfo(Identifier.getRawIdentifier());
708  } else {
709    // Cleaning needed, alloca a buffer, clean into it, then use the buffer.
710    SmallString<64> IdentifierBuffer;
711    StringRef CleanedStr = getSpelling(Identifier, IdentifierBuffer);
712
713    if (Identifier.hasUCN()) {
714      SmallString<64> UCNIdentifierBuffer;
715      expandUCNs(UCNIdentifierBuffer, CleanedStr);
716      II = getIdentifierInfo(UCNIdentifierBuffer);
717    } else {
718      II = getIdentifierInfo(CleanedStr);
719    }
720  }
721
722  // Update the token info (identifier info and appropriate token kind).
723  Identifier.setIdentifierInfo(II);
724  if (getLangOpts().MSVCCompat && II->isCPlusPlusOperatorKeyword() &&
725      getSourceManager().isInSystemHeader(Identifier.getLocation()))
726    Identifier.setKind(tok::identifier);
727  else
728    Identifier.setKind(II->getTokenID());
729
730  return II;
731}
732
733void Preprocessor::SetPoisonReason(IdentifierInfo *II, unsigned DiagID) {
734  PoisonReasons[II] = DiagID;
735}
736
737void Preprocessor::PoisonSEHIdentifiers(bool Poison) {
738  assert(Ident__exception_code && Ident__exception_info);
739  assert(Ident___exception_code && Ident___exception_info);
740  Ident__exception_code->setIsPoisoned(Poison);
741  Ident___exception_code->setIsPoisoned(Poison);
742  Ident_GetExceptionCode->setIsPoisoned(Poison);
743  Ident__exception_info->setIsPoisoned(Poison);
744  Ident___exception_info->setIsPoisoned(Poison);
745  Ident_GetExceptionInfo->setIsPoisoned(Poison);
746  Ident__abnormal_termination->setIsPoisoned(Poison);
747  Ident___abnormal_termination->setIsPoisoned(Poison);
748  Ident_AbnormalTermination->setIsPoisoned(Poison);
749}
750
751void Preprocessor::HandlePoisonedIdentifier(Token & Identifier) {
752  assert(Identifier.getIdentifierInfo() &&
753         "Can't handle identifiers without identifier info!");
754  llvm::DenseMap<IdentifierInfo*,unsigned>::const_iterator it =
755    PoisonReasons.find(Identifier.getIdentifierInfo());
756  if(it == PoisonReasons.end())
757    Diag(Identifier, diag::err_pp_used_poisoned_id);
758  else
759    Diag(Identifier,it->second) << Identifier.getIdentifierInfo();
760}
761
762/// Returns a diagnostic message kind for reporting a future keyword as
763/// appropriate for the identifier and specified language.
764static diag::kind getFutureCompatDiagKind(const IdentifierInfo &II,
765                                          const LangOptions &LangOpts) {
766  assert(II.isFutureCompatKeyword() && "diagnostic should not be needed");
767
768  if (LangOpts.CPlusPlus)
769    return llvm::StringSwitch<diag::kind>(II.getName())
770#define CXX11_KEYWORD(NAME, FLAGS)                                             \
771        .Case(#NAME, diag::warn_cxx11_keyword)
772#define CXX2A_KEYWORD(NAME, FLAGS)                                             \
773        .Case(#NAME, diag::warn_cxx2a_keyword)
774#include "clang/Basic/TokenKinds.def"
775        ;
776
777  llvm_unreachable(
778      "Keyword not known to come from a newer Standard or proposed Standard");
779}
780
781void Preprocessor::updateOutOfDateIdentifier(IdentifierInfo &II) const {
782  assert(II.isOutOfDate() && "not out of date");
783  getExternalSource()->updateOutOfDateIdentifier(II);
784}
785
786/// HandleIdentifier - This callback is invoked when the lexer reads an
787/// identifier.  This callback looks up the identifier in the map and/or
788/// potentially macro expands it or turns it into a named token (like 'for').
789///
790/// Note that callers of this method are guarded by checking the
791/// IdentifierInfo's 'isHandleIdentifierCase' bit.  If this method changes, the
792/// IdentifierInfo methods that compute these properties will need to change to
793/// match.
794bool Preprocessor::HandleIdentifier(Token &Identifier) {
795  assert(Identifier.getIdentifierInfo() &&
796         "Can't handle identifiers without identifier info!");
797
798  IdentifierInfo &II = *Identifier.getIdentifierInfo();
799
800  // If the information about this identifier is out of date, update it from
801  // the external source.
802  // We have to treat __VA_ARGS__ in a special way, since it gets
803  // serialized with isPoisoned = true, but our preprocessor may have
804  // unpoisoned it if we're defining a C99 macro.
805  if (II.isOutOfDate()) {
806    bool CurrentIsPoisoned = false;
807    const bool IsSpecialVariadicMacro =
808        &II == Ident__VA_ARGS__ || &II == Ident__VA_OPT__;
809    if (IsSpecialVariadicMacro)
810      CurrentIsPoisoned = II.isPoisoned();
811
812    updateOutOfDateIdentifier(II);
813    Identifier.setKind(II.getTokenID());
814
815    if (IsSpecialVariadicMacro)
816      II.setIsPoisoned(CurrentIsPoisoned);
817  }
818
819  // If this identifier was poisoned, and if it was not produced from a macro
820  // expansion, emit an error.
821  if (II.isPoisoned() && CurPPLexer) {
822    HandlePoisonedIdentifier(Identifier);
823  }
824
825  // If this is a macro to be expanded, do it.
826  if (MacroDefinition MD = getMacroDefinition(&II)) {
827    auto *MI = MD.getMacroInfo();
828    assert(MI && "macro definition with no macro info?");
829    if (!DisableMacroExpansion) {
830      if (!Identifier.isExpandDisabled() && MI->isEnabled()) {
831        // C99 6.10.3p10: If the preprocessing token immediately after the
832        // macro name isn't a '(', this macro should not be expanded.
833        if (!MI->isFunctionLike() || isNextPPTokenLParen())
834          return HandleMacroExpandedIdentifier(Identifier, MD);
835      } else {
836        // C99 6.10.3.4p2 says that a disabled macro may never again be
837        // expanded, even if it's in a context where it could be expanded in the
838        // future.
839        Identifier.setFlag(Token::DisableExpand);
840        if (MI->isObjectLike() || isNextPPTokenLParen())
841          Diag(Identifier, diag::pp_disabled_macro_expansion);
842      }
843    }
844  }
845
846  // If this identifier is a keyword in a newer Standard or proposed Standard,
847  // produce a warning. Don't warn if we're not considering macro expansion,
848  // since this identifier might be the name of a macro.
849  // FIXME: This warning is disabled in cases where it shouldn't be, like
850  //   "#define constexpr constexpr", "int constexpr;"
851  if (II.isFutureCompatKeyword() && !DisableMacroExpansion) {
852    Diag(Identifier, getFutureCompatDiagKind(II, getLangOpts()))
853        << II.getName();
854    // Don't diagnose this keyword again in this translation unit.
855    II.setIsFutureCompatKeyword(false);
856  }
857
858  // If this is an extension token, diagnose its use.
859  // We avoid diagnosing tokens that originate from macro definitions.
860  // FIXME: This warning is disabled in cases where it shouldn't be,
861  // like "#define TY typeof", "TY(1) x".
862  if (II.isExtensionToken() && !DisableMacroExpansion)
863    Diag(Identifier, diag::ext_token_used);
864
865  // If this is the 'import' contextual keyword following an '@', note
866  // that the next token indicates a module name.
867  //
868  // Note that we do not treat 'import' as a contextual
869  // keyword when we're in a caching lexer, because caching lexers only get
870  // used in contexts where import declarations are disallowed.
871  //
872  // Likewise if this is the C++ Modules TS import keyword.
873  if (((LastTokenWasAt && II.isModulesImport()) ||
874       Identifier.is(tok::kw_import)) &&
875      !InMacroArgs && !DisableMacroExpansion &&
876      (getLangOpts().Modules || getLangOpts().DebuggerSupport) &&
877      CurLexerKind != CLK_CachingLexer) {
878    ModuleImportLoc = Identifier.getLocation();
879    ModuleImportPath.clear();
880    ModuleImportExpectsIdentifier = true;
881    CurLexerKind = CLK_LexAfterModuleImport;
882  }
883  return true;
884}
885
886void Preprocessor::Lex(Token &Result) {
887  ++LexLevel;
888
889  // We loop here until a lex function returns a token; this avoids recursion.
890  bool ReturnedToken;
891  do {
892    switch (CurLexerKind) {
893    case CLK_Lexer:
894      ReturnedToken = CurLexer->Lex(Result);
895      break;
896    case CLK_TokenLexer:
897      ReturnedToken = CurTokenLexer->Lex(Result);
898      break;
899    case CLK_CachingLexer:
900      CachingLex(Result);
901      ReturnedToken = true;
902      break;
903    case CLK_LexAfterModuleImport:
904      ReturnedToken = LexAfterModuleImport(Result);
905      break;
906    }
907  } while (!ReturnedToken);
908
909  if (Result.is(tok::code_completion) && Result.getIdentifierInfo()) {
910    // Remember the identifier before code completion token.
911    setCodeCompletionIdentifierInfo(Result.getIdentifierInfo());
912    setCodeCompletionTokenRange(Result.getLocation(), Result.getEndLoc());
913    // Set IdenfitierInfo to null to avoid confusing code that handles both
914    // identifiers and completion tokens.
915    Result.setIdentifierInfo(nullptr);
916  }
917
918  // Update ImportSeqState to track our position within a C++20 import-seq
919  // if this token is being produced as a result of phase 4 of translation.
920  if (getLangOpts().CPlusPlusModules && LexLevel == 1 &&
921      !Result.getFlag(Token::IsReinjected)) {
922    switch (Result.getKind()) {
923    case tok::l_paren: case tok::l_square: case tok::l_brace:
924      ImportSeqState.handleOpenBracket();
925      break;
926    case tok::r_paren: case tok::r_square:
927      ImportSeqState.handleCloseBracket();
928      break;
929    case tok::r_brace:
930      ImportSeqState.handleCloseBrace();
931      break;
932    case tok::semi:
933      ImportSeqState.handleSemi();
934      break;
935    case tok::header_name:
936    case tok::annot_header_unit:
937      ImportSeqState.handleHeaderName();
938      break;
939    case tok::kw_export:
940      ImportSeqState.handleExport();
941      break;
942    case tok::identifier:
943      if (Result.getIdentifierInfo()->isModulesImport()) {
944        ImportSeqState.handleImport();
945        if (ImportSeqState.afterImportSeq()) {
946          ModuleImportLoc = Result.getLocation();
947          ModuleImportPath.clear();
948          ModuleImportExpectsIdentifier = true;
949          CurLexerKind = CLK_LexAfterModuleImport;
950        }
951        break;
952      }
953      LLVM_FALLTHROUGH;
954    default:
955      ImportSeqState.handleMisc();
956      break;
957    }
958  }
959
960  LastTokenWasAt = Result.is(tok::at);
961  --LexLevel;
962  if (OnToken && LexLevel == 0 && !Result.getFlag(Token::IsReinjected))
963    OnToken(Result);
964}
965
966/// Lex a header-name token (including one formed from header-name-tokens if
967/// \p AllowConcatenation is \c true).
968///
969/// \param FilenameTok Filled in with the next token. On success, this will
970///        be either a header_name token. On failure, it will be whatever other
971///        token was found instead.
972/// \param AllowMacroExpansion If \c true, allow the header name to be formed
973///        by macro expansion (concatenating tokens as necessary if the first
974///        token is a '<').
975/// \return \c true if we reached EOD or EOF while looking for a > token in
976///         a concatenated header name and diagnosed it. \c false otherwise.
977bool Preprocessor::LexHeaderName(Token &FilenameTok, bool AllowMacroExpansion) {
978  // Lex using header-name tokenization rules if tokens are being lexed from
979  // a file. Just grab a token normally if we're in a macro expansion.
980  if (CurPPLexer)
981    CurPPLexer->LexIncludeFilename(FilenameTok);
982  else
983    Lex(FilenameTok);
984
985  // This could be a <foo/bar.h> file coming from a macro expansion.  In this
986  // case, glue the tokens together into an angle_string_literal token.
987  SmallString<128> FilenameBuffer;
988  if (FilenameTok.is(tok::less) && AllowMacroExpansion) {
989    bool StartOfLine = FilenameTok.isAtStartOfLine();
990    bool LeadingSpace = FilenameTok.hasLeadingSpace();
991    bool LeadingEmptyMacro = FilenameTok.hasLeadingEmptyMacro();
992
993    SourceLocation Start = FilenameTok.getLocation();
994    SourceLocation End;
995    FilenameBuffer.push_back('<');
996
997    // Consume tokens until we find a '>'.
998    // FIXME: A header-name could be formed starting or ending with an
999    // alternative token. It's not clear whether that's ill-formed in all
1000    // cases.
1001    while (FilenameTok.isNot(tok::greater)) {
1002      Lex(FilenameTok);
1003      if (FilenameTok.isOneOf(tok::eod, tok::eof)) {
1004        Diag(FilenameTok.getLocation(), diag::err_expected) << tok::greater;
1005        Diag(Start, diag::note_matching) << tok::less;
1006        return true;
1007      }
1008
1009      End = FilenameTok.getLocation();
1010
1011      // FIXME: Provide code completion for #includes.
1012      if (FilenameTok.is(tok::code_completion)) {
1013        setCodeCompletionReached();
1014        Lex(FilenameTok);
1015        continue;
1016      }
1017
1018      // Append the spelling of this token to the buffer. If there was a space
1019      // before it, add it now.
1020      if (FilenameTok.hasLeadingSpace())
1021        FilenameBuffer.push_back(' ');
1022
1023      // Get the spelling of the token, directly into FilenameBuffer if
1024      // possible.
1025      size_t PreAppendSize = FilenameBuffer.size();
1026      FilenameBuffer.resize(PreAppendSize + FilenameTok.getLength());
1027
1028      const char *BufPtr = &FilenameBuffer[PreAppendSize];
1029      unsigned ActualLen = getSpelling(FilenameTok, BufPtr);
1030
1031      // If the token was spelled somewhere else, copy it into FilenameBuffer.
1032      if (BufPtr != &FilenameBuffer[PreAppendSize])
1033        memcpy(&FilenameBuffer[PreAppendSize], BufPtr, ActualLen);
1034
1035      // Resize FilenameBuffer to the correct size.
1036      if (FilenameTok.getLength() != ActualLen)
1037        FilenameBuffer.resize(PreAppendSize + ActualLen);
1038    }
1039
1040    FilenameTok.startToken();
1041    FilenameTok.setKind(tok::header_name);
1042    FilenameTok.setFlagValue(Token::StartOfLine, StartOfLine);
1043    FilenameTok.setFlagValue(Token::LeadingSpace, LeadingSpace);
1044    FilenameTok.setFlagValue(Token::LeadingEmptyMacro, LeadingEmptyMacro);
1045    CreateString(FilenameBuffer, FilenameTok, Start, End);
1046  } else if (FilenameTok.is(tok::string_literal) && AllowMacroExpansion) {
1047    // Convert a string-literal token of the form " h-char-sequence "
1048    // (produced by macro expansion) into a header-name token.
1049    //
1050    // The rules for header-names don't quite match the rules for
1051    // string-literals, but all the places where they differ result in
1052    // undefined behavior, so we can and do treat them the same.
1053    //
1054    // A string-literal with a prefix or suffix is not translated into a
1055    // header-name. This could theoretically be observable via the C++20
1056    // context-sensitive header-name formation rules.
1057    StringRef Str = getSpelling(FilenameTok, FilenameBuffer);
1058    if (Str.size() >= 2 && Str.front() == '"' && Str.back() == '"')
1059      FilenameTok.setKind(tok::header_name);
1060  }
1061
1062  return false;
1063}
1064
1065/// Collect the tokens of a C++20 pp-import-suffix.
1066void Preprocessor::CollectPpImportSuffix(SmallVectorImpl<Token> &Toks) {
1067  // FIXME: For error recovery, consider recognizing attribute syntax here
1068  // and terminating / diagnosing a missing semicolon if we find anything
1069  // else? (Can we leave that to the parser?)
1070  unsigned BracketDepth = 0;
1071  while (true) {
1072    Toks.emplace_back();
1073    Lex(Toks.back());
1074
1075    switch (Toks.back().getKind()) {
1076    case tok::l_paren: case tok::l_square: case tok::l_brace:
1077      ++BracketDepth;
1078      break;
1079
1080    case tok::r_paren: case tok::r_square: case tok::r_brace:
1081      if (BracketDepth == 0)
1082        return;
1083      --BracketDepth;
1084      break;
1085
1086    case tok::semi:
1087      if (BracketDepth == 0)
1088        return;
1089    break;
1090
1091    case tok::eof:
1092      return;
1093
1094    default:
1095      break;
1096    }
1097  }
1098}
1099
1100
1101/// Lex a token following the 'import' contextual keyword.
1102///
1103///     pp-import: [C++20]
1104///           import header-name pp-import-suffix[opt] ;
1105///           import header-name-tokens pp-import-suffix[opt] ;
1106/// [ObjC]    @ import module-name ;
1107/// [Clang]   import module-name ;
1108///
1109///     header-name-tokens:
1110///           string-literal
1111///           < [any sequence of preprocessing-tokens other than >] >
1112///
1113///     module-name:
1114///           module-name-qualifier[opt] identifier
1115///
1116///     module-name-qualifier
1117///           module-name-qualifier[opt] identifier .
1118///
1119/// We respond to a pp-import by importing macros from the named module.
1120bool Preprocessor::LexAfterModuleImport(Token &Result) {
1121  // Figure out what kind of lexer we actually have.
1122  recomputeCurLexerKind();
1123
1124  // Lex the next token. The header-name lexing rules are used at the start of
1125  // a pp-import.
1126  //
1127  // For now, we only support header-name imports in C++20 mode.
1128  // FIXME: Should we allow this in all language modes that support an import
1129  // declaration as an extension?
1130  if (ModuleImportPath.empty() && getLangOpts().CPlusPlusModules) {
1131    if (LexHeaderName(Result))
1132      return true;
1133  } else {
1134    Lex(Result);
1135  }
1136
1137  // Allocate a holding buffer for a sequence of tokens and introduce it into
1138  // the token stream.
1139  auto EnterTokens = [this](ArrayRef<Token> Toks) {
1140    auto ToksCopy = std::make_unique<Token[]>(Toks.size());
1141    std::copy(Toks.begin(), Toks.end(), ToksCopy.get());
1142    EnterTokenStream(std::move(ToksCopy), Toks.size(),
1143                     /*DisableMacroExpansion*/ true, /*IsReinject*/ false);
1144  };
1145
1146  // Check for a header-name.
1147  SmallVector<Token, 32> Suffix;
1148  if (Result.is(tok::header_name)) {
1149    // Enter the header-name token into the token stream; a Lex action cannot
1150    // both return a token and cache tokens (doing so would corrupt the token
1151    // cache if the call to Lex comes from CachingLex / PeekAhead).
1152    Suffix.push_back(Result);
1153
1154    // Consume the pp-import-suffix and expand any macros in it now. We'll add
1155    // it back into the token stream later.
1156    CollectPpImportSuffix(Suffix);
1157    if (Suffix.back().isNot(tok::semi)) {
1158      // This is not a pp-import after all.
1159      EnterTokens(Suffix);
1160      return false;
1161    }
1162
1163    // C++2a [cpp.module]p1:
1164    //   The ';' preprocessing-token terminating a pp-import shall not have
1165    //   been produced by macro replacement.
1166    SourceLocation SemiLoc = Suffix.back().getLocation();
1167    if (SemiLoc.isMacroID())
1168      Diag(SemiLoc, diag::err_header_import_semi_in_macro);
1169
1170    // Reconstitute the import token.
1171    Token ImportTok;
1172    ImportTok.startToken();
1173    ImportTok.setKind(tok::kw_import);
1174    ImportTok.setLocation(ModuleImportLoc);
1175    ImportTok.setIdentifierInfo(getIdentifierInfo("import"));
1176    ImportTok.setLength(6);
1177
1178    auto Action = HandleHeaderIncludeOrImport(
1179        /*HashLoc*/ SourceLocation(), ImportTok, Suffix.front(), SemiLoc);
1180    switch (Action.Kind) {
1181    case ImportAction::None:
1182      break;
1183
1184    case ImportAction::ModuleBegin:
1185      // Let the parser know we're textually entering the module.
1186      Suffix.emplace_back();
1187      Suffix.back().startToken();
1188      Suffix.back().setKind(tok::annot_module_begin);
1189      Suffix.back().setLocation(SemiLoc);
1190      Suffix.back().setAnnotationEndLoc(SemiLoc);
1191      Suffix.back().setAnnotationValue(Action.ModuleForHeader);
1192      LLVM_FALLTHROUGH;
1193
1194    case ImportAction::ModuleImport:
1195    case ImportAction::SkippedModuleImport:
1196      // We chose to import (or textually enter) the file. Convert the
1197      // header-name token into a header unit annotation token.
1198      Suffix[0].setKind(tok::annot_header_unit);
1199      Suffix[0].setAnnotationEndLoc(Suffix[0].getLocation());
1200      Suffix[0].setAnnotationValue(Action.ModuleForHeader);
1201      // FIXME: Call the moduleImport callback?
1202      break;
1203    }
1204
1205    EnterTokens(Suffix);
1206    return false;
1207  }
1208
1209  // The token sequence
1210  //
1211  //   import identifier (. identifier)*
1212  //
1213  // indicates a module import directive. We already saw the 'import'
1214  // contextual keyword, so now we're looking for the identifiers.
1215  if (ModuleImportExpectsIdentifier && Result.getKind() == tok::identifier) {
1216    // We expected to see an identifier here, and we did; continue handling
1217    // identifiers.
1218    ModuleImportPath.push_back(std::make_pair(Result.getIdentifierInfo(),
1219                                              Result.getLocation()));
1220    ModuleImportExpectsIdentifier = false;
1221    CurLexerKind = CLK_LexAfterModuleImport;
1222    return true;
1223  }
1224
1225  // If we're expecting a '.' or a ';', and we got a '.', then wait until we
1226  // see the next identifier. (We can also see a '[[' that begins an
1227  // attribute-specifier-seq here under the C++ Modules TS.)
1228  if (!ModuleImportExpectsIdentifier && Result.getKind() == tok::period) {
1229    ModuleImportExpectsIdentifier = true;
1230    CurLexerKind = CLK_LexAfterModuleImport;
1231    return true;
1232  }
1233
1234  // If we didn't recognize a module name at all, this is not a (valid) import.
1235  if (ModuleImportPath.empty() || Result.is(tok::eof))
1236    return true;
1237
1238  // Consume the pp-import-suffix and expand any macros in it now, if we're not
1239  // at the semicolon already.
1240  SourceLocation SemiLoc = Result.getLocation();
1241  if (Result.isNot(tok::semi)) {
1242    Suffix.push_back(Result);
1243    CollectPpImportSuffix(Suffix);
1244    if (Suffix.back().isNot(tok::semi)) {
1245      // This is not an import after all.
1246      EnterTokens(Suffix);
1247      return false;
1248    }
1249    SemiLoc = Suffix.back().getLocation();
1250  }
1251
1252  // Under the Modules TS, the dot is just part of the module name, and not
1253  // a real hierarchy separator. Flatten such module names now.
1254  //
1255  // FIXME: Is this the right level to be performing this transformation?
1256  std::string FlatModuleName;
1257  if (getLangOpts().ModulesTS || getLangOpts().CPlusPlusModules) {
1258    for (auto &Piece : ModuleImportPath) {
1259      if (!FlatModuleName.empty())
1260        FlatModuleName += ".";
1261      FlatModuleName += Piece.first->getName();
1262    }
1263    SourceLocation FirstPathLoc = ModuleImportPath[0].second;
1264    ModuleImportPath.clear();
1265    ModuleImportPath.push_back(
1266        std::make_pair(getIdentifierInfo(FlatModuleName), FirstPathLoc));
1267  }
1268
1269  Module *Imported = nullptr;
1270  if (getLangOpts().Modules) {
1271    Imported = TheModuleLoader.loadModule(ModuleImportLoc,
1272                                          ModuleImportPath,
1273                                          Module::Hidden,
1274                                          /*IsInclusionDirective=*/false);
1275    if (Imported)
1276      makeModuleVisible(Imported, SemiLoc);
1277  }
1278  if (Callbacks)
1279    Callbacks->moduleImport(ModuleImportLoc, ModuleImportPath, Imported);
1280
1281  if (!Suffix.empty()) {
1282    EnterTokens(Suffix);
1283    return false;
1284  }
1285  return true;
1286}
1287
1288void Preprocessor::makeModuleVisible(Module *M, SourceLocation Loc) {
1289  CurSubmoduleState->VisibleModules.setVisible(
1290      M, Loc, [](Module *) {},
1291      [&](ArrayRef<Module *> Path, Module *Conflict, StringRef Message) {
1292        // FIXME: Include the path in the diagnostic.
1293        // FIXME: Include the import location for the conflicting module.
1294        Diag(ModuleImportLoc, diag::warn_module_conflict)
1295            << Path[0]->getFullModuleName()
1296            << Conflict->getFullModuleName()
1297            << Message;
1298      });
1299
1300  // Add this module to the imports list of the currently-built submodule.
1301  if (!BuildingSubmoduleStack.empty() && M != BuildingSubmoduleStack.back().M)
1302    BuildingSubmoduleStack.back().M->Imports.insert(M);
1303}
1304
1305bool Preprocessor::FinishLexStringLiteral(Token &Result, std::string &String,
1306                                          const char *DiagnosticTag,
1307                                          bool AllowMacroExpansion) {
1308  // We need at least one string literal.
1309  if (Result.isNot(tok::string_literal)) {
1310    Diag(Result, diag::err_expected_string_literal)
1311      << /*Source='in...'*/0 << DiagnosticTag;
1312    return false;
1313  }
1314
1315  // Lex string literal tokens, optionally with macro expansion.
1316  SmallVector<Token, 4> StrToks;
1317  do {
1318    StrToks.push_back(Result);
1319
1320    if (Result.hasUDSuffix())
1321      Diag(Result, diag::err_invalid_string_udl);
1322
1323    if (AllowMacroExpansion)
1324      Lex(Result);
1325    else
1326      LexUnexpandedToken(Result);
1327  } while (Result.is(tok::string_literal));
1328
1329  // Concatenate and parse the strings.
1330  StringLiteralParser Literal(StrToks, *this);
1331  assert(Literal.isAscii() && "Didn't allow wide strings in");
1332
1333  if (Literal.hadError)
1334    return false;
1335
1336  if (Literal.Pascal) {
1337    Diag(StrToks[0].getLocation(), diag::err_expected_string_literal)
1338      << /*Source='in...'*/0 << DiagnosticTag;
1339    return false;
1340  }
1341
1342  String = Literal.GetString();
1343  return true;
1344}
1345
1346bool Preprocessor::parseSimpleIntegerLiteral(Token &Tok, uint64_t &Value) {
1347  assert(Tok.is(tok::numeric_constant));
1348  SmallString<8> IntegerBuffer;
1349  bool NumberInvalid = false;
1350  StringRef Spelling = getSpelling(Tok, IntegerBuffer, &NumberInvalid);
1351  if (NumberInvalid)
1352    return false;
1353  NumericLiteralParser Literal(Spelling, Tok.getLocation(), *this);
1354  if (Literal.hadError || !Literal.isIntegerLiteral() || Literal.hasUDSuffix())
1355    return false;
1356  llvm::APInt APVal(64, 0);
1357  if (Literal.GetIntegerValue(APVal))
1358    return false;
1359  Lex(Tok);
1360  Value = APVal.getLimitedValue();
1361  return true;
1362}
1363
1364void Preprocessor::addCommentHandler(CommentHandler *Handler) {
1365  assert(Handler && "NULL comment handler");
1366  assert(llvm::find(CommentHandlers, Handler) == CommentHandlers.end() &&
1367         "Comment handler already registered");
1368  CommentHandlers.push_back(Handler);
1369}
1370
1371void Preprocessor::removeCommentHandler(CommentHandler *Handler) {
1372  std::vector<CommentHandler *>::iterator Pos =
1373      llvm::find(CommentHandlers, Handler);
1374  assert(Pos != CommentHandlers.end() && "Comment handler not registered");
1375  CommentHandlers.erase(Pos);
1376}
1377
1378bool Preprocessor::HandleComment(Token &result, SourceRange Comment) {
1379  bool AnyPendingTokens = false;
1380  for (std::vector<CommentHandler *>::iterator H = CommentHandlers.begin(),
1381       HEnd = CommentHandlers.end();
1382       H != HEnd; ++H) {
1383    if ((*H)->HandleComment(*this, Comment))
1384      AnyPendingTokens = true;
1385  }
1386  if (!AnyPendingTokens || getCommentRetentionState())
1387    return false;
1388  Lex(result);
1389  return true;
1390}
1391
// Out-of-line definitions of the defaulted destructors of ModuleLoader,
// CommentHandler and CodeCompletionHandler.
// NOTE(review): presumably these live here rather than in the headers so that
// each destructor is emitted in this one translation unit -- confirm against
// the class declarations.
ModuleLoader::~ModuleLoader() = default;

CommentHandler::~CommentHandler() = default;

CodeCompletionHandler::~CodeCompletionHandler() = default;
1397
1398void Preprocessor::createPreprocessingRecord() {
1399  if (Record)
1400    return;
1401
1402  Record = new PreprocessingRecord(getSourceManager());
1403  addPPCallbacks(std::unique_ptr<PPCallbacks>(Record));
1404}
1405