1193326Sed//===--- Preprocess.cpp - C Language Family Preprocessor Implementation ---===// 2193326Sed// 3193326Sed// The LLVM Compiler Infrastructure 4193326Sed// 5193326Sed// This file is distributed under the University of Illinois Open Source 6193326Sed// License. See LICENSE.TXT for details. 7193326Sed// 8193326Sed//===----------------------------------------------------------------------===// 9193326Sed// 10193326Sed// This file implements the Preprocessor interface. 11193326Sed// 12193326Sed//===----------------------------------------------------------------------===// 13193326Sed// 14193326Sed// Options to support: 15193326Sed// -H - Print the name of each header file used. 16193326Sed// -d[DNI] - Dump various things. 17193326Sed// -fworking-directory - #line's with preprocessor's working dir. 18193326Sed// -fpreprocessed 19193326Sed// -dependency-file,-M,-MM,-MF,-MG,-MP,-MT,-MQ,-MD,-MMD 20193326Sed// -W* 21193326Sed// -w 22193326Sed// 23193326Sed// Messages to emit: 24193326Sed// "Multiple include guards may be useful for:\n" 25193326Sed// 26193326Sed//===----------------------------------------------------------------------===// 27193326Sed 28193326Sed#include "clang/Lex/Preprocessor.h" 29252723Sdim#include "clang/Lex/MacroArgs.h" 30252723Sdim#include "clang/Basic/FileManager.h" 31252723Sdim#include "clang/Basic/SourceManager.h" 32252723Sdim#include "clang/Basic/TargetInfo.h" 33252723Sdim#include "clang/Lex/CodeCompletionHandler.h" 34202379Srdivacky#include "clang/Lex/ExternalPreprocessorSource.h" 35193326Sed#include "clang/Lex/HeaderSearch.h" 36252723Sdim#include "clang/Lex/LexDiagnostic.h" 37252723Sdim#include "clang/Lex/LiteralSupport.h" 38193326Sed#include "clang/Lex/MacroInfo.h" 39252723Sdim#include "clang/Lex/ModuleLoader.h" 40193326Sed#include "clang/Lex/Pragma.h" 41205408Srdivacky#include "clang/Lex/PreprocessingRecord.h" 42252723Sdim#include "clang/Lex/PreprocessorOptions.h" 43193326Sed#include "clang/Lex/ScratchBuffer.h" 44193326Sed#include "llvm/ADT/APFloat.h" 45235633Sdim#include "llvm/ADT/SmallString.h" 46252723Sdim#include "llvm/ADT/STLExtras.h" 47252723Sdim#include "llvm/ADT/StringExtras.h" 48252723Sdim#include "llvm/Support/Capacity.h" 49252723Sdim#include "llvm/Support/ConvertUTF.h" 50193326Sed#include "llvm/Support/MemoryBuffer.h" 51198092Srdivacky#include "llvm/Support/raw_ostream.h" 52193326Sedusing namespace clang; 53193326Sed 54193326Sed//===----------------------------------------------------------------------===// 55202379SrdivackyExternalPreprocessorSource::~ExternalPreprocessorSource() { } 56193326Sed 57252723SdimPreprocessor::Preprocessor(IntrusiveRefCntPtr<PreprocessorOptions> PPOpts, 58245431Sdim DiagnosticsEngine &diags, LangOptions &opts, 59226890Sdim const TargetInfo *target, SourceManager &SM, 60226890Sdim HeaderSearch &Headers, ModuleLoader &TheModuleLoader, 61252723Sdim IdentifierInfoLookup *IILookup, bool OwnsHeaders, 62252723Sdim bool DelayInitialization, bool IncrProcessing) 63252723Sdim : PPOpts(PPOpts), Diags(&diags), LangOpts(opts), Target(target), 64252723Sdim FileMgr(Headers.getFileMgr()), SourceMgr(SM), HeaderInfo(Headers), 65252723Sdim TheModuleLoader(TheModuleLoader), ExternalSource(0), 66252723Sdim Identifiers(opts, IILookup), IncrementalProcessing(IncrProcessing), 67252723Sdim CodeComplete(0), CodeCompletionFile(0), CodeCompletionOffset(0), 68263509Sdim LastTokenWasAt(false), ModuleImportExpectsIdentifier(false), 69252723Sdim CodeCompletionReached(0), SkipMainFilePreamble(0, true), CurPPLexer(0), 70252723Sdim CurDirLookup(0), CurLexerKind(CLK_Lexer), Callbacks(0), 71252723Sdim MacroArgCache(0), Record(0), MIChainHead(0), MICache(0), 72252723Sdim DeserialMIChainHead(0) { 73226890Sdim OwnsHeaderSearch = OwnsHeaders; 74226890Sdim 75193326Sed ScratchBuf = new ScratchBuffer(SourceMgr); 76193326Sed CounterValue = 0; // __COUNTER__ starts at 0. 77226890Sdim 78193326Sed // Clear stats. 79193326Sed NumDirectives = NumDefined = NumUndefined = NumPragma = 0; 80193326Sed NumIf = NumElse = NumEndif = 0; 81193326Sed NumEnteredSourceFiles = 0; 82193326Sed NumMacroExpanded = NumFnMacroExpanded = NumBuiltinMacroExpanded = 0; 83193326Sed NumFastMacroExpanded = NumTokenPaste = NumFastTokenPaste = 0; 84198092Srdivacky MaxIncludeStackDepth = 0; 85193326Sed NumSkipped = 0; 86226890Sdim 87193326Sed // Default to discarding comments. 88193326Sed KeepComments = false; 89193326Sed KeepMacroComments = false; 90226890Sdim SuppressIncludeNotFoundError = false; 91226890Sdim 92193326Sed // Macro expansion is enabled. 93193326Sed DisableMacroExpansion = false; 94245431Sdim MacroExpansionInDirectivesOverride = false; 95193326Sed InMacroArgs = false; 96235633Sdim InMacroArgPreExpansion = false; 97193326Sed NumCachedTokenLexers = 0; 98245431Sdim PragmasEnabled = true; 99252723Sdim ParsingIfOrElifDirective = false; 100252723Sdim PreprocessedOutput = false; 101245431Sdim 102193326Sed CachedLexPos = 0; 103252723Sdim 104202379Srdivacky // We haven't read anything from the external source. 105202379Srdivacky ReadMacrosFromExternalSource = false; 106226890Sdim 107193326Sed // "Poison" __VA_ARGS__, which can only appear in the expansion of a macro. 108193326Sed // This gets unpoisoned where it is allowed. 109193326Sed (Ident__VA_ARGS__ = getIdentifierInfo("__VA_ARGS__"))->setIsPoisoned(); 110221345Sdim SetPoisonReason(Ident__VA_ARGS__,diag::ext_pp_bad_vaargs_use); 111226890Sdim 112193326Sed // Initialize the pragma handlers. 113226890Sdim PragmaHandlers = new PragmaNamespace(StringRef()); 114193326Sed RegisterBuiltinPragmas(); 115226890Sdim 116193326Sed // Initialize builtin macros like __LINE__ and friends. 117193326Sed RegisterBuiltinMacros(); 118226890Sdim 119235633Sdim if(LangOpts.Borland) { 120221345Sdim Ident__exception_info = getIdentifierInfo("_exception_info"); 121221345Sdim Ident___exception_info = getIdentifierInfo("__exception_info"); 122221345Sdim Ident_GetExceptionInfo = getIdentifierInfo("GetExceptionInformation"); 123221345Sdim Ident__exception_code = getIdentifierInfo("_exception_code"); 124221345Sdim Ident___exception_code = getIdentifierInfo("__exception_code"); 125221345Sdim Ident_GetExceptionCode = getIdentifierInfo("GetExceptionCode"); 126221345Sdim Ident__abnormal_termination = getIdentifierInfo("_abnormal_termination"); 127221345Sdim Ident___abnormal_termination = getIdentifierInfo("__abnormal_termination"); 128221345Sdim Ident_AbnormalTermination = getIdentifierInfo("AbnormalTermination"); 129221345Sdim } else { 130221345Sdim Ident__exception_info = Ident__exception_code = Ident__abnormal_termination = 0; 131221345Sdim Ident___exception_info = Ident___exception_code = Ident___abnormal_termination = 0; 132221345Sdim Ident_GetExceptionInfo = Ident_GetExceptionCode = Ident_AbnormalTermination = 0; 133235633Sdim } 134245431Sdim 135245431Sdim if (!DelayInitialization) { 136245431Sdim assert(Target && "Must provide target information for PP initialization"); 137245431Sdim Initialize(*Target); 138245431Sdim } 139245431Sdim} 140245431Sdim 141245431SdimPreprocessor::~Preprocessor() { 142245431Sdim assert(BacktrackPositions.empty() && "EnableBacktrack/Backtrack imbalance!"); 143245431Sdim 144245431Sdim while (!IncludeMacroStack.empty()) { 145245431Sdim delete IncludeMacroStack.back().TheLexer; 146245431Sdim delete IncludeMacroStack.back().TheTokenLexer; 147245431Sdim IncludeMacroStack.pop_back(); 148245431Sdim } 149245431Sdim 150245431Sdim // Free any macro definitions. 151245431Sdim for (MacroInfoChain *I = MIChainHead ; I ; I = I->Next) 152245431Sdim I->MI.Destroy(); 153245431Sdim 154245431Sdim // Free any cached macro expanders. 155245431Sdim for (unsigned i = 0, e = NumCachedTokenLexers; i != e; ++i) 156245431Sdim delete TokenLexerCache[i]; 157245431Sdim 158252723Sdim for (DeserializedMacroInfoChain *I = DeserialMIChainHead ; I ; I = I->Next) 159252723Sdim I->MI.Destroy(); 160252723Sdim 161245431Sdim // Free any cached MacroArgs. 162245431Sdim for (MacroArgs *ArgList = MacroArgCache; ArgList; ) 163245431Sdim ArgList = ArgList->deallocate(); 164245431Sdim 165245431Sdim // Release pragma information. 166245431Sdim delete PragmaHandlers; 167245431Sdim 168245431Sdim // Delete the scratch buffer info. 169245431Sdim delete ScratchBuf; 170245431Sdim 171245431Sdim // Delete the header search info, if we own it. 172245431Sdim if (OwnsHeaderSearch) 173245431Sdim delete &HeaderInfo; 174245431Sdim 175245431Sdim delete Callbacks; 176245431Sdim} 177245431Sdim 178245431Sdimvoid Preprocessor::Initialize(const TargetInfo &Target) { 179245431Sdim assert((!this->Target || this->Target == &Target) && 180245431Sdim "Invalid override of target information"); 181245431Sdim this->Target = &Target; 182235633Sdim 183245431Sdim // Initialize information about built-ins. 184245431Sdim BuiltinInfo.InitializeTarget(Target); 185235633Sdim HeaderInfo.setTarget(Target); 186193326Sed} 187193326Sed 188193326Sedvoid Preprocessor::setPTHManager(PTHManager* pm) { 189193326Sed PTH.reset(pm); 190198398Srdivacky FileMgr.addStatCache(PTH->createStatCache()); 191193326Sed} 192193326Sed 193193326Sedvoid Preprocessor::DumpToken(const Token &Tok, bool DumpFlags) const { 194198092Srdivacky llvm::errs() << tok::getTokenName(Tok.getKind()) << " '" 195198092Srdivacky << getSpelling(Tok) << "'"; 196198092Srdivacky 197193326Sed if (!DumpFlags) return; 198198092Srdivacky 199198092Srdivacky llvm::errs() << "\t"; 200193326Sed if (Tok.isAtStartOfLine()) 201198092Srdivacky llvm::errs() << " [StartOfLine]"; 202193326Sed if (Tok.hasLeadingSpace()) 203198092Srdivacky llvm::errs() << " [LeadingSpace]"; 204193326Sed if (Tok.isExpandDisabled()) 205198092Srdivacky llvm::errs() << " [ExpandDisabled]"; 206193326Sed if (Tok.needsCleaning()) { 207193326Sed const char *Start = SourceMgr.getCharacterData(Tok.getLocation()); 208226890Sdim llvm::errs() << " [UnClean='" << StringRef(Start, Tok.getLength()) 209198092Srdivacky << "']"; 210193326Sed } 211198092Srdivacky 212198092Srdivacky llvm::errs() << "\tLoc=<"; 213193326Sed DumpLocation(Tok.getLocation()); 214198092Srdivacky llvm::errs() << ">"; 215193326Sed} 216193326Sed 217193326Sedvoid Preprocessor::DumpLocation(SourceLocation Loc) const { 218193326Sed Loc.dump(SourceMgr); 219193326Sed} 220193326Sed 221193326Sedvoid Preprocessor::DumpMacro(const MacroInfo &MI) const { 222198092Srdivacky llvm::errs() << "MACRO: "; 223193326Sed for (unsigned i = 0, e = MI.getNumTokens(); i != e; ++i) { 224193326Sed DumpToken(MI.getReplacementToken(i)); 225198092Srdivacky llvm::errs() << " "; 226193326Sed } 227198092Srdivacky llvm::errs() << "\n"; 228193326Sed} 229193326Sed 230193326Sedvoid Preprocessor::PrintStats() { 231198092Srdivacky llvm::errs() << "\n*** Preprocessor Stats:\n"; 232198092Srdivacky llvm::errs() << NumDirectives << " directives found:\n"; 233198092Srdivacky llvm::errs() << " " << NumDefined << " #define.\n"; 234198092Srdivacky llvm::errs() << " " << NumUndefined << " #undef.\n"; 235198092Srdivacky llvm::errs() << " #include/#include_next/#import:\n"; 236198092Srdivacky llvm::errs() << " " << NumEnteredSourceFiles << " source files entered.\n"; 237198092Srdivacky llvm::errs() << " " << MaxIncludeStackDepth << " max include stack depth\n"; 238198092Srdivacky llvm::errs() << " " << NumIf << " #if/#ifndef/#ifdef.\n"; 239198092Srdivacky llvm::errs() << " " << NumElse << " #else/#elif.\n"; 240198092Srdivacky llvm::errs() << " " << NumEndif << " #endif.\n"; 241198092Srdivacky llvm::errs() << " " << NumPragma << " #pragma.\n"; 242198092Srdivacky llvm::errs() << NumSkipped << " #if/#ifndef#ifdef regions skipped\n"; 243193326Sed 244198092Srdivacky llvm::errs() << NumMacroExpanded << "/" << NumFnMacroExpanded << "/" 245193326Sed << NumBuiltinMacroExpanded << " obj/fn/builtin macros expanded, " 246193326Sed << NumFastMacroExpanded << " on the fast path.\n"; 247198092Srdivacky llvm::errs() << (NumFastTokenPaste+NumTokenPaste) 248193326Sed << " token paste (##) operations performed, " 249193326Sed << NumFastTokenPaste << " on the fast path.\n"; 250245431Sdim 251245431Sdim llvm::errs() << "\nPreprocessor Memory: " << getTotalMemory() << "B total"; 252245431Sdim 253245431Sdim llvm::errs() << "\n BumpPtr: " << BP.getTotalMemory(); 254245431Sdim llvm::errs() << "\n Macro Expanded Tokens: " 255245431Sdim << llvm::capacity_in_bytes(MacroExpandedTokens); 256245431Sdim llvm::errs() << "\n Predefines Buffer: " << Predefines.capacity(); 257245431Sdim llvm::errs() << "\n Macros: " << llvm::capacity_in_bytes(Macros); 258245431Sdim llvm::errs() << "\n #pragma push_macro Info: " 259245431Sdim << llvm::capacity_in_bytes(PragmaPushMacroInfo); 260245431Sdim llvm::errs() << "\n Poison Reasons: " 261245431Sdim << llvm::capacity_in_bytes(PoisonReasons); 262245431Sdim llvm::errs() << "\n Comment Handlers: " 263245431Sdim << llvm::capacity_in_bytes(CommentHandlers) << "\n"; 264193326Sed} 265193326Sed 266205219SrdivackyPreprocessor::macro_iterator 267205219SrdivackyPreprocessor::macro_begin(bool IncludeExternalMacros) const { 268205219Srdivacky if (IncludeExternalMacros && ExternalSource && 269202379Srdivacky !ReadMacrosFromExternalSource) { 270202379Srdivacky ReadMacrosFromExternalSource = true; 271202379Srdivacky ExternalSource->ReadDefinedMacros(); 272202379Srdivacky } 273205219Srdivacky 274205219Srdivacky return Macros.begin(); 275202379Srdivacky} 276202379Srdivacky 277224145Sdimsize_t Preprocessor::getTotalMemory() const { 278226890Sdim return BP.getTotalMemory() 279226890Sdim + llvm::capacity_in_bytes(MacroExpandedTokens) 280226890Sdim + Predefines.capacity() /* Predefines buffer. */ 281226890Sdim + llvm::capacity_in_bytes(Macros) 282226890Sdim + llvm::capacity_in_bytes(PragmaPushMacroInfo) 283226890Sdim + llvm::capacity_in_bytes(PoisonReasons) 284226890Sdim + llvm::capacity_in_bytes(CommentHandlers); 285224145Sdim} 286224145Sdim 287205219SrdivackyPreprocessor::macro_iterator 288205219SrdivackyPreprocessor::macro_end(bool IncludeExternalMacros) const { 289205219Srdivacky if (IncludeExternalMacros && ExternalSource && 290202379Srdivacky !ReadMacrosFromExternalSource) { 291202379Srdivacky ReadMacrosFromExternalSource = true; 292202379Srdivacky ExternalSource->ReadDefinedMacros(); 293202379Srdivacky } 294205219Srdivacky 295205219Srdivacky return Macros.end(); 296202379Srdivacky} 297202379Srdivacky 298245431Sdim/// \brief Compares macro tokens with a specified token value sequence. 299245431Sdimstatic bool MacroDefinitionEquals(const MacroInfo *MI, 300252723Sdim ArrayRef<TokenValue> Tokens) { 301245431Sdim return Tokens.size() == MI->getNumTokens() && 302245431Sdim std::equal(Tokens.begin(), Tokens.end(), MI->tokens_begin()); 303245431Sdim} 304245431Sdim 305245431SdimStringRef Preprocessor::getLastMacroWithSpelling( 306245431Sdim SourceLocation Loc, 307245431Sdim ArrayRef<TokenValue> Tokens) const { 308245431Sdim SourceLocation BestLocation; 309245431Sdim StringRef BestSpelling; 310245431Sdim for (Preprocessor::macro_iterator I = macro_begin(), E = macro_end(); 311245431Sdim I != E; ++I) { 312252723Sdim if (!I->second->getMacroInfo()->isObjectLike()) 313245431Sdim continue; 314252723Sdim const MacroDirective::DefInfo 315252723Sdim Def = I->second->findDirectiveAtLoc(Loc, SourceMgr); 316252723Sdim if (!Def) 317245431Sdim continue; 318252723Sdim if (!MacroDefinitionEquals(Def.getMacroInfo(), Tokens)) 319245431Sdim continue; 320252723Sdim SourceLocation Location = Def.getLocation(); 321245431Sdim // Choose the macro defined latest. 322245431Sdim if (BestLocation.isInvalid() || 323245431Sdim (Location.isValid() && 324245431Sdim SourceMgr.isBeforeInTranslationUnit(BestLocation, Location))) { 325245431Sdim BestLocation = Location; 326245431Sdim BestSpelling = I->first->getName(); 327245431Sdim } 328245431Sdim } 329245431Sdim return BestSpelling; 330245431Sdim} 331245431Sdim 332235633Sdimvoid Preprocessor::recomputeCurLexerKind() { 333235633Sdim if (CurLexer) 334235633Sdim CurLexerKind = CLK_Lexer; 335235633Sdim else if (CurPTHLexer) 336235633Sdim CurLexerKind = CLK_PTHLexer; 337235633Sdim else if (CurTokenLexer) 338235633Sdim CurLexerKind = CLK_TokenLexer; 339235633Sdim else 340235633Sdim CurLexerKind = CLK_CachingLexer; 341235633Sdim} 342235633Sdim 343205219Srdivackybool Preprocessor::SetCodeCompletionPoint(const FileEntry *File, 344226890Sdim unsigned CompleteLine, 345226890Sdim unsigned CompleteColumn) { 346226890Sdim assert(File); 347226890Sdim assert(CompleteLine && CompleteColumn && "Starts from 1:1"); 348226890Sdim assert(!CodeCompletionFile && "Already set"); 349226890Sdim 350200583Srdivacky using llvm::MemoryBuffer; 351200583Srdivacky 352200583Srdivacky // Load the actual file's contents. 353205408Srdivacky bool Invalid = false; 354205408Srdivacky const MemoryBuffer *Buffer = SourceMgr.getMemoryBufferForFile(File, &Invalid); 355205408Srdivacky if (Invalid) 356200583Srdivacky return true; 357200583Srdivacky 358200583Srdivacky // Find the byte position of the truncation point. 359200583Srdivacky const char *Position = Buffer->getBufferStart(); 360226890Sdim for (unsigned Line = 1; Line < CompleteLine; ++Line) { 361200583Srdivacky for (; *Position; ++Position) { 362200583Srdivacky if (*Position != '\r' && *Position != '\n') 363200583Srdivacky continue; 364205219Srdivacky 365200583Srdivacky // Eat \r\n or \n\r as a single line. 366200583Srdivacky if ((Position[1] == '\r' || Position[1] == '\n') && 367200583Srdivacky Position[0] != Position[1]) 368200583Srdivacky ++Position; 369200583Srdivacky ++Position; 370200583Srdivacky break; 371200583Srdivacky } 372200583Srdivacky } 373205219Srdivacky 374226890Sdim Position += CompleteColumn - 1; 375205219Srdivacky 376226890Sdim // Insert '\0' at the code-completion point. 377200583Srdivacky if (Position < Buffer->getBufferEnd()) { 378226890Sdim CodeCompletionFile = File; 379226890Sdim CodeCompletionOffset = Position - Buffer->getBufferStart(); 380226890Sdim 381226890Sdim MemoryBuffer *NewBuffer = 382226890Sdim MemoryBuffer::getNewUninitMemBuffer(Buffer->getBufferSize() + 1, 383226890Sdim Buffer->getBufferIdentifier()); 384226890Sdim char *NewBuf = const_cast<char*>(NewBuffer->getBufferStart()); 385226890Sdim char *NewPos = std::copy(Buffer->getBufferStart(), Position, NewBuf); 386226890Sdim *NewPos = '\0'; 387226890Sdim std::copy(Position, Buffer->getBufferEnd(), NewPos+1); 388226890Sdim SourceMgr.overrideFileContents(File, NewBuffer); 389200583Srdivacky } 390200583Srdivacky 391200583Srdivacky return false; 392200583Srdivacky} 393200583Srdivacky 394212904Sdimvoid Preprocessor::CodeCompleteNaturalLanguage() { 395212904Sdim if (CodeComplete) 396212904Sdim CodeComplete->CodeCompleteNaturalLanguage(); 397226890Sdim setCodeCompletionReached(); 398212904Sdim} 399212904Sdim 400193326Sed/// getSpelling - This method is used to get the spelling of a token into a 401204643Srdivacky/// SmallVector. Note that the returned StringRef may not point to the 402204643Srdivacky/// supplied buffer if a copy can be avoided. 403226890SdimStringRef Preprocessor::getSpelling(const Token &Tok, 404226890Sdim SmallVectorImpl<char> &Buffer, 405205219Srdivacky bool *Invalid) const { 406218893Sdim // NOTE: this has to be checked *before* testing for an IdentifierInfo. 407252723Sdim if (Tok.isNot(tok::raw_identifier) && !Tok.hasUCN()) { 408218893Sdim // Try the fast path. 409218893Sdim if (const IdentifierInfo *II = Tok.getIdentifierInfo()) 410218893Sdim return II->getName(); 411218893Sdim } 412204643Srdivacky 413204643Srdivacky // Resize the buffer if we need to copy into it. 414204643Srdivacky if (Tok.needsCleaning()) 415204643Srdivacky Buffer.resize(Tok.getLength()); 416204643Srdivacky 417204643Srdivacky const char *Ptr = Buffer.data(); 418205219Srdivacky unsigned Len = getSpelling(Tok, Ptr, Invalid); 419226890Sdim return StringRef(Ptr, Len); 420204643Srdivacky} 421204643Srdivacky 422193326Sed/// CreateString - Plop the specified string into a scratch buffer and return a 423193326Sed/// location for it. If specified, the source location provides a source 424193326Sed/// location for the token. 425245431Sdimvoid Preprocessor::CreateString(StringRef Str, Token &Tok, 426226890Sdim SourceLocation ExpansionLocStart, 427226890Sdim SourceLocation ExpansionLocEnd) { 428245431Sdim Tok.setLength(Str.size()); 429198092Srdivacky 430193326Sed const char *DestPtr; 431245431Sdim SourceLocation Loc = ScratchBuf->getToken(Str.data(), Str.size(), DestPtr); 432198092Srdivacky 433226890Sdim if (ExpansionLocStart.isValid()) 434226890Sdim Loc = SourceMgr.createExpansionLoc(Loc, ExpansionLocStart, 435245431Sdim ExpansionLocEnd, Str.size()); 436193326Sed Tok.setLocation(Loc); 437198092Srdivacky 438218893Sdim // If this is a raw identifier or a literal token, set the pointer data. 439218893Sdim if (Tok.is(tok::raw_identifier)) 440218893Sdim Tok.setRawIdentifierData(DestPtr); 441218893Sdim else if (Tok.isLiteral()) 442193326Sed Tok.setLiteralData(DestPtr); 443193326Sed} 444193326Sed 445235633SdimModule *Preprocessor::getCurrentModule() { 446235633Sdim if (getLangOpts().CurrentModule.empty()) 447235633Sdim return 0; 448235633Sdim 449235633Sdim return getHeaderSearchInfo().lookupModule(getLangOpts().CurrentModule); 450235633Sdim} 451193326Sed 452193326Sed//===----------------------------------------------------------------------===// 453193326Sed// Preprocessor Initialization Methods 454193326Sed//===----------------------------------------------------------------------===// 455193326Sed 456193326Sed 457193326Sed/// EnterMainSourceFile - Enter the specified FileID as the main source file, 458193326Sed/// which implicitly adds the builtin defines etc. 459207619Srdivackyvoid Preprocessor::EnterMainSourceFile() { 460193326Sed // We do not allow the preprocessor to reenter the main file. Doing so will 461193326Sed // cause FileID's to accumulate information from both runs (e.g. #line 462193326Sed // information) and predefined macros aren't guaranteed to be set properly. 463193326Sed assert(NumEnteredSourceFiles == 0 && "Cannot reenter the main file!"); 464193326Sed FileID MainFileID = SourceMgr.getMainFileID(); 465198092Srdivacky 466235633Sdim // If MainFileID is loaded it means we loaded an AST file, no need to enter 467235633Sdim // a main file. 468235633Sdim if (!SourceMgr.isLoadedFileID(MainFileID)) { 469235633Sdim // Enter the main file source buffer. 470235633Sdim EnterSourceFile(MainFileID, 0, SourceLocation()); 471212904Sdim 472235633Sdim // If we've been asked to skip bytes in the main file (e.g., as part of a 473235633Sdim // precompiled preamble), do so now. 474235633Sdim if (SkipMainFilePreamble.first > 0) 475235633Sdim CurLexer->SkipBytes(SkipMainFilePreamble.first, 476235633Sdim SkipMainFilePreamble.second); 477235633Sdim 478235633Sdim // Tell the header info that the main file was entered. If the file is later 479235633Sdim // #imported, it won't be re-entered. 480235633Sdim if (const FileEntry *FE = SourceMgr.getFileEntryForID(MainFileID)) 481235633Sdim HeaderInfo.IncrementIncludeCount(FE); 482235633Sdim } 483198092Srdivacky 484201361Srdivacky // Preprocess Predefines to populate the initial preprocessor state. 485198092Srdivacky llvm::MemoryBuffer *SB = 486206275Srdivacky llvm::MemoryBuffer::getMemBufferCopy(Predefines, "<built-in>"); 487212904Sdim assert(SB && "Cannot create predefined source buffer"); 488193326Sed FileID FID = SourceMgr.createFileIDForMemBuffer(SB); 489193326Sed assert(!FID.isInvalid() && "Could not create FileID for predefines?"); 490252723Sdim setPredefinesFileID(FID); 491198092Srdivacky 492193326Sed // Start parsing the predefines. 493207619Srdivacky EnterSourceFile(FID, 0, SourceLocation()); 494193326Sed} 495193326Sed 496206084Srdivackyvoid Preprocessor::EndSourceFile() { 497206084Srdivacky // Notify the client that we reached the end of the source file. 498206084Srdivacky if (Callbacks) 499206084Srdivacky Callbacks->EndOfMainFile(); 500206084Srdivacky} 501193326Sed 502193326Sed//===----------------------------------------------------------------------===// 503193326Sed// Lexer Event Handling. 504193326Sed//===----------------------------------------------------------------------===// 505193326Sed 506252723Sdimstatic void appendCodePoint(unsigned Codepoint, 507252723Sdim llvm::SmallVectorImpl<char> &Str) { 508252723Sdim char ResultBuf[4]; 509252723Sdim char *ResultPtr = ResultBuf; 510252723Sdim bool Res = llvm::ConvertCodePointToUTF8(Codepoint, ResultPtr); 511252723Sdim (void)Res; 512252723Sdim assert(Res && "Unexpected conversion failure"); 513252723Sdim Str.append(ResultBuf, ResultPtr); 514252723Sdim} 515252723Sdim 516252723Sdimstatic void expandUCNs(SmallVectorImpl<char> &Buf, StringRef Input) { 517252723Sdim for (StringRef::iterator I = Input.begin(), E = Input.end(); I != E; ++I) { 518252723Sdim if (*I != '\\') { 519252723Sdim Buf.push_back(*I); 520252723Sdim continue; 521252723Sdim } 522252723Sdim 523252723Sdim ++I; 524252723Sdim assert(*I == 'u' || *I == 'U'); 525252723Sdim 526252723Sdim unsigned NumHexDigits; 527252723Sdim if (*I == 'u') 528252723Sdim NumHexDigits = 4; 529252723Sdim else 530252723Sdim NumHexDigits = 8; 531252723Sdim 532252723Sdim assert(I + NumHexDigits <= E); 533252723Sdim 534252723Sdim uint32_t CodePoint = 0; 535252723Sdim for (++I; NumHexDigits != 0; ++I, --NumHexDigits) { 536252723Sdim unsigned Value = llvm::hexDigitValue(*I); 537252723Sdim assert(Value != -1U); 538252723Sdim 539252723Sdim CodePoint <<= 4; 540252723Sdim CodePoint += Value; 541252723Sdim } 542252723Sdim 543252723Sdim appendCodePoint(CodePoint, Buf); 544252723Sdim --I; 545252723Sdim } 546252723Sdim} 547252723Sdim 548218893Sdim/// LookUpIdentifierInfo - Given a tok::raw_identifier token, look up the 549218893Sdim/// identifier information for the token and install it into the token, 550218893Sdim/// updating the token kind accordingly. 551218893SdimIdentifierInfo *Preprocessor::LookUpIdentifierInfo(Token &Identifier) const { 552218893Sdim assert(Identifier.getRawIdentifierData() != 0 && "No raw identifier data!"); 553198092Srdivacky 554193326Sed // Look up this token, see if it is a macro, or if it is a language keyword. 555193326Sed IdentifierInfo *II; 556252723Sdim if (!Identifier.needsCleaning() && !Identifier.hasUCN()) { 557193326Sed // No cleaning needed, just use the characters from the lexed buffer. 558226890Sdim II = getIdentifierInfo(StringRef(Identifier.getRawIdentifierData(), 559252723Sdim Identifier.getLength())); 560193326Sed } else { 561193326Sed // Cleaning needed, alloca a buffer, clean into it, then use the buffer. 562235633Sdim SmallString<64> IdentifierBuffer; 563226890Sdim StringRef CleanedStr = getSpelling(Identifier, IdentifierBuffer); 564252723Sdim 565252723Sdim if (Identifier.hasUCN()) { 566252723Sdim SmallString<64> UCNIdentifierBuffer; 567252723Sdim expandUCNs(UCNIdentifierBuffer, CleanedStr); 568252723Sdim II = getIdentifierInfo(UCNIdentifierBuffer); 569252723Sdim } else { 570252723Sdim II = getIdentifierInfo(CleanedStr); 571252723Sdim } 572193326Sed } 573218893Sdim 574218893Sdim // Update the token info (identifier info and appropriate token kind). 575193326Sed Identifier.setIdentifierInfo(II); 576218893Sdim Identifier.setKind(II->getTokenID()); 577218893Sdim 578193326Sed return II; 579193326Sed} 580193326Sed 581221345Sdimvoid Preprocessor::SetPoisonReason(IdentifierInfo *II, unsigned DiagID) { 582221345Sdim PoisonReasons[II] = DiagID; 583221345Sdim} 584193326Sed 585221345Sdimvoid Preprocessor::PoisonSEHIdentifiers(bool Poison) { 586221345Sdim assert(Ident__exception_code && Ident__exception_info); 587221345Sdim assert(Ident___exception_code && Ident___exception_info); 588221345Sdim Ident__exception_code->setIsPoisoned(Poison); 589221345Sdim Ident___exception_code->setIsPoisoned(Poison); 590221345Sdim Ident_GetExceptionCode->setIsPoisoned(Poison); 591221345Sdim Ident__exception_info->setIsPoisoned(Poison); 592221345Sdim Ident___exception_info->setIsPoisoned(Poison); 593221345Sdim Ident_GetExceptionInfo->setIsPoisoned(Poison); 594221345Sdim Ident__abnormal_termination->setIsPoisoned(Poison); 595221345Sdim Ident___abnormal_termination->setIsPoisoned(Poison); 596221345Sdim Ident_AbnormalTermination->setIsPoisoned(Poison); 597221345Sdim} 598221345Sdim 599221345Sdimvoid Preprocessor::HandlePoisonedIdentifier(Token & Identifier) { 600221345Sdim assert(Identifier.getIdentifierInfo() && 601221345Sdim "Can't handle identifiers without identifier info!"); 602221345Sdim llvm::DenseMap<IdentifierInfo*,unsigned>::const_iterator it = 603221345Sdim PoisonReasons.find(Identifier.getIdentifierInfo()); 604221345Sdim if(it == PoisonReasons.end()) 605221345Sdim Diag(Identifier, diag::err_pp_used_poisoned_id); 606221345Sdim else 607221345Sdim Diag(Identifier,it->second) << Identifier.getIdentifierInfo(); 608221345Sdim} 609221345Sdim 610193326Sed/// HandleIdentifier - This callback is invoked when the lexer reads an 611193326Sed/// identifier. This callback looks up the identifier in the map and/or 612193326Sed/// potentially macro expands it or turns it into a named token (like 'for'). 613193326Sed/// 614193326Sed/// Note that callers of this method are guarded by checking the 615193326Sed/// IdentifierInfo's 'isHandleIdentifierCase' bit. If this method changes, the 616193326Sed/// IdentifierInfo methods that compute these properties will need to change to 617193326Sed/// match. 618263509Sdimbool Preprocessor::HandleIdentifier(Token &Identifier) { 619193326Sed assert(Identifier.getIdentifierInfo() && 620193326Sed "Can't handle identifiers without identifier info!"); 621198092Srdivacky 622193326Sed IdentifierInfo &II = *Identifier.getIdentifierInfo(); 623193326Sed 624235633Sdim // If the information about this identifier is out of date, update it from 625235633Sdim // the external source. 626245431Sdim // We have to treat __VA_ARGS__ in a special way, since it gets 627245431Sdim // serialized with isPoisoned = true, but our preprocessor may have 628245431Sdim // unpoisoned it if we're defining a C99 macro. 629235633Sdim if (II.isOutOfDate()) { 630245431Sdim bool CurrentIsPoisoned = false; 631245431Sdim if (&II == Ident__VA_ARGS__) 632245431Sdim CurrentIsPoisoned = Ident__VA_ARGS__->isPoisoned(); 633245431Sdim 634235633Sdim ExternalSource->updateOutOfDateIdentifier(II); 635235633Sdim Identifier.setKind(II.getTokenID()); 636245431Sdim 637245431Sdim if (&II == Ident__VA_ARGS__) 638245431Sdim II.setIsPoisoned(CurrentIsPoisoned); 639235633Sdim } 640235633Sdim 641193326Sed // If this identifier was poisoned, and if it was not produced from a macro 642193326Sed // expansion, emit an error. 643193326Sed if (II.isPoisoned() && CurPPLexer) { 644221345Sdim HandlePoisonedIdentifier(Identifier); 645193326Sed } 646198092Srdivacky 647193326Sed // If this is a macro to be expanded, do it. 648252723Sdim if (MacroDirective *MD = getMacroDirective(&II)) { 649252723Sdim MacroInfo *MI = MD->getMacroInfo(); 650235633Sdim if (!DisableMacroExpansion) { 651252723Sdim if (!Identifier.isExpandDisabled() && MI->isEnabled()) { 652263509Sdim // C99 6.10.3p10: If the preprocessing token immediately after the 653263509Sdim // macro name isn't a '(', this macro should not be expanded. 654263509Sdim if (!MI->isFunctionLike() || isNextPPTokenLParen()) 655263509Sdim return HandleMacroExpandedIdentifier(Identifier, MD); 656193326Sed } else { 657193326Sed // C99 6.10.3.4p2 says that a disabled macro may never again be 658193326Sed // expanded, even if it's in a context where it could be expanded in the 659193326Sed // future. 660193326Sed Identifier.setFlag(Token::DisableExpand); 661252723Sdim if (MI->isObjectLike() || isNextPPTokenLParen()) 662252723Sdim Diag(Identifier, diag::pp_disabled_macro_expansion); 663193326Sed } 664193326Sed } 665193326Sed } 666193326Sed 667226890Sdim // If this identifier is a keyword in C++11, produce a warning. Don't warn if 668226890Sdim // we're not considering macro expansion, since this identifier might be the 669226890Sdim // name of a macro. 670226890Sdim // FIXME: This warning is disabled in cases where it shouldn't be, like 671226890Sdim // "#define constexpr constexpr", "int constexpr;" 672226890Sdim if (II.isCXX11CompatKeyword() & !DisableMacroExpansion) { 673226890Sdim Diag(Identifier, diag::warn_cxx11_keyword) << II.getName(); 674226890Sdim // Don't diagnose this keyword again in this translation unit. 675226890Sdim II.setIsCXX11CompatKeyword(false); 676226890Sdim } 677226890Sdim 678193326Sed // C++ 2.11p2: If this is an alternative representation of a C++ operator, 679193326Sed // then we act as if it is the actual operator and not the textual 680193326Sed // representation of it. 681193326Sed if (II.isCPlusPlusOperatorKeyword()) 682193326Sed Identifier.setIdentifierInfo(0); 683193326Sed 684193326Sed // If this is an extension token, diagnose its use. 685193326Sed // We avoid diagnosing tokens that originate from macro definitions. 686193326Sed // FIXME: This warning is disabled in cases where it shouldn't be, 687193326Sed // like "#define TY typeof", "TY(1) x". 688193326Sed if (II.isExtensionToken() && !DisableMacroExpansion) 689193326Sed Diag(Identifier, diag::ext_token_used); 690226890Sdim 691263509Sdim // If this is the 'import' contextual keyword following an '@', note 692235633Sdim // that the next token indicates a module name. 693235633Sdim // 694252723Sdim // Note that we do not treat 'import' as a contextual 695235633Sdim // keyword when we're in a caching lexer, because caching lexers only get 696235633Sdim // used in contexts where import declarations are disallowed. 697263509Sdim if (LastTokenWasAt && II.isModulesImport() && !InMacroArgs && 698263509Sdim !DisableMacroExpansion && getLangOpts().Modules && 699263509Sdim CurLexerKind != CLK_CachingLexer) { 700226890Sdim ModuleImportLoc = Identifier.getLocation(); 701235633Sdim ModuleImportPath.clear(); 702235633Sdim ModuleImportExpectsIdentifier = true; 703226890Sdim CurLexerKind = CLK_LexAfterModuleImport; 704226890Sdim } 705263509Sdim return true; 706193326Sed} 707195341Sed 708263509Sdimvoid Preprocessor::Lex(Token &Result) { 709263509Sdim // We loop here until a lex function retuns a token; this avoids recursion. 710263509Sdim bool ReturnedToken; 711263509Sdim do { 712263509Sdim switch (CurLexerKind) { 713263509Sdim case CLK_Lexer: 714263509Sdim ReturnedToken = CurLexer->Lex(Result); 715263509Sdim break; 716263509Sdim case CLK_PTHLexer: 717263509Sdim ReturnedToken = CurPTHLexer->Lex(Result); 718263509Sdim break; 719263509Sdim case CLK_TokenLexer: 720263509Sdim ReturnedToken = CurTokenLexer->Lex(Result); 721263509Sdim break; 722263509Sdim case CLK_CachingLexer: 723263509Sdim CachingLex(Result); 724263509Sdim ReturnedToken = true; 725263509Sdim break; 726263509Sdim case CLK_LexAfterModuleImport: 727263509Sdim LexAfterModuleImport(Result); 728263509Sdim ReturnedToken = true; 729263509Sdim break; 730263509Sdim } 731263509Sdim } while (!ReturnedToken); 732263509Sdim 733263509Sdim LastTokenWasAt = Result.is(tok::at); 734263509Sdim} 735263509Sdim 736263509Sdim 737235633Sdim/// \brief Lex a token following the 'import' contextual keyword. 738235633Sdim/// 739226890Sdimvoid Preprocessor::LexAfterModuleImport(Token &Result) { 740226890Sdim // Figure out what kind of lexer we actually have. 741235633Sdim recomputeCurLexerKind(); 742226890Sdim 743226890Sdim // Lex the next token. 744226890Sdim Lex(Result); 745226890Sdim 746226890Sdim // The token sequence 747226890Sdim // 748235633Sdim // import identifier (. identifier)* 749226890Sdim // 750235633Sdim // indicates a module import directive. We already saw the 'import' 751235633Sdim // contextual keyword, so now we're looking for the identifiers. 752235633Sdim if (ModuleImportExpectsIdentifier && Result.getKind() == tok::identifier) { 753235633Sdim // We expected to see an identifier here, and we did; continue handling 754235633Sdim // identifiers. 755235633Sdim ModuleImportPath.push_back(std::make_pair(Result.getIdentifierInfo(), 756235633Sdim Result.getLocation())); 757235633Sdim ModuleImportExpectsIdentifier = false; 758235633Sdim CurLexerKind = CLK_LexAfterModuleImport; 759226890Sdim return; 760235633Sdim } 761226890Sdim 762235633Sdim // If we're expecting a '.' or a ';', and we got a '.', then wait until we 763235633Sdim // see the next identifier. 764235633Sdim if (!ModuleImportExpectsIdentifier && Result.getKind() == tok::period) { 765235633Sdim ModuleImportExpectsIdentifier = true; 766235633Sdim CurLexerKind = CLK_LexAfterModuleImport; 767235633Sdim return; 768235633Sdim } 769235633Sdim 770235633Sdim // If we have a non-empty module path, load the named module. 771263509Sdim if (!ModuleImportPath.empty() && getLangOpts().Modules) { 772245431Sdim Module *Imported = TheModuleLoader.loadModule(ModuleImportLoc, 773245431Sdim ModuleImportPath, 774245431Sdim Module::MacrosVisible, 775245431Sdim /*IsIncludeDirective=*/false); 776245431Sdim if (Callbacks) 777245431Sdim Callbacks->moduleImport(ModuleImportLoc, ModuleImportPath, Imported); 778245431Sdim } 779226890Sdim} 780226890Sdim 781252723Sdimbool Preprocessor::FinishLexStringLiteral(Token &Result, std::string &String, 782252723Sdim const char *DiagnosticTag, 783252723Sdim bool AllowMacroExpansion) { 784252723Sdim // We need at least one string literal. 785252723Sdim if (Result.isNot(tok::string_literal)) { 786252723Sdim Diag(Result, diag::err_expected_string_literal) 787252723Sdim << /*Source='in...'*/0 << DiagnosticTag; 788252723Sdim return false; 789252723Sdim } 790252723Sdim 791252723Sdim // Lex string literal tokens, optionally with macro expansion. 792252723Sdim SmallVector<Token, 4> StrToks; 793252723Sdim do { 794252723Sdim StrToks.push_back(Result); 795252723Sdim 796252723Sdim if (Result.hasUDSuffix()) 797252723Sdim Diag(Result, diag::err_invalid_string_udl); 798252723Sdim 799252723Sdim if (AllowMacroExpansion) 800252723Sdim Lex(Result); 801252723Sdim else 802252723Sdim LexUnexpandedToken(Result); 803252723Sdim } while (Result.is(tok::string_literal)); 804252723Sdim 805252723Sdim // Concatenate and parse the strings. 806252723Sdim StringLiteralParser Literal(&StrToks[0], StrToks.size(), *this); 807252723Sdim assert(Literal.isAscii() && "Didn't allow wide strings in"); 808252723Sdim 809252723Sdim if (Literal.hadError) 810252723Sdim return false; 811252723Sdim 812252723Sdim if (Literal.Pascal) { 813252723Sdim Diag(StrToks[0].getLocation(), diag::err_expected_string_literal) 814252723Sdim << /*Source='in...'*/0 << DiagnosticTag; 815252723Sdim return false; 816252723Sdim } 817252723Sdim 818252723Sdim String = Literal.GetString(); 819252723Sdim return true; 820252723Sdim} 821252723Sdim 822245431Sdimvoid Preprocessor::addCommentHandler(CommentHandler *Handler) { 823195341Sed assert(Handler && "NULL comment handler"); 824195341Sed assert(std::find(CommentHandlers.begin(), CommentHandlers.end(), Handler) == 825195341Sed CommentHandlers.end() && "Comment handler already registered"); 826195341Sed CommentHandlers.push_back(Handler); 827195341Sed} 828195341Sed 829245431Sdimvoid Preprocessor::removeCommentHandler(CommentHandler *Handler) { 830195341Sed std::vector<CommentHandler *>::iterator Pos 831195341Sed = std::find(CommentHandlers.begin(), CommentHandlers.end(), Handler); 832195341Sed assert(Pos != CommentHandlers.end() && "Comment handler not registered"); 833195341Sed CommentHandlers.erase(Pos); 834195341Sed} 835195341Sed 836202879Srdivackybool Preprocessor::HandleComment(Token &result, SourceRange Comment) { 837202879Srdivacky bool AnyPendingTokens = false; 838195341Sed for (std::vector<CommentHandler *>::iterator H = CommentHandlers.begin(), 839195341Sed HEnd = CommentHandlers.end(); 840202879Srdivacky H != HEnd; ++H) { 841202879Srdivacky if ((*H)->HandleComment(*this, Comment)) 842202879Srdivacky AnyPendingTokens = true; 843202879Srdivacky } 844202879Srdivacky if (!AnyPendingTokens || getCommentRetentionState()) 845202879Srdivacky return false; 846202879Srdivacky Lex(result); 847202879Srdivacky return true; 848195341Sed} 849195341Sed 850226890SdimModuleLoader::~ModuleLoader() { } 851226890Sdim 852195341SedCommentHandler::~CommentHandler() { } 853205408Srdivacky 854212904SdimCodeCompletionHandler::~CodeCompletionHandler() { } 855212904Sdim 856252723Sdimvoid Preprocessor::createPreprocessingRecord() { 857205408Srdivacky if (Record) 858205408Srdivacky return; 859205408Srdivacky 860252723Sdim Record = new PreprocessingRecord(getSourceManager()); 861205408Srdivacky addPPCallbacks(Record); 862205408Srdivacky} 863