1193326Sed//===--- TokenLexer.cpp - Lex from a token stream -------------------------===// 2193326Sed// 3193326Sed// The LLVM Compiler Infrastructure 4193326Sed// 5193326Sed// This file is distributed under the University of Illinois Open Source 6193326Sed// License. See LICENSE.TXT for details. 7193326Sed// 8193326Sed//===----------------------------------------------------------------------===// 9193326Sed// 10193326Sed// This file implements the TokenLexer interface. 11193326Sed// 12193326Sed//===----------------------------------------------------------------------===// 13193326Sed 14193326Sed#include "clang/Lex/TokenLexer.h" 15252723Sdim#include "clang/Lex/MacroArgs.h" 16252723Sdim#include "clang/Basic/SourceManager.h" 17252723Sdim#include "clang/Lex/LexDiagnostic.h" 18193326Sed#include "clang/Lex/MacroInfo.h" 19193326Sed#include "clang/Lex/Preprocessor.h" 20235633Sdim#include "llvm/ADT/SmallString.h" 21193326Sedusing namespace clang; 22193326Sed 23193326Sed 24193326Sed/// Create a TokenLexer for the specified macro with the specified actual 25193326Sed/// arguments. Note that this ctor takes ownership of the ActualArgs pointer. 26245431Sdimvoid TokenLexer::Init(Token &Tok, SourceLocation ELEnd, MacroInfo *MI, 27245431Sdim MacroArgs *Actuals) { 28193326Sed // If the client is reusing a TokenLexer, make sure to free any memory 29193326Sed // associated with it. 30193326Sed destroy(); 31198092Srdivacky 32245431Sdim Macro = MI; 33193326Sed ActualArgs = Actuals; 34193326Sed CurToken = 0; 35198092Srdivacky 36224145Sdim ExpandLocStart = Tok.getLocation(); 37224145Sdim ExpandLocEnd = ELEnd; 38193326Sed AtStartOfLine = Tok.isAtStartOfLine(); 39193326Sed HasLeadingSpace = Tok.hasLeadingSpace(); 40193326Sed Tokens = &*Macro->tokens_begin(); 41193326Sed OwnsTokens = false; 42193326Sed DisableMacroExpansion = false; 43193326Sed NumTokens = Macro->tokens_end()-Macro->tokens_begin(); 44224145Sdim MacroExpansionStart = SourceLocation(); 45193326Sed 46224145Sdim SourceManager &SM = PP.getSourceManager(); 47226890Sdim MacroStartSLocOffset = SM.getNextLocalOffset(); 48224145Sdim 49224145Sdim if (NumTokens > 0) { 50224145Sdim assert(Tokens[0].getLocation().isValid()); 51224145Sdim assert((Tokens[0].getLocation().isFileID() || Tokens[0].is(tok::comment)) && 52224145Sdim "Macro defined in macro?"); 53224145Sdim assert(ExpandLocStart.isValid()); 54224145Sdim 55224145Sdim // Reserve a source location entry chunk for the length of the macro 56224145Sdim // definition. Tokens that get lexed directly from the definition will 57224145Sdim // have their locations pointing inside this chunk. This is to avoid 58224145Sdim // creating separate source location entries for each token. 59226890Sdim MacroDefStart = SM.getExpansionLoc(Tokens[0].getLocation()); 60226890Sdim MacroDefLength = Macro->getDefinitionLength(SM); 61226890Sdim MacroExpansionStart = SM.createExpansionLoc(MacroDefStart, 62226890Sdim ExpandLocStart, 63226890Sdim ExpandLocEnd, 64226890Sdim MacroDefLength); 65224145Sdim } 66224145Sdim 67193326Sed // If this is a function-like macro, expand the arguments and change 68193326Sed // Tokens to point to the expanded tokens. 69193326Sed if (Macro->isFunctionLike() && Macro->getNumArgs()) 70193326Sed ExpandFunctionArguments(); 71198092Srdivacky 72193326Sed // Mark the macro as currently disabled, so that it is not recursively 73193326Sed // expanded. The macro must be disabled only after argument pre-expansion of 74193326Sed // function-like macro arguments occurs. 75193326Sed Macro->DisableMacro(); 76193326Sed} 77193326Sed 78193326Sed 79193326Sed 80193326Sed/// Create a TokenLexer for the specified token stream. This does not 81193326Sed/// take ownership of the specified token vector. 82193326Sedvoid TokenLexer::Init(const Token *TokArray, unsigned NumToks, 83193326Sed bool disableMacroExpansion, bool ownsTokens) { 84193326Sed // If the client is reusing a TokenLexer, make sure to free any memory 85193326Sed // associated with it. 86193326Sed destroy(); 87198092Srdivacky 88193326Sed Macro = 0; 89193326Sed ActualArgs = 0; 90193326Sed Tokens = TokArray; 91193326Sed OwnsTokens = ownsTokens; 92193326Sed DisableMacroExpansion = disableMacroExpansion; 93193326Sed NumTokens = NumToks; 94193326Sed CurToken = 0; 95224145Sdim ExpandLocStart = ExpandLocEnd = SourceLocation(); 96193326Sed AtStartOfLine = false; 97193326Sed HasLeadingSpace = false; 98224145Sdim MacroExpansionStart = SourceLocation(); 99198092Srdivacky 100193326Sed // Set HasLeadingSpace/AtStartOfLine so that the first token will be 101193326Sed // returned unmodified. 102193326Sed if (NumToks != 0) { 103193326Sed AtStartOfLine = TokArray[0].isAtStartOfLine(); 104193326Sed HasLeadingSpace = TokArray[0].hasLeadingSpace(); 105193326Sed } 106193326Sed} 107193326Sed 108193326Sed 109193326Sedvoid TokenLexer::destroy() { 110193326Sed // If this was a function-like macro that actually uses its arguments, delete 111193326Sed // the expanded tokens. 112193326Sed if (OwnsTokens) { 113193326Sed delete [] Tokens; 114193326Sed Tokens = 0; 115193326Sed OwnsTokens = false; 116193326Sed } 117198092Srdivacky 118193326Sed // TokenLexer owns its formal arguments. 119200583Srdivacky if (ActualArgs) ActualArgs->destroy(PP); 120193326Sed} 121193326Sed 122245431Sdim/// Remove comma ahead of __VA_ARGS__, if present, according to compiler dialect 123245431Sdim/// settings. Returns true if the comma is removed. 124263509Sdimstatic bool MaybeRemoveCommaBeforeVaArgs(SmallVectorImpl<Token> &ResultToks, 125245431Sdim bool &NextTokGetsSpace, 126245431Sdim bool HasPasteOperator, 127245431Sdim MacroInfo *Macro, unsigned MacroArgNo, 128245431Sdim Preprocessor &PP) { 129245431Sdim // Is the macro argument __VA_ARGS__? 130245431Sdim if (!Macro->isVariadic() || MacroArgNo != Macro->getNumArgs()-1) 131245431Sdim return false; 132245431Sdim 133245431Sdim // In Microsoft-compatibility mode, a comma is removed in the expansion 134245431Sdim // of " ... , __VA_ARGS__ " if __VA_ARGS__ is empty. This extension is 135245431Sdim // not supported by gcc. 136245431Sdim if (!HasPasteOperator && !PP.getLangOpts().MicrosoftMode) 137245431Sdim return false; 138245431Sdim 139245431Sdim // GCC removes the comma in the expansion of " ... , ## __VA_ARGS__ " if 140245431Sdim // __VA_ARGS__ is empty, but not in strict C99 mode where there are no 141245431Sdim // named arguments, where it remains. In all other modes, including C99 142245431Sdim // with GNU extensions, it is removed regardless of named arguments. 143245431Sdim // Microsoft also appears to support this extension, unofficially. 144245431Sdim if (PP.getLangOpts().C99 && !PP.getLangOpts().GNUMode 145245431Sdim && Macro->getNumArgs() < 2) 146245431Sdim return false; 147245431Sdim 148245431Sdim // Is a comma available to be removed? 149245431Sdim if (ResultToks.empty() || !ResultToks.back().is(tok::comma)) 150245431Sdim return false; 151245431Sdim 152245431Sdim // Issue an extension diagnostic for the paste operator. 153245431Sdim if (HasPasteOperator) 154245431Sdim PP.Diag(ResultToks.back().getLocation(), diag::ext_paste_comma); 155245431Sdim 156245431Sdim // Remove the comma. 157245431Sdim ResultToks.pop_back(); 158245431Sdim 159245431Sdim // If the comma was right after another paste (e.g. "X##,##__VA_ARGS__"), 160245431Sdim // then removal of the comma should produce a placemarker token (in C99 161245431Sdim // terms) which we model by popping off the previous ##, giving us a plain 162245431Sdim // "X" when __VA_ARGS__ is empty. 163245431Sdim if (!ResultToks.empty() && ResultToks.back().is(tok::hashhash)) 164245431Sdim ResultToks.pop_back(); 165245431Sdim 166245431Sdim // Never add a space, even if the comma, ##, or arg had a space. 167245431Sdim NextTokGetsSpace = false; 168245431Sdim return true; 169245431Sdim} 170245431Sdim 171193326Sed/// Expand the arguments of a function-like macro so that we can quickly 172193326Sed/// return preexpanded tokens from Tokens. 173193326Sedvoid TokenLexer::ExpandFunctionArguments() { 174224145Sdim 175226890Sdim SmallVector<Token, 128> ResultToks; 176198092Srdivacky 177193326Sed // Loop through 'Tokens', expanding them into ResultToks. Keep 178193326Sed // track of whether we change anything. If not, no need to keep them. If so, 179193326Sed // we install the newly expanded sequence as the new 'Tokens' list. 180193326Sed bool MadeChange = false; 181198092Srdivacky 182193326Sed // NextTokGetsSpace - When this is true, the next token appended to the 183193326Sed // output list will get a leading space, regardless of whether it had one to 184193326Sed // begin with or not. This is used for placemarker support. 185193326Sed bool NextTokGetsSpace = false; 186198092Srdivacky 187193326Sed for (unsigned i = 0, e = NumTokens; i != e; ++i) { 188193326Sed // If we found the stringify operator, get the argument stringified. The 189193326Sed // preprocessor already verified that the following token is a macro name 190193326Sed // when the #define was parsed. 191193326Sed const Token &CurTok = Tokens[i]; 192193326Sed if (CurTok.is(tok::hash) || CurTok.is(tok::hashat)) { 193193326Sed int ArgNo = Macro->getArgumentNum(Tokens[i+1].getIdentifierInfo()); 194193326Sed assert(ArgNo != -1 && "Token following # is not an argument?"); 195198092Srdivacky 196226890Sdim SourceLocation ExpansionLocStart = 197226890Sdim getExpansionLocForMacroDefLoc(CurTok.getLocation()); 198226890Sdim SourceLocation ExpansionLocEnd = 199226890Sdim getExpansionLocForMacroDefLoc(Tokens[i+1].getLocation()); 200224145Sdim 201193326Sed Token Res; 202193326Sed if (CurTok.is(tok::hash)) // Stringify 203226890Sdim Res = ActualArgs->getStringifiedArgument(ArgNo, PP, 204226890Sdim ExpansionLocStart, 205226890Sdim ExpansionLocEnd); 206193326Sed else { 207193326Sed // 'charify': don't bother caching these. 208193326Sed Res = MacroArgs::StringifyArgument(ActualArgs->getUnexpArgument(ArgNo), 209226890Sdim PP, true, 210226890Sdim ExpansionLocStart, 211226890Sdim ExpansionLocEnd); 212193326Sed } 213198092Srdivacky 214193326Sed // The stringified/charified string leading space flag gets set to match 215193326Sed // the #/#@ operator. 216193326Sed if (CurTok.hasLeadingSpace() || NextTokGetsSpace) 217193326Sed Res.setFlag(Token::LeadingSpace); 218198092Srdivacky 219193326Sed ResultToks.push_back(Res); 220193326Sed MadeChange = true; 221193326Sed ++i; // Skip arg name. 222193326Sed NextTokGetsSpace = false; 223193326Sed continue; 224193326Sed } 225198092Srdivacky 226193326Sed // Otherwise, if this is not an argument token, just add the token to the 227193326Sed // output buffer. 228193326Sed IdentifierInfo *II = CurTok.getIdentifierInfo(); 229193326Sed int ArgNo = II ? Macro->getArgumentNum(II) : -1; 230193326Sed if (ArgNo == -1) { 231193326Sed // This isn't an argument, just add it. 232193326Sed ResultToks.push_back(CurTok); 233193326Sed 234193326Sed if (NextTokGetsSpace) { 235193326Sed ResultToks.back().setFlag(Token::LeadingSpace); 236193326Sed NextTokGetsSpace = false; 237193326Sed } 238193326Sed continue; 239193326Sed } 240198092Srdivacky 241193326Sed // An argument is expanded somehow, the result is different than the 242193326Sed // input. 243193326Sed MadeChange = true; 244193326Sed 245193326Sed // Otherwise, this is a use of the argument. Find out if there is a paste 246193326Sed // (##) operator before or after the argument. 247263509Sdim bool NonEmptyPasteBefore = 248193326Sed !ResultToks.empty() && ResultToks.back().is(tok::hashhash); 249263509Sdim bool PasteBefore = i != 0 && Tokens[i-1].is(tok::hashhash); 250193326Sed bool PasteAfter = i+1 != e && Tokens[i+1].is(tok::hashhash); 251263509Sdim assert(!NonEmptyPasteBefore || PasteBefore); 252198092Srdivacky 253245431Sdim // In Microsoft mode, remove the comma before __VA_ARGS__ to ensure there 254245431Sdim // are no trailing commas if __VA_ARGS__ is empty. 255245431Sdim if (!PasteBefore && ActualArgs->isVarargsElidedUse() && 256245431Sdim MaybeRemoveCommaBeforeVaArgs(ResultToks, NextTokGetsSpace, 257245431Sdim /*HasPasteOperator=*/false, 258245431Sdim Macro, ArgNo, PP)) 259245431Sdim continue; 260245431Sdim 261193326Sed // If it is not the LHS/RHS of a ## operator, we must pre-expand the 262193326Sed // argument and substitute the expanded tokens into the result. This is 263193326Sed // C99 6.10.3.1p1. 264193326Sed if (!PasteBefore && !PasteAfter) { 265193326Sed const Token *ResultArgToks; 266193326Sed 267193326Sed // Only preexpand the argument if it could possibly need it. This 268193326Sed // avoids some work in common cases. 269193326Sed const Token *ArgTok = ActualArgs->getUnexpArgument(ArgNo); 270193326Sed if (ActualArgs->ArgNeedsPreexpansion(ArgTok, PP)) 271201361Srdivacky ResultArgToks = &ActualArgs->getPreExpArgument(ArgNo, Macro, PP)[0]; 272193326Sed else 273193326Sed ResultArgToks = ArgTok; // Use non-preexpanded tokens. 274198092Srdivacky 275193326Sed // If the arg token expanded into anything, append it. 276193326Sed if (ResultArgToks->isNot(tok::eof)) { 277193326Sed unsigned FirstResult = ResultToks.size(); 278193326Sed unsigned NumToks = MacroArgs::getArgLength(ResultArgToks); 279193326Sed ResultToks.append(ResultArgToks, ResultArgToks+NumToks); 280198092Srdivacky 281263509Sdim // In Microsoft-compatibility mode, we follow MSVC's preprocessing 282263509Sdim // behavior by not considering single commas from nested macro 283263509Sdim // expansions as argument separators. Set a flag on the token so we can 284263509Sdim // test for this later when the macro expansion is processed. 285263509Sdim if (PP.getLangOpts().MicrosoftMode && NumToks == 1 && 286263509Sdim ResultToks.back().is(tok::comma)) 287263509Sdim ResultToks.back().setFlag(Token::IgnoredComma); 288263509Sdim 289224145Sdim // If the '##' came from expanding an argument, turn it into 'unknown' 290224145Sdim // to avoid pasting. 291224145Sdim for (unsigned i = FirstResult, e = ResultToks.size(); i != e; ++i) { 292224145Sdim Token &Tok = ResultToks[i]; 293224145Sdim if (Tok.is(tok::hashhash)) 294224145Sdim Tok.setKind(tok::unknown); 295224145Sdim } 296224145Sdim 297224145Sdim if(ExpandLocStart.isValid()) { 298226890Sdim updateLocForMacroArgTokens(CurTok.getLocation(), 299226890Sdim ResultToks.begin()+FirstResult, 300226890Sdim ResultToks.end()); 301224145Sdim } 302224145Sdim 303193326Sed // If any tokens were substituted from the argument, the whitespace 304193326Sed // before the first token should match the whitespace of the arg 305193326Sed // identifier. 306193326Sed ResultToks[FirstResult].setFlagValue(Token::LeadingSpace, 307193326Sed CurTok.hasLeadingSpace() || 308193326Sed NextTokGetsSpace); 309193326Sed NextTokGetsSpace = false; 310193326Sed } else { 311193326Sed // If this is an empty argument, and if there was whitespace before the 312193326Sed // formal token, make sure the next token gets whitespace before it. 313193326Sed NextTokGetsSpace = CurTok.hasLeadingSpace(); 314193326Sed } 315193326Sed continue; 316193326Sed } 317198092Srdivacky 318193326Sed // Okay, we have a token that is either the LHS or RHS of a paste (##) 319193326Sed // argument. It gets substituted as its non-pre-expanded tokens. 320193326Sed const Token *ArgToks = ActualArgs->getUnexpArgument(ArgNo); 321193326Sed unsigned NumToks = MacroArgs::getArgLength(ArgToks); 322193326Sed if (NumToks) { // Not an empty argument? 323245431Sdim // If this is the GNU ", ## __VA_ARGS__" extension, and we just learned 324245431Sdim // that __VA_ARGS__ expands to multiple tokens, avoid a pasting error when 325245431Sdim // the expander trys to paste ',' with the first token of the __VA_ARGS__ 326193326Sed // expansion. 327263509Sdim if (NonEmptyPasteBefore && ResultToks.size() >= 2 && 328193326Sed ResultToks[ResultToks.size()-2].is(tok::comma) && 329193326Sed (unsigned)ArgNo == Macro->getNumArgs()-1 && 330193326Sed Macro->isVariadic()) { 331193326Sed // Remove the paste operator, report use of the extension. 332263509Sdim PP.Diag(ResultToks.pop_back_val().getLocation(), diag::ext_paste_comma); 333193326Sed } 334198092Srdivacky 335193326Sed ResultToks.append(ArgToks, ArgToks+NumToks); 336198092Srdivacky 337224145Sdim // If the '##' came from expanding an argument, turn it into 'unknown' 338224145Sdim // to avoid pasting. 339224145Sdim for (unsigned i = ResultToks.size() - NumToks, e = ResultToks.size(); 340224145Sdim i != e; ++i) { 341224145Sdim Token &Tok = ResultToks[i]; 342224145Sdim if (Tok.is(tok::hashhash)) 343224145Sdim Tok.setKind(tok::unknown); 344224145Sdim } 345224145Sdim 346224145Sdim if (ExpandLocStart.isValid()) { 347226890Sdim updateLocForMacroArgTokens(CurTok.getLocation(), 348226890Sdim ResultToks.end()-NumToks, ResultToks.end()); 349224145Sdim } 350224145Sdim 351193326Sed // If this token (the macro argument) was supposed to get leading 352193326Sed // whitespace, transfer this information onto the first token of the 353193326Sed // expansion. 354193326Sed // 355193326Sed // Do not do this if the paste operator occurs before the macro argument, 356193326Sed // as in "A ## MACROARG". In valid code, the first token will get 357193326Sed // smooshed onto the preceding one anyway (forming AMACROARG). In 358193326Sed // assembler-with-cpp mode, invalid pastes are allowed through: in this 359193326Sed // case, we do not want the extra whitespace to be added. For example, 360193326Sed // we want ". ## foo" -> ".foo" not ". foo". 361193326Sed if ((CurTok.hasLeadingSpace() || NextTokGetsSpace) && 362263509Sdim !NonEmptyPasteBefore) 363193326Sed ResultToks[ResultToks.size()-NumToks].setFlag(Token::LeadingSpace); 364198092Srdivacky 365193326Sed NextTokGetsSpace = false; 366193326Sed continue; 367193326Sed } 368198092Srdivacky 369193326Sed // If an empty argument is on the LHS or RHS of a paste, the standard (C99 370193326Sed // 6.10.3.3p2,3) calls for a bunch of placemarker stuff to occur. We 371193326Sed // implement this by eating ## operators when a LHS or RHS expands to 372193326Sed // empty. 373193326Sed NextTokGetsSpace |= CurTok.hasLeadingSpace(); 374193326Sed if (PasteAfter) { 375193326Sed // Discard the argument token and skip (don't copy to the expansion 376193326Sed // buffer) the paste operator after it. 377193326Sed NextTokGetsSpace |= Tokens[i+1].hasLeadingSpace(); 378193326Sed ++i; 379193326Sed continue; 380193326Sed } 381198092Srdivacky 382193326Sed // If this is on the RHS of a paste operator, we've already copied the 383263509Sdim // paste operator to the ResultToks list, unless the LHS was empty too. 384263509Sdim // Remove it. 385263509Sdim assert(PasteBefore); 386263509Sdim if (NonEmptyPasteBefore) { 387263509Sdim assert(ResultToks.back().is(tok::hashhash)); 388263509Sdim NextTokGetsSpace |= ResultToks.pop_back_val().hasLeadingSpace(); 389263509Sdim } 390198092Srdivacky 391193326Sed // If this is the __VA_ARGS__ token, and if the argument wasn't provided, 392193326Sed // and if the macro had at least one real argument, and if the token before 393245431Sdim // the ## was a comma, remove the comma. This is a GCC extension which is 394245431Sdim // disabled when using -std=c99. 395245431Sdim if (ActualArgs->isVarargsElidedUse()) 396245431Sdim MaybeRemoveCommaBeforeVaArgs(ResultToks, NextTokGetsSpace, 397245431Sdim /*HasPasteOperator=*/true, 398245431Sdim Macro, ArgNo, PP); 399245431Sdim 400193326Sed continue; 401193326Sed } 402198092Srdivacky 403193326Sed // If anything changed, install this as the new Tokens list. 404193326Sed if (MadeChange) { 405193326Sed assert(!OwnsTokens && "This would leak if we already own the token list"); 406193326Sed // This is deleted in the dtor. 407193326Sed NumTokens = ResultToks.size(); 408224145Sdim // The tokens will be added to Preprocessor's cache and will be removed 409224145Sdim // when this TokenLexer finishes lexing them. 410224145Sdim Tokens = PP.cacheMacroExpandedTokens(this, ResultToks); 411198092Srdivacky 412224145Sdim // The preprocessor cache of macro expanded tokens owns these tokens,not us. 413193326Sed OwnsTokens = false; 414193326Sed } 415193326Sed} 416193326Sed 417193326Sed/// Lex - Lex and return a token from this macro stream. 418193326Sed/// 419263509Sdimbool TokenLexer::Lex(Token &Tok) { 420193326Sed // Lexing off the end of the macro, pop this macro off the expansion stack. 421193326Sed if (isAtEnd()) { 422193326Sed // If this is a macro (not a token stream), mark the macro enabled now 423193326Sed // that it is no longer being expanded. 424193326Sed if (Macro) Macro->EnableMacro(); 425193326Sed 426263509Sdim Tok.startToken(); 427263509Sdim Tok.setFlagValue(Token::StartOfLine , AtStartOfLine); 428263509Sdim Tok.setFlagValue(Token::LeadingSpace, HasLeadingSpace); 429263509Sdim if (CurToken == 0) 430263509Sdim Tok.setFlag(Token::LeadingEmptyMacro); 431263509Sdim return PP.HandleEndOfTokenLexer(Tok); 432193326Sed } 433198092Srdivacky 434224145Sdim SourceManager &SM = PP.getSourceManager(); 435224145Sdim 436193326Sed // If this is the first token of the expanded result, we inherit spacing 437193326Sed // properties later. 438193326Sed bool isFirstToken = CurToken == 0; 439198092Srdivacky 440193326Sed // Get the next token to return. 441193326Sed Tok = Tokens[CurToken++]; 442198092Srdivacky 443193326Sed bool TokenIsFromPaste = false; 444198092Srdivacky 445193326Sed // If this token is followed by a token paste (##) operator, paste the tokens! 446224145Sdim // Note that ## is a normal token when not expanding a macro. 447224145Sdim if (!isAtEnd() && Tokens[CurToken].is(tok::hashhash) && Macro) { 448200583Srdivacky // When handling the microsoft /##/ extension, the final token is 449200583Srdivacky // returned by PasteTokens, not the pasted token. 450200583Srdivacky if (PasteTokens(Tok)) 451263509Sdim return true; 452205219Srdivacky 453200583Srdivacky TokenIsFromPaste = true; 454198092Srdivacky } 455193326Sed 456193326Sed // The token's current location indicate where the token was lexed from. We 457193326Sed // need this information to compute the spelling of the token, but any 458193326Sed // diagnostics for the expanded token should appear as if they came from 459224145Sdim // ExpansionLoc. Pull this information together into a new SourceLocation 460193326Sed // that captures all of this. 461224145Sdim if (ExpandLocStart.isValid() && // Don't do this for token streams. 462224145Sdim // Check that the token's location was not already set properly. 463226890Sdim SM.isBeforeInSLocAddrSpace(Tok.getLocation(), MacroStartSLocOffset)) { 464224145Sdim SourceLocation instLoc; 465224145Sdim if (Tok.is(tok::comment)) { 466226890Sdim instLoc = SM.createExpansionLoc(Tok.getLocation(), 467226890Sdim ExpandLocStart, 468226890Sdim ExpandLocEnd, 469226890Sdim Tok.getLength()); 470224145Sdim } else { 471226890Sdim instLoc = getExpansionLocForMacroDefLoc(Tok.getLocation()); 472224145Sdim } 473224145Sdim 474224145Sdim Tok.setLocation(instLoc); 475193326Sed } 476198092Srdivacky 477193326Sed // If this is the first token, set the lexical properties of the token to 478193326Sed // match the lexical properties of the macro identifier. 479193326Sed if (isFirstToken) { 480193326Sed Tok.setFlagValue(Token::StartOfLine , AtStartOfLine); 481193326Sed Tok.setFlagValue(Token::LeadingSpace, HasLeadingSpace); 482263509Sdim AtStartOfLine = false; 483263509Sdim HasLeadingSpace = false; 484193326Sed } 485198092Srdivacky 486193326Sed // Handle recursive expansion! 487193326Sed if (!Tok.isAnnotation() && Tok.getIdentifierInfo() != 0) { 488193326Sed // Change the kind of this identifier to the appropriate token kind, e.g. 489193326Sed // turning "for" into a keyword. 490193326Sed IdentifierInfo *II = Tok.getIdentifierInfo(); 491193326Sed Tok.setKind(II->getTokenID()); 492198092Srdivacky 493193326Sed // If this identifier was poisoned and from a paste, emit an error. This 494193326Sed // won't be handled by Preprocessor::HandleIdentifier because this is coming 495193326Sed // from a macro expansion. 496193326Sed if (II->isPoisoned() && TokenIsFromPaste) { 497221345Sdim PP.HandlePoisonedIdentifier(Tok); 498193326Sed } 499198092Srdivacky 500193326Sed if (!DisableMacroExpansion && II->isHandleIdentifierCase()) 501263509Sdim return PP.HandleIdentifier(Tok); 502193326Sed } 503193326Sed 504193326Sed // Otherwise, return a normal token. 505263509Sdim return true; 506193326Sed} 507193326Sed 508193326Sed/// PasteTokens - Tok is the LHS of a ## operator, and CurToken is the ## 509193326Sed/// operator. Read the ## and RHS, and paste the LHS/RHS together. If there 510193326Sed/// are more ## after it, chomp them iteratively. Return the result as Tok. 511193326Sed/// If this returns true, the caller should immediately return the token. 512193326Sedbool TokenLexer::PasteTokens(Token &Tok) { 513235633Sdim SmallString<128> Buffer; 514193326Sed const char *ResultTokStrPtr = 0; 515226890Sdim SourceLocation StartLoc = Tok.getLocation(); 516224145Sdim SourceLocation PasteOpLoc; 517193326Sed do { 518193326Sed // Consume the ## operator. 519224145Sdim PasteOpLoc = Tokens[CurToken].getLocation(); 520193326Sed ++CurToken; 521193326Sed assert(!isAtEnd() && "No token on the RHS of a paste operator!"); 522198092Srdivacky 523193326Sed // Get the RHS token. 524193326Sed const Token &RHS = Tokens[CurToken]; 525198092Srdivacky 526193326Sed // Allocate space for the result token. This is guaranteed to be enough for 527193326Sed // the two tokens. 528193326Sed Buffer.resize(Tok.getLength() + RHS.getLength()); 529198092Srdivacky 530193326Sed // Get the spelling of the LHS token in Buffer. 531193326Sed const char *BufPtr = &Buffer[0]; 532205408Srdivacky bool Invalid = false; 533205408Srdivacky unsigned LHSLen = PP.getSpelling(Tok, BufPtr, &Invalid); 534193326Sed if (BufPtr != &Buffer[0]) // Really, we want the chars in Buffer! 535193326Sed memcpy(&Buffer[0], BufPtr, LHSLen); 536205408Srdivacky if (Invalid) 537205408Srdivacky return true; 538205408Srdivacky 539193326Sed BufPtr = &Buffer[LHSLen]; 540205408Srdivacky unsigned RHSLen = PP.getSpelling(RHS, BufPtr, &Invalid); 541205408Srdivacky if (Invalid) 542205408Srdivacky return true; 543193326Sed if (BufPtr != &Buffer[LHSLen]) // Really, we want the chars in Buffer! 544193326Sed memcpy(&Buffer[LHSLen], BufPtr, RHSLen); 545198092Srdivacky 546193326Sed // Trim excess space. 547193326Sed Buffer.resize(LHSLen+RHSLen); 548198092Srdivacky 549193326Sed // Plop the pasted result (including the trailing newline and null) into a 550193326Sed // scratch buffer where we can lex it. 551193326Sed Token ResultTokTmp; 552193326Sed ResultTokTmp.startToken(); 553198092Srdivacky 554193326Sed // Claim that the tmp token is a string_literal so that we can get the 555201361Srdivacky // character pointer back from CreateString in getLiteralData(). 556193326Sed ResultTokTmp.setKind(tok::string_literal); 557245431Sdim PP.CreateString(Buffer, ResultTokTmp); 558193326Sed SourceLocation ResultTokLoc = ResultTokTmp.getLocation(); 559193326Sed ResultTokStrPtr = ResultTokTmp.getLiteralData(); 560193326Sed 561193326Sed // Lex the resultant pasted token into Result. 562193326Sed Token Result; 563198092Srdivacky 564218893Sdim if (Tok.isAnyIdentifier() && RHS.isAnyIdentifier()) { 565193326Sed // Common paste case: identifier+identifier = identifier. Avoid creating 566193326Sed // a lexer and other overhead. 567193326Sed PP.IncrementPasteCounter(true); 568193326Sed Result.startToken(); 569218893Sdim Result.setKind(tok::raw_identifier); 570218893Sdim Result.setRawIdentifierData(ResultTokStrPtr); 571193326Sed Result.setLocation(ResultTokLoc); 572193326Sed Result.setLength(LHSLen+RHSLen); 573193326Sed } else { 574193326Sed PP.IncrementPasteCounter(false); 575198092Srdivacky 576193326Sed assert(ResultTokLoc.isFileID() && 577193326Sed "Should be a raw location into scratch buffer"); 578193326Sed SourceManager &SourceMgr = PP.getSourceManager(); 579193326Sed FileID LocFileID = SourceMgr.getFileID(ResultTokLoc); 580198092Srdivacky 581205219Srdivacky bool Invalid = false; 582205219Srdivacky const char *ScratchBufStart 583205219Srdivacky = SourceMgr.getBufferData(LocFileID, &Invalid).data(); 584205219Srdivacky if (Invalid) 585205219Srdivacky return false; 586198092Srdivacky 587193326Sed // Make a lexer to lex this string from. Lex just this one token. 588193326Sed // Make a lexer object so that we lex and expand the paste result. 589193326Sed Lexer TL(SourceMgr.getLocForStartOfFile(LocFileID), 590235633Sdim PP.getLangOpts(), ScratchBufStart, 591193326Sed ResultTokStrPtr, ResultTokStrPtr+LHSLen+RHSLen); 592198092Srdivacky 593193326Sed // Lex a token in raw mode. This way it won't look up identifiers 594193326Sed // automatically, lexing off the end will return an eof token, and 595193326Sed // warnings are disabled. This returns true if the result token is the 596193326Sed // entire buffer. 597193326Sed bool isInvalid = !TL.LexFromRawLexer(Result); 598198092Srdivacky 599193326Sed // If we got an EOF token, we didn't form even ONE token. For example, we 600193326Sed // did "/ ## /" to get "//". 601193326Sed isInvalid |= Result.is(tok::eof); 602198092Srdivacky 603193326Sed // If pasting the two tokens didn't form a full new token, this is an 604193326Sed // error. This occurs with "x ## +" and other stuff. Return with Tok 605193326Sed // unmodified and with RHS as the next token to lex. 606193326Sed if (isInvalid) { 607193326Sed // Test for the Microsoft extension of /##/ turning into // here on the 608193326Sed // error path. 609235633Sdim if (PP.getLangOpts().MicrosoftExt && Tok.is(tok::slash) && 610193326Sed RHS.is(tok::slash)) { 611193326Sed HandleMicrosoftCommentPaste(Tok); 612193326Sed return true; 613193326Sed } 614198092Srdivacky 615212904Sdim // Do not emit the error when preprocessing assembler code. 616235633Sdim if (!PP.getLangOpts().AsmPreprocessor) { 617224145Sdim // Explicitly convert the token location to have proper expansion 618193326Sed // information so that the user knows where it came from. 619193326Sed SourceManager &SM = PP.getSourceManager(); 620193326Sed SourceLocation Loc = 621226890Sdim SM.createExpansionLoc(PasteOpLoc, ExpandLocStart, ExpandLocEnd, 2); 622212904Sdim // If we're in microsoft extensions mode, downgrade this from a hard 623212904Sdim // error to a warning that defaults to an error. This allows 624212904Sdim // disabling it. 625212904Sdim PP.Diag(Loc, 626235633Sdim PP.getLangOpts().MicrosoftExt ? diag::err_pp_bad_paste_ms 627226890Sdim : diag::err_pp_bad_paste) 628212904Sdim << Buffer.str(); 629193326Sed } 630198092Srdivacky 631245431Sdim // An error has occurred so exit loop. 632245431Sdim break; 633193326Sed } 634198092Srdivacky 635193326Sed // Turn ## into 'unknown' to avoid # ## # from looking like a paste 636193326Sed // operator. 637193326Sed if (Result.is(tok::hashhash)) 638193326Sed Result.setKind(tok::unknown); 639193326Sed } 640198092Srdivacky 641245431Sdim // Transfer properties of the LHS over the Result. 642193326Sed Result.setFlagValue(Token::StartOfLine , Tok.isAtStartOfLine()); 643193326Sed Result.setFlagValue(Token::LeadingSpace, Tok.hasLeadingSpace()); 644224145Sdim 645193326Sed // Finally, replace LHS with the result, consume the RHS, and iterate. 646193326Sed ++CurToken; 647193326Sed Tok = Result; 648193326Sed } while (!isAtEnd() && Tokens[CurToken].is(tok::hashhash)); 649198092Srdivacky 650226890Sdim SourceLocation EndLoc = Tokens[CurToken - 1].getLocation(); 651226890Sdim 652224145Sdim // The token's current location indicate where the token was lexed from. We 653224145Sdim // need this information to compute the spelling of the token, but any 654224145Sdim // diagnostics for the expanded token should appear as if the token was 655226890Sdim // expanded from the full ## expression. Pull this information together into 656224145Sdim // a new SourceLocation that captures all of this. 657226890Sdim SourceManager &SM = PP.getSourceManager(); 658226890Sdim if (StartLoc.isFileID()) 659226890Sdim StartLoc = getExpansionLocForMacroDefLoc(StartLoc); 660226890Sdim if (EndLoc.isFileID()) 661226890Sdim EndLoc = getExpansionLocForMacroDefLoc(EndLoc); 662252723Sdim FileID MacroFID = SM.getFileID(MacroExpansionStart); 663252723Sdim while (SM.getFileID(StartLoc) != MacroFID) 664252723Sdim StartLoc = SM.getImmediateExpansionRange(StartLoc).first; 665252723Sdim while (SM.getFileID(EndLoc) != MacroFID) 666252723Sdim EndLoc = SM.getImmediateExpansionRange(EndLoc).second; 667252723Sdim 668226890Sdim Tok.setLocation(SM.createExpansionLoc(Tok.getLocation(), StartLoc, EndLoc, 669226890Sdim Tok.getLength())); 670224145Sdim 671193326Sed // Now that we got the result token, it will be subject to expansion. Since 672193326Sed // token pasting re-lexes the result token in raw mode, identifier information 673193326Sed // isn't looked up. As such, if the result is an identifier, look up id info. 674218893Sdim if (Tok.is(tok::raw_identifier)) { 675193326Sed // Look up the identifier info for the token. We disabled identifier lookup 676193326Sed // by saying we're skipping contents, so we need to do this manually. 677218893Sdim PP.LookUpIdentifierInfo(Tok); 678193326Sed } 679193326Sed return false; 680193326Sed} 681193326Sed 682193326Sed/// isNextTokenLParen - If the next token lexed will pop this macro off the 683193326Sed/// expansion stack, return 2. If the next unexpanded token is a '(', return 684193326Sed/// 1, otherwise return 0. 685193326Sedunsigned TokenLexer::isNextTokenLParen() const { 686193326Sed // Out of tokens? 687193326Sed if (isAtEnd()) 688193326Sed return 2; 689193326Sed return Tokens[CurToken].is(tok::l_paren); 690193326Sed} 691193326Sed 692219077Sdim/// isParsingPreprocessorDirective - Return true if we are in the middle of a 693219077Sdim/// preprocessor directive. 694219077Sdimbool TokenLexer::isParsingPreprocessorDirective() const { 695221345Sdim return Tokens[NumTokens-1].is(tok::eod) && !isAtEnd(); 696219077Sdim} 697193326Sed 698193326Sed/// HandleMicrosoftCommentPaste - In microsoft compatibility mode, /##/ pastes 699193326Sed/// together to form a comment that comments out everything in the current 700193326Sed/// macro, other active macros, and anything left on the current physical 701224145Sdim/// source line of the expanded buffer. Handle this by returning the 702193326Sed/// first token on the next line. 703193326Sedvoid TokenLexer::HandleMicrosoftCommentPaste(Token &Tok) { 704193326Sed // We 'comment out' the rest of this macro by just ignoring the rest of the 705193326Sed // tokens that have not been lexed yet, if any. 706198092Srdivacky 707193326Sed // Since this must be a macro, mark the macro enabled now that it is no longer 708193326Sed // being expanded. 709193326Sed assert(Macro && "Token streams can't paste comments"); 710193326Sed Macro->EnableMacro(); 711198092Srdivacky 712193326Sed PP.HandleMicrosoftCommentPaste(Tok); 713193326Sed} 714224145Sdim 715226890Sdim/// \brief If \arg loc is a file ID and points inside the current macro 716224145Sdim/// definition, returns the appropriate source location pointing at the 717226890Sdim/// macro expansion source location entry, otherwise it returns an invalid 718226890Sdim/// SourceLocation. 719226890SdimSourceLocation 720226890SdimTokenLexer::getExpansionLocForMacroDefLoc(SourceLocation loc) const { 721224145Sdim assert(ExpandLocStart.isValid() && MacroExpansionStart.isValid() && 722224145Sdim "Not appropriate for token streams"); 723226890Sdim assert(loc.isValid() && loc.isFileID()); 724224145Sdim 725224145Sdim SourceManager &SM = PP.getSourceManager(); 726226890Sdim assert(SM.isInSLocAddrSpace(loc, MacroDefStart, MacroDefLength) && 727226890Sdim "Expected loc to come from the macro definition"); 728226890Sdim 729226890Sdim unsigned relativeOffset = 0; 730226890Sdim SM.isInSLocAddrSpace(loc, MacroDefStart, MacroDefLength, &relativeOffset); 731226890Sdim return MacroExpansionStart.getLocWithOffset(relativeOffset); 732226890Sdim} 733226890Sdim 734226890Sdim/// \brief Finds the tokens that are consecutive (from the same FileID) 735226890Sdim/// creates a single SLocEntry, and assigns SourceLocations to each token that 736226890Sdim/// point to that SLocEntry. e.g for 737226890Sdim/// assert(foo == bar); 738226890Sdim/// There will be a single SLocEntry for the "foo == bar" chunk and locations 739226890Sdim/// for the 'foo', '==', 'bar' tokens will point inside that chunk. 740226890Sdim/// 741226890Sdim/// \arg begin_tokens will be updated to a position past all the found 742226890Sdim/// consecutive tokens. 743226890Sdimstatic void updateConsecutiveMacroArgTokens(SourceManager &SM, 744226890Sdim SourceLocation InstLoc, 745226890Sdim Token *&begin_tokens, 746226890Sdim Token * end_tokens) { 747226890Sdim assert(begin_tokens < end_tokens); 748226890Sdim 749226890Sdim SourceLocation FirstLoc = begin_tokens->getLocation(); 750226890Sdim SourceLocation CurLoc = FirstLoc; 751226890Sdim 752226890Sdim // Compare the source location offset of tokens and group together tokens that 753226890Sdim // are close, even if their locations point to different FileIDs. e.g. 754226890Sdim // 755226890Sdim // |bar | foo | cake | (3 tokens from 3 consecutive FileIDs) 756226890Sdim // ^ ^ 757226890Sdim // |bar foo cake| (one SLocEntry chunk for all tokens) 758226890Sdim // 759226890Sdim // we can perform this "merge" since the token's spelling location depends 760226890Sdim // on the relative offset. 761226890Sdim 762226890Sdim Token *NextTok = begin_tokens + 1; 763226890Sdim for (; NextTok < end_tokens; ++NextTok) { 764252723Sdim SourceLocation NextLoc = NextTok->getLocation(); 765252723Sdim if (CurLoc.isFileID() != NextLoc.isFileID()) 766252723Sdim break; // Token from different kind of FileID. 767252723Sdim 768226890Sdim int RelOffs; 769252723Sdim if (!SM.isInSameSLocAddrSpace(CurLoc, NextLoc, &RelOffs)) 770226890Sdim break; // Token from different local/loaded location. 771226890Sdim // Check that token is not before the previous token or more than 50 772226890Sdim // "characters" away. 773226890Sdim if (RelOffs < 0 || RelOffs > 50) 774226890Sdim break; 775252723Sdim CurLoc = NextLoc; 776224145Sdim } 777224145Sdim 778226890Sdim // For the consecutive tokens, find the length of the SLocEntry to contain 779226890Sdim // all of them. 780226890Sdim Token &LastConsecutiveTok = *(NextTok-1); 781226890Sdim int LastRelOffs = 0; 782226890Sdim SM.isInSameSLocAddrSpace(FirstLoc, LastConsecutiveTok.getLocation(), 783226890Sdim &LastRelOffs); 784226890Sdim unsigned FullLength = LastRelOffs + LastConsecutiveTok.getLength(); 785226890Sdim 786226890Sdim // Create a macro expansion SLocEntry that will "contain" all of the tokens. 787226890Sdim SourceLocation Expansion = 788226890Sdim SM.createMacroArgExpansionLoc(FirstLoc, InstLoc,FullLength); 789226890Sdim 790226890Sdim // Change the location of the tokens from the spelling location to the new 791226890Sdim // expanded location. 792226890Sdim for (; begin_tokens < NextTok; ++begin_tokens) { 793226890Sdim Token &Tok = *begin_tokens; 794226890Sdim int RelOffs = 0; 795226890Sdim SM.isInSameSLocAddrSpace(FirstLoc, Tok.getLocation(), &RelOffs); 796226890Sdim Tok.setLocation(Expansion.getLocWithOffset(RelOffs)); 797226890Sdim } 798224145Sdim} 799226890Sdim 800226890Sdim/// \brief Creates SLocEntries and updates the locations of macro argument 801226890Sdim/// tokens to their new expanded locations. 802226890Sdim/// 803226890Sdim/// \param ArgIdDefLoc the location of the macro argument id inside the macro 804226890Sdim/// definition. 805226890Sdim/// \param Tokens the macro argument tokens to update. 806226890Sdimvoid TokenLexer::updateLocForMacroArgTokens(SourceLocation ArgIdSpellLoc, 807226890Sdim Token *begin_tokens, 808226890Sdim Token *end_tokens) { 809226890Sdim SourceManager &SM = PP.getSourceManager(); 810226890Sdim 811226890Sdim SourceLocation InstLoc = 812226890Sdim getExpansionLocForMacroDefLoc(ArgIdSpellLoc); 813226890Sdim 814226890Sdim while (begin_tokens < end_tokens) { 815226890Sdim // If there's only one token just create a SLocEntry for it. 816226890Sdim if (end_tokens - begin_tokens == 1) { 817226890Sdim Token &Tok = *begin_tokens; 818226890Sdim Tok.setLocation(SM.createMacroArgExpansionLoc(Tok.getLocation(), 819226890Sdim InstLoc, 820226890Sdim Tok.getLength())); 821226890Sdim return; 822226890Sdim } 823226890Sdim 824226890Sdim updateConsecutiveMacroArgTokens(SM, InstLoc, begin_tokens, end_tokens); 825226890Sdim } 826226890Sdim} 827263509Sdim 828263509Sdimvoid TokenLexer::PropagateLineStartLeadingSpaceInfo(Token &Result) { 829263509Sdim AtStartOfLine = Result.isAtStartOfLine(); 830263509Sdim HasLeadingSpace = Result.hasLeadingSpace(); 831263509Sdim} 832