// MacroArgs.cpp revision 200583
1//===--- TokenLexer.cpp - Lex from a token stream -------------------------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This file implements the TokenLexer interface. 11// 12//===----------------------------------------------------------------------===// 13 14#include "MacroArgs.h" 15#include "clang/Lex/MacroInfo.h" 16#include "clang/Lex/Preprocessor.h" 17#include "clang/Lex/LexDiagnostic.h" 18using namespace clang; 19 20/// MacroArgs ctor function - This destroys the vector passed in. 21MacroArgs *MacroArgs::create(const MacroInfo *MI, 22 const Token *UnexpArgTokens, 23 unsigned NumToks, bool VarargsElided, 24 Preprocessor &PP) { 25 assert(MI->isFunctionLike() && 26 "Can't have args for an object-like macro!"); 27 28 // Allocate memory for the MacroArgs object with the lexer tokens at the end. 29 MacroArgs *Result = (MacroArgs*)malloc(sizeof(MacroArgs) + 30 NumToks*sizeof(Token)); 31 // Construct the macroargs object. 32 new (Result) MacroArgs(NumToks, VarargsElided); 33 34 // Copy the actual unexpanded tokens to immediately after the result ptr. 35 if (NumToks) 36 memcpy(const_cast<Token*>(Result->getUnexpArgument(0)), 37 UnexpArgTokens, NumToks*sizeof(Token)); 38 39 return Result; 40} 41 42/// destroy - Destroy and deallocate the memory for this object. 43/// 44void MacroArgs::destroy(Preprocessor &PP) { 45 // Run the dtor to deallocate the vectors. 46 this->~MacroArgs(); 47 // Release the memory for the object. 48 free(this); 49} 50 51/// deallocate - This should only be called by the Preprocessor when managing 52/// its freelist. 53MacroArgs *MacroArgs::deallocate() { 54 MacroArgs *Next = ArgCache; 55 56 // Run the dtor to deallocate the vectors. 57 this->~MacroArgs(); 58 // Release the memory for the object. 
59 free(this); 60 61 return Next; 62} 63 64 65/// getArgLength - Given a pointer to an expanded or unexpanded argument, 66/// return the number of tokens, not counting the EOF, that make up the 67/// argument. 68unsigned MacroArgs::getArgLength(const Token *ArgPtr) { 69 unsigned NumArgTokens = 0; 70 for (; ArgPtr->isNot(tok::eof); ++ArgPtr) 71 ++NumArgTokens; 72 return NumArgTokens; 73} 74 75 76/// getUnexpArgument - Return the unexpanded tokens for the specified formal. 77/// 78const Token *MacroArgs::getUnexpArgument(unsigned Arg) const { 79 // The unexpanded argument tokens start immediately after the MacroArgs object 80 // in memory. 81 const Token *Start = (const Token *)(this+1); 82 const Token *Result = Start; 83 // Scan to find Arg. 84 for (; Arg; ++Result) { 85 assert(Result < Start+NumUnexpArgTokens && "Invalid arg #"); 86 if (Result->is(tok::eof)) 87 --Arg; 88 } 89 assert(Result < Start+NumUnexpArgTokens && "Invalid arg #"); 90 return Result; 91} 92 93 94/// ArgNeedsPreexpansion - If we can prove that the argument won't be affected 95/// by pre-expansion, return false. Otherwise, conservatively return true. 96bool MacroArgs::ArgNeedsPreexpansion(const Token *ArgTok, 97 Preprocessor &PP) const { 98 // If there are no identifiers in the argument list, or if the identifiers are 99 // known to not be macros, pre-expansion won't modify it. 100 for (; ArgTok->isNot(tok::eof); ++ArgTok) 101 if (IdentifierInfo *II = ArgTok->getIdentifierInfo()) { 102 if (II->hasMacroDefinition() && PP.getMacroInfo(II)->isEnabled()) 103 // Return true even though the macro could be a function-like macro 104 // without a following '(' token. 105 return true; 106 } 107 return false; 108} 109 110/// getPreExpArgument - Return the pre-expanded form of the specified 111/// argument. 
112const std::vector<Token> & 113MacroArgs::getPreExpArgument(unsigned Arg, Preprocessor &PP) { 114 assert(Arg < NumUnexpArgTokens && "Invalid argument number!"); 115 116 // If we have already computed this, return it. 117 if (PreExpArgTokens.empty()) 118 PreExpArgTokens.resize(NumUnexpArgTokens); 119 120 std::vector<Token> &Result = PreExpArgTokens[Arg]; 121 if (!Result.empty()) return Result; 122 123 const Token *AT = getUnexpArgument(Arg); 124 unsigned NumToks = getArgLength(AT)+1; // Include the EOF. 125 126 // Otherwise, we have to pre-expand this argument, populating Result. To do 127 // this, we set up a fake TokenLexer to lex from the unexpanded argument 128 // list. With this installed, we lex expanded tokens until we hit the EOF 129 // token at the end of the unexp list. 130 PP.EnterTokenStream(AT, NumToks, false /*disable expand*/, 131 false /*owns tokens*/); 132 133 // Lex all of the macro-expanded tokens into Result. 134 do { 135 Result.push_back(Token()); 136 Token &Tok = Result.back(); 137 PP.Lex(Tok); 138 } while (Result.back().isNot(tok::eof)); 139 140 // Pop the token stream off the top of the stack. We know that the internal 141 // pointer inside of it is to the "end" of the token stream, but the stack 142 // will not otherwise be popped until the next token is lexed. The problem is 143 // that the token may be lexed sometime after the vector of tokens itself is 144 // destroyed, which would be badness. 145 PP.RemoveTopOfLexerStack(); 146 return Result; 147} 148 149 150/// StringifyArgument - Implement C99 6.10.3.2p2, converting a sequence of 151/// tokens into the literal string token that should be produced by the C # 152/// preprocessor operator. If Charify is true, then it should be turned into 153/// a character literal for the Microsoft charize (#@) extension. 
154/// 155Token MacroArgs::StringifyArgument(const Token *ArgToks, 156 Preprocessor &PP, bool Charify) { 157 Token Tok; 158 Tok.startToken(); 159 Tok.setKind(tok::string_literal); 160 161 const Token *ArgTokStart = ArgToks; 162 163 // Stringify all the tokens. 164 llvm::SmallString<128> Result; 165 Result += "\""; 166 167 bool isFirst = true; 168 for (; ArgToks->isNot(tok::eof); ++ArgToks) { 169 const Token &Tok = *ArgToks; 170 if (!isFirst && (Tok.hasLeadingSpace() || Tok.isAtStartOfLine())) 171 Result += ' '; 172 isFirst = false; 173 174 // If this is a string or character constant, escape the token as specified 175 // by 6.10.3.2p2. 176 if (Tok.is(tok::string_literal) || // "foo" 177 Tok.is(tok::wide_string_literal) || // L"foo" 178 Tok.is(tok::char_constant)) { // 'x' and L'x'. 179 std::string Str = Lexer::Stringify(PP.getSpelling(Tok)); 180 Result.append(Str.begin(), Str.end()); 181 } else { 182 // Otherwise, just append the token. Do some gymnastics to get the token 183 // in place and avoid copies where possible. 184 unsigned CurStrLen = Result.size(); 185 Result.resize(CurStrLen+Tok.getLength()); 186 const char *BufPtr = &Result[CurStrLen]; 187 unsigned ActualTokLen = PP.getSpelling(Tok, BufPtr); 188 189 // If getSpelling returned a pointer to an already uniqued version of the 190 // string instead of filling in BufPtr, memcpy it onto our string. 191 if (BufPtr != &Result[CurStrLen]) 192 memcpy(&Result[CurStrLen], BufPtr, ActualTokLen); 193 194 // If the token was dirty, the spelling may be shorter than the token. 195 if (ActualTokLen != Tok.getLength()) 196 Result.resize(CurStrLen+ActualTokLen); 197 } 198 } 199 200 // If the last character of the string is a \, and if it isn't escaped, this 201 // is an invalid string literal, diagnose it as specified in C99. 202 if (Result.back() == '\\') { 203 // Count the number of consequtive \ characters. If even, then they are 204 // just escaped backslashes, otherwise it's an error. 
205 unsigned FirstNonSlash = Result.size()-2; 206 // Guaranteed to find the starting " if nothing else. 207 while (Result[FirstNonSlash] == '\\') 208 --FirstNonSlash; 209 if ((Result.size()-1-FirstNonSlash) & 1) { 210 // Diagnose errors for things like: #define F(X) #X / F(\) 211 PP.Diag(ArgToks[-1], diag::pp_invalid_string_literal); 212 Result.pop_back(); // remove one of the \'s. 213 } 214 } 215 Result += '"'; 216 217 // If this is the charify operation and the result is not a legal character 218 // constant, diagnose it. 219 if (Charify) { 220 // First step, turn double quotes into single quotes: 221 Result[0] = '\''; 222 Result[Result.size()-1] = '\''; 223 224 // Check for bogus character. 225 bool isBad = false; 226 if (Result.size() == 3) 227 isBad = Result[1] == '\''; // ''' is not legal. '\' already fixed above. 228 else 229 isBad = (Result.size() != 4 || Result[1] != '\\'); // Not '\x' 230 231 if (isBad) { 232 PP.Diag(ArgTokStart[0], diag::err_invalid_character_to_charify); 233 Result = "' '"; // Use something arbitrary, but legal. 234 } 235 } 236 237 PP.CreateString(&Result[0], Result.size(), Tok); 238 return Tok; 239} 240 241/// getStringifiedArgument - Compute, cache, and return the specified argument 242/// that has been 'stringified' as required by the # operator. 243const Token &MacroArgs::getStringifiedArgument(unsigned ArgNo, 244 Preprocessor &PP) { 245 assert(ArgNo < NumUnexpArgTokens && "Invalid argument number!"); 246 if (StringifiedArgs.empty()) { 247 StringifiedArgs.resize(getNumArguments()); 248 memset(&StringifiedArgs[0], 0, 249 sizeof(StringifiedArgs[0])*getNumArguments()); 250 } 251 if (StringifiedArgs[ArgNo].isNot(tok::string_literal)) 252 StringifiedArgs[ArgNo] = StringifyArgument(getUnexpArgument(ArgNo), PP); 253 return StringifiedArgs[ArgNo]; 254} 255