// MacroArgs.cpp revision 193326
1//===--- TokenLexer.cpp - Lex from a token stream -------------------------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This file implements the TokenLexer interface. 11// 12//===----------------------------------------------------------------------===// 13 14#include "MacroArgs.h" 15#include "clang/Lex/MacroInfo.h" 16#include "clang/Lex/Preprocessor.h" 17#include "clang/Lex/LexDiagnostic.h" 18using namespace clang; 19 20/// MacroArgs ctor function - This destroys the vector passed in. 21MacroArgs *MacroArgs::create(const MacroInfo *MI, 22 const Token *UnexpArgTokens, 23 unsigned NumToks, bool VarargsElided) { 24 assert(MI->isFunctionLike() && 25 "Can't have args for an object-like macro!"); 26 27 // Allocate memory for the MacroArgs object with the lexer tokens at the end. 28 MacroArgs *Result = (MacroArgs*)malloc(sizeof(MacroArgs) + 29 NumToks*sizeof(Token)); 30 // Construct the macroargs object. 31 new (Result) MacroArgs(NumToks, VarargsElided); 32 33 // Copy the actual unexpanded tokens to immediately after the result ptr. 34 if (NumToks) 35 memcpy(const_cast<Token*>(Result->getUnexpArgument(0)), 36 UnexpArgTokens, NumToks*sizeof(Token)); 37 38 return Result; 39} 40 41/// destroy - Destroy and deallocate the memory for this object. 42/// 43void MacroArgs::destroy() { 44 // Run the dtor to deallocate the vectors. 45 this->~MacroArgs(); 46 // Release the memory for the object. 47 free(this); 48} 49 50 51/// getArgLength - Given a pointer to an expanded or unexpanded argument, 52/// return the number of tokens, not counting the EOF, that make up the 53/// argument. 
54unsigned MacroArgs::getArgLength(const Token *ArgPtr) { 55 unsigned NumArgTokens = 0; 56 for (; ArgPtr->isNot(tok::eof); ++ArgPtr) 57 ++NumArgTokens; 58 return NumArgTokens; 59} 60 61 62/// getUnexpArgument - Return the unexpanded tokens for the specified formal. 63/// 64const Token *MacroArgs::getUnexpArgument(unsigned Arg) const { 65 // The unexpanded argument tokens start immediately after the MacroArgs object 66 // in memory. 67 const Token *Start = (const Token *)(this+1); 68 const Token *Result = Start; 69 // Scan to find Arg. 70 for (; Arg; ++Result) { 71 assert(Result < Start+NumUnexpArgTokens && "Invalid arg #"); 72 if (Result->is(tok::eof)) 73 --Arg; 74 } 75 assert(Result < Start+NumUnexpArgTokens && "Invalid arg #"); 76 return Result; 77} 78 79 80/// ArgNeedsPreexpansion - If we can prove that the argument won't be affected 81/// by pre-expansion, return false. Otherwise, conservatively return true. 82bool MacroArgs::ArgNeedsPreexpansion(const Token *ArgTok, 83 Preprocessor &PP) const { 84 // If there are no identifiers in the argument list, or if the identifiers are 85 // known to not be macros, pre-expansion won't modify it. 86 for (; ArgTok->isNot(tok::eof); ++ArgTok) 87 if (IdentifierInfo *II = ArgTok->getIdentifierInfo()) { 88 if (II->hasMacroDefinition() && PP.getMacroInfo(II)->isEnabled()) 89 // Return true even though the macro could be a function-like macro 90 // without a following '(' token. 91 return true; 92 } 93 return false; 94} 95 96/// getPreExpArgument - Return the pre-expanded form of the specified 97/// argument. 98const std::vector<Token> & 99MacroArgs::getPreExpArgument(unsigned Arg, Preprocessor &PP) { 100 assert(Arg < NumUnexpArgTokens && "Invalid argument number!"); 101 102 // If we have already computed this, return it. 
103 if (PreExpArgTokens.empty()) 104 PreExpArgTokens.resize(NumUnexpArgTokens); 105 106 std::vector<Token> &Result = PreExpArgTokens[Arg]; 107 if (!Result.empty()) return Result; 108 109 const Token *AT = getUnexpArgument(Arg); 110 unsigned NumToks = getArgLength(AT)+1; // Include the EOF. 111 112 // Otherwise, we have to pre-expand this argument, populating Result. To do 113 // this, we set up a fake TokenLexer to lex from the unexpanded argument 114 // list. With this installed, we lex expanded tokens until we hit the EOF 115 // token at the end of the unexp list. 116 PP.EnterTokenStream(AT, NumToks, false /*disable expand*/, 117 false /*owns tokens*/); 118 119 // Lex all of the macro-expanded tokens into Result. 120 do { 121 Result.push_back(Token()); 122 Token &Tok = Result.back(); 123 PP.Lex(Tok); 124 } while (Result.back().isNot(tok::eof)); 125 126 // Pop the token stream off the top of the stack. We know that the internal 127 // pointer inside of it is to the "end" of the token stream, but the stack 128 // will not otherwise be popped until the next token is lexed. The problem is 129 // that the token may be lexed sometime after the vector of tokens itself is 130 // destroyed, which would be badness. 131 PP.RemoveTopOfLexerStack(); 132 return Result; 133} 134 135 136/// StringifyArgument - Implement C99 6.10.3.2p2, converting a sequence of 137/// tokens into the literal string token that should be produced by the C # 138/// preprocessor operator. If Charify is true, then it should be turned into 139/// a character literal for the Microsoft charize (#@) extension. 140/// 141Token MacroArgs::StringifyArgument(const Token *ArgToks, 142 Preprocessor &PP, bool Charify) { 143 Token Tok; 144 Tok.startToken(); 145 Tok.setKind(tok::string_literal); 146 147 const Token *ArgTokStart = ArgToks; 148 149 // Stringify all the tokens. 
150 llvm::SmallString<128> Result; 151 Result += "\""; 152 153 bool isFirst = true; 154 for (; ArgToks->isNot(tok::eof); ++ArgToks) { 155 const Token &Tok = *ArgToks; 156 if (!isFirst && (Tok.hasLeadingSpace() || Tok.isAtStartOfLine())) 157 Result += ' '; 158 isFirst = false; 159 160 // If this is a string or character constant, escape the token as specified 161 // by 6.10.3.2p2. 162 if (Tok.is(tok::string_literal) || // "foo" 163 Tok.is(tok::wide_string_literal) || // L"foo" 164 Tok.is(tok::char_constant)) { // 'x' and L'x'. 165 std::string Str = Lexer::Stringify(PP.getSpelling(Tok)); 166 Result.append(Str.begin(), Str.end()); 167 } else { 168 // Otherwise, just append the token. Do some gymnastics to get the token 169 // in place and avoid copies where possible. 170 unsigned CurStrLen = Result.size(); 171 Result.resize(CurStrLen+Tok.getLength()); 172 const char *BufPtr = &Result[CurStrLen]; 173 unsigned ActualTokLen = PP.getSpelling(Tok, BufPtr); 174 175 // If getSpelling returned a pointer to an already uniqued version of the 176 // string instead of filling in BufPtr, memcpy it onto our string. 177 if (BufPtr != &Result[CurStrLen]) 178 memcpy(&Result[CurStrLen], BufPtr, ActualTokLen); 179 180 // If the token was dirty, the spelling may be shorter than the token. 181 if (ActualTokLen != Tok.getLength()) 182 Result.resize(CurStrLen+ActualTokLen); 183 } 184 } 185 186 // If the last character of the string is a \, and if it isn't escaped, this 187 // is an invalid string literal, diagnose it as specified in C99. 188 if (Result.back() == '\\') { 189 // Count the number of consequtive \ characters. If even, then they are 190 // just escaped backslashes, otherwise it's an error. 191 unsigned FirstNonSlash = Result.size()-2; 192 // Guaranteed to find the starting " if nothing else. 
193 while (Result[FirstNonSlash] == '\\') 194 --FirstNonSlash; 195 if ((Result.size()-1-FirstNonSlash) & 1) { 196 // Diagnose errors for things like: #define F(X) #X / F(\) 197 PP.Diag(ArgToks[-1], diag::pp_invalid_string_literal); 198 Result.pop_back(); // remove one of the \'s. 199 } 200 } 201 Result += '"'; 202 203 // If this is the charify operation and the result is not a legal character 204 // constant, diagnose it. 205 if (Charify) { 206 // First step, turn double quotes into single quotes: 207 Result[0] = '\''; 208 Result[Result.size()-1] = '\''; 209 210 // Check for bogus character. 211 bool isBad = false; 212 if (Result.size() == 3) 213 isBad = Result[1] == '\''; // ''' is not legal. '\' already fixed above. 214 else 215 isBad = (Result.size() != 4 || Result[1] != '\\'); // Not '\x' 216 217 if (isBad) { 218 PP.Diag(ArgTokStart[0], diag::err_invalid_character_to_charify); 219 Result = "' '"; // Use something arbitrary, but legal. 220 } 221 } 222 223 PP.CreateString(&Result[0], Result.size(), Tok); 224 return Tok; 225} 226 227/// getStringifiedArgument - Compute, cache, and return the specified argument 228/// that has been 'stringified' as required by the # operator. 229const Token &MacroArgs::getStringifiedArgument(unsigned ArgNo, 230 Preprocessor &PP) { 231 assert(ArgNo < NumUnexpArgTokens && "Invalid argument number!"); 232 if (StringifiedArgs.empty()) { 233 StringifiedArgs.resize(getNumArguments()); 234 memset(&StringifiedArgs[0], 0, 235 sizeof(StringifiedArgs[0])*getNumArguments()); 236 } 237 if (StringifiedArgs[ArgNo].isNot(tok::string_literal)) 238 StringifiedArgs[ArgNo] = StringifyArgument(getUnexpArgument(ArgNo), PP); 239 return StringifiedArgs[ArgNo]; 240} 241