PrintfFormatString.cpp revision 203955
1//= PrintfFormatStrings.cpp - Analysis of printf format strings --*- C++ -*-==// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// Handling of format string in printf and friends. The structure of format 11// strings for fprintf() are described in C99 7.19.6.1. 12// 13//===----------------------------------------------------------------------===// 14 15#include "clang/Analysis/Analyses/PrintfFormatString.h" 16#include "clang/AST/ASTContext.h" 17 18using clang::analyze_printf::FormatSpecifier; 19using clang::analyze_printf::OptionalAmount; 20using clang::analyze_printf::ArgTypeResult; 21using clang::analyze_printf::FormatStringHandler; 22using namespace clang; 23 24namespace { 25class FormatSpecifierResult { 26 FormatSpecifier FS; 27 const char *Start; 28 bool Stop; 29public: 30 FormatSpecifierResult(bool stop = false) 31 : Start(0), Stop(stop) {} 32 FormatSpecifierResult(const char *start, 33 const FormatSpecifier &fs) 34 : FS(fs), Start(start), Stop(false) {} 35 36 37 const char *getStart() const { return Start; } 38 bool shouldStop() const { return Stop; } 39 bool hasValue() const { return Start != 0; } 40 const FormatSpecifier &getValue() const { 41 assert(hasValue()); 42 return FS; 43 } 44 const FormatSpecifier &getValue() { return FS; } 45}; 46} // end anonymous namespace 47 48template <typename T> 49class UpdateOnReturn { 50 T &ValueToUpdate; 51 const T &ValueToCopy; 52public: 53 UpdateOnReturn(T &valueToUpdate, const T &valueToCopy) 54 : ValueToUpdate(valueToUpdate), ValueToCopy(valueToCopy) {} 55 56 ~UpdateOnReturn() { 57 ValueToUpdate = ValueToCopy; 58 } 59}; 60 61//===----------------------------------------------------------------------===// 62// Methods for parsing format strings. 63//===----------------------------------------------------------------------===// 64 65static OptionalAmount ParseAmount(const char *&Beg, const char *E) { 66 const char *I = Beg; 67 UpdateOnReturn <const char*> UpdateBeg(Beg, I); 68 69 bool foundDigits = false; 70 unsigned accumulator = 0; 71 72 for ( ; I != E; ++I) { 73 char c = *I; 74 if (c >= '0' && c <= '9') { 75 foundDigits = true; 76 accumulator += (accumulator * 10) + (c - '0'); 77 continue; 78 } 79 80 if (foundDigits) 81 return OptionalAmount(accumulator, Beg); 82 83 if (c == '*') { 84 ++I; 85 return OptionalAmount(OptionalAmount::Arg, Beg); 86 } 87 88 break; 89 } 90 91 return OptionalAmount(); 92} 93 94static FormatSpecifierResult ParseFormatSpecifier(FormatStringHandler &H, 95 const char *&Beg, 96 const char *E) { 97 98 using namespace clang::analyze_printf; 99 100 const char *I = Beg; 101 const char *Start = 0; 102 UpdateOnReturn <const char*> UpdateBeg(Beg, I); 103 104 // Look for a '%' character that indicates the start of a format specifier. 105 for ( ; I != E ; ++I) { 106 char c = *I; 107 if (c == '\0') { 108 // Detect spurious null characters, which are likely errors. 109 H.HandleNullChar(I); 110 return true; 111 } 112 if (c == '%') { 113 Start = I++; // Record the start of the format specifier. 114 break; 115 } 116 } 117 118 // No format specifier found? 119 if (!Start) 120 return false; 121 122 if (I == E) { 123 // No more characters left? 124 H.HandleIncompleteFormatSpecifier(Start, E - Start); 125 return true; 126 } 127 128 FormatSpecifier FS; 129 130 // Look for flags (if any). 131 bool hasMore = true; 132 for ( ; I != E; ++I) { 133 switch (*I) { 134 default: hasMore = false; break; 135 case '-': FS.setIsLeftJustified(); break; 136 case '+': FS.setHasPlusPrefix(); break; 137 case ' ': FS.setHasSpacePrefix(); break; 138 case '#': FS.setHasAlternativeForm(); break; 139 case '0': FS.setHasLeadingZeros(); break; 140 } 141 if (!hasMore) 142 break; 143 } 144 145 if (I == E) { 146 // No more characters left? 147 H.HandleIncompleteFormatSpecifier(Start, E - Start); 148 return true; 149 } 150 151 // Look for the field width (if any). 152 FS.setFieldWidth(ParseAmount(I, E)); 153 154 if (I == E) { 155 // No more characters left? 156 H.HandleIncompleteFormatSpecifier(Start, E - Start); 157 return true; 158 } 159 160 // Look for the precision (if any). 161 if (*I == '.') { 162 ++I; 163 if (I == E) { 164 H.HandleIncompleteFormatSpecifier(Start, E - Start); 165 return true; 166 } 167 168 FS.setPrecision(ParseAmount(I, E)); 169 170 if (I == E) { 171 // No more characters left? 172 H.HandleIncompleteFormatSpecifier(Start, E - Start); 173 return true; 174 } 175 } 176 177 // Look for the length modifier. 178 LengthModifier lm = None; 179 switch (*I) { 180 default: 181 break; 182 case 'h': 183 ++I; 184 lm = (I != E && *I == 'h') ? ++I, AsChar : AsShort; 185 break; 186 case 'l': 187 ++I; 188 lm = (I != E && *I == 'l') ? ++I, AsLongLong : AsLong; 189 break; 190 case 'j': lm = AsIntMax; ++I; break; 191 case 'z': lm = AsSizeT; ++I; break; 192 case 't': lm = AsPtrDiff; ++I; break; 193 case 'L': lm = AsLongDouble; ++I; break; 194 case 'q': lm = AsLongLong; ++I; break; 195 } 196 FS.setLengthModifier(lm); 197 198 if (I == E) { 199 // No more characters left? 200 H.HandleIncompleteFormatSpecifier(Start, E - Start); 201 return true; 202 } 203 204 if (*I == '\0') { 205 // Detect spurious null characters, which are likely errors. 206 H.HandleNullChar(I); 207 return true; 208 } 209 210 // Finally, look for the conversion specifier. 211 const char *conversionPosition = I++; 212 ConversionSpecifier::Kind k = ConversionSpecifier::InvalidSpecifier; 213 switch (*conversionPosition) { 214 default: 215 break; 216 // C99: 7.19.6.1 (section 8). 217 case 'd': k = ConversionSpecifier::dArg; break; 218 case 'i': k = ConversionSpecifier::iArg; break; 219 case 'o': k = ConversionSpecifier::oArg; break; 220 case 'u': k = ConversionSpecifier::uArg; break; 221 case 'x': k = ConversionSpecifier::xArg; break; 222 case 'X': k = ConversionSpecifier::XArg; break; 223 case 'f': k = ConversionSpecifier::fArg; break; 224 case 'F': k = ConversionSpecifier::FArg; break; 225 case 'e': k = ConversionSpecifier::eArg; break; 226 case 'E': k = ConversionSpecifier::EArg; break; 227 case 'g': k = ConversionSpecifier::gArg; break; 228 case 'G': k = ConversionSpecifier::GArg; break; 229 case 'a': k = ConversionSpecifier::aArg; break; 230 case 'A': k = ConversionSpecifier::AArg; break; 231 case 'c': k = ConversionSpecifier::IntAsCharArg; break; 232 case 's': k = ConversionSpecifier::CStrArg; break; 233 case 'p': k = ConversionSpecifier::VoidPtrArg; break; 234 case 'n': k = ConversionSpecifier::OutIntPtrArg; break; 235 case '%': k = ConversionSpecifier::PercentArg; break; 236 // Objective-C. 237 case '@': k = ConversionSpecifier::ObjCObjArg; break; 238 // Glibc specific. 239 case 'm': k = ConversionSpecifier::PrintErrno; break; 240 } 241 FS.setConversionSpecifier(ConversionSpecifier(conversionPosition, k)); 242 243 if (k == ConversionSpecifier::InvalidSpecifier) { 244 H.HandleInvalidConversionSpecifier(FS, Beg, I - Beg); 245 return false; // Keep processing format specifiers. 246 } 247 return FormatSpecifierResult(Start, FS); 248} 249 250bool clang::analyze_printf::ParseFormatString(FormatStringHandler &H, 251 const char *I, const char *E) { 252 // Keep looking for a format specifier until we have exhausted the string. 253 while (I != E) { 254 const FormatSpecifierResult &FSR = ParseFormatSpecifier(H, I, E); 255 // Did a fail-stop error of any kind occur when parsing the specifier? 256 // If so, don't do any more processing. 257 if (FSR.shouldStop()) 258 return true;; 259 // Did we exhaust the string or encounter an error that 260 // we can recover from? 261 if (!FSR.hasValue()) 262 continue; 263 // We have a format specifier. Pass it to the callback. 264 if (!H.HandleFormatSpecifier(FSR.getValue(), FSR.getStart(), 265 I - FSR.getStart())) 266 return true; 267 } 268 assert(I == E && "Format string not exhausted"); 269 return false; 270} 271 272FormatStringHandler::~FormatStringHandler() {} 273 274//===----------------------------------------------------------------------===// 275// Methods on ArgTypeResult. 276//===----------------------------------------------------------------------===// 277 278bool ArgTypeResult::matchesType(ASTContext &C, QualType argTy) const { 279 assert(isValid()); 280 281 if (K == UnknownTy) 282 return true; 283 284 if (K == SpecificTy) { 285 argTy = C.getCanonicalType(argTy).getUnqualifiedType(); 286 287 if (T == argTy) 288 return true; 289 290 if (const BuiltinType *BT = argTy->getAs<BuiltinType>()) 291 switch (BT->getKind()) { 292 default: 293 break; 294 case BuiltinType::Char_S: 295 case BuiltinType::SChar: 296 return T == C.UnsignedCharTy; 297 case BuiltinType::Char_U: 298 case BuiltinType::UChar: 299 return T == C.SignedCharTy; 300 case BuiltinType::Short: 301 return T == C.UnsignedShortTy; 302 case BuiltinType::UShort: 303 return T == C.ShortTy; 304 case BuiltinType::Int: 305 return T == C.UnsignedIntTy; 306 case BuiltinType::UInt: 307 return T == C.IntTy; 308 case BuiltinType::Long: 309 return T == C.UnsignedLongTy; 310 case BuiltinType::ULong: 311 return T == C.LongTy; 312 case BuiltinType::LongLong: 313 return T == C.UnsignedLongLongTy; 314 case BuiltinType::ULongLong: 315 return T == C.LongLongTy; 316 } 317 318 return false; 319 } 320 321 if (K == CStrTy) { 322 const PointerType *PT = argTy->getAs<PointerType>(); 323 if (!PT) 324 return false; 325 326 QualType pointeeTy = PT->getPointeeType(); 327 328 if (const BuiltinType *BT = pointeeTy->getAs<BuiltinType>()) 329 switch (BT->getKind()) { 330 case BuiltinType::Void: 331 case BuiltinType::Char_U: 332 case BuiltinType::UChar: 333 case BuiltinType::Char_S: 334 case BuiltinType::SChar: 335 return true; 336 default: 337 break; 338 } 339 340 return false; 341 } 342 343 if (K == WCStrTy) { 344 const PointerType *PT = argTy->getAs<PointerType>(); 345 if (!PT) 346 return false; 347 348 QualType pointeeTy = PT->getPointeeType(); 349 return pointeeTy == C.WCharTy; 350 } 351 352 return false; 353} 354 355QualType ArgTypeResult::getRepresentativeType(ASTContext &C) const { 356 assert(isValid()); 357 if (K == SpecificTy) 358 return T; 359 if (K == CStrTy) 360 return C.getPointerType(C.CharTy); 361 if (K == WCStrTy) 362 return C.getPointerType(C.WCharTy); 363 if (K == ObjCPointerTy) 364 return C.ObjCBuiltinIdTy; 365 366 return QualType(); 367} 368 369//===----------------------------------------------------------------------===// 370// Methods on OptionalAmount. 371//===----------------------------------------------------------------------===// 372 373ArgTypeResult OptionalAmount::getArgType(ASTContext &Ctx) const { 374 return Ctx.IntTy; 375} 376 377//===----------------------------------------------------------------------===// 378// Methods on FormatSpecifier. 379//===----------------------------------------------------------------------===// 380 381ArgTypeResult FormatSpecifier::getArgType(ASTContext &Ctx) const { 382 if (!CS.consumesDataArgument()) 383 return ArgTypeResult::Invalid(); 384 385 if (CS.isIntArg()) 386 switch (LM) { 387 case AsLongDouble: 388 return ArgTypeResult::Invalid(); 389 case None: return Ctx.IntTy; 390 case AsChar: return Ctx.SignedCharTy; 391 case AsShort: return Ctx.ShortTy; 392 case AsLong: return Ctx.LongTy; 393 case AsLongLong: return Ctx.LongLongTy; 394 case AsIntMax: 395 // FIXME: Return unknown for now. 396 return ArgTypeResult(); 397 case AsSizeT: return Ctx.getSizeType(); 398 case AsPtrDiff: return Ctx.getPointerDiffType(); 399 } 400 401 if (CS.isUIntArg()) 402 switch (LM) { 403 case AsLongDouble: 404 return ArgTypeResult::Invalid(); 405 case None: return Ctx.UnsignedIntTy; 406 case AsChar: return Ctx.UnsignedCharTy; 407 case AsShort: return Ctx.UnsignedShortTy; 408 case AsLong: return Ctx.UnsignedLongTy; 409 case AsLongLong: return Ctx.UnsignedLongLongTy; 410 case AsIntMax: 411 // FIXME: Return unknown for now. 412 return ArgTypeResult(); 413 case AsSizeT: 414 // FIXME: How to get the corresponding unsigned 415 // version of size_t? 416 return ArgTypeResult(); 417 case AsPtrDiff: 418 // FIXME: How to get the corresponding unsigned 419 // version of ptrdiff_t? 420 return ArgTypeResult(); 421 } 422 423 if (CS.isDoubleArg()) { 424 if (LM == AsLongDouble) 425 return Ctx.LongDoubleTy; 426 return Ctx.DoubleTy; 427 } 428 429 if (CS.getKind() == ConversionSpecifier::CStrArg) 430 return ArgTypeResult(LM == AsWideChar ? ArgTypeResult::WCStrTy 431 : ArgTypeResult::CStrTy); 432 433 // FIXME: Handle other cases. 434 return ArgTypeResult(); 435} 436 437