// PrintfFormatString.cpp revision 208987
1//= PrintfFormatStrings.cpp - Analysis of printf format strings --*- C++ -*-==// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// Handling of format string in printf and friends. The structure of format 11// strings for fprintf() are described in C99 7.19.6.1. 12// 13//===----------------------------------------------------------------------===// 14 15#include "clang/Analysis/Analyses/PrintfFormatString.h" 16#include "clang/AST/ASTContext.h" 17 18using clang::analyze_printf::ArgTypeResult; 19using clang::analyze_printf::FormatSpecifier; 20using clang::analyze_printf::FormatStringHandler; 21using clang::analyze_printf::OptionalAmount; 22using clang::analyze_printf::PositionContext; 23 24using namespace clang; 25 26namespace { 27class FormatSpecifierResult { 28 FormatSpecifier FS; 29 const char *Start; 30 bool Stop; 31public: 32 FormatSpecifierResult(bool stop = false) 33 : Start(0), Stop(stop) {} 34 FormatSpecifierResult(const char *start, 35 const FormatSpecifier &fs) 36 : FS(fs), Start(start), Stop(false) {} 37 38 39 const char *getStart() const { return Start; } 40 bool shouldStop() const { return Stop; } 41 bool hasValue() const { return Start != 0; } 42 const FormatSpecifier &getValue() const { 43 assert(hasValue()); 44 return FS; 45 } 46 const FormatSpecifier &getValue() { return FS; } 47}; 48} // end anonymous namespace 49 50template <typename T> 51class UpdateOnReturn { 52 T &ValueToUpdate; 53 const T &ValueToCopy; 54public: 55 UpdateOnReturn(T &valueToUpdate, const T &valueToCopy) 56 : ValueToUpdate(valueToUpdate), ValueToCopy(valueToCopy) {} 57 58 ~UpdateOnReturn() { 59 ValueToUpdate = ValueToCopy; 60 } 61}; 62 63//===----------------------------------------------------------------------===// 64// Methods for parsing format strings. 
65//===----------------------------------------------------------------------===// 66 67static OptionalAmount ParseAmount(const char *&Beg, const char *E) { 68 const char *I = Beg; 69 UpdateOnReturn <const char*> UpdateBeg(Beg, I); 70 71 unsigned accumulator = 0; 72 bool hasDigits = false; 73 74 for ( ; I != E; ++I) { 75 char c = *I; 76 if (c >= '0' && c <= '9') { 77 hasDigits = true; 78 accumulator = (accumulator * 10) + (c - '0'); 79 continue; 80 } 81 82 if (hasDigits) 83 return OptionalAmount(OptionalAmount::Constant, accumulator, Beg); 84 85 break; 86 } 87 88 return OptionalAmount(); 89} 90 91static OptionalAmount ParseNonPositionAmount(const char *&Beg, const char *E, 92 unsigned &argIndex) { 93 if (*Beg == '*') { 94 ++Beg; 95 return OptionalAmount(OptionalAmount::Arg, argIndex++, Beg); 96 } 97 98 return ParseAmount(Beg, E); 99} 100 101static OptionalAmount ParsePositionAmount(FormatStringHandler &H, 102 const char *Start, 103 const char *&Beg, const char *E, 104 PositionContext p) { 105 if (*Beg == '*') { 106 const char *I = Beg + 1; 107 const OptionalAmount &Amt = ParseAmount(I, E); 108 109 if (Amt.getHowSpecified() == OptionalAmount::NotSpecified) { 110 H.HandleInvalidPosition(Beg, I - Beg, p); 111 return OptionalAmount(false); 112 } 113 114 if (I== E) { 115 // No more characters left? 116 H.HandleIncompleteFormatSpecifier(Start, E - Start); 117 return OptionalAmount(false); 118 } 119 120 assert(Amt.getHowSpecified() == OptionalAmount::Constant); 121 122 if (*I == '$') { 123 // Special case: '*0$', since this is an easy mistake. 
124 if (Amt.getConstantAmount() == 0) { 125 H.HandleZeroPosition(Beg, I - Beg + 1); 126 return OptionalAmount(false); 127 } 128 129 const char *Tmp = Beg; 130 Beg = ++I; 131 132 return OptionalAmount(OptionalAmount::Arg, Amt.getConstantAmount() - 1, 133 Tmp); 134 } 135 136 H.HandleInvalidPosition(Beg, I - Beg, p); 137 return OptionalAmount(false); 138 } 139 140 return ParseAmount(Beg, E); 141} 142 143static bool ParsePrecision(FormatStringHandler &H, FormatSpecifier &FS, 144 const char *Start, const char *&Beg, const char *E, 145 unsigned *argIndex) { 146 if (argIndex) { 147 FS.setPrecision(ParseNonPositionAmount(Beg, E, *argIndex)); 148 } 149 else { 150 const OptionalAmount Amt = ParsePositionAmount(H, Start, Beg, E, 151 analyze_printf::PrecisionPos); 152 if (Amt.isInvalid()) 153 return true; 154 FS.setPrecision(Amt); 155 } 156 return false; 157} 158 159static bool ParseFieldWidth(FormatStringHandler &H, FormatSpecifier &FS, 160 const char *Start, const char *&Beg, const char *E, 161 unsigned *argIndex) { 162 // FIXME: Support negative field widths. 163 if (argIndex) { 164 FS.setFieldWidth(ParseNonPositionAmount(Beg, E, *argIndex)); 165 } 166 else { 167 const OptionalAmount Amt = ParsePositionAmount(H, Start, Beg, E, 168 analyze_printf::FieldWidthPos); 169 if (Amt.isInvalid()) 170 return true; 171 FS.setFieldWidth(Amt); 172 } 173 return false; 174} 175 176 177static bool ParseArgPosition(FormatStringHandler &H, 178 FormatSpecifier &FS, const char *Start, 179 const char *&Beg, const char *E) { 180 181 using namespace clang::analyze_printf; 182 const char *I = Beg; 183 184 const OptionalAmount &Amt = ParseAmount(I, E); 185 186 if (I == E) { 187 // No more characters left? 188 H.HandleIncompleteFormatSpecifier(Start, E - Start); 189 return true; 190 } 191 192 if (Amt.getHowSpecified() == OptionalAmount::Constant && *(I++) == '$') { 193 // Special case: '%0$', since this is an easy mistake. 
194 if (Amt.getConstantAmount() == 0) { 195 H.HandleZeroPosition(Start, I - Start); 196 return true; 197 } 198 199 FS.setArgIndex(Amt.getConstantAmount() - 1); 200 FS.setUsesPositionalArg(); 201 // Update the caller's pointer if we decided to consume 202 // these characters. 203 Beg = I; 204 return false; 205 } 206 207 return false; 208} 209 210static FormatSpecifierResult ParseFormatSpecifier(FormatStringHandler &H, 211 const char *&Beg, 212 const char *E, 213 unsigned &argIndex, 214 bool FormatExtensions) { 215 216 using namespace clang::analyze_printf; 217 218 const char *I = Beg; 219 const char *Start = 0; 220 UpdateOnReturn <const char*> UpdateBeg(Beg, I); 221 222 // Look for a '%' character that indicates the start of a format specifier. 223 for ( ; I != E ; ++I) { 224 char c = *I; 225 if (c == '\0') { 226 // Detect spurious null characters, which are likely errors. 227 H.HandleNullChar(I); 228 return true; 229 } 230 if (c == '%') { 231 Start = I++; // Record the start of the format specifier. 232 break; 233 } 234 } 235 236 // No format specifier found? 237 if (!Start) 238 return false; 239 240 if (I == E) { 241 // No more characters left? 242 H.HandleIncompleteFormatSpecifier(Start, E - Start); 243 return true; 244 } 245 246 FormatSpecifier FS; 247 if (ParseArgPosition(H, FS, Start, I, E)) 248 return true; 249 250 if (I == E) { 251 // No more characters left? 252 H.HandleIncompleteFormatSpecifier(Start, E - Start); 253 return true; 254 } 255 256 // Look for flags (if any). 257 bool hasMore = true; 258 for ( ; I != E; ++I) { 259 switch (*I) { 260 default: hasMore = false; break; 261 case '-': FS.setIsLeftJustified(); break; 262 case '+': FS.setHasPlusPrefix(); break; 263 case ' ': FS.setHasSpacePrefix(); break; 264 case '#': FS.setHasAlternativeForm(); break; 265 case '0': FS.setHasLeadingZeros(); break; 266 } 267 if (!hasMore) 268 break; 269 } 270 271 if (I == E) { 272 // No more characters left? 
273 H.HandleIncompleteFormatSpecifier(Start, E - Start); 274 return true; 275 } 276 277 // Look for the field width (if any). 278 if (ParseFieldWidth(H, FS, Start, I, E, 279 FS.usesPositionalArg() ? 0 : &argIndex)) 280 return true; 281 282 if (I == E) { 283 // No more characters left? 284 H.HandleIncompleteFormatSpecifier(Start, E - Start); 285 return true; 286 } 287 288 // Look for the precision (if any). 289 if (*I == '.') { 290 ++I; 291 if (I == E) { 292 H.HandleIncompleteFormatSpecifier(Start, E - Start); 293 return true; 294 } 295 296 if (ParsePrecision(H, FS, Start, I, E, 297 FS.usesPositionalArg() ? 0 : &argIndex)) 298 return true; 299 300 if (I == E) { 301 // No more characters left? 302 H.HandleIncompleteFormatSpecifier(Start, E - Start); 303 return true; 304 } 305 } 306 307 // Look for the length modifier. 308 LengthModifier lm = None; 309 switch (*I) { 310 default: 311 break; 312 case 'h': 313 ++I; 314 lm = (I != E && *I == 'h') ? ++I, AsChar : AsShort; 315 break; 316 case 'l': 317 ++I; 318 lm = (I != E && *I == 'l') ? ++I, AsLongLong : AsLong; 319 break; 320 case 'j': lm = AsIntMax; ++I; break; 321 case 'z': lm = AsSizeT; ++I; break; 322 case 't': lm = AsPtrDiff; ++I; break; 323 case 'L': lm = AsLongDouble; ++I; break; 324 case 'q': lm = AsLongLong; ++I; break; 325 } 326 FS.setLengthModifier(lm); 327 328 if (I == E) { 329 // No more characters left? 330 H.HandleIncompleteFormatSpecifier(Start, E - Start); 331 return true; 332 } 333 334 if (*I == '\0') { 335 // Detect spurious null characters, which are likely errors. 336 H.HandleNullChar(I); 337 return true; 338 } 339 340 // Finally, look for the conversion specifier. 341 const char *conversionPosition = I++; 342 ConversionSpecifier::Kind k = ConversionSpecifier::InvalidSpecifier; 343 switch (*conversionPosition) { 344 default: 345 break; 346 // C99: 7.19.6.1 (section 8). 
347 case '%': k = ConversionSpecifier::PercentArg; break; 348 case 'A': k = ConversionSpecifier::AArg; break; 349 case 'E': k = ConversionSpecifier::EArg; break; 350 case 'F': k = ConversionSpecifier::FArg; break; 351 case 'G': k = ConversionSpecifier::GArg; break; 352 case 'X': k = ConversionSpecifier::XArg; break; 353 case 'a': k = ConversionSpecifier::aArg; break; 354 case 'c': k = ConversionSpecifier::IntAsCharArg; break; 355 case 'd': k = ConversionSpecifier::dArg; break; 356 case 'e': k = ConversionSpecifier::eArg; break; 357 case 'f': k = ConversionSpecifier::fArg; break; 358 case 'g': k = ConversionSpecifier::gArg; break; 359 case 'i': k = ConversionSpecifier::iArg; break; 360 case 'n': k = ConversionSpecifier::OutIntPtrArg; break; 361 case 'o': k = ConversionSpecifier::oArg; break; 362 case 'p': k = ConversionSpecifier::VoidPtrArg; break; 363 case 's': k = ConversionSpecifier::CStrArg; break; 364 case 'u': k = ConversionSpecifier::uArg; break; 365 case 'x': k = ConversionSpecifier::xArg; break; 366 // Mac OS X (unicode) specific 367 case 'C': k = ConversionSpecifier::CArg; break; 368 case 'S': k = ConversionSpecifier::UnicodeStrArg; break; 369 // Objective-C. 370 case '@': k = ConversionSpecifier::ObjCObjArg; break; 371 // Glibc specific. 
372 case 'm': k = ConversionSpecifier::PrintErrno; break; 373 // FreeBSD format extensions 374 case 'b': if (FormatExtensions) k = ConversionSpecifier::bArg; break; /* check for int and then char * */ 375 case 'r': if (FormatExtensions) k = ConversionSpecifier::xArg; break; 376 case 'y': if (FormatExtensions) k = ConversionSpecifier::iArg; break; 377 case 'D': if (FormatExtensions) k = ConversionSpecifier::DArg; break; /* check for u_char * pointer and a char * string */ 378 } 379 ConversionSpecifier CS(conversionPosition, k); 380 FS.setConversionSpecifier(CS); 381 if (CS.consumesDataArgument() && !FS.usesPositionalArg()) 382 FS.setArgIndex(argIndex++); 383 // FreeBSD extension 384 if (k == ConversionSpecifier::bArg || k == ConversionSpecifier::DArg) 385 argIndex++; 386 387 if (k == ConversionSpecifier::InvalidSpecifier) { 388 // Assume the conversion takes one argument. 389 return !H.HandleInvalidConversionSpecifier(FS, Beg, I - Beg); 390 } 391 return FormatSpecifierResult(Start, FS); 392} 393 394bool clang::analyze_printf::ParseFormatString(FormatStringHandler &H, 395 const char *I, const char *E, bool FormatExtensions) { 396 397 unsigned argIndex = 0; 398 399 // Keep looking for a format specifier until we have exhausted the string. 400 while (I != E) { 401 const FormatSpecifierResult &FSR = ParseFormatSpecifier(H, I, E, argIndex, FormatExtensions); 402 // Did a fail-stop error of any kind occur when parsing the specifier? 403 // If so, don't do any more processing. 404 if (FSR.shouldStop()) 405 return true;; 406 // Did we exhaust the string or encounter an error that 407 // we can recover from? 408 if (!FSR.hasValue()) 409 continue; 410 // We have a format specifier. Pass it to the callback. 
411 if (!H.HandleFormatSpecifier(FSR.getValue(), FSR.getStart(), 412 I - FSR.getStart())) 413 return true; 414 } 415 assert(I == E && "Format string not exhausted"); 416 return false; 417} 418 419FormatStringHandler::~FormatStringHandler() {} 420 421//===----------------------------------------------------------------------===// 422// Methods on ArgTypeResult. 423//===----------------------------------------------------------------------===// 424 425bool ArgTypeResult::matchesType(ASTContext &C, QualType argTy) const { 426 assert(isValid()); 427 428 if (K == UnknownTy) 429 return true; 430 431 if (K == SpecificTy) { 432 argTy = C.getCanonicalType(argTy).getUnqualifiedType(); 433 434 if (T == argTy) 435 return true; 436 437 if (const BuiltinType *BT = argTy->getAs<BuiltinType>()) 438 switch (BT->getKind()) { 439 default: 440 break; 441 case BuiltinType::Char_S: 442 case BuiltinType::SChar: 443 return T == C.UnsignedCharTy; 444 case BuiltinType::Char_U: 445 case BuiltinType::UChar: 446 return T == C.SignedCharTy; 447 case BuiltinType::Short: 448 return T == C.UnsignedShortTy; 449 case BuiltinType::UShort: 450 return T == C.ShortTy; 451 case BuiltinType::Int: 452 return T == C.UnsignedIntTy; 453 case BuiltinType::UInt: 454 return T == C.IntTy; 455 case BuiltinType::Long: 456 return T == C.UnsignedLongTy; 457 case BuiltinType::ULong: 458 return T == C.LongTy; 459 case BuiltinType::LongLong: 460 return T == C.UnsignedLongLongTy; 461 case BuiltinType::ULongLong: 462 return T == C.LongLongTy; 463 } 464 465 return false; 466 } 467 468 if (K == CStrTy) { 469 const PointerType *PT = argTy->getAs<PointerType>(); 470 if (!PT) 471 return false; 472 473 QualType pointeeTy = PT->getPointeeType(); 474 475 if (const BuiltinType *BT = pointeeTy->getAs<BuiltinType>()) 476 switch (BT->getKind()) { 477 case BuiltinType::Void: 478 case BuiltinType::Char_U: 479 case BuiltinType::UChar: 480 case BuiltinType::Char_S: 481 case BuiltinType::SChar: 482 return true; 483 default: 484 break; 485 
} 486 487 return false; 488 } 489 490 if (K == WCStrTy) { 491 const PointerType *PT = argTy->getAs<PointerType>(); 492 if (!PT) 493 return false; 494 495 QualType pointeeTy = 496 C.getCanonicalType(PT->getPointeeType()).getUnqualifiedType(); 497 498 return pointeeTy == C.getWCharType(); 499 } 500 501 return false; 502} 503 504QualType ArgTypeResult::getRepresentativeType(ASTContext &C) const { 505 assert(isValid()); 506 if (K == SpecificTy) 507 return T; 508 if (K == CStrTy) 509 return C.getPointerType(C.CharTy); 510 if (K == WCStrTy) 511 return C.getPointerType(C.getWCharType()); 512 if (K == ObjCPointerTy) 513 return C.ObjCBuiltinIdTy; 514 515 return QualType(); 516} 517 518//===----------------------------------------------------------------------===// 519// Methods on OptionalAmount. 520//===----------------------------------------------------------------------===// 521 522ArgTypeResult OptionalAmount::getArgType(ASTContext &Ctx) const { 523 return Ctx.IntTy; 524} 525 526//===----------------------------------------------------------------------===// 527// Methods on FormatSpecifier. 528//===----------------------------------------------------------------------===// 529 530ArgTypeResult FormatSpecifier::getArgType(ASTContext &Ctx) const { 531 if (!CS.consumesDataArgument()) 532 return ArgTypeResult::Invalid(); 533 534 if (CS.isIntArg()) 535 switch (LM) { 536 case AsLongDouble: 537 return ArgTypeResult::Invalid(); 538 case None: return Ctx.IntTy; 539 case AsChar: return Ctx.SignedCharTy; 540 case AsShort: return Ctx.ShortTy; 541 case AsLong: return Ctx.LongTy; 542 case AsLongLong: return Ctx.LongLongTy; 543 case AsIntMax: 544 // FIXME: Return unknown for now. 
545 return ArgTypeResult(); 546 case AsSizeT: return Ctx.getSizeType(); 547 case AsPtrDiff: return Ctx.getPointerDiffType(); 548 } 549 550 if (CS.isUIntArg()) 551 switch (LM) { 552 case AsLongDouble: 553 return ArgTypeResult::Invalid(); 554 case None: return Ctx.UnsignedIntTy; 555 case AsChar: return Ctx.UnsignedCharTy; 556 case AsShort: return Ctx.UnsignedShortTy; 557 case AsLong: return Ctx.UnsignedLongTy; 558 case AsLongLong: return Ctx.UnsignedLongLongTy; 559 case AsIntMax: 560 // FIXME: Return unknown for now. 561 return ArgTypeResult(); 562 case AsSizeT: 563 // FIXME: How to get the corresponding unsigned 564 // version of size_t? 565 return ArgTypeResult(); 566 case AsPtrDiff: 567 // FIXME: How to get the corresponding unsigned 568 // version of ptrdiff_t? 569 return ArgTypeResult(); 570 } 571 572 if (CS.isDoubleArg()) { 573 if (LM == AsLongDouble) 574 return Ctx.LongDoubleTy; 575 return Ctx.DoubleTy; 576 } 577 578 switch (CS.getKind()) { 579 case ConversionSpecifier::CStrArg: 580 return ArgTypeResult(LM == AsWideChar ? ArgTypeResult::WCStrTy : ArgTypeResult::CStrTy); 581 case ConversionSpecifier::UnicodeStrArg: 582 // FIXME: This appears to be Mac OS X specific. 583 return ArgTypeResult::WCStrTy; 584 case ConversionSpecifier::CArg: 585 return Ctx.WCharTy; 586 default: 587 break; 588 } 589 590 // FIXME: Handle other cases. 591 return ArgTypeResult(); 592} 593 594