// PrintfFormatString.cpp revision 218893
1//== PrintfFormatString.cpp - Analysis of printf format strings --*- C++ -*-==// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// Handling of format string in printf and friends. The structure of format 11// strings for fprintf() are described in C99 7.19.6.1. 12// 13//===----------------------------------------------------------------------===// 14 15#include "clang/Analysis/Analyses/FormatString.h" 16#include "FormatStringParsing.h" 17 18using clang::analyze_format_string::ArgTypeResult; 19using clang::analyze_format_string::FormatStringHandler; 20using clang::analyze_format_string::LengthModifier; 21using clang::analyze_format_string::OptionalAmount; 22using clang::analyze_format_string::ConversionSpecifier; 23using clang::analyze_printf::PrintfSpecifier; 24 25using namespace clang; 26 27typedef clang::analyze_format_string::SpecifierResult<PrintfSpecifier> 28 PrintfSpecifierResult; 29 30//===----------------------------------------------------------------------===// 31// Methods for parsing format strings. 
32//===----------------------------------------------------------------------===// 33 34using analyze_format_string::ParseNonPositionAmount; 35 36static bool ParsePrecision(FormatStringHandler &H, PrintfSpecifier &FS, 37 const char *Start, const char *&Beg, const char *E, 38 unsigned *argIndex) { 39 if (argIndex) { 40 FS.setPrecision(ParseNonPositionAmount(Beg, E, *argIndex)); 41 } 42 else { 43 const OptionalAmount Amt = ParsePositionAmount(H, Start, Beg, E, 44 analyze_format_string::PrecisionPos); 45 if (Amt.isInvalid()) 46 return true; 47 FS.setPrecision(Amt); 48 } 49 return false; 50} 51 52static PrintfSpecifierResult ParsePrintfSpecifier(FormatStringHandler &H, 53 const char *&Beg, 54 const char *E, 55 unsigned &argIndex, 56 bool FormatExtensions) { 57 58 using namespace clang::analyze_format_string; 59 using namespace clang::analyze_printf; 60 61 const char *I = Beg; 62 const char *Start = 0; 63 UpdateOnReturn <const char*> UpdateBeg(Beg, I); 64 65 // Look for a '%' character that indicates the start of a format specifier. 66 for ( ; I != E ; ++I) { 67 char c = *I; 68 if (c == '\0') { 69 // Detect spurious null characters, which are likely errors. 70 H.HandleNullChar(I); 71 return true; 72 } 73 if (c == '%') { 74 Start = I++; // Record the start of the format specifier. 75 break; 76 } 77 } 78 79 // No format specifier found? 80 if (!Start) 81 return false; 82 83 if (I == E) { 84 // No more characters left? 85 H.HandleIncompleteSpecifier(Start, E - Start); 86 return true; 87 } 88 89 PrintfSpecifier FS; 90 if (ParseArgPosition(H, FS, Start, I, E)) 91 return true; 92 93 if (I == E) { 94 // No more characters left? 95 H.HandleIncompleteSpecifier(Start, E - Start); 96 return true; 97 } 98 99 // Look for flags (if any). 100 bool hasMore = true; 101 for ( ; I != E; ++I) { 102 switch (*I) { 103 default: hasMore = false; break; 104 case '\'': 105 // FIXME: POSIX specific. Always accept? 
106 FS.setHasThousandsGrouping(I); 107 break; 108 case '-': FS.setIsLeftJustified(I); break; 109 case '+': FS.setHasPlusPrefix(I); break; 110 case ' ': FS.setHasSpacePrefix(I); break; 111 case '#': FS.setHasAlternativeForm(I); break; 112 case '0': FS.setHasLeadingZeros(I); break; 113 } 114 if (!hasMore) 115 break; 116 } 117 118 if (I == E) { 119 // No more characters left? 120 H.HandleIncompleteSpecifier(Start, E - Start); 121 return true; 122 } 123 124 // Look for the field width (if any). 125 if (ParseFieldWidth(H, FS, Start, I, E, 126 FS.usesPositionalArg() ? 0 : &argIndex)) 127 return true; 128 129 if (I == E) { 130 // No more characters left? 131 H.HandleIncompleteSpecifier(Start, E - Start); 132 return true; 133 } 134 135 // Look for the precision (if any). 136 if (*I == '.') { 137 ++I; 138 if (I == E) { 139 H.HandleIncompleteSpecifier(Start, E - Start); 140 return true; 141 } 142 143 if (ParsePrecision(H, FS, Start, I, E, 144 FS.usesPositionalArg() ? 0 : &argIndex)) 145 return true; 146 147 if (I == E) { 148 // No more characters left? 149 H.HandleIncompleteSpecifier(Start, E - Start); 150 return true; 151 } 152 } 153 154 // Look for the length modifier. 155 if (ParseLengthModifier(FS, I, E) && I == E) { 156 // No more characters left? 157 H.HandleIncompleteSpecifier(Start, E - Start); 158 return true; 159 } 160 161 if (*I == '\0') { 162 // Detect spurious null characters, which are likely errors. 163 H.HandleNullChar(I); 164 return true; 165 } 166 167 // Finally, look for the conversion specifier. 168 const char *conversionPosition = I++; 169 ConversionSpecifier::Kind k = ConversionSpecifier::InvalidSpecifier; 170 switch (*conversionPosition) { 171 default: 172 break; 173 // C99: 7.19.6.1 (section 8). 
174 case '%': k = ConversionSpecifier::PercentArg; break; 175 case 'A': k = ConversionSpecifier::AArg; break; 176 case 'E': k = ConversionSpecifier::EArg; break; 177 case 'F': k = ConversionSpecifier::FArg; break; 178 case 'G': k = ConversionSpecifier::GArg; break; 179 case 'X': k = ConversionSpecifier::XArg; break; 180 case 'a': k = ConversionSpecifier::aArg; break; 181 case 'c': k = ConversionSpecifier::cArg; break; 182 case 'd': k = ConversionSpecifier::dArg; break; 183 case 'e': k = ConversionSpecifier::eArg; break; 184 case 'f': k = ConversionSpecifier::fArg; break; 185 case 'g': k = ConversionSpecifier::gArg; break; 186 case 'i': k = ConversionSpecifier::iArg; break; 187 case 'n': k = ConversionSpecifier::nArg; break; 188 case 'o': k = ConversionSpecifier::oArg; break; 189 case 'p': k = ConversionSpecifier::pArg; break; 190 case 's': k = ConversionSpecifier::sArg; break; 191 case 'u': k = ConversionSpecifier::uArg; break; 192 case 'x': k = ConversionSpecifier::xArg; break; 193 // POSIX specific. 194 case 'C': k = ConversionSpecifier::CArg; break; 195 case 'S': k = ConversionSpecifier::SArg; break; 196 // Objective-C. 197 case '@': k = ConversionSpecifier::ObjCObjArg; break; 198 // Glibc specific. 
199 case 'm': k = ConversionSpecifier::PrintErrno; break; 200 // FreeBSD format extensions 201 case 'b': if (FormatExtensions) k = ConversionSpecifier::bArg; break; /* check for int and then char * */ 202 case 'r': if (FormatExtensions) k = ConversionSpecifier::rArg; break; 203 case 'y': if (FormatExtensions) k = ConversionSpecifier::iArg; break; 204 case 'D': if (FormatExtensions) k = ConversionSpecifier::DArg; break; /* check for u_char * pointer and a char * string */ 205 } 206 PrintfConversionSpecifier CS(conversionPosition, k); 207 FS.setConversionSpecifier(CS); 208 if (CS.consumesDataArgument() && !FS.usesPositionalArg()) 209 FS.setArgIndex(argIndex++); 210 // FreeBSD extension 211 if (k == ConversionSpecifier::bArg || k == ConversionSpecifier::DArg) 212 argIndex++; 213 214 if (k == ConversionSpecifier::InvalidSpecifier) { 215 // Assume the conversion takes one argument. 216 return !H.HandleInvalidPrintfConversionSpecifier(FS, Start, I - Start); 217 } 218 return PrintfSpecifierResult(Start, FS); 219} 220 221bool clang::analyze_format_string::ParsePrintfString(FormatStringHandler &H, 222 const char *I, 223 const char *E, 224 bool FormatExtensions) { 225 226 unsigned argIndex = 0; 227 228 // Keep looking for a format specifier until we have exhausted the string. 229 while (I != E) { 230 const PrintfSpecifierResult &FSR = ParsePrintfSpecifier(H, I, E, argIndex, 231 FormatExtensions); 232 // Did a fail-stop error of any kind occur when parsing the specifier? 233 // If so, don't do any more processing. 234 if (FSR.shouldStop()) 235 return true;; 236 // Did we exhaust the string or encounter an error that 237 // we can recover from? 238 if (!FSR.hasValue()) 239 continue; 240 // We have a format specifier. Pass it to the callback. 
241 if (!H.HandlePrintfSpecifier(FSR.getValue(), FSR.getStart(), 242 I - FSR.getStart())) 243 return true; 244 } 245 assert(I == E && "Format string not exhausted"); 246 return false; 247} 248 249//===----------------------------------------------------------------------===// 250// Methods on ConversionSpecifier. 251//===----------------------------------------------------------------------===// 252const char *ConversionSpecifier::toString() const { 253 switch (kind) { 254 case dArg: return "d"; 255 case iArg: return "i"; 256 case oArg: return "o"; 257 case uArg: return "u"; 258 case xArg: return "x"; 259 case XArg: return "X"; 260 case fArg: return "f"; 261 case FArg: return "F"; 262 case eArg: return "e"; 263 case EArg: return "E"; 264 case gArg: return "g"; 265 case GArg: return "G"; 266 case aArg: return "a"; 267 case AArg: return "A"; 268 case cArg: return "c"; 269 case sArg: return "s"; 270 case pArg: return "p"; 271 case nArg: return "n"; 272 case PercentArg: return "%"; 273 case ScanListArg: return "["; 274 case InvalidSpecifier: return NULL; 275 276 // MacOS X unicode extensions. 277 case CArg: return "C"; 278 case SArg: return "S"; 279 280 // Objective-C specific specifiers. 281 case ObjCObjArg: return "@"; 282 283 // FreeBSD specific specifiers. 284 case bArg: return "b"; 285 case DArg: return "D"; 286 case rArg: return "r"; 287 288 // GlibC specific specifiers. 289 case PrintErrno: return "m"; 290 } 291 return NULL; 292} 293 294//===----------------------------------------------------------------------===// 295// Methods on PrintfSpecifier. 
296//===----------------------------------------------------------------------===// 297 298ArgTypeResult PrintfSpecifier::getArgType(ASTContext &Ctx) const { 299 const PrintfConversionSpecifier &CS = getConversionSpecifier(); 300 301 if (!CS.consumesDataArgument()) 302 return ArgTypeResult::Invalid(); 303 304 if (CS.getKind() == ConversionSpecifier::cArg) 305 switch (LM.getKind()) { 306 case LengthModifier::None: return Ctx.IntTy; 307 case LengthModifier::AsLong: return ArgTypeResult::WIntTy; 308 default: 309 return ArgTypeResult::Invalid(); 310 } 311 312 if (CS.isIntArg()) 313 switch (LM.getKind()) { 314 case LengthModifier::AsLongDouble: 315 return ArgTypeResult::Invalid(); 316 case LengthModifier::None: return Ctx.IntTy; 317 case LengthModifier::AsChar: return Ctx.SignedCharTy; 318 case LengthModifier::AsShort: return Ctx.ShortTy; 319 case LengthModifier::AsLong: return Ctx.LongTy; 320 case LengthModifier::AsLongLong: return Ctx.LongLongTy; 321 case LengthModifier::AsIntMax: 322 // FIXME: Return unknown for now. 323 return ArgTypeResult(); 324 case LengthModifier::AsSizeT: return Ctx.getSizeType(); 325 case LengthModifier::AsPtrDiff: return Ctx.getPointerDiffType(); 326 } 327 328 if (CS.isUIntArg()) 329 switch (LM.getKind()) { 330 case LengthModifier::AsLongDouble: 331 return ArgTypeResult::Invalid(); 332 case LengthModifier::None: return Ctx.UnsignedIntTy; 333 case LengthModifier::AsChar: return Ctx.UnsignedCharTy; 334 case LengthModifier::AsShort: return Ctx.UnsignedShortTy; 335 case LengthModifier::AsLong: return Ctx.UnsignedLongTy; 336 case LengthModifier::AsLongLong: return Ctx.UnsignedLongLongTy; 337 case LengthModifier::AsIntMax: 338 // FIXME: Return unknown for now. 339 return ArgTypeResult(); 340 case LengthModifier::AsSizeT: 341 // FIXME: How to get the corresponding unsigned 342 // version of size_t? 343 return ArgTypeResult(); 344 case LengthModifier::AsPtrDiff: 345 // FIXME: How to get the corresponding unsigned 346 // version of ptrdiff_t? 
347 return ArgTypeResult(); 348 } 349 350 if (CS.isDoubleArg()) { 351 if (LM.getKind() == LengthModifier::AsLongDouble) 352 return Ctx.LongDoubleTy; 353 return Ctx.DoubleTy; 354 } 355 356 switch (CS.getKind()) { 357 case ConversionSpecifier::sArg: 358 return ArgTypeResult(LM.getKind() == LengthModifier::AsWideChar ? 359 ArgTypeResult::WCStrTy : ArgTypeResult::CStrTy); 360 case ConversionSpecifier::SArg: 361 // FIXME: This appears to be Mac OS X specific. 362 return ArgTypeResult::WCStrTy; 363 case ConversionSpecifier::CArg: 364 return Ctx.WCharTy; 365 case ConversionSpecifier::pArg: 366 return ArgTypeResult::CPointerTy; 367 default: 368 break; 369 } 370 371 // FIXME: Handle other cases. 372 return ArgTypeResult(); 373} 374 375bool PrintfSpecifier::fixType(QualType QT) { 376 // Handle strings first (char *, wchar_t *) 377 if (QT->isPointerType() && (QT->getPointeeType()->isAnyCharacterType())) { 378 CS.setKind(ConversionSpecifier::sArg); 379 380 // Disable irrelevant flags 381 HasAlternativeForm = 0; 382 HasLeadingZeroes = 0; 383 384 // Set the long length modifier for wide characters 385 if (QT->getPointeeType()->isWideCharType()) 386 LM.setKind(LengthModifier::AsWideChar); 387 388 return true; 389 } 390 391 // We can only work with builtin types. 
392 if (!QT->isBuiltinType()) 393 return false; 394 395 // Everything else should be a base type 396 const BuiltinType *BT = QT->getAs<BuiltinType>(); 397 398 // Set length modifier 399 switch (BT->getKind()) { 400 default: 401 // The rest of the conversions are either optional or for non-builtin types 402 LM.setKind(LengthModifier::None); 403 break; 404 405 case BuiltinType::Char_U: 406 case BuiltinType::UChar: 407 case BuiltinType::Char_S: 408 case BuiltinType::SChar: 409 LM.setKind(LengthModifier::AsChar); 410 break; 411 412 case BuiltinType::Short: 413 case BuiltinType::UShort: 414 LM.setKind(LengthModifier::AsShort); 415 break; 416 417 case BuiltinType::WChar_S: 418 case BuiltinType::WChar_U: 419 case BuiltinType::Long: 420 case BuiltinType::ULong: 421 LM.setKind(LengthModifier::AsLong); 422 break; 423 424 case BuiltinType::LongLong: 425 case BuiltinType::ULongLong: 426 LM.setKind(LengthModifier::AsLongLong); 427 break; 428 429 case BuiltinType::LongDouble: 430 LM.setKind(LengthModifier::AsLongDouble); 431 break; 432 } 433 434 // Set conversion specifier and disable any flags which do not apply to it. 435 // Let typedefs to char fall through to int, as %c is silly for uint8_t. 
436 if (isa<TypedefType>(QT) && QT->isAnyCharacterType()) { 437 CS.setKind(ConversionSpecifier::cArg); 438 LM.setKind(LengthModifier::None); 439 Precision.setHowSpecified(OptionalAmount::NotSpecified); 440 HasAlternativeForm = 0; 441 HasLeadingZeroes = 0; 442 HasPlusPrefix = 0; 443 } 444 // Test for Floating type first as LongDouble can pass isUnsignedIntegerType 445 else if (QT->isRealFloatingType()) { 446 CS.setKind(ConversionSpecifier::fArg); 447 } 448 else if (QT->isPointerType()) { 449 CS.setKind(ConversionSpecifier::pArg); 450 Precision.setHowSpecified(OptionalAmount::NotSpecified); 451 HasAlternativeForm = 0; 452 HasLeadingZeroes = 0; 453 HasPlusPrefix = 0; 454 } 455 else if (QT->isSignedIntegerType()) { 456 CS.setKind(ConversionSpecifier::dArg); 457 HasAlternativeForm = 0; 458 } 459 else if (QT->isUnsignedIntegerType()) { 460 CS.setKind(ConversionSpecifier::uArg); 461 HasAlternativeForm = 0; 462 HasPlusPrefix = 0; 463 } 464 else { 465 return false; 466 } 467 468 return true; 469} 470 471void PrintfSpecifier::toString(llvm::raw_ostream &os) const { 472 // Whilst some features have no defined order, we are using the order 473 // appearing in the C99 standard (ISO/IEC 9899:1999 (E) 7.19.6.1) 474 os << "%"; 475 476 // Positional args 477 if (usesPositionalArg()) { 478 os << getPositionalArgIndex() << "$"; 479 } 480 481 // Conversion flags 482 if (IsLeftJustified) os << "-"; 483 if (HasPlusPrefix) os << "+"; 484 if (HasSpacePrefix) os << " "; 485 if (HasAlternativeForm) os << "#"; 486 if (HasLeadingZeroes) os << "0"; 487 488 // Minimum field width 489 FieldWidth.toString(os); 490 // Precision 491 Precision.toString(os); 492 // Length modifier 493 os << LM.toString(); 494 // Conversion specifier 495 os << CS.toString(); 496} 497 498bool PrintfSpecifier::hasValidPlusPrefix() const { 499 if (!HasPlusPrefix) 500 return true; 501 502 // The plus prefix only makes sense for signed conversions 503 switch (CS.getKind()) { 504 case ConversionSpecifier::dArg: 505 case 
ConversionSpecifier::iArg: 506 case ConversionSpecifier::fArg: 507 case ConversionSpecifier::FArg: 508 case ConversionSpecifier::eArg: 509 case ConversionSpecifier::EArg: 510 case ConversionSpecifier::gArg: 511 case ConversionSpecifier::GArg: 512 case ConversionSpecifier::aArg: 513 case ConversionSpecifier::AArg: 514 case ConversionSpecifier::rArg: 515 return true; 516 517 default: 518 return false; 519 } 520} 521 522bool PrintfSpecifier::hasValidAlternativeForm() const { 523 if (!HasAlternativeForm) 524 return true; 525 526 // Alternate form flag only valid with the oxXaAeEfFgG conversions 527 switch (CS.getKind()) { 528 case ConversionSpecifier::oArg: 529 case ConversionSpecifier::xArg: 530 case ConversionSpecifier::XArg: 531 case ConversionSpecifier::aArg: 532 case ConversionSpecifier::AArg: 533 case ConversionSpecifier::eArg: 534 case ConversionSpecifier::EArg: 535 case ConversionSpecifier::fArg: 536 case ConversionSpecifier::FArg: 537 case ConversionSpecifier::gArg: 538 case ConversionSpecifier::GArg: 539 case ConversionSpecifier::rArg: 540 return true; 541 542 default: 543 return false; 544 } 545} 546 547bool PrintfSpecifier::hasValidLeadingZeros() const { 548 if (!HasLeadingZeroes) 549 return true; 550 551 // Leading zeroes flag only valid with the diouxXaAeEfFgG conversions 552 switch (CS.getKind()) { 553 case ConversionSpecifier::dArg: 554 case ConversionSpecifier::iArg: 555 case ConversionSpecifier::oArg: 556 case ConversionSpecifier::uArg: 557 case ConversionSpecifier::xArg: 558 case ConversionSpecifier::XArg: 559 case ConversionSpecifier::aArg: 560 case ConversionSpecifier::AArg: 561 case ConversionSpecifier::eArg: 562 case ConversionSpecifier::EArg: 563 case ConversionSpecifier::fArg: 564 case ConversionSpecifier::FArg: 565 case ConversionSpecifier::gArg: 566 case ConversionSpecifier::GArg: 567 return true; 568 569 default: 570 return false; 571 } 572} 573 574bool PrintfSpecifier::hasValidSpacePrefix() const { 575 if (!HasSpacePrefix) 576 return true; 
577 578 // The space prefix only makes sense for signed conversions 579 switch (CS.getKind()) { 580 case ConversionSpecifier::dArg: 581 case ConversionSpecifier::iArg: 582 case ConversionSpecifier::fArg: 583 case ConversionSpecifier::FArg: 584 case ConversionSpecifier::eArg: 585 case ConversionSpecifier::EArg: 586 case ConversionSpecifier::gArg: 587 case ConversionSpecifier::GArg: 588 case ConversionSpecifier::aArg: 589 case ConversionSpecifier::AArg: 590 return true; 591 592 default: 593 return false; 594 } 595} 596 597bool PrintfSpecifier::hasValidLeftJustified() const { 598 if (!IsLeftJustified) 599 return true; 600 601 // The left justified flag is valid for all conversions except n 602 switch (CS.getKind()) { 603 case ConversionSpecifier::nArg: 604 return false; 605 606 default: 607 return true; 608 } 609} 610 611bool PrintfSpecifier::hasValidThousandsGroupingPrefix() const { 612 if (!HasThousandsGrouping) 613 return true; 614 615 switch (CS.getKind()) { 616 case ConversionSpecifier::dArg: 617 case ConversionSpecifier::iArg: 618 case ConversionSpecifier::uArg: 619 case ConversionSpecifier::fArg: 620 case ConversionSpecifier::FArg: 621 case ConversionSpecifier::gArg: 622 case ConversionSpecifier::GArg: 623 return true; 624 default: 625 return false; 626 } 627} 628 629bool PrintfSpecifier::hasValidPrecision() const { 630 if (Precision.getHowSpecified() == OptionalAmount::NotSpecified) 631 return true; 632 633 // Precision is only valid with the diouxXaAeEfFgGs conversions 634 switch (CS.getKind()) { 635 case ConversionSpecifier::dArg: 636 case ConversionSpecifier::iArg: 637 case ConversionSpecifier::oArg: 638 case ConversionSpecifier::uArg: 639 case ConversionSpecifier::xArg: 640 case ConversionSpecifier::XArg: 641 case ConversionSpecifier::aArg: 642 case ConversionSpecifier::AArg: 643 case ConversionSpecifier::eArg: 644 case ConversionSpecifier::EArg: 645 case ConversionSpecifier::fArg: 646 case ConversionSpecifier::FArg: 647 case ConversionSpecifier::gArg: 648 
case ConversionSpecifier::GArg: 649 case ConversionSpecifier::sArg: 650 return true; 651 652 default: 653 return false; 654 } 655} 656bool PrintfSpecifier::hasValidFieldWidth() const { 657 if (FieldWidth.getHowSpecified() == OptionalAmount::NotSpecified) 658 return true; 659 660 // The field width is valid for all conversions except n 661 switch (CS.getKind()) { 662 case ConversionSpecifier::nArg: 663 return false; 664 665 default: 666 return true; 667 } 668} 669