// PrintfFormatString.cpp revision 226633
1//== PrintfFormatString.cpp - Analysis of printf format strings --*- C++ -*-==// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// Handling of format string in printf and friends. The structure of format 11// strings for fprintf() are described in C99 7.19.6.1. 12// 13//===----------------------------------------------------------------------===// 14 15#include "clang/Analysis/Analyses/FormatString.h" 16#include "FormatStringParsing.h" 17 18using clang::analyze_format_string::ArgTypeResult; 19using clang::analyze_format_string::FormatStringHandler; 20using clang::analyze_format_string::LengthModifier; 21using clang::analyze_format_string::OptionalAmount; 22using clang::analyze_format_string::ConversionSpecifier; 23using clang::analyze_printf::PrintfSpecifier; 24 25using namespace clang; 26 27typedef clang::analyze_format_string::SpecifierResult<PrintfSpecifier> 28 PrintfSpecifierResult; 29 30//===----------------------------------------------------------------------===// 31// Methods for parsing format strings. 
32//===----------------------------------------------------------------------===// 33 34using analyze_format_string::ParseNonPositionAmount; 35 36static bool ParsePrecision(FormatStringHandler &H, PrintfSpecifier &FS, 37 const char *Start, const char *&Beg, const char *E, 38 unsigned *argIndex) { 39 if (argIndex) { 40 FS.setPrecision(ParseNonPositionAmount(Beg, E, *argIndex)); 41 } else { 42 const OptionalAmount Amt = ParsePositionAmount(H, Start, Beg, E, 43 analyze_format_string::PrecisionPos); 44 if (Amt.isInvalid()) 45 return true; 46 FS.setPrecision(Amt); 47 } 48 return false; 49} 50 51static PrintfSpecifierResult ParsePrintfSpecifier(FormatStringHandler &H, 52 const char *&Beg, 53 const char *E, 54 unsigned &argIndex, 55 bool FormatExtensions) { 56 57 using namespace clang::analyze_format_string; 58 using namespace clang::analyze_printf; 59 60 const char *I = Beg; 61 const char *Start = 0; 62 UpdateOnReturn <const char*> UpdateBeg(Beg, I); 63 64 // Look for a '%' character that indicates the start of a format specifier. 65 for ( ; I != E ; ++I) { 66 char c = *I; 67 if (c == '\0') { 68 // Detect spurious null characters, which are likely errors. 69 H.HandleNullChar(I); 70 return true; 71 } 72 if (c == '%') { 73 Start = I++; // Record the start of the format specifier. 74 break; 75 } 76 } 77 78 // No format specifier found? 79 if (!Start) 80 return false; 81 82 if (I == E) { 83 // No more characters left? 84 H.HandleIncompleteSpecifier(Start, E - Start); 85 return true; 86 } 87 88 PrintfSpecifier FS; 89 if (ParseArgPosition(H, FS, Start, I, E)) 90 return true; 91 92 if (I == E) { 93 // No more characters left? 94 H.HandleIncompleteSpecifier(Start, E - Start); 95 return true; 96 } 97 98 // Look for flags (if any). 99 bool hasMore = true; 100 for ( ; I != E; ++I) { 101 switch (*I) { 102 default: hasMore = false; break; 103 case '\'': 104 // FIXME: POSIX specific. Always accept? 
105 FS.setHasThousandsGrouping(I); 106 break; 107 case '-': FS.setIsLeftJustified(I); break; 108 case '+': FS.setHasPlusPrefix(I); break; 109 case ' ': FS.setHasSpacePrefix(I); break; 110 case '#': FS.setHasAlternativeForm(I); break; 111 case '0': FS.setHasLeadingZeros(I); break; 112 } 113 if (!hasMore) 114 break; 115 } 116 117 if (I == E) { 118 // No more characters left? 119 H.HandleIncompleteSpecifier(Start, E - Start); 120 return true; 121 } 122 123 // Look for the field width (if any). 124 if (ParseFieldWidth(H, FS, Start, I, E, 125 FS.usesPositionalArg() ? 0 : &argIndex)) 126 return true; 127 128 if (I == E) { 129 // No more characters left? 130 H.HandleIncompleteSpecifier(Start, E - Start); 131 return true; 132 } 133 134 // Look for the precision (if any). 135 if (*I == '.') { 136 ++I; 137 if (I == E) { 138 H.HandleIncompleteSpecifier(Start, E - Start); 139 return true; 140 } 141 142 if (ParsePrecision(H, FS, Start, I, E, 143 FS.usesPositionalArg() ? 0 : &argIndex)) 144 return true; 145 146 if (I == E) { 147 // No more characters left? 148 H.HandleIncompleteSpecifier(Start, E - Start); 149 return true; 150 } 151 } 152 153 // Look for the length modifier. 154 if (ParseLengthModifier(FS, I, E) && I == E) { 155 // No more characters left? 156 H.HandleIncompleteSpecifier(Start, E - Start); 157 return true; 158 } 159 160 if (*I == '\0') { 161 // Detect spurious null characters, which are likely errors. 162 H.HandleNullChar(I); 163 return true; 164 } 165 166 // Finally, look for the conversion specifier. 167 const char *conversionPosition = I++; 168 ConversionSpecifier::Kind k = ConversionSpecifier::InvalidSpecifier; 169 switch (*conversionPosition) { 170 default: 171 break; 172 // C99: 7.19.6.1 (section 8). 
173 case '%': k = ConversionSpecifier::PercentArg; break; 174 case 'A': k = ConversionSpecifier::AArg; break; 175 case 'E': k = ConversionSpecifier::EArg; break; 176 case 'F': k = ConversionSpecifier::FArg; break; 177 case 'G': k = ConversionSpecifier::GArg; break; 178 case 'X': k = ConversionSpecifier::XArg; break; 179 case 'a': k = ConversionSpecifier::aArg; break; 180 case 'c': k = ConversionSpecifier::cArg; break; 181 case 'd': k = ConversionSpecifier::dArg; break; 182 case 'e': k = ConversionSpecifier::eArg; break; 183 case 'f': k = ConversionSpecifier::fArg; break; 184 case 'g': k = ConversionSpecifier::gArg; break; 185 case 'i': k = ConversionSpecifier::iArg; break; 186 case 'n': k = ConversionSpecifier::nArg; break; 187 case 'o': k = ConversionSpecifier::oArg; break; 188 case 'p': k = ConversionSpecifier::pArg; break; 189 case 's': k = ConversionSpecifier::sArg; break; 190 case 'u': k = ConversionSpecifier::uArg; break; 191 case 'x': k = ConversionSpecifier::xArg; break; 192 // POSIX specific. 193 case 'C': k = ConversionSpecifier::CArg; break; 194 case 'S': k = ConversionSpecifier::SArg; break; 195 // Objective-C. 196 case '@': k = ConversionSpecifier::ObjCObjArg; break; 197 // Glibc specific. 
198 case 'm': k = ConversionSpecifier::PrintErrno; break; 199 // FreeBSD format extensions 200 case 'b': if (FormatExtensions) k = ConversionSpecifier::bArg; break; /* check for int and then char * */ 201 case 'r': if (FormatExtensions) k = ConversionSpecifier::rArg; break; 202 case 'y': if (FormatExtensions) k = ConversionSpecifier::iArg; break; 203 case 'D': if (FormatExtensions) k = ConversionSpecifier::DArg; break; /* check for u_char * pointer and a char * string */ 204 } 205 PrintfConversionSpecifier CS(conversionPosition, k); 206 FS.setConversionSpecifier(CS); 207 if (CS.consumesDataArgument() && !FS.usesPositionalArg()) 208 FS.setArgIndex(argIndex++); 209 // FreeBSD extension 210 if (k == ConversionSpecifier::bArg || k == ConversionSpecifier::DArg) 211 argIndex++; 212 213 if (k == ConversionSpecifier::InvalidSpecifier) { 214 // Assume the conversion takes one argument. 215 return !H.HandleInvalidPrintfConversionSpecifier(FS, Start, I - Start); 216 } 217 return PrintfSpecifierResult(Start, FS); 218} 219 220bool clang::analyze_format_string::ParsePrintfString(FormatStringHandler &H, 221 const char *I, 222 const char *E, 223 bool FormatExtensions) { 224 225 unsigned argIndex = 0; 226 227 // Keep looking for a format specifier until we have exhausted the string. 228 while (I != E) { 229 const PrintfSpecifierResult &FSR = ParsePrintfSpecifier(H, I, E, argIndex, 230 FormatExtensions); 231 // Did a fail-stop error of any kind occur when parsing the specifier? 232 // If so, don't do any more processing. 233 if (FSR.shouldStop()) 234 return true;; 235 // Did we exhaust the string or encounter an error that 236 // we can recover from? 237 if (!FSR.hasValue()) 238 continue; 239 // We have a format specifier. Pass it to the callback. 
240 if (!H.HandlePrintfSpecifier(FSR.getValue(), FSR.getStart(), 241 I - FSR.getStart())) 242 return true; 243 } 244 assert(I == E && "Format string not exhausted"); 245 return false; 246} 247 248//===----------------------------------------------------------------------===// 249// Methods on ConversionSpecifier. 250//===----------------------------------------------------------------------===// 251const char *ConversionSpecifier::toString() const { 252 switch (kind) { 253 case dArg: return "d"; 254 case iArg: return "i"; 255 case oArg: return "o"; 256 case uArg: return "u"; 257 case xArg: return "x"; 258 case XArg: return "X"; 259 case fArg: return "f"; 260 case FArg: return "F"; 261 case eArg: return "e"; 262 case EArg: return "E"; 263 case gArg: return "g"; 264 case GArg: return "G"; 265 case aArg: return "a"; 266 case AArg: return "A"; 267 case cArg: return "c"; 268 case sArg: return "s"; 269 case pArg: return "p"; 270 case nArg: return "n"; 271 case PercentArg: return "%"; 272 case ScanListArg: return "["; 273 case InvalidSpecifier: return NULL; 274 275 // MacOS X unicode extensions. 276 case CArg: return "C"; 277 case SArg: return "S"; 278 279 // Objective-C specific specifiers. 280 case ObjCObjArg: return "@"; 281 282 // FreeBSD specific specifiers. 283 case bArg: return "b"; 284 case DArg: return "D"; 285 case rArg: return "r"; 286 287 // GlibC specific specifiers. 288 case PrintErrno: return "m"; 289 } 290 return NULL; 291} 292 293//===----------------------------------------------------------------------===// 294// Methods on PrintfSpecifier. 
295//===----------------------------------------------------------------------===// 296 297ArgTypeResult PrintfSpecifier::getArgType(ASTContext &Ctx) const { 298 const PrintfConversionSpecifier &CS = getConversionSpecifier(); 299 300 if (!CS.consumesDataArgument()) 301 return ArgTypeResult::Invalid(); 302 303 if (CS.getKind() == ConversionSpecifier::cArg) 304 switch (LM.getKind()) { 305 case LengthModifier::None: return Ctx.IntTy; 306 case LengthModifier::AsLong: return ArgTypeResult::WIntTy; 307 default: 308 return ArgTypeResult::Invalid(); 309 } 310 311 if (CS.isIntArg()) 312 switch (LM.getKind()) { 313 case LengthModifier::AsLongDouble: 314 return ArgTypeResult::Invalid(); 315 case LengthModifier::None: return Ctx.IntTy; 316 case LengthModifier::AsChar: return Ctx.SignedCharTy; 317 case LengthModifier::AsShort: return Ctx.ShortTy; 318 case LengthModifier::AsLong: return Ctx.LongTy; 319 case LengthModifier::AsLongLong: return Ctx.LongLongTy; 320 case LengthModifier::AsIntMax: 321 // FIXME: Return unknown for now. 322 return ArgTypeResult(); 323 case LengthModifier::AsSizeT: return Ctx.getSizeType(); 324 case LengthModifier::AsPtrDiff: return Ctx.getPointerDiffType(); 325 } 326 327 if (CS.isUIntArg()) 328 switch (LM.getKind()) { 329 case LengthModifier::AsLongDouble: 330 return ArgTypeResult::Invalid(); 331 case LengthModifier::None: return Ctx.UnsignedIntTy; 332 case LengthModifier::AsChar: return Ctx.UnsignedCharTy; 333 case LengthModifier::AsShort: return Ctx.UnsignedShortTy; 334 case LengthModifier::AsLong: return Ctx.UnsignedLongTy; 335 case LengthModifier::AsLongLong: return Ctx.UnsignedLongLongTy; 336 case LengthModifier::AsIntMax: 337 // FIXME: Return unknown for now. 338 return ArgTypeResult(); 339 case LengthModifier::AsSizeT: 340 // FIXME: How to get the corresponding unsigned 341 // version of size_t? 342 return ArgTypeResult(); 343 case LengthModifier::AsPtrDiff: 344 // FIXME: How to get the corresponding unsigned 345 // version of ptrdiff_t? 
346 return ArgTypeResult(); 347 } 348 349 if (CS.isDoubleArg()) { 350 if (LM.getKind() == LengthModifier::AsLongDouble) 351 return Ctx.LongDoubleTy; 352 return Ctx.DoubleTy; 353 } 354 355 switch (CS.getKind()) { 356 case ConversionSpecifier::sArg: 357 return ArgTypeResult(LM.getKind() == LengthModifier::AsWideChar ? 358 ArgTypeResult::WCStrTy : ArgTypeResult::CStrTy); 359 case ConversionSpecifier::SArg: 360 // FIXME: This appears to be Mac OS X specific. 361 return ArgTypeResult::WCStrTy; 362 case ConversionSpecifier::CArg: 363 return Ctx.WCharTy; 364 case ConversionSpecifier::pArg: 365 return ArgTypeResult::CPointerTy; 366 default: 367 break; 368 } 369 370 // FIXME: Handle other cases. 371 return ArgTypeResult(); 372} 373 374bool PrintfSpecifier::fixType(QualType QT) { 375 // Handle strings first (char *, wchar_t *) 376 if (QT->isPointerType() && (QT->getPointeeType()->isAnyCharacterType())) { 377 CS.setKind(ConversionSpecifier::sArg); 378 379 // Disable irrelevant flags 380 HasAlternativeForm = 0; 381 HasLeadingZeroes = 0; 382 383 // Set the long length modifier for wide characters 384 if (QT->getPointeeType()->isWideCharType()) 385 LM.setKind(LengthModifier::AsWideChar); 386 387 return true; 388 } 389 390 // We can only work with builtin types. 391 if (!QT->isBuiltinType()) 392 return false; 393 394 // Everything else should be a base type 395 const BuiltinType *BT = QT->getAs<BuiltinType>(); 396 397 // Set length modifier 398 switch (BT->getKind()) { 399 case BuiltinType::Bool: 400 case BuiltinType::WChar_U: 401 case BuiltinType::WChar_S: 402 case BuiltinType::Char16: 403 case BuiltinType::Char32: 404 case BuiltinType::UInt128: 405 case BuiltinType::Int128: 406 case BuiltinType::Half: 407 // Integral types which are non-trivial to correct. 
408 return false; 409 410 case BuiltinType::Void: 411 case BuiltinType::NullPtr: 412 case BuiltinType::ObjCId: 413 case BuiltinType::ObjCClass: 414 case BuiltinType::ObjCSel: 415 case BuiltinType::Dependent: 416 case BuiltinType::Overload: 417 case BuiltinType::BoundMember: 418 case BuiltinType::UnknownAny: 419 // Misc other stuff which doesn't make sense here. 420 return false; 421 422 case BuiltinType::UInt: 423 case BuiltinType::Int: 424 case BuiltinType::Float: 425 case BuiltinType::Double: 426 LM.setKind(LengthModifier::None); 427 break; 428 429 case BuiltinType::Char_U: 430 case BuiltinType::UChar: 431 case BuiltinType::Char_S: 432 case BuiltinType::SChar: 433 LM.setKind(LengthModifier::AsChar); 434 break; 435 436 case BuiltinType::Short: 437 case BuiltinType::UShort: 438 LM.setKind(LengthModifier::AsShort); 439 break; 440 441 case BuiltinType::Long: 442 case BuiltinType::ULong: 443 LM.setKind(LengthModifier::AsLong); 444 break; 445 446 case BuiltinType::LongLong: 447 case BuiltinType::ULongLong: 448 LM.setKind(LengthModifier::AsLongLong); 449 break; 450 451 case BuiltinType::LongDouble: 452 LM.setKind(LengthModifier::AsLongDouble); 453 break; 454 } 455 456 // Set conversion specifier and disable any flags which do not apply to it. 457 // Let typedefs to char fall through to int, as %c is silly for uint8_t. 458 if (isa<TypedefType>(QT) && QT->isAnyCharacterType()) { 459 CS.setKind(ConversionSpecifier::cArg); 460 LM.setKind(LengthModifier::None); 461 Precision.setHowSpecified(OptionalAmount::NotSpecified); 462 HasAlternativeForm = 0; 463 HasLeadingZeroes = 0; 464 HasPlusPrefix = 0; 465 } 466 // Test for Floating type first as LongDouble can pass isUnsignedIntegerType 467 else if (QT->isRealFloatingType()) { 468 CS.setKind(ConversionSpecifier::fArg); 469 } 470 else if (QT->isSignedIntegerType()) { 471 CS.setKind(ConversionSpecifier::dArg); 472 HasAlternativeForm = 0; 473 } 474 else if (QT->isUnsignedIntegerType()) { 475 // Preserve the original formatting, e.g. 
'X', 'o'. 476 if (!cast<PrintfConversionSpecifier>(CS).isUIntArg()) 477 CS.setKind(ConversionSpecifier::uArg); 478 HasAlternativeForm = 0; 479 HasPlusPrefix = 0; 480 } else { 481 llvm_unreachable("Unexpected type"); 482 } 483 484 return true; 485} 486 487void PrintfSpecifier::toString(raw_ostream &os) const { 488 // Whilst some features have no defined order, we are using the order 489 // appearing in the C99 standard (ISO/IEC 9899:1999 (E) 7.19.6.1) 490 os << "%"; 491 492 // Positional args 493 if (usesPositionalArg()) { 494 os << getPositionalArgIndex() << "$"; 495 } 496 497 // Conversion flags 498 if (IsLeftJustified) os << "-"; 499 if (HasPlusPrefix) os << "+"; 500 if (HasSpacePrefix) os << " "; 501 if (HasAlternativeForm) os << "#"; 502 if (HasLeadingZeroes) os << "0"; 503 504 // Minimum field width 505 FieldWidth.toString(os); 506 // Precision 507 Precision.toString(os); 508 // Length modifier 509 os << LM.toString(); 510 // Conversion specifier 511 os << CS.toString(); 512} 513 514bool PrintfSpecifier::hasValidPlusPrefix() const { 515 if (!HasPlusPrefix) 516 return true; 517 518 // The plus prefix only makes sense for signed conversions 519 switch (CS.getKind()) { 520 case ConversionSpecifier::dArg: 521 case ConversionSpecifier::iArg: 522 case ConversionSpecifier::fArg: 523 case ConversionSpecifier::FArg: 524 case ConversionSpecifier::eArg: 525 case ConversionSpecifier::EArg: 526 case ConversionSpecifier::gArg: 527 case ConversionSpecifier::GArg: 528 case ConversionSpecifier::aArg: 529 case ConversionSpecifier::AArg: 530 case ConversionSpecifier::rArg: 531 return true; 532 533 default: 534 return false; 535 } 536} 537 538bool PrintfSpecifier::hasValidAlternativeForm() const { 539 if (!HasAlternativeForm) 540 return true; 541 542 // Alternate form flag only valid with the oxXaAeEfFgG conversions 543 switch (CS.getKind()) { 544 case ConversionSpecifier::oArg: 545 case ConversionSpecifier::xArg: 546 case ConversionSpecifier::XArg: 547 case 
ConversionSpecifier::aArg: 548 case ConversionSpecifier::AArg: 549 case ConversionSpecifier::eArg: 550 case ConversionSpecifier::EArg: 551 case ConversionSpecifier::fArg: 552 case ConversionSpecifier::FArg: 553 case ConversionSpecifier::gArg: 554 case ConversionSpecifier::GArg: 555 case ConversionSpecifier::rArg: 556 return true; 557 558 default: 559 return false; 560 } 561} 562 563bool PrintfSpecifier::hasValidLeadingZeros() const { 564 if (!HasLeadingZeroes) 565 return true; 566 567 // Leading zeroes flag only valid with the diouxXaAeEfFgG conversions 568 switch (CS.getKind()) { 569 case ConversionSpecifier::dArg: 570 case ConversionSpecifier::iArg: 571 case ConversionSpecifier::oArg: 572 case ConversionSpecifier::uArg: 573 case ConversionSpecifier::xArg: 574 case ConversionSpecifier::XArg: 575 case ConversionSpecifier::aArg: 576 case ConversionSpecifier::AArg: 577 case ConversionSpecifier::eArg: 578 case ConversionSpecifier::EArg: 579 case ConversionSpecifier::fArg: 580 case ConversionSpecifier::FArg: 581 case ConversionSpecifier::gArg: 582 case ConversionSpecifier::GArg: 583 return true; 584 585 default: 586 return false; 587 } 588} 589 590bool PrintfSpecifier::hasValidSpacePrefix() const { 591 if (!HasSpacePrefix) 592 return true; 593 594 // The space prefix only makes sense for signed conversions 595 switch (CS.getKind()) { 596 case ConversionSpecifier::dArg: 597 case ConversionSpecifier::iArg: 598 case ConversionSpecifier::fArg: 599 case ConversionSpecifier::FArg: 600 case ConversionSpecifier::eArg: 601 case ConversionSpecifier::EArg: 602 case ConversionSpecifier::gArg: 603 case ConversionSpecifier::GArg: 604 case ConversionSpecifier::aArg: 605 case ConversionSpecifier::AArg: 606 return true; 607 608 default: 609 return false; 610 } 611} 612 613bool PrintfSpecifier::hasValidLeftJustified() const { 614 if (!IsLeftJustified) 615 return true; 616 617 // The left justified flag is valid for all conversions except n 618 switch (CS.getKind()) { 619 case 
ConversionSpecifier::nArg: 620 return false; 621 622 default: 623 return true; 624 } 625} 626 627bool PrintfSpecifier::hasValidThousandsGroupingPrefix() const { 628 if (!HasThousandsGrouping) 629 return true; 630 631 switch (CS.getKind()) { 632 case ConversionSpecifier::dArg: 633 case ConversionSpecifier::iArg: 634 case ConversionSpecifier::uArg: 635 case ConversionSpecifier::fArg: 636 case ConversionSpecifier::FArg: 637 case ConversionSpecifier::gArg: 638 case ConversionSpecifier::GArg: 639 return true; 640 default: 641 return false; 642 } 643} 644 645bool PrintfSpecifier::hasValidPrecision() const { 646 if (Precision.getHowSpecified() == OptionalAmount::NotSpecified) 647 return true; 648 649 // Precision is only valid with the diouxXaAeEfFgGs conversions 650 switch (CS.getKind()) { 651 case ConversionSpecifier::dArg: 652 case ConversionSpecifier::iArg: 653 case ConversionSpecifier::oArg: 654 case ConversionSpecifier::uArg: 655 case ConversionSpecifier::xArg: 656 case ConversionSpecifier::XArg: 657 case ConversionSpecifier::aArg: 658 case ConversionSpecifier::AArg: 659 case ConversionSpecifier::eArg: 660 case ConversionSpecifier::EArg: 661 case ConversionSpecifier::fArg: 662 case ConversionSpecifier::FArg: 663 case ConversionSpecifier::gArg: 664 case ConversionSpecifier::GArg: 665 case ConversionSpecifier::sArg: 666 return true; 667 668 default: 669 return false; 670 } 671} 672bool PrintfSpecifier::hasValidFieldWidth() const { 673 if (FieldWidth.getHowSpecified() == OptionalAmount::NotSpecified) 674 return true; 675 676 // The field width is valid for all conversions except n 677 switch (CS.getKind()) { 678 case ConversionSpecifier::nArg: 679 return false; 680 681 default: 682 return true; 683 } 684} 685