// PrintfFormatString.cpp revision 212904
1//== PrintfFormatString.cpp - Analysis of printf format strings --*- C++ -*-==// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// Handling of format string in printf and friends. The structure of format 11// strings for fprintf() are described in C99 7.19.6.1. 12// 13//===----------------------------------------------------------------------===// 14 15#include "clang/Analysis/Analyses/FormatString.h" 16#include "FormatStringParsing.h" 17 18using clang::analyze_format_string::ArgTypeResult; 19using clang::analyze_format_string::FormatStringHandler; 20using clang::analyze_format_string::LengthModifier; 21using clang::analyze_format_string::OptionalAmount; 22using clang::analyze_format_string::ConversionSpecifier; 23using clang::analyze_printf::PrintfSpecifier; 24 25using namespace clang; 26 27typedef clang::analyze_format_string::SpecifierResult<PrintfSpecifier> 28 PrintfSpecifierResult; 29 30//===----------------------------------------------------------------------===// 31// Methods for parsing format strings. 
32//===----------------------------------------------------------------------===// 33 34using analyze_format_string::ParseNonPositionAmount; 35 36static bool ParsePrecision(FormatStringHandler &H, PrintfSpecifier &FS, 37 const char *Start, const char *&Beg, const char *E, 38 unsigned *argIndex) { 39 if (argIndex) { 40 FS.setPrecision(ParseNonPositionAmount(Beg, E, *argIndex)); 41 } 42 else { 43 const OptionalAmount Amt = ParsePositionAmount(H, Start, Beg, E, 44 analyze_format_string::PrecisionPos); 45 if (Amt.isInvalid()) 46 return true; 47 FS.setPrecision(Amt); 48 } 49 return false; 50} 51 52static PrintfSpecifierResult ParsePrintfSpecifier(FormatStringHandler &H, 53 const char *&Beg, 54 const char *E, 55 unsigned &argIndex, 56 bool FormatExtensions) { 57 58 using namespace clang::analyze_format_string; 59 using namespace clang::analyze_printf; 60 61 const char *I = Beg; 62 const char *Start = 0; 63 UpdateOnReturn <const char*> UpdateBeg(Beg, I); 64 65 // Look for a '%' character that indicates the start of a format specifier. 66 for ( ; I != E ; ++I) { 67 char c = *I; 68 if (c == '\0') { 69 // Detect spurious null characters, which are likely errors. 70 H.HandleNullChar(I); 71 return true; 72 } 73 if (c == '%') { 74 Start = I++; // Record the start of the format specifier. 75 break; 76 } 77 } 78 79 // No format specifier found? 80 if (!Start) 81 return false; 82 83 if (I == E) { 84 // No more characters left? 85 H.HandleIncompleteSpecifier(Start, E - Start); 86 return true; 87 } 88 89 PrintfSpecifier FS; 90 if (ParseArgPosition(H, FS, Start, I, E)) 91 return true; 92 93 if (I == E) { 94 // No more characters left? 95 H.HandleIncompleteSpecifier(Start, E - Start); 96 return true; 97 } 98 99 // Look for flags (if any). 
100 bool hasMore = true; 101 for ( ; I != E; ++I) { 102 switch (*I) { 103 default: hasMore = false; break; 104 case '-': FS.setIsLeftJustified(I); break; 105 case '+': FS.setHasPlusPrefix(I); break; 106 case ' ': FS.setHasSpacePrefix(I); break; 107 case '#': FS.setHasAlternativeForm(I); break; 108 case '0': FS.setHasLeadingZeros(I); break; 109 } 110 if (!hasMore) 111 break; 112 } 113 114 if (I == E) { 115 // No more characters left? 116 H.HandleIncompleteSpecifier(Start, E - Start); 117 return true; 118 } 119 120 // Look for the field width (if any). 121 if (ParseFieldWidth(H, FS, Start, I, E, 122 FS.usesPositionalArg() ? 0 : &argIndex)) 123 return true; 124 125 if (I == E) { 126 // No more characters left? 127 H.HandleIncompleteSpecifier(Start, E - Start); 128 return true; 129 } 130 131 // Look for the precision (if any). 132 if (*I == '.') { 133 ++I; 134 if (I == E) { 135 H.HandleIncompleteSpecifier(Start, E - Start); 136 return true; 137 } 138 139 if (ParsePrecision(H, FS, Start, I, E, 140 FS.usesPositionalArg() ? 0 : &argIndex)) 141 return true; 142 143 if (I == E) { 144 // No more characters left? 145 H.HandleIncompleteSpecifier(Start, E - Start); 146 return true; 147 } 148 } 149 150 // Look for the length modifier. 151 if (ParseLengthModifier(FS, I, E) && I == E) { 152 // No more characters left? 153 H.HandleIncompleteSpecifier(Start, E - Start); 154 return true; 155 } 156 157 if (*I == '\0') { 158 // Detect spurious null characters, which are likely errors. 159 H.HandleNullChar(I); 160 return true; 161 } 162 163 // Finally, look for the conversion specifier. 164 const char *conversionPosition = I++; 165 ConversionSpecifier::Kind k = ConversionSpecifier::InvalidSpecifier; 166 switch (*conversionPosition) { 167 default: 168 break; 169 // C99: 7.19.6.1 (section 8). 
170 case '%': k = ConversionSpecifier::PercentArg; break; 171 case 'A': k = ConversionSpecifier::AArg; break; 172 case 'E': k = ConversionSpecifier::EArg; break; 173 case 'F': k = ConversionSpecifier::FArg; break; 174 case 'G': k = ConversionSpecifier::GArg; break; 175 case 'X': k = ConversionSpecifier::XArg; break; 176 case 'a': k = ConversionSpecifier::aArg; break; 177 case 'c': k = ConversionSpecifier::cArg; break; 178 case 'd': k = ConversionSpecifier::dArg; break; 179 case 'e': k = ConversionSpecifier::eArg; break; 180 case 'f': k = ConversionSpecifier::fArg; break; 181 case 'g': k = ConversionSpecifier::gArg; break; 182 case 'i': k = ConversionSpecifier::iArg; break; 183 case 'n': k = ConversionSpecifier::nArg; break; 184 case 'o': k = ConversionSpecifier::oArg; break; 185 case 'p': k = ConversionSpecifier::pArg; break; 186 case 's': k = ConversionSpecifier::sArg; break; 187 case 'u': k = ConversionSpecifier::uArg; break; 188 case 'x': k = ConversionSpecifier::xArg; break; 189 // Mac OS X (unicode) specific 190 case 'C': k = ConversionSpecifier::CArg; break; 191 case 'S': k = ConversionSpecifier::SArg; break; 192 // Objective-C. 193 case '@': k = ConversionSpecifier::ObjCObjArg; break; 194 // Glibc specific. 195 case 'm': k = ConversionSpecifier::PrintErrno; break; 196 // FreeBSD format extensions 197 case 'b': if (FormatExtensions) k = ConversionSpecifier::bArg; break; /* check for int and then char * */ 198 case 'D': if (FormatExtensions) k = ConversionSpecifier::DArg; break; /* check for u_char * pointer and a char * string */ 199 } 200 PrintfConversionSpecifier CS(conversionPosition, k); 201 FS.setConversionSpecifier(CS); 202 if (CS.consumesDataArgument() && !FS.usesPositionalArg()) 203 FS.setArgIndex(argIndex++); 204 // FreeBSD extension 205 if (k == ConversionSpecifier::bArg || k == ConversionSpecifier::DArg) 206 argIndex++; 207 208 if (k == ConversionSpecifier::InvalidSpecifier) { 209 // Assume the conversion takes one argument. 
210 return !H.HandleInvalidPrintfConversionSpecifier(FS, Beg, I - Beg); 211 } 212 return PrintfSpecifierResult(Start, FS); 213} 214 215bool clang::analyze_format_string::ParsePrintfString(FormatStringHandler &H, 216 const char *I, 217 const char *E, 218 bool FormatExtensions) { 219 220 unsigned argIndex = 0; 221 222 // Keep looking for a format specifier until we have exhausted the string. 223 while (I != E) { 224 const PrintfSpecifierResult &FSR = ParsePrintfSpecifier(H, I, E, argIndex, 225 FormatExtensions); 226 // Did a fail-stop error of any kind occur when parsing the specifier? 227 // If so, don't do any more processing. 228 if (FSR.shouldStop()) 229 return true;; 230 // Did we exhaust the string or encounter an error that 231 // we can recover from? 232 if (!FSR.hasValue()) 233 continue; 234 // We have a format specifier. Pass it to the callback. 235 if (!H.HandlePrintfSpecifier(FSR.getValue(), FSR.getStart(), 236 I - FSR.getStart())) 237 return true; 238 } 239 assert(I == E && "Format string not exhausted"); 240 return false; 241} 242 243//===----------------------------------------------------------------------===// 244// Methods on ConversionSpecifier. 245//===----------------------------------------------------------------------===// 246const char *ConversionSpecifier::toString() const { 247 switch (kind) { 248 case dArg: return "d"; 249 case iArg: return "i"; 250 case oArg: return "o"; 251 case uArg: return "u"; 252 case xArg: return "x"; 253 case XArg: return "X"; 254 case fArg: return "f"; 255 case FArg: return "F"; 256 case eArg: return "e"; 257 case EArg: return "E"; 258 case gArg: return "g"; 259 case GArg: return "G"; 260 case aArg: return "a"; 261 case AArg: return "A"; 262 case cArg: return "c"; 263 case sArg: return "s"; 264 case pArg: return "p"; 265 case nArg: return "n"; 266 case PercentArg: return "%"; 267 case ScanListArg: return "["; 268 case InvalidSpecifier: return NULL; 269 270 // MacOS X unicode extensions. 
271 case CArg: return "C"; 272 case SArg: return "S"; 273 274 // Objective-C specific specifiers. 275 case ObjCObjArg: return "@"; 276 277 // FreeBSD specific specifiers. 278 case bArg: return "b"; 279 case DArg: return "D"; 280 281 // GlibC specific specifiers. 282 case PrintErrno: return "m"; 283 } 284 return NULL; 285} 286 287//===----------------------------------------------------------------------===// 288// Methods on PrintfSpecifier. 289//===----------------------------------------------------------------------===// 290 291ArgTypeResult PrintfSpecifier::getArgType(ASTContext &Ctx) const { 292 const PrintfConversionSpecifier &CS = getConversionSpecifier(); 293 294 if (!CS.consumesDataArgument()) 295 return ArgTypeResult::Invalid(); 296 297 if (CS.getKind() == ConversionSpecifier::cArg) 298 switch (LM.getKind()) { 299 case LengthModifier::None: return Ctx.IntTy; 300 case LengthModifier::AsLong: return ArgTypeResult::WIntTy; 301 default: 302 return ArgTypeResult::Invalid(); 303 } 304 305 if (CS.isIntArg()) 306 switch (LM.getKind()) { 307 case LengthModifier::AsLongDouble: 308 return ArgTypeResult::Invalid(); 309 case LengthModifier::None: return Ctx.IntTy; 310 case LengthModifier::AsChar: return Ctx.SignedCharTy; 311 case LengthModifier::AsShort: return Ctx.ShortTy; 312 case LengthModifier::AsLong: return Ctx.LongTy; 313 case LengthModifier::AsLongLong: return Ctx.LongLongTy; 314 case LengthModifier::AsIntMax: 315 // FIXME: Return unknown for now. 
316 return ArgTypeResult(); 317 case LengthModifier::AsSizeT: return Ctx.getSizeType(); 318 case LengthModifier::AsPtrDiff: return Ctx.getPointerDiffType(); 319 } 320 321 if (CS.isUIntArg()) 322 switch (LM.getKind()) { 323 case LengthModifier::AsLongDouble: 324 return ArgTypeResult::Invalid(); 325 case LengthModifier::None: return Ctx.UnsignedIntTy; 326 case LengthModifier::AsChar: return Ctx.UnsignedCharTy; 327 case LengthModifier::AsShort: return Ctx.UnsignedShortTy; 328 case LengthModifier::AsLong: return Ctx.UnsignedLongTy; 329 case LengthModifier::AsLongLong: return Ctx.UnsignedLongLongTy; 330 case LengthModifier::AsIntMax: 331 // FIXME: Return unknown for now. 332 return ArgTypeResult(); 333 case LengthModifier::AsSizeT: 334 // FIXME: How to get the corresponding unsigned 335 // version of size_t? 336 return ArgTypeResult(); 337 case LengthModifier::AsPtrDiff: 338 // FIXME: How to get the corresponding unsigned 339 // version of ptrdiff_t? 340 return ArgTypeResult(); 341 } 342 343 if (CS.isDoubleArg()) { 344 if (LM.getKind() == LengthModifier::AsLongDouble) 345 return Ctx.LongDoubleTy; 346 return Ctx.DoubleTy; 347 } 348 349 switch (CS.getKind()) { 350 case ConversionSpecifier::sArg: 351 return ArgTypeResult(LM.getKind() == LengthModifier::AsWideChar ? 352 ArgTypeResult::WCStrTy : ArgTypeResult::CStrTy); 353 case ConversionSpecifier::SArg: 354 // FIXME: This appears to be Mac OS X specific. 355 return ArgTypeResult::WCStrTy; 356 case ConversionSpecifier::CArg: 357 return Ctx.WCharTy; 358 case ConversionSpecifier::pArg: 359 return ArgTypeResult::CPointerTy; 360 default: 361 break; 362 } 363 364 // FIXME: Handle other cases. 
365 return ArgTypeResult(); 366} 367 368bool PrintfSpecifier::fixType(QualType QT) { 369 // Handle strings first (char *, wchar_t *) 370 if (QT->isPointerType() && (QT->getPointeeType()->isAnyCharacterType())) { 371 CS.setKind(ConversionSpecifier::sArg); 372 373 // Disable irrelevant flags 374 HasAlternativeForm = 0; 375 HasLeadingZeroes = 0; 376 377 // Set the long length modifier for wide characters 378 if (QT->getPointeeType()->isWideCharType()) 379 LM.setKind(LengthModifier::AsWideChar); 380 381 return true; 382 } 383 384 // We can only work with builtin types. 385 if (!QT->isBuiltinType()) 386 return false; 387 388 // Everything else should be a base type 389 const BuiltinType *BT = QT->getAs<BuiltinType>(); 390 391 // Set length modifier 392 switch (BT->getKind()) { 393 default: 394 // The rest of the conversions are either optional or for non-builtin types 395 LM.setKind(LengthModifier::None); 396 break; 397 398 case BuiltinType::WChar: 399 case BuiltinType::Long: 400 case BuiltinType::ULong: 401 LM.setKind(LengthModifier::AsLong); 402 break; 403 404 case BuiltinType::LongLong: 405 case BuiltinType::ULongLong: 406 LM.setKind(LengthModifier::AsLongLong); 407 break; 408 409 case BuiltinType::LongDouble: 410 LM.setKind(LengthModifier::AsLongDouble); 411 break; 412 } 413 414 // Set conversion specifier and disable any flags which do not apply to it. 
415 if (QT->isAnyCharacterType()) { 416 CS.setKind(ConversionSpecifier::cArg); 417 Precision.setHowSpecified(OptionalAmount::NotSpecified); 418 HasAlternativeForm = 0; 419 HasLeadingZeroes = 0; 420 HasPlusPrefix = 0; 421 } 422 // Test for Floating type first as LongDouble can pass isUnsignedIntegerType 423 else if (QT->isRealFloatingType()) { 424 CS.setKind(ConversionSpecifier::fArg); 425 } 426 else if (QT->isPointerType()) { 427 CS.setKind(ConversionSpecifier::pArg); 428 Precision.setHowSpecified(OptionalAmount::NotSpecified); 429 HasAlternativeForm = 0; 430 HasLeadingZeroes = 0; 431 HasPlusPrefix = 0; 432 } 433 else if (QT->isSignedIntegerType()) { 434 CS.setKind(ConversionSpecifier::dArg); 435 HasAlternativeForm = 0; 436 } 437 else if (QT->isUnsignedIntegerType()) { 438 CS.setKind(ConversionSpecifier::uArg); 439 HasAlternativeForm = 0; 440 HasPlusPrefix = 0; 441 } 442 else { 443 return false; 444 } 445 446 return true; 447} 448 449void PrintfSpecifier::toString(llvm::raw_ostream &os) const { 450 // Whilst some features have no defined order, we are using the order 451 // appearing in the C99 standard (ISO/IEC 9899:1999 (E) ��7.19.6.1) 452 os << "%"; 453 454 // Positional args 455 if (usesPositionalArg()) { 456 os << getPositionalArgIndex() << "$"; 457 } 458 459 // Conversion flags 460 if (IsLeftJustified) os << "-"; 461 if (HasPlusPrefix) os << "+"; 462 if (HasSpacePrefix) os << " "; 463 if (HasAlternativeForm) os << "#"; 464 if (HasLeadingZeroes) os << "0"; 465 466 // Minimum field width 467 FieldWidth.toString(os); 468 // Precision 469 Precision.toString(os); 470 // Length modifier 471 os << LM.toString(); 472 // Conversion specifier 473 os << CS.toString(); 474} 475 476bool PrintfSpecifier::hasValidPlusPrefix() const { 477 if (!HasPlusPrefix) 478 return true; 479 480 // The plus prefix only makes sense for signed conversions 481 switch (CS.getKind()) { 482 case ConversionSpecifier::dArg: 483 case ConversionSpecifier::iArg: 484 case ConversionSpecifier::fArg: 
485 case ConversionSpecifier::FArg: 486 case ConversionSpecifier::eArg: 487 case ConversionSpecifier::EArg: 488 case ConversionSpecifier::gArg: 489 case ConversionSpecifier::GArg: 490 case ConversionSpecifier::aArg: 491 case ConversionSpecifier::AArg: 492 return true; 493 494 default: 495 return false; 496 } 497} 498 499bool PrintfSpecifier::hasValidAlternativeForm() const { 500 if (!HasAlternativeForm) 501 return true; 502 503 // Alternate form flag only valid with the oxaAeEfFgG conversions 504 switch (CS.getKind()) { 505 case ConversionSpecifier::oArg: 506 case ConversionSpecifier::xArg: 507 case ConversionSpecifier::aArg: 508 case ConversionSpecifier::AArg: 509 case ConversionSpecifier::eArg: 510 case ConversionSpecifier::EArg: 511 case ConversionSpecifier::fArg: 512 case ConversionSpecifier::FArg: 513 case ConversionSpecifier::gArg: 514 case ConversionSpecifier::GArg: 515 return true; 516 517 default: 518 return false; 519 } 520} 521 522bool PrintfSpecifier::hasValidLeadingZeros() const { 523 if (!HasLeadingZeroes) 524 return true; 525 526 // Leading zeroes flag only valid with the diouxXaAeEfFgG conversions 527 switch (CS.getKind()) { 528 case ConversionSpecifier::dArg: 529 case ConversionSpecifier::iArg: 530 case ConversionSpecifier::oArg: 531 case ConversionSpecifier::uArg: 532 case ConversionSpecifier::xArg: 533 case ConversionSpecifier::XArg: 534 case ConversionSpecifier::aArg: 535 case ConversionSpecifier::AArg: 536 case ConversionSpecifier::eArg: 537 case ConversionSpecifier::EArg: 538 case ConversionSpecifier::fArg: 539 case ConversionSpecifier::FArg: 540 case ConversionSpecifier::gArg: 541 case ConversionSpecifier::GArg: 542 return true; 543 544 default: 545 return false; 546 } 547} 548 549bool PrintfSpecifier::hasValidSpacePrefix() const { 550 if (!HasSpacePrefix) 551 return true; 552 553 // The space prefix only makes sense for signed conversions 554 switch (CS.getKind()) { 555 case ConversionSpecifier::dArg: 556 case ConversionSpecifier::iArg: 557 
case ConversionSpecifier::fArg: 558 case ConversionSpecifier::FArg: 559 case ConversionSpecifier::eArg: 560 case ConversionSpecifier::EArg: 561 case ConversionSpecifier::gArg: 562 case ConversionSpecifier::GArg: 563 case ConversionSpecifier::aArg: 564 case ConversionSpecifier::AArg: 565 return true; 566 567 default: 568 return false; 569 } 570} 571 572bool PrintfSpecifier::hasValidLeftJustified() const { 573 if (!IsLeftJustified) 574 return true; 575 576 // The left justified flag is valid for all conversions except n 577 switch (CS.getKind()) { 578 case ConversionSpecifier::nArg: 579 return false; 580 581 default: 582 return true; 583 } 584} 585 586bool PrintfSpecifier::hasValidPrecision() const { 587 if (Precision.getHowSpecified() == OptionalAmount::NotSpecified) 588 return true; 589 590 // Precision is only valid with the diouxXaAeEfFgGs conversions 591 switch (CS.getKind()) { 592 case ConversionSpecifier::dArg: 593 case ConversionSpecifier::iArg: 594 case ConversionSpecifier::oArg: 595 case ConversionSpecifier::uArg: 596 case ConversionSpecifier::xArg: 597 case ConversionSpecifier::XArg: 598 case ConversionSpecifier::aArg: 599 case ConversionSpecifier::AArg: 600 case ConversionSpecifier::eArg: 601 case ConversionSpecifier::EArg: 602 case ConversionSpecifier::fArg: 603 case ConversionSpecifier::FArg: 604 case ConversionSpecifier::gArg: 605 case ConversionSpecifier::GArg: 606 case ConversionSpecifier::sArg: 607 return true; 608 609 default: 610 return false; 611 } 612} 613bool PrintfSpecifier::hasValidFieldWidth() const { 614 if (FieldWidth.getHowSpecified() == OptionalAmount::NotSpecified) 615 return true; 616 617 // The field width is valid for all conversions except n 618 switch (CS.getKind()) { 619 case ConversionSpecifier::nArg: 620 return false; 621 622 default: 623 return true; 624 } 625} 626