ScanfFormatString.cpp revision 261991
1187277Sdas//= ScanfFormatString.cpp - Analysis of printf format strings --*- C++ -*-===// 2187277Sdas// 3187277Sdas// The LLVM Compiler Infrastructure 4187277Sdas// 5187277Sdas// This file is distributed under the University of Illinois Open Source 6187277Sdas// License. See LICENSE.TXT for details. 7187277Sdas// 8187277Sdas//===----------------------------------------------------------------------===// 9187277Sdas// 10187277Sdas// Handling of format string in scanf and friends. The structure of format 11187277Sdas// strings for fscanf() are described in C99 7.19.6.2. 12187277Sdas// 13187277Sdas//===----------------------------------------------------------------------===// 14187277Sdas 15187277Sdas#include "clang/Analysis/Analyses/FormatString.h" 16187277Sdas#include "FormatStringParsing.h" 17187277Sdas#include "clang/Basic/TargetInfo.h" 18187277Sdas 19187277Sdasusing clang::analyze_format_string::ArgType; 20187277Sdasusing clang::analyze_format_string::FormatStringHandler; 21187277Sdasusing clang::analyze_format_string::LengthModifier; 22187277Sdasusing clang::analyze_format_string::OptionalAmount; 23187277Sdasusing clang::analyze_format_string::ConversionSpecifier; 24187277Sdasusing clang::analyze_scanf::ScanfConversionSpecifier; 25187277Sdasusing clang::analyze_scanf::ScanfSpecifier; 26187277Sdasusing clang::UpdateOnReturn; 27187277Sdasusing namespace clang; 28187277Sdas 29187277Sdastypedef clang::analyze_format_string::SpecifierResult<ScanfSpecifier> 30187277Sdas ScanfSpecifierResult; 31187277Sdas 32187277Sdasstatic bool ParseScanList(FormatStringHandler &H, 33187277Sdas ScanfConversionSpecifier &CS, 34187277Sdas const char *&Beg, const char *E) { 35187277Sdas const char *I = Beg; 36187277Sdas const char *start = I - 1; 37187277Sdas UpdateOnReturn <const char*> UpdateBeg(Beg, I); 38187277Sdas 39187277Sdas // No more characters? 40187284Sdas if (I == E) { 41187284Sdas H.HandleIncompleteScanList(start, I); 42187284Sdas return true; 43187284Sdas } 44187284Sdas 45187284Sdas // Special case: ']' is the first character. 46187284Sdas if (*I == ']') { 47187284Sdas if (++I == E) { 48187284Sdas H.HandleIncompleteScanList(start, I - 1); 49187284Sdas return true; 50187284Sdas } 51187284Sdas } 52187284Sdas 53187284Sdas // Look for a ']' character which denotes the end of the scan list. 54187284Sdas while (*I != ']') { 55187284Sdas if (++I == E) { 56187284Sdas H.HandleIncompleteScanList(start, I - 1); 57187284Sdas return true; 58187284Sdas } 59187284Sdas } 60187284Sdas 61187284Sdas CS.setEndScanList(I); 62187277Sdas return false; 63187277Sdas} 64187277Sdas 65187277Sdas// FIXME: Much of this is copy-paste from ParsePrintfSpecifier. 66187277Sdas// We can possibly refactor. 67187277Sdasstatic ScanfSpecifierResult ParseScanfSpecifier(FormatStringHandler &H, 68187277Sdas const char *&Beg, 69187277Sdas const char *E, 70187277Sdas unsigned &argIndex, 71187277Sdas const LangOptions &LO, 72187277Sdas const TargetInfo &Target) { 73187354Sdas 74187277Sdas using namespace clang::analyze_scanf; 75187277Sdas const char *I = Beg; 76187277Sdas const char *Start = 0; 77187277Sdas UpdateOnReturn <const char*> UpdateBeg(Beg, I); 78187277Sdas 79187277Sdas // Look for a '%' character that indicates the start of a format specifier. 80187277Sdas for ( ; I != E ; ++I) { 81187277Sdas char c = *I; 82187277Sdas if (c == '\0') { 83187277Sdas // Detect spurious null characters, which are likely errors. 84187277Sdas H.HandleNullChar(I); 85187277Sdas return true; 86187277Sdas } 87187354Sdas if (c == '%') { 88187354Sdas Start = I++; // Record the start of the format specifier. 89187277Sdas break; 90187354Sdas } 91187277Sdas } 92187354Sdas 93187354Sdas // No format specifier found? 94187277Sdas if (!Start) 95187277Sdas return false; 96187277Sdas 97187277Sdas if (I == E) { 98187277Sdas // No more characters left? 99187277Sdas H.HandleIncompleteSpecifier(Start, E - Start); 100187277Sdas return true; 101187277Sdas } 102187277Sdas 103187277Sdas ScanfSpecifier FS; 104187277Sdas if (ParseArgPosition(H, FS, Start, I, E)) 105187277Sdas return true; 106187277Sdas 107187277Sdas if (I == E) { 108187277Sdas // No more characters left? 109187277Sdas H.HandleIncompleteSpecifier(Start, E - Start); 110187277Sdas return true; 111187277Sdas } 112187277Sdas 113187277Sdas // Look for '*' flag if it is present. 114187354Sdas if (*I == '*') { 115187277Sdas FS.setSuppressAssignment(I); 116187354Sdas if (++I == E) { 117187354Sdas H.HandleIncompleteSpecifier(Start, E - Start); 118187354Sdas return true; 119187277Sdas } 120187354Sdas } 121187277Sdas 122187277Sdas // Look for the field width (if any). Unlike printf, this is either 123187277Sdas // a fixed integer or isn't present. 124187277Sdas const OptionalAmount &Amt = clang::analyze_format_string::ParseAmount(I, E); 125187277Sdas if (Amt.getHowSpecified() != OptionalAmount::NotSpecified) { 126187277Sdas assert(Amt.getHowSpecified() == OptionalAmount::Constant); 127187277Sdas FS.setFieldWidth(Amt); 128187277Sdas 129187277Sdas if (I == E) { 130187277Sdas // No more characters left? 131187277Sdas H.HandleIncompleteSpecifier(Start, E - Start); 132187277Sdas return true; 133187277Sdas } 134187277Sdas } 135187277Sdas 136187277Sdas // Look for the length modifier. 137187277Sdas if (ParseLengthModifier(FS, I, E, LO, /*scanf=*/true) && I == E) { 138187354Sdas // No more characters left? 139187354Sdas H.HandleIncompleteSpecifier(Start, E - Start); 140187354Sdas return true; 141187354Sdas } 142187354Sdas 143187354Sdas // Detect spurious null characters, which are likely errors. 144187354Sdas if (*I == '\0') { 145187277Sdas H.HandleNullChar(I); 146187277Sdas return true; 147187277Sdas } 148187277Sdas 149187277Sdas // Finally, look for the conversion specifier. 150187277Sdas const char *conversionPosition = I++; 151187277Sdas ScanfConversionSpecifier::Kind k = ScanfConversionSpecifier::InvalidSpecifier; 152187277Sdas switch (*conversionPosition) { 153187284Sdas default: 154187284Sdas break; 155187284Sdas case '%': k = ConversionSpecifier::PercentArg; break; 156187284Sdas case 'A': k = ConversionSpecifier::AArg; break; 157187284Sdas case 'E': k = ConversionSpecifier::EArg; break; 158187284Sdas case 'F': k = ConversionSpecifier::FArg; break; 159187284Sdas case 'G': k = ConversionSpecifier::GArg; break; 160187284Sdas case 'X': k = ConversionSpecifier::XArg; break; 161187284Sdas case 'a': k = ConversionSpecifier::aArg; break; 162187284Sdas case 'd': k = ConversionSpecifier::dArg; break; 163187284Sdas case 'e': k = ConversionSpecifier::eArg; break; 164187284Sdas case 'f': k = ConversionSpecifier::fArg; break; 165187284Sdas case 'g': k = ConversionSpecifier::gArg; break; 166187284Sdas case 'i': k = ConversionSpecifier::iArg; break; 167187284Sdas case 'n': k = ConversionSpecifier::nArg; break; 168187284Sdas case 'c': k = ConversionSpecifier::cArg; break; 169187284Sdas case 'C': k = ConversionSpecifier::CArg; break; 170187284Sdas case 'S': k = ConversionSpecifier::SArg; break; 171187284Sdas case '[': k = ConversionSpecifier::ScanListArg; break; 172187284Sdas case 'u': k = ConversionSpecifier::uArg; break; 173187284Sdas case 'x': k = ConversionSpecifier::xArg; break; 174187284Sdas case 'o': k = ConversionSpecifier::oArg; break; 175187284Sdas case 's': k = ConversionSpecifier::sArg; break; 176187284Sdas case 'p': k = ConversionSpecifier::pArg; break; 177187284Sdas // Apple extensions 178187284Sdas // Apple-specific 179187284Sdas case 'D': 180187284Sdas if (Target.getTriple().isOSDarwin()) 181187284Sdas k = ConversionSpecifier::DArg; 182187284Sdas break; 183187284Sdas case 'O': 184187284Sdas if (Target.getTriple().isOSDarwin()) 185187284Sdas k = ConversionSpecifier::OArg; 186187284Sdas break; 187187284Sdas case 'U': 188187284Sdas if (Target.getTriple().isOSDarwin()) 189187284Sdas k = ConversionSpecifier::UArg; 190187284Sdas break; 191187284Sdas } 192187284Sdas ScanfConversionSpecifier CS(conversionPosition, k); 193187284Sdas if (k == ScanfConversionSpecifier::ScanListArg) { 194187284Sdas if (ParseScanList(H, CS, I, E)) 195187284Sdas return true; 196187284Sdas } 197187284Sdas FS.setConversionSpecifier(CS); 198187284Sdas if (CS.consumesDataArgument() && !FS.getSuppressAssignment() 199187284Sdas && !FS.usesPositionalArg()) 200187284Sdas FS.setArgIndex(argIndex++); 201187284Sdas 202187284Sdas // FIXME: '%' and '*' doesn't make sense. Issue a warning. 203187284Sdas // FIXME: 'ConsumedSoFar' and '*' doesn't make sense. 204187284Sdas 205187284Sdas if (k == ScanfConversionSpecifier::InvalidSpecifier) { 206187284Sdas // Assume the conversion takes one argument. 207187284Sdas return !H.HandleInvalidScanfConversionSpecifier(FS, Beg, I - Beg); 208187284Sdas } 209187284Sdas return ScanfSpecifierResult(Start, FS); 210187284Sdas} 211187284Sdas 212187284SdasArgType ScanfSpecifier::getArgType(ASTContext &Ctx) const { 213187284Sdas const ScanfConversionSpecifier &CS = getConversionSpecifier(); 214187284Sdas 215187284Sdas if (!CS.consumesDataArgument()) 216187284Sdas return ArgType::Invalid(); 217187284Sdas 218187284Sdas switch(CS.getKind()) { 219187284Sdas // Signed int. 220187284Sdas case ConversionSpecifier::dArg: 221187284Sdas case ConversionSpecifier::DArg: 222187284Sdas case ConversionSpecifier::iArg: 223187284Sdas switch (LM.getKind()) { 224187284Sdas case LengthModifier::None: 225187284Sdas return ArgType::PtrTo(Ctx.IntTy); 226187284Sdas case LengthModifier::AsChar: 227187284Sdas return ArgType::PtrTo(ArgType::AnyCharTy); 228187284Sdas case LengthModifier::AsShort: 229187284Sdas return ArgType::PtrTo(Ctx.ShortTy); 230187284Sdas case LengthModifier::AsLong: 231187284Sdas return ArgType::PtrTo(Ctx.LongTy); 232187284Sdas case LengthModifier::AsLongLong: 233187284Sdas case LengthModifier::AsQuad: 234187284Sdas return ArgType::PtrTo(Ctx.LongLongTy); 235187284Sdas case LengthModifier::AsInt64: 236187284Sdas return ArgType::PtrTo(ArgType(Ctx.LongLongTy, "__int64")); 237187284Sdas case LengthModifier::AsIntMax: 238187284Sdas return ArgType::PtrTo(ArgType(Ctx.getIntMaxType(), "intmax_t")); 239187284Sdas case LengthModifier::AsSizeT: 240187284Sdas // FIXME: ssize_t. 241187284Sdas return ArgType(); 242187284Sdas case LengthModifier::AsPtrDiff: 243187284Sdas return ArgType::PtrTo(ArgType(Ctx.getPointerDiffType(), "ptrdiff_t")); 244187284Sdas case LengthModifier::AsLongDouble: 245187284Sdas // GNU extension. 246187284Sdas return ArgType::PtrTo(Ctx.LongLongTy); 247187284Sdas case LengthModifier::AsAllocate: 248187284Sdas case LengthModifier::AsMAllocate: 249187284Sdas case LengthModifier::AsInt32: 250187284Sdas case LengthModifier::AsInt3264: 251187284Sdas return ArgType::Invalid(); 252187284Sdas } 253187284Sdas 254187284Sdas // Unsigned int. 255187284Sdas case ConversionSpecifier::oArg: 256187284Sdas case ConversionSpecifier::OArg: 257187284Sdas case ConversionSpecifier::uArg: 258187284Sdas case ConversionSpecifier::UArg: 259187284Sdas case ConversionSpecifier::xArg: 260187284Sdas case ConversionSpecifier::XArg: 261187284Sdas switch (LM.getKind()) { 262187284Sdas case LengthModifier::None: 263187284Sdas return ArgType::PtrTo(Ctx.UnsignedIntTy); 264187284Sdas case LengthModifier::AsChar: 265187284Sdas return ArgType::PtrTo(Ctx.UnsignedCharTy); 266187284Sdas case LengthModifier::AsShort: 267187284Sdas return ArgType::PtrTo(Ctx.UnsignedShortTy); 268187284Sdas case LengthModifier::AsLong: 269187284Sdas return ArgType::PtrTo(Ctx.UnsignedLongTy); 270187284Sdas case LengthModifier::AsLongLong: 271187284Sdas case LengthModifier::AsQuad: 272187284Sdas return ArgType::PtrTo(Ctx.UnsignedLongLongTy); 273187284Sdas case LengthModifier::AsInt64: 274187284Sdas return ArgType::PtrTo(ArgType(Ctx.UnsignedLongLongTy, "unsigned __int64")); 275187284Sdas case LengthModifier::AsIntMax: 276187284Sdas return ArgType::PtrTo(ArgType(Ctx.getUIntMaxType(), "uintmax_t")); 277187284Sdas case LengthModifier::AsSizeT: 278187284Sdas return ArgType::PtrTo(ArgType(Ctx.getSizeType(), "size_t")); 279187284Sdas case LengthModifier::AsPtrDiff: 280187284Sdas // FIXME: Unsigned version of ptrdiff_t? 281187284Sdas return ArgType(); 282187284Sdas case LengthModifier::AsLongDouble: 283187284Sdas // GNU extension. 284187284Sdas return ArgType::PtrTo(Ctx.UnsignedLongLongTy); 285187284Sdas case LengthModifier::AsAllocate: 286187284Sdas case LengthModifier::AsMAllocate: 287187284Sdas case LengthModifier::AsInt32: 288187284Sdas case LengthModifier::AsInt3264: 289187284Sdas return ArgType::Invalid(); 290187284Sdas } 291187284Sdas 292187284Sdas // Float. 293187284Sdas case ConversionSpecifier::aArg: 294187284Sdas case ConversionSpecifier::AArg: 295187284Sdas case ConversionSpecifier::eArg: 296187284Sdas case ConversionSpecifier::EArg: 297187284Sdas case ConversionSpecifier::fArg: 298187284Sdas case ConversionSpecifier::FArg: 299187284Sdas case ConversionSpecifier::gArg: 300187284Sdas case ConversionSpecifier::GArg: 301187284Sdas switch (LM.getKind()) { 302187284Sdas case LengthModifier::None: 303187284Sdas return ArgType::PtrTo(Ctx.FloatTy); 304187284Sdas case LengthModifier::AsLong: 305187284Sdas return ArgType::PtrTo(Ctx.DoubleTy); 306187284Sdas case LengthModifier::AsLongDouble: 307187284Sdas return ArgType::PtrTo(Ctx.LongDoubleTy); 308187284Sdas default: 309187284Sdas return ArgType::Invalid(); 310187284Sdas } 311187284Sdas 312187284Sdas // Char, string and scanlist. 313187284Sdas case ConversionSpecifier::cArg: 314187284Sdas case ConversionSpecifier::sArg: 315187284Sdas case ConversionSpecifier::ScanListArg: 316187284Sdas switch (LM.getKind()) { 317187284Sdas case LengthModifier::None: 318187284Sdas return ArgType::PtrTo(ArgType::AnyCharTy); 319187284Sdas case LengthModifier::AsLong: 320187284Sdas return ArgType::PtrTo(ArgType(Ctx.getWideCharType(), "wchar_t")); 321187284Sdas case LengthModifier::AsAllocate: 322187284Sdas case LengthModifier::AsMAllocate: 323187284Sdas return ArgType::PtrTo(ArgType::CStrTy); 324187284Sdas default: 325187284Sdas return ArgType::Invalid(); 326187284Sdas } 327187284Sdas case ConversionSpecifier::CArg: 328187284Sdas case ConversionSpecifier::SArg: 329187284Sdas // FIXME: Mac OS X specific? 330187284Sdas switch (LM.getKind()) { 331187284Sdas case LengthModifier::None: 332187284Sdas return ArgType::PtrTo(ArgType(Ctx.getWideCharType(), "wchar_t")); 333187284Sdas case LengthModifier::AsAllocate: 334187284Sdas case LengthModifier::AsMAllocate: 335187284Sdas return ArgType::PtrTo(ArgType(ArgType::WCStrTy, "wchar_t *")); 336187284Sdas default: 337187284Sdas return ArgType::Invalid(); 338187284Sdas } 339187284Sdas 340187284Sdas // Pointer. 341187284Sdas case ConversionSpecifier::pArg: 342187284Sdas return ArgType::PtrTo(ArgType::CPointerTy); 343187284Sdas 344187284Sdas // Write-back. 345187284Sdas case ConversionSpecifier::nArg: 346187284Sdas switch (LM.getKind()) { 347 case LengthModifier::None: 348 return ArgType::PtrTo(Ctx.IntTy); 349 case LengthModifier::AsChar: 350 return ArgType::PtrTo(Ctx.SignedCharTy); 351 case LengthModifier::AsShort: 352 return ArgType::PtrTo(Ctx.ShortTy); 353 case LengthModifier::AsLong: 354 return ArgType::PtrTo(Ctx.LongTy); 355 case LengthModifier::AsLongLong: 356 case LengthModifier::AsQuad: 357 return ArgType::PtrTo(Ctx.LongLongTy); 358 case LengthModifier::AsInt64: 359 return ArgType::PtrTo(ArgType(Ctx.LongLongTy, "__int64")); 360 case LengthModifier::AsIntMax: 361 return ArgType::PtrTo(ArgType(Ctx.getIntMaxType(), "intmax_t")); 362 case LengthModifier::AsSizeT: 363 return ArgType(); // FIXME: ssize_t 364 case LengthModifier::AsPtrDiff: 365 return ArgType::PtrTo(ArgType(Ctx.getPointerDiffType(), "ptrdiff_t")); 366 case LengthModifier::AsLongDouble: 367 return ArgType(); // FIXME: Is this a known extension? 368 case LengthModifier::AsAllocate: 369 case LengthModifier::AsMAllocate: 370 case LengthModifier::AsInt32: 371 case LengthModifier::AsInt3264: 372 return ArgType::Invalid(); 373 } 374 375 default: 376 break; 377 } 378 379 return ArgType(); 380} 381 382bool ScanfSpecifier::fixType(QualType QT, const LangOptions &LangOpt, 383 ASTContext &Ctx) { 384 if (!QT->isPointerType()) 385 return false; 386 387 // %n is different from other conversion specifiers; don't try to fix it. 388 if (CS.getKind() == ConversionSpecifier::nArg) 389 return false; 390 391 QualType PT = QT->getPointeeType(); 392 393 // If it's an enum, get its underlying type. 394 if (const EnumType *ETy = QT->getAs<EnumType>()) 395 QT = ETy->getDecl()->getIntegerType(); 396 397 const BuiltinType *BT = PT->getAs<BuiltinType>(); 398 if (!BT) 399 return false; 400 401 // Pointer to a character. 402 if (PT->isAnyCharacterType()) { 403 CS.setKind(ConversionSpecifier::sArg); 404 if (PT->isWideCharType()) 405 LM.setKind(LengthModifier::AsWideChar); 406 else 407 LM.setKind(LengthModifier::None); 408 return true; 409 } 410 411 // Figure out the length modifier. 412 switch (BT->getKind()) { 413 // no modifier 414 case BuiltinType::UInt: 415 case BuiltinType::Int: 416 case BuiltinType::Float: 417 LM.setKind(LengthModifier::None); 418 break; 419 420 // hh 421 case BuiltinType::Char_U: 422 case BuiltinType::UChar: 423 case BuiltinType::Char_S: 424 case BuiltinType::SChar: 425 LM.setKind(LengthModifier::AsChar); 426 break; 427 428 // h 429 case BuiltinType::Short: 430 case BuiltinType::UShort: 431 LM.setKind(LengthModifier::AsShort); 432 break; 433 434 // l 435 case BuiltinType::Long: 436 case BuiltinType::ULong: 437 case BuiltinType::Double: 438 LM.setKind(LengthModifier::AsLong); 439 break; 440 441 // ll 442 case BuiltinType::LongLong: 443 case BuiltinType::ULongLong: 444 LM.setKind(LengthModifier::AsLongLong); 445 break; 446 447 // L 448 case BuiltinType::LongDouble: 449 LM.setKind(LengthModifier::AsLongDouble); 450 break; 451 452 // Don't know. 453 default: 454 return false; 455 } 456 457 // Handle size_t, ptrdiff_t, etc. that have dedicated length modifiers in C99. 458 if (isa<TypedefType>(PT) && (LangOpt.C99 || LangOpt.CPlusPlus11)) 459 namedTypeToLengthModifier(PT, LM); 460 461 // If fixing the length modifier was enough, we are done. 462 if (hasValidLengthModifier(Ctx.getTargetInfo())) { 463 const analyze_scanf::ArgType &AT = getArgType(Ctx); 464 if (AT.isValid() && AT.matchesType(Ctx, QT)) 465 return true; 466 } 467 468 // Figure out the conversion specifier. 469 if (PT->isRealFloatingType()) 470 CS.setKind(ConversionSpecifier::fArg); 471 else if (PT->isSignedIntegerType()) 472 CS.setKind(ConversionSpecifier::dArg); 473 else if (PT->isUnsignedIntegerType()) 474 CS.setKind(ConversionSpecifier::uArg); 475 else 476 llvm_unreachable("Unexpected type"); 477 478 return true; 479} 480 481void ScanfSpecifier::toString(raw_ostream &os) const { 482 os << "%"; 483 484 if (usesPositionalArg()) 485 os << getPositionalArgIndex() << "$"; 486 if (SuppressAssignment) 487 os << "*"; 488 489 FieldWidth.toString(os); 490 os << LM.toString(); 491 os << CS.toString(); 492} 493 494bool clang::analyze_format_string::ParseScanfString(FormatStringHandler &H, 495 const char *I, 496 const char *E, 497 const LangOptions &LO, 498 const TargetInfo &Target) { 499 500 unsigned argIndex = 0; 501 502 // Keep looking for a format specifier until we have exhausted the string. 503 while (I != E) { 504 const ScanfSpecifierResult &FSR = ParseScanfSpecifier(H, I, E, argIndex, 505 LO, Target); 506 // Did a fail-stop error of any kind occur when parsing the specifier? 507 // If so, don't do any more processing. 508 if (FSR.shouldStop()) 509 return true; 510 // Did we exhaust the string or encounter an error that 511 // we can recover from? 512 if (!FSR.hasValue()) 513 continue; 514 // We have a format specifier. Pass it to the callback. 515 if (!H.HandleScanfSpecifier(FSR.getValue(), FSR.getStart(), 516 I - FSR.getStart())) { 517 return true; 518 } 519 } 520 assert(I == E && "Format string not exhausted"); 521 return false; 522} 523