1//= ScanfFormatString.cpp - Analysis of printf format strings --*- C++ -*-===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// Handling of format string in scanf and friends. The structure of format 11// strings for fscanf() are described in C99 7.19.6.2. 12// 13//===----------------------------------------------------------------------===// 14 15#include "clang/Analysis/Analyses/FormatString.h" 16#include "FormatStringParsing.h" 17#include "clang/Basic/TargetInfo.h" 18 19using clang::analyze_format_string::ArgType; 20using clang::analyze_format_string::FormatStringHandler; 21using clang::analyze_format_string::LengthModifier; 22using clang::analyze_format_string::OptionalAmount; 23using clang::analyze_format_string::ConversionSpecifier; 24using clang::analyze_scanf::ScanfConversionSpecifier; 25using clang::analyze_scanf::ScanfSpecifier; 26using clang::UpdateOnReturn; 27using namespace clang; 28 29typedef clang::analyze_format_string::SpecifierResult<ScanfSpecifier> 30 ScanfSpecifierResult; 31 32static bool ParseScanList(FormatStringHandler &H, 33 ScanfConversionSpecifier &CS, 34 const char *&Beg, const char *E) { 35 const char *I = Beg; 36 const char *start = I - 1; 37 UpdateOnReturn <const char*> UpdateBeg(Beg, I); 38 39 // No more characters? 40 if (I == E) { 41 H.HandleIncompleteScanList(start, I); 42 return true; 43 } 44 45 // Special case: ']' is the first character. 46 if (*I == ']') { 47 if (++I == E) { 48 H.HandleIncompleteScanList(start, I - 1); 49 return true; 50 } 51 } 52 53 // Look for a ']' character which denotes the end of the scan list. 54 while (*I != ']') { 55 if (++I == E) { 56 H.HandleIncompleteScanList(start, I - 1); 57 return true; 58 } 59 } 60 61 CS.setEndScanList(I); 62 return false; 63} 64 65// FIXME: Much of this is copy-paste from ParsePrintfSpecifier. 66// We can possibly refactor. 67static ScanfSpecifierResult ParseScanfSpecifier(FormatStringHandler &H, 68 const char *&Beg, 69 const char *E, 70 unsigned &argIndex, 71 const LangOptions &LO, 72 const TargetInfo &Target) { 73 74 using namespace clang::analyze_scanf; 75 const char *I = Beg; 76 const char *Start = 0; 77 UpdateOnReturn <const char*> UpdateBeg(Beg, I); 78 79 // Look for a '%' character that indicates the start of a format specifier. 80 for ( ; I != E ; ++I) { 81 char c = *I; 82 if (c == '\0') { 83 // Detect spurious null characters, which are likely errors. 84 H.HandleNullChar(I); 85 return true; 86 } 87 if (c == '%') { 88 Start = I++; // Record the start of the format specifier. 89 break; 90 } 91 } 92 93 // No format specifier found? 94 if (!Start) 95 return false; 96 97 if (I == E) { 98 // No more characters left? 99 H.HandleIncompleteSpecifier(Start, E - Start); 100 return true; 101 } 102 103 ScanfSpecifier FS; 104 if (ParseArgPosition(H, FS, Start, I, E)) 105 return true; 106 107 if (I == E) { 108 // No more characters left? 109 H.HandleIncompleteSpecifier(Start, E - Start); 110 return true; 111 } 112 113 // Look for '*' flag if it is present. 114 if (*I == '*') { 115 FS.setSuppressAssignment(I); 116 if (++I == E) { 117 H.HandleIncompleteSpecifier(Start, E - Start); 118 return true; 119 } 120 } 121 122 // Look for the field width (if any). Unlike printf, this is either 123 // a fixed integer or isn't present. 124 const OptionalAmount &Amt = clang::analyze_format_string::ParseAmount(I, E); 125 if (Amt.getHowSpecified() != OptionalAmount::NotSpecified) { 126 assert(Amt.getHowSpecified() == OptionalAmount::Constant); 127 FS.setFieldWidth(Amt); 128 129 if (I == E) { 130 // No more characters left? 131 H.HandleIncompleteSpecifier(Start, E - Start); 132 return true; 133 } 134 } 135 136 // Look for the length modifier. 137 if (ParseLengthModifier(FS, I, E, LO, /*scanf=*/true) && I == E) { 138 // No more characters left? 139 H.HandleIncompleteSpecifier(Start, E - Start); 140 return true; 141 } 142 143 // Detect spurious null characters, which are likely errors. 144 if (*I == '\0') { 145 H.HandleNullChar(I); 146 return true; 147 } 148 149 // Finally, look for the conversion specifier. 150 const char *conversionPosition = I++; 151 ScanfConversionSpecifier::Kind k = ScanfConversionSpecifier::InvalidSpecifier; 152 switch (*conversionPosition) { 153 default: 154 break; 155 case '%': k = ConversionSpecifier::PercentArg; break; 156 case 'A': k = ConversionSpecifier::AArg; break; 157 case 'E': k = ConversionSpecifier::EArg; break; 158 case 'F': k = ConversionSpecifier::FArg; break; 159 case 'G': k = ConversionSpecifier::GArg; break; 160 case 'X': k = ConversionSpecifier::XArg; break; 161 case 'a': k = ConversionSpecifier::aArg; break; 162 case 'd': k = ConversionSpecifier::dArg; break; 163 case 'e': k = ConversionSpecifier::eArg; break; 164 case 'f': k = ConversionSpecifier::fArg; break; 165 case 'g': k = ConversionSpecifier::gArg; break; 166 case 'i': k = ConversionSpecifier::iArg; break; 167 case 'n': k = ConversionSpecifier::nArg; break; 168 case 'c': k = ConversionSpecifier::cArg; break; 169 case 'C': k = ConversionSpecifier::CArg; break; 170 case 'S': k = ConversionSpecifier::SArg; break; 171 case '[': k = ConversionSpecifier::ScanListArg; break; 172 case 'u': k = ConversionSpecifier::uArg; break; 173 case 'x': k = ConversionSpecifier::xArg; break; 174 case 'o': k = ConversionSpecifier::oArg; break; 175 case 's': k = ConversionSpecifier::sArg; break; 176 case 'p': k = ConversionSpecifier::pArg; break; 177 // Apple extensions 178 // Apple-specific 179 case 'D': 180 if (Target.getTriple().isOSDarwin()) 181 k = ConversionSpecifier::DArg; 182 break; 183 case 'O': 184 if (Target.getTriple().isOSDarwin()) 185 k = ConversionSpecifier::OArg; 186 break; 187 case 'U': 188 if (Target.getTriple().isOSDarwin()) 189 k = ConversionSpecifier::UArg; 190 break; 191 } 192 ScanfConversionSpecifier CS(conversionPosition, k); 193 if (k == ScanfConversionSpecifier::ScanListArg) { 194 if (ParseScanList(H, CS, I, E)) 195 return true; 196 } 197 FS.setConversionSpecifier(CS); 198 if (CS.consumesDataArgument() && !FS.getSuppressAssignment() 199 && !FS.usesPositionalArg()) 200 FS.setArgIndex(argIndex++); 201 202 // FIXME: '%' and '*' doesn't make sense. Issue a warning. 203 // FIXME: 'ConsumedSoFar' and '*' doesn't make sense. 204 205 if (k == ScanfConversionSpecifier::InvalidSpecifier) { 206 // Assume the conversion takes one argument. 207 return !H.HandleInvalidScanfConversionSpecifier(FS, Beg, I - Beg); 208 } 209 return ScanfSpecifierResult(Start, FS); 210} 211 212ArgType ScanfSpecifier::getArgType(ASTContext &Ctx) const { 213 const ScanfConversionSpecifier &CS = getConversionSpecifier(); 214 215 if (!CS.consumesDataArgument()) 216 return ArgType::Invalid(); 217 218 switch(CS.getKind()) { 219 // Signed int. 220 case ConversionSpecifier::dArg: 221 case ConversionSpecifier::DArg: 222 case ConversionSpecifier::iArg: 223 switch (LM.getKind()) { 224 case LengthModifier::None: 225 return ArgType::PtrTo(Ctx.IntTy); 226 case LengthModifier::AsChar: 227 return ArgType::PtrTo(ArgType::AnyCharTy); 228 case LengthModifier::AsShort: 229 return ArgType::PtrTo(Ctx.ShortTy); 230 case LengthModifier::AsLong: 231 return ArgType::PtrTo(Ctx.LongTy); 232 case LengthModifier::AsLongLong: 233 case LengthModifier::AsQuad: 234 return ArgType::PtrTo(Ctx.LongLongTy); 235 case LengthModifier::AsInt64: 236 return ArgType::PtrTo(ArgType(Ctx.LongLongTy, "__int64")); 237 case LengthModifier::AsIntMax: 238 return ArgType::PtrTo(ArgType(Ctx.getIntMaxType(), "intmax_t")); 239 case LengthModifier::AsSizeT: 240 // FIXME: ssize_t. 241 return ArgType(); 242 case LengthModifier::AsPtrDiff: 243 return ArgType::PtrTo(ArgType(Ctx.getPointerDiffType(), "ptrdiff_t")); 244 case LengthModifier::AsLongDouble: 245 // GNU extension. 246 return ArgType::PtrTo(Ctx.LongLongTy); 247 case LengthModifier::AsAllocate: 248 case LengthModifier::AsMAllocate: 249 case LengthModifier::AsInt32: 250 case LengthModifier::AsInt3264: 251 return ArgType::Invalid(); 252 } 253 254 // Unsigned int. 255 case ConversionSpecifier::oArg: 256 case ConversionSpecifier::OArg: 257 case ConversionSpecifier::uArg: 258 case ConversionSpecifier::UArg: 259 case ConversionSpecifier::xArg: 260 case ConversionSpecifier::XArg: 261 switch (LM.getKind()) { 262 case LengthModifier::None: 263 return ArgType::PtrTo(Ctx.UnsignedIntTy); 264 case LengthModifier::AsChar: 265 return ArgType::PtrTo(Ctx.UnsignedCharTy); 266 case LengthModifier::AsShort: 267 return ArgType::PtrTo(Ctx.UnsignedShortTy); 268 case LengthModifier::AsLong: 269 return ArgType::PtrTo(Ctx.UnsignedLongTy); 270 case LengthModifier::AsLongLong: 271 case LengthModifier::AsQuad: 272 return ArgType::PtrTo(Ctx.UnsignedLongLongTy); 273 case LengthModifier::AsInt64: 274 return ArgType::PtrTo(ArgType(Ctx.UnsignedLongLongTy, "unsigned __int64")); 275 case LengthModifier::AsIntMax: 276 return ArgType::PtrTo(ArgType(Ctx.getUIntMaxType(), "uintmax_t")); 277 case LengthModifier::AsSizeT: 278 return ArgType::PtrTo(ArgType(Ctx.getSizeType(), "size_t")); 279 case LengthModifier::AsPtrDiff: 280 // FIXME: Unsigned version of ptrdiff_t? 281 return ArgType(); 282 case LengthModifier::AsLongDouble: 283 // GNU extension. 284 return ArgType::PtrTo(Ctx.UnsignedLongLongTy); 285 case LengthModifier::AsAllocate: 286 case LengthModifier::AsMAllocate: 287 case LengthModifier::AsInt32: 288 case LengthModifier::AsInt3264: 289 return ArgType::Invalid(); 290 } 291 292 // Float. 293 case ConversionSpecifier::aArg: 294 case ConversionSpecifier::AArg: 295 case ConversionSpecifier::eArg: 296 case ConversionSpecifier::EArg: 297 case ConversionSpecifier::fArg: 298 case ConversionSpecifier::FArg: 299 case ConversionSpecifier::gArg: 300 case ConversionSpecifier::GArg: 301 switch (LM.getKind()) { 302 case LengthModifier::None: 303 return ArgType::PtrTo(Ctx.FloatTy); 304 case LengthModifier::AsLong: 305 return ArgType::PtrTo(Ctx.DoubleTy); 306 case LengthModifier::AsLongDouble: 307 return ArgType::PtrTo(Ctx.LongDoubleTy); 308 default: 309 return ArgType::Invalid(); 310 } 311 312 // Char, string and scanlist. 313 case ConversionSpecifier::cArg: 314 case ConversionSpecifier::sArg: 315 case ConversionSpecifier::ScanListArg: 316 switch (LM.getKind()) { 317 case LengthModifier::None: 318 return ArgType::PtrTo(ArgType::AnyCharTy); 319 case LengthModifier::AsLong: 320 return ArgType::PtrTo(ArgType(Ctx.getWideCharType(), "wchar_t")); 321 case LengthModifier::AsAllocate: 322 case LengthModifier::AsMAllocate: 323 return ArgType::PtrTo(ArgType::CStrTy); 324 default: 325 return ArgType::Invalid(); 326 } 327 case ConversionSpecifier::CArg: 328 case ConversionSpecifier::SArg: 329 // FIXME: Mac OS X specific? 330 switch (LM.getKind()) { 331 case LengthModifier::None: 332 return ArgType::PtrTo(ArgType(Ctx.getWideCharType(), "wchar_t")); 333 case LengthModifier::AsAllocate: 334 case LengthModifier::AsMAllocate: 335 return ArgType::PtrTo(ArgType(ArgType::WCStrTy, "wchar_t *")); 336 default: 337 return ArgType::Invalid(); 338 } 339 340 // Pointer. 341 case ConversionSpecifier::pArg: 342 return ArgType::PtrTo(ArgType::CPointerTy); 343 344 // Write-back. 345 case ConversionSpecifier::nArg: 346 switch (LM.getKind()) { 347 case LengthModifier::None: 348 return ArgType::PtrTo(Ctx.IntTy); 349 case LengthModifier::AsChar: 350 return ArgType::PtrTo(Ctx.SignedCharTy); 351 case LengthModifier::AsShort: 352 return ArgType::PtrTo(Ctx.ShortTy); 353 case LengthModifier::AsLong: 354 return ArgType::PtrTo(Ctx.LongTy); 355 case LengthModifier::AsLongLong: 356 case LengthModifier::AsQuad: 357 return ArgType::PtrTo(Ctx.LongLongTy); 358 case LengthModifier::AsInt64: 359 return ArgType::PtrTo(ArgType(Ctx.LongLongTy, "__int64")); 360 case LengthModifier::AsIntMax: 361 return ArgType::PtrTo(ArgType(Ctx.getIntMaxType(), "intmax_t")); 362 case LengthModifier::AsSizeT: 363 return ArgType(); // FIXME: ssize_t 364 case LengthModifier::AsPtrDiff: 365 return ArgType::PtrTo(ArgType(Ctx.getPointerDiffType(), "ptrdiff_t")); 366 case LengthModifier::AsLongDouble: 367 return ArgType(); // FIXME: Is this a known extension? 368 case LengthModifier::AsAllocate: 369 case LengthModifier::AsMAllocate: 370 case LengthModifier::AsInt32: 371 case LengthModifier::AsInt3264: 372 return ArgType::Invalid(); 373 } 374 375 default: 376 break; 377 } 378 379 return ArgType(); 380} 381 382bool ScanfSpecifier::fixType(QualType QT, const LangOptions &LangOpt, 383 ASTContext &Ctx) { 384 if (!QT->isPointerType()) 385 return false; 386 387 // %n is different from other conversion specifiers; don't try to fix it. 388 if (CS.getKind() == ConversionSpecifier::nArg) 389 return false; 390 391 QualType PT = QT->getPointeeType(); 392 393 // If it's an enum, get its underlying type. 394 if (const EnumType *ETy = QT->getAs<EnumType>()) 395 QT = ETy->getDecl()->getIntegerType(); 396 397 const BuiltinType *BT = PT->getAs<BuiltinType>(); 398 if (!BT) 399 return false; 400 401 // Pointer to a character. 402 if (PT->isAnyCharacterType()) { 403 CS.setKind(ConversionSpecifier::sArg); 404 if (PT->isWideCharType()) 405 LM.setKind(LengthModifier::AsWideChar); 406 else 407 LM.setKind(LengthModifier::None); 408 return true; 409 } 410 411 // Figure out the length modifier. 412 switch (BT->getKind()) { 413 // no modifier 414 case BuiltinType::UInt: 415 case BuiltinType::Int: 416 case BuiltinType::Float: 417 LM.setKind(LengthModifier::None); 418 break; 419 420 // hh 421 case BuiltinType::Char_U: 422 case BuiltinType::UChar: 423 case BuiltinType::Char_S: 424 case BuiltinType::SChar: 425 LM.setKind(LengthModifier::AsChar); 426 break; 427 428 // h 429 case BuiltinType::Short: 430 case BuiltinType::UShort: 431 LM.setKind(LengthModifier::AsShort); 432 break; 433 434 // l 435 case BuiltinType::Long: 436 case BuiltinType::ULong: 437 case BuiltinType::Double: 438 LM.setKind(LengthModifier::AsLong); 439 break; 440 441 // ll 442 case BuiltinType::LongLong: 443 case BuiltinType::ULongLong: 444 LM.setKind(LengthModifier::AsLongLong); 445 break; 446 447 // L 448 case BuiltinType::LongDouble: 449 LM.setKind(LengthModifier::AsLongDouble); 450 break; 451 452 // Don't know. 453 default: 454 return false; 455 } 456 457 // Handle size_t, ptrdiff_t, etc. that have dedicated length modifiers in C99. 458 if (isa<TypedefType>(PT) && (LangOpt.C99 || LangOpt.CPlusPlus11)) 459 namedTypeToLengthModifier(PT, LM); 460 461 // If fixing the length modifier was enough, we are done. 462 if (hasValidLengthModifier(Ctx.getTargetInfo())) { 463 const analyze_scanf::ArgType &AT = getArgType(Ctx); 464 if (AT.isValid() && AT.matchesType(Ctx, QT)) 465 return true; 466 } 467 468 // Figure out the conversion specifier. 469 if (PT->isRealFloatingType()) 470 CS.setKind(ConversionSpecifier::fArg); 471 else if (PT->isSignedIntegerType()) 472 CS.setKind(ConversionSpecifier::dArg); 473 else if (PT->isUnsignedIntegerType()) 474 CS.setKind(ConversionSpecifier::uArg); 475 else 476 llvm_unreachable("Unexpected type"); 477 478 return true; 479} 480 481void ScanfSpecifier::toString(raw_ostream &os) const { 482 os << "%"; 483 484 if (usesPositionalArg()) 485 os << getPositionalArgIndex() << "$"; 486 if (SuppressAssignment) 487 os << "*"; 488 489 FieldWidth.toString(os); 490 os << LM.toString(); 491 os << CS.toString(); 492} 493 494bool clang::analyze_format_string::ParseScanfString(FormatStringHandler &H, 495 const char *I, 496 const char *E, 497 const LangOptions &LO, 498 const TargetInfo &Target) { 499 500 unsigned argIndex = 0; 501 502 // Keep looking for a format specifier until we have exhausted the string. 503 while (I != E) { 504 const ScanfSpecifierResult &FSR = ParseScanfSpecifier(H, I, E, argIndex, 505 LO, Target); 506 // Did a fail-stop error of any kind occur when parsing the specifier? 507 // If so, don't do any more processing. 508 if (FSR.shouldStop()) 509 return true; 510 // Did we exhaust the string or encounter an error that 511 // we can recover from? 512 if (!FSR.hasValue()) 513 continue; 514 // We have a format specifier. Pass it to the callback. 515 if (!H.HandleScanfSpecifier(FSR.getValue(), FSR.getStart(), 516 I - FSR.getStart())) { 517 return true; 518 } 519 } 520 assert(I == E && "Format string not exhausted"); 521 return false; 522} 523