// ScanfFormatString.cpp revision 239462
1//= ScanfFormatString.cpp - Analysis of printf format strings --*- C++ -*-===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// Handling of format string in scanf and friends. The structure of format 11// strings for fscanf() are described in C99 7.19.6.2. 12// 13//===----------------------------------------------------------------------===// 14 15#include "clang/Analysis/Analyses/FormatString.h" 16#include "FormatStringParsing.h" 17 18using clang::analyze_format_string::ArgType; 19using clang::analyze_format_string::FormatStringHandler; 20using clang::analyze_format_string::LengthModifier; 21using clang::analyze_format_string::OptionalAmount; 22using clang::analyze_format_string::ConversionSpecifier; 23using clang::analyze_scanf::ScanfConversionSpecifier; 24using clang::analyze_scanf::ScanfSpecifier; 25using clang::UpdateOnReturn; 26using namespace clang; 27 28typedef clang::analyze_format_string::SpecifierResult<ScanfSpecifier> 29 ScanfSpecifierResult; 30 31static bool ParseScanList(FormatStringHandler &H, 32 ScanfConversionSpecifier &CS, 33 const char *&Beg, const char *E) { 34 const char *I = Beg; 35 const char *start = I - 1; 36 UpdateOnReturn <const char*> UpdateBeg(Beg, I); 37 38 // No more characters? 39 if (I == E) { 40 H.HandleIncompleteScanList(start, I); 41 return true; 42 } 43 44 // Special case: ']' is the first character. 45 if (*I == ']') { 46 if (++I == E) { 47 H.HandleIncompleteScanList(start, I - 1); 48 return true; 49 } 50 } 51 52 // Look for a ']' character which denotes the end of the scan list. 53 while (*I != ']') { 54 if (++I == E) { 55 H.HandleIncompleteScanList(start, I - 1); 56 return true; 57 } 58 } 59 60 CS.setEndScanList(I); 61 return false; 62} 63 64// FIXME: Much of this is copy-paste from ParsePrintfSpecifier. 
65// We can possibly refactor. 66static ScanfSpecifierResult ParseScanfSpecifier(FormatStringHandler &H, 67 const char *&Beg, 68 const char *E, 69 unsigned &argIndex, 70 const LangOptions &LO) { 71 72 using namespace clang::analyze_scanf; 73 const char *I = Beg; 74 const char *Start = 0; 75 UpdateOnReturn <const char*> UpdateBeg(Beg, I); 76 77 // Look for a '%' character that indicates the start of a format specifier. 78 for ( ; I != E ; ++I) { 79 char c = *I; 80 if (c == '\0') { 81 // Detect spurious null characters, which are likely errors. 82 H.HandleNullChar(I); 83 return true; 84 } 85 if (c == '%') { 86 Start = I++; // Record the start of the format specifier. 87 break; 88 } 89 } 90 91 // No format specifier found? 92 if (!Start) 93 return false; 94 95 if (I == E) { 96 // No more characters left? 97 H.HandleIncompleteSpecifier(Start, E - Start); 98 return true; 99 } 100 101 ScanfSpecifier FS; 102 if (ParseArgPosition(H, FS, Start, I, E)) 103 return true; 104 105 if (I == E) { 106 // No more characters left? 107 H.HandleIncompleteSpecifier(Start, E - Start); 108 return true; 109 } 110 111 // Look for '*' flag if it is present. 112 if (*I == '*') { 113 FS.setSuppressAssignment(I); 114 if (++I == E) { 115 H.HandleIncompleteSpecifier(Start, E - Start); 116 return true; 117 } 118 } 119 120 // Look for the field width (if any). Unlike printf, this is either 121 // a fixed integer or isn't present. 122 const OptionalAmount &Amt = clang::analyze_format_string::ParseAmount(I, E); 123 if (Amt.getHowSpecified() != OptionalAmount::NotSpecified) { 124 assert(Amt.getHowSpecified() == OptionalAmount::Constant); 125 FS.setFieldWidth(Amt); 126 127 if (I == E) { 128 // No more characters left? 129 H.HandleIncompleteSpecifier(Start, E - Start); 130 return true; 131 } 132 } 133 134 // Look for the length modifier. 135 if (ParseLengthModifier(FS, I, E, LO, /*scanf=*/true) && I == E) { 136 // No more characters left? 
137 H.HandleIncompleteSpecifier(Start, E - Start); 138 return true; 139 } 140 141 // Detect spurious null characters, which are likely errors. 142 if (*I == '\0') { 143 H.HandleNullChar(I); 144 return true; 145 } 146 147 // Finally, look for the conversion specifier. 148 const char *conversionPosition = I++; 149 ScanfConversionSpecifier::Kind k = ScanfConversionSpecifier::InvalidSpecifier; 150 switch (*conversionPosition) { 151 default: 152 break; 153 case '%': k = ConversionSpecifier::PercentArg; break; 154 case 'A': k = ConversionSpecifier::AArg; break; 155 case 'E': k = ConversionSpecifier::EArg; break; 156 case 'F': k = ConversionSpecifier::FArg; break; 157 case 'G': k = ConversionSpecifier::GArg; break; 158 case 'X': k = ConversionSpecifier::XArg; break; 159 case 'a': k = ConversionSpecifier::aArg; break; 160 case 'd': k = ConversionSpecifier::dArg; break; 161 case 'e': k = ConversionSpecifier::eArg; break; 162 case 'f': k = ConversionSpecifier::fArg; break; 163 case 'g': k = ConversionSpecifier::gArg; break; 164 case 'i': k = ConversionSpecifier::iArg; break; 165 case 'n': k = ConversionSpecifier::nArg; break; 166 case 'c': k = ConversionSpecifier::cArg; break; 167 case 'C': k = ConversionSpecifier::CArg; break; 168 case 'S': k = ConversionSpecifier::SArg; break; 169 case '[': k = ConversionSpecifier::ScanListArg; break; 170 case 'u': k = ConversionSpecifier::uArg; break; 171 case 'x': k = ConversionSpecifier::xArg; break; 172 case 'o': k = ConversionSpecifier::oArg; break; 173 case 's': k = ConversionSpecifier::sArg; break; 174 case 'p': k = ConversionSpecifier::pArg; break; 175 } 176 ScanfConversionSpecifier CS(conversionPosition, k); 177 if (k == ScanfConversionSpecifier::ScanListArg) { 178 if (ParseScanList(H, CS, I, E)) 179 return true; 180 } 181 FS.setConversionSpecifier(CS); 182 if (CS.consumesDataArgument() && !FS.getSuppressAssignment() 183 && !FS.usesPositionalArg()) 184 FS.setArgIndex(argIndex++); 185 186 // FIXME: '%' and '*' doesn't make sense. 
Issue a warning. 187 // FIXME: 'ConsumedSoFar' and '*' doesn't make sense. 188 189 if (k == ScanfConversionSpecifier::InvalidSpecifier) { 190 // Assume the conversion takes one argument. 191 return !H.HandleInvalidScanfConversionSpecifier(FS, Beg, I - Beg); 192 } 193 return ScanfSpecifierResult(Start, FS); 194} 195 196ArgType ScanfSpecifier::getArgType(ASTContext &Ctx) const { 197 const ScanfConversionSpecifier &CS = getConversionSpecifier(); 198 199 if (!CS.consumesDataArgument()) 200 return ArgType::Invalid(); 201 202 switch(CS.getKind()) { 203 // Signed int. 204 case ConversionSpecifier::dArg: 205 case ConversionSpecifier::iArg: 206 switch (LM.getKind()) { 207 case LengthModifier::None: 208 return ArgType::PtrTo(Ctx.IntTy); 209 case LengthModifier::AsChar: 210 return ArgType::PtrTo(ArgType::AnyCharTy); 211 case LengthModifier::AsShort: 212 return ArgType::PtrTo(Ctx.ShortTy); 213 case LengthModifier::AsLong: 214 return ArgType::PtrTo(Ctx.LongTy); 215 case LengthModifier::AsLongLong: 216 case LengthModifier::AsQuad: 217 return ArgType::PtrTo(Ctx.LongLongTy); 218 case LengthModifier::AsIntMax: 219 return ArgType::PtrTo(ArgType(Ctx.getIntMaxType(), "intmax_t")); 220 case LengthModifier::AsSizeT: 221 // FIXME: ssize_t. 222 return ArgType(); 223 case LengthModifier::AsPtrDiff: 224 return ArgType::PtrTo(ArgType(Ctx.getPointerDiffType(), "ptrdiff_t")); 225 case LengthModifier::AsLongDouble: 226 // GNU extension. 227 return ArgType::PtrTo(Ctx.LongLongTy); 228 case LengthModifier::AsAllocate: 229 return ArgType::Invalid(); 230 case LengthModifier::AsMAllocate: 231 return ArgType::Invalid(); 232 } 233 234 // Unsigned int. 
235 case ConversionSpecifier::oArg: 236 case ConversionSpecifier::uArg: 237 case ConversionSpecifier::xArg: 238 case ConversionSpecifier::XArg: 239 switch (LM.getKind()) { 240 case LengthModifier::None: 241 return ArgType::PtrTo(Ctx.UnsignedIntTy); 242 case LengthModifier::AsChar: 243 return ArgType::PtrTo(Ctx.UnsignedCharTy); 244 case LengthModifier::AsShort: 245 return ArgType::PtrTo(Ctx.UnsignedShortTy); 246 case LengthModifier::AsLong: 247 return ArgType::PtrTo(Ctx.UnsignedLongTy); 248 case LengthModifier::AsLongLong: 249 case LengthModifier::AsQuad: 250 return ArgType::PtrTo(Ctx.UnsignedLongLongTy); 251 case LengthModifier::AsIntMax: 252 return ArgType::PtrTo(ArgType(Ctx.getUIntMaxType(), "uintmax_t")); 253 case LengthModifier::AsSizeT: 254 return ArgType::PtrTo(ArgType(Ctx.getSizeType(), "size_t")); 255 case LengthModifier::AsPtrDiff: 256 // FIXME: Unsigned version of ptrdiff_t? 257 return ArgType(); 258 case LengthModifier::AsLongDouble: 259 // GNU extension. 260 return ArgType::PtrTo(Ctx.UnsignedLongLongTy); 261 case LengthModifier::AsAllocate: 262 return ArgType::Invalid(); 263 case LengthModifier::AsMAllocate: 264 return ArgType::Invalid(); 265 } 266 267 // Float. 268 case ConversionSpecifier::aArg: 269 case ConversionSpecifier::AArg: 270 case ConversionSpecifier::eArg: 271 case ConversionSpecifier::EArg: 272 case ConversionSpecifier::fArg: 273 case ConversionSpecifier::FArg: 274 case ConversionSpecifier::gArg: 275 case ConversionSpecifier::GArg: 276 switch (LM.getKind()) { 277 case LengthModifier::None: 278 return ArgType::PtrTo(Ctx.FloatTy); 279 case LengthModifier::AsLong: 280 return ArgType::PtrTo(Ctx.DoubleTy); 281 case LengthModifier::AsLongDouble: 282 return ArgType::PtrTo(Ctx.LongDoubleTy); 283 default: 284 return ArgType::Invalid(); 285 } 286 287 // Char, string and scanlist. 
288 case ConversionSpecifier::cArg: 289 case ConversionSpecifier::sArg: 290 case ConversionSpecifier::ScanListArg: 291 switch (LM.getKind()) { 292 case LengthModifier::None: 293 return ArgType::PtrTo(ArgType::AnyCharTy); 294 case LengthModifier::AsLong: 295 return ArgType::PtrTo(ArgType(Ctx.getWCharType(), "wchar_t")); 296 case LengthModifier::AsAllocate: 297 case LengthModifier::AsMAllocate: 298 return ArgType::PtrTo(ArgType::CStrTy); 299 default: 300 return ArgType::Invalid(); 301 } 302 case ConversionSpecifier::CArg: 303 case ConversionSpecifier::SArg: 304 // FIXME: Mac OS X specific? 305 switch (LM.getKind()) { 306 case LengthModifier::None: 307 return ArgType::PtrTo(ArgType(Ctx.getWCharType(), "wchar_t")); 308 case LengthModifier::AsAllocate: 309 case LengthModifier::AsMAllocate: 310 return ArgType::PtrTo(ArgType(ArgType::WCStrTy, "wchar_t *")); 311 default: 312 return ArgType::Invalid(); 313 } 314 315 // Pointer. 316 case ConversionSpecifier::pArg: 317 return ArgType::PtrTo(ArgType::CPointerTy); 318 319 // Write-back. 320 case ConversionSpecifier::nArg: 321 switch (LM.getKind()) { 322 case LengthModifier::None: 323 return ArgType::PtrTo(Ctx.IntTy); 324 case LengthModifier::AsChar: 325 return ArgType::PtrTo(Ctx.SignedCharTy); 326 case LengthModifier::AsShort: 327 return ArgType::PtrTo(Ctx.ShortTy); 328 case LengthModifier::AsLong: 329 return ArgType::PtrTo(Ctx.LongTy); 330 case LengthModifier::AsLongLong: 331 case LengthModifier::AsQuad: 332 return ArgType::PtrTo(Ctx.LongLongTy); 333 case LengthModifier::AsIntMax: 334 return ArgType::PtrTo(ArgType(Ctx.getIntMaxType(), "intmax_t")); 335 case LengthModifier::AsSizeT: 336 return ArgType(); // FIXME: ssize_t 337 case LengthModifier::AsPtrDiff: 338 return ArgType::PtrTo(ArgType(Ctx.getPointerDiffType(), "ptrdiff_t")); 339 case LengthModifier::AsLongDouble: 340 return ArgType(); // FIXME: Is this a known extension? 
341 case LengthModifier::AsAllocate: 342 case LengthModifier::AsMAllocate: 343 return ArgType::Invalid(); 344 } 345 346 default: 347 break; 348 } 349 350 return ArgType(); 351} 352 353bool ScanfSpecifier::fixType(QualType QT, const LangOptions &LangOpt, 354 ASTContext &Ctx) { 355 if (!QT->isPointerType()) 356 return false; 357 358 // %n is different from other conversion specifiers; don't try to fix it. 359 if (CS.getKind() == ConversionSpecifier::nArg) 360 return false; 361 362 QualType PT = QT->getPointeeType(); 363 364 // If it's an enum, get its underlying type. 365 if (const EnumType *ETy = QT->getAs<EnumType>()) 366 QT = ETy->getDecl()->getIntegerType(); 367 368 const BuiltinType *BT = PT->getAs<BuiltinType>(); 369 if (!BT) 370 return false; 371 372 // Pointer to a character. 373 if (PT->isAnyCharacterType()) { 374 CS.setKind(ConversionSpecifier::sArg); 375 if (PT->isWideCharType()) 376 LM.setKind(LengthModifier::AsWideChar); 377 else 378 LM.setKind(LengthModifier::None); 379 return true; 380 } 381 382 // Figure out the length modifier. 383 switch (BT->getKind()) { 384 // no modifier 385 case BuiltinType::UInt: 386 case BuiltinType::Int: 387 case BuiltinType::Float: 388 LM.setKind(LengthModifier::None); 389 break; 390 391 // hh 392 case BuiltinType::Char_U: 393 case BuiltinType::UChar: 394 case BuiltinType::Char_S: 395 case BuiltinType::SChar: 396 LM.setKind(LengthModifier::AsChar); 397 break; 398 399 // h 400 case BuiltinType::Short: 401 case BuiltinType::UShort: 402 LM.setKind(LengthModifier::AsShort); 403 break; 404 405 // l 406 case BuiltinType::Long: 407 case BuiltinType::ULong: 408 case BuiltinType::Double: 409 LM.setKind(LengthModifier::AsLong); 410 break; 411 412 // ll 413 case BuiltinType::LongLong: 414 case BuiltinType::ULongLong: 415 LM.setKind(LengthModifier::AsLongLong); 416 break; 417 418 // L 419 case BuiltinType::LongDouble: 420 LM.setKind(LengthModifier::AsLongDouble); 421 break; 422 423 // Don't know. 
424 default: 425 return false; 426 } 427 428 // Handle size_t, ptrdiff_t, etc. that have dedicated length modifiers in C99. 429 if (isa<TypedefType>(PT) && (LangOpt.C99 || LangOpt.CPlusPlus0x)) 430 namedTypeToLengthModifier(PT, LM); 431 432 // If fixing the length modifier was enough, we are done. 433 const analyze_scanf::ArgType &AT = getArgType(Ctx); 434 if (hasValidLengthModifier() && AT.isValid() && AT.matchesType(Ctx, QT)) 435 return true; 436 437 // Figure out the conversion specifier. 438 if (PT->isRealFloatingType()) 439 CS.setKind(ConversionSpecifier::fArg); 440 else if (PT->isSignedIntegerType()) 441 CS.setKind(ConversionSpecifier::dArg); 442 else if (PT->isUnsignedIntegerType()) 443 CS.setKind(ConversionSpecifier::uArg); 444 else 445 llvm_unreachable("Unexpected type"); 446 447 return true; 448} 449 450void ScanfSpecifier::toString(raw_ostream &os) const { 451 os << "%"; 452 453 if (usesPositionalArg()) 454 os << getPositionalArgIndex() << "$"; 455 if (SuppressAssignment) 456 os << "*"; 457 458 FieldWidth.toString(os); 459 os << LM.toString(); 460 os << CS.toString(); 461} 462 463bool clang::analyze_format_string::ParseScanfString(FormatStringHandler &H, 464 const char *I, 465 const char *E, 466 const LangOptions &LO) { 467 468 unsigned argIndex = 0; 469 470 // Keep looking for a format specifier until we have exhausted the string. 471 while (I != E) { 472 const ScanfSpecifierResult &FSR = ParseScanfSpecifier(H, I, E, argIndex, 473 LO); 474 // Did a fail-stop error of any kind occur when parsing the specifier? 475 // If so, don't do any more processing. 476 if (FSR.shouldStop()) 477 return true;; 478 // Did we exhaust the string or encounter an error that 479 // we can recover from? 480 if (!FSR.hasValue()) 481 continue; 482 // We have a format specifier. Pass it to the callback. 
483 if (!H.HandleScanfSpecifier(FSR.getValue(), FSR.getStart(), 484 I - FSR.getStart())) { 485 return true; 486 } 487 } 488 assert(I == E && "Format string not exhausted"); 489 return false; 490} 491