ScanfFormatString.cpp revision 212795
1212795Sdim//= ScanfFormatString.cpp - Analysis of printf format strings --*- C++ -*-===// 2212795Sdim// 3212795Sdim// The LLVM Compiler Infrastructure 4212795Sdim// 5212795Sdim// This file is distributed under the University of Illinois Open Source 6212795Sdim// License. See LICENSE.TXT for details. 7212795Sdim// 8212795Sdim//===----------------------------------------------------------------------===// 9212795Sdim// 10212795Sdim// Handling of format string in scanf and friends. The structure of format 11212795Sdim// strings for fscanf() are described in C99 7.19.6.2. 12212795Sdim// 13212795Sdim//===----------------------------------------------------------------------===// 14212795Sdim 15212795Sdim#include "clang/Analysis/Analyses/FormatString.h" 16212795Sdim#include "FormatStringParsing.h" 17212795Sdim 18212795Sdimusing clang::analyze_format_string::ArgTypeResult; 19212795Sdimusing clang::analyze_format_string::FormatStringHandler; 20212795Sdimusing clang::analyze_format_string::LengthModifier; 21212795Sdimusing clang::analyze_format_string::OptionalAmount; 22212795Sdimusing clang::analyze_format_string::ConversionSpecifier; 23212795Sdimusing clang::analyze_scanf::ScanfConversionSpecifier; 24212795Sdimusing clang::analyze_scanf::ScanfSpecifier; 25212795Sdimusing clang::UpdateOnReturn; 26212795Sdim 27212795Sdimtypedef clang::analyze_format_string::SpecifierResult<ScanfSpecifier> 28212795Sdim ScanfSpecifierResult; 29212795Sdim 30212795Sdimstatic bool ParseScanList(FormatStringHandler &H, 31212795Sdim ScanfConversionSpecifier &CS, 32212795Sdim const char *&Beg, const char *E) { 33212795Sdim const char *I = Beg; 34212795Sdim const char *start = I - 1; 35212795Sdim UpdateOnReturn <const char*> UpdateBeg(Beg, I); 36212795Sdim 37212795Sdim // No more characters? 38212795Sdim if (I == E) { 39212795Sdim H.HandleIncompleteScanList(start, I); 40212795Sdim return true; 41212795Sdim } 42212795Sdim 43212795Sdim // Special case: ']' is the first character. 44212795Sdim if (*I == ']') { 45212795Sdim if (++I == E) { 46212795Sdim H.HandleIncompleteScanList(start, I - 1); 47212795Sdim return true; 48212795Sdim } 49212795Sdim } 50212795Sdim 51212795Sdim // Look for a ']' character which denotes the end of the scan list. 52212795Sdim while (*I != ']') { 53212795Sdim if (++I == E) { 54212795Sdim H.HandleIncompleteScanList(start, I - 1); 55212795Sdim return true; 56212795Sdim } 57212795Sdim } 58212795Sdim 59212795Sdim CS.setEndScanList(I); 60212795Sdim return false; 61212795Sdim} 62212795Sdim 63212795Sdim// FIXME: Much of this is copy-paste from ParsePrintfSpecifier. 64212795Sdim// We can possibly refactor. 65212795Sdimstatic ScanfSpecifierResult ParseScanfSpecifier(FormatStringHandler &H, 66212795Sdim const char *&Beg, 67212795Sdim const char *E, 68212795Sdim unsigned &argIndex) { 69212795Sdim 70212795Sdim using namespace clang::analyze_scanf; 71212795Sdim const char *I = Beg; 72212795Sdim const char *Start = 0; 73212795Sdim UpdateOnReturn <const char*> UpdateBeg(Beg, I); 74212795Sdim 75212795Sdim // Look for a '%' character that indicates the start of a format specifier. 76212795Sdim for ( ; I != E ; ++I) { 77212795Sdim char c = *I; 78212795Sdim if (c == '\0') { 79212795Sdim // Detect spurious null characters, which are likely errors. 80212795Sdim H.HandleNullChar(I); 81212795Sdim return true; 82212795Sdim } 83212795Sdim if (c == '%') { 84212795Sdim Start = I++; // Record the start of the format specifier. 85212795Sdim break; 86212795Sdim } 87212795Sdim } 88212795Sdim 89212795Sdim // No format specifier found? 90212795Sdim if (!Start) 91212795Sdim return false; 92212795Sdim 93212795Sdim if (I == E) { 94212795Sdim // No more characters left? 95212795Sdim H.HandleIncompleteSpecifier(Start, E - Start); 96212795Sdim return true; 97212795Sdim } 98212795Sdim 99212795Sdim ScanfSpecifier FS; 100212795Sdim if (ParseArgPosition(H, FS, Start, I, E)) 101212795Sdim return true; 102212795Sdim 103212795Sdim if (I == E) { 104212795Sdim // No more characters left? 105212795Sdim H.HandleIncompleteSpecifier(Start, E - Start); 106212795Sdim return true; 107212795Sdim } 108212795Sdim 109212795Sdim // Look for '*' flag if it is present. 110212795Sdim if (*I == '*') { 111212795Sdim FS.setSuppressAssignment(I); 112212795Sdim if (++I == E) { 113212795Sdim H.HandleIncompleteSpecifier(Start, E - Start); 114212795Sdim return true; 115212795Sdim } 116212795Sdim } 117212795Sdim 118212795Sdim // Look for the field width (if any). Unlike printf, this is either 119212795Sdim // a fixed integer or isn't present. 120212795Sdim const OptionalAmount &Amt = clang::analyze_format_string::ParseAmount(I, E); 121212795Sdim if (Amt.getHowSpecified() != OptionalAmount::NotSpecified) { 122212795Sdim assert(Amt.getHowSpecified() == OptionalAmount::Constant); 123212795Sdim FS.setFieldWidth(Amt); 124212795Sdim 125212795Sdim if (I == E) { 126212795Sdim // No more characters left? 127212795Sdim H.HandleIncompleteSpecifier(Start, E - Start); 128212795Sdim return true; 129212795Sdim } 130212795Sdim } 131212795Sdim 132212795Sdim // Look for the length modifier. 133212795Sdim if (ParseLengthModifier(FS, I, E) && I == E) { 134212795Sdim // No more characters left? 135212795Sdim H.HandleIncompleteSpecifier(Start, E - Start); 136212795Sdim return true; 137212795Sdim } 138212795Sdim 139212795Sdim // Detect spurious null characters, which are likely errors. 140212795Sdim if (*I == '\0') { 141212795Sdim H.HandleNullChar(I); 142212795Sdim return true; 143212795Sdim } 144212795Sdim 145212795Sdim // Finally, look for the conversion specifier. 146212795Sdim const char *conversionPosition = I++; 147212795Sdim ScanfConversionSpecifier::Kind k = ScanfConversionSpecifier::InvalidSpecifier; 148212795Sdim switch (*conversionPosition) { 149212795Sdim default: 150212795Sdim break; 151212795Sdim case '%': k = ConversionSpecifier::PercentArg; break; 152212795Sdim case 'A': k = ConversionSpecifier::AArg; break; 153212795Sdim case 'E': k = ConversionSpecifier::EArg; break; 154212795Sdim case 'F': k = ConversionSpecifier::FArg; break; 155212795Sdim case 'G': k = ConversionSpecifier::GArg; break; 156212795Sdim case 'X': k = ConversionSpecifier::XArg; break; 157212795Sdim case 'a': k = ConversionSpecifier::aArg; break; 158212795Sdim case 'd': k = ConversionSpecifier::dArg; break; 159212795Sdim case 'e': k = ConversionSpecifier::eArg; break; 160212795Sdim case 'f': k = ConversionSpecifier::fArg; break; 161212795Sdim case 'g': k = ConversionSpecifier::gArg; break; 162212795Sdim case 'i': k = ConversionSpecifier::iArg; break; 163212795Sdim case 'n': k = ConversionSpecifier::nArg; break; 164212795Sdim case 'c': k = ConversionSpecifier::cArg; break; 165212795Sdim case 'C': k = ConversionSpecifier::CArg; break; 166212795Sdim case 'S': k = ConversionSpecifier::SArg; break; 167212795Sdim case '[': k = ConversionSpecifier::ScanListArg; break; 168212795Sdim case 'u': k = ConversionSpecifier::uArg; break; 169212795Sdim case 'x': k = ConversionSpecifier::xArg; break; 170212795Sdim case 'o': k = ConversionSpecifier::oArg; break; 171212795Sdim case 's': k = ConversionSpecifier::sArg; break; 172212795Sdim case 'p': k = ConversionSpecifier::pArg; break; 173212795Sdim } 174212795Sdim ScanfConversionSpecifier CS(conversionPosition, k); 175212795Sdim if (k == ScanfConversionSpecifier::ScanListArg) { 176212795Sdim if (!ParseScanList(H, CS, I, E)) 177212795Sdim return true; 178212795Sdim } 179212795Sdim FS.setConversionSpecifier(CS); 180212795Sdim if (CS.consumesDataArgument() && !FS.getSuppressAssignment() 181212795Sdim && !FS.usesPositionalArg()) 182212795Sdim FS.setArgIndex(argIndex++); 183212795Sdim 184212795Sdim // FIXME: '%' and '*' doesn't make sense. Issue a warning. 185212795Sdim // FIXME: 'ConsumedSoFar' and '*' doesn't make sense. 186212795Sdim 187212795Sdim if (k == ScanfConversionSpecifier::InvalidSpecifier) { 188212795Sdim // Assume the conversion takes one argument. 189212795Sdim return !H.HandleInvalidScanfConversionSpecifier(FS, Beg, I - Beg); 190212795Sdim } 191212795Sdim return ScanfSpecifierResult(Start, FS); 192212795Sdim} 193212795Sdim 194212795Sdimbool clang::analyze_format_string::ParseScanfString(FormatStringHandler &H, 195212795Sdim const char *I, 196212795Sdim const char *E) { 197212795Sdim 198212795Sdim unsigned argIndex = 0; 199212795Sdim 200212795Sdim // Keep looking for a format specifier until we have exhausted the string. 201212795Sdim while (I != E) { 202212795Sdim const ScanfSpecifierResult &FSR = ParseScanfSpecifier(H, I, E, argIndex); 203212795Sdim // Did a fail-stop error of any kind occur when parsing the specifier? 204212795Sdim // If so, don't do any more processing. 205212795Sdim if (FSR.shouldStop()) 206212795Sdim return true;; 207212795Sdim // Did we exhaust the string or encounter an error that 208212795Sdim // we can recover from? 209212795Sdim if (!FSR.hasValue()) 210212795Sdim continue; 211212795Sdim // We have a format specifier. Pass it to the callback. 212212795Sdim if (!H.HandleScanfSpecifier(FSR.getValue(), FSR.getStart(), 213212795Sdim I - FSR.getStart())) { 214212795Sdim return true; 215212795Sdim } 216212795Sdim } 217212795Sdim assert(I == E && "Format string not exhausted"); 218212795Sdim return false; 219212795Sdim} 220212795Sdim 221212795Sdim 222