ScanfFormatString.cpp revision 212795
//= ScanfFormatString.cpp - Analysis of scanf format strings ---*- C++ -*-===//
2212795Sdim//
3212795Sdim//                     The LLVM Compiler Infrastructure
4212795Sdim//
5212795Sdim// This file is distributed under the University of Illinois Open Source
6212795Sdim// License. See LICENSE.TXT for details.
7212795Sdim//
8212795Sdim//===----------------------------------------------------------------------===//
9212795Sdim//
// Handling of format strings in scanf and friends.  The structure of format
// strings for fscanf() is described in C99 7.19.6.2.
12212795Sdim//
13212795Sdim//===----------------------------------------------------------------------===//
14212795Sdim
15212795Sdim#include "clang/Analysis/Analyses/FormatString.h"
16212795Sdim#include "FormatStringParsing.h"
17212795Sdim
18212795Sdimusing clang::analyze_format_string::ArgTypeResult;
19212795Sdimusing clang::analyze_format_string::FormatStringHandler;
20212795Sdimusing clang::analyze_format_string::LengthModifier;
21212795Sdimusing clang::analyze_format_string::OptionalAmount;
22212795Sdimusing clang::analyze_format_string::ConversionSpecifier;
23212795Sdimusing clang::analyze_scanf::ScanfConversionSpecifier;
24212795Sdimusing clang::analyze_scanf::ScanfSpecifier;
25212795Sdimusing clang::UpdateOnReturn;
26212795Sdim
27212795Sdimtypedef clang::analyze_format_string::SpecifierResult<ScanfSpecifier>
28212795Sdim        ScanfSpecifierResult;
29212795Sdim
30212795Sdimstatic bool ParseScanList(FormatStringHandler &H,
31212795Sdim                          ScanfConversionSpecifier &CS,
32212795Sdim                          const char *&Beg, const char *E) {
33212795Sdim  const char *I = Beg;
34212795Sdim  const char *start = I - 1;
35212795Sdim  UpdateOnReturn <const char*> UpdateBeg(Beg, I);
36212795Sdim
37212795Sdim  // No more characters?
38212795Sdim  if (I == E) {
39212795Sdim    H.HandleIncompleteScanList(start, I);
40212795Sdim    return true;
41212795Sdim  }
42212795Sdim
43212795Sdim  // Special case: ']' is the first character.
44212795Sdim  if (*I == ']') {
45212795Sdim    if (++I == E) {
46212795Sdim      H.HandleIncompleteScanList(start, I - 1);
47212795Sdim      return true;
48212795Sdim    }
49212795Sdim  }
50212795Sdim
51212795Sdim  // Look for a ']' character which denotes the end of the scan list.
52212795Sdim  while (*I != ']') {
53212795Sdim    if (++I == E) {
54212795Sdim      H.HandleIncompleteScanList(start, I - 1);
55212795Sdim      return true;
56212795Sdim    }
57212795Sdim  }
58212795Sdim
59212795Sdim  CS.setEndScanList(I);
60212795Sdim  return false;
61212795Sdim}
62212795Sdim
63212795Sdim// FIXME: Much of this is copy-paste from ParsePrintfSpecifier.
64212795Sdim// We can possibly refactor.
65212795Sdimstatic ScanfSpecifierResult ParseScanfSpecifier(FormatStringHandler &H,
66212795Sdim                                                const char *&Beg,
67212795Sdim                                                const char *E,
68212795Sdim                                                unsigned &argIndex) {
69212795Sdim
70212795Sdim  using namespace clang::analyze_scanf;
71212795Sdim  const char *I = Beg;
72212795Sdim  const char *Start = 0;
73212795Sdim  UpdateOnReturn <const char*> UpdateBeg(Beg, I);
74212795Sdim
75212795Sdim    // Look for a '%' character that indicates the start of a format specifier.
76212795Sdim  for ( ; I != E ; ++I) {
77212795Sdim    char c = *I;
78212795Sdim    if (c == '\0') {
79212795Sdim        // Detect spurious null characters, which are likely errors.
80212795Sdim      H.HandleNullChar(I);
81212795Sdim      return true;
82212795Sdim    }
83212795Sdim    if (c == '%') {
84212795Sdim      Start = I++;  // Record the start of the format specifier.
85212795Sdim      break;
86212795Sdim    }
87212795Sdim  }
88212795Sdim
89212795Sdim    // No format specifier found?
90212795Sdim  if (!Start)
91212795Sdim    return false;
92212795Sdim
93212795Sdim  if (I == E) {
94212795Sdim      // No more characters left?
95212795Sdim    H.HandleIncompleteSpecifier(Start, E - Start);
96212795Sdim    return true;
97212795Sdim  }
98212795Sdim
99212795Sdim  ScanfSpecifier FS;
100212795Sdim  if (ParseArgPosition(H, FS, Start, I, E))
101212795Sdim    return true;
102212795Sdim
103212795Sdim  if (I == E) {
104212795Sdim      // No more characters left?
105212795Sdim    H.HandleIncompleteSpecifier(Start, E - Start);
106212795Sdim    return true;
107212795Sdim  }
108212795Sdim
109212795Sdim  // Look for '*' flag if it is present.
110212795Sdim  if (*I == '*') {
111212795Sdim    FS.setSuppressAssignment(I);
112212795Sdim    if (++I == E) {
113212795Sdim      H.HandleIncompleteSpecifier(Start, E - Start);
114212795Sdim      return true;
115212795Sdim    }
116212795Sdim  }
117212795Sdim
118212795Sdim  // Look for the field width (if any).  Unlike printf, this is either
119212795Sdim  // a fixed integer or isn't present.
120212795Sdim  const OptionalAmount &Amt = clang::analyze_format_string::ParseAmount(I, E);
121212795Sdim  if (Amt.getHowSpecified() != OptionalAmount::NotSpecified) {
122212795Sdim    assert(Amt.getHowSpecified() == OptionalAmount::Constant);
123212795Sdim    FS.setFieldWidth(Amt);
124212795Sdim
125212795Sdim    if (I == E) {
126212795Sdim      // No more characters left?
127212795Sdim      H.HandleIncompleteSpecifier(Start, E - Start);
128212795Sdim      return true;
129212795Sdim    }
130212795Sdim  }
131212795Sdim
132212795Sdim  // Look for the length modifier.
133212795Sdim  if (ParseLengthModifier(FS, I, E) && I == E) {
134212795Sdim      // No more characters left?
135212795Sdim    H.HandleIncompleteSpecifier(Start, E - Start);
136212795Sdim    return true;
137212795Sdim  }
138212795Sdim
139212795Sdim  // Detect spurious null characters, which are likely errors.
140212795Sdim  if (*I == '\0') {
141212795Sdim    H.HandleNullChar(I);
142212795Sdim    return true;
143212795Sdim  }
144212795Sdim
145212795Sdim  // Finally, look for the conversion specifier.
146212795Sdim  const char *conversionPosition = I++;
147212795Sdim  ScanfConversionSpecifier::Kind k = ScanfConversionSpecifier::InvalidSpecifier;
148212795Sdim  switch (*conversionPosition) {
149212795Sdim    default:
150212795Sdim      break;
151212795Sdim    case '%': k = ConversionSpecifier::PercentArg;   break;
152212795Sdim    case 'A': k = ConversionSpecifier::AArg; break;
153212795Sdim    case 'E': k = ConversionSpecifier::EArg; break;
154212795Sdim    case 'F': k = ConversionSpecifier::FArg; break;
155212795Sdim    case 'G': k = ConversionSpecifier::GArg; break;
156212795Sdim    case 'X': k = ConversionSpecifier::XArg; break;
157212795Sdim    case 'a': k = ConversionSpecifier::aArg; break;
158212795Sdim    case 'd': k = ConversionSpecifier::dArg; break;
159212795Sdim    case 'e': k = ConversionSpecifier::eArg; break;
160212795Sdim    case 'f': k = ConversionSpecifier::fArg; break;
161212795Sdim    case 'g': k = ConversionSpecifier::gArg; break;
162212795Sdim    case 'i': k = ConversionSpecifier::iArg; break;
163212795Sdim    case 'n': k = ConversionSpecifier::nArg; break;
164212795Sdim    case 'c': k = ConversionSpecifier::cArg; break;
165212795Sdim    case 'C': k = ConversionSpecifier::CArg; break;
166212795Sdim    case 'S': k = ConversionSpecifier::SArg; break;
167212795Sdim    case '[': k = ConversionSpecifier::ScanListArg; break;
168212795Sdim    case 'u': k = ConversionSpecifier::uArg; break;
169212795Sdim    case 'x': k = ConversionSpecifier::xArg; break;
170212795Sdim    case 'o': k = ConversionSpecifier::oArg; break;
171212795Sdim    case 's': k = ConversionSpecifier::sArg; break;
172212795Sdim    case 'p': k = ConversionSpecifier::pArg; break;
173212795Sdim  }
174212795Sdim  ScanfConversionSpecifier CS(conversionPosition, k);
175212795Sdim  if (k == ScanfConversionSpecifier::ScanListArg) {
176212795Sdim    if (!ParseScanList(H, CS, I, E))
177212795Sdim      return true;
178212795Sdim  }
179212795Sdim  FS.setConversionSpecifier(CS);
180212795Sdim  if (CS.consumesDataArgument() && !FS.getSuppressAssignment()
181212795Sdim      && !FS.usesPositionalArg())
182212795Sdim    FS.setArgIndex(argIndex++);
183212795Sdim
184212795Sdim  // FIXME: '%' and '*' doesn't make sense.  Issue a warning.
185212795Sdim  // FIXME: 'ConsumedSoFar' and '*' doesn't make sense.
186212795Sdim
187212795Sdim  if (k == ScanfConversionSpecifier::InvalidSpecifier) {
188212795Sdim    // Assume the conversion takes one argument.
189212795Sdim    return !H.HandleInvalidScanfConversionSpecifier(FS, Beg, I - Beg);
190212795Sdim  }
191212795Sdim  return ScanfSpecifierResult(Start, FS);
192212795Sdim}
193212795Sdim
194212795Sdimbool clang::analyze_format_string::ParseScanfString(FormatStringHandler &H,
195212795Sdim                                                    const char *I,
196212795Sdim                                                    const char *E) {
197212795Sdim
198212795Sdim  unsigned argIndex = 0;
199212795Sdim
200212795Sdim  // Keep looking for a format specifier until we have exhausted the string.
201212795Sdim  while (I != E) {
202212795Sdim    const ScanfSpecifierResult &FSR = ParseScanfSpecifier(H, I, E, argIndex);
203212795Sdim    // Did a fail-stop error of any kind occur when parsing the specifier?
204212795Sdim    // If so, don't do any more processing.
205212795Sdim    if (FSR.shouldStop())
206212795Sdim      return true;;
207212795Sdim      // Did we exhaust the string or encounter an error that
208212795Sdim      // we can recover from?
209212795Sdim    if (!FSR.hasValue())
210212795Sdim      continue;
211212795Sdim      // We have a format specifier.  Pass it to the callback.
212212795Sdim    if (!H.HandleScanfSpecifier(FSR.getValue(), FSR.getStart(),
213212795Sdim                                I - FSR.getStart())) {
214212795Sdim      return true;
215212795Sdim    }
216212795Sdim  }
217212795Sdim  assert(I == E && "Format string not exhausted");
218212795Sdim  return false;
219212795Sdim}
220212795Sdim
221212795Sdim
222