//= ScanfFormatString.cpp - Analysis of scanf format strings ---*- C++ -*-===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
// Handling of format strings in scanf and friends.  The structure of format
// strings for fscanf() is described in C99 7.19.6.2.
12//
13//===----------------------------------------------------------------------===//
14
15#include "clang/Analysis/Analyses/FormatString.h"
16#include "FormatStringParsing.h"
17#include "clang/Basic/TargetInfo.h"
18
19using clang::analyze_format_string::ArgType;
20using clang::analyze_format_string::FormatStringHandler;
21using clang::analyze_format_string::LengthModifier;
22using clang::analyze_format_string::OptionalAmount;
23using clang::analyze_format_string::ConversionSpecifier;
24using clang::analyze_scanf::ScanfConversionSpecifier;
25using clang::analyze_scanf::ScanfSpecifier;
26using clang::UpdateOnReturn;
27using namespace clang;
28
// Outcome of trying to parse one scanf conversion specification.  In this
// file it is returned by ParseScanfSpecifier and queried through
// shouldStop(), hasValue(), getValue() and getStart().
typedef clang::analyze_format_string::SpecifierResult<ScanfSpecifier>
        ScanfSpecifierResult;
31
32static bool ParseScanList(FormatStringHandler &H,
33                          ScanfConversionSpecifier &CS,
34                          const char *&Beg, const char *E) {
35  const char *I = Beg;
36  const char *start = I - 1;
37  UpdateOnReturn <const char*> UpdateBeg(Beg, I);
38
39  // No more characters?
40  if (I == E) {
41    H.HandleIncompleteScanList(start, I);
42    return true;
43  }
44
45  // Special case: ']' is the first character.
46  if (*I == ']') {
47    if (++I == E) {
48      H.HandleIncompleteScanList(start, I - 1);
49      return true;
50    }
51  }
52
53  // Look for a ']' character which denotes the end of the scan list.
54  while (*I != ']') {
55    if (++I == E) {
56      H.HandleIncompleteScanList(start, I - 1);
57      return true;
58    }
59  }
60
61  CS.setEndScanList(I);
62  return false;
63}
64
65// FIXME: Much of this is copy-paste from ParsePrintfSpecifier.
66// We can possibly refactor.
67static ScanfSpecifierResult ParseScanfSpecifier(FormatStringHandler &H,
68                                                const char *&Beg,
69                                                const char *E,
70                                                unsigned &argIndex,
71                                                const LangOptions &LO,
72                                                const TargetInfo &Target) {
73
74  using namespace clang::analyze_scanf;
75  const char *I = Beg;
76  const char *Start = 0;
77  UpdateOnReturn <const char*> UpdateBeg(Beg, I);
78
79    // Look for a '%' character that indicates the start of a format specifier.
80  for ( ; I != E ; ++I) {
81    char c = *I;
82    if (c == '\0') {
83        // Detect spurious null characters, which are likely errors.
84      H.HandleNullChar(I);
85      return true;
86    }
87    if (c == '%') {
88      Start = I++;  // Record the start of the format specifier.
89      break;
90    }
91  }
92
93    // No format specifier found?
94  if (!Start)
95    return false;
96
97  if (I == E) {
98      // No more characters left?
99    H.HandleIncompleteSpecifier(Start, E - Start);
100    return true;
101  }
102
103  ScanfSpecifier FS;
104  if (ParseArgPosition(H, FS, Start, I, E))
105    return true;
106
107  if (I == E) {
108      // No more characters left?
109    H.HandleIncompleteSpecifier(Start, E - Start);
110    return true;
111  }
112
113  // Look for '*' flag if it is present.
114  if (*I == '*') {
115    FS.setSuppressAssignment(I);
116    if (++I == E) {
117      H.HandleIncompleteSpecifier(Start, E - Start);
118      return true;
119    }
120  }
121
122  // Look for the field width (if any).  Unlike printf, this is either
123  // a fixed integer or isn't present.
124  const OptionalAmount &Amt = clang::analyze_format_string::ParseAmount(I, E);
125  if (Amt.getHowSpecified() != OptionalAmount::NotSpecified) {
126    assert(Amt.getHowSpecified() == OptionalAmount::Constant);
127    FS.setFieldWidth(Amt);
128
129    if (I == E) {
130      // No more characters left?
131      H.HandleIncompleteSpecifier(Start, E - Start);
132      return true;
133    }
134  }
135
136  // Look for the length modifier.
137  if (ParseLengthModifier(FS, I, E, LO, /*scanf=*/true) && I == E) {
138      // No more characters left?
139    H.HandleIncompleteSpecifier(Start, E - Start);
140    return true;
141  }
142
143  // Detect spurious null characters, which are likely errors.
144  if (*I == '\0') {
145    H.HandleNullChar(I);
146    return true;
147  }
148
149  // Finally, look for the conversion specifier.
150  const char *conversionPosition = I++;
151  ScanfConversionSpecifier::Kind k = ScanfConversionSpecifier::InvalidSpecifier;
152  switch (*conversionPosition) {
153    default:
154      break;
155    case '%': k = ConversionSpecifier::PercentArg;   break;
156    case 'A': k = ConversionSpecifier::AArg; break;
157    case 'E': k = ConversionSpecifier::EArg; break;
158    case 'F': k = ConversionSpecifier::FArg; break;
159    case 'G': k = ConversionSpecifier::GArg; break;
160    case 'X': k = ConversionSpecifier::XArg; break;
161    case 'a': k = ConversionSpecifier::aArg; break;
162    case 'd': k = ConversionSpecifier::dArg; break;
163    case 'e': k = ConversionSpecifier::eArg; break;
164    case 'f': k = ConversionSpecifier::fArg; break;
165    case 'g': k = ConversionSpecifier::gArg; break;
166    case 'i': k = ConversionSpecifier::iArg; break;
167    case 'n': k = ConversionSpecifier::nArg; break;
168    case 'c': k = ConversionSpecifier::cArg; break;
169    case 'C': k = ConversionSpecifier::CArg; break;
170    case 'S': k = ConversionSpecifier::SArg; break;
171    case '[': k = ConversionSpecifier::ScanListArg; break;
172    case 'u': k = ConversionSpecifier::uArg; break;
173    case 'x': k = ConversionSpecifier::xArg; break;
174    case 'o': k = ConversionSpecifier::oArg; break;
175    case 's': k = ConversionSpecifier::sArg; break;
176    case 'p': k = ConversionSpecifier::pArg; break;
177    // Apple extensions
178      // Apple-specific
179    case 'D':
180      if (Target.getTriple().isOSDarwin())
181        k = ConversionSpecifier::DArg;
182      break;
183    case 'O':
184      if (Target.getTriple().isOSDarwin())
185        k = ConversionSpecifier::OArg;
186      break;
187    case 'U':
188      if (Target.getTriple().isOSDarwin())
189        k = ConversionSpecifier::UArg;
190      break;
191  }
192  ScanfConversionSpecifier CS(conversionPosition, k);
193  if (k == ScanfConversionSpecifier::ScanListArg) {
194    if (ParseScanList(H, CS, I, E))
195      return true;
196  }
197  FS.setConversionSpecifier(CS);
198  if (CS.consumesDataArgument() && !FS.getSuppressAssignment()
199      && !FS.usesPositionalArg())
200    FS.setArgIndex(argIndex++);
201
202  // FIXME: '%' and '*' doesn't make sense.  Issue a warning.
203  // FIXME: 'ConsumedSoFar' and '*' doesn't make sense.
204
205  if (k == ScanfConversionSpecifier::InvalidSpecifier) {
206    // Assume the conversion takes one argument.
207    return !H.HandleInvalidScanfConversionSpecifier(FS, Beg, I - Beg);
208  }
209  return ScanfSpecifierResult(Start, FS);
210}
211
212ArgType ScanfSpecifier::getArgType(ASTContext &Ctx) const {
213  const ScanfConversionSpecifier &CS = getConversionSpecifier();
214
215  if (!CS.consumesDataArgument())
216    return ArgType::Invalid();
217
218  switch(CS.getKind()) {
219    // Signed int.
220    case ConversionSpecifier::dArg:
221    case ConversionSpecifier::DArg:
222    case ConversionSpecifier::iArg:
223      switch (LM.getKind()) {
224        case LengthModifier::None:
225          return ArgType::PtrTo(Ctx.IntTy);
226        case LengthModifier::AsChar:
227          return ArgType::PtrTo(ArgType::AnyCharTy);
228        case LengthModifier::AsShort:
229          return ArgType::PtrTo(Ctx.ShortTy);
230        case LengthModifier::AsLong:
231          return ArgType::PtrTo(Ctx.LongTy);
232        case LengthModifier::AsLongLong:
233        case LengthModifier::AsQuad:
234          return ArgType::PtrTo(Ctx.LongLongTy);
235        case LengthModifier::AsInt64:
236          return ArgType::PtrTo(ArgType(Ctx.LongLongTy, "__int64"));
237        case LengthModifier::AsIntMax:
238          return ArgType::PtrTo(ArgType(Ctx.getIntMaxType(), "intmax_t"));
239        case LengthModifier::AsSizeT:
240          // FIXME: ssize_t.
241          return ArgType();
242        case LengthModifier::AsPtrDiff:
243          return ArgType::PtrTo(ArgType(Ctx.getPointerDiffType(), "ptrdiff_t"));
244        case LengthModifier::AsLongDouble:
245          // GNU extension.
246          return ArgType::PtrTo(Ctx.LongLongTy);
247        case LengthModifier::AsAllocate:
248        case LengthModifier::AsMAllocate:
249        case LengthModifier::AsInt32:
250        case LengthModifier::AsInt3264:
251          return ArgType::Invalid();
252      }
253
254    // Unsigned int.
255    case ConversionSpecifier::oArg:
256    case ConversionSpecifier::OArg:
257    case ConversionSpecifier::uArg:
258    case ConversionSpecifier::UArg:
259    case ConversionSpecifier::xArg:
260    case ConversionSpecifier::XArg:
261      switch (LM.getKind()) {
262        case LengthModifier::None:
263          return ArgType::PtrTo(Ctx.UnsignedIntTy);
264        case LengthModifier::AsChar:
265          return ArgType::PtrTo(Ctx.UnsignedCharTy);
266        case LengthModifier::AsShort:
267          return ArgType::PtrTo(Ctx.UnsignedShortTy);
268        case LengthModifier::AsLong:
269          return ArgType::PtrTo(Ctx.UnsignedLongTy);
270        case LengthModifier::AsLongLong:
271        case LengthModifier::AsQuad:
272          return ArgType::PtrTo(Ctx.UnsignedLongLongTy);
273        case LengthModifier::AsInt64:
274          return ArgType::PtrTo(ArgType(Ctx.UnsignedLongLongTy, "unsigned __int64"));
275        case LengthModifier::AsIntMax:
276          return ArgType::PtrTo(ArgType(Ctx.getUIntMaxType(), "uintmax_t"));
277        case LengthModifier::AsSizeT:
278          return ArgType::PtrTo(ArgType(Ctx.getSizeType(), "size_t"));
279        case LengthModifier::AsPtrDiff:
280          // FIXME: Unsigned version of ptrdiff_t?
281          return ArgType();
282        case LengthModifier::AsLongDouble:
283          // GNU extension.
284          return ArgType::PtrTo(Ctx.UnsignedLongLongTy);
285        case LengthModifier::AsAllocate:
286        case LengthModifier::AsMAllocate:
287        case LengthModifier::AsInt32:
288        case LengthModifier::AsInt3264:
289          return ArgType::Invalid();
290      }
291
292    // Float.
293    case ConversionSpecifier::aArg:
294    case ConversionSpecifier::AArg:
295    case ConversionSpecifier::eArg:
296    case ConversionSpecifier::EArg:
297    case ConversionSpecifier::fArg:
298    case ConversionSpecifier::FArg:
299    case ConversionSpecifier::gArg:
300    case ConversionSpecifier::GArg:
301      switch (LM.getKind()) {
302        case LengthModifier::None:
303          return ArgType::PtrTo(Ctx.FloatTy);
304        case LengthModifier::AsLong:
305          return ArgType::PtrTo(Ctx.DoubleTy);
306        case LengthModifier::AsLongDouble:
307          return ArgType::PtrTo(Ctx.LongDoubleTy);
308        default:
309          return ArgType::Invalid();
310      }
311
312    // Char, string and scanlist.
313    case ConversionSpecifier::cArg:
314    case ConversionSpecifier::sArg:
315    case ConversionSpecifier::ScanListArg:
316      switch (LM.getKind()) {
317        case LengthModifier::None:
318          return ArgType::PtrTo(ArgType::AnyCharTy);
319        case LengthModifier::AsLong:
320          return ArgType::PtrTo(ArgType(Ctx.getWideCharType(), "wchar_t"));
321        case LengthModifier::AsAllocate:
322        case LengthModifier::AsMAllocate:
323          return ArgType::PtrTo(ArgType::CStrTy);
324        default:
325          return ArgType::Invalid();
326      }
327    case ConversionSpecifier::CArg:
328    case ConversionSpecifier::SArg:
329      // FIXME: Mac OS X specific?
330      switch (LM.getKind()) {
331        case LengthModifier::None:
332          return ArgType::PtrTo(ArgType(Ctx.getWideCharType(), "wchar_t"));
333        case LengthModifier::AsAllocate:
334        case LengthModifier::AsMAllocate:
335          return ArgType::PtrTo(ArgType(ArgType::WCStrTy, "wchar_t *"));
336        default:
337          return ArgType::Invalid();
338      }
339
340    // Pointer.
341    case ConversionSpecifier::pArg:
342      return ArgType::PtrTo(ArgType::CPointerTy);
343
344    // Write-back.
345    case ConversionSpecifier::nArg:
346      switch (LM.getKind()) {
347        case LengthModifier::None:
348          return ArgType::PtrTo(Ctx.IntTy);
349        case LengthModifier::AsChar:
350          return ArgType::PtrTo(Ctx.SignedCharTy);
351        case LengthModifier::AsShort:
352          return ArgType::PtrTo(Ctx.ShortTy);
353        case LengthModifier::AsLong:
354          return ArgType::PtrTo(Ctx.LongTy);
355        case LengthModifier::AsLongLong:
356        case LengthModifier::AsQuad:
357          return ArgType::PtrTo(Ctx.LongLongTy);
358        case LengthModifier::AsInt64:
359          return ArgType::PtrTo(ArgType(Ctx.LongLongTy, "__int64"));
360        case LengthModifier::AsIntMax:
361          return ArgType::PtrTo(ArgType(Ctx.getIntMaxType(), "intmax_t"));
362        case LengthModifier::AsSizeT:
363          return ArgType(); // FIXME: ssize_t
364        case LengthModifier::AsPtrDiff:
365          return ArgType::PtrTo(ArgType(Ctx.getPointerDiffType(), "ptrdiff_t"));
366        case LengthModifier::AsLongDouble:
367          return ArgType(); // FIXME: Is this a known extension?
368        case LengthModifier::AsAllocate:
369        case LengthModifier::AsMAllocate:
370        case LengthModifier::AsInt32:
371        case LengthModifier::AsInt3264:
372          return ArgType::Invalid();
373        }
374
375    default:
376      break;
377  }
378
379  return ArgType();
380}
381
382bool ScanfSpecifier::fixType(QualType QT, const LangOptions &LangOpt,
383                             ASTContext &Ctx) {
384  if (!QT->isPointerType())
385    return false;
386
387  // %n is different from other conversion specifiers; don't try to fix it.
388  if (CS.getKind() == ConversionSpecifier::nArg)
389    return false;
390
391  QualType PT = QT->getPointeeType();
392
393  // If it's an enum, get its underlying type.
394  if (const EnumType *ETy = QT->getAs<EnumType>())
395    QT = ETy->getDecl()->getIntegerType();
396
397  const BuiltinType *BT = PT->getAs<BuiltinType>();
398  if (!BT)
399    return false;
400
401  // Pointer to a character.
402  if (PT->isAnyCharacterType()) {
403    CS.setKind(ConversionSpecifier::sArg);
404    if (PT->isWideCharType())
405      LM.setKind(LengthModifier::AsWideChar);
406    else
407      LM.setKind(LengthModifier::None);
408    return true;
409  }
410
411  // Figure out the length modifier.
412  switch (BT->getKind()) {
413    // no modifier
414    case BuiltinType::UInt:
415    case BuiltinType::Int:
416    case BuiltinType::Float:
417      LM.setKind(LengthModifier::None);
418      break;
419
420    // hh
421    case BuiltinType::Char_U:
422    case BuiltinType::UChar:
423    case BuiltinType::Char_S:
424    case BuiltinType::SChar:
425      LM.setKind(LengthModifier::AsChar);
426      break;
427
428    // h
429    case BuiltinType::Short:
430    case BuiltinType::UShort:
431      LM.setKind(LengthModifier::AsShort);
432      break;
433
434    // l
435    case BuiltinType::Long:
436    case BuiltinType::ULong:
437    case BuiltinType::Double:
438      LM.setKind(LengthModifier::AsLong);
439      break;
440
441    // ll
442    case BuiltinType::LongLong:
443    case BuiltinType::ULongLong:
444      LM.setKind(LengthModifier::AsLongLong);
445      break;
446
447    // L
448    case BuiltinType::LongDouble:
449      LM.setKind(LengthModifier::AsLongDouble);
450      break;
451
452    // Don't know.
453    default:
454      return false;
455  }
456
457  // Handle size_t, ptrdiff_t, etc. that have dedicated length modifiers in C99.
458  if (isa<TypedefType>(PT) && (LangOpt.C99 || LangOpt.CPlusPlus11))
459    namedTypeToLengthModifier(PT, LM);
460
461  // If fixing the length modifier was enough, we are done.
462  if (hasValidLengthModifier(Ctx.getTargetInfo())) {
463    const analyze_scanf::ArgType &AT = getArgType(Ctx);
464    if (AT.isValid() && AT.matchesType(Ctx, QT))
465      return true;
466  }
467
468  // Figure out the conversion specifier.
469  if (PT->isRealFloatingType())
470    CS.setKind(ConversionSpecifier::fArg);
471  else if (PT->isSignedIntegerType())
472    CS.setKind(ConversionSpecifier::dArg);
473  else if (PT->isUnsignedIntegerType())
474    CS.setKind(ConversionSpecifier::uArg);
475  else
476    llvm_unreachable("Unexpected type");
477
478  return true;
479}
480
481void ScanfSpecifier::toString(raw_ostream &os) const {
482  os << "%";
483
484  if (usesPositionalArg())
485    os << getPositionalArgIndex() << "$";
486  if (SuppressAssignment)
487    os << "*";
488
489  FieldWidth.toString(os);
490  os << LM.toString();
491  os << CS.toString();
492}
493
494bool clang::analyze_format_string::ParseScanfString(FormatStringHandler &H,
495                                                    const char *I,
496                                                    const char *E,
497                                                    const LangOptions &LO,
498                                                    const TargetInfo &Target) {
499
500  unsigned argIndex = 0;
501
502  // Keep looking for a format specifier until we have exhausted the string.
503  while (I != E) {
504    const ScanfSpecifierResult &FSR = ParseScanfSpecifier(H, I, E, argIndex,
505                                                          LO, Target);
506    // Did a fail-stop error of any kind occur when parsing the specifier?
507    // If so, don't do any more processing.
508    if (FSR.shouldStop())
509      return true;
510      // Did we exhaust the string or encounter an error that
511      // we can recover from?
512    if (!FSR.hasValue())
513      continue;
514      // We have a format specifier.  Pass it to the callback.
515    if (!H.HandleScanfSpecifier(FSR.getValue(), FSR.getStart(),
516                                I - FSR.getStart())) {
517      return true;
518    }
519  }
520  assert(I == E && "Format string not exhausted");
521  return false;
522}
523