PrintfFormatString.cpp revision 208954
1//= PrintfFormatString.cpp - Analysis of printf format strings ---*- C++ -*-==//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// Handling of format strings in printf and friends.  The structure of format
11// strings for fprintf() is described in C99 7.19.6.1.
12//
13//===----------------------------------------------------------------------===//
14
15#include "clang/Analysis/Analyses/PrintfFormatString.h"
16#include "clang/AST/ASTContext.h"
17
18using clang::analyze_printf::ArgTypeResult;
19using clang::analyze_printf::FormatSpecifier;
20using clang::analyze_printf::FormatStringHandler;
21using clang::analyze_printf::OptionalAmount;
22using clang::analyze_printf::PositionContext;
23
24using namespace clang;
25
26namespace {
27class FormatSpecifierResult {
28  FormatSpecifier FS;
29  const char *Start;
30  bool Stop;
31public:
32  FormatSpecifierResult(bool stop = false)
33    : Start(0), Stop(stop) {}
34  FormatSpecifierResult(const char *start,
35                        const FormatSpecifier &fs)
36    : FS(fs), Start(start), Stop(false) {}
37
38
39  const char *getStart() const { return Start; }
40  bool shouldStop() const { return Stop; }
41  bool hasValue() const { return Start != 0; }
42  const FormatSpecifier &getValue() const {
43    assert(hasValue());
44    return FS;
45  }
46  const FormatSpecifier &getValue() { return FS; }
47};
48} // end anonymous namespace
49
namespace {
// Scope guard that assigns ValueToCopy to ValueToUpdate when it is destroyed.
//
// Both values are held by reference, so what gets stored is whatever the
// source variable contains when the guard goes out of scope, not what it
// contained when the guard was created.
//
// The class lives in an anonymous namespace because it is a helper private to
// this file.
template <typename T>
class UpdateOnReturn {
  T &ValueToUpdate;
  const T &ValueToCopy;

  // Not copyable: a copy would carry out the same update a second time.
  // Declared private and left undefined.
  UpdateOnReturn(const UpdateOnReturn &);
  UpdateOnReturn &operator=(const UpdateOnReturn &);
public:
  UpdateOnReturn(T &valueToUpdate, const T &valueToCopy)
    : ValueToUpdate(valueToUpdate), ValueToCopy(valueToCopy) {}

  ~UpdateOnReturn() {
    ValueToUpdate = ValueToCopy;
  }
};
} // end anonymous namespace
62
63//===----------------------------------------------------------------------===//
64// Methods for parsing format strings.
65//===----------------------------------------------------------------------===//
66
67static OptionalAmount ParseAmount(const char *&Beg, const char *E) {
68  const char *I = Beg;
69  UpdateOnReturn <const char*> UpdateBeg(Beg, I);
70
71  unsigned accumulator = 0;
72  bool hasDigits = false;
73
74  for ( ; I != E; ++I) {
75    char c = *I;
76    if (c >= '0' && c <= '9') {
77      hasDigits = true;
78      accumulator = (accumulator * 10) + (c - '0');
79      continue;
80    }
81
82    if (hasDigits)
83      return OptionalAmount(OptionalAmount::Constant, accumulator, Beg);
84
85    break;
86  }
87
88  return OptionalAmount();
89}
90
91static OptionalAmount ParseNonPositionAmount(const char *&Beg, const char *E,
92                                             unsigned &argIndex) {
93  if (*Beg == '*') {
94    ++Beg;
95    return OptionalAmount(OptionalAmount::Arg, argIndex++, Beg);
96  }
97
98  return ParseAmount(Beg, E);
99}
100
101static OptionalAmount ParsePositionAmount(FormatStringHandler &H,
102                                          const char *Start,
103                                          const char *&Beg, const char *E,
104                                          PositionContext p) {
105  if (*Beg == '*') {
106    const char *I = Beg + 1;
107    const OptionalAmount &Amt = ParseAmount(I, E);
108
109    if (Amt.getHowSpecified() == OptionalAmount::NotSpecified) {
110      H.HandleInvalidPosition(Beg, I - Beg, p);
111      return OptionalAmount(false);
112    }
113
114    if (I== E) {
115      // No more characters left?
116      H.HandleIncompleteFormatSpecifier(Start, E - Start);
117      return OptionalAmount(false);
118    }
119
120    assert(Amt.getHowSpecified() == OptionalAmount::Constant);
121
122    if (*I == '$') {
123      // Special case: '*0$', since this is an easy mistake.
124      if (Amt.getConstantAmount() == 0) {
125        H.HandleZeroPosition(Beg, I - Beg + 1);
126        return OptionalAmount(false);
127      }
128
129      const char *Tmp = Beg;
130      Beg = ++I;
131
132      return OptionalAmount(OptionalAmount::Arg, Amt.getConstantAmount() - 1,
133                            Tmp);
134    }
135
136    H.HandleInvalidPosition(Beg, I - Beg, p);
137    return OptionalAmount(false);
138  }
139
140  return ParseAmount(Beg, E);
141}
142
143static bool ParsePrecision(FormatStringHandler &H, FormatSpecifier &FS,
144                           const char *Start, const char *&Beg, const char *E,
145                           unsigned *argIndex) {
146  if (argIndex) {
147    FS.setPrecision(ParseNonPositionAmount(Beg, E, *argIndex));
148  }
149  else {
150    const OptionalAmount Amt = ParsePositionAmount(H, Start, Beg, E,
151                                                  analyze_printf::PrecisionPos);
152    if (Amt.isInvalid())
153      return true;
154    FS.setPrecision(Amt);
155  }
156  return false;
157}
158
159static bool ParseFieldWidth(FormatStringHandler &H, FormatSpecifier &FS,
160                            const char *Start, const char *&Beg, const char *E,
161                            unsigned *argIndex) {
162  // FIXME: Support negative field widths.
163  if (argIndex) {
164    FS.setFieldWidth(ParseNonPositionAmount(Beg, E, *argIndex));
165  }
166  else {
167    const OptionalAmount Amt = ParsePositionAmount(H, Start, Beg, E,
168                                                 analyze_printf::FieldWidthPos);
169    if (Amt.isInvalid())
170      return true;
171    FS.setFieldWidth(Amt);
172  }
173  return false;
174}
175
176
177static bool ParseArgPosition(FormatStringHandler &H,
178                             FormatSpecifier &FS, const char *Start,
179                             const char *&Beg, const char *E) {
180
181  using namespace clang::analyze_printf;
182  const char *I = Beg;
183
184  const OptionalAmount &Amt = ParseAmount(I, E);
185
186  if (I == E) {
187    // No more characters left?
188    H.HandleIncompleteFormatSpecifier(Start, E - Start);
189    return true;
190  }
191
192  if (Amt.getHowSpecified() == OptionalAmount::Constant && *(I++) == '$') {
193    // Special case: '%0$', since this is an easy mistake.
194    if (Amt.getConstantAmount() == 0) {
195      H.HandleZeroPosition(Start, I - Start);
196      return true;
197    }
198
199    FS.setArgIndex(Amt.getConstantAmount() - 1);
200    FS.setUsesPositionalArg();
201    // Update the caller's pointer if we decided to consume
202    // these characters.
203    Beg = I;
204    return false;
205  }
206
207  return false;
208}
209
210static FormatSpecifierResult ParseFormatSpecifier(FormatStringHandler &H,
211                                                  const char *&Beg,
212                                                  const char *E,
213                                                  unsigned &argIndex) {
214
215  using namespace clang::analyze_printf;
216
217  const char *I = Beg;
218  const char *Start = 0;
219  UpdateOnReturn <const char*> UpdateBeg(Beg, I);
220
221  // Look for a '%' character that indicates the start of a format specifier.
222  for ( ; I != E ; ++I) {
223    char c = *I;
224    if (c == '\0') {
225      // Detect spurious null characters, which are likely errors.
226      H.HandleNullChar(I);
227      return true;
228    }
229    if (c == '%') {
230      Start = I++;  // Record the start of the format specifier.
231      break;
232    }
233  }
234
235  // No format specifier found?
236  if (!Start)
237    return false;
238
239  if (I == E) {
240    // No more characters left?
241    H.HandleIncompleteFormatSpecifier(Start, E - Start);
242    return true;
243  }
244
245  FormatSpecifier FS;
246  if (ParseArgPosition(H, FS, Start, I, E))
247    return true;
248
249  if (I == E) {
250    // No more characters left?
251    H.HandleIncompleteFormatSpecifier(Start, E - Start);
252    return true;
253  }
254
255  // Look for flags (if any).
256  bool hasMore = true;
257  for ( ; I != E; ++I) {
258    switch (*I) {
259      default: hasMore = false; break;
260      case '-': FS.setIsLeftJustified(); break;
261      case '+': FS.setHasPlusPrefix(); break;
262      case ' ': FS.setHasSpacePrefix(); break;
263      case '#': FS.setHasAlternativeForm(); break;
264      case '0': FS.setHasLeadingZeros(); break;
265    }
266    if (!hasMore)
267      break;
268  }
269
270  if (I == E) {
271    // No more characters left?
272    H.HandleIncompleteFormatSpecifier(Start, E - Start);
273    return true;
274  }
275
276  // Look for the field width (if any).
277  if (ParseFieldWidth(H, FS, Start, I, E,
278                      FS.usesPositionalArg() ? 0 : &argIndex))
279    return true;
280
281  if (I == E) {
282    // No more characters left?
283    H.HandleIncompleteFormatSpecifier(Start, E - Start);
284    return true;
285  }
286
287  // Look for the precision (if any).
288  if (*I == '.') {
289    ++I;
290    if (I == E) {
291      H.HandleIncompleteFormatSpecifier(Start, E - Start);
292      return true;
293    }
294
295    if (ParsePrecision(H, FS, Start, I, E,
296                       FS.usesPositionalArg() ? 0 : &argIndex))
297      return true;
298
299    if (I == E) {
300      // No more characters left?
301      H.HandleIncompleteFormatSpecifier(Start, E - Start);
302      return true;
303    }
304  }
305
306  // Look for the length modifier.
307  LengthModifier lm = None;
308  switch (*I) {
309    default:
310      break;
311    case 'h':
312      ++I;
313      lm = (I != E && *I == 'h') ? ++I, AsChar : AsShort;
314      break;
315    case 'l':
316      ++I;
317      lm = (I != E && *I == 'l') ? ++I, AsLongLong : AsLong;
318      break;
319    case 'j': lm = AsIntMax;     ++I; break;
320    case 'z': lm = AsSizeT;      ++I; break;
321    case 't': lm = AsPtrDiff;    ++I; break;
322    case 'L': lm = AsLongDouble; ++I; break;
323    case 'q': lm = AsLongLong;   ++I; break;
324  }
325  FS.setLengthModifier(lm);
326
327  if (I == E) {
328    // No more characters left?
329    H.HandleIncompleteFormatSpecifier(Start, E - Start);
330    return true;
331  }
332
333  if (*I == '\0') {
334    // Detect spurious null characters, which are likely errors.
335    H.HandleNullChar(I);
336    return true;
337  }
338
339  // Finally, look for the conversion specifier.
340  const char *conversionPosition = I++;
341  ConversionSpecifier::Kind k = ConversionSpecifier::InvalidSpecifier;
342  switch (*conversionPosition) {
343    default:
344      break;
345    // C99: 7.19.6.1 (section 8).
346    case '%': k = ConversionSpecifier::PercentArg;   break;
347    case 'A': k = ConversionSpecifier::AArg; break;
348    case 'E': k = ConversionSpecifier::EArg; break;
349    case 'F': k = ConversionSpecifier::FArg; break;
350    case 'G': k = ConversionSpecifier::GArg; break;
351    case 'X': k = ConversionSpecifier::XArg; break;
352    case 'a': k = ConversionSpecifier::aArg; break;
353    case 'c': k = ConversionSpecifier::IntAsCharArg; break;
354    case 'd': k = ConversionSpecifier::dArg; break;
355    case 'e': k = ConversionSpecifier::eArg; break;
356    case 'f': k = ConversionSpecifier::fArg; break;
357    case 'g': k = ConversionSpecifier::gArg; break;
358    case 'i': k = ConversionSpecifier::iArg; break;
359    case 'n': k = ConversionSpecifier::OutIntPtrArg; break;
360    case 'o': k = ConversionSpecifier::oArg; break;
361    case 'p': k = ConversionSpecifier::VoidPtrArg;   break;
362    case 's': k = ConversionSpecifier::CStrArg;      break;
363    case 'u': k = ConversionSpecifier::uArg; break;
364    case 'x': k = ConversionSpecifier::xArg; break;
365    // Mac OS X (unicode) specific
366    case 'C': k = ConversionSpecifier::CArg; break;
367    case 'S': k = ConversionSpecifier::UnicodeStrArg; break;
368    // Objective-C.
369    case '@': k = ConversionSpecifier::ObjCObjArg; break;
370    // Glibc specific.
371    case 'm': k = ConversionSpecifier::PrintErrno; break;
372  }
373  ConversionSpecifier CS(conversionPosition, k);
374  FS.setConversionSpecifier(CS);
375  if (CS.consumesDataArgument() && !FS.usesPositionalArg())
376    FS.setArgIndex(argIndex++);
377
378  if (k == ConversionSpecifier::InvalidSpecifier) {
379    // Assume the conversion takes one argument.
380    return !H.HandleInvalidConversionSpecifier(FS, Beg, I - Beg);
381  }
382  return FormatSpecifierResult(Start, FS);
383}
384
385bool clang::analyze_printf::ParseFormatString(FormatStringHandler &H,
386                       const char *I, const char *E) {
387
388  unsigned argIndex = 0;
389
390  // Keep looking for a format specifier until we have exhausted the string.
391  while (I != E) {
392    const FormatSpecifierResult &FSR = ParseFormatSpecifier(H, I, E, argIndex);
393    // Did a fail-stop error of any kind occur when parsing the specifier?
394    // If so, don't do any more processing.
395    if (FSR.shouldStop())
396      return true;;
397    // Did we exhaust the string or encounter an error that
398    // we can recover from?
399    if (!FSR.hasValue())
400      continue;
401    // We have a format specifier.  Pass it to the callback.
402    if (!H.HandleFormatSpecifier(FSR.getValue(), FSR.getStart(),
403                                 I - FSR.getStart()))
404      return true;
405  }
406  assert(I == E && "Format string not exhausted");
407  return false;
408}
409
410FormatStringHandler::~FormatStringHandler() {}
411
412//===----------------------------------------------------------------------===//
413// Methods on ArgTypeResult.
414//===----------------------------------------------------------------------===//
415
416bool ArgTypeResult::matchesType(ASTContext &C, QualType argTy) const {
417  assert(isValid());
418
419  if (K == UnknownTy)
420    return true;
421
422  if (K == SpecificTy) {
423    argTy = C.getCanonicalType(argTy).getUnqualifiedType();
424
425    if (T == argTy)
426      return true;
427
428    if (const BuiltinType *BT = argTy->getAs<BuiltinType>())
429      switch (BT->getKind()) {
430        default:
431          break;
432        case BuiltinType::Char_S:
433        case BuiltinType::SChar:
434          return T == C.UnsignedCharTy;
435        case BuiltinType::Char_U:
436        case BuiltinType::UChar:
437          return T == C.SignedCharTy;
438        case BuiltinType::Short:
439          return T == C.UnsignedShortTy;
440        case BuiltinType::UShort:
441          return T == C.ShortTy;
442        case BuiltinType::Int:
443          return T == C.UnsignedIntTy;
444        case BuiltinType::UInt:
445          return T == C.IntTy;
446        case BuiltinType::Long:
447          return T == C.UnsignedLongTy;
448        case BuiltinType::ULong:
449          return T == C.LongTy;
450        case BuiltinType::LongLong:
451          return T == C.UnsignedLongLongTy;
452        case BuiltinType::ULongLong:
453          return T == C.LongLongTy;
454      }
455
456    return false;
457  }
458
459  if (K == CStrTy) {
460    const PointerType *PT = argTy->getAs<PointerType>();
461    if (!PT)
462      return false;
463
464    QualType pointeeTy = PT->getPointeeType();
465
466    if (const BuiltinType *BT = pointeeTy->getAs<BuiltinType>())
467      switch (BT->getKind()) {
468        case BuiltinType::Void:
469        case BuiltinType::Char_U:
470        case BuiltinType::UChar:
471        case BuiltinType::Char_S:
472        case BuiltinType::SChar:
473          return true;
474        default:
475          break;
476      }
477
478    return false;
479  }
480
481  if (K == WCStrTy) {
482    const PointerType *PT = argTy->getAs<PointerType>();
483    if (!PT)
484      return false;
485
486    QualType pointeeTy =
487      C.getCanonicalType(PT->getPointeeType()).getUnqualifiedType();
488
489    return pointeeTy == C.getWCharType();
490  }
491
492  return false;
493}
494
495QualType ArgTypeResult::getRepresentativeType(ASTContext &C) const {
496  assert(isValid());
497  if (K == SpecificTy)
498    return T;
499  if (K == CStrTy)
500    return C.getPointerType(C.CharTy);
501  if (K == WCStrTy)
502    return C.getPointerType(C.getWCharType());
503  if (K == ObjCPointerTy)
504    return C.ObjCBuiltinIdTy;
505
506  return QualType();
507}
508
509//===----------------------------------------------------------------------===//
510// Methods on OptionalAmount.
511//===----------------------------------------------------------------------===//
512
513ArgTypeResult OptionalAmount::getArgType(ASTContext &Ctx) const {
514  return Ctx.IntTy;
515}
516
517//===----------------------------------------------------------------------===//
518// Methods on FormatSpecifier.
519//===----------------------------------------------------------------------===//
520
521ArgTypeResult FormatSpecifier::getArgType(ASTContext &Ctx) const {
522  if (!CS.consumesDataArgument())
523    return ArgTypeResult::Invalid();
524
525  if (CS.isIntArg())
526    switch (LM) {
527      case AsLongDouble:
528        return ArgTypeResult::Invalid();
529      case None: return Ctx.IntTy;
530      case AsChar: return Ctx.SignedCharTy;
531      case AsShort: return Ctx.ShortTy;
532      case AsLong: return Ctx.LongTy;
533      case AsLongLong: return Ctx.LongLongTy;
534      case AsIntMax:
535        // FIXME: Return unknown for now.
536        return ArgTypeResult();
537      case AsSizeT: return Ctx.getSizeType();
538      case AsPtrDiff: return Ctx.getPointerDiffType();
539    }
540
541  if (CS.isUIntArg())
542    switch (LM) {
543      case AsLongDouble:
544        return ArgTypeResult::Invalid();
545      case None: return Ctx.UnsignedIntTy;
546      case AsChar: return Ctx.UnsignedCharTy;
547      case AsShort: return Ctx.UnsignedShortTy;
548      case AsLong: return Ctx.UnsignedLongTy;
549      case AsLongLong: return Ctx.UnsignedLongLongTy;
550      case AsIntMax:
551        // FIXME: Return unknown for now.
552        return ArgTypeResult();
553      case AsSizeT:
554        // FIXME: How to get the corresponding unsigned
555        // version of size_t?
556        return ArgTypeResult();
557      case AsPtrDiff:
558        // FIXME: How to get the corresponding unsigned
559        // version of ptrdiff_t?
560        return ArgTypeResult();
561    }
562
563  if (CS.isDoubleArg()) {
564    if (LM == AsLongDouble)
565      return Ctx.LongDoubleTy;
566    return Ctx.DoubleTy;
567  }
568
569  switch (CS.getKind()) {
570    case ConversionSpecifier::CStrArg:
571      return ArgTypeResult(LM == AsWideChar ? ArgTypeResult::WCStrTy                                            : ArgTypeResult::CStrTy);
572    case ConversionSpecifier::UnicodeStrArg:
573      // FIXME: This appears to be Mac OS X specific.
574      return ArgTypeResult::WCStrTy;
575    case ConversionSpecifier::CArg:
576      return Ctx.WCharTy;
577    default:
578      break;
579  }
580
581  // FIXME: Handle other cases.
582  return ArgTypeResult();
583}
584
585