PrintfFormatString.cpp revision 208987
1//= PrintfFormatStrings.cpp - Analysis of printf format strings --*- C++ -*-==//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
// Handling of format strings in printf and friends.  The structure of format
// strings for fprintf() is described in C99 7.19.6.1.
12//
13//===----------------------------------------------------------------------===//
14
15#include "clang/Analysis/Analyses/PrintfFormatString.h"
16#include "clang/AST/ASTContext.h"
17
18using clang::analyze_printf::ArgTypeResult;
19using clang::analyze_printf::FormatSpecifier;
20using clang::analyze_printf::FormatStringHandler;
21using clang::analyze_printf::OptionalAmount;
22using clang::analyze_printf::PositionContext;
23
24using namespace clang;
25
26namespace {
27class FormatSpecifierResult {
28  FormatSpecifier FS;
29  const char *Start;
30  bool Stop;
31public:
32  FormatSpecifierResult(bool stop = false)
33    : Start(0), Stop(stop) {}
34  FormatSpecifierResult(const char *start,
35                        const FormatSpecifier &fs)
36    : FS(fs), Start(start), Stop(false) {}
37
38
39  const char *getStart() const { return Start; }
40  bool shouldStop() const { return Stop; }
41  bool hasValue() const { return Start != 0; }
42  const FormatSpecifier &getValue() const {
43    assert(hasValue());
44    return FS;
45  }
46  const FormatSpecifier &getValue() { return FS; }
47};
48} // end anonymous namespace
49
/// Scope guard that, when destroyed, assigns the then-current value of one
/// variable to another.
///
/// Both variables are held by reference, so the value written is whatever
/// the source holds at destruction time, not at construction time.  The
/// parsers use this to push a local cursor back into the caller's cursor on
/// every exit path.
template <typename T>
class UpdateOnReturn {
  T &Target;        // Variable that is overwritten in the destructor.
  const T &Source;  // Variable whose final value is written to Target.

public:
  UpdateOnReturn(T &valueToUpdate, const T &valueToCopy)
    : Target(valueToUpdate), Source(valueToCopy) {}

  ~UpdateOnReturn() { Target = Source; }
};
62
63//===----------------------------------------------------------------------===//
64// Methods for parsing format strings.
65//===----------------------------------------------------------------------===//
66
67static OptionalAmount ParseAmount(const char *&Beg, const char *E) {
68  const char *I = Beg;
69  UpdateOnReturn <const char*> UpdateBeg(Beg, I);
70
71  unsigned accumulator = 0;
72  bool hasDigits = false;
73
74  for ( ; I != E; ++I) {
75    char c = *I;
76    if (c >= '0' && c <= '9') {
77      hasDigits = true;
78      accumulator = (accumulator * 10) + (c - '0');
79      continue;
80    }
81
82    if (hasDigits)
83      return OptionalAmount(OptionalAmount::Constant, accumulator, Beg);
84
85    break;
86  }
87
88  return OptionalAmount();
89}
90
91static OptionalAmount ParseNonPositionAmount(const char *&Beg, const char *E,
92                                             unsigned &argIndex) {
93  if (*Beg == '*') {
94    ++Beg;
95    return OptionalAmount(OptionalAmount::Arg, argIndex++, Beg);
96  }
97
98  return ParseAmount(Beg, E);
99}
100
101static OptionalAmount ParsePositionAmount(FormatStringHandler &H,
102                                          const char *Start,
103                                          const char *&Beg, const char *E,
104                                          PositionContext p) {
105  if (*Beg == '*') {
106    const char *I = Beg + 1;
107    const OptionalAmount &Amt = ParseAmount(I, E);
108
109    if (Amt.getHowSpecified() == OptionalAmount::NotSpecified) {
110      H.HandleInvalidPosition(Beg, I - Beg, p);
111      return OptionalAmount(false);
112    }
113
114    if (I== E) {
115      // No more characters left?
116      H.HandleIncompleteFormatSpecifier(Start, E - Start);
117      return OptionalAmount(false);
118    }
119
120    assert(Amt.getHowSpecified() == OptionalAmount::Constant);
121
122    if (*I == '$') {
123      // Special case: '*0$', since this is an easy mistake.
124      if (Amt.getConstantAmount() == 0) {
125        H.HandleZeroPosition(Beg, I - Beg + 1);
126        return OptionalAmount(false);
127      }
128
129      const char *Tmp = Beg;
130      Beg = ++I;
131
132      return OptionalAmount(OptionalAmount::Arg, Amt.getConstantAmount() - 1,
133                            Tmp);
134    }
135
136    H.HandleInvalidPosition(Beg, I - Beg, p);
137    return OptionalAmount(false);
138  }
139
140  return ParseAmount(Beg, E);
141}
142
143static bool ParsePrecision(FormatStringHandler &H, FormatSpecifier &FS,
144                           const char *Start, const char *&Beg, const char *E,
145                           unsigned *argIndex) {
146  if (argIndex) {
147    FS.setPrecision(ParseNonPositionAmount(Beg, E, *argIndex));
148  }
149  else {
150    const OptionalAmount Amt = ParsePositionAmount(H, Start, Beg, E,
151                                                  analyze_printf::PrecisionPos);
152    if (Amt.isInvalid())
153      return true;
154    FS.setPrecision(Amt);
155  }
156  return false;
157}
158
159static bool ParseFieldWidth(FormatStringHandler &H, FormatSpecifier &FS,
160                            const char *Start, const char *&Beg, const char *E,
161                            unsigned *argIndex) {
162  // FIXME: Support negative field widths.
163  if (argIndex) {
164    FS.setFieldWidth(ParseNonPositionAmount(Beg, E, *argIndex));
165  }
166  else {
167    const OptionalAmount Amt = ParsePositionAmount(H, Start, Beg, E,
168                                                 analyze_printf::FieldWidthPos);
169    if (Amt.isInvalid())
170      return true;
171    FS.setFieldWidth(Amt);
172  }
173  return false;
174}
175
176
177static bool ParseArgPosition(FormatStringHandler &H,
178                             FormatSpecifier &FS, const char *Start,
179                             const char *&Beg, const char *E) {
180
181  using namespace clang::analyze_printf;
182  const char *I = Beg;
183
184  const OptionalAmount &Amt = ParseAmount(I, E);
185
186  if (I == E) {
187    // No more characters left?
188    H.HandleIncompleteFormatSpecifier(Start, E - Start);
189    return true;
190  }
191
192  if (Amt.getHowSpecified() == OptionalAmount::Constant && *(I++) == '$') {
193    // Special case: '%0$', since this is an easy mistake.
194    if (Amt.getConstantAmount() == 0) {
195      H.HandleZeroPosition(Start, I - Start);
196      return true;
197    }
198
199    FS.setArgIndex(Amt.getConstantAmount() - 1);
200    FS.setUsesPositionalArg();
201    // Update the caller's pointer if we decided to consume
202    // these characters.
203    Beg = I;
204    return false;
205  }
206
207  return false;
208}
209
210static FormatSpecifierResult ParseFormatSpecifier(FormatStringHandler &H,
211                                                  const char *&Beg,
212                                                  const char *E,
213                                                  unsigned &argIndex,
214						  bool FormatExtensions) {
215
216  using namespace clang::analyze_printf;
217
218  const char *I = Beg;
219  const char *Start = 0;
220  UpdateOnReturn <const char*> UpdateBeg(Beg, I);
221
222  // Look for a '%' character that indicates the start of a format specifier.
223  for ( ; I != E ; ++I) {
224    char c = *I;
225    if (c == '\0') {
226      // Detect spurious null characters, which are likely errors.
227      H.HandleNullChar(I);
228      return true;
229    }
230    if (c == '%') {
231      Start = I++;  // Record the start of the format specifier.
232      break;
233    }
234  }
235
236  // No format specifier found?
237  if (!Start)
238    return false;
239
240  if (I == E) {
241    // No more characters left?
242    H.HandleIncompleteFormatSpecifier(Start, E - Start);
243    return true;
244  }
245
246  FormatSpecifier FS;
247  if (ParseArgPosition(H, FS, Start, I, E))
248    return true;
249
250  if (I == E) {
251    // No more characters left?
252    H.HandleIncompleteFormatSpecifier(Start, E - Start);
253    return true;
254  }
255
256  // Look for flags (if any).
257  bool hasMore = true;
258  for ( ; I != E; ++I) {
259    switch (*I) {
260      default: hasMore = false; break;
261      case '-': FS.setIsLeftJustified(); break;
262      case '+': FS.setHasPlusPrefix(); break;
263      case ' ': FS.setHasSpacePrefix(); break;
264      case '#': FS.setHasAlternativeForm(); break;
265      case '0': FS.setHasLeadingZeros(); break;
266    }
267    if (!hasMore)
268      break;
269  }
270
271  if (I == E) {
272    // No more characters left?
273    H.HandleIncompleteFormatSpecifier(Start, E - Start);
274    return true;
275  }
276
277  // Look for the field width (if any).
278  if (ParseFieldWidth(H, FS, Start, I, E,
279                      FS.usesPositionalArg() ? 0 : &argIndex))
280    return true;
281
282  if (I == E) {
283    // No more characters left?
284    H.HandleIncompleteFormatSpecifier(Start, E - Start);
285    return true;
286  }
287
288  // Look for the precision (if any).
289  if (*I == '.') {
290    ++I;
291    if (I == E) {
292      H.HandleIncompleteFormatSpecifier(Start, E - Start);
293      return true;
294    }
295
296    if (ParsePrecision(H, FS, Start, I, E,
297                       FS.usesPositionalArg() ? 0 : &argIndex))
298      return true;
299
300    if (I == E) {
301      // No more characters left?
302      H.HandleIncompleteFormatSpecifier(Start, E - Start);
303      return true;
304    }
305  }
306
307  // Look for the length modifier.
308  LengthModifier lm = None;
309  switch (*I) {
310    default:
311      break;
312    case 'h':
313      ++I;
314      lm = (I != E && *I == 'h') ? ++I, AsChar : AsShort;
315      break;
316    case 'l':
317      ++I;
318      lm = (I != E && *I == 'l') ? ++I, AsLongLong : AsLong;
319      break;
320    case 'j': lm = AsIntMax;     ++I; break;
321    case 'z': lm = AsSizeT;      ++I; break;
322    case 't': lm = AsPtrDiff;    ++I; break;
323    case 'L': lm = AsLongDouble; ++I; break;
324    case 'q': lm = AsLongLong;   ++I; break;
325  }
326  FS.setLengthModifier(lm);
327
328  if (I == E) {
329    // No more characters left?
330    H.HandleIncompleteFormatSpecifier(Start, E - Start);
331    return true;
332  }
333
334  if (*I == '\0') {
335    // Detect spurious null characters, which are likely errors.
336    H.HandleNullChar(I);
337    return true;
338  }
339
340  // Finally, look for the conversion specifier.
341  const char *conversionPosition = I++;
342  ConversionSpecifier::Kind k = ConversionSpecifier::InvalidSpecifier;
343  switch (*conversionPosition) {
344    default:
345      break;
346    // C99: 7.19.6.1 (section 8).
347    case '%': k = ConversionSpecifier::PercentArg;   break;
348    case 'A': k = ConversionSpecifier::AArg; break;
349    case 'E': k = ConversionSpecifier::EArg; break;
350    case 'F': k = ConversionSpecifier::FArg; break;
351    case 'G': k = ConversionSpecifier::GArg; break;
352    case 'X': k = ConversionSpecifier::XArg; break;
353    case 'a': k = ConversionSpecifier::aArg; break;
354    case 'c': k = ConversionSpecifier::IntAsCharArg; break;
355    case 'd': k = ConversionSpecifier::dArg; break;
356    case 'e': k = ConversionSpecifier::eArg; break;
357    case 'f': k = ConversionSpecifier::fArg; break;
358    case 'g': k = ConversionSpecifier::gArg; break;
359    case 'i': k = ConversionSpecifier::iArg; break;
360    case 'n': k = ConversionSpecifier::OutIntPtrArg; break;
361    case 'o': k = ConversionSpecifier::oArg; break;
362    case 'p': k = ConversionSpecifier::VoidPtrArg;   break;
363    case 's': k = ConversionSpecifier::CStrArg;      break;
364    case 'u': k = ConversionSpecifier::uArg; break;
365    case 'x': k = ConversionSpecifier::xArg; break;
366    // Mac OS X (unicode) specific
367    case 'C': k = ConversionSpecifier::CArg; break;
368    case 'S': k = ConversionSpecifier::UnicodeStrArg; break;
369    // Objective-C.
370    case '@': k = ConversionSpecifier::ObjCObjArg; break;
371    // Glibc specific.
372    case 'm': k = ConversionSpecifier::PrintErrno; break;
373    // FreeBSD format extensions
374    case 'b': if (FormatExtensions) k = ConversionSpecifier::bArg; break; /* check for int and then char * */
375    case 'r': if (FormatExtensions) k = ConversionSpecifier::xArg; break;
376    case 'y': if (FormatExtensions) k = ConversionSpecifier::iArg; break;
377    case 'D': if (FormatExtensions) k = ConversionSpecifier::DArg; break; /* check for u_char * pointer and a char * string */
378  }
379  ConversionSpecifier CS(conversionPosition, k);
380  FS.setConversionSpecifier(CS);
381  if (CS.consumesDataArgument() && !FS.usesPositionalArg())
382    FS.setArgIndex(argIndex++);
383  // FreeBSD extension
384  if (k == ConversionSpecifier::bArg || k == ConversionSpecifier::DArg)
385    argIndex++;
386
387  if (k == ConversionSpecifier::InvalidSpecifier) {
388    // Assume the conversion takes one argument.
389    return !H.HandleInvalidConversionSpecifier(FS, Beg, I - Beg);
390  }
391  return FormatSpecifierResult(Start, FS);
392}
393
394bool clang::analyze_printf::ParseFormatString(FormatStringHandler &H,
395                       const char *I, const char *E, bool FormatExtensions) {
396
397  unsigned argIndex = 0;
398
399  // Keep looking for a format specifier until we have exhausted the string.
400  while (I != E) {
401    const FormatSpecifierResult &FSR = ParseFormatSpecifier(H, I, E, argIndex, FormatExtensions);
402    // Did a fail-stop error of any kind occur when parsing the specifier?
403    // If so, don't do any more processing.
404    if (FSR.shouldStop())
405      return true;;
406    // Did we exhaust the string or encounter an error that
407    // we can recover from?
408    if (!FSR.hasValue())
409      continue;
410    // We have a format specifier.  Pass it to the callback.
411    if (!H.HandleFormatSpecifier(FSR.getValue(), FSR.getStart(),
412                                 I - FSR.getStart()))
413      return true;
414  }
415  assert(I == E && "Format string not exhausted");
416  return false;
417}
418
419FormatStringHandler::~FormatStringHandler() {}
420
421//===----------------------------------------------------------------------===//
422// Methods on ArgTypeResult.
423//===----------------------------------------------------------------------===//
424
425bool ArgTypeResult::matchesType(ASTContext &C, QualType argTy) const {
426  assert(isValid());
427
428  if (K == UnknownTy)
429    return true;
430
431  if (K == SpecificTy) {
432    argTy = C.getCanonicalType(argTy).getUnqualifiedType();
433
434    if (T == argTy)
435      return true;
436
437    if (const BuiltinType *BT = argTy->getAs<BuiltinType>())
438      switch (BT->getKind()) {
439        default:
440          break;
441        case BuiltinType::Char_S:
442        case BuiltinType::SChar:
443          return T == C.UnsignedCharTy;
444        case BuiltinType::Char_U:
445        case BuiltinType::UChar:
446          return T == C.SignedCharTy;
447        case BuiltinType::Short:
448          return T == C.UnsignedShortTy;
449        case BuiltinType::UShort:
450          return T == C.ShortTy;
451        case BuiltinType::Int:
452          return T == C.UnsignedIntTy;
453        case BuiltinType::UInt:
454          return T == C.IntTy;
455        case BuiltinType::Long:
456          return T == C.UnsignedLongTy;
457        case BuiltinType::ULong:
458          return T == C.LongTy;
459        case BuiltinType::LongLong:
460          return T == C.UnsignedLongLongTy;
461        case BuiltinType::ULongLong:
462          return T == C.LongLongTy;
463      }
464
465    return false;
466  }
467
468  if (K == CStrTy) {
469    const PointerType *PT = argTy->getAs<PointerType>();
470    if (!PT)
471      return false;
472
473    QualType pointeeTy = PT->getPointeeType();
474
475    if (const BuiltinType *BT = pointeeTy->getAs<BuiltinType>())
476      switch (BT->getKind()) {
477        case BuiltinType::Void:
478        case BuiltinType::Char_U:
479        case BuiltinType::UChar:
480        case BuiltinType::Char_S:
481        case BuiltinType::SChar:
482          return true;
483        default:
484          break;
485      }
486
487    return false;
488  }
489
490  if (K == WCStrTy) {
491    const PointerType *PT = argTy->getAs<PointerType>();
492    if (!PT)
493      return false;
494
495    QualType pointeeTy =
496      C.getCanonicalType(PT->getPointeeType()).getUnqualifiedType();
497
498    return pointeeTy == C.getWCharType();
499  }
500
501  return false;
502}
503
504QualType ArgTypeResult::getRepresentativeType(ASTContext &C) const {
505  assert(isValid());
506  if (K == SpecificTy)
507    return T;
508  if (K == CStrTy)
509    return C.getPointerType(C.CharTy);
510  if (K == WCStrTy)
511    return C.getPointerType(C.getWCharType());
512  if (K == ObjCPointerTy)
513    return C.ObjCBuiltinIdTy;
514
515  return QualType();
516}
517
518//===----------------------------------------------------------------------===//
519// Methods on OptionalAmount.
520//===----------------------------------------------------------------------===//
521
522ArgTypeResult OptionalAmount::getArgType(ASTContext &Ctx) const {
523  return Ctx.IntTy;
524}
525
526//===----------------------------------------------------------------------===//
527// Methods on FormatSpecifier.
528//===----------------------------------------------------------------------===//
529
530ArgTypeResult FormatSpecifier::getArgType(ASTContext &Ctx) const {
531  if (!CS.consumesDataArgument())
532    return ArgTypeResult::Invalid();
533
534  if (CS.isIntArg())
535    switch (LM) {
536      case AsLongDouble:
537        return ArgTypeResult::Invalid();
538      case None: return Ctx.IntTy;
539      case AsChar: return Ctx.SignedCharTy;
540      case AsShort: return Ctx.ShortTy;
541      case AsLong: return Ctx.LongTy;
542      case AsLongLong: return Ctx.LongLongTy;
543      case AsIntMax:
544        // FIXME: Return unknown for now.
545        return ArgTypeResult();
546      case AsSizeT: return Ctx.getSizeType();
547      case AsPtrDiff: return Ctx.getPointerDiffType();
548    }
549
550  if (CS.isUIntArg())
551    switch (LM) {
552      case AsLongDouble:
553        return ArgTypeResult::Invalid();
554      case None: return Ctx.UnsignedIntTy;
555      case AsChar: return Ctx.UnsignedCharTy;
556      case AsShort: return Ctx.UnsignedShortTy;
557      case AsLong: return Ctx.UnsignedLongTy;
558      case AsLongLong: return Ctx.UnsignedLongLongTy;
559      case AsIntMax:
560        // FIXME: Return unknown for now.
561        return ArgTypeResult();
562      case AsSizeT:
563        // FIXME: How to get the corresponding unsigned
564        // version of size_t?
565        return ArgTypeResult();
566      case AsPtrDiff:
567        // FIXME: How to get the corresponding unsigned
568        // version of ptrdiff_t?
569        return ArgTypeResult();
570    }
571
572  if (CS.isDoubleArg()) {
573    if (LM == AsLongDouble)
574      return Ctx.LongDoubleTy;
575    return Ctx.DoubleTy;
576  }
577
578  switch (CS.getKind()) {
579    case ConversionSpecifier::CStrArg:
580      return ArgTypeResult(LM == AsWideChar ? ArgTypeResult::WCStrTy                                            : ArgTypeResult::CStrTy);
581    case ConversionSpecifier::UnicodeStrArg:
582      // FIXME: This appears to be Mac OS X specific.
583      return ArgTypeResult::WCStrTy;
584    case ConversionSpecifier::CArg:
585      return Ctx.WCharTy;
586    default:
587      break;
588  }
589
590  // FIXME: Handle other cases.
591  return ArgTypeResult();
592}
593
594