PrintfFormatString.cpp revision 212904
1//== PrintfFormatString.cpp - Analysis of printf format strings --*- C++ -*-==//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
// Handling of format strings in printf and friends.  The structure of format
// strings for fprintf() is described in C99 7.19.6.1.
12//
13//===----------------------------------------------------------------------===//
14
15#include "clang/Analysis/Analyses/FormatString.h"
16#include "FormatStringParsing.h"
17
18using clang::analyze_format_string::ArgTypeResult;
19using clang::analyze_format_string::FormatStringHandler;
20using clang::analyze_format_string::LengthModifier;
21using clang::analyze_format_string::OptionalAmount;
22using clang::analyze_format_string::ConversionSpecifier;
23using clang::analyze_printf::PrintfSpecifier;
24
25using namespace clang;
26
27typedef clang::analyze_format_string::SpecifierResult<PrintfSpecifier>
28        PrintfSpecifierResult;
29
30//===----------------------------------------------------------------------===//
31// Methods for parsing format strings.
32//===----------------------------------------------------------------------===//
33
34using analyze_format_string::ParseNonPositionAmount;
35
36static bool ParsePrecision(FormatStringHandler &H, PrintfSpecifier &FS,
37                           const char *Start, const char *&Beg, const char *E,
38                           unsigned *argIndex) {
39  if (argIndex) {
40    FS.setPrecision(ParseNonPositionAmount(Beg, E, *argIndex));
41  }
42  else {
43    const OptionalAmount Amt = ParsePositionAmount(H, Start, Beg, E,
44                                           analyze_format_string::PrecisionPos);
45    if (Amt.isInvalid())
46      return true;
47    FS.setPrecision(Amt);
48  }
49  return false;
50}
51
52static PrintfSpecifierResult ParsePrintfSpecifier(FormatStringHandler &H,
53                                                  const char *&Beg,
54                                                  const char *E,
55                                                  unsigned &argIndex,
56                                                  bool FormatExtensions) {
57
58  using namespace clang::analyze_format_string;
59  using namespace clang::analyze_printf;
60
61  const char *I = Beg;
62  const char *Start = 0;
63  UpdateOnReturn <const char*> UpdateBeg(Beg, I);
64
65  // Look for a '%' character that indicates the start of a format specifier.
66  for ( ; I != E ; ++I) {
67    char c = *I;
68    if (c == '\0') {
69      // Detect spurious null characters, which are likely errors.
70      H.HandleNullChar(I);
71      return true;
72    }
73    if (c == '%') {
74      Start = I++;  // Record the start of the format specifier.
75      break;
76    }
77  }
78
79  // No format specifier found?
80  if (!Start)
81    return false;
82
83  if (I == E) {
84    // No more characters left?
85    H.HandleIncompleteSpecifier(Start, E - Start);
86    return true;
87  }
88
89  PrintfSpecifier FS;
90  if (ParseArgPosition(H, FS, Start, I, E))
91    return true;
92
93  if (I == E) {
94    // No more characters left?
95    H.HandleIncompleteSpecifier(Start, E - Start);
96    return true;
97  }
98
99  // Look for flags (if any).
100  bool hasMore = true;
101  for ( ; I != E; ++I) {
102    switch (*I) {
103      default: hasMore = false; break;
104      case '-': FS.setIsLeftJustified(I); break;
105      case '+': FS.setHasPlusPrefix(I); break;
106      case ' ': FS.setHasSpacePrefix(I); break;
107      case '#': FS.setHasAlternativeForm(I); break;
108      case '0': FS.setHasLeadingZeros(I); break;
109    }
110    if (!hasMore)
111      break;
112  }
113
114  if (I == E) {
115    // No more characters left?
116    H.HandleIncompleteSpecifier(Start, E - Start);
117    return true;
118  }
119
120  // Look for the field width (if any).
121  if (ParseFieldWidth(H, FS, Start, I, E,
122                      FS.usesPositionalArg() ? 0 : &argIndex))
123    return true;
124
125  if (I == E) {
126    // No more characters left?
127    H.HandleIncompleteSpecifier(Start, E - Start);
128    return true;
129  }
130
131  // Look for the precision (if any).
132  if (*I == '.') {
133    ++I;
134    if (I == E) {
135      H.HandleIncompleteSpecifier(Start, E - Start);
136      return true;
137    }
138
139    if (ParsePrecision(H, FS, Start, I, E,
140                       FS.usesPositionalArg() ? 0 : &argIndex))
141      return true;
142
143    if (I == E) {
144      // No more characters left?
145      H.HandleIncompleteSpecifier(Start, E - Start);
146      return true;
147    }
148  }
149
150  // Look for the length modifier.
151  if (ParseLengthModifier(FS, I, E) && I == E) {
152    // No more characters left?
153    H.HandleIncompleteSpecifier(Start, E - Start);
154    return true;
155  }
156
157  if (*I == '\0') {
158    // Detect spurious null characters, which are likely errors.
159    H.HandleNullChar(I);
160    return true;
161  }
162
163  // Finally, look for the conversion specifier.
164  const char *conversionPosition = I++;
165  ConversionSpecifier::Kind k = ConversionSpecifier::InvalidSpecifier;
166  switch (*conversionPosition) {
167    default:
168      break;
169    // C99: 7.19.6.1 (section 8).
170    case '%': k = ConversionSpecifier::PercentArg;   break;
171    case 'A': k = ConversionSpecifier::AArg; break;
172    case 'E': k = ConversionSpecifier::EArg; break;
173    case 'F': k = ConversionSpecifier::FArg; break;
174    case 'G': k = ConversionSpecifier::GArg; break;
175    case 'X': k = ConversionSpecifier::XArg; break;
176    case 'a': k = ConversionSpecifier::aArg; break;
177    case 'c': k = ConversionSpecifier::cArg; break;
178    case 'd': k = ConversionSpecifier::dArg; break;
179    case 'e': k = ConversionSpecifier::eArg; break;
180    case 'f': k = ConversionSpecifier::fArg; break;
181    case 'g': k = ConversionSpecifier::gArg; break;
182    case 'i': k = ConversionSpecifier::iArg; break;
183    case 'n': k = ConversionSpecifier::nArg; break;
184    case 'o': k = ConversionSpecifier::oArg; break;
185    case 'p': k = ConversionSpecifier::pArg;   break;
186    case 's': k = ConversionSpecifier::sArg;      break;
187    case 'u': k = ConversionSpecifier::uArg; break;
188    case 'x': k = ConversionSpecifier::xArg; break;
189    // Mac OS X (unicode) specific
190    case 'C': k = ConversionSpecifier::CArg; break;
191    case 'S': k = ConversionSpecifier::SArg; break;
192    // Objective-C.
193    case '@': k = ConversionSpecifier::ObjCObjArg; break;
194    // Glibc specific.
195    case 'm': k = ConversionSpecifier::PrintErrno; break;
196    // FreeBSD format extensions
197    case 'b': if (FormatExtensions) k = ConversionSpecifier::bArg; break; /* check for int and then char * */
198    case 'D': if (FormatExtensions) k = ConversionSpecifier::DArg; break; /* check for u_char * pointer and a char * string */
199  }
200  PrintfConversionSpecifier CS(conversionPosition, k);
201  FS.setConversionSpecifier(CS);
202  if (CS.consumesDataArgument() && !FS.usesPositionalArg())
203    FS.setArgIndex(argIndex++);
204  // FreeBSD extension
205  if (k == ConversionSpecifier::bArg || k == ConversionSpecifier::DArg)
206    argIndex++;
207
208  if (k == ConversionSpecifier::InvalidSpecifier) {
209    // Assume the conversion takes one argument.
210    return !H.HandleInvalidPrintfConversionSpecifier(FS, Beg, I - Beg);
211  }
212  return PrintfSpecifierResult(Start, FS);
213}
214
215bool clang::analyze_format_string::ParsePrintfString(FormatStringHandler &H,
216                                                     const char *I,
217                                                     const char *E,
218                                                     bool FormatExtensions) {
219
220  unsigned argIndex = 0;
221
222  // Keep looking for a format specifier until we have exhausted the string.
223  while (I != E) {
224    const PrintfSpecifierResult &FSR = ParsePrintfSpecifier(H, I, E, argIndex,
225                                                            FormatExtensions);
226    // Did a fail-stop error of any kind occur when parsing the specifier?
227    // If so, don't do any more processing.
228    if (FSR.shouldStop())
229      return true;;
230    // Did we exhaust the string or encounter an error that
231    // we can recover from?
232    if (!FSR.hasValue())
233      continue;
234    // We have a format specifier.  Pass it to the callback.
235    if (!H.HandlePrintfSpecifier(FSR.getValue(), FSR.getStart(),
236                                 I - FSR.getStart()))
237      return true;
238  }
239  assert(I == E && "Format string not exhausted");
240  return false;
241}
242
243//===----------------------------------------------------------------------===//
244// Methods on ConversionSpecifier.
245//===----------------------------------------------------------------------===//
246const char *ConversionSpecifier::toString() const {
247  switch (kind) {
248  case dArg: return "d";
249  case iArg: return "i";
250  case oArg: return "o";
251  case uArg: return "u";
252  case xArg: return "x";
253  case XArg: return "X";
254  case fArg: return "f";
255  case FArg: return "F";
256  case eArg: return "e";
257  case EArg: return "E";
258  case gArg: return "g";
259  case GArg: return "G";
260  case aArg: return "a";
261  case AArg: return "A";
262  case cArg: return "c";
263  case sArg: return "s";
264  case pArg: return "p";
265  case nArg: return "n";
266  case PercentArg:  return "%";
267  case ScanListArg: return "[";
268  case InvalidSpecifier: return NULL;
269
270  // MacOS X unicode extensions.
271  case CArg: return "C";
272  case SArg: return "S";
273
274  // Objective-C specific specifiers.
275  case ObjCObjArg: return "@";
276
277  // FreeBSD specific specifiers.
278  case bArg: return "b";
279  case DArg: return "D";
280
281  // GlibC specific specifiers.
282  case PrintErrno: return "m";
283  }
284  return NULL;
285}
286
287//===----------------------------------------------------------------------===//
288// Methods on PrintfSpecifier.
289//===----------------------------------------------------------------------===//
290
291ArgTypeResult PrintfSpecifier::getArgType(ASTContext &Ctx) const {
292  const PrintfConversionSpecifier &CS = getConversionSpecifier();
293
294  if (!CS.consumesDataArgument())
295    return ArgTypeResult::Invalid();
296
297  if (CS.getKind() == ConversionSpecifier::cArg)
298    switch (LM.getKind()) {
299      case LengthModifier::None: return Ctx.IntTy;
300      case LengthModifier::AsLong: return ArgTypeResult::WIntTy;
301      default:
302        return ArgTypeResult::Invalid();
303    }
304
305  if (CS.isIntArg())
306    switch (LM.getKind()) {
307      case LengthModifier::AsLongDouble:
308        return ArgTypeResult::Invalid();
309      case LengthModifier::None: return Ctx.IntTy;
310      case LengthModifier::AsChar: return Ctx.SignedCharTy;
311      case LengthModifier::AsShort: return Ctx.ShortTy;
312      case LengthModifier::AsLong: return Ctx.LongTy;
313      case LengthModifier::AsLongLong: return Ctx.LongLongTy;
314      case LengthModifier::AsIntMax:
315        // FIXME: Return unknown for now.
316        return ArgTypeResult();
317      case LengthModifier::AsSizeT: return Ctx.getSizeType();
318      case LengthModifier::AsPtrDiff: return Ctx.getPointerDiffType();
319    }
320
321  if (CS.isUIntArg())
322    switch (LM.getKind()) {
323      case LengthModifier::AsLongDouble:
324        return ArgTypeResult::Invalid();
325      case LengthModifier::None: return Ctx.UnsignedIntTy;
326      case LengthModifier::AsChar: return Ctx.UnsignedCharTy;
327      case LengthModifier::AsShort: return Ctx.UnsignedShortTy;
328      case LengthModifier::AsLong: return Ctx.UnsignedLongTy;
329      case LengthModifier::AsLongLong: return Ctx.UnsignedLongLongTy;
330      case LengthModifier::AsIntMax:
331        // FIXME: Return unknown for now.
332        return ArgTypeResult();
333      case LengthModifier::AsSizeT:
334        // FIXME: How to get the corresponding unsigned
335        // version of size_t?
336        return ArgTypeResult();
337      case LengthModifier::AsPtrDiff:
338        // FIXME: How to get the corresponding unsigned
339        // version of ptrdiff_t?
340        return ArgTypeResult();
341    }
342
343  if (CS.isDoubleArg()) {
344    if (LM.getKind() == LengthModifier::AsLongDouble)
345      return Ctx.LongDoubleTy;
346    return Ctx.DoubleTy;
347  }
348
349  switch (CS.getKind()) {
350    case ConversionSpecifier::sArg:
351      return ArgTypeResult(LM.getKind() == LengthModifier::AsWideChar ?
352          ArgTypeResult::WCStrTy : ArgTypeResult::CStrTy);
353    case ConversionSpecifier::SArg:
354      // FIXME: This appears to be Mac OS X specific.
355      return ArgTypeResult::WCStrTy;
356    case ConversionSpecifier::CArg:
357      return Ctx.WCharTy;
358    case ConversionSpecifier::pArg:
359      return ArgTypeResult::CPointerTy;
360    default:
361      break;
362  }
363
364  // FIXME: Handle other cases.
365  return ArgTypeResult();
366}
367
368bool PrintfSpecifier::fixType(QualType QT) {
369  // Handle strings first (char *, wchar_t *)
370  if (QT->isPointerType() && (QT->getPointeeType()->isAnyCharacterType())) {
371    CS.setKind(ConversionSpecifier::sArg);
372
373    // Disable irrelevant flags
374    HasAlternativeForm = 0;
375    HasLeadingZeroes = 0;
376
377    // Set the long length modifier for wide characters
378    if (QT->getPointeeType()->isWideCharType())
379      LM.setKind(LengthModifier::AsWideChar);
380
381    return true;
382  }
383
384  // We can only work with builtin types.
385  if (!QT->isBuiltinType())
386    return false;
387
388  // Everything else should be a base type
389  const BuiltinType *BT = QT->getAs<BuiltinType>();
390
391  // Set length modifier
392  switch (BT->getKind()) {
393  default:
394    // The rest of the conversions are either optional or for non-builtin types
395    LM.setKind(LengthModifier::None);
396    break;
397
398  case BuiltinType::WChar:
399  case BuiltinType::Long:
400  case BuiltinType::ULong:
401    LM.setKind(LengthModifier::AsLong);
402    break;
403
404  case BuiltinType::LongLong:
405  case BuiltinType::ULongLong:
406    LM.setKind(LengthModifier::AsLongLong);
407    break;
408
409  case BuiltinType::LongDouble:
410    LM.setKind(LengthModifier::AsLongDouble);
411    break;
412  }
413
414  // Set conversion specifier and disable any flags which do not apply to it.
415  if (QT->isAnyCharacterType()) {
416    CS.setKind(ConversionSpecifier::cArg);
417    Precision.setHowSpecified(OptionalAmount::NotSpecified);
418    HasAlternativeForm = 0;
419    HasLeadingZeroes = 0;
420    HasPlusPrefix = 0;
421  }
422  // Test for Floating type first as LongDouble can pass isUnsignedIntegerType
423  else if (QT->isRealFloatingType()) {
424    CS.setKind(ConversionSpecifier::fArg);
425  }
426  else if (QT->isPointerType()) {
427    CS.setKind(ConversionSpecifier::pArg);
428    Precision.setHowSpecified(OptionalAmount::NotSpecified);
429    HasAlternativeForm = 0;
430    HasLeadingZeroes = 0;
431    HasPlusPrefix = 0;
432  }
433  else if (QT->isSignedIntegerType()) {
434    CS.setKind(ConversionSpecifier::dArg);
435    HasAlternativeForm = 0;
436  }
437  else if (QT->isUnsignedIntegerType()) {
438    CS.setKind(ConversionSpecifier::uArg);
439    HasAlternativeForm = 0;
440    HasPlusPrefix = 0;
441  }
442  else {
443    return false;
444  }
445
446  return true;
447}
448
449void PrintfSpecifier::toString(llvm::raw_ostream &os) const {
450  // Whilst some features have no defined order, we are using the order
451  // appearing in the C99 standard (ISO/IEC 9899:1999 (E) ��7.19.6.1)
452  os << "%";
453
454  // Positional args
455  if (usesPositionalArg()) {
456    os << getPositionalArgIndex() << "$";
457  }
458
459  // Conversion flags
460  if (IsLeftJustified)    os << "-";
461  if (HasPlusPrefix)      os << "+";
462  if (HasSpacePrefix)     os << " ";
463  if (HasAlternativeForm) os << "#";
464  if (HasLeadingZeroes)   os << "0";
465
466  // Minimum field width
467  FieldWidth.toString(os);
468  // Precision
469  Precision.toString(os);
470  // Length modifier
471  os << LM.toString();
472  // Conversion specifier
473  os << CS.toString();
474}
475
476bool PrintfSpecifier::hasValidPlusPrefix() const {
477  if (!HasPlusPrefix)
478    return true;
479
480  // The plus prefix only makes sense for signed conversions
481  switch (CS.getKind()) {
482  case ConversionSpecifier::dArg:
483  case ConversionSpecifier::iArg:
484  case ConversionSpecifier::fArg:
485  case ConversionSpecifier::FArg:
486  case ConversionSpecifier::eArg:
487  case ConversionSpecifier::EArg:
488  case ConversionSpecifier::gArg:
489  case ConversionSpecifier::GArg:
490  case ConversionSpecifier::aArg:
491  case ConversionSpecifier::AArg:
492    return true;
493
494  default:
495    return false;
496  }
497}
498
499bool PrintfSpecifier::hasValidAlternativeForm() const {
500  if (!HasAlternativeForm)
501    return true;
502
503  // Alternate form flag only valid with the oxaAeEfFgG conversions
504  switch (CS.getKind()) {
505  case ConversionSpecifier::oArg:
506  case ConversionSpecifier::xArg:
507  case ConversionSpecifier::aArg:
508  case ConversionSpecifier::AArg:
509  case ConversionSpecifier::eArg:
510  case ConversionSpecifier::EArg:
511  case ConversionSpecifier::fArg:
512  case ConversionSpecifier::FArg:
513  case ConversionSpecifier::gArg:
514  case ConversionSpecifier::GArg:
515    return true;
516
517  default:
518    return false;
519  }
520}
521
522bool PrintfSpecifier::hasValidLeadingZeros() const {
523  if (!HasLeadingZeroes)
524    return true;
525
526  // Leading zeroes flag only valid with the diouxXaAeEfFgG conversions
527  switch (CS.getKind()) {
528  case ConversionSpecifier::dArg:
529  case ConversionSpecifier::iArg:
530  case ConversionSpecifier::oArg:
531  case ConversionSpecifier::uArg:
532  case ConversionSpecifier::xArg:
533  case ConversionSpecifier::XArg:
534  case ConversionSpecifier::aArg:
535  case ConversionSpecifier::AArg:
536  case ConversionSpecifier::eArg:
537  case ConversionSpecifier::EArg:
538  case ConversionSpecifier::fArg:
539  case ConversionSpecifier::FArg:
540  case ConversionSpecifier::gArg:
541  case ConversionSpecifier::GArg:
542    return true;
543
544  default:
545    return false;
546  }
547}
548
549bool PrintfSpecifier::hasValidSpacePrefix() const {
550  if (!HasSpacePrefix)
551    return true;
552
553  // The space prefix only makes sense for signed conversions
554  switch (CS.getKind()) {
555  case ConversionSpecifier::dArg:
556  case ConversionSpecifier::iArg:
557  case ConversionSpecifier::fArg:
558  case ConversionSpecifier::FArg:
559  case ConversionSpecifier::eArg:
560  case ConversionSpecifier::EArg:
561  case ConversionSpecifier::gArg:
562  case ConversionSpecifier::GArg:
563  case ConversionSpecifier::aArg:
564  case ConversionSpecifier::AArg:
565    return true;
566
567  default:
568    return false;
569  }
570}
571
572bool PrintfSpecifier::hasValidLeftJustified() const {
573  if (!IsLeftJustified)
574    return true;
575
576  // The left justified flag is valid for all conversions except n
577  switch (CS.getKind()) {
578  case ConversionSpecifier::nArg:
579    return false;
580
581  default:
582    return true;
583  }
584}
585
586bool PrintfSpecifier::hasValidPrecision() const {
587  if (Precision.getHowSpecified() == OptionalAmount::NotSpecified)
588    return true;
589
590  // Precision is only valid with the diouxXaAeEfFgGs conversions
591  switch (CS.getKind()) {
592  case ConversionSpecifier::dArg:
593  case ConversionSpecifier::iArg:
594  case ConversionSpecifier::oArg:
595  case ConversionSpecifier::uArg:
596  case ConversionSpecifier::xArg:
597  case ConversionSpecifier::XArg:
598  case ConversionSpecifier::aArg:
599  case ConversionSpecifier::AArg:
600  case ConversionSpecifier::eArg:
601  case ConversionSpecifier::EArg:
602  case ConversionSpecifier::fArg:
603  case ConversionSpecifier::FArg:
604  case ConversionSpecifier::gArg:
605  case ConversionSpecifier::GArg:
606  case ConversionSpecifier::sArg:
607    return true;
608
609  default:
610    return false;
611  }
612}
613bool PrintfSpecifier::hasValidFieldWidth() const {
614  if (FieldWidth.getHowSpecified() == OptionalAmount::NotSpecified)
615      return true;
616
617  // The field width is valid for all conversions except n
618  switch (CS.getKind()) {
619  case ConversionSpecifier::nArg:
620    return false;
621
622  default:
623    return true;
624  }
625}
626