PrintfFormatString.cpp revision 213694
1//== PrintfFormatString.cpp - Analysis of printf format strings --*- C++ -*-==//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// Handling of format strings in printf and friends.  The structure of format
11// strings for fprintf() is described in C99 7.19.6.1.
12//
13//===----------------------------------------------------------------------===//
14
15#include "clang/Analysis/Analyses/FormatString.h"
16#include "FormatStringParsing.h"
17
18using clang::analyze_format_string::ArgTypeResult;
19using clang::analyze_format_string::FormatStringHandler;
20using clang::analyze_format_string::LengthModifier;
21using clang::analyze_format_string::OptionalAmount;
22using clang::analyze_format_string::ConversionSpecifier;
23using clang::analyze_printf::PrintfSpecifier;
24
25using namespace clang;
26
27typedef clang::analyze_format_string::SpecifierResult<PrintfSpecifier>
28        PrintfSpecifierResult;
29
30//===----------------------------------------------------------------------===//
31// Methods for parsing format strings.
32//===----------------------------------------------------------------------===//
33
34using analyze_format_string::ParseNonPositionAmount;
35
36static bool ParsePrecision(FormatStringHandler &H, PrintfSpecifier &FS,
37                           const char *Start, const char *&Beg, const char *E,
38                           unsigned *argIndex) {
39  if (argIndex) {
40    FS.setPrecision(ParseNonPositionAmount(Beg, E, *argIndex));
41  }
42  else {
43    const OptionalAmount Amt = ParsePositionAmount(H, Start, Beg, E,
44                                           analyze_format_string::PrecisionPos);
45    if (Amt.isInvalid())
46      return true;
47    FS.setPrecision(Amt);
48  }
49  return false;
50}
51
52static PrintfSpecifierResult ParsePrintfSpecifier(FormatStringHandler &H,
53                                                  const char *&Beg,
54                                                  const char *E,
55                                                  unsigned &argIndex,
56                                                  bool FormatExtensions) {
57
58  using namespace clang::analyze_format_string;
59  using namespace clang::analyze_printf;
60
61  const char *I = Beg;
62  const char *Start = 0;
63  UpdateOnReturn <const char*> UpdateBeg(Beg, I);
64
65  // Look for a '%' character that indicates the start of a format specifier.
66  for ( ; I != E ; ++I) {
67    char c = *I;
68    if (c == '\0') {
69      // Detect spurious null characters, which are likely errors.
70      H.HandleNullChar(I);
71      return true;
72    }
73    if (c == '%') {
74      Start = I++;  // Record the start of the format specifier.
75      break;
76    }
77  }
78
79  // No format specifier found?
80  if (!Start)
81    return false;
82
83  if (I == E) {
84    // No more characters left?
85    H.HandleIncompleteSpecifier(Start, E - Start);
86    return true;
87  }
88
89  PrintfSpecifier FS;
90  if (ParseArgPosition(H, FS, Start, I, E))
91    return true;
92
93  if (I == E) {
94    // No more characters left?
95    H.HandleIncompleteSpecifier(Start, E - Start);
96    return true;
97  }
98
99  // Look for flags (if any).
100  bool hasMore = true;
101  for ( ; I != E; ++I) {
102    switch (*I) {
103      default: hasMore = false; break;
104      case '-': FS.setIsLeftJustified(I); break;
105      case '+': FS.setHasPlusPrefix(I); break;
106      case ' ': FS.setHasSpacePrefix(I); break;
107      case '#': FS.setHasAlternativeForm(I); break;
108      case '0': FS.setHasLeadingZeros(I); break;
109    }
110    if (!hasMore)
111      break;
112  }
113
114  if (I == E) {
115    // No more characters left?
116    H.HandleIncompleteSpecifier(Start, E - Start);
117    return true;
118  }
119
120  // Look for the field width (if any).
121  if (ParseFieldWidth(H, FS, Start, I, E,
122                      FS.usesPositionalArg() ? 0 : &argIndex))
123    return true;
124
125  if (I == E) {
126    // No more characters left?
127    H.HandleIncompleteSpecifier(Start, E - Start);
128    return true;
129  }
130
131  // Look for the precision (if any).
132  if (*I == '.') {
133    ++I;
134    if (I == E) {
135      H.HandleIncompleteSpecifier(Start, E - Start);
136      return true;
137    }
138
139    if (ParsePrecision(H, FS, Start, I, E,
140                       FS.usesPositionalArg() ? 0 : &argIndex))
141      return true;
142
143    if (I == E) {
144      // No more characters left?
145      H.HandleIncompleteSpecifier(Start, E - Start);
146      return true;
147    }
148  }
149
150  // Look for the length modifier.
151  if (ParseLengthModifier(FS, I, E) && I == E) {
152    // No more characters left?
153    H.HandleIncompleteSpecifier(Start, E - Start);
154    return true;
155  }
156
157  if (*I == '\0') {
158    // Detect spurious null characters, which are likely errors.
159    H.HandleNullChar(I);
160    return true;
161  }
162
163  // Finally, look for the conversion specifier.
164  const char *conversionPosition = I++;
165  ConversionSpecifier::Kind k = ConversionSpecifier::InvalidSpecifier;
166  switch (*conversionPosition) {
167    default:
168      break;
169    // C99: 7.19.6.1 (section 8).
170    case '%': k = ConversionSpecifier::PercentArg;   break;
171    case 'A': k = ConversionSpecifier::AArg; break;
172    case 'E': k = ConversionSpecifier::EArg; break;
173    case 'F': k = ConversionSpecifier::FArg; break;
174    case 'G': k = ConversionSpecifier::GArg; break;
175    case 'X': k = ConversionSpecifier::XArg; break;
176    case 'a': k = ConversionSpecifier::aArg; break;
177    case 'c': k = ConversionSpecifier::cArg; break;
178    case 'd': k = ConversionSpecifier::dArg; break;
179    case 'e': k = ConversionSpecifier::eArg; break;
180    case 'f': k = ConversionSpecifier::fArg; break;
181    case 'g': k = ConversionSpecifier::gArg; break;
182    case 'i': k = ConversionSpecifier::iArg; break;
183    case 'n': k = ConversionSpecifier::nArg; break;
184    case 'o': k = ConversionSpecifier::oArg; break;
185    case 'p': k = ConversionSpecifier::pArg;   break;
186    case 's': k = ConversionSpecifier::sArg;      break;
187    case 'u': k = ConversionSpecifier::uArg; break;
188    case 'x': k = ConversionSpecifier::xArg; break;
189    // Mac OS X (unicode) specific
190    case 'C': k = ConversionSpecifier::CArg; break;
191    case 'S': k = ConversionSpecifier::SArg; break;
192    // Objective-C.
193    case '@': k = ConversionSpecifier::ObjCObjArg; break;
194    // Glibc specific.
195    case 'm': k = ConversionSpecifier::PrintErrno; break;
196    // FreeBSD format extensions
197    case 'b': if (FormatExtensions) k = ConversionSpecifier::bArg; break; /* check for int and then char * */
198    case 'r': if (FormatExtensions) k = ConversionSpecifier::rArg; break;
199    case 'y': if (FormatExtensions) k = ConversionSpecifier::iArg; break;
200    case 'D': if (FormatExtensions) k = ConversionSpecifier::DArg; break; /* check for u_char * pointer and a char * string */
201  }
202  PrintfConversionSpecifier CS(conversionPosition, k);
203  FS.setConversionSpecifier(CS);
204  if (CS.consumesDataArgument() && !FS.usesPositionalArg())
205    FS.setArgIndex(argIndex++);
206  // FreeBSD extension
207  if (k == ConversionSpecifier::bArg || k == ConversionSpecifier::DArg)
208    argIndex++;
209
210  if (k == ConversionSpecifier::InvalidSpecifier) {
211    // Assume the conversion takes one argument.
212    return !H.HandleInvalidPrintfConversionSpecifier(FS, Beg, I - Beg);
213  }
214  return PrintfSpecifierResult(Start, FS);
215}
216
217bool clang::analyze_format_string::ParsePrintfString(FormatStringHandler &H,
218                                                     const char *I,
219                                                     const char *E,
220                                                     bool FormatExtensions) {
221
222  unsigned argIndex = 0;
223
224  // Keep looking for a format specifier until we have exhausted the string.
225  while (I != E) {
226    const PrintfSpecifierResult &FSR = ParsePrintfSpecifier(H, I, E, argIndex,
227                                                            FormatExtensions);
228    // Did a fail-stop error of any kind occur when parsing the specifier?
229    // If so, don't do any more processing.
230    if (FSR.shouldStop())
231      return true;;
232    // Did we exhaust the string or encounter an error that
233    // we can recover from?
234    if (!FSR.hasValue())
235      continue;
236    // We have a format specifier.  Pass it to the callback.
237    if (!H.HandlePrintfSpecifier(FSR.getValue(), FSR.getStart(),
238                                 I - FSR.getStart()))
239      return true;
240  }
241  assert(I == E && "Format string not exhausted");
242  return false;
243}
244
245//===----------------------------------------------------------------------===//
246// Methods on ConversionSpecifier.
247//===----------------------------------------------------------------------===//
248const char *ConversionSpecifier::toString() const {
249  switch (kind) {
250  case dArg: return "d";
251  case iArg: return "i";
252  case oArg: return "o";
253  case uArg: return "u";
254  case xArg: return "x";
255  case XArg: return "X";
256  case fArg: return "f";
257  case FArg: return "F";
258  case eArg: return "e";
259  case EArg: return "E";
260  case gArg: return "g";
261  case GArg: return "G";
262  case aArg: return "a";
263  case AArg: return "A";
264  case cArg: return "c";
265  case sArg: return "s";
266  case pArg: return "p";
267  case nArg: return "n";
268  case PercentArg:  return "%";
269  case ScanListArg: return "[";
270  case InvalidSpecifier: return NULL;
271
272  // MacOS X unicode extensions.
273  case CArg: return "C";
274  case SArg: return "S";
275
276  // Objective-C specific specifiers.
277  case ObjCObjArg: return "@";
278
279  // FreeBSD specific specifiers.
280  case bArg: return "b";
281  case DArg: return "D";
282  case rArg: return "r";
283
284  // GlibC specific specifiers.
285  case PrintErrno: return "m";
286  }
287  return NULL;
288}
289
290//===----------------------------------------------------------------------===//
291// Methods on PrintfSpecifier.
292//===----------------------------------------------------------------------===//
293
294ArgTypeResult PrintfSpecifier::getArgType(ASTContext &Ctx) const {
295  const PrintfConversionSpecifier &CS = getConversionSpecifier();
296
297  if (!CS.consumesDataArgument())
298    return ArgTypeResult::Invalid();
299
300  if (CS.getKind() == ConversionSpecifier::cArg)
301    switch (LM.getKind()) {
302      case LengthModifier::None: return Ctx.IntTy;
303      case LengthModifier::AsLong: return ArgTypeResult::WIntTy;
304      default:
305        return ArgTypeResult::Invalid();
306    }
307
308  if (CS.isIntArg())
309    switch (LM.getKind()) {
310      case LengthModifier::AsLongDouble:
311        return ArgTypeResult::Invalid();
312      case LengthModifier::None: return Ctx.IntTy;
313      case LengthModifier::AsChar: return Ctx.SignedCharTy;
314      case LengthModifier::AsShort: return Ctx.ShortTy;
315      case LengthModifier::AsLong: return Ctx.LongTy;
316      case LengthModifier::AsLongLong: return Ctx.LongLongTy;
317      case LengthModifier::AsIntMax:
318        // FIXME: Return unknown for now.
319        return ArgTypeResult();
320      case LengthModifier::AsSizeT: return Ctx.getSizeType();
321      case LengthModifier::AsPtrDiff: return Ctx.getPointerDiffType();
322    }
323
324  if (CS.isUIntArg())
325    switch (LM.getKind()) {
326      case LengthModifier::AsLongDouble:
327        return ArgTypeResult::Invalid();
328      case LengthModifier::None: return Ctx.UnsignedIntTy;
329      case LengthModifier::AsChar: return Ctx.UnsignedCharTy;
330      case LengthModifier::AsShort: return Ctx.UnsignedShortTy;
331      case LengthModifier::AsLong: return Ctx.UnsignedLongTy;
332      case LengthModifier::AsLongLong: return Ctx.UnsignedLongLongTy;
333      case LengthModifier::AsIntMax:
334        // FIXME: Return unknown for now.
335        return ArgTypeResult();
336      case LengthModifier::AsSizeT:
337        // FIXME: How to get the corresponding unsigned
338        // version of size_t?
339        return ArgTypeResult();
340      case LengthModifier::AsPtrDiff:
341        // FIXME: How to get the corresponding unsigned
342        // version of ptrdiff_t?
343        return ArgTypeResult();
344    }
345
346  if (CS.isDoubleArg()) {
347    if (LM.getKind() == LengthModifier::AsLongDouble)
348      return Ctx.LongDoubleTy;
349    return Ctx.DoubleTy;
350  }
351
352  switch (CS.getKind()) {
353    case ConversionSpecifier::sArg:
354      return ArgTypeResult(LM.getKind() == LengthModifier::AsWideChar ?
355          ArgTypeResult::WCStrTy : ArgTypeResult::CStrTy);
356    case ConversionSpecifier::SArg:
357      // FIXME: This appears to be Mac OS X specific.
358      return ArgTypeResult::WCStrTy;
359    case ConversionSpecifier::CArg:
360      return Ctx.WCharTy;
361    case ConversionSpecifier::pArg:
362      return ArgTypeResult::CPointerTy;
363    default:
364      break;
365  }
366
367  // FIXME: Handle other cases.
368  return ArgTypeResult();
369}
370
371bool PrintfSpecifier::fixType(QualType QT) {
372  // Handle strings first (char *, wchar_t *)
373  if (QT->isPointerType() && (QT->getPointeeType()->isAnyCharacterType())) {
374    CS.setKind(ConversionSpecifier::sArg);
375
376    // Disable irrelevant flags
377    HasAlternativeForm = 0;
378    HasLeadingZeroes = 0;
379
380    // Set the long length modifier for wide characters
381    if (QT->getPointeeType()->isWideCharType())
382      LM.setKind(LengthModifier::AsWideChar);
383
384    return true;
385  }
386
387  // We can only work with builtin types.
388  if (!QT->isBuiltinType())
389    return false;
390
391  // Everything else should be a base type
392  const BuiltinType *BT = QT->getAs<BuiltinType>();
393
394  // Set length modifier
395  switch (BT->getKind()) {
396  default:
397    // The rest of the conversions are either optional or for non-builtin types
398    LM.setKind(LengthModifier::None);
399    break;
400
401  case BuiltinType::WChar:
402  case BuiltinType::Long:
403  case BuiltinType::ULong:
404    LM.setKind(LengthModifier::AsLong);
405    break;
406
407  case BuiltinType::LongLong:
408  case BuiltinType::ULongLong:
409    LM.setKind(LengthModifier::AsLongLong);
410    break;
411
412  case BuiltinType::LongDouble:
413    LM.setKind(LengthModifier::AsLongDouble);
414    break;
415  }
416
417  // Set conversion specifier and disable any flags which do not apply to it.
418  if (QT->isAnyCharacterType()) {
419    CS.setKind(ConversionSpecifier::cArg);
420    Precision.setHowSpecified(OptionalAmount::NotSpecified);
421    HasAlternativeForm = 0;
422    HasLeadingZeroes = 0;
423    HasPlusPrefix = 0;
424  }
425  // Test for Floating type first as LongDouble can pass isUnsignedIntegerType
426  else if (QT->isRealFloatingType()) {
427    CS.setKind(ConversionSpecifier::fArg);
428  }
429  else if (QT->isPointerType()) {
430    CS.setKind(ConversionSpecifier::pArg);
431    Precision.setHowSpecified(OptionalAmount::NotSpecified);
432    HasAlternativeForm = 0;
433    HasLeadingZeroes = 0;
434    HasPlusPrefix = 0;
435  }
436  else if (QT->isSignedIntegerType()) {
437    CS.setKind(ConversionSpecifier::dArg);
438    HasAlternativeForm = 0;
439  }
440  else if (QT->isUnsignedIntegerType()) {
441    CS.setKind(ConversionSpecifier::uArg);
442    HasAlternativeForm = 0;
443    HasPlusPrefix = 0;
444  }
445  else {
446    return false;
447  }
448
449  return true;
450}
451
452void PrintfSpecifier::toString(llvm::raw_ostream &os) const {
453  // Whilst some features have no defined order, we are using the order
454  // appearing in the C99 standard (ISO/IEC 9899:1999 (E) ��7.19.6.1)
455  os << "%";
456
457  // Positional args
458  if (usesPositionalArg()) {
459    os << getPositionalArgIndex() << "$";
460  }
461
462  // Conversion flags
463  if (IsLeftJustified)    os << "-";
464  if (HasPlusPrefix)      os << "+";
465  if (HasSpacePrefix)     os << " ";
466  if (HasAlternativeForm) os << "#";
467  if (HasLeadingZeroes)   os << "0";
468
469  // Minimum field width
470  FieldWidth.toString(os);
471  // Precision
472  Precision.toString(os);
473  // Length modifier
474  os << LM.toString();
475  // Conversion specifier
476  os << CS.toString();
477}
478
479bool PrintfSpecifier::hasValidPlusPrefix() const {
480  if (!HasPlusPrefix)
481    return true;
482
483  // The plus prefix only makes sense for signed conversions
484  switch (CS.getKind()) {
485  case ConversionSpecifier::dArg:
486  case ConversionSpecifier::iArg:
487  case ConversionSpecifier::fArg:
488  case ConversionSpecifier::FArg:
489  case ConversionSpecifier::eArg:
490  case ConversionSpecifier::EArg:
491  case ConversionSpecifier::gArg:
492  case ConversionSpecifier::GArg:
493  case ConversionSpecifier::aArg:
494  case ConversionSpecifier::AArg:
495  case ConversionSpecifier::rArg:
496    return true;
497
498  default:
499    return false;
500  }
501}
502
503bool PrintfSpecifier::hasValidAlternativeForm() const {
504  if (!HasAlternativeForm)
505    return true;
506
507  // Alternate form flag only valid with the oxaAeEfFgG conversions
508  switch (CS.getKind()) {
509  case ConversionSpecifier::oArg:
510  case ConversionSpecifier::xArg:
511  case ConversionSpecifier::aArg:
512  case ConversionSpecifier::AArg:
513  case ConversionSpecifier::eArg:
514  case ConversionSpecifier::EArg:
515  case ConversionSpecifier::fArg:
516  case ConversionSpecifier::FArg:
517  case ConversionSpecifier::gArg:
518  case ConversionSpecifier::GArg:
519  case ConversionSpecifier::rArg:
520    return true;
521
522  default:
523    return false;
524  }
525}
526
527bool PrintfSpecifier::hasValidLeadingZeros() const {
528  if (!HasLeadingZeroes)
529    return true;
530
531  // Leading zeroes flag only valid with the diouxXaAeEfFgG conversions
532  switch (CS.getKind()) {
533  case ConversionSpecifier::dArg:
534  case ConversionSpecifier::iArg:
535  case ConversionSpecifier::oArg:
536  case ConversionSpecifier::uArg:
537  case ConversionSpecifier::xArg:
538  case ConversionSpecifier::XArg:
539  case ConversionSpecifier::aArg:
540  case ConversionSpecifier::AArg:
541  case ConversionSpecifier::eArg:
542  case ConversionSpecifier::EArg:
543  case ConversionSpecifier::fArg:
544  case ConversionSpecifier::FArg:
545  case ConversionSpecifier::gArg:
546  case ConversionSpecifier::GArg:
547    return true;
548
549  default:
550    return false;
551  }
552}
553
554bool PrintfSpecifier::hasValidSpacePrefix() const {
555  if (!HasSpacePrefix)
556    return true;
557
558  // The space prefix only makes sense for signed conversions
559  switch (CS.getKind()) {
560  case ConversionSpecifier::dArg:
561  case ConversionSpecifier::iArg:
562  case ConversionSpecifier::fArg:
563  case ConversionSpecifier::FArg:
564  case ConversionSpecifier::eArg:
565  case ConversionSpecifier::EArg:
566  case ConversionSpecifier::gArg:
567  case ConversionSpecifier::GArg:
568  case ConversionSpecifier::aArg:
569  case ConversionSpecifier::AArg:
570    return true;
571
572  default:
573    return false;
574  }
575}
576
577bool PrintfSpecifier::hasValidLeftJustified() const {
578  if (!IsLeftJustified)
579    return true;
580
581  // The left justified flag is valid for all conversions except n
582  switch (CS.getKind()) {
583  case ConversionSpecifier::nArg:
584    return false;
585
586  default:
587    return true;
588  }
589}
590
591bool PrintfSpecifier::hasValidPrecision() const {
592  if (Precision.getHowSpecified() == OptionalAmount::NotSpecified)
593    return true;
594
595  // Precision is only valid with the diouxXaAeEfFgGs conversions
596  switch (CS.getKind()) {
597  case ConversionSpecifier::dArg:
598  case ConversionSpecifier::iArg:
599  case ConversionSpecifier::oArg:
600  case ConversionSpecifier::uArg:
601  case ConversionSpecifier::xArg:
602  case ConversionSpecifier::XArg:
603  case ConversionSpecifier::aArg:
604  case ConversionSpecifier::AArg:
605  case ConversionSpecifier::eArg:
606  case ConversionSpecifier::EArg:
607  case ConversionSpecifier::fArg:
608  case ConversionSpecifier::FArg:
609  case ConversionSpecifier::gArg:
610  case ConversionSpecifier::GArg:
611  case ConversionSpecifier::sArg:
612    return true;
613
614  default:
615    return false;
616  }
617}
618bool PrintfSpecifier::hasValidFieldWidth() const {
619  if (FieldWidth.getHowSpecified() == OptionalAmount::NotSpecified)
620      return true;
621
622  // The field width is valid for all conversions except n
623  switch (CS.getKind()) {
624  case ConversionSpecifier::nArg:
625    return false;
626
627  default:
628    return true;
629  }
630}
631