PrintfFormatString.cpp revision 218893
1//== PrintfFormatString.cpp - Analysis of printf format strings --*- C++ -*-==//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
// Handling of format strings in printf and friends.  The structure of format
// strings for fprintf() is described in C99 7.19.6.1.
12//
13//===----------------------------------------------------------------------===//
14
15#include "clang/Analysis/Analyses/FormatString.h"
16#include "FormatStringParsing.h"
17
18using clang::analyze_format_string::ArgTypeResult;
19using clang::analyze_format_string::FormatStringHandler;
20using clang::analyze_format_string::LengthModifier;
21using clang::analyze_format_string::OptionalAmount;
22using clang::analyze_format_string::ConversionSpecifier;
23using clang::analyze_printf::PrintfSpecifier;
24
25using namespace clang;
26
27typedef clang::analyze_format_string::SpecifierResult<PrintfSpecifier>
28        PrintfSpecifierResult;
29
30//===----------------------------------------------------------------------===//
31// Methods for parsing format strings.
32//===----------------------------------------------------------------------===//
33
34using analyze_format_string::ParseNonPositionAmount;
35
36static bool ParsePrecision(FormatStringHandler &H, PrintfSpecifier &FS,
37                           const char *Start, const char *&Beg, const char *E,
38                           unsigned *argIndex) {
39  if (argIndex) {
40    FS.setPrecision(ParseNonPositionAmount(Beg, E, *argIndex));
41  }
42  else {
43    const OptionalAmount Amt = ParsePositionAmount(H, Start, Beg, E,
44                                           analyze_format_string::PrecisionPos);
45    if (Amt.isInvalid())
46      return true;
47    FS.setPrecision(Amt);
48  }
49  return false;
50}
51
52static PrintfSpecifierResult ParsePrintfSpecifier(FormatStringHandler &H,
53                                                  const char *&Beg,
54                                                  const char *E,
55                                                  unsigned &argIndex,
56                                                  bool FormatExtensions) {
57
58  using namespace clang::analyze_format_string;
59  using namespace clang::analyze_printf;
60
61  const char *I = Beg;
62  const char *Start = 0;
63  UpdateOnReturn <const char*> UpdateBeg(Beg, I);
64
65  // Look for a '%' character that indicates the start of a format specifier.
66  for ( ; I != E ; ++I) {
67    char c = *I;
68    if (c == '\0') {
69      // Detect spurious null characters, which are likely errors.
70      H.HandleNullChar(I);
71      return true;
72    }
73    if (c == '%') {
74      Start = I++;  // Record the start of the format specifier.
75      break;
76    }
77  }
78
79  // No format specifier found?
80  if (!Start)
81    return false;
82
83  if (I == E) {
84    // No more characters left?
85    H.HandleIncompleteSpecifier(Start, E - Start);
86    return true;
87  }
88
89  PrintfSpecifier FS;
90  if (ParseArgPosition(H, FS, Start, I, E))
91    return true;
92
93  if (I == E) {
94    // No more characters left?
95    H.HandleIncompleteSpecifier(Start, E - Start);
96    return true;
97  }
98
99  // Look for flags (if any).
100  bool hasMore = true;
101  for ( ; I != E; ++I) {
102    switch (*I) {
103      default: hasMore = false; break;
104      case '\'':
105        // FIXME: POSIX specific.  Always accept?
106        FS.setHasThousandsGrouping(I);
107        break;
108      case '-': FS.setIsLeftJustified(I); break;
109      case '+': FS.setHasPlusPrefix(I); break;
110      case ' ': FS.setHasSpacePrefix(I); break;
111      case '#': FS.setHasAlternativeForm(I); break;
112      case '0': FS.setHasLeadingZeros(I); break;
113    }
114    if (!hasMore)
115      break;
116  }
117
118  if (I == E) {
119    // No more characters left?
120    H.HandleIncompleteSpecifier(Start, E - Start);
121    return true;
122  }
123
124  // Look for the field width (if any).
125  if (ParseFieldWidth(H, FS, Start, I, E,
126                      FS.usesPositionalArg() ? 0 : &argIndex))
127    return true;
128
129  if (I == E) {
130    // No more characters left?
131    H.HandleIncompleteSpecifier(Start, E - Start);
132    return true;
133  }
134
135  // Look for the precision (if any).
136  if (*I == '.') {
137    ++I;
138    if (I == E) {
139      H.HandleIncompleteSpecifier(Start, E - Start);
140      return true;
141    }
142
143    if (ParsePrecision(H, FS, Start, I, E,
144                       FS.usesPositionalArg() ? 0 : &argIndex))
145      return true;
146
147    if (I == E) {
148      // No more characters left?
149      H.HandleIncompleteSpecifier(Start, E - Start);
150      return true;
151    }
152  }
153
154  // Look for the length modifier.
155  if (ParseLengthModifier(FS, I, E) && I == E) {
156    // No more characters left?
157    H.HandleIncompleteSpecifier(Start, E - Start);
158    return true;
159  }
160
161  if (*I == '\0') {
162    // Detect spurious null characters, which are likely errors.
163    H.HandleNullChar(I);
164    return true;
165  }
166
167  // Finally, look for the conversion specifier.
168  const char *conversionPosition = I++;
169  ConversionSpecifier::Kind k = ConversionSpecifier::InvalidSpecifier;
170  switch (*conversionPosition) {
171    default:
172      break;
173    // C99: 7.19.6.1 (section 8).
174    case '%': k = ConversionSpecifier::PercentArg;   break;
175    case 'A': k = ConversionSpecifier::AArg; break;
176    case 'E': k = ConversionSpecifier::EArg; break;
177    case 'F': k = ConversionSpecifier::FArg; break;
178    case 'G': k = ConversionSpecifier::GArg; break;
179    case 'X': k = ConversionSpecifier::XArg; break;
180    case 'a': k = ConversionSpecifier::aArg; break;
181    case 'c': k = ConversionSpecifier::cArg; break;
182    case 'd': k = ConversionSpecifier::dArg; break;
183    case 'e': k = ConversionSpecifier::eArg; break;
184    case 'f': k = ConversionSpecifier::fArg; break;
185    case 'g': k = ConversionSpecifier::gArg; break;
186    case 'i': k = ConversionSpecifier::iArg; break;
187    case 'n': k = ConversionSpecifier::nArg; break;
188    case 'o': k = ConversionSpecifier::oArg; break;
189    case 'p': k = ConversionSpecifier::pArg;   break;
190    case 's': k = ConversionSpecifier::sArg;      break;
191    case 'u': k = ConversionSpecifier::uArg; break;
192    case 'x': k = ConversionSpecifier::xArg; break;
193    // POSIX specific.
194    case 'C': k = ConversionSpecifier::CArg; break;
195    case 'S': k = ConversionSpecifier::SArg; break;
196    // Objective-C.
197    case '@': k = ConversionSpecifier::ObjCObjArg; break;
198    // Glibc specific.
199    case 'm': k = ConversionSpecifier::PrintErrno; break;
200    // FreeBSD format extensions
201    case 'b': if (FormatExtensions) k = ConversionSpecifier::bArg; break; /* check for int and then char * */
202    case 'r': if (FormatExtensions) k = ConversionSpecifier::rArg; break;
203    case 'y': if (FormatExtensions) k = ConversionSpecifier::iArg; break;
204    case 'D': if (FormatExtensions) k = ConversionSpecifier::DArg; break; /* check for u_char * pointer and a char * string */
205  }
206  PrintfConversionSpecifier CS(conversionPosition, k);
207  FS.setConversionSpecifier(CS);
208  if (CS.consumesDataArgument() && !FS.usesPositionalArg())
209    FS.setArgIndex(argIndex++);
210  // FreeBSD extension
211  if (k == ConversionSpecifier::bArg || k == ConversionSpecifier::DArg)
212    argIndex++;
213
214  if (k == ConversionSpecifier::InvalidSpecifier) {
215    // Assume the conversion takes one argument.
216    return !H.HandleInvalidPrintfConversionSpecifier(FS, Start, I - Start);
217  }
218  return PrintfSpecifierResult(Start, FS);
219}
220
221bool clang::analyze_format_string::ParsePrintfString(FormatStringHandler &H,
222                                                     const char *I,
223                                                     const char *E,
224                                                     bool FormatExtensions) {
225
226  unsigned argIndex = 0;
227
228  // Keep looking for a format specifier until we have exhausted the string.
229  while (I != E) {
230    const PrintfSpecifierResult &FSR = ParsePrintfSpecifier(H, I, E, argIndex,
231                                                            FormatExtensions);
232    // Did a fail-stop error of any kind occur when parsing the specifier?
233    // If so, don't do any more processing.
234    if (FSR.shouldStop())
235      return true;;
236    // Did we exhaust the string or encounter an error that
237    // we can recover from?
238    if (!FSR.hasValue())
239      continue;
240    // We have a format specifier.  Pass it to the callback.
241    if (!H.HandlePrintfSpecifier(FSR.getValue(), FSR.getStart(),
242                                 I - FSR.getStart()))
243      return true;
244  }
245  assert(I == E && "Format string not exhausted");
246  return false;
247}
248
249//===----------------------------------------------------------------------===//
250// Methods on ConversionSpecifier.
251//===----------------------------------------------------------------------===//
252const char *ConversionSpecifier::toString() const {
253  switch (kind) {
254  case dArg: return "d";
255  case iArg: return "i";
256  case oArg: return "o";
257  case uArg: return "u";
258  case xArg: return "x";
259  case XArg: return "X";
260  case fArg: return "f";
261  case FArg: return "F";
262  case eArg: return "e";
263  case EArg: return "E";
264  case gArg: return "g";
265  case GArg: return "G";
266  case aArg: return "a";
267  case AArg: return "A";
268  case cArg: return "c";
269  case sArg: return "s";
270  case pArg: return "p";
271  case nArg: return "n";
272  case PercentArg:  return "%";
273  case ScanListArg: return "[";
274  case InvalidSpecifier: return NULL;
275
276  // MacOS X unicode extensions.
277  case CArg: return "C";
278  case SArg: return "S";
279
280  // Objective-C specific specifiers.
281  case ObjCObjArg: return "@";
282
283  // FreeBSD specific specifiers.
284  case bArg: return "b";
285  case DArg: return "D";
286  case rArg: return "r";
287
288  // GlibC specific specifiers.
289  case PrintErrno: return "m";
290  }
291  return NULL;
292}
293
294//===----------------------------------------------------------------------===//
295// Methods on PrintfSpecifier.
296//===----------------------------------------------------------------------===//
297
298ArgTypeResult PrintfSpecifier::getArgType(ASTContext &Ctx) const {
299  const PrintfConversionSpecifier &CS = getConversionSpecifier();
300
301  if (!CS.consumesDataArgument())
302    return ArgTypeResult::Invalid();
303
304  if (CS.getKind() == ConversionSpecifier::cArg)
305    switch (LM.getKind()) {
306      case LengthModifier::None: return Ctx.IntTy;
307      case LengthModifier::AsLong: return ArgTypeResult::WIntTy;
308      default:
309        return ArgTypeResult::Invalid();
310    }
311
312  if (CS.isIntArg())
313    switch (LM.getKind()) {
314      case LengthModifier::AsLongDouble:
315        return ArgTypeResult::Invalid();
316      case LengthModifier::None: return Ctx.IntTy;
317      case LengthModifier::AsChar: return Ctx.SignedCharTy;
318      case LengthModifier::AsShort: return Ctx.ShortTy;
319      case LengthModifier::AsLong: return Ctx.LongTy;
320      case LengthModifier::AsLongLong: return Ctx.LongLongTy;
321      case LengthModifier::AsIntMax:
322        // FIXME: Return unknown for now.
323        return ArgTypeResult();
324      case LengthModifier::AsSizeT: return Ctx.getSizeType();
325      case LengthModifier::AsPtrDiff: return Ctx.getPointerDiffType();
326    }
327
328  if (CS.isUIntArg())
329    switch (LM.getKind()) {
330      case LengthModifier::AsLongDouble:
331        return ArgTypeResult::Invalid();
332      case LengthModifier::None: return Ctx.UnsignedIntTy;
333      case LengthModifier::AsChar: return Ctx.UnsignedCharTy;
334      case LengthModifier::AsShort: return Ctx.UnsignedShortTy;
335      case LengthModifier::AsLong: return Ctx.UnsignedLongTy;
336      case LengthModifier::AsLongLong: return Ctx.UnsignedLongLongTy;
337      case LengthModifier::AsIntMax:
338        // FIXME: Return unknown for now.
339        return ArgTypeResult();
340      case LengthModifier::AsSizeT:
341        // FIXME: How to get the corresponding unsigned
342        // version of size_t?
343        return ArgTypeResult();
344      case LengthModifier::AsPtrDiff:
345        // FIXME: How to get the corresponding unsigned
346        // version of ptrdiff_t?
347        return ArgTypeResult();
348    }
349
350  if (CS.isDoubleArg()) {
351    if (LM.getKind() == LengthModifier::AsLongDouble)
352      return Ctx.LongDoubleTy;
353    return Ctx.DoubleTy;
354  }
355
356  switch (CS.getKind()) {
357    case ConversionSpecifier::sArg:
358      return ArgTypeResult(LM.getKind() == LengthModifier::AsWideChar ?
359          ArgTypeResult::WCStrTy : ArgTypeResult::CStrTy);
360    case ConversionSpecifier::SArg:
361      // FIXME: This appears to be Mac OS X specific.
362      return ArgTypeResult::WCStrTy;
363    case ConversionSpecifier::CArg:
364      return Ctx.WCharTy;
365    case ConversionSpecifier::pArg:
366      return ArgTypeResult::CPointerTy;
367    default:
368      break;
369  }
370
371  // FIXME: Handle other cases.
372  return ArgTypeResult();
373}
374
375bool PrintfSpecifier::fixType(QualType QT) {
376  // Handle strings first (char *, wchar_t *)
377  if (QT->isPointerType() && (QT->getPointeeType()->isAnyCharacterType())) {
378    CS.setKind(ConversionSpecifier::sArg);
379
380    // Disable irrelevant flags
381    HasAlternativeForm = 0;
382    HasLeadingZeroes = 0;
383
384    // Set the long length modifier for wide characters
385    if (QT->getPointeeType()->isWideCharType())
386      LM.setKind(LengthModifier::AsWideChar);
387
388    return true;
389  }
390
391  // We can only work with builtin types.
392  if (!QT->isBuiltinType())
393    return false;
394
395  // Everything else should be a base type
396  const BuiltinType *BT = QT->getAs<BuiltinType>();
397
398  // Set length modifier
399  switch (BT->getKind()) {
400  default:
401    // The rest of the conversions are either optional or for non-builtin types
402    LM.setKind(LengthModifier::None);
403    break;
404
405  case BuiltinType::Char_U:
406  case BuiltinType::UChar:
407  case BuiltinType::Char_S:
408  case BuiltinType::SChar:
409    LM.setKind(LengthModifier::AsChar);
410    break;
411
412  case BuiltinType::Short:
413  case BuiltinType::UShort:
414    LM.setKind(LengthModifier::AsShort);
415    break;
416
417  case BuiltinType::WChar_S:
418  case BuiltinType::WChar_U:
419  case BuiltinType::Long:
420  case BuiltinType::ULong:
421    LM.setKind(LengthModifier::AsLong);
422    break;
423
424  case BuiltinType::LongLong:
425  case BuiltinType::ULongLong:
426    LM.setKind(LengthModifier::AsLongLong);
427    break;
428
429  case BuiltinType::LongDouble:
430    LM.setKind(LengthModifier::AsLongDouble);
431    break;
432  }
433
434  // Set conversion specifier and disable any flags which do not apply to it.
435  // Let typedefs to char fall through to int, as %c is silly for uint8_t.
436  if (isa<TypedefType>(QT) && QT->isAnyCharacterType()) {
437    CS.setKind(ConversionSpecifier::cArg);
438    LM.setKind(LengthModifier::None);
439    Precision.setHowSpecified(OptionalAmount::NotSpecified);
440    HasAlternativeForm = 0;
441    HasLeadingZeroes = 0;
442    HasPlusPrefix = 0;
443  }
444  // Test for Floating type first as LongDouble can pass isUnsignedIntegerType
445  else if (QT->isRealFloatingType()) {
446    CS.setKind(ConversionSpecifier::fArg);
447  }
448  else if (QT->isPointerType()) {
449    CS.setKind(ConversionSpecifier::pArg);
450    Precision.setHowSpecified(OptionalAmount::NotSpecified);
451    HasAlternativeForm = 0;
452    HasLeadingZeroes = 0;
453    HasPlusPrefix = 0;
454  }
455  else if (QT->isSignedIntegerType()) {
456    CS.setKind(ConversionSpecifier::dArg);
457    HasAlternativeForm = 0;
458  }
459  else if (QT->isUnsignedIntegerType()) {
460    CS.setKind(ConversionSpecifier::uArg);
461    HasAlternativeForm = 0;
462    HasPlusPrefix = 0;
463  }
464  else {
465    return false;
466  }
467
468  return true;
469}
470
471void PrintfSpecifier::toString(llvm::raw_ostream &os) const {
472  // Whilst some features have no defined order, we are using the order
473  // appearing in the C99 standard (ISO/IEC 9899:1999 (E) 7.19.6.1)
474  os << "%";
475
476  // Positional args
477  if (usesPositionalArg()) {
478    os << getPositionalArgIndex() << "$";
479  }
480
481  // Conversion flags
482  if (IsLeftJustified)    os << "-";
483  if (HasPlusPrefix)      os << "+";
484  if (HasSpacePrefix)     os << " ";
485  if (HasAlternativeForm) os << "#";
486  if (HasLeadingZeroes)   os << "0";
487
488  // Minimum field width
489  FieldWidth.toString(os);
490  // Precision
491  Precision.toString(os);
492  // Length modifier
493  os << LM.toString();
494  // Conversion specifier
495  os << CS.toString();
496}
497
498bool PrintfSpecifier::hasValidPlusPrefix() const {
499  if (!HasPlusPrefix)
500    return true;
501
502  // The plus prefix only makes sense for signed conversions
503  switch (CS.getKind()) {
504  case ConversionSpecifier::dArg:
505  case ConversionSpecifier::iArg:
506  case ConversionSpecifier::fArg:
507  case ConversionSpecifier::FArg:
508  case ConversionSpecifier::eArg:
509  case ConversionSpecifier::EArg:
510  case ConversionSpecifier::gArg:
511  case ConversionSpecifier::GArg:
512  case ConversionSpecifier::aArg:
513  case ConversionSpecifier::AArg:
514  case ConversionSpecifier::rArg:
515    return true;
516
517  default:
518    return false;
519  }
520}
521
522bool PrintfSpecifier::hasValidAlternativeForm() const {
523  if (!HasAlternativeForm)
524    return true;
525
526  // Alternate form flag only valid with the oxXaAeEfFgG conversions
527  switch (CS.getKind()) {
528  case ConversionSpecifier::oArg:
529  case ConversionSpecifier::xArg:
530  case ConversionSpecifier::XArg:
531  case ConversionSpecifier::aArg:
532  case ConversionSpecifier::AArg:
533  case ConversionSpecifier::eArg:
534  case ConversionSpecifier::EArg:
535  case ConversionSpecifier::fArg:
536  case ConversionSpecifier::FArg:
537  case ConversionSpecifier::gArg:
538  case ConversionSpecifier::GArg:
539  case ConversionSpecifier::rArg:
540    return true;
541
542  default:
543    return false;
544  }
545}
546
547bool PrintfSpecifier::hasValidLeadingZeros() const {
548  if (!HasLeadingZeroes)
549    return true;
550
551  // Leading zeroes flag only valid with the diouxXaAeEfFgG conversions
552  switch (CS.getKind()) {
553  case ConversionSpecifier::dArg:
554  case ConversionSpecifier::iArg:
555  case ConversionSpecifier::oArg:
556  case ConversionSpecifier::uArg:
557  case ConversionSpecifier::xArg:
558  case ConversionSpecifier::XArg:
559  case ConversionSpecifier::aArg:
560  case ConversionSpecifier::AArg:
561  case ConversionSpecifier::eArg:
562  case ConversionSpecifier::EArg:
563  case ConversionSpecifier::fArg:
564  case ConversionSpecifier::FArg:
565  case ConversionSpecifier::gArg:
566  case ConversionSpecifier::GArg:
567    return true;
568
569  default:
570    return false;
571  }
572}
573
574bool PrintfSpecifier::hasValidSpacePrefix() const {
575  if (!HasSpacePrefix)
576    return true;
577
578  // The space prefix only makes sense for signed conversions
579  switch (CS.getKind()) {
580  case ConversionSpecifier::dArg:
581  case ConversionSpecifier::iArg:
582  case ConversionSpecifier::fArg:
583  case ConversionSpecifier::FArg:
584  case ConversionSpecifier::eArg:
585  case ConversionSpecifier::EArg:
586  case ConversionSpecifier::gArg:
587  case ConversionSpecifier::GArg:
588  case ConversionSpecifier::aArg:
589  case ConversionSpecifier::AArg:
590    return true;
591
592  default:
593    return false;
594  }
595}
596
597bool PrintfSpecifier::hasValidLeftJustified() const {
598  if (!IsLeftJustified)
599    return true;
600
601  // The left justified flag is valid for all conversions except n
602  switch (CS.getKind()) {
603  case ConversionSpecifier::nArg:
604    return false;
605
606  default:
607    return true;
608  }
609}
610
611bool PrintfSpecifier::hasValidThousandsGroupingPrefix() const {
612  if (!HasThousandsGrouping)
613    return true;
614
615  switch (CS.getKind()) {
616    case ConversionSpecifier::dArg:
617    case ConversionSpecifier::iArg:
618    case ConversionSpecifier::uArg:
619    case ConversionSpecifier::fArg:
620    case ConversionSpecifier::FArg:
621    case ConversionSpecifier::gArg:
622    case ConversionSpecifier::GArg:
623      return true;
624    default:
625      return false;
626  }
627}
628
629bool PrintfSpecifier::hasValidPrecision() const {
630  if (Precision.getHowSpecified() == OptionalAmount::NotSpecified)
631    return true;
632
633  // Precision is only valid with the diouxXaAeEfFgGs conversions
634  switch (CS.getKind()) {
635  case ConversionSpecifier::dArg:
636  case ConversionSpecifier::iArg:
637  case ConversionSpecifier::oArg:
638  case ConversionSpecifier::uArg:
639  case ConversionSpecifier::xArg:
640  case ConversionSpecifier::XArg:
641  case ConversionSpecifier::aArg:
642  case ConversionSpecifier::AArg:
643  case ConversionSpecifier::eArg:
644  case ConversionSpecifier::EArg:
645  case ConversionSpecifier::fArg:
646  case ConversionSpecifier::FArg:
647  case ConversionSpecifier::gArg:
648  case ConversionSpecifier::GArg:
649  case ConversionSpecifier::sArg:
650    return true;
651
652  default:
653    return false;
654  }
655}
656bool PrintfSpecifier::hasValidFieldWidth() const {
657  if (FieldWidth.getHowSpecified() == OptionalAmount::NotSpecified)
658      return true;
659
660  // The field width is valid for all conversions except n
661  switch (CS.getKind()) {
662  case ConversionSpecifier::nArg:
663    return false;
664
665  default:
666    return true;
667  }
668}
669