PrintfFormatString.cpp revision 213681
1//== PrintfFormatString.cpp - Analysis of printf format strings --*- C++ -*-==//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
// Handling of format strings in printf and friends.  The structure of format
// strings for fprintf() is described in C99 7.19.6.1.
12//
13//===----------------------------------------------------------------------===//
14
15#include "clang/Analysis/Analyses/FormatString.h"
16#include "FormatStringParsing.h"
17
18using clang::analyze_format_string::ArgTypeResult;
19using clang::analyze_format_string::FormatStringHandler;
20using clang::analyze_format_string::LengthModifier;
21using clang::analyze_format_string::OptionalAmount;
22using clang::analyze_format_string::ConversionSpecifier;
23using clang::analyze_printf::PrintfSpecifier;
24
25using namespace clang;
26
27typedef clang::analyze_format_string::SpecifierResult<PrintfSpecifier>
28        PrintfSpecifierResult;
29
30//===----------------------------------------------------------------------===//
31// Methods for parsing format strings.
32//===----------------------------------------------------------------------===//
33
34using analyze_format_string::ParseNonPositionAmount;
35
36static bool ParsePrecision(FormatStringHandler &H, PrintfSpecifier &FS,
37                           const char *Start, const char *&Beg, const char *E,
38                           unsigned *argIndex) {
39  if (argIndex) {
40    FS.setPrecision(ParseNonPositionAmount(Beg, E, *argIndex));
41  }
42  else {
43    const OptionalAmount Amt = ParsePositionAmount(H, Start, Beg, E,
44                                           analyze_format_string::PrecisionPos);
45    if (Amt.isInvalid())
46      return true;
47    FS.setPrecision(Amt);
48  }
49  return false;
50}
51
52static PrintfSpecifierResult ParsePrintfSpecifier(FormatStringHandler &H,
53                                                  const char *&Beg,
54                                                  const char *E,
55                                                  unsigned &argIndex,
56                                                  bool FormatExtensions) {
57
58  using namespace clang::analyze_format_string;
59  using namespace clang::analyze_printf;
60
61  const char *I = Beg;
62  const char *Start = 0;
63  UpdateOnReturn <const char*> UpdateBeg(Beg, I);
64
65  // Look for a '%' character that indicates the start of a format specifier.
66  for ( ; I != E ; ++I) {
67    char c = *I;
68    if (c == '\0') {
69      // Detect spurious null characters, which are likely errors.
70      H.HandleNullChar(I);
71      return true;
72    }
73    if (c == '%') {
74      Start = I++;  // Record the start of the format specifier.
75      break;
76    }
77  }
78
79  // No format specifier found?
80  if (!Start)
81    return false;
82
83  if (I == E) {
84    // No more characters left?
85    H.HandleIncompleteSpecifier(Start, E - Start);
86    return true;
87  }
88
89  PrintfSpecifier FS;
90  if (ParseArgPosition(H, FS, Start, I, E))
91    return true;
92
93  if (I == E) {
94    // No more characters left?
95    H.HandleIncompleteSpecifier(Start, E - Start);
96    return true;
97  }
98
99  // Look for flags (if any).
100  bool hasMore = true;
101  for ( ; I != E; ++I) {
102    switch (*I) {
103      default: hasMore = false; break;
104      case '-': FS.setIsLeftJustified(I); break;
105      case '+': FS.setHasPlusPrefix(I); break;
106      case ' ': FS.setHasSpacePrefix(I); break;
107      case '#': FS.setHasAlternativeForm(I); break;
108      case '0': FS.setHasLeadingZeros(I); break;
109    }
110    if (!hasMore)
111      break;
112  }
113
114  if (I == E) {
115    // No more characters left?
116    H.HandleIncompleteSpecifier(Start, E - Start);
117    return true;
118  }
119
120  // Look for the field width (if any).
121  if (ParseFieldWidth(H, FS, Start, I, E,
122                      FS.usesPositionalArg() ? 0 : &argIndex))
123    return true;
124
125  if (I == E) {
126    // No more characters left?
127    H.HandleIncompleteSpecifier(Start, E - Start);
128    return true;
129  }
130
131  // Look for the precision (if any).
132  if (*I == '.') {
133    ++I;
134    if (I == E) {
135      H.HandleIncompleteSpecifier(Start, E - Start);
136      return true;
137    }
138
139    if (ParsePrecision(H, FS, Start, I, E,
140                       FS.usesPositionalArg() ? 0 : &argIndex))
141      return true;
142
143    if (I == E) {
144      // No more characters left?
145      H.HandleIncompleteSpecifier(Start, E - Start);
146      return true;
147    }
148  }
149
150  // Look for the length modifier.
151  if (ParseLengthModifier(FS, I, E) && I == E) {
152    // No more characters left?
153    H.HandleIncompleteSpecifier(Start, E - Start);
154    return true;
155  }
156
157  if (*I == '\0') {
158    // Detect spurious null characters, which are likely errors.
159    H.HandleNullChar(I);
160    return true;
161  }
162
163  // Finally, look for the conversion specifier.
164  const char *conversionPosition = I++;
165  ConversionSpecifier::Kind k = ConversionSpecifier::InvalidSpecifier;
166  switch (*conversionPosition) {
167    default:
168      break;
169    // C99: 7.19.6.1 (section 8).
170    case '%': k = ConversionSpecifier::PercentArg;   break;
171    case 'A': k = ConversionSpecifier::AArg; break;
172    case 'E': k = ConversionSpecifier::EArg; break;
173    case 'F': k = ConversionSpecifier::FArg; break;
174    case 'G': k = ConversionSpecifier::GArg; break;
175    case 'X': k = ConversionSpecifier::XArg; break;
176    case 'a': k = ConversionSpecifier::aArg; break;
177    case 'c': k = ConversionSpecifier::cArg; break;
178    case 'd': k = ConversionSpecifier::dArg; break;
179    case 'e': k = ConversionSpecifier::eArg; break;
180    case 'f': k = ConversionSpecifier::fArg; break;
181    case 'g': k = ConversionSpecifier::gArg; break;
182    case 'i': k = ConversionSpecifier::iArg; break;
183    case 'n': k = ConversionSpecifier::nArg; break;
184    case 'o': k = ConversionSpecifier::oArg; break;
185    case 'p': k = ConversionSpecifier::pArg;   break;
186    case 's': k = ConversionSpecifier::sArg;      break;
187    case 'u': k = ConversionSpecifier::uArg; break;
188    case 'x': k = ConversionSpecifier::xArg; break;
189    // Mac OS X (unicode) specific
190    case 'C': k = ConversionSpecifier::CArg; break;
191    case 'S': k = ConversionSpecifier::SArg; break;
192    // Objective-C.
193    case '@': k = ConversionSpecifier::ObjCObjArg; break;
194    // Glibc specific.
195    case 'm': k = ConversionSpecifier::PrintErrno; break;
196    // FreeBSD format extensions
197    case 'b': if (FormatExtensions) k = ConversionSpecifier::bArg; break; /* check for int and then char * */
198    case 'r': if (FormatExtensions) k = ConversionSpecifier::xArg; break;
199    case 'y': if (FormatExtensions) k = ConversionSpecifier::iArg; break;
200    case 'D': if (FormatExtensions) k = ConversionSpecifier::DArg; break; /* check for u_char * pointer and a char * string */
201  }
202  PrintfConversionSpecifier CS(conversionPosition, k);
203  FS.setConversionSpecifier(CS);
204  if (CS.consumesDataArgument() && !FS.usesPositionalArg())
205    FS.setArgIndex(argIndex++);
206  // FreeBSD extension
207  if (k == ConversionSpecifier::bArg || k == ConversionSpecifier::DArg)
208    argIndex++;
209
210  if (k == ConversionSpecifier::InvalidSpecifier) {
211    // Assume the conversion takes one argument.
212    return !H.HandleInvalidPrintfConversionSpecifier(FS, Beg, I - Beg);
213  }
214  return PrintfSpecifierResult(Start, FS);
215}
216
217bool clang::analyze_format_string::ParsePrintfString(FormatStringHandler &H,
218                                                     const char *I,
219                                                     const char *E,
220                                                     bool FormatExtensions) {
221
222  unsigned argIndex = 0;
223
224  // Keep looking for a format specifier until we have exhausted the string.
225  while (I != E) {
226    const PrintfSpecifierResult &FSR = ParsePrintfSpecifier(H, I, E, argIndex,
227                                                            FormatExtensions);
228    // Did a fail-stop error of any kind occur when parsing the specifier?
229    // If so, don't do any more processing.
230    if (FSR.shouldStop())
231      return true;;
232    // Did we exhaust the string or encounter an error that
233    // we can recover from?
234    if (!FSR.hasValue())
235      continue;
236    // We have a format specifier.  Pass it to the callback.
237    if (!H.HandlePrintfSpecifier(FSR.getValue(), FSR.getStart(),
238                                 I - FSR.getStart()))
239      return true;
240  }
241  assert(I == E && "Format string not exhausted");
242  return false;
243}
244
245//===----------------------------------------------------------------------===//
246// Methods on ConversionSpecifier.
247//===----------------------------------------------------------------------===//
248const char *ConversionSpecifier::toString() const {
249  switch (kind) {
250  case dArg: return "d";
251  case iArg: return "i";
252  case oArg: return "o";
253  case uArg: return "u";
254  case xArg: return "x";
255  case XArg: return "X";
256  case fArg: return "f";
257  case FArg: return "F";
258  case eArg: return "e";
259  case EArg: return "E";
260  case gArg: return "g";
261  case GArg: return "G";
262  case aArg: return "a";
263  case AArg: return "A";
264  case cArg: return "c";
265  case sArg: return "s";
266  case pArg: return "p";
267  case nArg: return "n";
268  case PercentArg:  return "%";
269  case ScanListArg: return "[";
270  case InvalidSpecifier: return NULL;
271
272  // MacOS X unicode extensions.
273  case CArg: return "C";
274  case SArg: return "S";
275
276  // Objective-C specific specifiers.
277  case ObjCObjArg: return "@";
278
279  // FreeBSD specific specifiers.
280  case bArg: return "b";
281  case DArg: return "D";
282
283  // GlibC specific specifiers.
284  case PrintErrno: return "m";
285  }
286  return NULL;
287}
288
289//===----------------------------------------------------------------------===//
290// Methods on PrintfSpecifier.
291//===----------------------------------------------------------------------===//
292
293ArgTypeResult PrintfSpecifier::getArgType(ASTContext &Ctx) const {
294  const PrintfConversionSpecifier &CS = getConversionSpecifier();
295
296  if (!CS.consumesDataArgument())
297    return ArgTypeResult::Invalid();
298
299  if (CS.getKind() == ConversionSpecifier::cArg)
300    switch (LM.getKind()) {
301      case LengthModifier::None: return Ctx.IntTy;
302      case LengthModifier::AsLong: return ArgTypeResult::WIntTy;
303      default:
304        return ArgTypeResult::Invalid();
305    }
306
307  if (CS.isIntArg())
308    switch (LM.getKind()) {
309      case LengthModifier::AsLongDouble:
310        return ArgTypeResult::Invalid();
311      case LengthModifier::None: return Ctx.IntTy;
312      case LengthModifier::AsChar: return Ctx.SignedCharTy;
313      case LengthModifier::AsShort: return Ctx.ShortTy;
314      case LengthModifier::AsLong: return Ctx.LongTy;
315      case LengthModifier::AsLongLong: return Ctx.LongLongTy;
316      case LengthModifier::AsIntMax:
317        // FIXME: Return unknown for now.
318        return ArgTypeResult();
319      case LengthModifier::AsSizeT: return Ctx.getSizeType();
320      case LengthModifier::AsPtrDiff: return Ctx.getPointerDiffType();
321    }
322
323  if (CS.isUIntArg())
324    switch (LM.getKind()) {
325      case LengthModifier::AsLongDouble:
326        return ArgTypeResult::Invalid();
327      case LengthModifier::None: return Ctx.UnsignedIntTy;
328      case LengthModifier::AsChar: return Ctx.UnsignedCharTy;
329      case LengthModifier::AsShort: return Ctx.UnsignedShortTy;
330      case LengthModifier::AsLong: return Ctx.UnsignedLongTy;
331      case LengthModifier::AsLongLong: return Ctx.UnsignedLongLongTy;
332      case LengthModifier::AsIntMax:
333        // FIXME: Return unknown for now.
334        return ArgTypeResult();
335      case LengthModifier::AsSizeT:
336        // FIXME: How to get the corresponding unsigned
337        // version of size_t?
338        return ArgTypeResult();
339      case LengthModifier::AsPtrDiff:
340        // FIXME: How to get the corresponding unsigned
341        // version of ptrdiff_t?
342        return ArgTypeResult();
343    }
344
345  if (CS.isDoubleArg()) {
346    if (LM.getKind() == LengthModifier::AsLongDouble)
347      return Ctx.LongDoubleTy;
348    return Ctx.DoubleTy;
349  }
350
351  switch (CS.getKind()) {
352    case ConversionSpecifier::sArg:
353      return ArgTypeResult(LM.getKind() == LengthModifier::AsWideChar ?
354          ArgTypeResult::WCStrTy : ArgTypeResult::CStrTy);
355    case ConversionSpecifier::SArg:
356      // FIXME: This appears to be Mac OS X specific.
357      return ArgTypeResult::WCStrTy;
358    case ConversionSpecifier::CArg:
359      return Ctx.WCharTy;
360    case ConversionSpecifier::pArg:
361      return ArgTypeResult::CPointerTy;
362    default:
363      break;
364  }
365
366  // FIXME: Handle other cases.
367  return ArgTypeResult();
368}
369
370bool PrintfSpecifier::fixType(QualType QT) {
371  // Handle strings first (char *, wchar_t *)
372  if (QT->isPointerType() && (QT->getPointeeType()->isAnyCharacterType())) {
373    CS.setKind(ConversionSpecifier::sArg);
374
375    // Disable irrelevant flags
376    HasAlternativeForm = 0;
377    HasLeadingZeroes = 0;
378
379    // Set the long length modifier for wide characters
380    if (QT->getPointeeType()->isWideCharType())
381      LM.setKind(LengthModifier::AsWideChar);
382
383    return true;
384  }
385
386  // We can only work with builtin types.
387  if (!QT->isBuiltinType())
388    return false;
389
390  // Everything else should be a base type
391  const BuiltinType *BT = QT->getAs<BuiltinType>();
392
393  // Set length modifier
394  switch (BT->getKind()) {
395  default:
396    // The rest of the conversions are either optional or for non-builtin types
397    LM.setKind(LengthModifier::None);
398    break;
399
400  case BuiltinType::WChar:
401  case BuiltinType::Long:
402  case BuiltinType::ULong:
403    LM.setKind(LengthModifier::AsLong);
404    break;
405
406  case BuiltinType::LongLong:
407  case BuiltinType::ULongLong:
408    LM.setKind(LengthModifier::AsLongLong);
409    break;
410
411  case BuiltinType::LongDouble:
412    LM.setKind(LengthModifier::AsLongDouble);
413    break;
414  }
415
416  // Set conversion specifier and disable any flags which do not apply to it.
417  if (QT->isAnyCharacterType()) {
418    CS.setKind(ConversionSpecifier::cArg);
419    Precision.setHowSpecified(OptionalAmount::NotSpecified);
420    HasAlternativeForm = 0;
421    HasLeadingZeroes = 0;
422    HasPlusPrefix = 0;
423  }
424  // Test for Floating type first as LongDouble can pass isUnsignedIntegerType
425  else if (QT->isRealFloatingType()) {
426    CS.setKind(ConversionSpecifier::fArg);
427  }
428  else if (QT->isPointerType()) {
429    CS.setKind(ConversionSpecifier::pArg);
430    Precision.setHowSpecified(OptionalAmount::NotSpecified);
431    HasAlternativeForm = 0;
432    HasLeadingZeroes = 0;
433    HasPlusPrefix = 0;
434  }
435  else if (QT->isSignedIntegerType()) {
436    CS.setKind(ConversionSpecifier::dArg);
437    HasAlternativeForm = 0;
438  }
439  else if (QT->isUnsignedIntegerType()) {
440    CS.setKind(ConversionSpecifier::uArg);
441    HasAlternativeForm = 0;
442    HasPlusPrefix = 0;
443  }
444  else {
445    return false;
446  }
447
448  return true;
449}
450
451void PrintfSpecifier::toString(llvm::raw_ostream &os) const {
452  // Whilst some features have no defined order, we are using the order
453  // appearing in the C99 standard (ISO/IEC 9899:1999 (E) ��7.19.6.1)
454  os << "%";
455
456  // Positional args
457  if (usesPositionalArg()) {
458    os << getPositionalArgIndex() << "$";
459  }
460
461  // Conversion flags
462  if (IsLeftJustified)    os << "-";
463  if (HasPlusPrefix)      os << "+";
464  if (HasSpacePrefix)     os << " ";
465  if (HasAlternativeForm) os << "#";
466  if (HasLeadingZeroes)   os << "0";
467
468  // Minimum field width
469  FieldWidth.toString(os);
470  // Precision
471  Precision.toString(os);
472  // Length modifier
473  os << LM.toString();
474  // Conversion specifier
475  os << CS.toString();
476}
477
478bool PrintfSpecifier::hasValidPlusPrefix() const {
479  if (!HasPlusPrefix)
480    return true;
481
482  // The plus prefix only makes sense for signed conversions
483  switch (CS.getKind()) {
484  case ConversionSpecifier::dArg:
485  case ConversionSpecifier::iArg:
486  case ConversionSpecifier::fArg:
487  case ConversionSpecifier::FArg:
488  case ConversionSpecifier::eArg:
489  case ConversionSpecifier::EArg:
490  case ConversionSpecifier::gArg:
491  case ConversionSpecifier::GArg:
492  case ConversionSpecifier::aArg:
493  case ConversionSpecifier::AArg:
494    return true;
495
496  default:
497    return false;
498  }
499}
500
501bool PrintfSpecifier::hasValidAlternativeForm() const {
502  if (!HasAlternativeForm)
503    return true;
504
505  // Alternate form flag only valid with the oxaAeEfFgG conversions
506  switch (CS.getKind()) {
507  case ConversionSpecifier::oArg:
508  case ConversionSpecifier::xArg:
509  case ConversionSpecifier::aArg:
510  case ConversionSpecifier::AArg:
511  case ConversionSpecifier::eArg:
512  case ConversionSpecifier::EArg:
513  case ConversionSpecifier::fArg:
514  case ConversionSpecifier::FArg:
515  case ConversionSpecifier::gArg:
516  case ConversionSpecifier::GArg:
517    return true;
518
519  default:
520    return false;
521  }
522}
523
524bool PrintfSpecifier::hasValidLeadingZeros() const {
525  if (!HasLeadingZeroes)
526    return true;
527
528  // Leading zeroes flag only valid with the diouxXaAeEfFgG conversions
529  switch (CS.getKind()) {
530  case ConversionSpecifier::dArg:
531  case ConversionSpecifier::iArg:
532  case ConversionSpecifier::oArg:
533  case ConversionSpecifier::uArg:
534  case ConversionSpecifier::xArg:
535  case ConversionSpecifier::XArg:
536  case ConversionSpecifier::aArg:
537  case ConversionSpecifier::AArg:
538  case ConversionSpecifier::eArg:
539  case ConversionSpecifier::EArg:
540  case ConversionSpecifier::fArg:
541  case ConversionSpecifier::FArg:
542  case ConversionSpecifier::gArg:
543  case ConversionSpecifier::GArg:
544    return true;
545
546  default:
547    return false;
548  }
549}
550
551bool PrintfSpecifier::hasValidSpacePrefix() const {
552  if (!HasSpacePrefix)
553    return true;
554
555  // The space prefix only makes sense for signed conversions
556  switch (CS.getKind()) {
557  case ConversionSpecifier::dArg:
558  case ConversionSpecifier::iArg:
559  case ConversionSpecifier::fArg:
560  case ConversionSpecifier::FArg:
561  case ConversionSpecifier::eArg:
562  case ConversionSpecifier::EArg:
563  case ConversionSpecifier::gArg:
564  case ConversionSpecifier::GArg:
565  case ConversionSpecifier::aArg:
566  case ConversionSpecifier::AArg:
567    return true;
568
569  default:
570    return false;
571  }
572}
573
574bool PrintfSpecifier::hasValidLeftJustified() const {
575  if (!IsLeftJustified)
576    return true;
577
578  // The left justified flag is valid for all conversions except n
579  switch (CS.getKind()) {
580  case ConversionSpecifier::nArg:
581    return false;
582
583  default:
584    return true;
585  }
586}
587
588bool PrintfSpecifier::hasValidPrecision() const {
589  if (Precision.getHowSpecified() == OptionalAmount::NotSpecified)
590    return true;
591
592  // Precision is only valid with the diouxXaAeEfFgGs conversions
593  switch (CS.getKind()) {
594  case ConversionSpecifier::dArg:
595  case ConversionSpecifier::iArg:
596  case ConversionSpecifier::oArg:
597  case ConversionSpecifier::uArg:
598  case ConversionSpecifier::xArg:
599  case ConversionSpecifier::XArg:
600  case ConversionSpecifier::aArg:
601  case ConversionSpecifier::AArg:
602  case ConversionSpecifier::eArg:
603  case ConversionSpecifier::EArg:
604  case ConversionSpecifier::fArg:
605  case ConversionSpecifier::FArg:
606  case ConversionSpecifier::gArg:
607  case ConversionSpecifier::GArg:
608  case ConversionSpecifier::sArg:
609    return true;
610
611  default:
612    return false;
613  }
614}
615bool PrintfSpecifier::hasValidFieldWidth() const {
616  if (FieldWidth.getHowSpecified() == OptionalAmount::NotSpecified)
617      return true;
618
619  // The field width is valid for all conversions except n
620  switch (CS.getKind()) {
621  case ConversionSpecifier::nArg:
622    return false;
623
624  default:
625    return true;
626  }
627}
628