PrintfFormatString.cpp revision 210299
160484Sobrien//= PrintfFormatStrings.cpp - Analysis of printf format strings --*- C++ -*-==//
277298Sobrien//
360484Sobrien//                     The LLVM Compiler Infrastructure
460484Sobrien//
560484Sobrien// This file is distributed under the University of Illinois Open Source
660484Sobrien// License. See LICENSE.TXT for details.
760484Sobrien//
860484Sobrien//===----------------------------------------------------------------------===//
960484Sobrien//
// Handling of format strings in printf and friends.  The structure of format
// strings for fprintf() is described in C99 7.19.6.1.
1260484Sobrien//
1360484Sobrien//===----------------------------------------------------------------------===//
1460484Sobrien
1560484Sobrien#include "clang/Analysis/Analyses/PrintfFormatString.h"
1660484Sobrien#include "clang/AST/ASTContext.h"
1760484Sobrien#include "clang/AST/Type.h"
1860484Sobrien#include "llvm/Support/raw_ostream.h"
1960484Sobrien
2060484Sobrienusing clang::analyze_printf::ArgTypeResult;
2160484Sobrienusing clang::analyze_printf::FormatSpecifier;
2260484Sobrienusing clang::analyze_printf::FormatStringHandler;
2360484Sobrienusing clang::analyze_printf::OptionalAmount;
2460484Sobrienusing clang::analyze_printf::PositionContext;
2560484Sobrienusing clang::analyze_printf::ConversionSpecifier;
2660484Sobrienusing clang::analyze_printf::LengthModifier;
2760484Sobrien
2860484Sobrienusing namespace clang;
2960484Sobrien
3060484Sobriennamespace {
3160484Sobrienclass FormatSpecifierResult {
3260484Sobrien  FormatSpecifier FS;
3360484Sobrien  const char *Start;
3460484Sobrien  bool Stop;
3560484Sobrienpublic:
3660484Sobrien  FormatSpecifierResult(bool stop = false)
3760484Sobrien    : Start(0), Stop(stop) {}
3860484Sobrien  FormatSpecifierResult(const char *start,
3960484Sobrien                        const FormatSpecifier &fs)
4060484Sobrien    : FS(fs), Start(start), Stop(false) {}
4177298Sobrien
4277298Sobrien  const char *getStart() const { return Start; }
4377298Sobrien  bool shouldStop() const { return Stop; }
4460484Sobrien  bool hasValue() const { return Start != 0; }
4560484Sobrien  const FormatSpecifier &getValue() const {
4660484Sobrien    assert(hasValue());
4777298Sobrien    return FS;
4860484Sobrien  }
4960484Sobrien  const FormatSpecifier &getValue() { return FS; }
5060484Sobrien};
5160484Sobrien} // end anonymous namespace
5260484Sobrien
/// Scope guard that assigns one variable to another when it is destroyed.
///
/// Both the destination and the source are held by reference, so the value
/// written is whatever the source holds at destruction time, not what it held
/// when the guard was constructed.
template <typename T>
class UpdateOnReturn {
  T &Destination;
  const T &Source;

public:
  UpdateOnReturn(T &valueToUpdate, const T &valueToCopy)
    : Destination(valueToUpdate), Source(valueToCopy) {}

  ~UpdateOnReturn() { Destination = Source; }
};
6560484Sobrien
6660484Sobrien//===----------------------------------------------------------------------===//
6760484Sobrien// Methods for parsing format strings.
6860484Sobrien//===----------------------------------------------------------------------===//
6960484Sobrien
7060484Sobrienstatic OptionalAmount ParseAmount(const char *&Beg, const char *E) {
7160484Sobrien  const char *I = Beg;
7260484Sobrien  UpdateOnReturn <const char*> UpdateBeg(Beg, I);
7360484Sobrien
7460484Sobrien  unsigned accumulator = 0;
7560484Sobrien  bool hasDigits = false;
7660484Sobrien
7760484Sobrien  for ( ; I != E; ++I) {
7860484Sobrien    char c = *I;
7960484Sobrien    if (c >= '0' && c <= '9') {
8060484Sobrien      hasDigits = true;
8160484Sobrien      accumulator = (accumulator * 10) + (c - '0');
8260484Sobrien      continue;
8377298Sobrien    }
8460484Sobrien
8560484Sobrien    if (hasDigits)
8660484Sobrien      return OptionalAmount(OptionalAmount::Constant, accumulator, Beg, I - Beg,
8760484Sobrien          false);
8860484Sobrien
8960484Sobrien    break;
9060484Sobrien  }
9160484Sobrien
9260484Sobrien  return OptionalAmount();
9360484Sobrien}
9460484Sobrien
9560484Sobrienstatic OptionalAmount ParseNonPositionAmount(const char *&Beg, const char *E,
9660484Sobrien                                             unsigned &argIndex) {
9760484Sobrien  if (*Beg == '*') {
9860484Sobrien    ++Beg;
9960484Sobrien    return OptionalAmount(OptionalAmount::Arg, argIndex++, Beg, 0, false);
10060484Sobrien  }
10177298Sobrien
10277298Sobrien  return ParseAmount(Beg, E);
10377298Sobrien}
10477298Sobrien
10560484Sobrienstatic OptionalAmount ParsePositionAmount(FormatStringHandler &H,
10660484Sobrien                                          const char *Start,
10760484Sobrien                                          const char *&Beg, const char *E,
10860484Sobrien                                          PositionContext p) {
10977298Sobrien  if (*Beg == '*') {
11077298Sobrien    const char *I = Beg + 1;
11177298Sobrien    const OptionalAmount &Amt = ParseAmount(I, E);
11260484Sobrien
11360484Sobrien    if (Amt.getHowSpecified() == OptionalAmount::NotSpecified) {
11460484Sobrien      H.HandleInvalidPosition(Beg, I - Beg, p);
11560484Sobrien      return OptionalAmount(false);
11677298Sobrien    }
11777298Sobrien
11877298Sobrien    if (I== E) {
11977298Sobrien      // No more characters left?
12077298Sobrien      H.HandleIncompleteFormatSpecifier(Start, E - Start);
12177298Sobrien      return OptionalAmount(false);
12260484Sobrien    }
12360484Sobrien
12460484Sobrien    assert(Amt.getHowSpecified() == OptionalAmount::Constant);
12577298Sobrien
12677298Sobrien    if (*I == '$') {
12777298Sobrien      // Handle positional arguments
12877298Sobrien
12977298Sobrien      // Special case: '*0$', since this is an easy mistake.
13077298Sobrien      if (Amt.getConstantAmount() == 0) {
13177298Sobrien        H.HandleZeroPosition(Beg, I - Beg + 1);
13277298Sobrien        return OptionalAmount(false);
13360484Sobrien      }
13460484Sobrien
13560484Sobrien      const char *Tmp = Beg;
13660484Sobrien      Beg = ++I;
13760484Sobrien
13860484Sobrien      return OptionalAmount(OptionalAmount::Arg, Amt.getConstantAmount() - 1,
13960484Sobrien                            Tmp, 0, true);
14060484Sobrien    }
14160484Sobrien
14277298Sobrien    H.HandleInvalidPosition(Beg, I - Beg, p);
14377298Sobrien    return OptionalAmount(false);
14477298Sobrien  }
14577298Sobrien
14677298Sobrien  return ParseAmount(Beg, E);
14777298Sobrien}
14889857Sobrien
14989857Sobrienstatic bool ParsePrecision(FormatStringHandler &H, FormatSpecifier &FS,
15089857Sobrien                           const char *Start, const char *&Beg, const char *E,
15160484Sobrien                           unsigned *argIndex) {
15260484Sobrien  if (argIndex) {
15360484Sobrien    FS.setPrecision(ParseNonPositionAmount(Beg, E, *argIndex));
15460484Sobrien  }
15560484Sobrien  else {
156    const OptionalAmount Amt = ParsePositionAmount(H, Start, Beg, E,
157                                                  analyze_printf::PrecisionPos);
158    if (Amt.isInvalid())
159      return true;
160    FS.setPrecision(Amt);
161  }
162  return false;
163}
164
165static bool ParseFieldWidth(FormatStringHandler &H, FormatSpecifier &FS,
166                            const char *Start, const char *&Beg, const char *E,
167                            unsigned *argIndex) {
168  // FIXME: Support negative field widths.
169  if (argIndex) {
170    FS.setFieldWidth(ParseNonPositionAmount(Beg, E, *argIndex));
171  }
172  else {
173    const OptionalAmount Amt = ParsePositionAmount(H, Start, Beg, E,
174                                                 analyze_printf::FieldWidthPos);
175    if (Amt.isInvalid())
176      return true;
177    FS.setFieldWidth(Amt);
178  }
179  return false;
180}
181
182static bool ParseArgPosition(FormatStringHandler &H,
183                             FormatSpecifier &FS, const char *Start,
184                             const char *&Beg, const char *E) {
185
186  using namespace clang::analyze_printf;
187  const char *I = Beg;
188
189  const OptionalAmount &Amt = ParseAmount(I, E);
190
191  if (I == E) {
192    // No more characters left?
193    H.HandleIncompleteFormatSpecifier(Start, E - Start);
194    return true;
195  }
196
197  if (Amt.getHowSpecified() == OptionalAmount::Constant && *(I++) == '$') {
198    // Special case: '%0$', since this is an easy mistake.
199    if (Amt.getConstantAmount() == 0) {
200      H.HandleZeroPosition(Start, I - Start);
201      return true;
202    }
203
204    FS.setArgIndex(Amt.getConstantAmount() - 1);
205    FS.setUsesPositionalArg();
206    // Update the caller's pointer if we decided to consume
207    // these characters.
208    Beg = I;
209    return false;
210  }
211
212  return false;
213}
214
215static FormatSpecifierResult ParseFormatSpecifier(FormatStringHandler &H,
216                                                  const char *&Beg,
217                                                  const char *E,
218                                                  unsigned &argIndex,
219						  bool FormatExtensions) {
220
221  using namespace clang::analyze_printf;
222
223  const char *I = Beg;
224  const char *Start = 0;
225  UpdateOnReturn <const char*> UpdateBeg(Beg, I);
226
227  // Look for a '%' character that indicates the start of a format specifier.
228  for ( ; I != E ; ++I) {
229    char c = *I;
230    if (c == '\0') {
231      // Detect spurious null characters, which are likely errors.
232      H.HandleNullChar(I);
233      return true;
234    }
235    if (c == '%') {
236      Start = I++;  // Record the start of the format specifier.
237      break;
238    }
239  }
240
241  // No format specifier found?
242  if (!Start)
243    return false;
244
245  if (I == E) {
246    // No more characters left?
247    H.HandleIncompleteFormatSpecifier(Start, E - Start);
248    return true;
249  }
250
251  FormatSpecifier FS;
252  if (ParseArgPosition(H, FS, Start, I, E))
253    return true;
254
255  if (I == E) {
256    // No more characters left?
257    H.HandleIncompleteFormatSpecifier(Start, E - Start);
258    return true;
259  }
260
261  // Look for flags (if any).
262  bool hasMore = true;
263  for ( ; I != E; ++I) {
264    switch (*I) {
265      default: hasMore = false; break;
266      case '-': FS.setIsLeftJustified(I); break;
267      case '+': FS.setHasPlusPrefix(I); break;
268      case ' ': FS.setHasSpacePrefix(I); break;
269      case '#': FS.setHasAlternativeForm(I); break;
270      case '0': FS.setHasLeadingZeros(I); break;
271    }
272    if (!hasMore)
273      break;
274  }
275
276  if (I == E) {
277    // No more characters left?
278    H.HandleIncompleteFormatSpecifier(Start, E - Start);
279    return true;
280  }
281
282  // Look for the field width (if any).
283  if (ParseFieldWidth(H, FS, Start, I, E,
284                      FS.usesPositionalArg() ? 0 : &argIndex))
285    return true;
286
287  if (I == E) {
288    // No more characters left?
289    H.HandleIncompleteFormatSpecifier(Start, E - Start);
290    return true;
291  }
292
293  // Look for the precision (if any).
294  if (*I == '.') {
295    ++I;
296    if (I == E) {
297      H.HandleIncompleteFormatSpecifier(Start, E - Start);
298      return true;
299    }
300
301    if (ParsePrecision(H, FS, Start, I, E,
302                       FS.usesPositionalArg() ? 0 : &argIndex))
303      return true;
304
305    if (I == E) {
306      // No more characters left?
307      H.HandleIncompleteFormatSpecifier(Start, E - Start);
308      return true;
309    }
310  }
311
312  // Look for the length modifier.
313  LengthModifier::Kind lmKind = LengthModifier::None;
314  const char *lmPosition = I;
315  switch (*I) {
316    default:
317      break;
318    case 'h':
319      ++I;
320      lmKind = (I != E && *I == 'h') ?
321          ++I, LengthModifier::AsChar : LengthModifier::AsShort;
322      break;
323    case 'l':
324      ++I;
325      lmKind = (I != E && *I == 'l') ?
326          ++I, LengthModifier::AsLongLong : LengthModifier::AsLong;
327      break;
328    case 'j': lmKind = LengthModifier::AsIntMax;     ++I; break;
329    case 'z': lmKind = LengthModifier::AsSizeT;      ++I; break;
330    case 't': lmKind = LengthModifier::AsPtrDiff;    ++I; break;
331    case 'L': lmKind = LengthModifier::AsLongDouble; ++I; break;
332    case 'q': lmKind = LengthModifier::AsLongLong;   ++I; break;
333  }
334  LengthModifier lm(lmPosition, lmKind);
335  FS.setLengthModifier(lm);
336
337  if (I == E) {
338    // No more characters left?
339    H.HandleIncompleteFormatSpecifier(Start, E - Start);
340    return true;
341  }
342
343  if (*I == '\0') {
344    // Detect spurious null characters, which are likely errors.
345    H.HandleNullChar(I);
346    return true;
347  }
348
349  // Finally, look for the conversion specifier.
350  const char *conversionPosition = I++;
351  ConversionSpecifier::Kind k = ConversionSpecifier::InvalidSpecifier;
352  switch (*conversionPosition) {
353    default:
354      break;
355    // C99: 7.19.6.1 (section 8).
356    case '%': k = ConversionSpecifier::PercentArg;   break;
357    case 'A': k = ConversionSpecifier::AArg; break;
358    case 'E': k = ConversionSpecifier::EArg; break;
359    case 'F': k = ConversionSpecifier::FArg; break;
360    case 'G': k = ConversionSpecifier::GArg; break;
361    case 'X': k = ConversionSpecifier::XArg; break;
362    case 'a': k = ConversionSpecifier::aArg; break;
363    case 'c': k = ConversionSpecifier::IntAsCharArg; break;
364    case 'd': k = ConversionSpecifier::dArg; break;
365    case 'e': k = ConversionSpecifier::eArg; break;
366    case 'f': k = ConversionSpecifier::fArg; break;
367    case 'g': k = ConversionSpecifier::gArg; break;
368    case 'i': k = ConversionSpecifier::iArg; break;
369    case 'n': k = ConversionSpecifier::OutIntPtrArg; break;
370    case 'o': k = ConversionSpecifier::oArg; break;
371    case 'p': k = ConversionSpecifier::VoidPtrArg;   break;
372    case 's': k = ConversionSpecifier::CStrArg;      break;
373    case 'u': k = ConversionSpecifier::uArg; break;
374    case 'x': k = ConversionSpecifier::xArg; break;
375    // Mac OS X (unicode) specific
376    case 'C': k = ConversionSpecifier::CArg; break;
377    case 'S': k = ConversionSpecifier::UnicodeStrArg; break;
378    // Objective-C.
379    case '@': k = ConversionSpecifier::ObjCObjArg; break;
380    // Glibc specific.
381    case 'm': k = ConversionSpecifier::PrintErrno; break;
382    // FreeBSD format extensions
383    case 'b': if (FormatExtensions) k = ConversionSpecifier::bArg; break; /* check for int and then char * */
384    case 'r': if (FormatExtensions) k = ConversionSpecifier::xArg; break;
385    case 'y': if (FormatExtensions) k = ConversionSpecifier::iArg; break;
386    case 'D': if (FormatExtensions) k = ConversionSpecifier::DArg; break; /* check for u_char * pointer and a char * string */
387  }
388  ConversionSpecifier CS(conversionPosition, k);
389  FS.setConversionSpecifier(CS);
390  if (CS.consumesDataArgument() && !FS.usesPositionalArg())
391    FS.setArgIndex(argIndex++);
392  // FreeBSD extension
393  if (k == ConversionSpecifier::bArg || k == ConversionSpecifier::DArg)
394    argIndex++;
395
396  if (k == ConversionSpecifier::InvalidSpecifier) {
397    // Assume the conversion takes one argument.
398    return !H.HandleInvalidConversionSpecifier(FS, Beg, I - Beg);
399  }
400  return FormatSpecifierResult(Start, FS);
401}
402
403bool clang::analyze_printf::ParseFormatString(FormatStringHandler &H,
404                       const char *I, const char *E, bool FormatExtensions) {
405
406  unsigned argIndex = 0;
407
408  // Keep looking for a format specifier until we have exhausted the string.
409  while (I != E) {
410    const FormatSpecifierResult &FSR = ParseFormatSpecifier(H, I, E, argIndex, FormatExtensions);
411    // Did a fail-stop error of any kind occur when parsing the specifier?
412    // If so, don't do any more processing.
413    if (FSR.shouldStop())
414      return true;;
415    // Did we exhaust the string or encounter an error that
416    // we can recover from?
417    if (!FSR.hasValue())
418      continue;
419    // We have a format specifier.  Pass it to the callback.
420    if (!H.HandleFormatSpecifier(FSR.getValue(), FSR.getStart(),
421                                 I - FSR.getStart()))
422      return true;
423  }
424  assert(I == E && "Format string not exhausted");
425  return false;
426}
427
428FormatStringHandler::~FormatStringHandler() {}
429
430//===----------------------------------------------------------------------===//
431// Methods on ArgTypeResult.
432//===----------------------------------------------------------------------===//
433
434bool ArgTypeResult::matchesType(ASTContext &C, QualType argTy) const {
435  switch (K) {
436    case InvalidTy:
437      assert(false && "ArgTypeResult must be valid");
438      return true;
439
440    case UnknownTy:
441      return true;
442
443    case SpecificTy: {
444      argTy = C.getCanonicalType(argTy).getUnqualifiedType();
445      if (T == argTy)
446        return true;
447      if (const BuiltinType *BT = argTy->getAs<BuiltinType>())
448        switch (BT->getKind()) {
449          default:
450            break;
451          case BuiltinType::Char_S:
452          case BuiltinType::SChar:
453            return T == C.UnsignedCharTy;
454          case BuiltinType::Char_U:
455          case BuiltinType::UChar:
456            return T == C.SignedCharTy;
457          case BuiltinType::Short:
458            return T == C.UnsignedShortTy;
459          case BuiltinType::UShort:
460            return T == C.ShortTy;
461          case BuiltinType::Int:
462            return T == C.UnsignedIntTy;
463          case BuiltinType::UInt:
464            return T == C.IntTy;
465          case BuiltinType::Long:
466            return T == C.UnsignedLongTy;
467          case BuiltinType::ULong:
468            return T == C.LongTy;
469          case BuiltinType::LongLong:
470            return T == C.UnsignedLongLongTy;
471          case BuiltinType::ULongLong:
472            return T == C.LongLongTy;
473        }
474      return false;
475    }
476
477    case CStrTy: {
478      const PointerType *PT = argTy->getAs<PointerType>();
479      if (!PT)
480        return false;
481      QualType pointeeTy = PT->getPointeeType();
482      if (const BuiltinType *BT = pointeeTy->getAs<BuiltinType>())
483        switch (BT->getKind()) {
484          case BuiltinType::Void:
485          case BuiltinType::Char_U:
486          case BuiltinType::UChar:
487          case BuiltinType::Char_S:
488          case BuiltinType::SChar:
489            return true;
490          default:
491            break;
492        }
493
494      return false;
495    }
496
497    case WCStrTy: {
498      const PointerType *PT = argTy->getAs<PointerType>();
499      if (!PT)
500        return false;
501      QualType pointeeTy =
502        C.getCanonicalType(PT->getPointeeType()).getUnqualifiedType();
503      return pointeeTy == C.getWCharType();
504    }
505
506    case CPointerTy:
507      return argTy->getAs<PointerType>() != NULL ||
508      	     argTy->getAs<ObjCObjectPointerType>() != NULL;
509
510    case ObjCPointerTy:
511      return argTy->getAs<ObjCObjectPointerType>() != NULL;
512  }
513
514  // FIXME: Should be unreachable, but Clang is currently emitting
515  // a warning.
516  return false;
517}
518
519QualType ArgTypeResult::getRepresentativeType(ASTContext &C) const {
520  switch (K) {
521    case InvalidTy:
522      assert(false && "No representative type for Invalid ArgTypeResult");
523      // Fall-through.
524    case UnknownTy:
525      return QualType();
526    case SpecificTy:
527      return T;
528    case CStrTy:
529      return C.getPointerType(C.CharTy);
530    case WCStrTy:
531      return C.getPointerType(C.getWCharType());
532    case ObjCPointerTy:
533      return C.ObjCBuiltinIdTy;
534    case CPointerTy:
535      return C.VoidPtrTy;
536  }
537
538  // FIXME: Should be unreachable, but Clang is currently emitting
539  // a warning.
540  return QualType();
541}
542
543//===----------------------------------------------------------------------===//
544// Methods on OptionalAmount.
545//===----------------------------------------------------------------------===//
546
547ArgTypeResult OptionalAmount::getArgType(ASTContext &Ctx) const {
548  return Ctx.IntTy;
549}
550
551//===----------------------------------------------------------------------===//
552// Methods on ConversionSpecifier.
553//===----------------------------------------------------------------------===//
554const char *ConversionSpecifier::toString() const {
555  switch (kind) {
556  case bArg: return "b";
557  case dArg: return "d";
558  case iArg: return "i";
559  case oArg: return "o";
560  case uArg: return "u";
561  case xArg: return "x";
562  case XArg: return "X";
563  case fArg: return "f";
564  case FArg: return "F";
565  case eArg: return "e";
566  case EArg: return "E";
567  case gArg: return "g";
568  case GArg: return "G";
569  case aArg: return "a";
570  case AArg: return "A";
571  case IntAsCharArg:     return "c";
572  case CStrArg:          return "s";
573  case VoidPtrArg:       return "p";
574  case OutIntPtrArg:     return "n";
575  case PercentArg:       return "%";
576  case InvalidSpecifier: return NULL;
577
578  // MacOS X unicode extensions.
579  case CArg:          return "C";
580  case UnicodeStrArg: return "S";
581
582  // Objective-C specific specifiers.
583  case ObjCObjArg: return "@";
584
585  // GlibC specific specifiers.
586  case PrintErrno: return "m";
587  }
588  return NULL;
589}
590
591//===----------------------------------------------------------------------===//
592// Methods on LengthModifier.
593//===----------------------------------------------------------------------===//
594
595const char *LengthModifier::toString() const {
596  switch (kind) {
597  case AsChar:
598    return "hh";
599  case AsShort:
600    return "h";
601  case AsLong: // or AsWideChar
602    return "l";
603  case AsLongLong:
604    return "ll";
605  case AsIntMax:
606    return "j";
607  case AsSizeT:
608    return "z";
609  case AsPtrDiff:
610    return "t";
611  case AsLongDouble:
612    return "L";
613  case None:
614    return "";
615  }
616  return NULL;
617}
618
619//===----------------------------------------------------------------------===//
620// Methods on OptionalAmount.
621//===----------------------------------------------------------------------===//
622
623void OptionalAmount::toString(llvm::raw_ostream &os) const {
624  switch (hs) {
625  case Invalid:
626  case NotSpecified:
627    return;
628  case Arg:
629    if (UsesDotPrefix)
630        os << ".";
631    if (usesPositionalArg())
632      os << "*" << getPositionalArgIndex() << "$";
633    else
634      os << "*";
635    break;
636  case Constant:
637    if (UsesDotPrefix)
638        os << ".";
639    os << amt;
640    break;
641  }
642}
643
644//===----------------------------------------------------------------------===//
645// Methods on FormatSpecifier.
646//===----------------------------------------------------------------------===//
647
648ArgTypeResult FormatSpecifier::getArgType(ASTContext &Ctx) const {
649  if (!CS.consumesDataArgument())
650    return ArgTypeResult::Invalid();
651
652  if (CS.isIntArg())
653    switch (LM.getKind()) {
654      case LengthModifier::AsLongDouble:
655        return ArgTypeResult::Invalid();
656      case LengthModifier::None: return Ctx.IntTy;
657      case LengthModifier::AsChar: return Ctx.SignedCharTy;
658      case LengthModifier::AsShort: return Ctx.ShortTy;
659      case LengthModifier::AsLong: return Ctx.LongTy;
660      case LengthModifier::AsLongLong: return Ctx.LongLongTy;
661      case LengthModifier::AsIntMax:
662        // FIXME: Return unknown for now.
663        return ArgTypeResult();
664      case LengthModifier::AsSizeT: return Ctx.getSizeType();
665      case LengthModifier::AsPtrDiff: return Ctx.getPointerDiffType();
666    }
667
668  if (CS.isUIntArg())
669    switch (LM.getKind()) {
670      case LengthModifier::AsLongDouble:
671        return ArgTypeResult::Invalid();
672      case LengthModifier::None: return Ctx.UnsignedIntTy;
673      case LengthModifier::AsChar: return Ctx.UnsignedCharTy;
674      case LengthModifier::AsShort: return Ctx.UnsignedShortTy;
675      case LengthModifier::AsLong: return Ctx.UnsignedLongTy;
676      case LengthModifier::AsLongLong: return Ctx.UnsignedLongLongTy;
677      case LengthModifier::AsIntMax:
678        // FIXME: Return unknown for now.
679        return ArgTypeResult();
680      case LengthModifier::AsSizeT:
681        // FIXME: How to get the corresponding unsigned
682        // version of size_t?
683        return ArgTypeResult();
684      case LengthModifier::AsPtrDiff:
685        // FIXME: How to get the corresponding unsigned
686        // version of ptrdiff_t?
687        return ArgTypeResult();
688    }
689
690  if (CS.isDoubleArg()) {
691    if (LM.getKind() == LengthModifier::AsLongDouble)
692      return Ctx.LongDoubleTy;
693    return Ctx.DoubleTy;
694  }
695
696  switch (CS.getKind()) {
697    case ConversionSpecifier::CStrArg:
698      return ArgTypeResult(LM.getKind() == LengthModifier::AsWideChar ?
699          ArgTypeResult::WCStrTy : ArgTypeResult::CStrTy);
700    case ConversionSpecifier::UnicodeStrArg:
701      // FIXME: This appears to be Mac OS X specific.
702      return ArgTypeResult::WCStrTy;
703    case ConversionSpecifier::CArg:
704      return Ctx.WCharTy;
705    case ConversionSpecifier::VoidPtrArg:
706      return ArgTypeResult::CPointerTy;
707    default:
708      break;
709  }
710
711  // FIXME: Handle other cases.
712  return ArgTypeResult();
713}
714
715bool FormatSpecifier::fixType(QualType QT) {
716  // Handle strings first (char *, wchar_t *)
717  if (QT->isPointerType() && (QT->getPointeeType()->isAnyCharacterType())) {
718    CS.setKind(ConversionSpecifier::CStrArg);
719
720    // Disable irrelevant flags
721    HasAlternativeForm = 0;
722    HasLeadingZeroes = 0;
723
724    // Set the long length modifier for wide characters
725    if (QT->getPointeeType()->isWideCharType())
726      LM.setKind(LengthModifier::AsWideChar);
727
728    return true;
729  }
730
731  // We can only work with builtin types.
732  if (!QT->isBuiltinType())
733    return false;
734
735  // Everything else should be a base type
736  const BuiltinType *BT = QT->getAs<BuiltinType>();
737
738  // Set length modifier
739  switch (BT->getKind()) {
740  default:
741    // The rest of the conversions are either optional or for non-builtin types
742    LM.setKind(LengthModifier::None);
743    break;
744
745  case BuiltinType::WChar:
746  case BuiltinType::Long:
747  case BuiltinType::ULong:
748    LM.setKind(LengthModifier::AsLong);
749    break;
750
751  case BuiltinType::LongLong:
752  case BuiltinType::ULongLong:
753    LM.setKind(LengthModifier::AsLongLong);
754    break;
755
756  case BuiltinType::LongDouble:
757    LM.setKind(LengthModifier::AsLongDouble);
758    break;
759  }
760
761  // Set conversion specifier and disable any flags which do not apply to it.
762  if (QT->isAnyCharacterType()) {
763    CS.setKind(ConversionSpecifier::IntAsCharArg);
764    Precision.setHowSpecified(OptionalAmount::NotSpecified);
765    HasAlternativeForm = 0;
766    HasLeadingZeroes = 0;
767    HasPlusPrefix = 0;
768  }
769  // Test for Floating type first as LongDouble can pass isUnsignedIntegerType
770  else if (QT->isRealFloatingType()) {
771    CS.setKind(ConversionSpecifier::fArg);
772  }
773  else if (QT->isPointerType()) {
774    CS.setKind(ConversionSpecifier::VoidPtrArg);
775    Precision.setHowSpecified(OptionalAmount::NotSpecified);
776    HasAlternativeForm = 0;
777    HasLeadingZeroes = 0;
778    HasPlusPrefix = 0;
779  }
780  else if (QT->isSignedIntegerType()) {
781    CS.setKind(ConversionSpecifier::dArg);
782    HasAlternativeForm = 0;
783  }
784  else if (QT->isUnsignedIntegerType()) {
785    CS.setKind(ConversionSpecifier::uArg);
786    HasAlternativeForm = 0;
787    HasPlusPrefix = 0;
788  }
789  else {
790    return false;
791  }
792
793  return true;
794}
795
796void FormatSpecifier::toString(llvm::raw_ostream &os) const {
797  // Whilst some features have no defined order, we are using the order
798  // appearing in the C99 standard (ISO/IEC 9899:1999 (E) �7.19.6.1)
799  os << "%";
800
801  // Positional args
802  if (usesPositionalArg()) {
803    os << getPositionalArgIndex() << "$";
804  }
805
806  // Conversion flags
807  if (IsLeftJustified)    os << "-";
808  if (HasPlusPrefix)      os << "+";
809  if (HasSpacePrefix)     os << " ";
810  if (HasAlternativeForm) os << "#";
811  if (HasLeadingZeroes)   os << "0";
812
813  // Minimum field width
814  FieldWidth.toString(os);
815  // Precision
816  Precision.toString(os);
817  // Length modifier
818  os << LM.toString();
819  // Conversion specifier
820  os << CS.toString();
821}
822
823bool FormatSpecifier::hasValidPlusPrefix() const {
824  if (!HasPlusPrefix)
825    return true;
826
827  // The plus prefix only makes sense for signed conversions
828  switch (CS.getKind()) {
829  case ConversionSpecifier::dArg:
830  case ConversionSpecifier::iArg:
831  case ConversionSpecifier::fArg:
832  case ConversionSpecifier::FArg:
833  case ConversionSpecifier::eArg:
834  case ConversionSpecifier::EArg:
835  case ConversionSpecifier::gArg:
836  case ConversionSpecifier::GArg:
837  case ConversionSpecifier::aArg:
838  case ConversionSpecifier::AArg:
839    return true;
840
841  default:
842    return false;
843  }
844}
845
846bool FormatSpecifier::hasValidAlternativeForm() const {
847  if (!HasAlternativeForm)
848    return true;
849
850  // Alternate form flag only valid with the oxaAeEfFgG conversions
851  switch (CS.getKind()) {
852  case ConversionSpecifier::oArg:
853  case ConversionSpecifier::xArg:
854  case ConversionSpecifier::aArg:
855  case ConversionSpecifier::AArg:
856  case ConversionSpecifier::eArg:
857  case ConversionSpecifier::EArg:
858  case ConversionSpecifier::fArg:
859  case ConversionSpecifier::FArg:
860  case ConversionSpecifier::gArg:
861  case ConversionSpecifier::GArg:
862    return true;
863
864  default:
865    return false;
866  }
867}
868
869bool FormatSpecifier::hasValidLeadingZeros() const {
870  if (!HasLeadingZeroes)
871    return true;
872
873  // Leading zeroes flag only valid with the diouxXaAeEfFgG conversions
874  switch (CS.getKind()) {
875  case ConversionSpecifier::dArg:
876  case ConversionSpecifier::iArg:
877  case ConversionSpecifier::oArg:
878  case ConversionSpecifier::uArg:
879  case ConversionSpecifier::xArg:
880  case ConversionSpecifier::XArg:
881  case ConversionSpecifier::aArg:
882  case ConversionSpecifier::AArg:
883  case ConversionSpecifier::eArg:
884  case ConversionSpecifier::EArg:
885  case ConversionSpecifier::fArg:
886  case ConversionSpecifier::FArg:
887  case ConversionSpecifier::gArg:
888  case ConversionSpecifier::GArg:
889    return true;
890
891  default:
892    return false;
893  }
894}
895
896bool FormatSpecifier::hasValidSpacePrefix() const {
897  if (!HasSpacePrefix)
898    return true;
899
900  // The space prefix only makes sense for signed conversions
901  switch (CS.getKind()) {
902  case ConversionSpecifier::dArg:
903  case ConversionSpecifier::iArg:
904  case ConversionSpecifier::fArg:
905  case ConversionSpecifier::FArg:
906  case ConversionSpecifier::eArg:
907  case ConversionSpecifier::EArg:
908  case ConversionSpecifier::gArg:
909  case ConversionSpecifier::GArg:
910  case ConversionSpecifier::aArg:
911  case ConversionSpecifier::AArg:
912    return true;
913
914  default:
915    return false;
916  }
917}
918
919bool FormatSpecifier::hasValidLeftJustified() const {
920  if (!IsLeftJustified)
921    return true;
922
923  // The left justified flag is valid for all conversions except n
924  switch (CS.getKind()) {
925  case ConversionSpecifier::OutIntPtrArg:
926    return false;
927
928  default:
929    return true;
930  }
931}
932
933bool FormatSpecifier::hasValidLengthModifier() const {
934  switch (LM.getKind()) {
935  case LengthModifier::None:
936    return true;
937
938  // Handle most integer flags
939  case LengthModifier::AsChar:
940  case LengthModifier::AsShort:
941  case LengthModifier::AsLongLong:
942  case LengthModifier::AsIntMax:
943  case LengthModifier::AsSizeT:
944  case LengthModifier::AsPtrDiff:
945    switch (CS.getKind()) {
946    case ConversionSpecifier::dArg:
947    case ConversionSpecifier::iArg:
948    case ConversionSpecifier::oArg:
949    case ConversionSpecifier::uArg:
950    case ConversionSpecifier::xArg:
951    case ConversionSpecifier::XArg:
952    case ConversionSpecifier::OutIntPtrArg:
953      return true;
954    default:
955      return false;
956    }
957
958  // Handle 'l' flag
959  case LengthModifier::AsLong:
960    switch (CS.getKind()) {
961    case ConversionSpecifier::dArg:
962    case ConversionSpecifier::iArg:
963    case ConversionSpecifier::oArg:
964    case ConversionSpecifier::uArg:
965    case ConversionSpecifier::xArg:
966    case ConversionSpecifier::XArg:
967    case ConversionSpecifier::aArg:
968    case ConversionSpecifier::AArg:
969    case ConversionSpecifier::fArg:
970    case ConversionSpecifier::FArg:
971    case ConversionSpecifier::eArg:
972    case ConversionSpecifier::EArg:
973    case ConversionSpecifier::gArg:
974    case ConversionSpecifier::GArg:
975    case ConversionSpecifier::OutIntPtrArg:
976    case ConversionSpecifier::IntAsCharArg:
977    case ConversionSpecifier::CStrArg:
978      return true;
979    default:
980      return false;
981    }
982
983  case LengthModifier::AsLongDouble:
984    switch (CS.getKind()) {
985    case ConversionSpecifier::aArg:
986    case ConversionSpecifier::AArg:
987    case ConversionSpecifier::fArg:
988    case ConversionSpecifier::FArg:
989    case ConversionSpecifier::eArg:
990    case ConversionSpecifier::EArg:
991    case ConversionSpecifier::gArg:
992    case ConversionSpecifier::GArg:
993      return true;
994    default:
995      return false;
996    }
997  }
998  return false;
999}
1000
1001bool FormatSpecifier::hasValidPrecision() const {
1002  if (Precision.getHowSpecified() == OptionalAmount::NotSpecified)
1003    return true;
1004
1005  // Precision is only valid with the diouxXaAeEfFgGs conversions
1006  switch (CS.getKind()) {
1007  case ConversionSpecifier::dArg:
1008  case ConversionSpecifier::iArg:
1009  case ConversionSpecifier::oArg:
1010  case ConversionSpecifier::uArg:
1011  case ConversionSpecifier::xArg:
1012  case ConversionSpecifier::XArg:
1013  case ConversionSpecifier::aArg:
1014  case ConversionSpecifier::AArg:
1015  case ConversionSpecifier::eArg:
1016  case ConversionSpecifier::EArg:
1017  case ConversionSpecifier::fArg:
1018  case ConversionSpecifier::FArg:
1019  case ConversionSpecifier::gArg:
1020  case ConversionSpecifier::GArg:
1021  case ConversionSpecifier::CStrArg:
1022    return true;
1023
1024  default:
1025    return false;
1026  }
1027}
1028bool FormatSpecifier::hasValidFieldWidth() const {
1029  if (FieldWidth.getHowSpecified() == OptionalAmount::NotSpecified)
1030      return true;
1031
1032  // The field width is valid for all conversions except n
1033  switch (CS.getKind()) {
1034  case ConversionSpecifier::OutIntPtrArg:
1035    return false;
1036
1037  default:
1038    return true;
1039  }
1040}
1041