1//== PrintfFormatString.cpp - Analysis of printf format strings --*- C++ -*-==//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
// Handling of format strings in printf and friends.  The structure of format
// strings for fprintf() is described in C99 7.19.6.1.
11//
12//===----------------------------------------------------------------------===//
13
14#include "FormatStringParsing.h"
15#include "clang/AST/FormatString.h"
16#include "clang/AST/OSLog.h"
17#include "clang/Basic/TargetInfo.h"
18#include "llvm/Support/Regex.h"
19
20using clang::analyze_format_string::ArgType;
21using clang::analyze_format_string::FormatStringHandler;
22using clang::analyze_format_string::LengthModifier;
23using clang::analyze_format_string::OptionalAmount;
24using clang::analyze_format_string::ConversionSpecifier;
25using clang::analyze_printf::PrintfSpecifier;
26
27using namespace clang;
28
29typedef clang::analyze_format_string::SpecifierResult<PrintfSpecifier>
30        PrintfSpecifierResult;
31
32//===----------------------------------------------------------------------===//
33// Methods for parsing format strings.
34//===----------------------------------------------------------------------===//
35
36using analyze_format_string::ParseNonPositionAmount;
37
38static bool ParsePrecision(FormatStringHandler &H, PrintfSpecifier &FS,
39                           const char *Start, const char *&Beg, const char *E,
40                           unsigned *argIndex) {
41  if (argIndex) {
42    FS.setPrecision(ParseNonPositionAmount(Beg, E, *argIndex));
43  } else {
44    const OptionalAmount Amt = ParsePositionAmount(H, Start, Beg, E,
45                                           analyze_format_string::PrecisionPos);
46    if (Amt.isInvalid())
47      return true;
48    FS.setPrecision(Amt);
49  }
50  return false;
51}
52
53static bool ParseObjCFlags(FormatStringHandler &H, PrintfSpecifier &FS,
54                           const char *FlagBeg, const char *E, bool Warn) {
55   StringRef Flag(FlagBeg, E - FlagBeg);
56   // Currently there is only one flag.
57   if (Flag == "tt") {
58     FS.setHasObjCTechnicalTerm(FlagBeg);
59     return false;
60   }
61   // Handle either the case of no flag or an invalid flag.
62   if (Warn) {
63     if (Flag == "")
64       H.HandleEmptyObjCModifierFlag(FlagBeg, E  - FlagBeg);
65     else
66       H.HandleInvalidObjCModifierFlag(FlagBeg, E  - FlagBeg);
67   }
68   return true;
69}
70
71static PrintfSpecifierResult ParsePrintfSpecifier(FormatStringHandler &H,
72                                                  const char *&Beg,
73                                                  const char *E,
74                                                  unsigned &argIndex,
75                                                  const LangOptions &LO,
76                                                  const TargetInfo &Target,
77                                                  bool Warn,
78                                                  bool isFreeBSDKPrintf) {
79
80  using namespace clang::analyze_format_string;
81  using namespace clang::analyze_printf;
82
83  const char *I = Beg;
84  const char *Start = nullptr;
85  UpdateOnReturn <const char*> UpdateBeg(Beg, I);
86
87  // Look for a '%' character that indicates the start of a format specifier.
88  for ( ; I != E ; ++I) {
89    char c = *I;
90    if (c == '\0') {
91      // Detect spurious null characters, which are likely errors.
92      H.HandleNullChar(I);
93      return true;
94    }
95    if (c == '%') {
96      Start = I++;  // Record the start of the format specifier.
97      break;
98    }
99  }
100
101  // No format specifier found?
102  if (!Start)
103    return false;
104
105  if (I == E) {
106    // No more characters left?
107    if (Warn)
108      H.HandleIncompleteSpecifier(Start, E - Start);
109    return true;
110  }
111
112  PrintfSpecifier FS;
113  if (ParseArgPosition(H, FS, Start, I, E))
114    return true;
115
116  if (I == E) {
117    // No more characters left?
118    if (Warn)
119      H.HandleIncompleteSpecifier(Start, E - Start);
120    return true;
121  }
122
123  if (*I == '{') {
124    ++I;
125    unsigned char PrivacyFlags = 0;
126    StringRef MatchedStr;
127
128    do {
129      StringRef Str(I, E - I);
130      std::string Match = "^[[:space:]]*"
131                          "(private|public|sensitive|mask\\.[^[:space:],}]*)"
132                          "[[:space:]]*(,|})";
133      llvm::Regex R(Match);
134      SmallVector<StringRef, 2> Matches;
135
136      if (R.match(Str, &Matches)) {
137        MatchedStr = Matches[1];
138        I += Matches[0].size();
139
140        // Set the privacy flag if the privacy annotation in the
141        // comma-delimited segment is at least as strict as the privacy
142        // annotations in previous comma-delimited segments.
143        if (MatchedStr.startswith("mask")) {
144          StringRef MaskType = MatchedStr.substr(sizeof("mask.") - 1);
145          unsigned Size = MaskType.size();
146          if (Warn && (Size == 0 || Size > 8))
147            H.handleInvalidMaskType(MaskType);
148          FS.setMaskType(MaskType);
149        } else if (MatchedStr.equals("sensitive"))
150          PrivacyFlags = clang::analyze_os_log::OSLogBufferItem::IsSensitive;
151        else if (PrivacyFlags !=
152                 clang::analyze_os_log::OSLogBufferItem::IsSensitive &&
153                 MatchedStr.equals("private"))
154          PrivacyFlags = clang::analyze_os_log::OSLogBufferItem::IsPrivate;
155        else if (PrivacyFlags == 0 && MatchedStr.equals("public"))
156          PrivacyFlags = clang::analyze_os_log::OSLogBufferItem::IsPublic;
157      } else {
158        size_t CommaOrBracePos =
159            Str.find_if([](char c) { return c == ',' || c == '}'; });
160
161        if (CommaOrBracePos == StringRef::npos) {
162          // Neither a comma nor the closing brace was found.
163          if (Warn)
164            H.HandleIncompleteSpecifier(Start, E - Start);
165          return true;
166        }
167
168        I += CommaOrBracePos + 1;
169      }
170      // Continue until the closing brace is found.
171    } while (*(I - 1) == ',');
172
173    // Set the privacy flag.
174    switch (PrivacyFlags) {
175    case 0:
176      break;
177    case clang::analyze_os_log::OSLogBufferItem::IsPrivate:
178      FS.setIsPrivate(MatchedStr.data());
179      break;
180    case clang::analyze_os_log::OSLogBufferItem::IsPublic:
181      FS.setIsPublic(MatchedStr.data());
182      break;
183    case clang::analyze_os_log::OSLogBufferItem::IsSensitive:
184      FS.setIsSensitive(MatchedStr.data());
185      break;
186    default:
187      llvm_unreachable("Unexpected privacy flag value");
188    }
189  }
190
191  // Look for flags (if any).
192  bool hasMore = true;
193  for ( ; I != E; ++I) {
194    switch (*I) {
195      default: hasMore = false; break;
196      case '\'':
197        // FIXME: POSIX specific.  Always accept?
198        FS.setHasThousandsGrouping(I);
199        break;
200      case '-': FS.setIsLeftJustified(I); break;
201      case '+': FS.setHasPlusPrefix(I); break;
202      case ' ': FS.setHasSpacePrefix(I); break;
203      case '#': FS.setHasAlternativeForm(I); break;
204      case '0': FS.setHasLeadingZeros(I); break;
205    }
206    if (!hasMore)
207      break;
208  }
209
210  if (I == E) {
211    // No more characters left?
212    if (Warn)
213      H.HandleIncompleteSpecifier(Start, E - Start);
214    return true;
215  }
216
217  // Look for the field width (if any).
218  if (ParseFieldWidth(H, FS, Start, I, E,
219                      FS.usesPositionalArg() ? nullptr : &argIndex))
220    return true;
221
222  if (I == E) {
223    // No more characters left?
224    if (Warn)
225      H.HandleIncompleteSpecifier(Start, E - Start);
226    return true;
227  }
228
229  // Look for the precision (if any).
230  if (*I == '.') {
231    ++I;
232    if (I == E) {
233      if (Warn)
234        H.HandleIncompleteSpecifier(Start, E - Start);
235      return true;
236    }
237
238    if (ParsePrecision(H, FS, Start, I, E,
239                       FS.usesPositionalArg() ? nullptr : &argIndex))
240      return true;
241
242    if (I == E) {
243      // No more characters left?
244      if (Warn)
245        H.HandleIncompleteSpecifier(Start, E - Start);
246      return true;
247    }
248  }
249
250  if (ParseVectorModifier(H, FS, I, E, LO))
251    return true;
252
253  // Look for the length modifier.
254  if (ParseLengthModifier(FS, I, E, LO) && I == E) {
255    // No more characters left?
256    if (Warn)
257      H.HandleIncompleteSpecifier(Start, E - Start);
258    return true;
259  }
260
261  // Look for the Objective-C modifier flags, if any.
262  // We parse these here, even if they don't apply to
263  // the conversion specifier, and then emit an error
264  // later if the conversion specifier isn't '@'.  This
265  // enables better recovery, and we don't know if
266  // these flags are applicable until later.
267  const char *ObjCModifierFlagsStart = nullptr,
268             *ObjCModifierFlagsEnd = nullptr;
269  if (*I == '[') {
270    ObjCModifierFlagsStart = I;
271    ++I;
272    auto flagStart = I;
273    for (;; ++I) {
274      ObjCModifierFlagsEnd = I;
275      if (I == E) {
276        if (Warn)
277          H.HandleIncompleteSpecifier(Start, E - Start);
278        return true;
279      }
280      // Did we find the closing ']'?
281      if (*I == ']') {
282        if (ParseObjCFlags(H, FS, flagStart, I, Warn))
283          return true;
284        ++I;
285        break;
286      }
287      // There are no separators defined yet for multiple
288      // Objective-C modifier flags.  When those are
289      // defined, this is the place to check.
290    }
291  }
292
293  if (*I == '\0') {
294    // Detect spurious null characters, which are likely errors.
295    H.HandleNullChar(I);
296    return true;
297  }
298
299  // Finally, look for the conversion specifier.
300  const char *conversionPosition = I++;
301  ConversionSpecifier::Kind k = ConversionSpecifier::InvalidSpecifier;
302  switch (*conversionPosition) {
303    default:
304      break;
305    // C99: 7.19.6.1 (section 8).
306    case '%': k = ConversionSpecifier::PercentArg;   break;
307    case 'A': k = ConversionSpecifier::AArg; break;
308    case 'E': k = ConversionSpecifier::EArg; break;
309    case 'F': k = ConversionSpecifier::FArg; break;
310    case 'G': k = ConversionSpecifier::GArg; break;
311    case 'X': k = ConversionSpecifier::XArg; break;
312    case 'a': k = ConversionSpecifier::aArg; break;
313    case 'c': k = ConversionSpecifier::cArg; break;
314    case 'd': k = ConversionSpecifier::dArg; break;
315    case 'e': k = ConversionSpecifier::eArg; break;
316    case 'f': k = ConversionSpecifier::fArg; break;
317    case 'g': k = ConversionSpecifier::gArg; break;
318    case 'i': k = ConversionSpecifier::iArg; break;
319    case 'n':
320      // Not handled, but reserved in OpenCL and FreeBSD kernel.
321      if (!LO.OpenCL && !isFreeBSDKPrintf)
322        k = ConversionSpecifier::nArg;
323      break;
324    case 'o': k = ConversionSpecifier::oArg; break;
325    case 'p': k = ConversionSpecifier::pArg; break;
326    case 's': k = ConversionSpecifier::sArg; break;
327    case 'u': k = ConversionSpecifier::uArg; break;
328    case 'x': k = ConversionSpecifier::xArg; break;
329    // POSIX specific.
330    case 'C': k = ConversionSpecifier::CArg; break;
331    case 'S': k = ConversionSpecifier::SArg; break;
332    // Apple extension for os_log
333    case 'P':
334      k = ConversionSpecifier::PArg;
335      break;
336    // Objective-C.
337    case '@': k = ConversionSpecifier::ObjCObjArg; break;
338    // Glibc specific.
339    case 'm': k = ConversionSpecifier::PrintErrno; break;
340    // FreeBSD kernel specific.
341    case 'b':
342      if (isFreeBSDKPrintf)
343        k = ConversionSpecifier::FreeBSDbArg; // int followed by char *
344      break;
345    case 'r':
346      if (isFreeBSDKPrintf)
347        k = ConversionSpecifier::FreeBSDrArg; // int
348      break;
349    case 'y':
350      if (isFreeBSDKPrintf)
351        k = ConversionSpecifier::FreeBSDyArg; // int
352      break;
353    // Apple-specific.
354    case 'D':
355      if (isFreeBSDKPrintf)
356        k = ConversionSpecifier::FreeBSDDArg; // void * followed by char *
357      else if (Target.getTriple().isOSDarwin())
358        k = ConversionSpecifier::DArg;
359      break;
360    case 'O':
361      if (Target.getTriple().isOSDarwin())
362        k = ConversionSpecifier::OArg;
363      break;
364    case 'U':
365      if (Target.getTriple().isOSDarwin())
366        k = ConversionSpecifier::UArg;
367      break;
368    // MS specific.
369    case 'Z':
370      if (Target.getTriple().isOSMSVCRT())
371        k = ConversionSpecifier::ZArg;
372      break;
373  }
374
375  // Check to see if we used the Objective-C modifier flags with
376  // a conversion specifier other than '@'.
377  if (k != ConversionSpecifier::ObjCObjArg &&
378      k != ConversionSpecifier::InvalidSpecifier &&
379      ObjCModifierFlagsStart) {
380    H.HandleObjCFlagsWithNonObjCConversion(ObjCModifierFlagsStart,
381                                           ObjCModifierFlagsEnd + 1,
382                                           conversionPosition);
383    return true;
384  }
385
386  PrintfConversionSpecifier CS(conversionPosition, k);
387  FS.setConversionSpecifier(CS);
388  if (CS.consumesDataArgument() && !FS.usesPositionalArg())
389    FS.setArgIndex(argIndex++);
390  // FreeBSD kernel specific.
391  if (k == ConversionSpecifier::FreeBSDbArg ||
392      k == ConversionSpecifier::FreeBSDDArg)
393    argIndex++;
394
395  if (k == ConversionSpecifier::InvalidSpecifier) {
396    unsigned Len = I - Start;
397    if (ParseUTF8InvalidSpecifier(Start, E, Len)) {
398      CS.setEndScanList(Start + Len);
399      FS.setConversionSpecifier(CS);
400    }
401    // Assume the conversion takes one argument.
402    return !H.HandleInvalidPrintfConversionSpecifier(FS, Start, Len);
403  }
404  return PrintfSpecifierResult(Start, FS);
405}
406
407bool clang::analyze_format_string::ParsePrintfString(FormatStringHandler &H,
408                                                     const char *I,
409                                                     const char *E,
410                                                     const LangOptions &LO,
411                                                     const TargetInfo &Target,
412                                                     bool isFreeBSDKPrintf) {
413
414  unsigned argIndex = 0;
415
416  // Keep looking for a format specifier until we have exhausted the string.
417  while (I != E) {
418    const PrintfSpecifierResult &FSR = ParsePrintfSpecifier(H, I, E, argIndex,
419                                                            LO, Target, true,
420                                                            isFreeBSDKPrintf);
421    // Did a fail-stop error of any kind occur when parsing the specifier?
422    // If so, don't do any more processing.
423    if (FSR.shouldStop())
424      return true;
425    // Did we exhaust the string or encounter an error that
426    // we can recover from?
427    if (!FSR.hasValue())
428      continue;
429    // We have a format specifier.  Pass it to the callback.
430    if (!H.HandlePrintfSpecifier(FSR.getValue(), FSR.getStart(),
431                                 I - FSR.getStart()))
432      return true;
433  }
434  assert(I == E && "Format string not exhausted");
435  return false;
436}
437
438bool clang::analyze_format_string::ParseFormatStringHasSArg(const char *I,
439                                                            const char *E,
440                                                            const LangOptions &LO,
441                                                            const TargetInfo &Target) {
442
443  unsigned argIndex = 0;
444
445  // Keep looking for a %s format specifier until we have exhausted the string.
446  FormatStringHandler H;
447  while (I != E) {
448    const PrintfSpecifierResult &FSR = ParsePrintfSpecifier(H, I, E, argIndex,
449                                                            LO, Target, false,
450                                                            false);
451    // Did a fail-stop error of any kind occur when parsing the specifier?
452    // If so, don't do any more processing.
453    if (FSR.shouldStop())
454      return false;
455    // Did we exhaust the string or encounter an error that
456    // we can recover from?
457    if (!FSR.hasValue())
458      continue;
459    const analyze_printf::PrintfSpecifier &FS = FSR.getValue();
460    // Return true if this a %s format specifier.
461    if (FS.getConversionSpecifier().getKind() == ConversionSpecifier::Kind::sArg)
462      return true;
463  }
464  return false;
465}
466
467bool clang::analyze_format_string::parseFormatStringHasFormattingSpecifiers(
468    const char *Begin, const char *End, const LangOptions &LO,
469    const TargetInfo &Target) {
470  unsigned ArgIndex = 0;
471  // Keep looking for a formatting specifier until we have exhausted the string.
472  FormatStringHandler H;
473  while (Begin != End) {
474    const PrintfSpecifierResult &FSR =
475        ParsePrintfSpecifier(H, Begin, End, ArgIndex, LO, Target, false, false);
476    if (FSR.shouldStop())
477      break;
478    if (FSR.hasValue())
479      return true;
480  }
481  return false;
482}
483
484//===----------------------------------------------------------------------===//
485// Methods on PrintfSpecifier.
486//===----------------------------------------------------------------------===//
487
488ArgType PrintfSpecifier::getScalarArgType(ASTContext &Ctx,
489                                          bool IsObjCLiteral) const {
490  if (CS.getKind() == ConversionSpecifier::cArg)
491    switch (LM.getKind()) {
492      case LengthModifier::None:
493        return Ctx.IntTy;
494      case LengthModifier::AsLong:
495      case LengthModifier::AsWide:
496        return ArgType(ArgType::WIntTy, "wint_t");
497      case LengthModifier::AsShort:
498        if (Ctx.getTargetInfo().getTriple().isOSMSVCRT())
499          return Ctx.IntTy;
500        LLVM_FALLTHROUGH;
501      default:
502        return ArgType::Invalid();
503    }
504
505  if (CS.isIntArg())
506    switch (LM.getKind()) {
507      case LengthModifier::AsLongDouble:
508        // GNU extension.
509        return Ctx.LongLongTy;
510      case LengthModifier::None:
511      case LengthModifier::AsShortLong:
512        return Ctx.IntTy;
513      case LengthModifier::AsInt32:
514        return ArgType(Ctx.IntTy, "__int32");
515      case LengthModifier::AsChar:
516        return ArgType::AnyCharTy;
517      case LengthModifier::AsShort: return Ctx.ShortTy;
518      case LengthModifier::AsLong: return Ctx.LongTy;
519      case LengthModifier::AsLongLong:
520      case LengthModifier::AsQuad:
521        return Ctx.LongLongTy;
522      case LengthModifier::AsInt64:
523        return ArgType(Ctx.LongLongTy, "__int64");
524      case LengthModifier::AsIntMax:
525        return ArgType(Ctx.getIntMaxType(), "intmax_t");
526      case LengthModifier::AsSizeT:
527        return ArgType::makeSizeT(ArgType(Ctx.getSignedSizeType(), "ssize_t"));
528      case LengthModifier::AsInt3264:
529        return Ctx.getTargetInfo().getTriple().isArch64Bit()
530                   ? ArgType(Ctx.LongLongTy, "__int64")
531                   : ArgType(Ctx.IntTy, "__int32");
532      case LengthModifier::AsPtrDiff:
533        return ArgType::makePtrdiffT(
534            ArgType(Ctx.getPointerDiffType(), "ptrdiff_t"));
535      case LengthModifier::AsAllocate:
536      case LengthModifier::AsMAllocate:
537      case LengthModifier::AsWide:
538        return ArgType::Invalid();
539    }
540
541  if (CS.isUIntArg())
542    switch (LM.getKind()) {
543      case LengthModifier::AsLongDouble:
544        // GNU extension.
545        return Ctx.UnsignedLongLongTy;
546      case LengthModifier::None:
547      case LengthModifier::AsShortLong:
548        return Ctx.UnsignedIntTy;
549      case LengthModifier::AsInt32:
550        return ArgType(Ctx.UnsignedIntTy, "unsigned __int32");
551      case LengthModifier::AsChar: return Ctx.UnsignedCharTy;
552      case LengthModifier::AsShort: return Ctx.UnsignedShortTy;
553      case LengthModifier::AsLong: return Ctx.UnsignedLongTy;
554      case LengthModifier::AsLongLong:
555      case LengthModifier::AsQuad:
556        return Ctx.UnsignedLongLongTy;
557      case LengthModifier::AsInt64:
558        return ArgType(Ctx.UnsignedLongLongTy, "unsigned __int64");
559      case LengthModifier::AsIntMax:
560        return ArgType(Ctx.getUIntMaxType(), "uintmax_t");
561      case LengthModifier::AsSizeT:
562        return ArgType::makeSizeT(ArgType(Ctx.getSizeType(), "size_t"));
563      case LengthModifier::AsInt3264:
564        return Ctx.getTargetInfo().getTriple().isArch64Bit()
565                   ? ArgType(Ctx.UnsignedLongLongTy, "unsigned __int64")
566                   : ArgType(Ctx.UnsignedIntTy, "unsigned __int32");
567      case LengthModifier::AsPtrDiff:
568        return ArgType::makePtrdiffT(
569            ArgType(Ctx.getUnsignedPointerDiffType(), "unsigned ptrdiff_t"));
570      case LengthModifier::AsAllocate:
571      case LengthModifier::AsMAllocate:
572      case LengthModifier::AsWide:
573        return ArgType::Invalid();
574    }
575
576  if (CS.isDoubleArg()) {
577    if (!VectorNumElts.isInvalid()) {
578      switch (LM.getKind()) {
579      case LengthModifier::AsShort:
580        return Ctx.HalfTy;
581      case LengthModifier::AsShortLong:
582        return Ctx.FloatTy;
583      case LengthModifier::AsLong:
584      default:
585        return Ctx.DoubleTy;
586      }
587    }
588
589    if (LM.getKind() == LengthModifier::AsLongDouble)
590      return Ctx.LongDoubleTy;
591    return Ctx.DoubleTy;
592  }
593
594  if (CS.getKind() == ConversionSpecifier::nArg) {
595    switch (LM.getKind()) {
596      case LengthModifier::None:
597        return ArgType::PtrTo(Ctx.IntTy);
598      case LengthModifier::AsChar:
599        return ArgType::PtrTo(Ctx.SignedCharTy);
600      case LengthModifier::AsShort:
601        return ArgType::PtrTo(Ctx.ShortTy);
602      case LengthModifier::AsLong:
603        return ArgType::PtrTo(Ctx.LongTy);
604      case LengthModifier::AsLongLong:
605      case LengthModifier::AsQuad:
606        return ArgType::PtrTo(Ctx.LongLongTy);
607      case LengthModifier::AsIntMax:
608        return ArgType::PtrTo(ArgType(Ctx.getIntMaxType(), "intmax_t"));
609      case LengthModifier::AsSizeT:
610        return ArgType::PtrTo(ArgType(Ctx.getSignedSizeType(), "ssize_t"));
611      case LengthModifier::AsPtrDiff:
612        return ArgType::PtrTo(ArgType(Ctx.getPointerDiffType(), "ptrdiff_t"));
613      case LengthModifier::AsLongDouble:
614        return ArgType(); // FIXME: Is this a known extension?
615      case LengthModifier::AsAllocate:
616      case LengthModifier::AsMAllocate:
617      case LengthModifier::AsInt32:
618      case LengthModifier::AsInt3264:
619      case LengthModifier::AsInt64:
620      case LengthModifier::AsWide:
621        return ArgType::Invalid();
622      case LengthModifier::AsShortLong:
623        llvm_unreachable("only used for OpenCL which doesn not handle nArg");
624    }
625  }
626
627  switch (CS.getKind()) {
628    case ConversionSpecifier::sArg:
629      if (LM.getKind() == LengthModifier::AsWideChar) {
630        if (IsObjCLiteral)
631          return ArgType(Ctx.getPointerType(Ctx.UnsignedShortTy.withConst()),
632                         "const unichar *");
633        return ArgType(ArgType::WCStrTy, "wchar_t *");
634      }
635      if (LM.getKind() == LengthModifier::AsWide)
636        return ArgType(ArgType::WCStrTy, "wchar_t *");
637      return ArgType::CStrTy;
638    case ConversionSpecifier::SArg:
639      if (IsObjCLiteral)
640        return ArgType(Ctx.getPointerType(Ctx.UnsignedShortTy.withConst()),
641                       "const unichar *");
642      if (Ctx.getTargetInfo().getTriple().isOSMSVCRT() &&
643          LM.getKind() == LengthModifier::AsShort)
644        return ArgType::CStrTy;
645      return ArgType(ArgType::WCStrTy, "wchar_t *");
646    case ConversionSpecifier::CArg:
647      if (IsObjCLiteral)
648        return ArgType(Ctx.UnsignedShortTy, "unichar");
649      if (Ctx.getTargetInfo().getTriple().isOSMSVCRT() &&
650          LM.getKind() == LengthModifier::AsShort)
651        return Ctx.IntTy;
652      return ArgType(Ctx.WideCharTy, "wchar_t");
653    case ConversionSpecifier::pArg:
654    case ConversionSpecifier::PArg:
655      return ArgType::CPointerTy;
656    case ConversionSpecifier::ObjCObjArg:
657      return ArgType::ObjCPointerTy;
658    default:
659      break;
660  }
661
662  // FIXME: Handle other cases.
663  return ArgType();
664}
665
666
667ArgType PrintfSpecifier::getArgType(ASTContext &Ctx,
668                                    bool IsObjCLiteral) const {
669  const PrintfConversionSpecifier &CS = getConversionSpecifier();
670
671  if (!CS.consumesDataArgument())
672    return ArgType::Invalid();
673
674  ArgType ScalarTy = getScalarArgType(Ctx, IsObjCLiteral);
675  if (!ScalarTy.isValid() || VectorNumElts.isInvalid())
676    return ScalarTy;
677
678  return ScalarTy.makeVectorType(Ctx, VectorNumElts.getConstantAmount());
679}
680
/// Rewrite this specifier in place (conversion specifier, length modifier,
/// flags, precision and vector element count) so that it suits an argument
/// of type \p QT.
///
/// \param QT            The type of the argument to adapt the specifier to.
/// \param LangOpt       Language options; C99 / C++11 enable the typedef-based
///                      length modifier lookup, and they are also passed to
///                      the length-modifier validity check.
/// \param Ctx           Supplies target information and type matching.
/// \param IsObjCLiteral Required for Objective-C retainable types to be fixed
///                      to '%@'; also forwarded to getArgType.
/// \returns true if the specifier was updated for \p QT; false when no fix is
/// attempted: for '%n', for Objective-C retainable types outside an ObjC
/// literal, for types that are not (vectors of) builtin types, and for the
/// builtin kinds listed as unsupported in the switch below.
bool PrintfSpecifier::fixType(QualType QT, const LangOptions &LangOpt,
                              ASTContext &Ctx, bool IsObjCLiteral) {
  // %n is different from other conversion specifiers; don't try to fix it.
  if (CS.getKind() == ConversionSpecifier::nArg)
    return false;

  // Handle Objective-C objects first. Note that while the '%@' specifier will
  // not warn for structure pointer or void pointer arguments (because that's
  // how CoreFoundation objects are implemented), we only show a fixit for '%@'
  // if we know it's an object (block, id, class, or __attribute__((NSObject))).
  if (QT->isObjCRetainableType()) {
    if (!IsObjCLiteral)
      return false;

    CS.setKind(ConversionSpecifier::ObjCObjArg);

    // Disable irrelevant flags
    HasThousandsGrouping = false;
    HasPlusPrefix = false;
    HasSpacePrefix = false;
    HasAlternativeForm = false;
    HasLeadingZeroes = false;
    Precision.setHowSpecified(OptionalAmount::NotSpecified);
    LM.setKind(LengthModifier::None);

    return true;
  }

  // Handle strings next (char *, wchar_t *)
  if (QT->isPointerType() && (QT->getPointeeType()->isAnyCharacterType())) {
    CS.setKind(ConversionSpecifier::sArg);

    // Disable irrelevant flags
    HasAlternativeForm = 0;
    HasLeadingZeroes = 0;

    // Set the long length modifier for wide characters
    if (QT->getPointeeType()->isWideCharType())
      LM.setKind(LengthModifier::AsWideChar);
    else
      LM.setKind(LengthModifier::None);

    return true;
  }

  // If it's an enum, get its underlying type.
  if (const EnumType *ETy = QT->getAs<EnumType>())
    QT = ETy->getDecl()->getIntegerType();

  // For vector types, continue with the element type and remember the element
  // count so that a vector modifier is emitted.
  const BuiltinType *BT = QT->getAs<BuiltinType>();
  if (!BT) {
    const VectorType *VT = QT->getAs<VectorType>();
    if (VT) {
      QT = VT->getElementType();
      BT = QT->getAs<BuiltinType>();
      VectorNumElts = OptionalAmount(VT->getNumElements());
    }
  }

  // We can only work with builtin types.
  if (!BT)
    return false;

  // Set length modifier
  switch (BT->getKind()) {
  case BuiltinType::Bool:
  case BuiltinType::WChar_U:
  case BuiltinType::WChar_S:
  case BuiltinType::Char8: // FIXME: Treat like 'char'?
  case BuiltinType::Char16:
  case BuiltinType::Char32:
  case BuiltinType::UInt128:
  case BuiltinType::Int128:
  case BuiltinType::Half:
  case BuiltinType::BFloat16:
  case BuiltinType::Float16:
  case BuiltinType::Float128:
  case BuiltinType::ShortAccum:
  case BuiltinType::Accum:
  case BuiltinType::LongAccum:
  case BuiltinType::UShortAccum:
  case BuiltinType::UAccum:
  case BuiltinType::ULongAccum:
  case BuiltinType::ShortFract:
  case BuiltinType::Fract:
  case BuiltinType::LongFract:
  case BuiltinType::UShortFract:
  case BuiltinType::UFract:
  case BuiltinType::ULongFract:
  case BuiltinType::SatShortAccum:
  case BuiltinType::SatAccum:
  case BuiltinType::SatLongAccum:
  case BuiltinType::SatUShortAccum:
  case BuiltinType::SatUAccum:
  case BuiltinType::SatULongAccum:
  case BuiltinType::SatShortFract:
  case BuiltinType::SatFract:
  case BuiltinType::SatLongFract:
  case BuiltinType::SatUShortFract:
  case BuiltinType::SatUFract:
  case BuiltinType::SatULongFract:
    // Various types which are non-trivial to correct.
    return false;

  // The .def includes below expand to additional case labels; the
  // SIGNED/UNSIGNED/FLOATING macros are defined empty so that only the
  // BUILTIN_TYPE entries of BuiltinTypes.def produce labels here.
#define IMAGE_TYPE(ImgType, Id, SingletonId, Access, Suffix) \
  case BuiltinType::Id:
#include "clang/Basic/OpenCLImageTypes.def"
#define EXT_OPAQUE_TYPE(ExtType, Id, Ext) \
  case BuiltinType::Id:
#include "clang/Basic/OpenCLExtensionTypes.def"
#define SVE_TYPE(Name, Id, SingletonId) \
  case BuiltinType::Id:
#include "clang/Basic/AArch64SVEACLETypes.def"
#define SIGNED_TYPE(Id, SingletonId)
#define UNSIGNED_TYPE(Id, SingletonId)
#define FLOATING_TYPE(Id, SingletonId)
#define BUILTIN_TYPE(Id, SingletonId) \
  case BuiltinType::Id:
#include "clang/AST/BuiltinTypes.def"
    // Misc other stuff which doesn't make sense here.
    return false;

  // For the cases below, a vector element count (set above) selects the
  // 'hl' / 'l' length modifiers instead of none.
  case BuiltinType::UInt:
  case BuiltinType::Int:
  case BuiltinType::Float:
    LM.setKind(VectorNumElts.isInvalid() ?
               LengthModifier::None : LengthModifier::AsShortLong);
    break;
  case BuiltinType::Double:
    LM.setKind(VectorNumElts.isInvalid() ?
               LengthModifier::None : LengthModifier::AsLong);
    break;
  case BuiltinType::Char_U:
  case BuiltinType::UChar:
  case BuiltinType::Char_S:
  case BuiltinType::SChar:
    LM.setKind(LengthModifier::AsChar);
    break;

  case BuiltinType::Short:
  case BuiltinType::UShort:
    LM.setKind(LengthModifier::AsShort);
    break;

  case BuiltinType::Long:
  case BuiltinType::ULong:
    LM.setKind(LengthModifier::AsLong);
    break;

  case BuiltinType::LongLong:
  case BuiltinType::ULongLong:
    LM.setKind(LengthModifier::AsLongLong);
    break;

  case BuiltinType::LongDouble:
    LM.setKind(LengthModifier::AsLongDouble);
    break;
  }

  // Handle size_t, ptrdiff_t, etc. that have dedicated length modifiers in C99.
  if (isa<TypedefType>(QT) && (LangOpt.C99 || LangOpt.CPlusPlus11))
    namedTypeToLengthModifier(QT, LM);

  // If fixing the length modifier was enough, we might be done.
  if (hasValidLengthModifier(Ctx.getTargetInfo(), LangOpt)) {
    // If we're going to offer a fix anyway, make sure the sign matches.
    switch (CS.getKind()) {
    case ConversionSpecifier::uArg:
    case ConversionSpecifier::UArg:
      if (QT->isSignedIntegerType())
        CS.setKind(clang::analyze_format_string::ConversionSpecifier::dArg);
      break;
    case ConversionSpecifier::dArg:
    case ConversionSpecifier::DArg:
    case ConversionSpecifier::iArg:
      // The conversion is left signed when the '+' flag is set.
      if (QT->isUnsignedIntegerType() && !HasPlusPrefix)
        CS.setKind(clang::analyze_format_string::ConversionSpecifier::uArg);
      break;
    default:
      // Other specifiers do not have signed/unsigned variants.
      break;
    }

    // Done if the adjusted specifier now accepts QT.
    const analyze_printf::ArgType &ATR = getArgType(Ctx, IsObjCLiteral);
    if (ATR.isValid() && ATR.matchesType(Ctx, QT))
      return true;
  }

  // Set conversion specifier and disable any flags which do not apply to it.
  // Let typedefs to char fall through to int, as %c is silly for uint8_t.
  if (!isa<TypedefType>(QT) && QT->isCharType()) {
    CS.setKind(ConversionSpecifier::cArg);
    LM.setKind(LengthModifier::None);
    Precision.setHowSpecified(OptionalAmount::NotSpecified);
    HasAlternativeForm = 0;
    HasLeadingZeroes = 0;
    HasPlusPrefix = 0;
  }
  // Test for Floating type first as LongDouble can pass isUnsignedIntegerType
  else if (QT->isRealFloatingType()) {
    CS.setKind(ConversionSpecifier::fArg);
  }
  else if (QT->isSignedIntegerType()) {
    CS.setKind(ConversionSpecifier::dArg);
    HasAlternativeForm = 0;
  }
  else if (QT->isUnsignedIntegerType()) {
    CS.setKind(ConversionSpecifier::uArg);
    HasAlternativeForm = 0;
    HasPlusPrefix = 0;
  } else {
    llvm_unreachable("Unexpected type");
  }

  return true;
}
897
898void PrintfSpecifier::toString(raw_ostream &os) const {
899  // Whilst some features have no defined order, we are using the order
900  // appearing in the C99 standard (ISO/IEC 9899:1999 (E) 7.19.6.1)
901  os << "%";
902
903  // Positional args
904  if (usesPositionalArg()) {
905    os << getPositionalArgIndex() << "$";
906  }
907
908  // Conversion flags
909  if (IsLeftJustified)    os << "-";
910  if (HasPlusPrefix)      os << "+";
911  if (HasSpacePrefix)     os << " ";
912  if (HasAlternativeForm) os << "#";
913  if (HasLeadingZeroes)   os << "0";
914
915  // Minimum field width
916  FieldWidth.toString(os);
917  // Precision
918  Precision.toString(os);
919
920  // Vector modifier
921  if (!VectorNumElts.isInvalid())
922    os << 'v' << VectorNumElts.getConstantAmount();
923
924  // Length modifier
925  os << LM.toString();
926  // Conversion specifier
927  os << CS.toString();
928}
929
930bool PrintfSpecifier::hasValidPlusPrefix() const {
931  if (!HasPlusPrefix)
932    return true;
933
934  // The plus prefix only makes sense for signed conversions
935  switch (CS.getKind()) {
936  case ConversionSpecifier::dArg:
937  case ConversionSpecifier::DArg:
938  case ConversionSpecifier::iArg:
939  case ConversionSpecifier::fArg:
940  case ConversionSpecifier::FArg:
941  case ConversionSpecifier::eArg:
942  case ConversionSpecifier::EArg:
943  case ConversionSpecifier::gArg:
944  case ConversionSpecifier::GArg:
945  case ConversionSpecifier::aArg:
946  case ConversionSpecifier::AArg:
947  case ConversionSpecifier::FreeBSDrArg:
948  case ConversionSpecifier::FreeBSDyArg:
949    return true;
950
951  default:
952    return false;
953  }
954}
955
956bool PrintfSpecifier::hasValidAlternativeForm() const {
957  if (!HasAlternativeForm)
958    return true;
959
960  // Alternate form flag only valid with the oxXaAeEfFgG conversions
961  switch (CS.getKind()) {
962  case ConversionSpecifier::oArg:
963  case ConversionSpecifier::OArg:
964  case ConversionSpecifier::xArg:
965  case ConversionSpecifier::XArg:
966  case ConversionSpecifier::aArg:
967  case ConversionSpecifier::AArg:
968  case ConversionSpecifier::eArg:
969  case ConversionSpecifier::EArg:
970  case ConversionSpecifier::fArg:
971  case ConversionSpecifier::FArg:
972  case ConversionSpecifier::gArg:
973  case ConversionSpecifier::GArg:
974  case ConversionSpecifier::FreeBSDrArg:
975  case ConversionSpecifier::FreeBSDyArg:
976    return true;
977
978  default:
979    return false;
980  }
981}
982
983bool PrintfSpecifier::hasValidLeadingZeros() const {
984  if (!HasLeadingZeroes)
985    return true;
986
987  // Leading zeroes flag only valid with the diouxXaAeEfFgG conversions
988  switch (CS.getKind()) {
989  case ConversionSpecifier::dArg:
990  case ConversionSpecifier::DArg:
991  case ConversionSpecifier::iArg:
992  case ConversionSpecifier::oArg:
993  case ConversionSpecifier::OArg:
994  case ConversionSpecifier::uArg:
995  case ConversionSpecifier::UArg:
996  case ConversionSpecifier::xArg:
997  case ConversionSpecifier::XArg:
998  case ConversionSpecifier::aArg:
999  case ConversionSpecifier::AArg:
1000  case ConversionSpecifier::eArg:
1001  case ConversionSpecifier::EArg:
1002  case ConversionSpecifier::fArg:
1003  case ConversionSpecifier::FArg:
1004  case ConversionSpecifier::gArg:
1005  case ConversionSpecifier::GArg:
1006  case ConversionSpecifier::FreeBSDrArg:
1007  case ConversionSpecifier::FreeBSDyArg:
1008    return true;
1009
1010  default:
1011    return false;
1012  }
1013}
1014
1015bool PrintfSpecifier::hasValidSpacePrefix() const {
1016  if (!HasSpacePrefix)
1017    return true;
1018
1019  // The space prefix only makes sense for signed conversions
1020  switch (CS.getKind()) {
1021  case ConversionSpecifier::dArg:
1022  case ConversionSpecifier::DArg:
1023  case ConversionSpecifier::iArg:
1024  case ConversionSpecifier::fArg:
1025  case ConversionSpecifier::FArg:
1026  case ConversionSpecifier::eArg:
1027  case ConversionSpecifier::EArg:
1028  case ConversionSpecifier::gArg:
1029  case ConversionSpecifier::GArg:
1030  case ConversionSpecifier::aArg:
1031  case ConversionSpecifier::AArg:
1032  case ConversionSpecifier::FreeBSDrArg:
1033  case ConversionSpecifier::FreeBSDyArg:
1034    return true;
1035
1036  default:
1037    return false;
1038  }
1039}
1040
1041bool PrintfSpecifier::hasValidLeftJustified() const {
1042  if (!IsLeftJustified)
1043    return true;
1044
1045  // The left justified flag is valid for all conversions except n
1046  switch (CS.getKind()) {
1047  case ConversionSpecifier::nArg:
1048    return false;
1049
1050  default:
1051    return true;
1052  }
1053}
1054
1055bool PrintfSpecifier::hasValidThousandsGroupingPrefix() const {
1056  if (!HasThousandsGrouping)
1057    return true;
1058
1059  switch (CS.getKind()) {
1060    case ConversionSpecifier::dArg:
1061    case ConversionSpecifier::DArg:
1062    case ConversionSpecifier::iArg:
1063    case ConversionSpecifier::uArg:
1064    case ConversionSpecifier::UArg:
1065    case ConversionSpecifier::fArg:
1066    case ConversionSpecifier::FArg:
1067    case ConversionSpecifier::gArg:
1068    case ConversionSpecifier::GArg:
1069      return true;
1070    default:
1071      return false;
1072  }
1073}
1074
1075bool PrintfSpecifier::hasValidPrecision() const {
1076  if (Precision.getHowSpecified() == OptionalAmount::NotSpecified)
1077    return true;
1078
1079  // Precision is only valid with the diouxXaAeEfFgGsP conversions
1080  switch (CS.getKind()) {
1081  case ConversionSpecifier::dArg:
1082  case ConversionSpecifier::DArg:
1083  case ConversionSpecifier::iArg:
1084  case ConversionSpecifier::oArg:
1085  case ConversionSpecifier::OArg:
1086  case ConversionSpecifier::uArg:
1087  case ConversionSpecifier::UArg:
1088  case ConversionSpecifier::xArg:
1089  case ConversionSpecifier::XArg:
1090  case ConversionSpecifier::aArg:
1091  case ConversionSpecifier::AArg:
1092  case ConversionSpecifier::eArg:
1093  case ConversionSpecifier::EArg:
1094  case ConversionSpecifier::fArg:
1095  case ConversionSpecifier::FArg:
1096  case ConversionSpecifier::gArg:
1097  case ConversionSpecifier::GArg:
1098  case ConversionSpecifier::sArg:
1099  case ConversionSpecifier::FreeBSDrArg:
1100  case ConversionSpecifier::FreeBSDyArg:
1101  case ConversionSpecifier::PArg:
1102    return true;
1103
1104  default:
1105    return false;
1106  }
1107}
1108bool PrintfSpecifier::hasValidFieldWidth() const {
1109  if (FieldWidth.getHowSpecified() == OptionalAmount::NotSpecified)
1110      return true;
1111
1112  // The field width is valid for all conversions except n
1113  switch (CS.getKind()) {
1114  case ConversionSpecifier::nArg:
1115    return false;
1116
1117  default:
1118    return true;
1119  }
1120}
1121