1//== PrintfFormatString.cpp - Analysis of printf format strings --*- C++ -*-==//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
// Handling of format strings in printf and friends.  The structure of format
// strings for fprintf() is described in C99 7.19.6.1.
11//
12//===----------------------------------------------------------------------===//
13
14#include "FormatStringParsing.h"
15#include "clang/AST/FormatString.h"
16#include "clang/AST/OSLog.h"
17#include "clang/Basic/TargetInfo.h"
18#include "llvm/Support/Regex.h"
19
20using clang::analyze_format_string::ArgType;
21using clang::analyze_format_string::FormatStringHandler;
22using clang::analyze_format_string::LengthModifier;
23using clang::analyze_format_string::OptionalAmount;
24using clang::analyze_format_string::ConversionSpecifier;
25using clang::analyze_printf::PrintfSpecifier;
26
27using namespace clang;
28
29typedef clang::analyze_format_string::SpecifierResult<PrintfSpecifier>
30        PrintfSpecifierResult;
31
32//===----------------------------------------------------------------------===//
33// Methods for parsing format strings.
34//===----------------------------------------------------------------------===//
35
36using analyze_format_string::ParseNonPositionAmount;
37
38static bool ParsePrecision(FormatStringHandler &H, PrintfSpecifier &FS,
39                           const char *Start, const char *&Beg, const char *E,
40                           unsigned *argIndex) {
41  if (argIndex) {
42    FS.setPrecision(ParseNonPositionAmount(Beg, E, *argIndex));
43  } else {
44    const OptionalAmount Amt = ParsePositionAmount(H, Start, Beg, E,
45                                           analyze_format_string::PrecisionPos);
46    if (Amt.isInvalid())
47      return true;
48    FS.setPrecision(Amt);
49  }
50  return false;
51}
52
53static bool ParseObjCFlags(FormatStringHandler &H, PrintfSpecifier &FS,
54                           const char *FlagBeg, const char *E, bool Warn) {
55   StringRef Flag(FlagBeg, E - FlagBeg);
56   // Currently there is only one flag.
57   if (Flag == "tt") {
58     FS.setHasObjCTechnicalTerm(FlagBeg);
59     return false;
60   }
61   // Handle either the case of no flag or an invalid flag.
62   if (Warn) {
63     if (Flag == "")
64       H.HandleEmptyObjCModifierFlag(FlagBeg, E  - FlagBeg);
65     else
66       H.HandleInvalidObjCModifierFlag(FlagBeg, E  - FlagBeg);
67   }
68   return true;
69}
70
71static PrintfSpecifierResult ParsePrintfSpecifier(FormatStringHandler &H,
72                                                  const char *&Beg,
73                                                  const char *E,
74                                                  unsigned &argIndex,
75                                                  const LangOptions &LO,
76                                                  const TargetInfo &Target,
77                                                  bool Warn,
78                                                  bool isFreeBSDKPrintf) {
79
80  using namespace clang::analyze_format_string;
81  using namespace clang::analyze_printf;
82
83  const char *I = Beg;
84  const char *Start = nullptr;
85  UpdateOnReturn <const char*> UpdateBeg(Beg, I);
86
87  // Look for a '%' character that indicates the start of a format specifier.
88  for ( ; I != E ; ++I) {
89    char c = *I;
90    if (c == '\0') {
91      // Detect spurious null characters, which are likely errors.
92      H.HandleNullChar(I);
93      return true;
94    }
95    if (c == '%') {
96      Start = I++;  // Record the start of the format specifier.
97      break;
98    }
99  }
100
101  // No format specifier found?
102  if (!Start)
103    return false;
104
105  if (I == E) {
106    // No more characters left?
107    if (Warn)
108      H.HandleIncompleteSpecifier(Start, E - Start);
109    return true;
110  }
111
112  PrintfSpecifier FS;
113  if (ParseArgPosition(H, FS, Start, I, E))
114    return true;
115
116  if (I == E) {
117    // No more characters left?
118    if (Warn)
119      H.HandleIncompleteSpecifier(Start, E - Start);
120    return true;
121  }
122
123  if (*I == '{') {
124    ++I;
125    unsigned char PrivacyFlags = 0;
126    StringRef MatchedStr;
127
128    do {
129      StringRef Str(I, E - I);
130      std::string Match = "^[[:space:]]*"
131                          "(private|public|sensitive|mask\\.[^[:space:],}]*)"
132                          "[[:space:]]*(,|})";
133      llvm::Regex R(Match);
134      SmallVector<StringRef, 2> Matches;
135
136      if (R.match(Str, &Matches)) {
137        MatchedStr = Matches[1];
138        I += Matches[0].size();
139
140        // Set the privacy flag if the privacy annotation in the
141        // comma-delimited segment is at least as strict as the privacy
142        // annotations in previous comma-delimited segments.
143        if (MatchedStr.starts_with("mask")) {
144          StringRef MaskType = MatchedStr.substr(sizeof("mask.") - 1);
145          unsigned Size = MaskType.size();
146          if (Warn && (Size == 0 || Size > 8))
147            H.handleInvalidMaskType(MaskType);
148          FS.setMaskType(MaskType);
149        } else if (MatchedStr.equals("sensitive"))
150          PrivacyFlags = clang::analyze_os_log::OSLogBufferItem::IsSensitive;
151        else if (PrivacyFlags !=
152                 clang::analyze_os_log::OSLogBufferItem::IsSensitive &&
153                 MatchedStr.equals("private"))
154          PrivacyFlags = clang::analyze_os_log::OSLogBufferItem::IsPrivate;
155        else if (PrivacyFlags == 0 && MatchedStr.equals("public"))
156          PrivacyFlags = clang::analyze_os_log::OSLogBufferItem::IsPublic;
157      } else {
158        size_t CommaOrBracePos =
159            Str.find_if([](char c) { return c == ',' || c == '}'; });
160
161        if (CommaOrBracePos == StringRef::npos) {
162          // Neither a comma nor the closing brace was found.
163          if (Warn)
164            H.HandleIncompleteSpecifier(Start, E - Start);
165          return true;
166        }
167
168        I += CommaOrBracePos + 1;
169      }
170      // Continue until the closing brace is found.
171    } while (*(I - 1) == ',');
172
173    // Set the privacy flag.
174    switch (PrivacyFlags) {
175    case 0:
176      break;
177    case clang::analyze_os_log::OSLogBufferItem::IsPrivate:
178      FS.setIsPrivate(MatchedStr.data());
179      break;
180    case clang::analyze_os_log::OSLogBufferItem::IsPublic:
181      FS.setIsPublic(MatchedStr.data());
182      break;
183    case clang::analyze_os_log::OSLogBufferItem::IsSensitive:
184      FS.setIsSensitive(MatchedStr.data());
185      break;
186    default:
187      llvm_unreachable("Unexpected privacy flag value");
188    }
189  }
190
191  // Look for flags (if any).
192  bool hasMore = true;
193  for ( ; I != E; ++I) {
194    switch (*I) {
195      default: hasMore = false; break;
196      case '\'':
197        // FIXME: POSIX specific.  Always accept?
198        FS.setHasThousandsGrouping(I);
199        break;
200      case '-': FS.setIsLeftJustified(I); break;
201      case '+': FS.setHasPlusPrefix(I); break;
202      case ' ': FS.setHasSpacePrefix(I); break;
203      case '#': FS.setHasAlternativeForm(I); break;
204      case '0': FS.setHasLeadingZeros(I); break;
205    }
206    if (!hasMore)
207      break;
208  }
209
210  if (I == E) {
211    // No more characters left?
212    if (Warn)
213      H.HandleIncompleteSpecifier(Start, E - Start);
214    return true;
215  }
216
217  // Look for the field width (if any).
218  if (ParseFieldWidth(H, FS, Start, I, E,
219                      FS.usesPositionalArg() ? nullptr : &argIndex))
220    return true;
221
222  if (I == E) {
223    // No more characters left?
224    if (Warn)
225      H.HandleIncompleteSpecifier(Start, E - Start);
226    return true;
227  }
228
229  // Look for the precision (if any).
230  if (*I == '.') {
231    ++I;
232    if (I == E) {
233      if (Warn)
234        H.HandleIncompleteSpecifier(Start, E - Start);
235      return true;
236    }
237
238    if (ParsePrecision(H, FS, Start, I, E,
239                       FS.usesPositionalArg() ? nullptr : &argIndex))
240      return true;
241
242    if (I == E) {
243      // No more characters left?
244      if (Warn)
245        H.HandleIncompleteSpecifier(Start, E - Start);
246      return true;
247    }
248  }
249
250  if (ParseVectorModifier(H, FS, I, E, LO))
251    return true;
252
253  // Look for the length modifier.
254  if (ParseLengthModifier(FS, I, E, LO) && I == E) {
255    // No more characters left?
256    if (Warn)
257      H.HandleIncompleteSpecifier(Start, E - Start);
258    return true;
259  }
260
261  // Look for the Objective-C modifier flags, if any.
262  // We parse these here, even if they don't apply to
263  // the conversion specifier, and then emit an error
264  // later if the conversion specifier isn't '@'.  This
265  // enables better recovery, and we don't know if
266  // these flags are applicable until later.
267  const char *ObjCModifierFlagsStart = nullptr,
268             *ObjCModifierFlagsEnd = nullptr;
269  if (*I == '[') {
270    ObjCModifierFlagsStart = I;
271    ++I;
272    auto flagStart = I;
273    for (;; ++I) {
274      ObjCModifierFlagsEnd = I;
275      if (I == E) {
276        if (Warn)
277          H.HandleIncompleteSpecifier(Start, E - Start);
278        return true;
279      }
280      // Did we find the closing ']'?
281      if (*I == ']') {
282        if (ParseObjCFlags(H, FS, flagStart, I, Warn))
283          return true;
284        ++I;
285        break;
286      }
287      // There are no separators defined yet for multiple
288      // Objective-C modifier flags.  When those are
289      // defined, this is the place to check.
290    }
291  }
292
293  if (*I == '\0') {
294    // Detect spurious null characters, which are likely errors.
295    H.HandleNullChar(I);
296    return true;
297  }
298
299  // Finally, look for the conversion specifier.
300  const char *conversionPosition = I++;
301  ConversionSpecifier::Kind k = ConversionSpecifier::InvalidSpecifier;
302  switch (*conversionPosition) {
303    default:
304      break;
305    // C99: 7.19.6.1 (section 8).
306    case '%': k = ConversionSpecifier::PercentArg;   break;
307    case 'A': k = ConversionSpecifier::AArg; break;
308    case 'E': k = ConversionSpecifier::EArg; break;
309    case 'F': k = ConversionSpecifier::FArg; break;
310    case 'G': k = ConversionSpecifier::GArg; break;
311    case 'X': k = ConversionSpecifier::XArg; break;
312    case 'a': k = ConversionSpecifier::aArg; break;
313    case 'c': k = ConversionSpecifier::cArg; break;
314    case 'd': k = ConversionSpecifier::dArg; break;
315    case 'e': k = ConversionSpecifier::eArg; break;
316    case 'f': k = ConversionSpecifier::fArg; break;
317    case 'g': k = ConversionSpecifier::gArg; break;
318    case 'i': k = ConversionSpecifier::iArg; break;
319    case 'n':
320      // Not handled, but reserved in OpenCL and FreeBSD kernel.
321      if (!LO.OpenCL && !isFreeBSDKPrintf)
322        k = ConversionSpecifier::nArg;
323      break;
324    case 'o': k = ConversionSpecifier::oArg; break;
325    case 'p': k = ConversionSpecifier::pArg; break;
326    case 's': k = ConversionSpecifier::sArg; break;
327    case 'u': k = ConversionSpecifier::uArg; break;
328    case 'x': k = ConversionSpecifier::xArg; break;
329    // C23.
330    case 'b':
331      if (isFreeBSDKPrintf)
332        k = ConversionSpecifier::FreeBSDbArg; // int followed by char *
333      else
334        k = ConversionSpecifier::bArg;
335      break;
336    case 'B': k = ConversionSpecifier::BArg; break;
337    // POSIX specific.
338    case 'C': k = ConversionSpecifier::CArg; break;
339    case 'S': k = ConversionSpecifier::SArg; break;
340    // Apple extension for os_log
341    case 'P':
342      k = ConversionSpecifier::PArg;
343      break;
344    // Objective-C.
345    case '@': k = ConversionSpecifier::ObjCObjArg; break;
346    // Glibc specific.
347    case 'm': k = ConversionSpecifier::PrintErrno; break;
348    case 'r':
349      if (isFreeBSDKPrintf)
350        k = ConversionSpecifier::FreeBSDrArg; // int
351      break;
352    case 'y':
353      if (isFreeBSDKPrintf)
354        k = ConversionSpecifier::FreeBSDyArg; // int
355      break;
356    // Apple-specific.
357    case 'D':
358      if (isFreeBSDKPrintf)
359        k = ConversionSpecifier::FreeBSDDArg; // void * followed by char *
360      else if (Target.getTriple().isOSDarwin())
361        k = ConversionSpecifier::DArg;
362      break;
363    case 'O':
364      if (Target.getTriple().isOSDarwin())
365        k = ConversionSpecifier::OArg;
366      break;
367    case 'U':
368      if (Target.getTriple().isOSDarwin())
369        k = ConversionSpecifier::UArg;
370      break;
371    // MS specific.
372    case 'Z':
373      if (Target.getTriple().isOSMSVCRT())
374        k = ConversionSpecifier::ZArg;
375      break;
376  }
377
378  // Check to see if we used the Objective-C modifier flags with
379  // a conversion specifier other than '@'.
380  if (k != ConversionSpecifier::ObjCObjArg &&
381      k != ConversionSpecifier::InvalidSpecifier &&
382      ObjCModifierFlagsStart) {
383    H.HandleObjCFlagsWithNonObjCConversion(ObjCModifierFlagsStart,
384                                           ObjCModifierFlagsEnd + 1,
385                                           conversionPosition);
386    return true;
387  }
388
389  PrintfConversionSpecifier CS(conversionPosition, k);
390  FS.setConversionSpecifier(CS);
391  if (CS.consumesDataArgument() && !FS.usesPositionalArg())
392    FS.setArgIndex(argIndex++);
393  // FreeBSD kernel specific.
394  if (k == ConversionSpecifier::FreeBSDbArg ||
395      k == ConversionSpecifier::FreeBSDDArg)
396    argIndex++;
397
398  if (k == ConversionSpecifier::InvalidSpecifier) {
399    unsigned Len = I - Start;
400    if (ParseUTF8InvalidSpecifier(Start, E, Len)) {
401      CS.setEndScanList(Start + Len);
402      FS.setConversionSpecifier(CS);
403    }
404    // Assume the conversion takes one argument.
405    return !H.HandleInvalidPrintfConversionSpecifier(FS, Start, Len);
406  }
407  return PrintfSpecifierResult(Start, FS);
408}
409
410bool clang::analyze_format_string::ParsePrintfString(FormatStringHandler &H,
411                                                     const char *I,
412                                                     const char *E,
413                                                     const LangOptions &LO,
414                                                     const TargetInfo &Target,
415                                                     bool isFreeBSDKPrintf) {
416
417  unsigned argIndex = 0;
418
419  // Keep looking for a format specifier until we have exhausted the string.
420  while (I != E) {
421    const PrintfSpecifierResult &FSR = ParsePrintfSpecifier(H, I, E, argIndex,
422                                                            LO, Target, true,
423                                                            isFreeBSDKPrintf);
424    // Did a fail-stop error of any kind occur when parsing the specifier?
425    // If so, don't do any more processing.
426    if (FSR.shouldStop())
427      return true;
428    // Did we exhaust the string or encounter an error that
429    // we can recover from?
430    if (!FSR.hasValue())
431      continue;
432    // We have a format specifier.  Pass it to the callback.
433    if (!H.HandlePrintfSpecifier(FSR.getValue(), FSR.getStart(),
434                                 I - FSR.getStart(), Target))
435      return true;
436  }
437  assert(I == E && "Format string not exhausted");
438  return false;
439}
440
441bool clang::analyze_format_string::ParseFormatStringHasSArg(const char *I,
442                                                            const char *E,
443                                                            const LangOptions &LO,
444                                                            const TargetInfo &Target) {
445
446  unsigned argIndex = 0;
447
448  // Keep looking for a %s format specifier until we have exhausted the string.
449  FormatStringHandler H;
450  while (I != E) {
451    const PrintfSpecifierResult &FSR = ParsePrintfSpecifier(H, I, E, argIndex,
452                                                            LO, Target, false,
453                                                            false);
454    // Did a fail-stop error of any kind occur when parsing the specifier?
455    // If so, don't do any more processing.
456    if (FSR.shouldStop())
457      return false;
458    // Did we exhaust the string or encounter an error that
459    // we can recover from?
460    if (!FSR.hasValue())
461      continue;
462    const analyze_printf::PrintfSpecifier &FS = FSR.getValue();
463    // Return true if this a %s format specifier.
464    if (FS.getConversionSpecifier().getKind() == ConversionSpecifier::Kind::sArg)
465      return true;
466  }
467  return false;
468}
469
470bool clang::analyze_format_string::parseFormatStringHasFormattingSpecifiers(
471    const char *Begin, const char *End, const LangOptions &LO,
472    const TargetInfo &Target) {
473  unsigned ArgIndex = 0;
474  // Keep looking for a formatting specifier until we have exhausted the string.
475  FormatStringHandler H;
476  while (Begin != End) {
477    const PrintfSpecifierResult &FSR =
478        ParsePrintfSpecifier(H, Begin, End, ArgIndex, LO, Target, false, false);
479    if (FSR.shouldStop())
480      break;
481    if (FSR.hasValue())
482      return true;
483  }
484  return false;
485}
486
487//===----------------------------------------------------------------------===//
488// Methods on PrintfSpecifier.
489//===----------------------------------------------------------------------===//
490
491ArgType PrintfSpecifier::getScalarArgType(ASTContext &Ctx,
492                                          bool IsObjCLiteral) const {
493  if (CS.getKind() == ConversionSpecifier::cArg)
494    switch (LM.getKind()) {
495      case LengthModifier::None:
496        return Ctx.IntTy;
497      case LengthModifier::AsLong:
498      case LengthModifier::AsWide:
499        return ArgType(ArgType::WIntTy, "wint_t");
500      case LengthModifier::AsShort:
501        if (Ctx.getTargetInfo().getTriple().isOSMSVCRT())
502          return Ctx.IntTy;
503        [[fallthrough]];
504      default:
505        return ArgType::Invalid();
506    }
507
508  if (CS.isIntArg())
509    switch (LM.getKind()) {
510      case LengthModifier::AsLongDouble:
511        // GNU extension.
512        return Ctx.LongLongTy;
513      case LengthModifier::None:
514      case LengthModifier::AsShortLong:
515        return Ctx.IntTy;
516      case LengthModifier::AsInt32:
517        return ArgType(Ctx.IntTy, "__int32");
518      case LengthModifier::AsChar:
519        return ArgType::AnyCharTy;
520      case LengthModifier::AsShort: return Ctx.ShortTy;
521      case LengthModifier::AsLong: return Ctx.LongTy;
522      case LengthModifier::AsLongLong:
523      case LengthModifier::AsQuad:
524        return Ctx.LongLongTy;
525      case LengthModifier::AsInt64:
526        return ArgType(Ctx.LongLongTy, "__int64");
527      case LengthModifier::AsIntMax:
528        return ArgType(Ctx.getIntMaxType(), "intmax_t");
529      case LengthModifier::AsSizeT:
530        return ArgType::makeSizeT(ArgType(Ctx.getSignedSizeType(), "ssize_t"));
531      case LengthModifier::AsInt3264:
532        return Ctx.getTargetInfo().getTriple().isArch64Bit()
533                   ? ArgType(Ctx.LongLongTy, "__int64")
534                   : ArgType(Ctx.IntTy, "__int32");
535      case LengthModifier::AsPtrDiff:
536        return ArgType::makePtrdiffT(
537            ArgType(Ctx.getPointerDiffType(), "ptrdiff_t"));
538      case LengthModifier::AsAllocate:
539      case LengthModifier::AsMAllocate:
540      case LengthModifier::AsWide:
541        return ArgType::Invalid();
542    }
543
544  if (CS.isUIntArg())
545    switch (LM.getKind()) {
546      case LengthModifier::AsLongDouble:
547        // GNU extension.
548        return Ctx.UnsignedLongLongTy;
549      case LengthModifier::None:
550      case LengthModifier::AsShortLong:
551        return Ctx.UnsignedIntTy;
552      case LengthModifier::AsInt32:
553        return ArgType(Ctx.UnsignedIntTy, "unsigned __int32");
554      case LengthModifier::AsChar: return Ctx.UnsignedCharTy;
555      case LengthModifier::AsShort: return Ctx.UnsignedShortTy;
556      case LengthModifier::AsLong: return Ctx.UnsignedLongTy;
557      case LengthModifier::AsLongLong:
558      case LengthModifier::AsQuad:
559        return Ctx.UnsignedLongLongTy;
560      case LengthModifier::AsInt64:
561        return ArgType(Ctx.UnsignedLongLongTy, "unsigned __int64");
562      case LengthModifier::AsIntMax:
563        return ArgType(Ctx.getUIntMaxType(), "uintmax_t");
564      case LengthModifier::AsSizeT:
565        return ArgType::makeSizeT(ArgType(Ctx.getSizeType(), "size_t"));
566      case LengthModifier::AsInt3264:
567        return Ctx.getTargetInfo().getTriple().isArch64Bit()
568                   ? ArgType(Ctx.UnsignedLongLongTy, "unsigned __int64")
569                   : ArgType(Ctx.UnsignedIntTy, "unsigned __int32");
570      case LengthModifier::AsPtrDiff:
571        return ArgType::makePtrdiffT(
572            ArgType(Ctx.getUnsignedPointerDiffType(), "unsigned ptrdiff_t"));
573      case LengthModifier::AsAllocate:
574      case LengthModifier::AsMAllocate:
575      case LengthModifier::AsWide:
576        return ArgType::Invalid();
577    }
578
579  if (CS.isDoubleArg()) {
580    if (!VectorNumElts.isInvalid()) {
581      switch (LM.getKind()) {
582      case LengthModifier::AsShort:
583        return Ctx.HalfTy;
584      case LengthModifier::AsShortLong:
585        return Ctx.FloatTy;
586      case LengthModifier::AsLong:
587      default:
588        return Ctx.DoubleTy;
589      }
590    }
591
592    if (LM.getKind() == LengthModifier::AsLongDouble)
593      return Ctx.LongDoubleTy;
594    return Ctx.DoubleTy;
595  }
596
597  if (CS.getKind() == ConversionSpecifier::nArg) {
598    switch (LM.getKind()) {
599      case LengthModifier::None:
600        return ArgType::PtrTo(Ctx.IntTy);
601      case LengthModifier::AsChar:
602        return ArgType::PtrTo(Ctx.SignedCharTy);
603      case LengthModifier::AsShort:
604        return ArgType::PtrTo(Ctx.ShortTy);
605      case LengthModifier::AsLong:
606        return ArgType::PtrTo(Ctx.LongTy);
607      case LengthModifier::AsLongLong:
608      case LengthModifier::AsQuad:
609        return ArgType::PtrTo(Ctx.LongLongTy);
610      case LengthModifier::AsIntMax:
611        return ArgType::PtrTo(ArgType(Ctx.getIntMaxType(), "intmax_t"));
612      case LengthModifier::AsSizeT:
613        return ArgType::PtrTo(ArgType(Ctx.getSignedSizeType(), "ssize_t"));
614      case LengthModifier::AsPtrDiff:
615        return ArgType::PtrTo(ArgType(Ctx.getPointerDiffType(), "ptrdiff_t"));
616      case LengthModifier::AsLongDouble:
617        return ArgType(); // FIXME: Is this a known extension?
618      case LengthModifier::AsAllocate:
619      case LengthModifier::AsMAllocate:
620      case LengthModifier::AsInt32:
621      case LengthModifier::AsInt3264:
622      case LengthModifier::AsInt64:
623      case LengthModifier::AsWide:
624        return ArgType::Invalid();
625      case LengthModifier::AsShortLong:
626        llvm_unreachable("only used for OpenCL which doesn not handle nArg");
627    }
628  }
629
630  switch (CS.getKind()) {
631    case ConversionSpecifier::sArg:
632      if (LM.getKind() == LengthModifier::AsWideChar) {
633        if (IsObjCLiteral)
634          return ArgType(Ctx.getPointerType(Ctx.UnsignedShortTy.withConst()),
635                         "const unichar *");
636        return ArgType(ArgType::WCStrTy, "wchar_t *");
637      }
638      if (LM.getKind() == LengthModifier::AsWide)
639        return ArgType(ArgType::WCStrTy, "wchar_t *");
640      return ArgType::CStrTy;
641    case ConversionSpecifier::SArg:
642      if (IsObjCLiteral)
643        return ArgType(Ctx.getPointerType(Ctx.UnsignedShortTy.withConst()),
644                       "const unichar *");
645      if (Ctx.getTargetInfo().getTriple().isOSMSVCRT() &&
646          LM.getKind() == LengthModifier::AsShort)
647        return ArgType::CStrTy;
648      return ArgType(ArgType::WCStrTy, "wchar_t *");
649    case ConversionSpecifier::CArg:
650      if (IsObjCLiteral)
651        return ArgType(Ctx.UnsignedShortTy, "unichar");
652      if (Ctx.getTargetInfo().getTriple().isOSMSVCRT() &&
653          LM.getKind() == LengthModifier::AsShort)
654        return Ctx.IntTy;
655      return ArgType(Ctx.WideCharTy, "wchar_t");
656    case ConversionSpecifier::pArg:
657    case ConversionSpecifier::PArg:
658      return ArgType::CPointerTy;
659    case ConversionSpecifier::ObjCObjArg:
660      return ArgType::ObjCPointerTy;
661    default:
662      break;
663  }
664
665  // FIXME: Handle other cases.
666  return ArgType();
667}
668
669
670ArgType PrintfSpecifier::getArgType(ASTContext &Ctx,
671                                    bool IsObjCLiteral) const {
672  const PrintfConversionSpecifier &CS = getConversionSpecifier();
673
674  if (!CS.consumesDataArgument())
675    return ArgType::Invalid();
676
677  ArgType ScalarTy = getScalarArgType(Ctx, IsObjCLiteral);
678  if (!ScalarTy.isValid() || VectorNumElts.isInvalid())
679    return ScalarTy;
680
681  return ScalarTy.makeVectorType(Ctx, VectorNumElts.getConstantAmount());
682}
683
684bool PrintfSpecifier::fixType(QualType QT, const LangOptions &LangOpt,
685                              ASTContext &Ctx, bool IsObjCLiteral) {
686  // %n is different from other conversion specifiers; don't try to fix it.
687  if (CS.getKind() == ConversionSpecifier::nArg)
688    return false;
689
690  // Handle Objective-C objects first. Note that while the '%@' specifier will
691  // not warn for structure pointer or void pointer arguments (because that's
692  // how CoreFoundation objects are implemented), we only show a fixit for '%@'
693  // if we know it's an object (block, id, class, or __attribute__((NSObject))).
694  if (QT->isObjCRetainableType()) {
695    if (!IsObjCLiteral)
696      return false;
697
698    CS.setKind(ConversionSpecifier::ObjCObjArg);
699
700    // Disable irrelevant flags
701    HasThousandsGrouping = false;
702    HasPlusPrefix = false;
703    HasSpacePrefix = false;
704    HasAlternativeForm = false;
705    HasLeadingZeroes = false;
706    Precision.setHowSpecified(OptionalAmount::NotSpecified);
707    LM.setKind(LengthModifier::None);
708
709    return true;
710  }
711
712  // Handle strings next (char *, wchar_t *)
713  if (QT->isPointerType() && (QT->getPointeeType()->isAnyCharacterType())) {
714    CS.setKind(ConversionSpecifier::sArg);
715
716    // Disable irrelevant flags
717    HasAlternativeForm = false;
718    HasLeadingZeroes = false;
719
720    // Set the long length modifier for wide characters
721    if (QT->getPointeeType()->isWideCharType())
722      LM.setKind(LengthModifier::AsWideChar);
723    else
724      LM.setKind(LengthModifier::None);
725
726    return true;
727  }
728
729  // If it's an enum, get its underlying type.
730  if (const EnumType *ETy = QT->getAs<EnumType>())
731    QT = ETy->getDecl()->getIntegerType();
732
733  const BuiltinType *BT = QT->getAs<BuiltinType>();
734  if (!BT) {
735    const VectorType *VT = QT->getAs<VectorType>();
736    if (VT) {
737      QT = VT->getElementType();
738      BT = QT->getAs<BuiltinType>();
739      VectorNumElts = OptionalAmount(VT->getNumElements());
740    }
741  }
742
743  // We can only work with builtin types.
744  if (!BT)
745    return false;
746
747  // Set length modifier
748  switch (BT->getKind()) {
749  case BuiltinType::Bool:
750  case BuiltinType::WChar_U:
751  case BuiltinType::WChar_S:
752  case BuiltinType::Char8: // FIXME: Treat like 'char'?
753  case BuiltinType::Char16:
754  case BuiltinType::Char32:
755  case BuiltinType::UInt128:
756  case BuiltinType::Int128:
757  case BuiltinType::Half:
758  case BuiltinType::BFloat16:
759  case BuiltinType::Float16:
760  case BuiltinType::Float128:
761  case BuiltinType::Ibm128:
762  case BuiltinType::ShortAccum:
763  case BuiltinType::Accum:
764  case BuiltinType::LongAccum:
765  case BuiltinType::UShortAccum:
766  case BuiltinType::UAccum:
767  case BuiltinType::ULongAccum:
768  case BuiltinType::ShortFract:
769  case BuiltinType::Fract:
770  case BuiltinType::LongFract:
771  case BuiltinType::UShortFract:
772  case BuiltinType::UFract:
773  case BuiltinType::ULongFract:
774  case BuiltinType::SatShortAccum:
775  case BuiltinType::SatAccum:
776  case BuiltinType::SatLongAccum:
777  case BuiltinType::SatUShortAccum:
778  case BuiltinType::SatUAccum:
779  case BuiltinType::SatULongAccum:
780  case BuiltinType::SatShortFract:
781  case BuiltinType::SatFract:
782  case BuiltinType::SatLongFract:
783  case BuiltinType::SatUShortFract:
784  case BuiltinType::SatUFract:
785  case BuiltinType::SatULongFract:
786    // Various types which are non-trivial to correct.
787    return false;
788
789#define IMAGE_TYPE(ImgType, Id, SingletonId, Access, Suffix) \
790  case BuiltinType::Id:
791#include "clang/Basic/OpenCLImageTypes.def"
792#define EXT_OPAQUE_TYPE(ExtType, Id, Ext) \
793  case BuiltinType::Id:
794#include "clang/Basic/OpenCLExtensionTypes.def"
795#define SVE_TYPE(Name, Id, SingletonId) \
796  case BuiltinType::Id:
797#include "clang/Basic/AArch64SVEACLETypes.def"
798#define PPC_VECTOR_TYPE(Name, Id, Size) \
799  case BuiltinType::Id:
800#include "clang/Basic/PPCTypes.def"
801#define RVV_TYPE(Name, Id, SingletonId) case BuiltinType::Id:
802#include "clang/Basic/RISCVVTypes.def"
803#define WASM_TYPE(Name, Id, SingletonId) case BuiltinType::Id:
804#include "clang/Basic/WebAssemblyReferenceTypes.def"
805#define SIGNED_TYPE(Id, SingletonId)
806#define UNSIGNED_TYPE(Id, SingletonId)
807#define FLOATING_TYPE(Id, SingletonId)
808#define BUILTIN_TYPE(Id, SingletonId) \
809  case BuiltinType::Id:
810#include "clang/AST/BuiltinTypes.def"
811    // Misc other stuff which doesn't make sense here.
812    return false;
813
814  case BuiltinType::UInt:
815  case BuiltinType::Int:
816  case BuiltinType::Float:
817    LM.setKind(VectorNumElts.isInvalid() ?
818               LengthModifier::None : LengthModifier::AsShortLong);
819    break;
820  case BuiltinType::Double:
821    LM.setKind(VectorNumElts.isInvalid() ?
822               LengthModifier::None : LengthModifier::AsLong);
823    break;
824  case BuiltinType::Char_U:
825  case BuiltinType::UChar:
826  case BuiltinType::Char_S:
827  case BuiltinType::SChar:
828    LM.setKind(LengthModifier::AsChar);
829    break;
830
831  case BuiltinType::Short:
832  case BuiltinType::UShort:
833    LM.setKind(LengthModifier::AsShort);
834    break;
835
836  case BuiltinType::Long:
837  case BuiltinType::ULong:
838    LM.setKind(LengthModifier::AsLong);
839    break;
840
841  case BuiltinType::LongLong:
842  case BuiltinType::ULongLong:
843    LM.setKind(LengthModifier::AsLongLong);
844    break;
845
846  case BuiltinType::LongDouble:
847    LM.setKind(LengthModifier::AsLongDouble);
848    break;
849  }
850
851  // Handle size_t, ptrdiff_t, etc. that have dedicated length modifiers in C99.
852  if (LangOpt.C99 || LangOpt.CPlusPlus11)
853    namedTypeToLengthModifier(QT, LM);
854
855  // If fixing the length modifier was enough, we might be done.
856  if (hasValidLengthModifier(Ctx.getTargetInfo(), LangOpt)) {
857    // If we're going to offer a fix anyway, make sure the sign matches.
858    switch (CS.getKind()) {
859    case ConversionSpecifier::uArg:
860    case ConversionSpecifier::UArg:
861      if (QT->isSignedIntegerType())
862        CS.setKind(clang::analyze_format_string::ConversionSpecifier::dArg);
863      break;
864    case ConversionSpecifier::dArg:
865    case ConversionSpecifier::DArg:
866    case ConversionSpecifier::iArg:
867      if (QT->isUnsignedIntegerType() && !HasPlusPrefix)
868        CS.setKind(clang::analyze_format_string::ConversionSpecifier::uArg);
869      break;
870    default:
871      // Other specifiers do not have signed/unsigned variants.
872      break;
873    }
874
875    const analyze_printf::ArgType &ATR = getArgType(Ctx, IsObjCLiteral);
876    if (ATR.isValid() && ATR.matchesType(Ctx, QT))
877      return true;
878  }
879
880  // Set conversion specifier and disable any flags which do not apply to it.
881  // Let typedefs to char fall through to int, as %c is silly for uint8_t.
882  if (!QT->getAs<TypedefType>() && QT->isCharType()) {
883    CS.setKind(ConversionSpecifier::cArg);
884    LM.setKind(LengthModifier::None);
885    Precision.setHowSpecified(OptionalAmount::NotSpecified);
886    HasAlternativeForm = false;
887    HasLeadingZeroes = false;
888    HasPlusPrefix = false;
889  }
890  // Test for Floating type first as LongDouble can pass isUnsignedIntegerType
891  else if (QT->isRealFloatingType()) {
892    CS.setKind(ConversionSpecifier::fArg);
893  } else if (QT->isSignedIntegerType()) {
894    CS.setKind(ConversionSpecifier::dArg);
895    HasAlternativeForm = false;
896  } else if (QT->isUnsignedIntegerType()) {
897    CS.setKind(ConversionSpecifier::uArg);
898    HasAlternativeForm = false;
899    HasPlusPrefix = false;
900  } else {
901    llvm_unreachable("Unexpected type");
902  }
903
904  return true;
905}
906
907void PrintfSpecifier::toString(raw_ostream &os) const {
908  // Whilst some features have no defined order, we are using the order
909  // appearing in the C99 standard (ISO/IEC 9899:1999 (E) 7.19.6.1)
910  os << "%";
911
912  // Positional args
913  if (usesPositionalArg()) {
914    os << getPositionalArgIndex() << "$";
915  }
916
917  // Conversion flags
918  if (IsLeftJustified)    os << "-";
919  if (HasPlusPrefix)      os << "+";
920  if (HasSpacePrefix)     os << " ";
921  if (HasAlternativeForm) os << "#";
922  if (HasLeadingZeroes)   os << "0";
923
924  // Minimum field width
925  FieldWidth.toString(os);
926  // Precision
927  Precision.toString(os);
928
929  // Vector modifier
930  if (!VectorNumElts.isInvalid())
931    os << 'v' << VectorNumElts.getConstantAmount();
932
933  // Length modifier
934  os << LM.toString();
935  // Conversion specifier
936  os << CS.toString();
937}
938
939bool PrintfSpecifier::hasValidPlusPrefix() const {
940  if (!HasPlusPrefix)
941    return true;
942
943  // The plus prefix only makes sense for signed conversions
944  switch (CS.getKind()) {
945  case ConversionSpecifier::dArg:
946  case ConversionSpecifier::DArg:
947  case ConversionSpecifier::iArg:
948  case ConversionSpecifier::fArg:
949  case ConversionSpecifier::FArg:
950  case ConversionSpecifier::eArg:
951  case ConversionSpecifier::EArg:
952  case ConversionSpecifier::gArg:
953  case ConversionSpecifier::GArg:
954  case ConversionSpecifier::aArg:
955  case ConversionSpecifier::AArg:
956  case ConversionSpecifier::FreeBSDrArg:
957  case ConversionSpecifier::FreeBSDyArg:
958    return true;
959
960  default:
961    return false;
962  }
963}
964
965bool PrintfSpecifier::hasValidAlternativeForm() const {
966  if (!HasAlternativeForm)
967    return true;
968
969  // Alternate form flag only valid with the bBoxXaAeEfFgG conversions
970  switch (CS.getKind()) {
971  case ConversionSpecifier::bArg:
972  case ConversionSpecifier::BArg:
973  case ConversionSpecifier::oArg:
974  case ConversionSpecifier::OArg:
975  case ConversionSpecifier::xArg:
976  case ConversionSpecifier::XArg:
977  case ConversionSpecifier::aArg:
978  case ConversionSpecifier::AArg:
979  case ConversionSpecifier::eArg:
980  case ConversionSpecifier::EArg:
981  case ConversionSpecifier::fArg:
982  case ConversionSpecifier::FArg:
983  case ConversionSpecifier::gArg:
984  case ConversionSpecifier::GArg:
985  case ConversionSpecifier::FreeBSDrArg:
986  case ConversionSpecifier::FreeBSDyArg:
987    return true;
988
989  default:
990    return false;
991  }
992}
993
994bool PrintfSpecifier::hasValidLeadingZeros() const {
995  if (!HasLeadingZeroes)
996    return true;
997
998  // Leading zeroes flag only valid with the bBdiouxXaAeEfFgG conversions
999  switch (CS.getKind()) {
1000  case ConversionSpecifier::bArg:
1001  case ConversionSpecifier::BArg:
1002  case ConversionSpecifier::dArg:
1003  case ConversionSpecifier::DArg:
1004  case ConversionSpecifier::iArg:
1005  case ConversionSpecifier::oArg:
1006  case ConversionSpecifier::OArg:
1007  case ConversionSpecifier::uArg:
1008  case ConversionSpecifier::UArg:
1009  case ConversionSpecifier::xArg:
1010  case ConversionSpecifier::XArg:
1011  case ConversionSpecifier::aArg:
1012  case ConversionSpecifier::AArg:
1013  case ConversionSpecifier::eArg:
1014  case ConversionSpecifier::EArg:
1015  case ConversionSpecifier::fArg:
1016  case ConversionSpecifier::FArg:
1017  case ConversionSpecifier::gArg:
1018  case ConversionSpecifier::GArg:
1019  case ConversionSpecifier::FreeBSDrArg:
1020  case ConversionSpecifier::FreeBSDyArg:
1021    return true;
1022
1023  default:
1024    return false;
1025  }
1026}
1027
1028bool PrintfSpecifier::hasValidSpacePrefix() const {
1029  if (!HasSpacePrefix)
1030    return true;
1031
1032  // The space prefix only makes sense for signed conversions
1033  switch (CS.getKind()) {
1034  case ConversionSpecifier::dArg:
1035  case ConversionSpecifier::DArg:
1036  case ConversionSpecifier::iArg:
1037  case ConversionSpecifier::fArg:
1038  case ConversionSpecifier::FArg:
1039  case ConversionSpecifier::eArg:
1040  case ConversionSpecifier::EArg:
1041  case ConversionSpecifier::gArg:
1042  case ConversionSpecifier::GArg:
1043  case ConversionSpecifier::aArg:
1044  case ConversionSpecifier::AArg:
1045  case ConversionSpecifier::FreeBSDrArg:
1046  case ConversionSpecifier::FreeBSDyArg:
1047    return true;
1048
1049  default:
1050    return false;
1051  }
1052}
1053
1054bool PrintfSpecifier::hasValidLeftJustified() const {
1055  if (!IsLeftJustified)
1056    return true;
1057
1058  // The left justified flag is valid for all conversions except n
1059  switch (CS.getKind()) {
1060  case ConversionSpecifier::nArg:
1061    return false;
1062
1063  default:
1064    return true;
1065  }
1066}
1067
1068bool PrintfSpecifier::hasValidThousandsGroupingPrefix() const {
1069  if (!HasThousandsGrouping)
1070    return true;
1071
1072  switch (CS.getKind()) {
1073    case ConversionSpecifier::dArg:
1074    case ConversionSpecifier::DArg:
1075    case ConversionSpecifier::iArg:
1076    case ConversionSpecifier::uArg:
1077    case ConversionSpecifier::UArg:
1078    case ConversionSpecifier::fArg:
1079    case ConversionSpecifier::FArg:
1080    case ConversionSpecifier::gArg:
1081    case ConversionSpecifier::GArg:
1082      return true;
1083    default:
1084      return false;
1085  }
1086}
1087
1088bool PrintfSpecifier::hasValidPrecision() const {
1089  if (Precision.getHowSpecified() == OptionalAmount::NotSpecified)
1090    return true;
1091
1092  // Precision is only valid with the bBdiouxXaAeEfFgGsP conversions
1093  switch (CS.getKind()) {
1094  case ConversionSpecifier::bArg:
1095  case ConversionSpecifier::BArg:
1096  case ConversionSpecifier::dArg:
1097  case ConversionSpecifier::DArg:
1098  case ConversionSpecifier::iArg:
1099  case ConversionSpecifier::oArg:
1100  case ConversionSpecifier::OArg:
1101  case ConversionSpecifier::uArg:
1102  case ConversionSpecifier::UArg:
1103  case ConversionSpecifier::xArg:
1104  case ConversionSpecifier::XArg:
1105  case ConversionSpecifier::aArg:
1106  case ConversionSpecifier::AArg:
1107  case ConversionSpecifier::eArg:
1108  case ConversionSpecifier::EArg:
1109  case ConversionSpecifier::fArg:
1110  case ConversionSpecifier::FArg:
1111  case ConversionSpecifier::gArg:
1112  case ConversionSpecifier::GArg:
1113  case ConversionSpecifier::sArg:
1114  case ConversionSpecifier::FreeBSDrArg:
1115  case ConversionSpecifier::FreeBSDyArg:
1116  case ConversionSpecifier::PArg:
1117    return true;
1118
1119  default:
1120    return false;
1121  }
1122}
1123bool PrintfSpecifier::hasValidFieldWidth() const {
1124  if (FieldWidth.getHowSpecified() == OptionalAmount::NotSpecified)
1125      return true;
1126
1127  // The field width is valid for all conversions except n
1128  switch (CS.getKind()) {
1129  case ConversionSpecifier::nArg:
1130    return false;
1131
1132  default:
1133    return true;
1134  }
1135}
1136