//= ScanfFormatString.cpp - Analysis of scanf format strings ---*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
// Handling of format strings in scanf and friends.  The structure of format
// strings for fscanf() is described in C99 7.19.6.2.
11//
12//===----------------------------------------------------------------------===//
13
14#include "clang/AST/FormatString.h"
15#include "FormatStringParsing.h"
16#include "clang/Basic/TargetInfo.h"
17
18using clang::analyze_format_string::ArgType;
19using clang::analyze_format_string::FormatStringHandler;
20using clang::analyze_format_string::LengthModifier;
21using clang::analyze_format_string::OptionalAmount;
22using clang::analyze_format_string::ConversionSpecifier;
23using clang::analyze_scanf::ScanfConversionSpecifier;
24using clang::analyze_scanf::ScanfSpecifier;
25using clang::UpdateOnReturn;
26using namespace clang;
27
28typedef clang::analyze_format_string::SpecifierResult<ScanfSpecifier>
29        ScanfSpecifierResult;
30
31static bool ParseScanList(FormatStringHandler &H,
32                          ScanfConversionSpecifier &CS,
33                          const char *&Beg, const char *E) {
34  const char *I = Beg;
35  const char *start = I - 1;
36  UpdateOnReturn <const char*> UpdateBeg(Beg, I);
37
38  // No more characters?
39  if (I == E) {
40    H.HandleIncompleteScanList(start, I);
41    return true;
42  }
43
44  // Special case: ']' is the first character.
45  if (*I == ']') {
46    if (++I == E) {
47      H.HandleIncompleteScanList(start, I - 1);
48      return true;
49    }
50  }
51
52  // Special case: "^]" are the first characters.
53  if (I + 1 != E && I[0] == '^' && I[1] == ']') {
54    I += 2;
55    if (I == E) {
56      H.HandleIncompleteScanList(start, I - 1);
57      return true;
58    }
59  }
60
61  // Look for a ']' character which denotes the end of the scan list.
62  while (*I != ']') {
63    if (++I == E) {
64      H.HandleIncompleteScanList(start, I - 1);
65      return true;
66    }
67  }
68
69  CS.setEndScanList(I);
70  return false;
71}
72
73// FIXME: Much of this is copy-paste from ParsePrintfSpecifier.
74// We can possibly refactor.
75static ScanfSpecifierResult ParseScanfSpecifier(FormatStringHandler &H,
76                                                const char *&Beg,
77                                                const char *E,
78                                                unsigned &argIndex,
79                                                const LangOptions &LO,
80                                                const TargetInfo &Target) {
81  using namespace clang::analyze_format_string;
82  using namespace clang::analyze_scanf;
83  const char *I = Beg;
84  const char *Start = nullptr;
85  UpdateOnReturn <const char*> UpdateBeg(Beg, I);
86
87    // Look for a '%' character that indicates the start of a format specifier.
88  for ( ; I != E ; ++I) {
89    char c = *I;
90    if (c == '\0') {
91        // Detect spurious null characters, which are likely errors.
92      H.HandleNullChar(I);
93      return true;
94    }
95    if (c == '%') {
96      Start = I++;  // Record the start of the format specifier.
97      break;
98    }
99  }
100
101    // No format specifier found?
102  if (!Start)
103    return false;
104
105  if (I == E) {
106      // No more characters left?
107    H.HandleIncompleteSpecifier(Start, E - Start);
108    return true;
109  }
110
111  ScanfSpecifier FS;
112  if (ParseArgPosition(H, FS, Start, I, E))
113    return true;
114
115  if (I == E) {
116      // No more characters left?
117    H.HandleIncompleteSpecifier(Start, E - Start);
118    return true;
119  }
120
121  // Look for '*' flag if it is present.
122  if (*I == '*') {
123    FS.setSuppressAssignment(I);
124    if (++I == E) {
125      H.HandleIncompleteSpecifier(Start, E - Start);
126      return true;
127    }
128  }
129
130  // Look for the field width (if any).  Unlike printf, this is either
131  // a fixed integer or isn't present.
132  const OptionalAmount &Amt = clang::analyze_format_string::ParseAmount(I, E);
133  if (Amt.getHowSpecified() != OptionalAmount::NotSpecified) {
134    assert(Amt.getHowSpecified() == OptionalAmount::Constant);
135    FS.setFieldWidth(Amt);
136
137    if (I == E) {
138      // No more characters left?
139      H.HandleIncompleteSpecifier(Start, E - Start);
140      return true;
141    }
142  }
143
144  // Look for the length modifier.
145  if (ParseLengthModifier(FS, I, E, LO, /*IsScanf=*/true) && I == E) {
146      // No more characters left?
147    H.HandleIncompleteSpecifier(Start, E - Start);
148    return true;
149  }
150
151  // Detect spurious null characters, which are likely errors.
152  if (*I == '\0') {
153    H.HandleNullChar(I);
154    return true;
155  }
156
157  // Finally, look for the conversion specifier.
158  const char *conversionPosition = I++;
159  ScanfConversionSpecifier::Kind k = ScanfConversionSpecifier::InvalidSpecifier;
160  switch (*conversionPosition) {
161    default:
162      break;
163    case '%': k = ConversionSpecifier::PercentArg;   break;
164    case 'A': k = ConversionSpecifier::AArg; break;
165    case 'E': k = ConversionSpecifier::EArg; break;
166    case 'F': k = ConversionSpecifier::FArg; break;
167    case 'G': k = ConversionSpecifier::GArg; break;
168    case 'X': k = ConversionSpecifier::XArg; break;
169    case 'a': k = ConversionSpecifier::aArg; break;
170    case 'd': k = ConversionSpecifier::dArg; break;
171    case 'e': k = ConversionSpecifier::eArg; break;
172    case 'f': k = ConversionSpecifier::fArg; break;
173    case 'g': k = ConversionSpecifier::gArg; break;
174    case 'i': k = ConversionSpecifier::iArg; break;
175    case 'n': k = ConversionSpecifier::nArg; break;
176    case 'c': k = ConversionSpecifier::cArg; break;
177    case 'C': k = ConversionSpecifier::CArg; break;
178    case 'S': k = ConversionSpecifier::SArg; break;
179    case '[': k = ConversionSpecifier::ScanListArg; break;
180    case 'u': k = ConversionSpecifier::uArg; break;
181    case 'x': k = ConversionSpecifier::xArg; break;
182    case 'o': k = ConversionSpecifier::oArg; break;
183    case 's': k = ConversionSpecifier::sArg; break;
184    case 'p': k = ConversionSpecifier::pArg; break;
185    // Apple extensions
186      // Apple-specific
187    case 'D':
188      if (Target.getTriple().isOSDarwin())
189        k = ConversionSpecifier::DArg;
190      break;
191    case 'O':
192      if (Target.getTriple().isOSDarwin())
193        k = ConversionSpecifier::OArg;
194      break;
195    case 'U':
196      if (Target.getTriple().isOSDarwin())
197        k = ConversionSpecifier::UArg;
198      break;
199  }
200  ScanfConversionSpecifier CS(conversionPosition, k);
201  if (k == ScanfConversionSpecifier::ScanListArg) {
202    if (ParseScanList(H, CS, I, E))
203      return true;
204  }
205  FS.setConversionSpecifier(CS);
206  if (CS.consumesDataArgument() && !FS.getSuppressAssignment()
207      && !FS.usesPositionalArg())
208    FS.setArgIndex(argIndex++);
209
210  // FIXME: '%' and '*' doesn't make sense.  Issue a warning.
211  // FIXME: 'ConsumedSoFar' and '*' doesn't make sense.
212
213  if (k == ScanfConversionSpecifier::InvalidSpecifier) {
214    unsigned Len = I - Beg;
215    if (ParseUTF8InvalidSpecifier(Beg, E, Len)) {
216      CS.setEndScanList(Beg + Len);
217      FS.setConversionSpecifier(CS);
218    }
219    // Assume the conversion takes one argument.
220    return !H.HandleInvalidScanfConversionSpecifier(FS, Beg, Len);
221  }
222  return ScanfSpecifierResult(Start, FS);
223}
224
225ArgType ScanfSpecifier::getArgType(ASTContext &Ctx) const {
226  const ScanfConversionSpecifier &CS = getConversionSpecifier();
227
228  if (!CS.consumesDataArgument())
229    return ArgType::Invalid();
230
231  switch(CS.getKind()) {
232    // Signed int.
233    case ConversionSpecifier::dArg:
234    case ConversionSpecifier::DArg:
235    case ConversionSpecifier::iArg:
236      switch (LM.getKind()) {
237        case LengthModifier::None:
238          return ArgType::PtrTo(Ctx.IntTy);
239        case LengthModifier::AsChar:
240          return ArgType::PtrTo(ArgType::AnyCharTy);
241        case LengthModifier::AsShort:
242          return ArgType::PtrTo(Ctx.ShortTy);
243        case LengthModifier::AsLong:
244          return ArgType::PtrTo(Ctx.LongTy);
245        case LengthModifier::AsLongLong:
246        case LengthModifier::AsQuad:
247          return ArgType::PtrTo(Ctx.LongLongTy);
248        case LengthModifier::AsInt64:
249          return ArgType::PtrTo(ArgType(Ctx.LongLongTy, "__int64"));
250        case LengthModifier::AsIntMax:
251          return ArgType::PtrTo(ArgType(Ctx.getIntMaxType(), "intmax_t"));
252        case LengthModifier::AsSizeT:
253          return ArgType::PtrTo(ArgType(Ctx.getSignedSizeType(), "ssize_t"));
254        case LengthModifier::AsPtrDiff:
255          return ArgType::PtrTo(ArgType(Ctx.getPointerDiffType(), "ptrdiff_t"));
256        case LengthModifier::AsLongDouble:
257          // GNU extension.
258          return ArgType::PtrTo(Ctx.LongLongTy);
259        case LengthModifier::AsAllocate:
260        case LengthModifier::AsMAllocate:
261        case LengthModifier::AsInt32:
262        case LengthModifier::AsInt3264:
263        case LengthModifier::AsWide:
264        case LengthModifier::AsShortLong:
265          return ArgType::Invalid();
266      }
267      llvm_unreachable("Unsupported LengthModifier Type");
268
269    // Unsigned int.
270    case ConversionSpecifier::oArg:
271    case ConversionSpecifier::OArg:
272    case ConversionSpecifier::uArg:
273    case ConversionSpecifier::UArg:
274    case ConversionSpecifier::xArg:
275    case ConversionSpecifier::XArg:
276      switch (LM.getKind()) {
277        case LengthModifier::None:
278          return ArgType::PtrTo(Ctx.UnsignedIntTy);
279        case LengthModifier::AsChar:
280          return ArgType::PtrTo(Ctx.UnsignedCharTy);
281        case LengthModifier::AsShort:
282          return ArgType::PtrTo(Ctx.UnsignedShortTy);
283        case LengthModifier::AsLong:
284          return ArgType::PtrTo(Ctx.UnsignedLongTy);
285        case LengthModifier::AsLongLong:
286        case LengthModifier::AsQuad:
287          return ArgType::PtrTo(Ctx.UnsignedLongLongTy);
288        case LengthModifier::AsInt64:
289          return ArgType::PtrTo(ArgType(Ctx.UnsignedLongLongTy, "unsigned __int64"));
290        case LengthModifier::AsIntMax:
291          return ArgType::PtrTo(ArgType(Ctx.getUIntMaxType(), "uintmax_t"));
292        case LengthModifier::AsSizeT:
293          return ArgType::PtrTo(ArgType(Ctx.getSizeType(), "size_t"));
294        case LengthModifier::AsPtrDiff:
295          return ArgType::PtrTo(
296              ArgType(Ctx.getUnsignedPointerDiffType(), "unsigned ptrdiff_t"));
297        case LengthModifier::AsLongDouble:
298          // GNU extension.
299          return ArgType::PtrTo(Ctx.UnsignedLongLongTy);
300        case LengthModifier::AsAllocate:
301        case LengthModifier::AsMAllocate:
302        case LengthModifier::AsInt32:
303        case LengthModifier::AsInt3264:
304        case LengthModifier::AsWide:
305        case LengthModifier::AsShortLong:
306          return ArgType::Invalid();
307      }
308      llvm_unreachable("Unsupported LengthModifier Type");
309
310    // Float.
311    case ConversionSpecifier::aArg:
312    case ConversionSpecifier::AArg:
313    case ConversionSpecifier::eArg:
314    case ConversionSpecifier::EArg:
315    case ConversionSpecifier::fArg:
316    case ConversionSpecifier::FArg:
317    case ConversionSpecifier::gArg:
318    case ConversionSpecifier::GArg:
319      switch (LM.getKind()) {
320        case LengthModifier::None:
321          return ArgType::PtrTo(Ctx.FloatTy);
322        case LengthModifier::AsLong:
323          return ArgType::PtrTo(Ctx.DoubleTy);
324        case LengthModifier::AsLongDouble:
325          return ArgType::PtrTo(Ctx.LongDoubleTy);
326        default:
327          return ArgType::Invalid();
328      }
329
330    // Char, string and scanlist.
331    case ConversionSpecifier::cArg:
332    case ConversionSpecifier::sArg:
333    case ConversionSpecifier::ScanListArg:
334      switch (LM.getKind()) {
335        case LengthModifier::None:
336          return ArgType::PtrTo(ArgType::AnyCharTy);
337        case LengthModifier::AsLong:
338        case LengthModifier::AsWide:
339          return ArgType::PtrTo(ArgType(Ctx.getWideCharType(), "wchar_t"));
340        case LengthModifier::AsAllocate:
341        case LengthModifier::AsMAllocate:
342          return ArgType::PtrTo(ArgType::CStrTy);
343        case LengthModifier::AsShort:
344          if (Ctx.getTargetInfo().getTriple().isOSMSVCRT())
345            return ArgType::PtrTo(ArgType::AnyCharTy);
346          LLVM_FALLTHROUGH;
347        default:
348          return ArgType::Invalid();
349      }
350    case ConversionSpecifier::CArg:
351    case ConversionSpecifier::SArg:
352      // FIXME: Mac OS X specific?
353      switch (LM.getKind()) {
354        case LengthModifier::None:
355        case LengthModifier::AsWide:
356          return ArgType::PtrTo(ArgType(Ctx.getWideCharType(), "wchar_t"));
357        case LengthModifier::AsAllocate:
358        case LengthModifier::AsMAllocate:
359          return ArgType::PtrTo(ArgType(ArgType::WCStrTy, "wchar_t *"));
360        case LengthModifier::AsShort:
361          if (Ctx.getTargetInfo().getTriple().isOSMSVCRT())
362            return ArgType::PtrTo(ArgType::AnyCharTy);
363          LLVM_FALLTHROUGH;
364        default:
365          return ArgType::Invalid();
366      }
367
368    // Pointer.
369    case ConversionSpecifier::pArg:
370      return ArgType::PtrTo(ArgType::CPointerTy);
371
372    // Write-back.
373    case ConversionSpecifier::nArg:
374      switch (LM.getKind()) {
375        case LengthModifier::None:
376          return ArgType::PtrTo(Ctx.IntTy);
377        case LengthModifier::AsChar:
378          return ArgType::PtrTo(Ctx.SignedCharTy);
379        case LengthModifier::AsShort:
380          return ArgType::PtrTo(Ctx.ShortTy);
381        case LengthModifier::AsLong:
382          return ArgType::PtrTo(Ctx.LongTy);
383        case LengthModifier::AsLongLong:
384        case LengthModifier::AsQuad:
385          return ArgType::PtrTo(Ctx.LongLongTy);
386        case LengthModifier::AsInt64:
387          return ArgType::PtrTo(ArgType(Ctx.LongLongTy, "__int64"));
388        case LengthModifier::AsIntMax:
389          return ArgType::PtrTo(ArgType(Ctx.getIntMaxType(), "intmax_t"));
390        case LengthModifier::AsSizeT:
391          return ArgType::PtrTo(ArgType(Ctx.getSignedSizeType(), "ssize_t"));
392        case LengthModifier::AsPtrDiff:
393          return ArgType::PtrTo(ArgType(Ctx.getPointerDiffType(), "ptrdiff_t"));
394        case LengthModifier::AsLongDouble:
395          return ArgType(); // FIXME: Is this a known extension?
396        case LengthModifier::AsAllocate:
397        case LengthModifier::AsMAllocate:
398        case LengthModifier::AsInt32:
399        case LengthModifier::AsInt3264:
400        case LengthModifier::AsWide:
401        case LengthModifier::AsShortLong:
402          return ArgType::Invalid();
403        }
404
405    default:
406      break;
407  }
408
409  return ArgType();
410}
411
412bool ScanfSpecifier::fixType(QualType QT, QualType RawQT,
413                             const LangOptions &LangOpt,
414                             ASTContext &Ctx) {
415
416  // %n is different from other conversion specifiers; don't try to fix it.
417  if (CS.getKind() == ConversionSpecifier::nArg)
418    return false;
419
420  if (!QT->isPointerType())
421    return false;
422
423  QualType PT = QT->getPointeeType();
424
425  // If it's an enum, get its underlying type.
426  if (const EnumType *ETy = PT->getAs<EnumType>()) {
427    // Don't try to fix incomplete enums.
428    if (!ETy->getDecl()->isComplete())
429      return false;
430    PT = ETy->getDecl()->getIntegerType();
431  }
432
433  const BuiltinType *BT = PT->getAs<BuiltinType>();
434  if (!BT)
435    return false;
436
437  // Pointer to a character.
438  if (PT->isAnyCharacterType()) {
439    CS.setKind(ConversionSpecifier::sArg);
440    if (PT->isWideCharType())
441      LM.setKind(LengthModifier::AsWideChar);
442    else
443      LM.setKind(LengthModifier::None);
444
445    // If we know the target array length, we can use it as a field width.
446    if (const ConstantArrayType *CAT = Ctx.getAsConstantArrayType(RawQT)) {
447      if (CAT->getSizeModifier() == ArrayType::Normal)
448        FieldWidth = OptionalAmount(OptionalAmount::Constant,
449                                    CAT->getSize().getZExtValue() - 1,
450                                    "", 0, false);
451
452    }
453    return true;
454  }
455
456  // Figure out the length modifier.
457  switch (BT->getKind()) {
458    // no modifier
459    case BuiltinType::UInt:
460    case BuiltinType::Int:
461    case BuiltinType::Float:
462      LM.setKind(LengthModifier::None);
463      break;
464
465    // hh
466    case BuiltinType::Char_U:
467    case BuiltinType::UChar:
468    case BuiltinType::Char_S:
469    case BuiltinType::SChar:
470      LM.setKind(LengthModifier::AsChar);
471      break;
472
473    // h
474    case BuiltinType::Short:
475    case BuiltinType::UShort:
476      LM.setKind(LengthModifier::AsShort);
477      break;
478
479    // l
480    case BuiltinType::Long:
481    case BuiltinType::ULong:
482    case BuiltinType::Double:
483      LM.setKind(LengthModifier::AsLong);
484      break;
485
486    // ll
487    case BuiltinType::LongLong:
488    case BuiltinType::ULongLong:
489      LM.setKind(LengthModifier::AsLongLong);
490      break;
491
492    // L
493    case BuiltinType::LongDouble:
494      LM.setKind(LengthModifier::AsLongDouble);
495      break;
496
497    // Don't know.
498    default:
499      return false;
500  }
501
502  // Handle size_t, ptrdiff_t, etc. that have dedicated length modifiers in C99.
503  if (isa<TypedefType>(PT) && (LangOpt.C99 || LangOpt.CPlusPlus11))
504    namedTypeToLengthModifier(PT, LM);
505
506  // If fixing the length modifier was enough, we are done.
507  if (hasValidLengthModifier(Ctx.getTargetInfo(), LangOpt)) {
508    const analyze_scanf::ArgType &AT = getArgType(Ctx);
509    if (AT.isValid() && AT.matchesType(Ctx, QT))
510      return true;
511  }
512
513  // Figure out the conversion specifier.
514  if (PT->isRealFloatingType())
515    CS.setKind(ConversionSpecifier::fArg);
516  else if (PT->isSignedIntegerType())
517    CS.setKind(ConversionSpecifier::dArg);
518  else if (PT->isUnsignedIntegerType())
519    CS.setKind(ConversionSpecifier::uArg);
520  else
521    llvm_unreachable("Unexpected type");
522
523  return true;
524}
525
526void ScanfSpecifier::toString(raw_ostream &os) const {
527  os << "%";
528
529  if (usesPositionalArg())
530    os << getPositionalArgIndex() << "$";
531  if (SuppressAssignment)
532    os << "*";
533
534  FieldWidth.toString(os);
535  os << LM.toString();
536  os << CS.toString();
537}
538
539bool clang::analyze_format_string::ParseScanfString(FormatStringHandler &H,
540                                                    const char *I,
541                                                    const char *E,
542                                                    const LangOptions &LO,
543                                                    const TargetInfo &Target) {
544
545  unsigned argIndex = 0;
546
547  // Keep looking for a format specifier until we have exhausted the string.
548  while (I != E) {
549    const ScanfSpecifierResult &FSR = ParseScanfSpecifier(H, I, E, argIndex,
550                                                          LO, Target);
551    // Did a fail-stop error of any kind occur when parsing the specifier?
552    // If so, don't do any more processing.
553    if (FSR.shouldStop())
554      return true;
555      // Did we exhaust the string or encounter an error that
556      // we can recover from?
557    if (!FSR.hasValue())
558      continue;
559      // We have a format specifier.  Pass it to the callback.
560    if (!H.HandleScanfSpecifier(FSR.getValue(), FSR.getStart(),
561                                I - FSR.getStart())) {
562      return true;
563    }
564  }
565  assert(I == E && "Format string not exhausted");
566  return false;
567}
568