ScanfFormatString.cpp revision 261991
//= ScanfFormatString.cpp - Analysis of scanf format strings ---*- C++ -*-===//
2187277Sdas//
3187277Sdas//                     The LLVM Compiler Infrastructure
4187277Sdas//
5187277Sdas// This file is distributed under the University of Illinois Open Source
6187277Sdas// License. See LICENSE.TXT for details.
7187277Sdas//
8187277Sdas//===----------------------------------------------------------------------===//
9187277Sdas//
// Handling of format strings in scanf and friends.  The structure of format
// strings for fscanf() is described in C99 7.19.6.2.
12187277Sdas//
13187277Sdas//===----------------------------------------------------------------------===//
14187277Sdas
15187277Sdas#include "clang/Analysis/Analyses/FormatString.h"
16187277Sdas#include "FormatStringParsing.h"
17187277Sdas#include "clang/Basic/TargetInfo.h"
18187277Sdas
19187277Sdasusing clang::analyze_format_string::ArgType;
20187277Sdasusing clang::analyze_format_string::FormatStringHandler;
21187277Sdasusing clang::analyze_format_string::LengthModifier;
22187277Sdasusing clang::analyze_format_string::OptionalAmount;
23187277Sdasusing clang::analyze_format_string::ConversionSpecifier;
24187277Sdasusing clang::analyze_scanf::ScanfConversionSpecifier;
25187277Sdasusing clang::analyze_scanf::ScanfSpecifier;
26187277Sdasusing clang::UpdateOnReturn;
27187277Sdasusing namespace clang;
28187277Sdas
29187277Sdastypedef clang::analyze_format_string::SpecifierResult<ScanfSpecifier>
30187277Sdas        ScanfSpecifierResult;
31187277Sdas
32187277Sdasstatic bool ParseScanList(FormatStringHandler &H,
33187277Sdas                          ScanfConversionSpecifier &CS,
34187277Sdas                          const char *&Beg, const char *E) {
35187277Sdas  const char *I = Beg;
36187277Sdas  const char *start = I - 1;
37187277Sdas  UpdateOnReturn <const char*> UpdateBeg(Beg, I);
38187277Sdas
39187277Sdas  // No more characters?
40187284Sdas  if (I == E) {
41187284Sdas    H.HandleIncompleteScanList(start, I);
42187284Sdas    return true;
43187284Sdas  }
44187284Sdas
45187284Sdas  // Special case: ']' is the first character.
46187284Sdas  if (*I == ']') {
47187284Sdas    if (++I == E) {
48187284Sdas      H.HandleIncompleteScanList(start, I - 1);
49187284Sdas      return true;
50187284Sdas    }
51187284Sdas  }
52187284Sdas
53187284Sdas  // Look for a ']' character which denotes the end of the scan list.
54187284Sdas  while (*I != ']') {
55187284Sdas    if (++I == E) {
56187284Sdas      H.HandleIncompleteScanList(start, I - 1);
57187284Sdas      return true;
58187284Sdas    }
59187284Sdas  }
60187284Sdas
61187284Sdas  CS.setEndScanList(I);
62187277Sdas  return false;
63187277Sdas}
64187277Sdas
65187277Sdas// FIXME: Much of this is copy-paste from ParsePrintfSpecifier.
66187277Sdas// We can possibly refactor.
67187277Sdasstatic ScanfSpecifierResult ParseScanfSpecifier(FormatStringHandler &H,
68187277Sdas                                                const char *&Beg,
69187277Sdas                                                const char *E,
70187277Sdas                                                unsigned &argIndex,
71187277Sdas                                                const LangOptions &LO,
72187277Sdas                                                const TargetInfo &Target) {
73187354Sdas
74187277Sdas  using namespace clang::analyze_scanf;
75187277Sdas  const char *I = Beg;
76187277Sdas  const char *Start = 0;
77187277Sdas  UpdateOnReturn <const char*> UpdateBeg(Beg, I);
78187277Sdas
79187277Sdas    // Look for a '%' character that indicates the start of a format specifier.
80187277Sdas  for ( ; I != E ; ++I) {
81187277Sdas    char c = *I;
82187277Sdas    if (c == '\0') {
83187277Sdas        // Detect spurious null characters, which are likely errors.
84187277Sdas      H.HandleNullChar(I);
85187277Sdas      return true;
86187277Sdas    }
87187354Sdas    if (c == '%') {
88187354Sdas      Start = I++;  // Record the start of the format specifier.
89187277Sdas      break;
90187354Sdas    }
91187277Sdas  }
92187354Sdas
93187354Sdas    // No format specifier found?
94187277Sdas  if (!Start)
95187277Sdas    return false;
96187277Sdas
97187277Sdas  if (I == E) {
98187277Sdas      // No more characters left?
99187277Sdas    H.HandleIncompleteSpecifier(Start, E - Start);
100187277Sdas    return true;
101187277Sdas  }
102187277Sdas
103187277Sdas  ScanfSpecifier FS;
104187277Sdas  if (ParseArgPosition(H, FS, Start, I, E))
105187277Sdas    return true;
106187277Sdas
107187277Sdas  if (I == E) {
108187277Sdas      // No more characters left?
109187277Sdas    H.HandleIncompleteSpecifier(Start, E - Start);
110187277Sdas    return true;
111187277Sdas  }
112187277Sdas
113187277Sdas  // Look for '*' flag if it is present.
114187354Sdas  if (*I == '*') {
115187277Sdas    FS.setSuppressAssignment(I);
116187354Sdas    if (++I == E) {
117187354Sdas      H.HandleIncompleteSpecifier(Start, E - Start);
118187354Sdas      return true;
119187277Sdas    }
120187354Sdas  }
121187277Sdas
122187277Sdas  // Look for the field width (if any).  Unlike printf, this is either
123187277Sdas  // a fixed integer or isn't present.
124187277Sdas  const OptionalAmount &Amt = clang::analyze_format_string::ParseAmount(I, E);
125187277Sdas  if (Amt.getHowSpecified() != OptionalAmount::NotSpecified) {
126187277Sdas    assert(Amt.getHowSpecified() == OptionalAmount::Constant);
127187277Sdas    FS.setFieldWidth(Amt);
128187277Sdas
129187277Sdas    if (I == E) {
130187277Sdas      // No more characters left?
131187277Sdas      H.HandleIncompleteSpecifier(Start, E - Start);
132187277Sdas      return true;
133187277Sdas    }
134187277Sdas  }
135187277Sdas
136187277Sdas  // Look for the length modifier.
137187277Sdas  if (ParseLengthModifier(FS, I, E, LO, /*scanf=*/true) && I == E) {
138187354Sdas      // No more characters left?
139187354Sdas    H.HandleIncompleteSpecifier(Start, E - Start);
140187354Sdas    return true;
141187354Sdas  }
142187354Sdas
143187354Sdas  // Detect spurious null characters, which are likely errors.
144187354Sdas  if (*I == '\0') {
145187277Sdas    H.HandleNullChar(I);
146187277Sdas    return true;
147187277Sdas  }
148187277Sdas
149187277Sdas  // Finally, look for the conversion specifier.
150187277Sdas  const char *conversionPosition = I++;
151187277Sdas  ScanfConversionSpecifier::Kind k = ScanfConversionSpecifier::InvalidSpecifier;
152187277Sdas  switch (*conversionPosition) {
153187284Sdas    default:
154187284Sdas      break;
155187284Sdas    case '%': k = ConversionSpecifier::PercentArg;   break;
156187284Sdas    case 'A': k = ConversionSpecifier::AArg; break;
157187284Sdas    case 'E': k = ConversionSpecifier::EArg; break;
158187284Sdas    case 'F': k = ConversionSpecifier::FArg; break;
159187284Sdas    case 'G': k = ConversionSpecifier::GArg; break;
160187284Sdas    case 'X': k = ConversionSpecifier::XArg; break;
161187284Sdas    case 'a': k = ConversionSpecifier::aArg; break;
162187284Sdas    case 'd': k = ConversionSpecifier::dArg; break;
163187284Sdas    case 'e': k = ConversionSpecifier::eArg; break;
164187284Sdas    case 'f': k = ConversionSpecifier::fArg; break;
165187284Sdas    case 'g': k = ConversionSpecifier::gArg; break;
166187284Sdas    case 'i': k = ConversionSpecifier::iArg; break;
167187284Sdas    case 'n': k = ConversionSpecifier::nArg; break;
168187284Sdas    case 'c': k = ConversionSpecifier::cArg; break;
169187284Sdas    case 'C': k = ConversionSpecifier::CArg; break;
170187284Sdas    case 'S': k = ConversionSpecifier::SArg; break;
171187284Sdas    case '[': k = ConversionSpecifier::ScanListArg; break;
172187284Sdas    case 'u': k = ConversionSpecifier::uArg; break;
173187284Sdas    case 'x': k = ConversionSpecifier::xArg; break;
174187284Sdas    case 'o': k = ConversionSpecifier::oArg; break;
175187284Sdas    case 's': k = ConversionSpecifier::sArg; break;
176187284Sdas    case 'p': k = ConversionSpecifier::pArg; break;
177187284Sdas    // Apple extensions
178187284Sdas      // Apple-specific
179187284Sdas    case 'D':
180187284Sdas      if (Target.getTriple().isOSDarwin())
181187284Sdas        k = ConversionSpecifier::DArg;
182187284Sdas      break;
183187284Sdas    case 'O':
184187284Sdas      if (Target.getTriple().isOSDarwin())
185187284Sdas        k = ConversionSpecifier::OArg;
186187284Sdas      break;
187187284Sdas    case 'U':
188187284Sdas      if (Target.getTriple().isOSDarwin())
189187284Sdas        k = ConversionSpecifier::UArg;
190187284Sdas      break;
191187284Sdas  }
192187284Sdas  ScanfConversionSpecifier CS(conversionPosition, k);
193187284Sdas  if (k == ScanfConversionSpecifier::ScanListArg) {
194187284Sdas    if (ParseScanList(H, CS, I, E))
195187284Sdas      return true;
196187284Sdas  }
197187284Sdas  FS.setConversionSpecifier(CS);
198187284Sdas  if (CS.consumesDataArgument() && !FS.getSuppressAssignment()
199187284Sdas      && !FS.usesPositionalArg())
200187284Sdas    FS.setArgIndex(argIndex++);
201187284Sdas
202187284Sdas  // FIXME: '%' and '*' doesn't make sense.  Issue a warning.
203187284Sdas  // FIXME: 'ConsumedSoFar' and '*' doesn't make sense.
204187284Sdas
205187284Sdas  if (k == ScanfConversionSpecifier::InvalidSpecifier) {
206187284Sdas    // Assume the conversion takes one argument.
207187284Sdas    return !H.HandleInvalidScanfConversionSpecifier(FS, Beg, I - Beg);
208187284Sdas  }
209187284Sdas  return ScanfSpecifierResult(Start, FS);
210187284Sdas}
211187284Sdas
212187284SdasArgType ScanfSpecifier::getArgType(ASTContext &Ctx) const {
213187284Sdas  const ScanfConversionSpecifier &CS = getConversionSpecifier();
214187284Sdas
215187284Sdas  if (!CS.consumesDataArgument())
216187284Sdas    return ArgType::Invalid();
217187284Sdas
218187284Sdas  switch(CS.getKind()) {
219187284Sdas    // Signed int.
220187284Sdas    case ConversionSpecifier::dArg:
221187284Sdas    case ConversionSpecifier::DArg:
222187284Sdas    case ConversionSpecifier::iArg:
223187284Sdas      switch (LM.getKind()) {
224187284Sdas        case LengthModifier::None:
225187284Sdas          return ArgType::PtrTo(Ctx.IntTy);
226187284Sdas        case LengthModifier::AsChar:
227187284Sdas          return ArgType::PtrTo(ArgType::AnyCharTy);
228187284Sdas        case LengthModifier::AsShort:
229187284Sdas          return ArgType::PtrTo(Ctx.ShortTy);
230187284Sdas        case LengthModifier::AsLong:
231187284Sdas          return ArgType::PtrTo(Ctx.LongTy);
232187284Sdas        case LengthModifier::AsLongLong:
233187284Sdas        case LengthModifier::AsQuad:
234187284Sdas          return ArgType::PtrTo(Ctx.LongLongTy);
235187284Sdas        case LengthModifier::AsInt64:
236187284Sdas          return ArgType::PtrTo(ArgType(Ctx.LongLongTy, "__int64"));
237187284Sdas        case LengthModifier::AsIntMax:
238187284Sdas          return ArgType::PtrTo(ArgType(Ctx.getIntMaxType(), "intmax_t"));
239187284Sdas        case LengthModifier::AsSizeT:
240187284Sdas          // FIXME: ssize_t.
241187284Sdas          return ArgType();
242187284Sdas        case LengthModifier::AsPtrDiff:
243187284Sdas          return ArgType::PtrTo(ArgType(Ctx.getPointerDiffType(), "ptrdiff_t"));
244187284Sdas        case LengthModifier::AsLongDouble:
245187284Sdas          // GNU extension.
246187284Sdas          return ArgType::PtrTo(Ctx.LongLongTy);
247187284Sdas        case LengthModifier::AsAllocate:
248187284Sdas        case LengthModifier::AsMAllocate:
249187284Sdas        case LengthModifier::AsInt32:
250187284Sdas        case LengthModifier::AsInt3264:
251187284Sdas          return ArgType::Invalid();
252187284Sdas      }
253187284Sdas
254187284Sdas    // Unsigned int.
255187284Sdas    case ConversionSpecifier::oArg:
256187284Sdas    case ConversionSpecifier::OArg:
257187284Sdas    case ConversionSpecifier::uArg:
258187284Sdas    case ConversionSpecifier::UArg:
259187284Sdas    case ConversionSpecifier::xArg:
260187284Sdas    case ConversionSpecifier::XArg:
261187284Sdas      switch (LM.getKind()) {
262187284Sdas        case LengthModifier::None:
263187284Sdas          return ArgType::PtrTo(Ctx.UnsignedIntTy);
264187284Sdas        case LengthModifier::AsChar:
265187284Sdas          return ArgType::PtrTo(Ctx.UnsignedCharTy);
266187284Sdas        case LengthModifier::AsShort:
267187284Sdas          return ArgType::PtrTo(Ctx.UnsignedShortTy);
268187284Sdas        case LengthModifier::AsLong:
269187284Sdas          return ArgType::PtrTo(Ctx.UnsignedLongTy);
270187284Sdas        case LengthModifier::AsLongLong:
271187284Sdas        case LengthModifier::AsQuad:
272187284Sdas          return ArgType::PtrTo(Ctx.UnsignedLongLongTy);
273187284Sdas        case LengthModifier::AsInt64:
274187284Sdas          return ArgType::PtrTo(ArgType(Ctx.UnsignedLongLongTy, "unsigned __int64"));
275187284Sdas        case LengthModifier::AsIntMax:
276187284Sdas          return ArgType::PtrTo(ArgType(Ctx.getUIntMaxType(), "uintmax_t"));
277187284Sdas        case LengthModifier::AsSizeT:
278187284Sdas          return ArgType::PtrTo(ArgType(Ctx.getSizeType(), "size_t"));
279187284Sdas        case LengthModifier::AsPtrDiff:
280187284Sdas          // FIXME: Unsigned version of ptrdiff_t?
281187284Sdas          return ArgType();
282187284Sdas        case LengthModifier::AsLongDouble:
283187284Sdas          // GNU extension.
284187284Sdas          return ArgType::PtrTo(Ctx.UnsignedLongLongTy);
285187284Sdas        case LengthModifier::AsAllocate:
286187284Sdas        case LengthModifier::AsMAllocate:
287187284Sdas        case LengthModifier::AsInt32:
288187284Sdas        case LengthModifier::AsInt3264:
289187284Sdas          return ArgType::Invalid();
290187284Sdas      }
291187284Sdas
292187284Sdas    // Float.
293187284Sdas    case ConversionSpecifier::aArg:
294187284Sdas    case ConversionSpecifier::AArg:
295187284Sdas    case ConversionSpecifier::eArg:
296187284Sdas    case ConversionSpecifier::EArg:
297187284Sdas    case ConversionSpecifier::fArg:
298187284Sdas    case ConversionSpecifier::FArg:
299187284Sdas    case ConversionSpecifier::gArg:
300187284Sdas    case ConversionSpecifier::GArg:
301187284Sdas      switch (LM.getKind()) {
302187284Sdas        case LengthModifier::None:
303187284Sdas          return ArgType::PtrTo(Ctx.FloatTy);
304187284Sdas        case LengthModifier::AsLong:
305187284Sdas          return ArgType::PtrTo(Ctx.DoubleTy);
306187284Sdas        case LengthModifier::AsLongDouble:
307187284Sdas          return ArgType::PtrTo(Ctx.LongDoubleTy);
308187284Sdas        default:
309187284Sdas          return ArgType::Invalid();
310187284Sdas      }
311187284Sdas
312187284Sdas    // Char, string and scanlist.
313187284Sdas    case ConversionSpecifier::cArg:
314187284Sdas    case ConversionSpecifier::sArg:
315187284Sdas    case ConversionSpecifier::ScanListArg:
316187284Sdas      switch (LM.getKind()) {
317187284Sdas        case LengthModifier::None:
318187284Sdas          return ArgType::PtrTo(ArgType::AnyCharTy);
319187284Sdas        case LengthModifier::AsLong:
320187284Sdas          return ArgType::PtrTo(ArgType(Ctx.getWideCharType(), "wchar_t"));
321187284Sdas        case LengthModifier::AsAllocate:
322187284Sdas        case LengthModifier::AsMAllocate:
323187284Sdas          return ArgType::PtrTo(ArgType::CStrTy);
324187284Sdas        default:
325187284Sdas          return ArgType::Invalid();
326187284Sdas      }
327187284Sdas    case ConversionSpecifier::CArg:
328187284Sdas    case ConversionSpecifier::SArg:
329187284Sdas      // FIXME: Mac OS X specific?
330187284Sdas      switch (LM.getKind()) {
331187284Sdas        case LengthModifier::None:
332187284Sdas          return ArgType::PtrTo(ArgType(Ctx.getWideCharType(), "wchar_t"));
333187284Sdas        case LengthModifier::AsAllocate:
334187284Sdas        case LengthModifier::AsMAllocate:
335187284Sdas          return ArgType::PtrTo(ArgType(ArgType::WCStrTy, "wchar_t *"));
336187284Sdas        default:
337187284Sdas          return ArgType::Invalid();
338187284Sdas      }
339187284Sdas
340187284Sdas    // Pointer.
341187284Sdas    case ConversionSpecifier::pArg:
342187284Sdas      return ArgType::PtrTo(ArgType::CPointerTy);
343187284Sdas
344187284Sdas    // Write-back.
345187284Sdas    case ConversionSpecifier::nArg:
346187284Sdas      switch (LM.getKind()) {
347        case LengthModifier::None:
348          return ArgType::PtrTo(Ctx.IntTy);
349        case LengthModifier::AsChar:
350          return ArgType::PtrTo(Ctx.SignedCharTy);
351        case LengthModifier::AsShort:
352          return ArgType::PtrTo(Ctx.ShortTy);
353        case LengthModifier::AsLong:
354          return ArgType::PtrTo(Ctx.LongTy);
355        case LengthModifier::AsLongLong:
356        case LengthModifier::AsQuad:
357          return ArgType::PtrTo(Ctx.LongLongTy);
358        case LengthModifier::AsInt64:
359          return ArgType::PtrTo(ArgType(Ctx.LongLongTy, "__int64"));
360        case LengthModifier::AsIntMax:
361          return ArgType::PtrTo(ArgType(Ctx.getIntMaxType(), "intmax_t"));
362        case LengthModifier::AsSizeT:
363          return ArgType(); // FIXME: ssize_t
364        case LengthModifier::AsPtrDiff:
365          return ArgType::PtrTo(ArgType(Ctx.getPointerDiffType(), "ptrdiff_t"));
366        case LengthModifier::AsLongDouble:
367          return ArgType(); // FIXME: Is this a known extension?
368        case LengthModifier::AsAllocate:
369        case LengthModifier::AsMAllocate:
370        case LengthModifier::AsInt32:
371        case LengthModifier::AsInt3264:
372          return ArgType::Invalid();
373        }
374
375    default:
376      break;
377  }
378
379  return ArgType();
380}
381
382bool ScanfSpecifier::fixType(QualType QT, const LangOptions &LangOpt,
383                             ASTContext &Ctx) {
384  if (!QT->isPointerType())
385    return false;
386
387  // %n is different from other conversion specifiers; don't try to fix it.
388  if (CS.getKind() == ConversionSpecifier::nArg)
389    return false;
390
391  QualType PT = QT->getPointeeType();
392
393  // If it's an enum, get its underlying type.
394  if (const EnumType *ETy = QT->getAs<EnumType>())
395    QT = ETy->getDecl()->getIntegerType();
396
397  const BuiltinType *BT = PT->getAs<BuiltinType>();
398  if (!BT)
399    return false;
400
401  // Pointer to a character.
402  if (PT->isAnyCharacterType()) {
403    CS.setKind(ConversionSpecifier::sArg);
404    if (PT->isWideCharType())
405      LM.setKind(LengthModifier::AsWideChar);
406    else
407      LM.setKind(LengthModifier::None);
408    return true;
409  }
410
411  // Figure out the length modifier.
412  switch (BT->getKind()) {
413    // no modifier
414    case BuiltinType::UInt:
415    case BuiltinType::Int:
416    case BuiltinType::Float:
417      LM.setKind(LengthModifier::None);
418      break;
419
420    // hh
421    case BuiltinType::Char_U:
422    case BuiltinType::UChar:
423    case BuiltinType::Char_S:
424    case BuiltinType::SChar:
425      LM.setKind(LengthModifier::AsChar);
426      break;
427
428    // h
429    case BuiltinType::Short:
430    case BuiltinType::UShort:
431      LM.setKind(LengthModifier::AsShort);
432      break;
433
434    // l
435    case BuiltinType::Long:
436    case BuiltinType::ULong:
437    case BuiltinType::Double:
438      LM.setKind(LengthModifier::AsLong);
439      break;
440
441    // ll
442    case BuiltinType::LongLong:
443    case BuiltinType::ULongLong:
444      LM.setKind(LengthModifier::AsLongLong);
445      break;
446
447    // L
448    case BuiltinType::LongDouble:
449      LM.setKind(LengthModifier::AsLongDouble);
450      break;
451
452    // Don't know.
453    default:
454      return false;
455  }
456
457  // Handle size_t, ptrdiff_t, etc. that have dedicated length modifiers in C99.
458  if (isa<TypedefType>(PT) && (LangOpt.C99 || LangOpt.CPlusPlus11))
459    namedTypeToLengthModifier(PT, LM);
460
461  // If fixing the length modifier was enough, we are done.
462  if (hasValidLengthModifier(Ctx.getTargetInfo())) {
463    const analyze_scanf::ArgType &AT = getArgType(Ctx);
464    if (AT.isValid() && AT.matchesType(Ctx, QT))
465      return true;
466  }
467
468  // Figure out the conversion specifier.
469  if (PT->isRealFloatingType())
470    CS.setKind(ConversionSpecifier::fArg);
471  else if (PT->isSignedIntegerType())
472    CS.setKind(ConversionSpecifier::dArg);
473  else if (PT->isUnsignedIntegerType())
474    CS.setKind(ConversionSpecifier::uArg);
475  else
476    llvm_unreachable("Unexpected type");
477
478  return true;
479}
480
481void ScanfSpecifier::toString(raw_ostream &os) const {
482  os << "%";
483
484  if (usesPositionalArg())
485    os << getPositionalArgIndex() << "$";
486  if (SuppressAssignment)
487    os << "*";
488
489  FieldWidth.toString(os);
490  os << LM.toString();
491  os << CS.toString();
492}
493
494bool clang::analyze_format_string::ParseScanfString(FormatStringHandler &H,
495                                                    const char *I,
496                                                    const char *E,
497                                                    const LangOptions &LO,
498                                                    const TargetInfo &Target) {
499
500  unsigned argIndex = 0;
501
502  // Keep looking for a format specifier until we have exhausted the string.
503  while (I != E) {
504    const ScanfSpecifierResult &FSR = ParseScanfSpecifier(H, I, E, argIndex,
505                                                          LO, Target);
506    // Did a fail-stop error of any kind occur when parsing the specifier?
507    // If so, don't do any more processing.
508    if (FSR.shouldStop())
509      return true;
510      // Did we exhaust the string or encounter an error that
511      // we can recover from?
512    if (!FSR.hasValue())
513      continue;
514      // We have a format specifier.  Pass it to the callback.
515    if (!H.HandleScanfSpecifier(FSR.getValue(), FSR.getStart(),
516                                I - FSR.getStart())) {
517      return true;
518    }
519  }
520  assert(I == E && "Format string not exhausted");
521  return false;
522}
523