//= PrintfFormatStrings.cpp - Analysis of printf format strings --*- C++ -*-==// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// // // Handling of format string in printf and friends. The structure of format // strings for fprintf() are described in C99 7.19.6.1. // //===----------------------------------------------------------------------===// #include "clang/Analysis/Analyses/PrintfFormatString.h" #include "clang/AST/ASTContext.h" using clang::analyze_printf::ArgTypeResult; using clang::analyze_printf::FormatSpecifier; using clang::analyze_printf::FormatStringHandler; using clang::analyze_printf::OptionalAmount; using clang::analyze_printf::PositionContext; using namespace clang; namespace { class FormatSpecifierResult { FormatSpecifier FS; const char *Start; bool Stop; public: FormatSpecifierResult(bool stop = false) : Start(0), Stop(stop) {} FormatSpecifierResult(const char *start, const FormatSpecifier &fs) : FS(fs), Start(start), Stop(false) {} const char *getStart() const { return Start; } bool shouldStop() const { return Stop; } bool hasValue() const { return Start != 0; } const FormatSpecifier &getValue() const { assert(hasValue()); return FS; } const FormatSpecifier &getValue() { return FS; } }; } // end anonymous namespace template class UpdateOnReturn { T &ValueToUpdate; const T &ValueToCopy; public: UpdateOnReturn(T &valueToUpdate, const T &valueToCopy) : ValueToUpdate(valueToUpdate), ValueToCopy(valueToCopy) {} ~UpdateOnReturn() { ValueToUpdate = ValueToCopy; } }; //===----------------------------------------------------------------------===// // Methods for parsing format strings. //===----------------------------------------------------------------------===// static OptionalAmount ParseAmount(const char *&Beg, const char *E) { const char *I = Beg; UpdateOnReturn UpdateBeg(Beg, I); unsigned accumulator = 0; bool hasDigits = false; for ( ; I != E; ++I) { char c = *I; if (c >= '0' && c <= '9') { hasDigits = true; accumulator = (accumulator * 10) + (c - '0'); continue; } if (hasDigits) return OptionalAmount(OptionalAmount::Constant, accumulator, Beg); break; } return OptionalAmount(); } static OptionalAmount ParseNonPositionAmount(const char *&Beg, const char *E, unsigned &argIndex) { if (*Beg == '*') { ++Beg; return OptionalAmount(OptionalAmount::Arg, argIndex++, Beg); } return ParseAmount(Beg, E); } static OptionalAmount ParsePositionAmount(FormatStringHandler &H, const char *Start, const char *&Beg, const char *E, PositionContext p) { if (*Beg == '*') { const char *I = Beg + 1; const OptionalAmount &Amt = ParseAmount(I, E); if (Amt.getHowSpecified() == OptionalAmount::NotSpecified) { H.HandleInvalidPosition(Beg, I - Beg, p); return OptionalAmount(false); } if (I== E) { // No more characters left? H.HandleIncompleteFormatSpecifier(Start, E - Start); return OptionalAmount(false); } assert(Amt.getHowSpecified() == OptionalAmount::Constant); if (*I == '$') { // Special case: '*0$', since this is an easy mistake. if (Amt.getConstantAmount() == 0) { H.HandleZeroPosition(Beg, I - Beg + 1); return OptionalAmount(false); } const char *Tmp = Beg; Beg = ++I; return OptionalAmount(OptionalAmount::Arg, Amt.getConstantAmount() - 1, Tmp); } H.HandleInvalidPosition(Beg, I - Beg, p); return OptionalAmount(false); } return ParseAmount(Beg, E); } static bool ParsePrecision(FormatStringHandler &H, FormatSpecifier &FS, const char *Start, const char *&Beg, const char *E, unsigned *argIndex) { if (argIndex) { FS.setPrecision(ParseNonPositionAmount(Beg, E, *argIndex)); } else { const OptionalAmount Amt = ParsePositionAmount(H, Start, Beg, E, analyze_printf::PrecisionPos); if (Amt.isInvalid()) return true; FS.setPrecision(Amt); } return false; } static bool ParseFieldWidth(FormatStringHandler &H, FormatSpecifier &FS, const char *Start, const char *&Beg, const char *E, unsigned *argIndex) { // FIXME: Support negative field widths. if (argIndex) { FS.setFieldWidth(ParseNonPositionAmount(Beg, E, *argIndex)); } else { const OptionalAmount Amt = ParsePositionAmount(H, Start, Beg, E, analyze_printf::FieldWidthPos); if (Amt.isInvalid()) return true; FS.setFieldWidth(Amt); } return false; } static bool ParseArgPosition(FormatStringHandler &H, FormatSpecifier &FS, const char *Start, const char *&Beg, const char *E) { using namespace clang::analyze_printf; const char *I = Beg; const OptionalAmount &Amt = ParseAmount(I, E); if (I == E) { // No more characters left? H.HandleIncompleteFormatSpecifier(Start, E - Start); return true; } if (Amt.getHowSpecified() == OptionalAmount::Constant && *(I++) == '$') { // Special case: '%0$', since this is an easy mistake. if (Amt.getConstantAmount() == 0) { H.HandleZeroPosition(Start, I - Start); return true; } FS.setArgIndex(Amt.getConstantAmount() - 1); FS.setUsesPositionalArg(); // Update the caller's pointer if we decided to consume // these characters. Beg = I; return false; } return false; } static FormatSpecifierResult ParseFormatSpecifier(FormatStringHandler &H, const char *&Beg, const char *E, unsigned &argIndex) { using namespace clang::analyze_printf; const char *I = Beg; const char *Start = 0; UpdateOnReturn UpdateBeg(Beg, I); // Look for a '%' character that indicates the start of a format specifier. for ( ; I != E ; ++I) { char c = *I; if (c == '\0') { // Detect spurious null characters, which are likely errors. H.HandleNullChar(I); return true; } if (c == '%') { Start = I++; // Record the start of the format specifier. break; } } // No format specifier found? if (!Start) return false; if (I == E) { // No more characters left? H.HandleIncompleteFormatSpecifier(Start, E - Start); return true; } FormatSpecifier FS; if (ParseArgPosition(H, FS, Start, I, E)) return true; if (I == E) { // No more characters left? H.HandleIncompleteFormatSpecifier(Start, E - Start); return true; } // Look for flags (if any). bool hasMore = true; for ( ; I != E; ++I) { switch (*I) { default: hasMore = false; break; case '-': FS.setIsLeftJustified(); break; case '+': FS.setHasPlusPrefix(); break; case ' ': FS.setHasSpacePrefix(); break; case '#': FS.setHasAlternativeForm(); break; case '0': FS.setHasLeadingZeros(); break; } if (!hasMore) break; } if (I == E) { // No more characters left? H.HandleIncompleteFormatSpecifier(Start, E - Start); return true; } // Look for the field width (if any). if (ParseFieldWidth(H, FS, Start, I, E, FS.usesPositionalArg() ? 0 : &argIndex)) return true; if (I == E) { // No more characters left? H.HandleIncompleteFormatSpecifier(Start, E - Start); return true; } // Look for the precision (if any). if (*I == '.') { ++I; if (I == E) { H.HandleIncompleteFormatSpecifier(Start, E - Start); return true; } if (ParsePrecision(H, FS, Start, I, E, FS.usesPositionalArg() ? 0 : &argIndex)) return true; if (I == E) { // No more characters left? H.HandleIncompleteFormatSpecifier(Start, E - Start); return true; } } // Look for the length modifier. LengthModifier lm = None; switch (*I) { default: break; case 'h': ++I; lm = (I != E && *I == 'h') ? ++I, AsChar : AsShort; break; case 'l': ++I; lm = (I != E && *I == 'l') ? ++I, AsLongLong : AsLong; break; case 'j': lm = AsIntMax; ++I; break; case 'z': lm = AsSizeT; ++I; break; case 't': lm = AsPtrDiff; ++I; break; case 'L': lm = AsLongDouble; ++I; break; case 'q': lm = AsLongLong; ++I; break; } FS.setLengthModifier(lm); if (I == E) { // No more characters left? H.HandleIncompleteFormatSpecifier(Start, E - Start); return true; } if (*I == '\0') { // Detect spurious null characters, which are likely errors. H.HandleNullChar(I); return true; } // Finally, look for the conversion specifier. const char *conversionPosition = I++; ConversionSpecifier::Kind k = ConversionSpecifier::InvalidSpecifier; switch (*conversionPosition) { default: break; // C99: 7.19.6.1 (section 8). case '%': k = ConversionSpecifier::PercentArg; break; case 'A': k = ConversionSpecifier::AArg; break; case 'E': k = ConversionSpecifier::EArg; break; case 'F': k = ConversionSpecifier::FArg; break; case 'G': k = ConversionSpecifier::GArg; break; case 'X': k = ConversionSpecifier::XArg; break; case 'a': k = ConversionSpecifier::aArg; break; case 'c': k = ConversionSpecifier::IntAsCharArg; break; case 'd': k = ConversionSpecifier::dArg; break; case 'e': k = ConversionSpecifier::eArg; break; case 'f': k = ConversionSpecifier::fArg; break; case 'g': k = ConversionSpecifier::gArg; break; case 'i': k = ConversionSpecifier::iArg; break; case 'n': k = ConversionSpecifier::OutIntPtrArg; break; case 'o': k = ConversionSpecifier::oArg; break; case 'p': k = ConversionSpecifier::VoidPtrArg; break; case 's': k = ConversionSpecifier::CStrArg; break; case 'u': k = ConversionSpecifier::uArg; break; case 'x': k = ConversionSpecifier::xArg; break; // Mac OS X (unicode) specific case 'C': k = ConversionSpecifier::CArg; break; case 'S': k = ConversionSpecifier::UnicodeStrArg; break; // Objective-C. case '@': k = ConversionSpecifier::ObjCObjArg; break; // Glibc specific. case 'm': k = ConversionSpecifier::PrintErrno; break; } ConversionSpecifier CS(conversionPosition, k); FS.setConversionSpecifier(CS); if (CS.consumesDataArgument() && !FS.usesPositionalArg()) FS.setArgIndex(argIndex++); if (k == ConversionSpecifier::InvalidSpecifier) { // Assume the conversion takes one argument. return !H.HandleInvalidConversionSpecifier(FS, Beg, I - Beg); } return FormatSpecifierResult(Start, FS); } bool clang::analyze_printf::ParseFormatString(FormatStringHandler &H, const char *I, const char *E) { unsigned argIndex = 0; // Keep looking for a format specifier until we have exhausted the string. while (I != E) { const FormatSpecifierResult &FSR = ParseFormatSpecifier(H, I, E, argIndex); // Did a fail-stop error of any kind occur when parsing the specifier? // If so, don't do any more processing. if (FSR.shouldStop()) return true;; // Did we exhaust the string or encounter an error that // we can recover from? if (!FSR.hasValue()) continue; // We have a format specifier. Pass it to the callback. if (!H.HandleFormatSpecifier(FSR.getValue(), FSR.getStart(), I - FSR.getStart())) return true; } assert(I == E && "Format string not exhausted"); return false; } FormatStringHandler::~FormatStringHandler() {} //===----------------------------------------------------------------------===// // Methods on ArgTypeResult. //===----------------------------------------------------------------------===// bool ArgTypeResult::matchesType(ASTContext &C, QualType argTy) const { assert(isValid()); if (K == UnknownTy) return true; if (K == SpecificTy) { argTy = C.getCanonicalType(argTy).getUnqualifiedType(); if (T == argTy) return true; if (const BuiltinType *BT = argTy->getAs()) switch (BT->getKind()) { default: break; case BuiltinType::Char_S: case BuiltinType::SChar: return T == C.UnsignedCharTy; case BuiltinType::Char_U: case BuiltinType::UChar: return T == C.SignedCharTy; case BuiltinType::Short: return T == C.UnsignedShortTy; case BuiltinType::UShort: return T == C.ShortTy; case BuiltinType::Int: return T == C.UnsignedIntTy; case BuiltinType::UInt: return T == C.IntTy; case BuiltinType::Long: return T == C.UnsignedLongTy; case BuiltinType::ULong: return T == C.LongTy; case BuiltinType::LongLong: return T == C.UnsignedLongLongTy; case BuiltinType::ULongLong: return T == C.LongLongTy; } return false; } if (K == CStrTy) { const PointerType *PT = argTy->getAs(); if (!PT) return false; QualType pointeeTy = PT->getPointeeType(); if (const BuiltinType *BT = pointeeTy->getAs()) switch (BT->getKind()) { case BuiltinType::Void: case BuiltinType::Char_U: case BuiltinType::UChar: case BuiltinType::Char_S: case BuiltinType::SChar: return true; default: break; } return false; } if (K == WCStrTy) { const PointerType *PT = argTy->getAs(); if (!PT) return false; QualType pointeeTy = C.getCanonicalType(PT->getPointeeType()).getUnqualifiedType(); return pointeeTy == C.getWCharType(); } return false; } QualType ArgTypeResult::getRepresentativeType(ASTContext &C) const { assert(isValid()); if (K == SpecificTy) return T; if (K == CStrTy) return C.getPointerType(C.CharTy); if (K == WCStrTy) return C.getPointerType(C.getWCharType()); if (K == ObjCPointerTy) return C.ObjCBuiltinIdTy; return QualType(); } //===----------------------------------------------------------------------===// // Methods on OptionalAmount. //===----------------------------------------------------------------------===// ArgTypeResult OptionalAmount::getArgType(ASTContext &Ctx) const { return Ctx.IntTy; } //===----------------------------------------------------------------------===// // Methods on FormatSpecifier. //===----------------------------------------------------------------------===// ArgTypeResult FormatSpecifier::getArgType(ASTContext &Ctx) const { if (!CS.consumesDataArgument()) return ArgTypeResult::Invalid(); if (CS.isIntArg()) switch (LM) { case AsLongDouble: return ArgTypeResult::Invalid(); case None: return Ctx.IntTy; case AsChar: return Ctx.SignedCharTy; case AsShort: return Ctx.ShortTy; case AsLong: return Ctx.LongTy; case AsLongLong: return Ctx.LongLongTy; case AsIntMax: // FIXME: Return unknown for now. return ArgTypeResult(); case AsSizeT: return Ctx.getSizeType(); case AsPtrDiff: return Ctx.getPointerDiffType(); } if (CS.isUIntArg()) switch (LM) { case AsLongDouble: return ArgTypeResult::Invalid(); case None: return Ctx.UnsignedIntTy; case AsChar: return Ctx.UnsignedCharTy; case AsShort: return Ctx.UnsignedShortTy; case AsLong: return Ctx.UnsignedLongTy; case AsLongLong: return Ctx.UnsignedLongLongTy; case AsIntMax: // FIXME: Return unknown for now. return ArgTypeResult(); case AsSizeT: // FIXME: How to get the corresponding unsigned // version of size_t? return ArgTypeResult(); case AsPtrDiff: // FIXME: How to get the corresponding unsigned // version of ptrdiff_t? return ArgTypeResult(); } if (CS.isDoubleArg()) { if (LM == AsLongDouble) return Ctx.LongDoubleTy; return Ctx.DoubleTy; } switch (CS.getKind()) { case ConversionSpecifier::CStrArg: return ArgTypeResult(LM == AsWideChar ? ArgTypeResult::WCStrTy : ArgTypeResult::CStrTy); case ConversionSpecifier::UnicodeStrArg: // FIXME: This appears to be Mac OS X specific. return ArgTypeResult::WCStrTy; case ConversionSpecifier::CArg: return Ctx.WCharTy; default: break; } // FIXME: Handle other cases. return ArgTypeResult(); }