clang API Documentation

ScanfFormatString.cpp
Go to the documentation of this file.
00001 //= ScanfFormatString.cpp - Analysis of scanf format strings ---*- C++ -*-===//
00002 //
00003 //                     The LLVM Compiler Infrastructure
00004 //
00005 // This file is distributed under the University of Illinois Open Source
00006 // License. See LICENSE.TXT for details.
00007 //
00008 //===----------------------------------------------------------------------===//
00009 //
00010 // Handling of format strings in scanf and friends.  The structure of format
00011 // strings for fscanf() is described in C99 7.19.6.2.
00012 //
00013 //===----------------------------------------------------------------------===//
00014 
00015 #include "clang/Analysis/Analyses/FormatString.h"
00016 #include "FormatStringParsing.h"
00017 
00018 using clang::analyze_format_string::ArgTypeResult;
00019 using clang::analyze_format_string::FormatStringHandler;
00020 using clang::analyze_format_string::LengthModifier;
00021 using clang::analyze_format_string::OptionalAmount;
00022 using clang::analyze_format_string::ConversionSpecifier;
00023 using clang::analyze_scanf::ScanfArgTypeResult;
00024 using clang::analyze_scanf::ScanfConversionSpecifier;
00025 using clang::analyze_scanf::ScanfSpecifier;
00026 using clang::UpdateOnReturn;
00027 using namespace clang;
00028 
00029 typedef clang::analyze_format_string::SpecifierResult<ScanfSpecifier>
00030         ScanfSpecifierResult;
00031 
00032 static bool ParseScanList(FormatStringHandler &H,
00033                           ScanfConversionSpecifier &CS,
00034                           const char *&Beg, const char *E) {
00035   const char *I = Beg;
00036   const char *start = I - 1;
00037   UpdateOnReturn <const char*> UpdateBeg(Beg, I);
00038 
00039   // No more characters?
00040   if (I == E) {
00041     H.HandleIncompleteScanList(start, I);
00042     return true;
00043   }
00044   
00045   // Special case: ']' is the first character.
00046   if (*I == ']') {
00047     if (++I == E) {
00048       H.HandleIncompleteScanList(start, I - 1);
00049       return true;
00050     }
00051   }
00052 
00053   // Look for a ']' character which denotes the end of the scan list.
00054   while (*I != ']') {
00055     if (++I == E) {
00056       H.HandleIncompleteScanList(start, I - 1);
00057       return true;
00058     }
00059   }    
00060 
00061   CS.setEndScanList(I);
00062   return false;
00063 }
00064 
00065 // FIXME: Much of this is copy-paste from ParsePrintfSpecifier.
00066 // We can possibly refactor.
00067 static ScanfSpecifierResult ParseScanfSpecifier(FormatStringHandler &H,
00068                                                 const char *&Beg,
00069                                                 const char *E,
00070                                                 unsigned &argIndex,
00071                                                 const LangOptions &LO) {
00072   
00073   using namespace clang::analyze_scanf;
00074   const char *I = Beg;
00075   const char *Start = 0;
00076   UpdateOnReturn <const char*> UpdateBeg(Beg, I);
00077 
00078     // Look for a '%' character that indicates the start of a format specifier.
00079   for ( ; I != E ; ++I) {
00080     char c = *I;
00081     if (c == '\0') {
00082         // Detect spurious null characters, which are likely errors.
00083       H.HandleNullChar(I);
00084       return true;
00085     }
00086     if (c == '%') {
00087       Start = I++;  // Record the start of the format specifier.
00088       break;
00089     }
00090   }
00091   
00092     // No format specifier found?
00093   if (!Start)
00094     return false;
00095   
00096   if (I == E) {
00097       // No more characters left?
00098     H.HandleIncompleteSpecifier(Start, E - Start);
00099     return true;
00100   }
00101   
00102   ScanfSpecifier FS;
00103   if (ParseArgPosition(H, FS, Start, I, E))
00104     return true;
00105 
00106   if (I == E) {
00107       // No more characters left?
00108     H.HandleIncompleteSpecifier(Start, E - Start);
00109     return true;
00110   }
00111   
00112   // Look for '*' flag if it is present.
00113   if (*I == '*') {
00114     FS.setSuppressAssignment(I);
00115     if (++I == E) {
00116       H.HandleIncompleteSpecifier(Start, E - Start);
00117       return true;
00118     }
00119   }
00120   
00121   // Look for the field width (if any).  Unlike printf, this is either
00122   // a fixed integer or isn't present.
00123   const OptionalAmount &Amt = clang::analyze_format_string::ParseAmount(I, E);
00124   if (Amt.getHowSpecified() != OptionalAmount::NotSpecified) {
00125     assert(Amt.getHowSpecified() == OptionalAmount::Constant);
00126     FS.setFieldWidth(Amt);
00127 
00128     if (I == E) {
00129       // No more characters left?
00130       H.HandleIncompleteSpecifier(Start, E - Start);
00131       return true;
00132     }
00133   }
00134   
00135   // Look for the length modifier.
00136   if (ParseLengthModifier(FS, I, E, LO, /*scanf=*/true) && I == E) {
00137       // No more characters left?
00138     H.HandleIncompleteSpecifier(Start, E - Start);
00139     return true;
00140   }
00141   
00142   // Detect spurious null characters, which are likely errors.
00143   if (*I == '\0') {
00144     H.HandleNullChar(I);
00145     return true;
00146   }
00147   
00148   // Finally, look for the conversion specifier.
00149   const char *conversionPosition = I++;
00150   ScanfConversionSpecifier::Kind k = ScanfConversionSpecifier::InvalidSpecifier;
00151   switch (*conversionPosition) {
00152     default:
00153       break;
00154     case '%': k = ConversionSpecifier::PercentArg;   break;
00155     case 'A': k = ConversionSpecifier::AArg; break;
00156     case 'E': k = ConversionSpecifier::EArg; break;
00157     case 'F': k = ConversionSpecifier::FArg; break;
00158     case 'G': k = ConversionSpecifier::GArg; break;
00159     case 'X': k = ConversionSpecifier::XArg; break;
00160     case 'a': k = ConversionSpecifier::aArg; break;
00161     case 'd': k = ConversionSpecifier::dArg; break;
00162     case 'e': k = ConversionSpecifier::eArg; break;
00163     case 'f': k = ConversionSpecifier::fArg; break;
00164     case 'g': k = ConversionSpecifier::gArg; break;
00165     case 'i': k = ConversionSpecifier::iArg; break;
00166     case 'n': k = ConversionSpecifier::nArg; break;
00167     case 'c': k = ConversionSpecifier::cArg; break;
00168     case 'C': k = ConversionSpecifier::CArg; break;
00169     case 'S': k = ConversionSpecifier::SArg; break;
00170     case '[': k = ConversionSpecifier::ScanListArg; break;
00171     case 'u': k = ConversionSpecifier::uArg; break;
00172     case 'x': k = ConversionSpecifier::xArg; break;
00173     case 'o': k = ConversionSpecifier::oArg; break;
00174     case 's': k = ConversionSpecifier::sArg; break;
00175     case 'p': k = ConversionSpecifier::pArg; break;
00176   }
00177   ScanfConversionSpecifier CS(conversionPosition, k);
00178   if (k == ScanfConversionSpecifier::ScanListArg) {
00179     if (ParseScanList(H, CS, I, E))
00180       return true;
00181   }
00182   FS.setConversionSpecifier(CS);
00183   if (CS.consumesDataArgument() && !FS.getSuppressAssignment()
00184       && !FS.usesPositionalArg())
00185     FS.setArgIndex(argIndex++);
00186   
00187   // FIXME: '%' and '*' doesn't make sense.  Issue a warning.
00188   // FIXME: 'ConsumedSoFar' and '*' doesn't make sense.
00189   
00190   if (k == ScanfConversionSpecifier::InvalidSpecifier) {
00191     // Assume the conversion takes one argument.
00192     return !H.HandleInvalidScanfConversionSpecifier(FS, Beg, I - Beg);
00193   }
00194   return ScanfSpecifierResult(Start, FS);
00195 }
00196 
00197 ScanfArgTypeResult ScanfSpecifier::getArgType(ASTContext &Ctx) const {
00198   const ScanfConversionSpecifier &CS = getConversionSpecifier();
00199 
00200   if (!CS.consumesDataArgument())
00201     return ScanfArgTypeResult::Invalid();
00202 
00203   switch(CS.getKind()) {
00204     // Signed int.
00205     case ConversionSpecifier::dArg:
00206     case ConversionSpecifier::iArg:
00207       switch (LM.getKind()) {
00208         case LengthModifier::None: return ArgTypeResult(Ctx.IntTy);
00209         case LengthModifier::AsChar:
00210           return ArgTypeResult(ArgTypeResult::AnyCharTy);
00211         case LengthModifier::AsShort: return ArgTypeResult(Ctx.ShortTy);
00212         case LengthModifier::AsLong: return ArgTypeResult(Ctx.LongTy);
00213         case LengthModifier::AsLongLong:
00214         case LengthModifier::AsQuad:
00215           return ArgTypeResult(Ctx.LongLongTy);
00216         case LengthModifier::AsIntMax:
00217           return ScanfArgTypeResult(Ctx.getIntMaxType(), "intmax_t *");
00218         case LengthModifier::AsSizeT:
00219           // FIXME: ssize_t.
00220           return ScanfArgTypeResult();
00221         case LengthModifier::AsPtrDiff:
00222           return ScanfArgTypeResult(Ctx.getPointerDiffType(), "ptrdiff_t *");
00223         case LengthModifier::AsLongDouble:
00224           // GNU extension.
00225           return ArgTypeResult(Ctx.LongLongTy);
00226         case LengthModifier::AsAllocate: return ScanfArgTypeResult::Invalid();
00227         case LengthModifier::AsMAllocate: return ScanfArgTypeResult::Invalid();
00228       }
00229 
00230     // Unsigned int.
00231     case ConversionSpecifier::oArg:
00232     case ConversionSpecifier::uArg:
00233     case ConversionSpecifier::xArg:
00234     case ConversionSpecifier::XArg:
00235       switch (LM.getKind()) {
00236         case LengthModifier::None: return ArgTypeResult(Ctx.UnsignedIntTy);
00237         case LengthModifier::AsChar: return ArgTypeResult(Ctx.UnsignedCharTy);
00238         case LengthModifier::AsShort: return ArgTypeResult(Ctx.UnsignedShortTy);
00239         case LengthModifier::AsLong: return ArgTypeResult(Ctx.UnsignedLongTy);
00240         case LengthModifier::AsLongLong:
00241         case LengthModifier::AsQuad:
00242           return ArgTypeResult(Ctx.UnsignedLongLongTy);
00243         case LengthModifier::AsIntMax:
00244           return ScanfArgTypeResult(Ctx.getUIntMaxType(), "uintmax_t *");
00245         case LengthModifier::AsSizeT:
00246           return ScanfArgTypeResult(Ctx.getSizeType(), "size_t *");
00247         case LengthModifier::AsPtrDiff:
00248           // FIXME: Unsigned version of ptrdiff_t?
00249           return ScanfArgTypeResult();
00250         case LengthModifier::AsLongDouble:
00251           // GNU extension.
00252           return ArgTypeResult(Ctx.UnsignedLongLongTy);
00253         case LengthModifier::AsAllocate: return ScanfArgTypeResult::Invalid();
00254         case LengthModifier::AsMAllocate: return ScanfArgTypeResult::Invalid();
00255       }
00256 
00257     // Float.
00258     case ConversionSpecifier::aArg:
00259     case ConversionSpecifier::AArg:
00260     case ConversionSpecifier::eArg:
00261     case ConversionSpecifier::EArg:
00262     case ConversionSpecifier::fArg:
00263     case ConversionSpecifier::FArg:
00264     case ConversionSpecifier::gArg:
00265     case ConversionSpecifier::GArg:
00266       switch (LM.getKind()) {
00267         case LengthModifier::None: return ArgTypeResult(Ctx.FloatTy);
00268         case LengthModifier::AsLong: return ArgTypeResult(Ctx.DoubleTy);
00269         case LengthModifier::AsLongDouble:
00270           return ArgTypeResult(Ctx.LongDoubleTy);
00271         default:
00272           return ScanfArgTypeResult::Invalid();
00273       }
00274 
00275     // Char, string and scanlist.
00276     case ConversionSpecifier::cArg:
00277     case ConversionSpecifier::sArg:
00278     case ConversionSpecifier::ScanListArg:
00279       switch (LM.getKind()) {
00280         case LengthModifier::None: return ScanfArgTypeResult::CStrTy;
00281         case LengthModifier::AsLong:
00282           return ScanfArgTypeResult(ScanfArgTypeResult::WCStrTy, "wchar_t *");
00283         case LengthModifier::AsAllocate:
00284         case LengthModifier::AsMAllocate:
00285           return ScanfArgTypeResult(ArgTypeResult::CStrTy);
00286         default:
00287           return ScanfArgTypeResult::Invalid();
00288       }
00289     case ConversionSpecifier::CArg:
00290     case ConversionSpecifier::SArg:
00291       // FIXME: Mac OS X specific?
00292       switch (LM.getKind()) {
00293         case LengthModifier::None:
00294           return ScanfArgTypeResult(ScanfArgTypeResult::WCStrTy, "wchar_t *");
00295         case LengthModifier::AsAllocate:
00296         case LengthModifier::AsMAllocate:
00297           return ScanfArgTypeResult(ArgTypeResult::WCStrTy, "wchar_t **");
00298         default:
00299           return ScanfArgTypeResult::Invalid();
00300       }
00301 
00302     // Pointer.
00303     case ConversionSpecifier::pArg:
00304       return ScanfArgTypeResult(ArgTypeResult(ArgTypeResult::CPointerTy));
00305 
00306     default:
00307       break;
00308   }
00309 
00310   return ScanfArgTypeResult();
00311 }
00312 
00313 bool ScanfSpecifier::fixType(QualType QT, const LangOptions &LangOpt,
00314                              ASTContext &Ctx) {
00315   if (!QT->isPointerType())
00316     return false;
00317 
00318   QualType PT = QT->getPointeeType();
00319   const BuiltinType *BT = PT->getAs<BuiltinType>();
00320   if (!BT)
00321     return false;
00322 
00323   // Pointer to a character.
00324   if (PT->isAnyCharacterType()) {
00325     CS.setKind(ConversionSpecifier::sArg);
00326     if (PT->isWideCharType())
00327       LM.setKind(LengthModifier::AsWideChar);
00328     else
00329       LM.setKind(LengthModifier::None);
00330     return true;
00331   }
00332 
00333   // Figure out the length modifier.
00334   switch (BT->getKind()) {
00335     // no modifier
00336     case BuiltinType::UInt:
00337     case BuiltinType::Int:
00338     case BuiltinType::Float:
00339       LM.setKind(LengthModifier::None);
00340       break;
00341 
00342     // hh
00343     case BuiltinType::Char_U:
00344     case BuiltinType::UChar:
00345     case BuiltinType::Char_S:
00346     case BuiltinType::SChar:
00347       LM.setKind(LengthModifier::AsChar);
00348       break;
00349 
00350     // h
00351     case BuiltinType::Short:
00352     case BuiltinType::UShort:
00353       LM.setKind(LengthModifier::AsShort);
00354       break;
00355 
00356     // l
00357     case BuiltinType::Long:
00358     case BuiltinType::ULong:
00359     case BuiltinType::Double:
00360       LM.setKind(LengthModifier::AsLong);
00361       break;
00362 
00363     // ll
00364     case BuiltinType::LongLong:
00365     case BuiltinType::ULongLong:
00366       LM.setKind(LengthModifier::AsLongLong);
00367       break;
00368 
00369     // L
00370     case BuiltinType::LongDouble:
00371       LM.setKind(LengthModifier::AsLongDouble);
00372       break;
00373 
00374     // Don't know.
00375     default:
00376       return false;
00377   }
00378 
00379   // Handle size_t, ptrdiff_t, etc. that have dedicated length modifiers in C99.
00380   if (isa<TypedefType>(PT) && (LangOpt.C99 || LangOpt.CPlusPlus0x)) {
00381     const IdentifierInfo *Identifier = QT.getBaseTypeIdentifier();
00382     if (Identifier->getName() == "size_t") {
00383       LM.setKind(LengthModifier::AsSizeT);
00384     } else if (Identifier->getName() == "ssize_t") {
00385       // Not C99, but common in Unix.
00386       LM.setKind(LengthModifier::AsSizeT);
00387     } else if (Identifier->getName() == "intmax_t") {
00388       LM.setKind(LengthModifier::AsIntMax);
00389     } else if (Identifier->getName() == "uintmax_t") {
00390       LM.setKind(LengthModifier::AsIntMax);
00391     } else if (Identifier->getName() == "ptrdiff_t") {
00392       LM.setKind(LengthModifier::AsPtrDiff);
00393     }
00394   }
00395 
00396   // If fixing the length modifier was enough, we are done.
00397   const analyze_scanf::ScanfArgTypeResult &ATR = getArgType(Ctx);
00398   if (hasValidLengthModifier() && ATR.isValid() && ATR.matchesType(Ctx, QT))
00399     return true;
00400 
00401   // Figure out the conversion specifier.
00402   if (PT->isRealFloatingType())
00403     CS.setKind(ConversionSpecifier::fArg);
00404   else if (PT->isSignedIntegerType())
00405     CS.setKind(ConversionSpecifier::dArg);
00406   else if (PT->isUnsignedIntegerType())
00407     CS.setKind(ConversionSpecifier::uArg);
00408   else
00409     llvm_unreachable("Unexpected type");
00410 
00411   return true;
00412 }
00413 
00414 void ScanfSpecifier::toString(raw_ostream &os) const {
00415   os << "%";
00416 
00417   if (usesPositionalArg())
00418     os << getPositionalArgIndex() << "$";
00419   if (SuppressAssignment)
00420     os << "*";
00421 
00422   FieldWidth.toString(os);
00423   os << LM.toString();
00424   os << CS.toString();
00425 }
00426 
00427 bool clang::analyze_format_string::ParseScanfString(FormatStringHandler &H,
00428                                                     const char *I,
00429                                                     const char *E,
00430                                                     const LangOptions &LO) {
00431   
00432   unsigned argIndex = 0;
00433   
00434   // Keep looking for a format specifier until we have exhausted the string.
00435   while (I != E) {
00436     const ScanfSpecifierResult &FSR = ParseScanfSpecifier(H, I, E, argIndex,
00437                                                           LO);
00438     // Did a fail-stop error of any kind occur when parsing the specifier?
00439     // If so, don't do any more processing.
00440     if (FSR.shouldStop())
00441       return true;;
00442       // Did we exhaust the string or encounter an error that
00443       // we can recover from?
00444     if (!FSR.hasValue())
00445       continue;
00446       // We have a format specifier.  Pass it to the callback.
00447     if (!H.HandleScanfSpecifier(FSR.getValue(), FSR.getStart(),
00448                                 I - FSR.getStart())) {
00449       return true;
00450     }
00451   }
00452   assert(I == E && "Format string not exhausted");
00453   return false;
00454 }
00455 
00456 bool ScanfArgTypeResult::matchesType(ASTContext& C, QualType argTy) const {
00457   switch (K) {
00458     case InvalidTy:
00459       llvm_unreachable("ArgTypeResult must be valid");
00460     case UnknownTy:
00461       return true;
00462     case CStrTy:
00463       return ArgTypeResult(ArgTypeResult::CStrTy).matchesType(C, argTy);
00464     case WCStrTy:
00465       return ArgTypeResult(ArgTypeResult::WCStrTy).matchesType(C, argTy);
00466     case PtrToArgTypeResultTy: {
00467       const PointerType *PT = argTy->getAs<PointerType>();
00468       if (!PT)
00469         return false;
00470       return A.matchesType(C, PT->getPointeeType());
00471     }
00472   }
00473 
00474   llvm_unreachable("Invalid ScanfArgTypeResult Kind!");
00475 }
00476 
00477 QualType ScanfArgTypeResult::getRepresentativeType(ASTContext &C) const {
00478   switch (K) {
00479     case InvalidTy:
00480       llvm_unreachable("No representative type for Invalid ArgTypeResult");
00481     case UnknownTy:
00482       return QualType();
00483     case CStrTy:
00484       return C.getPointerType(C.CharTy);
00485     case WCStrTy:
00486       return C.getPointerType(C.getWCharType());
00487     case PtrToArgTypeResultTy:
00488       return C.getPointerType(A.getRepresentativeType(C));
00489   }
00490 
00491   llvm_unreachable("Invalid ScanfArgTypeResult Kind!");
00492 }
00493 
00494 std::string ScanfArgTypeResult::getRepresentativeTypeName(ASTContext& C) const {
00495   std::string S = getRepresentativeType(C).getAsString();
00496   if (!Name)
00497     return std::string("'") + S + "'";
00498   return std::string("'") + Name + "' (aka '" + S + "')";
00499 }