clang API Documentation

FormatString.cpp
Go to the documentation of this file.
00001 // FormatString.cpp - Common stuff for handling printf/scanf formats -*- C++ -*-
00002 //
00003 //                     The LLVM Compiler Infrastructure
00004 //
00005 // This file is distributed under the University of Illinois Open Source
00006 // License. See LICENSE.TXT for details.
00007 //
00008 //===----------------------------------------------------------------------===//
00009 //
00010 // Shared details for processing format strings of printf and scanf
00011 // (and friends).
00012 //
00013 //===----------------------------------------------------------------------===//
00014 
00015 #include "FormatStringParsing.h"
00016 #include "clang/Basic/LangOptions.h"
00017 
00018 using clang::analyze_format_string::ArgTypeResult;
00019 using clang::analyze_format_string::FormatStringHandler;
00020 using clang::analyze_format_string::FormatSpecifier;
00021 using clang::analyze_format_string::LengthModifier;
00022 using clang::analyze_format_string::OptionalAmount;
00023 using clang::analyze_format_string::PositionContext;
00024 using clang::analyze_format_string::ConversionSpecifier;
00025 using namespace clang;
00026 
00027 // Key function to FormatStringHandler.
00028 FormatStringHandler::~FormatStringHandler() {}
00029 
00030 //===----------------------------------------------------------------------===//
00031 // Functions for parsing format-string components that are common to both
00032 // printf and scanf format strings.
00033 //===----------------------------------------------------------------------===//
00034 
00035 OptionalAmount
00036 clang::analyze_format_string::ParseAmount(const char *&Beg, const char *E) {
00037   const char *I = Beg;
00038   UpdateOnReturn <const char*> UpdateBeg(Beg, I);
00039 
00040   unsigned accumulator = 0;
00041   bool hasDigits = false;
00042 
00043   for ( ; I != E; ++I) {
00044     char c = *I;
00045     if (c >= '0' && c <= '9') {
00046       hasDigits = true;
00047       accumulator = (accumulator * 10) + (c - '0');
00048       continue;
00049     }
00050 
00051     if (hasDigits)
00052       return OptionalAmount(OptionalAmount::Constant, accumulator, Beg, I - Beg,
00053           false);
00054 
00055     break;
00056   }
00057 
00058   return OptionalAmount();
00059 }
00060 
00061 OptionalAmount
00062 clang::analyze_format_string::ParseNonPositionAmount(const char *&Beg,
00063                                                      const char *E,
00064                                                      unsigned &argIndex) {
00065   if (*Beg == '*') {
00066     ++Beg;
00067     return OptionalAmount(OptionalAmount::Arg, argIndex++, Beg, 0, false);
00068   }
00069 
00070   return ParseAmount(Beg, E);
00071 }
00072 
00073 OptionalAmount
00074 clang::analyze_format_string::ParsePositionAmount(FormatStringHandler &H,
00075                                                   const char *Start,
00076                                                   const char *&Beg,
00077                                                   const char *E,
00078                                                   PositionContext p) {
00079   if (*Beg == '*') {
00080     const char *I = Beg + 1;
00081     const OptionalAmount &Amt = ParseAmount(I, E);
00082 
00083     if (Amt.getHowSpecified() == OptionalAmount::NotSpecified) {
00084       H.HandleInvalidPosition(Beg, I - Beg, p);
00085       return OptionalAmount(false);
00086     }
00087 
00088     if (I == E) {
00089       // No more characters left?
00090       H.HandleIncompleteSpecifier(Start, E - Start);
00091       return OptionalAmount(false);
00092     }
00093 
00094     assert(Amt.getHowSpecified() == OptionalAmount::Constant);
00095 
00096     if (*I == '$') {
00097       // Handle positional arguments
00098 
00099       // Special case: '*0$', since this is an easy mistake.
00100       if (Amt.getConstantAmount() == 0) {
00101         H.HandleZeroPosition(Beg, I - Beg + 1);
00102         return OptionalAmount(false);
00103       }
00104 
00105       const char *Tmp = Beg;
00106       Beg = ++I;
00107 
00108       return OptionalAmount(OptionalAmount::Arg, Amt.getConstantAmount() - 1,
00109                             Tmp, 0, true);
00110     }
00111 
00112     H.HandleInvalidPosition(Beg, I - Beg, p);
00113     return OptionalAmount(false);
00114   }
00115 
00116   return ParseAmount(Beg, E);
00117 }
00118 
00119 
00120 bool
00121 clang::analyze_format_string::ParseFieldWidth(FormatStringHandler &H,
00122                                               FormatSpecifier &CS,
00123                                               const char *Start,
00124                                               const char *&Beg, const char *E,
00125                                               unsigned *argIndex) {
00126   // FIXME: Support negative field widths.
00127   if (argIndex) {
00128     CS.setFieldWidth(ParseNonPositionAmount(Beg, E, *argIndex));
00129   }
00130   else {
00131     const OptionalAmount Amt =
00132       ParsePositionAmount(H, Start, Beg, E,
00133                           analyze_format_string::FieldWidthPos);
00134 
00135     if (Amt.isInvalid())
00136       return true;
00137     CS.setFieldWidth(Amt);
00138   }
00139   return false;
00140 }
00141 
00142 bool
00143 clang::analyze_format_string::ParseArgPosition(FormatStringHandler &H,
00144                                                FormatSpecifier &FS,
00145                                                const char *Start,
00146                                                const char *&Beg,
00147                                                const char *E) {
00148   const char *I = Beg;
00149 
00150   const OptionalAmount &Amt = ParseAmount(I, E);
00151 
00152   if (I == E) {
00153     // No more characters left?
00154     H.HandleIncompleteSpecifier(Start, E - Start);
00155     return true;
00156   }
00157 
00158   if (Amt.getHowSpecified() == OptionalAmount::Constant && *(I++) == '$') {
00159     // Warn that positional arguments are non-standard.
00160     H.HandlePosition(Start, I - Start);
00161 
00162     // Special case: '%0$', since this is an easy mistake.
00163     if (Amt.getConstantAmount() == 0) {
00164       H.HandleZeroPosition(Start, I - Start);
00165       return true;
00166     }
00167 
00168     FS.setArgIndex(Amt.getConstantAmount() - 1);
00169     FS.setUsesPositionalArg();
00170     // Update the caller's pointer if we decided to consume
00171     // these characters.
00172     Beg = I;
00173     return false;
00174   }
00175 
00176   return false;
00177 }
00178 
00179 bool
00180 clang::analyze_format_string::ParseLengthModifier(FormatSpecifier &FS,
00181                                                   const char *&I,
00182                                                   const char *E,
00183                                                   const LangOptions &LO,
00184                                                   bool IsScanf) {
00185   LengthModifier::Kind lmKind = LengthModifier::None;
00186   const char *lmPosition = I;
00187   switch (*I) {
00188     default:
00189       return false;
00190     case 'h':
00191       ++I;
00192       lmKind = (I != E && *I == 'h') ? (++I, LengthModifier::AsChar)
00193                                      : LengthModifier::AsShort;
00194       break;
00195     case 'l':
00196       ++I;
00197       lmKind = (I != E && *I == 'l') ? (++I, LengthModifier::AsLongLong)
00198                                      : LengthModifier::AsLong;
00199       break;
00200     case 'j': lmKind = LengthModifier::AsIntMax;     ++I; break;
00201     case 'z': lmKind = LengthModifier::AsSizeT;      ++I; break;
00202     case 't': lmKind = LengthModifier::AsPtrDiff;    ++I; break;
00203     case 'L': lmKind = LengthModifier::AsLongDouble; ++I; break;
00204     case 'q': lmKind = LengthModifier::AsQuad;       ++I; break;
00205     case 'a':
00206       if (IsScanf && !LO.C99 && !LO.CPlusPlus0x) {
00207         // For scanf in C90, look at the next character to see if this should
00208         // be parsed as the GNU extension 'a' length modifier. If not, this
00209         // will be parsed as a conversion specifier.
00210         ++I;
00211         if (I != E && (*I == 's' || *I == 'S' || *I == '[')) {
00212           lmKind = LengthModifier::AsAllocate;
00213           break;
00214         }
00215         --I;
00216       }
00217       return false;
00218     case 'm':
00219       if (IsScanf) {
00220         lmKind = LengthModifier::AsMAllocate;
00221         ++I;
00222         break;
00223       }
00224       return false;
00225   }
00226   LengthModifier lm(lmPosition, lmKind);
00227   FS.setLengthModifier(lm);
00228   return true;
00229 }
00230 
00231 //===----------------------------------------------------------------------===//
00232 // Methods on ArgTypeResult.
00233 //===----------------------------------------------------------------------===//
00234 
00235 bool ArgTypeResult::matchesType(ASTContext &C, QualType argTy) const {
00236   switch (K) {
00237     case InvalidTy:
00238       llvm_unreachable("ArgTypeResult must be valid");
00239 
00240     case UnknownTy:
00241       return true;
00242       
00243     case AnyCharTy: {
00244       if (const BuiltinType *BT = argTy->getAs<BuiltinType>())
00245         switch (BT->getKind()) {
00246           default:
00247             break;
00248           case BuiltinType::Char_S:
00249           case BuiltinType::SChar:
00250           case BuiltinType::UChar:
00251           case BuiltinType::Char_U:
00252             return true;            
00253         }
00254       return false;
00255     }
00256       
00257     case SpecificTy: {
00258       argTy = C.getCanonicalType(argTy).getUnqualifiedType();
00259       if (T == argTy)
00260         return true;
00261       // Check for "compatible types".
00262       if (const BuiltinType *BT = argTy->getAs<BuiltinType>())
00263         switch (BT->getKind()) {
00264           default:
00265             break;
00266           case BuiltinType::Char_S:
00267           case BuiltinType::SChar:
00268           case BuiltinType::Char_U:
00269           case BuiltinType::UChar:                    
00270             return T == C.UnsignedCharTy || T == C.SignedCharTy;
00271           case BuiltinType::Short:
00272             return T == C.UnsignedShortTy;
00273           case BuiltinType::UShort:
00274             return T == C.ShortTy;
00275           case BuiltinType::Int:
00276             return T == C.UnsignedIntTy;
00277           case BuiltinType::UInt:
00278             return T == C.IntTy;
00279           case BuiltinType::Long:
00280             return T == C.UnsignedLongTy;
00281           case BuiltinType::ULong:
00282             return T == C.LongTy;
00283           case BuiltinType::LongLong:
00284             return T == C.UnsignedLongLongTy;
00285           case BuiltinType::ULongLong:
00286             return T == C.LongLongTy;
00287         }
00288       return false;
00289     }
00290 
00291     case CStrTy: {
00292       const PointerType *PT = argTy->getAs<PointerType>();
00293       if (!PT)
00294         return false;
00295       QualType pointeeTy = PT->getPointeeType();
00296       if (const BuiltinType *BT = pointeeTy->getAs<BuiltinType>())
00297         switch (BT->getKind()) {
00298           case BuiltinType::Void:
00299           case BuiltinType::Char_U:
00300           case BuiltinType::UChar:
00301           case BuiltinType::Char_S:
00302           case BuiltinType::SChar:
00303             return true;
00304           default:
00305             break;
00306         }
00307 
00308       return false;
00309     }
00310 
00311     case WCStrTy: {
00312       const PointerType *PT = argTy->getAs<PointerType>();
00313       if (!PT)
00314         return false;
00315       QualType pointeeTy =
00316         C.getCanonicalType(PT->getPointeeType()).getUnqualifiedType();
00317       return pointeeTy == C.getWCharType();
00318     }
00319     
00320     case WIntTy: {
00321       
00322       QualType PromoArg = 
00323         argTy->isPromotableIntegerType()
00324           ? C.getPromotedIntegerType(argTy) : argTy;
00325       
00326       QualType WInt = C.getCanonicalType(C.getWIntType()).getUnqualifiedType();
00327       PromoArg = C.getCanonicalType(PromoArg).getUnqualifiedType();
00328       
00329       // If the promoted argument is the corresponding signed type of the
00330       // wint_t type, then it should match.
00331       if (PromoArg->hasSignedIntegerRepresentation() &&
00332           C.getCorrespondingUnsignedType(PromoArg) == WInt)
00333         return true;
00334 
00335       return WInt == PromoArg;
00336     }
00337 
00338     case CPointerTy:
00339       return argTy->isPointerType() || argTy->isObjCObjectPointerType() ||
00340              argTy->isBlockPointerType() || argTy->isNullPtrType();
00341 
00342     case ObjCPointerTy: {
00343       if (argTy->getAs<ObjCObjectPointerType>() ||
00344           argTy->getAs<BlockPointerType>())
00345         return true;
00346       
00347       // Handle implicit toll-free bridging.
00348       if (const PointerType *PT = argTy->getAs<PointerType>()) {
00349         // Things such as CFTypeRef are really just opaque pointers
00350         // to C structs representing CF types that can often be bridged
00351         // to Objective-C objects.  Since the compiler doesn't know which
00352         // structs can be toll-free bridged, we just accept them all.
00353         QualType pointee = PT->getPointeeType();
00354         if (pointee->getAsStructureType() || pointee->isVoidType())
00355           return true;
00356       }
00357       return false;      
00358     }
00359   }
00360 
00361   llvm_unreachable("Invalid ArgTypeResult Kind!");
00362 }
00363 
00364 QualType ArgTypeResult::getRepresentativeType(ASTContext &C) const {
00365   switch (K) {
00366     case InvalidTy:
00367       llvm_unreachable("No representative type for Invalid ArgTypeResult");
00368     case UnknownTy:
00369       return QualType();
00370     case AnyCharTy:
00371       return C.CharTy;
00372     case SpecificTy:
00373       return T;
00374     case CStrTy:
00375       return C.getPointerType(C.CharTy);
00376     case WCStrTy:
00377       return C.getPointerType(C.getWCharType());
00378     case ObjCPointerTy:
00379       return C.ObjCBuiltinIdTy;
00380     case CPointerTy:
00381       return C.VoidPtrTy;
00382     case WIntTy: {
00383       return C.getWIntType();
00384     }
00385   }
00386 
00387   llvm_unreachable("Invalid ArgTypeResult Kind!");
00388 }
00389 
00390 std::string ArgTypeResult::getRepresentativeTypeName(ASTContext &C) const {
00391   std::string S = getRepresentativeType(C).getAsString();
00392   if (Name && S != Name)
00393     return std::string("'") + Name + "' (aka '" + S + "')";
00394   return std::string("'") + S + "'";
00395 }
00396 
00397 
00398 //===----------------------------------------------------------------------===//
00399 // Methods on OptionalAmount.
00400 //===----------------------------------------------------------------------===//
00401 
00402 ArgTypeResult
00403 analyze_format_string::OptionalAmount::getArgType(ASTContext &Ctx) const {
00404   return Ctx.IntTy;
00405 }
00406 
00407 //===----------------------------------------------------------------------===//
00408 // Methods on LengthModifier.
00409 //===----------------------------------------------------------------------===//
00410 
00411 const char *
00412 analyze_format_string::LengthModifier::toString() const {
00413   switch (kind) {
00414   case AsChar:
00415     return "hh";
00416   case AsShort:
00417     return "h";
00418   case AsLong: // or AsWideChar
00419     return "l";
00420   case AsLongLong:
00421     return "ll";
00422   case AsQuad:
00423     return "q";
00424   case AsIntMax:
00425     return "j";
00426   case AsSizeT:
00427     return "z";
00428   case AsPtrDiff:
00429     return "t";
00430   case AsLongDouble:
00431     return "L";
00432   case AsAllocate:
00433     return "a";
00434   case AsMAllocate:
00435     return "m";
00436   case None:
00437     return "";
00438   }
00439   return NULL;
00440 }
00441 
00442 //===----------------------------------------------------------------------===//
00443 // Methods on ConversionSpecifier.
00444 //===----------------------------------------------------------------------===//
00445 
00446 const char *ConversionSpecifier::toString() const {
00447   switch (kind) {
00448   case dArg: return "d";
00449   case iArg: return "i";
00450   case oArg: return "o";
00451   case uArg: return "u";
00452   case xArg: return "x";
00453   case XArg: return "X";
00454   case fArg: return "f";
00455   case FArg: return "F";
00456   case eArg: return "e";
00457   case EArg: return "E";
00458   case gArg: return "g";
00459   case GArg: return "G";
00460   case aArg: return "a";
00461   case AArg: return "A";
00462   case cArg: return "c";
00463   case sArg: return "s";
00464   case pArg: return "p";
00465   case nArg: return "n";
00466   case PercentArg:  return "%";
00467   case ScanListArg: return "[";
00468   case InvalidSpecifier: return NULL;
00469 
00470   // MacOS X unicode extensions.
00471   case CArg: return "C";
00472   case SArg: return "S";
00473 
00474   // Objective-C specific specifiers.
00475   case ObjCObjArg: return "@";
00476 
00477   // GlibC specific specifiers.
00478   case PrintErrno: return "m";
00479   }
00480   return NULL;
00481 }
00482 
00483 //===----------------------------------------------------------------------===//
00484 // Printing of OptionalAmount, and methods on FormatSpecifier.
00485 //===----------------------------------------------------------------------===//
00486 
00487 void OptionalAmount::toString(raw_ostream &os) const {
00488   switch (hs) {
00489   case Invalid:
00490   case NotSpecified:
00491     return;
00492   case Arg:
00493     if (UsesDotPrefix)
00494         os << ".";
00495     if (usesPositionalArg())
00496       os << "*" << getPositionalArgIndex() << "$";
00497     else
00498       os << "*";
00499     break;
00500   case Constant:
00501     if (UsesDotPrefix)
00502         os << ".";
00503     os << amt;
00504     break;
00505   }
00506 }
00507 
00508 bool FormatSpecifier::hasValidLengthModifier() const {
00509   switch (LM.getKind()) {
00510     case LengthModifier::None:
00511       return true;
00512       
00513     // Handle most integer flags
00514     case LengthModifier::AsChar:
00515     case LengthModifier::AsShort:
00516     case LengthModifier::AsLongLong:
00517     case LengthModifier::AsQuad:
00518     case LengthModifier::AsIntMax:
00519     case LengthModifier::AsSizeT:
00520     case LengthModifier::AsPtrDiff:
00521       switch (CS.getKind()) {
00522         case ConversionSpecifier::dArg:
00523         case ConversionSpecifier::iArg:
00524         case ConversionSpecifier::oArg:
00525         case ConversionSpecifier::uArg:
00526         case ConversionSpecifier::xArg:
00527         case ConversionSpecifier::XArg:
00528         case ConversionSpecifier::nArg:
00529           return true;
00530         default:
00531           return false;
00532       }
00533       
00534     // Handle 'l' flag
00535     case LengthModifier::AsLong:
00536       switch (CS.getKind()) {
00537         case ConversionSpecifier::dArg:
00538         case ConversionSpecifier::iArg:
00539         case ConversionSpecifier::oArg:
00540         case ConversionSpecifier::uArg:
00541         case ConversionSpecifier::xArg:
00542         case ConversionSpecifier::XArg:
00543         case ConversionSpecifier::aArg:
00544         case ConversionSpecifier::AArg:
00545         case ConversionSpecifier::fArg:
00546         case ConversionSpecifier::FArg:
00547         case ConversionSpecifier::eArg:
00548         case ConversionSpecifier::EArg:
00549         case ConversionSpecifier::gArg:
00550         case ConversionSpecifier::GArg:
00551         case ConversionSpecifier::nArg:
00552         case ConversionSpecifier::cArg:
00553         case ConversionSpecifier::sArg:
00554         case ConversionSpecifier::ScanListArg:
00555           return true;
00556         default:
00557           return false;
00558       }
00559       
00560     case LengthModifier::AsLongDouble:
00561       switch (CS.getKind()) {
00562         case ConversionSpecifier::aArg:
00563         case ConversionSpecifier::AArg:
00564         case ConversionSpecifier::fArg:
00565         case ConversionSpecifier::FArg:
00566         case ConversionSpecifier::eArg:
00567         case ConversionSpecifier::EArg:
00568         case ConversionSpecifier::gArg:
00569         case ConversionSpecifier::GArg:
00570           return true;
00571         // GNU extension.
00572         case ConversionSpecifier::dArg:
00573         case ConversionSpecifier::iArg:
00574         case ConversionSpecifier::oArg:
00575         case ConversionSpecifier::uArg:
00576         case ConversionSpecifier::xArg:
00577         case ConversionSpecifier::XArg:
00578           return true;
00579         default:
00580           return false;
00581       }
00582 
00583     case LengthModifier::AsAllocate:
00584       switch (CS.getKind()) {
00585         case ConversionSpecifier::sArg:
00586         case ConversionSpecifier::SArg:
00587         case ConversionSpecifier::ScanListArg:
00588           return true;
00589         default:
00590           return false;
00591       }
00592 
00593     case LengthModifier::AsMAllocate:
00594       switch (CS.getKind()) {
00595         case ConversionSpecifier::cArg:
00596         case ConversionSpecifier::CArg:
00597         case ConversionSpecifier::sArg:
00598         case ConversionSpecifier::SArg:
00599         case ConversionSpecifier::ScanListArg:
00600           return true;
00601         default:
00602           return false;
00603       }
00604   }
00605   llvm_unreachable("Invalid LengthModifier Kind!");
00606 }
00607 
00608 bool FormatSpecifier::hasStandardLengthModifier() const {
00609   switch (LM.getKind()) {
00610     case LengthModifier::None:
00611     case LengthModifier::AsChar:
00612     case LengthModifier::AsShort:
00613     case LengthModifier::AsLong:
00614     case LengthModifier::AsLongLong:
00615     case LengthModifier::AsIntMax:
00616     case LengthModifier::AsSizeT:
00617     case LengthModifier::AsPtrDiff:
00618     case LengthModifier::AsLongDouble:
00619       return true;
00620     case LengthModifier::AsAllocate:
00621     case LengthModifier::AsMAllocate:
00622     case LengthModifier::AsQuad:
00623       return false;
00624   }
00625   llvm_unreachable("Invalid LengthModifier Kind!");
00626 }
00627 
00628 bool FormatSpecifier::hasStandardConversionSpecifier(const LangOptions &LangOpt) const {
00629   switch (CS.getKind()) {
00630     case ConversionSpecifier::cArg:
00631     case ConversionSpecifier::dArg:
00632     case ConversionSpecifier::iArg:
00633     case ConversionSpecifier::oArg:
00634     case ConversionSpecifier::uArg:
00635     case ConversionSpecifier::xArg:
00636     case ConversionSpecifier::XArg:
00637     case ConversionSpecifier::fArg:
00638     case ConversionSpecifier::FArg:
00639     case ConversionSpecifier::eArg:
00640     case ConversionSpecifier::EArg:
00641     case ConversionSpecifier::gArg:
00642     case ConversionSpecifier::GArg:
00643     case ConversionSpecifier::aArg:
00644     case ConversionSpecifier::AArg:
00645     case ConversionSpecifier::sArg:
00646     case ConversionSpecifier::pArg:
00647     case ConversionSpecifier::nArg:
00648     case ConversionSpecifier::ObjCObjArg:
00649     case ConversionSpecifier::ScanListArg:
00650     case ConversionSpecifier::PercentArg:
00651       return true;
00652     case ConversionSpecifier::CArg:
00653     case ConversionSpecifier::SArg:
00654       return LangOpt.ObjC1 || LangOpt.ObjC2;
00655     case ConversionSpecifier::InvalidSpecifier:
00656     case ConversionSpecifier::PrintErrno:
00657       return false;
00658   }
00659   llvm_unreachable("Invalid ConversionSpecifier Kind!");
00660 }
00661 
00662 bool FormatSpecifier::hasStandardLengthConversionCombination() const {
00663   if (LM.getKind() == LengthModifier::AsLongDouble) {
00664     switch(CS.getKind()) {
00665         case ConversionSpecifier::dArg:
00666         case ConversionSpecifier::iArg:
00667         case ConversionSpecifier::oArg:
00668         case ConversionSpecifier::uArg:
00669         case ConversionSpecifier::xArg:
00670         case ConversionSpecifier::XArg:
00671           return false;
00672         default:
00673           return true;
00674     }
00675   }
00676   return true;
00677 }