clang API Documentation

FormatString.h
Go to the documentation of this file.
00001 //= FormatString.h - Analysis of printf/fprintf format strings --*- C++ -*-===//
00002 //
00003 //                     The LLVM Compiler Infrastructure
00004 //
00005 // This file is distributed under the University of Illinois Open Source
00006 // License. See LICENSE.TXT for details.
00007 //
00008 //===----------------------------------------------------------------------===//
00009 //
00010 // This file defines APIs for analyzing the format strings of printf, fscanf,
00011 // and friends.
00012 //
00013 // The structure of format strings for fprintf is described in C99 7.19.6.1.
00014 //
00015 // The structure of format strings for fscanf is described in C99 7.19.6.2.
00016 //
00017 //===----------------------------------------------------------------------===//
00018 
00019 #ifndef LLVM_CLANG_FORMAT_H
00020 #define LLVM_CLANG_FORMAT_H
00021 
00022 #include "clang/AST/CanonicalType.h"
00023 
00024 namespace clang {
00025 
00026 //===----------------------------------------------------------------------===//
00027 /// Common components of both fprintf and fscanf format strings.
00028 namespace analyze_format_string {
00029 
/// Class representing optional flags with location and representation
/// information.
///
/// A flag starts out cleared and without a recorded position; setPosition()
/// has to be called before getPosition() may be used.
class OptionalFlag {
public:
  /// Creates a cleared flag whose textual form is \p Representation.
  /// The position is initialised to null so that the assertion in
  /// getPosition() reliably catches a query made before setPosition().
  OptionalFlag(const char *Representation)
      : representation(Representation), position(0), flag(false) {}

  /// Returns true if the flag is currently set.  This is const so that it can
  /// be called through a 'const OptionalFlag &'.
  bool isSet() const { return flag; }
  /// Marks the flag as set.
  void set() { flag = true; }
  /// Marks the flag as not set.
  void clear() { flag = false; }

  /// Records where the flag was found.  \p position must be non-null.
  void setPosition(const char *position) {
    assert(position);
    this->position = position;
  }
  /// Returns the recorded position; asserts that one has been recorded.
  const char *getPosition() const {
    assert(position);
    return position;
  }
  /// Returns the textual representation supplied at construction.
  const char *toString() const { return representation; }

  // Overloaded operators for bool like qualities
  operator bool() const { return flag; }
  OptionalFlag& operator=(const bool &rhs) {
    flag = rhs;
    return *this;  // Return a reference to myself.
  }
private:
  const char *representation;
  const char *position;
  bool flag;
};
00060 
/// Represents the length modifier in a format string in scanf/printf.
///
/// Stores the kind of modifier together with a pointer to where it starts.
class LengthModifier {
public:
  enum Kind {
    None,
    AsChar,       // 'hh'
    AsShort,      // 'h'
    AsLong,       // 'l'
    AsLongLong,   // 'll'
    AsQuad,       // 'q' (BSD, deprecated, for 64-bit integer types)
    AsIntMax,     // 'j'
    AsSizeT,      // 'z'
    AsPtrDiff,    // 't'
    AsLongDouble, // 'L'
    AsAllocate,   // for '%as', GNU extension to C90 scanf
    AsMAllocate,  // for '%ms', GNU extension to scanf
    AsWideChar = AsLong // for '%ls', only makes sense for printf
  };

  /// Creates an absent modifier (kind None, null start).
  LengthModifier() : Position(0), kind(None) {}

  /// Creates a modifier of kind \p k starting at \p pos.
  LengthModifier(const char *pos, Kind k) : Position(pos), kind(k) {}

  /// Pointer to the first character of the modifier.
  const char *getStart() const { return Position; }

  /// Number of characters the modifier occupies: 0 for None, 2 for the
  /// two-letter modifiers ('hh' and 'll'), 1 for everything else.
  unsigned getLength() const {
    if (kind == None)
      return 0;
    if (kind == AsChar || kind == AsLongLong)
      return 2;
    return 1;
  }

  Kind getKind() const { return kind; }
  void setKind(Kind k) { kind = k; }

  /// Textual form of the modifier (defined out of line).
  const char *toString() const;

private:
  const char *Position;
  Kind kind;
};
00110 
00111 class ConversionSpecifier {
00112 public:
00113   enum Kind {
00114     InvalidSpecifier = 0,
00115       // C99 conversion specifiers.
00116     cArg,
00117     dArg,
00118     iArg,
00119     IntArgBeg = cArg, IntArgEnd = iArg,
00120 
00121     oArg,
00122     uArg,
00123     xArg,
00124     XArg,
00125     UIntArgBeg = oArg, UIntArgEnd = XArg,
00126 
00127     fArg,
00128     FArg,
00129     eArg,
00130     EArg,
00131     gArg,
00132     GArg,
00133     aArg,
00134     AArg,
00135     DoubleArgBeg = fArg, DoubleArgEnd = AArg,
00136 
00137     sArg,
00138     pArg,
00139     nArg,
00140     PercentArg,
00141     CArg,
00142     SArg,
00143 
00144     // ** Printf-specific **
00145 
00146     // Objective-C specific specifiers.
00147     ObjCObjArg,  // '@'
00148     ObjCBeg = ObjCObjArg, ObjCEnd = ObjCObjArg,
00149 
00150     // GlibC specific specifiers.
00151     PrintErrno,   // 'm'
00152 
00153     PrintfConvBeg = ObjCObjArg, PrintfConvEnd = PrintErrno,
00154 
00155     // ** Scanf-specific **
00156     ScanListArg, // '['
00157     ScanfConvBeg = ScanListArg, ScanfConvEnd = ScanListArg
00158   };
00159 
00160   ConversionSpecifier(bool isPrintf)
00161     : IsPrintf(isPrintf), Position(0), EndScanList(0), kind(InvalidSpecifier) {}
00162 
00163   ConversionSpecifier(bool isPrintf, const char *pos, Kind k)
00164     : IsPrintf(isPrintf), Position(pos), EndScanList(0), kind(k) {}
00165 
00166   const char *getStart() const {
00167     return Position;
00168   }
00169 
00170   StringRef getCharacters() const {
00171     return StringRef(getStart(), getLength());
00172   }
00173 
00174   bool consumesDataArgument() const {
00175     switch (kind) {
00176       case PrintErrno:
00177         assert(IsPrintf);
00178         return false;
00179       case PercentArg:
00180         return false;
00181       default:
00182         return true;
00183     }
00184   }
00185 
00186   Kind getKind() const { return kind; }
00187   void setKind(Kind k) { kind = k; }
00188   unsigned getLength() const {
00189     return EndScanList ? EndScanList - Position : 1;
00190   }
00191 
00192   bool isUIntArg() const { return kind >= UIntArgBeg && kind <= UIntArgEnd; }
00193   const char *toString() const;
00194 
00195   bool isPrintfKind() const { return IsPrintf; }
00196 
00197 protected:
00198   bool IsPrintf;
00199   const char *Position;
00200   const char *EndScanList;
00201   Kind kind;
00202 };
00203 
00204 class ArgTypeResult {
00205 public:
00206   enum Kind { UnknownTy, InvalidTy, SpecificTy, ObjCPointerTy, CPointerTy,
00207               AnyCharTy, CStrTy, WCStrTy, WIntTy };
00208 private:
00209   const Kind K;
00210   QualType T;
00211   const char *Name;
00212   ArgTypeResult(bool) : K(InvalidTy), Name(0) {}
00213 public:
00214   ArgTypeResult(Kind k = UnknownTy) : K(k), Name(0) {}
00215   ArgTypeResult(Kind k, const char *n) : K(k), Name(n) {}
00216   ArgTypeResult(QualType t) : K(SpecificTy), T(t), Name(0) {}
00217   ArgTypeResult(QualType t, const char *n) : K(SpecificTy), T(t), Name(n)  {}
00218   ArgTypeResult(CanQualType t) : K(SpecificTy), T(t), Name(0) {}
00219 
00220   static ArgTypeResult Invalid() { return ArgTypeResult(true); }
00221 
00222   bool isValid() const { return K != InvalidTy; }
00223 
00224   const QualType *getSpecificType() const {
00225     return K == SpecificTy ? &T : 0;
00226   }
00227 
00228   bool matchesType(ASTContext &C, QualType argTy) const;
00229 
00230   bool matchesAnyObjCObjectRef() const { return K == ObjCPointerTy; }
00231 
00232   QualType getRepresentativeType(ASTContext &C) const;
00233 
00234   std::string getRepresentativeTypeName(ASTContext &C) const;
00235 };
00236 
00237 class OptionalAmount {
00238 public:
00239   enum HowSpecified { NotSpecified, Constant, Arg, Invalid };
00240 
00241   OptionalAmount(HowSpecified howSpecified,
00242                  unsigned amount,
00243                  const char *amountStart,
00244                  unsigned amountLength,
00245                  bool usesPositionalArg)
00246   : start(amountStart), length(amountLength), hs(howSpecified), amt(amount),
00247   UsesPositionalArg(usesPositionalArg), UsesDotPrefix(0) {}
00248 
00249   OptionalAmount(bool valid = true)
00250   : start(0),length(0), hs(valid ? NotSpecified : Invalid), amt(0),
00251   UsesPositionalArg(0), UsesDotPrefix(0) {}
00252 
00253   bool isInvalid() const {
00254     return hs == Invalid;
00255   }
00256 
00257   HowSpecified getHowSpecified() const { return hs; }
00258   void setHowSpecified(HowSpecified h) { hs = h; }
00259 
00260   bool hasDataArgument() const { return hs == Arg; }
00261 
00262   unsigned getArgIndex() const {
00263     assert(hasDataArgument());
00264     return amt;
00265   }
00266 
00267   unsigned getConstantAmount() const {
00268     assert(hs == Constant);
00269     return amt;
00270   }
00271 
00272   const char *getStart() const {
00273       // We include the . character if it is given.
00274     return start - UsesDotPrefix;
00275   }
00276 
00277   unsigned getConstantLength() const {
00278     assert(hs == Constant);
00279     return length + UsesDotPrefix;
00280   }
00281 
00282   ArgTypeResult getArgType(ASTContext &Ctx) const;
00283 
00284   void toString(raw_ostream &os) const;
00285 
00286   bool usesPositionalArg() const { return (bool) UsesPositionalArg; }
00287   unsigned getPositionalArgIndex() const {
00288     assert(hasDataArgument());
00289     return amt + 1;
00290   }
00291 
00292   bool usesDotPrefix() const { return UsesDotPrefix; }
00293   void setUsesDotPrefix() { UsesDotPrefix = true; }
00294 
00295 private:
00296   const char *start;
00297   unsigned length;
00298   HowSpecified hs;
00299   unsigned amt;
00300   bool UsesPositionalArg : 1;
00301   bool UsesDotPrefix;
00302 };
00303 
00304 
00305 class FormatSpecifier {
00306 protected:
00307   LengthModifier LM;
00308   OptionalAmount FieldWidth;
00309   ConversionSpecifier CS;
00310   /// Positional arguments, an IEEE extension:
00311   ///  IEEE Std 1003.1, 2004 Edition
00312   ///  http://www.opengroup.org/onlinepubs/009695399/functions/printf.html
00313   bool UsesPositionalArg;
00314   unsigned argIndex;
00315 public:
00316   FormatSpecifier(bool isPrintf)
00317     : CS(isPrintf), UsesPositionalArg(false), argIndex(0) {}
00318 
00319   void setLengthModifier(LengthModifier lm) {
00320     LM = lm;
00321   }
00322 
00323   void setUsesPositionalArg() { UsesPositionalArg = true; }
00324 
00325   void setArgIndex(unsigned i) {
00326     argIndex = i;
00327   }
00328 
00329   unsigned getArgIndex() const {
00330     return argIndex;
00331   }
00332 
00333   unsigned getPositionalArgIndex() const {
00334     return argIndex + 1;
00335   }
00336 
00337   const LengthModifier &getLengthModifier() const {
00338     return LM;
00339   }
00340 
00341   const OptionalAmount &getFieldWidth() const {
00342     return FieldWidth;
00343   }
00344 
00345   void setFieldWidth(const OptionalAmount &Amt) {
00346     FieldWidth = Amt;
00347   }
00348 
00349   bool usesPositionalArg() const { return UsesPositionalArg; }
00350 
00351   bool hasValidLengthModifier() const;
00352 
00353   bool hasStandardLengthModifier() const;
00354 
00355   bool hasStandardConversionSpecifier(const LangOptions &LangOpt) const;
00356 
00357   bool hasStandardLengthConversionCombination() const;
00358 };
00359 
00360 } // end analyze_format_string namespace
00361 
00362 //===----------------------------------------------------------------------===//
00363 /// Pieces specific to fprintf format strings.
00364 
00365 namespace analyze_printf {
00366 
00367 class PrintfConversionSpecifier :
00368   public analyze_format_string::ConversionSpecifier  {
00369 public:
00370   PrintfConversionSpecifier()
00371     : ConversionSpecifier(true, 0, InvalidSpecifier) {}
00372 
00373   PrintfConversionSpecifier(const char *pos, Kind k)
00374     : ConversionSpecifier(true, pos, k) {}
00375 
00376   bool isObjCArg() const { return kind >= ObjCBeg && kind <= ObjCEnd; }
00377   bool isIntArg() const { return kind >= IntArgBeg && kind <= IntArgEnd; }
00378   bool isDoubleArg() const { return kind >= DoubleArgBeg &&
00379                                     kind <= DoubleArgEnd; }
00380   unsigned getLength() const {
00381       // Conversion specifiers currently only are represented by
00382       // single characters, but we be flexible.
00383     return 1;
00384   }
00385 
00386   static bool classof(const analyze_format_string::ConversionSpecifier *CS) {
00387     return CS->isPrintfKind();
00388   }
00389 };
00390 
00391 using analyze_format_string::ArgTypeResult;
00392 using analyze_format_string::LengthModifier;
00393 using analyze_format_string::OptionalAmount;
00394 using analyze_format_string::OptionalFlag;
00395 
00396 class PrintfSpecifier : public analyze_format_string::FormatSpecifier {
00397   OptionalFlag HasThousandsGrouping; // ''', POSIX extension.
00398   OptionalFlag IsLeftJustified; // '-'
00399   OptionalFlag HasPlusPrefix; // '+'
00400   OptionalFlag HasSpacePrefix; // ' '
00401   OptionalFlag HasAlternativeForm; // '#'
00402   OptionalFlag HasLeadingZeroes; // '0'
00403   OptionalAmount Precision;
00404 public:
00405   PrintfSpecifier() :
00406     FormatSpecifier(/* isPrintf = */ true),
00407     HasThousandsGrouping("'"), IsLeftJustified("-"), HasPlusPrefix("+"),
00408     HasSpacePrefix(" "), HasAlternativeForm("#"), HasLeadingZeroes("0") {}
00409 
00410   static PrintfSpecifier Parse(const char *beg, const char *end);
00411 
00412     // Methods for incrementally constructing the PrintfSpecifier.
00413   void setConversionSpecifier(const PrintfConversionSpecifier &cs) {
00414     CS = cs;
00415   }
00416   void setHasThousandsGrouping(const char *position) {
00417     HasThousandsGrouping = true;
00418     HasThousandsGrouping.setPosition(position);
00419   }
00420   void setIsLeftJustified(const char *position) {
00421     IsLeftJustified = true;
00422     IsLeftJustified.setPosition(position);
00423   }
00424   void setHasPlusPrefix(const char *position) {
00425     HasPlusPrefix = true;
00426     HasPlusPrefix.setPosition(position);
00427   }
00428   void setHasSpacePrefix(const char *position) {
00429     HasSpacePrefix = true;
00430     HasSpacePrefix.setPosition(position);
00431   }
00432   void setHasAlternativeForm(const char *position) {
00433     HasAlternativeForm = true;
00434     HasAlternativeForm.setPosition(position);
00435   }
00436   void setHasLeadingZeros(const char *position) {
00437     HasLeadingZeroes = true;
00438     HasLeadingZeroes.setPosition(position);
00439   }
00440   void setUsesPositionalArg() { UsesPositionalArg = true; }
00441 
00442     // Methods for querying the format specifier.
00443 
00444   const PrintfConversionSpecifier &getConversionSpecifier() const {
00445     return cast<PrintfConversionSpecifier>(CS);
00446   }
00447 
00448   void setPrecision(const OptionalAmount &Amt) {
00449     Precision = Amt;
00450     Precision.setUsesDotPrefix();
00451   }
00452 
00453   const OptionalAmount &getPrecision() const {
00454     return Precision;
00455   }
00456 
00457   bool consumesDataArgument() const {
00458     return getConversionSpecifier().consumesDataArgument();
00459   }
00460 
00461   /// \brief Returns the builtin type that a data argument
00462   /// paired with this format specifier should have.  This method
00463   /// will return null if the format specifier does not have
00464   /// a matching data argument or the matching argument matches
00465   /// more than one type.
00466   ArgTypeResult getArgType(ASTContext &Ctx, bool IsObjCLiteral) const;
00467 
00468   const OptionalFlag &hasThousandsGrouping() const {
00469       return HasThousandsGrouping;
00470   }
00471   const OptionalFlag &isLeftJustified() const { return IsLeftJustified; }
00472   const OptionalFlag &hasPlusPrefix() const { return HasPlusPrefix; }
00473   const OptionalFlag &hasAlternativeForm() const { return HasAlternativeForm; }
00474   const OptionalFlag &hasLeadingZeros() const { return HasLeadingZeroes; }
00475   const OptionalFlag &hasSpacePrefix() const { return HasSpacePrefix; }
00476   bool usesPositionalArg() const { return UsesPositionalArg; }
00477 
00478   /// Changes the specifier and length according to a QualType, retaining any
00479   /// flags or options. Returns true on success, or false when a conversion
00480   /// was not successful.
00481   bool fixType(QualType QT, const LangOptions &LangOpt, ASTContext &Ctx,
00482                bool IsObjCLiteral);
00483 
00484   void toString(raw_ostream &os) const;
00485 
00486   // Validation methods - to check if any element results in undefined behavior
00487   bool hasValidPlusPrefix() const;
00488   bool hasValidAlternativeForm() const;
00489   bool hasValidLeadingZeros() const;
00490   bool hasValidSpacePrefix() const;
00491   bool hasValidLeftJustified() const;
00492   bool hasValidThousandsGroupingPrefix() const;
00493 
00494   bool hasValidPrecision() const;
00495   bool hasValidFieldWidth() const;
00496 };
00497 }  // end analyze_printf namespace
00498 
00499 //===----------------------------------------------------------------------===//
00500 /// Pieces specific to fscanf format strings.
00501 
00502 namespace analyze_scanf {
00503 
00504 class ScanfConversionSpecifier :
00505     public analyze_format_string::ConversionSpecifier  {
00506 public:
00507   ScanfConversionSpecifier()
00508     : ConversionSpecifier(false, 0, InvalidSpecifier) {}
00509 
00510   ScanfConversionSpecifier(const char *pos, Kind k)
00511     : ConversionSpecifier(false, pos, k) {}
00512 
00513   void setEndScanList(const char *pos) { EndScanList = pos; }
00514 
00515   static bool classof(const analyze_format_string::ConversionSpecifier *CS) {
00516     return !CS->isPrintfKind();
00517   }
00518 };
00519 
00520 using analyze_format_string::ArgTypeResult;
00521 using analyze_format_string::LengthModifier;
00522 using analyze_format_string::OptionalAmount;
00523 using analyze_format_string::OptionalFlag;
00524 
00525 class ScanfArgTypeResult : public ArgTypeResult {
00526 public:
00527   enum Kind { UnknownTy, InvalidTy, CStrTy, WCStrTy, PtrToArgTypeResultTy };
00528 private:
00529   Kind K;
00530   ArgTypeResult A;
00531   const char *Name;
00532   QualType getRepresentativeType(ASTContext &C) const;
00533 public:
00534   ScanfArgTypeResult(Kind k = UnknownTy, const char* n = 0) : K(k), Name(n) {}
00535   ScanfArgTypeResult(ArgTypeResult a, const char *n = 0)
00536       : K(PtrToArgTypeResultTy), A(a), Name(n) {
00537     assert(A.isValid());
00538   }
00539 
00540   static ScanfArgTypeResult Invalid() { return ScanfArgTypeResult(InvalidTy); }
00541 
00542   bool isValid() const { return K != InvalidTy; }
00543 
00544   bool matchesType(ASTContext& C, QualType argTy) const;
00545 
00546   std::string getRepresentativeTypeName(ASTContext& C) const;
00547 };
00548 
00549 class ScanfSpecifier : public analyze_format_string::FormatSpecifier {
00550   OptionalFlag SuppressAssignment; // '*'
00551 public:
00552   ScanfSpecifier() :
00553     FormatSpecifier(/* isPrintf = */ false),
00554     SuppressAssignment("*") {}
00555 
00556   void setSuppressAssignment(const char *position) {
00557     SuppressAssignment = true;
00558     SuppressAssignment.setPosition(position);
00559   }
00560 
00561   const OptionalFlag &getSuppressAssignment() const {
00562     return SuppressAssignment;
00563   }
00564 
00565   void setConversionSpecifier(const ScanfConversionSpecifier &cs) {
00566     CS = cs;
00567   }
00568 
00569   const ScanfConversionSpecifier &getConversionSpecifier() const {
00570     return cast<ScanfConversionSpecifier>(CS);
00571   }
00572 
00573   bool consumesDataArgument() const {
00574     return CS.consumesDataArgument() && !SuppressAssignment;
00575   }
00576 
00577   ScanfArgTypeResult getArgType(ASTContext &Ctx) const;
00578 
00579   bool fixType(QualType QT, const LangOptions &LangOpt, ASTContext &Ctx);
00580 
00581   void toString(raw_ostream &os) const;
00582 
00583   static ScanfSpecifier Parse(const char *beg, const char *end);
00584 };
00585 
00586 } // end analyze_scanf namespace
00587 
00588 //===----------------------------------------------------------------------===//
00589 // Parsing and processing of format strings (both fprintf and fscanf).
00590 
00591 namespace analyze_format_string {
00592 
/// Identifies which amount a position belongs to; passed to
/// FormatStringHandler::HandleInvalidPosition.
enum PositionContext {
  FieldWidthPos = 0,
  PrecisionPos = 1
};
00594 
00595 class FormatStringHandler {
00596 public:
00597   FormatStringHandler() {}
00598   virtual ~FormatStringHandler();
00599 
00600   virtual void HandleNullChar(const char *nullCharacter) {}
00601 
00602   virtual void HandlePosition(const char *startPos, unsigned posLen) {}
00603 
00604   virtual void HandleInvalidPosition(const char *startPos, unsigned posLen,
00605                                      PositionContext p) {}
00606 
00607   virtual void HandleZeroPosition(const char *startPos, unsigned posLen) {}
00608 
00609   virtual void HandleIncompleteSpecifier(const char *startSpecifier,
00610                                          unsigned specifierLen) {}
00611 
00612   // Printf-specific handlers.
00613 
00614   virtual bool HandleInvalidPrintfConversionSpecifier(
00615                                       const analyze_printf::PrintfSpecifier &FS,
00616                                       const char *startSpecifier,
00617                                       unsigned specifierLen) {
00618     return true;
00619   }
00620 
00621   virtual bool HandlePrintfSpecifier(const analyze_printf::PrintfSpecifier &FS,
00622                                      const char *startSpecifier,
00623                                      unsigned specifierLen) {
00624     return true;
00625   }
00626 
00627     // Scanf-specific handlers.
00628 
00629   virtual bool HandleInvalidScanfConversionSpecifier(
00630                                         const analyze_scanf::ScanfSpecifier &FS,
00631                                         const char *startSpecifier,
00632                                         unsigned specifierLen) {
00633     return true;
00634   }
00635 
00636   virtual bool HandleScanfSpecifier(const analyze_scanf::ScanfSpecifier &FS,
00637                                     const char *startSpecifier,
00638                                     unsigned specifierLen) {
00639     return true;
00640   }
00641 
00642   virtual void HandleIncompleteScanList(const char *start, const char *end) {}
00643 };
00644 
/// Entry point for processing a printf-style format string delimited by
/// \p beg and \p end, using the handler \p H and language options \p LO.
/// Declared here; the definition is not in this header.
/// NOTE(review): the meaning of the returned bool is not visible from this
/// header -- confirm against the definition before relying on it.
bool ParsePrintfString(FormatStringHandler &H,
                       const char *beg, const char *end, const LangOptions &LO);

/// Entry point for processing a scanf-style format string delimited by
/// \p beg and \p end, using the handler \p H and language options \p LO.
/// Declared here; the definition is not in this header.
/// NOTE(review): the meaning of the returned bool is not visible from this
/// header -- confirm against the definition before relying on it.
bool ParseScanfString(FormatStringHandler &H,
                      const char *beg, const char *end, const LangOptions &LO);
00650 
00651 } // end analyze_format_string namespace
00652 } // end clang namespace
00653 #endif