// clang API Documentation
00001 //= FormatString.h - Analysis of printf/fprintf format strings --*- C++ -*-===// 00002 // 00003 // The LLVM Compiler Infrastructure 00004 // 00005 // This file is distributed under the University of Illinois Open Source 00006 // License. See LICENSE.TXT for details. 00007 // 00008 //===----------------------------------------------------------------------===// 00009 // 00010 // This file defines APIs for analyzing the format strings of printf, fscanf, 00011 // and friends. 00012 // 00013 // The structure of format strings for fprintf are described in C99 7.19.6.1. 00014 // 00015 // The structure of format strings for fscanf are described in C99 7.19.6.2. 00016 // 00017 //===----------------------------------------------------------------------===// 00018 00019 #ifndef LLVM_CLANG_FORMAT_H 00020 #define LLVM_CLANG_FORMAT_H 00021 00022 #include "clang/AST/CanonicalType.h" 00023 00024 namespace clang { 00025 00026 //===----------------------------------------------------------------------===// 00027 /// Common components of both fprintf and fscanf format strings. 00028 namespace analyze_format_string { 00029 00030 /// Class representing optional flags with location and representation 00031 /// information. 00032 class OptionalFlag { 00033 public: 00034 OptionalFlag(const char *Representation) 00035 : representation(Representation), flag(false) {} 00036 bool isSet() { return flag; } 00037 void set() { flag = true; } 00038 void clear() { flag = false; } 00039 void setPosition(const char *position) { 00040 assert(position); 00041 this->position = position; 00042 } 00043 const char *getPosition() const { 00044 assert(position); 00045 return position; 00046 } 00047 const char *toString() const { return representation; } 00048 00049 // Overloaded operators for bool like qualities 00050 operator bool() const { return flag; } 00051 OptionalFlag& operator=(const bool &rhs) { 00052 flag = rhs; 00053 return *this; // Return a reference to myself. 
00054 } 00055 private: 00056 const char *representation; 00057 const char *position; 00058 bool flag; 00059 }; 00060 00061 /// Represents the length modifier in a format string in scanf/printf. 00062 class LengthModifier { 00063 public: 00064 enum Kind { 00065 None, 00066 AsChar, // 'hh' 00067 AsShort, // 'h' 00068 AsLong, // 'l' 00069 AsLongLong, // 'll' 00070 AsQuad, // 'q' (BSD, deprecated, for 64-bit integer types) 00071 AsIntMax, // 'j' 00072 AsSizeT, // 'z' 00073 AsPtrDiff, // 't' 00074 AsLongDouble, // 'L' 00075 AsAllocate, // for '%as', GNU extension to C90 scanf 00076 AsMAllocate, // for '%ms', GNU extension to scanf 00077 AsWideChar = AsLong // for '%ls', only makes sense for printf 00078 }; 00079 00080 LengthModifier() 00081 : Position(0), kind(None) {} 00082 LengthModifier(const char *pos, Kind k) 00083 : Position(pos), kind(k) {} 00084 00085 const char *getStart() const { 00086 return Position; 00087 } 00088 00089 unsigned getLength() const { 00090 switch (kind) { 00091 default: 00092 return 1; 00093 case AsLongLong: 00094 case AsChar: 00095 return 2; 00096 case None: 00097 return 0; 00098 } 00099 } 00100 00101 Kind getKind() const { return kind; } 00102 void setKind(Kind k) { kind = k; } 00103 00104 const char *toString() const; 00105 00106 private: 00107 const char *Position; 00108 Kind kind; 00109 }; 00110 00111 class ConversionSpecifier { 00112 public: 00113 enum Kind { 00114 InvalidSpecifier = 0, 00115 // C99 conversion specifiers. 00116 cArg, 00117 dArg, 00118 iArg, 00119 IntArgBeg = cArg, IntArgEnd = iArg, 00120 00121 oArg, 00122 uArg, 00123 xArg, 00124 XArg, 00125 UIntArgBeg = oArg, UIntArgEnd = XArg, 00126 00127 fArg, 00128 FArg, 00129 eArg, 00130 EArg, 00131 gArg, 00132 GArg, 00133 aArg, 00134 AArg, 00135 DoubleArgBeg = fArg, DoubleArgEnd = AArg, 00136 00137 sArg, 00138 pArg, 00139 nArg, 00140 PercentArg, 00141 CArg, 00142 SArg, 00143 00144 // ** Printf-specific ** 00145 00146 // Objective-C specific specifiers. 
00147 ObjCObjArg, // '@' 00148 ObjCBeg = ObjCObjArg, ObjCEnd = ObjCObjArg, 00149 00150 // GlibC specific specifiers. 00151 PrintErrno, // 'm' 00152 00153 PrintfConvBeg = ObjCObjArg, PrintfConvEnd = PrintErrno, 00154 00155 // ** Scanf-specific ** 00156 ScanListArg, // '[' 00157 ScanfConvBeg = ScanListArg, ScanfConvEnd = ScanListArg 00158 }; 00159 00160 ConversionSpecifier(bool isPrintf) 00161 : IsPrintf(isPrintf), Position(0), EndScanList(0), kind(InvalidSpecifier) {} 00162 00163 ConversionSpecifier(bool isPrintf, const char *pos, Kind k) 00164 : IsPrintf(isPrintf), Position(pos), EndScanList(0), kind(k) {} 00165 00166 const char *getStart() const { 00167 return Position; 00168 } 00169 00170 StringRef getCharacters() const { 00171 return StringRef(getStart(), getLength()); 00172 } 00173 00174 bool consumesDataArgument() const { 00175 switch (kind) { 00176 case PrintErrno: 00177 assert(IsPrintf); 00178 return false; 00179 case PercentArg: 00180 return false; 00181 default: 00182 return true; 00183 } 00184 } 00185 00186 Kind getKind() const { return kind; } 00187 void setKind(Kind k) { kind = k; } 00188 unsigned getLength() const { 00189 return EndScanList ? 
EndScanList - Position : 1; 00190 } 00191 00192 bool isUIntArg() const { return kind >= UIntArgBeg && kind <= UIntArgEnd; } 00193 const char *toString() const; 00194 00195 bool isPrintfKind() const { return IsPrintf; } 00196 00197 protected: 00198 bool IsPrintf; 00199 const char *Position; 00200 const char *EndScanList; 00201 Kind kind; 00202 }; 00203 00204 class ArgTypeResult { 00205 public: 00206 enum Kind { UnknownTy, InvalidTy, SpecificTy, ObjCPointerTy, CPointerTy, 00207 AnyCharTy, CStrTy, WCStrTy, WIntTy }; 00208 private: 00209 const Kind K; 00210 QualType T; 00211 const char *Name; 00212 ArgTypeResult(bool) : K(InvalidTy), Name(0) {} 00213 public: 00214 ArgTypeResult(Kind k = UnknownTy) : K(k), Name(0) {} 00215 ArgTypeResult(Kind k, const char *n) : K(k), Name(n) {} 00216 ArgTypeResult(QualType t) : K(SpecificTy), T(t), Name(0) {} 00217 ArgTypeResult(QualType t, const char *n) : K(SpecificTy), T(t), Name(n) {} 00218 ArgTypeResult(CanQualType t) : K(SpecificTy), T(t), Name(0) {} 00219 00220 static ArgTypeResult Invalid() { return ArgTypeResult(true); } 00221 00222 bool isValid() const { return K != InvalidTy; } 00223 00224 const QualType *getSpecificType() const { 00225 return K == SpecificTy ? 
&T : 0; 00226 } 00227 00228 bool matchesType(ASTContext &C, QualType argTy) const; 00229 00230 bool matchesAnyObjCObjectRef() const { return K == ObjCPointerTy; } 00231 00232 QualType getRepresentativeType(ASTContext &C) const; 00233 00234 std::string getRepresentativeTypeName(ASTContext &C) const; 00235 }; 00236 00237 class OptionalAmount { 00238 public: 00239 enum HowSpecified { NotSpecified, Constant, Arg, Invalid }; 00240 00241 OptionalAmount(HowSpecified howSpecified, 00242 unsigned amount, 00243 const char *amountStart, 00244 unsigned amountLength, 00245 bool usesPositionalArg) 00246 : start(amountStart), length(amountLength), hs(howSpecified), amt(amount), 00247 UsesPositionalArg(usesPositionalArg), UsesDotPrefix(0) {} 00248 00249 OptionalAmount(bool valid = true) 00250 : start(0),length(0), hs(valid ? NotSpecified : Invalid), amt(0), 00251 UsesPositionalArg(0), UsesDotPrefix(0) {} 00252 00253 bool isInvalid() const { 00254 return hs == Invalid; 00255 } 00256 00257 HowSpecified getHowSpecified() const { return hs; } 00258 void setHowSpecified(HowSpecified h) { hs = h; } 00259 00260 bool hasDataArgument() const { return hs == Arg; } 00261 00262 unsigned getArgIndex() const { 00263 assert(hasDataArgument()); 00264 return amt; 00265 } 00266 00267 unsigned getConstantAmount() const { 00268 assert(hs == Constant); 00269 return amt; 00270 } 00271 00272 const char *getStart() const { 00273 // We include the . character if it is given. 
00274 return start - UsesDotPrefix; 00275 } 00276 00277 unsigned getConstantLength() const { 00278 assert(hs == Constant); 00279 return length + UsesDotPrefix; 00280 } 00281 00282 ArgTypeResult getArgType(ASTContext &Ctx) const; 00283 00284 void toString(raw_ostream &os) const; 00285 00286 bool usesPositionalArg() const { return (bool) UsesPositionalArg; } 00287 unsigned getPositionalArgIndex() const { 00288 assert(hasDataArgument()); 00289 return amt + 1; 00290 } 00291 00292 bool usesDotPrefix() const { return UsesDotPrefix; } 00293 void setUsesDotPrefix() { UsesDotPrefix = true; } 00294 00295 private: 00296 const char *start; 00297 unsigned length; 00298 HowSpecified hs; 00299 unsigned amt; 00300 bool UsesPositionalArg : 1; 00301 bool UsesDotPrefix; 00302 }; 00303 00304 00305 class FormatSpecifier { 00306 protected: 00307 LengthModifier LM; 00308 OptionalAmount FieldWidth; 00309 ConversionSpecifier CS; 00310 /// Positional arguments, an IEEE extension: 00311 /// IEEE Std 1003.1, 2004 Edition 00312 /// http://www.opengroup.org/onlinepubs/009695399/functions/printf.html 00313 bool UsesPositionalArg; 00314 unsigned argIndex; 00315 public: 00316 FormatSpecifier(bool isPrintf) 00317 : CS(isPrintf), UsesPositionalArg(false), argIndex(0) {} 00318 00319 void setLengthModifier(LengthModifier lm) { 00320 LM = lm; 00321 } 00322 00323 void setUsesPositionalArg() { UsesPositionalArg = true; } 00324 00325 void setArgIndex(unsigned i) { 00326 argIndex = i; 00327 } 00328 00329 unsigned getArgIndex() const { 00330 return argIndex; 00331 } 00332 00333 unsigned getPositionalArgIndex() const { 00334 return argIndex + 1; 00335 } 00336 00337 const LengthModifier &getLengthModifier() const { 00338 return LM; 00339 } 00340 00341 const OptionalAmount &getFieldWidth() const { 00342 return FieldWidth; 00343 } 00344 00345 void setFieldWidth(const OptionalAmount &Amt) { 00346 FieldWidth = Amt; 00347 } 00348 00349 bool usesPositionalArg() const { return UsesPositionalArg; } 00350 00351 bool 
hasValidLengthModifier() const; 00352 00353 bool hasStandardLengthModifier() const; 00354 00355 bool hasStandardConversionSpecifier(const LangOptions &LangOpt) const; 00356 00357 bool hasStandardLengthConversionCombination() const; 00358 }; 00359 00360 } // end analyze_format_string namespace 00361 00362 //===----------------------------------------------------------------------===// 00363 /// Pieces specific to fprintf format strings. 00364 00365 namespace analyze_printf { 00366 00367 class PrintfConversionSpecifier : 00368 public analyze_format_string::ConversionSpecifier { 00369 public: 00370 PrintfConversionSpecifier() 00371 : ConversionSpecifier(true, 0, InvalidSpecifier) {} 00372 00373 PrintfConversionSpecifier(const char *pos, Kind k) 00374 : ConversionSpecifier(true, pos, k) {} 00375 00376 bool isObjCArg() const { return kind >= ObjCBeg && kind <= ObjCEnd; } 00377 bool isIntArg() const { return kind >= IntArgBeg && kind <= IntArgEnd; } 00378 bool isDoubleArg() const { return kind >= DoubleArgBeg && 00379 kind <= DoubleArgEnd; } 00380 unsigned getLength() const { 00381 // Conversion specifiers currently only are represented by 00382 // single characters, but we be flexible. 00383 return 1; 00384 } 00385 00386 static bool classof(const analyze_format_string::ConversionSpecifier *CS) { 00387 return CS->isPrintfKind(); 00388 } 00389 }; 00390 00391 using analyze_format_string::ArgTypeResult; 00392 using analyze_format_string::LengthModifier; 00393 using analyze_format_string::OptionalAmount; 00394 using analyze_format_string::OptionalFlag; 00395 00396 class PrintfSpecifier : public analyze_format_string::FormatSpecifier { 00397 OptionalFlag HasThousandsGrouping; // ''', POSIX extension. 
00398 OptionalFlag IsLeftJustified; // '-' 00399 OptionalFlag HasPlusPrefix; // '+' 00400 OptionalFlag HasSpacePrefix; // ' ' 00401 OptionalFlag HasAlternativeForm; // '#' 00402 OptionalFlag HasLeadingZeroes; // '0' 00403 OptionalAmount Precision; 00404 public: 00405 PrintfSpecifier() : 00406 FormatSpecifier(/* isPrintf = */ true), 00407 HasThousandsGrouping("'"), IsLeftJustified("-"), HasPlusPrefix("+"), 00408 HasSpacePrefix(" "), HasAlternativeForm("#"), HasLeadingZeroes("0") {} 00409 00410 static PrintfSpecifier Parse(const char *beg, const char *end); 00411 00412 // Methods for incrementally constructing the PrintfSpecifier. 00413 void setConversionSpecifier(const PrintfConversionSpecifier &cs) { 00414 CS = cs; 00415 } 00416 void setHasThousandsGrouping(const char *position) { 00417 HasThousandsGrouping = true; 00418 HasThousandsGrouping.setPosition(position); 00419 } 00420 void setIsLeftJustified(const char *position) { 00421 IsLeftJustified = true; 00422 IsLeftJustified.setPosition(position); 00423 } 00424 void setHasPlusPrefix(const char *position) { 00425 HasPlusPrefix = true; 00426 HasPlusPrefix.setPosition(position); 00427 } 00428 void setHasSpacePrefix(const char *position) { 00429 HasSpacePrefix = true; 00430 HasSpacePrefix.setPosition(position); 00431 } 00432 void setHasAlternativeForm(const char *position) { 00433 HasAlternativeForm = true; 00434 HasAlternativeForm.setPosition(position); 00435 } 00436 void setHasLeadingZeros(const char *position) { 00437 HasLeadingZeroes = true; 00438 HasLeadingZeroes.setPosition(position); 00439 } 00440 void setUsesPositionalArg() { UsesPositionalArg = true; } 00441 00442 // Methods for querying the format specifier. 
00443 00444 const PrintfConversionSpecifier &getConversionSpecifier() const { 00445 return cast<PrintfConversionSpecifier>(CS); 00446 } 00447 00448 void setPrecision(const OptionalAmount &Amt) { 00449 Precision = Amt; 00450 Precision.setUsesDotPrefix(); 00451 } 00452 00453 const OptionalAmount &getPrecision() const { 00454 return Precision; 00455 } 00456 00457 bool consumesDataArgument() const { 00458 return getConversionSpecifier().consumesDataArgument(); 00459 } 00460 00461 /// \brief Returns the builtin type that a data argument 00462 /// paired with this format specifier should have. This method 00463 /// will return null if the format specifier does not have 00464 /// a matching data argument or the matching argument matches 00465 /// more than one type. 00466 ArgTypeResult getArgType(ASTContext &Ctx, bool IsObjCLiteral) const; 00467 00468 const OptionalFlag &hasThousandsGrouping() const { 00469 return HasThousandsGrouping; 00470 } 00471 const OptionalFlag &isLeftJustified() const { return IsLeftJustified; } 00472 const OptionalFlag &hasPlusPrefix() const { return HasPlusPrefix; } 00473 const OptionalFlag &hasAlternativeForm() const { return HasAlternativeForm; } 00474 const OptionalFlag &hasLeadingZeros() const { return HasLeadingZeroes; } 00475 const OptionalFlag &hasSpacePrefix() const { return HasSpacePrefix; } 00476 bool usesPositionalArg() const { return UsesPositionalArg; } 00477 00478 /// Changes the specifier and length according to a QualType, retaining any 00479 /// flags or options. Returns true on success, or false when a conversion 00480 /// was not successful. 
00481 bool fixType(QualType QT, const LangOptions &LangOpt, ASTContext &Ctx, 00482 bool IsObjCLiteral); 00483 00484 void toString(raw_ostream &os) const; 00485 00486 // Validation methods - to check if any element results in undefined behavior 00487 bool hasValidPlusPrefix() const; 00488 bool hasValidAlternativeForm() const; 00489 bool hasValidLeadingZeros() const; 00490 bool hasValidSpacePrefix() const; 00491 bool hasValidLeftJustified() const; 00492 bool hasValidThousandsGroupingPrefix() const; 00493 00494 bool hasValidPrecision() const; 00495 bool hasValidFieldWidth() const; 00496 }; 00497 } // end analyze_printf namespace 00498 00499 //===----------------------------------------------------------------------===// 00500 /// Pieces specific to fscanf format strings. 00501 00502 namespace analyze_scanf { 00503 00504 class ScanfConversionSpecifier : 00505 public analyze_format_string::ConversionSpecifier { 00506 public: 00507 ScanfConversionSpecifier() 00508 : ConversionSpecifier(false, 0, InvalidSpecifier) {} 00509 00510 ScanfConversionSpecifier(const char *pos, Kind k) 00511 : ConversionSpecifier(false, pos, k) {} 00512 00513 void setEndScanList(const char *pos) { EndScanList = pos; } 00514 00515 static bool classof(const analyze_format_string::ConversionSpecifier *CS) { 00516 return !CS->isPrintfKind(); 00517 } 00518 }; 00519 00520 using analyze_format_string::ArgTypeResult; 00521 using analyze_format_string::LengthModifier; 00522 using analyze_format_string::OptionalAmount; 00523 using analyze_format_string::OptionalFlag; 00524 00525 class ScanfArgTypeResult : public ArgTypeResult { 00526 public: 00527 enum Kind { UnknownTy, InvalidTy, CStrTy, WCStrTy, PtrToArgTypeResultTy }; 00528 private: 00529 Kind K; 00530 ArgTypeResult A; 00531 const char *Name; 00532 QualType getRepresentativeType(ASTContext &C) const; 00533 public: 00534 ScanfArgTypeResult(Kind k = UnknownTy, const char* n = 0) : K(k), Name(n) {} 00535 ScanfArgTypeResult(ArgTypeResult a, const char *n = 
0) 00536 : K(PtrToArgTypeResultTy), A(a), Name(n) { 00537 assert(A.isValid()); 00538 } 00539 00540 static ScanfArgTypeResult Invalid() { return ScanfArgTypeResult(InvalidTy); } 00541 00542 bool isValid() const { return K != InvalidTy; } 00543 00544 bool matchesType(ASTContext& C, QualType argTy) const; 00545 00546 std::string getRepresentativeTypeName(ASTContext& C) const; 00547 }; 00548 00549 class ScanfSpecifier : public analyze_format_string::FormatSpecifier { 00550 OptionalFlag SuppressAssignment; // '*' 00551 public: 00552 ScanfSpecifier() : 00553 FormatSpecifier(/* isPrintf = */ false), 00554 SuppressAssignment("*") {} 00555 00556 void setSuppressAssignment(const char *position) { 00557 SuppressAssignment = true; 00558 SuppressAssignment.setPosition(position); 00559 } 00560 00561 const OptionalFlag &getSuppressAssignment() const { 00562 return SuppressAssignment; 00563 } 00564 00565 void setConversionSpecifier(const ScanfConversionSpecifier &cs) { 00566 CS = cs; 00567 } 00568 00569 const ScanfConversionSpecifier &getConversionSpecifier() const { 00570 return cast<ScanfConversionSpecifier>(CS); 00571 } 00572 00573 bool consumesDataArgument() const { 00574 return CS.consumesDataArgument() && !SuppressAssignment; 00575 } 00576 00577 ScanfArgTypeResult getArgType(ASTContext &Ctx) const; 00578 00579 bool fixType(QualType QT, const LangOptions &LangOpt, ASTContext &Ctx); 00580 00581 void toString(raw_ostream &os) const; 00582 00583 static ScanfSpecifier Parse(const char *beg, const char *end); 00584 }; 00585 00586 } // end analyze_scanf namespace 00587 00588 //===----------------------------------------------------------------------===// 00589 // Parsing and processing of format strings (both fprintf and fscanf). 
00590 00591 namespace analyze_format_string { 00592 00593 enum PositionContext { FieldWidthPos = 0, PrecisionPos = 1 }; 00594 00595 class FormatStringHandler { 00596 public: 00597 FormatStringHandler() {} 00598 virtual ~FormatStringHandler(); 00599 00600 virtual void HandleNullChar(const char *nullCharacter) {} 00601 00602 virtual void HandlePosition(const char *startPos, unsigned posLen) {} 00603 00604 virtual void HandleInvalidPosition(const char *startPos, unsigned posLen, 00605 PositionContext p) {} 00606 00607 virtual void HandleZeroPosition(const char *startPos, unsigned posLen) {} 00608 00609 virtual void HandleIncompleteSpecifier(const char *startSpecifier, 00610 unsigned specifierLen) {} 00611 00612 // Printf-specific handlers. 00613 00614 virtual bool HandleInvalidPrintfConversionSpecifier( 00615 const analyze_printf::PrintfSpecifier &FS, 00616 const char *startSpecifier, 00617 unsigned specifierLen) { 00618 return true; 00619 } 00620 00621 virtual bool HandlePrintfSpecifier(const analyze_printf::PrintfSpecifier &FS, 00622 const char *startSpecifier, 00623 unsigned specifierLen) { 00624 return true; 00625 } 00626 00627 // Scanf-specific handlers. 00628 00629 virtual bool HandleInvalidScanfConversionSpecifier( 00630 const analyze_scanf::ScanfSpecifier &FS, 00631 const char *startSpecifier, 00632 unsigned specifierLen) { 00633 return true; 00634 } 00635 00636 virtual bool HandleScanfSpecifier(const analyze_scanf::ScanfSpecifier &FS, 00637 const char *startSpecifier, 00638 unsigned specifierLen) { 00639 return true; 00640 } 00641 00642 virtual void HandleIncompleteScanList(const char *start, const char *end) {} 00643 }; 00644 00645 bool ParsePrintfString(FormatStringHandler &H, 00646 const char *beg, const char *end, const LangOptions &LO); 00647 00648 bool ParseScanfString(FormatStringHandler &H, 00649 const char *beg, const char *end, const LangOptions &LO); 00650 00651 } // end analyze_format_string namespace 00652 } // end clang namespace 00653 #endif