clang API Documentation
00001 // FormatString.cpp - Common stuff for handling printf/scanf formats -*- C++ -*- 00002 // 00003 // The LLVM Compiler Infrastructure 00004 // 00005 // This file is distributed under the University of Illinois Open Source 00006 // License. See LICENSE.TXT for details. 00007 // 00008 //===----------------------------------------------------------------------===// 00009 // 00010 // Shared details for processing format strings of printf and scanf 00011 // (and friends). 00012 // 00013 //===----------------------------------------------------------------------===// 00014 00015 #include "FormatStringParsing.h" 00016 #include "clang/Basic/LangOptions.h" 00017 00018 using clang::analyze_format_string::ArgTypeResult; 00019 using clang::analyze_format_string::FormatStringHandler; 00020 using clang::analyze_format_string::FormatSpecifier; 00021 using clang::analyze_format_string::LengthModifier; 00022 using clang::analyze_format_string::OptionalAmount; 00023 using clang::analyze_format_string::PositionContext; 00024 using clang::analyze_format_string::ConversionSpecifier; 00025 using namespace clang; 00026 00027 // Key function to FormatStringHandler. 00028 FormatStringHandler::~FormatStringHandler() {} 00029 00030 //===----------------------------------------------------------------------===// 00031 // Functions for parsing format strings components in both printf and 00032 // scanf format strings. 00033 //===----------------------------------------------------------------------===// 00034 00035 OptionalAmount 00036 clang::analyze_format_string::ParseAmount(const char *&Beg, const char *E) { 00037 const char *I = Beg; 00038 UpdateOnReturn <const char*> UpdateBeg(Beg, I); 00039 00040 unsigned accumulator = 0; 00041 bool hasDigits = false; 00042 00043 for ( ; I != E; ++I) { 00044 char c = *I; 00045 if (c >= '0' && c <= '9') { 00046 hasDigits = true; 00047 accumulator = (accumulator * 10) + (c - '0'); 00048 continue; 00049 } 00050 00051 if (hasDigits) 00052 return OptionalAmount(OptionalAmount::Constant, accumulator, Beg, I - Beg, 00053 false); 00054 00055 break; 00056 } 00057 00058 return OptionalAmount(); 00059 } 00060 00061 OptionalAmount 00062 clang::analyze_format_string::ParseNonPositionAmount(const char *&Beg, 00063 const char *E, 00064 unsigned &argIndex) { 00065 if (*Beg == '*') { 00066 ++Beg; 00067 return OptionalAmount(OptionalAmount::Arg, argIndex++, Beg, 0, false); 00068 } 00069 00070 return ParseAmount(Beg, E); 00071 } 00072 00073 OptionalAmount 00074 clang::analyze_format_string::ParsePositionAmount(FormatStringHandler &H, 00075 const char *Start, 00076 const char *&Beg, 00077 const char *E, 00078 PositionContext p) { 00079 if (*Beg == '*') { 00080 const char *I = Beg + 1; 00081 const OptionalAmount &Amt = ParseAmount(I, E); 00082 00083 if (Amt.getHowSpecified() == OptionalAmount::NotSpecified) { 00084 H.HandleInvalidPosition(Beg, I - Beg, p); 00085 return OptionalAmount(false); 00086 } 00087 00088 if (I == E) { 00089 // No more characters left? 00090 H.HandleIncompleteSpecifier(Start, E - Start); 00091 return OptionalAmount(false); 00092 } 00093 00094 assert(Amt.getHowSpecified() == OptionalAmount::Constant); 00095 00096 if (*I == '$') { 00097 // Handle positional arguments 00098 00099 // Special case: '*0$', since this is an easy mistake. 00100 if (Amt.getConstantAmount() == 0) { 00101 H.HandleZeroPosition(Beg, I - Beg + 1); 00102 return OptionalAmount(false); 00103 } 00104 00105 const char *Tmp = Beg; 00106 Beg = ++I; 00107 00108 return OptionalAmount(OptionalAmount::Arg, Amt.getConstantAmount() - 1, 00109 Tmp, 0, true); 00110 } 00111 00112 H.HandleInvalidPosition(Beg, I - Beg, p); 00113 return OptionalAmount(false); 00114 } 00115 00116 return ParseAmount(Beg, E); 00117 } 00118 00119 00120 bool 00121 clang::analyze_format_string::ParseFieldWidth(FormatStringHandler &H, 00122 FormatSpecifier &CS, 00123 const char *Start, 00124 const char *&Beg, const char *E, 00125 unsigned *argIndex) { 00126 // FIXME: Support negative field widths. 00127 if (argIndex) { 00128 CS.setFieldWidth(ParseNonPositionAmount(Beg, E, *argIndex)); 00129 } 00130 else { 00131 const OptionalAmount Amt = 00132 ParsePositionAmount(H, Start, Beg, E, 00133 analyze_format_string::FieldWidthPos); 00134 00135 if (Amt.isInvalid()) 00136 return true; 00137 CS.setFieldWidth(Amt); 00138 } 00139 return false; 00140 } 00141 00142 bool 00143 clang::analyze_format_string::ParseArgPosition(FormatStringHandler &H, 00144 FormatSpecifier &FS, 00145 const char *Start, 00146 const char *&Beg, 00147 const char *E) { 00148 const char *I = Beg; 00149 00150 const OptionalAmount &Amt = ParseAmount(I, E); 00151 00152 if (I == E) { 00153 // No more characters left? 00154 H.HandleIncompleteSpecifier(Start, E - Start); 00155 return true; 00156 } 00157 00158 if (Amt.getHowSpecified() == OptionalAmount::Constant && *(I++) == '$') { 00159 // Warn that positional arguments are non-standard. 00160 H.HandlePosition(Start, I - Start); 00161 00162 // Special case: '%0$', since this is an easy mistake. 00163 if (Amt.getConstantAmount() == 0) { 00164 H.HandleZeroPosition(Start, I - Start); 00165 return true; 00166 } 00167 00168 FS.setArgIndex(Amt.getConstantAmount() - 1); 00169 FS.setUsesPositionalArg(); 00170 // Update the caller's pointer if we decided to consume 00171 // these characters. 00172 Beg = I; 00173 return false; 00174 } 00175 00176 return false; 00177 } 00178 00179 bool 00180 clang::analyze_format_string::ParseLengthModifier(FormatSpecifier &FS, 00181 const char *&I, 00182 const char *E, 00183 const LangOptions &LO, 00184 bool IsScanf) { 00185 LengthModifier::Kind lmKind = LengthModifier::None; 00186 const char *lmPosition = I; 00187 switch (*I) { 00188 default: 00189 return false; 00190 case 'h': 00191 ++I; 00192 lmKind = (I != E && *I == 'h') ? (++I, LengthModifier::AsChar) 00193 : LengthModifier::AsShort; 00194 break; 00195 case 'l': 00196 ++I; 00197 lmKind = (I != E && *I == 'l') ? (++I, LengthModifier::AsLongLong) 00198 : LengthModifier::AsLong; 00199 break; 00200 case 'j': lmKind = LengthModifier::AsIntMax; ++I; break; 00201 case 'z': lmKind = LengthModifier::AsSizeT; ++I; break; 00202 case 't': lmKind = LengthModifier::AsPtrDiff; ++I; break; 00203 case 'L': lmKind = LengthModifier::AsLongDouble; ++I; break; 00204 case 'q': lmKind = LengthModifier::AsQuad; ++I; break; 00205 case 'a': 00206 if (IsScanf && !LO.C99 && !LO.CPlusPlus0x) { 00207 // For scanf in C90, look at the next character to see if this should 00208 // be parsed as the GNU extension 'a' length modifier. If not, this 00209 // will be parsed as a conversion specifier. 00210 ++I; 00211 if (I != E && (*I == 's' || *I == 'S' || *I == '[')) { 00212 lmKind = LengthModifier::AsAllocate; 00213 break; 00214 } 00215 --I; 00216 } 00217 return false; 00218 case 'm': 00219 if (IsScanf) { 00220 lmKind = LengthModifier::AsMAllocate; 00221 ++I; 00222 break; 00223 } 00224 return false; 00225 } 00226 LengthModifier lm(lmPosition, lmKind); 00227 FS.setLengthModifier(lm); 00228 return true; 00229 } 00230 00231 //===----------------------------------------------------------------------===// 00232 // Methods on ArgTypeResult. 00233 //===----------------------------------------------------------------------===// 00234 00235 bool ArgTypeResult::matchesType(ASTContext &C, QualType argTy) const { 00236 switch (K) { 00237 case InvalidTy: 00238 llvm_unreachable("ArgTypeResult must be valid"); 00239 00240 case UnknownTy: 00241 return true; 00242 00243 case AnyCharTy: { 00244 if (const BuiltinType *BT = argTy->getAs<BuiltinType>()) 00245 switch (BT->getKind()) { 00246 default: 00247 break; 00248 case BuiltinType::Char_S: 00249 case BuiltinType::SChar: 00250 case BuiltinType::UChar: 00251 case BuiltinType::Char_U: 00252 return true; 00253 } 00254 return false; 00255 } 00256 00257 case SpecificTy: { 00258 argTy = C.getCanonicalType(argTy).getUnqualifiedType(); 00259 if (T == argTy) 00260 return true; 00261 // Check for "compatible types". 00262 if (const BuiltinType *BT = argTy->getAs<BuiltinType>()) 00263 switch (BT->getKind()) { 00264 default: 00265 break; 00266 case BuiltinType::Char_S: 00267 case BuiltinType::SChar: 00268 case BuiltinType::Char_U: 00269 case BuiltinType::UChar: 00270 return T == C.UnsignedCharTy || T == C.SignedCharTy; 00271 case BuiltinType::Short: 00272 return T == C.UnsignedShortTy; 00273 case BuiltinType::UShort: 00274 return T == C.ShortTy; 00275 case BuiltinType::Int: 00276 return T == C.UnsignedIntTy; 00277 case BuiltinType::UInt: 00278 return T == C.IntTy; 00279 case BuiltinType::Long: 00280 return T == C.UnsignedLongTy; 00281 case BuiltinType::ULong: 00282 return T == C.LongTy; 00283 case BuiltinType::LongLong: 00284 return T == C.UnsignedLongLongTy; 00285 case BuiltinType::ULongLong: 00286 return T == C.LongLongTy; 00287 } 00288 return false; 00289 } 00290 00291 case CStrTy: { 00292 const PointerType *PT = argTy->getAs<PointerType>(); 00293 if (!PT) 00294 return false; 00295 QualType pointeeTy = PT->getPointeeType(); 00296 if (const BuiltinType *BT = pointeeTy->getAs<BuiltinType>()) 00297 switch (BT->getKind()) { 00298 case BuiltinType::Void: 00299 case BuiltinType::Char_U: 00300 case BuiltinType::UChar: 00301 case BuiltinType::Char_S: 00302 case BuiltinType::SChar: 00303 return true; 00304 default: 00305 break; 00306 } 00307 00308 return false; 00309 } 00310 00311 case WCStrTy: { 00312 const PointerType *PT = argTy->getAs<PointerType>(); 00313 if (!PT) 00314 return false; 00315 QualType pointeeTy = 00316 C.getCanonicalType(PT->getPointeeType()).getUnqualifiedType(); 00317 return pointeeTy == C.getWCharType(); 00318 } 00319 00320 case WIntTy: { 00321 00322 QualType PromoArg = 00323 argTy->isPromotableIntegerType() 00324 ? C.getPromotedIntegerType(argTy) : argTy; 00325 00326 QualType WInt = C.getCanonicalType(C.getWIntType()).getUnqualifiedType(); 00327 PromoArg = C.getCanonicalType(PromoArg).getUnqualifiedType(); 00328 00329 // If the promoted argument is the corresponding signed type of the 00330 // wint_t type, then it should match. 00331 if (PromoArg->hasSignedIntegerRepresentation() && 00332 C.getCorrespondingUnsignedType(PromoArg) == WInt) 00333 return true; 00334 00335 return WInt == PromoArg; 00336 } 00337 00338 case CPointerTy: 00339 return argTy->isPointerType() || argTy->isObjCObjectPointerType() || 00340 argTy->isBlockPointerType() || argTy->isNullPtrType(); 00341 00342 case ObjCPointerTy: { 00343 if (argTy->getAs<ObjCObjectPointerType>() || 00344 argTy->getAs<BlockPointerType>()) 00345 return true; 00346 00347 // Handle implicit toll-free bridging. 00348 if (const PointerType *PT = argTy->getAs<PointerType>()) { 00349 // Things such as CFTypeRef are really just opaque pointers 00350 // to C structs representing CF types that can often be bridged 00351 // to Objective-C objects. Since the compiler doesn't know which 00352 // structs can be toll-free bridged, we just accept them all. 00353 QualType pointee = PT->getPointeeType(); 00354 if (pointee->getAsStructureType() || pointee->isVoidType()) 00355 return true; 00356 } 00357 return false; 00358 } 00359 } 00360 00361 llvm_unreachable("Invalid ArgTypeResult Kind!"); 00362 } 00363 00364 QualType ArgTypeResult::getRepresentativeType(ASTContext &C) const { 00365 switch (K) { 00366 case InvalidTy: 00367 llvm_unreachable("No representative type for Invalid ArgTypeResult"); 00368 case UnknownTy: 00369 return QualType(); 00370 case AnyCharTy: 00371 return C.CharTy; 00372 case SpecificTy: 00373 return T; 00374 case CStrTy: 00375 return C.getPointerType(C.CharTy); 00376 case WCStrTy: 00377 return C.getPointerType(C.getWCharType()); 00378 case ObjCPointerTy: 00379 return C.ObjCBuiltinIdTy; 00380 case CPointerTy: 00381 return C.VoidPtrTy; 00382 case WIntTy: { 00383 return C.getWIntType(); 00384 } 00385 } 00386 00387 llvm_unreachable("Invalid ArgTypeResult Kind!"); 00388 } 00389 00390 std::string ArgTypeResult::getRepresentativeTypeName(ASTContext &C) const { 00391 std::string S = getRepresentativeType(C).getAsString(); 00392 if (Name && S != Name) 00393 return std::string("'") + Name + "' (aka '" + S + "')"; 00394 return std::string("'") + S + "'"; 00395 } 00396 00397 00398 //===----------------------------------------------------------------------===// 00399 // Methods on OptionalAmount. 00400 //===----------------------------------------------------------------------===// 00401 00402 ArgTypeResult 00403 analyze_format_string::OptionalAmount::getArgType(ASTContext &Ctx) const { 00404 return Ctx.IntTy; 00405 } 00406 00407 //===----------------------------------------------------------------------===// 00408 // Methods on LengthModifier. 00409 //===----------------------------------------------------------------------===// 00410 00411 const char * 00412 analyze_format_string::LengthModifier::toString() const { 00413 switch (kind) { 00414 case AsChar: 00415 return "hh"; 00416 case AsShort: 00417 return "h"; 00418 case AsLong: // or AsWideChar 00419 return "l"; 00420 case AsLongLong: 00421 return "ll"; 00422 case AsQuad: 00423 return "q"; 00424 case AsIntMax: 00425 return "j"; 00426 case AsSizeT: 00427 return "z"; 00428 case AsPtrDiff: 00429 return "t"; 00430 case AsLongDouble: 00431 return "L"; 00432 case AsAllocate: 00433 return "a"; 00434 case AsMAllocate: 00435 return "m"; 00436 case None: 00437 return ""; 00438 } 00439 return NULL; 00440 } 00441 00442 //===----------------------------------------------------------------------===// 00443 // Methods on ConversionSpecifier. 00444 //===----------------------------------------------------------------------===// 00445 00446 const char *ConversionSpecifier::toString() const { 00447 switch (kind) { 00448 case dArg: return "d"; 00449 case iArg: return "i"; 00450 case oArg: return "o"; 00451 case uArg: return "u"; 00452 case xArg: return "x"; 00453 case XArg: return "X"; 00454 case fArg: return "f"; 00455 case FArg: return "F"; 00456 case eArg: return "e"; 00457 case EArg: return "E"; 00458 case gArg: return "g"; 00459 case GArg: return "G"; 00460 case aArg: return "a"; 00461 case AArg: return "A"; 00462 case cArg: return "c"; 00463 case sArg: return "s"; 00464 case pArg: return "p"; 00465 case nArg: return "n"; 00466 case PercentArg: return "%"; 00467 case ScanListArg: return "["; 00468 case InvalidSpecifier: return NULL; 00469 00470 // MacOS X unicode extensions. 00471 case CArg: return "C"; 00472 case SArg: return "S"; 00473 00474 // Objective-C specific specifiers. 00475 case ObjCObjArg: return "@"; 00476 00477 // GlibC specific specifiers. 00478 case PrintErrno: return "m"; 00479 } 00480 return NULL; 00481 } 00482 00483 //===----------------------------------------------------------------------===// 00484 // Methods on OptionalAmount. 00485 //===----------------------------------------------------------------------===// 00486 00487 void OptionalAmount::toString(raw_ostream &os) const { 00488 switch (hs) { 00489 case Invalid: 00490 case NotSpecified: 00491 return; 00492 case Arg: 00493 if (UsesDotPrefix) 00494 os << "."; 00495 if (usesPositionalArg()) 00496 os << "*" << getPositionalArgIndex() << "$"; 00497 else 00498 os << "*"; 00499 break; 00500 case Constant: 00501 if (UsesDotPrefix) 00502 os << "."; 00503 os << amt; 00504 break; 00505 } 00506 } 00507 00508 bool FormatSpecifier::hasValidLengthModifier() const { 00509 switch (LM.getKind()) { 00510 case LengthModifier::None: 00511 return true; 00512 00513 // Handle most integer flags 00514 case LengthModifier::AsChar: 00515 case LengthModifier::AsShort: 00516 case LengthModifier::AsLongLong: 00517 case LengthModifier::AsQuad: 00518 case LengthModifier::AsIntMax: 00519 case LengthModifier::AsSizeT: 00520 case LengthModifier::AsPtrDiff: 00521 switch (CS.getKind()) { 00522 case ConversionSpecifier::dArg: 00523 case ConversionSpecifier::iArg: 00524 case ConversionSpecifier::oArg: 00525 case ConversionSpecifier::uArg: 00526 case ConversionSpecifier::xArg: 00527 case ConversionSpecifier::XArg: 00528 case ConversionSpecifier::nArg: 00529 return true; 00530 default: 00531 return false; 00532 } 00533 00534 // Handle 'l' flag 00535 case LengthModifier::AsLong: 00536 switch (CS.getKind()) { 00537 case ConversionSpecifier::dArg: 00538 case ConversionSpecifier::iArg: 00539 case ConversionSpecifier::oArg: 00540 case ConversionSpecifier::uArg: 00541 case ConversionSpecifier::xArg: 00542 case ConversionSpecifier::XArg: 00543 case ConversionSpecifier::aArg: 00544 case ConversionSpecifier::AArg: 00545 case ConversionSpecifier::fArg: 00546 case ConversionSpecifier::FArg: 00547 case ConversionSpecifier::eArg: 00548 case ConversionSpecifier::EArg: 00549 case ConversionSpecifier::gArg: 00550 case ConversionSpecifier::GArg: 00551 case ConversionSpecifier::nArg: 00552 case ConversionSpecifier::cArg: 00553 case ConversionSpecifier::sArg: 00554 case ConversionSpecifier::ScanListArg: 00555 return true; 00556 default: 00557 return false; 00558 } 00559 00560 case LengthModifier::AsLongDouble: 00561 switch (CS.getKind()) { 00562 case ConversionSpecifier::aArg: 00563 case ConversionSpecifier::AArg: 00564 case ConversionSpecifier::fArg: 00565 case ConversionSpecifier::FArg: 00566 case ConversionSpecifier::eArg: 00567 case ConversionSpecifier::EArg: 00568 case ConversionSpecifier::gArg: 00569 case ConversionSpecifier::GArg: 00570 return true; 00571 // GNU extension. 00572 case ConversionSpecifier::dArg: 00573 case ConversionSpecifier::iArg: 00574 case ConversionSpecifier::oArg: 00575 case ConversionSpecifier::uArg: 00576 case ConversionSpecifier::xArg: 00577 case ConversionSpecifier::XArg: 00578 return true; 00579 default: 00580 return false; 00581 } 00582 00583 case LengthModifier::AsAllocate: 00584 switch (CS.getKind()) { 00585 case ConversionSpecifier::sArg: 00586 case ConversionSpecifier::SArg: 00587 case ConversionSpecifier::ScanListArg: 00588 return true; 00589 default: 00590 return false; 00591 } 00592 00593 case LengthModifier::AsMAllocate: 00594 switch (CS.getKind()) { 00595 case ConversionSpecifier::cArg: 00596 case ConversionSpecifier::CArg: 00597 case ConversionSpecifier::sArg: 00598 case ConversionSpecifier::SArg: 00599 case ConversionSpecifier::ScanListArg: 00600 return true; 00601 default: 00602 return false; 00603 } 00604 } 00605 llvm_unreachable("Invalid LengthModifier Kind!"); 00606 } 00607 00608 bool FormatSpecifier::hasStandardLengthModifier() const { 00609 switch (LM.getKind()) { 00610 case LengthModifier::None: 00611 case LengthModifier::AsChar: 00612 case LengthModifier::AsShort: 00613 case LengthModifier::AsLong: 00614 case LengthModifier::AsLongLong: 00615 case LengthModifier::AsIntMax: 00616 case LengthModifier::AsSizeT: 00617 case LengthModifier::AsPtrDiff: 00618 case LengthModifier::AsLongDouble: 00619 return true; 00620 case LengthModifier::AsAllocate: 00621 case LengthModifier::AsMAllocate: 00622 case LengthModifier::AsQuad: 00623 return false; 00624 } 00625 llvm_unreachable("Invalid LengthModifier Kind!"); 00626 } 00627 00628 bool FormatSpecifier::hasStandardConversionSpecifier(const LangOptions &LangOpt) const { 00629 switch (CS.getKind()) { 00630 case ConversionSpecifier::cArg: 00631 case ConversionSpecifier::dArg: 00632 case ConversionSpecifier::iArg: 00633 case ConversionSpecifier::oArg: 00634 case ConversionSpecifier::uArg: 00635 case ConversionSpecifier::xArg: 00636 case ConversionSpecifier::XArg: 00637 case ConversionSpecifier::fArg: 00638 case ConversionSpecifier::FArg: 00639 case ConversionSpecifier::eArg: 00640 case ConversionSpecifier::EArg: 00641 case ConversionSpecifier::gArg: 00642 case ConversionSpecifier::GArg: 00643 case ConversionSpecifier::aArg: 00644 case ConversionSpecifier::AArg: 00645 case ConversionSpecifier::sArg: 00646 case ConversionSpecifier::pArg: 00647 case ConversionSpecifier::nArg: 00648 case ConversionSpecifier::ObjCObjArg: 00649 case ConversionSpecifier::ScanListArg: 00650 case ConversionSpecifier::PercentArg: 00651 return true; 00652 case ConversionSpecifier::CArg: 00653 case ConversionSpecifier::SArg: 00654 return LangOpt.ObjC1 || LangOpt.ObjC2; 00655 case ConversionSpecifier::InvalidSpecifier: 00656 case ConversionSpecifier::PrintErrno: 00657 return false; 00658 } 00659 llvm_unreachable("Invalid ConversionSpecifier Kind!"); 00660 } 00661 00662 bool FormatSpecifier::hasStandardLengthConversionCombination() const { 00663 if (LM.getKind() == LengthModifier::AsLongDouble) { 00664 switch(CS.getKind()) { 00665 case ConversionSpecifier::dArg: 00666 case ConversionSpecifier::iArg: 00667 case ConversionSpecifier::oArg: 00668 case ConversionSpecifier::uArg: 00669 case ConversionSpecifier::xArg: 00670 case ConversionSpecifier::XArg: 00671 return false; 00672 default: 00673 return true; 00674 } 00675 } 00676 return true; 00677 }