// clang API Documentation
00001 //= ScanfFormatString.cpp - Analysis of printf format strings --*- C++ -*-===// 00002 // 00003 // The LLVM Compiler Infrastructure 00004 // 00005 // This file is distributed under the University of Illinois Open Source 00006 // License. See LICENSE.TXT for details. 00007 // 00008 //===----------------------------------------------------------------------===// 00009 // 00010 // Handling of format string in scanf and friends. The structure of format 00011 // strings for fscanf() are described in C99 7.19.6.2. 00012 // 00013 //===----------------------------------------------------------------------===// 00014 00015 #include "clang/Analysis/Analyses/FormatString.h" 00016 #include "FormatStringParsing.h" 00017 00018 using clang::analyze_format_string::ArgTypeResult; 00019 using clang::analyze_format_string::FormatStringHandler; 00020 using clang::analyze_format_string::LengthModifier; 00021 using clang::analyze_format_string::OptionalAmount; 00022 using clang::analyze_format_string::ConversionSpecifier; 00023 using clang::analyze_scanf::ScanfArgTypeResult; 00024 using clang::analyze_scanf::ScanfConversionSpecifier; 00025 using clang::analyze_scanf::ScanfSpecifier; 00026 using clang::UpdateOnReturn; 00027 using namespace clang; 00028 00029 typedef clang::analyze_format_string::SpecifierResult<ScanfSpecifier> 00030 ScanfSpecifierResult; 00031 00032 static bool ParseScanList(FormatStringHandler &H, 00033 ScanfConversionSpecifier &CS, 00034 const char *&Beg, const char *E) { 00035 const char *I = Beg; 00036 const char *start = I - 1; 00037 UpdateOnReturn <const char*> UpdateBeg(Beg, I); 00038 00039 // No more characters? 00040 if (I == E) { 00041 H.HandleIncompleteScanList(start, I); 00042 return true; 00043 } 00044 00045 // Special case: ']' is the first character. 
00046 if (*I == ']') { 00047 if (++I == E) { 00048 H.HandleIncompleteScanList(start, I - 1); 00049 return true; 00050 } 00051 } 00052 00053 // Look for a ']' character which denotes the end of the scan list. 00054 while (*I != ']') { 00055 if (++I == E) { 00056 H.HandleIncompleteScanList(start, I - 1); 00057 return true; 00058 } 00059 } 00060 00061 CS.setEndScanList(I); 00062 return false; 00063 } 00064 00065 // FIXME: Much of this is copy-paste from ParsePrintfSpecifier. 00066 // We can possibly refactor. 00067 static ScanfSpecifierResult ParseScanfSpecifier(FormatStringHandler &H, 00068 const char *&Beg, 00069 const char *E, 00070 unsigned &argIndex, 00071 const LangOptions &LO) { 00072 00073 using namespace clang::analyze_scanf; 00074 const char *I = Beg; 00075 const char *Start = 0; 00076 UpdateOnReturn <const char*> UpdateBeg(Beg, I); 00077 00078 // Look for a '%' character that indicates the start of a format specifier. 00079 for ( ; I != E ; ++I) { 00080 char c = *I; 00081 if (c == '\0') { 00082 // Detect spurious null characters, which are likely errors. 00083 H.HandleNullChar(I); 00084 return true; 00085 } 00086 if (c == '%') { 00087 Start = I++; // Record the start of the format specifier. 00088 break; 00089 } 00090 } 00091 00092 // No format specifier found? 00093 if (!Start) 00094 return false; 00095 00096 if (I == E) { 00097 // No more characters left? 00098 H.HandleIncompleteSpecifier(Start, E - Start); 00099 return true; 00100 } 00101 00102 ScanfSpecifier FS; 00103 if (ParseArgPosition(H, FS, Start, I, E)) 00104 return true; 00105 00106 if (I == E) { 00107 // No more characters left? 00108 H.HandleIncompleteSpecifier(Start, E - Start); 00109 return true; 00110 } 00111 00112 // Look for '*' flag if it is present. 00113 if (*I == '*') { 00114 FS.setSuppressAssignment(I); 00115 if (++I == E) { 00116 H.HandleIncompleteSpecifier(Start, E - Start); 00117 return true; 00118 } 00119 } 00120 00121 // Look for the field width (if any). 
Unlike printf, this is either 00122 // a fixed integer or isn't present. 00123 const OptionalAmount &Amt = clang::analyze_format_string::ParseAmount(I, E); 00124 if (Amt.getHowSpecified() != OptionalAmount::NotSpecified) { 00125 assert(Amt.getHowSpecified() == OptionalAmount::Constant); 00126 FS.setFieldWidth(Amt); 00127 00128 if (I == E) { 00129 // No more characters left? 00130 H.HandleIncompleteSpecifier(Start, E - Start); 00131 return true; 00132 } 00133 } 00134 00135 // Look for the length modifier. 00136 if (ParseLengthModifier(FS, I, E, LO, /*scanf=*/true) && I == E) { 00137 // No more characters left? 00138 H.HandleIncompleteSpecifier(Start, E - Start); 00139 return true; 00140 } 00141 00142 // Detect spurious null characters, which are likely errors. 00143 if (*I == '\0') { 00144 H.HandleNullChar(I); 00145 return true; 00146 } 00147 00148 // Finally, look for the conversion specifier. 00149 const char *conversionPosition = I++; 00150 ScanfConversionSpecifier::Kind k = ScanfConversionSpecifier::InvalidSpecifier; 00151 switch (*conversionPosition) { 00152 default: 00153 break; 00154 case '%': k = ConversionSpecifier::PercentArg; break; 00155 case 'A': k = ConversionSpecifier::AArg; break; 00156 case 'E': k = ConversionSpecifier::EArg; break; 00157 case 'F': k = ConversionSpecifier::FArg; break; 00158 case 'G': k = ConversionSpecifier::GArg; break; 00159 case 'X': k = ConversionSpecifier::XArg; break; 00160 case 'a': k = ConversionSpecifier::aArg; break; 00161 case 'd': k = ConversionSpecifier::dArg; break; 00162 case 'e': k = ConversionSpecifier::eArg; break; 00163 case 'f': k = ConversionSpecifier::fArg; break; 00164 case 'g': k = ConversionSpecifier::gArg; break; 00165 case 'i': k = ConversionSpecifier::iArg; break; 00166 case 'n': k = ConversionSpecifier::nArg; break; 00167 case 'c': k = ConversionSpecifier::cArg; break; 00168 case 'C': k = ConversionSpecifier::CArg; break; 00169 case 'S': k = ConversionSpecifier::SArg; break; 00170 case '[': k = 
ConversionSpecifier::ScanListArg; break; 00171 case 'u': k = ConversionSpecifier::uArg; break; 00172 case 'x': k = ConversionSpecifier::xArg; break; 00173 case 'o': k = ConversionSpecifier::oArg; break; 00174 case 's': k = ConversionSpecifier::sArg; break; 00175 case 'p': k = ConversionSpecifier::pArg; break; 00176 } 00177 ScanfConversionSpecifier CS(conversionPosition, k); 00178 if (k == ScanfConversionSpecifier::ScanListArg) { 00179 if (ParseScanList(H, CS, I, E)) 00180 return true; 00181 } 00182 FS.setConversionSpecifier(CS); 00183 if (CS.consumesDataArgument() && !FS.getSuppressAssignment() 00184 && !FS.usesPositionalArg()) 00185 FS.setArgIndex(argIndex++); 00186 00187 // FIXME: '%' and '*' doesn't make sense. Issue a warning. 00188 // FIXME: 'ConsumedSoFar' and '*' doesn't make sense. 00189 00190 if (k == ScanfConversionSpecifier::InvalidSpecifier) { 00191 // Assume the conversion takes one argument. 00192 return !H.HandleInvalidScanfConversionSpecifier(FS, Beg, I - Beg); 00193 } 00194 return ScanfSpecifierResult(Start, FS); 00195 } 00196 00197 ScanfArgTypeResult ScanfSpecifier::getArgType(ASTContext &Ctx) const { 00198 const ScanfConversionSpecifier &CS = getConversionSpecifier(); 00199 00200 if (!CS.consumesDataArgument()) 00201 return ScanfArgTypeResult::Invalid(); 00202 00203 switch(CS.getKind()) { 00204 // Signed int. 
00205 case ConversionSpecifier::dArg: 00206 case ConversionSpecifier::iArg: 00207 switch (LM.getKind()) { 00208 case LengthModifier::None: return ArgTypeResult(Ctx.IntTy); 00209 case LengthModifier::AsChar: 00210 return ArgTypeResult(ArgTypeResult::AnyCharTy); 00211 case LengthModifier::AsShort: return ArgTypeResult(Ctx.ShortTy); 00212 case LengthModifier::AsLong: return ArgTypeResult(Ctx.LongTy); 00213 case LengthModifier::AsLongLong: 00214 case LengthModifier::AsQuad: 00215 return ArgTypeResult(Ctx.LongLongTy); 00216 case LengthModifier::AsIntMax: 00217 return ScanfArgTypeResult(Ctx.getIntMaxType(), "intmax_t *"); 00218 case LengthModifier::AsSizeT: 00219 // FIXME: ssize_t. 00220 return ScanfArgTypeResult(); 00221 case LengthModifier::AsPtrDiff: 00222 return ScanfArgTypeResult(Ctx.getPointerDiffType(), "ptrdiff_t *"); 00223 case LengthModifier::AsLongDouble: 00224 // GNU extension. 00225 return ArgTypeResult(Ctx.LongLongTy); 00226 case LengthModifier::AsAllocate: return ScanfArgTypeResult::Invalid(); 00227 case LengthModifier::AsMAllocate: return ScanfArgTypeResult::Invalid(); 00228 } 00229 00230 // Unsigned int. 
00231 case ConversionSpecifier::oArg: 00232 case ConversionSpecifier::uArg: 00233 case ConversionSpecifier::xArg: 00234 case ConversionSpecifier::XArg: 00235 switch (LM.getKind()) { 00236 case LengthModifier::None: return ArgTypeResult(Ctx.UnsignedIntTy); 00237 case LengthModifier::AsChar: return ArgTypeResult(Ctx.UnsignedCharTy); 00238 case LengthModifier::AsShort: return ArgTypeResult(Ctx.UnsignedShortTy); 00239 case LengthModifier::AsLong: return ArgTypeResult(Ctx.UnsignedLongTy); 00240 case LengthModifier::AsLongLong: 00241 case LengthModifier::AsQuad: 00242 return ArgTypeResult(Ctx.UnsignedLongLongTy); 00243 case LengthModifier::AsIntMax: 00244 return ScanfArgTypeResult(Ctx.getUIntMaxType(), "uintmax_t *"); 00245 case LengthModifier::AsSizeT: 00246 return ScanfArgTypeResult(Ctx.getSizeType(), "size_t *"); 00247 case LengthModifier::AsPtrDiff: 00248 // FIXME: Unsigned version of ptrdiff_t? 00249 return ScanfArgTypeResult(); 00250 case LengthModifier::AsLongDouble: 00251 // GNU extension. 00252 return ArgTypeResult(Ctx.UnsignedLongLongTy); 00253 case LengthModifier::AsAllocate: return ScanfArgTypeResult::Invalid(); 00254 case LengthModifier::AsMAllocate: return ScanfArgTypeResult::Invalid(); 00255 } 00256 00257 // Float. 00258 case ConversionSpecifier::aArg: 00259 case ConversionSpecifier::AArg: 00260 case ConversionSpecifier::eArg: 00261 case ConversionSpecifier::EArg: 00262 case ConversionSpecifier::fArg: 00263 case ConversionSpecifier::FArg: 00264 case ConversionSpecifier::gArg: 00265 case ConversionSpecifier::GArg: 00266 switch (LM.getKind()) { 00267 case LengthModifier::None: return ArgTypeResult(Ctx.FloatTy); 00268 case LengthModifier::AsLong: return ArgTypeResult(Ctx.DoubleTy); 00269 case LengthModifier::AsLongDouble: 00270 return ArgTypeResult(Ctx.LongDoubleTy); 00271 default: 00272 return ScanfArgTypeResult::Invalid(); 00273 } 00274 00275 // Char, string and scanlist. 
00276 case ConversionSpecifier::cArg: 00277 case ConversionSpecifier::sArg: 00278 case ConversionSpecifier::ScanListArg: 00279 switch (LM.getKind()) { 00280 case LengthModifier::None: return ScanfArgTypeResult::CStrTy; 00281 case LengthModifier::AsLong: 00282 return ScanfArgTypeResult(ScanfArgTypeResult::WCStrTy, "wchar_t *"); 00283 case LengthModifier::AsAllocate: 00284 case LengthModifier::AsMAllocate: 00285 return ScanfArgTypeResult(ArgTypeResult::CStrTy); 00286 default: 00287 return ScanfArgTypeResult::Invalid(); 00288 } 00289 case ConversionSpecifier::CArg: 00290 case ConversionSpecifier::SArg: 00291 // FIXME: Mac OS X specific? 00292 switch (LM.getKind()) { 00293 case LengthModifier::None: 00294 return ScanfArgTypeResult(ScanfArgTypeResult::WCStrTy, "wchar_t *"); 00295 case LengthModifier::AsAllocate: 00296 case LengthModifier::AsMAllocate: 00297 return ScanfArgTypeResult(ArgTypeResult::WCStrTy, "wchar_t **"); 00298 default: 00299 return ScanfArgTypeResult::Invalid(); 00300 } 00301 00302 // Pointer. 00303 case ConversionSpecifier::pArg: 00304 return ScanfArgTypeResult(ArgTypeResult(ArgTypeResult::CPointerTy)); 00305 00306 default: 00307 break; 00308 } 00309 00310 return ScanfArgTypeResult(); 00311 } 00312 00313 bool ScanfSpecifier::fixType(QualType QT, const LangOptions &LangOpt, 00314 ASTContext &Ctx) { 00315 if (!QT->isPointerType()) 00316 return false; 00317 00318 QualType PT = QT->getPointeeType(); 00319 const BuiltinType *BT = PT->getAs<BuiltinType>(); 00320 if (!BT) 00321 return false; 00322 00323 // Pointer to a character. 00324 if (PT->isAnyCharacterType()) { 00325 CS.setKind(ConversionSpecifier::sArg); 00326 if (PT->isWideCharType()) 00327 LM.setKind(LengthModifier::AsWideChar); 00328 else 00329 LM.setKind(LengthModifier::None); 00330 return true; 00331 } 00332 00333 // Figure out the length modifier. 
00334 switch (BT->getKind()) { 00335 // no modifier 00336 case BuiltinType::UInt: 00337 case BuiltinType::Int: 00338 case BuiltinType::Float: 00339 LM.setKind(LengthModifier::None); 00340 break; 00341 00342 // hh 00343 case BuiltinType::Char_U: 00344 case BuiltinType::UChar: 00345 case BuiltinType::Char_S: 00346 case BuiltinType::SChar: 00347 LM.setKind(LengthModifier::AsChar); 00348 break; 00349 00350 // h 00351 case BuiltinType::Short: 00352 case BuiltinType::UShort: 00353 LM.setKind(LengthModifier::AsShort); 00354 break; 00355 00356 // l 00357 case BuiltinType::Long: 00358 case BuiltinType::ULong: 00359 case BuiltinType::Double: 00360 LM.setKind(LengthModifier::AsLong); 00361 break; 00362 00363 // ll 00364 case BuiltinType::LongLong: 00365 case BuiltinType::ULongLong: 00366 LM.setKind(LengthModifier::AsLongLong); 00367 break; 00368 00369 // L 00370 case BuiltinType::LongDouble: 00371 LM.setKind(LengthModifier::AsLongDouble); 00372 break; 00373 00374 // Don't know. 00375 default: 00376 return false; 00377 } 00378 00379 // Handle size_t, ptrdiff_t, etc. that have dedicated length modifiers in C99. 00380 if (isa<TypedefType>(PT) && (LangOpt.C99 || LangOpt.CPlusPlus0x)) { 00381 const IdentifierInfo *Identifier = QT.getBaseTypeIdentifier(); 00382 if (Identifier->getName() == "size_t") { 00383 LM.setKind(LengthModifier::AsSizeT); 00384 } else if (Identifier->getName() == "ssize_t") { 00385 // Not C99, but common in Unix. 00386 LM.setKind(LengthModifier::AsSizeT); 00387 } else if (Identifier->getName() == "intmax_t") { 00388 LM.setKind(LengthModifier::AsIntMax); 00389 } else if (Identifier->getName() == "uintmax_t") { 00390 LM.setKind(LengthModifier::AsIntMax); 00391 } else if (Identifier->getName() == "ptrdiff_t") { 00392 LM.setKind(LengthModifier::AsPtrDiff); 00393 } 00394 } 00395 00396 // If fixing the length modifier was enough, we are done. 
00397 const analyze_scanf::ScanfArgTypeResult &ATR = getArgType(Ctx); 00398 if (hasValidLengthModifier() && ATR.isValid() && ATR.matchesType(Ctx, QT)) 00399 return true; 00400 00401 // Figure out the conversion specifier. 00402 if (PT->isRealFloatingType()) 00403 CS.setKind(ConversionSpecifier::fArg); 00404 else if (PT->isSignedIntegerType()) 00405 CS.setKind(ConversionSpecifier::dArg); 00406 else if (PT->isUnsignedIntegerType()) 00407 CS.setKind(ConversionSpecifier::uArg); 00408 else 00409 llvm_unreachable("Unexpected type"); 00410 00411 return true; 00412 } 00413 00414 void ScanfSpecifier::toString(raw_ostream &os) const { 00415 os << "%"; 00416 00417 if (usesPositionalArg()) 00418 os << getPositionalArgIndex() << "$"; 00419 if (SuppressAssignment) 00420 os << "*"; 00421 00422 FieldWidth.toString(os); 00423 os << LM.toString(); 00424 os << CS.toString(); 00425 } 00426 00427 bool clang::analyze_format_string::ParseScanfString(FormatStringHandler &H, 00428 const char *I, 00429 const char *E, 00430 const LangOptions &LO) { 00431 00432 unsigned argIndex = 0; 00433 00434 // Keep looking for a format specifier until we have exhausted the string. 00435 while (I != E) { 00436 const ScanfSpecifierResult &FSR = ParseScanfSpecifier(H, I, E, argIndex, 00437 LO); 00438 // Did a fail-stop error of any kind occur when parsing the specifier? 00439 // If so, don't do any more processing. 00440 if (FSR.shouldStop()) 00441 return true;; 00442 // Did we exhaust the string or encounter an error that 00443 // we can recover from? 00444 if (!FSR.hasValue()) 00445 continue; 00446 // We have a format specifier. Pass it to the callback. 
00447 if (!H.HandleScanfSpecifier(FSR.getValue(), FSR.getStart(), 00448 I - FSR.getStart())) { 00449 return true; 00450 } 00451 } 00452 assert(I == E && "Format string not exhausted"); 00453 return false; 00454 } 00455 00456 bool ScanfArgTypeResult::matchesType(ASTContext& C, QualType argTy) const { 00457 switch (K) { 00458 case InvalidTy: 00459 llvm_unreachable("ArgTypeResult must be valid"); 00460 case UnknownTy: 00461 return true; 00462 case CStrTy: 00463 return ArgTypeResult(ArgTypeResult::CStrTy).matchesType(C, argTy); 00464 case WCStrTy: 00465 return ArgTypeResult(ArgTypeResult::WCStrTy).matchesType(C, argTy); 00466 case PtrToArgTypeResultTy: { 00467 const PointerType *PT = argTy->getAs<PointerType>(); 00468 if (!PT) 00469 return false; 00470 return A.matchesType(C, PT->getPointeeType()); 00471 } 00472 } 00473 00474 llvm_unreachable("Invalid ScanfArgTypeResult Kind!"); 00475 } 00476 00477 QualType ScanfArgTypeResult::getRepresentativeType(ASTContext &C) const { 00478 switch (K) { 00479 case InvalidTy: 00480 llvm_unreachable("No representative type for Invalid ArgTypeResult"); 00481 case UnknownTy: 00482 return QualType(); 00483 case CStrTy: 00484 return C.getPointerType(C.CharTy); 00485 case WCStrTy: 00486 return C.getPointerType(C.getWCharType()); 00487 case PtrToArgTypeResultTy: 00488 return C.getPointerType(A.getRepresentativeType(C)); 00489 } 00490 00491 llvm_unreachable("Invalid ScanfArgTypeResult Kind!"); 00492 } 00493 00494 std::string ScanfArgTypeResult::getRepresentativeTypeName(ASTContext& C) const { 00495 std::string S = getRepresentativeType(C).getAsString(); 00496 if (!Name) 00497 return std::string("'") + S + "'"; 00498 return std::string("'") + Name + "' (aka '" + S + "')"; 00499 }