clang API Documentation
00001 //== GenericTaintChecker.cpp ----------------------------------- -*- C++ -*--=// 00002 // 00003 // The LLVM Compiler Infrastructure 00004 // 00005 // This file is distributed under the University of Illinois Open Source 00006 // License. See LICENSE.TXT for details. 00007 // 00008 //===----------------------------------------------------------------------===// 00009 // 00010 // This checker defines the attack surface for generic taint propagation. 00011 // 00012 // The taint information produced by it might be useful to other checkers. For 00013 // example, checkers should report errors which involve tainted data more 00014 // aggressively, even if the involved symbols are under constrained. 00015 // 00016 //===----------------------------------------------------------------------===// 00017 #include "ClangSACheckers.h" 00018 #include "clang/StaticAnalyzer/Core/Checker.h" 00019 #include "clang/StaticAnalyzer/Core/CheckerManager.h" 00020 #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" 00021 #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h" 00022 #include "clang/StaticAnalyzer/Core/BugReporter/BugType.h" 00023 #include "clang/Basic/Builtins.h" 00024 #include <climits> 00025 00026 using namespace clang; 00027 using namespace ento; 00028 00029 namespace { 00030 class GenericTaintChecker : public Checker< check::PostStmt<CallExpr>, 00031 check::PreStmt<CallExpr> > { 00032 public: 00033 static void *getTag() { static int Tag; return &Tag; } 00034 00035 void checkPostStmt(const CallExpr *CE, CheckerContext &C) const; 00036 void checkPostStmt(const DeclRefExpr *DRE, CheckerContext &C) const; 00037 00038 void checkPreStmt(const CallExpr *CE, CheckerContext &C) const; 00039 00040 private: 00041 static const unsigned InvalidArgIndex = UINT_MAX; 00042 /// Denotes the return vale. 00043 static const unsigned ReturnValueIndex = UINT_MAX - 1; 00044 00045 mutable OwningPtr<BugType> BT; 00046 inline void initBugType() const { 00047 if (!BT) 00048 BT.reset(new BugType("Use of Untrusted Data", "Untrusted Data")); 00049 } 00050 00051 /// \brief Catch taint related bugs. Check if tainted data is passed to a 00052 /// system call etc. 00053 bool checkPre(const CallExpr *CE, CheckerContext &C) const; 00054 00055 /// \brief Add taint sources on a pre-visit. 00056 void addSourcesPre(const CallExpr *CE, CheckerContext &C) const; 00057 00058 /// \brief Propagate taint generated at pre-visit. 00059 bool propagateFromPre(const CallExpr *CE, CheckerContext &C) const; 00060 00061 /// \brief Add taint sources on a post visit. 00062 void addSourcesPost(const CallExpr *CE, CheckerContext &C) const; 00063 00064 /// Check if the region the expression evaluates to is the standard input, 00065 /// and thus, is tainted. 00066 static bool isStdin(const Expr *E, CheckerContext &C); 00067 00068 /// \brief Given a pointer argument, get the symbol of the value it contains 00069 /// (points to). 00070 static SymbolRef getPointedToSymbol(CheckerContext &C, const Expr *Arg); 00071 00072 /// Functions defining the attack surface. 00073 typedef ProgramStateRef (GenericTaintChecker::*FnCheck)(const CallExpr *, 00074 CheckerContext &C) const; 00075 ProgramStateRef postScanf(const CallExpr *CE, CheckerContext &C) const; 00076 ProgramStateRef postSocket(const CallExpr *CE, CheckerContext &C) const; 00077 ProgramStateRef postRetTaint(const CallExpr *CE, CheckerContext &C) const; 00078 00079 /// Taint the scanned input if the file is tainted. 00080 ProgramStateRef preFscanf(const CallExpr *CE, CheckerContext &C) const; 00081 00082 /// Check for CWE-134: Uncontrolled Format String. 00083 static const char MsgUncontrolledFormatString[]; 00084 bool checkUncontrolledFormatString(const CallExpr *CE, 00085 CheckerContext &C) const; 00086 00087 /// Check for: 00088 /// CERT/STR02-C. "Sanitize data passed to complex subsystems" 00089 /// CWE-78, "Failure to Sanitize Data into an OS Command" 00090 static const char MsgSanitizeSystemArgs[]; 00091 bool checkSystemCall(const CallExpr *CE, StringRef Name, 00092 CheckerContext &C) const; 00093 00094 /// Check if tainted data is used as a buffer size ins strn.. functions, 00095 /// and allocators. 00096 static const char MsgTaintedBufferSize[]; 00097 bool checkTaintedBufferSize(const CallExpr *CE, const FunctionDecl *FDecl, 00098 CheckerContext &C) const; 00099 00100 /// Generate a report if the expression is tainted or points to tainted data. 00101 bool generateReportIfTainted(const Expr *E, const char Msg[], 00102 CheckerContext &C) const; 00103 00104 00105 typedef llvm::SmallVector<unsigned, 2> ArgVector; 00106 00107 /// \brief A struct used to specify taint propagation rules for a function. 00108 /// 00109 /// If any of the possible taint source arguments is tainted, all of the 00110 /// destination arguments should also be tainted. Use InvalidArgIndex in the 00111 /// src list to specify that all of the arguments can introduce taint. Use 00112 /// InvalidArgIndex in the dst arguments to signify that all the non-const 00113 /// pointer and reference arguments might be tainted on return. If 00114 /// ReturnValueIndex is added to the dst list, the return value will be 00115 /// tainted. 00116 struct TaintPropagationRule { 00117 /// List of arguments which can be taint sources and should be checked. 00118 ArgVector SrcArgs; 00119 /// List of arguments which should be tainted on function return. 00120 ArgVector DstArgs; 00121 // TODO: Check if using other data structures would be more optimal. 00122 00123 TaintPropagationRule() {} 00124 00125 TaintPropagationRule(unsigned SArg, 00126 unsigned DArg, bool TaintRet = false) { 00127 SrcArgs.push_back(SArg); 00128 DstArgs.push_back(DArg); 00129 if (TaintRet) 00130 DstArgs.push_back(ReturnValueIndex); 00131 } 00132 00133 TaintPropagationRule(unsigned SArg1, unsigned SArg2, 00134 unsigned DArg, bool TaintRet = false) { 00135 SrcArgs.push_back(SArg1); 00136 SrcArgs.push_back(SArg2); 00137 DstArgs.push_back(DArg); 00138 if (TaintRet) 00139 DstArgs.push_back(ReturnValueIndex); 00140 } 00141 00142 /// Get the propagation rule for a given function. 00143 static TaintPropagationRule 00144 getTaintPropagationRule(const FunctionDecl *FDecl, 00145 StringRef Name, 00146 CheckerContext &C); 00147 00148 inline void addSrcArg(unsigned A) { SrcArgs.push_back(A); } 00149 inline void addDstArg(unsigned A) { DstArgs.push_back(A); } 00150 00151 inline bool isNull() const { return SrcArgs.empty(); } 00152 00153 inline bool isDestinationArgument(unsigned ArgNum) const { 00154 return (std::find(DstArgs.begin(), 00155 DstArgs.end(), ArgNum) != DstArgs.end()); 00156 } 00157 00158 static inline bool isTaintedOrPointsToTainted(const Expr *E, 00159 ProgramStateRef State, 00160 CheckerContext &C) { 00161 return (State->isTainted(E, C.getLocationContext()) || isStdin(E, C) || 00162 (E->getType().getTypePtr()->isPointerType() && 00163 State->isTainted(getPointedToSymbol(C, E)))); 00164 } 00165 00166 /// \brief Pre-process a function which propagates taint according to the 00167 /// taint rule. 00168 ProgramStateRef process(const CallExpr *CE, CheckerContext &C) const; 00169 00170 }; 00171 }; 00172 00173 const unsigned GenericTaintChecker::ReturnValueIndex; 00174 const unsigned GenericTaintChecker::InvalidArgIndex; 00175 00176 const char GenericTaintChecker::MsgUncontrolledFormatString[] = 00177 "Untrusted data is used as a format string " 00178 "(CWE-134: Uncontrolled Format String)"; 00179 00180 const char GenericTaintChecker::MsgSanitizeSystemArgs[] = 00181 "Untrusted data is passed to a system call " 00182 "(CERT/STR02-C. Sanitize data passed to complex subsystems)"; 00183 00184 const char GenericTaintChecker::MsgTaintedBufferSize[] = 00185 "Untrusted data is used to specify the buffer size " 00186 "(CERT/STR31-C. Guarantee that storage for strings has sufficient space for " 00187 "character data and the null terminator)"; 00188 00189 } // end of anonymous namespace 00190 00191 /// A set which is used to pass information from call pre-visit instruction 00192 /// to the call post-visit. The values are unsigned integers, which are either 00193 /// ReturnValueIndex, or indexes of the pointer/reference argument, which 00194 /// points to data, which should be tainted on return. 00195 namespace { struct TaintArgsOnPostVisit{}; } 00196 namespace clang { namespace ento { 00197 template<> struct ProgramStateTrait<TaintArgsOnPostVisit> 00198 : public ProgramStatePartialTrait<llvm::ImmutableSet<unsigned> > { 00199 static void *GDMIndex() { return GenericTaintChecker::getTag(); } 00200 }; 00201 }} 00202 00203 GenericTaintChecker::TaintPropagationRule 00204 GenericTaintChecker::TaintPropagationRule::getTaintPropagationRule( 00205 const FunctionDecl *FDecl, 00206 StringRef Name, 00207 CheckerContext &C) { 00208 // TODO: Currently, we might loose precision here: we always mark a return 00209 // value as tainted even if it's just a pointer, pointing to tainted data. 00210 00211 // Check for exact name match for functions without builtin substitutes. 00212 TaintPropagationRule Rule = llvm::StringSwitch<TaintPropagationRule>(Name) 00213 .Case("atoi", TaintPropagationRule(0, ReturnValueIndex)) 00214 .Case("atol", TaintPropagationRule(0, ReturnValueIndex)) 00215 .Case("atoll", TaintPropagationRule(0, ReturnValueIndex)) 00216 .Case("getc", TaintPropagationRule(0, ReturnValueIndex)) 00217 .Case("fgetc", TaintPropagationRule(0, ReturnValueIndex)) 00218 .Case("getc_unlocked", TaintPropagationRule(0, ReturnValueIndex)) 00219 .Case("getw", TaintPropagationRule(0, ReturnValueIndex)) 00220 .Case("toupper", TaintPropagationRule(0, ReturnValueIndex)) 00221 .Case("tolower", TaintPropagationRule(0, ReturnValueIndex)) 00222 .Case("strchr", TaintPropagationRule(0, ReturnValueIndex)) 00223 .Case("strrchr", TaintPropagationRule(0, ReturnValueIndex)) 00224 .Case("read", TaintPropagationRule(0, 2, 1, true)) 00225 .Case("pread", TaintPropagationRule(InvalidArgIndex, 1, true)) 00226 .Case("gets", TaintPropagationRule(InvalidArgIndex, 0, true)) 00227 .Case("fgets", TaintPropagationRule(2, 0, true)) 00228 .Case("getline", TaintPropagationRule(2, 0)) 00229 .Case("getdelim", TaintPropagationRule(3, 0)) 00230 .Case("fgetln", TaintPropagationRule(0, ReturnValueIndex)) 00231 .Default(TaintPropagationRule()); 00232 00233 if (!Rule.isNull()) 00234 return Rule; 00235 00236 // Check if it's one of the memory setting/copying functions. 00237 // This check is specialized but faster then calling isCLibraryFunction. 00238 unsigned BId = 0; 00239 if ( (BId = FDecl->getMemoryFunctionKind()) ) 00240 switch(BId) { 00241 case Builtin::BImemcpy: 00242 case Builtin::BImemmove: 00243 case Builtin::BIstrncpy: 00244 case Builtin::BIstrncat: 00245 return TaintPropagationRule(1, 2, 0, true); 00246 case Builtin::BIstrlcpy: 00247 case Builtin::BIstrlcat: 00248 return TaintPropagationRule(1, 2, 0, false); 00249 case Builtin::BIstrndup: 00250 return TaintPropagationRule(0, 1, ReturnValueIndex); 00251 00252 default: 00253 break; 00254 }; 00255 00256 // Process all other functions which could be defined as builtins. 00257 if (Rule.isNull()) { 00258 if (C.isCLibraryFunction(FDecl, "snprintf") || 00259 C.isCLibraryFunction(FDecl, "sprintf")) 00260 return TaintPropagationRule(InvalidArgIndex, 0, true); 00261 else if (C.isCLibraryFunction(FDecl, "strcpy") || 00262 C.isCLibraryFunction(FDecl, "stpcpy") || 00263 C.isCLibraryFunction(FDecl, "strcat")) 00264 return TaintPropagationRule(1, 0, true); 00265 else if (C.isCLibraryFunction(FDecl, "bcopy")) 00266 return TaintPropagationRule(0, 2, 1, false); 00267 else if (C.isCLibraryFunction(FDecl, "strdup") || 00268 C.isCLibraryFunction(FDecl, "strdupa")) 00269 return TaintPropagationRule(0, ReturnValueIndex); 00270 else if (C.isCLibraryFunction(FDecl, "wcsdup")) 00271 return TaintPropagationRule(0, ReturnValueIndex); 00272 } 00273 00274 // Skipping the following functions, since they might be used for cleansing 00275 // or smart memory copy: 00276 // - memccpy - copying untill hitting a special character. 00277 00278 return TaintPropagationRule(); 00279 } 00280 00281 void GenericTaintChecker::checkPreStmt(const CallExpr *CE, 00282 CheckerContext &C) const { 00283 // Check for errors first. 00284 if (checkPre(CE, C)) 00285 return; 00286 00287 // Add taint second. 00288 addSourcesPre(CE, C); 00289 } 00290 00291 void GenericTaintChecker::checkPostStmt(const CallExpr *CE, 00292 CheckerContext &C) const { 00293 if (propagateFromPre(CE, C)) 00294 return; 00295 addSourcesPost(CE, C); 00296 } 00297 00298 void GenericTaintChecker::addSourcesPre(const CallExpr *CE, 00299 CheckerContext &C) const { 00300 ProgramStateRef State = 0; 00301 const FunctionDecl *FDecl = C.getCalleeDecl(CE); 00302 StringRef Name = C.getCalleeName(FDecl); 00303 if (Name.empty()) 00304 return; 00305 00306 // First, try generating a propagation rule for this function. 00307 TaintPropagationRule Rule = 00308 TaintPropagationRule::getTaintPropagationRule(FDecl, Name, C); 00309 if (!Rule.isNull()) { 00310 State = Rule.process(CE, C); 00311 if (!State) 00312 return; 00313 C.addTransition(State); 00314 return; 00315 } 00316 00317 // Otherwise, check if we have custom pre-processing implemented. 00318 FnCheck evalFunction = llvm::StringSwitch<FnCheck>(Name) 00319 .Case("fscanf", &GenericTaintChecker::preFscanf) 00320 .Default(0); 00321 // Check and evaluate the call. 00322 if (evalFunction) 00323 State = (this->*evalFunction)(CE, C); 00324 if (!State) 00325 return; 00326 C.addTransition(State); 00327 00328 } 00329 00330 bool GenericTaintChecker::propagateFromPre(const CallExpr *CE, 00331 CheckerContext &C) const { 00332 ProgramStateRef State = C.getState(); 00333 00334 // Depending on what was tainted at pre-visit, we determined a set of 00335 // arguments which should be tainted after the function returns. These are 00336 // stored in the state as TaintArgsOnPostVisit set. 00337 llvm::ImmutableSet<unsigned> TaintArgs = State->get<TaintArgsOnPostVisit>(); 00338 if (TaintArgs.isEmpty()) 00339 return false; 00340 00341 for (llvm::ImmutableSet<unsigned>::iterator 00342 I = TaintArgs.begin(), E = TaintArgs.end(); I != E; ++I) { 00343 unsigned ArgNum = *I; 00344 00345 // Special handling for the tainted return value. 00346 if (ArgNum == ReturnValueIndex) { 00347 State = State->addTaint(CE, C.getLocationContext()); 00348 continue; 00349 } 00350 00351 // The arguments are pointer arguments. The data they are pointing at is 00352 // tainted after the call. 00353 if (CE->getNumArgs() < (ArgNum + 1)) 00354 return false; 00355 const Expr* Arg = CE->getArg(ArgNum); 00356 SymbolRef Sym = getPointedToSymbol(C, Arg); 00357 if (Sym) 00358 State = State->addTaint(Sym); 00359 } 00360 00361 // Clear up the taint info from the state. 00362 State = State->remove<TaintArgsOnPostVisit>(); 00363 00364 if (State != C.getState()) { 00365 C.addTransition(State); 00366 return true; 00367 } 00368 return false; 00369 } 00370 00371 void GenericTaintChecker::addSourcesPost(const CallExpr *CE, 00372 CheckerContext &C) const { 00373 // Define the attack surface. 00374 // Set the evaluation function by switching on the callee name. 00375 StringRef Name = C.getCalleeName(CE); 00376 if (Name.empty()) 00377 return; 00378 FnCheck evalFunction = llvm::StringSwitch<FnCheck>(Name) 00379 .Case("scanf", &GenericTaintChecker::postScanf) 00380 // TODO: Add support for vfscanf & family. 00381 .Case("getchar", &GenericTaintChecker::postRetTaint) 00382 .Case("getchar_unlocked", &GenericTaintChecker::postRetTaint) 00383 .Case("getenv", &GenericTaintChecker::postRetTaint) 00384 .Case("fopen", &GenericTaintChecker::postRetTaint) 00385 .Case("fdopen", &GenericTaintChecker::postRetTaint) 00386 .Case("freopen", &GenericTaintChecker::postRetTaint) 00387 .Case("getch", &GenericTaintChecker::postRetTaint) 00388 .Case("wgetch", &GenericTaintChecker::postRetTaint) 00389 .Case("socket", &GenericTaintChecker::postSocket) 00390 .Default(0); 00391 00392 // If the callee isn't defined, it is not of security concern. 00393 // Check and evaluate the call. 00394 ProgramStateRef State = 0; 00395 if (evalFunction) 00396 State = (this->*evalFunction)(CE, C); 00397 if (!State) 00398 return; 00399 00400 C.addTransition(State); 00401 } 00402 00403 bool GenericTaintChecker::checkPre(const CallExpr *CE, CheckerContext &C) const{ 00404 00405 if (checkUncontrolledFormatString(CE, C)) 00406 return true; 00407 00408 const FunctionDecl *FDecl = C.getCalleeDecl(CE); 00409 StringRef Name = C.getCalleeName(FDecl); 00410 if (Name.empty()) 00411 return false; 00412 00413 if (checkSystemCall(CE, Name, C)) 00414 return true; 00415 00416 if (checkTaintedBufferSize(CE, FDecl, C)) 00417 return true; 00418 00419 return false; 00420 } 00421 00422 SymbolRef GenericTaintChecker::getPointedToSymbol(CheckerContext &C, 00423 const Expr* Arg) { 00424 ProgramStateRef State = C.getState(); 00425 SVal AddrVal = State->getSVal(Arg->IgnoreParens(), C.getLocationContext()); 00426 if (AddrVal.isUnknownOrUndef()) 00427 return 0; 00428 00429 Loc *AddrLoc = dyn_cast<Loc>(&AddrVal); 00430 if (!AddrLoc) 00431 return 0; 00432 00433 const PointerType *ArgTy = 00434 dyn_cast<PointerType>(Arg->getType().getCanonicalType().getTypePtr()); 00435 SVal Val = State->getSVal(*AddrLoc, 00436 ArgTy ? ArgTy->getPointeeType(): QualType()); 00437 return Val.getAsSymbol(); 00438 } 00439 00440 ProgramStateRef 00441 GenericTaintChecker::TaintPropagationRule::process(const CallExpr *CE, 00442 CheckerContext &C) const { 00443 ProgramStateRef State = C.getState(); 00444 00445 // Check for taint in arguments. 00446 bool IsTainted = false; 00447 for (ArgVector::const_iterator I = SrcArgs.begin(), 00448 E = SrcArgs.end(); I != E; ++I) { 00449 unsigned ArgNum = *I; 00450 00451 if (ArgNum == InvalidArgIndex) { 00452 // Check if any of the arguments is tainted, but skip the 00453 // destination arguments. 00454 for (unsigned int i = 0; i < CE->getNumArgs(); ++i) { 00455 if (isDestinationArgument(i)) 00456 continue; 00457 if ((IsTainted = isTaintedOrPointsToTainted(CE->getArg(i), State, C))) 00458 break; 00459 } 00460 break; 00461 } 00462 00463 if (CE->getNumArgs() < (ArgNum + 1)) 00464 return State; 00465 if ((IsTainted = isTaintedOrPointsToTainted(CE->getArg(ArgNum), State, C))) 00466 break; 00467 } 00468 if (!IsTainted) 00469 return State; 00470 00471 // Mark the arguments which should be tainted after the function returns. 00472 for (ArgVector::const_iterator I = DstArgs.begin(), 00473 E = DstArgs.end(); I != E; ++I) { 00474 unsigned ArgNum = *I; 00475 00476 // Should we mark all arguments as tainted? 00477 if (ArgNum == InvalidArgIndex) { 00478 // For all pointer and references that were passed in: 00479 // If they are not pointing to const data, mark data as tainted. 00480 // TODO: So far we are just going one level down; ideally we'd need to 00481 // recurse here. 00482 for (unsigned int i = 0; i < CE->getNumArgs(); ++i) { 00483 const Expr *Arg = CE->getArg(i); 00484 // Process pointer argument. 00485 const Type *ArgTy = Arg->getType().getTypePtr(); 00486 QualType PType = ArgTy->getPointeeType(); 00487 if ((!PType.isNull() && !PType.isConstQualified()) 00488 || (ArgTy->isReferenceType() && !Arg->getType().isConstQualified())) 00489 State = State->add<TaintArgsOnPostVisit>(i); 00490 } 00491 continue; 00492 } 00493 00494 // Should mark the return value? 00495 if (ArgNum == ReturnValueIndex) { 00496 State = State->add<TaintArgsOnPostVisit>(ReturnValueIndex); 00497 continue; 00498 } 00499 00500 // Mark the given argument. 00501 assert(ArgNum < CE->getNumArgs()); 00502 State = State->add<TaintArgsOnPostVisit>(ArgNum); 00503 } 00504 00505 return State; 00506 } 00507 00508 00509 // If argument 0 (file descriptor) is tainted, all arguments except for arg 0 00510 // and arg 1 should get taint. 00511 ProgramStateRef GenericTaintChecker::preFscanf(const CallExpr *CE, 00512 CheckerContext &C) const { 00513 assert(CE->getNumArgs() >= 2); 00514 ProgramStateRef State = C.getState(); 00515 00516 // Check is the file descriptor is tainted. 00517 if (State->isTainted(CE->getArg(0), C.getLocationContext()) || 00518 isStdin(CE->getArg(0), C)) { 00519 // All arguments except for the first two should get taint. 00520 for (unsigned int i = 2; i < CE->getNumArgs(); ++i) 00521 State = State->add<TaintArgsOnPostVisit>(i); 00522 return State; 00523 } 00524 00525 return 0; 00526 } 00527 00528 00529 // If argument 0(protocol domain) is network, the return value should get taint. 00530 ProgramStateRef GenericTaintChecker::postSocket(const CallExpr *CE, 00531 CheckerContext &C) const { 00532 ProgramStateRef State = C.getState(); 00533 if (CE->getNumArgs() < 3) 00534 return State; 00535 00536 SourceLocation DomLoc = CE->getArg(0)->getExprLoc(); 00537 StringRef DomName = C.getMacroNameOrSpelling(DomLoc); 00538 // White list the internal communication protocols. 00539 if (DomName.equals("AF_SYSTEM") || DomName.equals("AF_LOCAL") || 00540 DomName.equals("AF_UNIX") || DomName.equals("AF_RESERVED_36")) 00541 return State; 00542 State = State->addTaint(CE, C.getLocationContext()); 00543 return State; 00544 } 00545 00546 ProgramStateRef GenericTaintChecker::postScanf(const CallExpr *CE, 00547 CheckerContext &C) const { 00548 ProgramStateRef State = C.getState(); 00549 if (CE->getNumArgs() < 2) 00550 return State; 00551 00552 SVal x = State->getSVal(CE->getArg(1), C.getLocationContext()); 00553 // All arguments except for the very first one should get taint. 00554 for (unsigned int i = 1; i < CE->getNumArgs(); ++i) { 00555 // The arguments are pointer arguments. The data they are pointing at is 00556 // tainted after the call. 00557 const Expr* Arg = CE->getArg(i); 00558 SymbolRef Sym = getPointedToSymbol(C, Arg); 00559 if (Sym) 00560 State = State->addTaint(Sym); 00561 } 00562 return State; 00563 } 00564 00565 ProgramStateRef GenericTaintChecker::postRetTaint(const CallExpr *CE, 00566 CheckerContext &C) const { 00567 return C.getState()->addTaint(CE, C.getLocationContext()); 00568 } 00569 00570 bool GenericTaintChecker::isStdin(const Expr *E, CheckerContext &C) { 00571 ProgramStateRef State = C.getState(); 00572 SVal Val = State->getSVal(E, C.getLocationContext()); 00573 00574 // stdin is a pointer, so it would be a region. 00575 const MemRegion *MemReg = Val.getAsRegion(); 00576 00577 // The region should be symbolic, we do not know it's value. 00578 const SymbolicRegion *SymReg = dyn_cast_or_null<SymbolicRegion>(MemReg); 00579 if (!SymReg) 00580 return false; 00581 00582 // Get it's symbol and find the declaration region it's pointing to. 00583 const SymbolRegionValue *Sm =dyn_cast<SymbolRegionValue>(SymReg->getSymbol()); 00584 if (!Sm) 00585 return false; 00586 const DeclRegion *DeclReg = dyn_cast_or_null<DeclRegion>(Sm->getRegion()); 00587 if (!DeclReg) 00588 return false; 00589 00590 // This region corresponds to a declaration, find out if it's a global/extern 00591 // variable named stdin with the proper type. 00592 if (const VarDecl *D = dyn_cast_or_null<VarDecl>(DeclReg->getDecl())) { 00593 D = D->getCanonicalDecl(); 00594 if ((D->getName().find("stdin") != StringRef::npos) && D->isExternC()) 00595 if (const PointerType * PtrTy = 00596 dyn_cast<PointerType>(D->getType().getTypePtr())) 00597 if (PtrTy->getPointeeType() == C.getASTContext().getFILEType()) 00598 return true; 00599 } 00600 return false; 00601 } 00602 00603 static bool getPrintfFormatArgumentNum(const CallExpr *CE, 00604 const CheckerContext &C, 00605 unsigned int &ArgNum) { 00606 // Find if the function contains a format string argument. 00607 // Handles: fprintf, printf, sprintf, snprintf, vfprintf, vprintf, vsprintf, 00608 // vsnprintf, syslog, custom annotated functions. 00609 const FunctionDecl *FDecl = C.getCalleeDecl(CE); 00610 if (!FDecl) 00611 return false; 00612 for (specific_attr_iterator<FormatAttr> 00613 i = FDecl->specific_attr_begin<FormatAttr>(), 00614 e = FDecl->specific_attr_end<FormatAttr>(); i != e ; ++i) { 00615 00616 const FormatAttr *Format = *i; 00617 ArgNum = Format->getFormatIdx() - 1; 00618 if ((Format->getType() == "printf") && CE->getNumArgs() > ArgNum) 00619 return true; 00620 } 00621 00622 // Or if a function is named setproctitle (this is a heuristic). 00623 if (C.getCalleeName(CE).find("setproctitle") != StringRef::npos) { 00624 ArgNum = 0; 00625 return true; 00626 } 00627 00628 return false; 00629 } 00630 00631 bool GenericTaintChecker::generateReportIfTainted(const Expr *E, 00632 const char Msg[], 00633 CheckerContext &C) const { 00634 assert(E); 00635 00636 // Check for taint. 00637 ProgramStateRef State = C.getState(); 00638 if (!State->isTainted(getPointedToSymbol(C, E)) && 00639 !State->isTainted(E, C.getLocationContext())) 00640 return false; 00641 00642 // Generate diagnostic. 00643 if (ExplodedNode *N = C.addTransition()) { 00644 initBugType(); 00645 BugReport *report = new BugReport(*BT, Msg, N); 00646 report->addRange(E->getSourceRange()); 00647 C.EmitReport(report); 00648 return true; 00649 } 00650 return false; 00651 } 00652 00653 bool GenericTaintChecker::checkUncontrolledFormatString(const CallExpr *CE, 00654 CheckerContext &C) const{ 00655 // Check if the function contains a format string argument. 00656 unsigned int ArgNum = 0; 00657 if (!getPrintfFormatArgumentNum(CE, C, ArgNum)) 00658 return false; 00659 00660 // If either the format string content or the pointer itself are tainted, warn. 00661 if (generateReportIfTainted(CE->getArg(ArgNum), 00662 MsgUncontrolledFormatString, C)) 00663 return true; 00664 return false; 00665 } 00666 00667 bool GenericTaintChecker::checkSystemCall(const CallExpr *CE, 00668 StringRef Name, 00669 CheckerContext &C) const { 00670 // TODO: It might make sense to run this check on demand. In some cases, 00671 // we should check if the environment has been cleansed here. We also might 00672 // need to know if the user was reset before these calls(seteuid). 00673 unsigned ArgNum = llvm::StringSwitch<unsigned>(Name) 00674 .Case("system", 0) 00675 .Case("popen", 0) 00676 .Case("execl", 0) 00677 .Case("execle", 0) 00678 .Case("execlp", 0) 00679 .Case("execv", 0) 00680 .Case("execvp", 0) 00681 .Case("execvP", 0) 00682 .Case("execve", 0) 00683 .Case("dlopen", 0) 00684 .Default(UINT_MAX); 00685 00686 if (ArgNum == UINT_MAX || CE->getNumArgs() < (ArgNum + 1)) 00687 return false; 00688 00689 if (generateReportIfTainted(CE->getArg(ArgNum), 00690 MsgSanitizeSystemArgs, C)) 00691 return true; 00692 00693 return false; 00694 } 00695 00696 // TODO: Should this check be a part of the CString checker? 00697 // If yes, should taint be a global setting? 00698 bool GenericTaintChecker::checkTaintedBufferSize(const CallExpr *CE, 00699 const FunctionDecl *FDecl, 00700 CheckerContext &C) const { 00701 // If the function has a buffer size argument, set ArgNum. 00702 unsigned ArgNum = InvalidArgIndex; 00703 unsigned BId = 0; 00704 if ( (BId = FDecl->getMemoryFunctionKind()) ) 00705 switch(BId) { 00706 case Builtin::BImemcpy: 00707 case Builtin::BImemmove: 00708 case Builtin::BIstrncpy: 00709 ArgNum = 2; 00710 break; 00711 case Builtin::BIstrndup: 00712 ArgNum = 1; 00713 break; 00714 default: 00715 break; 00716 }; 00717 00718 if (ArgNum == InvalidArgIndex) { 00719 if (C.isCLibraryFunction(FDecl, "malloc") || 00720 C.isCLibraryFunction(FDecl, "calloc") || 00721 C.isCLibraryFunction(FDecl, "alloca")) 00722 ArgNum = 0; 00723 else if (C.isCLibraryFunction(FDecl, "memccpy")) 00724 ArgNum = 3; 00725 else if (C.isCLibraryFunction(FDecl, "realloc")) 00726 ArgNum = 1; 00727 else if (C.isCLibraryFunction(FDecl, "bcopy")) 00728 ArgNum = 2; 00729 } 00730 00731 if (ArgNum != InvalidArgIndex && CE->getNumArgs() > ArgNum && 00732 generateReportIfTainted(CE->getArg(ArgNum), MsgTaintedBufferSize, C)) 00733 return true; 00734 00735 return false; 00736 } 00737 00738 void ento::registerGenericTaintChecker(CheckerManager &mgr) { 00739 mgr.registerChecker<GenericTaintChecker>(); 00740 }