clang API Documentation

GenericTaintChecker.cpp
Go to the documentation of this file.
00001 //== GenericTaintChecker.cpp ----------------------------------- -*- C++ -*--=//
00002 //
00003 //                     The LLVM Compiler Infrastructure
00004 //
00005 // This file is distributed under the University of Illinois Open Source
00006 // License. See LICENSE.TXT for details.
00007 //
00008 //===----------------------------------------------------------------------===//
00009 //
00010 // This checker defines the attack surface for generic taint propagation.
00011 //
00012 // The taint information produced by it might be useful to other checkers. For
00013 // example, checkers should report errors which involve tainted data more
00014 // aggressively, even if the involved symbols are under constrained.
00015 //
00016 //===----------------------------------------------------------------------===//
00017 #include "ClangSACheckers.h"
00018 #include "clang/StaticAnalyzer/Core/Checker.h"
00019 #include "clang/StaticAnalyzer/Core/CheckerManager.h"
00020 #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
00021 #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h"
00022 #include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
00023 #include "clang/Basic/Builtins.h"
00024 #include <climits>
00025 
00026 using namespace clang;
00027 using namespace ento;
00028 
00029 namespace {
00030 class GenericTaintChecker : public Checker< check::PostStmt<CallExpr>,
00031                                             check::PreStmt<CallExpr> > {
00032 public:
00033   static void *getTag() { static int Tag; return &Tag; }
00034 
00035   void checkPostStmt(const CallExpr *CE, CheckerContext &C) const;
00036   void checkPostStmt(const DeclRefExpr *DRE, CheckerContext &C) const;
00037 
00038   void checkPreStmt(const CallExpr *CE, CheckerContext &C) const;
00039 
00040 private:
00041   static const unsigned InvalidArgIndex = UINT_MAX;
00042   /// Denotes the return vale.
00043   static const unsigned ReturnValueIndex = UINT_MAX - 1;
00044 
00045   mutable OwningPtr<BugType> BT;
00046   inline void initBugType() const {
00047     if (!BT)
00048       BT.reset(new BugType("Use of Untrusted Data", "Untrusted Data"));
00049   }
00050 
00051   /// \brief Catch taint related bugs. Check if tainted data is passed to a
00052   /// system call etc.
00053   bool checkPre(const CallExpr *CE, CheckerContext &C) const;
00054 
00055   /// \brief Add taint sources on a pre-visit.
00056   void addSourcesPre(const CallExpr *CE, CheckerContext &C) const;
00057 
00058   /// \brief Propagate taint generated at pre-visit.
00059   bool propagateFromPre(const CallExpr *CE, CheckerContext &C) const;
00060 
00061   /// \brief Add taint sources on a post visit.
00062   void addSourcesPost(const CallExpr *CE, CheckerContext &C) const;
00063 
00064   /// Check if the region the expression evaluates to is the standard input,
00065   /// and thus, is tainted.
00066   static bool isStdin(const Expr *E, CheckerContext &C);
00067 
00068   /// \brief Given a pointer argument, get the symbol of the value it contains
00069   /// (points to).
00070   static SymbolRef getPointedToSymbol(CheckerContext &C, const Expr *Arg);
00071 
00072   /// Functions defining the attack surface.
00073   typedef ProgramStateRef (GenericTaintChecker::*FnCheck)(const CallExpr *,
00074                                                        CheckerContext &C) const;
00075   ProgramStateRef postScanf(const CallExpr *CE, CheckerContext &C) const;
00076   ProgramStateRef postSocket(const CallExpr *CE, CheckerContext &C) const;
00077   ProgramStateRef postRetTaint(const CallExpr *CE, CheckerContext &C) const;
00078 
00079   /// Taint the scanned input if the file is tainted.
00080   ProgramStateRef preFscanf(const CallExpr *CE, CheckerContext &C) const;
00081 
00082   /// Check for CWE-134: Uncontrolled Format String.
00083   static const char MsgUncontrolledFormatString[];
00084   bool checkUncontrolledFormatString(const CallExpr *CE,
00085                                      CheckerContext &C) const;
00086 
00087   /// Check for:
00088   /// CERT/STR02-C. "Sanitize data passed to complex subsystems"
00089   /// CWE-78, "Failure to Sanitize Data into an OS Command"
00090   static const char MsgSanitizeSystemArgs[];
00091   bool checkSystemCall(const CallExpr *CE, StringRef Name,
00092                        CheckerContext &C) const;
00093 
00094   /// Check if tainted data is used as a buffer size ins strn.. functions,
00095   /// and allocators.
00096   static const char MsgTaintedBufferSize[];
00097   bool checkTaintedBufferSize(const CallExpr *CE, const FunctionDecl *FDecl,
00098                               CheckerContext &C) const;
00099 
00100   /// Generate a report if the expression is tainted or points to tainted data.
00101   bool generateReportIfTainted(const Expr *E, const char Msg[],
00102                                CheckerContext &C) const;
00103                                
00104   
00105   typedef llvm::SmallVector<unsigned, 2> ArgVector;
00106 
00107   /// \brief A struct used to specify taint propagation rules for a function.
00108   ///
00109   /// If any of the possible taint source arguments is tainted, all of the
00110   /// destination arguments should also be tainted. Use InvalidArgIndex in the
00111   /// src list to specify that all of the arguments can introduce taint. Use
00112   /// InvalidArgIndex in the dst arguments to signify that all the non-const
00113   /// pointer and reference arguments might be tainted on return. If
00114   /// ReturnValueIndex is added to the dst list, the return value will be
00115   /// tainted.
00116   struct TaintPropagationRule {
00117     /// List of arguments which can be taint sources and should be checked.
00118     ArgVector SrcArgs;
00119     /// List of arguments which should be tainted on function return.
00120     ArgVector DstArgs;
00121     // TODO: Check if using other data structures would be more optimal.
00122 
00123     TaintPropagationRule() {}
00124 
00125     TaintPropagationRule(unsigned SArg,
00126                          unsigned DArg, bool TaintRet = false) {
00127       SrcArgs.push_back(SArg);
00128       DstArgs.push_back(DArg);
00129       if (TaintRet)
00130         DstArgs.push_back(ReturnValueIndex);
00131     }
00132 
00133     TaintPropagationRule(unsigned SArg1, unsigned SArg2,
00134                          unsigned DArg, bool TaintRet = false) {
00135       SrcArgs.push_back(SArg1);
00136       SrcArgs.push_back(SArg2);
00137       DstArgs.push_back(DArg);
00138       if (TaintRet)
00139         DstArgs.push_back(ReturnValueIndex);
00140     }
00141 
00142     /// Get the propagation rule for a given function.
00143     static TaintPropagationRule
00144       getTaintPropagationRule(const FunctionDecl *FDecl,
00145                               StringRef Name,
00146                               CheckerContext &C);
00147 
00148     inline void addSrcArg(unsigned A) { SrcArgs.push_back(A); }
00149     inline void addDstArg(unsigned A)  { DstArgs.push_back(A); }
00150 
00151     inline bool isNull() const { return SrcArgs.empty(); }
00152 
00153     inline bool isDestinationArgument(unsigned ArgNum) const {
00154       return (std::find(DstArgs.begin(),
00155                         DstArgs.end(), ArgNum) != DstArgs.end());
00156     }
00157 
00158     static inline bool isTaintedOrPointsToTainted(const Expr *E,
00159                                                   ProgramStateRef State,
00160                                                   CheckerContext &C) {
00161       return (State->isTainted(E, C.getLocationContext()) || isStdin(E, C) ||
00162               (E->getType().getTypePtr()->isPointerType() &&
00163                State->isTainted(getPointedToSymbol(C, E))));
00164     }
00165 
00166     /// \brief Pre-process a function which propagates taint according to the
00167     /// taint rule.
00168     ProgramStateRef process(const CallExpr *CE, CheckerContext &C) const;
00169 
00170   };
00171 };
00172 
00173 const unsigned GenericTaintChecker::ReturnValueIndex;
00174 const unsigned GenericTaintChecker::InvalidArgIndex;
00175 
00176 const char GenericTaintChecker::MsgUncontrolledFormatString[] =
00177   "Untrusted data is used as a format string "
00178   "(CWE-134: Uncontrolled Format String)";
00179 
00180 const char GenericTaintChecker::MsgSanitizeSystemArgs[] =
00181   "Untrusted data is passed to a system call "
00182   "(CERT/STR02-C. Sanitize data passed to complex subsystems)";
00183 
00184 const char GenericTaintChecker::MsgTaintedBufferSize[] =
00185   "Untrusted data is used to specify the buffer size "
00186   "(CERT/STR31-C. Guarantee that storage for strings has sufficient space for "
00187   "character data and the null terminator)";
00188 
00189 } // end of anonymous namespace
00190 
00191 /// A set which is used to pass information from call pre-visit instruction
00192 /// to the call post-visit. The values are unsigned integers, which are either
00193 /// ReturnValueIndex, or indexes of the pointer/reference argument, which
00194 /// points to data, which should be tainted on return.
00195 namespace { struct TaintArgsOnPostVisit{}; }
00196 namespace clang { namespace ento {
00197 template<> struct ProgramStateTrait<TaintArgsOnPostVisit>
00198     :  public ProgramStatePartialTrait<llvm::ImmutableSet<unsigned> > {
00199   static void *GDMIndex() { return GenericTaintChecker::getTag(); }
00200 };
00201 }}
00202 
00203 GenericTaintChecker::TaintPropagationRule
00204 GenericTaintChecker::TaintPropagationRule::getTaintPropagationRule(
00205                                                      const FunctionDecl *FDecl,
00206                                                      StringRef Name,
00207                                                      CheckerContext &C) {
00208   // TODO: Currently, we might loose precision here: we always mark a return
00209   // value as tainted even if it's just a pointer, pointing to tainted data.
00210 
00211   // Check for exact name match for functions without builtin substitutes.
00212   TaintPropagationRule Rule = llvm::StringSwitch<TaintPropagationRule>(Name)
00213     .Case("atoi", TaintPropagationRule(0, ReturnValueIndex))
00214     .Case("atol", TaintPropagationRule(0, ReturnValueIndex))
00215     .Case("atoll", TaintPropagationRule(0, ReturnValueIndex))
00216     .Case("getc", TaintPropagationRule(0, ReturnValueIndex))
00217     .Case("fgetc", TaintPropagationRule(0, ReturnValueIndex))
00218     .Case("getc_unlocked", TaintPropagationRule(0, ReturnValueIndex))
00219     .Case("getw", TaintPropagationRule(0, ReturnValueIndex))
00220     .Case("toupper", TaintPropagationRule(0, ReturnValueIndex))
00221     .Case("tolower", TaintPropagationRule(0, ReturnValueIndex))
00222     .Case("strchr", TaintPropagationRule(0, ReturnValueIndex))
00223     .Case("strrchr", TaintPropagationRule(0, ReturnValueIndex))
00224     .Case("read", TaintPropagationRule(0, 2, 1, true))
00225     .Case("pread", TaintPropagationRule(InvalidArgIndex, 1, true))
00226     .Case("gets", TaintPropagationRule(InvalidArgIndex, 0, true))
00227     .Case("fgets", TaintPropagationRule(2, 0, true))
00228     .Case("getline", TaintPropagationRule(2, 0))
00229     .Case("getdelim", TaintPropagationRule(3, 0))
00230     .Case("fgetln", TaintPropagationRule(0, ReturnValueIndex))
00231     .Default(TaintPropagationRule());
00232 
00233   if (!Rule.isNull())
00234     return Rule;
00235 
00236   // Check if it's one of the memory setting/copying functions.
00237   // This check is specialized but faster then calling isCLibraryFunction.
00238   unsigned BId = 0;
00239   if ( (BId = FDecl->getMemoryFunctionKind()) )
00240     switch(BId) {
00241     case Builtin::BImemcpy:
00242     case Builtin::BImemmove:
00243     case Builtin::BIstrncpy:
00244     case Builtin::BIstrncat:
00245       return TaintPropagationRule(1, 2, 0, true);
00246     case Builtin::BIstrlcpy:
00247     case Builtin::BIstrlcat:
00248       return TaintPropagationRule(1, 2, 0, false);
00249     case Builtin::BIstrndup:
00250       return TaintPropagationRule(0, 1, ReturnValueIndex);
00251 
00252     default:
00253       break;
00254     };
00255 
00256   // Process all other functions which could be defined as builtins.
00257   if (Rule.isNull()) {
00258     if (C.isCLibraryFunction(FDecl, "snprintf") ||
00259         C.isCLibraryFunction(FDecl, "sprintf"))
00260       return TaintPropagationRule(InvalidArgIndex, 0, true);
00261     else if (C.isCLibraryFunction(FDecl, "strcpy") ||
00262              C.isCLibraryFunction(FDecl, "stpcpy") ||
00263              C.isCLibraryFunction(FDecl, "strcat"))
00264       return TaintPropagationRule(1, 0, true);
00265     else if (C.isCLibraryFunction(FDecl, "bcopy"))
00266       return TaintPropagationRule(0, 2, 1, false);
00267     else if (C.isCLibraryFunction(FDecl, "strdup") ||
00268              C.isCLibraryFunction(FDecl, "strdupa"))
00269       return TaintPropagationRule(0, ReturnValueIndex);
00270     else if (C.isCLibraryFunction(FDecl, "wcsdup"))
00271       return TaintPropagationRule(0, ReturnValueIndex);
00272   }
00273 
00274   // Skipping the following functions, since they might be used for cleansing
00275   // or smart memory copy:
00276   // - memccpy - copying untill hitting a special character.
00277 
00278   return TaintPropagationRule();
00279 }
00280 
00281 void GenericTaintChecker::checkPreStmt(const CallExpr *CE,
00282                                        CheckerContext &C) const {
00283   // Check for errors first.
00284   if (checkPre(CE, C))
00285     return;
00286 
00287   // Add taint second.
00288   addSourcesPre(CE, C);
00289 }
00290 
00291 void GenericTaintChecker::checkPostStmt(const CallExpr *CE,
00292                                         CheckerContext &C) const {
00293   if (propagateFromPre(CE, C))
00294     return;
00295   addSourcesPost(CE, C);
00296 }
00297 
00298 void GenericTaintChecker::addSourcesPre(const CallExpr *CE,
00299                                         CheckerContext &C) const {
00300   ProgramStateRef State = 0;
00301   const FunctionDecl *FDecl = C.getCalleeDecl(CE);
00302   StringRef Name = C.getCalleeName(FDecl);
00303   if (Name.empty())
00304     return;
00305 
00306   // First, try generating a propagation rule for this function.
00307   TaintPropagationRule Rule =
00308     TaintPropagationRule::getTaintPropagationRule(FDecl, Name, C);
00309   if (!Rule.isNull()) {
00310     State = Rule.process(CE, C);
00311     if (!State)
00312       return;
00313     C.addTransition(State);
00314     return;
00315   }
00316 
00317   // Otherwise, check if we have custom pre-processing implemented.
00318   FnCheck evalFunction = llvm::StringSwitch<FnCheck>(Name)
00319     .Case("fscanf", &GenericTaintChecker::preFscanf)
00320     .Default(0);
00321   // Check and evaluate the call.
00322   if (evalFunction)
00323     State = (this->*evalFunction)(CE, C);
00324   if (!State)
00325     return;
00326   C.addTransition(State);
00327 
00328 }
00329 
00330 bool GenericTaintChecker::propagateFromPre(const CallExpr *CE,
00331                                            CheckerContext &C) const {
00332   ProgramStateRef State = C.getState();
00333 
00334   // Depending on what was tainted at pre-visit, we determined a set of
00335   // arguments which should be tainted after the function returns. These are
00336   // stored in the state as TaintArgsOnPostVisit set.
00337   llvm::ImmutableSet<unsigned> TaintArgs = State->get<TaintArgsOnPostVisit>();
00338   if (TaintArgs.isEmpty())
00339     return false;
00340 
00341   for (llvm::ImmutableSet<unsigned>::iterator
00342          I = TaintArgs.begin(), E = TaintArgs.end(); I != E; ++I) {
00343     unsigned ArgNum  = *I;
00344 
00345     // Special handling for the tainted return value.
00346     if (ArgNum == ReturnValueIndex) {
00347       State = State->addTaint(CE, C.getLocationContext());
00348       continue;
00349     }
00350 
00351     // The arguments are pointer arguments. The data they are pointing at is
00352     // tainted after the call.
00353     if (CE->getNumArgs() < (ArgNum + 1))
00354       return false;
00355     const Expr* Arg = CE->getArg(ArgNum);
00356     SymbolRef Sym = getPointedToSymbol(C, Arg);
00357     if (Sym)
00358       State = State->addTaint(Sym);
00359   }
00360 
00361   // Clear up the taint info from the state.
00362   State = State->remove<TaintArgsOnPostVisit>();
00363 
00364   if (State != C.getState()) {
00365     C.addTransition(State);
00366     return true;
00367   }
00368   return false;
00369 }
00370 
00371 void GenericTaintChecker::addSourcesPost(const CallExpr *CE,
00372                                          CheckerContext &C) const {
00373   // Define the attack surface.
00374   // Set the evaluation function by switching on the callee name.
00375   StringRef Name = C.getCalleeName(CE);
00376   if (Name.empty())
00377     return;
00378   FnCheck evalFunction = llvm::StringSwitch<FnCheck>(Name)
00379     .Case("scanf", &GenericTaintChecker::postScanf)
00380     // TODO: Add support for vfscanf & family.
00381     .Case("getchar", &GenericTaintChecker::postRetTaint)
00382     .Case("getchar_unlocked", &GenericTaintChecker::postRetTaint)
00383     .Case("getenv", &GenericTaintChecker::postRetTaint)
00384     .Case("fopen", &GenericTaintChecker::postRetTaint)
00385     .Case("fdopen", &GenericTaintChecker::postRetTaint)
00386     .Case("freopen", &GenericTaintChecker::postRetTaint)
00387     .Case("getch", &GenericTaintChecker::postRetTaint)
00388     .Case("wgetch", &GenericTaintChecker::postRetTaint)
00389     .Case("socket", &GenericTaintChecker::postSocket)
00390     .Default(0);
00391 
00392   // If the callee isn't defined, it is not of security concern.
00393   // Check and evaluate the call.
00394   ProgramStateRef State = 0;
00395   if (evalFunction)
00396     State = (this->*evalFunction)(CE, C);
00397   if (!State)
00398     return;
00399 
00400   C.addTransition(State);
00401 }
00402 
00403 bool GenericTaintChecker::checkPre(const CallExpr *CE, CheckerContext &C) const{
00404 
00405   if (checkUncontrolledFormatString(CE, C))
00406     return true;
00407 
00408   const FunctionDecl *FDecl = C.getCalleeDecl(CE);
00409   StringRef Name = C.getCalleeName(FDecl);
00410   if (Name.empty())
00411     return false;
00412 
00413   if (checkSystemCall(CE, Name, C))
00414     return true;
00415 
00416   if (checkTaintedBufferSize(CE, FDecl, C))
00417     return true;
00418 
00419   return false;
00420 }
00421 
00422 SymbolRef GenericTaintChecker::getPointedToSymbol(CheckerContext &C,
00423                                                   const Expr* Arg) {
00424   ProgramStateRef State = C.getState();
00425   SVal AddrVal = State->getSVal(Arg->IgnoreParens(), C.getLocationContext());
00426   if (AddrVal.isUnknownOrUndef())
00427     return 0;
00428 
00429   Loc *AddrLoc = dyn_cast<Loc>(&AddrVal);
00430   if (!AddrLoc)
00431     return 0;
00432 
00433   const PointerType *ArgTy =
00434     dyn_cast<PointerType>(Arg->getType().getCanonicalType().getTypePtr());
00435   SVal Val = State->getSVal(*AddrLoc,
00436                             ArgTy ? ArgTy->getPointeeType(): QualType());
00437   return Val.getAsSymbol();
00438 }
00439 
00440 ProgramStateRef 
00441 GenericTaintChecker::TaintPropagationRule::process(const CallExpr *CE,
00442                                                    CheckerContext &C) const {
00443   ProgramStateRef State = C.getState();
00444 
00445   // Check for taint in arguments.
00446   bool IsTainted = false;
00447   for (ArgVector::const_iterator I = SrcArgs.begin(),
00448                                  E = SrcArgs.end(); I != E; ++I) {
00449     unsigned ArgNum = *I;
00450 
00451     if (ArgNum == InvalidArgIndex) {
00452       // Check if any of the arguments is tainted, but skip the
00453       // destination arguments.
00454       for (unsigned int i = 0; i < CE->getNumArgs(); ++i) {
00455         if (isDestinationArgument(i))
00456           continue;
00457         if ((IsTainted = isTaintedOrPointsToTainted(CE->getArg(i), State, C)))
00458           break;
00459       }
00460       break;
00461     }
00462 
00463     if (CE->getNumArgs() < (ArgNum + 1))
00464       return State;
00465     if ((IsTainted = isTaintedOrPointsToTainted(CE->getArg(ArgNum), State, C)))
00466       break;
00467   }
00468   if (!IsTainted)
00469     return State;
00470 
00471   // Mark the arguments which should be tainted after the function returns.
00472   for (ArgVector::const_iterator I = DstArgs.begin(),
00473                                  E = DstArgs.end(); I != E; ++I) {
00474     unsigned ArgNum = *I;
00475 
00476     // Should we mark all arguments as tainted?
00477     if (ArgNum == InvalidArgIndex) {
00478       // For all pointer and references that were passed in:
00479       //   If they are not pointing to const data, mark data as tainted.
00480       //   TODO: So far we are just going one level down; ideally we'd need to
00481       //         recurse here.
00482       for (unsigned int i = 0; i < CE->getNumArgs(); ++i) {
00483         const Expr *Arg = CE->getArg(i);
00484         // Process pointer argument.
00485         const Type *ArgTy = Arg->getType().getTypePtr();
00486         QualType PType = ArgTy->getPointeeType();
00487         if ((!PType.isNull() && !PType.isConstQualified())
00488             || (ArgTy->isReferenceType() && !Arg->getType().isConstQualified()))
00489           State = State->add<TaintArgsOnPostVisit>(i);
00490       }
00491       continue;
00492     }
00493 
00494     // Should mark the return value?
00495     if (ArgNum == ReturnValueIndex) {
00496       State = State->add<TaintArgsOnPostVisit>(ReturnValueIndex);
00497       continue;
00498     }
00499 
00500     // Mark the given argument.
00501     assert(ArgNum < CE->getNumArgs());
00502     State = State->add<TaintArgsOnPostVisit>(ArgNum);
00503   }
00504 
00505   return State;
00506 }
00507 
00508 
00509 // If argument 0 (file descriptor) is tainted, all arguments except for arg 0
00510 // and arg 1 should get taint.
00511 ProgramStateRef GenericTaintChecker::preFscanf(const CallExpr *CE,
00512                                                    CheckerContext &C) const {
00513   assert(CE->getNumArgs() >= 2);
00514   ProgramStateRef State = C.getState();
00515 
00516   // Check is the file descriptor is tainted.
00517   if (State->isTainted(CE->getArg(0), C.getLocationContext()) ||
00518       isStdin(CE->getArg(0), C)) {
00519     // All arguments except for the first two should get taint.
00520     for (unsigned int i = 2; i < CE->getNumArgs(); ++i)
00521         State = State->add<TaintArgsOnPostVisit>(i);
00522     return State;
00523   }
00524 
00525   return 0;
00526 }
00527 
00528 
00529 // If argument 0(protocol domain) is network, the return value should get taint.
00530 ProgramStateRef GenericTaintChecker::postSocket(const CallExpr *CE,
00531                                                 CheckerContext &C) const {
00532   ProgramStateRef State = C.getState();
00533   if (CE->getNumArgs() < 3)
00534     return State;
00535 
00536   SourceLocation DomLoc = CE->getArg(0)->getExprLoc();
00537   StringRef DomName = C.getMacroNameOrSpelling(DomLoc);
00538   // White list the internal communication protocols.
00539   if (DomName.equals("AF_SYSTEM") || DomName.equals("AF_LOCAL") ||
00540       DomName.equals("AF_UNIX") || DomName.equals("AF_RESERVED_36"))
00541     return State;
00542   State = State->addTaint(CE, C.getLocationContext());
00543   return State;
00544 }
00545 
00546 ProgramStateRef GenericTaintChecker::postScanf(const CallExpr *CE,
00547                                                    CheckerContext &C) const {
00548   ProgramStateRef State = C.getState();
00549   if (CE->getNumArgs() < 2)
00550     return State;
00551 
00552   SVal x = State->getSVal(CE->getArg(1), C.getLocationContext());
00553   // All arguments except for the very first one should get taint.
00554   for (unsigned int i = 1; i < CE->getNumArgs(); ++i) {
00555     // The arguments are pointer arguments. The data they are pointing at is
00556     // tainted after the call.
00557     const Expr* Arg = CE->getArg(i);
00558         SymbolRef Sym = getPointedToSymbol(C, Arg);
00559     if (Sym)
00560       State = State->addTaint(Sym);
00561   }
00562   return State;
00563 }
00564 
00565 ProgramStateRef GenericTaintChecker::postRetTaint(const CallExpr *CE,
00566                                                   CheckerContext &C) const {
00567   return C.getState()->addTaint(CE, C.getLocationContext());
00568 }
00569 
00570 bool GenericTaintChecker::isStdin(const Expr *E, CheckerContext &C) {
00571   ProgramStateRef State = C.getState();
00572   SVal Val = State->getSVal(E, C.getLocationContext());
00573 
00574   // stdin is a pointer, so it would be a region.
00575   const MemRegion *MemReg = Val.getAsRegion();
00576 
00577   // The region should be symbolic, we do not know it's value.
00578   const SymbolicRegion *SymReg = dyn_cast_or_null<SymbolicRegion>(MemReg);
00579   if (!SymReg)
00580     return false;
00581 
00582   // Get it's symbol and find the declaration region it's pointing to.
00583   const SymbolRegionValue *Sm =dyn_cast<SymbolRegionValue>(SymReg->getSymbol());
00584   if (!Sm)
00585     return false;
00586   const DeclRegion *DeclReg = dyn_cast_or_null<DeclRegion>(Sm->getRegion());
00587   if (!DeclReg)
00588     return false;
00589 
00590   // This region corresponds to a declaration, find out if it's a global/extern
00591   // variable named stdin with the proper type.
00592   if (const VarDecl *D = dyn_cast_or_null<VarDecl>(DeclReg->getDecl())) {
00593     D = D->getCanonicalDecl();
00594     if ((D->getName().find("stdin") != StringRef::npos) && D->isExternC())
00595         if (const PointerType * PtrTy =
00596               dyn_cast<PointerType>(D->getType().getTypePtr()))
00597           if (PtrTy->getPointeeType() == C.getASTContext().getFILEType())
00598             return true;
00599   }
00600   return false;
00601 }
00602 
00603 static bool getPrintfFormatArgumentNum(const CallExpr *CE,
00604                                        const CheckerContext &C,
00605                                        unsigned int &ArgNum) {
00606   // Find if the function contains a format string argument.
00607   // Handles: fprintf, printf, sprintf, snprintf, vfprintf, vprintf, vsprintf,
00608   // vsnprintf, syslog, custom annotated functions.
00609   const FunctionDecl *FDecl = C.getCalleeDecl(CE);
00610   if (!FDecl)
00611     return false;
00612   for (specific_attr_iterator<FormatAttr>
00613          i = FDecl->specific_attr_begin<FormatAttr>(),
00614          e = FDecl->specific_attr_end<FormatAttr>(); i != e ; ++i) {
00615 
00616     const FormatAttr *Format = *i;
00617     ArgNum = Format->getFormatIdx() - 1;
00618     if ((Format->getType() == "printf") && CE->getNumArgs() > ArgNum)
00619       return true;
00620   }
00621 
00622   // Or if a function is named setproctitle (this is a heuristic).
00623   if (C.getCalleeName(CE).find("setproctitle") != StringRef::npos) {
00624     ArgNum = 0;
00625     return true;
00626   }
00627 
00628   return false;
00629 }
00630 
00631 bool GenericTaintChecker::generateReportIfTainted(const Expr *E,
00632                                                   const char Msg[],
00633                                                   CheckerContext &C) const {
00634   assert(E);
00635 
00636   // Check for taint.
00637   ProgramStateRef State = C.getState();
00638   if (!State->isTainted(getPointedToSymbol(C, E)) &&
00639       !State->isTainted(E, C.getLocationContext()))
00640     return false;
00641 
00642   // Generate diagnostic.
00643   if (ExplodedNode *N = C.addTransition()) {
00644     initBugType();
00645     BugReport *report = new BugReport(*BT, Msg, N);
00646     report->addRange(E->getSourceRange());
00647     C.EmitReport(report);
00648     return true;
00649   }
00650   return false;
00651 }
00652 
00653 bool GenericTaintChecker::checkUncontrolledFormatString(const CallExpr *CE,
00654                                                         CheckerContext &C) const{
00655   // Check if the function contains a format string argument.
00656   unsigned int ArgNum = 0;
00657   if (!getPrintfFormatArgumentNum(CE, C, ArgNum))
00658     return false;
00659 
00660   // If either the format string content or the pointer itself are tainted, warn.
00661   if (generateReportIfTainted(CE->getArg(ArgNum),
00662                               MsgUncontrolledFormatString, C))
00663     return true;
00664   return false;
00665 }
00666 
00667 bool GenericTaintChecker::checkSystemCall(const CallExpr *CE,
00668                                           StringRef Name,
00669                                           CheckerContext &C) const {
00670   // TODO: It might make sense to run this check on demand. In some cases, 
00671   // we should check if the environment has been cleansed here. We also might 
00672   // need to know if the user was reset before these calls(seteuid).
00673   unsigned ArgNum = llvm::StringSwitch<unsigned>(Name)
00674     .Case("system", 0)
00675     .Case("popen", 0)
00676     .Case("execl", 0)
00677     .Case("execle", 0)
00678     .Case("execlp", 0)
00679     .Case("execv", 0)
00680     .Case("execvp", 0)
00681     .Case("execvP", 0)
00682     .Case("execve", 0)
00683     .Case("dlopen", 0)
00684     .Default(UINT_MAX);
00685 
00686   if (ArgNum == UINT_MAX || CE->getNumArgs() < (ArgNum + 1))
00687     return false;
00688 
00689   if (generateReportIfTainted(CE->getArg(ArgNum),
00690                               MsgSanitizeSystemArgs, C))
00691     return true;
00692 
00693   return false;
00694 }
00695 
00696 // TODO: Should this check be a part of the CString checker?
00697 // If yes, should taint be a global setting?
00698 bool GenericTaintChecker::checkTaintedBufferSize(const CallExpr *CE,
00699                                                  const FunctionDecl *FDecl,
00700                                                  CheckerContext &C) const {
00701   // If the function has a buffer size argument, set ArgNum.
00702   unsigned ArgNum = InvalidArgIndex;
00703   unsigned BId = 0;
00704   if ( (BId = FDecl->getMemoryFunctionKind()) )
00705     switch(BId) {
00706     case Builtin::BImemcpy:
00707     case Builtin::BImemmove:
00708     case Builtin::BIstrncpy:
00709       ArgNum = 2;
00710       break;
00711     case Builtin::BIstrndup:
00712       ArgNum = 1;
00713       break;
00714     default:
00715       break;
00716     };
00717 
00718   if (ArgNum == InvalidArgIndex) {
00719     if (C.isCLibraryFunction(FDecl, "malloc") ||
00720         C.isCLibraryFunction(FDecl, "calloc") ||
00721         C.isCLibraryFunction(FDecl, "alloca"))
00722       ArgNum = 0;
00723     else if (C.isCLibraryFunction(FDecl, "memccpy"))
00724       ArgNum = 3;
00725     else if (C.isCLibraryFunction(FDecl, "realloc"))
00726       ArgNum = 1;
00727     else if (C.isCLibraryFunction(FDecl, "bcopy"))
00728       ArgNum = 2;
00729   }
00730 
00731   if (ArgNum != InvalidArgIndex && CE->getNumArgs() > ArgNum &&
00732       generateReportIfTainted(CE->getArg(ArgNum), MsgTaintedBufferSize, C))
00733     return true;
00734 
00735   return false;
00736 }
00737 
00738 void ento::registerGenericTaintChecker(CheckerManager &mgr) {
00739   mgr.registerChecker<GenericTaintChecker>();
00740 }