clang  6.0.0svn
GenericTaintChecker.cpp
Go to the documentation of this file.
1 //== GenericTaintChecker.cpp ----------------------------------- -*- C++ -*--=//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This checker defines the attack surface for generic taint propagation.
11 //
12 // The taint information produced by it might be useful to other checkers. For
13 // example, checkers should report errors which involve tainted data more
14 // aggressively, even if the involved symbols are under constrained.
15 //
16 //===----------------------------------------------------------------------===//
17 #include "ClangSACheckers.h"
18 #include "clang/AST/Attr.h"
19 #include "clang/Basic/Builtins.h"
25 #include <climits>
26 
27 using namespace clang;
28 using namespace ento;
29 
30 namespace {
31 class GenericTaintChecker : public Checker< check::PostStmt<CallExpr>,
32  check::PreStmt<CallExpr> > {
33 public:
34  static void *getTag() { static int Tag; return &Tag; }
35 
36  void checkPostStmt(const CallExpr *CE, CheckerContext &C) const;
37 
38  void checkPreStmt(const CallExpr *CE, CheckerContext &C) const;
39 
40 private:
41  static const unsigned InvalidArgIndex = UINT_MAX;
42  /// Denotes the return vale.
43  static const unsigned ReturnValueIndex = UINT_MAX - 1;
44 
45  mutable std::unique_ptr<BugType> BT;
46  inline void initBugType() const {
47  if (!BT)
48  BT.reset(new BugType(this, "Use of Untrusted Data", "Untrusted Data"));
49  }
50 
51  /// \brief Catch taint related bugs. Check if tainted data is passed to a
52  /// system call etc.
53  bool checkPre(const CallExpr *CE, CheckerContext &C) const;
54 
55  /// \brief Add taint sources on a pre-visit.
56  void addSourcesPre(const CallExpr *CE, CheckerContext &C) const;
57 
58  /// \brief Propagate taint generated at pre-visit.
59  bool propagateFromPre(const CallExpr *CE, CheckerContext &C) const;
60 
61  /// \brief Add taint sources on a post visit.
62  void addSourcesPost(const CallExpr *CE, CheckerContext &C) const;
63 
64  /// Check if the region the expression evaluates to is the standard input,
65  /// and thus, is tainted.
66  static bool isStdin(const Expr *E, CheckerContext &C);
67 
68  /// \brief Given a pointer argument, return the value it points to.
69  static Optional<SVal> getPointedToSVal(CheckerContext &C, const Expr *Arg);
70 
71  /// Functions defining the attack surface.
72  typedef ProgramStateRef (GenericTaintChecker::*FnCheck)(const CallExpr *,
73  CheckerContext &C) const;
74  ProgramStateRef postScanf(const CallExpr *CE, CheckerContext &C) const;
75  ProgramStateRef postSocket(const CallExpr *CE, CheckerContext &C) const;
76  ProgramStateRef postRetTaint(const CallExpr *CE, CheckerContext &C) const;
77 
78  /// Taint the scanned input if the file is tainted.
79  ProgramStateRef preFscanf(const CallExpr *CE, CheckerContext &C) const;
80 
81  /// Check for CWE-134: Uncontrolled Format String.
82  static const char MsgUncontrolledFormatString[];
83  bool checkUncontrolledFormatString(const CallExpr *CE,
84  CheckerContext &C) const;
85 
86  /// Check for:
87  /// CERT/STR02-C. "Sanitize data passed to complex subsystems"
88  /// CWE-78, "Failure to Sanitize Data into an OS Command"
89  static const char MsgSanitizeSystemArgs[];
90  bool checkSystemCall(const CallExpr *CE, StringRef Name,
91  CheckerContext &C) const;
92 
93  /// Check if tainted data is used as a buffer size ins strn.. functions,
94  /// and allocators.
95  static const char MsgTaintedBufferSize[];
96  bool checkTaintedBufferSize(const CallExpr *CE, const FunctionDecl *FDecl,
97  CheckerContext &C) const;
98 
99  /// Generate a report if the expression is tainted or points to tainted data.
100  bool generateReportIfTainted(const Expr *E, const char Msg[],
101  CheckerContext &C) const;
102 
103  /// The bug visitor prints a diagnostic message at the location where a given
104  /// variable was tainted.
105  class TaintBugVisitor
106  : public BugReporterVisitorImpl<TaintBugVisitor> {
107  private:
108  const SVal V;
109 
110  public:
111  TaintBugVisitor(const SVal V) : V(V) {}
112  void Profile(llvm::FoldingSetNodeID &ID) const override { ID.Add(V); }
113 
114  std::shared_ptr<PathDiagnosticPiece> VisitNode(const ExplodedNode *N,
115  const ExplodedNode *PrevN,
116  BugReporterContext &BRC,
117  BugReport &BR) override;
118  };
119 
120  typedef SmallVector<unsigned, 2> ArgVector;
121 
122  /// \brief A struct used to specify taint propagation rules for a function.
123  ///
124  /// If any of the possible taint source arguments is tainted, all of the
125  /// destination arguments should also be tainted. Use InvalidArgIndex in the
126  /// src list to specify that all of the arguments can introduce taint. Use
127  /// InvalidArgIndex in the dst arguments to signify that all the non-const
128  /// pointer and reference arguments might be tainted on return. If
129  /// ReturnValueIndex is added to the dst list, the return value will be
130  /// tainted.
131  struct TaintPropagationRule {
132  /// List of arguments which can be taint sources and should be checked.
133  ArgVector SrcArgs;
134  /// List of arguments which should be tainted on function return.
135  ArgVector DstArgs;
136  // TODO: Check if using other data structures would be more optimal.
137 
138  TaintPropagationRule() {}
139 
140  TaintPropagationRule(unsigned SArg,
141  unsigned DArg, bool TaintRet = false) {
142  SrcArgs.push_back(SArg);
143  DstArgs.push_back(DArg);
144  if (TaintRet)
145  DstArgs.push_back(ReturnValueIndex);
146  }
147 
148  TaintPropagationRule(unsigned SArg1, unsigned SArg2,
149  unsigned DArg, bool TaintRet = false) {
150  SrcArgs.push_back(SArg1);
151  SrcArgs.push_back(SArg2);
152  DstArgs.push_back(DArg);
153  if (TaintRet)
154  DstArgs.push_back(ReturnValueIndex);
155  }
156 
157  /// Get the propagation rule for a given function.
158  static TaintPropagationRule
159  getTaintPropagationRule(const FunctionDecl *FDecl,
160  StringRef Name,
161  CheckerContext &C);
162 
163  inline void addSrcArg(unsigned A) { SrcArgs.push_back(A); }
164  inline void addDstArg(unsigned A) { DstArgs.push_back(A); }
165 
166  inline bool isNull() const { return SrcArgs.empty(); }
167 
168  inline bool isDestinationArgument(unsigned ArgNum) const {
169  return (std::find(DstArgs.begin(),
170  DstArgs.end(), ArgNum) != DstArgs.end());
171  }
172 
173  static inline bool isTaintedOrPointsToTainted(const Expr *E,
175  CheckerContext &C) {
176  if (State->isTainted(E, C.getLocationContext()) || isStdin(E, C))
177  return true;
178 
179  if (!E->getType().getTypePtr()->isPointerType())
180  return false;
181 
182  Optional<SVal> V = getPointedToSVal(C, E);
183  return (V && State->isTainted(*V));
184  }
185 
186  /// \brief Pre-process a function which propagates taint according to the
187  /// taint rule.
188  ProgramStateRef process(const CallExpr *CE, CheckerContext &C) const;
189 
190  };
191 };
192 
// Out-of-line definitions of the static constants that are declared (with
// their initializers) inside the class.
193 const unsigned GenericTaintChecker::ReturnValueIndex;
194 const unsigned GenericTaintChecker::InvalidArgIndex;
195 
// Diagnostic text for a tainted printf-style format string.
196 const char GenericTaintChecker::MsgUncontrolledFormatString[] =
197  "Untrusted data is used as a format string "
198  "(CWE-134: Uncontrolled Format String)";
199 
// Diagnostic text for tainted data reaching system()/exec*()-like calls.
200 const char GenericTaintChecker::MsgSanitizeSystemArgs[] =
201  "Untrusted data is passed to a system call "
202  "(CERT/STR02-C. Sanitize data passed to complex subsystems)";
203 
// Diagnostic text for a tainted size argument of a copy/allocation function.
204 const char GenericTaintChecker::MsgTaintedBufferSize[] =
205  "Untrusted data is used to specify the buffer size "
206  "(CERT/STR31-C. Guarantee that storage for strings has sufficient space for "
207  "character data and the null terminator)";
208 
209 } // end of anonymous namespace
210 
211 /// A set which is used to pass information from the pre-visit of a call
212 /// to its post-visit. The values are unsigned integers, which are either
213 /// ReturnValueIndex, or the index of a pointer/reference argument whose
214 /// pointed-to data should be tainted on return.
215 REGISTER_SET_WITH_PROGRAMSTATE(TaintArgsOnPostVisit, unsigned)
216 
217 std::shared_ptr<PathDiagnosticPiece>
218 GenericTaintChecker::TaintBugVisitor::VisitNode(const ExplodedNode *N,
219  const ExplodedNode *PrevN, BugReporterContext &BRC, BugReport &BR) {
220 
221  // Find the ExplodedNode where the taint was first introduced
222  if (!N->getState()->isTainted(V) || PrevN->getState()->isTainted(V))
223  return nullptr;
224 
226  if (!S)
227  return nullptr;
228 
229  const LocationContext *NCtx = N->getLocationContext();
232  if (!L.isValid() || !L.asLocation().isValid())
233  return nullptr;
234 
235  return std::make_shared<PathDiagnosticEventPiece>(
236  L, "Taint originated here");
237 }
238 
239 GenericTaintChecker::TaintPropagationRule
240 GenericTaintChecker::TaintPropagationRule::getTaintPropagationRule(
241  const FunctionDecl *FDecl,
242  StringRef Name,
243  CheckerContext &C) {
244  // TODO: Currently, we might lose precision here: we always mark a return
245  // value as tainted even if it's just a pointer, pointing to tainted data.
246 
247  // Check for exact name match for functions without builtin substitutes.
248  TaintPropagationRule Rule = llvm::StringSwitch<TaintPropagationRule>(Name)
249  .Case("atoi", TaintPropagationRule(0, ReturnValueIndex))
250  .Case("atol", TaintPropagationRule(0, ReturnValueIndex))
251  .Case("atoll", TaintPropagationRule(0, ReturnValueIndex))
252  .Case("getc", TaintPropagationRule(0, ReturnValueIndex))
253  .Case("fgetc", TaintPropagationRule(0, ReturnValueIndex))
254  .Case("getc_unlocked", TaintPropagationRule(0, ReturnValueIndex))
255  .Case("getw", TaintPropagationRule(0, ReturnValueIndex))
256  .Case("toupper", TaintPropagationRule(0, ReturnValueIndex))
257  .Case("tolower", TaintPropagationRule(0, ReturnValueIndex))
258  .Case("strchr", TaintPropagationRule(0, ReturnValueIndex))
259  .Case("strrchr", TaintPropagationRule(0, ReturnValueIndex))
260  .Case("read", TaintPropagationRule(0, 2, 1, true))
261  .Case("pread", TaintPropagationRule(InvalidArgIndex, 1, true))
262  .Case("gets", TaintPropagationRule(InvalidArgIndex, 0, true))
263  .Case("fgets", TaintPropagationRule(2, 0, true))
264  .Case("getline", TaintPropagationRule(2, 0))
265  .Case("getdelim", TaintPropagationRule(3, 0))
266  .Case("fgetln", TaintPropagationRule(0, ReturnValueIndex))
267  .Default(TaintPropagationRule());
268 
269  if (!Rule.isNull())
270  return Rule;
271 
272  // Check if it's one of the memory setting/copying functions.
273  // This check is specialized but faster then calling isCLibraryFunction.
274  unsigned BId = 0;
275  if ( (BId = FDecl->getMemoryFunctionKind()) )
276  switch(BId) {
277  case Builtin::BImemcpy:
278  case Builtin::BImemmove:
279  case Builtin::BIstrncpy:
280  case Builtin::BIstrncat:
281  return TaintPropagationRule(1, 2, 0, true);
282  case Builtin::BIstrlcpy:
283  case Builtin::BIstrlcat:
284  return TaintPropagationRule(1, 2, 0, false);
285  case Builtin::BIstrndup:
286  return TaintPropagationRule(0, 1, ReturnValueIndex);
287 
288  default:
289  break;
290  };
291 
292  // Process all other functions which could be defined as builtins.
293  if (Rule.isNull()) {
294  if (C.isCLibraryFunction(FDecl, "snprintf") ||
295  C.isCLibraryFunction(FDecl, "sprintf"))
296  return TaintPropagationRule(InvalidArgIndex, 0, true);
297  else if (C.isCLibraryFunction(FDecl, "strcpy") ||
298  C.isCLibraryFunction(FDecl, "stpcpy") ||
299  C.isCLibraryFunction(FDecl, "strcat"))
300  return TaintPropagationRule(1, 0, true);
301  else if (C.isCLibraryFunction(FDecl, "bcopy"))
302  return TaintPropagationRule(0, 2, 1, false);
303  else if (C.isCLibraryFunction(FDecl, "strdup") ||
304  C.isCLibraryFunction(FDecl, "strdupa"))
305  return TaintPropagationRule(0, ReturnValueIndex);
306  else if (C.isCLibraryFunction(FDecl, "wcsdup"))
307  return TaintPropagationRule(0, ReturnValueIndex);
308  }
309 
310  // Skipping the following functions, since they might be used for cleansing
311  // or smart memory copy:
312  // - memccpy - copying until hitting a special character.
313 
314  return TaintPropagationRule();
315 }
316 
317 void GenericTaintChecker::checkPreStmt(const CallExpr *CE,
318  CheckerContext &C) const {
319  // Check for errors first.
320  if (checkPre(CE, C))
321  return;
322 
323  // Add taint second.
324  addSourcesPre(CE, C);
325 }
326 
327 void GenericTaintChecker::checkPostStmt(const CallExpr *CE,
328  CheckerContext &C) const {
329  if (propagateFromPre(CE, C))
330  return;
331  addSourcesPost(CE, C);
332 }
333 
334 void GenericTaintChecker::addSourcesPre(const CallExpr *CE,
335  CheckerContext &C) const {
336  ProgramStateRef State = nullptr;
337  const FunctionDecl *FDecl = C.getCalleeDecl(CE);
338  if (!FDecl || FDecl->getKind() != Decl::Function)
339  return;
340 
341  StringRef Name = C.getCalleeName(FDecl);
342  if (Name.empty())
343  return;
344 
345  // First, try generating a propagation rule for this function.
346  TaintPropagationRule Rule =
347  TaintPropagationRule::getTaintPropagationRule(FDecl, Name, C);
348  if (!Rule.isNull()) {
349  State = Rule.process(CE, C);
350  if (!State)
351  return;
352  C.addTransition(State);
353  return;
354  }
355 
356  // Otherwise, check if we have custom pre-processing implemented.
357  FnCheck evalFunction = llvm::StringSwitch<FnCheck>(Name)
358  .Case("fscanf", &GenericTaintChecker::preFscanf)
359  .Default(nullptr);
360  // Check and evaluate the call.
361  if (evalFunction)
362  State = (this->*evalFunction)(CE, C);
363  if (!State)
364  return;
365  C.addTransition(State);
366 
367 }
368 
369 bool GenericTaintChecker::propagateFromPre(const CallExpr *CE,
370  CheckerContext &C) const {
371  ProgramStateRef State = C.getState();
372 
373  // Depending on what was tainted at pre-visit, we determined a set of
374  // arguments which should be tainted after the function returns. These are
375  // stored in the state as TaintArgsOnPostVisit set.
376  TaintArgsOnPostVisitTy TaintArgs = State->get<TaintArgsOnPostVisit>();
377  if (TaintArgs.isEmpty())
378  return false;
379 
381  I = TaintArgs.begin(), E = TaintArgs.end(); I != E; ++I) {
382  unsigned ArgNum = *I;
383 
384  // Special handling for the tainted return value.
385  if (ArgNum == ReturnValueIndex) {
386  State = State->addTaint(CE, C.getLocationContext());
387  continue;
388  }
389 
390  // The arguments are pointer arguments. The data they are pointing at is
391  // tainted after the call.
392  if (CE->getNumArgs() < (ArgNum + 1))
393  return false;
394  const Expr* Arg = CE->getArg(ArgNum);
395  Optional<SVal> V = getPointedToSVal(C, Arg);
396  if (V)
397  State = State->addTaint(*V);
398  }
399 
400  // Clear up the taint info from the state.
401  State = State->remove<TaintArgsOnPostVisit>();
402 
403  if (State != C.getState()) {
404  C.addTransition(State);
405  return true;
406  }
407  return false;
408 }
409 
410 void GenericTaintChecker::addSourcesPost(const CallExpr *CE,
411  CheckerContext &C) const {
412  // Define the attack surface.
413  // Set the evaluation function by switching on the callee name.
414  const FunctionDecl *FDecl = C.getCalleeDecl(CE);
415  if (!FDecl || FDecl->getKind() != Decl::Function)
416  return;
417 
418  StringRef Name = C.getCalleeName(FDecl);
419  if (Name.empty())
420  return;
421  FnCheck evalFunction = llvm::StringSwitch<FnCheck>(Name)
422  .Case("scanf", &GenericTaintChecker::postScanf)
423  // TODO: Add support for vfscanf & family.
424  .Case("getchar", &GenericTaintChecker::postRetTaint)
425  .Case("getchar_unlocked", &GenericTaintChecker::postRetTaint)
426  .Case("getenv", &GenericTaintChecker::postRetTaint)
427  .Case("fopen", &GenericTaintChecker::postRetTaint)
428  .Case("fdopen", &GenericTaintChecker::postRetTaint)
429  .Case("freopen", &GenericTaintChecker::postRetTaint)
430  .Case("getch", &GenericTaintChecker::postRetTaint)
431  .Case("wgetch", &GenericTaintChecker::postRetTaint)
432  .Case("socket", &GenericTaintChecker::postSocket)
433  .Default(nullptr);
434 
435  // If the callee isn't defined, it is not of security concern.
436  // Check and evaluate the call.
437  ProgramStateRef State = nullptr;
438  if (evalFunction)
439  State = (this->*evalFunction)(CE, C);
440  if (!State)
441  return;
442 
443  C.addTransition(State);
444 }
445 
446 bool GenericTaintChecker::checkPre(const CallExpr *CE, CheckerContext &C) const{
447 
448  if (checkUncontrolledFormatString(CE, C))
449  return true;
450 
451  const FunctionDecl *FDecl = C.getCalleeDecl(CE);
452  if (!FDecl || FDecl->getKind() != Decl::Function)
453  return false;
454 
455  StringRef Name = C.getCalleeName(FDecl);
456  if (Name.empty())
457  return false;
458 
459  if (checkSystemCall(CE, Name, C))
460  return true;
461 
462  if (checkTaintedBufferSize(CE, FDecl, C))
463  return true;
464 
465  return false;
466 }
467 
468 Optional<SVal> GenericTaintChecker::getPointedToSVal(CheckerContext &C,
469  const Expr* Arg) {
470  ProgramStateRef State = C.getState();
471  SVal AddrVal = State->getSVal(Arg->IgnoreParens(), C.getLocationContext());
472  if (AddrVal.isUnknownOrUndef())
473  return None;
474 
475  Optional<Loc> AddrLoc = AddrVal.getAs<Loc>();
476  if (!AddrLoc)
477  return None;
478 
479  const PointerType *ArgTy =
480  dyn_cast<PointerType>(Arg->getType().getCanonicalType().getTypePtr());
481  return State->getSVal(*AddrLoc, ArgTy ? ArgTy->getPointeeType(): QualType());
482 }
483 
485 GenericTaintChecker::TaintPropagationRule::process(const CallExpr *CE,
486  CheckerContext &C) const {
487  ProgramStateRef State = C.getState();
488 
489  // Check for taint in arguments.
490  bool IsTainted = false;
491  for (ArgVector::const_iterator I = SrcArgs.begin(),
492  E = SrcArgs.end(); I != E; ++I) {
493  unsigned ArgNum = *I;
494 
495  if (ArgNum == InvalidArgIndex) {
496  // Check if any of the arguments is tainted, but skip the
497  // destination arguments.
498  for (unsigned int i = 0; i < CE->getNumArgs(); ++i) {
499  if (isDestinationArgument(i))
500  continue;
501  if ((IsTainted = isTaintedOrPointsToTainted(CE->getArg(i), State, C)))
502  break;
503  }
504  break;
505  }
506 
507  if (CE->getNumArgs() < (ArgNum + 1))
508  return State;
509  if ((IsTainted = isTaintedOrPointsToTainted(CE->getArg(ArgNum), State, C)))
510  break;
511  }
512  if (!IsTainted)
513  return State;
514 
515  // Mark the arguments which should be tainted after the function returns.
516  for (ArgVector::const_iterator I = DstArgs.begin(),
517  E = DstArgs.end(); I != E; ++I) {
518  unsigned ArgNum = *I;
519 
520  // Should we mark all arguments as tainted?
521  if (ArgNum == InvalidArgIndex) {
522  // For all pointer and references that were passed in:
523  // If they are not pointing to const data, mark data as tainted.
524  // TODO: So far we are just going one level down; ideally we'd need to
525  // recurse here.
526  for (unsigned int i = 0; i < CE->getNumArgs(); ++i) {
527  const Expr *Arg = CE->getArg(i);
528  // Process pointer argument.
529  const Type *ArgTy = Arg->getType().getTypePtr();
530  QualType PType = ArgTy->getPointeeType();
531  if ((!PType.isNull() && !PType.isConstQualified())
532  || (ArgTy->isReferenceType() && !Arg->getType().isConstQualified()))
533  State = State->add<TaintArgsOnPostVisit>(i);
534  }
535  continue;
536  }
537 
538  // Should mark the return value?
539  if (ArgNum == ReturnValueIndex) {
540  State = State->add<TaintArgsOnPostVisit>(ReturnValueIndex);
541  continue;
542  }
543 
544  // Mark the given argument.
545  assert(ArgNum < CE->getNumArgs());
546  State = State->add<TaintArgsOnPostVisit>(ArgNum);
547  }
548 
549  return State;
550 }
551 
552 
553 // If argument 0 (file descriptor) is tainted, all arguments except for arg 0
554 // and arg 1 should get taint.
555 ProgramStateRef GenericTaintChecker::preFscanf(const CallExpr *CE,
556  CheckerContext &C) const {
557  assert(CE->getNumArgs() >= 2);
558  ProgramStateRef State = C.getState();
559 
560  // Check is the file descriptor is tainted.
561  if (State->isTainted(CE->getArg(0), C.getLocationContext()) ||
562  isStdin(CE->getArg(0), C)) {
563  // All arguments except for the first two should get taint.
564  for (unsigned int i = 2; i < CE->getNumArgs(); ++i)
565  State = State->add<TaintArgsOnPostVisit>(i);
566  return State;
567  }
568 
569  return nullptr;
570 }
571 
572 
573 // If argument 0(protocol domain) is network, the return value should get taint.
574 ProgramStateRef GenericTaintChecker::postSocket(const CallExpr *CE,
575  CheckerContext &C) const {
576  ProgramStateRef State = C.getState();
577  if (CE->getNumArgs() < 3)
578  return State;
579 
580  SourceLocation DomLoc = CE->getArg(0)->getExprLoc();
581  StringRef DomName = C.getMacroNameOrSpelling(DomLoc);
582  // White list the internal communication protocols.
583  if (DomName.equals("AF_SYSTEM") || DomName.equals("AF_LOCAL") ||
584  DomName.equals("AF_UNIX") || DomName.equals("AF_RESERVED_36"))
585  return State;
586  State = State->addTaint(CE, C.getLocationContext());
587  return State;
588 }
589 
590 ProgramStateRef GenericTaintChecker::postScanf(const CallExpr *CE,
591  CheckerContext &C) const {
592  ProgramStateRef State = C.getState();
593  if (CE->getNumArgs() < 2)
594  return State;
595 
596  // All arguments except for the very first one should get taint.
597  for (unsigned int i = 1; i < CE->getNumArgs(); ++i) {
598  // The arguments are pointer arguments. The data they are pointing at is
599  // tainted after the call.
600  const Expr* Arg = CE->getArg(i);
601  Optional<SVal> V = getPointedToSVal(C, Arg);
602  if (V)
603  State = State->addTaint(*V);
604  }
605  return State;
606 }
607 
608 ProgramStateRef GenericTaintChecker::postRetTaint(const CallExpr *CE,
609  CheckerContext &C) const {
610  return C.getState()->addTaint(CE, C.getLocationContext());
611 }
612 
613 bool GenericTaintChecker::isStdin(const Expr *E, CheckerContext &C) {
614  ProgramStateRef State = C.getState();
615  SVal Val = State->getSVal(E, C.getLocationContext());
616 
617  // stdin is a pointer, so it would be a region.
618  const MemRegion *MemReg = Val.getAsRegion();
619 
620  // The region should be symbolic, we do not know it's value.
621  const SymbolicRegion *SymReg = dyn_cast_or_null<SymbolicRegion>(MemReg);
622  if (!SymReg)
623  return false;
624 
625  // Get it's symbol and find the declaration region it's pointing to.
626  const SymbolRegionValue *Sm =dyn_cast<SymbolRegionValue>(SymReg->getSymbol());
627  if (!Sm)
628  return false;
629  const DeclRegion *DeclReg = dyn_cast_or_null<DeclRegion>(Sm->getRegion());
630  if (!DeclReg)
631  return false;
632 
633  // This region corresponds to a declaration, find out if it's a global/extern
634  // variable named stdin with the proper type.
635  if (const VarDecl *D = dyn_cast_or_null<VarDecl>(DeclReg->getDecl())) {
636  D = D->getCanonicalDecl();
637  if ((D->getName().find("stdin") != StringRef::npos) && D->isExternC())
638  if (const PointerType * PtrTy =
639  dyn_cast<PointerType>(D->getType().getTypePtr()))
640  if (PtrTy->getPointeeType() == C.getASTContext().getFILEType())
641  return true;
642  }
643  return false;
644 }
645 
646 static bool getPrintfFormatArgumentNum(const CallExpr *CE,
647  const CheckerContext &C,
648  unsigned int &ArgNum) {
649  // Find if the function contains a format string argument.
650  // Handles: fprintf, printf, sprintf, snprintf, vfprintf, vprintf, vsprintf,
651  // vsnprintf, syslog, custom annotated functions.
652  const FunctionDecl *FDecl = C.getCalleeDecl(CE);
653  if (!FDecl)
654  return false;
655  for (const auto *Format : FDecl->specific_attrs<FormatAttr>()) {
656  ArgNum = Format->getFormatIdx() - 1;
657  if ((Format->getType()->getName() == "printf") &&
658  CE->getNumArgs() > ArgNum)
659  return true;
660  }
661 
662  // Or if a function is named setproctitle (this is a heuristic).
663  if (C.getCalleeName(CE).find("setproctitle") != StringRef::npos) {
664  ArgNum = 0;
665  return true;
666  }
667 
668  return false;
669 }
670 
671 bool GenericTaintChecker::generateReportIfTainted(const Expr *E,
672  const char Msg[],
673  CheckerContext &C) const {
674  assert(E);
675 
676  // Check for taint.
677  ProgramStateRef State = C.getState();
678  Optional<SVal> PointedToSVal = getPointedToSVal(C, E);
679  SVal TaintedSVal;
680  if (PointedToSVal && State->isTainted(*PointedToSVal))
681  TaintedSVal = *PointedToSVal;
682  else if (State->isTainted(E, C.getLocationContext()))
683  TaintedSVal = C.getSVal(E);
684  else
685  return false;
686 
687  // Generate diagnostic.
689  initBugType();
690  auto report = llvm::make_unique<BugReport>(*BT, Msg, N);
691  report->addRange(E->getSourceRange());
692  report->addVisitor(llvm::make_unique<TaintBugVisitor>(TaintedSVal));
693  C.emitReport(std::move(report));
694  return true;
695  }
696  return false;
697 }
698 
699 bool GenericTaintChecker::checkUncontrolledFormatString(const CallExpr *CE,
700  CheckerContext &C) const{
701  // Check if the function contains a format string argument.
702  unsigned int ArgNum = 0;
703  if (!getPrintfFormatArgumentNum(CE, C, ArgNum))
704  return false;
705 
706  // If either the format string content or the pointer itself are tainted, warn.
707  return generateReportIfTainted(CE->getArg(ArgNum),
708  MsgUncontrolledFormatString, C);
709 }
710 
711 bool GenericTaintChecker::checkSystemCall(const CallExpr *CE,
712  StringRef Name,
713  CheckerContext &C) const {
714  // TODO: It might make sense to run this check on demand. In some cases,
715  // we should check if the environment has been cleansed here. We also might
716  // need to know if the user was reset before these calls(seteuid).
717  unsigned ArgNum = llvm::StringSwitch<unsigned>(Name)
718  .Case("system", 0)
719  .Case("popen", 0)
720  .Case("execl", 0)
721  .Case("execle", 0)
722  .Case("execlp", 0)
723  .Case("execv", 0)
724  .Case("execvp", 0)
725  .Case("execvP", 0)
726  .Case("execve", 0)
727  .Case("dlopen", 0)
728  .Default(UINT_MAX);
729 
730  if (ArgNum == UINT_MAX || CE->getNumArgs() < (ArgNum + 1))
731  return false;
732 
733  return generateReportIfTainted(CE->getArg(ArgNum), MsgSanitizeSystemArgs, C);
734 }
735 
736 // TODO: Should this check be a part of the CString checker?
737 // If yes, should taint be a global setting?
738 bool GenericTaintChecker::checkTaintedBufferSize(const CallExpr *CE,
739  const FunctionDecl *FDecl,
740  CheckerContext &C) const {
741  // If the function has a buffer size argument, set ArgNum.
742  unsigned ArgNum = InvalidArgIndex;
743  unsigned BId = 0;
744  if ( (BId = FDecl->getMemoryFunctionKind()) )
745  switch(BId) {
746  case Builtin::BImemcpy:
747  case Builtin::BImemmove:
748  case Builtin::BIstrncpy:
749  ArgNum = 2;
750  break;
751  case Builtin::BIstrndup:
752  ArgNum = 1;
753  break;
754  default:
755  break;
756  };
757 
758  if (ArgNum == InvalidArgIndex) {
759  if (C.isCLibraryFunction(FDecl, "malloc") ||
760  C.isCLibraryFunction(FDecl, "calloc") ||
761  C.isCLibraryFunction(FDecl, "alloca"))
762  ArgNum = 0;
763  else if (C.isCLibraryFunction(FDecl, "memccpy"))
764  ArgNum = 3;
765  else if (C.isCLibraryFunction(FDecl, "realloc"))
766  ArgNum = 1;
767  else if (C.isCLibraryFunction(FDecl, "bcopy"))
768  ArgNum = 2;
769  }
770 
771  return ArgNum != InvalidArgIndex && CE->getNumArgs() > ArgNum &&
772  generateReportIfTainted(CE->getArg(ArgNum), MsgTaintedBufferSize, C);
773 }
774 
// Registration entry point: registers GenericTaintChecker with the given
// checker manager.
775 void ento::registerGenericTaintChecker(CheckerManager &mgr) {
776  mgr.registerChecker<GenericTaintChecker>();
777 }
FunctionDecl - An instance of this class is created to represent a function declaration or definition...
Definition: Decl.h:1698
unsigned getMemoryFunctionKind() const
Identify a memory copying or setting function.
Definition: Decl.cpp:3496
PointerType - C99 6.7.5.1 - Pointer Declarators.
Definition: Type.h:2228
QualType getPointeeType() const
Definition: Type.h:2242
A (possibly-)qualified type.
Definition: Type.h:614
MemRegion - The root abstract class for all memory regions.
Definition: MemRegion.h:79
Expr * getArg(unsigned Arg)
getArg - Return the specified argument.
Definition: Expr.h:2339
Stmt - This represents one statement.
Definition: Stmt.h:66
unsigned getNumArgs() const
getNumArgs - Return the number of actual arguments to this call.
Definition: Expr.h:2327
QualType getPointeeType() const
If this is a pointer, ObjC object pointer, or block pointer, this returns the respective pointee...
Definition: Type.cpp:435
ExplodedNode * addTransition(ProgramStateRef State=nullptr, const ProgramPointTag *Tag=nullptr)
Generates a new transition in the program state graph (ExplodedGraph).
IntrusiveRefCntPtr< const ProgramState > ProgramStateRef
The base class of the type hierarchy.
Definition: Type.h:1306
const ProgramStateRef & getState() const
VarDecl - An instance of this class is created to represent a variable declaration or definition...
Definition: Decl.h:807
const FunctionDecl * getCalleeDecl(const CallExpr *CE) const
Get the declaration of the called function (path-sensitive).
SVal getSVal(const Stmt *S) const
Get the value of arbitrary expressions at this point in the path.
#define REGISTER_SET_WITH_PROGRAMSTATE(Name, Elem)
Declares an immutable set of type NameTy, suitable for placement into the ProgramState.
StringRef getCalleeName(const FunctionDecl *FunDecl) const
Get the name of the called function (path-sensitive).
LineState State
This class provides a convenience implementation for clone() using the Curiously-Recurring Template P...
bool isReferenceType() const
Definition: Type.h:5781
const LocationContext * getLocationContext() const
static bool getPrintfFormatArgumentNum(const CallExpr *CE, const CheckerContext &C, unsigned int &ArgNum)
const Type * getTypePtr() const
Retrieves a pointer to the underlying (unqualified) type.
Definition: Type.h:5549
#define UINT_MAX
Definition: limits.h:72
SymbolicRegion - A special, "non-concrete" region.
Definition: MemRegion.h:742
Expr - This represents one expression.
Definition: Expr.h:119
static bool isCLibraryFunction(const FunctionDecl *FD, StringRef Name=StringRef())
Returns true if the callee is an externally-visible function in the top-level namespace, such as malloc.
const Decl * getDecl() const
Definition: MemRegion.h:904
QualType getFILEType() const
Retrieve the C FILE type.
Definition: ASTContext.h:1654
QualType getType() const
Definition: Expr.h:140
ExplodedNode * generateNonFatalErrorNode(ProgramStateRef State=nullptr, const ProgramPointTag *Tag=nullptr)
Generate a transition to a node that will be used to report an error.
bool isNull() const
Return true if this QualType doesn't point to a type yet.
Definition: Type.h:681
Optional< T > getAs() const
Convert to the specified SVal type, returning None if this SVal is not of the desired type...
Definition: SVals.h:100
bool isConstQualified() const
Determine whether this type is const-qualified.
Definition: Type.h:5608
void emitReport(std::unique_ptr< BugReport > R)
Emit the diagnostics report.
static const Stmt * getStmt(const ExplodedNode *N)
Given an exploded node, retrieve the statement that should be used for the diagnostic location...
static PathDiagnosticLocation createBegin(const Decl *D, const SourceManager &SM)
Create a location for the beginning of the declaration.
QualType getCanonicalType() const
Definition: Type.h:5588
CHECKER * registerChecker()
Used to register checkers.
const TypedValueRegion * getRegion() const
Definition: SymbolManager.h:52
Encodes a location in the source.
SymbolRef getSymbol() const
Definition: MemRegion.h:757
const MemRegion * getAsRegion() const
Definition: SVals.cpp:140
SVal - This represents a symbolic expression, which can be either an L-value or an R-value...
Definition: SVals.h:63
SourceLocation getExprLoc() const LLVM_READONLY
getExprLoc - Return the preferred location for the arrow when diagnosing a problem with a generic exp...
Definition: Expr.cpp:249
A symbol representing the value stored at a MemRegion.
Definition: SymbolManager.h:42
Dataflow Directional Tag Classes.
bool isValid() const
Return true if this is a valid SourceLocation object.
StringRef getMacroNameOrSpelling(SourceLocation &Loc)
Depending on whether the location corresponds to a macro, return either the macro name or the token sp...
Kind getKind() const
Definition: DeclBase.h:419
const ProgramStateRef & getState() const
llvm::iterator_range< specific_attr_iterator< T > > specific_attrs() const
Definition: DeclBase.h:517
SourceRange getSourceRange() const LLVM_READONLY
SourceLocation tokens are not useful in isolation - they are low level value objects created/interpre...
Definition: Stmt.cpp:265
CallExpr - Represents a function call (C99 6.5.2.2, C++ [expr.call]).
Definition: Expr.h:2270
bool isPointerType() const
Definition: Type.h:5772
This class provides an interface through which checkers can create individual bug reports...
Definition: BugReporter.h:55
const LocationContext * getLocationContext() const
Defines enum values for all the target-independent builtin functions.
SourceManager & getSourceManager()
Definition: BugReporter.h:565
bool isUnknownOrUndef() const
Definition: SVals.h:133
Expr * IgnoreParens() LLVM_READONLY
IgnoreParens - Ignore parentheses.
Definition: Expr.cpp:2460