clang  8.0.0svn
GenericTaintChecker.cpp
Go to the documentation of this file.
1 //== GenericTaintChecker.cpp ----------------------------------- -*- C++ -*--=//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This checker defines the attack surface for generic taint propagation.
11 //
12 // The taint information produced by it might be useful to other checkers. For
13 // example, checkers should report errors which involve tainted data more
14 // aggressively, even if the involved symbols are under constrained.
15 //
16 //===----------------------------------------------------------------------===//
17 #include "ClangSACheckers.h"
18 #include "clang/AST/Attr.h"
19 #include "clang/Basic/Builtins.h"
25 #include <climits>
26 
27 using namespace clang;
28 using namespace ento;
29 
30 namespace {
31 class GenericTaintChecker : public Checker< check::PostStmt<CallExpr>,
32  check::PreStmt<CallExpr> > {
33 public:
34  static void *getTag() { static int Tag; return &Tag; }
35 
36  void checkPostStmt(const CallExpr *CE, CheckerContext &C) const;
37 
38  void checkPreStmt(const CallExpr *CE, CheckerContext &C) const;
39 
40 private:
41  static const unsigned InvalidArgIndex = UINT_MAX;
42  /// Denotes the return vale.
43  static const unsigned ReturnValueIndex = UINT_MAX - 1;
44 
45  mutable std::unique_ptr<BugType> BT;
46  inline void initBugType() const {
47  if (!BT)
48  BT.reset(new BugType(this, "Use of Untrusted Data", "Untrusted Data"));
49  }
50 
51  /// Catch taint related bugs. Check if tainted data is passed to a
52  /// system call etc.
53  bool checkPre(const CallExpr *CE, CheckerContext &C) const;
54 
55  /// Add taint sources on a pre-visit.
56  void addSourcesPre(const CallExpr *CE, CheckerContext &C) const;
57 
58  /// Propagate taint generated at pre-visit.
59  bool propagateFromPre(const CallExpr *CE, CheckerContext &C) const;
60 
61  /// Add taint sources on a post visit.
62  void addSourcesPost(const CallExpr *CE, CheckerContext &C) const;
63 
64  /// Check if the region the expression evaluates to is the standard input,
65  /// and thus, is tainted.
66  static bool isStdin(const Expr *E, CheckerContext &C);
67 
68  /// Given a pointer argument, return the value it points to.
69  static Optional<SVal> getPointedToSVal(CheckerContext &C, const Expr *Arg);
70 
71  /// Functions defining the attack surface.
72  typedef ProgramStateRef (GenericTaintChecker::*FnCheck)(const CallExpr *,
73  CheckerContext &C) const;
74  ProgramStateRef postScanf(const CallExpr *CE, CheckerContext &C) const;
75  ProgramStateRef postSocket(const CallExpr *CE, CheckerContext &C) const;
76  ProgramStateRef postRetTaint(const CallExpr *CE, CheckerContext &C) const;
77 
78  /// Taint the scanned input if the file is tainted.
79  ProgramStateRef preFscanf(const CallExpr *CE, CheckerContext &C) const;
80 
81  /// Check for CWE-134: Uncontrolled Format String.
82  static const char MsgUncontrolledFormatString[];
83  bool checkUncontrolledFormatString(const CallExpr *CE,
84  CheckerContext &C) const;
85 
86  /// Check for:
87  /// CERT/STR02-C. "Sanitize data passed to complex subsystems"
88  /// CWE-78, "Failure to Sanitize Data into an OS Command"
89  static const char MsgSanitizeSystemArgs[];
90  bool checkSystemCall(const CallExpr *CE, StringRef Name,
91  CheckerContext &C) const;
92 
93  /// Check if tainted data is used as a buffer size ins strn.. functions,
94  /// and allocators.
95  static const char MsgTaintedBufferSize[];
96  bool checkTaintedBufferSize(const CallExpr *CE, const FunctionDecl *FDecl,
97  CheckerContext &C) const;
98 
99  /// Generate a report if the expression is tainted or points to tainted data.
100  bool generateReportIfTainted(const Expr *E, const char Msg[],
101  CheckerContext &C) const;
102 
103  typedef SmallVector<unsigned, 2> ArgVector;
104 
105  /// A struct used to specify taint propagation rules for a function.
106  ///
107  /// If any of the possible taint source arguments is tainted, all of the
108  /// destination arguments should also be tainted. Use InvalidArgIndex in the
109  /// src list to specify that all of the arguments can introduce taint. Use
110  /// InvalidArgIndex in the dst arguments to signify that all the non-const
111  /// pointer and reference arguments might be tainted on return. If
112  /// ReturnValueIndex is added to the dst list, the return value will be
113  /// tainted.
114  struct TaintPropagationRule {
115  /// List of arguments which can be taint sources and should be checked.
116  ArgVector SrcArgs;
117  /// List of arguments which should be tainted on function return.
118  ArgVector DstArgs;
119  // TODO: Check if using other data structures would be more optimal.
120 
121  TaintPropagationRule() {}
122 
123  TaintPropagationRule(unsigned SArg,
124  unsigned DArg, bool TaintRet = false) {
125  SrcArgs.push_back(SArg);
126  DstArgs.push_back(DArg);
127  if (TaintRet)
128  DstArgs.push_back(ReturnValueIndex);
129  }
130 
131  TaintPropagationRule(unsigned SArg1, unsigned SArg2,
132  unsigned DArg, bool TaintRet = false) {
133  SrcArgs.push_back(SArg1);
134  SrcArgs.push_back(SArg2);
135  DstArgs.push_back(DArg);
136  if (TaintRet)
137  DstArgs.push_back(ReturnValueIndex);
138  }
139 
140  /// Get the propagation rule for a given function.
141  static TaintPropagationRule
142  getTaintPropagationRule(const FunctionDecl *FDecl,
143  StringRef Name,
144  CheckerContext &C);
145 
146  inline void addSrcArg(unsigned A) { SrcArgs.push_back(A); }
147  inline void addDstArg(unsigned A) { DstArgs.push_back(A); }
148 
149  inline bool isNull() const { return SrcArgs.empty(); }
150 
151  inline bool isDestinationArgument(unsigned ArgNum) const {
152  return (std::find(DstArgs.begin(),
153  DstArgs.end(), ArgNum) != DstArgs.end());
154  }
155 
156  static inline bool isTaintedOrPointsToTainted(const Expr *E,
158  CheckerContext &C) {
159  if (State->isTainted(E, C.getLocationContext()) || isStdin(E, C))
160  return true;
161 
162  if (!E->getType().getTypePtr()->isPointerType())
163  return false;
164 
165  Optional<SVal> V = getPointedToSVal(C, E);
166  return (V && State->isTainted(*V));
167  }
168 
169  /// Pre-process a function which propagates taint according to the
170  /// taint rule.
171  ProgramStateRef process(const CallExpr *CE, CheckerContext &C) const;
172 
173  };
174 };
175 
// Out-of-class definitions for the static constants that are initialized
// inside the class body.
176 const unsigned GenericTaintChecker::ReturnValueIndex;
177 const unsigned GenericTaintChecker::InvalidArgIndex;
178 
// Diagnostic text used by checkUncontrolledFormatString().
179 const char GenericTaintChecker::MsgUncontrolledFormatString[] =
180  "Untrusted data is used as a format string "
181  "(CWE-134: Uncontrolled Format String)";
182 
// Diagnostic text used by checkSystemCall().
183 const char GenericTaintChecker::MsgSanitizeSystemArgs[] =
184  "Untrusted data is passed to a system call "
185  "(CERT/STR02-C. Sanitize data passed to complex subsystems)";
186 
// Diagnostic text used by checkTaintedBufferSize().
187 const char GenericTaintChecker::MsgTaintedBufferSize[] =
188  "Untrusted data is used to specify the buffer size "
189  "(CERT/STR31-C. Guarantee that storage for strings has sufficient space for "
190  "character data and the null terminator)";
191 
192 } // end of anonymous namespace
193 
194 /// A set used to pass information from the pre-visit of a call to its
195 /// post-visit. Each element is an unsigned integer: either ReturnValueIndex
196 /// (taint the call's return value) or the index of a pointer/reference
197 /// argument whose pointed-to data should be tainted on return.
198 REGISTER_SET_WITH_PROGRAMSTATE(TaintArgsOnPostVisit, unsigned)
199 
200 GenericTaintChecker::TaintPropagationRule
201 GenericTaintChecker::TaintPropagationRule::getTaintPropagationRule(
202  const FunctionDecl *FDecl,
203  StringRef Name,
204  CheckerContext &C) {
205  // TODO: Currently, we might lose precision here: we always mark a return
206  // value as tainted even if it's just a pointer, pointing to tainted data.
207 
208  // Check for exact name match for functions without builtin substitutes.
209  TaintPropagationRule Rule = llvm::StringSwitch<TaintPropagationRule>(Name)
210  .Case("atoi", TaintPropagationRule(0, ReturnValueIndex))
211  .Case("atol", TaintPropagationRule(0, ReturnValueIndex))
212  .Case("atoll", TaintPropagationRule(0, ReturnValueIndex))
213  .Case("getc", TaintPropagationRule(0, ReturnValueIndex))
214  .Case("fgetc", TaintPropagationRule(0, ReturnValueIndex))
215  .Case("getc_unlocked", TaintPropagationRule(0, ReturnValueIndex))
216  .Case("getw", TaintPropagationRule(0, ReturnValueIndex))
217  .Case("toupper", TaintPropagationRule(0, ReturnValueIndex))
218  .Case("tolower", TaintPropagationRule(0, ReturnValueIndex))
219  .Case("strchr", TaintPropagationRule(0, ReturnValueIndex))
220  .Case("strrchr", TaintPropagationRule(0, ReturnValueIndex))
221  .Case("read", TaintPropagationRule(0, 2, 1, true))
222  .Case("pread", TaintPropagationRule(InvalidArgIndex, 1, true))
223  .Case("gets", TaintPropagationRule(InvalidArgIndex, 0, true))
224  .Case("fgets", TaintPropagationRule(2, 0, true))
225  .Case("getline", TaintPropagationRule(2, 0))
226  .Case("getdelim", TaintPropagationRule(3, 0))
227  .Case("fgetln", TaintPropagationRule(0, ReturnValueIndex))
228  .Default(TaintPropagationRule());
229 
230  if (!Rule.isNull())
231  return Rule;
232 
233  // Check if it's one of the memory setting/copying functions.
234  // This check is specialized but faster then calling isCLibraryFunction.
235  unsigned BId = 0;
236  if ( (BId = FDecl->getMemoryFunctionKind()) )
237  switch(BId) {
238  case Builtin::BImemcpy:
239  case Builtin::BImemmove:
240  case Builtin::BIstrncpy:
241  case Builtin::BIstrncat:
242  return TaintPropagationRule(1, 2, 0, true);
243  case Builtin::BIstrlcpy:
244  case Builtin::BIstrlcat:
245  return TaintPropagationRule(1, 2, 0, false);
246  case Builtin::BIstrndup:
247  return TaintPropagationRule(0, 1, ReturnValueIndex);
248 
249  default:
250  break;
251  };
252 
253  // Process all other functions which could be defined as builtins.
254  if (Rule.isNull()) {
255  if (C.isCLibraryFunction(FDecl, "snprintf") ||
256  C.isCLibraryFunction(FDecl, "sprintf"))
257  return TaintPropagationRule(InvalidArgIndex, 0, true);
258  else if (C.isCLibraryFunction(FDecl, "strcpy") ||
259  C.isCLibraryFunction(FDecl, "stpcpy") ||
260  C.isCLibraryFunction(FDecl, "strcat"))
261  return TaintPropagationRule(1, 0, true);
262  else if (C.isCLibraryFunction(FDecl, "bcopy"))
263  return TaintPropagationRule(0, 2, 1, false);
264  else if (C.isCLibraryFunction(FDecl, "strdup") ||
265  C.isCLibraryFunction(FDecl, "strdupa"))
266  return TaintPropagationRule(0, ReturnValueIndex);
267  else if (C.isCLibraryFunction(FDecl, "wcsdup"))
268  return TaintPropagationRule(0, ReturnValueIndex);
269  }
270 
271  // Skipping the following functions, since they might be used for cleansing
272  // or smart memory copy:
273  // - memccpy - copying until hitting a special character.
274 
275  return TaintPropagationRule();
276 }
277 
278 void GenericTaintChecker::checkPreStmt(const CallExpr *CE,
279  CheckerContext &C) const {
280  // Check for errors first.
281  if (checkPre(CE, C))
282  return;
283 
284  // Add taint second.
285  addSourcesPre(CE, C);
286 }
287 
288 void GenericTaintChecker::checkPostStmt(const CallExpr *CE,
289  CheckerContext &C) const {
290  if (propagateFromPre(CE, C))
291  return;
292  addSourcesPost(CE, C);
293 }
294 
295 void GenericTaintChecker::addSourcesPre(const CallExpr *CE,
296  CheckerContext &C) const {
297  ProgramStateRef State = nullptr;
298  const FunctionDecl *FDecl = C.getCalleeDecl(CE);
299  if (!FDecl || FDecl->getKind() != Decl::Function)
300  return;
301 
302  StringRef Name = C.getCalleeName(FDecl);
303  if (Name.empty())
304  return;
305 
306  // First, try generating a propagation rule for this function.
307  TaintPropagationRule Rule =
308  TaintPropagationRule::getTaintPropagationRule(FDecl, Name, C);
309  if (!Rule.isNull()) {
310  State = Rule.process(CE, C);
311  if (!State)
312  return;
313  C.addTransition(State);
314  return;
315  }
316 
317  // Otherwise, check if we have custom pre-processing implemented.
318  FnCheck evalFunction = llvm::StringSwitch<FnCheck>(Name)
319  .Case("fscanf", &GenericTaintChecker::preFscanf)
320  .Default(nullptr);
321  // Check and evaluate the call.
322  if (evalFunction)
323  State = (this->*evalFunction)(CE, C);
324  if (!State)
325  return;
326  C.addTransition(State);
327 
328 }
329 
330 bool GenericTaintChecker::propagateFromPre(const CallExpr *CE,
331  CheckerContext &C) const {
332  ProgramStateRef State = C.getState();
333 
334  // Depending on what was tainted at pre-visit, we determined a set of
335  // arguments which should be tainted after the function returns. These are
336  // stored in the state as TaintArgsOnPostVisit set.
337  TaintArgsOnPostVisitTy TaintArgs = State->get<TaintArgsOnPostVisit>();
338  if (TaintArgs.isEmpty())
339  return false;
340 
341  for (llvm::ImmutableSet<unsigned>::iterator
342  I = TaintArgs.begin(), E = TaintArgs.end(); I != E; ++I) {
343  unsigned ArgNum = *I;
344 
345  // Special handling for the tainted return value.
346  if (ArgNum == ReturnValueIndex) {
347  State = State->addTaint(CE, C.getLocationContext());
348  continue;
349  }
350 
351  // The arguments are pointer arguments. The data they are pointing at is
352  // tainted after the call.
353  if (CE->getNumArgs() < (ArgNum + 1))
354  return false;
355  const Expr* Arg = CE->getArg(ArgNum);
356  Optional<SVal> V = getPointedToSVal(C, Arg);
357  if (V)
358  State = State->addTaint(*V);
359  }
360 
361  // Clear up the taint info from the state.
362  State = State->remove<TaintArgsOnPostVisit>();
363 
364  if (State != C.getState()) {
365  C.addTransition(State);
366  return true;
367  }
368  return false;
369 }
370 
371 void GenericTaintChecker::addSourcesPost(const CallExpr *CE,
372  CheckerContext &C) const {
373  // Define the attack surface.
374  // Set the evaluation function by switching on the callee name.
375  const FunctionDecl *FDecl = C.getCalleeDecl(CE);
376  if (!FDecl || FDecl->getKind() != Decl::Function)
377  return;
378 
379  StringRef Name = C.getCalleeName(FDecl);
380  if (Name.empty())
381  return;
382  FnCheck evalFunction = llvm::StringSwitch<FnCheck>(Name)
383  .Case("scanf", &GenericTaintChecker::postScanf)
384  // TODO: Add support for vfscanf & family.
385  .Case("getchar", &GenericTaintChecker::postRetTaint)
386  .Case("getchar_unlocked", &GenericTaintChecker::postRetTaint)
387  .Case("getenv", &GenericTaintChecker::postRetTaint)
388  .Case("fopen", &GenericTaintChecker::postRetTaint)
389  .Case("fdopen", &GenericTaintChecker::postRetTaint)
390  .Case("freopen", &GenericTaintChecker::postRetTaint)
391  .Case("getch", &GenericTaintChecker::postRetTaint)
392  .Case("wgetch", &GenericTaintChecker::postRetTaint)
393  .Case("socket", &GenericTaintChecker::postSocket)
394  .Default(nullptr);
395 
396  // If the callee isn't defined, it is not of security concern.
397  // Check and evaluate the call.
398  ProgramStateRef State = nullptr;
399  if (evalFunction)
400  State = (this->*evalFunction)(CE, C);
401  if (!State)
402  return;
403 
404  C.addTransition(State);
405 }
406 
407 bool GenericTaintChecker::checkPre(const CallExpr *CE, CheckerContext &C) const{
408 
409  if (checkUncontrolledFormatString(CE, C))
410  return true;
411 
412  const FunctionDecl *FDecl = C.getCalleeDecl(CE);
413  if (!FDecl || FDecl->getKind() != Decl::Function)
414  return false;
415 
416  StringRef Name = C.getCalleeName(FDecl);
417  if (Name.empty())
418  return false;
419 
420  if (checkSystemCall(CE, Name, C))
421  return true;
422 
423  if (checkTaintedBufferSize(CE, FDecl, C))
424  return true;
425 
426  return false;
427 }
428 
429 Optional<SVal> GenericTaintChecker::getPointedToSVal(CheckerContext &C,
430  const Expr *Arg) {
431  ProgramStateRef State = C.getState();
432  SVal AddrVal = C.getSVal(Arg->IgnoreParens());
433  if (AddrVal.isUnknownOrUndef())
434  return None;
435 
436  Optional<Loc> AddrLoc = AddrVal.getAs<Loc>();
437  if (!AddrLoc)
438  return None;
439 
440  QualType ArgTy = Arg->getType().getCanonicalType();
441  if (!ArgTy->isPointerType())
442  return None;
443 
444  QualType ValTy = ArgTy->getPointeeType();
445 
446  // Do not dereference void pointers. Treat them as byte pointers instead.
447  // FIXME: we might want to consider more than just the first byte.
448  if (ValTy->isVoidType())
449  ValTy = C.getASTContext().CharTy;
450 
451  return State->getSVal(*AddrLoc, ValTy);
452 }
453 
455 GenericTaintChecker::TaintPropagationRule::process(const CallExpr *CE,
456  CheckerContext &C) const {
457  ProgramStateRef State = C.getState();
458 
459  // Check for taint in arguments.
460  bool IsTainted = false;
461  for (ArgVector::const_iterator I = SrcArgs.begin(),
462  E = SrcArgs.end(); I != E; ++I) {
463  unsigned ArgNum = *I;
464 
465  if (ArgNum == InvalidArgIndex) {
466  // Check if any of the arguments is tainted, but skip the
467  // destination arguments.
468  for (unsigned int i = 0; i < CE->getNumArgs(); ++i) {
469  if (isDestinationArgument(i))
470  continue;
471  if ((IsTainted = isTaintedOrPointsToTainted(CE->getArg(i), State, C)))
472  break;
473  }
474  break;
475  }
476 
477  if (CE->getNumArgs() < (ArgNum + 1))
478  return State;
479  if ((IsTainted = isTaintedOrPointsToTainted(CE->getArg(ArgNum), State, C)))
480  break;
481  }
482  if (!IsTainted)
483  return State;
484 
485  // Mark the arguments which should be tainted after the function returns.
486  for (ArgVector::const_iterator I = DstArgs.begin(),
487  E = DstArgs.end(); I != E; ++I) {
488  unsigned ArgNum = *I;
489 
490  // Should we mark all arguments as tainted?
491  if (ArgNum == InvalidArgIndex) {
492  // For all pointer and references that were passed in:
493  // If they are not pointing to const data, mark data as tainted.
494  // TODO: So far we are just going one level down; ideally we'd need to
495  // recurse here.
496  for (unsigned int i = 0; i < CE->getNumArgs(); ++i) {
497  const Expr *Arg = CE->getArg(i);
498  // Process pointer argument.
499  const Type *ArgTy = Arg->getType().getTypePtr();
500  QualType PType = ArgTy->getPointeeType();
501  if ((!PType.isNull() && !PType.isConstQualified())
502  || (ArgTy->isReferenceType() && !Arg->getType().isConstQualified()))
503  State = State->add<TaintArgsOnPostVisit>(i);
504  }
505  continue;
506  }
507 
508  // Should mark the return value?
509  if (ArgNum == ReturnValueIndex) {
510  State = State->add<TaintArgsOnPostVisit>(ReturnValueIndex);
511  continue;
512  }
513 
514  // Mark the given argument.
515  assert(ArgNum < CE->getNumArgs());
516  State = State->add<TaintArgsOnPostVisit>(ArgNum);
517  }
518 
519  return State;
520 }
521 
522 
523 // If argument 0 (file descriptor) is tainted, all arguments except for arg 0
524 // and arg 1 should get taint.
525 ProgramStateRef GenericTaintChecker::preFscanf(const CallExpr *CE,
526  CheckerContext &C) const {
527  assert(CE->getNumArgs() >= 2);
528  ProgramStateRef State = C.getState();
529 
530  // Check is the file descriptor is tainted.
531  if (State->isTainted(CE->getArg(0), C.getLocationContext()) ||
532  isStdin(CE->getArg(0), C)) {
533  // All arguments except for the first two should get taint.
534  for (unsigned int i = 2; i < CE->getNumArgs(); ++i)
535  State = State->add<TaintArgsOnPostVisit>(i);
536  return State;
537  }
538 
539  return nullptr;
540 }
541 
542 
543 // If argument 0(protocol domain) is network, the return value should get taint.
544 ProgramStateRef GenericTaintChecker::postSocket(const CallExpr *CE,
545  CheckerContext &C) const {
546  ProgramStateRef State = C.getState();
547  if (CE->getNumArgs() < 3)
548  return State;
549 
550  SourceLocation DomLoc = CE->getArg(0)->getExprLoc();
551  StringRef DomName = C.getMacroNameOrSpelling(DomLoc);
552  // White list the internal communication protocols.
553  if (DomName.equals("AF_SYSTEM") || DomName.equals("AF_LOCAL") ||
554  DomName.equals("AF_UNIX") || DomName.equals("AF_RESERVED_36"))
555  return State;
556  State = State->addTaint(CE, C.getLocationContext());
557  return State;
558 }
559 
560 ProgramStateRef GenericTaintChecker::postScanf(const CallExpr *CE,
561  CheckerContext &C) const {
562  ProgramStateRef State = C.getState();
563  if (CE->getNumArgs() < 2)
564  return State;
565 
566  // All arguments except for the very first one should get taint.
567  for (unsigned int i = 1; i < CE->getNumArgs(); ++i) {
568  // The arguments are pointer arguments. The data they are pointing at is
569  // tainted after the call.
570  const Expr* Arg = CE->getArg(i);
571  Optional<SVal> V = getPointedToSVal(C, Arg);
572  if (V)
573  State = State->addTaint(*V);
574  }
575  return State;
576 }
577 
578 ProgramStateRef GenericTaintChecker::postRetTaint(const CallExpr *CE,
579  CheckerContext &C) const {
580  return C.getState()->addTaint(CE, C.getLocationContext());
581 }
582 
583 bool GenericTaintChecker::isStdin(const Expr *E, CheckerContext &C) {
584  ProgramStateRef State = C.getState();
585  SVal Val = C.getSVal(E);
586 
587  // stdin is a pointer, so it would be a region.
588  const MemRegion *MemReg = Val.getAsRegion();
589 
590  // The region should be symbolic, we do not know it's value.
591  const SymbolicRegion *SymReg = dyn_cast_or_null<SymbolicRegion>(MemReg);
592  if (!SymReg)
593  return false;
594 
595  // Get it's symbol and find the declaration region it's pointing to.
596  const SymbolRegionValue *Sm =dyn_cast<SymbolRegionValue>(SymReg->getSymbol());
597  if (!Sm)
598  return false;
599  const DeclRegion *DeclReg = dyn_cast_or_null<DeclRegion>(Sm->getRegion());
600  if (!DeclReg)
601  return false;
602 
603  // This region corresponds to a declaration, find out if it's a global/extern
604  // variable named stdin with the proper type.
605  if (const VarDecl *D = dyn_cast_or_null<VarDecl>(DeclReg->getDecl())) {
606  D = D->getCanonicalDecl();
607  if ((D->getName().find("stdin") != StringRef::npos) && D->isExternC())
608  if (const PointerType * PtrTy =
609  dyn_cast<PointerType>(D->getType().getTypePtr()))
610  if (PtrTy->getPointeeType().getCanonicalType() ==
611  C.getASTContext().getFILEType().getCanonicalType())
612  return true;
613  }
614  return false;
615 }
616 
617 static bool getPrintfFormatArgumentNum(const CallExpr *CE,
618  const CheckerContext &C,
619  unsigned int &ArgNum) {
620  // Find if the function contains a format string argument.
621  // Handles: fprintf, printf, sprintf, snprintf, vfprintf, vprintf, vsprintf,
622  // vsnprintf, syslog, custom annotated functions.
623  const FunctionDecl *FDecl = C.getCalleeDecl(CE);
624  if (!FDecl)
625  return false;
626  for (const auto *Format : FDecl->specific_attrs<FormatAttr>()) {
627  ArgNum = Format->getFormatIdx() - 1;
628  if ((Format->getType()->getName() == "printf") &&
629  CE->getNumArgs() > ArgNum)
630  return true;
631  }
632 
633  // Or if a function is named setproctitle (this is a heuristic).
634  if (C.getCalleeName(CE).find("setproctitle") != StringRef::npos) {
635  ArgNum = 0;
636  return true;
637  }
638 
639  return false;
640 }
641 
642 bool GenericTaintChecker::generateReportIfTainted(const Expr *E,
643  const char Msg[],
644  CheckerContext &C) const {
645  assert(E);
646 
647  // Check for taint.
648  ProgramStateRef State = C.getState();
649  Optional<SVal> PointedToSVal = getPointedToSVal(C, E);
650  SVal TaintedSVal;
651  if (PointedToSVal && State->isTainted(*PointedToSVal))
652  TaintedSVal = *PointedToSVal;
653  else if (State->isTainted(E, C.getLocationContext()))
654  TaintedSVal = C.getSVal(E);
655  else
656  return false;
657 
658  // Generate diagnostic.
659  if (ExplodedNode *N = C.generateNonFatalErrorNode()) {
660  initBugType();
661  auto report = llvm::make_unique<BugReport>(*BT, Msg, N);
662  report->addRange(E->getSourceRange());
663  report->addVisitor(llvm::make_unique<TaintBugVisitor>(TaintedSVal));
664  C.emitReport(std::move(report));
665  return true;
666  }
667  return false;
668 }
669 
670 bool GenericTaintChecker::checkUncontrolledFormatString(const CallExpr *CE,
671  CheckerContext &C) const{
672  // Check if the function contains a format string argument.
673  unsigned int ArgNum = 0;
674  if (!getPrintfFormatArgumentNum(CE, C, ArgNum))
675  return false;
676 
677  // If either the format string content or the pointer itself are tainted, warn.
678  return generateReportIfTainted(CE->getArg(ArgNum),
679  MsgUncontrolledFormatString, C);
680 }
681 
682 bool GenericTaintChecker::checkSystemCall(const CallExpr *CE,
683  StringRef Name,
684  CheckerContext &C) const {
685  // TODO: It might make sense to run this check on demand. In some cases,
686  // we should check if the environment has been cleansed here. We also might
687  // need to know if the user was reset before these calls(seteuid).
688  unsigned ArgNum = llvm::StringSwitch<unsigned>(Name)
689  .Case("system", 0)
690  .Case("popen", 0)
691  .Case("execl", 0)
692  .Case("execle", 0)
693  .Case("execlp", 0)
694  .Case("execv", 0)
695  .Case("execvp", 0)
696  .Case("execvP", 0)
697  .Case("execve", 0)
698  .Case("dlopen", 0)
699  .Default(UINT_MAX);
700 
701  if (ArgNum == UINT_MAX || CE->getNumArgs() < (ArgNum + 1))
702  return false;
703 
704  return generateReportIfTainted(CE->getArg(ArgNum), MsgSanitizeSystemArgs, C);
705 }
706 
707 // TODO: Should this check be a part of the CString checker?
708 // If yes, should taint be a global setting?
709 bool GenericTaintChecker::checkTaintedBufferSize(const CallExpr *CE,
710  const FunctionDecl *FDecl,
711  CheckerContext &C) const {
712  // If the function has a buffer size argument, set ArgNum.
713  unsigned ArgNum = InvalidArgIndex;
714  unsigned BId = 0;
715  if ( (BId = FDecl->getMemoryFunctionKind()) )
716  switch(BId) {
717  case Builtin::BImemcpy:
718  case Builtin::BImemmove:
719  case Builtin::BIstrncpy:
720  ArgNum = 2;
721  break;
722  case Builtin::BIstrndup:
723  ArgNum = 1;
724  break;
725  default:
726  break;
727  };
728 
729  if (ArgNum == InvalidArgIndex) {
730  if (C.isCLibraryFunction(FDecl, "malloc") ||
731  C.isCLibraryFunction(FDecl, "calloc") ||
732  C.isCLibraryFunction(FDecl, "alloca"))
733  ArgNum = 0;
734  else if (C.isCLibraryFunction(FDecl, "memccpy"))
735  ArgNum = 3;
736  else if (C.isCLibraryFunction(FDecl, "realloc"))
737  ArgNum = 1;
738  else if (C.isCLibraryFunction(FDecl, "bcopy"))
739  ArgNum = 2;
740  }
741 
742  return ArgNum != InvalidArgIndex && CE->getNumArgs() > ArgNum &&
743  generateReportIfTainted(CE->getArg(ArgNum), MsgTaintedBufferSize, C);
744 }
745 
/// Registers GenericTaintChecker with the given checker manager.
746 void ento::registerGenericTaintChecker(CheckerManager &mgr) {
747  mgr.registerChecker<GenericTaintChecker>();
748 }
Represents a function declaration or definition.
Definition: Decl.h:1739
unsigned getMemoryFunctionKind() const
Identify a memory copying or setting function.
Definition: Decl.cpp:3655
PointerType - C99 6.7.5.1 - Pointer Declarators.
Definition: Type.h:2543
A (possibly-)qualified type.
Definition: Type.h:642
Expr * getArg(unsigned Arg)
getArg - Return the specified argument.
Definition: Expr.h:2480
unsigned getNumArgs() const
getNumArgs - Return the number of actual arguments to this call.
Definition: Expr.h:2468
QualType getPointeeType() const
If this is a pointer, ObjC object pointer, or block pointer, this returns the respective pointee...
Definition: Type.cpp:505
IntrusiveRefCntPtr< const ProgramState > ProgramStateRef
The base class of the type hierarchy.
Definition: Type.h:1415
constexpr XRayInstrMask Function
Definition: XRayInstr.h:39
Represents a variable declaration or definition.
Definition: Decl.h:812
LineState State
bool isReferenceType() const
Definition: Type.h:6294
static bool getPrintfFormatArgumentNum(const CallExpr *CE, const CheckerContext &C, unsigned int &ArgNum)
const Type * getTypePtr() const
Retrieves a pointer to the underlying (unqualified) type.
Definition: Type.h:6058
#define UINT_MAX
Definition: limits.h:72
This represents one expression.
Definition: Expr.h:106
QualType getType() const
Definition: Expr.h:128
bool isNull() const
Return true if this QualType doesn't point to a type yet.
Definition: Type.h:707
bool isConstQualified() const
Determine whether this type is const-qualified.
Definition: Type.h:6117
QualType getCanonicalType() const
Definition: Type.h:6097
Encodes a location in the source.
constexpr XRayInstrMask None
Definition: XRayInstr.h:38
SourceLocation getExprLoc() const LLVM_READONLY
getExprLoc - Return the preferred location for the arrow when diagnosing a problem with a generic exp...
Definition: Expr.cpp:216
#define REGISTER_SET_WITH_PROGRAMSTATE(Name, Elem)
Declares an immutable set of type NameTy, suitable for placement into the ProgramState.
Dataflow Directional Tag Classes.
Kind getKind() const
Definition: DeclBase.h:421
llvm::iterator_range< specific_attr_iterator< T > > specific_attrs() const
Definition: DeclBase.h:513
bool isVoidType() const
Definition: Type.h:6530
SourceRange getSourceRange() const LLVM_READONLY
SourceLocation tokens are not useful in isolation - they are low level value objects created/interpre...
Definition: Stmt.cpp:276
CallExpr - Represents a function call (C99 6.5.2.2, C++ [expr.call]).
Definition: Expr.h:2407
bool isPointerType() const
Definition: Type.h:6282
Defines enum values for all the target-independent builtin functions.
Expr * IgnoreParens() LLVM_READONLY
IgnoreParens - Ignore parentheses.
Definition: Expr.cpp:2523