clang  9.0.0svn
GenericTaintChecker.cpp
Go to the documentation of this file.
1 //== GenericTaintChecker.cpp ----------------------------------- -*- C++ -*--=//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This checker defines the attack surface for generic taint propagation.
10 //
11 // The taint information produced by it might be useful to other checkers. For
12 // example, checkers should report errors which involve tainted data more
13 // aggressively, even if the involved symbols are under-constrained.
14 //
15 //===----------------------------------------------------------------------===//
17 #include "clang/AST/Attr.h"
18 #include "clang/Basic/Builtins.h"
24 #include <climits>
25 #include <initializer_list>
26 #include <utility>
27 
28 using namespace clang;
29 using namespace ento;
30 
31 namespace {
32 class GenericTaintChecker
33  : public Checker<check::PostStmt<CallExpr>, check::PreStmt<CallExpr>> {
34 public:
35  static void *getTag() {
36  static int Tag;
37  return &Tag;
38  }
39 
40  void checkPostStmt(const CallExpr *CE, CheckerContext &C) const;
41 
42  void checkPreStmt(const CallExpr *CE, CheckerContext &C) const;
43 
44 private:
45  static const unsigned InvalidArgIndex = UINT_MAX;
46  /// Denotes the return vale.
47  static const unsigned ReturnValueIndex = UINT_MAX - 1;
48 
49  mutable std::unique_ptr<BugType> BT;
50  void initBugType() const {
51  if (!BT)
52  BT.reset(new BugType(this, "Use of Untrusted Data", "Untrusted Data"));
53  }
54 
55  /// Catch taint related bugs. Check if tainted data is passed to a
56  /// system call etc.
57  bool checkPre(const CallExpr *CE, CheckerContext &C) const;
58 
59  /// Add taint sources on a pre-visit.
60  void addSourcesPre(const CallExpr *CE, CheckerContext &C) const;
61 
62  /// Propagate taint generated at pre-visit.
63  bool propagateFromPre(const CallExpr *CE, CheckerContext &C) const;
64 
65  /// Check if the region the expression evaluates to is the standard input,
66  /// and thus, is tainted.
67  static bool isStdin(const Expr *E, CheckerContext &C);
68 
69  /// Given a pointer argument, return the value it points to.
70  static Optional<SVal> getPointedToSVal(CheckerContext &C, const Expr *Arg);
71 
72  /// Check for CWE-134: Uncontrolled Format String.
73  static const char MsgUncontrolledFormatString[];
74  bool checkUncontrolledFormatString(const CallExpr *CE,
75  CheckerContext &C) const;
76 
77  /// Check for:
78  /// CERT/STR02-C. "Sanitize data passed to complex subsystems"
79  /// CWE-78, "Failure to Sanitize Data into an OS Command"
80  static const char MsgSanitizeSystemArgs[];
81  bool checkSystemCall(const CallExpr *CE, StringRef Name,
82  CheckerContext &C) const;
83 
84  /// Check if tainted data is used as a buffer size ins strn.. functions,
85  /// and allocators.
86  static const char MsgTaintedBufferSize[];
87  bool checkTaintedBufferSize(const CallExpr *CE, const FunctionDecl *FDecl,
88  CheckerContext &C) const;
89 
90  /// Generate a report if the expression is tainted or points to tainted data.
91  bool generateReportIfTainted(const Expr *E, const char Msg[],
92  CheckerContext &C) const;
93 
94  using ArgVector = SmallVector<unsigned, 2>;
95 
96  /// A struct used to specify taint propagation rules for a function.
97  ///
98  /// If any of the possible taint source arguments is tainted, all of the
99  /// destination arguments should also be tainted. Use InvalidArgIndex in the
100  /// src list to specify that all of the arguments can introduce taint. Use
101  /// InvalidArgIndex in the dst arguments to signify that all the non-const
102  /// pointer and reference arguments might be tainted on return. If
103  /// ReturnValueIndex is added to the dst list, the return value will be
104  /// tainted.
105  struct TaintPropagationRule {
106  enum class VariadicType { None, Src, Dst };
107 
108  using PropagationFuncType = bool (*)(bool IsTainted, const CallExpr *,
109  CheckerContext &C);
110 
111  /// List of arguments which can be taint sources and should be checked.
112  ArgVector SrcArgs;
113  /// List of arguments which should be tainted on function return.
114  ArgVector DstArgs;
115  /// Index for the first variadic parameter if exist.
116  unsigned VariadicIndex;
117  /// Show when a function has variadic parameters. If it has, it marks all
118  /// of them as source or destination.
119  VariadicType VarType;
120  /// Special function for tainted source determination. If defined, it can
121  /// override the default behavior.
122  PropagationFuncType PropagationFunc;
123 
124  TaintPropagationRule()
125  : VariadicIndex(InvalidArgIndex), VarType(VariadicType::None),
126  PropagationFunc(nullptr) {}
127 
128  TaintPropagationRule(std::initializer_list<unsigned> &&Src,
129  std::initializer_list<unsigned> &&Dst,
130  VariadicType Var = VariadicType::None,
131  unsigned VarIndex = InvalidArgIndex,
132  PropagationFuncType Func = nullptr)
133  : SrcArgs(std::move(Src)), DstArgs(std::move(Dst)),
134  VariadicIndex(VarIndex), VarType(Var), PropagationFunc(Func) {}
135 
136  /// Get the propagation rule for a given function.
137  static TaintPropagationRule
138  getTaintPropagationRule(const FunctionDecl *FDecl, StringRef Name,
139  CheckerContext &C);
140 
141  void addSrcArg(unsigned A) { SrcArgs.push_back(A); }
142  void addDstArg(unsigned A) { DstArgs.push_back(A); }
143 
144  bool isNull() const {
145  return SrcArgs.empty() && DstArgs.empty() &&
146  VariadicType::None == VarType;
147  }
148 
149  bool isDestinationArgument(unsigned ArgNum) const {
150  return (llvm::find(DstArgs, ArgNum) != DstArgs.end());
151  }
152 
153  static bool isTaintedOrPointsToTainted(const Expr *E, ProgramStateRef State,
154  CheckerContext &C) {
155  if (State->isTainted(E, C.getLocationContext()) || isStdin(E, C))
156  return true;
157 
158  if (!E->getType().getTypePtr()->isPointerType())
159  return false;
160 
161  Optional<SVal> V = getPointedToSVal(C, E);
162  return (V && State->isTainted(*V));
163  }
164 
165  /// Pre-process a function which propagates taint according to the
166  /// taint rule.
167  ProgramStateRef process(const CallExpr *CE, CheckerContext &C) const;
168 
169  // Functions for custom taintedness propagation.
170  static bool postSocket(bool IsTainted, const CallExpr *CE,
171  CheckerContext &C);
172  };
173 };
174 
175 const unsigned GenericTaintChecker::ReturnValueIndex;
176 const unsigned GenericTaintChecker::InvalidArgIndex;
177 
178 const char GenericTaintChecker::MsgUncontrolledFormatString[] =
179  "Untrusted data is used as a format string "
180  "(CWE-134: Uncontrolled Format String)";
181 
182 const char GenericTaintChecker::MsgSanitizeSystemArgs[] =
183  "Untrusted data is passed to a system call "
184  "(CERT/STR02-C. Sanitize data passed to complex subsystems)";
185 
186 const char GenericTaintChecker::MsgTaintedBufferSize[] =
187  "Untrusted data is used to specify the buffer size "
188  "(CERT/STR31-C. Guarantee that storage for strings has sufficient space "
189  "for character data and the null terminator)";
190 
191 } // end of anonymous namespace
192 
193 /// A set which is used to pass information from call pre-visit instruction
194 /// to the call post-visit. The values are unsigned integers, which are either
195 /// ReturnValueIndex, or indexes of the pointer/reference argument, which
196 /// points to data, which should be tainted on return.
197 REGISTER_SET_WITH_PROGRAMSTATE(TaintArgsOnPostVisit, unsigned)
198 
199 GenericTaintChecker::TaintPropagationRule
200 GenericTaintChecker::TaintPropagationRule::getTaintPropagationRule(
201  const FunctionDecl *FDecl, StringRef Name, CheckerContext &C) {
202  // TODO: Currently, we might lose precision here: we always mark a return
203  // value as tainted even if it's just a pointer, pointing to tainted data.
204 
205  // Check for exact name match for functions without builtin substitutes.
206  TaintPropagationRule Rule =
207  llvm::StringSwitch<TaintPropagationRule>(Name)
208  // Source functions
209  // TODO: Add support for vfscanf & family.
210  .Case("fdopen", TaintPropagationRule({}, {ReturnValueIndex}))
211  .Case("fopen", TaintPropagationRule({}, {ReturnValueIndex}))
212  .Case("freopen", TaintPropagationRule({}, {ReturnValueIndex}))
213  .Case("getch", TaintPropagationRule({}, {ReturnValueIndex}))
214  .Case("getchar", TaintPropagationRule({}, {ReturnValueIndex}))
215  .Case("getchar_unlocked", TaintPropagationRule({}, {ReturnValueIndex}))
216  .Case("getenv", TaintPropagationRule({}, {ReturnValueIndex}))
217  .Case("gets", TaintPropagationRule({}, {0, ReturnValueIndex}))
218  .Case("scanf", TaintPropagationRule({}, {}, VariadicType::Dst, 1))
219  .Case("socket",
220  TaintPropagationRule({}, {ReturnValueIndex}, VariadicType::None,
221  InvalidArgIndex,
222  &TaintPropagationRule::postSocket))
223  .Case("wgetch", TaintPropagationRule({}, {ReturnValueIndex}))
224  // Propagating functions
225  .Case("atoi", TaintPropagationRule({0}, {ReturnValueIndex}))
226  .Case("atol", TaintPropagationRule({0}, {ReturnValueIndex}))
227  .Case("atoll", TaintPropagationRule({0}, {ReturnValueIndex}))
228  .Case("fgetc", TaintPropagationRule({0}, {ReturnValueIndex}))
229  .Case("fgetln", TaintPropagationRule({0}, {ReturnValueIndex}))
230  .Case("fgets", TaintPropagationRule({2}, {0, ReturnValueIndex}))
231  .Case("fscanf", TaintPropagationRule({0}, {}, VariadicType::Dst, 2))
232  .Case("getc", TaintPropagationRule({0}, {ReturnValueIndex}))
233  .Case("getc_unlocked", TaintPropagationRule({0}, {ReturnValueIndex}))
234  .Case("getdelim", TaintPropagationRule({3}, {0}))
235  .Case("getline", TaintPropagationRule({2}, {0}))
236  .Case("getw", TaintPropagationRule({0}, {ReturnValueIndex}))
237  .Case("pread",
238  TaintPropagationRule({0, 1, 2, 3}, {1, ReturnValueIndex}))
239  .Case("read", TaintPropagationRule({0, 2}, {1, ReturnValueIndex}))
240  .Case("strchr", TaintPropagationRule({0}, {ReturnValueIndex}))
241  .Case("strrchr", TaintPropagationRule({0}, {ReturnValueIndex}))
242  .Case("tolower", TaintPropagationRule({0}, {ReturnValueIndex}))
243  .Case("toupper", TaintPropagationRule({0}, {ReturnValueIndex}))
244  .Default(TaintPropagationRule());
245 
246  if (!Rule.isNull())
247  return Rule;
248 
249  // Check if it's one of the memory setting/copying functions.
250  // This check is specialized but faster then calling isCLibraryFunction.
251  unsigned BId = 0;
252  if ((BId = FDecl->getMemoryFunctionKind()))
253  switch (BId) {
254  case Builtin::BImemcpy:
255  case Builtin::BImemmove:
256  case Builtin::BIstrncpy:
257  case Builtin::BIstrncat:
258  return TaintPropagationRule({1, 2}, {0, ReturnValueIndex});
259  case Builtin::BIstrlcpy:
260  case Builtin::BIstrlcat:
261  return TaintPropagationRule({1, 2}, {0});
262  case Builtin::BIstrndup:
263  return TaintPropagationRule({0, 1}, {ReturnValueIndex});
264 
265  default:
266  break;
267  };
268 
269  // Process all other functions which could be defined as builtins.
270  if (Rule.isNull()) {
271  if (C.isCLibraryFunction(FDecl, "snprintf"))
272  return TaintPropagationRule({1}, {0, ReturnValueIndex}, VariadicType::Src,
273  3);
274  else if (C.isCLibraryFunction(FDecl, "sprintf"))
275  return TaintPropagationRule({}, {0, ReturnValueIndex}, VariadicType::Src,
276  2);
277  else if (C.isCLibraryFunction(FDecl, "strcpy") ||
278  C.isCLibraryFunction(FDecl, "stpcpy") ||
279  C.isCLibraryFunction(FDecl, "strcat"))
280  return TaintPropagationRule({1}, {0, ReturnValueIndex});
281  else if (C.isCLibraryFunction(FDecl, "bcopy"))
282  return TaintPropagationRule({0, 2}, {1});
283  else if (C.isCLibraryFunction(FDecl, "strdup") ||
284  C.isCLibraryFunction(FDecl, "strdupa"))
285  return TaintPropagationRule({0}, {ReturnValueIndex});
286  else if (C.isCLibraryFunction(FDecl, "wcsdup"))
287  return TaintPropagationRule({0}, {ReturnValueIndex});
288  }
289 
290  // Skipping the following functions, since they might be used for cleansing
291  // or smart memory copy:
292  // - memccpy - copying until hitting a special character.
293 
294  return TaintPropagationRule();
295 }
296 
297 void GenericTaintChecker::checkPreStmt(const CallExpr *CE,
298  CheckerContext &C) const {
299  // Check for taintedness related errors first: system call, uncontrolled
300  // format string, tainted buffer size.
301  if (checkPre(CE, C))
302  return;
303 
304  // Marks the function's arguments and/or return value tainted if it present in
305  // the list.
306  addSourcesPre(CE, C);
307 }
308 
309 void GenericTaintChecker::checkPostStmt(const CallExpr *CE,
310  CheckerContext &C) const {
311  // Set the marked values as tainted. The return value only accessible from
312  // checkPostStmt.
313  propagateFromPre(CE, C);
314 }
315 
316 void GenericTaintChecker::addSourcesPre(const CallExpr *CE,
317  CheckerContext &C) const {
318  ProgramStateRef State = nullptr;
319  const FunctionDecl *FDecl = C.getCalleeDecl(CE);
320  if (!FDecl || FDecl->getKind() != Decl::Function)
321  return;
322 
323  StringRef Name = C.getCalleeName(FDecl);
324  if (Name.empty())
325  return;
326 
327  // First, try generating a propagation rule for this function.
328  TaintPropagationRule Rule =
329  TaintPropagationRule::getTaintPropagationRule(FDecl, Name, C);
330  if (!Rule.isNull()) {
331  State = Rule.process(CE, C);
332  if (!State)
333  return;
334  C.addTransition(State);
335  return;
336  }
337 
338  if (!State)
339  return;
340  C.addTransition(State);
341 }
342 
343 bool GenericTaintChecker::propagateFromPre(const CallExpr *CE,
344  CheckerContext &C) const {
345  ProgramStateRef State = C.getState();
346 
347  // Depending on what was tainted at pre-visit, we determined a set of
348  // arguments which should be tainted after the function returns. These are
349  // stored in the state as TaintArgsOnPostVisit set.
350  TaintArgsOnPostVisitTy TaintArgs = State->get<TaintArgsOnPostVisit>();
351  if (TaintArgs.isEmpty())
352  return false;
353 
354  for (unsigned ArgNum : TaintArgs) {
355  // Special handling for the tainted return value.
356  if (ArgNum == ReturnValueIndex) {
357  State = State->addTaint(CE, C.getLocationContext());
358  continue;
359  }
360 
361  // The arguments are pointer arguments. The data they are pointing at is
362  // tainted after the call.
363  if (CE->getNumArgs() < (ArgNum + 1))
364  return false;
365  const Expr *Arg = CE->getArg(ArgNum);
366  Optional<SVal> V = getPointedToSVal(C, Arg);
367  if (V)
368  State = State->addTaint(*V);
369  }
370 
371  // Clear up the taint info from the state.
372  State = State->remove<TaintArgsOnPostVisit>();
373 
374  if (State != C.getState()) {
375  C.addTransition(State);
376  return true;
377  }
378  return false;
379 }
380 
381 bool GenericTaintChecker::checkPre(const CallExpr *CE,
382  CheckerContext &C) const {
383 
384  if (checkUncontrolledFormatString(CE, C))
385  return true;
386 
387  const FunctionDecl *FDecl = C.getCalleeDecl(CE);
388  if (!FDecl || FDecl->getKind() != Decl::Function)
389  return false;
390 
391  StringRef Name = C.getCalleeName(FDecl);
392  if (Name.empty())
393  return false;
394 
395  if (checkSystemCall(CE, Name, C))
396  return true;
397 
398  if (checkTaintedBufferSize(CE, FDecl, C))
399  return true;
400 
401  return false;
402 }
403 
404 Optional<SVal> GenericTaintChecker::getPointedToSVal(CheckerContext &C,
405  const Expr *Arg) {
406  ProgramStateRef State = C.getState();
407  SVal AddrVal = C.getSVal(Arg->IgnoreParens());
408  if (AddrVal.isUnknownOrUndef())
409  return None;
410 
411  Optional<Loc> AddrLoc = AddrVal.getAs<Loc>();
412  if (!AddrLoc)
413  return None;
414 
415  QualType ArgTy = Arg->getType().getCanonicalType();
416  if (!ArgTy->isPointerType())
417  return None;
418 
419  QualType ValTy = ArgTy->getPointeeType();
420 
421  // Do not dereference void pointers. Treat them as byte pointers instead.
422  // FIXME: we might want to consider more than just the first byte.
423  if (ValTy->isVoidType())
424  ValTy = C.getASTContext().CharTy;
425 
426  return State->getSVal(*AddrLoc, ValTy);
427 }
428 
430 GenericTaintChecker::TaintPropagationRule::process(const CallExpr *CE,
431  CheckerContext &C) const {
432  ProgramStateRef State = C.getState();
433 
434  // Check for taint in arguments.
435  bool IsTainted = true;
436  for (unsigned ArgNum : SrcArgs) {
437  if (ArgNum >= CE->getNumArgs())
438  return State;
439  if ((IsTainted = isTaintedOrPointsToTainted(CE->getArg(ArgNum), State, C)))
440  break;
441  }
442 
443  // Check for taint in variadic arguments.
444  if (!IsTainted && VariadicType::Src == VarType) {
445  // Check if any of the arguments is tainted
446  for (unsigned int i = VariadicIndex; i < CE->getNumArgs(); ++i) {
447  if ((IsTainted = isTaintedOrPointsToTainted(CE->getArg(i), State, C)))
448  break;
449  }
450  }
451 
452  if (PropagationFunc)
453  IsTainted = PropagationFunc(IsTainted, CE, C);
454 
455  if (!IsTainted)
456  return State;
457 
458  // Mark the arguments which should be tainted after the function returns.
459  for (unsigned ArgNum : DstArgs) {
460  // Should mark the return value?
461  if (ArgNum == ReturnValueIndex) {
462  State = State->add<TaintArgsOnPostVisit>(ReturnValueIndex);
463  continue;
464  }
465 
466  // Mark the given argument.
467  assert(ArgNum < CE->getNumArgs());
468  State = State->add<TaintArgsOnPostVisit>(ArgNum);
469  }
470 
471  // Mark all variadic arguments tainted if present.
472  if (VariadicType::Dst == VarType) {
473  // For all pointer and references that were passed in:
474  // If they are not pointing to const data, mark data as tainted.
475  // TODO: So far we are just going one level down; ideally we'd need to
476  // recurse here.
477  for (unsigned int i = VariadicIndex; i < CE->getNumArgs(); ++i) {
478  const Expr *Arg = CE->getArg(i);
479  // Process pointer argument.
480  const Type *ArgTy = Arg->getType().getTypePtr();
481  QualType PType = ArgTy->getPointeeType();
482  if ((!PType.isNull() && !PType.isConstQualified()) ||
483  (ArgTy->isReferenceType() && !Arg->getType().isConstQualified()))
484  State = State->add<TaintArgsOnPostVisit>(i);
485  }
486  }
487 
488  return State;
489 }
490 
491 // If argument 0(protocol domain) is network, the return value should get taint.
492 bool GenericTaintChecker::TaintPropagationRule::postSocket(bool /*IsTainted*/,
493  const CallExpr *CE,
494  CheckerContext &C) {
495  SourceLocation DomLoc = CE->getArg(0)->getExprLoc();
496  StringRef DomName = C.getMacroNameOrSpelling(DomLoc);
497  // White list the internal communication protocols.
498  if (DomName.equals("AF_SYSTEM") || DomName.equals("AF_LOCAL") ||
499  DomName.equals("AF_UNIX") || DomName.equals("AF_RESERVED_36"))
500  return false;
501 
502  return true;
503 }
504 
505 bool GenericTaintChecker::isStdin(const Expr *E, CheckerContext &C) {
506  ProgramStateRef State = C.getState();
507  SVal Val = C.getSVal(E);
508 
509  // stdin is a pointer, so it would be a region.
510  const MemRegion *MemReg = Val.getAsRegion();
511 
512  // The region should be symbolic, we do not know it's value.
513  const SymbolicRegion *SymReg = dyn_cast_or_null<SymbolicRegion>(MemReg);
514  if (!SymReg)
515  return false;
516 
517  // Get it's symbol and find the declaration region it's pointing to.
518  const SymbolRegionValue *Sm =
519  dyn_cast<SymbolRegionValue>(SymReg->getSymbol());
520  if (!Sm)
521  return false;
522  const DeclRegion *DeclReg = dyn_cast_or_null<DeclRegion>(Sm->getRegion());
523  if (!DeclReg)
524  return false;
525 
526  // This region corresponds to a declaration, find out if it's a global/extern
527  // variable named stdin with the proper type.
528  if (const auto *D = dyn_cast_or_null<VarDecl>(DeclReg->getDecl())) {
529  D = D->getCanonicalDecl();
530  if ((D->getName().find("stdin") != StringRef::npos) && D->isExternC()) {
531  const auto *PtrTy = dyn_cast<PointerType>(D->getType().getTypePtr());
532  if (PtrTy && PtrTy->getPointeeType().getCanonicalType() ==
533  C.getASTContext().getFILEType().getCanonicalType())
534  return true;
535  }
536  }
537  return false;
538 }
539 
540 static bool getPrintfFormatArgumentNum(const CallExpr *CE,
541  const CheckerContext &C,
542  unsigned int &ArgNum) {
543  // Find if the function contains a format string argument.
544  // Handles: fprintf, printf, sprintf, snprintf, vfprintf, vprintf, vsprintf,
545  // vsnprintf, syslog, custom annotated functions.
546  const FunctionDecl *FDecl = C.getCalleeDecl(CE);
547  if (!FDecl)
548  return false;
549  for (const auto *Format : FDecl->specific_attrs<FormatAttr>()) {
550  ArgNum = Format->getFormatIdx() - 1;
551  if ((Format->getType()->getName() == "printf") && CE->getNumArgs() > ArgNum)
552  return true;
553  }
554 
555  // Or if a function is named setproctitle (this is a heuristic).
556  if (C.getCalleeName(CE).find("setproctitle") != StringRef::npos) {
557  ArgNum = 0;
558  return true;
559  }
560 
561  return false;
562 }
563 
564 bool GenericTaintChecker::generateReportIfTainted(const Expr *E,
565  const char Msg[],
566  CheckerContext &C) const {
567  assert(E);
568 
569  // Check for taint.
570  ProgramStateRef State = C.getState();
571  Optional<SVal> PointedToSVal = getPointedToSVal(C, E);
572  SVal TaintedSVal;
573  if (PointedToSVal && State->isTainted(*PointedToSVal))
574  TaintedSVal = *PointedToSVal;
575  else if (State->isTainted(E, C.getLocationContext()))
576  TaintedSVal = C.getSVal(E);
577  else
578  return false;
579 
580  // Generate diagnostic.
581  if (ExplodedNode *N = C.generateNonFatalErrorNode()) {
582  initBugType();
583  auto report = llvm::make_unique<BugReport>(*BT, Msg, N);
584  report->addRange(E->getSourceRange());
585  report->addVisitor(llvm::make_unique<TaintBugVisitor>(TaintedSVal));
586  C.emitReport(std::move(report));
587  return true;
588  }
589  return false;
590 }
591 
592 bool GenericTaintChecker::checkUncontrolledFormatString(
593  const CallExpr *CE, CheckerContext &C) const {
594  // Check if the function contains a format string argument.
595  unsigned int ArgNum = 0;
596  if (!getPrintfFormatArgumentNum(CE, C, ArgNum))
597  return false;
598 
599  // If either the format string content or the pointer itself are tainted,
600  // warn.
601  return generateReportIfTainted(CE->getArg(ArgNum),
602  MsgUncontrolledFormatString, C);
603 }
604 
605 bool GenericTaintChecker::checkSystemCall(const CallExpr *CE, StringRef Name,
606  CheckerContext &C) const {
607  // TODO: It might make sense to run this check on demand. In some cases,
608  // we should check if the environment has been cleansed here. We also might
609  // need to know if the user was reset before these calls(seteuid).
610  unsigned ArgNum = llvm::StringSwitch<unsigned>(Name)
611  .Case("system", 0)
612  .Case("popen", 0)
613  .Case("execl", 0)
614  .Case("execle", 0)
615  .Case("execlp", 0)
616  .Case("execv", 0)
617  .Case("execvp", 0)
618  .Case("execvP", 0)
619  .Case("execve", 0)
620  .Case("dlopen", 0)
621  .Default(UINT_MAX);
622 
623  if (ArgNum == UINT_MAX || CE->getNumArgs() < (ArgNum + 1))
624  return false;
625 
626  return generateReportIfTainted(CE->getArg(ArgNum), MsgSanitizeSystemArgs, C);
627 }
628 
629 // TODO: Should this check be a part of the CString checker?
630 // If yes, should taint be a global setting?
631 bool GenericTaintChecker::checkTaintedBufferSize(const CallExpr *CE,
632  const FunctionDecl *FDecl,
633  CheckerContext &C) const {
634  // If the function has a buffer size argument, set ArgNum.
635  unsigned ArgNum = InvalidArgIndex;
636  unsigned BId = 0;
637  if ((BId = FDecl->getMemoryFunctionKind()))
638  switch (BId) {
639  case Builtin::BImemcpy:
640  case Builtin::BImemmove:
641  case Builtin::BIstrncpy:
642  ArgNum = 2;
643  break;
644  case Builtin::BIstrndup:
645  ArgNum = 1;
646  break;
647  default:
648  break;
649  };
650 
651  if (ArgNum == InvalidArgIndex) {
652  if (C.isCLibraryFunction(FDecl, "malloc") ||
653  C.isCLibraryFunction(FDecl, "calloc") ||
654  C.isCLibraryFunction(FDecl, "alloca"))
655  ArgNum = 0;
656  else if (C.isCLibraryFunction(FDecl, "memccpy"))
657  ArgNum = 3;
658  else if (C.isCLibraryFunction(FDecl, "realloc"))
659  ArgNum = 1;
660  else if (C.isCLibraryFunction(FDecl, "bcopy"))
661  ArgNum = 2;
662  }
663 
664  return ArgNum != InvalidArgIndex && CE->getNumArgs() > ArgNum &&
665  generateReportIfTainted(CE->getArg(ArgNum), MsgTaintedBufferSize, C);
666 }
667 
668 void ento::registerGenericTaintChecker(CheckerManager &mgr) {
669  mgr.registerChecker<GenericTaintChecker>();
670 }
671 
672 bool ento::shouldRegisterGenericTaintChecker(const LangOptions &LO) {
673  return true;
674 }
Represents a function declaration or definition.
Definition: Decl.h:1737
unsigned getMemoryFunctionKind() const
Identify a memory copying or setting function.
Definition: Decl.cpp:3644
PointerType - C99 6.7.5.1 - Pointer Declarators.
Definition: Type.h:2547
A (possibly-)qualified type.
Definition: Type.h:639
Expr * getArg(unsigned Arg)
getArg - Return the specified argument.
Definition: Expr.h:2586
unsigned getNumArgs() const
getNumArgs - Return the number of actual arguments to this call.
Definition: Expr.h:2573
QualType getPointeeType() const
If this is a pointer, ObjC object pointer, or block pointer, this returns the respective pointee...
Definition: Type.cpp:505
IntrusiveRefCntPtr< const ProgramState > ProgramStateRef
The base class of the type hierarchy.
Definition: Type.h:1414
constexpr XRayInstrMask Function
Definition: XRayInstr.h:38
LineState State
Definition: Format.h:2153
bool isReferenceType() const
Definition: Type.h:6318
Keeps track of the various options that can be enabled, which controls the dialect of C or C++ that i...
Definition: LangOptions.h:49
static bool getPrintfFormatArgumentNum(const CallExpr *CE, const CheckerContext &C, unsigned int &ArgNum)
const Type * getTypePtr() const
Retrieves a pointer to the underlying (unqualified) type.
Definition: Type.h:6082
#define UINT_MAX
Definition: limits.h:72
This represents one expression.
Definition: Expr.h:108
#define bool
Definition: stdbool.h:31
QualType getType() const
Definition: Expr.h:130
bool isNull() const
Return true if this QualType doesn't point to a type yet.
Definition: Type.h:704
bool isConstQualified() const
Determine whether this type is const-qualified.
Definition: Type.h:6141
QualType getCanonicalType() const
Definition: Type.h:6121
Encodes a location in the source.
constexpr XRayInstrMask None
Definition: XRayInstr.h:37
SourceLocation getExprLoc() const LLVM_READONLY
getExprLoc - Return the preferred location for the arrow when diagnosing a problem with a generic exp...
Definition: Expr.cpp:214
#define REGISTER_SET_WITH_PROGRAMSTATE(Name, Elem)
Declares an immutable set of type NameTy, suitable for placement into the ProgramState.
Dataflow Directional Tag Classes.
Kind getKind() const
Definition: DeclBase.h:423
llvm::iterator_range< specific_attr_iterator< T > > specific_attrs() const
Definition: DeclBase.h:515
bool isVoidType() const
Definition: Type.h:6558
SourceRange getSourceRange() const LLVM_READONLY
SourceLocation tokens are not useful in isolation - they are low level value objects created/interpre...
Definition: Stmt.cpp:251
CallExpr - Represents a function call (C99 6.5.2.2, C++ [expr.call]).
Definition: Expr.h:2429
bool isPointerType() const
Definition: Type.h:6306
Defines enum values for all the target-independent builtin functions.
Expr * IgnoreParens() LLVM_READONLY
Skip past any parentheses which might surround this expression until reaching a fixed point...
Definition: Expr.cpp:2719