clang  9.0.0svn
GenericTaintChecker.cpp
Go to the documentation of this file.
1 //== GenericTaintChecker.cpp ----------------------------------- -*- C++ -*--=//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This checker defines the attack surface for generic taint propagation.
10 //
11 // The taint information produced by it might be useful to other checkers. For
12 // example, checkers should report errors which involve tainted data more
13 // aggressively, even if the involved symbols are under constrained.
14 //
15 //===----------------------------------------------------------------------===//
17 #include "clang/AST/Attr.h"
18 #include "clang/Basic/Builtins.h"
24 #include <climits>
25 #include <initializer_list>
26 #include <utility>
27 
28 using namespace clang;
29 using namespace ento;
30 
31 namespace {
32 class GenericTaintChecker
33  : public Checker<check::PostStmt<CallExpr>, check::PreStmt<CallExpr>> {
34 public:
35  static void *getTag() {
36  static int Tag;
37  return &Tag;
38  }
39 
40  void checkPostStmt(const CallExpr *CE, CheckerContext &C) const;
41 
42  void checkPreStmt(const CallExpr *CE, CheckerContext &C) const;
43 
44 private:
45  static const unsigned InvalidArgIndex = UINT_MAX;
46  /// Denotes the return vale.
47  static const unsigned ReturnValueIndex = UINT_MAX - 1;
48 
49  mutable std::unique_ptr<BugType> BT;
50  void initBugType() const {
51  if (!BT)
52  BT.reset(new BugType(this, "Use of Untrusted Data", "Untrusted Data"));
53  }
54 
55  /// Catch taint related bugs. Check if tainted data is passed to a
56  /// system call etc.
57  bool checkPre(const CallExpr *CE, CheckerContext &C) const;
58 
59  /// Add taint sources on a pre-visit.
60  void addSourcesPre(const CallExpr *CE, CheckerContext &C) const;
61 
62  /// Propagate taint generated at pre-visit.
63  bool propagateFromPre(const CallExpr *CE, CheckerContext &C) const;
64 
65  /// Add taint sources on a post visit.
66  void addSourcesPost(const CallExpr *CE, CheckerContext &C) const;
67 
68  /// Check if the region the expression evaluates to is the standard input,
69  /// and thus, is tainted.
70  static bool isStdin(const Expr *E, CheckerContext &C);
71 
72  /// Given a pointer argument, return the value it points to.
73  static Optional<SVal> getPointedToSVal(CheckerContext &C, const Expr *Arg);
74 
75  /// Functions defining the attack surface.
76  using FnCheck = ProgramStateRef (GenericTaintChecker::*)(
77  const CallExpr *, CheckerContext &C) const;
78  ProgramStateRef postScanf(const CallExpr *CE, CheckerContext &C) const;
79  ProgramStateRef postSocket(const CallExpr *CE, CheckerContext &C) const;
80  ProgramStateRef postRetTaint(const CallExpr *CE, CheckerContext &C) const;
81 
82  /// Taint the scanned input if the file is tainted.
83  ProgramStateRef preFscanf(const CallExpr *CE, CheckerContext &C) const;
84 
85  /// Check for CWE-134: Uncontrolled Format String.
86  static const char MsgUncontrolledFormatString[];
87  bool checkUncontrolledFormatString(const CallExpr *CE,
88  CheckerContext &C) const;
89 
90  /// Check for:
91  /// CERT/STR02-C. "Sanitize data passed to complex subsystems"
92  /// CWE-78, "Failure to Sanitize Data into an OS Command"
93  static const char MsgSanitizeSystemArgs[];
94  bool checkSystemCall(const CallExpr *CE, StringRef Name,
95  CheckerContext &C) const;
96 
97  /// Check if tainted data is used as a buffer size ins strn.. functions,
98  /// and allocators.
99  static const char MsgTaintedBufferSize[];
100  bool checkTaintedBufferSize(const CallExpr *CE, const FunctionDecl *FDecl,
101  CheckerContext &C) const;
102 
103  /// Generate a report if the expression is tainted or points to tainted data.
104  bool generateReportIfTainted(const Expr *E, const char Msg[],
105  CheckerContext &C) const;
106 
107  using ArgVector = SmallVector<unsigned, 2>;
108 
109  /// A struct used to specify taint propagation rules for a function.
110  ///
111  /// If any of the possible taint source arguments is tainted, all of the
112  /// destination arguments should also be tainted. Use InvalidArgIndex in the
113  /// src list to specify that all of the arguments can introduce taint. Use
114  /// InvalidArgIndex in the dst arguments to signify that all the non-const
115  /// pointer and reference arguments might be tainted on return. If
116  /// ReturnValueIndex is added to the dst list, the return value will be
117  /// tainted.
118  struct TaintPropagationRule {
119  enum class VariadicType { None, Src, Dst };
120 
121  /// List of arguments which can be taint sources and should be checked.
122  ArgVector SrcArgs;
123  /// List of arguments which should be tainted on function return.
124  ArgVector DstArgs;
125  /// Index for the first variadic parameter if exist.
126  unsigned VariadicIndex;
127  /// Show when a function has variadic parameters. If it has, it marks all
128  /// of them as source or destination.
129  VariadicType VarType;
130 
131  TaintPropagationRule()
132  : VariadicIndex(InvalidArgIndex), VarType(VariadicType::None) {}
133 
134  TaintPropagationRule(std::initializer_list<unsigned> &&Src,
135  std::initializer_list<unsigned> &&Dst,
136  VariadicType Var = VariadicType::None,
137  unsigned VarIndex = InvalidArgIndex)
138  : SrcArgs(std::move(Src)), DstArgs(std::move(Dst)),
139  VariadicIndex(VarIndex), VarType(Var) {}
140 
141  /// Get the propagation rule for a given function.
142  static TaintPropagationRule
143  getTaintPropagationRule(const FunctionDecl *FDecl, StringRef Name,
144  CheckerContext &C);
145 
146  void addSrcArg(unsigned A) { SrcArgs.push_back(A); }
147  void addDstArg(unsigned A) { DstArgs.push_back(A); }
148 
149  bool isNull() const {
150  return SrcArgs.empty() && DstArgs.empty() &&
151  VariadicType::None == VarType;
152  }
153 
154  bool isDestinationArgument(unsigned ArgNum) const {
155  return (llvm::find(DstArgs, ArgNum) != DstArgs.end());
156  }
157 
158  static bool isTaintedOrPointsToTainted(const Expr *E, ProgramStateRef State,
159  CheckerContext &C) {
160  if (State->isTainted(E, C.getLocationContext()) || isStdin(E, C))
161  return true;
162 
163  if (!E->getType().getTypePtr()->isPointerType())
164  return false;
165 
166  Optional<SVal> V = getPointedToSVal(C, E);
167  return (V && State->isTainted(*V));
168  }
169 
170  /// Pre-process a function which propagates taint according to the
171  /// taint rule.
172  ProgramStateRef process(const CallExpr *CE, CheckerContext &C) const;
173  };
174 };
175 
176 const unsigned GenericTaintChecker::ReturnValueIndex;
177 const unsigned GenericTaintChecker::InvalidArgIndex;
178 
179 const char GenericTaintChecker::MsgUncontrolledFormatString[] =
180  "Untrusted data is used as a format string "
181  "(CWE-134: Uncontrolled Format String)";
182 
183 const char GenericTaintChecker::MsgSanitizeSystemArgs[] =
184  "Untrusted data is passed to a system call "
185  "(CERT/STR02-C. Sanitize data passed to complex subsystems)";
186 
187 const char GenericTaintChecker::MsgTaintedBufferSize[] =
188  "Untrusted data is used to specify the buffer size "
189  "(CERT/STR31-C. Guarantee that storage for strings has sufficient space "
190  "for character data and the null terminator)";
191 
192 } // end of anonymous namespace
193 
/// A set used to pass information from the call pre-visit to the call
/// post-visit. Each element is an unsigned integer: either ReturnValueIndex,
/// or the index of a pointer/reference argument whose pointed-to data should
/// be tainted on return.
198 REGISTER_SET_WITH_PROGRAMSTATE(TaintArgsOnPostVisit, unsigned)
199 
200 GenericTaintChecker::TaintPropagationRule
201 GenericTaintChecker::TaintPropagationRule::getTaintPropagationRule(
202  const FunctionDecl *FDecl, StringRef Name, CheckerContext &C) {
203  // TODO: Currently, we might lose precision here: we always mark a return
204  // value as tainted even if it's just a pointer, pointing to tainted data.
205 
206  // Check for exact name match for functions without builtin substitutes.
207  TaintPropagationRule Rule =
208  llvm::StringSwitch<TaintPropagationRule>(Name)
209  .Case("atoi", TaintPropagationRule({0}, {ReturnValueIndex}))
210  .Case("atol", TaintPropagationRule({0}, {ReturnValueIndex}))
211  .Case("atoll", TaintPropagationRule({0}, {ReturnValueIndex}))
212  .Case("getc", TaintPropagationRule({0}, {ReturnValueIndex}))
213  .Case("fgetc", TaintPropagationRule({0}, {ReturnValueIndex}))
214  .Case("getc_unlocked", TaintPropagationRule({0}, {ReturnValueIndex}))
215  .Case("getw", TaintPropagationRule({0}, {ReturnValueIndex}))
216  .Case("toupper", TaintPropagationRule({0}, {ReturnValueIndex}))
217  .Case("tolower", TaintPropagationRule({0}, {ReturnValueIndex}))
218  .Case("strchr", TaintPropagationRule({0}, {ReturnValueIndex}))
219  .Case("strrchr", TaintPropagationRule({0}, {ReturnValueIndex}))
220  .Case("read", TaintPropagationRule({0, 2}, {1, ReturnValueIndex}))
221  .Case("pread",
222  TaintPropagationRule({0, 1, 2, 3}, {1, ReturnValueIndex}))
223  .Case("gets", TaintPropagationRule({}, {0, ReturnValueIndex}))
224  .Case("fgets", TaintPropagationRule({2}, {0, ReturnValueIndex}))
225  .Case("getline", TaintPropagationRule({2}, {0}))
226  .Case("getdelim", TaintPropagationRule({3}, {0}))
227  .Case("fgetln", TaintPropagationRule({0}, {ReturnValueIndex}))
228  .Default(TaintPropagationRule());
229 
230  if (!Rule.isNull())
231  return Rule;
232 
233  // Check if it's one of the memory setting/copying functions.
234  // This check is specialized but faster then calling isCLibraryFunction.
235  unsigned BId = 0;
236  if ((BId = FDecl->getMemoryFunctionKind()))
237  switch (BId) {
238  case Builtin::BImemcpy:
239  case Builtin::BImemmove:
240  case Builtin::BIstrncpy:
241  case Builtin::BIstrncat:
242  return TaintPropagationRule({1, 2}, {0, ReturnValueIndex});
243  case Builtin::BIstrlcpy:
244  case Builtin::BIstrlcat:
245  return TaintPropagationRule({1, 2}, {0});
246  case Builtin::BIstrndup:
247  return TaintPropagationRule({0, 1}, {ReturnValueIndex});
248 
249  default:
250  break;
251  };
252 
253  // Process all other functions which could be defined as builtins.
254  if (Rule.isNull()) {
255  if (C.isCLibraryFunction(FDecl, "snprintf"))
256  return TaintPropagationRule({1}, {0, ReturnValueIndex}, VariadicType::Src,
257  3);
258  else if (C.isCLibraryFunction(FDecl, "sprintf"))
259  return TaintPropagationRule({}, {0, ReturnValueIndex}, VariadicType::Src,
260  2);
261  else if (C.isCLibraryFunction(FDecl, "strcpy") ||
262  C.isCLibraryFunction(FDecl, "stpcpy") ||
263  C.isCLibraryFunction(FDecl, "strcat"))
264  return TaintPropagationRule({1}, {0, ReturnValueIndex});
265  else if (C.isCLibraryFunction(FDecl, "bcopy"))
266  return TaintPropagationRule({0, 2}, {1});
267  else if (C.isCLibraryFunction(FDecl, "strdup") ||
268  C.isCLibraryFunction(FDecl, "strdupa"))
269  return TaintPropagationRule({0}, {ReturnValueIndex});
270  else if (C.isCLibraryFunction(FDecl, "wcsdup"))
271  return TaintPropagationRule({0}, {ReturnValueIndex});
272  }
273 
274  // Skipping the following functions, since they might be used for cleansing
275  // or smart memory copy:
276  // - memccpy - copying until hitting a special character.
277 
278  return TaintPropagationRule();
279 }
280 
281 void GenericTaintChecker::checkPreStmt(const CallExpr *CE,
282  CheckerContext &C) const {
283  // Check for errors first.
284  if (checkPre(CE, C))
285  return;
286 
287  // Add taint second.
288  addSourcesPre(CE, C);
289 }
290 
291 void GenericTaintChecker::checkPostStmt(const CallExpr *CE,
292  CheckerContext &C) const {
293  if (propagateFromPre(CE, C))
294  return;
295  addSourcesPost(CE, C);
296 }
297 
298 void GenericTaintChecker::addSourcesPre(const CallExpr *CE,
299  CheckerContext &C) const {
300  ProgramStateRef State = nullptr;
301  const FunctionDecl *FDecl = C.getCalleeDecl(CE);
302  if (!FDecl || FDecl->getKind() != Decl::Function)
303  return;
304 
305  StringRef Name = C.getCalleeName(FDecl);
306  if (Name.empty())
307  return;
308 
309  // First, try generating a propagation rule for this function.
310  TaintPropagationRule Rule =
311  TaintPropagationRule::getTaintPropagationRule(FDecl, Name, C);
312  if (!Rule.isNull()) {
313  State = Rule.process(CE, C);
314  if (!State)
315  return;
316  C.addTransition(State);
317  return;
318  }
319 
320  // Otherwise, check if we have custom pre-processing implemented.
321  FnCheck evalFunction = llvm::StringSwitch<FnCheck>(Name)
322  .Case("fscanf", &GenericTaintChecker::preFscanf)
323  .Default(nullptr);
324  // Check and evaluate the call.
325  if (evalFunction)
326  State = (this->*evalFunction)(CE, C);
327  if (!State)
328  return;
329  C.addTransition(State);
330 }
331 
332 bool GenericTaintChecker::propagateFromPre(const CallExpr *CE,
333  CheckerContext &C) const {
334  ProgramStateRef State = C.getState();
335 
336  // Depending on what was tainted at pre-visit, we determined a set of
337  // arguments which should be tainted after the function returns. These are
338  // stored in the state as TaintArgsOnPostVisit set.
339  TaintArgsOnPostVisitTy TaintArgs = State->get<TaintArgsOnPostVisit>();
340  if (TaintArgs.isEmpty())
341  return false;
342 
343  for (unsigned ArgNum : TaintArgs) {
344  // Special handling for the tainted return value.
345  if (ArgNum == ReturnValueIndex) {
346  State = State->addTaint(CE, C.getLocationContext());
347  continue;
348  }
349 
350  // The arguments are pointer arguments. The data they are pointing at is
351  // tainted after the call.
352  if (CE->getNumArgs() < (ArgNum + 1))
353  return false;
354  const Expr *Arg = CE->getArg(ArgNum);
355  Optional<SVal> V = getPointedToSVal(C, Arg);
356  if (V)
357  State = State->addTaint(*V);
358  }
359 
360  // Clear up the taint info from the state.
361  State = State->remove<TaintArgsOnPostVisit>();
362 
363  if (State != C.getState()) {
364  C.addTransition(State);
365  return true;
366  }
367  return false;
368 }
369 
370 void GenericTaintChecker::addSourcesPost(const CallExpr *CE,
371  CheckerContext &C) const {
372  // Define the attack surface.
373  // Set the evaluation function by switching on the callee name.
374  const FunctionDecl *FDecl = C.getCalleeDecl(CE);
375  if (!FDecl || FDecl->getKind() != Decl::Function)
376  return;
377 
378  StringRef Name = C.getCalleeName(FDecl);
379  if (Name.empty())
380  return;
381  FnCheck evalFunction =
382  llvm::StringSwitch<FnCheck>(Name)
383  .Case("scanf", &GenericTaintChecker::postScanf)
384  // TODO: Add support for vfscanf & family.
385  .Case("getchar", &GenericTaintChecker::postRetTaint)
386  .Case("getchar_unlocked", &GenericTaintChecker::postRetTaint)
387  .Case("getenv", &GenericTaintChecker::postRetTaint)
388  .Case("fopen", &GenericTaintChecker::postRetTaint)
389  .Case("fdopen", &GenericTaintChecker::postRetTaint)
390  .Case("freopen", &GenericTaintChecker::postRetTaint)
391  .Case("getch", &GenericTaintChecker::postRetTaint)
392  .Case("wgetch", &GenericTaintChecker::postRetTaint)
393  .Case("socket", &GenericTaintChecker::postSocket)
394  .Default(nullptr);
395 
396  // If the callee isn't defined, it is not of security concern.
397  // Check and evaluate the call.
398  ProgramStateRef State = nullptr;
399  if (evalFunction)
400  State = (this->*evalFunction)(CE, C);
401  if (!State)
402  return;
403 
404  C.addTransition(State);
405 }
406 
407 bool GenericTaintChecker::checkPre(const CallExpr *CE,
408  CheckerContext &C) const {
409 
410  if (checkUncontrolledFormatString(CE, C))
411  return true;
412 
413  const FunctionDecl *FDecl = C.getCalleeDecl(CE);
414  if (!FDecl || FDecl->getKind() != Decl::Function)
415  return false;
416 
417  StringRef Name = C.getCalleeName(FDecl);
418  if (Name.empty())
419  return false;
420 
421  if (checkSystemCall(CE, Name, C))
422  return true;
423 
424  if (checkTaintedBufferSize(CE, FDecl, C))
425  return true;
426 
427  return false;
428 }
429 
430 Optional<SVal> GenericTaintChecker::getPointedToSVal(CheckerContext &C,
431  const Expr *Arg) {
432  ProgramStateRef State = C.getState();
433  SVal AddrVal = C.getSVal(Arg->IgnoreParens());
434  if (AddrVal.isUnknownOrUndef())
435  return None;
436 
437  Optional<Loc> AddrLoc = AddrVal.getAs<Loc>();
438  if (!AddrLoc)
439  return None;
440 
441  QualType ArgTy = Arg->getType().getCanonicalType();
442  if (!ArgTy->isPointerType())
443  return None;
444 
445  QualType ValTy = ArgTy->getPointeeType();
446 
447  // Do not dereference void pointers. Treat them as byte pointers instead.
448  // FIXME: we might want to consider more than just the first byte.
449  if (ValTy->isVoidType())
450  ValTy = C.getASTContext().CharTy;
451 
452  return State->getSVal(*AddrLoc, ValTy);
453 }
454 
456 GenericTaintChecker::TaintPropagationRule::process(const CallExpr *CE,
457  CheckerContext &C) const {
458  ProgramStateRef State = C.getState();
459 
460  // Check for taint in arguments.
461  bool IsTainted = false;
462  for (unsigned ArgNum : SrcArgs) {
463  if (ArgNum >= CE->getNumArgs())
464  return State;
465  if ((IsTainted = isTaintedOrPointsToTainted(CE->getArg(ArgNum), State, C)))
466  break;
467  }
468 
469  // Check for taint in variadic arguments.
470  if (!IsTainted && VariadicType::Src == VarType) {
471  // Check if any of the arguments is tainted
472  for (unsigned int i = VariadicIndex; i < CE->getNumArgs(); ++i) {
473  if ((IsTainted = isTaintedOrPointsToTainted(CE->getArg(i), State, C)))
474  break;
475  }
476  }
477 
478  if (!IsTainted)
479  return State;
480 
481  // Mark the arguments which should be tainted after the function returns.
482  for (unsigned ArgNum : DstArgs) {
483  // Should mark the return value?
484  if (ArgNum == ReturnValueIndex) {
485  State = State->add<TaintArgsOnPostVisit>(ReturnValueIndex);
486  continue;
487  }
488 
489  // Mark the given argument.
490  assert(ArgNum < CE->getNumArgs());
491  State = State->add<TaintArgsOnPostVisit>(ArgNum);
492  }
493 
494  // Mark all variadic arguments tainted if present.
495  if (VariadicType::Dst == VarType) {
496  // For all pointer and references that were passed in:
497  // If they are not pointing to const data, mark data as tainted.
498  // TODO: So far we are just going one level down; ideally we'd need to
499  // recurse here.
500  for (unsigned int i = VariadicIndex; i < CE->getNumArgs(); ++i) {
501  const Expr *Arg = CE->getArg(i);
502  // Process pointer argument.
503  const Type *ArgTy = Arg->getType().getTypePtr();
504  QualType PType = ArgTy->getPointeeType();
505  if ((!PType.isNull() && !PType.isConstQualified()) ||
506  (ArgTy->isReferenceType() && !Arg->getType().isConstQualified()))
507  State = State->add<TaintArgsOnPostVisit>(i);
508  }
509  }
510 
511  return State;
512 }
513 
514 // If argument 0 (file descriptor) is tainted, all arguments except for arg 0
515 // and arg 1 should get taint.
516 ProgramStateRef GenericTaintChecker::preFscanf(const CallExpr *CE,
517  CheckerContext &C) const {
518  assert(CE->getNumArgs() >= 2);
519  ProgramStateRef State = C.getState();
520 
521  // Check is the file descriptor is tainted.
522  if (State->isTainted(CE->getArg(0), C.getLocationContext()) ||
523  isStdin(CE->getArg(0), C)) {
524  // All arguments except for the first two should get taint.
525  for (unsigned int i = 2; i < CE->getNumArgs(); ++i)
526  State = State->add<TaintArgsOnPostVisit>(i);
527  return State;
528  }
529 
530  return nullptr;
531 }
532 
533 // If argument 0(protocol domain) is network, the return value should get taint.
534 ProgramStateRef GenericTaintChecker::postSocket(const CallExpr *CE,
535  CheckerContext &C) const {
536  ProgramStateRef State = C.getState();
537  if (CE->getNumArgs() < 3)
538  return State;
539 
540  SourceLocation DomLoc = CE->getArg(0)->getExprLoc();
541  StringRef DomName = C.getMacroNameOrSpelling(DomLoc);
542  // White list the internal communication protocols.
543  if (DomName.equals("AF_SYSTEM") || DomName.equals("AF_LOCAL") ||
544  DomName.equals("AF_UNIX") || DomName.equals("AF_RESERVED_36"))
545  return State;
546  State = State->addTaint(CE, C.getLocationContext());
547  return State;
548 }
549 
550 ProgramStateRef GenericTaintChecker::postScanf(const CallExpr *CE,
551  CheckerContext &C) const {
552  ProgramStateRef State = C.getState();
553  if (CE->getNumArgs() < 2)
554  return State;
555 
556  // All arguments except for the very first one should get taint.
557  for (unsigned int i = 1; i < CE->getNumArgs(); ++i) {
558  // The arguments are pointer arguments. The data they are pointing at is
559  // tainted after the call.
560  const Expr *Arg = CE->getArg(i);
561  Optional<SVal> V = getPointedToSVal(C, Arg);
562  if (V)
563  State = State->addTaint(*V);
564  }
565  return State;
566 }
567 
568 ProgramStateRef GenericTaintChecker::postRetTaint(const CallExpr *CE,
569  CheckerContext &C) const {
570  return C.getState()->addTaint(CE, C.getLocationContext());
571 }
572 
573 bool GenericTaintChecker::isStdin(const Expr *E, CheckerContext &C) {
574  ProgramStateRef State = C.getState();
575  SVal Val = C.getSVal(E);
576 
577  // stdin is a pointer, so it would be a region.
578  const MemRegion *MemReg = Val.getAsRegion();
579 
580  // The region should be symbolic, we do not know it's value.
581  const SymbolicRegion *SymReg = dyn_cast_or_null<SymbolicRegion>(MemReg);
582  if (!SymReg)
583  return false;
584 
585  // Get it's symbol and find the declaration region it's pointing to.
586  const SymbolRegionValue *Sm =
587  dyn_cast<SymbolRegionValue>(SymReg->getSymbol());
588  if (!Sm)
589  return false;
590  const DeclRegion *DeclReg = dyn_cast_or_null<DeclRegion>(Sm->getRegion());
591  if (!DeclReg)
592  return false;
593 
594  // This region corresponds to a declaration, find out if it's a global/extern
595  // variable named stdin with the proper type.
596  if (const auto *D = dyn_cast_or_null<VarDecl>(DeclReg->getDecl())) {
597  D = D->getCanonicalDecl();
598  if ((D->getName().find("stdin") != StringRef::npos) && D->isExternC()) {
599  const auto *PtrTy = dyn_cast<PointerType>(D->getType().getTypePtr());
600  if (PtrTy && PtrTy->getPointeeType().getCanonicalType() ==
601  C.getASTContext().getFILEType().getCanonicalType())
602  return true;
603  }
604  }
605  return false;
606 }
607 
608 static bool getPrintfFormatArgumentNum(const CallExpr *CE,
609  const CheckerContext &C,
610  unsigned int &ArgNum) {
611  // Find if the function contains a format string argument.
612  // Handles: fprintf, printf, sprintf, snprintf, vfprintf, vprintf, vsprintf,
613  // vsnprintf, syslog, custom annotated functions.
614  const FunctionDecl *FDecl = C.getCalleeDecl(CE);
615  if (!FDecl)
616  return false;
617  for (const auto *Format : FDecl->specific_attrs<FormatAttr>()) {
618  ArgNum = Format->getFormatIdx() - 1;
619  if ((Format->getType()->getName() == "printf") && CE->getNumArgs() > ArgNum)
620  return true;
621  }
622 
623  // Or if a function is named setproctitle (this is a heuristic).
624  if (C.getCalleeName(CE).find("setproctitle") != StringRef::npos) {
625  ArgNum = 0;
626  return true;
627  }
628 
629  return false;
630 }
631 
632 bool GenericTaintChecker::generateReportIfTainted(const Expr *E,
633  const char Msg[],
634  CheckerContext &C) const {
635  assert(E);
636 
637  // Check for taint.
638  ProgramStateRef State = C.getState();
639  Optional<SVal> PointedToSVal = getPointedToSVal(C, E);
640  SVal TaintedSVal;
641  if (PointedToSVal && State->isTainted(*PointedToSVal))
642  TaintedSVal = *PointedToSVal;
643  else if (State->isTainted(E, C.getLocationContext()))
644  TaintedSVal = C.getSVal(E);
645  else
646  return false;
647 
648  // Generate diagnostic.
649  if (ExplodedNode *N = C.generateNonFatalErrorNode()) {
650  initBugType();
651  auto report = llvm::make_unique<BugReport>(*BT, Msg, N);
652  report->addRange(E->getSourceRange());
653  report->addVisitor(llvm::make_unique<TaintBugVisitor>(TaintedSVal));
654  C.emitReport(std::move(report));
655  return true;
656  }
657  return false;
658 }
659 
660 bool GenericTaintChecker::checkUncontrolledFormatString(
661  const CallExpr *CE, CheckerContext &C) const {
662  // Check if the function contains a format string argument.
663  unsigned int ArgNum = 0;
664  if (!getPrintfFormatArgumentNum(CE, C, ArgNum))
665  return false;
666 
667  // If either the format string content or the pointer itself are tainted,
668  // warn.
669  return generateReportIfTainted(CE->getArg(ArgNum),
670  MsgUncontrolledFormatString, C);
671 }
672 
673 bool GenericTaintChecker::checkSystemCall(const CallExpr *CE, StringRef Name,
674  CheckerContext &C) const {
675  // TODO: It might make sense to run this check on demand. In some cases,
676  // we should check if the environment has been cleansed here. We also might
677  // need to know if the user was reset before these calls(seteuid).
678  unsigned ArgNum = llvm::StringSwitch<unsigned>(Name)
679  .Case("system", 0)
680  .Case("popen", 0)
681  .Case("execl", 0)
682  .Case("execle", 0)
683  .Case("execlp", 0)
684  .Case("execv", 0)
685  .Case("execvp", 0)
686  .Case("execvP", 0)
687  .Case("execve", 0)
688  .Case("dlopen", 0)
689  .Default(UINT_MAX);
690 
691  if (ArgNum == UINT_MAX || CE->getNumArgs() < (ArgNum + 1))
692  return false;
693 
694  return generateReportIfTainted(CE->getArg(ArgNum), MsgSanitizeSystemArgs, C);
695 }
696 
697 // TODO: Should this check be a part of the CString checker?
698 // If yes, should taint be a global setting?
699 bool GenericTaintChecker::checkTaintedBufferSize(const CallExpr *CE,
700  const FunctionDecl *FDecl,
701  CheckerContext &C) const {
702  // If the function has a buffer size argument, set ArgNum.
703  unsigned ArgNum = InvalidArgIndex;
704  unsigned BId = 0;
705  if ((BId = FDecl->getMemoryFunctionKind()))
706  switch (BId) {
707  case Builtin::BImemcpy:
708  case Builtin::BImemmove:
709  case Builtin::BIstrncpy:
710  ArgNum = 2;
711  break;
712  case Builtin::BIstrndup:
713  ArgNum = 1;
714  break;
715  default:
716  break;
717  };
718 
719  if (ArgNum == InvalidArgIndex) {
720  if (C.isCLibraryFunction(FDecl, "malloc") ||
721  C.isCLibraryFunction(FDecl, "calloc") ||
722  C.isCLibraryFunction(FDecl, "alloca"))
723  ArgNum = 0;
724  else if (C.isCLibraryFunction(FDecl, "memccpy"))
725  ArgNum = 3;
726  else if (C.isCLibraryFunction(FDecl, "realloc"))
727  ArgNum = 1;
728  else if (C.isCLibraryFunction(FDecl, "bcopy"))
729  ArgNum = 2;
730  }
731 
732  return ArgNum != InvalidArgIndex && CE->getNumArgs() > ArgNum &&
733  generateReportIfTainted(CE->getArg(ArgNum), MsgTaintedBufferSize, C);
734 }
735 
736 void ento::registerGenericTaintChecker(CheckerManager &mgr) {
737  mgr.registerChecker<GenericTaintChecker>();
738 }
739 
740 bool ento::shouldRegisterGenericTaintChecker(const LangOptions &LO) {
741  return true;
742 }
Represents a function declaration or definition.
Definition: Decl.h:1737
unsigned getMemoryFunctionKind() const
Identify a memory copying or setting function.
Definition: Decl.cpp:3640
PointerType - C99 6.7.5.1 - Pointer Declarators.
Definition: Type.h:2542
A (possibly-)qualified type.
Definition: Type.h:634
Expr * getArg(unsigned Arg)
getArg - Return the specified argument.
Definition: Expr.h:2586
unsigned getNumArgs() const
getNumArgs - Return the number of actual arguments to this call.
Definition: Expr.h:2573
QualType getPointeeType() const
If this is a pointer, ObjC object pointer, or block pointer, this returns the respective pointee...
Definition: Type.cpp:505
IntrusiveRefCntPtr< const ProgramState > ProgramStateRef
The base class of the type hierarchy.
Definition: Type.h:1409
constexpr XRayInstrMask Function
Definition: XRayInstr.h:38
LineState State
Definition: Format.h:2071
bool isReferenceType() const
Definition: Type.h:6313
Keeps track of the various options that can be enabled, which controls the dialect of C or C++ that i...
Definition: LangOptions.h:49
static bool getPrintfFormatArgumentNum(const CallExpr *CE, const CheckerContext &C, unsigned int &ArgNum)
const Type * getTypePtr() const
Retrieves a pointer to the underlying (unqualified) type.
Definition: Type.h:6077
#define UINT_MAX
Definition: limits.h:72
This represents one expression.
Definition: Expr.h:108
QualType getType() const
Definition: Expr.h:130
bool isNull() const
Return true if this QualType doesn't point to a type yet.
Definition: Type.h:699
bool isConstQualified() const
Determine whether this type is const-qualified.
Definition: Type.h:6136
QualType getCanonicalType() const
Definition: Type.h:6116
Encodes a location in the source.
constexpr XRayInstrMask None
Definition: XRayInstr.h:37
SourceLocation getExprLoc() const LLVM_READONLY
getExprLoc - Return the preferred location for the arrow when diagnosing a problem with a generic exp...
Definition: Expr.cpp:214
#define REGISTER_SET_WITH_PROGRAMSTATE(Name, Elem)
Declares an immutable set of type NameTy, suitable for placement into the ProgramState.
Dataflow Directional Tag Classes.
Kind getKind() const
Definition: DeclBase.h:423
llvm::iterator_range< specific_attr_iterator< T > > specific_attrs() const
Definition: DeclBase.h:515
bool isVoidType() const
Definition: Type.h:6553
SourceRange getSourceRange() const LLVM_READONLY
SourceLocation tokens are not useful in isolation - they are low level value objects created/interpre...
Definition: Stmt.cpp:251
CallExpr - Represents a function call (C99 6.5.2.2, C++ [expr.call]).
Definition: Expr.h:2429
bool isPointerType() const
Definition: Type.h:6301
Defines enum values for all the target-independent builtin functions.
Expr * IgnoreParens() LLVM_READONLY
Skip past any parentheses which might surround this expression until reaching a fixed point...
Definition: Expr.cpp:2715