clang  10.0.0svn
GenericTaintChecker.cpp
Go to the documentation of this file.
1 //== GenericTaintChecker.cpp ----------------------------------- -*- C++ -*--=//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This checker defines the attack surface for generic taint propagation.
10 //
11 // The taint information produced by it might be useful to other checkers. For
12 // example, checkers should report errors which involve tainted data more
13 // aggressively, even if the involved symbols are under constrained.
14 //
15 //===----------------------------------------------------------------------===//
16 
17 #include "Taint.h"
18 #include "Yaml.h"
19 #include "clang/AST/Attr.h"
20 #include "clang/Basic/Builtins.h"
27 #include "llvm/ADT/StringMap.h"
28 #include "llvm/Support/YAMLTraits.h"
29 #include <limits>
30 #include <utility>
31 
32 using namespace clang;
33 using namespace ento;
34 using namespace taint;
35 
36 namespace {
37 class GenericTaintChecker
38  : public Checker<check::PostStmt<CallExpr>, check::PreStmt<CallExpr>> {
39 public:
40  static void *getTag() {
41  static int Tag;
42  return &Tag;
43  }
44 
45  void checkPostStmt(const CallExpr *CE, CheckerContext &C) const;
46 
47  void checkPreStmt(const CallExpr *CE, CheckerContext &C) const;
48 
49  void printState(raw_ostream &Out, ProgramStateRef State, const char *NL,
50  const char *Sep) const override;
51 
52  using ArgVector = SmallVector<unsigned, 2>;
53  using SignedArgVector = SmallVector<int, 2>;
54 
55  enum class VariadicType { None, Src, Dst };
56 
57  /// Used to parse the configuration file.
58  struct TaintConfiguration {
59  using NameArgsPair = std::pair<std::string, ArgVector>;
60 
61  struct Propagation {
62  std::string Name;
63  ArgVector SrcArgs;
64  SignedArgVector DstArgs;
65  VariadicType VarType;
66  unsigned VarIndex;
67  };
68 
69  std::vector<Propagation> Propagations;
70  std::vector<NameArgsPair> Filters;
71  std::vector<NameArgsPair> Sinks;
72 
73  TaintConfiguration() = default;
74  TaintConfiguration(const TaintConfiguration &) = default;
75  TaintConfiguration(TaintConfiguration &&) = default;
76  TaintConfiguration &operator=(const TaintConfiguration &) = default;
77  TaintConfiguration &operator=(TaintConfiguration &&) = default;
78  };
79 
80  /// Convert SignedArgVector to ArgVector.
81  ArgVector convertToArgVector(CheckerManager &Mgr, const std::string &Option,
82  SignedArgVector Args);
83 
84  /// Parse the config.
85  void parseConfiguration(CheckerManager &Mgr, const std::string &Option,
86  TaintConfiguration &&Config);
87 
88  static const unsigned InvalidArgIndex{std::numeric_limits<unsigned>::max()};
89  /// Denotes the return vale.
90  static const unsigned ReturnValueIndex{std::numeric_limits<unsigned>::max() -
91  1};
92 
93 private:
94  mutable std::unique_ptr<BugType> BT;
95  void initBugType() const {
96  if (!BT)
97  BT.reset(new BugType(this, "Use of Untrusted Data", "Untrusted Data"));
98  }
99 
100  /// Catch taint related bugs. Check if tainted data is passed to a
101  /// system call etc.
102  bool checkPre(const CallExpr *CE, CheckerContext &C) const;
103 
104  /// Add taint sources on a pre-visit.
105  void addSourcesPre(const CallExpr *CE, CheckerContext &C) const;
106 
107  /// Propagate taint generated at pre-visit.
108  bool propagateFromPre(const CallExpr *CE, CheckerContext &C) const;
109 
110  /// Check if the region the expression evaluates to is the standard input,
111  /// and thus, is tainted.
112  static bool isStdin(const Expr *E, CheckerContext &C);
113 
114  /// Given a pointer argument, return the value it points to.
115  static Optional<SVal> getPointedToSVal(CheckerContext &C, const Expr *Arg);
116 
117  /// Check for CWE-134: Uncontrolled Format String.
118  static constexpr llvm::StringLiteral MsgUncontrolledFormatString =
119  "Untrusted data is used as a format string "
120  "(CWE-134: Uncontrolled Format String)";
121  bool checkUncontrolledFormatString(const CallExpr *CE,
122  CheckerContext &C) const;
123 
124  /// Check for:
125  /// CERT/STR02-C. "Sanitize data passed to complex subsystems"
126  /// CWE-78, "Failure to Sanitize Data into an OS Command"
127  static constexpr llvm::StringLiteral MsgSanitizeSystemArgs =
128  "Untrusted data is passed to a system call "
129  "(CERT/STR02-C. Sanitize data passed to complex subsystems)";
130  bool checkSystemCall(const CallExpr *CE, StringRef Name,
131  CheckerContext &C) const;
132 
133  /// Check if tainted data is used as a buffer size ins strn.. functions,
134  /// and allocators.
135  static constexpr llvm::StringLiteral MsgTaintedBufferSize =
136  "Untrusted data is used to specify the buffer size "
137  "(CERT/STR31-C. Guarantee that storage for strings has sufficient space "
138  "for character data and the null terminator)";
139  bool checkTaintedBufferSize(const CallExpr *CE, const FunctionDecl *FDecl,
140  CheckerContext &C) const;
141 
142  /// Check if tainted data is used as a custom sink's parameter.
143  static constexpr llvm::StringLiteral MsgCustomSink =
144  "Untrusted data is passed to a user-defined sink";
145  bool checkCustomSinks(const CallExpr *CE, StringRef Name,
146  CheckerContext &C) const;
147 
148  /// Generate a report if the expression is tainted or points to tainted data.
149  bool generateReportIfTainted(const Expr *E, StringRef Msg,
150  CheckerContext &C) const;
151 
152  struct TaintPropagationRule;
153  using NameRuleMap = llvm::StringMap<TaintPropagationRule>;
154  using NameArgMap = llvm::StringMap<ArgVector>;
155 
156  /// A struct used to specify taint propagation rules for a function.
157  ///
158  /// If any of the possible taint source arguments is tainted, all of the
159  /// destination arguments should also be tainted. Use InvalidArgIndex in the
160  /// src list to specify that all of the arguments can introduce taint. Use
161  /// InvalidArgIndex in the dst arguments to signify that all the non-const
162  /// pointer and reference arguments might be tainted on return. If
163  /// ReturnValueIndex is added to the dst list, the return value will be
164  /// tainted.
165  struct TaintPropagationRule {
166  using PropagationFuncType = bool (*)(bool IsTainted, const CallExpr *,
167  CheckerContext &C);
168 
169  /// List of arguments which can be taint sources and should be checked.
170  ArgVector SrcArgs;
171  /// List of arguments which should be tainted on function return.
172  ArgVector DstArgs;
173  /// Index for the first variadic parameter if exist.
174  unsigned VariadicIndex;
175  /// Show when a function has variadic parameters. If it has, it marks all
176  /// of them as source or destination.
177  VariadicType VarType;
178  /// Special function for tainted source determination. If defined, it can
179  /// override the default behavior.
180  PropagationFuncType PropagationFunc;
181 
182  TaintPropagationRule()
183  : VariadicIndex(InvalidArgIndex), VarType(VariadicType::None),
184  PropagationFunc(nullptr) {}
185 
186  TaintPropagationRule(ArgVector &&Src, ArgVector &&Dst,
187  VariadicType Var = VariadicType::None,
188  unsigned VarIndex = InvalidArgIndex,
189  PropagationFuncType Func = nullptr)
190  : SrcArgs(std::move(Src)), DstArgs(std::move(Dst)),
191  VariadicIndex(VarIndex), VarType(Var), PropagationFunc(Func) {}
192 
193  /// Get the propagation rule for a given function.
194  static TaintPropagationRule
195  getTaintPropagationRule(const NameRuleMap &CustomPropagations,
196  const FunctionDecl *FDecl, StringRef Name,
197  CheckerContext &C);
198 
199  void addSrcArg(unsigned A) { SrcArgs.push_back(A); }
200  void addDstArg(unsigned A) { DstArgs.push_back(A); }
201 
202  bool isNull() const {
203  return SrcArgs.empty() && DstArgs.empty() &&
204  VariadicType::None == VarType;
205  }
206 
207  bool isDestinationArgument(unsigned ArgNum) const {
208  return (llvm::find(DstArgs, ArgNum) != DstArgs.end());
209  }
210 
211  static bool isTaintedOrPointsToTainted(const Expr *E, ProgramStateRef State,
212  CheckerContext &C) {
213  if (isTainted(State, E, C.getLocationContext()) || isStdin(E, C))
214  return true;
215 
216  if (!E->getType().getTypePtr()->isPointerType())
217  return false;
218 
219  Optional<SVal> V = getPointedToSVal(C, E);
220  return (V && isTainted(State, *V));
221  }
222 
223  /// Pre-process a function which propagates taint according to the
224  /// taint rule.
225  ProgramStateRef process(const CallExpr *CE, CheckerContext &C) const;
226 
227  // Functions for custom taintedness propagation.
228  static bool postSocket(bool IsTainted, const CallExpr *CE,
229  CheckerContext &C);
230  };
231 
232  /// Defines a map between the propagation function's name and
233  /// TaintPropagationRule.
234  NameRuleMap CustomPropagations;
235 
236  /// Defines a map between the filter function's name and filtering args.
237  NameArgMap CustomFilters;
238 
239  /// Defines a map between the sink function's name and sinking args.
240  NameArgMap CustomSinks;
241 };
242 
243 const unsigned GenericTaintChecker::ReturnValueIndex;
244 const unsigned GenericTaintChecker::InvalidArgIndex;
245 
246 // FIXME: these lines can be removed in C++17
247 constexpr llvm::StringLiteral GenericTaintChecker::MsgUncontrolledFormatString;
248 constexpr llvm::StringLiteral GenericTaintChecker::MsgSanitizeSystemArgs;
249 constexpr llvm::StringLiteral GenericTaintChecker::MsgTaintedBufferSize;
250 constexpr llvm::StringLiteral GenericTaintChecker::MsgCustomSink;
251 } // end of anonymous namespace
252 
253 using TaintConfig = GenericTaintChecker::TaintConfiguration;
254 
255 LLVM_YAML_IS_SEQUENCE_VECTOR(TaintConfig::Propagation)
256 LLVM_YAML_IS_SEQUENCE_VECTOR(TaintConfig::NameArgsPair)
257 
258 namespace llvm {
259 namespace yaml {
260 template <> struct MappingTraits<TaintConfig> {
261  static void mapping(IO &IO, TaintConfig &Config) {
262  IO.mapOptional("Propagations", Config.Propagations);
263  IO.mapOptional("Filters", Config.Filters);
264  IO.mapOptional("Sinks", Config.Sinks);
265  }
266 };
267 
268 template <> struct MappingTraits<TaintConfig::Propagation> {
269  static void mapping(IO &IO, TaintConfig::Propagation &Propagation) {
270  IO.mapRequired("Name", Propagation.Name);
271  IO.mapOptional("SrcArgs", Propagation.SrcArgs);
272  IO.mapOptional("DstArgs", Propagation.DstArgs);
273  IO.mapOptional("VariadicType", Propagation.VarType,
275  IO.mapOptional("VariadicIndex", Propagation.VarIndex,
276  GenericTaintChecker::InvalidArgIndex);
277  }
278 };
279 
280 template <> struct ScalarEnumerationTraits<GenericTaintChecker::VariadicType> {
281  static void enumeration(IO &IO, GenericTaintChecker::VariadicType &Value) {
282  IO.enumCase(Value, "None", GenericTaintChecker::VariadicType::None);
283  IO.enumCase(Value, "Src", GenericTaintChecker::VariadicType::Src);
284  IO.enumCase(Value, "Dst", GenericTaintChecker::VariadicType::Dst);
285  }
286 };
287 
288 template <> struct MappingTraits<TaintConfig::NameArgsPair> {
289  static void mapping(IO &IO, TaintConfig::NameArgsPair &NameArg) {
290  IO.mapRequired("Name", NameArg.first);
291  IO.mapRequired("Args", NameArg.second);
292  }
293 };
294 } // namespace yaml
295 } // namespace llvm
296 
297 /// A set which is used to pass information from call pre-visit instruction
298 /// to the call post-visit. The values are unsigned integers, which are either
299 /// ReturnValueIndex, or indexes of the pointer/reference argument, which
300 /// points to data, which should be tainted on return.
301 REGISTER_SET_WITH_PROGRAMSTATE(TaintArgsOnPostVisit, unsigned)
302 
303 GenericTaintChecker::ArgVector GenericTaintChecker::convertToArgVector(
304  CheckerManager &Mgr, const std::string &Option, SignedArgVector Args) {
305  ArgVector Result;
306  for (int Arg : Args) {
307  if (Arg == -1)
308  Result.push_back(ReturnValueIndex);
309  else if (Arg < -1) {
310  Result.push_back(InvalidArgIndex);
311  Mgr.reportInvalidCheckerOptionValue(
312  this, Option,
313  "an argument number for propagation rules greater or equal to -1");
314  } else
315  Result.push_back(static_cast<unsigned>(Arg));
316  }
317  return Result;
318 }
319 
320 void GenericTaintChecker::parseConfiguration(CheckerManager &Mgr,
321  const std::string &Option,
322  TaintConfiguration &&Config) {
323  for (auto &P : Config.Propagations) {
324  GenericTaintChecker::CustomPropagations.try_emplace(
325  P.Name, std::move(P.SrcArgs),
326  convertToArgVector(Mgr, Option, P.DstArgs), P.VarType, P.VarIndex);
327  }
328 
329  for (auto &F : Config.Filters) {
330  GenericTaintChecker::CustomFilters.try_emplace(F.first,
331  std::move(F.second));
332  }
333 
334  for (auto &S : Config.Sinks) {
335  GenericTaintChecker::CustomSinks.try_emplace(S.first, std::move(S.second));
336  }
337 }
338 
339 GenericTaintChecker::TaintPropagationRule
340 GenericTaintChecker::TaintPropagationRule::getTaintPropagationRule(
341  const NameRuleMap &CustomPropagations, const FunctionDecl *FDecl,
342  StringRef Name, CheckerContext &C) {
343  // TODO: Currently, we might lose precision here: we always mark a return
344  // value as tainted even if it's just a pointer, pointing to tainted data.
345 
346  // Check for exact name match for functions without builtin substitutes.
347  TaintPropagationRule Rule =
348  llvm::StringSwitch<TaintPropagationRule>(Name)
349  // Source functions
350  // TODO: Add support for vfscanf & family.
351  .Case("fdopen", TaintPropagationRule({}, {ReturnValueIndex}))
352  .Case("fopen", TaintPropagationRule({}, {ReturnValueIndex}))
353  .Case("freopen", TaintPropagationRule({}, {ReturnValueIndex}))
354  .Case("getch", TaintPropagationRule({}, {ReturnValueIndex}))
355  .Case("getchar", TaintPropagationRule({}, {ReturnValueIndex}))
356  .Case("getchar_unlocked",
357  TaintPropagationRule({}, {ReturnValueIndex}))
358  .Case("getenv", TaintPropagationRule({}, {ReturnValueIndex}))
359  .Case("gets", TaintPropagationRule({}, {0, ReturnValueIndex}))
360  .Case("scanf", TaintPropagationRule({}, {}, VariadicType::Dst, 1))
361  .Case("socket",
362  TaintPropagationRule({}, {ReturnValueIndex}, VariadicType::None,
363  InvalidArgIndex,
364  &TaintPropagationRule::postSocket))
365  .Case("wgetch", TaintPropagationRule({}, {ReturnValueIndex}))
366  // Propagating functions
367  .Case("atoi", TaintPropagationRule({0}, {ReturnValueIndex}))
368  .Case("atol", TaintPropagationRule({0}, {ReturnValueIndex}))
369  .Case("atoll", TaintPropagationRule({0}, {ReturnValueIndex}))
370  .Case("fgetc", TaintPropagationRule({0}, {ReturnValueIndex}))
371  .Case("fgetln", TaintPropagationRule({0}, {ReturnValueIndex}))
372  .Case("fgets", TaintPropagationRule({2}, {0, ReturnValueIndex}))
373  .Case("fscanf", TaintPropagationRule({0}, {}, VariadicType::Dst, 2))
374  .Case("getc", TaintPropagationRule({0}, {ReturnValueIndex}))
375  .Case("getc_unlocked", TaintPropagationRule({0}, {ReturnValueIndex}))
376  .Case("getdelim", TaintPropagationRule({3}, {0}))
377  .Case("getline", TaintPropagationRule({2}, {0}))
378  .Case("getw", TaintPropagationRule({0}, {ReturnValueIndex}))
379  .Case("pread",
380  TaintPropagationRule({0, 1, 2, 3}, {1, ReturnValueIndex}))
381  .Case("read", TaintPropagationRule({0, 2}, {1, ReturnValueIndex}))
382  .Case("strchr", TaintPropagationRule({0}, {ReturnValueIndex}))
383  .Case("strrchr", TaintPropagationRule({0}, {ReturnValueIndex}))
384  .Case("tolower", TaintPropagationRule({0}, {ReturnValueIndex}))
385  .Case("toupper", TaintPropagationRule({0}, {ReturnValueIndex}))
386  .Default(TaintPropagationRule());
387 
388  if (!Rule.isNull())
389  return Rule;
390 
391  // Check if it's one of the memory setting/copying functions.
392  // This check is specialized but faster then calling isCLibraryFunction.
393  unsigned BId = 0;
394  if ((BId = FDecl->getMemoryFunctionKind()))
395  switch (BId) {
396  case Builtin::BImemcpy:
397  case Builtin::BImemmove:
398  case Builtin::BIstrncpy:
399  case Builtin::BIstrncat:
400  return TaintPropagationRule({1, 2}, {0, ReturnValueIndex});
401  case Builtin::BIstrlcpy:
402  case Builtin::BIstrlcat:
403  return TaintPropagationRule({1, 2}, {0});
404  case Builtin::BIstrndup:
405  return TaintPropagationRule({0, 1}, {ReturnValueIndex});
406 
407  default:
408  break;
409  };
410 
411  // Process all other functions which could be defined as builtins.
412  if (Rule.isNull()) {
413  if (C.isCLibraryFunction(FDecl, "snprintf"))
414  return TaintPropagationRule({1}, {0, ReturnValueIndex}, VariadicType::Src,
415  3);
416  else if (C.isCLibraryFunction(FDecl, "sprintf"))
417  return TaintPropagationRule({}, {0, ReturnValueIndex}, VariadicType::Src,
418  2);
419  else if (C.isCLibraryFunction(FDecl, "strcpy") ||
420  C.isCLibraryFunction(FDecl, "stpcpy") ||
421  C.isCLibraryFunction(FDecl, "strcat"))
422  return TaintPropagationRule({1}, {0, ReturnValueIndex});
423  else if (C.isCLibraryFunction(FDecl, "bcopy"))
424  return TaintPropagationRule({0, 2}, {1});
425  else if (C.isCLibraryFunction(FDecl, "strdup") ||
426  C.isCLibraryFunction(FDecl, "strdupa"))
427  return TaintPropagationRule({0}, {ReturnValueIndex});
428  else if (C.isCLibraryFunction(FDecl, "wcsdup"))
429  return TaintPropagationRule({0}, {ReturnValueIndex});
430  }
431 
432  // Skipping the following functions, since they might be used for cleansing
433  // or smart memory copy:
434  // - memccpy - copying until hitting a special character.
435 
436  auto It = CustomPropagations.find(Name);
437  if (It != CustomPropagations.end())
438  return It->getValue();
439 
440  return TaintPropagationRule();
441 }
442 
443 void GenericTaintChecker::checkPreStmt(const CallExpr *CE,
444  CheckerContext &C) const {
445  // Check for taintedness related errors first: system call, uncontrolled
446  // format string, tainted buffer size.
447  if (checkPre(CE, C))
448  return;
449 
450  // Marks the function's arguments and/or return value tainted if it present in
451  // the list.
452  addSourcesPre(CE, C);
453 }
454 
455 void GenericTaintChecker::checkPostStmt(const CallExpr *CE,
456  CheckerContext &C) const {
457  // Set the marked values as tainted. The return value only accessible from
458  // checkPostStmt.
459  propagateFromPre(CE, C);
460 }
461 
462 void GenericTaintChecker::printState(raw_ostream &Out, ProgramStateRef State,
463  const char *NL, const char *Sep) const {
464  printTaint(State, Out, NL, Sep);
465 }
466 
467 void GenericTaintChecker::addSourcesPre(const CallExpr *CE,
468  CheckerContext &C) const {
469  ProgramStateRef State = nullptr;
470  const FunctionDecl *FDecl = C.getCalleeDecl(CE);
471  if (!FDecl || FDecl->getKind() != Decl::Function)
472  return;
473 
474  StringRef Name = C.getCalleeName(FDecl);
475  if (Name.empty())
476  return;
477 
478  // First, try generating a propagation rule for this function.
479  TaintPropagationRule Rule = TaintPropagationRule::getTaintPropagationRule(
480  this->CustomPropagations, FDecl, Name, C);
481  if (!Rule.isNull()) {
482  State = Rule.process(CE, C);
483  if (!State)
484  return;
485  C.addTransition(State);
486  return;
487  }
488 
489  if (!State)
490  return;
491  C.addTransition(State);
492 }
493 
494 bool GenericTaintChecker::propagateFromPre(const CallExpr *CE,
495  CheckerContext &C) const {
496  ProgramStateRef State = C.getState();
497 
498  // Depending on what was tainted at pre-visit, we determined a set of
499  // arguments which should be tainted after the function returns. These are
500  // stored in the state as TaintArgsOnPostVisit set.
501  TaintArgsOnPostVisitTy TaintArgs = State->get<TaintArgsOnPostVisit>();
502  if (TaintArgs.isEmpty())
503  return false;
504 
505  for (unsigned ArgNum : TaintArgs) {
506  // Special handling for the tainted return value.
507  if (ArgNum == ReturnValueIndex) {
508  State = addTaint(State, CE, C.getLocationContext());
509  continue;
510  }
511 
512  // The arguments are pointer arguments. The data they are pointing at is
513  // tainted after the call.
514  if (CE->getNumArgs() < (ArgNum + 1))
515  return false;
516  const Expr *Arg = CE->getArg(ArgNum);
517  Optional<SVal> V = getPointedToSVal(C, Arg);
518  if (V)
519  State = addTaint(State, *V);
520  }
521 
522  // Clear up the taint info from the state.
523  State = State->remove<TaintArgsOnPostVisit>();
524 
525  if (State != C.getState()) {
526  C.addTransition(State);
527  return true;
528  }
529  return false;
530 }
531 
532 bool GenericTaintChecker::checkPre(const CallExpr *CE,
533  CheckerContext &C) const {
534 
535  if (checkUncontrolledFormatString(CE, C))
536  return true;
537 
538  const FunctionDecl *FDecl = C.getCalleeDecl(CE);
539  if (!FDecl || FDecl->getKind() != Decl::Function)
540  return false;
541 
542  StringRef Name = C.getCalleeName(FDecl);
543  if (Name.empty())
544  return false;
545 
546  if (checkSystemCall(CE, Name, C))
547  return true;
548 
549  if (checkTaintedBufferSize(CE, FDecl, C))
550  return true;
551 
552  if (checkCustomSinks(CE, Name, C))
553  return true;
554 
555  return false;
556 }
557 
558 Optional<SVal> GenericTaintChecker::getPointedToSVal(CheckerContext &C,
559  const Expr *Arg) {
560  ProgramStateRef State = C.getState();
561  SVal AddrVal = C.getSVal(Arg->IgnoreParens());
562  if (AddrVal.isUnknownOrUndef())
563  return None;
564 
565  Optional<Loc> AddrLoc = AddrVal.getAs<Loc>();
566  if (!AddrLoc)
567  return None;
568 
569  QualType ArgTy = Arg->getType().getCanonicalType();
570  if (!ArgTy->isPointerType())
571  return None;
572 
573  QualType ValTy = ArgTy->getPointeeType();
574 
575  // Do not dereference void pointers. Treat them as byte pointers instead.
576  // FIXME: we might want to consider more than just the first byte.
577  if (ValTy->isVoidType())
578  ValTy = C.getASTContext().CharTy;
579 
580  return State->getSVal(*AddrLoc, ValTy);
581 }
582 
584 GenericTaintChecker::TaintPropagationRule::process(const CallExpr *CE,
585  CheckerContext &C) const {
586  ProgramStateRef State = C.getState();
587 
588  // Check for taint in arguments.
589  bool IsTainted = true;
590  for (unsigned ArgNum : SrcArgs) {
591  if (ArgNum >= CE->getNumArgs())
592  continue;
593 
594  if ((IsTainted = isTaintedOrPointsToTainted(CE->getArg(ArgNum), State, C)))
595  break;
596  }
597 
598  // Check for taint in variadic arguments.
599  if (!IsTainted && VariadicType::Src == VarType) {
600  // Check if any of the arguments is tainted
601  for (unsigned i = VariadicIndex; i < CE->getNumArgs(); ++i) {
602  if ((IsTainted = isTaintedOrPointsToTainted(CE->getArg(i), State, C)))
603  break;
604  }
605  }
606 
607  if (PropagationFunc)
608  IsTainted = PropagationFunc(IsTainted, CE, C);
609 
610  if (!IsTainted)
611  return State;
612 
613  // Mark the arguments which should be tainted after the function returns.
614  for (unsigned ArgNum : DstArgs) {
615  // Should mark the return value?
616  if (ArgNum == ReturnValueIndex) {
617  State = State->add<TaintArgsOnPostVisit>(ReturnValueIndex);
618  continue;
619  }
620 
621  if (ArgNum >= CE->getNumArgs())
622  continue;
623 
624  // Mark the given argument.
625  State = State->add<TaintArgsOnPostVisit>(ArgNum);
626  }
627 
628  // Mark all variadic arguments tainted if present.
629  if (VariadicType::Dst == VarType) {
630  // For all pointer and references that were passed in:
631  // If they are not pointing to const data, mark data as tainted.
632  // TODO: So far we are just going one level down; ideally we'd need to
633  // recurse here.
634  for (unsigned i = VariadicIndex; i < CE->getNumArgs(); ++i) {
635  const Expr *Arg = CE->getArg(i);
636  // Process pointer argument.
637  const Type *ArgTy = Arg->getType().getTypePtr();
638  QualType PType = ArgTy->getPointeeType();
639  if ((!PType.isNull() && !PType.isConstQualified()) ||
640  (ArgTy->isReferenceType() && !Arg->getType().isConstQualified()))
641  State = State->add<TaintArgsOnPostVisit>(i);
642  }
643  }
644 
645  return State;
646 }
647 
648 // If argument 0(protocol domain) is network, the return value should get taint.
649 bool GenericTaintChecker::TaintPropagationRule::postSocket(bool /*IsTainted*/,
650  const CallExpr *CE,
651  CheckerContext &C) {
652  SourceLocation DomLoc = CE->getArg(0)->getExprLoc();
653  StringRef DomName = C.getMacroNameOrSpelling(DomLoc);
654  // White list the internal communication protocols.
655  if (DomName.equals("AF_SYSTEM") || DomName.equals("AF_LOCAL") ||
656  DomName.equals("AF_UNIX") || DomName.equals("AF_RESERVED_36"))
657  return false;
658 
659  return true;
660 }
661 
662 bool GenericTaintChecker::isStdin(const Expr *E, CheckerContext &C) {
663  ProgramStateRef State = C.getState();
664  SVal Val = C.getSVal(E);
665 
666  // stdin is a pointer, so it would be a region.
667  const MemRegion *MemReg = Val.getAsRegion();
668 
669  // The region should be symbolic, we do not know it's value.
670  const SymbolicRegion *SymReg = dyn_cast_or_null<SymbolicRegion>(MemReg);
671  if (!SymReg)
672  return false;
673 
674  // Get it's symbol and find the declaration region it's pointing to.
675  const SymbolRegionValue *Sm =
676  dyn_cast<SymbolRegionValue>(SymReg->getSymbol());
677  if (!Sm)
678  return false;
679  const DeclRegion *DeclReg = dyn_cast_or_null<DeclRegion>(Sm->getRegion());
680  if (!DeclReg)
681  return false;
682 
683  // This region corresponds to a declaration, find out if it's a global/extern
684  // variable named stdin with the proper type.
685  if (const auto *D = dyn_cast_or_null<VarDecl>(DeclReg->getDecl())) {
686  D = D->getCanonicalDecl();
687  if ((D->getName().find("stdin") != StringRef::npos) && D->isExternC()) {
688  const auto *PtrTy = dyn_cast<PointerType>(D->getType().getTypePtr());
689  if (PtrTy && PtrTy->getPointeeType().getCanonicalType() ==
690  C.getASTContext().getFILEType().getCanonicalType())
691  return true;
692  }
693  }
694  return false;
695 }
696 
697 static bool getPrintfFormatArgumentNum(const CallExpr *CE,
698  const CheckerContext &C,
699  unsigned &ArgNum) {
700  // Find if the function contains a format string argument.
701  // Handles: fprintf, printf, sprintf, snprintf, vfprintf, vprintf, vsprintf,
702  // vsnprintf, syslog, custom annotated functions.
703  const FunctionDecl *FDecl = C.getCalleeDecl(CE);
704  if (!FDecl)
705  return false;
706  for (const auto *Format : FDecl->specific_attrs<FormatAttr>()) {
707  ArgNum = Format->getFormatIdx() - 1;
708  if ((Format->getType()->getName() == "printf") && CE->getNumArgs() > ArgNum)
709  return true;
710  }
711 
712  // Or if a function is named setproctitle (this is a heuristic).
713  if (C.getCalleeName(CE).find("setproctitle") != StringRef::npos) {
714  ArgNum = 0;
715  return true;
716  }
717 
718  return false;
719 }
720 
721 bool GenericTaintChecker::generateReportIfTainted(const Expr *E, StringRef Msg,
722  CheckerContext &C) const {
723  assert(E);
724 
725  // Check for taint.
726  ProgramStateRef State = C.getState();
727  Optional<SVal> PointedToSVal = getPointedToSVal(C, E);
728  SVal TaintedSVal;
729  if (PointedToSVal && isTainted(State, *PointedToSVal))
730  TaintedSVal = *PointedToSVal;
731  else if (isTainted(State, E, C.getLocationContext()))
732  TaintedSVal = C.getSVal(E);
733  else
734  return false;
735 
736  // Generate diagnostic.
737  if (ExplodedNode *N = C.generateNonFatalErrorNode()) {
738  initBugType();
739  auto report = std::make_unique<PathSensitiveBugReport>(*BT, Msg, N);
740  report->addRange(E->getSourceRange());
741  report->addVisitor(std::make_unique<TaintBugVisitor>(TaintedSVal));
742  C.emitReport(std::move(report));
743  return true;
744  }
745  return false;
746 }
747 
748 bool GenericTaintChecker::checkUncontrolledFormatString(
749  const CallExpr *CE, CheckerContext &C) const {
750  // Check if the function contains a format string argument.
751  unsigned ArgNum = 0;
752  if (!getPrintfFormatArgumentNum(CE, C, ArgNum))
753  return false;
754 
755  // If either the format string content or the pointer itself are tainted,
756  // warn.
757  return generateReportIfTainted(CE->getArg(ArgNum),
758  MsgUncontrolledFormatString, C);
759 }
760 
761 bool GenericTaintChecker::checkSystemCall(const CallExpr *CE, StringRef Name,
762  CheckerContext &C) const {
763  // TODO: It might make sense to run this check on demand. In some cases,
764  // we should check if the environment has been cleansed here. We also might
765  // need to know if the user was reset before these calls(seteuid).
766  unsigned ArgNum = llvm::StringSwitch<unsigned>(Name)
767  .Case("system", 0)
768  .Case("popen", 0)
769  .Case("execl", 0)
770  .Case("execle", 0)
771  .Case("execlp", 0)
772  .Case("execv", 0)
773  .Case("execvp", 0)
774  .Case("execvP", 0)
775  .Case("execve", 0)
776  .Case("dlopen", 0)
777  .Default(InvalidArgIndex);
778 
779  if (ArgNum == InvalidArgIndex || CE->getNumArgs() < (ArgNum + 1))
780  return false;
781 
782  return generateReportIfTainted(CE->getArg(ArgNum), MsgSanitizeSystemArgs, C);
783 }
784 
785 // TODO: Should this check be a part of the CString checker?
786 // If yes, should taint be a global setting?
787 bool GenericTaintChecker::checkTaintedBufferSize(const CallExpr *CE,
788  const FunctionDecl *FDecl,
789  CheckerContext &C) const {
790  // If the function has a buffer size argument, set ArgNum.
791  unsigned ArgNum = InvalidArgIndex;
792  unsigned BId = 0;
793  if ((BId = FDecl->getMemoryFunctionKind()))
794  switch (BId) {
795  case Builtin::BImemcpy:
796  case Builtin::BImemmove:
797  case Builtin::BIstrncpy:
798  ArgNum = 2;
799  break;
800  case Builtin::BIstrndup:
801  ArgNum = 1;
802  break;
803  default:
804  break;
805  };
806 
807  if (ArgNum == InvalidArgIndex) {
808  if (C.isCLibraryFunction(FDecl, "malloc") ||
809  C.isCLibraryFunction(FDecl, "calloc") ||
810  C.isCLibraryFunction(FDecl, "alloca"))
811  ArgNum = 0;
812  else if (C.isCLibraryFunction(FDecl, "memccpy"))
813  ArgNum = 3;
814  else if (C.isCLibraryFunction(FDecl, "realloc"))
815  ArgNum = 1;
816  else if (C.isCLibraryFunction(FDecl, "bcopy"))
817  ArgNum = 2;
818  }
819 
820  return ArgNum != InvalidArgIndex && CE->getNumArgs() > ArgNum &&
821  generateReportIfTainted(CE->getArg(ArgNum), MsgTaintedBufferSize, C);
822 }
823 
824 bool GenericTaintChecker::checkCustomSinks(const CallExpr *CE, StringRef Name,
825  CheckerContext &C) const {
826  auto It = CustomSinks.find(Name);
827  if (It == CustomSinks.end())
828  return false;
829 
830  const GenericTaintChecker::ArgVector &Args = It->getValue();
831  for (unsigned ArgNum : Args) {
832  if (ArgNum >= CE->getNumArgs())
833  continue;
834 
835  if (generateReportIfTainted(CE->getArg(ArgNum), MsgCustomSink, C))
836  return true;
837  }
838 
839  return false;
840 }
841 
842 void ento::registerGenericTaintChecker(CheckerManager &Mgr) {
843  auto *Checker = Mgr.registerChecker<GenericTaintChecker>();
844  std::string Option{"Config"};
845  StringRef ConfigFile =
846  Mgr.getAnalyzerOptions().getCheckerStringOption(Checker, Option);
848  getConfiguration<TaintConfig>(Mgr, Checker, Option, ConfigFile);
849  if (Config)
850  Checker->parseConfiguration(Mgr, Option, std::move(Config.getValue()));
851 }
852 
853 bool ento::shouldRegisterGenericTaintChecker(const LangOptions &LO) {
854  return true;
855 }
Represents a function declaration or definition.
Definition: Decl.h:1756
static void enumeration(IO &IO, GenericTaintChecker::VariadicType &Value)
unsigned getMemoryFunctionKind() const
Identify a memory copying or setting function.
Definition: Decl.cpp:3771
PointerType - C99 6.7.5.1 - Pointer Declarators.
Definition: Type.h:2575
A (possibly-)qualified type.
Definition: Type.h:643
Expr * getArg(unsigned Arg)
getArg - Return the specified argument.
Definition: Expr.h:2677
Specialize PointerLikeTypeTraits to allow LazyGenerationalUpdatePtr to be placed into a PointerUnion...
Definition: Dominators.h:30
unsigned getNumArgs() const
getNumArgs - Return the number of actual arguments to this call.
Definition: Expr.h:2664
QualType getPointeeType() const
If this is a pointer, ObjC object pointer, or block pointer, this returns the respective pointee...
Definition: Type.cpp:518
IntrusiveRefCntPtr< const ProgramState > ProgramStateRef
StringRef P
The base class of the type hierarchy.
Definition: Type.h:1436
constexpr XRayInstrMask Function
Definition: XRayInstr.h:38
LineState State
static void mapping(IO &IO, TaintConfig::Propagation &Propagation)
Definition: Format.h:2327
bool isReferenceType() const
Definition: Type.h:6402
__DEVICE__ int max(int __a, int __b)
Keeps track of the various options that can be enabled, which controls the dialect of C or C++ that i...
Definition: LangOptions.h:49
const Type * getTypePtr() const
Retrieves a pointer to the underlying (unqualified) type.
Definition: Type.h:6148
GenericTaintChecker::TaintConfiguration TaintConfig
void printTaint(ProgramStateRef State, raw_ostream &Out, const char *nl="\n", const char *sep="")
This represents one expression.
Definition: Expr.h:108
#define V(N, I)
Definition: ASTContext.h:2915
#define bool
Definition: stdbool.h:15
static bool getPrintfFormatArgumentNum(const CallExpr *CE, const CheckerContext &C, unsigned &ArgNum)
QualType getType() const
Definition: Expr.h:137
bool isNull() const
Return true if this QualType doesn't point to a type yet.
Definition: Type.h:708
static void mapping(IO &IO, TaintConfig &Config)
bool isConstQualified() const
Determine whether this type is const-qualified.
Definition: Type.h:6207
QualType getCanonicalType() const
Definition: Type.h:6187
Encodes a location in the source.
bool isTainted(ProgramStateRef State, const Stmt *S, const LocationContext *LCtx, TaintTagType Kind=TaintTagGeneric)
Check if the statement has a tainted value in the given state.
LLVM_NODISCARD ProgramStateRef addTaint(ProgramStateRef State, const Stmt *S, const LocationContext *LCtx, TaintTagType Kind=TaintTagGeneric)
Create a new state in which the value of the statement is marked as tainted.
constexpr XRayInstrMask None
Definition: XRayInstr.h:37
static void mapping(IO &IO, TaintConfig::NameArgsPair &NameArg)
SourceLocation getExprLoc() const LLVM_READONLY
getExprLoc - Return the preferred location for the arrow when diagnosing a problem with a generic exp...
Definition: Expr.cpp:221
#define REGISTER_SET_WITH_PROGRAMSTATE(Name, Elem)
Declares an immutable set of type NameTy, suitable for placement into the ProgramState.
Dataflow Directional Tag Classes.
Kind getKind() const
Definition: DeclBase.h:432
llvm::iterator_range< specific_attr_iterator< T > > specific_attrs() const
Definition: DeclBase.h:524
bool isVoidType() const
Definition: Type.h:6649
SourceRange getSourceRange() const LLVM_READONLY
SourceLocation tokens are not useful in isolation - they are low level value objects created/interpre...
Definition: Stmt.cpp:262
CallExpr - Represents a function call (C99 6.5.2.2, C++ [expr.call]).
Definition: Expr.h:2521
bool isPointerType() const
Definition: Type.h:6390
std::error_code parseConfiguration(StringRef Text, FormatStyle *Style)
Parse configuration from YAML-formatted text.
Definition: Format.cpp:1096
Defines enum values for all the target-independent builtin functions.
Expr * IgnoreParens() LLVM_READONLY
Skip past any parentheses which might surround this expression until reaching a fixed point...
Definition: Expr.cpp:2962