clang  14.0.0git
GenericTaintChecker.cpp
Go to the documentation of this file.
1 //== GenericTaintChecker.cpp ----------------------------------- -*- C++ -*--=//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This checker defines the attack surface for generic taint propagation.
10 //
11 // The taint information produced by it might be useful to other checkers. For
12 // example, checkers should report errors which involve tainted data more
13 // aggressively, even if the involved symbols are under constrained.
14 //
15 //===----------------------------------------------------------------------===//
16 
17 #include "Taint.h"
18 #include "Yaml.h"
19 #include "clang/AST/Attr.h"
20 #include "clang/Basic/Builtins.h"
28 #include "llvm/Support/YAMLTraits.h"
29 
30 #include <algorithm>
31 #include <limits>
32 #include <memory>
33 #include <unordered_map>
34 #include <utility>
35 
36 using namespace clang;
37 using namespace ento;
38 using namespace taint;
39 
40 namespace {
41 class GenericTaintChecker : public Checker<check::PreCall, check::PostCall> {
42 public:
43  static void *getTag() {
44  static int Tag;
45  return &Tag;
46  }
47 
48  void checkPreCall(const CallEvent &Call, CheckerContext &C) const;
49  void checkPostCall(const CallEvent &Call, CheckerContext &C) const;
50 
51  void printState(raw_ostream &Out, ProgramStateRef State, const char *NL,
52  const char *Sep) const override;
53 
54  using ArgVector = SmallVector<unsigned, 2>;
55  using SignedArgVector = SmallVector<int, 2>;
56 
57  enum class VariadicType { None, Src, Dst };
58 
59  /// Used to parse the configuration file.
60  struct TaintConfiguration {
61  using NameScopeArgs = std::tuple<std::string, std::string, ArgVector>;
62 
63  struct Propagation {
64  std::string Name;
66  ArgVector SrcArgs;
67  SignedArgVector DstArgs;
68  VariadicType VarType;
69  unsigned VarIndex;
70  };
71 
72  std::vector<Propagation> Propagations;
73  std::vector<NameScopeArgs> Filters;
74  std::vector<NameScopeArgs> Sinks;
75 
76  TaintConfiguration() = default;
77  TaintConfiguration(const TaintConfiguration &) = default;
78  TaintConfiguration(TaintConfiguration &&) = default;
79  TaintConfiguration &operator=(const TaintConfiguration &) = default;
80  TaintConfiguration &operator=(TaintConfiguration &&) = default;
81  };
82 
83  /// Convert SignedArgVector to ArgVector.
84  ArgVector convertToArgVector(CheckerManager &Mgr, const std::string &Option,
85  const SignedArgVector &Args);
86 
87  /// Parse the config.
88  void parseConfiguration(CheckerManager &Mgr, const std::string &Option,
89  TaintConfiguration &&Config);
90 
91  static const unsigned InvalidArgIndex{std::numeric_limits<unsigned>::max()};
92  /// Denotes the return vale.
93  static const unsigned ReturnValueIndex{std::numeric_limits<unsigned>::max() -
94  1};
95 
96 private:
97  mutable std::unique_ptr<BugType> BT;
98  void initBugType() const {
99  if (!BT)
100  BT = std::make_unique<BugType>(this, "Use of Untrusted Data",
101  "Untrusted Data");
102  }
103 
104  struct FunctionData {
105  FunctionData() = delete;
106  FunctionData(const FunctionDecl *FDecl, StringRef Name,
107  std::string FullName)
108  : FDecl(FDecl), Name(Name), FullName(std::move(FullName)) {}
109  FunctionData(const FunctionData &) = default;
110  FunctionData(FunctionData &&) = default;
111  FunctionData &operator=(const FunctionData &) = delete;
112  FunctionData &operator=(FunctionData &&) = delete;
113 
114  static Optional<FunctionData> create(const CallEvent &Call,
115  const CheckerContext &C) {
116  if (!Call.getDecl())
117  return None;
118 
119  const FunctionDecl *FDecl = Call.getDecl()->getAsFunction();
120  if (!FDecl || (FDecl->getKind() != Decl::Function &&
121  FDecl->getKind() != Decl::CXXMethod))
122  return None;
123 
124  StringRef Name = C.getCalleeName(FDecl);
125  std::string FullName = FDecl->getQualifiedNameAsString();
126  if (Name.empty() || FullName.empty())
127  return None;
128 
129  return FunctionData{FDecl, Name, std::move(FullName)};
130  }
131 
132  bool isInScope(StringRef Scope) const {
133  return StringRef(FullName).startswith(Scope);
134  }
135 
136  const FunctionDecl *const FDecl;
137  const StringRef Name;
138  const std::string FullName;
139  };
140 
141  /// Catch taint related bugs. Check if tainted data is passed to a
142  /// system call etc. Returns true on matching.
143  bool checkPre(const CallEvent &Call, const FunctionData &FData,
144  CheckerContext &C) const;
145 
146  /// Add taint sources on a pre-visit. Returns true on matching.
147  bool addSourcesPre(const CallEvent &Call, const FunctionData &FData,
148  CheckerContext &C) const;
149 
150  /// Mark filter's arguments not tainted on a pre-visit. Returns true on
151  /// matching.
152  bool addFiltersPre(const CallEvent &Call, const FunctionData &FData,
153  CheckerContext &C) const;
154 
155  /// Propagate taint generated at pre-visit. Returns true on matching.
156  static bool propagateFromPre(const CallEvent &Call, CheckerContext &C);
157 
158  /// Check if the region the expression evaluates to is the standard input,
159  /// and thus, is tainted.
160  static bool isStdin(const Expr *E, CheckerContext &C);
161 
162  /// Given a pointer argument, return the value it points to.
163  static Optional<SVal> getPointeeOf(CheckerContext &C, const Expr *Arg);
164 
165  /// Check for CWE-134: Uncontrolled Format String.
166  static constexpr llvm::StringLiteral MsgUncontrolledFormatString =
167  "Untrusted data is used as a format string "
168  "(CWE-134: Uncontrolled Format String)";
169  bool checkUncontrolledFormatString(const CallEvent &Call,
170  CheckerContext &C) const;
171 
172  /// Check for:
173  /// CERT/STR02-C. "Sanitize data passed to complex subsystems"
174  /// CWE-78, "Failure to Sanitize Data into an OS Command"
175  static constexpr llvm::StringLiteral MsgSanitizeSystemArgs =
176  "Untrusted data is passed to a system call "
177  "(CERT/STR02-C. Sanitize data passed to complex subsystems)";
178  bool checkSystemCall(const CallEvent &Call, StringRef Name,
179  CheckerContext &C) const;
180 
181  /// Check if tainted data is used as a buffer size ins strn.. functions,
182  /// and allocators.
183  static constexpr llvm::StringLiteral MsgTaintedBufferSize =
184  "Untrusted data is used to specify the buffer size "
185  "(CERT/STR31-C. Guarantee that storage for strings has sufficient space "
186  "for character data and the null terminator)";
187  bool checkTaintedBufferSize(const CallEvent &Call, CheckerContext &C) const;
188 
189  /// Check if tainted data is used as a custom sink's parameter.
190  static constexpr llvm::StringLiteral MsgCustomSink =
191  "Untrusted data is passed to a user-defined sink";
192  bool checkCustomSinks(const CallEvent &Call, const FunctionData &FData,
193  CheckerContext &C) const;
194 
195  /// Generate a report if the expression is tainted or points to tainted data.
196  bool generateReportIfTainted(const Expr *E, StringRef Msg,
197  CheckerContext &C) const;
198 
199  struct TaintPropagationRule;
200  template <typename T>
201  using ConfigDataMap =
202  std::unordered_multimap<std::string, std::pair<std::string, T>>;
203  using NameRuleMap = ConfigDataMap<TaintPropagationRule>;
204  using NameArgMap = ConfigDataMap<ArgVector>;
205 
206  /// Find a function with the given name and scope. Returns the first match
207  /// or the end of the map.
208  template <typename T>
209  static auto findFunctionInConfig(const ConfigDataMap<T> &Map,
210  const FunctionData &FData);
211 
212  /// A struct used to specify taint propagation rules for a function.
213  ///
214  /// If any of the possible taint source arguments is tainted, all of the
215  /// destination arguments should also be tainted. Use InvalidArgIndex in the
216  /// src list to specify that all of the arguments can introduce taint. Use
217  /// InvalidArgIndex in the dst arguments to signify that all the non-const
218  /// pointer and reference arguments might be tainted on return. If
219  /// ReturnValueIndex is added to the dst list, the return value will be
220  /// tainted.
221  struct TaintPropagationRule {
222  using PropagationFuncType = bool (*)(bool IsTainted, const CallEvent &Call,
223  CheckerContext &C);
224 
225  /// List of arguments which can be taint sources and should be checked.
226  ArgVector SrcArgs;
227  /// List of arguments which should be tainted on function return.
228  ArgVector DstArgs;
229  /// Index for the first variadic parameter if exist.
230  unsigned VariadicIndex;
231  /// Show when a function has variadic parameters. If it has, it marks all
232  /// of them as source or destination.
233  VariadicType VarType;
234  /// Special function for tainted source determination. If defined, it can
235  /// override the default behavior.
236  PropagationFuncType PropagationFunc;
237 
238  TaintPropagationRule()
239  : VariadicIndex(InvalidArgIndex), VarType(VariadicType::None),
240  PropagationFunc(nullptr) {}
241 
242  TaintPropagationRule(ArgVector &&Src, ArgVector &&Dst,
243  VariadicType Var = VariadicType::None,
244  unsigned VarIndex = InvalidArgIndex,
245  PropagationFuncType Func = nullptr)
246  : SrcArgs(std::move(Src)), DstArgs(std::move(Dst)),
247  VariadicIndex(VarIndex), VarType(Var), PropagationFunc(Func) {}
248 
249  /// Get the propagation rule for a given function.
250  static TaintPropagationRule
251  getTaintPropagationRule(const NameRuleMap &CustomPropagations,
252  const FunctionData &FData, CheckerContext &C);
253 
254  void addSrcArg(unsigned A) { SrcArgs.push_back(A); }
255  void addDstArg(unsigned A) { DstArgs.push_back(A); }
256 
257  bool isNull() const {
258  return SrcArgs.empty() && DstArgs.empty() &&
259  VariadicType::None == VarType;
260  }
261 
262  bool isDestinationArgument(unsigned ArgNum) const {
263  return llvm::is_contained(DstArgs, ArgNum);
264  }
265 
266  static bool isTaintedOrPointsToTainted(const Expr *E,
267  const ProgramStateRef &State,
268  CheckerContext &C) {
269  if (isTainted(State, E, C.getLocationContext()) || isStdin(E, C))
270  return true;
271 
272  if (!E->getType().getTypePtr()->isPointerType())
273  return false;
274 
275  Optional<SVal> V = getPointeeOf(C, E);
276  return (V && isTainted(State, *V));
277  }
278 
279  /// Pre-process a function which propagates taint according to the
280  /// taint rule.
281  ProgramStateRef process(const CallEvent &Call, CheckerContext &C) const;
282 
283  // Functions for custom taintedness propagation.
284  static bool postSocket(bool IsTainted, const CallEvent &Call,
285  CheckerContext &C);
286  };
287 
288  /// Defines a map between the propagation function's name, scope
289  /// and TaintPropagationRule.
290  NameRuleMap CustomPropagations;
291 
292  /// Defines a map between the filter function's name, scope and filtering
293  /// args.
294  NameArgMap CustomFilters;
295 
296  /// Defines a map between the sink function's name, scope and sinking args.
297  NameArgMap CustomSinks;
298 };
299 
// Namespace-scope definitions for the static data members that
// GenericTaintChecker declares with in-class initializers.
300 const unsigned GenericTaintChecker::ReturnValueIndex;
301 const unsigned GenericTaintChecker::InvalidArgIndex;
302 
303 // FIXME: these lines can be removed in C++17 (static constexpr data members are implicitly inline there).
304 constexpr llvm::StringLiteral GenericTaintChecker::MsgUncontrolledFormatString;
305 constexpr llvm::StringLiteral GenericTaintChecker::MsgSanitizeSystemArgs;
306 constexpr llvm::StringLiteral GenericTaintChecker::MsgTaintedBufferSize;
307 constexpr llvm::StringLiteral GenericTaintChecker::MsgCustomSink;
308 } // end of anonymous namespace
309 
// Short alias for the checker's configuration type; the YAML traits that
// follow are specialized on it and on its nested types.
310 using TaintConfig = GenericTaintChecker::TaintConfiguration;
311 
// Register the element types of the configuration's std::vector members with
// the YAML I/O library so those vectors can be mapped as sequences.
312 LLVM_YAML_IS_SEQUENCE_VECTOR(TaintConfig::Propagation)
313 LLVM_YAML_IS_SEQUENCE_VECTOR(TaintConfig::NameScopeArgs)
314 
315 namespace llvm {
316 namespace yaml {
317 template <> struct MappingTraits<TaintConfig> {
318  static void mapping(IO &IO, TaintConfig &Config) {
319  IO.mapOptional("Propagations", Config.Propagations);
320  IO.mapOptional("Filters", Config.Filters);
321  IO.mapOptional("Sinks", Config.Sinks);
322  }
323 };
324 
325 template <> struct MappingTraits<TaintConfig::Propagation> {
326  static void mapping(IO &IO, TaintConfig::Propagation &Propagation) {
327  IO.mapRequired("Name", Propagation.Name);
328  IO.mapOptional("Scope", Propagation.Scope);
329  IO.mapOptional("SrcArgs", Propagation.SrcArgs);
330  IO.mapOptional("DstArgs", Propagation.DstArgs);
331  IO.mapOptional("VariadicType", Propagation.VarType,
333  IO.mapOptional("VariadicIndex", Propagation.VarIndex,
334  GenericTaintChecker::InvalidArgIndex);
335  }
336 };
337 
338 template <> struct ScalarEnumerationTraits<GenericTaintChecker::VariadicType> {
339  static void enumeration(IO &IO, GenericTaintChecker::VariadicType &Value) {
340  IO.enumCase(Value, "None", GenericTaintChecker::VariadicType::None);
341  IO.enumCase(Value, "Src", GenericTaintChecker::VariadicType::Src);
342  IO.enumCase(Value, "Dst", GenericTaintChecker::VariadicType::Dst);
343  }
344 };
345 
346 template <> struct MappingTraits<TaintConfig::NameScopeArgs> {
347  static void mapping(IO &IO, TaintConfig::NameScopeArgs &NSA) {
348  IO.mapRequired("Name", std::get<0>(NSA));
349  IO.mapOptional("Scope", std::get<1>(NSA));
350  IO.mapRequired("Args", std::get<2>(NSA));
351  }
352 };
353 } // namespace yaml
354 } // namespace llvm
355 
356 /// A set which is used to pass information from the call pre-visit
357 /// to the call post-visit. The values are unsigned integers, which are either
358 /// ReturnValueIndex, or indexes of the pointer/reference arguments whose
359 /// pointed-to data should be tainted on return.
360 REGISTER_SET_WITH_PROGRAMSTATE(TaintArgsOnPostVisit, unsigned)
361 
362 GenericTaintChecker::ArgVector
363 GenericTaintChecker::convertToArgVector(CheckerManager &Mgr,
364  const std::string &Option,
365  const SignedArgVector &Args) {
366  ArgVector Result;
367  for (int Arg : Args) {
368  if (Arg == -1)
369  Result.push_back(ReturnValueIndex);
370  else if (Arg < -1) {
371  Result.push_back(InvalidArgIndex);
372  Mgr.reportInvalidCheckerOptionValue(
373  this, Option,
374  "an argument number for propagation rules greater or equal to -1");
375  } else
376  Result.push_back(static_cast<unsigned>(Arg));
377  }
378  return Result;
379 }
380 
381 void GenericTaintChecker::parseConfiguration(CheckerManager &Mgr,
382  const std::string &Option,
383  TaintConfiguration &&Config) {
384  for (auto &P : Config.Propagations) {
385  GenericTaintChecker::CustomPropagations.emplace(
386  P.Name,
387  std::make_pair(P.Scope, TaintPropagationRule{
388  std::move(P.SrcArgs),
389  convertToArgVector(Mgr, Option, P.DstArgs),
390  P.VarType, P.VarIndex}));
391  }
392 
393  for (auto &F : Config.Filters) {
394  GenericTaintChecker::CustomFilters.emplace(
395  std::get<0>(F),
396  std::make_pair(std::move(std::get<1>(F)), std::move(std::get<2>(F))));
397  }
398 
399  for (auto &S : Config.Sinks) {
400  GenericTaintChecker::CustomSinks.emplace(
401  std::get<0>(S),
402  std::make_pair(std::move(std::get<1>(S)), std::move(std::get<2>(S))));
403  }
404 }
405 
406 template <typename T>
407 auto GenericTaintChecker::findFunctionInConfig(const ConfigDataMap<T> &Map,
408  const FunctionData &FData) {
409  auto Range = Map.equal_range(std::string(FData.Name));
410  auto It =
411  std::find_if(Range.first, Range.second, [&FData](const auto &Entry) {
412  const auto &Value = Entry.second;
413  StringRef Scope = Value.first;
414  return Scope.empty() || FData.isInScope(Scope);
415  });
416  return It != Range.second ? It : Map.end();
417 }
418 
419 GenericTaintChecker::TaintPropagationRule
420 GenericTaintChecker::TaintPropagationRule::getTaintPropagationRule(
421  const NameRuleMap &CustomPropagations, const FunctionData &FData,
422  CheckerContext &C) {
423  // TODO: Currently, we might lose precision here: we always mark a return
424  // value as tainted even if it's just a pointer, pointing to tainted data.
425 
426  // Check for exact name match for functions without builtin substitutes.
427  // Use qualified name, because these are C functions without namespace.
428  TaintPropagationRule Rule =
429  llvm::StringSwitch<TaintPropagationRule>(FData.FullName)
430  // Source functions
431  // TODO: Add support for vfscanf & family.
432  .Case("fdopen", {{}, {ReturnValueIndex}})
433  .Case("fopen", {{}, {ReturnValueIndex}})
434  .Case("freopen", {{}, {ReturnValueIndex}})
435  .Case("getch", {{}, {ReturnValueIndex}})
436  .Case("getchar", {{}, {ReturnValueIndex}})
437  .Case("getchar_unlocked", {{}, {ReturnValueIndex}})
438  .Case("gets", {{}, {0, ReturnValueIndex}})
439  .Case("scanf", {{}, {}, VariadicType::Dst, 1})
440  .Case("socket", {{},
441  {ReturnValueIndex},
443  InvalidArgIndex,
444  &TaintPropagationRule::postSocket})
445  .Case("wgetch", {{}, {ReturnValueIndex}})
446  // Propagating functions
447  .Case("atoi", {{0}, {ReturnValueIndex}})
448  .Case("atol", {{0}, {ReturnValueIndex}})
449  .Case("atoll", {{0}, {ReturnValueIndex}})
450  .Case("fgetc", {{0}, {ReturnValueIndex}})
451  .Case("fgetln", {{0}, {ReturnValueIndex}})
452  .Case("fgets", {{2}, {0, ReturnValueIndex}})
453  .Case("fscanf", {{0}, {}, VariadicType::Dst, 2})
454  .Case("sscanf", {{0}, {}, VariadicType::Dst, 2})
455  .Case("getc", {{0}, {ReturnValueIndex}})
456  .Case("getc_unlocked", {{0}, {ReturnValueIndex}})
457  .Case("getdelim", {{3}, {0}})
458  .Case("getline", {{2}, {0}})
459  .Case("getw", {{0}, {ReturnValueIndex}})
460  .Case("pread", {{0, 1, 2, 3}, {1, ReturnValueIndex}})
461  .Case("read", {{0, 2}, {1, ReturnValueIndex}})
462  .Case("strchr", {{0}, {ReturnValueIndex}})
463  .Case("strrchr", {{0}, {ReturnValueIndex}})
464  .Case("tolower", {{0}, {ReturnValueIndex}})
465  .Case("toupper", {{0}, {ReturnValueIndex}})
466  .Default({});
467 
468  if (!Rule.isNull())
469  return Rule;
470 
471  // `getenv` returns taint only in untrusted environments.
472  if (FData.FullName == "getenv") {
473  if (C.getAnalysisManager()
474  .getAnalyzerOptions()
475  .ShouldAssumeControlledEnvironment)
476  return {};
477  return {{}, {ReturnValueIndex}};
478  }
479 
480  assert(FData.FDecl);
481 
482  // Check if it's one of the memory setting/copying functions.
483  // This check is specialized but faster then calling isCLibraryFunction.
484  const FunctionDecl *FDecl = FData.FDecl;
485  unsigned BId = 0;
486  if ((BId = FDecl->getMemoryFunctionKind())) {
487  switch (BId) {
488  case Builtin::BImemcpy:
489  case Builtin::BImemmove:
490  case Builtin::BIstrncpy:
491  case Builtin::BIstrncat:
492  return {{1, 2}, {0, ReturnValueIndex}};
493  case Builtin::BIstrlcpy:
494  case Builtin::BIstrlcat:
495  return {{1, 2}, {0}};
496  case Builtin::BIstrndup:
497  return {{0, 1}, {ReturnValueIndex}};
498 
499  default:
500  break;
501  }
502  }
503 
504  // Process all other functions which could be defined as builtins.
505  if (Rule.isNull()) {
506  const auto OneOf = [FDecl](const auto &... Name) {
507  // FIXME: use fold expression in C++17
508  using unused = int[];
509  bool ret = false;
510  static_cast<void>(unused{
511  0, (ret |= CheckerContext::isCLibraryFunction(FDecl, Name), 0)...});
512  return ret;
513  };
514  if (OneOf("snprintf"))
515  return {{1}, {0, ReturnValueIndex}, VariadicType::Src, 3};
516  if (OneOf("sprintf"))
517  return {{}, {0, ReturnValueIndex}, VariadicType::Src, 2};
518  if (OneOf("strcpy", "stpcpy", "strcat"))
519  return {{1}, {0, ReturnValueIndex}};
520  if (OneOf("bcopy"))
521  return {{0, 2}, {1}};
522  if (OneOf("strdup", "strdupa", "wcsdup"))
523  return {{0}, {ReturnValueIndex}};
524  }
525 
526  // Skipping the following functions, since they might be used for cleansing or
527  // smart memory copy:
528  // - memccpy - copying until hitting a special character.
529 
530  auto It = findFunctionInConfig(CustomPropagations, FData);
531  if (It != CustomPropagations.end())
532  return It->second.second;
533  return {};
534 }
535 
536 void GenericTaintChecker::checkPreCall(const CallEvent &Call,
537  CheckerContext &C) const {
539  if (!FData)
540  return;
541 
542  // Check for taintedness related errors first: system call, uncontrolled
543  // format string, tainted buffer size.
544  if (checkPre(Call, *FData, C))
545  return;
546 
547  // Marks the function's arguments and/or return value tainted if it present in
548  // the list.
549  if (addSourcesPre(Call, *FData, C))
550  return;
551 
552  addFiltersPre(Call, *FData, C);
553 }
554 
555 void GenericTaintChecker::checkPostCall(const CallEvent &Call,
556  CheckerContext &C) const {
557  // Set the marked values as tainted. The return value only accessible from
558  // checkPostStmt.
559  propagateFromPre(Call, C);
560 }
561 
562 void GenericTaintChecker::printState(raw_ostream &Out, ProgramStateRef State,
563  const char *NL, const char *Sep) const {
564  printTaint(State, Out, NL, Sep);
565 }
566 
567 bool GenericTaintChecker::addSourcesPre(const CallEvent &Call,
568  const FunctionData &FData,
569  CheckerContext &C) const {
570  // First, try generating a propagation rule for this function.
571  TaintPropagationRule Rule = TaintPropagationRule::getTaintPropagationRule(
572  this->CustomPropagations, FData, C);
573  if (!Rule.isNull()) {
574  ProgramStateRef State = Rule.process(Call, C);
575  if (State) {
576  C.addTransition(State);
577  return true;
578  }
579  }
580  return false;
581 }
582 
583 bool GenericTaintChecker::addFiltersPre(const CallEvent &Call,
584  const FunctionData &FData,
585  CheckerContext &C) const {
586  auto It = findFunctionInConfig(CustomFilters, FData);
587  if (It == CustomFilters.end())
588  return false;
589 
590  ProgramStateRef State = C.getState();
591  const auto &Value = It->second;
592  const ArgVector &Args = Value.second;
593  for (unsigned ArgNum : Args) {
594  if (ArgNum >= Call.getNumArgs())
595  continue;
596 
597  const Expr *Arg = Call.getArgExpr(ArgNum);
598  Optional<SVal> V = getPointeeOf(C, Arg);
599  if (V)
600  State = removeTaint(State, *V);
601  }
602 
603  if (State != C.getState()) {
604  C.addTransition(State);
605  return true;
606  }
607  return false;
608 }
609 
610 bool GenericTaintChecker::propagateFromPre(const CallEvent &Call,
611  CheckerContext &C) {
612  ProgramStateRef State = C.getState();
613 
614  // Depending on what was tainted at pre-visit, we determined a set of
615  // arguments which should be tainted after the function returns. These are
616  // stored in the state as TaintArgsOnPostVisit set.
617  TaintArgsOnPostVisitTy TaintArgs = State->get<TaintArgsOnPostVisit>();
618  if (TaintArgs.isEmpty())
619  return false;
620 
621  for (unsigned ArgNum : TaintArgs) {
622  // Special handling for the tainted return value.
623  if (ArgNum == ReturnValueIndex) {
624  State = addTaint(State, Call.getReturnValue());
625  continue;
626  }
627 
628  // The arguments are pointer arguments. The data they are pointing at is
629  // tainted after the call.
630  if (Call.getNumArgs() < (ArgNum + 1))
631  return false;
632  const Expr *Arg = Call.getArgExpr(ArgNum);
633  Optional<SVal> V = getPointeeOf(C, Arg);
634  if (V)
635  State = addTaint(State, *V);
636  }
637 
638  // Clear up the taint info from the state.
639  State = State->remove<TaintArgsOnPostVisit>();
640 
641  if (State != C.getState()) {
642  C.addTransition(State);
643  return true;
644  }
645  return false;
646 }
647 
648 bool GenericTaintChecker::checkPre(const CallEvent &Call,
649  const FunctionData &FData,
650  CheckerContext &C) const {
651  if (checkUncontrolledFormatString(Call, C))
652  return true;
653 
654  if (checkSystemCall(Call, FData.Name, C))
655  return true;
656 
657  if (checkTaintedBufferSize(Call, C))
658  return true;
659 
660  return checkCustomSinks(Call, FData, C);
661 }
662 
663 Optional<SVal> GenericTaintChecker::getPointeeOf(CheckerContext &C,
664  const Expr *Arg) {
665  ProgramStateRef State = C.getState();
666  SVal AddrVal = C.getSVal(Arg->IgnoreParens());
667  if (AddrVal.isUnknownOrUndef())
668  return None;
669 
670  Optional<Loc> AddrLoc = AddrVal.getAs<Loc>();
671  if (!AddrLoc)
672  return None;
673 
674  QualType ArgTy = Arg->getType().getCanonicalType();
675  if (!ArgTy->isPointerType())
676  return State->getSVal(*AddrLoc);
677 
678  QualType ValTy = ArgTy->getPointeeType();
679 
680  // Do not dereference void pointers. Treat them as byte pointers instead.
681  // FIXME: we might want to consider more than just the first byte.
682  if (ValTy->isVoidType())
683  ValTy = C.getASTContext().CharTy;
684 
685  return State->getSVal(*AddrLoc, ValTy);
686 }
687 
689 GenericTaintChecker::TaintPropagationRule::process(const CallEvent &Call,
690  CheckerContext &C) const {
691  ProgramStateRef State = C.getState();
692 
693  // Check for taint in arguments.
694  bool IsTainted = true;
695  for (unsigned ArgNum : SrcArgs) {
696  if (ArgNum >= Call.getNumArgs())
697  continue;
698 
699  if ((IsTainted =
700  isTaintedOrPointsToTainted(Call.getArgExpr(ArgNum), State, C)))
701  break;
702  }
703 
704  // Check for taint in variadic arguments.
705  if (!IsTainted && VariadicType::Src == VarType) {
706  // Check if any of the arguments is tainted
707  for (unsigned i = VariadicIndex; i < Call.getNumArgs(); ++i) {
708  if ((IsTainted =
709  isTaintedOrPointsToTainted(Call.getArgExpr(i), State, C)))
710  break;
711  }
712  }
713 
714  if (PropagationFunc)
715  IsTainted = PropagationFunc(IsTainted, Call, C);
716 
717  if (!IsTainted)
718  return State;
719 
720  // Mark the arguments which should be tainted after the function returns.
721  for (unsigned ArgNum : DstArgs) {
722  // Should mark the return value?
723  if (ArgNum == ReturnValueIndex) {
724  State = State->add<TaintArgsOnPostVisit>(ReturnValueIndex);
725  continue;
726  }
727 
728  if (ArgNum >= Call.getNumArgs())
729  continue;
730 
731  // Mark the given argument.
732  State = State->add<TaintArgsOnPostVisit>(ArgNum);
733  }
734 
735  // Mark all variadic arguments tainted if present.
736  if (VariadicType::Dst == VarType) {
737  // For all pointer and references that were passed in:
738  // If they are not pointing to const data, mark data as tainted.
739  // TODO: So far we are just going one level down; ideally we'd need to
740  // recurse here.
741  for (unsigned i = VariadicIndex; i < Call.getNumArgs(); ++i) {
742  const Expr *Arg = Call.getArgExpr(i);
743  // Process pointer argument.
744  const Type *ArgTy = Arg->getType().getTypePtr();
745  QualType PType = ArgTy->getPointeeType();
746  if ((!PType.isNull() && !PType.isConstQualified()) ||
747  (ArgTy->isReferenceType() && !Arg->getType().isConstQualified())) {
748  State = State->add<TaintArgsOnPostVisit>(i);
749  }
750  }
751  }
752 
753  return State;
754 }
755 
756 // If argument 0(protocol domain) is network, the return value should get taint.
757 bool GenericTaintChecker::TaintPropagationRule::postSocket(
758  bool /*IsTainted*/, const CallEvent &Call, CheckerContext &C) {
759  SourceLocation DomLoc = Call.getArgExpr(0)->getExprLoc();
760  StringRef DomName = C.getMacroNameOrSpelling(DomLoc);
761  // White list the internal communication protocols.
762  if (DomName.equals("AF_SYSTEM") || DomName.equals("AF_LOCAL") ||
763  DomName.equals("AF_UNIX") || DomName.equals("AF_RESERVED_36"))
764  return false;
765  return true;
766 }
767 
768 bool GenericTaintChecker::isStdin(const Expr *E, CheckerContext &C) {
769  ProgramStateRef State = C.getState();
770  SVal Val = C.getSVal(E);
771 
772  // stdin is a pointer, so it would be a region.
773  const MemRegion *MemReg = Val.getAsRegion();
774 
775  // The region should be symbolic, we do not know it's value.
776  const auto *SymReg = dyn_cast_or_null<SymbolicRegion>(MemReg);
777  if (!SymReg)
778  return false;
779 
780  // Get it's symbol and find the declaration region it's pointing to.
781  const auto *Sm = dyn_cast<SymbolRegionValue>(SymReg->getSymbol());
782  if (!Sm)
783  return false;
784  const auto *DeclReg = dyn_cast_or_null<DeclRegion>(Sm->getRegion());
785  if (!DeclReg)
786  return false;
787 
788  // This region corresponds to a declaration, find out if it's a global/extern
789  // variable named stdin with the proper type.
790  if (const auto *D = dyn_cast_or_null<VarDecl>(DeclReg->getDecl())) {
791  D = D->getCanonicalDecl();
792  if (D->getName().contains("stdin") && D->isExternC()) {
793  const auto *PtrTy = dyn_cast<PointerType>(D->getType().getTypePtr());
794  if (PtrTy && PtrTy->getPointeeType().getCanonicalType() ==
795  C.getASTContext().getFILEType().getCanonicalType())
796  return true;
797  }
798  }
799  return false;
800 }
801 
802 static bool getPrintfFormatArgumentNum(const CallEvent &Call,
803  const CheckerContext &C,
804  unsigned &ArgNum) {
805  // Find if the function contains a format string argument.
806  // Handles: fprintf, printf, sprintf, snprintf, vfprintf, vprintf, vsprintf,
807  // vsnprintf, syslog, custom annotated functions.
808  const FunctionDecl *FDecl = Call.getDecl()->getAsFunction();
809  if (!FDecl)
810  return false;
811  for (const auto *Format : FDecl->specific_attrs<FormatAttr>()) {
812  ArgNum = Format->getFormatIdx() - 1;
813  if ((Format->getType()->getName() == "printf") &&
814  Call.getNumArgs() > ArgNum)
815  return true;
816  }
817 
818  // Or if a function is named setproctitle (this is a heuristic).
819  if (C.getCalleeName(FDecl).contains("setproctitle")) {
820  ArgNum = 0;
821  return true;
822  }
823 
824  return false;
825 }
826 
827 bool GenericTaintChecker::generateReportIfTainted(const Expr *E, StringRef Msg,
828  CheckerContext &C) const {
829  assert(E);
830 
831  // Check for taint.
832  ProgramStateRef State = C.getState();
833  Optional<SVal> PointedToSVal = getPointeeOf(C, E);
834  SVal TaintedSVal;
835  if (PointedToSVal && isTainted(State, *PointedToSVal))
836  TaintedSVal = *PointedToSVal;
837  else if (isTainted(State, E, C.getLocationContext()))
838  TaintedSVal = C.getSVal(E);
839  else
840  return false;
841 
842  // Generate diagnostic.
843  if (ExplodedNode *N = C.generateNonFatalErrorNode()) {
844  initBugType();
845  auto report = std::make_unique<PathSensitiveBugReport>(*BT, Msg, N);
846  report->addRange(E->getSourceRange());
847  report->addVisitor(std::make_unique<TaintBugVisitor>(TaintedSVal));
848  C.emitReport(std::move(report));
849  return true;
850  }
851  return false;
852 }
853 
854 bool GenericTaintChecker::checkUncontrolledFormatString(
855  const CallEvent &Call, CheckerContext &C) const {
856  // Check if the function contains a format string argument.
857  unsigned ArgNum = 0;
858  if (!getPrintfFormatArgumentNum(Call, C, ArgNum))
859  return false;
860 
861  // If either the format string content or the pointer itself are tainted,
862  // warn.
863  return generateReportIfTainted(Call.getArgExpr(ArgNum),
864  MsgUncontrolledFormatString, C);
865 }
866 
867 bool GenericTaintChecker::checkSystemCall(const CallEvent &Call, StringRef Name,
868  CheckerContext &C) const {
869  // TODO: It might make sense to run this check on demand. In some cases,
870  // we should check if the environment has been cleansed here. We also might
871  // need to know if the user was reset before these calls(seteuid).
872  unsigned ArgNum = llvm::StringSwitch<unsigned>(Name)
873  .Case("system", 0)
874  .Case("popen", 0)
875  .Case("execl", 0)
876  .Case("execle", 0)
877  .Case("execlp", 0)
878  .Case("execv", 0)
879  .Case("execvp", 0)
880  .Case("execvP", 0)
881  .Case("execve", 0)
882  .Case("dlopen", 0)
883  .Default(InvalidArgIndex);
884 
885  if (ArgNum == InvalidArgIndex || Call.getNumArgs() < (ArgNum + 1))
886  return false;
887 
888  return generateReportIfTainted(Call.getArgExpr(ArgNum), MsgSanitizeSystemArgs,
889  C);
890 }
891 
892 // TODO: Should this check be a part of the CString checker?
893 // If yes, should taint be a global setting?
894 bool GenericTaintChecker::checkTaintedBufferSize(const CallEvent &Call,
895  CheckerContext &C) const {
896  const auto *FDecl = Call.getDecl()->getAsFunction();
897  // If the function has a buffer size argument, set ArgNum.
898  unsigned ArgNum = InvalidArgIndex;
899  unsigned BId = 0;
900  if ((BId = FDecl->getMemoryFunctionKind())) {
901  switch (BId) {
902  case Builtin::BImemcpy:
903  case Builtin::BImemmove:
904  case Builtin::BIstrncpy:
905  ArgNum = 2;
906  break;
907  case Builtin::BIstrndup:
908  ArgNum = 1;
909  break;
910  default:
911  break;
912  }
913  }
914 
915  if (ArgNum == InvalidArgIndex) {
916  using CCtx = CheckerContext;
917  if (CCtx::isCLibraryFunction(FDecl, "malloc") ||
918  CCtx::isCLibraryFunction(FDecl, "calloc") ||
919  CCtx::isCLibraryFunction(FDecl, "alloca"))
920  ArgNum = 0;
921  else if (CCtx::isCLibraryFunction(FDecl, "memccpy"))
922  ArgNum = 3;
923  else if (CCtx::isCLibraryFunction(FDecl, "realloc"))
924  ArgNum = 1;
925  else if (CCtx::isCLibraryFunction(FDecl, "bcopy"))
926  ArgNum = 2;
927  }
928 
929  return ArgNum != InvalidArgIndex && Call.getNumArgs() > ArgNum &&
930  generateReportIfTainted(Call.getArgExpr(ArgNum), MsgTaintedBufferSize,
931  C);
932 }
933 
934 bool GenericTaintChecker::checkCustomSinks(const CallEvent &Call,
935  const FunctionData &FData,
936  CheckerContext &C) const {
937  auto It = findFunctionInConfig(CustomSinks, FData);
938  if (It == CustomSinks.end())
939  return false;
940 
941  const auto &Value = It->second;
942  const GenericTaintChecker::ArgVector &Args = Value.second;
943  for (unsigned ArgNum : Args) {
944  if (ArgNum >= Call.getNumArgs())
945  continue;
946 
947  if (generateReportIfTainted(Call.getArgExpr(ArgNum), MsgCustomSink, C))
948  return true;
949  }
950 
951  return false;
952 }
953 
954 void ento::registerGenericTaintChecker(CheckerManager &Mgr) {
955  auto *Checker = Mgr.registerChecker<GenericTaintChecker>();
956  std::string Option{"Config"};
957  StringRef ConfigFile =
958  Mgr.getAnalyzerOptions().getCheckerStringOption(Checker, Option);
960  getConfiguration<TaintConfig>(Mgr, Checker, Option, ConfigFile);
961  if (Config)
962  Checker->parseConfiguration(Mgr, Option, std::move(Config.getValue()));
963 }
964 
965 bool ento::shouldRegisterGenericTaintChecker(const CheckerManager &mgr) {
966  return true;
967 }
Builtins.h
max
__DEVICE__ int max(int __a, int __b)
Definition: __clang_cuda_math.h:196
llvm
Definition: Dominators.h:30
llvm::yaml::MappingTraits< TaintConfig::NameScopeArgs >::mapping
static void mapping(IO &IO, TaintConfig::NameScopeArgs &NSA)
Definition: GenericTaintChecker.cpp:347
llvm::yaml::MappingTraits< TaintConfig::Propagation >::mapping
static void mapping(IO &IO, TaintConfig::Propagation &Propagation)
Definition: GenericTaintChecker.cpp:326
string
string(SUBSTRING ${CMAKE_CURRENT_BINARY_DIR} 0 ${PATH_LIB_START} PATH_HEAD) string(SUBSTRING $
Definition: CMakeLists.txt:22
clang::QualType::isConstQualified
bool isConstQualified() const
Determine whether this type is const-qualified.
Definition: Type.h:6483
clang::format::parseConfiguration
std::error_code parseConfiguration(llvm::MemoryBufferRef Config, FormatStyle *Style, bool AllowUnknownOptions=false, llvm::SourceMgr::DiagHandlerTy DiagHandler=nullptr, void *DiagHandlerCtx=nullptr)
Parse configuration from YAML-formatted text.
Definition: Format.cpp:1580
clang::ento::taint::printTaint
void printTaint(ProgramStateRef State, raw_ostream &Out, const char *nl="\n", const char *sep="")
llvm::SmallVector
Definition: LLVM.h:38
clang::SourceLocation
Encodes a location in the source.
Definition: SourceLocation.h:88
clang::Stmt::getSourceRange
SourceRange getSourceRange() const LLVM_READONLY
SourceLocation tokens are not useful in isolation - they are low level value objects created/interpreted by SourceManager.
Definition: Stmt.cpp:324
clang::ento::ProgramStateRef
IntrusiveRefCntPtr< const ProgramState > ProgramStateRef
Definition: ProgramState_Fwd.h:37
clang::QualType
A (possibly-)qualified type.
Definition: Type.h:673
Attr.h
AttributeLangSupport::C
@ C
Definition: SemaDeclAttr.cpp:54
clang::QualType::getCanonicalType
QualType getCanonicalType() const
Definition: Type.h:6463
llvm::Optional
Definition: LLVM.h:40
clang::Type::isVoidType
bool isVoidType() const
Definition: Type.h:6955
clang::ento::taint::isTainted
bool isTainted(ProgramStateRef State, const Stmt *S, const LocationContext *LCtx, TaintTagType Kind=TaintTagGeneric)
Check if the statement has a tainted value in the given state.
clang::index::SymbolRole::Call
@ Call
llvm::yaml::MappingTraits< TaintConfig >::mapping
static void mapping(IO &IO, TaintConfig &Config)
Definition: GenericTaintChecker.cpp:318
clang::Type
The base class of the type hierarchy.
Definition: Type.h:1490
CallEvent.h
clang::StructuralEquivalenceKind::Default
@ Default
clang::Type::isReferenceType
bool isReferenceType() const
Definition: Type.h:6684
V
#define V(N, I)
Definition: ASTContext.h:3121
BuiltinCheckerRegistration.h
clang::Decl::specific_attrs
llvm::iterator_range< specific_attr_iterator< T > > specific_attrs() const
Definition: DeclBase.h:529
clang::Decl::getKind
Kind getKind() const
Definition: DeclBase.h:433
clang::XRayInstrKind::None
constexpr XRayInstrMask None
Definition: XRayInstr.h:38
clang::Scope
Scope - A scope is a transient data structure that is used while parsing the program.
Definition: Scope.h:40
CheckerManager.h
clang::ento::taint::addTaint
LLVM_NODISCARD ProgramStateRef addTaint(ProgramStateRef State, const Stmt *S, const LocationContext *LCtx, TaintTagType Kind=TaintTagGeneric)
Create a new state in which the value of the statement is marked as tainted.
getPrintfFormatArgumentNum
static bool getPrintfFormatArgumentNum(const CallEvent &Call, const CheckerContext &C, unsigned &ArgNum)
Definition: GenericTaintChecker.cpp:802
bool
#define bool
Definition: stdbool.h:15
REGISTER_SET_WITH_PROGRAMSTATE
#define REGISTER_SET_WITH_PROGRAMSTATE(Name, Elem)
Declares an immutable set of type NameTy, suitable for placement into the ProgramState.
Definition: ProgramStateTrait.h:109
clang::Type::getPointeeType
QualType getPointeeType() const
If this is a pointer, ObjC object pointer, or block pointer, this returns the respective pointee.
Definition: Type.cpp:625
clang::FunctionDecl::getMemoryFunctionKind
unsigned getMemoryFunctionKind() const
Identify a memory copying or setting function.
Definition: Decl.cpp:4031
TaintConfig
GenericTaintChecker::TaintConfiguration TaintConfig
Definition: GenericTaintChecker.cpp:310
clang::ento::taint::removeTaint
LLVM_NODISCARD ProgramStateRef removeTaint(ProgramStateRef State, SVal V)
clang::transformer::EditKind::Range
@ Range
P
StringRef P
Definition: ASTMatchersInternal.cpp:563
clang::serialized_diags::create
std::unique_ptr< DiagnosticConsumer > create(StringRef OutputFile, DiagnosticOptions *Diags, bool MergeChildRecords=false)
Returns a DiagnosticConsumer that serializes diagnostics to a bitcode file.
Definition: SerializedDiagnosticPrinter.cpp:302
clang::Type::isPointerType
bool isPointerType() const
Definition: Type.h:6672
Yaml.h
Taint.h
BugType.h
clang::QualType::isNull
bool isNull() const
Return true if this QualType doesn't point to a type yet.
Definition: Type.h:738
Value
Value
Definition: UninitializedValues.cpp:102
State
LineState State
Definition: UnwrappedLineFormatter.cpp:986
clang::NamedDecl::getQualifiedNameAsString
std::string getQualifiedNameAsString() const
Definition: Decl.cpp:1582
llvm::yaml::ScalarEnumerationTraits< GenericTaintChecker::VariadicType >::enumeration
static void enumeration(IO &IO, GenericTaintChecker::VariadicType &Value)
Definition: GenericTaintChecker.cpp:339
clang::ento::CheckerContext::isCLibraryFunction
static bool isCLibraryFunction(const FunctionDecl *FD, StringRef Name=StringRef())
Returns true if the callee is an externally-visible function in the top-level namespace, such as malloc.
Definition: CheckerContext.cpp:48
CheckerContext.h
Checker.h
std
Definition: Format.h:4034
clang::Expr::IgnoreParens
Expr * IgnoreParens() LLVM_READONLY
Skip past any parentheses which might surround this expression until reaching a fixed point.
Definition: Expr.cpp:2915
clang
Definition: CalledOnceCheck.h:17
clang::Expr::getType
QualType getType() const
Definition: Expr.h:141
clang::QualType::getTypePtr
const Type * getTypePtr() const
Retrieves a pointer to the underlying (unqualified) type.
Definition: Type.h:6424
ProgramStateTrait.h
clang::Expr
This represents one expression.
Definition: Expr.h:109
clang::FunctionDecl
Represents a function declaration or definition.
Definition: Decl.h:1856