clang  11.0.0git
GenericTaintChecker.cpp
Go to the documentation of this file.
1 //== GenericTaintChecker.cpp ----------------------------------- -*- C++ -*--=//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This checker defines the attack surface for generic taint propagation.
10 //
11 // The taint information produced by it might be useful to other checkers. For
12 // example, checkers should report errors which involve tainted data more
13 // aggressively, even if the involved symbols are under constrained.
14 //
15 //===----------------------------------------------------------------------===//
16 
17 #include "Taint.h"
18 #include "Yaml.h"
19 #include "clang/AST/Attr.h"
20 #include "clang/Basic/Builtins.h"
28 #include "llvm/Support/YAMLTraits.h"
29 
30 #include <algorithm>
31 #include <limits>
32 #include <memory>
33 #include <unordered_map>
34 #include <utility>
35 
36 using namespace clang;
37 using namespace ento;
38 using namespace taint;
39 
40 namespace {
41 class GenericTaintChecker : public Checker<check::PreCall, check::PostCall> {
42 public:
43  static void *getTag() {
44  static int Tag;
45  return &Tag;
46  }
47 
48  void checkPreCall(const CallEvent &Call, CheckerContext &C) const;
49  void checkPostCall(const CallEvent &Call, CheckerContext &C) const;
50 
51  void printState(raw_ostream &Out, ProgramStateRef State, const char *NL,
52  const char *Sep) const override;
53 
54  using ArgVector = SmallVector<unsigned, 2>;
55  using SignedArgVector = SmallVector<int, 2>;
56 
57  enum class VariadicType { None, Src, Dst };
58 
59  /// Used to parse the configuration file.
60  struct TaintConfiguration {
61  using NameScopeArgs = std::tuple<std::string, std::string, ArgVector>;
62 
63  struct Propagation {
64  std::string Name;
65  std::string Scope;
66  ArgVector SrcArgs;
67  SignedArgVector DstArgs;
68  VariadicType VarType;
69  unsigned VarIndex;
70  };
71 
72  std::vector<Propagation> Propagations;
73  std::vector<NameScopeArgs> Filters;
74  std::vector<NameScopeArgs> Sinks;
75 
76  TaintConfiguration() = default;
77  TaintConfiguration(const TaintConfiguration &) = default;
78  TaintConfiguration(TaintConfiguration &&) = default;
79  TaintConfiguration &operator=(const TaintConfiguration &) = default;
80  TaintConfiguration &operator=(TaintConfiguration &&) = default;
81  };
82 
83  /// Convert SignedArgVector to ArgVector.
84  ArgVector convertToArgVector(CheckerManager &Mgr, const std::string &Option,
85  const SignedArgVector &Args);
86 
87  /// Parse the config.
88  void parseConfiguration(CheckerManager &Mgr, const std::string &Option,
89  TaintConfiguration &&Config);
90 
91  static const unsigned InvalidArgIndex{std::numeric_limits<unsigned>::max()};
92  /// Denotes the return vale.
93  static const unsigned ReturnValueIndex{std::numeric_limits<unsigned>::max() -
94  1};
95 
96 private:
97  mutable std::unique_ptr<BugType> BT;
98  void initBugType() const {
99  if (!BT)
100  BT = std::make_unique<BugType>(this, "Use of Untrusted Data",
101  "Untrusted Data");
102  }
103 
104  struct FunctionData {
105  FunctionData() = delete;
106  FunctionData(const FunctionData &) = default;
107  FunctionData(FunctionData &&) = default;
108  FunctionData &operator=(const FunctionData &) = delete;
109  FunctionData &operator=(FunctionData &&) = delete;
110 
111  static Optional<FunctionData> create(const CallEvent &Call,
112  const CheckerContext &C) {
113  if (!Call.getDecl())
114  return None;
115 
116  const FunctionDecl *FDecl = Call.getDecl()->getAsFunction();
117  if (!FDecl || (FDecl->getKind() != Decl::Function &&
118  FDecl->getKind() != Decl::CXXMethod))
119  return None;
120 
121  StringRef Name = C.getCalleeName(FDecl);
122  std::string FullName = FDecl->getQualifiedNameAsString();
123  if (Name.empty() || FullName.empty())
124  return None;
125 
126  return FunctionData{FDecl, Name, FullName};
127  }
128 
129  bool isInScope(StringRef Scope) const {
130  return StringRef(FullName).startswith(Scope);
131  }
132 
133  const FunctionDecl *const FDecl;
134  const StringRef Name;
135  const std::string FullName;
136  };
137 
138  /// Catch taint related bugs. Check if tainted data is passed to a
139  /// system call etc. Returns true on matching.
140  bool checkPre(const CallEvent &Call, const FunctionData &FData,
141  CheckerContext &C) const;
142 
143  /// Add taint sources on a pre-visit. Returns true on matching.
144  bool addSourcesPre(const CallEvent &Call, const FunctionData &FData,
145  CheckerContext &C) const;
146 
147  /// Mark filter's arguments not tainted on a pre-visit. Returns true on
148  /// matching.
149  bool addFiltersPre(const CallEvent &Call, const FunctionData &FData,
150  CheckerContext &C) const;
151 
152  /// Propagate taint generated at pre-visit. Returns true on matching.
153  static bool propagateFromPre(const CallEvent &Call, CheckerContext &C);
154 
155  /// Check if the region the expression evaluates to is the standard input,
156  /// and thus, is tainted.
157  static bool isStdin(const Expr *E, CheckerContext &C);
158 
159  /// Given a pointer argument, return the value it points to.
160  static Optional<SVal> getPointeeOf(CheckerContext &C, const Expr *Arg);
161 
162  /// Check for CWE-134: Uncontrolled Format String.
163  static constexpr llvm::StringLiteral MsgUncontrolledFormatString =
164  "Untrusted data is used as a format string "
165  "(CWE-134: Uncontrolled Format String)";
166  bool checkUncontrolledFormatString(const CallEvent &Call,
167  CheckerContext &C) const;
168 
169  /// Check for:
170  /// CERT/STR02-C. "Sanitize data passed to complex subsystems"
171  /// CWE-78, "Failure to Sanitize Data into an OS Command"
172  static constexpr llvm::StringLiteral MsgSanitizeSystemArgs =
173  "Untrusted data is passed to a system call "
174  "(CERT/STR02-C. Sanitize data passed to complex subsystems)";
175  bool checkSystemCall(const CallEvent &Call, StringRef Name,
176  CheckerContext &C) const;
177 
178  /// Check if tainted data is used as a buffer size ins strn.. functions,
179  /// and allocators.
180  static constexpr llvm::StringLiteral MsgTaintedBufferSize =
181  "Untrusted data is used to specify the buffer size "
182  "(CERT/STR31-C. Guarantee that storage for strings has sufficient space "
183  "for character data and the null terminator)";
184  bool checkTaintedBufferSize(const CallEvent &Call, CheckerContext &C) const;
185 
186  /// Check if tainted data is used as a custom sink's parameter.
187  static constexpr llvm::StringLiteral MsgCustomSink =
188  "Untrusted data is passed to a user-defined sink";
189  bool checkCustomSinks(const CallEvent &Call, const FunctionData &FData,
190  CheckerContext &C) const;
191 
192  /// Generate a report if the expression is tainted or points to tainted data.
193  bool generateReportIfTainted(const Expr *E, StringRef Msg,
194  CheckerContext &C) const;
195 
196  struct TaintPropagationRule;
197  template <typename T>
198  using ConfigDataMap =
199  std::unordered_multimap<std::string, std::pair<std::string, T>>;
200  using NameRuleMap = ConfigDataMap<TaintPropagationRule>;
201  using NameArgMap = ConfigDataMap<ArgVector>;
202 
203  /// Find a function with the given name and scope. Returns the first match
204  /// or the end of the map.
205  template <typename T>
206  static auto findFunctionInConfig(const ConfigDataMap<T> &Map,
207  const FunctionData &FData);
208 
209  /// A struct used to specify taint propagation rules for a function.
210  ///
211  /// If any of the possible taint source arguments is tainted, all of the
212  /// destination arguments should also be tainted. Use InvalidArgIndex in the
213  /// src list to specify that all of the arguments can introduce taint. Use
214  /// InvalidArgIndex in the dst arguments to signify that all the non-const
215  /// pointer and reference arguments might be tainted on return. If
216  /// ReturnValueIndex is added to the dst list, the return value will be
217  /// tainted.
218  struct TaintPropagationRule {
219  using PropagationFuncType = bool (*)(bool IsTainted, const CallEvent &Call,
220  CheckerContext &C);
221 
222  /// List of arguments which can be taint sources and should be checked.
223  ArgVector SrcArgs;
224  /// List of arguments which should be tainted on function return.
225  ArgVector DstArgs;
226  /// Index for the first variadic parameter if exist.
227  unsigned VariadicIndex;
228  /// Show when a function has variadic parameters. If it has, it marks all
229  /// of them as source or destination.
230  VariadicType VarType;
231  /// Special function for tainted source determination. If defined, it can
232  /// override the default behavior.
233  PropagationFuncType PropagationFunc;
234 
235  TaintPropagationRule()
236  : VariadicIndex(InvalidArgIndex), VarType(VariadicType::None),
237  PropagationFunc(nullptr) {}
238 
239  TaintPropagationRule(ArgVector &&Src, ArgVector &&Dst,
240  VariadicType Var = VariadicType::None,
241  unsigned VarIndex = InvalidArgIndex,
242  PropagationFuncType Func = nullptr)
243  : SrcArgs(std::move(Src)), DstArgs(std::move(Dst)),
244  VariadicIndex(VarIndex), VarType(Var), PropagationFunc(Func) {}
245 
246  /// Get the propagation rule for a given function.
247  static TaintPropagationRule
248  getTaintPropagationRule(const NameRuleMap &CustomPropagations,
249  const FunctionData &FData, CheckerContext &C);
250 
251  void addSrcArg(unsigned A) { SrcArgs.push_back(A); }
252  void addDstArg(unsigned A) { DstArgs.push_back(A); }
253 
254  bool isNull() const {
255  return SrcArgs.empty() && DstArgs.empty() &&
256  VariadicType::None == VarType;
257  }
258 
259  bool isDestinationArgument(unsigned ArgNum) const {
260  return (llvm::find(DstArgs, ArgNum) != DstArgs.end());
261  }
262 
263  static bool isTaintedOrPointsToTainted(const Expr *E,
264  const ProgramStateRef &State,
265  CheckerContext &C) {
266  if (isTainted(State, E, C.getLocationContext()) || isStdin(E, C))
267  return true;
268 
269  if (!E->getType().getTypePtr()->isPointerType())
270  return false;
271 
272  Optional<SVal> V = getPointeeOf(C, E);
273  return (V && isTainted(State, *V));
274  }
275 
276  /// Pre-process a function which propagates taint according to the
277  /// taint rule.
278  ProgramStateRef process(const CallEvent &Call, CheckerContext &C) const;
279 
280  // Functions for custom taintedness propagation.
281  static bool postSocket(bool IsTainted, const CallEvent &Call,
282  CheckerContext &C);
283  };
284 
285  /// Defines a map between the propagation function's name, scope
286  /// and TaintPropagationRule.
287  NameRuleMap CustomPropagations;
288 
289  /// Defines a map between the filter function's name, scope and filtering
290  /// args.
291  NameArgMap CustomFilters;
292 
293  /// Defines a map between the sink function's name, scope and sinking args.
294  NameArgMap CustomSinks;
295 };
296 
297 const unsigned GenericTaintChecker::ReturnValueIndex;
298 const unsigned GenericTaintChecker::InvalidArgIndex;
299 
300 // FIXME: these lines can be removed in C++17
301 constexpr llvm::StringLiteral GenericTaintChecker::MsgUncontrolledFormatString;
302 constexpr llvm::StringLiteral GenericTaintChecker::MsgSanitizeSystemArgs;
303 constexpr llvm::StringLiteral GenericTaintChecker::MsgTaintedBufferSize;
304 constexpr llvm::StringLiteral GenericTaintChecker::MsgCustomSink;
305 } // end of anonymous namespace
306 
307 using TaintConfig = GenericTaintChecker::TaintConfiguration;
308 
309 LLVM_YAML_IS_SEQUENCE_VECTOR(TaintConfig::Propagation)
310 LLVM_YAML_IS_SEQUENCE_VECTOR(TaintConfig::NameScopeArgs)
311 
312 namespace llvm {
313 namespace yaml {
314 template <> struct MappingTraits<TaintConfig> {
315  static void mapping(IO &IO, TaintConfig &Config) {
316  IO.mapOptional("Propagations", Config.Propagations);
317  IO.mapOptional("Filters", Config.Filters);
318  IO.mapOptional("Sinks", Config.Sinks);
319  }
320 };
321 
322 template <> struct MappingTraits<TaintConfig::Propagation> {
323  static void mapping(IO &IO, TaintConfig::Propagation &Propagation) {
324  IO.mapRequired("Name", Propagation.Name);
325  IO.mapOptional("Scope", Propagation.Scope);
326  IO.mapOptional("SrcArgs", Propagation.SrcArgs);
327  IO.mapOptional("DstArgs", Propagation.DstArgs);
328  IO.mapOptional("VariadicType", Propagation.VarType,
330  IO.mapOptional("VariadicIndex", Propagation.VarIndex,
331  GenericTaintChecker::InvalidArgIndex);
332  }
333 };
334 
335 template <> struct ScalarEnumerationTraits<GenericTaintChecker::VariadicType> {
336  static void enumeration(IO &IO, GenericTaintChecker::VariadicType &Value) {
337  IO.enumCase(Value, "None", GenericTaintChecker::VariadicType::None);
338  IO.enumCase(Value, "Src", GenericTaintChecker::VariadicType::Src);
339  IO.enumCase(Value, "Dst", GenericTaintChecker::VariadicType::Dst);
340  }
341 };
342 
343 template <> struct MappingTraits<TaintConfig::NameScopeArgs> {
344  static void mapping(IO &IO, TaintConfig::NameScopeArgs &NSA) {
345  IO.mapRequired("Name", std::get<0>(NSA));
346  IO.mapOptional("Scope", std::get<1>(NSA));
347  IO.mapRequired("Args", std::get<2>(NSA));
348  }
349 };
350 } // namespace yaml
351 } // namespace llvm
352 
353 /// A set which is used to pass information from call pre-visit instruction
354 /// to the call post-visit. The values are unsigned integers, which are either
355 /// ReturnValueIndex, or indexes of the pointer/reference argument, which
356 /// points to data, which should be tainted on return.
357 REGISTER_SET_WITH_PROGRAMSTATE(TaintArgsOnPostVisit, unsigned)
358 
360 GenericTaintChecker::convertToArgVector(CheckerManager &Mgr,
361  const std::string &Option,
362  const SignedArgVector &Args) {
363  ArgVector Result;
364  for (int Arg : Args) {
365  if (Arg == -1)
366  Result.push_back(ReturnValueIndex);
367  else if (Arg < -1) {
368  Result.push_back(InvalidArgIndex);
369  Mgr.reportInvalidCheckerOptionValue(
370  this, Option,
371  "an argument number for propagation rules greater or equal to -1");
372  } else
373  Result.push_back(static_cast<unsigned>(Arg));
374  }
375  return Result;
376 }
377 
378 void GenericTaintChecker::parseConfiguration(CheckerManager &Mgr,
379  const std::string &Option,
380  TaintConfiguration &&Config) {
381  for (auto &P : Config.Propagations) {
382  GenericTaintChecker::CustomPropagations.emplace(
383  P.Name,
384  std::make_pair(P.Scope, TaintPropagationRule{
385  std::move(P.SrcArgs),
386  convertToArgVector(Mgr, Option, P.DstArgs),
387  P.VarType, P.VarIndex}));
388  }
389 
390  for (auto &F : Config.Filters) {
391  GenericTaintChecker::CustomFilters.emplace(
392  std::get<0>(F),
393  std::make_pair(std::move(std::get<1>(F)), std::move(std::get<2>(F))));
394  }
395 
396  for (auto &S : Config.Sinks) {
397  GenericTaintChecker::CustomSinks.emplace(
398  std::get<0>(S),
399  std::make_pair(std::move(std::get<1>(S)), std::move(std::get<2>(S))));
400  }
401 }
402 
403 template <typename T>
404 auto GenericTaintChecker::findFunctionInConfig(const ConfigDataMap<T> &Map,
405  const FunctionData &FData) {
406  auto Range = Map.equal_range(std::string(FData.Name));
407  auto It =
408  std::find_if(Range.first, Range.second, [&FData](const auto &Entry) {
409  const auto &Value = Entry.second;
410  StringRef Scope = Value.first;
411  return Scope.empty() || FData.isInScope(Scope);
412  });
413  return It != Range.second ? It : Map.end();
414 }
415 
416 GenericTaintChecker::TaintPropagationRule
417 GenericTaintChecker::TaintPropagationRule::getTaintPropagationRule(
418  const NameRuleMap &CustomPropagations, const FunctionData &FData,
419  CheckerContext &C) {
420  // TODO: Currently, we might lose precision here: we always mark a return
421  // value as tainted even if it's just a pointer, pointing to tainted data.
422 
423  // Check for exact name match for functions without builtin substitutes.
424  // Use qualified name, because these are C functions without namespace.
425  TaintPropagationRule Rule =
426  llvm::StringSwitch<TaintPropagationRule>(FData.FullName)
427  // Source functions
428  // TODO: Add support for vfscanf & family.
429  .Case("fdopen", {{}, {ReturnValueIndex}})
430  .Case("fopen", {{}, {ReturnValueIndex}})
431  .Case("freopen", {{}, {ReturnValueIndex}})
432  .Case("getch", {{}, {ReturnValueIndex}})
433  .Case("getchar", {{}, {ReturnValueIndex}})
434  .Case("getchar_unlocked", {{}, {ReturnValueIndex}})
435  .Case("getenv", {{}, {ReturnValueIndex}})
436  .Case("gets", {{}, {0, ReturnValueIndex}})
437  .Case("scanf", {{}, {}, VariadicType::Dst, 1})
438  .Case("socket", {{},
439  {ReturnValueIndex},
441  InvalidArgIndex,
442  &TaintPropagationRule::postSocket})
443  .Case("wgetch", {{}, {ReturnValueIndex}})
444  // Propagating functions
445  .Case("atoi", {{0}, {ReturnValueIndex}})
446  .Case("atol", {{0}, {ReturnValueIndex}})
447  .Case("atoll", {{0}, {ReturnValueIndex}})
448  .Case("fgetc", {{0}, {ReturnValueIndex}})
449  .Case("fgetln", {{0}, {ReturnValueIndex}})
450  .Case("fgets", {{2}, {0, ReturnValueIndex}})
451  .Case("fscanf", {{0}, {}, VariadicType::Dst, 2})
452  .Case("sscanf", {{0}, {}, VariadicType::Dst, 2})
453  .Case("getc", {{0}, {ReturnValueIndex}})
454  .Case("getc_unlocked", {{0}, {ReturnValueIndex}})
455  .Case("getdelim", {{3}, {0}})
456  .Case("getline", {{2}, {0}})
457  .Case("getw", {{0}, {ReturnValueIndex}})
458  .Case("pread", {{0, 1, 2, 3}, {1, ReturnValueIndex}})
459  .Case("read", {{0, 2}, {1, ReturnValueIndex}})
460  .Case("strchr", {{0}, {ReturnValueIndex}})
461  .Case("strrchr", {{0}, {ReturnValueIndex}})
462  .Case("tolower", {{0}, {ReturnValueIndex}})
463  .Case("toupper", {{0}, {ReturnValueIndex}})
464  .Default({});
465 
466  if (!Rule.isNull())
467  return Rule;
468  assert(FData.FDecl);
469 
470  // Check if it's one of the memory setting/copying functions.
471  // This check is specialized but faster then calling isCLibraryFunction.
472  const FunctionDecl *FDecl = FData.FDecl;
473  unsigned BId = 0;
474  if ((BId = FDecl->getMemoryFunctionKind())) {
475  switch (BId) {
476  case Builtin::BImemcpy:
477  case Builtin::BImemmove:
478  case Builtin::BIstrncpy:
479  case Builtin::BIstrncat:
480  return {{1, 2}, {0, ReturnValueIndex}};
481  case Builtin::BIstrlcpy:
482  case Builtin::BIstrlcat:
483  return {{1, 2}, {0}};
484  case Builtin::BIstrndup:
485  return {{0, 1}, {ReturnValueIndex}};
486 
487  default:
488  break;
489  }
490  }
491 
492  // Process all other functions which could be defined as builtins.
493  if (Rule.isNull()) {
494  const auto OneOf = [FDecl](const auto &... Name) {
495  // FIXME: use fold expression in C++17
496  using unused = int[];
497  bool ret = false;
498  static_cast<void>(unused{
499  0, (ret |= CheckerContext::isCLibraryFunction(FDecl, Name), 0)...});
500  return ret;
501  };
502  if (OneOf("snprintf"))
503  return {{1}, {0, ReturnValueIndex}, VariadicType::Src, 3};
504  if (OneOf("sprintf"))
505  return {{}, {0, ReturnValueIndex}, VariadicType::Src, 2};
506  if (OneOf("strcpy", "stpcpy", "strcat"))
507  return {{1}, {0, ReturnValueIndex}};
508  if (OneOf("bcopy"))
509  return {{0, 2}, {1}};
510  if (OneOf("strdup", "strdupa", "wcsdup"))
511  return {{0}, {ReturnValueIndex}};
512  }
513 
514  // Skipping the following functions, since they might be used for cleansing or
515  // smart memory copy:
516  // - memccpy - copying until hitting a special character.
517 
518  auto It = findFunctionInConfig(CustomPropagations, FData);
519  if (It != CustomPropagations.end())
520  return It->second.second;
521  return {};
522 }
523 
524 void GenericTaintChecker::checkPreCall(const CallEvent &Call,
525  CheckerContext &C) const {
527  if (!FData)
528  return;
529 
530  // Check for taintedness related errors first: system call, uncontrolled
531  // format string, tainted buffer size.
532  if (checkPre(Call, *FData, C))
533  return;
534 
535  // Marks the function's arguments and/or return value tainted if it present in
536  // the list.
537  if (addSourcesPre(Call, *FData, C))
538  return;
539 
540  addFiltersPre(Call, *FData, C);
541 }
542 
543 void GenericTaintChecker::checkPostCall(const CallEvent &Call,
544  CheckerContext &C) const {
545  // Set the marked values as tainted. The return value only accessible from
546  // checkPostStmt.
547  propagateFromPre(Call, C);
548 }
549 
550 void GenericTaintChecker::printState(raw_ostream &Out, ProgramStateRef State,
551  const char *NL, const char *Sep) const {
552  printTaint(State, Out, NL, Sep);
553 }
554 
555 bool GenericTaintChecker::addSourcesPre(const CallEvent &Call,
556  const FunctionData &FData,
557  CheckerContext &C) const {
558  // First, try generating a propagation rule for this function.
559  TaintPropagationRule Rule = TaintPropagationRule::getTaintPropagationRule(
560  this->CustomPropagations, FData, C);
561  if (!Rule.isNull()) {
562  ProgramStateRef State = Rule.process(Call, C);
563  if (State) {
564  C.addTransition(State);
565  return true;
566  }
567  }
568  return false;
569 }
570 
571 bool GenericTaintChecker::addFiltersPre(const CallEvent &Call,
572  const FunctionData &FData,
573  CheckerContext &C) const {
574  auto It = findFunctionInConfig(CustomFilters, FData);
575  if (It == CustomFilters.end())
576  return false;
577 
578  ProgramStateRef State = C.getState();
579  const auto &Value = It->second;
580  const ArgVector &Args = Value.second;
581  for (unsigned ArgNum : Args) {
582  if (ArgNum >= Call.getNumArgs())
583  continue;
584 
585  const Expr *Arg = Call.getArgExpr(ArgNum);
586  Optional<SVal> V = getPointeeOf(C, Arg);
587  if (V)
588  State = removeTaint(State, *V);
589  }
590 
591  if (State != C.getState()) {
592  C.addTransition(State);
593  return true;
594  }
595  return false;
596 }
597 
598 bool GenericTaintChecker::propagateFromPre(const CallEvent &Call,
599  CheckerContext &C) {
600  ProgramStateRef State = C.getState();
601 
602  // Depending on what was tainted at pre-visit, we determined a set of
603  // arguments which should be tainted after the function returns. These are
604  // stored in the state as TaintArgsOnPostVisit set.
605  TaintArgsOnPostVisitTy TaintArgs = State->get<TaintArgsOnPostVisit>();
606  if (TaintArgs.isEmpty())
607  return false;
608 
609  for (unsigned ArgNum : TaintArgs) {
610  // Special handling for the tainted return value.
611  if (ArgNum == ReturnValueIndex) {
612  State = addTaint(State, Call.getReturnValue());
613  continue;
614  }
615 
616  // The arguments are pointer arguments. The data they are pointing at is
617  // tainted after the call.
618  if (Call.getNumArgs() < (ArgNum + 1))
619  return false;
620  const Expr *Arg = Call.getArgExpr(ArgNum);
621  Optional<SVal> V = getPointeeOf(C, Arg);
622  if (V)
623  State = addTaint(State, *V);
624  }
625 
626  // Clear up the taint info from the state.
627  State = State->remove<TaintArgsOnPostVisit>();
628 
629  if (State != C.getState()) {
630  C.addTransition(State);
631  return true;
632  }
633  return false;
634 }
635 
636 bool GenericTaintChecker::checkPre(const CallEvent &Call,
637  const FunctionData &FData,
638  CheckerContext &C) const {
639  if (checkUncontrolledFormatString(Call, C))
640  return true;
641 
642  if (checkSystemCall(Call, FData.Name, C))
643  return true;
644 
645  if (checkTaintedBufferSize(Call, C))
646  return true;
647 
648  return checkCustomSinks(Call, FData, C);
649 }
650 
651 Optional<SVal> GenericTaintChecker::getPointeeOf(CheckerContext &C,
652  const Expr *Arg) {
653  ProgramStateRef State = C.getState();
654  SVal AddrVal = C.getSVal(Arg->IgnoreParens());
655  if (AddrVal.isUnknownOrUndef())
656  return None;
657 
658  Optional<Loc> AddrLoc = AddrVal.getAs<Loc>();
659  if (!AddrLoc)
660  return None;
661 
662  QualType ArgTy = Arg->getType().getCanonicalType();
663  if (!ArgTy->isPointerType())
664  return State->getSVal(*AddrLoc);
665 
666  QualType ValTy = ArgTy->getPointeeType();
667 
668  // Do not dereference void pointers. Treat them as byte pointers instead.
669  // FIXME: we might want to consider more than just the first byte.
670  if (ValTy->isVoidType())
671  ValTy = C.getASTContext().CharTy;
672 
673  return State->getSVal(*AddrLoc, ValTy);
674 }
675 
677 GenericTaintChecker::TaintPropagationRule::process(const CallEvent &Call,
678  CheckerContext &C) const {
679  ProgramStateRef State = C.getState();
680 
681  // Check for taint in arguments.
682  bool IsTainted = true;
683  for (unsigned ArgNum : SrcArgs) {
684  if (ArgNum >= Call.getNumArgs())
685  continue;
686 
687  if ((IsTainted =
688  isTaintedOrPointsToTainted(Call.getArgExpr(ArgNum), State, C)))
689  break;
690  }
691 
692  // Check for taint in variadic arguments.
693  if (!IsTainted && VariadicType::Src == VarType) {
694  // Check if any of the arguments is tainted
695  for (unsigned i = VariadicIndex; i < Call.getNumArgs(); ++i) {
696  if ((IsTainted =
697  isTaintedOrPointsToTainted(Call.getArgExpr(i), State, C)))
698  break;
699  }
700  }
701 
702  if (PropagationFunc)
703  IsTainted = PropagationFunc(IsTainted, Call, C);
704 
705  if (!IsTainted)
706  return State;
707 
708  // Mark the arguments which should be tainted after the function returns.
709  for (unsigned ArgNum : DstArgs) {
710  // Should mark the return value?
711  if (ArgNum == ReturnValueIndex) {
712  State = State->add<TaintArgsOnPostVisit>(ReturnValueIndex);
713  continue;
714  }
715 
716  if (ArgNum >= Call.getNumArgs())
717  continue;
718 
719  // Mark the given argument.
720  State = State->add<TaintArgsOnPostVisit>(ArgNum);
721  }
722 
723  // Mark all variadic arguments tainted if present.
724  if (VariadicType::Dst == VarType) {
725  // For all pointer and references that were passed in:
726  // If they are not pointing to const data, mark data as tainted.
727  // TODO: So far we are just going one level down; ideally we'd need to
728  // recurse here.
729  for (unsigned i = VariadicIndex; i < Call.getNumArgs(); ++i) {
730  const Expr *Arg = Call.getArgExpr(i);
731  // Process pointer argument.
732  const Type *ArgTy = Arg->getType().getTypePtr();
733  QualType PType = ArgTy->getPointeeType();
734  if ((!PType.isNull() && !PType.isConstQualified()) ||
735  (ArgTy->isReferenceType() && !Arg->getType().isConstQualified())) {
736  State = State->add<TaintArgsOnPostVisit>(i);
737  }
738  }
739  }
740 
741  return State;
742 }
743 
744 // If argument 0(protocol domain) is network, the return value should get taint.
745 bool GenericTaintChecker::TaintPropagationRule::postSocket(
746  bool /*IsTainted*/, const CallEvent &Call, CheckerContext &C) {
747  SourceLocation DomLoc = Call.getArgExpr(0)->getExprLoc();
748  StringRef DomName = C.getMacroNameOrSpelling(DomLoc);
749  // White list the internal communication protocols.
750  if (DomName.equals("AF_SYSTEM") || DomName.equals("AF_LOCAL") ||
751  DomName.equals("AF_UNIX") || DomName.equals("AF_RESERVED_36"))
752  return false;
753  return true;
754 }
755 
756 bool GenericTaintChecker::isStdin(const Expr *E, CheckerContext &C) {
757  ProgramStateRef State = C.getState();
758  SVal Val = C.getSVal(E);
759 
760  // stdin is a pointer, so it would be a region.
761  const MemRegion *MemReg = Val.getAsRegion();
762 
763  // The region should be symbolic, we do not know it's value.
764  const auto *SymReg = dyn_cast_or_null<SymbolicRegion>(MemReg);
765  if (!SymReg)
766  return false;
767 
768  // Get it's symbol and find the declaration region it's pointing to.
769  const auto *Sm = dyn_cast<SymbolRegionValue>(SymReg->getSymbol());
770  if (!Sm)
771  return false;
772  const auto *DeclReg = dyn_cast_or_null<DeclRegion>(Sm->getRegion());
773  if (!DeclReg)
774  return false;
775 
776  // This region corresponds to a declaration, find out if it's a global/extern
777  // variable named stdin with the proper type.
778  if (const auto *D = dyn_cast_or_null<VarDecl>(DeclReg->getDecl())) {
779  D = D->getCanonicalDecl();
780  if ((D->getName().find("stdin") != StringRef::npos) && D->isExternC()) {
781  const auto *PtrTy = dyn_cast<PointerType>(D->getType().getTypePtr());
782  if (PtrTy && PtrTy->getPointeeType().getCanonicalType() ==
783  C.getASTContext().getFILEType().getCanonicalType())
784  return true;
785  }
786  }
787  return false;
788 }
789 
790 static bool getPrintfFormatArgumentNum(const CallEvent &Call,
791  const CheckerContext &C,
792  unsigned &ArgNum) {
793  // Find if the function contains a format string argument.
794  // Handles: fprintf, printf, sprintf, snprintf, vfprintf, vprintf, vsprintf,
795  // vsnprintf, syslog, custom annotated functions.
796  const FunctionDecl *FDecl = Call.getDecl()->getAsFunction();
797  if (!FDecl)
798  return false;
799  for (const auto *Format : FDecl->specific_attrs<FormatAttr>()) {
800  ArgNum = Format->getFormatIdx() - 1;
801  if ((Format->getType()->getName() == "printf") &&
802  Call.getNumArgs() > ArgNum)
803  return true;
804  }
805 
806  // Or if a function is named setproctitle (this is a heuristic).
807  if (C.getCalleeName(FDecl).find("setproctitle") != StringRef::npos) {
808  ArgNum = 0;
809  return true;
810  }
811 
812  return false;
813 }
814 
815 bool GenericTaintChecker::generateReportIfTainted(const Expr *E, StringRef Msg,
816  CheckerContext &C) const {
817  assert(E);
818 
819  // Check for taint.
820  ProgramStateRef State = C.getState();
821  Optional<SVal> PointedToSVal = getPointeeOf(C, E);
822  SVal TaintedSVal;
823  if (PointedToSVal && isTainted(State, *PointedToSVal))
824  TaintedSVal = *PointedToSVal;
825  else if (isTainted(State, E, C.getLocationContext()))
826  TaintedSVal = C.getSVal(E);
827  else
828  return false;
829 
830  // Generate diagnostic.
831  if (ExplodedNode *N = C.generateNonFatalErrorNode()) {
832  initBugType();
833  auto report = std::make_unique<PathSensitiveBugReport>(*BT, Msg, N);
834  report->addRange(E->getSourceRange());
835  report->addVisitor(std::make_unique<TaintBugVisitor>(TaintedSVal));
836  C.emitReport(std::move(report));
837  return true;
838  }
839  return false;
840 }
841 
842 bool GenericTaintChecker::checkUncontrolledFormatString(
843  const CallEvent &Call, CheckerContext &C) const {
844  // Check if the function contains a format string argument.
845  unsigned ArgNum = 0;
846  if (!getPrintfFormatArgumentNum(Call, C, ArgNum))
847  return false;
848 
849  // If either the format string content or the pointer itself are tainted,
850  // warn.
851  return generateReportIfTainted(Call.getArgExpr(ArgNum),
852  MsgUncontrolledFormatString, C);
853 }
854 
855 bool GenericTaintChecker::checkSystemCall(const CallEvent &Call, StringRef Name,
856  CheckerContext &C) const {
857  // TODO: It might make sense to run this check on demand. In some cases,
858  // we should check if the environment has been cleansed here. We also might
859  // need to know if the user was reset before these calls(seteuid).
860  unsigned ArgNum = llvm::StringSwitch<unsigned>(Name)
861  .Case("system", 0)
862  .Case("popen", 0)
863  .Case("execl", 0)
864  .Case("execle", 0)
865  .Case("execlp", 0)
866  .Case("execv", 0)
867  .Case("execvp", 0)
868  .Case("execvP", 0)
869  .Case("execve", 0)
870  .Case("dlopen", 0)
871  .Default(InvalidArgIndex);
872 
873  if (ArgNum == InvalidArgIndex || Call.getNumArgs() < (ArgNum + 1))
874  return false;
875 
876  return generateReportIfTainted(Call.getArgExpr(ArgNum), MsgSanitizeSystemArgs,
877  C);
878 }
879 
880 // TODO: Should this check be a part of the CString checker?
881 // If yes, should taint be a global setting?
882 bool GenericTaintChecker::checkTaintedBufferSize(const CallEvent &Call,
883  CheckerContext &C) const {
884  const auto *FDecl = Call.getDecl()->getAsFunction();
885  // If the function has a buffer size argument, set ArgNum.
886  unsigned ArgNum = InvalidArgIndex;
887  unsigned BId = 0;
888  if ((BId = FDecl->getMemoryFunctionKind())) {
889  switch (BId) {
890  case Builtin::BImemcpy:
891  case Builtin::BImemmove:
892  case Builtin::BIstrncpy:
893  ArgNum = 2;
894  break;
895  case Builtin::BIstrndup:
896  ArgNum = 1;
897  break;
898  default:
899  break;
900  }
901  }
902 
903  if (ArgNum == InvalidArgIndex) {
904  using CCtx = CheckerContext;
905  if (CCtx::isCLibraryFunction(FDecl, "malloc") ||
906  CCtx::isCLibraryFunction(FDecl, "calloc") ||
907  CCtx::isCLibraryFunction(FDecl, "alloca"))
908  ArgNum = 0;
909  else if (CCtx::isCLibraryFunction(FDecl, "memccpy"))
910  ArgNum = 3;
911  else if (CCtx::isCLibraryFunction(FDecl, "realloc"))
912  ArgNum = 1;
913  else if (CCtx::isCLibraryFunction(FDecl, "bcopy"))
914  ArgNum = 2;
915  }
916 
917  return ArgNum != InvalidArgIndex && Call.getNumArgs() > ArgNum &&
918  generateReportIfTainted(Call.getArgExpr(ArgNum), MsgTaintedBufferSize,
919  C);
920 }
921 
922 bool GenericTaintChecker::checkCustomSinks(const CallEvent &Call,
923  const FunctionData &FData,
924  CheckerContext &C) const {
925  auto It = findFunctionInConfig(CustomSinks, FData);
926  if (It == CustomSinks.end())
927  return false;
928 
929  const auto &Value = It->second;
930  const GenericTaintChecker::ArgVector &Args = Value.second;
931  for (unsigned ArgNum : Args) {
932  if (ArgNum >= Call.getNumArgs())
933  continue;
934 
935  if (generateReportIfTainted(Call.getArgExpr(ArgNum), MsgCustomSink, C))
936  return true;
937  }
938 
939  return false;
940 }
941 
942 void ento::registerGenericTaintChecker(CheckerManager &Mgr) {
943  auto *Checker = Mgr.registerChecker<GenericTaintChecker>();
944  std::string Option{"Config"};
945  StringRef ConfigFile =
946  Mgr.getAnalyzerOptions().getCheckerStringOption(Checker, Option);
948  getConfiguration<TaintConfig>(Mgr, Checker, Option, ConfigFile);
949  if (Config)
950  Checker->parseConfiguration(Mgr, Option, std::move(Config.getValue()));
951 }
952 
953 bool ento::shouldRegisterGenericTaintChecker(const CheckerManager &mgr) {
954  return true;
955 }
Represents a function declaration or definition.
Definition: Decl.h:1783
static void enumeration(IO &IO, GenericTaintChecker::VariadicType &Value)
unsigned getMemoryFunctionKind() const
Identify a memory copying or setting function.
Definition: Decl.cpp:3889
PointerType - C99 6.7.5.1 - Pointer Declarators.
Definition: Type.h:2645
A (possibly-)qualified type.
Definition: Type.h:655
Specialize PointerLikeTypeTraits to allow LazyGenerationalUpdatePtr to be placed into a PointerUnion...
Definition: Dominators.h:30
QualType getPointeeType() const
If this is a pointer, ObjC object pointer, or block pointer, this returns the respective pointee...
Definition: Type.cpp:627
IntrusiveRefCntPtr< const ProgramState > ProgramStateRef
StringRef P
The base class of the type hierarchy.
Definition: Type.h:1472
LineState State
static void mapping(IO &IO, TaintConfig::Propagation &Propagation)
Definition: Format.h:2679
bool isReferenceType() const
Definition: Type.h:6662
Scope - A scope is a transient data structure that is used while parsing the program.
Definition: Scope.h:40
const Type * getTypePtr() const
Retrieves a pointer to the underlying (unqualified) type.
Definition: Type.h:6402
GenericTaintChecker::TaintConfiguration TaintConfig
void printTaint(ProgramStateRef State, raw_ostream &Out, const char *nl="\n", const char *sep="")
This represents one expression.
Definition: Expr.h:110
static bool isCLibraryFunction(const FunctionDecl *FD, StringRef Name=StringRef())
Returns true if the callee is an externally-visible function in the top-level namespace, such as malloc.
#define V(N, I)
Definition: ASTContext.h:2899
#define bool
Definition: stdbool.h:15
QualType getType() const
Definition: Expr.h:142
bool isNull() const
Return true if this QualType doesn't point to a type yet.
Definition: Type.h:720
static void mapping(IO &IO, TaintConfig::NameScopeArgs &NSA)
static void mapping(IO &IO, TaintConfig &Config)
bool isConstQualified() const
Determine whether this type is const-qualified.
Definition: Type.h:6461
QualType getCanonicalType() const
Definition: Type.h:6441
Encodes a location in the source.
bool isTainted(ProgramStateRef State, const Stmt *S, const LocationContext *LCtx, TaintTagType Kind=TaintTagGeneric)
Check if the statement has a tainted value in the given state.
LLVM_NODISCARD ProgramStateRef addTaint(ProgramStateRef State, const Stmt *S, const LocationContext *LCtx, TaintTagType Kind=TaintTagGeneric)
Create a new state in which the value of the statement is marked as tainted.
constexpr XRayInstrMask None
Definition: XRayInstr.h:38
#define REGISTER_SET_WITH_PROGRAMSTATE(Name, Elem)
Declares an immutable set of type NameTy, suitable for placement into the ProgramState.
Dataflow Directional Tag Classes.
std::unique_ptr< DiagnosticConsumer > create(StringRef OutputFile, DiagnosticOptions *Diags, bool MergeChildRecords=false)
Returns a DiagnosticConsumer that serializes diagnostics to a bitcode file.
Kind getKind() const
Definition: DeclBase.h:433
__DEVICE__ int max(int __a, int __b)
llvm::iterator_range< specific_attr_iterator< T > > specific_attrs() const
Definition: DeclBase.h:529
bool isVoidType() const
Definition: Type.h:6933
LLVM_NODISCARD ProgramStateRef removeTaint(ProgramStateRef State, SVal V)
SourceRange getSourceRange() const LLVM_READONLY
SourceLocation tokens are not useful in isolation - they are low level value objects created/interpre...
Definition: Stmt.cpp:263
std::string getQualifiedNameAsString() const
Definition: Decl.cpp:1530
bool isPointerType() const
Definition: Type.h:6650
std::error_code parseConfiguration(StringRef Text, FormatStyle *Style)
Parse configuration from YAML-formatted text.
Definition: Format.cpp:1278
static bool getPrintfFormatArgumentNum(const CallEvent &Call, const CheckerContext &C, unsigned &ArgNum)
Defines enum values for all the target-independent builtin functions.
Expr * IgnoreParens() LLVM_READONLY
Skip past any parentheses which might surround this expression until reaching a fixed point...
Definition: Expr.cpp:2943