29#include "llvm/ADT/StringExtras.h"
30#include "llvm/Support/YAMLTraits.h"
38#define DEBUG_TYPE "taint-checker"
44using llvm::ImmutableSet;
48class GenericTaintChecker;
51constexpr llvm::StringLiteral MsgUncontrolledFormatString =
52 "Untrusted data is used as a format string "
53 "(CWE-134: Uncontrolled Format String)";
58constexpr llvm::StringLiteral MsgSanitizeSystemArgs =
59 "Untrusted data is passed to a system call "
60 "(CERT/STR02-C. Sanitize data passed to complex subsystems)";
64constexpr llvm::StringLiteral MsgTaintedBufferSize =
65 "Untrusted data is used to specify the buffer size "
66 "(CERT/STR31-C. Guarantee that storage for strings has sufficient space "
67 "for character data and the null terminator)";
70constexpr llvm::StringLiteral MsgCustomSink =
71 "Untrusted data is passed to a user-defined sink";
77constexpr ArgIdxTy ReturnValueIndex{-1};
79static ArgIdxTy fromArgumentCount(
unsigned Count) {
81 static_cast<std::size_t
>(std::numeric_limits<ArgIdxTy>::max()) &&
82 "ArgIdxTy is not large enough to represent the number of arguments.");
93 const auto *SymReg = dyn_cast_or_null<SymbolicRegion>(Val.
getAsRegion());
99 dyn_cast_or_null<DeclRegion>(SymReg->getSymbol()->getOriginRegion());
105 if (
const auto *D = dyn_cast_or_null<VarDecl>(DeclReg->getDecl())) {
106 D = D->getCanonicalDecl();
107 if (D->getName() ==
"stdin" && D->hasExternalStorage() && D->isExternC()) {
109 const QualType Ty = D->getType().getCanonicalType();
119 const QualType ArgTy = LValue.
getType(State->getStateManager().getContext());
121 return State->getSVal(LValue);
125 return State->getSVal(LValue, State->getStateManager().getContext().CharTy);
131 return getPointeeOf(State, *LValue);
140 if (
auto Pointee = getPointeeOf(State, Arg))
150 return getTaintedPointeeOrPointer(State, ExprSVal).has_value();
157 std::vector<SymbolRef> TaintedSymbols,
158 std::vector<ArgIdxTy> TaintedArgs,
160 return C.getNoteTag([TaintedSymbols = std::move(TaintedSymbols),
161 TaintedArgs = std::move(TaintedArgs), CallLocation](
169 if (TaintedSymbols.empty())
170 return "Taint originated here";
172 for (
auto Sym : TaintedSymbols) {
175 LLVM_DEBUG(
for (
auto Arg
177 llvm::dbgs() <<
"Taint Propagated from argument " << Arg + 1 <<
"\n";
186const NoteTag *taintPropagationExplainerTag(
188 std::vector<ArgIdxTy> TaintedArgs,
const LocationContext *CallLocation) {
189 assert(TaintedSymbols.size() == TaintedArgs.size());
190 return C.getNoteTag([TaintedSymbols = std::move(TaintedSymbols),
191 TaintedArgs = std::move(TaintedArgs), CallLocation](
194 llvm::raw_svector_ostream Out(Msg);
196 if (TaintedSymbols.empty() ||
200 int nofTaintedArgs = 0;
201 for (
auto [Idx, Sym] : llvm::enumerate(TaintedSymbols)) {
204 if (TaintedArgs[Idx] != ReturnValueIndex) {
205 LLVM_DEBUG(llvm::dbgs() <<
"Taint Propagated to argument "
206 << TaintedArgs[Idx] + 1 <<
"\n");
207 if (nofTaintedArgs == 0)
208 Out <<
"Taint propagated to the ";
211 Out << TaintedArgs[Idx] + 1
212 << llvm::getOrdinalSuffix(TaintedArgs[Idx] + 1) <<
" argument";
215 LLVM_DEBUG(llvm::dbgs() <<
"Taint Propagated to return value.\n");
216 Out <<
"Taint propagated to the return value";
220 return std::string(Out.str());
230 ArgSet(ArgVecTy &&DiscreteArgs,
231 std::optional<ArgIdxTy> VariadicIndex = std::nullopt)
232 : DiscreteArgs(std::move(DiscreteArgs)),
233 VariadicIndex(std::move(VariadicIndex)) {}
235 bool contains(ArgIdxTy ArgIdx)
const {
236 if (llvm::is_contained(DiscreteArgs, ArgIdx))
239 return VariadicIndex && ArgIdx >= *VariadicIndex;
242 bool isEmpty()
const {
return DiscreteArgs.empty() && !VariadicIndex; }
245 ArgVecTy DiscreteArgs;
246 std::optional<ArgIdxTy> VariadicIndex;
254class GenericTaintRule {
267 std::optional<StringRef> SinkMsg;
269 GenericTaintRule() =
default;
271 GenericTaintRule(ArgSet &&Sink, ArgSet &&Filter, ArgSet &&Src, ArgSet &&Dst,
272 std::optional<StringRef> SinkMsg = std::nullopt)
273 : SinkArgs(std::move(Sink)), FilterArgs(std::move(Filter)),
274 PropSrcArgs(std::move(Src)), PropDstArgs(std::move(Dst)),
280 static GenericTaintRule Sink(ArgSet &&SinkArgs,
281 std::optional<StringRef> Msg = std::nullopt) {
282 return {std::move(SinkArgs), {}, {}, {}, Msg};
286 static GenericTaintRule Filter(ArgSet &&FilterArgs) {
287 return {{}, std::move(FilterArgs), {}, {}};
292 static GenericTaintRule Source(ArgSet &&SourceArgs) {
293 return {{}, {}, {}, std::move(SourceArgs)};
297 static GenericTaintRule Prop(ArgSet &&SrcArgs, ArgSet &&DstArgs) {
298 return {{}, {}, std::move(SrcArgs), std::move(DstArgs)};
302 static GenericTaintRule
303 SinkProp(ArgSet &&SinkArgs, ArgSet &&SrcArgs, ArgSet &&DstArgs,
304 std::optional<StringRef> Msg = std::nullopt) {
306 std::move(SinkArgs), {}, std::move(SrcArgs), std::move(DstArgs), Msg};
316 return ArgIdx == ReturnValueIndex ?
Call.getOriginExpr()
317 :
Call.getArgExpr(ArgIdx);
327struct TaintConfiguration {
328 using NameScopeArgs = std::tuple<std::string, std::string, ArgVecTy>;
329 enum class VariadicType {
None, Src, Dst };
336 struct Sink : Common {
340 struct Filter : Common {
344 struct Propagation : Common {
347 VariadicType VarType;
351 std::vector<Propagation> Propagations;
352 std::vector<Filter> Filters;
353 std::vector<Sink> Sinks;
355 TaintConfiguration() =
default;
356 TaintConfiguration(
const TaintConfiguration &) =
default;
357 TaintConfiguration(TaintConfiguration &&) =
default;
358 TaintConfiguration &operator=(
const TaintConfiguration &) =
default;
359 TaintConfiguration &operator=(TaintConfiguration &&) =
default;
362struct GenericTaintRuleParser {
367 using RulesContTy = std::vector<std::pair<CallDescription, GenericTaintRule>>;
368 RulesContTy parseConfiguration(
const std::string &Option,
369 TaintConfiguration &&Config)
const;
376 void validateArgVector(
const std::string &Option,
const ArgVecTy &Args)
const;
378 template <
typename Config>
static NamePartsTy parseNameParts(
const Config &
C);
382 template <
typename Config>
383 static void consumeRulesFromConfig(
const Config &
C, GenericTaintRule &&Rule,
386 void parseConfig(
const std::string &Option, TaintConfiguration::Sink &&
P,
387 RulesContTy &Rules)
const;
388 void parseConfig(
const std::string &Option, TaintConfiguration::Filter &&
P,
389 RulesContTy &Rules)
const;
390 void parseConfig(
const std::string &Option,
391 TaintConfiguration::Propagation &&
P,
392 RulesContTy &Rules)
const;
397class GenericTaintChecker :
public Checker<check::PreCall, check::PostCall> {
403 const char *Sep)
const override;
406 bool generateReportIfTainted(
const Expr *E, StringRef Msg,
429 mutable std::optional<RuleLookupTy> StaticTaintRules;
430 mutable std::optional<RuleLookupTy> DynamicTaintRules;
435LLVM_YAML_IS_SEQUENCE_VECTOR(TaintConfiguration::Sink)
436LLVM_YAML_IS_SEQUENCE_VECTOR(TaintConfiguration::Filter)
437LLVM_YAML_IS_SEQUENCE_VECTOR(TaintConfiguration::Propagation)
441template <>
struct MappingTraits<TaintConfiguration> {
442 static void mapping(IO &IO, TaintConfiguration &Config) {
443 IO.mapOptional(
"Propagations", Config.Propagations);
444 IO.mapOptional(
"Filters", Config.Filters);
445 IO.mapOptional(
"Sinks", Config.Sinks);
449template <>
struct MappingTraits<TaintConfiguration::Sink> {
450 static void mapping(IO &IO, TaintConfiguration::Sink &Sink) {
451 IO.mapRequired(
"Name", Sink.Name);
452 IO.mapOptional(
"Scope", Sink.Scope);
453 IO.mapRequired(
"Args", Sink.SinkArgs);
457template <>
struct MappingTraits<TaintConfiguration::Filter> {
458 static void mapping(IO &IO, TaintConfiguration::Filter &Filter) {
459 IO.mapRequired(
"Name", Filter.Name);
460 IO.mapOptional(
"Scope", Filter.Scope);
461 IO.mapRequired(
"Args", Filter.FilterArgs);
465template <>
struct MappingTraits<TaintConfiguration::Propagation> {
466 static void mapping(IO &IO, TaintConfiguration::Propagation &Propagation) {
467 IO.mapRequired(
"Name", Propagation.Name);
468 IO.mapOptional(
"Scope", Propagation.Scope);
469 IO.mapOptional(
"SrcArgs", Propagation.SrcArgs);
470 IO.mapOptional(
"DstArgs", Propagation.DstArgs);
471 IO.mapOptional(
"VariadicType", Propagation.VarType);
472 IO.mapOptional(
"VariadicIndex", Propagation.VarIndex);
476template <>
struct ScalarEnumerationTraits<TaintConfiguration::VariadicType> {
478 IO.enumCase(
Value,
"None", TaintConfiguration::VariadicType::None);
479 IO.enumCase(
Value,
"Src", TaintConfiguration::VariadicType::Src);
480 IO.enumCase(
Value,
"Dst", TaintConfiguration::VariadicType::Dst);
491 ImmutableSet<ArgIdxTy>)
494void GenericTaintRuleParser::validateArgVector(
const std::string &Option,
495 const ArgVecTy &Args)
const {
496 for (ArgIdxTy Arg : Args) {
497 if (Arg < ReturnValueIndex) {
498 Mgr.reportInvalidCheckerOptionValue(
499 Mgr.getChecker<GenericTaintChecker>(), Option,
500 "an argument number for propagation rules greater or equal to -1");
505template <
typename Config>
507GenericTaintRuleParser::parseNameParts(
const Config &
C) {
508 NamePartsTy NameParts;
509 if (!
C.Scope.empty()) {
512 StringRef{
C.Scope}.split(NameParts,
"::", -1,
515 NameParts.emplace_back(
C.Name);
519template <
typename Config>
520void GenericTaintRuleParser::consumeRulesFromConfig(
const Config &
C,
521 GenericTaintRule &&Rule,
522 RulesContTy &Rules) {
523 NamePartsTy NameParts = parseNameParts(
C);
527void GenericTaintRuleParser::parseConfig(
const std::string &Option,
528 TaintConfiguration::Sink &&S,
529 RulesContTy &Rules)
const {
530 validateArgVector(Option, S.SinkArgs);
531 consumeRulesFromConfig(S, GenericTaintRule::Sink(std::move(S.SinkArgs)),
535void GenericTaintRuleParser::parseConfig(
const std::string &Option,
536 TaintConfiguration::Filter &&S,
537 RulesContTy &Rules)
const {
538 validateArgVector(Option, S.FilterArgs);
539 consumeRulesFromConfig(S, GenericTaintRule::Filter(std::move(S.FilterArgs)),
543void GenericTaintRuleParser::parseConfig(
const std::string &Option,
544 TaintConfiguration::Propagation &&
P,
545 RulesContTy &Rules)
const {
546 validateArgVector(Option,
P.SrcArgs);
547 validateArgVector(Option,
P.DstArgs);
548 bool IsSrcVariadic =
P.VarType == TaintConfiguration::VariadicType::Src;
549 bool IsDstVariadic =
P.VarType == TaintConfiguration::VariadicType::Dst;
550 std::optional<ArgIdxTy> JustVarIndex =
P.VarIndex;
552 ArgSet SrcDesc(std::move(
P.SrcArgs),
553 IsSrcVariadic ? JustVarIndex : std::nullopt);
554 ArgSet DstDesc(std::move(
P.DstArgs),
555 IsDstVariadic ? JustVarIndex : std::nullopt);
557 consumeRulesFromConfig(
558 P, GenericTaintRule::Prop(std::move(SrcDesc), std::move(DstDesc)), Rules);
561GenericTaintRuleParser::RulesContTy
562GenericTaintRuleParser::parseConfiguration(
const std::string &Option,
563 TaintConfiguration &&Config)
const {
567 for (
auto &F : Config.Filters)
568 parseConfig(Option, std::move(F), Rules);
570 for (
auto &S : Config.Sinks)
571 parseConfig(Option, std::move(S), Rules);
573 for (
auto &
P : Config.Propagations)
574 parseConfig(Option, std::move(
P), Rules);
583 if (StaticTaintRules || DynamicTaintRules)
586 using RulesConstructionTy =
587 std::vector<std::pair<CallDescription, GenericTaintRule>>;
588 using TR = GenericTaintRule;
592 RulesConstructionTy GlobalCRules{
594 {{{
"fdopen"}}, TR::Source({{ReturnValueIndex}})},
595 {{{
"fopen"}}, TR::Source({{ReturnValueIndex}})},
596 {{{
"freopen"}}, TR::Source({{ReturnValueIndex}})},
597 {{{
"getch"}}, TR::Source({{ReturnValueIndex}})},
598 {{{
"getchar"}}, TR::Source({{ReturnValueIndex}})},
599 {{{
"getchar_unlocked"}}, TR::Source({{ReturnValueIndex}})},
600 {{{
"gets"}}, TR::Source({{0}, ReturnValueIndex})},
601 {{{
"gets_s"}}, TR::Source({{0}, ReturnValueIndex})},
602 {{{
"scanf"}}, TR::Source({{}, 1})},
603 {{{
"scanf_s"}}, TR::Source({{}, {1}})},
604 {{{
"wgetch"}}, TR::Source({{}, ReturnValueIndex})},
610 {{{
"_IO_getc"}}, TR::Source({{ReturnValueIndex}})},
611 {{{
"getcwd"}}, TR::Source({{0, ReturnValueIndex}})},
612 {{{
"getwd"}}, TR::Source({{0, ReturnValueIndex}})},
613 {{{
"readlink"}}, TR::Source({{1, ReturnValueIndex}})},
614 {{{
"readlinkat"}}, TR::Source({{2, ReturnValueIndex}})},
615 {{{
"get_current_dir_name"}}, TR::Source({{ReturnValueIndex}})},
616 {{{
"gethostname"}}, TR::Source({{0}})},
617 {{{
"getnameinfo"}}, TR::Source({{2, 4}})},
618 {{{
"getseuserbyname"}}, TR::Source({{1, 2}})},
619 {{{
"getgroups"}}, TR::Source({{1, ReturnValueIndex}})},
620 {{{
"getlogin"}}, TR::Source({{ReturnValueIndex}})},
621 {{{
"getlogin_r"}}, TR::Source({{0}})},
624 {{{
"accept"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
625 {{{
"atoi"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
626 {{{
"atol"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
627 {{{
"atoll"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
628 {{{
"fgetc"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
629 {{{
"fgetln"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
630 {{{
"fgets"}}, TR::Prop({{2}}, {{0, ReturnValueIndex}})},
631 {{{
"fgetws"}}, TR::Prop({{2}}, {{0, ReturnValueIndex}})},
632 {{{
"fscanf"}}, TR::Prop({{0}}, {{}, 2})},
633 {{{
"fscanf_s"}}, TR::Prop({{0}}, {{}, {2}})},
634 {{{
"sscanf"}}, TR::Prop({{0}}, {{}, 2})},
636 {{{
"getc"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
637 {{{
"getc_unlocked"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
638 {{{
"getdelim"}}, TR::Prop({{3}}, {{0}})},
639 {{{
"getline"}}, TR::Prop({{2}}, {{0}})},
640 {{{
"getw"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
641 {{{
"pread"}}, TR::Prop({{0, 1, 2, 3}}, {{1, ReturnValueIndex}})},
642 {{{
"read"}}, TR::Prop({{0, 2}}, {{1, ReturnValueIndex}})},
643 {{{
"strchr"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
644 {{{
"strrchr"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
645 {{{
"tolower"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
646 {{{
"toupper"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
647 {{{
"fread"}}, TR::Prop({{3}}, {{0, ReturnValueIndex}})},
648 {{{
"recv"}}, TR::Prop({{0}}, {{1, ReturnValueIndex}})},
649 {{{
"recvfrom"}}, TR::Prop({{0}}, {{1, ReturnValueIndex}})},
651 {{{
"ttyname"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
652 {{{
"ttyname_r"}}, TR::Prop({{0}}, {{1, ReturnValueIndex}})},
654 {{{
"basename"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
655 {{{
"dirname"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
656 {{{
"fnmatch"}}, TR::Prop({{1}}, {{ReturnValueIndex}})},
657 {{{
"memchr"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
658 {{{
"memrchr"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
659 {{{
"rawmemchr"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
661 {{{
"mbtowc"}}, TR::Prop({{1}}, {{0, ReturnValueIndex}})},
662 {{{
"wctomb"}}, TR::Prop({{1}}, {{0, ReturnValueIndex}})},
663 {{{
"wcwidth"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
665 {{{
"memcmp"}}, TR::Prop({{0, 1}}, {{ReturnValueIndex}})},
666 {{{
"memcpy"}}, TR::Prop({{1}}, {{0, ReturnValueIndex}})},
667 {{{
"memmove"}}, TR::Prop({{1}}, {{0, ReturnValueIndex}})},
673 {{{
"memmem"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
676 {{{
"strstr"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
677 {{{
"strcasestr"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
679 {{{
"strchrnul"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
681 {{{
"index"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
682 {{{
"rindex"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
686 {{{
"qsort"}}, TR::Prop({{0}}, {{0}})},
687 {{{
"qsort_r"}}, TR::Prop({{0}}, {{0}})},
689 {{{
"strcmp"}}, TR::Prop({{0, 1}}, {{ReturnValueIndex}})},
690 {{{
"strcasecmp"}}, TR::Prop({{0, 1}}, {{ReturnValueIndex}})},
691 {{{
"strncmp"}}, TR::Prop({{0, 1, 2}}, {{ReturnValueIndex}})},
692 {{{
"strncasecmp"}}, TR::Prop({{0, 1, 2}}, {{ReturnValueIndex}})},
693 {{{
"strspn"}}, TR::Prop({{0, 1}}, {{ReturnValueIndex}})},
694 {{{
"strcspn"}}, TR::Prop({{0, 1}}, {{ReturnValueIndex}})},
695 {{{
"strpbrk"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
696 {{{
"strndup"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
697 {{{
"strndupa"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
702 {{{
"strtol"}}, TR::Prop({{0}}, {{1, ReturnValueIndex}})},
703 {{{
"strtoll"}}, TR::Prop({{0}}, {{1, ReturnValueIndex}})},
704 {{{
"strtoul"}}, TR::Prop({{0}}, {{1, ReturnValueIndex}})},
705 {{{
"strtoull"}}, TR::Prop({{0}}, {{1, ReturnValueIndex}})},
707 {{{
"isalnum"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
708 {{{
"isalpha"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
709 {{{
"isascii"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
710 {{{
"isblank"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
711 {{{
"iscntrl"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
712 {{{
"isdigit"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
713 {{{
"isgraph"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
714 {{{
"islower"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
715 {{{
"isprint"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
716 {{{
"ispunct"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
717 {{{
"isspace"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
718 {{{
"isupper"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
719 {{{
"isxdigit"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
722 TR::Prop({{1, 2}}, {{0, ReturnValueIndex}})},
724 TR::Prop({{1, 2}}, {{0}})},
726 TR::Prop({{1, 2}}, {{0}})},
728 TR::Prop({{1}, 3}, {{0, ReturnValueIndex}})},
730 TR::Prop({{1}, 2}, {{0, ReturnValueIndex}})},
732 TR::Prop({{1}}, {{0, ReturnValueIndex}})},
734 TR::Prop({{1}}, {{0, ReturnValueIndex}})},
736 TR::Prop({{1}}, {{0, ReturnValueIndex}})},
738 TR::Prop({{1}}, {{0, ReturnValueIndex}})},
741 TR::Prop({{0}}, {{ReturnValueIndex}})},
745 {{{
"system"}}, TR::Sink({{0}}, MsgSanitizeSystemArgs)},
746 {{{
"popen"}}, TR::Sink({{0}}, MsgSanitizeSystemArgs)},
747 {{{
"execl"}}, TR::Sink({{}, {0}}, MsgSanitizeSystemArgs)},
748 {{{
"execle"}}, TR::Sink({{}, {0}}, MsgSanitizeSystemArgs)},
749 {{{
"execlp"}}, TR::Sink({{}, {0}}, MsgSanitizeSystemArgs)},
750 {{{
"execv"}}, TR::Sink({{0, 1}}, MsgSanitizeSystemArgs)},
751 {{{
"execve"}}, TR::Sink({{0, 1, 2}}, MsgSanitizeSystemArgs)},
752 {{{
"fexecve"}}, TR::Sink({{0, 1, 2}}, MsgSanitizeSystemArgs)},
753 {{{
"execvp"}}, TR::Sink({{0, 1}}, MsgSanitizeSystemArgs)},
754 {{{
"execvpe"}}, TR::Sink({{0, 1, 2}}, MsgSanitizeSystemArgs)},
755 {{{
"dlopen"}}, TR::Sink({{0}}, MsgSanitizeSystemArgs)},
760 TR::Sink({{3}}, MsgTaintedBufferSize)},
762 TR::Sink({{1}}, MsgTaintedBufferSize)},
763 {{{{
"setproctitle"}}}, TR::Sink({{0}, 1}, MsgUncontrolledFormatString)},
764 {{{{
"setproctitle_fast"}}},
765 TR::Sink({{0}, 1}, MsgUncontrolledFormatString)},
769 TR::SinkProp({{2}}, {{1, 2}}, {{0, ReturnValueIndex}},
770 MsgTaintedBufferSize)},
772 TR::SinkProp({{2}}, {{1, 2}}, {{0, ReturnValueIndex}},
773 MsgTaintedBufferSize)},
775 TR::SinkProp({{2}}, {{1, 2}}, {{0, ReturnValueIndex}},
776 MsgTaintedBufferSize)},
778 TR::SinkProp({{1}}, {{0, 1}}, {{ReturnValueIndex}},
779 MsgTaintedBufferSize)},
781 TR::SinkProp({{2}}, {{0, 2}}, {{1}}, MsgTaintedBufferSize)}};
784 if (TR::UntrustedEnv(
C)) {
786 GlobalCRules.push_back(
787 {{{
"setproctitle_init"}}, TR::Sink({{1, 2}}, MsgCustomSink)});
788 GlobalCRules.push_back({{{
"getenv"}}, TR::Source({{ReturnValueIndex}})});
791 StaticTaintRules.emplace(std::make_move_iterator(GlobalCRules.begin()),
792 std::make_move_iterator(GlobalCRules.end()));
797 GenericTaintRuleParser ConfigParser{*Mgr};
798 std::string Option{
"Config"};
799 StringRef ConfigFile =
801 std::optional<TaintConfiguration> Config =
802 getConfiguration<TaintConfiguration>(*Mgr,
this, Option, ConfigFile);
805 DynamicTaintRules = RuleLookupTy{};
809 GenericTaintRuleParser::RulesContTy Rules{
810 ConfigParser.parseConfiguration(Option, std::move(*Config))};
812 DynamicTaintRules.emplace(std::make_move_iterator(Rules.begin()),
813 std::make_move_iterator(Rules.end()));
821 if (
const auto *Rule =
822 Call.isGlobalCFunction() ? StaticTaintRules->lookup(
Call) :
nullptr)
823 Rule->process(*
this,
Call,
C);
824 else if (
const auto *Rule = DynamicTaintRules->lookup(
Call))
825 Rule->process(*
this,
Call,
C);
832 checkUncontrolledFormatString(
Call,
C);
836 taintUnsafeSocketProtocol(
Call,
C);
839void GenericTaintChecker::checkPostCall(
const CallEvent &
Call,
849 TaintArgsOnPostVisitTy TaintArgsMap = State->get<TaintArgsOnPostVisit>();
851 const ImmutableSet<ArgIdxTy> *TaintArgs = TaintArgsMap.lookup(CurrentFrame);
854 assert(!TaintArgs->isEmpty());
856 LLVM_DEBUG(
for (ArgIdxTy I
858 llvm::dbgs() <<
"PostCall<";
859 Call.dump(llvm::dbgs());
860 llvm::dbgs() <<
"> actually wants to taint arg index: " << I <<
'\n';
863 const NoteTag *InjectionTag =
nullptr;
864 std::vector<SymbolRef> TaintedSymbols;
865 std::vector<ArgIdxTy> TaintedIndexes;
866 for (ArgIdxTy ArgNum : *TaintArgs) {
868 if (ArgNum == ReturnValueIndex) {
870 std::vector<SymbolRef> TaintedSyms =
872 if (!TaintedSyms.empty()) {
873 TaintedSymbols.push_back(TaintedSyms[0]);
874 TaintedIndexes.push_back(ArgNum);
880 if (
auto V = getPointeeOf(State,
Call.getArgSVal(ArgNum))) {
883 if (!TaintedSyms.empty()) {
884 TaintedSymbols.push_back(TaintedSyms[0]);
885 TaintedIndexes.push_back(ArgNum);
891 InjectionTag = taintPropagationExplainerTag(
C, TaintedSymbols, TaintedIndexes,
892 Call.getCalleeStackFrame(0));
894 State = State->remove<TaintArgsOnPostVisit>(CurrentFrame);
895 C.addTransition(State, InjectionTag);
898void GenericTaintChecker::printState(raw_ostream &Out,
ProgramStateRef State,
899 const char *NL,
const char *Sep)
const {
903void GenericTaintRule::process(
const GenericTaintChecker &
Checker,
906 const ArgIdxTy CallNumArgs = fromArgumentCount(
Call.getNumArgs());
909 const auto ForEachCallArg = [&
C, &
Call, CallNumArgs](
auto &&Fun) {
910 for (ArgIdxTy I = ReturnValueIndex; I < CallNumArgs; ++I) {
911 const Expr *E = GetArgExpr(I,
Call);
912 Fun(I, E,
C.getSVal(E));
917 ForEachCallArg([
this, &
Checker, &
C, &State](ArgIdxTy I,
const Expr *E,
SVal) {
919 if (isStdin(
C.getSVal(E),
C.getASTContext())) {
920 State = addTaint(State, C.getSVal(E));
922 if (SinkArgs.contains(I) && isTaintedOrPointsToTainted(State,
C.getSVal(E)))
923 Checker.generateReportIfTainted(E, SinkMsg.value_or(MsgCustomSink),
C);
927 ForEachCallArg([
this, &State](ArgIdxTy I,
const Expr *E,
SVal S) {
928 if (FilterArgs.contains(I)) {
929 State = removeTaint(State, S);
930 if (auto P = getPointeeOf(State, S))
931 State = removeTaint(State, *P);
940 bool IsMatching = PropSrcArgs.isEmpty();
941 std::vector<SymbolRef> TaintedSymbols;
942 std::vector<ArgIdxTy> TaintedIndexes;
943 ForEachCallArg([
this, &
C, &IsMatching, &State, &TaintedSymbols,
944 &TaintedIndexes](ArgIdxTy I,
const Expr *E,
SVal) {
945 std::optional<SVal> TaintedSVal =
946 getTaintedPointeeOrPointer(State,
C.getSVal(E));
948 IsMatching || (PropSrcArgs.contains(I) && TaintedSVal.has_value());
951 if (TaintedSVal && !isStdin(*TaintedSVal,
C.getASTContext())) {
952 std::vector<SymbolRef> TaintedArgSyms =
954 if (!TaintedArgSyms.empty()) {
955 llvm::append_range(TaintedSymbols, TaintedArgSyms);
956 TaintedIndexes.push_back(I);
970 const bool IsNonConstRef = Ty->isReferenceType() && !Ty.isConstQualified();
971 const bool IsNonConstPtr =
972 Ty->isPointerType() && !Ty->getPointeeType().isConstQualified();
974 return IsNonConstRef || IsNonConstPtr;
978 auto &F = State->getStateManager().get_context<ArgIdxFactory>();
979 ImmutableSet<ArgIdxTy> Result = F.getEmptySet();
982 if (PropDstArgs.contains(I)) {
983 LLVM_DEBUG(llvm::dbgs() <<
"PreCall<"; Call.dump(llvm::dbgs());
985 <<
"> prepares tainting arg index: " << I <<
'\n';);
986 Result = F.add(Result, I);
995 if (WouldEscape(
V, E->
getType()) && getTaintedPointeeOrPointer(State,
V)) {
996 LLVM_DEBUG(
if (!Result.contains(I)) {
997 llvm::dbgs() <<
"PreCall<";
998 Call.dump(llvm::dbgs());
999 llvm::dbgs() <<
"> prepares tainting arg index: " << I <<
'\n';
1001 Result = F.
add(Result, I);
1005 if (!Result.isEmpty())
1006 State = State->set<TaintArgsOnPostVisit>(
C.getStackFrame(), Result);
1007 const NoteTag *InjectionTag = taintOriginTrackerTag(
1008 C, std::move(TaintedSymbols), std::move(TaintedIndexes),
1009 Call.getCalleeStackFrame(0));
1010 C.addTransition(State, InjectionTag);
1014 return !
C.getAnalysisManager()
1015 .getAnalyzerOptions()
1016 .ShouldAssumeControlledEnvironment;
1019bool GenericTaintChecker::generateReportIfTainted(
const Expr *E, StringRef Msg,
1022 std::optional<SVal> TaintedSVal =
1023 getTaintedPointeeOrPointer(
C.getState(),
C.getSVal(E));
1030 auto report = std::make_unique<PathSensitiveBugReport>(BT, Msg, N);
1033 report->markInteresting(TaintedSym);
1036 C.emitReport(std::move(report));
1054 const Decl *CallDecl =
Call.getDecl();
1061 const ArgIdxTy CallNumArgs = fromArgumentCount(
Call.getNumArgs());
1064 ArgNum = Format->getFormatIdx() - 1;
1065 if ((Format->getType()->getName() ==
"printf") && CallNumArgs > ArgNum)
1072bool GenericTaintChecker::checkUncontrolledFormatString(
1075 ArgIdxTy ArgNum = 0;
1081 return generateReportIfTainted(
Call.getArgExpr(ArgNum),
1082 MsgUncontrolledFormatString,
C);
1085void GenericTaintChecker::taintUnsafeSocketProtocol(
const CallEvent &
Call,
1087 if (
Call.getNumArgs() < 1)
1092 if (!
ID->getName().equals(
"socket"))
1096 StringRef DomName =
C.getMacroNameOrSpelling(DomLoc);
1098 bool SafeProtocol = DomName.equals(
"AF_SYSTEM") ||
1099 DomName.equals(
"AF_LOCAL") || DomName.equals(
"AF_UNIX") ||
1100 DomName.equals(
"AF_RESERVED_36");
1105 auto &F = State->getStateManager().get_context<ArgIdxFactory>();
1106 ImmutableSet<ArgIdxTy> Result = F.
add(F.getEmptySet(), ReturnValueIndex);
1107 State = State->set<TaintArgsOnPostVisit>(
C.getStackFrame(), Result);
1108 C.addTransition(State);
1116bool ento::shouldRegisterGenericTaintChecker(
const CheckerManager &mgr) {
Defines enum values for all the target-independent builtin functions.
static bool getPrintfFormatArgumentNum(const CallEvent &Call, const CheckerContext &C, ArgIdxTy &ArgNum)
TODO: remove checking for printf format attributes and socket whitelisting from GenericTaintChecker,...
#define REGISTER_MAP_WITH_PROGRAMSTATE(Name, Key, Value)
Declares an immutable map of type NameTy, suitable for placement into the ProgramState.
#define REGISTER_SET_FACTORY_WITH_PROGRAMSTATE(Name, Elem)
Declares an immutable set type Name and registers the factory for such sets in the program state,...
static bool contains(const std::set< tok::TokenKind > &Terminators, const Token &Tok)
Holds long-lived AST nodes (such as types and decls) that can be referred to throughout the semantic ...
QualType getFILEType() const
Retrieve the C FILE type.
StringRef getCheckerStringOption(StringRef CheckerName, StringRef OptionName, bool SearchInParents=false) const
Query an option's string value.
Holds information about both target-independent and target-specific builtins, allowing easy queries b...
llvm::StringRef getName(unsigned ID) const
Return the identifier name for the specified builtin, e.g.
Decl - This represents one declaration (or definition), e.g.
FunctionDecl * getAsFunction() LLVM_READONLY
Returns the function itself, or the templated function if this is a function template.
llvm::iterator_range< specific_attr_iterator< T > > specific_attrs() const
This represents one expression.
Represents a function declaration or definition.
One of these records is kept for each identifier that is lexed.
It wraps the AnalysisDeclContext to represent both the call stack with the help of StackFrameContext ...
A (possibly-)qualified type.
QualType getCanonicalType() const
Scope - A scope is a transient data structure that is used while parsing the program.
Encodes a location in the source.
It represents a stack frame of the call stack (based on CallEvent).
SourceRange getSourceRange() const LLVM_READONLY
SourceLocation tokens are not useful in isolation - they are low level value objects created/interpre...
bool isPointerType() const
QualType getPointeeType() const
If this is a pointer, ObjC object pointer, or block pointer, this returns the respective pointee.
const BugType & getBugType() const
StringRef getCategory() const
An immutable map from CallDescriptions to arbitrary data.
This class represents a description of a function call using the number of arguments and the name of ...
Represents an abstract call to a function or method along a particular path.
virtual void printState(raw_ostream &Out, ProgramStateRef State, const char *NL, const char *Sep) const
See CheckerManager::runCheckersForPrintState.
const AnalyzerOptions & getAnalyzerOptions() const
CHECKER * registerChecker(AT &&... Args)
Used to register checkers.
The tag upon which the TagVisitor reacts.
void markInteresting(SymbolRef sym, bugreporter::TrackingKind TKind=bugreporter::TrackingKind::Thorough)
Marks a symbol as interesting.
bool isInteresting(SymbolRef sym) const
SVal - This represents a symbolic expression, which can be either an L-value or an R-value.
std::optional< T > getAs() const
Convert to the specified SVal type, returning std::nullopt if this SVal is not of the desired type.
QualType getType(const ASTContext &) const
Try to get a reasonable type for the given value.
const MemRegion * getAsRegion() const
const char *const TaintedData
ProgramStateRef addTaint(ProgramStateRef State, const Stmt *S, const LocationContext *LCtx, TaintTagType Kind=TaintTagGeneric)
Create a new state in which the value of the statement is marked as tainted.
std::vector< SymbolRef > getTaintedSymbols(ProgramStateRef State, const Stmt *S, const LocationContext *LCtx, TaintTagType Kind=TaintTagGeneric)
Returns the tainted Symbols for a given Statement and state.
bool isTainted(ProgramStateRef State, const Stmt *S, const LocationContext *LCtx, TaintTagType Kind=TaintTagGeneric)
Check if the statement has a tainted value in the given state.
void printTaint(ProgramStateRef State, raw_ostream &Out, const char *nl="\n", const char *sep="")
@ CDF_MaybeBuiltin
Describes a C standard function that is sometimes implemented as a macro that expands to a compiler b...
for(unsigned I=0, E=TL.getNumArgs();I !=E;++I)
if(T->getSizeExpr()) TRY_TO(TraverseStmt(T -> getSizeExpr()))
@ None
The alignment was not explicit in code.
YAML serialization mapping.
static void mapping(IO &IO, TaintConfiguration &Config)
static void mapping(IO &IO, TaintConfiguration::Filter &Filter)
static void mapping(IO &IO, TaintConfiguration::Propagation &Propagation)
static void mapping(IO &IO, TaintConfiguration::Sink &Sink)
static void enumeration(IO &IO, TaintConfiguration::VariadicType &Value)