29#include "llvm/ADT/StringExtras.h"
30#include "llvm/Support/YAMLTraits.h"
38#define DEBUG_TYPE "taint-checker"
44using llvm::ImmutableSet;
48class GenericTaintChecker;
/// Diagnostic text for tainted data reaching a format-string parameter.
/// checkUncontrolledFormatString() passes it to generateReportIfTainted(), and
/// the "setproctitle" / "setproctitle_fast" sink rules carry it as well.
51constexpr llvm::StringLiteral MsgUncontrolledFormatString =
52 "Untrusted data is used as a format string "
53 "(CWE-134: Uncontrolled Format String)";
/// Diagnostic text for tainted data passed to a command/loader style call.
/// In the static rule table it is attached to the sink rules for "system",
/// "popen", "execl", "execle", "execlp", "execvp", "execvP", "execve" and
/// "dlopen".
58constexpr llvm::StringLiteral MsgSanitizeSystemArgs =
59 "Untrusted data is passed to a system call "
60 "(CERT/STR02-C. Sanitize data passed to complex subsystems)";
/// Diagnostic text for tainted data used as a buffer size. Attached to several
/// TR::Sink and TR::SinkProp entries of the static rule table.
64constexpr llvm::StringLiteral MsgTaintedBufferSize =
65 "Untrusted data is used to specify the buffer size "
66 "(CERT/STR31-C. Guarantee that storage for strings has sufficient space "
67 "for character data and the null terminator)";
/// Generic sink diagnostic. GenericTaintRule::process() falls back to it via
/// `SinkMsg.value_or(MsgCustomSink)` when a rule has no message of its own; the
/// "setproctitle_init" rule also names it explicitly.
70constexpr llvm::StringLiteral MsgCustomSink =
71 "Untrusted data is passed to a user-defined sink";
/// Sentinel argument index (-1) that stands for the call's return value rather
/// than a real argument: GetArgExpr() maps it to Call.getOriginExpr(), the
/// per-argument loop in GenericTaintRule::process() starts from it, and
/// validateArgVector() rejects any configured index below it.
77constexpr ArgIdxTy ReturnValueIndex{-1};
79static ArgIdxTy fromArgumentCount(
unsigned Count) {
81 static_cast<std::size_t
>(std::numeric_limits<ArgIdxTy>::max()) &&
82 "ArgIdxTy is not large enough to represent the number of arguments.");
93 const auto *SymReg = dyn_cast_or_null<SymbolicRegion>(Val.
getAsRegion());
99 dyn_cast_or_null<DeclRegion>(SymReg->getSymbol()->getOriginRegion());
105 if (
const auto *D = dyn_cast_or_null<VarDecl>(DeclReg->getDecl())) {
106 D = D->getCanonicalDecl();
108 if (D->getName().contains(
"stdin") && D->isExternC()) {
110 const QualType Ty = D->getType().getCanonicalType();
120 const QualType ArgTy = LValue.
getType(State->getStateManager().getContext());
122 return State->getSVal(LValue);
126 return State->getSVal(LValue, State->getStateManager().getContext().CharTy);
132 return getPointeeOf(State, *LValue);
141 if (
auto Pointee = getPointeeOf(State, Arg))
151 return getTaintedPointeeOrPointer(State, ExprSVal).has_value();
158 std::vector<SymbolRef> TaintedSymbols,
159 std::vector<ArgIdxTy> TaintedArgs,
161 return C.getNoteTag([TaintedSymbols = std::move(TaintedSymbols),
162 TaintedArgs = std::move(TaintedArgs), CallLocation](
170 if (TaintedSymbols.empty())
171 return "Taint originated here";
173 for (
auto Sym : TaintedSymbols) {
176 LLVM_DEBUG(
for (
auto Arg
178 llvm::dbgs() <<
"Taint Propagated from argument " << Arg + 1 <<
"\n";
187const NoteTag *taintPropagationExplainerTag(
189 std::vector<ArgIdxTy> TaintedArgs,
const LocationContext *CallLocation) {
190 assert(TaintedSymbols.size() == TaintedArgs.size());
191 return C.getNoteTag([TaintedSymbols = std::move(TaintedSymbols),
192 TaintedArgs = std::move(TaintedArgs), CallLocation](
195 llvm::raw_svector_ostream Out(Msg);
197 if (TaintedSymbols.empty() ||
201 int nofTaintedArgs = 0;
202 for (
auto [Idx, Sym] : llvm::enumerate(TaintedSymbols)) {
205 if (TaintedArgs[Idx] != ReturnValueIndex) {
206 LLVM_DEBUG(llvm::dbgs() <<
"Taint Propagated to argument "
207 << TaintedArgs[Idx] + 1 <<
"\n");
208 if (nofTaintedArgs == 0)
209 Out <<
"Taint propagated to the ";
212 Out << TaintedArgs[Idx] + 1
213 << llvm::getOrdinalSuffix(TaintedArgs[Idx] + 1) <<
" argument";
216 LLVM_DEBUG(llvm::dbgs() <<
"Taint Propagated to return value.\n");
217 Out <<
"Taint propagated to the return value";
221 return std::string(Out.str());
/// Builds an argument set from an explicit list of indices plus an optional
/// variadic start index.
///
/// \param DiscreteArgs  individually listed argument indices; moved into the
///                      member of the same name.
/// \param VariadicIndex when set, contains() also accepts every index that is
///                      greater than or equal to it; defaults to std::nullopt.
///
/// NOTE(review): rule-table entries written as `{{0}, ReturnValueIndex}` look
/// like they bind ReturnValueIndex (-1) to VariadicIndex instead of adding it to
/// DiscreteArgs; with contains() testing `ArgIdx >= *VariadicIndex` that would
/// match every index. Confirm this is intended versus `{{0, ReturnValueIndex}}`.
231 ArgSet(ArgVecTy &&DiscreteArgs,
232 std::optional<ArgIdxTy> VariadicIndex = std::nullopt)
233 : DiscreteArgs(std::move(DiscreteArgs)),
234 VariadicIndex(std::move(VariadicIndex)) {}
236 bool contains(ArgIdxTy ArgIdx)
const {
237 if (llvm::is_contained(DiscreteArgs, ArgIdx))
240 return VariadicIndex && ArgIdx >= *VariadicIndex;
/// \returns true when the set lists no discrete argument index and has no
/// variadic start index, i.e. contains() cannot succeed for any index.
243 bool isEmpty()
const {
return DiscreteArgs.empty() && !VariadicIndex; }
/// Explicitly listed argument indices; contains() checks membership with
/// llvm::is_contained.
246 ArgVecTy DiscreteArgs;
/// Optional lower bound for a variadic tail: when present, contains() also
/// accepts any index `>= *VariadicIndex`.
247 std::optional<ArgIdxTy> VariadicIndex;
255class GenericTaintRule {
268 std::optional<StringRef> SinkMsg;
270 GenericTaintRule() =
default;
272 GenericTaintRule(ArgSet &&Sink, ArgSet &&Filter, ArgSet &&Src, ArgSet &&Dst,
273 std::optional<StringRef> SinkMsg = std::nullopt)
274 : SinkArgs(std::move(Sink)), FilterArgs(std::move(Filter)),
275 PropSrcArgs(std::move(Src)), PropDstArgs(std::move(Dst)),
281 static GenericTaintRule Sink(ArgSet &&SinkArgs,
282 std::optional<StringRef> Msg = std::nullopt) {
283 return {std::move(SinkArgs), {}, {}, {}, Msg};
287 static GenericTaintRule Filter(ArgSet &&FilterArgs) {
288 return {{}, std::move(FilterArgs), {}, {}};
293 static GenericTaintRule Source(ArgSet &&SourceArgs) {
294 return {{}, {}, {}, std::move(SourceArgs)};
298 static GenericTaintRule Prop(ArgSet &&SrcArgs, ArgSet &&DstArgs) {
299 return {{}, {}, std::move(SrcArgs), std::move(DstArgs)};
303 static GenericTaintRule
304 SinkProp(ArgSet &&SinkArgs, ArgSet &&SrcArgs, ArgSet &&DstArgs,
305 std::optional<StringRef> Msg = std::nullopt) {
307 std::move(SinkArgs), {}, std::move(SrcArgs), std::move(DstArgs), Msg};
316 static const Expr *GetArgExpr(ArgIdxTy ArgIdx,
const CallEvent &Call) {
317 return ArgIdx == ReturnValueIndex ? Call.getOriginExpr()
318 : Call.getArgExpr(ArgIdx);
328struct TaintConfiguration {
329 using NameScopeArgs = std::tuple<std::string, std::string, ArgVecTy>;
330 enum class VariadicType {
None, Src, Dst };
337 struct Sink : Common {
341 struct Filter : Common {
345 struct Propagation : Common {
348 VariadicType VarType;
352 std::vector<Propagation> Propagations;
353 std::vector<Filter> Filters;
354 std::vector<Sink> Sinks;
// Default construction, copy and move are all explicitly defaulted.
// parseConfiguration() takes the configuration by rvalue reference.
356 TaintConfiguration() =
default;
357 TaintConfiguration(
const TaintConfiguration &) =
default;
358 TaintConfiguration(TaintConfiguration &&) =
default;
359 TaintConfiguration &operator=(
const TaintConfiguration &) =
default;
360 TaintConfiguration &operator=(TaintConfiguration &&) =
default;
363struct GenericTaintRuleParser {
368 using RulesContTy = std::vector<std::pair<CallDescription, GenericTaintRule>>;
/// Translates a whole TaintConfiguration into a container of
/// (CallDescription, GenericTaintRule) pairs by running parseConfig() over its
/// Filters, Sinks and Propagations.
///
/// \param Option name of the checker option; forwarded to parseConfig().
/// \param Config parsed configuration; its entries are moved from.
369 RulesContTy parseConfiguration(
const std::string &Option,
370 TaintConfiguration &&Config)
const;
/// Checks every index in \p Args and reports an invalid checker option value
/// (via Mgr.reportInvalidCheckerOptionValue, naming \p Option) for any index
/// smaller than ReturnValueIndex.
377 void validateArgVector(
const std::string &Option,
const ArgVecTy &Args)
const;
379 template <
typename Config>
static NamePartsTy parseNameParts(
const Config &
C);
383 template <
typename Config>
384 static void consumeRulesFromConfig(
const Config &
C, GenericTaintRule &&Rule,
/// Converts one configured sink entry: validates its argument list with
/// validateArgVector(), then hands the entry and a GenericTaintRule::Sink rule
/// to consumeRulesFromConfig().
387 void parseConfig(
const std::string &Option, TaintConfiguration::Sink &&
P,
388 RulesContTy &Rules)
const;
/// Same as above for a filter entry, producing a GenericTaintRule::Filter rule.
389 void parseConfig(
const std::string &Option, TaintConfiguration::Filter &&
P,
390 RulesContTy &Rules)
const;
/// Same as above for a propagation entry: validates both SrcArgs and DstArgs,
/// attaches VarIndex to the source or destination set depending on VarType,
/// and produces a GenericTaintRule::Prop rule.
391 void parseConfig(
const std::string &Option,
392 TaintConfiguration::Propagation &&
P,
393 RulesContTy &Rules)
const;
398class GenericTaintChecker :
public Checker<check::PreCall, check::PostCall> {
404 const char *Sep)
const override;
407 bool generateReportIfTainted(
const Expr *E, StringRef Msg,
413 bool checkUncontrolledFormatString(
const CallEvent &Call,
416 void taintUnsafeSocketProtocol(
const CallEvent &Call,
/// Built-in rules, filled from the hard-coded GlobalCRules table.
/// checkPreCall() consults it only when Call.isGlobalCFunction() is true.
430 mutable std::optional<RuleLookupTy> StaticTaintRules;
/// Rules obtained from the checker's "Config" option through
/// GenericTaintRuleParser; looked up when the static table yields no rule.
431 mutable std::optional<RuleLookupTy> DynamicTaintRules;
// Let LLVM's YAML I/O treat std::vector of each configuration entry type as a
// YAML sequence; needed for the "Sinks", "Filters" and "Propagations" keys
// mapped in MappingTraits<TaintConfiguration>.
436LLVM_YAML_IS_SEQUENCE_VECTOR(TaintConfiguration::Sink)
437LLVM_YAML_IS_SEQUENCE_VECTOR(TaintConfiguration::Filter)
438LLVM_YAML_IS_SEQUENCE_VECTOR(TaintConfiguration::Propagation)
442template <>
struct MappingTraits<TaintConfiguration> {
443 static void mapping(IO &IO, TaintConfiguration &Config) {
444 IO.mapOptional(
"Propagations", Config.Propagations);
445 IO.mapOptional(
"Filters", Config.Filters);
446 IO.mapOptional(
"Sinks", Config.Sinks);
450template <>
struct MappingTraits<TaintConfiguration::Sink> {
451 static void mapping(IO &IO, TaintConfiguration::Sink &Sink) {
452 IO.mapRequired(
"Name", Sink.Name);
453 IO.mapOptional(
"Scope", Sink.Scope);
454 IO.mapRequired(
"Args", Sink.SinkArgs);
458template <>
struct MappingTraits<TaintConfiguration::Filter> {
459 static void mapping(IO &IO, TaintConfiguration::Filter &Filter) {
460 IO.mapRequired(
"Name", Filter.Name);
461 IO.mapOptional(
"Scope", Filter.Scope);
462 IO.mapRequired(
"Args", Filter.FilterArgs);
466template <>
struct MappingTraits<TaintConfiguration::Propagation> {
467 static void mapping(IO &IO, TaintConfiguration::Propagation &Propagation) {
468 IO.mapRequired(
"Name", Propagation.Name);
469 IO.mapOptional(
"Scope", Propagation.Scope);
470 IO.mapOptional(
"SrcArgs", Propagation.SrcArgs);
471 IO.mapOptional(
"DstArgs", Propagation.DstArgs);
472 IO.mapOptional(
"VariadicType", Propagation.VarType);
473 IO.mapOptional(
"VariadicIndex", Propagation.VarIndex);
477template <>
struct ScalarEnumerationTraits<TaintConfiguration::VariadicType> {
479 IO.enumCase(
Value,
"None", TaintConfiguration::VariadicType::None);
480 IO.enumCase(
Value,
"Src", TaintConfiguration::VariadicType::Src);
481 IO.enumCase(
Value,
"Dst", TaintConfiguration::VariadicType::Dst);
492 ImmutableSet<ArgIdxTy>)
495void GenericTaintRuleParser::validateArgVector(
const std::string &Option,
496 const ArgVecTy &Args)
const {
497 for (ArgIdxTy Arg : Args) {
498 if (Arg < ReturnValueIndex) {
499 Mgr.reportInvalidCheckerOptionValue(
500 Mgr.getChecker<GenericTaintChecker>(), Option,
501 "an argument number for propagation rules greater or equal to -1");
506template <
typename Config>
508GenericTaintRuleParser::parseNameParts(
const Config &
C) {
509 NamePartsTy NameParts;
510 if (!
C.Scope.empty()) {
513 StringRef{
C.Scope}.split(NameParts,
"::", -1,
516 NameParts.emplace_back(
C.Name);
520template <
typename Config>
521void GenericTaintRuleParser::consumeRulesFromConfig(
const Config &
C,
522 GenericTaintRule &&Rule,
523 RulesContTy &Rules) {
524 NamePartsTy NameParts = parseNameParts(
C);
528void GenericTaintRuleParser::parseConfig(
const std::string &Option,
529 TaintConfiguration::Sink &&S,
530 RulesContTy &Rules)
const {
531 validateArgVector(Option, S.SinkArgs);
532 consumeRulesFromConfig(S, GenericTaintRule::Sink(std::move(S.SinkArgs)),
536void GenericTaintRuleParser::parseConfig(
const std::string &Option,
537 TaintConfiguration::Filter &&S,
538 RulesContTy &Rules)
const {
539 validateArgVector(Option, S.FilterArgs);
540 consumeRulesFromConfig(S, GenericTaintRule::Filter(std::move(S.FilterArgs)),
544void GenericTaintRuleParser::parseConfig(
const std::string &Option,
545 TaintConfiguration::Propagation &&
P,
546 RulesContTy &Rules)
const {
547 validateArgVector(Option,
P.SrcArgs);
548 validateArgVector(Option,
P.DstArgs);
549 bool IsSrcVariadic =
P.VarType == TaintConfiguration::VariadicType::Src;
550 bool IsDstVariadic =
P.VarType == TaintConfiguration::VariadicType::Dst;
551 std::optional<ArgIdxTy> JustVarIndex =
P.VarIndex;
553 ArgSet SrcDesc(std::move(
P.SrcArgs),
554 IsSrcVariadic ? JustVarIndex : std::nullopt);
555 ArgSet DstDesc(std::move(
P.DstArgs),
556 IsDstVariadic ? JustVarIndex : std::nullopt);
558 consumeRulesFromConfig(
559 P, GenericTaintRule::Prop(std::move(SrcDesc), std::move(DstDesc)), Rules);
562GenericTaintRuleParser::RulesContTy
563GenericTaintRuleParser::parseConfiguration(
const std::string &Option,
564 TaintConfiguration &&Config)
const {
568 for (
auto &F : Config.Filters)
569 parseConfig(Option, std::move(F), Rules);
571 for (
auto &S : Config.Sinks)
572 parseConfig(Option, std::move(S), Rules);
574 for (
auto &
P : Config.Propagations)
575 parseConfig(Option, std::move(
P), Rules);
584 if (StaticTaintRules || DynamicTaintRules)
587 using RulesConstructionTy =
588 std::vector<std::pair<CallDescription, GenericTaintRule>>;
589 using TR = GenericTaintRule;
593 RulesConstructionTy GlobalCRules{
595 {{{
"fdopen"}}, TR::Source({{ReturnValueIndex}})},
596 {{{
"fopen"}}, TR::Source({{ReturnValueIndex}})},
597 {{{
"freopen"}}, TR::Source({{ReturnValueIndex}})},
598 {{{
"getch"}}, TR::Source({{ReturnValueIndex}})},
599 {{{
"getchar"}}, TR::Source({{ReturnValueIndex}})},
600 {{{
"getchar_unlocked"}}, TR::Source({{ReturnValueIndex}})},
601 {{{
"gets"}}, TR::Source({{0}, ReturnValueIndex})},
602 {{{
"gets_s"}}, TR::Source({{0}, ReturnValueIndex})},
603 {{{
"scanf"}}, TR::Source({{}, 1})},
604 {{{
"scanf_s"}}, TR::Source({{}, {1}})},
605 {{{
"wgetch"}}, TR::Source({{}, ReturnValueIndex})},
611 {{{
"_IO_getc"}}, TR::Source({{ReturnValueIndex}})},
612 {{{
"getcwd"}}, TR::Source({{0, ReturnValueIndex}})},
613 {{{
"getwd"}}, TR::Source({{0, ReturnValueIndex}})},
614 {{{
"readlink"}}, TR::Source({{1, ReturnValueIndex}})},
615 {{{
"readlinkat"}}, TR::Source({{2, ReturnValueIndex}})},
616 {{{
"get_current_dir_name"}}, TR::Source({{ReturnValueIndex}})},
617 {{{
"gethostname"}}, TR::Source({{0}})},
618 {{{
"getnameinfo"}}, TR::Source({{2, 4}})},
619 {{{
"getseuserbyname"}}, TR::Source({{1, 2}})},
620 {{{
"getgroups"}}, TR::Source({{1, ReturnValueIndex}})},
621 {{{
"getlogin"}}, TR::Source({{ReturnValueIndex}})},
622 {{{
"getlogin_r"}}, TR::Source({{0}})},
625 {{{
"atoi"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
626 {{{
"atol"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
627 {{{
"atoll"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
628 {{{
"fgetc"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
629 {{{
"fgetln"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
630 {{{
"fgets"}}, TR::Prop({{2}}, {{0, ReturnValueIndex}})},
631 {{{
"fscanf"}}, TR::Prop({{0}}, {{}, 2})},
632 {{{
"fscanf_s"}}, TR::Prop({{0}}, {{}, {2}})},
633 {{{
"sscanf"}}, TR::Prop({{0}}, {{}, 2})},
635 {{{
"getc"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
636 {{{
"getc_unlocked"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
637 {{{
"getdelim"}}, TR::Prop({{3}}, {{0}})},
638 {{{
"getline"}}, TR::Prop({{2}}, {{0}})},
639 {{{
"getw"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
640 {{{
"pread"}}, TR::Prop({{0, 1, 2, 3}}, {{1, ReturnValueIndex}})},
641 {{{
"read"}}, TR::Prop({{0, 2}}, {{1, ReturnValueIndex}})},
642 {{{
"strchr"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
643 {{{
"strrchr"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
644 {{{
"tolower"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
645 {{{
"toupper"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
646 {{{
"fread"}}, TR::Prop({{3}}, {{0, ReturnValueIndex}})},
647 {{{
"recv"}}, TR::Prop({{0}}, {{1, ReturnValueIndex}})},
648 {{{
"recvfrom"}}, TR::Prop({{0}}, {{1, ReturnValueIndex}})},
650 {{{
"ttyname"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
651 {{{
"ttyname_r"}}, TR::Prop({{0}}, {{1, ReturnValueIndex}})},
653 {{{
"basename"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
654 {{{
"dirname"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
655 {{{
"fnmatch"}}, TR::Prop({{1}}, {{ReturnValueIndex}})},
656 {{{
"memchr"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
657 {{{
"memrchr"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
658 {{{
"rawmemchr"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
660 {{{
"mbtowc"}}, TR::Prop({{1}}, {{0, ReturnValueIndex}})},
661 {{{
"wctomb"}}, TR::Prop({{1}}, {{0, ReturnValueIndex}})},
662 {{{
"wcwidth"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
664 {{{
"memcmp"}}, TR::Prop({{0, 1}}, {{ReturnValueIndex}})},
665 {{{
"memcpy"}}, TR::Prop({{1}}, {{0, ReturnValueIndex}})},
666 {{{
"memmove"}}, TR::Prop({{1}}, {{0, ReturnValueIndex}})},
672 {{{
"memmem"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
675 {{{
"strstr"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
676 {{{
"strcasestr"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
678 {{{
"strchrnul"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
680 {{{
"index"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
681 {{{
"rindex"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
685 {{{
"qsort"}}, TR::Prop({{0}}, {{0}})},
686 {{{
"qsort_r"}}, TR::Prop({{0}}, {{0}})},
688 {{{
"strcmp"}}, TR::Prop({{0, 1}}, {{ReturnValueIndex}})},
689 {{{
"strcasecmp"}}, TR::Prop({{0, 1}}, {{ReturnValueIndex}})},
690 {{{
"strncmp"}}, TR::Prop({{0, 1, 2}}, {{ReturnValueIndex}})},
691 {{{
"strncasecmp"}}, TR::Prop({{0, 1, 2}}, {{ReturnValueIndex}})},
692 {{{
"strspn"}}, TR::Prop({{0, 1}}, {{ReturnValueIndex}})},
693 {{{
"strcspn"}}, TR::Prop({{0, 1}}, {{ReturnValueIndex}})},
694 {{{
"strpbrk"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
695 {{{
"strndup"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
696 {{{
"strndupa"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
697 {{{
"strlen"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
698 {{{
"strnlen"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
699 {{{
"strtol"}}, TR::Prop({{0}}, {{1, ReturnValueIndex}})},
700 {{{
"strtoll"}}, TR::Prop({{0}}, {{1, ReturnValueIndex}})},
701 {{{
"strtoul"}}, TR::Prop({{0}}, {{1, ReturnValueIndex}})},
702 {{{
"strtoull"}}, TR::Prop({{0}}, {{1, ReturnValueIndex}})},
704 {{{
"isalnum"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
705 {{{
"isalpha"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
706 {{{
"isascii"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
707 {{{
"isblank"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
708 {{{
"iscntrl"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
709 {{{
"isdigit"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
710 {{{
"isgraph"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
711 {{{
"islower"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
712 {{{
"isprint"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
713 {{{
"ispunct"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
714 {{{
"isspace"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
715 {{{
"isupper"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
716 {{{
"isxdigit"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
719 TR::Prop({{1, 2}}, {{0, ReturnValueIndex}})},
721 TR::Prop({{1, 2}}, {{0}})},
723 TR::Prop({{1, 2}}, {{0}})},
725 TR::Prop({{1}, 3}, {{0, ReturnValueIndex}})},
727 TR::Prop({{1}, 2}, {{0, ReturnValueIndex}})},
729 TR::Prop({{1}}, {{0, ReturnValueIndex}})},
731 TR::Prop({{1}}, {{0, ReturnValueIndex}})},
733 TR::Prop({{1}}, {{0, ReturnValueIndex}})},
736 TR::Prop({{0}}, {{ReturnValueIndex}})},
740 {{{
"system"}}, TR::Sink({{0}}, MsgSanitizeSystemArgs)},
741 {{{
"popen"}}, TR::Sink({{0}}, MsgSanitizeSystemArgs)},
742 {{{
"execl"}}, TR::Sink({{0}}, MsgSanitizeSystemArgs)},
743 {{{
"execle"}}, TR::Sink({{0}}, MsgSanitizeSystemArgs)},
744 {{{
"execlp"}}, TR::Sink({{0}}, MsgSanitizeSystemArgs)},
745 {{{
"execvp"}}, TR::Sink({{0}}, MsgSanitizeSystemArgs)},
746 {{{
"execvP"}}, TR::Sink({{0}}, MsgSanitizeSystemArgs)},
747 {{{
"execve"}}, TR::Sink({{0}}, MsgSanitizeSystemArgs)},
748 {{{
"dlopen"}}, TR::Sink({{0}}, MsgSanitizeSystemArgs)},
753 TR::Sink({{3}}, MsgTaintedBufferSize)},
755 TR::Sink({{1}}, MsgTaintedBufferSize)},
756 {{{{
"setproctitle"}}}, TR::Sink({{0}, 1}, MsgUncontrolledFormatString)},
757 {{{{
"setproctitle_fast"}}},
758 TR::Sink({{0}, 1}, MsgUncontrolledFormatString)},
762 TR::SinkProp({{2}}, {{1, 2}}, {{0, ReturnValueIndex}},
763 MsgTaintedBufferSize)},
765 TR::SinkProp({{2}}, {{1, 2}}, {{0, ReturnValueIndex}},
766 MsgTaintedBufferSize)},
768 TR::SinkProp({{2}}, {{1, 2}}, {{0, ReturnValueIndex}},
769 MsgTaintedBufferSize)},
771 TR::SinkProp({{1}}, {{0, 1}}, {{ReturnValueIndex}},
772 MsgTaintedBufferSize)},
774 TR::SinkProp({{2}}, {{0, 2}}, {{1}}, MsgTaintedBufferSize)}};
777 if (TR::UntrustedEnv(
C)) {
779 GlobalCRules.push_back(
780 {{{
"setproctitle_init"}}, TR::Sink({{1, 2}}, MsgCustomSink)});
781 GlobalCRules.push_back({{{
"getenv"}}, TR::Source({{ReturnValueIndex}})});
784 StaticTaintRules.emplace(std::make_move_iterator(GlobalCRules.begin()),
785 std::make_move_iterator(GlobalCRules.end()));
790 GenericTaintRuleParser ConfigParser{*Mgr};
791 std::string Option{
"Config"};
792 StringRef ConfigFile =
794 std::optional<TaintConfiguration> Config =
795 getConfiguration<TaintConfiguration>(*Mgr,
this, Option, ConfigFile);
798 DynamicTaintRules = RuleLookupTy{};
802 GenericTaintRuleParser::RulesContTy Rules{
803 ConfigParser.parseConfiguration(Option, std::move(*Config))};
805 DynamicTaintRules.emplace(std::make_move_iterator(Rules.begin()),
806 std::make_move_iterator(Rules.end()));
809void GenericTaintChecker::checkPreCall(
const CallEvent &Call,
814 if (
const auto *Rule =
815 Call.isGlobalCFunction() ? StaticTaintRules->lookup(Call) :
nullptr)
816 Rule->process(*
this, Call,
C);
817 else if (
const auto *Rule = DynamicTaintRules->lookup(Call))
818 Rule->process(*
this, Call,
C);
825 checkUncontrolledFormatString(Call,
C);
829 taintUnsafeSocketProtocol(Call,
C);
832void GenericTaintChecker::checkPostCall(
const CallEvent &Call,
842 TaintArgsOnPostVisitTy TaintArgsMap = State->get<TaintArgsOnPostVisit>();
844 const ImmutableSet<ArgIdxTy> *TaintArgs = TaintArgsMap.lookup(CurrentFrame);
847 assert(!TaintArgs->isEmpty());
849 LLVM_DEBUG(
for (ArgIdxTy I
851 llvm::dbgs() <<
"PostCall<";
852 Call.dump(llvm::dbgs());
853 llvm::dbgs() <<
"> actually wants to taint arg index: " << I <<
'\n';
856 const NoteTag *InjectionTag =
nullptr;
857 std::vector<SymbolRef> TaintedSymbols;
858 std::vector<ArgIdxTy> TaintedIndexes;
859 for (ArgIdxTy ArgNum : *TaintArgs) {
861 if (ArgNum == ReturnValueIndex) {
863 std::vector<SymbolRef> TaintedSyms =
865 if (!TaintedSyms.empty()) {
866 TaintedSymbols.push_back(TaintedSyms[0]);
867 TaintedIndexes.push_back(ArgNum);
873 if (
auto V = getPointeeOf(State,
Call.getArgSVal(ArgNum))) {
876 if (!TaintedSyms.empty()) {
877 TaintedSymbols.push_back(TaintedSyms[0]);
878 TaintedIndexes.push_back(ArgNum);
884 InjectionTag = taintPropagationExplainerTag(
C, TaintedSymbols, TaintedIndexes,
885 Call.getCalleeStackFrame(0));
887 State = State->remove<TaintArgsOnPostVisit>(CurrentFrame);
888 C.addTransition(State, InjectionTag);
891void GenericTaintChecker::printState(raw_ostream &Out,
ProgramStateRef State,
892 const char *NL,
const char *Sep)
const {
896void GenericTaintRule::process(
const GenericTaintChecker &
Checker,
899 const ArgIdxTy CallNumArgs = fromArgumentCount(
Call.getNumArgs());
902 const auto ForEachCallArg = [&
C, &
Call, CallNumArgs](
auto &&Fun) {
903 for (ArgIdxTy I = ReturnValueIndex; I < CallNumArgs; ++I) {
904 const Expr *E = GetArgExpr(I, Call);
905 Fun(I, E,
C.getSVal(E));
910 ForEachCallArg([
this, &
Checker, &
C, &State](ArgIdxTy I,
const Expr *E,
SVal) {
912 if (isStdin(
C.getSVal(E),
C.getASTContext())) {
913 State = addTaint(State, C.getSVal(E));
915 if (SinkArgs.contains(I) && isTaintedOrPointsToTainted(State,
C.getSVal(E)))
916 Checker.generateReportIfTainted(E, SinkMsg.value_or(MsgCustomSink),
C);
920 ForEachCallArg([
this, &State](ArgIdxTy I,
const Expr *E,
SVal S) {
921 if (FilterArgs.contains(I)) {
922 State = removeTaint(State, S);
923 if (auto P = getPointeeOf(State, S))
924 State = removeTaint(State, *P);
931 bool IsMatching = PropSrcArgs.isEmpty();
932 std::vector<SymbolRef> TaintedSymbols;
933 std::vector<ArgIdxTy> TaintedIndexes;
934 ForEachCallArg([
this, &
C, &IsMatching, &State, &TaintedSymbols,
935 &TaintedIndexes](ArgIdxTy I,
const Expr *E,
SVal) {
936 std::optional<SVal> TaintedSVal =
937 getTaintedPointeeOrPointer(State,
C.getSVal(E));
939 IsMatching || (PropSrcArgs.contains(I) && TaintedSVal.has_value());
942 if (TaintedSVal && !isStdin(*TaintedSVal,
C.getASTContext())) {
943 std::vector<SymbolRef> TaintedArgSyms =
945 if (!TaintedArgSyms.empty()) {
946 llvm::append_range(TaintedSymbols, TaintedArgSyms);
947 TaintedIndexes.push_back(I);
959 const bool IsNonConstRef = Ty->isReferenceType() && !Ty.isConstQualified();
960 const bool IsNonConstPtr =
961 Ty->isPointerType() && !Ty->getPointeeType().isConstQualified();
963 return IsNonConstRef || IsNonConstPtr;
967 auto &F = State->getStateManager().get_context<ArgIdxFactory>();
968 ImmutableSet<ArgIdxTy> Result = F.getEmptySet();
971 if (PropDstArgs.contains(I)) {
972 LLVM_DEBUG(llvm::dbgs() <<
"PreCall<"; Call.dump(llvm::dbgs());
974 <<
"> prepares tainting arg index: " << I <<
'\n';);
975 Result = F.add(Result, I);
982 LLVM_DEBUG(if (!Result.contains(I)) {
983 llvm::dbgs() <<
"PreCall<";
984 Call.dump(llvm::dbgs());
985 llvm::dbgs() <<
"> prepares tainting arg index: " << I <<
'\n';
987 Result = F.
add(Result, I);
991 if (!Result.isEmpty())
992 State = State->set<TaintArgsOnPostVisit>(
C.getStackFrame(), Result);
993 const NoteTag *InjectionTag = taintOriginTrackerTag(
994 C, std::move(TaintedSymbols), std::move(TaintedIndexes),
995 Call.getCalleeStackFrame(0));
996 C.addTransition(State, InjectionTag);
1000 return !
C.getAnalysisManager()
1001 .getAnalyzerOptions()
1002 .ShouldAssumeControlledEnvironment;
1005bool GenericTaintChecker::generateReportIfTainted(
const Expr *E, StringRef Msg,
1008 std::optional<SVal> TaintedSVal =
1009 getTaintedPointeeOrPointer(
C.getState(),
C.getSVal(E));
1016 auto report = std::make_unique<PathSensitiveBugReport>(BT, Msg, N);
1019 report->markInteresting(TaintedSym);
1022 C.emitReport(std::move(report));
1040 const Decl *CallDecl = Call.getDecl();
1047 const ArgIdxTy CallNumArgs = fromArgumentCount(Call.getNumArgs());
1050 ArgNum = Format->getFormatIdx() - 1;
1051 if ((Format->getType()->getName() ==
"printf") && CallNumArgs > ArgNum)
1058bool GenericTaintChecker::checkUncontrolledFormatString(
1061 ArgIdxTy ArgNum = 0;
1067 return generateReportIfTainted(
Call.getArgExpr(ArgNum),
1068 MsgUncontrolledFormatString,
C);
1071void GenericTaintChecker::taintUnsafeSocketProtocol(
const CallEvent &Call,
1073 if (
Call.getNumArgs() < 1)
1078 if (!
ID->getName().equals(
"socket"))
1082 StringRef DomName =
C.getMacroNameOrSpelling(DomLoc);
1084 bool SafeProtocol = DomName.equals(
"AF_SYSTEM") ||
1085 DomName.equals(
"AF_LOCAL") || DomName.equals(
"AF_UNIX") ||
1086 DomName.equals(
"AF_RESERVED_36");
1091 auto &F = State->getStateManager().get_context<ArgIdxFactory>();
1092 ImmutableSet<ArgIdxTy> Result = F.
add(F.getEmptySet(), ReturnValueIndex);
1093 State = State->set<TaintArgsOnPostVisit>(
C.getStackFrame(), Result);
1094 C.addTransition(State);
1102bool ento::shouldRegisterGenericTaintChecker(
const CheckerManager &mgr) {
Defines enum values for all the target-independent builtin functions.
static bool getPrintfFormatArgumentNum(const CallEvent &Call, const CheckerContext &C, ArgIdxTy &ArgNum)
TODO: remove checking for printf format attributes and socket whitelisting from GenericTaintChecker,...
#define REGISTER_MAP_WITH_PROGRAMSTATE(Name, Key, Value)
Declares an immutable map of type NameTy, suitable for placement into the ProgramState.
#define REGISTER_SET_FACTORY_WITH_PROGRAMSTATE(Name, Elem)
Declares an immutable set type Name and registers the factory for such sets in the program state,...
static bool contains(const std::set< tok::TokenKind > &Terminators, const Token &Tok)
Holds long-lived AST nodes (such as types and decls) that can be referred to throughout the semantic ...
QualType getFILEType() const
Retrieve the C FILE type.
StringRef getCheckerStringOption(StringRef CheckerName, StringRef OptionName, bool SearchInParents=false) const
Query an option's string value.
Holds information about both target-independent and target-specific builtins, allowing easy queries b...
llvm::StringRef getName(unsigned ID) const
Return the identifier name for the specified builtin, e.g.
Decl - This represents one declaration (or definition), e.g.
FunctionDecl * getAsFunction() LLVM_READONLY
Returns the function itself, or the templated function if this is a function template.
llvm::iterator_range< specific_attr_iterator< T > > specific_attrs() const
This represents one expression.
Represents a function declaration or definition.
One of these records is kept for each identifier that is lexed.
It wraps the AnalysisDeclContext to represent both the call stack with the help of StackFrameContext ...
A (possibly-)qualified type.
QualType getCanonicalType() const
Scope - A scope is a transient data structure that is used while parsing the program.
Encodes a location in the source.
It represents a stack frame of the call stack (based on CallEvent).
SourceRange getSourceRange() const LLVM_READONLY
SourceLocation tokens are not useful in isolation - they are low level value objects created/interpre...
bool isPointerType() const
QualType getPointeeType() const
If this is a pointer, ObjC object pointer, or block pointer, this returns the respective pointee.
const BugType & getBugType() const
StringRef getCategory() const
An immutable map from CallDescriptions to arbitrary data.
This class represents a description of a function call using the number of arguments and the name of ...
Represents an abstract call to a function or method along a particular path.
virtual void printState(raw_ostream &Out, ProgramStateRef State, const char *NL, const char *Sep) const
See CheckerManager::runCheckersForPrintState.
const AnalyzerOptions & getAnalyzerOptions() const
CHECKER * registerChecker(AT &&... Args)
Used to register checkers.
The tag upon which the TagVisitor reacts.
void markInteresting(SymbolRef sym, bugreporter::TrackingKind TKind=bugreporter::TrackingKind::Thorough)
Marks a symbol as interesting.
bool isInteresting(SymbolRef sym) const
SVal - This represents a symbolic expression, which can be either an L-value or an R-value.
std::optional< T > getAs() const
Convert to the specified SVal type, returning std::nullopt if this SVal is not of the desired type.
QualType getType(const ASTContext &) const
Try to get a reasonable type for the given value.
const MemRegion * getAsRegion() const
const char *const TaintedData
ProgramStateRef addTaint(ProgramStateRef State, const Stmt *S, const LocationContext *LCtx, TaintTagType Kind=TaintTagGeneric)
Create a new state in which the value of the statement is marked as tainted.
std::vector< SymbolRef > getTaintedSymbols(ProgramStateRef State, const Stmt *S, const LocationContext *LCtx, TaintTagType Kind=TaintTagGeneric)
Returns the tainted Symbols for a given Statement and state.
bool isTainted(ProgramStateRef State, const Stmt *S, const LocationContext *LCtx, TaintTagType Kind=TaintTagGeneric)
Check if the statement has a tainted value in the given state.
void printTaint(ProgramStateRef State, raw_ostream &Out, const char *nl="\n", const char *sep="")
@ CDF_MaybeBuiltin
Describes a C standard function that is sometimes implemented as a macro that expands to a compiler b...
bool Call(InterpState &S, CodePtr OpPC, const Function *Func)
for(unsigned I=0, E=TL.getNumArgs();I !=E;++I)
@ C
Languages that the frontend can parse and compile.
if(T->getSizeExpr()) TRY_TO(TraverseStmt(T -> getSizeExpr()))
@ None
The alignment was not explicit in code.
YAML serialization mapping.
static void mapping(IO &IO, TaintConfiguration &Config)
static void mapping(IO &IO, TaintConfiguration::Filter &Filter)
static void mapping(IO &IO, TaintConfiguration::Propagation &Propagation)
static void mapping(IO &IO, TaintConfiguration::Sink &Sink)
static void enumeration(IO &IO, TaintConfiguration::VariadicType &Value)