29#include "llvm/ADT/StringExtras.h"
30#include "llvm/Support/YAMLTraits.h"
38#define DEBUG_TYPE "taint-checker"
44using llvm::ImmutableSet;
48class GenericTaintChecker;
/// Diagnostic text for tainted data reaching a format-string parameter.
/// checkUncontrolledFormatString passes it to generateReportIfTainted, and
/// the built-in setproctitle / setproctitle_fast sink rules carry it as well.
51constexpr llvm::StringLiteral MsgUncontrolledFormatString =
52 "Untrusted data is used as a format string "
53 "(CWE-134: Uncontrolled Format String)";
/// Diagnostic text attached to the built-in sink rules for calls that hand a
/// string to another subsystem: system, popen, the exec* family and dlopen.
58constexpr llvm::StringLiteral MsgSanitizeSystemArgs =
59 "Untrusted data is passed to a system call "
60 "(CERT/STR02-C. Sanitize data passed to complex subsystems)";
/// Diagnostic text attached to the built-in sink rules for allocation and
/// length-bounded copy functions (malloc, calloc, alloca, memccpy, realloc,
/// and the SinkProp rules for memcpy, memmove, strncpy, strndup and bcopy).
64constexpr llvm::StringLiteral MsgTaintedBufferSize =
65 "Untrusted data is used to specify the buffer size "
66 "(CERT/STR31-C. Guarantee that storage for strings has sufficient space "
67 "for character data and the null terminator)";
/// Fallback diagnostic text. GenericTaintRule::process reports with it when a
/// sink rule has no SinkMsg of its own (SinkMsg.value_or(MsgCustomSink)); the
/// setproctitle_init rule also names it explicitly.
70constexpr llvm::StringLiteral MsgCustomSink =
71 "Untrusted data is passed to a user-defined sink";
/// Pseudo argument index that stands for the call's return value instead of
/// a real argument. It is -1, one below the first real index, so the argument
/// loop in GenericTaintRule::process can start at it and count upwards.
77constexpr ArgIdxTy ReturnValueIndex{-1};
79static ArgIdxTy fromArgumentCount(
unsigned Count) {
81 static_cast<std::size_t
>(std::numeric_limits<ArgIdxTy>::max()) &&
82 "ArgIdxTy is not large enough to represent the number of arguments.");
93 const auto *SymReg = dyn_cast_or_null<SymbolicRegion>(Val.
getAsRegion());
99 dyn_cast_or_null<DeclRegion>(SymReg->getSymbol()->getOriginRegion());
105 if (
const auto *D = dyn_cast_or_null<VarDecl>(DeclReg->getDecl())) {
106 D = D->getCanonicalDecl();
107 if (D->getName() ==
"stdin" && D->hasExternalStorage() && D->isExternC()) {
109 const QualType Ty = D->getType().getCanonicalType();
119 const QualType ArgTy = LValue.
getType(State->getStateManager().getContext());
121 return State->getSVal(LValue);
125 return State->getSVal(LValue, State->getStateManager().getContext().CharTy);
131 return getPointeeOf(State, *LValue);
140 if (
auto Pointee = getPointeeOf(State, Arg))
150 return getTaintedPointeeOrPointer(State, ExprSVal).has_value();
157 std::vector<SymbolRef> TaintedSymbols,
158 std::vector<ArgIdxTy> TaintedArgs,
160 return C.getNoteTag([TaintedSymbols = std::move(TaintedSymbols),
161 TaintedArgs = std::move(TaintedArgs), CallLocation](
169 if (TaintedSymbols.empty())
170 return "Taint originated here";
172 for (
auto Sym : TaintedSymbols) {
175 LLVM_DEBUG(
for (
auto Arg
177 llvm::dbgs() <<
"Taint Propagated from argument " << Arg + 1 <<
"\n";
186const NoteTag *taintPropagationExplainerTag(
188 std::vector<ArgIdxTy> TaintedArgs,
const LocationContext *CallLocation) {
189 assert(TaintedSymbols.size() == TaintedArgs.size());
190 return C.getNoteTag([TaintedSymbols = std::move(TaintedSymbols),
191 TaintedArgs = std::move(TaintedArgs), CallLocation](
194 llvm::raw_svector_ostream Out(Msg);
196 if (TaintedSymbols.empty() ||
200 int nofTaintedArgs = 0;
201 for (
auto [Idx, Sym] : llvm::enumerate(TaintedSymbols)) {
204 if (TaintedArgs[Idx] != ReturnValueIndex) {
205 LLVM_DEBUG(llvm::dbgs() <<
"Taint Propagated to argument "
206 << TaintedArgs[Idx] + 1 <<
"\n");
207 if (nofTaintedArgs == 0)
208 Out <<
"Taint propagated to the ";
211 Out << TaintedArgs[Idx] + 1
212 << llvm::getOrdinalSuffix(TaintedArgs[Idx] + 1) <<
" argument";
215 LLVM_DEBUG(llvm::dbgs() <<
"Taint Propagated to return value.\n");
216 Out <<
"Taint propagated to the return value";
220 return std::string(Out.str());
/// Builds an argument set from explicitly listed indices plus an optional
/// variadic start index.
/// \param DiscreteArgs   individually listed argument indices; moved into
///                       the set.
/// \param VariadicIndex  when set, contains() additionally accepts every
///                       index greater than or equal to this value.
230 ArgSet(ArgVecTy &&DiscreteArgs,
231 std::optional<ArgIdxTy> VariadicIndex = std::nullopt)
232 : DiscreteArgs(std::move(DiscreteArgs)),
233 VariadicIndex(std::move(VariadicIndex)) {}
235 bool contains(ArgIdxTy ArgIdx)
const {
236 if (llvm::is_contained(DiscreteArgs, ArgIdx))
239 return VariadicIndex && ArgIdx >= *VariadicIndex;
/// \returns true when no discrete index is listed and no variadic start
/// index is set, i.e. the set cannot match any argument.
242 bool isEmpty()
const {
return DiscreteArgs.empty() && !VariadicIndex; }
245 ArgVecTy DiscreteArgs;
246 std::optional<ArgIdxTy> VariadicIndex;
254class GenericTaintRule {
267 std::optional<StringRef> SinkMsg;
269 GenericTaintRule() =
default;
271 GenericTaintRule(ArgSet &&Sink, ArgSet &&Filter, ArgSet &&Src, ArgSet &&Dst,
272 std::optional<StringRef> SinkMsg = std::nullopt)
273 : SinkArgs(std::move(Sink)), FilterArgs(std::move(Filter)),
274 PropSrcArgs(std::move(Src)), PropDstArgs(std::move(Dst)),
280 static GenericTaintRule Sink(ArgSet &&SinkArgs,
281 std::optional<StringRef> Msg = std::nullopt) {
282 return {std::move(SinkArgs), {}, {}, {}, Msg};
286 static GenericTaintRule Filter(ArgSet &&FilterArgs) {
287 return {{}, std::move(FilterArgs), {}, {}};
292 static GenericTaintRule Source(ArgSet &&SourceArgs) {
293 return {{}, {}, {}, std::move(SourceArgs)};
297 static GenericTaintRule Prop(ArgSet &&SrcArgs, ArgSet &&DstArgs) {
298 return {{}, {}, std::move(SrcArgs), std::move(DstArgs)};
302 static GenericTaintRule
303 SinkProp(ArgSet &&SinkArgs, ArgSet &&SrcArgs, ArgSet &&DstArgs,
304 std::optional<StringRef> Msg = std::nullopt) {
306 std::move(SinkArgs), {}, std::move(SrcArgs), std::move(DstArgs), Msg};
316 return ArgIdx == ReturnValueIndex ?
Call.getOriginExpr()
317 :
Call.getArgExpr(ArgIdx);
327struct TaintConfiguration {
328 using NameScopeArgs = std::tuple<std::string, std::string, ArgVecTy>;
329 enum class VariadicType {
None, Src, Dst };
336 struct Sink : Common {
340 struct Filter : Common {
344 struct Propagation : Common {
347 VariadicType VarType;
351 std::vector<Propagation> Propagations;
352 std::vector<Filter> Filters;
353 std::vector<Sink> Sinks;
// Default construction, copying and moving are all compiler-generated.
// GenericTaintRuleParser::parseConfiguration takes the configuration by
// rvalue reference, so the move operations are the ones it relies on.
355 TaintConfiguration() =
default;
356 TaintConfiguration(
const TaintConfiguration &) =
default;
357 TaintConfiguration(TaintConfiguration &&) =
default;
358 TaintConfiguration &operator=(
const TaintConfiguration &) =
default;
359 TaintConfiguration &operator=(TaintConfiguration &&) =
default;
362struct GenericTaintRuleParser {
367 using RulesContTy = std::vector<std::pair<CallDescription, GenericTaintRule>>;
/// Converts a parsed TaintConfiguration into (CallDescription,
/// GenericTaintRule) pairs.
/// \param Option  checker option name, forwarded to the per-entry parsers
///                for use in invalid-option diagnostics.
/// \param Config  configuration to consume; its Filters, Sinks and
///                Propagations entries are moved from.
368 RulesContTy parseConfiguration(
const std::string &Option,
369 TaintConfiguration &&Config)
const;
/// Checks the argument indices of one configuration entry; an index below
/// ReturnValueIndex is reported through the CheckerManager as an invalid
/// value of \p Option.
376 void validateArgVector(
const std::string &Option,
const ArgVecTy &Args)
const;
/// Builds the qualified-name parts for a configuration entry: the definition
/// splits a non-empty Scope on "::" and then appends the entry's Name.
/// \tparam Config  an entry type exposing Scope and Name members.
378 template <
typename Config>
static NamePartsTy parseNameParts(
const Config &
C);
382 template <
typename Config>
383 static void consumeRulesFromConfig(
const Config &
C, GenericTaintRule &&Rule,
// One overload per configuration entry kind (Sink, Filter, Propagation).
// Each definition validates the entry's argument vectors against Option,
// builds the matching GenericTaintRule and passes it, together with Rules,
// to consumeRulesFromConfig. The entry is taken by rvalue reference because
// its argument vectors are moved into the rule.
386 void parseConfig(
const std::string &Option, TaintConfiguration::Sink &&
P,
387 RulesContTy &Rules)
const;
388 void parseConfig(
const std::string &Option, TaintConfiguration::Filter &&
P,
389 RulesContTy &Rules)
const;
390 void parseConfig(
const std::string &Option,
391 TaintConfiguration::Propagation &&
P,
392 RulesContTy &Rules)
const;
397class GenericTaintChecker :
public Checker<check::PreCall, check::PostCall> {
403 const char *Sep)
const override;
406 bool generateReportIfTainted(
const Expr *E, StringRef Msg,
429 mutable std::optional<RuleLookupTy> StaticTaintRules;
430 mutable std::optional<RuleLookupTy> DynamicTaintRules;
// Tell LLVM's YAML I/O that a std::vector of each entry type maps to a YAML
// sequence; the mapOptional calls on Config.Sinks, Config.Filters and
// Config.Propagations depend on these registrations.
435LLVM_YAML_IS_SEQUENCE_VECTOR(TaintConfiguration::Sink)
436LLVM_YAML_IS_SEQUENCE_VECTOR(TaintConfiguration::Filter)
437LLVM_YAML_IS_SEQUENCE_VECTOR(TaintConfiguration::Propagation)
441template <>
struct MappingTraits<TaintConfiguration> {
442 static void mapping(IO &IO, TaintConfiguration &Config) {
443 IO.mapOptional(
"Propagations", Config.Propagations);
444 IO.mapOptional(
"Filters", Config.Filters);
445 IO.mapOptional(
"Sinks", Config.Sinks);
449template <>
struct MappingTraits<TaintConfiguration::Sink> {
450 static void mapping(IO &IO, TaintConfiguration::Sink &Sink) {
451 IO.mapRequired(
"Name", Sink.Name);
452 IO.mapOptional(
"Scope", Sink.Scope);
453 IO.mapRequired(
"Args", Sink.SinkArgs);
457template <>
struct MappingTraits<TaintConfiguration::Filter> {
458 static void mapping(IO &IO, TaintConfiguration::Filter &Filter) {
459 IO.mapRequired(
"Name", Filter.Name);
460 IO.mapOptional(
"Scope", Filter.Scope);
461 IO.mapRequired(
"Args", Filter.FilterArgs);
465template <>
struct MappingTraits<TaintConfiguration::Propagation> {
466 static void mapping(IO &IO, TaintConfiguration::Propagation &Propagation) {
467 IO.mapRequired(
"Name", Propagation.Name);
468 IO.mapOptional(
"Scope", Propagation.Scope);
469 IO.mapOptional(
"SrcArgs", Propagation.SrcArgs);
470 IO.mapOptional(
"DstArgs", Propagation.DstArgs);
471 IO.mapOptional(
"VariadicType", Propagation.VarType);
472 IO.mapOptional(
"VariadicIndex", Propagation.VarIndex);
476template <>
struct ScalarEnumerationTraits<TaintConfiguration::VariadicType> {
478 IO.enumCase(
Value,
"None", TaintConfiguration::VariadicType::None);
479 IO.enumCase(
Value,
"Src", TaintConfiguration::VariadicType::Src);
480 IO.enumCase(
Value,
"Dst", TaintConfiguration::VariadicType::Dst);
491 ImmutableSet<ArgIdxTy>)
494void GenericTaintRuleParser::validateArgVector(
const std::string &Option,
495 const ArgVecTy &Args)
const {
496 for (ArgIdxTy Arg : Args) {
497 if (Arg < ReturnValueIndex) {
498 Mgr.reportInvalidCheckerOptionValue(
499 Mgr.getChecker<GenericTaintChecker>(), Option,
500 "an argument number for propagation rules greater or equal to -1");
505template <
typename Config>
507GenericTaintRuleParser::parseNameParts(
const Config &
C) {
508 NamePartsTy NameParts;
509 if (!
C.Scope.empty()) {
512 StringRef{
C.Scope}.split(NameParts,
"::", -1,
515 NameParts.emplace_back(
C.Name);
519template <
typename Config>
520void GenericTaintRuleParser::consumeRulesFromConfig(
const Config &
C,
521 GenericTaintRule &&Rule,
522 RulesContTy &Rules) {
523 NamePartsTy NameParts = parseNameParts(
C);
527void GenericTaintRuleParser::parseConfig(
const std::string &Option,
528 TaintConfiguration::Sink &&S,
529 RulesContTy &Rules)
const {
530 validateArgVector(Option, S.SinkArgs);
531 consumeRulesFromConfig(S, GenericTaintRule::Sink(std::move(S.SinkArgs)),
535void GenericTaintRuleParser::parseConfig(
const std::string &Option,
536 TaintConfiguration::Filter &&S,
537 RulesContTy &Rules)
const {
538 validateArgVector(Option, S.FilterArgs);
539 consumeRulesFromConfig(S, GenericTaintRule::Filter(std::move(S.FilterArgs)),
543void GenericTaintRuleParser::parseConfig(
const std::string &Option,
544 TaintConfiguration::Propagation &&
P,
545 RulesContTy &Rules)
const {
546 validateArgVector(Option,
P.SrcArgs);
547 validateArgVector(Option,
P.DstArgs);
548 bool IsSrcVariadic =
P.VarType == TaintConfiguration::VariadicType::Src;
549 bool IsDstVariadic =
P.VarType == TaintConfiguration::VariadicType::Dst;
550 std::optional<ArgIdxTy> JustVarIndex =
P.VarIndex;
552 ArgSet SrcDesc(std::move(
P.SrcArgs),
553 IsSrcVariadic ? JustVarIndex : std::nullopt);
554 ArgSet DstDesc(std::move(
P.DstArgs),
555 IsDstVariadic ? JustVarIndex : std::nullopt);
557 consumeRulesFromConfig(
558 P, GenericTaintRule::Prop(std::move(SrcDesc), std::move(DstDesc)), Rules);
561GenericTaintRuleParser::RulesContTy
562GenericTaintRuleParser::parseConfiguration(
const std::string &Option,
563 TaintConfiguration &&Config)
const {
567 for (
auto &F : Config.Filters)
568 parseConfig(Option, std::move(F), Rules);
570 for (
auto &S : Config.Sinks)
571 parseConfig(Option, std::move(S), Rules);
573 for (
auto &
P : Config.Propagations)
574 parseConfig(Option, std::move(
P), Rules);
583 if (StaticTaintRules || DynamicTaintRules)
586 using RulesConstructionTy =
587 std::vector<std::pair<CallDescription, GenericTaintRule>>;
588 using TR = GenericTaintRule;
592 RulesConstructionTy GlobalCRules{
594 {{{
"fdopen"}}, TR::Source({{ReturnValueIndex}})},
595 {{{
"fopen"}}, TR::Source({{ReturnValueIndex}})},
596 {{{
"freopen"}}, TR::Source({{ReturnValueIndex}})},
597 {{{
"getch"}}, TR::Source({{ReturnValueIndex}})},
598 {{{
"getchar"}}, TR::Source({{ReturnValueIndex}})},
599 {{{
"getchar_unlocked"}}, TR::Source({{ReturnValueIndex}})},
600 {{{
"gets"}}, TR::Source({{0}, ReturnValueIndex})},
601 {{{
"gets_s"}}, TR::Source({{0}, ReturnValueIndex})},
602 {{{
"scanf"}}, TR::Source({{}, 1})},
603 {{{
"scanf_s"}}, TR::Source({{}, {1}})},
604 {{{
"wgetch"}}, TR::Source({{}, ReturnValueIndex})},
610 {{{
"_IO_getc"}}, TR::Source({{ReturnValueIndex}})},
611 {{{
"getcwd"}}, TR::Source({{0, ReturnValueIndex}})},
612 {{{
"getwd"}}, TR::Source({{0, ReturnValueIndex}})},
613 {{{
"readlink"}}, TR::Source({{1, ReturnValueIndex}})},
614 {{{
"readlinkat"}}, TR::Source({{2, ReturnValueIndex}})},
615 {{{
"get_current_dir_name"}}, TR::Source({{ReturnValueIndex}})},
616 {{{
"gethostname"}}, TR::Source({{0}})},
617 {{{
"getnameinfo"}}, TR::Source({{2, 4}})},
618 {{{
"getseuserbyname"}}, TR::Source({{1, 2}})},
619 {{{
"getgroups"}}, TR::Source({{1, ReturnValueIndex}})},
620 {{{
"getlogin"}}, TR::Source({{ReturnValueIndex}})},
621 {{{
"getlogin_r"}}, TR::Source({{0}})},
624 {{{
"accept"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
625 {{{
"atoi"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
626 {{{
"atol"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
627 {{{
"atoll"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
628 {{{
"fgetc"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
629 {{{
"fgetln"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
630 {{{
"fgets"}}, TR::Prop({{2}}, {{0, ReturnValueIndex}})},
631 {{{
"fgetws"}}, TR::Prop({{2}}, {{0, ReturnValueIndex}})},
632 {{{
"fscanf"}}, TR::Prop({{0}}, {{}, 2})},
633 {{{
"fscanf_s"}}, TR::Prop({{0}}, {{}, {2}})},
634 {{{
"sscanf"}}, TR::Prop({{0}}, {{}, 2})},
636 {{{
"getc"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
637 {{{
"getc_unlocked"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
638 {{{
"getdelim"}}, TR::Prop({{3}}, {{0}})},
639 {{{
"getline"}}, TR::Prop({{2}}, {{0}})},
640 {{{
"getw"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
641 {{{
"pread"}}, TR::Prop({{0, 1, 2, 3}}, {{1, ReturnValueIndex}})},
642 {{{
"read"}}, TR::Prop({{0, 2}}, {{1, ReturnValueIndex}})},
643 {{{
"strchr"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
644 {{{
"strrchr"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
645 {{{
"tolower"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
646 {{{
"toupper"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
647 {{{
"fread"}}, TR::Prop({{3}}, {{0, ReturnValueIndex}})},
648 {{{
"recv"}}, TR::Prop({{0}}, {{1, ReturnValueIndex}})},
649 {{{
"recvfrom"}}, TR::Prop({{0}}, {{1, ReturnValueIndex}})},
651 {{{
"ttyname"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
652 {{{
"ttyname_r"}}, TR::Prop({{0}}, {{1, ReturnValueIndex}})},
654 {{{
"basename"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
655 {{{
"dirname"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
656 {{{
"fnmatch"}}, TR::Prop({{1}}, {{ReturnValueIndex}})},
657 {{{
"memchr"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
658 {{{
"memrchr"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
659 {{{
"rawmemchr"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
661 {{{
"mbtowc"}}, TR::Prop({{1}}, {{0, ReturnValueIndex}})},
662 {{{
"wctomb"}}, TR::Prop({{1}}, {{0, ReturnValueIndex}})},
663 {{{
"wcwidth"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
665 {{{
"memcmp"}}, TR::Prop({{0, 1}}, {{ReturnValueIndex}})},
666 {{{
"memcpy"}}, TR::Prop({{1}}, {{0, ReturnValueIndex}})},
667 {{{
"memmove"}}, TR::Prop({{1}}, {{0, ReturnValueIndex}})},
673 {{{
"memmem"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
676 {{{
"strstr"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
677 {{{
"strcasestr"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
679 {{{
"strchrnul"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
681 {{{
"index"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
682 {{{
"rindex"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
686 {{{
"qsort"}}, TR::Prop({{0}}, {{0}})},
687 {{{
"qsort_r"}}, TR::Prop({{0}}, {{0}})},
689 {{{
"strcmp"}}, TR::Prop({{0, 1}}, {{ReturnValueIndex}})},
690 {{{
"strcasecmp"}}, TR::Prop({{0, 1}}, {{ReturnValueIndex}})},
691 {{{
"strncmp"}}, TR::Prop({{0, 1, 2}}, {{ReturnValueIndex}})},
692 {{{
"strncasecmp"}}, TR::Prop({{0, 1, 2}}, {{ReturnValueIndex}})},
693 {{{
"strspn"}}, TR::Prop({{0, 1}}, {{ReturnValueIndex}})},
694 {{{
"strcspn"}}, TR::Prop({{0, 1}}, {{ReturnValueIndex}})},
695 {{{
"strpbrk"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
696 {{{
"strndup"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
697 {{{
"strndupa"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
702 {{{
"strtol"}}, TR::Prop({{0}}, {{1, ReturnValueIndex}})},
703 {{{
"strtoll"}}, TR::Prop({{0}}, {{1, ReturnValueIndex}})},
704 {{{
"strtoul"}}, TR::Prop({{0}}, {{1, ReturnValueIndex}})},
705 {{{
"strtoull"}}, TR::Prop({{0}}, {{1, ReturnValueIndex}})},
707 {{{
"isalnum"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
708 {{{
"isalpha"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
709 {{{
"isascii"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
710 {{{
"isblank"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
711 {{{
"iscntrl"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
712 {{{
"isdigit"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
713 {{{
"isgraph"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
714 {{{
"islower"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
715 {{{
"isprint"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
716 {{{
"ispunct"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
717 {{{
"isspace"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
718 {{{
"isupper"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
719 {{{
"isxdigit"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
721 {{CDM::CLibraryMaybeHardened, {BI.
getName(Builtin::BIstrncat)}},
722 TR::Prop({{1, 2}}, {{0, ReturnValueIndex}})},
723 {{CDM::CLibraryMaybeHardened, {BI.
getName(Builtin::BIstrlcpy)}},
724 TR::Prop({{1, 2}}, {{0}})},
725 {{CDM::CLibraryMaybeHardened, {BI.
getName(Builtin::BIstrlcat)}},
726 TR::Prop({{1, 2}}, {{0}})},
727 {{CDM::CLibraryMaybeHardened, {{
"snprintf"}}},
728 TR::Prop({{1}, 3}, {{0, ReturnValueIndex}})},
729 {{CDM::CLibraryMaybeHardened, {{
"sprintf"}}},
730 TR::Prop({{1}, 2}, {{0, ReturnValueIndex}})},
731 {{CDM::CLibraryMaybeHardened, {{
"strcpy"}}},
732 TR::Prop({{1}}, {{0, ReturnValueIndex}})},
733 {{CDM::CLibraryMaybeHardened, {{
"stpcpy"}}},
734 TR::Prop({{1}}, {{0, ReturnValueIndex}})},
735 {{CDM::CLibraryMaybeHardened, {{
"strcat"}}},
736 TR::Prop({{1}}, {{0, ReturnValueIndex}})},
737 {{CDM::CLibraryMaybeHardened, {{
"wcsncat"}}},
738 TR::Prop({{1}}, {{0, ReturnValueIndex}})},
739 {{CDM::CLibrary, {{
"strdup"}}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
740 {{CDM::CLibrary, {{
"strdupa"}}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
741 {{CDM::CLibrary, {{
"wcsdup"}}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
744 {{{
"system"}}, TR::Sink({{0}}, MsgSanitizeSystemArgs)},
745 {{{
"popen"}}, TR::Sink({{0}}, MsgSanitizeSystemArgs)},
746 {{{
"execl"}}, TR::Sink({{}, {0}}, MsgSanitizeSystemArgs)},
747 {{{
"execle"}}, TR::Sink({{}, {0}}, MsgSanitizeSystemArgs)},
748 {{{
"execlp"}}, TR::Sink({{}, {0}}, MsgSanitizeSystemArgs)},
749 {{{
"execv"}}, TR::Sink({{0, 1}}, MsgSanitizeSystemArgs)},
750 {{{
"execve"}}, TR::Sink({{0, 1, 2}}, MsgSanitizeSystemArgs)},
751 {{{
"fexecve"}}, TR::Sink({{0, 1, 2}}, MsgSanitizeSystemArgs)},
752 {{{
"execvp"}}, TR::Sink({{0, 1}}, MsgSanitizeSystemArgs)},
753 {{{
"execvpe"}}, TR::Sink({{0, 1, 2}}, MsgSanitizeSystemArgs)},
754 {{{
"dlopen"}}, TR::Sink({{0}}, MsgSanitizeSystemArgs)},
755 {{CDM::CLibrary, {{
"malloc"}}}, TR::Sink({{0}}, MsgTaintedBufferSize)},
756 {{CDM::CLibrary, {{
"calloc"}}}, TR::Sink({{0}}, MsgTaintedBufferSize)},
757 {{CDM::CLibrary, {{
"alloca"}}}, TR::Sink({{0}}, MsgTaintedBufferSize)},
758 {{CDM::CLibrary, {{
"memccpy"}}}, TR::Sink({{3}}, MsgTaintedBufferSize)},
759 {{CDM::CLibrary, {{
"realloc"}}}, TR::Sink({{1}}, MsgTaintedBufferSize)},
760 {{{{
"setproctitle"}}}, TR::Sink({{0}, 1}, MsgUncontrolledFormatString)},
761 {{{{
"setproctitle_fast"}}},
762 TR::Sink({{0}, 1}, MsgUncontrolledFormatString)},
765 {{CDM::CLibraryMaybeHardened, BI.
getName(Builtin::BImemcpy)},
766 TR::SinkProp({{2}}, {{1, 2}}, {{0, ReturnValueIndex}},
767 MsgTaintedBufferSize)},
768 {{CDM::CLibraryMaybeHardened, {BI.
getName(Builtin::BImemmove)}},
769 TR::SinkProp({{2}}, {{1, 2}}, {{0, ReturnValueIndex}},
770 MsgTaintedBufferSize)},
771 {{CDM::CLibraryMaybeHardened, {BI.
getName(Builtin::BIstrncpy)}},
772 TR::SinkProp({{2}}, {{1, 2}}, {{0, ReturnValueIndex}},
773 MsgTaintedBufferSize)},
774 {{CDM::CLibrary, {BI.
getName(Builtin::BIstrndup)}},
775 TR::SinkProp({{1}}, {{0, 1}}, {{ReturnValueIndex}},
776 MsgTaintedBufferSize)},
777 {{CDM::CLibrary, {{
"bcopy"}}},
778 TR::SinkProp({{2}}, {{0, 2}}, {{1}}, MsgTaintedBufferSize)}};
781 if (TR::UntrustedEnv(
C)) {
783 GlobalCRules.push_back(
784 {{{
"setproctitle_init"}}, TR::Sink({{1, 2}}, MsgCustomSink)});
785 GlobalCRules.push_back({{{
"getenv"}}, TR::Source({{ReturnValueIndex}})});
788 StaticTaintRules.emplace(std::make_move_iterator(GlobalCRules.begin()),
789 std::make_move_iterator(GlobalCRules.end()));
794 GenericTaintRuleParser ConfigParser{*Mgr};
795 std::string Option{
"Config"};
796 StringRef ConfigFile =
798 std::optional<TaintConfiguration> Config =
799 getConfiguration<TaintConfiguration>(*Mgr,
this, Option, ConfigFile);
802 DynamicTaintRules = RuleLookupTy{};
806 GenericTaintRuleParser::RulesContTy Rules{
807 ConfigParser.parseConfiguration(Option, std::move(*Config))};
809 DynamicTaintRules.emplace(std::make_move_iterator(Rules.begin()),
810 std::make_move_iterator(Rules.end()));
818 if (
const auto *Rule =
819 Call.isGlobalCFunction() ? StaticTaintRules->lookup(
Call) :
nullptr)
820 Rule->process(*
this,
Call,
C);
821 else if (
const auto *Rule = DynamicTaintRules->lookup(
Call))
822 Rule->process(*
this,
Call,
C);
829 checkUncontrolledFormatString(
Call,
C);
833 taintUnsafeSocketProtocol(
Call,
C);
836void GenericTaintChecker::checkPostCall(
const CallEvent &
Call,
846 TaintArgsOnPostVisitTy TaintArgsMap = State->get<TaintArgsOnPostVisit>();
848 const ImmutableSet<ArgIdxTy> *TaintArgs = TaintArgsMap.lookup(CurrentFrame);
851 assert(!TaintArgs->isEmpty());
853 LLVM_DEBUG(
for (ArgIdxTy I
855 llvm::dbgs() <<
"PostCall<";
856 Call.dump(llvm::dbgs());
857 llvm::dbgs() <<
"> actually wants to taint arg index: " << I <<
'\n';
860 const NoteTag *InjectionTag =
nullptr;
861 std::vector<SymbolRef> TaintedSymbols;
862 std::vector<ArgIdxTy> TaintedIndexes;
863 for (ArgIdxTy ArgNum : *TaintArgs) {
865 if (ArgNum == ReturnValueIndex) {
867 std::vector<SymbolRef> TaintedSyms =
869 if (!TaintedSyms.empty()) {
870 TaintedSymbols.push_back(TaintedSyms[0]);
871 TaintedIndexes.push_back(ArgNum);
877 if (
auto V = getPointeeOf(State,
Call.getArgSVal(ArgNum))) {
880 if (!TaintedSyms.empty()) {
881 TaintedSymbols.push_back(TaintedSyms[0]);
882 TaintedIndexes.push_back(ArgNum);
888 InjectionTag = taintPropagationExplainerTag(
C, TaintedSymbols, TaintedIndexes,
889 Call.getCalleeStackFrame(0));
891 State = State->remove<TaintArgsOnPostVisit>(CurrentFrame);
892 C.addTransition(State, InjectionTag);
895void GenericTaintChecker::printState(raw_ostream &Out,
ProgramStateRef State,
896 const char *NL,
const char *Sep)
const {
900void GenericTaintRule::process(
const GenericTaintChecker &
Checker,
903 const ArgIdxTy CallNumArgs = fromArgumentCount(
Call.getNumArgs());
906 const auto ForEachCallArg = [&
C, &
Call, CallNumArgs](
auto &&Fun) {
907 for (ArgIdxTy I = ReturnValueIndex; I < CallNumArgs; ++I) {
908 const Expr *E = GetArgExpr(I,
Call);
909 Fun(I, E,
C.getSVal(E));
914 ForEachCallArg([
this, &
Checker, &
C, &State](ArgIdxTy I,
const Expr *E,
SVal) {
916 if (isStdin(
C.getSVal(E),
C.getASTContext())) {
917 State = addTaint(State, C.getSVal(E));
919 if (SinkArgs.contains(I) && isTaintedOrPointsToTainted(State,
C.getSVal(E)))
920 Checker.generateReportIfTainted(E, SinkMsg.value_or(MsgCustomSink),
C);
924 ForEachCallArg([
this, &State](ArgIdxTy I,
const Expr *E,
SVal S) {
925 if (FilterArgs.contains(I)) {
926 State = removeTaint(State, S);
927 if (auto P = getPointeeOf(State, S))
928 State = removeTaint(State, *P);
937 bool IsMatching = PropSrcArgs.isEmpty();
938 std::vector<SymbolRef> TaintedSymbols;
939 std::vector<ArgIdxTy> TaintedIndexes;
940 ForEachCallArg([
this, &
C, &IsMatching, &State, &TaintedSymbols,
941 &TaintedIndexes](ArgIdxTy I,
const Expr *E,
SVal) {
942 std::optional<SVal> TaintedSVal =
943 getTaintedPointeeOrPointer(State,
C.getSVal(E));
945 IsMatching || (PropSrcArgs.contains(I) && TaintedSVal.has_value());
948 if (TaintedSVal && !isStdin(*TaintedSVal,
C.getASTContext())) {
949 std::vector<SymbolRef> TaintedArgSyms =
951 if (!TaintedArgSyms.empty()) {
952 llvm::append_range(TaintedSymbols, TaintedArgSyms);
953 TaintedIndexes.push_back(I);
967 const bool IsNonConstRef = Ty->isReferenceType() && !Ty.isConstQualified();
968 const bool IsNonConstPtr =
969 Ty->isPointerType() && !Ty->getPointeeType().isConstQualified();
971 return IsNonConstRef || IsNonConstPtr;
975 auto &F = State->getStateManager().get_context<ArgIdxFactory>();
976 ImmutableSet<ArgIdxTy> Result = F.getEmptySet();
979 if (PropDstArgs.contains(I)) {
980 LLVM_DEBUG(llvm::dbgs() <<
"PreCall<"; Call.dump(llvm::dbgs());
982 <<
"> prepares tainting arg index: " << I <<
'\n';);
983 Result = F.add(Result, I);
992 if (WouldEscape(
V, E->
getType()) && getTaintedPointeeOrPointer(State,
V)) {
993 LLVM_DEBUG(
if (!Result.contains(I)) {
994 llvm::dbgs() <<
"PreCall<";
995 Call.dump(llvm::dbgs());
996 llvm::dbgs() <<
"> prepares tainting arg index: " << I <<
'\n';
998 Result = F.
add(Result, I);
1002 if (!Result.isEmpty())
1003 State = State->set<TaintArgsOnPostVisit>(
C.getStackFrame(), Result);
1004 const NoteTag *InjectionTag = taintOriginTrackerTag(
1005 C, std::move(TaintedSymbols), std::move(TaintedIndexes),
1006 Call.getCalleeStackFrame(0));
1007 C.addTransition(State, InjectionTag);
1011 return !
C.getAnalysisManager()
1012 .getAnalyzerOptions()
1013 .ShouldAssumeControlledEnvironment;
1016bool GenericTaintChecker::generateReportIfTainted(
const Expr *E, StringRef Msg,
1019 std::optional<SVal> TaintedSVal =
1020 getTaintedPointeeOrPointer(
C.getState(),
C.getSVal(E));
1027 auto report = std::make_unique<PathSensitiveBugReport>(BT, Msg, N);
1030 report->markInteresting(TaintedSym);
1033 C.emitReport(std::move(report));
1051 const Decl *CallDecl =
Call.getDecl();
1058 const ArgIdxTy CallNumArgs = fromArgumentCount(
Call.getNumArgs());
1061 ArgNum = Format->getFormatIdx() - 1;
1062 if ((Format->getType()->getName() ==
"printf") && CallNumArgs > ArgNum)
1069bool GenericTaintChecker::checkUncontrolledFormatString(
1072 ArgIdxTy ArgNum = 0;
1078 return generateReportIfTainted(
Call.getArgExpr(ArgNum),
1079 MsgUncontrolledFormatString,
C);
1082void GenericTaintChecker::taintUnsafeSocketProtocol(
const CallEvent &
Call,
1084 if (
Call.getNumArgs() < 1)
1089 if (!
ID->getName().equals(
"socket"))
1093 StringRef DomName =
C.getMacroNameOrSpelling(DomLoc);
1095 bool SafeProtocol = DomName.equals(
"AF_SYSTEM") ||
1096 DomName.equals(
"AF_LOCAL") || DomName.equals(
"AF_UNIX") ||
1097 DomName.equals(
"AF_RESERVED_36");
1102 auto &F = State->getStateManager().get_context<ArgIdxFactory>();
1103 ImmutableSet<ArgIdxTy> Result = F.
add(F.getEmptySet(), ReturnValueIndex);
1104 State = State->set<TaintArgsOnPostVisit>(
C.getStackFrame(), Result);
1105 C.addTransition(State);
1113bool ento::shouldRegisterGenericTaintChecker(
const CheckerManager &mgr) {
Defines enum values for all the target-independent builtin functions.
static bool getPrintfFormatArgumentNum(const CallEvent &Call, const CheckerContext &C, ArgIdxTy &ArgNum)
TODO: remove checking for printf format attributes and socket whitelisting from GenericTaintChecker,...
#define REGISTER_MAP_WITH_PROGRAMSTATE(Name, Key, Value)
Declares an immutable map of type NameTy, suitable for placement into the ProgramState.
#define REGISTER_SET_FACTORY_WITH_PROGRAMSTATE(Name, Elem)
Declares an immutable set type Name and registers the factory for such sets in the program state,...
static bool contains(const std::set< tok::TokenKind > &Terminators, const Token &Tok)
Holds long-lived AST nodes (such as types and decls) that can be referred to throughout the semantic ...
QualType getFILEType() const
Retrieve the C FILE type.
StringRef getCheckerStringOption(StringRef CheckerName, StringRef OptionName, bool SearchInParents=false) const
Query an option's string value.
Holds information about both target-independent and target-specific builtins, allowing easy queries b...
llvm::StringRef getName(unsigned ID) const
Return the identifier name for the specified builtin, e.g.
Decl - This represents one declaration (or definition), e.g.
FunctionDecl * getAsFunction() LLVM_READONLY
Returns the function itself, or the templated function if this is a function template.
llvm::iterator_range< specific_attr_iterator< T > > specific_attrs() const
This represents one expression.
Represents a function declaration or definition.
One of these records is kept for each identifier that is lexed.
It wraps the AnalysisDeclContext to represent both the call stack with the help of StackFrameContext ...
A (possibly-)qualified type.
QualType getCanonicalType() const
Scope - A scope is a transient data structure that is used while parsing the program.
Encodes a location in the source.
It represents a stack frame of the call stack (based on CallEvent).
SourceRange getSourceRange() const LLVM_READONLY
SourceLocation tokens are not useful in isolation - they are low level value objects created/interpre...
bool isPointerType() const
QualType getPointeeType() const
If this is a pointer, ObjC object pointer, or block pointer, this returns the respective pointee.
const BugType & getBugType() const
StringRef getCategory() const
An immutable map from CallDescriptions to arbitrary data.
A CallDescription is a pattern that can be used to match calls based on the qualified name and the ar...
Represents an abstract call to a function or method along a particular path.
virtual void printState(raw_ostream &Out, ProgramStateRef State, const char *NL, const char *Sep) const
See CheckerManager::runCheckersForPrintState.
const AnalyzerOptions & getAnalyzerOptions() const
CHECKER * registerChecker(AT &&... Args)
Used to register checkers.
The tag upon which the TagVisitor reacts.
void markInteresting(SymbolRef sym, bugreporter::TrackingKind TKind=bugreporter::TrackingKind::Thorough)
Marks a symbol as interesting.
bool isInteresting(SymbolRef sym) const
SVal - This represents a symbolic expression, which can be either an L-value or an R-value.
std::optional< T > getAs() const
Convert to the specified SVal type, returning std::nullopt if this SVal is not of the desired type.
QualType getType(const ASTContext &) const
Try to get a reasonable type for the given value.
const MemRegion * getAsRegion() const
const char *const TaintedData
ProgramStateRef addTaint(ProgramStateRef State, const Stmt *S, const LocationContext *LCtx, TaintTagType Kind=TaintTagGeneric)
Create a new state in which the value of the statement is marked as tainted.
std::vector< SymbolRef > getTaintedSymbols(ProgramStateRef State, const Stmt *S, const LocationContext *LCtx, TaintTagType Kind=TaintTagGeneric)
Returns the tainted Symbols for a given Statement and state.
bool isTainted(ProgramStateRef State, const Stmt *S, const LocationContext *LCtx, TaintTagType Kind=TaintTagGeneric)
Check if the statement has a tainted value in the given state.
void printTaint(ProgramStateRef State, raw_ostream &Out, const char *nl="\n", const char *sep="")
The JSON file list parser is used to communicate input to InstallAPI.
if(T->getSizeExpr()) TRY_TO(TraverseStmt(const_cast< Expr * >(T -> getSizeExpr())))
for(const auto &A :T->param_types())
@ None
The alignment was not explicit in code.
Diagnostic wrappers for TextAPI types for error reporting.
static void mapping(IO &IO, TaintConfiguration &Config)
static void mapping(IO &IO, TaintConfiguration::Filter &Filter)
static void mapping(IO &IO, TaintConfiguration::Propagation &Propagation)
static void mapping(IO &IO, TaintConfiguration::Sink &Sink)
static void enumeration(IO &IO, TaintConfiguration::VariadicType &Value)