29#include "llvm/ADT/StringExtras.h"
30#include "llvm/ADT/StringRef.h"
31#include "llvm/Support/YAMLTraits.h"
39#define DEBUG_TYPE "taint-checker"
45using llvm::ImmutableSet;
49class GenericTaintChecker;
52constexpr llvm::StringLiteral MsgUncontrolledFormatString =
53 "Untrusted data is used as a format string "
54 "(CWE-134: Uncontrolled Format String)";
59constexpr llvm::StringLiteral MsgSanitizeSystemArgs =
60 "Untrusted data is passed to a system call "
61 "(CERT/STR02-C. Sanitize data passed to complex subsystems)";
64constexpr llvm::StringLiteral MsgCustomSink =
65 "Untrusted data is passed to a user-defined sink";
71constexpr ArgIdxTy ReturnValueIndex{-1};
73static ArgIdxTy fromArgumentCount(
unsigned Count) {
75 static_cast<std::size_t
>(std::numeric_limits<ArgIdxTy>::max()) &&
76 "ArgIdxTy is not large enough to represent the number of arguments.");
87 const auto *SymReg = dyn_cast_or_null<SymbolicRegion>(Val.
getAsRegion());
93 dyn_cast_or_null<DeclRegion>(SymReg->getSymbol()->getOriginRegion());
99 if (
const auto *
D = dyn_cast_or_null<VarDecl>(DeclReg->getDecl())) {
101 if (
D->getName() ==
"stdin" &&
D->hasExternalStorage() &&
D->isExternC()) {
103 const QualType Ty =
D->getType().getCanonicalType();
113 const QualType ArgTy = LValue.
getType(State->getStateManager().getContext());
115 return State->getSVal(LValue);
119 return State->getSVal(LValue, State->getStateManager().getContext().CharTy);
125 return getPointeeOf(State, *LValue);
134 if (
auto Pointee = getPointeeOf(State, Arg))
144 return getTaintedPointeeOrPointer(State, ExprSVal).has_value();
151 std::vector<SymbolRef> TaintedSymbols,
152 std::vector<ArgIdxTy> TaintedArgs,
154 return C.getNoteTag([TaintedSymbols = std::move(TaintedSymbols),
155 TaintedArgs = std::move(TaintedArgs), CallLocation](
163 if (TaintedSymbols.empty())
164 return "Taint originated here";
166 for (
auto Sym : TaintedSymbols) {
169 LLVM_DEBUG(
for (
auto Arg
171 llvm::dbgs() <<
"Taint Propagated from argument " << Arg + 1 <<
"\n";
180const NoteTag *taintPropagationExplainerTag(
182 std::vector<ArgIdxTy> TaintedArgs,
const LocationContext *CallLocation) {
183 assert(TaintedSymbols.size() == TaintedArgs.size());
184 return C.getNoteTag([TaintedSymbols = std::move(TaintedSymbols),
185 TaintedArgs = std::move(TaintedArgs), CallLocation](
188 llvm::raw_svector_ostream Out(Msg);
190 if (TaintedSymbols.empty() ||
194 int nofTaintedArgs = 0;
195 for (
auto [Idx, Sym] : llvm::enumerate(TaintedSymbols)) {
198 if (TaintedArgs[Idx] != ReturnValueIndex) {
199 LLVM_DEBUG(llvm::dbgs() <<
"Taint Propagated to argument "
200 << TaintedArgs[Idx] + 1 <<
"\n");
201 if (nofTaintedArgs == 0)
202 Out <<
"Taint propagated to the ";
205 Out << TaintedArgs[Idx] + 1
206 << llvm::getOrdinalSuffix(TaintedArgs[Idx] + 1) <<
" argument";
209 LLVM_DEBUG(llvm::dbgs() <<
"Taint Propagated to return value.\n");
210 Out <<
"Taint propagated to the return value";
214 return std::string(Out.str());
224 ArgSet(ArgVecTy &&DiscreteArgs,
225 std::optional<ArgIdxTy> VariadicIndex = std::nullopt)
226 : DiscreteArgs(std::move(DiscreteArgs)),
227 VariadicIndex(std::move(VariadicIndex)) {}
229 bool contains(ArgIdxTy ArgIdx)
const {
230 if (llvm::is_contained(DiscreteArgs, ArgIdx))
233 return VariadicIndex && ArgIdx >= *VariadicIndex;
236 bool isEmpty()
const {
return DiscreteArgs.empty() && !VariadicIndex; }
239 ArgVecTy DiscreteArgs;
240 std::optional<ArgIdxTy> VariadicIndex;
248class GenericTaintRule {
261 std::optional<StringRef> SinkMsg;
263 GenericTaintRule() =
default;
265 GenericTaintRule(ArgSet &&Sink, ArgSet &&Filter, ArgSet &&Src, ArgSet &&Dst,
266 std::optional<StringRef> SinkMsg = std::nullopt)
267 : SinkArgs(std::move(Sink)), FilterArgs(std::move(Filter)),
268 PropSrcArgs(std::move(Src)), PropDstArgs(std::move(Dst)),
274 static GenericTaintRule Sink(ArgSet &&SinkArgs,
275 std::optional<StringRef> Msg = std::nullopt) {
276 return {std::move(SinkArgs), {}, {}, {}, Msg};
280 static GenericTaintRule Filter(ArgSet &&FilterArgs) {
281 return {{}, std::move(FilterArgs), {}, {}};
286 static GenericTaintRule Source(ArgSet &&SourceArgs) {
287 return {{}, {}, {}, std::move(SourceArgs)};
291 static GenericTaintRule Prop(ArgSet &&SrcArgs, ArgSet &&DstArgs) {
292 return {{}, {}, std::move(SrcArgs), std::move(DstArgs)};
302 return ArgIdx == ReturnValueIndex ?
Call.getOriginExpr()
303 :
Call.getArgExpr(ArgIdx);
313struct TaintConfiguration {
314 using NameScopeArgs = std::tuple<std::string, std::string, ArgVecTy>;
315 enum class VariadicType {
None, Src, Dst };
322 struct Sink : Common {
326 struct Filter : Common {
330 struct Propagation : Common {
333 VariadicType VarType;
337 std::vector<Propagation> Propagations;
338 std::vector<Filter> Filters;
339 std::vector<Sink> Sinks;
341 TaintConfiguration() =
default;
342 TaintConfiguration(
const TaintConfiguration &) =
default;
343 TaintConfiguration(TaintConfiguration &&) =
default;
344 TaintConfiguration &operator=(
const TaintConfiguration &) =
default;
345 TaintConfiguration &operator=(TaintConfiguration &&) =
default;
348struct GenericTaintRuleParser {
353 using RulesContTy = std::vector<std::pair<CallDescription, GenericTaintRule>>;
354 RulesContTy parseConfiguration(
const std::string &Option,
355 TaintConfiguration &&Config)
const;
362 void validateArgVector(
const std::string &Option,
const ArgVecTy &Args)
const;
364 template <
typename Config>
static NamePartsTy parseNameParts(
const Config &
C);
368 template <
typename Config>
369 static void consumeRulesFromConfig(
const Config &
C, GenericTaintRule &&Rule,
372 void parseConfig(
const std::string &Option, TaintConfiguration::Sink &&
P,
373 RulesContTy &Rules)
const;
374 void parseConfig(
const std::string &Option, TaintConfiguration::Filter &&
P,
375 RulesContTy &Rules)
const;
376 void parseConfig(
const std::string &Option,
377 TaintConfiguration::Propagation &&
P,
378 RulesContTy &Rules)
const;
383class GenericTaintChecker :
public Checker<check::PreCall, check::PostCall> {
389 const char *Sep)
const override;
392 bool generateReportIfTainted(
const Expr *
E, StringRef Msg,
395 bool isTaintReporterCheckerEnabled =
false;
396 std::optional<BugType> BT;
413 mutable std::optional<RuleLookupTy> StaticTaintRules;
414 mutable std::optional<RuleLookupTy> DynamicTaintRules;
419LLVM_YAML_IS_SEQUENCE_VECTOR(TaintConfiguration::Sink)
420LLVM_YAML_IS_SEQUENCE_VECTOR(TaintConfiguration::Filter)
421LLVM_YAML_IS_SEQUENCE_VECTOR(TaintConfiguration::Propagation)
425template <>
struct MappingTraits<TaintConfiguration> {
426 static void mapping(IO &IO, TaintConfiguration &Config) {
427 IO.mapOptional(
"Propagations", Config.Propagations);
428 IO.mapOptional(
"Filters", Config.Filters);
429 IO.mapOptional(
"Sinks", Config.Sinks);
433template <>
struct MappingTraits<TaintConfiguration::Sink> {
434 static void mapping(IO &IO, TaintConfiguration::Sink &Sink) {
435 IO.mapRequired(
"Name", Sink.Name);
436 IO.mapOptional(
"Scope", Sink.Scope);
437 IO.mapRequired(
"Args", Sink.SinkArgs);
441template <>
struct MappingTraits<TaintConfiguration::Filter> {
442 static void mapping(IO &IO, TaintConfiguration::Filter &Filter) {
443 IO.mapRequired(
"Name", Filter.Name);
444 IO.mapOptional(
"Scope", Filter.Scope);
445 IO.mapRequired(
"Args", Filter.FilterArgs);
449template <>
struct MappingTraits<TaintConfiguration::Propagation> {
450 static void mapping(IO &IO, TaintConfiguration::Propagation &Propagation) {
451 IO.mapRequired(
"Name", Propagation.Name);
452 IO.mapOptional(
"Scope", Propagation.Scope);
453 IO.mapOptional(
"SrcArgs", Propagation.SrcArgs);
454 IO.mapOptional(
"DstArgs", Propagation.DstArgs);
455 IO.mapOptional(
"VariadicType", Propagation.VarType);
456 IO.mapOptional(
"VariadicIndex", Propagation.VarIndex);
460template <>
struct ScalarEnumerationTraits<TaintConfiguration::VariadicType> {
462 IO.enumCase(
Value,
"None", TaintConfiguration::VariadicType::None);
463 IO.enumCase(
Value,
"Src", TaintConfiguration::VariadicType::Src);
464 IO.enumCase(
Value,
"Dst", TaintConfiguration::VariadicType::Dst);
475 ImmutableSet<ArgIdxTy>)
478void GenericTaintRuleParser::validateArgVector(
const std::string &Option,
479 const ArgVecTy &Args)
const {
480 for (ArgIdxTy Arg : Args) {
481 if (Arg < ReturnValueIndex) {
482 Mgr.reportInvalidCheckerOptionValue(
483 Mgr.getChecker<GenericTaintChecker>(), Option,
484 "an argument number for propagation rules greater or equal to -1");
489template <
typename Config>
491GenericTaintRuleParser::parseNameParts(
const Config &
C) {
492 NamePartsTy NameParts;
493 if (!
C.Scope.empty()) {
496 StringRef{
C.Scope}.split(NameParts,
"::", -1,
499 NameParts.emplace_back(
C.Name);
503template <
typename Config>
504void GenericTaintRuleParser::consumeRulesFromConfig(
const Config &
C,
505 GenericTaintRule &&Rule,
506 RulesContTy &Rules) {
507 NamePartsTy NameParts = parseNameParts(
C);
512void GenericTaintRuleParser::parseConfig(
const std::string &Option,
513 TaintConfiguration::Sink &&S,
514 RulesContTy &Rules)
const {
515 validateArgVector(Option, S.SinkArgs);
516 consumeRulesFromConfig(S, GenericTaintRule::Sink(std::move(S.SinkArgs)),
520void GenericTaintRuleParser::parseConfig(
const std::string &Option,
521 TaintConfiguration::Filter &&S,
522 RulesContTy &Rules)
const {
523 validateArgVector(Option, S.FilterArgs);
524 consumeRulesFromConfig(S, GenericTaintRule::Filter(std::move(S.FilterArgs)),
528void GenericTaintRuleParser::parseConfig(
const std::string &Option,
529 TaintConfiguration::Propagation &&
P,
530 RulesContTy &Rules)
const {
531 validateArgVector(Option,
P.SrcArgs);
532 validateArgVector(Option,
P.DstArgs);
533 bool IsSrcVariadic =
P.VarType == TaintConfiguration::VariadicType::Src;
534 bool IsDstVariadic =
P.VarType == TaintConfiguration::VariadicType::Dst;
535 std::optional<ArgIdxTy> JustVarIndex =
P.VarIndex;
537 ArgSet SrcDesc(std::move(
P.SrcArgs),
538 IsSrcVariadic ? JustVarIndex : std::nullopt);
539 ArgSet DstDesc(std::move(
P.DstArgs),
540 IsDstVariadic ? JustVarIndex : std::nullopt);
542 consumeRulesFromConfig(
543 P, GenericTaintRule::Prop(std::move(SrcDesc), std::move(DstDesc)), Rules);
546GenericTaintRuleParser::RulesContTy
547GenericTaintRuleParser::parseConfiguration(
const std::string &Option,
548 TaintConfiguration &&Config)
const {
552 for (
auto &F : Config.Filters)
553 parseConfig(Option, std::move(F), Rules);
555 for (
auto &S : Config.Sinks)
556 parseConfig(Option, std::move(S), Rules);
558 for (
auto &
P : Config.Propagations)
559 parseConfig(Option, std::move(
P), Rules);
568 if (StaticTaintRules || DynamicTaintRules)
571 using RulesConstructionTy =
572 std::vector<std::pair<CallDescription, GenericTaintRule>>;
573 using TR = GenericTaintRule;
575 RulesConstructionTy GlobalCRules{
577 {{CDM::CLibrary, {
"fdopen"}}, TR::Source({{ReturnValueIndex}})},
578 {{CDM::CLibrary, {
"fopen"}}, TR::Source({{ReturnValueIndex}})},
579 {{CDM::CLibrary, {
"freopen"}}, TR::Source({{ReturnValueIndex}})},
580 {{CDM::CLibrary, {
"getch"}}, TR::Source({{ReturnValueIndex}})},
581 {{CDM::CLibrary, {
"getchar"}}, TR::Source({{ReturnValueIndex}})},
582 {{CDM::CLibrary, {
"getchar_unlocked"}}, TR::Source({{ReturnValueIndex}})},
583 {{CDM::CLibrary, {
"gets"}}, TR::Source({{0, ReturnValueIndex}})},
584 {{CDM::CLibrary, {
"gets_s"}}, TR::Source({{0, ReturnValueIndex}})},
585 {{CDM::CLibrary, {
"scanf"}}, TR::Source({{}, 1})},
586 {{CDM::CLibrary, {
"scanf_s"}}, TR::Source({{}, 1})},
587 {{CDM::CLibrary, {
"wgetch"}}, TR::Source({{ReturnValueIndex}})},
593 {{CDM::CLibrary, {
"_IO_getc"}}, TR::Source({{ReturnValueIndex}})},
594 {{CDM::CLibrary, {
"getcwd"}}, TR::Source({{0, ReturnValueIndex}})},
595 {{CDM::CLibrary, {
"getwd"}}, TR::Source({{0, ReturnValueIndex}})},
596 {{CDM::CLibrary, {
"readlink"}}, TR::Source({{1, ReturnValueIndex}})},
597 {{CDM::CLibrary, {
"readlinkat"}}, TR::Source({{2, ReturnValueIndex}})},
598 {{CDM::CLibrary, {
"get_current_dir_name"}},
599 TR::Source({{ReturnValueIndex}})},
600 {{CDM::CLibrary, {
"gethostname"}}, TR::Source({{0}})},
601 {{CDM::CLibrary, {
"getnameinfo"}}, TR::Source({{2, 4}})},
602 {{CDM::CLibrary, {
"getseuserbyname"}}, TR::Source({{1, 2}})},
603 {{CDM::CLibrary, {
"getgroups"}}, TR::Source({{1, ReturnValueIndex}})},
604 {{CDM::CLibrary, {
"getlogin"}}, TR::Source({{ReturnValueIndex}})},
605 {{CDM::CLibrary, {
"getlogin_r"}}, TR::Source({{0}})},
608 {{CDM::CLibrary, {
"accept"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
609 {{CDM::CLibrary, {
"atoi"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
610 {{CDM::CLibrary, {
"atol"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
611 {{CDM::CLibrary, {
"atoll"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
612 {{CDM::CLibrary, {
"fgetc"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
613 {{CDM::CLibrary, {
"fgetln"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
614 {{CDM::CLibraryMaybeHardened, {
"fgets"}},
615 TR::Prop({{2}}, {{0, ReturnValueIndex}})},
616 {{CDM::CLibraryMaybeHardened, {
"fgetws"}},
617 TR::Prop({{2}}, {{0, ReturnValueIndex}})},
618 {{CDM::CLibrary, {
"fscanf"}}, TR::Prop({{0}}, {{}, 2})},
619 {{CDM::CLibrary, {
"fscanf_s"}}, TR::Prop({{0}}, {{}, 2})},
620 {{CDM::CLibrary, {
"sscanf"}}, TR::Prop({{0}}, {{}, 2})},
621 {{CDM::CLibrary, {
"sscanf_s"}}, TR::Prop({{0}}, {{}, 2})},
623 {{CDM::CLibrary, {
"getc"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
624 {{CDM::CLibrary, {
"getc_unlocked"}},
625 TR::Prop({{0}}, {{ReturnValueIndex}})},
626 {{CDM::CLibrary, {
"getdelim"}}, TR::Prop({{3}}, {{0}})},
630 {{CDM::CLibrary, {
"getline"}}, TR::Prop({{2}}, {{0}})},
631 {{CDM::CLibrary, {
"getw"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
632 {{CDM::CLibraryMaybeHardened, {
"pread"}},
633 TR::Prop({{0, 1, 2, 3}}, {{1, ReturnValueIndex}})},
634 {{CDM::CLibraryMaybeHardened, {
"read"}},
635 TR::Prop({{0, 2}}, {{1, ReturnValueIndex}})},
636 {{CDM::CLibraryMaybeHardened, {
"fread"}},
637 TR::Prop({{3}}, {{0, ReturnValueIndex}})},
638 {{CDM::CLibraryMaybeHardened, {
"recv"}},
639 TR::Prop({{0}}, {{1, ReturnValueIndex}})},
640 {{CDM::CLibraryMaybeHardened, {
"recvfrom"}},
641 TR::Prop({{0}}, {{1, ReturnValueIndex}})},
643 {{CDM::CLibrary, {
"ttyname"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
644 {{CDM::CLibrary, {
"ttyname_r"}},
645 TR::Prop({{0}}, {{1, ReturnValueIndex}})},
647 {{CDM::CLibrary, {
"basename"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
648 {{CDM::CLibrary, {
"dirname"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
649 {{CDM::CLibrary, {
"fnmatch"}}, TR::Prop({{1}}, {{ReturnValueIndex}})},
651 {{CDM::CLibrary, {
"mbtowc"}}, TR::Prop({{1}}, {{0, ReturnValueIndex}})},
652 {{CDM::CLibrary, {
"wctomb"}}, TR::Prop({{1}}, {{0, ReturnValueIndex}})},
653 {{CDM::CLibrary, {
"wcwidth"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
655 {{CDM::CLibrary, {
"memcmp"}},
656 TR::Prop({{0, 1, 2}}, {{ReturnValueIndex}})},
657 {{CDM::CLibraryMaybeHardened, {
"memcpy"}},
658 TR::Prop({{1, 2}}, {{0, ReturnValueIndex}})},
659 {{CDM::CLibraryMaybeHardened, {
"memmove"}},
660 TR::Prop({{1, 2}}, {{0, ReturnValueIndex}})},
661 {{CDM::CLibraryMaybeHardened, {
"bcopy"}}, TR::Prop({{0, 2}}, {{1}})},
667 {{CDM::CLibrary, {
"memmem"}}, TR::Prop({{0, 1}}, {{ReturnValueIndex}})},
668 {{CDM::CLibrary, {
"strstr"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
669 {{CDM::CLibrary, {
"strcasestr"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
673 {{CDM::CLibraryMaybeHardened, {
"memchr"}},
674 TR::Prop({{0}}, {{ReturnValueIndex}})},
675 {{CDM::CLibraryMaybeHardened, {
"memrchr"}},
676 TR::Prop({{0}}, {{ReturnValueIndex}})},
677 {{CDM::CLibrary, {
"rawmemchr"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
678 {{CDM::CLibraryMaybeHardened, {
"strchr"}},
679 TR::Prop({{0}}, {{ReturnValueIndex}})},
680 {{CDM::CLibraryMaybeHardened, {
"strrchr"}},
681 TR::Prop({{0}}, {{ReturnValueIndex}})},
682 {{CDM::CLibraryMaybeHardened, {
"strchrnul"}},
683 TR::Prop({{0}}, {{ReturnValueIndex}})},
684 {{CDM::CLibrary, {
"index"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
685 {{CDM::CLibrary, {
"rindex"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
689 {{CDM::CLibrary, {
"qsort"}}, TR::Prop({{0}}, {{0}})},
690 {{CDM::CLibrary, {
"qsort_r"}}, TR::Prop({{0}}, {{0}})},
692 {{CDM::CLibrary, {
"strcmp"}}, TR::Prop({{0, 1}}, {{ReturnValueIndex}})},
693 {{CDM::CLibrary, {
"strcasecmp"}},
694 TR::Prop({{0, 1}}, {{ReturnValueIndex}})},
695 {{CDM::CLibrary, {
"strncmp"}},
696 TR::Prop({{0, 1, 2}}, {{ReturnValueIndex}})},
697 {{CDM::CLibrary, {
"strncasecmp"}},
698 TR::Prop({{0, 1, 2}}, {{ReturnValueIndex}})},
699 {{CDM::CLibrary, {
"strspn"}}, TR::Prop({{0, 1}}, {{ReturnValueIndex}})},
700 {{CDM::CLibrary, {
"strcspn"}}, TR::Prop({{0, 1}}, {{ReturnValueIndex}})},
701 {{CDM::CLibrary, {
"strpbrk"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
703 {{CDM::CLibrary, {
"strndup"}}, TR::Prop({{0, 1}}, {{ReturnValueIndex}})},
704 {{CDM::CLibrary, {
"strndupa"}}, TR::Prop({{0, 1}}, {{ReturnValueIndex}})},
705 {{CDM::CLibrary, {
"strdup"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
706 {{CDM::CLibrary, {
"strdupa"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
707 {{CDM::CLibrary, {
"wcsdup"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
712 {{CDM::CLibrary, {
"strtol"}}, TR::Prop({{0}}, {{1, ReturnValueIndex}})},
713 {{CDM::CLibrary, {
"strtoll"}}, TR::Prop({{0}}, {{1, ReturnValueIndex}})},
714 {{CDM::CLibrary, {
"strtoul"}}, TR::Prop({{0}}, {{1, ReturnValueIndex}})},
715 {{CDM::CLibrary, {
"strtoull"}}, TR::Prop({{0}}, {{1, ReturnValueIndex}})},
717 {{CDM::CLibrary, {
"tolower"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
718 {{CDM::CLibrary, {
"toupper"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
720 {{CDM::CLibrary, {
"isalnum"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
721 {{CDM::CLibrary, {
"isalpha"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
722 {{CDM::CLibrary, {
"isascii"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
723 {{CDM::CLibrary, {
"isblank"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
724 {{CDM::CLibrary, {
"iscntrl"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
725 {{CDM::CLibrary, {
"isdigit"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
726 {{CDM::CLibrary, {
"isgraph"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
727 {{CDM::CLibrary, {
"islower"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
728 {{CDM::CLibrary, {
"isprint"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
729 {{CDM::CLibrary, {
"ispunct"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
730 {{CDM::CLibrary, {
"isspace"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
731 {{CDM::CLibrary, {
"isupper"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
732 {{CDM::CLibrary, {
"isxdigit"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
734 {{CDM::CLibraryMaybeHardened, {
"strcpy"}},
735 TR::Prop({{1}}, {{0, ReturnValueIndex}})},
736 {{CDM::CLibraryMaybeHardened, {
"stpcpy"}},
737 TR::Prop({{1}}, {{0, ReturnValueIndex}})},
738 {{CDM::CLibraryMaybeHardened, {
"strcat"}},
739 TR::Prop({{0, 1}}, {{0, ReturnValueIndex}})},
740 {{CDM::CLibraryMaybeHardened, {
"wcsncat"}},
741 TR::Prop({{0, 1}}, {{0, ReturnValueIndex}})},
742 {{CDM::CLibraryMaybeHardened, {
"strncpy"}},
743 TR::Prop({{1, 2}}, {{0, ReturnValueIndex}})},
744 {{CDM::CLibraryMaybeHardened, {
"strncat"}},
745 TR::Prop({{0, 1, 2}}, {{0, ReturnValueIndex}})},
746 {{CDM::CLibraryMaybeHardened, {
"strlcpy"}}, TR::Prop({{1, 2}}, {{0}})},
747 {{CDM::CLibraryMaybeHardened, {
"strlcat"}}, TR::Prop({{0, 1, 2}}, {{0}})},
755 {{CDM::CLibrary, {
"snprintf"}},
756 TR::Prop({{1, 2}, 3}, {{0, ReturnValueIndex}})},
758 {{CDM::CLibrary, {
"sprintf"}},
759 TR::Prop({{1}, 2}, {{0, ReturnValueIndex}})},
762 {{CDM::CLibrary, {
"__snprintf_chk"}},
763 TR::Prop({{1, 4}, 5}, {{0, ReturnValueIndex}})},
766 {{CDM::CLibrary, {
"__sprintf_chk"}},
767 TR::Prop({{3}, 4}, {{0, ReturnValueIndex}})},
770 {{CDM::CLibrary, {
"system"}}, TR::Sink({{0}}, MsgSanitizeSystemArgs)},
771 {{CDM::CLibrary, {
"popen"}}, TR::Sink({{0}}, MsgSanitizeSystemArgs)},
772 {{CDM::CLibrary, {
"execl"}}, TR::Sink({{}, {0}}, MsgSanitizeSystemArgs)},
773 {{CDM::CLibrary, {
"execle"}}, TR::Sink({{}, {0}}, MsgSanitizeSystemArgs)},
774 {{CDM::CLibrary, {
"execlp"}}, TR::Sink({{}, {0}}, MsgSanitizeSystemArgs)},
775 {{CDM::CLibrary, {
"execv"}}, TR::Sink({{0, 1}}, MsgSanitizeSystemArgs)},
776 {{CDM::CLibrary, {
"execve"}},
777 TR::Sink({{0, 1, 2}}, MsgSanitizeSystemArgs)},
778 {{CDM::CLibrary, {
"fexecve"}},
779 TR::Sink({{0, 1, 2}}, MsgSanitizeSystemArgs)},
780 {{CDM::CLibrary, {
"execvp"}}, TR::Sink({{0, 1}}, MsgSanitizeSystemArgs)},
781 {{CDM::CLibrary, {
"execvpe"}},
782 TR::Sink({{0, 1, 2}}, MsgSanitizeSystemArgs)},
783 {{CDM::CLibrary, {
"dlopen"}}, TR::Sink({{0}}, MsgSanitizeSystemArgs)},
791 {{CDM::CLibrary, {
"setproctitle"}},
792 TR::Sink({{0}, 1}, MsgUncontrolledFormatString)},
793 {{CDM::CLibrary, {
"setproctitle_fast"}},
794 TR::Sink({{0}, 1}, MsgUncontrolledFormatString)}};
796 if (TR::UntrustedEnv(
C)) {
799 GlobalCRules.push_back({{CDM::CLibrary, {
"setproctitle_init"}},
800 TR::Sink({{1, 2}}, MsgCustomSink)});
803 GlobalCRules.push_back(
804 {{CDM::CLibrary, {
"getenv"}}, TR::Source({{ReturnValueIndex}})});
807 StaticTaintRules.emplace(std::make_move_iterator(GlobalCRules.begin()),
808 std::make_move_iterator(GlobalCRules.end()));
813 GenericTaintRuleParser ConfigParser{*Mgr};
814 std::string Option{
"Config"};
815 StringRef ConfigFile =
817 std::optional<TaintConfiguration> Config =
818 getConfiguration<TaintConfiguration>(*Mgr,
this, Option, ConfigFile);
821 DynamicTaintRules = RuleLookupTy{};
825 GenericTaintRuleParser::RulesContTy Rules{
826 ConfigParser.parseConfiguration(Option, std::move(*Config))};
828 DynamicTaintRules.emplace(std::make_move_iterator(Rules.begin()),
829 std::make_move_iterator(Rules.end()));
837 if (
const auto *Rule =
838 Call.isGlobalCFunction() ? StaticTaintRules->lookup(
Call) :
nullptr)
840 else if (
const auto *Rule = DynamicTaintRules->lookup(
Call))
848 checkUncontrolledFormatString(
Call,
C);
852 taintUnsafeSocketProtocol(
Call,
C);
855void GenericTaintChecker::checkPostCall(
const CallEvent &
Call,
865 TaintArgsOnPostVisitTy TaintArgsMap = State->get<TaintArgsOnPostVisit>();
867 const ImmutableSet<ArgIdxTy> *TaintArgs = TaintArgsMap.lookup(CurrentFrame);
870 assert(!TaintArgs->isEmpty());
872 LLVM_DEBUG(
for (ArgIdxTy I
874 llvm::dbgs() <<
"PostCall<";
875 Call.dump(llvm::dbgs());
876 llvm::dbgs() <<
"> actually wants to taint arg index: " << I <<
'\n';
879 const NoteTag *InjectionTag =
nullptr;
880 std::vector<SymbolRef> TaintedSymbols;
881 std::vector<ArgIdxTy> TaintedIndexes;
882 for (ArgIdxTy ArgNum : *TaintArgs) {
884 if (ArgNum == ReturnValueIndex) {
886 std::vector<SymbolRef> TaintedSyms =
888 if (!TaintedSyms.empty()) {
889 TaintedSymbols.push_back(TaintedSyms[0]);
890 TaintedIndexes.push_back(ArgNum);
896 if (
auto V = getPointeeOf(State,
Call.getArgSVal(ArgNum))) {
899 if (!TaintedSyms.empty()) {
900 TaintedSymbols.push_back(TaintedSyms[0]);
901 TaintedIndexes.push_back(ArgNum);
907 InjectionTag = taintPropagationExplainerTag(
C, TaintedSymbols, TaintedIndexes,
908 Call.getCalleeStackFrame(0));
910 State = State->remove<TaintArgsOnPostVisit>(CurrentFrame);
911 C.addTransition(State, InjectionTag);
914void GenericTaintChecker::printState(raw_ostream &Out,
ProgramStateRef State,
915 const char *NL,
const char *Sep)
const {
919void GenericTaintRule::process(
const GenericTaintChecker &
Checker,
922 const ArgIdxTy CallNumArgs = fromArgumentCount(
Call.getNumArgs());
925 const auto ForEachCallArg = [&
C, &
Call, CallNumArgs](
auto &&Fun) {
926 for (ArgIdxTy I = ReturnValueIndex; I < CallNumArgs; ++I) {
928 Fun(I,
E,
C.getSVal(
E));
935 if (isStdin(
C.getSVal(
E),
C.getASTContext())) {
936 State = addTaint(State, C.getSVal(E));
938 if (SinkArgs.contains(I) && isTaintedOrPointsToTainted(State,
C.getSVal(
E)))
939 Checker.generateReportIfTainted(
E, SinkMsg.value_or(MsgCustomSink),
C);
943 ForEachCallArg([
this, &State](ArgIdxTy I,
const Expr *
E,
SVal S) {
944 if (FilterArgs.contains(I)) {
945 State = removeTaint(State, S);
946 if (auto P = getPointeeOf(State, S))
947 State = removeTaint(State, *P);
956 bool IsMatching = PropSrcArgs.isEmpty();
957 std::vector<SymbolRef> TaintedSymbols;
958 std::vector<ArgIdxTy> TaintedIndexes;
959 ForEachCallArg([
this, &
C, &IsMatching, &State, &TaintedSymbols,
960 &TaintedIndexes](ArgIdxTy I,
const Expr *
E,
SVal) {
961 std::optional<SVal> TaintedSVal =
962 getTaintedPointeeOrPointer(State,
C.getSVal(
E));
964 IsMatching || (PropSrcArgs.contains(I) && TaintedSVal.has_value());
967 if (TaintedSVal && !isStdin(*TaintedSVal,
C.getASTContext())) {
968 std::vector<SymbolRef> TaintedArgSyms =
970 if (!TaintedArgSyms.empty()) {
971 llvm::append_range(TaintedSymbols, TaintedArgSyms);
972 TaintedIndexes.push_back(I);
986 const bool IsNonConstRef = Ty->isReferenceType() && !Ty.isConstQualified();
987 const bool IsNonConstPtr =
988 Ty->isPointerType() && !Ty->getPointeeType().isConstQualified();
990 return IsNonConstRef || IsNonConstPtr;
994 auto &F = State->getStateManager().get_context<ArgIdxFactory>();
995 ImmutableSet<ArgIdxTy> Result = F.getEmptySet();
998 if (PropDstArgs.contains(I)) {
999 LLVM_DEBUG(llvm::dbgs() <<
"PreCall<"; Call.dump(llvm::dbgs());
1001 <<
"> prepares tainting arg index: " << I <<
'\n';);
1002 Result = F.add(Result, I);
1011 if (WouldEscape(
V,
E->
getType()) && getTaintedPointeeOrPointer(State,
V)) {
1012 LLVM_DEBUG(
if (!Result.contains(I)) {
1013 llvm::dbgs() <<
"PreCall<";
1014 Call.dump(llvm::dbgs());
1015 llvm::dbgs() <<
"> prepares tainting arg index: " << I <<
'\n';
1017 Result = F.
add(Result, I);
1021 if (!Result.isEmpty())
1022 State = State->set<TaintArgsOnPostVisit>(
C.getStackFrame(), Result);
1023 const NoteTag *InjectionTag = taintOriginTrackerTag(
1024 C, std::move(TaintedSymbols), std::move(TaintedIndexes),
1025 Call.getCalleeStackFrame(0));
1026 C.addTransition(State, InjectionTag);
1030 return !
C.getAnalysisManager()
1031 .getAnalyzerOptions()
1032 .ShouldAssumeControlledEnvironment;
1035bool GenericTaintChecker::generateReportIfTainted(
const Expr *
E, StringRef Msg,
1038 if (!isTaintReporterCheckerEnabled)
1040 std::optional<SVal> TaintedSVal =
1041 getTaintedPointeeOrPointer(
C.getState(),
C.getSVal(
E));
1049 if (
ExplodedNode *N =
C.generateNonFatalErrorNode(
C.getState(), &Tag)) {
1050 auto report = std::make_unique<PathSensitiveBugReport>(*BT, Msg, N);
1053 report->markInteresting(TaintedSym);
1055 C.emitReport(std::move(report));
1073 const Decl *CallDecl =
Call.getDecl();
1080 const ArgIdxTy CallNumArgs = fromArgumentCount(
Call.getNumArgs());
1083 ArgNum = Format->getFormatIdx() - 1;
1084 if ((Format->getType()->getName() ==
"printf") && CallNumArgs > ArgNum)
1091bool GenericTaintChecker::checkUncontrolledFormatString(
1094 ArgIdxTy ArgNum = 0;
1100 return generateReportIfTainted(
Call.getArgExpr(ArgNum),
1101 MsgUncontrolledFormatString,
C);
1104void GenericTaintChecker::taintUnsafeSocketProtocol(
const CallEvent &
Call,
1106 if (
Call.getNumArgs() < 1)
1111 if (
ID->getName() !=
"socket")
1115 StringRef DomName =
C.getMacroNameOrSpelling(DomLoc);
1117 bool SafeProtocol = DomName ==
"AF_SYSTEM" || DomName ==
"AF_LOCAL" ||
1118 DomName ==
"AF_UNIX" || DomName ==
"AF_RESERVED_36";
1123 auto &F = State->getStateManager().get_context<ArgIdxFactory>();
1124 ImmutableSet<ArgIdxTy> Result = F.
add(F.getEmptySet(), ReturnValueIndex);
1125 State = State->set<TaintArgsOnPostVisit>(
C.getStackFrame(), Result);
1126 C.addTransition(State);
1134bool ento::shouldRegisterTaintPropagationChecker(
const CheckerManager &mgr) {
1139 GenericTaintChecker *checker = Mgr.
getChecker<GenericTaintChecker>();
1140 checker->isTaintReporterCheckerEnabled =
true;
1145bool ento::shouldRegisterGenericTaintChecker(
const CheckerManager &mgr) {
Defines enum values for all the target-independent builtin functions.
static bool getPrintfFormatArgumentNum(const CallEvent &Call, const CheckerContext &C, ArgIdxTy &ArgNum)
TODO: remove checking for printf format attributes and socket whitelisting from GenericTaintChecker,...
#define REGISTER_MAP_WITH_PROGRAMSTATE(Name, Key, Value)
Declares an immutable map of type NameTy, suitable for placement into the ProgramState.
#define REGISTER_SET_FACTORY_WITH_PROGRAMSTATE(Name, Elem)
Declares an immutable set type Name and registers the factory for such sets in the program state,...
static bool contains(const std::set< tok::TokenKind > &Terminators, const Token &Tok)
Holds long-lived AST nodes (such as types and decls) that can be referred to throughout the semantic ...
QualType getFILEType() const
Retrieve the C FILE type.
StringRef getCheckerStringOption(StringRef CheckerName, StringRef OptionName, bool SearchInParents=false) const
Query an option's string value.
Decl - This represents one declaration (or definition), e.g.
FunctionDecl * getAsFunction() LLVM_READONLY
Returns the function itself, or the templated function if this is a function template.
llvm::iterator_range< specific_attr_iterator< T > > specific_attrs() const
virtual Decl * getCanonicalDecl()
Retrieves the "canonical" declaration of the given declaration.
This represents one expression.
Represents a function declaration or definition.
One of these records is kept for each identifier that is lexed.
It wraps the AnalysisDeclContext to represent both the call stack with the help of StackFrameContext ...
A (possibly-)qualified type.
QualType getCanonicalType() const
Scope - A scope is a transient data structure that is used while parsing the program.
Encodes a location in the source.
It represents a stack frame of the call stack (based on CallEvent).
SourceRange getSourceRange() const LLVM_READONLY
SourceLocation tokens are not useful in isolation - they are low level value objects created/interpre...
bool isPointerType() const
QualType getPointeeType() const
If this is a pointer, ObjC object pointer, or block pointer, this returns the respective pointee.
const BugType & getBugType() const
StringRef getCategory() const
An immutable map from CallDescriptions to arbitrary data.
A CallDescription is a pattern that can be used to match calls based on the qualified name and the ar...
Represents an abstract call to a function or method along a particular path.
virtual void printState(raw_ostream &Out, ProgramStateRef State, const char *NL, const char *Sep) const
See CheckerManager::runCheckersForPrintState.
const AnalyzerOptions & getAnalyzerOptions() const
CHECKER * registerChecker(AT &&... Args)
Used to register checkers.
CheckerNameRef getCurrentCheckerName() const
Tag that can use a checker name as a message provider (see SimpleProgramPointTag).
The tag upon which the TagVisitor reacts.
void markInteresting(SymbolRef sym, bugreporter::TrackingKind TKind=bugreporter::TrackingKind::Thorough)
Marks a symbol as interesting.
bool isInteresting(SymbolRef sym) const
SVal - This represents a symbolic expression, which can be either an L-value or an R-value.
std::optional< T > getAs() const
Convert to the specified SVal type, returning std::nullopt if this SVal is not of the desired type.
QualType getType(const ASTContext &) const
Try to get a reasonable type for the given value.
const MemRegion * getAsRegion() const
const char *const TaintedData
ProgramStateRef addTaint(ProgramStateRef State, const Stmt *S, const LocationContext *LCtx, TaintTagType Kind=TaintTagGeneric)
Create a new state in which the value of the statement is marked as tainted.
std::vector< SymbolRef > getTaintedSymbols(ProgramStateRef State, const Stmt *S, const LocationContext *LCtx, TaintTagType Kind=TaintTagGeneric)
Returns the tainted Symbols for a given Statement and state.
bool isTainted(ProgramStateRef State, const Stmt *S, const LocationContext *LCtx, TaintTagType Kind=TaintTagGeneric)
Check if the statement has a tainted value in the given state.
void printTaint(ProgramStateRef State, raw_ostream &Out, const char *nl="\n", const char *sep="")
const Regex Rule("(.+)/(.+)\\.framework/")
The JSON file list parser is used to communicate input to InstallAPI.
if(T->getSizeExpr()) TRY_TO(TraverseStmt(const_cast< Expr * >(T -> getSizeExpr())))
for(const auto &A :T->param_types())
@ None
The alignment was not explicit in code.
Diagnostic wrappers for TextAPI types for error reporting.
static void mapping(IO &IO, TaintConfiguration &Config)
static void mapping(IO &IO, TaintConfiguration::Filter &Filter)
static void mapping(IO &IO, TaintConfiguration::Propagation &Propagation)
static void mapping(IO &IO, TaintConfiguration::Sink &Sink)
static void enumeration(IO &IO, TaintConfiguration::VariadicType &Value)