28#include "llvm/ADT/StringExtras.h"
29#include "llvm/ADT/StringRef.h"
30#include "llvm/Support/YAMLTraits.h"
38#define DEBUG_TYPE "taint-checker"
44using llvm::ImmutableSet;
48class GenericTaintChecker;
51constexpr llvm::StringLiteral MsgUncontrolledFormatString =
52 "Untrusted data is used as a format string "
53 "(CWE-134: Uncontrolled Format String)";
58constexpr llvm::StringLiteral MsgSanitizeSystemArgs =
59 "Untrusted data is passed to a system call "
60 "(CERT/STR02-C. Sanitize data passed to complex subsystems)";
63constexpr llvm::StringLiteral MsgCustomSink =
64 "Untrusted data is passed to a user-defined sink";
70constexpr ArgIdxTy ReturnValueIndex{-1};
72static ArgIdxTy fromArgumentCount(
unsigned Count) {
74 static_cast<std::size_t
>(std::numeric_limits<ArgIdxTy>::max()) &&
75 "ArgIdxTy is not large enough to represent the number of arguments.");
86 const auto *SymReg = dyn_cast_or_null<SymbolicRegion>(Val.
getAsRegion());
92 dyn_cast_or_null<DeclRegion>(SymReg->getSymbol()->getOriginRegion());
98 if (
const auto *D = dyn_cast_or_null<VarDecl>(DeclReg->getDecl())) {
99 D = D->getCanonicalDecl();
100 if (D->getName() ==
"stdin" && D->hasExternalStorage() && D->isExternC()) {
102 const QualType Ty = D->getType().getCanonicalType();
112 const QualType ArgTy = LValue.getType(State->getStateManager().getContext());
114 return State->getSVal(LValue);
118 return State->getSVal(LValue, State->getStateManager().getContext().CharTy);
124 return getPointeeOf(State, *LValue);
133 if (
auto Pointee = getPointeeOf(State, Arg))
143 return getTaintedPointeeOrPointer(State, ExprSVal).has_value();
150 std::vector<SymbolRef> TaintedSymbols,
151 std::vector<ArgIdxTy> TaintedArgs,
153 return C.getNoteTag([TaintedSymbols = std::move(TaintedSymbols),
154 TaintedArgs = std::move(TaintedArgs), CallLocation](
161 if (TaintedSymbols.empty())
162 return "Taint originated here";
164 for (
auto Sym : TaintedSymbols) {
167 LLVM_DEBUG(
for (
auto Arg
169 llvm::dbgs() <<
"Taint Propagated from argument " << Arg + 1 <<
"\n";
178const NoteTag *taintPropagationExplainerTag(
180 std::vector<ArgIdxTy> TaintedArgs,
const LocationContext *CallLocation) {
181 assert(TaintedSymbols.size() == TaintedArgs.size());
182 return C.getNoteTag([TaintedSymbols = std::move(TaintedSymbols),
183 TaintedArgs = std::move(TaintedArgs), CallLocation](
186 llvm::raw_svector_ostream Out(Msg);
188 if (TaintedSymbols.empty() ||
192 int nofTaintedArgs = 0;
193 for (
auto [Idx, Sym] : llvm::enumerate(TaintedSymbols)) {
196 if (TaintedArgs[Idx] != ReturnValueIndex) {
197 LLVM_DEBUG(llvm::dbgs() <<
"Taint Propagated to argument "
198 << TaintedArgs[Idx] + 1 <<
"\n");
199 if (nofTaintedArgs == 0)
200 Out <<
"Taint propagated to the ";
203 Out << TaintedArgs[Idx] + 1
204 << llvm::getOrdinalSuffix(TaintedArgs[Idx] + 1) <<
" argument";
207 LLVM_DEBUG(llvm::dbgs() <<
"Taint Propagated to return value.\n");
208 Out <<
"Taint propagated to the return value";
212 return std::string(Out.str());
222 ArgSet(ArgVecTy &&DiscreteArgs,
223 std::optional<ArgIdxTy> VariadicIndex = std::nullopt)
224 : DiscreteArgs(std::move(DiscreteArgs)),
225 VariadicIndex(std::move(VariadicIndex)) {}
227 bool contains(ArgIdxTy ArgIdx)
const {
228 if (llvm::is_contained(DiscreteArgs, ArgIdx))
231 return VariadicIndex && ArgIdx >= *VariadicIndex;
234 bool isEmpty()
const {
return DiscreteArgs.empty() && !VariadicIndex; }
237 ArgVecTy DiscreteArgs;
238 std::optional<ArgIdxTy> VariadicIndex;
246class GenericTaintRule {
259 std::optional<StringRef> SinkMsg;
261 GenericTaintRule() =
default;
263 GenericTaintRule(ArgSet &&Sink, ArgSet &&Filter, ArgSet &&Src, ArgSet &&Dst,
264 std::optional<StringRef> SinkMsg = std::nullopt)
265 : SinkArgs(std::move(Sink)), FilterArgs(std::move(Filter)),
266 PropSrcArgs(std::move(Src)), PropDstArgs(std::move(Dst)),
272 static GenericTaintRule Sink(ArgSet &&SinkArgs,
273 std::optional<StringRef> Msg = std::nullopt) {
274 return {std::move(SinkArgs), {}, {}, {}, Msg};
278 static GenericTaintRule Filter(ArgSet &&FilterArgs) {
279 return {{}, std::move(FilterArgs), {}, {}};
284 static GenericTaintRule Source(ArgSet &&SourceArgs) {
285 return {{}, {}, {}, std::move(SourceArgs)};
289 static GenericTaintRule Prop(ArgSet &&SrcArgs, ArgSet &&DstArgs) {
290 return {{}, {}, std::move(SrcArgs), std::move(DstArgs)};
300 return ArgIdx == ReturnValueIndex ?
Call.getOriginExpr()
301 :
Call.getArgExpr(ArgIdx);
311struct TaintConfiguration {
312 using NameScopeArgs = std::tuple<std::string, std::string, ArgVecTy>;
313 enum class VariadicType {
None, Src, Dst };
320 struct Sink : Common {
324 struct Filter : Common {
328 struct Propagation : Common {
331 VariadicType VarType;
335 std::vector<Propagation> Propagations;
336 std::vector<Filter> Filters;
337 std::vector<Sink> Sinks;
339 TaintConfiguration() =
default;
340 TaintConfiguration(
const TaintConfiguration &) =
default;
341 TaintConfiguration(TaintConfiguration &&) =
default;
342 TaintConfiguration &operator=(
const TaintConfiguration &) =
default;
343 TaintConfiguration &operator=(TaintConfiguration &&) =
default;
346struct GenericTaintRuleParser {
351 using RulesContTy = std::vector<std::pair<CallDescription, GenericTaintRule>>;
352 RulesContTy parseConfiguration(
const std::string &Option,
353 TaintConfiguration &&Config)
const;
360 void validateArgVector(
const std::string &Option,
const ArgVecTy &Args)
const;
362 template <
typename Config>
static NamePartsTy parseNameParts(
const Config &
C);
366 template <
typename Config>
367 static void consumeRulesFromConfig(
const Config &
C, GenericTaintRule &&Rule,
370 void parseConfig(
const std::string &Option, TaintConfiguration::Sink &&P,
371 RulesContTy &Rules)
const;
372 void parseConfig(
const std::string &Option, TaintConfiguration::Filter &&P,
373 RulesContTy &Rules)
const;
374 void parseConfig(
const std::string &Option,
375 TaintConfiguration::Propagation &&P,
376 RulesContTy &Rules)
const;
381class GenericTaintChecker :
public Checker<check::PreCall, check::PostCall> {
386 void printState(raw_ostream &Out,
ProgramStateRef State,
const char *NL,
387 const char *Sep)
const override;
390 bool generateReportIfTainted(
const Expr *E, StringRef Msg,
393 bool isTaintReporterCheckerEnabled =
false;
394 std::optional<BugType> BT;
411 mutable std::optional<RuleLookupTy> StaticTaintRules;
412 mutable std::optional<RuleLookupTy> DynamicTaintRules;
// Declare std::vector of each taint-configuration entry type as a YAML
// sequence for llvm/Support/YAMLTraits.h; the per-entry field layout is given
// by the MappingTraits specialisations in this file.
417LLVM_YAML_IS_SEQUENCE_VECTOR(TaintConfiguration::Sink)
418LLVM_YAML_IS_SEQUENCE_VECTOR(TaintConfiguration::Filter)
419LLVM_YAML_IS_SEQUENCE_VECTOR(TaintConfiguration::Propagation)
423template <>
struct MappingTraits<TaintConfiguration> {
424 static void mapping(IO &IO, TaintConfiguration &Config) {
425 IO.mapOptional(
"Propagations", Config.Propagations);
426 IO.mapOptional(
"Filters", Config.Filters);
427 IO.mapOptional(
"Sinks", Config.Sinks);
431template <>
struct MappingTraits<TaintConfiguration::Sink> {
432 static void mapping(IO &IO, TaintConfiguration::Sink &Sink) {
433 IO.mapRequired(
"Name", Sink.Name);
434 IO.mapOptional(
"Scope", Sink.Scope);
435 IO.mapRequired(
"Args", Sink.SinkArgs);
439template <>
struct MappingTraits<TaintConfiguration::Filter> {
440 static void mapping(IO &IO, TaintConfiguration::Filter &Filter) {
441 IO.mapRequired(
"Name", Filter.Name);
442 IO.mapOptional(
"Scope", Filter.Scope);
443 IO.mapRequired(
"Args", Filter.FilterArgs);
447template <>
struct MappingTraits<TaintConfiguration::Propagation> {
448 static void mapping(IO &IO, TaintConfiguration::Propagation &Propagation) {
449 IO.mapRequired(
"Name", Propagation.Name);
450 IO.mapOptional(
"Scope", Propagation.Scope);
451 IO.mapOptional(
"SrcArgs", Propagation.SrcArgs);
452 IO.mapOptional(
"DstArgs", Propagation.DstArgs);
453 IO.mapOptional(
"VariadicType", Propagation.VarType);
454 IO.mapOptional(
"VariadicIndex", Propagation.VarIndex);
458template <>
struct ScalarEnumerationTraits<TaintConfiguration::VariadicType> {
460 IO.enumCase(
Value,
"None", TaintConfiguration::VariadicType::None);
461 IO.enumCase(
Value,
"Src", TaintConfiguration::VariadicType::Src);
462 IO.enumCase(
Value,
"Dst", TaintConfiguration::VariadicType::Dst);
473 ImmutableSet<ArgIdxTy>)
476void GenericTaintRuleParser::validateArgVector(
const std::string &Option,
477 const ArgVecTy &Args)
const {
478 for (ArgIdxTy Arg : Args) {
479 if (Arg < ReturnValueIndex) {
481 Mgr.
getChecker<GenericTaintChecker>(), Option,
482 "an argument number for propagation rules greater or equal to -1");
487template <
typename Config>
488GenericTaintRuleParser::NamePartsTy
489GenericTaintRuleParser::parseNameParts(
const Config &
C) {
490 NamePartsTy NameParts;
491 if (!
C.Scope.empty()) {
494 StringRef{
C.Scope}.split(NameParts,
"::", -1,
497 NameParts.emplace_back(
C.Name);
501template <
typename Config>
502void GenericTaintRuleParser::consumeRulesFromConfig(
const Config &
C,
503 GenericTaintRule &&Rule,
504 RulesContTy &Rules) {
505 NamePartsTy NameParts = parseNameParts(
C);
506 Rules.emplace_back(CallDescription(CDM::Unspecified, NameParts),
510void GenericTaintRuleParser::parseConfig(
const std::string &Option,
511 TaintConfiguration::Sink &&S,
512 RulesContTy &Rules)
const {
513 validateArgVector(Option, S.SinkArgs);
514 consumeRulesFromConfig(S, GenericTaintRule::Sink(std::move(S.SinkArgs)),
518void GenericTaintRuleParser::parseConfig(
const std::string &Option,
519 TaintConfiguration::Filter &&S,
520 RulesContTy &Rules)
const {
521 validateArgVector(Option, S.FilterArgs);
522 consumeRulesFromConfig(S, GenericTaintRule::Filter(std::move(S.FilterArgs)),
526void GenericTaintRuleParser::parseConfig(
const std::string &Option,
527 TaintConfiguration::Propagation &&P,
528 RulesContTy &Rules)
const {
529 validateArgVector(Option, P.SrcArgs);
530 validateArgVector(Option, P.DstArgs);
531 bool IsSrcVariadic = P.VarType == TaintConfiguration::VariadicType::Src;
532 bool IsDstVariadic = P.VarType == TaintConfiguration::VariadicType::Dst;
533 std::optional<ArgIdxTy> JustVarIndex = P.VarIndex;
535 ArgSet SrcDesc(std::move(P.SrcArgs),
536 IsSrcVariadic ? JustVarIndex : std::nullopt);
537 ArgSet DstDesc(std::move(P.DstArgs),
538 IsDstVariadic ? JustVarIndex : std::nullopt);
540 consumeRulesFromConfig(
541 P, GenericTaintRule::Prop(std::move(SrcDesc), std::move(DstDesc)), Rules);
544GenericTaintRuleParser::RulesContTy
545GenericTaintRuleParser::parseConfiguration(
const std::string &Option,
546 TaintConfiguration &&Config)
const {
550 for (
auto &F : Config.Filters)
551 parseConfig(Option, std::move(F), Rules);
553 for (
auto &S : Config.Sinks)
554 parseConfig(Option, std::move(S), Rules);
556 for (
auto &P : Config.Propagations)
557 parseConfig(Option, std::move(P), Rules);
562void GenericTaintChecker::initTaintRules(CheckerContext &
C)
const {
566 if (StaticTaintRules || DynamicTaintRules)
569 using RulesConstructionTy =
570 std::vector<std::pair<CallDescription, GenericTaintRule>>;
571 using TR = GenericTaintRule;
573 RulesConstructionTy GlobalCRules{
575 {{CDM::CLibrary, {
"fdopen"}}, TR::Source({{ReturnValueIndex}})},
576 {{CDM::CLibrary, {
"fopen"}}, TR::Source({{ReturnValueIndex}})},
577 {{CDM::CLibrary, {
"freopen"}}, TR::Source({{ReturnValueIndex}})},
578 {{CDM::CLibrary, {
"getch"}}, TR::Source({{ReturnValueIndex}})},
579 {{CDM::CLibrary, {
"getchar"}}, TR::Source({{ReturnValueIndex}})},
580 {{CDM::CLibrary, {
"getchar_unlocked"}}, TR::Source({{ReturnValueIndex}})},
581 {{CDM::CLibrary, {
"gets"}}, TR::Source({{0, ReturnValueIndex}})},
582 {{CDM::CLibrary, {
"gets_s"}}, TR::Source({{0, ReturnValueIndex}})},
583 {{CDM::CLibrary, {
"scanf"}}, TR::Source({{}, 1})},
584 {{CDM::CLibrary, {
"scanf_s"}}, TR::Source({{}, 1})},
585 {{CDM::CLibrary, {
"wgetch"}}, TR::Source({{ReturnValueIndex}})},
591 {{CDM::CLibrary, {
"_IO_getc"}}, TR::Source({{ReturnValueIndex}})},
592 {{CDM::CLibrary, {
"getcwd"}}, TR::Source({{0, ReturnValueIndex}})},
593 {{CDM::CLibrary, {
"getwd"}}, TR::Source({{0, ReturnValueIndex}})},
594 {{CDM::CLibrary, {
"readlink"}}, TR::Source({{1, ReturnValueIndex}})},
595 {{CDM::CLibrary, {
"readlinkat"}}, TR::Source({{2, ReturnValueIndex}})},
596 {{CDM::CLibrary, {
"get_current_dir_name"}},
597 TR::Source({{ReturnValueIndex}})},
598 {{CDM::CLibrary, {
"gethostname"}}, TR::Source({{0}})},
599 {{CDM::CLibrary, {
"getnameinfo"}}, TR::Source({{2, 4}})},
600 {{CDM::CLibrary, {
"getseuserbyname"}}, TR::Source({{1, 2}})},
601 {{CDM::CLibrary, {
"getgroups"}}, TR::Source({{1, ReturnValueIndex}})},
602 {{CDM::CLibrary, {
"getlogin"}}, TR::Source({{ReturnValueIndex}})},
603 {{CDM::CLibrary, {
"getlogin_r"}}, TR::Source({{0}})},
606 {{CDM::CLibrary, {
"accept"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
607 {{CDM::CLibrary, {
"atoi"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
608 {{CDM::CLibrary, {
"atol"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
609 {{CDM::CLibrary, {
"atoll"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
610 {{CDM::CLibrary, {
"fgetc"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
611 {{CDM::CLibrary, {
"fgetln"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
612 {{CDM::CLibraryMaybeHardened, {
"fgets"}},
613 TR::Prop({{2}}, {{0, ReturnValueIndex}})},
614 {{CDM::CLibraryMaybeHardened, {
"fgetws"}},
615 TR::Prop({{2}}, {{0, ReturnValueIndex}})},
616 {{CDM::CLibrary, {
"fscanf"}}, TR::Prop({{0}}, {{}, 2})},
617 {{CDM::CLibrary, {
"fscanf_s"}}, TR::Prop({{0}}, {{}, 2})},
618 {{CDM::CLibrary, {
"sscanf"}}, TR::Prop({{0}}, {{}, 2})},
619 {{CDM::CLibrary, {
"sscanf_s"}}, TR::Prop({{0}}, {{}, 2})},
621 {{CDM::CLibrary, {
"getc"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
622 {{CDM::CLibrary, {
"getc_unlocked"}},
623 TR::Prop({{0}}, {{ReturnValueIndex}})},
624 {{CDM::CLibrary, {
"getdelim"}}, TR::Prop({{3}}, {{0}})},
628 {{CDM::CLibrary, {
"getline"}}, TR::Prop({{2}}, {{0}})},
629 {{CDM::CLibrary, {
"getw"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
630 {{CDM::CLibraryMaybeHardened, {
"pread"}},
631 TR::Prop({{0, 1, 2, 3}}, {{1, ReturnValueIndex}})},
632 {{CDM::CLibraryMaybeHardened, {
"read"}},
633 TR::Prop({{0, 2}}, {{1, ReturnValueIndex}})},
634 {{CDM::CLibraryMaybeHardened, {
"fread"}},
635 TR::Prop({{3}}, {{0, ReturnValueIndex}})},
636 {{CDM::CLibraryMaybeHardened, {
"recv"}},
637 TR::Prop({{0}}, {{1, ReturnValueIndex}})},
638 {{CDM::CLibraryMaybeHardened, {
"recvfrom"}},
639 TR::Prop({{0}}, {{1, ReturnValueIndex}})},
641 {{CDM::CLibrary, {
"ttyname"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
642 {{CDM::CLibrary, {
"ttyname_r"}},
643 TR::Prop({{0}}, {{1, ReturnValueIndex}})},
645 {{CDM::CLibrary, {
"basename"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
646 {{CDM::CLibrary, {
"dirname"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
647 {{CDM::CLibrary, {
"fnmatch"}}, TR::Prop({{1}}, {{ReturnValueIndex}})},
649 {{CDM::CLibrary, {
"mbtowc"}}, TR::Prop({{1}}, {{0, ReturnValueIndex}})},
650 {{CDM::CLibrary, {
"wctomb"}}, TR::Prop({{1}}, {{0, ReturnValueIndex}})},
651 {{CDM::CLibrary, {
"wcwidth"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
653 {{CDM::CLibrary, {
"memcmp"}},
654 TR::Prop({{0, 1, 2}}, {{ReturnValueIndex}})},
655 {{CDM::CLibraryMaybeHardened, {
"memcpy"}},
656 TR::Prop({{1, 2}}, {{0, ReturnValueIndex}})},
657 {{CDM::CLibraryMaybeHardened, {
"memmove"}},
658 TR::Prop({{1, 2}}, {{0, ReturnValueIndex}})},
659 {{CDM::CLibraryMaybeHardened, {
"bcopy"}}, TR::Prop({{0, 2}}, {{1}})},
665 {{CDM::CLibrary, {
"memmem"}}, TR::Prop({{0, 1}}, {{ReturnValueIndex}})},
666 {{CDM::CLibrary, {
"strstr"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
667 {{CDM::CLibrary, {
"strcasestr"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
671 {{CDM::CLibraryMaybeHardened, {
"memchr"}},
672 TR::Prop({{0}}, {{ReturnValueIndex}})},
673 {{CDM::CLibraryMaybeHardened, {
"memrchr"}},
674 TR::Prop({{0}}, {{ReturnValueIndex}})},
675 {{CDM::CLibrary, {
"rawmemchr"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
676 {{CDM::CLibraryMaybeHardened, {
"strchr"}},
677 TR::Prop({{0}}, {{ReturnValueIndex}})},
678 {{CDM::CLibraryMaybeHardened, {
"strrchr"}},
679 TR::Prop({{0}}, {{ReturnValueIndex}})},
680 {{CDM::CLibraryMaybeHardened, {
"strchrnul"}},
681 TR::Prop({{0}}, {{ReturnValueIndex}})},
682 {{CDM::CLibrary, {
"index"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
683 {{CDM::CLibrary, {
"rindex"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
687 {{CDM::CLibrary, {
"qsort"}}, TR::Prop({{0}}, {{0}})},
688 {{CDM::CLibrary, {
"qsort_r"}}, TR::Prop({{0}}, {{0}})},
690 {{CDM::CLibrary, {
"strcmp"}}, TR::Prop({{0, 1}}, {{ReturnValueIndex}})},
691 {{CDM::CLibrary, {
"strcasecmp"}},
692 TR::Prop({{0, 1}}, {{ReturnValueIndex}})},
693 {{CDM::CLibrary, {
"strncmp"}},
694 TR::Prop({{0, 1, 2}}, {{ReturnValueIndex}})},
695 {{CDM::CLibrary, {
"strncasecmp"}},
696 TR::Prop({{0, 1, 2}}, {{ReturnValueIndex}})},
697 {{CDM::CLibrary, {
"strspn"}}, TR::Prop({{0, 1}}, {{ReturnValueIndex}})},
698 {{CDM::CLibrary, {
"strcspn"}}, TR::Prop({{0, 1}}, {{ReturnValueIndex}})},
699 {{CDM::CLibrary, {
"strpbrk"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
701 {{CDM::CLibrary, {
"strndup"}}, TR::Prop({{0, 1}}, {{ReturnValueIndex}})},
702 {{CDM::CLibrary, {
"strndupa"}}, TR::Prop({{0, 1}}, {{ReturnValueIndex}})},
703 {{CDM::CLibrary, {
"strdup"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
704 {{CDM::CLibrary, {
"strdupa"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
705 {{CDM::CLibrary, {
"wcsdup"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
710 {{CDM::CLibrary, {
"strtol"}}, TR::Prop({{0}}, {{1, ReturnValueIndex}})},
711 {{CDM::CLibrary, {
"strtoll"}}, TR::Prop({{0}}, {{1, ReturnValueIndex}})},
712 {{CDM::CLibrary, {
"strtoul"}}, TR::Prop({{0}}, {{1, ReturnValueIndex}})},
713 {{CDM::CLibrary, {
"strtoull"}}, TR::Prop({{0}}, {{1, ReturnValueIndex}})},
715 {{CDM::CLibrary, {
"tolower"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
716 {{CDM::CLibrary, {
"toupper"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
718 {{CDM::CLibrary, {
"isalnum"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
719 {{CDM::CLibrary, {
"isalpha"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
720 {{CDM::CLibrary, {
"isascii"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
721 {{CDM::CLibrary, {
"isblank"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
722 {{CDM::CLibrary, {
"iscntrl"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
723 {{CDM::CLibrary, {
"isdigit"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
724 {{CDM::CLibrary, {
"isgraph"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
725 {{CDM::CLibrary, {
"islower"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
726 {{CDM::CLibrary, {
"isprint"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
727 {{CDM::CLibrary, {
"ispunct"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
728 {{CDM::CLibrary, {
"isspace"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
729 {{CDM::CLibrary, {
"isupper"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
730 {{CDM::CLibrary, {
"isxdigit"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
732 {{CDM::CLibraryMaybeHardened, {
"strcpy"}},
733 TR::Prop({{1}}, {{0, ReturnValueIndex}})},
734 {{CDM::CLibraryMaybeHardened, {
"stpcpy"}},
735 TR::Prop({{1}}, {{0, ReturnValueIndex}})},
736 {{CDM::CLibraryMaybeHardened, {
"strcat"}},
737 TR::Prop({{0, 1}}, {{0, ReturnValueIndex}})},
738 {{CDM::CLibraryMaybeHardened, {
"wcsncat"}},
739 TR::Prop({{0, 1}}, {{0, ReturnValueIndex}})},
740 {{CDM::CLibraryMaybeHardened, {
"strncpy"}},
741 TR::Prop({{1, 2}}, {{0, ReturnValueIndex}})},
742 {{CDM::CLibraryMaybeHardened, {
"strncat"}},
743 TR::Prop({{0, 1, 2}}, {{0, ReturnValueIndex}})},
744 {{CDM::CLibraryMaybeHardened, {
"strlcpy"}}, TR::Prop({{1, 2}}, {{0}})},
745 {{CDM::CLibraryMaybeHardened, {
"strlcat"}}, TR::Prop({{0, 1, 2}}, {{0}})},
753 {{CDM::CLibrary, {
"snprintf"}},
754 TR::Prop({{1, 2}, 3}, {{0, ReturnValueIndex}})},
756 {{CDM::CLibrary, {
"sprintf"}},
757 TR::Prop({{1}, 2}, {{0, ReturnValueIndex}})},
760 {{CDM::CLibrary, {
"__snprintf_chk"}},
761 TR::Prop({{1, 4}, 5}, {{0, ReturnValueIndex}})},
764 {{CDM::CLibrary, {
"__sprintf_chk"}},
765 TR::Prop({{3}, 4}, {{0, ReturnValueIndex}})},
768 {{CDM::CLibrary, {
"system"}}, TR::Sink({{0}}, MsgSanitizeSystemArgs)},
769 {{CDM::CLibrary, {
"popen"}}, TR::Sink({{0}}, MsgSanitizeSystemArgs)},
770 {{CDM::CLibrary, {
"execl"}}, TR::Sink({{}, {0}}, MsgSanitizeSystemArgs)},
771 {{CDM::CLibrary, {
"execle"}}, TR::Sink({{}, {0}}, MsgSanitizeSystemArgs)},
772 {{CDM::CLibrary, {
"execlp"}}, TR::Sink({{}, {0}}, MsgSanitizeSystemArgs)},
773 {{CDM::CLibrary, {
"execv"}}, TR::Sink({{0, 1}}, MsgSanitizeSystemArgs)},
774 {{CDM::CLibrary, {
"execve"}},
775 TR::Sink({{0, 1, 2}}, MsgSanitizeSystemArgs)},
776 {{CDM::CLibrary, {
"fexecve"}},
777 TR::Sink({{0, 1, 2}}, MsgSanitizeSystemArgs)},
778 {{CDM::CLibrary, {
"execvp"}}, TR::Sink({{0, 1}}, MsgSanitizeSystemArgs)},
779 {{CDM::CLibrary, {
"execvpe"}},
780 TR::Sink({{0, 1, 2}}, MsgSanitizeSystemArgs)},
781 {{CDM::CLibrary, {
"dlopen"}}, TR::Sink({{0}}, MsgSanitizeSystemArgs)},
789 {{CDM::CLibrary, {
"setproctitle"}},
790 TR::Sink({{0}, 1}, MsgUncontrolledFormatString)},
791 {{CDM::CLibrary, {
"setproctitle_fast"}},
792 TR::Sink({{0}, 1}, MsgUncontrolledFormatString)}};
794 if (TR::UntrustedEnv(
C)) {
797 GlobalCRules.push_back({{CDM::CLibrary, {
"setproctitle_init"}},
798 TR::Sink({{1, 2}}, MsgCustomSink)});
801 GlobalCRules.push_back(
802 {{CDM::CLibrary, {
"getenv"}}, TR::Source({{ReturnValueIndex}})});
805 StaticTaintRules.emplace(std::make_move_iterator(GlobalCRules.begin()),
806 std::make_move_iterator(GlobalCRules.end()));
809 CheckerManager *Mgr =
C.getAnalysisManager().getCheckerManager();
811 GenericTaintRuleParser ConfigParser{*Mgr};
812 std::string Option{
"Config"};
813 StringRef ConfigFile =
815 std::optional<TaintConfiguration> Config =
819 DynamicTaintRules = RuleLookupTy{};
823 GenericTaintRuleParser::RulesContTy Rules{
824 ConfigParser.parseConfiguration(Option, std::move(*Config))};
826 DynamicTaintRules.emplace(std::make_move_iterator(Rules.begin()),
827 std::make_move_iterator(Rules.end()));
830void GenericTaintChecker::checkPreCall(
const CallEvent &
Call,
831 CheckerContext &
C)
const {
835 if (
const auto *Rule =
836 Call.isGlobalCFunction() ? StaticTaintRules->lookup(
Call) :
nullptr)
838 else if (
const auto *Rule = DynamicTaintRules->lookup(
Call))
846 checkUncontrolledFormatString(
Call,
C);
850 taintUnsafeSocketProtocol(
Call,
C);
853void GenericTaintChecker::checkPostCall(
const CallEvent &
Call,
854 CheckerContext &
C)
const {
858 const StackFrameContext *CurrentFrame =
C.getStackFrame();
863 TaintArgsOnPostVisitTy TaintArgsMap = State->get<TaintArgsOnPostVisit>();
865 const ImmutableSet<ArgIdxTy> *TaintArgs = TaintArgsMap.lookup(CurrentFrame);
868 assert(!TaintArgs->isEmpty());
870 LLVM_DEBUG(
for (ArgIdxTy I
872 llvm::dbgs() <<
"PostCall<";
873 Call.dump(llvm::dbgs());
874 llvm::dbgs() <<
"> actually wants to taint arg index: " << I <<
'\n';
877 const NoteTag *InjectionTag =
nullptr;
878 std::vector<SymbolRef> TaintedSymbols;
879 std::vector<ArgIdxTy> TaintedIndexes;
880 for (ArgIdxTy ArgNum : *TaintArgs) {
882 if (ArgNum == ReturnValueIndex) {
884 std::vector<SymbolRef> TaintedSyms =
886 if (!TaintedSyms.empty()) {
887 TaintedSymbols.push_back(TaintedSyms[0]);
888 TaintedIndexes.push_back(ArgNum);
894 if (
auto V = getPointeeOf(State,
Call.getArgSVal(ArgNum))) {
897 if (!TaintedSyms.empty()) {
898 TaintedSymbols.push_back(TaintedSyms[0]);
899 TaintedIndexes.push_back(ArgNum);
905 InjectionTag = taintPropagationExplainerTag(
C, TaintedSymbols, TaintedIndexes,
906 Call.getCalleeStackFrame(0));
908 State = State->remove<TaintArgsOnPostVisit>(CurrentFrame);
909 C.addTransition(State, InjectionTag);
912void GenericTaintChecker::printState(raw_ostream &Out,
ProgramStateRef State,
913 const char *NL,
const char *Sep)
const {
917void GenericTaintRule::process(
const GenericTaintChecker &Checker,
918 const CallEvent &
Call, CheckerContext &
C)
const {
920 const ArgIdxTy CallNumArgs = fromArgumentCount(
Call.getNumArgs());
923 const auto ForEachCallArg = [&
C, &
Call, CallNumArgs](
auto &&Fun) {
924 for (ArgIdxTy I = ReturnValueIndex; I < CallNumArgs; ++I) {
925 const Expr *E = GetArgExpr(I,
Call);
926 Fun(I, E,
C.getSVal(E));
931 ForEachCallArg([
this, &Checker, &
C, &State](ArgIdxTy I,
const Expr *E, SVal) {
933 if (isStdin(
C.getSVal(E),
C.getASTContext())) {
936 if (SinkArgs.contains(I) && isTaintedOrPointsToTainted(State,
C.getSVal(E)))
937 Checker.generateReportIfTainted(E, SinkMsg.value_or(MsgCustomSink),
C);
941 ForEachCallArg([
this, &State](ArgIdxTy I,
const Expr *E, SVal S) {
942 if (FilterArgs.contains(I)) {
944 if (
auto P = getPointeeOf(State, S))
954 bool IsMatching = PropSrcArgs.isEmpty();
955 std::vector<SymbolRef> TaintedSymbols;
956 std::vector<ArgIdxTy> TaintedIndexes;
957 ForEachCallArg([
this, &
C, &IsMatching, &State, &TaintedSymbols,
958 &TaintedIndexes](ArgIdxTy I,
const Expr *E, SVal) {
959 std::optional<SVal> TaintedSVal =
960 getTaintedPointeeOrPointer(State,
C.getSVal(E));
962 IsMatching || (PropSrcArgs.contains(I) && TaintedSVal.has_value());
965 if (TaintedSVal && !isStdin(*TaintedSVal,
C.getASTContext())) {
966 std::vector<SymbolRef> TaintedArgSyms =
968 if (!TaintedArgSyms.empty()) {
969 llvm::append_range(TaintedSymbols, TaintedArgSyms);
970 TaintedIndexes.push_back(I);
980 const auto WouldEscape = [](SVal
V, QualType Ty) ->
bool {
984 const bool IsNonConstRef = Ty->isReferenceType() && !Ty.isConstQualified();
985 const bool IsNonConstPtr =
986 Ty->isPointerType() && !Ty->getPointeeType().isConstQualified();
988 return IsNonConstRef || IsNonConstPtr;
992 auto &F = State->getStateManager().get_context<ArgIdxFactory>();
993 ImmutableSet<ArgIdxTy>
Result = F.getEmptySet();
995 [&](ArgIdxTy I,
const Expr *E, SVal
V) {
996 if (PropDstArgs.contains(I)) {
997 LLVM_DEBUG(llvm::dbgs() <<
"PreCall<";
Call.dump(llvm::dbgs());
999 <<
"> prepares tainting arg index: " << I <<
'\n';);
1009 if (WouldEscape(
V, E->
getType()) && getTaintedPointeeOrPointer(State,
V)) {
1010 LLVM_DEBUG(
if (!
Result.contains(I)) {
1011 llvm::dbgs() <<
"PreCall<";
1012 Call.dump(llvm::dbgs());
1013 llvm::dbgs() <<
"> prepares tainting arg index: " << I <<
'\n';
1020 State = State->set<TaintArgsOnPostVisit>(
C.getStackFrame(),
Result);
1021 const NoteTag *InjectionTag = taintOriginTrackerTag(
1022 C, std::move(TaintedSymbols), std::move(TaintedIndexes),
1023 Call.getCalleeStackFrame(0));
1024 C.addTransition(State, InjectionTag);
1027bool GenericTaintRule::UntrustedEnv(CheckerContext &
C) {
1028 return !
C.getAnalysisManager()
1029 .getAnalyzerOptions()
1030 .ShouldAssumeControlledEnvironment;
1033bool GenericTaintChecker::generateReportIfTainted(
const Expr *E, StringRef Msg,
1034 CheckerContext &
C)
const {
1036 if (!isTaintReporterCheckerEnabled)
1038 std::optional<SVal> TaintedSVal =
1039 getTaintedPointeeOrPointer(
C.getState(),
C.getSVal(E));
1046 if (ExplodedNode *N =
C.generateNonFatalErrorNode(
C.getState())) {
1047 auto report = std::make_unique<PathSensitiveBugReport>(*BT, Msg, N);
1050 report->markInteresting(TaintedSym);
1052 C.emitReport(std::move(report));
1070 const Decl *CallDecl =
Call.getDecl();
1077 const ArgIdxTy CallNumArgs = fromArgumentCount(
Call.getNumArgs());
1084 ArgNum = Format->getFormatIdx() - 1;
1093 const CXXMethodDecl *MDecl = dyn_cast<CXXMethodDecl>(FDecl);
1097 if ((Format->getType()->getName() ==
"printf") && CallNumArgs > ArgNum)
1104bool GenericTaintChecker::checkUncontrolledFormatString(
1105 const CallEvent &
Call, CheckerContext &
C)
const {
1107 ArgIdxTy ArgNum = 0;
1113 return generateReportIfTainted(
Call.getArgExpr(ArgNum),
1114 MsgUncontrolledFormatString,
C);
1117void GenericTaintChecker::taintUnsafeSocketProtocol(
const CallEvent &
Call,
1118 CheckerContext &
C)
const {
1119 if (
Call.getNumArgs() < 1)
1121 const IdentifierInfo *
ID =
Call.getCalleeIdentifier();
1124 if (
ID->getName() !=
"socket")
1127 SourceLocation DomLoc =
Call.getArgExpr(0)->getExprLoc();
1128 StringRef DomName =
C.getMacroNameOrSpelling(DomLoc);
1130 bool SafeProtocol = DomName ==
"AF_SYSTEM" || DomName ==
"AF_LOCAL" ||
1131 DomName ==
"AF_UNIX" || DomName ==
"AF_RESERVED_36";
1136 auto &F = State->getStateManager().get_context<ArgIdxFactory>();
1137 ImmutableSet<ArgIdxTy>
Result = F.add(F.getEmptySet(), ReturnValueIndex);
1138 State = State->set<TaintArgsOnPostVisit>(
C.getStackFrame(),
Result);
1139 C.addTransition(State);
1143void ento::registerTaintPropagationChecker(CheckerManager &Mgr) {
1147bool ento::shouldRegisterTaintPropagationChecker(
const CheckerManager &mgr) {
1151void ento::registerGenericTaintChecker(CheckerManager &Mgr) {
1152 GenericTaintChecker *checker = Mgr.
getChecker<GenericTaintChecker>();
1153 checker->isTaintReporterCheckerEnabled =
true;
1158bool ento::shouldRegisterGenericTaintChecker(
const CheckerManager &mgr) {
static bool getPrintfFormatArgumentNum(const CallEvent &Call, const CheckerContext &C, ArgIdxTy &ArgNum)
TODO: remove checking for printf format attributes and socket whitelisting from GenericTaintChecker,...
#define REGISTER_MAP_WITH_PROGRAMSTATE(Name, Key, Value)
Declares an immutable map of type NameTy, suitable for placement into the ProgramState.
#define REGISTER_SET_FACTORY_WITH_PROGRAMSTATE(Name, Elem)
Declares an immutable set type Name and registers the factory for such sets in the program state,...
static bool contains(const std::set< tok::TokenKind > &Terminators, const Token &Tok)
Holds long-lived AST nodes (such as types and decls) that can be referred to throughout the semantic ...
QualType getFILEType() const
Retrieve the C FILE type.
StringRef getCheckerStringOption(StringRef CheckerName, StringRef OptionName, bool SearchInParents=false) const
Query an option's string value.
Represents a static or instance method of a struct/union/class.
Decl - This represents one declaration (or definition), e.g.
FunctionDecl * getAsFunction() LLVM_READONLY
Returns the function itself, or the templated function if this is a function template.
llvm::iterator_range< specific_attr_iterator< T > > specific_attrs() const
This represents one expression.
Represents a function declaration or definition.
It wraps the AnalysisDeclContext to represent both the call stack with the help of StackFrameContext ...
A (possibly-)qualified type.
QualType getCanonicalType() const
Scope - A scope is a transient data structure that is used while parsing the program.
SourceRange getSourceRange() const LLVM_READONLY
SourceLocation tokens are not useful in isolation - they are low level value objects created/interpre...
bool isPointerType() const
QualType getPointeeType() const
If this is a pointer, ObjC object pointer, or block pointer, this returns the respective pointee.
const BugType & getBugType() const
StringRef getCategory() const
An immutable map from CallDescriptions to arbitrary data.
Represents an abstract call to a function or method along a particular path.
const AnalyzerOptions & getAnalyzerOptions() const
CHECKER * registerChecker(AT &&...Args)
Register a single-part checker (derived from Checker): construct its singleton instance,...
void reportInvalidCheckerOptionValue(const CheckerFrontend *Checker, StringRef OptionName, StringRef ExpectedValueDesc) const
Emits an error through a DiagnosticsEngine about an invalid user supplied checker option value.
CheckerNameRef getCurrentCheckerName() const
CHECKER * getChecker(AT &&...Args)
If the singleton instance of a checker class is not yet constructed, then construct it (with the ...
Simple checker classes that implement one frontend (i.e.
The tag upon which the TagVisitor reacts.
void markInteresting(SymbolRef sym, bugreporter::TrackingKind TKind=bugreporter::TrackingKind::Thorough)
Marks a symbol as interesting.
bool isInteresting(SymbolRef sym) const
SVal - This represents a symbolic expression, which can be either an L-value or an R-value.
std::optional< T > getAs() const
Convert to the specified SVal type, returning std::nullopt if this SVal is not of the desired type.
const MemRegion * getAsRegion() const
const char *const TaintedData
ProgramStateRef addTaint(ProgramStateRef State, const Stmt *S, const LocationContext *LCtx, TaintTagType Kind=TaintTagGeneric)
Create a new state in which the value of the statement is marked as tainted.
std::vector< SymbolRef > getTaintedSymbols(ProgramStateRef State, const Stmt *S, const LocationContext *LCtx, TaintTagType Kind=TaintTagGeneric)
Returns the tainted Symbols for a given Statement and state.
bool isTainted(ProgramStateRef State, const Stmt *S, const LocationContext *LCtx, TaintTagType Kind=TaintTagGeneric)
Check if the statement has a tainted value in the given state.
ProgramStateRef removeTaint(ProgramStateRef State, SVal V)
void printTaint(ProgramStateRef State, raw_ostream &Out, const char *nl="\n", const char *sep="")
IntrusiveRefCntPtr< const ProgramState > ProgramStateRef
std::optional< T > getConfiguration(CheckerManager &Mgr, Checker *Chk, StringRef Option, StringRef ConfigFile)
Read the given file from the filesystem and parse it as a yaml file.
const Regex Rule("(.+)/(.+)\\.framework/")
The JSON file list parser is used to communicate input to InstallAPI.
bool isa(CodeGen::Address addr)
@ Result
The result type of a method or function.
Diagnostic wrappers for TextAPI types for error reporting.
static void mapping(IO &IO, TaintConfiguration &Config)
static void mapping(IO &IO, TaintConfiguration::Filter &Filter)
static void mapping(IO &IO, TaintConfiguration::Propagation &Propagation)
static void mapping(IO &IO, TaintConfiguration::Sink &Sink)
static void enumeration(IO &IO, TaintConfiguration::VariadicType &Value)