28#include "llvm/ADT/StringExtras.h"
29#include "llvm/ADT/StringRef.h"
30#include "llvm/Support/YAMLTraits.h"
38#define DEBUG_TYPE "taint-checker"
44using llvm::ImmutableSet;
48class GenericTaintChecker;
/// Diagnostic text for tainted data reaching a format-string argument. It is
/// the message checkUncontrolledFormatString() reports with, and the message
/// attached to the built-in setproctitle / setproctitle_fast sink rules.
51constexpr llvm::StringLiteral MsgUncontrolledFormatString =
52 "Untrusted data is used as a format string "
53 "(CWE-134: Uncontrolled Format String)";
/// Diagnostic text attached to the built-in sink rules for system, popen, the
/// exec* family and dlopen.
58constexpr llvm::StringLiteral MsgSanitizeSystemArgs =
59 "Untrusted data is passed to a system call "
60 "(CERT/STR02-C. Sanitize data passed to complex subsystems)";
/// Fallback diagnostic text: used when a sink rule carries no message of its
/// own (SinkMsg.value_or(MsgCustomSink)) and for the setproctitle_init sink.
63constexpr llvm::StringLiteral MsgCustomSink =
64 "Untrusted data is passed to a user-defined sink";
/// Pseudo argument index standing for the call's return value. Real arguments
/// are numbered from 0, so per-call iteration over "return value + arguments"
/// starts at this value.
70constexpr ArgIdxTy ReturnValueIndex{-1};
72static ArgIdxTy fromArgumentCount(
unsigned Count) {
74 static_cast<std::size_t
>(std::numeric_limits<ArgIdxTy>::max()) &&
75 "ArgIdxTy is not large enough to represent the number of arguments.");
86 const auto *SymReg = dyn_cast_or_null<SymbolicRegion>(Val.
getAsRegion());
92 dyn_cast_or_null<DeclRegion>(SymReg->getSymbol()->getOriginRegion());
98 if (
const auto *D = dyn_cast_or_null<VarDecl>(DeclReg->getDecl())) {
99 D = D->getCanonicalDecl();
100 if (D->getName() ==
"stdin" && D->hasExternalStorage() && D->isExternC()) {
102 const QualType Ty = D->getType().getCanonicalType();
112 const QualType ArgTy = LValue.getType(State->getStateManager().getContext());
114 return State->getSVal(LValue);
118 return State->getSVal(LValue, State->getStateManager().getContext().CharTy);
124 return getPointeeOf(State, *LValue);
133 if (
auto Pointee = getPointeeOf(State, Arg))
143 return getTaintedPointeeOrPointer(State, ExprSVal).has_value();
150 std::vector<SymbolRef> TaintedSymbols,
151 std::vector<ArgIdxTy> TaintedArgs,
153 return C.getNoteTag([TaintedSymbols = std::move(TaintedSymbols),
154 TaintedArgs = std::move(TaintedArgs), CallLocation](
161 if (TaintedSymbols.empty())
162 return "Taint originated here";
164 for (
auto Sym : TaintedSymbols) {
167 LLVM_DEBUG(
for (
auto Arg
169 llvm::dbgs() <<
"Taint Propagated from argument " << Arg + 1 <<
"\n";
178const NoteTag *taintPropagationExplainerTag(
180 std::vector<ArgIdxTy> TaintedArgs,
const LocationContext *CallLocation) {
181 assert(TaintedSymbols.size() == TaintedArgs.size());
182 return C.getNoteTag([TaintedSymbols = std::move(TaintedSymbols),
183 TaintedArgs = std::move(TaintedArgs), CallLocation](
186 llvm::raw_svector_ostream Out(Msg);
188 if (TaintedSymbols.empty() ||
192 int nofTaintedArgs = 0;
193 for (
auto [Idx, Sym] : llvm::enumerate(TaintedSymbols)) {
196 if (TaintedArgs[Idx] != ReturnValueIndex) {
197 LLVM_DEBUG(llvm::dbgs() <<
"Taint Propagated to argument "
198 << TaintedArgs[Idx] + 1 <<
"\n");
199 if (nofTaintedArgs == 0)
200 Out <<
"Taint propagated to the ";
203 Out << TaintedArgs[Idx] + 1
204 << llvm::getOrdinalSuffix(TaintedArgs[Idx] + 1) <<
" argument";
207 LLVM_DEBUG(llvm::dbgs() <<
"Taint Propagated to return value.\n");
208 Out <<
"Taint propagated to the return value";
212 return std::string(Out.str());
/// Builds an argument set from an explicit list of indices and, optionally,
/// a variadic start index: contains() treats every index at or beyond that
/// value as a member as well.
222 ArgSet(ArgVecTy &&DiscreteArgs,
223 std::optional<ArgIdxTy> VariadicIndex = std::nullopt)
224 : DiscreteArgs(std::move(DiscreteArgs)),
225 VariadicIndex(std::move(VariadicIndex)) {}
/// Membership test for an argument index.
/// An index that is not listed explicitly still matches when a variadic start
/// index is set and ArgIdx is at or beyond it.
/// NOTE(review): the statement guarded by the is_contained() check is not
/// visible in this listing; presumably it returns true - confirm.
227 bool contains(ArgIdxTy ArgIdx)
const {
228 if (llvm::is_contained(DiscreteArgs, ArgIdx))
231 return VariadicIndex && ArgIdx >= *VariadicIndex;
/// True when the set has neither explicit indices nor a variadic start index.
234 bool isEmpty()
const {
return DiscreteArgs.empty() && !VariadicIndex; }
// Explicitly listed argument indices.
237 ArgVecTy DiscreteArgs;
// When set, every index greater than or equal to this value is also treated
// as a member of the set.
238 std::optional<ArgIdxTy> VariadicIndex;
246class GenericTaintRule {
259 std::optional<StringRef> SinkMsg;
261 GenericTaintRule() =
default;
263 GenericTaintRule(ArgSet &&Sink, ArgSet &&Filter, ArgSet &&Src, ArgSet &&Dst,
264 std::optional<StringRef> SinkMsg = std::nullopt)
265 : SinkArgs(std::move(Sink)), FilterArgs(std::move(Filter)),
266 PropSrcArgs(std::move(Src)), PropDstArgs(std::move(Dst)),
/// Factory for a sink-only rule: SinkArgs fills the first (sink) slot of the
/// rule constructor, the filter and both propagation sets stay empty, and Msg
/// is forwarded as the constructor's optional SinkMsg argument.
272 static GenericTaintRule Sink(ArgSet &&SinkArgs,
273 std::optional<StringRef> Msg = std::nullopt) {
274 return {std::move(SinkArgs), {}, {}, {}, Msg};
/// Factory for a filter-only rule: FilterArgs fills the second (filter) slot
/// of the rule constructor; the sink and both propagation sets stay empty.
278 static GenericTaintRule Filter(ArgSet &&FilterArgs) {
279 return {{}, std::move(FilterArgs), {}, {}};
/// Factory for a taint source. A source is expressed as a propagation with an
/// empty source set: SourceArgs goes into the destination slot, and process()
/// initialises its match flag from PropSrcArgs.isEmpty().
284 static GenericTaintRule Source(ArgSet &&SourceArgs) {
285 return {{}, {}, {}, std::move(SourceArgs)};
/// Factory for a propagation rule: SrcArgs and DstArgs fill the propagation
/// source and destination slots; the sink and filter sets stay empty.
289 static GenericTaintRule Prop(ArgSet &&SrcArgs, ArgSet &&DstArgs) {
290 return {{}, {}, std::move(SrcArgs), std::move(DstArgs)};
300 return ArgIdx == ReturnValueIndex ?
Call.getOriginExpr()
301 :
Call.getArgExpr(ArgIdx);
311struct TaintConfiguration {
312 using NameScopeArgs = std::tuple<std::string, std::string, ArgVecTy>;
313 enum class VariadicType {
None, Src, Dst };
320 struct Sink : Common {
324 struct Filter : Common {
328 struct Propagation : Common {
331 VariadicType VarType;
335 std::vector<Propagation> Propagations;
336 std::vector<Filter> Filters;
337 std::vector<Sink> Sinks;
339 TaintConfiguration() =
default;
340 TaintConfiguration(
const TaintConfiguration &) =
default;
341 TaintConfiguration(TaintConfiguration &&) =
default;
342 TaintConfiguration &operator=(
const TaintConfiguration &) =
default;
343 TaintConfiguration &operator=(TaintConfiguration &&) =
default;
346struct GenericTaintRuleParser {
351 using RulesContTy = std::vector<std::pair<CallDescription, GenericTaintRule>>;
352 RulesContTy parseConfiguration(
const std::string &Option,
353 TaintConfiguration &&Config)
const;
360 void validateArgVector(
const std::string &Option,
const ArgVecTy &Args)
const;
362 template <
typename Config>
static NamePartsTy parseNameParts(
const Config &
C);
366 template <
typename Config>
367 static void consumeRulesFromConfig(
const Config &
C, GenericTaintRule &&Rule,
370 void parseConfig(
const std::string &Option, TaintConfiguration::Sink &&P,
371 RulesContTy &Rules)
const;
372 void parseConfig(
const std::string &Option, TaintConfiguration::Filter &&P,
373 RulesContTy &Rules)
const;
374 void parseConfig(
const std::string &Option,
375 TaintConfiguration::Propagation &&P,
376 RulesContTy &Rules)
const;
381class GenericTaintChecker
382 :
public Checker<check::PreCall, check::PostCall, check::BeginFunction> {
388 void printState(raw_ostream &Out,
ProgramStateRef State,
const char *NL,
389 const char *Sep)
const override;
392 bool generateReportIfTainted(
const Expr *E, StringRef Msg,
395 bool isTaintReporterCheckerEnabled =
false;
396 std::optional<BugType> BT;
413 mutable std::optional<RuleLookupTy> StaticTaintRules;
414 mutable std::optional<RuleLookupTy> DynamicTaintRules;
// Register the three config entry types with LLVM's YAML I/O so that the
// std::vector members of TaintConfiguration (Sinks, Filters, Propagations)
// can be mapped as YAML sequences.
419LLVM_YAML_IS_SEQUENCE_VECTOR(TaintConfiguration::Sink)
420LLVM_YAML_IS_SEQUENCE_VECTOR(TaintConfiguration::Filter)
421LLVM_YAML_IS_SEQUENCE_VECTOR(TaintConfiguration::Propagation)
/// YAML mapping for the top-level configuration document. All three keys
/// ("Propagations", "Filters", "Sinks") are optional.
425template <>
struct MappingTraits<TaintConfiguration> {
426 static void mapping(IO &IO, TaintConfiguration &Config) {
427 IO.mapOptional(
"Propagations", Config.Propagations);
428 IO.mapOptional(
"Filters", Config.Filters);
429 IO.mapOptional(
"Sinks", Config.Sinks);
/// YAML mapping for one sink entry: "Name" and "Args" are required, "Scope"
/// is optional.
433template <>
struct MappingTraits<TaintConfiguration::Sink> {
434 static void mapping(IO &IO, TaintConfiguration::Sink &Sink) {
435 IO.mapRequired(
"Name", Sink.Name);
436 IO.mapOptional(
"Scope", Sink.Scope);
437 IO.mapRequired(
"Args", Sink.SinkArgs);
/// YAML mapping for one filter entry: "Name" and "Args" are required, "Scope"
/// is optional.
441template <>
struct MappingTraits<TaintConfiguration::Filter> {
442 static void mapping(IO &IO, TaintConfiguration::Filter &Filter) {
443 IO.mapRequired(
"Name", Filter.Name);
444 IO.mapOptional(
"Scope", Filter.Scope);
445 IO.mapRequired(
"Args", Filter.FilterArgs);
/// YAML mapping for one propagation entry: only "Name" is required; "Scope",
/// "SrcArgs", "DstArgs", "VariadicType" and "VariadicIndex" are optional.
449template <>
struct MappingTraits<TaintConfiguration::Propagation> {
450 static void mapping(IO &IO, TaintConfiguration::Propagation &Propagation) {
451 IO.mapRequired(
"Name", Propagation.Name);
452 IO.mapOptional(
"Scope", Propagation.Scope);
453 IO.mapOptional(
"SrcArgs", Propagation.SrcArgs);
454 IO.mapOptional(
"DstArgs", Propagation.DstArgs);
455 IO.mapOptional(
"VariadicType", Propagation.VarType);
456 IO.mapOptional(
"VariadicIndex", Propagation.VarIndex);
/// YAML scalar <-> enum mapping for VariadicType: the accepted spellings are
/// "None", "Src" and "Dst".
460template <>
struct ScalarEnumerationTraits<TaintConfiguration::VariadicType> {
462 IO.enumCase(
Value,
"None", TaintConfiguration::VariadicType::None);
463 IO.enumCase(
Value,
"Src", TaintConfiguration::VariadicType::Src);
464 IO.enumCase(
Value,
"Dst", TaintConfiguration::VariadicType::Dst);
475 ImmutableSet<ArgIdxTy>)
/// Checks every index in Args. ReturnValueIndex (-1) is the smallest accepted
/// value; anything below it is flagged for the checker option named by Option
/// with the "greater or equal to -1" expectation text.
/// NOTE(review): the line naming the function that receives these arguments
/// is missing from this listing - confirm which Mgr member is called.
478void GenericTaintRuleParser::validateArgVector(
const std::string &Option,
479 const ArgVecTy &Args)
const {
480 for (ArgIdxTy Arg : Args) {
481 if (Arg < ReturnValueIndex) {
483 Mgr.
getChecker<GenericTaintChecker>(), Option,
484 "an argument number for propagation rules greater or equal to -1");
489template <
typename Config>
490GenericTaintRuleParser::NamePartsTy
491GenericTaintRuleParser::parseNameParts(
const Config &
C) {
492 NamePartsTy NameParts;
493 if (!
C.Scope.empty()) {
496 StringRef{
C.Scope}.split(NameParts,
"::", -1,
499 NameParts.emplace_back(
C.Name);
503template <
typename Config>
504void GenericTaintRuleParser::consumeRulesFromConfig(
const Config &
C,
505 GenericTaintRule &&Rule,
506 RulesContTy &Rules) {
507 NamePartsTy NameParts = parseNameParts(
C);
508 Rules.emplace_back(CallDescription(CDM::Unspecified, NameParts),
512void GenericTaintRuleParser::parseConfig(
const std::string &Option,
513 TaintConfiguration::Sink &&S,
514 RulesContTy &Rules)
const {
515 validateArgVector(Option, S.SinkArgs);
516 consumeRulesFromConfig(S, GenericTaintRule::Sink(std::move(S.SinkArgs)),
520void GenericTaintRuleParser::parseConfig(
const std::string &Option,
521 TaintConfiguration::Filter &&S,
522 RulesContTy &Rules)
const {
523 validateArgVector(Option, S.FilterArgs);
524 consumeRulesFromConfig(S, GenericTaintRule::Filter(std::move(S.FilterArgs)),
/// Turns one propagation entry from the configuration into a rule appended to
/// Rules. Both index vectors are validated first. The entry's VarIndex is
/// attached to exactly one side - the source set when VarType is Src, the
/// destination set when it is Dst - and to neither side otherwise.
528void GenericTaintRuleParser::parseConfig(
const std::string &Option,
529 TaintConfiguration::Propagation &&P,
530 RulesContTy &Rules)
const {
531 validateArgVector(Option, P.SrcArgs);
532 validateArgVector(Option, P.DstArgs);
533 bool IsSrcVariadic = P.VarType == TaintConfiguration::VariadicType::Src;
534 bool IsDstVariadic = P.VarType == TaintConfiguration::VariadicType::Dst;
535 std::optional<ArgIdxTy> JustVarIndex = P.VarIndex;
// The argument vectors are moved out of P here; P itself is still passed on
// below, where the visible part of consumeRulesFromConfig only hands it to
// parseNameParts (which reads Scope and Name).
537 ArgSet SrcDesc(std::move(P.SrcArgs),
538 IsSrcVariadic ? JustVarIndex : std::nullopt);
539 ArgSet DstDesc(std::move(P.DstArgs),
540 IsDstVariadic ? JustVarIndex : std::nullopt);
542 consumeRulesFromConfig(
543 P, GenericTaintRule::Prop(std::move(SrcDesc), std::move(DstDesc)), Rules);
546GenericTaintRuleParser::RulesContTy
547GenericTaintRuleParser::parseConfiguration(
const std::string &Option,
548 TaintConfiguration &&Config)
const {
552 for (
auto &F : Config.Filters)
553 parseConfig(Option, std::move(F), Rules);
555 for (
auto &S : Config.Sinks)
556 parseConfig(Option, std::move(S), Rules);
558 for (
auto &P : Config.Propagations)
559 parseConfig(Option, std::move(P), Rules);
564void GenericTaintChecker::initTaintRules(CheckerContext &
C)
const {
568 if (StaticTaintRules || DynamicTaintRules)
571 using RulesConstructionTy =
572 std::vector<std::pair<CallDescription, GenericTaintRule>>;
573 using TR = GenericTaintRule;
575 RulesConstructionTy GlobalCRules{
577 {{CDM::CLibrary, {
"fdopen"}}, TR::Source({{ReturnValueIndex}})},
578 {{CDM::CLibrary, {
"fopen"}}, TR::Source({{ReturnValueIndex}})},
579 {{CDM::CLibrary, {
"freopen"}}, TR::Source({{ReturnValueIndex}})},
580 {{CDM::CLibrary, {
"getch"}}, TR::Source({{ReturnValueIndex}})},
581 {{CDM::CLibrary, {
"getchar"}}, TR::Source({{ReturnValueIndex}})},
582 {{CDM::CLibrary, {
"getchar_unlocked"}}, TR::Source({{ReturnValueIndex}})},
583 {{CDM::CLibrary, {
"gets"}}, TR::Source({{0, ReturnValueIndex}})},
584 {{CDM::CLibrary, {
"gets_s"}}, TR::Source({{0, ReturnValueIndex}})},
585 {{CDM::CLibrary, {
"scanf"}}, TR::Source({{}, 1})},
586 {{CDM::CLibrary, {
"scanf_s"}}, TR::Source({{}, 1})},
587 {{CDM::CLibrary, {
"wgetch"}}, TR::Source({{ReturnValueIndex}})},
593 {{CDM::CLibrary, {
"_IO_getc"}}, TR::Source({{ReturnValueIndex}})},
594 {{CDM::CLibrary, {
"getcwd"}}, TR::Source({{0, ReturnValueIndex}})},
595 {{CDM::CLibrary, {
"getwd"}}, TR::Source({{0, ReturnValueIndex}})},
596 {{CDM::CLibrary, {
"readlink"}}, TR::Source({{1, ReturnValueIndex}})},
597 {{CDM::CLibrary, {
"readlinkat"}}, TR::Source({{2, ReturnValueIndex}})},
598 {{CDM::CLibrary, {
"get_current_dir_name"}},
599 TR::Source({{ReturnValueIndex}})},
600 {{CDM::CLibrary, {
"gethostname"}}, TR::Source({{0}})},
601 {{CDM::CLibrary, {
"getnameinfo"}}, TR::Source({{2, 4}})},
602 {{CDM::CLibrary, {
"getseuserbyname"}}, TR::Source({{1, 2}})},
603 {{CDM::CLibrary, {
"getgroups"}}, TR::Source({{1, ReturnValueIndex}})},
604 {{CDM::CLibrary, {
"getlogin"}}, TR::Source({{ReturnValueIndex}})},
605 {{CDM::CLibrary, {
"getlogin_r"}}, TR::Source({{0}})},
608 {{CDM::CLibrary, {
"accept"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
609 {{CDM::CLibrary, {
"atoi"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
610 {{CDM::CLibrary, {
"atol"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
611 {{CDM::CLibrary, {
"atoll"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
612 {{CDM::CLibrary, {
"fgetc"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
613 {{CDM::CLibrary, {
"fgetln"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
614 {{CDM::CLibraryMaybeHardened, {
"fgets"}},
615 TR::Prop({{2}}, {{0, ReturnValueIndex}})},
616 {{CDM::CLibraryMaybeHardened, {
"fgetws"}},
617 TR::Prop({{2}}, {{0, ReturnValueIndex}})},
618 {{CDM::CLibrary, {
"fscanf"}}, TR::Prop({{0}}, {{}, 2})},
619 {{CDM::CLibrary, {
"fscanf_s"}}, TR::Prop({{0}}, {{}, 2})},
620 {{CDM::CLibrary, {
"sscanf"}}, TR::Prop({{0}}, {{}, 2})},
621 {{CDM::CLibrary, {
"sscanf_s"}}, TR::Prop({{0}}, {{}, 2})},
623 {{CDM::CLibrary, {
"getc"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
624 {{CDM::CLibrary, {
"getc_unlocked"}},
625 TR::Prop({{0}}, {{ReturnValueIndex}})},
626 {{CDM::CLibrary, {
"getdelim"}}, TR::Prop({{3}}, {{0}})},
630 {{CDM::CLibrary, {
"getline"}}, TR::Prop({{2}}, {{0}})},
631 {{CDM::CLibrary, {
"getw"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
632 {{CDM::CLibraryMaybeHardened, {
"pread"}},
633 TR::Prop({{0, 1, 2, 3}}, {{1, ReturnValueIndex}})},
634 {{CDM::CLibraryMaybeHardened, {
"read"}},
635 TR::Prop({{0, 2}}, {{1, ReturnValueIndex}})},
636 {{CDM::CLibraryMaybeHardened, {
"fread"}},
637 TR::Prop({{3}}, {{0, ReturnValueIndex}})},
638 {{CDM::CLibraryMaybeHardened, {
"recv"}},
639 TR::Prop({{0}}, {{1, ReturnValueIndex}})},
640 {{CDM::CLibraryMaybeHardened, {
"recvfrom"}},
641 TR::Prop({{0}}, {{1, ReturnValueIndex}})},
643 {{CDM::CLibrary, {
"ttyname"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
644 {{CDM::CLibrary, {
"ttyname_r"}},
645 TR::Prop({{0}}, {{1, ReturnValueIndex}})},
647 {{CDM::CLibrary, {
"basename"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
648 {{CDM::CLibrary, {
"dirname"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
649 {{CDM::CLibrary, {
"fnmatch"}}, TR::Prop({{1}}, {{ReturnValueIndex}})},
651 {{CDM::CLibrary, {
"mbtowc"}}, TR::Prop({{1}}, {{0, ReturnValueIndex}})},
652 {{CDM::CLibrary, {
"wctomb"}}, TR::Prop({{1}}, {{0, ReturnValueIndex}})},
653 {{CDM::CLibrary, {
"wcwidth"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
655 {{CDM::CLibrary, {
"memcmp"}},
656 TR::Prop({{0, 1, 2}}, {{ReturnValueIndex}})},
657 {{CDM::CLibraryMaybeHardened, {
"memcpy"}},
658 TR::Prop({{1, 2}}, {{0, ReturnValueIndex}})},
659 {{CDM::CLibraryMaybeHardened, {
"memmove"}},
660 TR::Prop({{1, 2}}, {{0, ReturnValueIndex}})},
661 {{CDM::CLibraryMaybeHardened, {
"bcopy"}}, TR::Prop({{0, 2}}, {{1}})},
667 {{CDM::CLibrary, {
"memmem"}}, TR::Prop({{0, 1}}, {{ReturnValueIndex}})},
668 {{CDM::CLibrary, {
"strstr"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
669 {{CDM::CLibrary, {
"strcasestr"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
673 {{CDM::CLibraryMaybeHardened, {
"memchr"}},
674 TR::Prop({{0}}, {{ReturnValueIndex}})},
675 {{CDM::CLibraryMaybeHardened, {
"memrchr"}},
676 TR::Prop({{0}}, {{ReturnValueIndex}})},
677 {{CDM::CLibrary, {
"rawmemchr"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
678 {{CDM::CLibraryMaybeHardened, {
"strchr"}},
679 TR::Prop({{0}}, {{ReturnValueIndex}})},
680 {{CDM::CLibraryMaybeHardened, {
"strrchr"}},
681 TR::Prop({{0}}, {{ReturnValueIndex}})},
682 {{CDM::CLibraryMaybeHardened, {
"strchrnul"}},
683 TR::Prop({{0}}, {{ReturnValueIndex}})},
684 {{CDM::CLibrary, {
"index"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
685 {{CDM::CLibrary, {
"rindex"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
689 {{CDM::CLibrary, {
"qsort"}}, TR::Prop({{0}}, {{0}})},
690 {{CDM::CLibrary, {
"qsort_r"}}, TR::Prop({{0}}, {{0}})},
692 {{CDM::CLibrary, {
"strcmp"}}, TR::Prop({{0, 1}}, {{ReturnValueIndex}})},
693 {{CDM::CLibrary, {
"strcasecmp"}},
694 TR::Prop({{0, 1}}, {{ReturnValueIndex}})},
695 {{CDM::CLibrary, {
"strncmp"}},
696 TR::Prop({{0, 1, 2}}, {{ReturnValueIndex}})},
697 {{CDM::CLibrary, {
"strncasecmp"}},
698 TR::Prop({{0, 1, 2}}, {{ReturnValueIndex}})},
699 {{CDM::CLibrary, {
"strspn"}}, TR::Prop({{0, 1}}, {{ReturnValueIndex}})},
700 {{CDM::CLibrary, {
"strcspn"}}, TR::Prop({{0, 1}}, {{ReturnValueIndex}})},
701 {{CDM::CLibrary, {
"strpbrk"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
703 {{CDM::CLibrary, {
"strndup"}}, TR::Prop({{0, 1}}, {{ReturnValueIndex}})},
704 {{CDM::CLibrary, {
"strndupa"}}, TR::Prop({{0, 1}}, {{ReturnValueIndex}})},
705 {{CDM::CLibrary, {
"strdup"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
706 {{CDM::CLibrary, {
"strdupa"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
707 {{CDM::CLibrary, {
"wcsdup"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
712 {{CDM::CLibrary, {
"strtol"}}, TR::Prop({{0}}, {{1, ReturnValueIndex}})},
713 {{CDM::CLibrary, {
"strtoll"}}, TR::Prop({{0}}, {{1, ReturnValueIndex}})},
714 {{CDM::CLibrary, {
"strtoul"}}, TR::Prop({{0}}, {{1, ReturnValueIndex}})},
715 {{CDM::CLibrary, {
"strtoull"}}, TR::Prop({{0}}, {{1, ReturnValueIndex}})},
717 {{CDM::CLibrary, {
"tolower"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
718 {{CDM::CLibrary, {
"toupper"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
720 {{CDM::CLibrary, {
"isalnum"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
721 {{CDM::CLibrary, {
"isalpha"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
722 {{CDM::CLibrary, {
"isascii"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
723 {{CDM::CLibrary, {
"isblank"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
724 {{CDM::CLibrary, {
"iscntrl"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
725 {{CDM::CLibrary, {
"isdigit"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
726 {{CDM::CLibrary, {
"isgraph"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
727 {{CDM::CLibrary, {
"islower"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
728 {{CDM::CLibrary, {
"isprint"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
729 {{CDM::CLibrary, {
"ispunct"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
730 {{CDM::CLibrary, {
"isspace"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
731 {{CDM::CLibrary, {
"isupper"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
732 {{CDM::CLibrary, {
"isxdigit"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
734 {{CDM::CLibraryMaybeHardened, {
"strcpy"}},
735 TR::Prop({{1}}, {{0, ReturnValueIndex}})},
736 {{CDM::CLibraryMaybeHardened, {
"stpcpy"}},
737 TR::Prop({{1}}, {{0, ReturnValueIndex}})},
738 {{CDM::CLibraryMaybeHardened, {
"strcat"}},
739 TR::Prop({{0, 1}}, {{0, ReturnValueIndex}})},
740 {{CDM::CLibraryMaybeHardened, {
"wcsncat"}},
741 TR::Prop({{0, 1}}, {{0, ReturnValueIndex}})},
742 {{CDM::CLibraryMaybeHardened, {
"strncpy"}},
743 TR::Prop({{1, 2}}, {{0, ReturnValueIndex}})},
744 {{CDM::CLibraryMaybeHardened, {
"strncat"}},
745 TR::Prop({{0, 1, 2}}, {{0, ReturnValueIndex}})},
746 {{CDM::CLibraryMaybeHardened, {
"strlcpy"}}, TR::Prop({{1, 2}}, {{0}})},
747 {{CDM::CLibraryMaybeHardened, {
"strlcat"}}, TR::Prop({{0, 1, 2}}, {{0}})},
755 {{CDM::CLibrary, {
"snprintf"}},
756 TR::Prop({{1, 2}, 3}, {{0, ReturnValueIndex}})},
758 {{CDM::CLibrary, {
"sprintf"}},
759 TR::Prop({{1}, 2}, {{0, ReturnValueIndex}})},
762 {{CDM::CLibrary, {
"__snprintf_chk"}},
763 TR::Prop({{1, 4}, 5}, {{0, ReturnValueIndex}})},
766 {{CDM::CLibrary, {
"__sprintf_chk"}},
767 TR::Prop({{3}, 4}, {{0, ReturnValueIndex}})},
770 {{CDM::CLibrary, {
"system"}}, TR::Sink({{0}}, MsgSanitizeSystemArgs)},
771 {{CDM::CLibrary, {
"popen"}}, TR::Sink({{0}}, MsgSanitizeSystemArgs)},
772 {{CDM::CLibrary, {
"execl"}}, TR::Sink({{}, {0}}, MsgSanitizeSystemArgs)},
773 {{CDM::CLibrary, {
"execle"}}, TR::Sink({{}, {0}}, MsgSanitizeSystemArgs)},
774 {{CDM::CLibrary, {
"execlp"}}, TR::Sink({{}, {0}}, MsgSanitizeSystemArgs)},
775 {{CDM::CLibrary, {
"execv"}}, TR::Sink({{0, 1}}, MsgSanitizeSystemArgs)},
776 {{CDM::CLibrary, {
"execve"}},
777 TR::Sink({{0, 1, 2}}, MsgSanitizeSystemArgs)},
778 {{CDM::CLibrary, {
"fexecve"}},
779 TR::Sink({{0, 1, 2}}, MsgSanitizeSystemArgs)},
780 {{CDM::CLibrary, {
"execvp"}}, TR::Sink({{0, 1}}, MsgSanitizeSystemArgs)},
781 {{CDM::CLibrary, {
"execvpe"}},
782 TR::Sink({{0, 1, 2}}, MsgSanitizeSystemArgs)},
783 {{CDM::CLibrary, {
"dlopen"}}, TR::Sink({{0}}, MsgSanitizeSystemArgs)},
791 {{CDM::CLibrary, {
"setproctitle"}},
792 TR::Sink({{0}, 1}, MsgUncontrolledFormatString)},
793 {{CDM::CLibrary, {
"setproctitle_fast"}},
794 TR::Sink({{0}, 1}, MsgUncontrolledFormatString)}};
796 if (TR::UntrustedEnv(
C)) {
799 GlobalCRules.push_back({{CDM::CLibrary, {
"setproctitle_init"}},
800 TR::Sink({{1, 2}}, MsgCustomSink)});
803 GlobalCRules.push_back(
804 {{CDM::CLibrary, {
"getenv"}}, TR::Source({{ReturnValueIndex}})});
806 CheckerManager *Mgr =
C.getAnalysisManager().getCheckerManager();
808 StaticTaintRules = RuleLookupTy{};
810 "EnableDefaultConfig"))
811 StaticTaintRules.emplace(std::make_move_iterator(GlobalCRules.begin()),
812 std::make_move_iterator(GlobalCRules.end()));
815 const GenericTaintRuleParser ConfigParser{*Mgr};
816 std::string Option{
"Config"};
817 StringRef ConfigFile =
819 std::optional<TaintConfiguration> Config =
823 DynamicTaintRules = RuleLookupTy{};
827 GenericTaintRuleParser::RulesContTy Rules{
828 ConfigParser.parseConfiguration(Option, std::move(*Config))};
830 DynamicTaintRules.emplace(std::make_move_iterator(Rules.begin()),
831 std::make_move_iterator(Rules.end()));
844void GenericTaintChecker::checkBeginFunction(CheckerContext &
C)
const {
845 if (!
C.inTopFrame() ||
C.getAnalysisManager()
846 .getAnalyzerOptions()
847 .ShouldAssumeControlledEnvironment)
850 const auto *FD = dyn_cast<FunctionDecl>(
C.getLocationContext()->getDecl());
851 if (!FD || !FD->isMain() || FD->param_size() < 2)
854 if (!FD->parameters()[0]->getType()->isIntegerType())
861 const MemRegion *ArgcReg =
862 State->getRegion(FD->parameters()[0],
C.getLocationContext());
863 SVal ArgcSVal = State->getSVal(ArgcReg);
865 StringRef ArgcName = FD->parameters()[0]->getName();
866 if (
auto N = ArgcSVal.
getAs<NonLoc>()) {
867 ConstraintManager &CM =
C.getConstraintManager();
871 llvm::APSInt::getUnsigned(2097152),
true);
874 const MemRegion *ArgvReg =
875 State->getRegion(FD->parameters()[1],
C.getLocationContext());
876 SVal ArgvSVal = State->getSVal(ArgvReg);
878 StringRef ArgvName = FD->parameters()[1]->getName();
880 bool HaveEnvp = FD->param_size() > 2;
886 const MemRegion *EnvPReg =
887 State->getRegion(FD->parameters()[2],
C.getLocationContext());
888 EnvpSVal = State->getSVal(EnvPReg);
889 EnvpName = FD->parameters()[2]->getName();
893 const NoteTag *OriginatingTag =
894 C.getNoteTag([ArgvSVal, ArgcSVal, ArgcName, ArgvName, EnvpSVal,
895 EnvpName](PathSensitiveBugReport &BR) -> std::string {
903 Message +=
"'" + ArgvName.str() +
"'";
907 Message +=
"'" + ArgcName.str() +
"'";
912 Message +=
"'" + EnvpName.str() +
"'";
914 return "Taint originated in " +
Message;
916 C.addTransition(State, OriginatingTag);
919void GenericTaintChecker::checkPreCall(
const CallEvent &
Call,
920 CheckerContext &
C)
const {
925 if (
const auto *Rule =
926 Call.isGlobalCFunction() ? StaticTaintRules->lookup(
Call) :
nullptr)
928 else if (
const auto *Rule = DynamicTaintRules->lookup(
Call))
936 checkUncontrolledFormatString(
Call,
C);
940 taintUnsafeSocketProtocol(
Call,
C);
943void GenericTaintChecker::checkPostCall(
const CallEvent &
Call,
944 CheckerContext &
C)
const {
948 const StackFrameContext *CurrentFrame =
C.getStackFrame();
953 TaintArgsOnPostVisitTy TaintArgsMap = State->get<TaintArgsOnPostVisit>();
955 const ImmutableSet<ArgIdxTy> *TaintArgs = TaintArgsMap.lookup(CurrentFrame);
958 assert(!TaintArgs->isEmpty());
960 LLVM_DEBUG(
for (ArgIdxTy I
962 llvm::dbgs() <<
"PostCall<";
963 Call.dump(llvm::dbgs());
964 llvm::dbgs() <<
"> actually wants to taint arg index: " << I <<
'\n';
967 const NoteTag *InjectionTag =
nullptr;
968 std::vector<SymbolRef> TaintedSymbols;
969 std::vector<ArgIdxTy> TaintedIndexes;
970 for (ArgIdxTy ArgNum : *TaintArgs) {
972 if (ArgNum == ReturnValueIndex) {
974 std::vector<SymbolRef> TaintedSyms =
976 if (!TaintedSyms.empty()) {
977 TaintedSymbols.push_back(TaintedSyms[0]);
978 TaintedIndexes.push_back(ArgNum);
984 if (
auto V = getPointeeOf(State,
Call.getArgSVal(ArgNum))) {
987 if (!TaintedSyms.empty()) {
988 TaintedSymbols.push_back(TaintedSyms[0]);
989 TaintedIndexes.push_back(ArgNum);
995 InjectionTag = taintPropagationExplainerTag(
C, TaintedSymbols, TaintedIndexes,
996 Call.getCalleeStackFrame(0));
998 State = State->remove<TaintArgsOnPostVisit>(CurrentFrame);
999 C.addTransition(State, InjectionTag);
1002void GenericTaintChecker::printState(raw_ostream &Out,
ProgramStateRef State,
1003 const char *NL,
const char *Sep)
const {
1007void GenericTaintRule::process(
const GenericTaintChecker &Checker,
1008 const CallEvent &
Call, CheckerContext &
C)
const {
1010 const ArgIdxTy CallNumArgs = fromArgumentCount(
Call.getNumArgs());
1013 const auto ForEachCallArg = [&
C, &
Call, CallNumArgs](
auto &&Fun) {
1014 for (ArgIdxTy I = ReturnValueIndex; I < CallNumArgs; ++I) {
1015 const Expr *E = GetArgExpr(I,
Call);
1016 Fun(I, E,
C.getSVal(E));
1021 ForEachCallArg([
this, &Checker, &
C, &State](ArgIdxTy I,
const Expr *E, SVal) {
1023 if (isStdin(
C.getSVal(E),
C.getASTContext())) {
1026 if (SinkArgs.contains(I) && isTaintedOrPointsToTainted(State,
C.getSVal(E)))
1027 Checker.generateReportIfTainted(E, SinkMsg.value_or(MsgCustomSink),
C);
1031 ForEachCallArg([
this, &State](ArgIdxTy I,
const Expr *E, SVal S) {
1032 if (FilterArgs.contains(I)) {
1034 if (
auto P = getPointeeOf(State, S))
1044 bool IsMatching = PropSrcArgs.isEmpty();
1045 std::vector<SymbolRef> TaintedSymbols;
1046 std::vector<ArgIdxTy> TaintedIndexes;
1047 ForEachCallArg([
this, &
C, &IsMatching, &State, &TaintedSymbols,
1048 &TaintedIndexes](ArgIdxTy I,
const Expr *E, SVal) {
1049 std::optional<SVal> TaintedSVal =
1050 getTaintedPointeeOrPointer(State,
C.getSVal(E));
1052 IsMatching || (PropSrcArgs.contains(I) && TaintedSVal.has_value());
1055 if (TaintedSVal && !isStdin(*TaintedSVal,
C.getASTContext())) {
1056 std::vector<SymbolRef> TaintedArgSyms =
1058 if (!TaintedArgSyms.empty()) {
1059 llvm::append_range(TaintedSymbols, TaintedArgSyms);
1060 TaintedIndexes.push_back(I);
1070 const auto WouldEscape = [](SVal
V, QualType Ty) ->
bool {
1074 const bool IsNonConstRef = Ty->isReferenceType() && !Ty.isConstQualified();
1075 const bool IsNonConstPtr =
1076 Ty->isPointerType() && !Ty->getPointeeType().isConstQualified();
1078 return IsNonConstRef || IsNonConstPtr;
1082 auto &F = State->getStateManager().get_context<ArgIdxFactory>();
1083 ImmutableSet<ArgIdxTy>
Result = F.getEmptySet();
1085 [&](ArgIdxTy I,
const Expr *E, SVal
V) {
1086 if (PropDstArgs.contains(I)) {
1087 LLVM_DEBUG(llvm::dbgs() <<
"PreCall<";
Call.dump(llvm::dbgs());
1089 <<
"> prepares tainting arg index: " << I <<
'\n';);
1099 if (WouldEscape(
V, E->
getType()) && getTaintedPointeeOrPointer(State,
V)) {
1100 LLVM_DEBUG(
if (!
Result.contains(I)) {
1101 llvm::dbgs() <<
"PreCall<";
1102 Call.dump(llvm::dbgs());
1103 llvm::dbgs() <<
"> prepares tainting arg index: " << I <<
'\n';
1110 State = State->set<TaintArgsOnPostVisit>(
C.getStackFrame(),
Result);
1111 const NoteTag *InjectionTag = taintOriginTrackerTag(
1112 C, std::move(TaintedSymbols), std::move(TaintedIndexes),
1113 Call.getCalleeStackFrame(0));
1114 C.addTransition(State, InjectionTag);
/// True unless the analyzer option ShouldAssumeControlledEnvironment is set.
/// initTaintRules() uses this to decide whether to add the extra
/// environment-related rules (setproctitle_init sink, getenv source).
1117bool GenericTaintRule::UntrustedEnv(CheckerContext &
C) {
1118 return !
C.getAnalysisManager()
1119 .getAnalyzerOptions()
1120 .ShouldAssumeControlledEnvironment;
1123bool GenericTaintChecker::generateReportIfTainted(
const Expr *E, StringRef Msg,
1124 CheckerContext &
C)
const {
1126 if (!isTaintReporterCheckerEnabled)
1128 std::optional<SVal> TaintedSVal =
1129 getTaintedPointeeOrPointer(
C.getState(),
C.getSVal(E));
1136 if (ExplodedNode *N =
C.generateNonFatalErrorNode(
C.getState())) {
1137 auto report = std::make_unique<PathSensitiveBugReport>(*BT, Msg, N);
1140 report->markInteresting(TaintedSym);
1142 C.emitReport(std::move(report));
1160 const Decl *CallDecl =
Call.getDecl();
1167 const ArgIdxTy CallNumArgs = fromArgumentCount(
Call.getNumArgs());
1174 ArgNum = Format->getFormatIdx() - 1;
1183 const CXXMethodDecl *MDecl = dyn_cast<CXXMethodDecl>(FDecl);
1187 if ((Format->getType()->getName() ==
"printf") && CallNumArgs > ArgNum)
1194bool GenericTaintChecker::checkUncontrolledFormatString(
1195 const CallEvent &
Call, CheckerContext &
C)
const {
1197 ArgIdxTy ArgNum = 0;
1203 return generateReportIfTainted(
Call.getArgExpr(ArgNum),
1204 MsgUncontrolledFormatString,
C);
1207void GenericTaintChecker::taintUnsafeSocketProtocol(
const CallEvent &
Call,
1208 CheckerContext &
C)
const {
1209 if (
Call.getNumArgs() < 1)
1211 const IdentifierInfo *
ID =
Call.getCalleeIdentifier();
1214 if (
ID->getName() !=
"socket")
1217 SourceLocation DomLoc =
Call.getArgExpr(0)->getExprLoc();
1218 StringRef DomName =
C.getMacroNameOrSpelling(DomLoc);
1220 bool SafeProtocol = DomName ==
"AF_SYSTEM" || DomName ==
"AF_LOCAL" ||
1221 DomName ==
"AF_UNIX" || DomName ==
"AF_RESERVED_36";
1226 auto &F = State->getStateManager().get_context<ArgIdxFactory>();
1227 ImmutableSet<ArgIdxTy>
Result = F.add(F.getEmptySet(), ReturnValueIndex);
1228 State = State->set<TaintArgsOnPostVisit>(
C.getStackFrame(),
Result);
1229 C.addTransition(State);
1233void ento::registerTaintPropagationChecker(CheckerManager &Mgr) {
1237bool ento::shouldRegisterTaintPropagationChecker(
const CheckerManager &mgr) {
1241void ento::registerGenericTaintChecker(CheckerManager &Mgr) {
1242 GenericTaintChecker *checker = Mgr.
getChecker<GenericTaintChecker>();
1243 checker->isTaintReporterCheckerEnabled =
true;
1248bool ento::shouldRegisterGenericTaintChecker(
const CheckerManager &mgr) {
static bool getPrintfFormatArgumentNum(const CallEvent &Call, const CheckerContext &C, ArgIdxTy &ArgNum)
TODO: remove checking for printf format attributes and socket whitelisting from GenericTaintChecker,...
bool isPointerToCharArray(const QualType &QT)
#define REGISTER_MAP_WITH_PROGRAMSTATE(Name, Key, Value)
Declares an immutable map of type NameTy, suitable for placement into the ProgramState.
#define REGISTER_SET_FACTORY_WITH_PROGRAMSTATE(Name, Elem)
Declares an immutable set type Name and registers the factory for such sets in the program state,...
static bool contains(const std::set< tok::TokenKind > &Terminators, const Token &Tok)
Holds long-lived AST nodes (such as types and decls) that can be referred to throughout the semantic ...
QualType getFILEType() const
Retrieve the C FILE type.
StringRef getCheckerStringOption(StringRef CheckerName, StringRef OptionName, bool SearchInParents=false) const
Query an option's string value.
Represents a static or instance method of a struct/union/class.
Decl - This represents one declaration (or definition), e.g.
FunctionDecl * getAsFunction() LLVM_READONLY
Returns the function itself, or the templated function if this is a function template.
llvm::iterator_range< specific_attr_iterator< T > > specific_attrs() const
This represents one expression.
Represents a function declaration or definition.
It wraps the AnalysisDeclContext to represent both the call stack with the help of StackFrameContext ...
A (possibly-)qualified type.
QualType getCanonicalType() const
Scope - A scope is a transient data structure that is used while parsing the program.
SourceRange getSourceRange() const LLVM_READONLY
SourceLocation tokens are not useful in isolation - they are low level value objects created/interpre...
bool isPointerType() const
QualType getPointeeType() const
If this is a pointer, ObjC object pointer, or block pointer, this returns the respective pointee.
const BugType & getBugType() const
StringRef getCategory() const
An immutable map from CallDescriptions to arbitrary data.
Represents an abstract call to a function or method along a particular path.
const AnalyzerOptions & getAnalyzerOptions() const
CHECKER * registerChecker(AT &&...Args)
Register a single-part checker (derived from Checker): construct its singleton instance,...
void reportInvalidCheckerOptionValue(const CheckerFrontend *Checker, StringRef OptionName, StringRef ExpectedValueDesc) const
Emits an error through a DiagnosticsEngine about an invalid user supplied checker option value.
CheckerNameRef getCurrentCheckerName() const
CHECKER * getChecker(AT &&...Args)
If the singleton instance of a checker class is not yet constructed, then construct it (with the ...
Simple checker classes that implement one frontend (i.e.
ProgramStateRef assumeInclusiveRange(ProgramStateRef State, NonLoc Value, const llvm::APSInt &From, const llvm::APSInt &To, bool InBound)
The tag upon which the TagVisitor reacts.
void markInteresting(SymbolRef sym, bugreporter::TrackingKind TKind=bugreporter::TrackingKind::Thorough)
Marks a symbol as interesting.
bool isInteresting(SymbolRef sym) const
SVal - This represents a symbolic expression, which can be either an L-value or an R-value.
std::optional< T > getAs() const
Convert to the specified SVal type, returning std::nullopt if this SVal is not of the desired type.
const MemRegion * getAsRegion() const
const char *const TaintedData
ProgramStateRef addTaint(ProgramStateRef State, const Stmt *S, const LocationContext *LCtx, TaintTagType Kind=TaintTagGeneric)
Create a new state in which the value of the statement is marked as tainted.
std::vector< SymbolRef > getTaintedSymbols(ProgramStateRef State, const Stmt *S, const LocationContext *LCtx, TaintTagType Kind=TaintTagGeneric)
Returns the tainted Symbols for a given Statement and state.
bool isTainted(ProgramStateRef State, const Stmt *S, const LocationContext *LCtx, TaintTagType Kind=TaintTagGeneric)
Check if the statement has a tainted value in the given state.
ProgramStateRef removeTaint(ProgramStateRef State, SVal V)
void printTaint(ProgramStateRef State, raw_ostream &Out, const char *nl="\n", const char *sep="")
IntrusiveRefCntPtr< const ProgramState > ProgramStateRef
std::optional< T > getConfiguration(CheckerManager &Mgr, Checker *Chk, StringRef Option, StringRef ConfigFile)
Read the given file from the filesystem and parse it as a yaml file.
const Regex Rule("(.+)/(.+)\\.framework/")
The JSON file list parser is used to communicate input to InstallAPI.
bool isa(CodeGen::Address addr)
@ Result
The result type of a method or function.
@ None
The alignment was not explicit in code.
Diagnostic wrappers for TextAPI types for error reporting.
static void mapping(IO &IO, TaintConfiguration &Config)
static void mapping(IO &IO, TaintConfiguration::Filter &Filter)
static void mapping(IO &IO, TaintConfiguration::Propagation &Propagation)
static void mapping(IO &IO, TaintConfiguration::Sink &Sink)
static void enumeration(IO &IO, TaintConfiguration::VariadicType &Value)