clang 20.0.0git
Taint.cpp
Go to the documentation of this file.
1//=== Taint.cpp - Taint tracking and basic propagation rules. ------*- C++ -*-//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// Defines basic, non-domain-specific mechanisms for tracking tainted values.
10//
11//===----------------------------------------------------------------------===//
12
17#include <optional>
18
19using namespace clang;
20using namespace ento;
21using namespace taint;
22
// Program-state trait registrations used by the taint-tracking code below.
// NOTE(review): listing lines 24 and 27-28 are absent from this extract. The
// code below reads a `TaintMap` trait and a `TaintedSubRegions` map type, so
// their registrations presumably sit on those missing lines -- confirm
// against the original file.
23// Fully tainted symbols.
25
26// Partially tainted symbols.
// Registers DerivedSymTaint, keyed by SymbolRef with TaintedSubRegions values.
29REGISTER_MAP_WITH_PROGRAMSTATE(DerivedSymTaint, SymbolRef, TaintedSubRegions)
30
// Writes the contents of the TaintMap stored in State to Out.
//   State - program state whose TaintMap is read.
//   Out   - destination stream.
//   NL    - terminator written after the header and after every entry.
//   Sep   - accepted but never referenced in the body below.
// The header "Tainted symbols:" is only emitted when the map is non-empty;
// each entry is then printed as "<key> : <value>" followed by NL.
31void taint::printTaint(ProgramStateRef State, raw_ostream &Out, const char *NL,
32 const char *Sep) {
33 TaintMapTy TM = State->get<TaintMap>();
34
35 if (!TM.isEmpty())
36 Out << "Tainted symbols:" << NL;
37
38 for (const auto &I : TM)
39 Out << I.first << " : " << I.second << NL;
40}
41
// NOTE(review): the signature line (listing line 42) is missing from this
// extract; the cross-reference index at the end of the page gives it as
// `LLVM_DUMP_METHOD void dumpTaint(ProgramStateRef State)`.
// Prints the taint information of State to llvm::errs() by delegating to
// printTaint with only the state and stream supplied.
43 printTaint(State, llvm::errs());
44}
45
// NOTE(review): the first signature line (listing line 46) is missing from
// this extract; the cross-reference index at the end of the page gives the
// full signature as addTaint(ProgramStateRef State, const Stmt *S,
// const LocationContext *LCtx, TaintTagType Kind = TaintTagGeneric).
// Statement overload: looks up the value of S in LCtx through
// State->getSVal and forwards that value, together with Kind, to another
// addTaint overload. Returns whatever that overload returns.
47 const LocationContext *LCtx,
48 TaintTagType Kind) {
49 return addTaint(State, State->getSVal(S, LCtx), Kind);
50}
51
// NOTE(review): the first signature line (listing line 52) is missing from
// this extract; the body uses `State` and a value `V`, so this is presumably
// the SVal overload of addTaint -- confirm against the original file.
// Taints V with Kind, trying three strategies in order:
//   1. V is (or wraps) a symbol: taint that symbol directly.
//   2. V is a lazy compound value whose default binding is a symbol: record
//      a partial taint on that symbol for the value's region.
//   3. Otherwise: hand V.getAsRegion() to the region overload.
53 TaintTagType Kind) {
54 SymbolRef Sym = V.getAsSymbol();
55 if (Sym)
56 return addTaint(State, Sym, Kind);
57
58 // If the SVal represents a structure, try to mass-taint all values within the
59 // structure. For now it only works efficiently on lazy compound values that
60 // were conjured during a conservative evaluation of a function - either as
61 // return values of functions that return structures or arrays by value, or as
62 // values of structures or arrays passed into the function by reference,
63 // directly or through pointer aliasing. Such lazy compound values are
64 // characterized by having exactly one binding in their captured store within
65 // their parent region, which is a conjured symbol default-bound to the base
66 // region of the parent region.
67 if (auto LCV = V.getAs<nonloc::LazyCompoundVal>()) {
68 if (std::optional<SVal> binding =
69 State->getStateManager().getStoreManager().getDefaultBinding(
70 *LCV)) {
71 if (SymbolRef Sym = binding->getAsSymbol())
72 return addPartialTaint(State, Sym, LCV->getRegion(), Kind);
73 }
74 }
75
// Fallback: R is passed on without a null check here; the region overload
// visible in this file uses dyn_cast_or_null on its argument.
76 const MemRegion *R = V.getAsRegion();
77 return addTaint(State, R, Kind);
78}
79
// NOTE(review): the first signature line (listing line 80) is missing from
// this extract; the body uses `State` and a region `R`, so this is presumably
// the MemRegion overload of addTaint -- confirm against the original file.
// Taints the symbol behind R when R is a SymbolicRegion. For any other
// region, or a null R (dyn_cast_or_null is used), State is returned
// unchanged.
81 TaintTagType Kind) {
82 if (const SymbolicRegion *SR = dyn_cast_or_null<SymbolicRegion>(R))
83 return addTaint(State, SR->getSymbol(), Kind);
84 return State;
85}
86
// NOTE(review): the first signature line (listing line 87) is missing from
// this extract; the body uses `State` and a symbol `Sym`, so this is
// presumably the SymbolRef overload of addTaint -- confirm against the
// original file.
// Records Kind for Sym in the TaintMap and returns the resulting state.
// Any SymbolCast wrappers are peeled off first, so the entry is stored under
// the innermost operand. The returned state is asserted to be non-null.
88 TaintTagType Kind) {
89 // If this is a symbol cast, remove the cast before adding the taint. Taint
90 // is cast agnostic.
91 while (const SymbolCast *SC = dyn_cast<SymbolCast>(Sym))
92 Sym = SC->getOperand();
93
94 ProgramStateRef NewState = State->set<TaintMap>(Sym, Kind);
95 assert(NewState);
96 return NewState;
97}
98
// NOTE(review): the signature line (listing line 99) is missing from this
// extract; the cross-reference index at the end of the page gives it as
// `ProgramStateRef removeTaint(ProgramStateRef State, SVal V)`.
// Removes taint from V: if V yields a symbol, that symbol is untainted;
// otherwise V.getAsRegion() is handed to the region overload.
100 SymbolRef Sym = V.getAsSymbol();
101 if (Sym)
102 return removeTaint(State, Sym);
103
104 const MemRegion *R = V.getAsRegion();
105 return removeTaint(State, R);
106}
107
// NOTE(review): the signature line (listing line 108) is missing from this
// extract; the body uses `State` and a region `R`, so this is presumably the
// MemRegion overload of removeTaint -- confirm against the original file.
// Removes taint from the symbol behind R when R is a SymbolicRegion. For any
// other region, or a null R (dyn_cast_or_null is used), State is returned
// unchanged.
109 if (const SymbolicRegion *SR = dyn_cast_or_null<SymbolicRegion>(R))
110 return removeTaint(State, SR->getSymbol());
111 return State;
112}
113
// NOTE(review): the signature line (listing line 114) is missing from this
// extract; the body uses `State` and a symbol `Sym`, so this is presumably
// the SymbolRef overload of removeTaint -- confirm against the original file.
// Erases the TaintMap entry for Sym and returns the resulting state, which is
// asserted to be non-null. Only the TaintMap is touched in this body.
115 // If this is a symbol cast, remove the cast before removing the taint. Taint
116 // is cast agnostic.
117 while (const SymbolCast *SC = dyn_cast<SymbolCast>(Sym))
118 Sym = SC->getOperand();
119
120 ProgramStateRef NewState = State->remove<TaintMap>(Sym);
121 assert(NewState);
122 return NewState;
123}
124
// NOTE(review): the first signature line (listing line 125) is missing from
// this extract; the cross-reference index at the end of the page gives the
// full signature as addPartialTaint(ProgramStateRef State,
// SymbolRef ParentSym, const SubRegion *SubRegion,
// TaintTagType Kind = TaintTagGeneric).
// Records that SubRegion of ParentSym is tainted with Kind by adding a
// (SubRegion, Kind) entry to the DerivedSymTaint map under ParentSym.
126 SymbolRef ParentSym,
127 const SubRegion *SubRegion,
128 TaintTagType Kind) {
129 // Ignore partial taint if the entire parent symbol is already tainted.
130 if (const TaintTagType *T = State->get<TaintMap>(ParentSym))
131 if (*T == Kind)
132 return State;
133
134 // Partial taint applies if only a portion of the symbol is tainted.
// NOTE(review): listing line 135 is absent from this extract, so the return
// on listing line 136 reads as unconditional here and everything after it
// looks unreachable. The comment above suggests a guard on SubRegion belongs
// on the missing line -- confirm against the original file.
136 return addTaint(State, ParentSym, Kind);
137
// Start from the sub-region map already stored for ParentSym, or from an
// empty map when there is none, then add the new entry and store it back.
138 const TaintedSubRegions *SavedRegs = State->get<DerivedSymTaint>(ParentSym);
139 TaintedSubRegions::Factory &F = State->get_context<TaintedSubRegions>();
140 TaintedSubRegions Regs = SavedRegs ? *SavedRegs : F.getEmptyMap();
141
142 Regs = F.add(Regs, SubRegion, Kind);
143 ProgramStateRef NewState = State->set<DerivedSymTaint>(ParentSym, Regs);
144 assert(NewState);
145 return NewState;
146}
147
// NOTE(review): the first signature line (listing line 148) is missing from
// this extract; the cross-reference index at the end of the page gives the
// full signature as isTainted(ProgramStateRef State, const Stmt *S,
// const LocationContext *LCtx, TaintTagType Kind = TaintTagGeneric).
// Statement overload: true when getTaintedSymbolsImpl returns a non-empty
// result. ReturnFirstOnly is set so the search can stop at the first hit.
149 const LocationContext *LCtx, TaintTagType Kind) {
150 return !getTaintedSymbolsImpl(State, S, LCtx, Kind, /*ReturnFirstOnly=*/true)
151 .empty();
152}
153
// NOTE(review): the signature line (listing line 154) is missing from this
// extract; the body uses `State`, `V` and `Kind`, so this is presumably the
// SVal overload of isTainted -- confirm against the original file.
// True when getTaintedSymbolsImpl returns a non-empty result for V.
// ReturnFirstOnly is set so the search can stop at the first hit.
155 return !getTaintedSymbolsImpl(State, V, Kind, /*ReturnFirstOnly=*/true)
156 .empty();
157}
158
// NOTE(review): the first signature line (listing line 159) is missing from
// this extract; the body uses `State`, `Reg` and `K`, so this is presumably
// the MemRegion overload of isTainted -- confirm against the original file.
// True when getTaintedSymbolsImpl returns a non-empty result for Reg.
// ReturnFirstOnly is set so the search can stop at the first hit.
160 TaintTagType K) {
161 return !getTaintedSymbolsImpl(State, Reg, K, /*ReturnFirstOnly=*/true)
162 .empty();
163}
164
// NOTE(review): the signature line (listing line 165) is missing from this
// extract; the body uses `State`, `Sym` and `Kind`, so this is presumably
// the SymbolRef overload of isTainted -- confirm against the original file.
// True when getTaintedSymbolsImpl returns a non-empty result for Sym.
// ReturnFirstOnly is set so the search can stop at the first hit.
166 return !getTaintedSymbolsImpl(State, Sym, Kind, /*ReturnFirstOnly=*/true)
167 .empty();
168}
169
// Statement overload: returns every tainted symbol that getTaintedSymbolsImpl
// finds for the value of S in LCtx with tag Kind (ReturnFirstOnly is false,
// so the search is not cut short after the first hit).
170std::vector<SymbolRef> taint::getTaintedSymbols(ProgramStateRef State,
171 const Stmt *S,
172 const LocationContext *LCtx,
173 TaintTagType Kind) {
174 return getTaintedSymbolsImpl(State, S, LCtx, Kind, /*ReturnFirstOnly=*/false);
175}
176
// SVal overload: returns every tainted symbol that getTaintedSymbolsImpl
// finds for V with tag Kind (ReturnFirstOnly is false, so the search is not
// cut short after the first hit).
177std::vector<SymbolRef> taint::getTaintedSymbols(ProgramStateRef State, SVal V,
178 TaintTagType Kind) {
179 return getTaintedSymbolsImpl(State, V, Kind, /*ReturnFirstOnly=*/false);
180}
181
// SymbolRef overload: returns every tainted symbol that getTaintedSymbolsImpl
// finds for Sym with tag Kind (ReturnFirstOnly is false, so the search is not
// cut short after the first hit).
182std::vector<SymbolRef> taint::getTaintedSymbols(ProgramStateRef State,
183 SymbolRef Sym,
184 TaintTagType Kind) {
185 return getTaintedSymbolsImpl(State, Sym, Kind, /*ReturnFirstOnly=*/false);
186}
187
// MemRegion overload: returns every tainted symbol that getTaintedSymbolsImpl
// finds for Reg with tag Kind (ReturnFirstOnly is false, so the search is not
// cut short after the first hit).
188std::vector<SymbolRef> taint::getTaintedSymbols(ProgramStateRef State,
189 const MemRegion *Reg,
190 TaintTagType Kind) {
191 return getTaintedSymbolsImpl(State, Reg, Kind, /*ReturnFirstOnly=*/false);
192}
193
// NOTE(review): the first signature line (listing line 194) is missing from
// this extract; the cross-reference index at the end of the page gives the
// full signature as std::vector<SymbolRef> getTaintedSymbolsImpl(
// ProgramStateRef State, const Stmt *S, const LocationContext *LCtx,
// TaintTagType Kind, bool returnFirstOnly).
// Statement overload: resolves S to its SVal in LCtx and defers to the SVal
// overload, passing Kind and returnFirstOnly through unchanged.
195 const Stmt *S,
196 const LocationContext *LCtx,
197 TaintTagType Kind,
198 bool returnFirstOnly) {
199 SVal val = State->getSVal(S, LCtx);
200 return getTaintedSymbolsImpl(State, val, Kind, returnFirstOnly);
201}
202
// NOTE(review): the first signature line (listing line 203) is missing from
// this extract; presumably it opens the SVal overload of
// getTaintedSymbolsImpl -- confirm against the original file.
// Dispatches on what V holds: a symbol is checked first, then a region. A
// value that yields neither produces an empty result.
204 SVal V, TaintTagType Kind,
205 bool returnFirstOnly) {
206 if (SymbolRef Sym = V.getAsSymbol())
207 return getTaintedSymbolsImpl(State, Sym, Kind, returnFirstOnly);
208 if (const MemRegion *Reg = V.getAsRegion())
209 return getTaintedSymbolsImpl(State, Reg, Kind, returnFirstOnly);
210 return {};
211}
212
// NOTE(review): the first signature line (listing line 213) is missing from
// this extract; presumably it opens the MemRegion overload of
// getTaintedSymbolsImpl -- confirm against the original file.
// Collects the tainted symbols that make Reg tainted with tag K. Three
// sources are checked in order and their results are appended to one vector:
// the index of an ElementRegion, the symbol of a SymbolicRegion, and
// (recursively) the super-region of any SubRegion. When returnFirstOnly is
// set, the function returns as soon as the vector is non-empty. A null Reg
// yields an empty vector.
214 const MemRegion *Reg,
215 TaintTagType K,
216 bool returnFirstOnly) {
217 std::vector<SymbolRef> TaintedSymbols;
218 if (!Reg)
219 return TaintedSymbols;
220
221 // Element region (array element) is tainted if the offset is tainted.
222 if (const ElementRegion *ER = dyn_cast<ElementRegion>(Reg)) {
223 std::vector<SymbolRef> TaintedIndex =
224 getTaintedSymbolsImpl(State, ER->getIndex(), K, returnFirstOnly);
225 llvm::append_range(TaintedSymbols, TaintedIndex);
226 if (returnFirstOnly && !TaintedSymbols.empty())
227 return TaintedSymbols; // return early if needed
228 }
229
230 // Symbolic region is tainted if the corresponding symbol is tainted.
231 if (const SymbolicRegion *SR = dyn_cast<SymbolicRegion>(Reg)) {
232 std::vector<SymbolRef> TaintedRegions =
233 getTaintedSymbolsImpl(State, SR->getSymbol(), K, returnFirstOnly);
234 llvm::append_range(TaintedSymbols, TaintedRegions);
235 if (returnFirstOnly && !TaintedSymbols.empty())
236 return TaintedSymbols; // return early if needed
237 }
238
239 // Any subregion (including Element and Symbolic regions) is tainted if its
240 // super-region is tainted.
// The local is named ER here as well, but it holds a SubRegion; this branch
// is not an `else`, so it also runs for regions matched by the checks above.
241 if (const SubRegion *ER = dyn_cast<SubRegion>(Reg)) {
242 std::vector<SymbolRef> TaintedSubRegions =
243 getTaintedSymbolsImpl(State, ER->getSuperRegion(), K, returnFirstOnly);
244 llvm::append_range(TaintedSymbols, TaintedSubRegions);
245 if (returnFirstOnly && !TaintedSymbols.empty())
246 return TaintedSymbols; // return early if needed
247 }
248
249 return TaintedSymbols;
250}
251
// NOTE(review): the first signature line (listing line 252) is missing from
// this extract; presumably it opens the SymbolRef overload of
// getTaintedSymbolsImpl -- confirm against the original file.
// Collects the symbols that make Sym tainted with tag Kind by walking every
// symbol in Sym->symbols(). A sub-symbol contributes when it has a matching
// TaintMap entry, when it is derived from a tainted parent or from a parent
// with a matching tainted sub-region, or when it is the value of a tainted
// region. When returnFirstOnly is set, the function returns as soon as one
// result has been collected. A null Sym, or a symbol whose complexity
// exceeds MaxTaintedSymbolComplexity, yields an empty vector.
253 SymbolRef Sym,
254 TaintTagType Kind,
255 bool returnFirstOnly) {
256 std::vector<SymbolRef> TaintedSymbols;
257 if (!Sym)
258 return TaintedSymbols;
259
260 // HACK:https://discourse.llvm.org/t/rfc-make-istainted-and-complex-symbols-friends/79570
// Symbols above the configured complexity limit are reported as untainted
// without being traversed.
261 if (const auto &Opts = State->getAnalysisManager().getAnalyzerOptions();
262 Sym->computeComplexity() > Opts.MaxTaintedSymbolComplexity) {
263 return {};
264 }
265
266 // Traverse all the symbols this symbol depends on to see if any are tainted.
267 for (SymbolRef SubSym : Sym->symbols()) {
// Only SymbolData sub-symbols are examined; everything else is skipped.
268 if (!isa<SymbolData>(SubSym))
269 continue;
270
271 if (const TaintTagType *Tag = State->get<TaintMap>(SubSym)) {
272 if (*Tag == Kind) {
273 TaintedSymbols.push_back(SubSym);
274 if (returnFirstOnly)
275 return TaintedSymbols; // return early if needed
276 }
277 }
278
279 if (const auto *SD = dyn_cast<SymbolDerived>(SubSym)) {
280 // If this is a SymbolDerived with a tainted parent, it's also tainted.
281 std::vector<SymbolRef> TaintedParents = getTaintedSymbolsImpl(
282 State, SD->getParentSymbol(), Kind, returnFirstOnly);
283 llvm::append_range(TaintedSymbols, TaintedParents);
284 if (returnFirstOnly && !TaintedSymbols.empty())
285 return TaintedSymbols; // return early if needed
286
287 // If this is a SymbolDerived with the same parent symbol as another
288 // tainted SymbolDerived and a region that's a sub-region of that
289 // tainted symbol, it's also tainted.
290 if (const TaintedSubRegions *Regs =
291 State->get<DerivedSymTaint>(SD->getParentSymbol())) {
292 const TypedValueRegion *R = SD->getRegion();
293 for (auto I : *Regs) {
294 // FIXME: The logic to identify tainted regions could be more
295 // complete. For example, this would not currently identify
296 // overlapping fields in a union as tainted. To identify this we can
297 // check for overlapping/nested byte offsets.
298 if (Kind == I.second && R->isSubRegionOf(I.first)) {
// The parent symbol is recorded once per matching sub-region entry, so it can
// appear several times when returnFirstOnly is false. The emptiness check
// below always holds right after the push_back.
299 TaintedSymbols.push_back(SD->getParentSymbol());
300 if (returnFirstOnly && !TaintedSymbols.empty())
301 return TaintedSymbols; // return early if needed
302 }
303 }
304 }
305 }
306
307 // If memory region is tainted, data is also tainted.
308 if (const auto *SRV = dyn_cast<SymbolRegionValue>(SubSym)) {
309 std::vector<SymbolRef> TaintedRegions =
310 getTaintedSymbolsImpl(State, SRV->getRegion(), Kind, returnFirstOnly);
311 llvm::append_range(TaintedSymbols, TaintedRegions);
312 if (returnFirstOnly && !TaintedSymbols.empty())
313 return TaintedSymbols; // return early if needed
314 }
315
316 // If this is a SymbolCast from a tainted value, it's also tainted.
// NOTE(review): SubSym has already passed the isa<SymbolData> filter at the
// top of the loop; whether a SymbolCast can ever reach this branch depends on
// the symbol class hierarchy, which is not visible here -- confirm.
317 if (const auto *SC = dyn_cast<SymbolCast>(SubSym)) {
318 std::vector<SymbolRef> TaintedCasts =
319 getTaintedSymbolsImpl(State, SC->getOperand(), Kind, returnFirstOnly);
320 llvm::append_range(TaintedSymbols, TaintedCasts);
321 if (returnFirstOnly && !TaintedSymbols.empty())
322 return TaintedSymbols; // return early if needed
323 }
324 }
325 return TaintedSymbols;
326}
#define V(N, I)
Definition: ASTContext.h:3341
#define REGISTER_MAP_WITH_PROGRAMSTATE(Name, Key, Value)
Declares an immutable map of type NameTy, suitable for placement into the ProgramState.
#define REGISTER_MAP_FACTORY_WITH_PROGRAMSTATE(Name, Key, Value)
Declares an immutable map type Name and registers the factory for such maps in the program state,...
It wraps the AnalysisDeclContext to represent both the call stack with the help of StackFrameContext ...
Stmt - This represents one statement.
Definition: Stmt.h:84
ElementRegion is used to represent both array elements and casts.
Definition: MemRegion.h:1199
MemRegion - The root abstract class for all memory regions.
Definition: MemRegion.h:97
LLVM_ATTRIBUTE_RETURNS_NONNULL const MemRegion * getBaseRegion() const
Definition: MemRegion.cpp:1354
SVal - This represents a symbolic expression, which can be either an L-value or an R-value.
Definition: SVals.h:55
SubRegion - A region that subsets another larger region.
Definition: MemRegion.h:446
bool isSubRegionOf(const MemRegion *R) const override
Check if the region is a subregion of the given region.
Definition: MemRegion.cpp:132
Symbolic value.
Definition: SymExpr.h:30
llvm::iterator_range< symbol_iterator > symbols() const
Definition: SymExpr.h:87
virtual unsigned computeComplexity() const =0
Represents a cast expression.
SymbolicRegion - A special, "non-concrete" region.
Definition: MemRegion.h:780
TypedValueRegion - An abstract class representing regions having a typed value.
Definition: MemRegion.h:535
While nonloc::CompoundVal covers a few simple use cases, nonloc::LazyCompoundVal is a more performant...
Definition: SVals.h:383
std::vector< SymbolRef > getTaintedSymbolsImpl(ProgramStateRef State, const Stmt *S, const LocationContext *LCtx, TaintTagType Kind, bool returnFirstOnly)
Definition: Taint.cpp:194
ProgramStateRef addTaint(ProgramStateRef State, const Stmt *S, const LocationContext *LCtx, TaintTagType Kind=TaintTagGeneric)
Create a new state in which the value of the statement is marked as tainted.
Definition: Taint.cpp:46
std::vector< SymbolRef > getTaintedSymbols(ProgramStateRef State, const Stmt *S, const LocationContext *LCtx, TaintTagType Kind=TaintTagGeneric)
Returns the tainted Symbols for a given Statement and state.
Definition: Taint.cpp:170
bool isTainted(ProgramStateRef State, const Stmt *S, const LocationContext *LCtx, TaintTagType Kind=TaintTagGeneric)
Check if the statement has a tainted value in the given state.
Definition: Taint.cpp:148
ProgramStateRef removeTaint(ProgramStateRef State, SVal V)
Definition: Taint.cpp:99
void printTaint(ProgramStateRef State, raw_ostream &Out, const char *nl="\n", const char *sep="")
ProgramStateRef addPartialTaint(ProgramStateRef State, SymbolRef ParentSym, const SubRegion *SubRegion, TaintTagType Kind=TaintTagGeneric)
Create a new state in which a sub-region of a given symbol is tainted.
Definition: Taint.cpp:125
LLVM_DUMP_METHOD void dumpTaint(ProgramStateRef State)
Definition: Taint.cpp:42
The JSON file list parser is used to communicate input to InstallAPI.
const FunctionProtoType * T