clang 19.0.0git
Taint.cpp
Go to the documentation of this file.
1//=== Taint.cpp - Taint tracking and basic propagation rules. ------*- C++ -*-//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// Defines basic, non-domain-specific mechanisms for tracking tainted values.
10//
11//===----------------------------------------------------------------------===//
12
16#include <optional>
17
18using namespace clang;
19using namespace ento;
20using namespace taint;
21
22// Fully tainted symbols.
24
25// Partially tainted symbols.
28REGISTER_MAP_WITH_PROGRAMSTATE(DerivedSymTaint, SymbolRef, TaintedSubRegions)
29
30void taint::printTaint(ProgramStateRef State, raw_ostream &Out, const char *NL,
31 const char *Sep) {
32 TaintMapTy TM = State->get<TaintMap>();
33
34 if (!TM.isEmpty())
35 Out << "Tainted symbols:" << NL;
36
37 for (const auto &I : TM)
38 Out << I.first << " : " << I.second << NL;
39}
40
// Print the state's taint information to LLVM's error stream, relying on
// printTaint's defaults for the remaining arguments.
42 printTaint(State, llvm::errs());
43}
44
46 const LocationContext *LCtx,
47 TaintTagType Kind) {
// Look up the value bound to statement S in this location context and forward
// to the overload that taints a value.
48 return addTaint(State, State->getSVal(S, LCtx), Kind);
49}
50
52 TaintTagType Kind) {
// Preferred path: if the value can be viewed as a symbol, taint that symbol
// directly.
53 SymbolRef Sym = V.getAsSymbol();
54 if (Sym)
55 return addTaint(State, Sym, Kind);
56
57 // If the SVal represents a structure, try to mass-taint all values within the
58 // structure. For now it only works efficiently on lazy compound values that
59 // were conjured during a conservative evaluation of a function - either as
60 // return values of functions that return structures or arrays by value, or as
61 // values of structures or arrays passed into the function by reference,
62 // directly or through pointer aliasing. Such lazy compound values are
63 // characterized by having exactly one binding in their captured store within
64 // their parent region, which is a conjured symbol default-bound to the base
65 // region of the parent region.
66 if (auto LCV = V.getAs<nonloc::LazyCompoundVal>()) {
// Ask the store manager for the default binding of the lazy compound value;
// only a binding that is itself a symbol leads to partial taint being added
// for the value's region.
67 if (std::optional<SVal> binding =
68 State->getStateManager().getStoreManager().getDefaultBinding(
69 *LCV)) {
70 if (SymbolRef Sym = binding->getAsSymbol())
71 return addPartialTaint(State, Sym, LCV->getRegion(), Kind);
72 }
73 }
74
// Last resort: treat the value as a memory region. The result of
// getAsRegion() is passed on without a null check here.
75 const MemRegion *R = V.getAsRegion();
76 return addTaint(State, R, Kind);
77}
78
80 TaintTagType Kind) {
// Only a SymbolicRegion yields a symbol to taint. dyn_cast_or_null accepts a
// null R; for null and for every other region kind State is returned
// unchanged.
81 if (const SymbolicRegion *SR = dyn_cast_or_null<SymbolicRegion>(R))
82 return addTaint(State, SR->getSymbol(), Kind);
83 return State;
84}
85
87 TaintTagType Kind) {
88 // If this is a symbol cast, remove the cast before adding the taint. Taint
89 // is cast agnostic.
// The loop peels nested casts one layer at a time until a non-cast symbol
// remains.
90 while (const SymbolCast *SC = dyn_cast<SymbolCast>(Sym))
91 Sym = SC->getOperand();
92
// Store Kind for the cast-stripped symbol in the TaintMap and return the
// resulting state, which is asserted to be non-null.
93 ProgramStateRef NewState = State->set<TaintMap>(Sym, Kind);
94 assert(NewState);
95 return NewState;
96}
97
// Value form: remove taint via the value's symbol when it has one, otherwise
// via its region (the getAsRegion() result is forwarded without a null check).
99 SymbolRef Sym = V.getAsSymbol();
100 if (Sym)
101 return removeTaint(State, Sym);
102
103 const MemRegion *R = V.getAsRegion();
104 return removeTaint(State, R);
105}
106
// Region form: only a SymbolicRegion yields a symbol to untaint.
// dyn_cast_or_null accepts a null R; null and all other region kinds return
// State unchanged.
108 if (const SymbolicRegion *SR = dyn_cast_or_null<SymbolicRegion>(R))
109 return removeTaint(State, SR->getSymbol());
110 return State;
111}
112
114 // If this is a symbol cast, remove the cast before removing the taint. Taint
115 // is cast agnostic.
116 while (const SymbolCast *SC = dyn_cast<SymbolCast>(Sym))
117 Sym = SC->getOperand();
118
// Drop the cast-stripped symbol's entry from the TaintMap and return the
// resulting state, which is asserted to be non-null.
119 ProgramStateRef NewState = State->remove<TaintMap>(Sym);
120 assert(NewState);
121 return NewState;
122}
123
125 SymbolRef ParentSym,
126 const SubRegion *SubRegion,
127 TaintTagType Kind) {
128 // Ignore partial taint if the entire parent symbol is already tainted.
// "Already tainted" here means the TaintMap holds exactly this Kind for
// ParentSym; an entry with a different tag does not short-circuit.
129 if (const TaintTagType *T = State->get<TaintMap>(ParentSym))
130 if (*T == Kind)
131 return State;
132
133 // Partial taint applies if only a portion of the symbol is tainted.
// NOTE(review): listing line 134 is absent here. It presumably holds the
// condition that guards the return below; as shown, everything after this
// return would be unreachable - confirm against the original file.
135 return addTaint(State, ParentSym, Kind);
136
// Start from the sub-region map already stored for ParentSym, or from an
// empty map when there is none.
137 const TaintedSubRegions *SavedRegs = State->get<DerivedSymTaint>(ParentSym);
138 TaintedSubRegions::Factory &F = State->get_context<TaintedSubRegions>();
139 TaintedSubRegions Regs = SavedRegs ? *SavedRegs : F.getEmptyMap();
140
// Add the (SubRegion -> Kind) entry and store the updated map back under
// ParentSym in DerivedSymTaint; the resulting state is asserted non-null.
141 Regs = F.add(Regs, SubRegion, Kind);
142 ProgramStateRef NewState = State->set<DerivedSymTaint>(ParentSym, Regs);
143 assert(NewState);
144 return NewState;
145}
146
148 const LocationContext *LCtx, TaintTagType Kind) {
// Statement form: true iff the query, told to stop at the first hit, returns
// a non-empty result.
149 return !getTaintedSymbolsImpl(State, S, LCtx, Kind, /*ReturnFirstOnly=*/true)
150 .empty();
151}
152
// Value form: same first-hit query, applied to V.
154 return !getTaintedSymbolsImpl(State, V, Kind, /*ReturnFirstOnly=*/true)
155 .empty();
156}
157
159 TaintTagType K) {
// Region form: same first-hit query, applied to Reg.
160 return !getTaintedSymbolsImpl(State, Reg, K, /*ReturnFirstOnly=*/true)
161 .empty();
162}
163
// Symbol form: same first-hit query, applied to Sym.
165 return !getTaintedSymbolsImpl(State, Sym, Kind, /*ReturnFirstOnly=*/true)
166 .empty();
167}
168
169std::vector<SymbolRef> taint::getTaintedSymbols(ProgramStateRef State,
170 const Stmt *S,
171 const LocationContext *LCtx,
172 TaintTagType Kind) {
173 return getTaintedSymbolsImpl(State, S, LCtx, Kind, /*ReturnFirstOnly=*/false);
174}
175
176std::vector<SymbolRef> taint::getTaintedSymbols(ProgramStateRef State, SVal V,
177 TaintTagType Kind) {
178 return getTaintedSymbolsImpl(State, V, Kind, /*ReturnFirstOnly=*/false);
179}
180
181std::vector<SymbolRef> taint::getTaintedSymbols(ProgramStateRef State,
182 SymbolRef Sym,
183 TaintTagType Kind) {
184 return getTaintedSymbolsImpl(State, Sym, Kind, /*ReturnFirstOnly=*/false);
185}
186
187std::vector<SymbolRef> taint::getTaintedSymbols(ProgramStateRef State,
188 const MemRegion *Reg,
189 TaintTagType Kind) {
190 return getTaintedSymbolsImpl(State, Reg, Kind, /*ReturnFirstOnly=*/false);
191}
192
194 const Stmt *S,
195 const LocationContext *LCtx,
196 TaintTagType Kind,
197 bool returnFirstOnly) {
// Statement form: evaluate S to its value in this location context, then
// delegate to the value form below.
198 SVal val = State->getSVal(S, LCtx);
199 return getTaintedSymbolsImpl(State, val, Kind, returnFirstOnly);
200}
201
203 SVal V, TaintTagType Kind,
204 bool returnFirstOnly) {
// Value form: dispatch on what the value can be viewed as. The symbol view is
// tried first, then the region view; a value that is neither yields an empty
// result.
205 if (SymbolRef Sym = V.getAsSymbol())
206 return getTaintedSymbolsImpl(State, Sym, Kind, returnFirstOnly);
207 if (const MemRegion *Reg = V.getAsRegion())
208 return getTaintedSymbolsImpl(State, Reg, Kind, returnFirstOnly);
209 return {};
210}
211
213 const MemRegion *Reg,
214 TaintTagType K,
215 bool returnFirstOnly) {
// Region form: accumulates tainted symbols reachable from Reg. A null region
// yields an empty result. The three checks below are independent `if`s, not
// an else-chain, so more than one may apply to the same region. When
// returnFirstOnly is set, the function returns as soon as the accumulated
// list is non-empty.
216 std::vector<SymbolRef> TaintedSymbols;
217 if (!Reg)
218 return TaintedSymbols;
219 // Element region (array element) is tainted if either the base or the offset
220 // are tainted.
221 if (const ElementRegion *ER = dyn_cast<ElementRegion>(Reg)) {
// The index is examined before the super region.
222 std::vector<SymbolRef> TaintedIndex =
223 getTaintedSymbolsImpl(State, ER->getIndex(), K, returnFirstOnly);
224 llvm::append_range(TaintedSymbols, TaintedIndex);
225 if (returnFirstOnly && !TaintedSymbols.empty())
226 return TaintedSymbols; // return early if needed
227 std::vector<SymbolRef> TaintedSuperRegion =
228 getTaintedSymbolsImpl(State, ER->getSuperRegion(), K, returnFirstOnly);
229 llvm::append_range(TaintedSymbols, TaintedSuperRegion);
230 if (returnFirstOnly && !TaintedSymbols.empty())
231 return TaintedSymbols; // return early if needed
232 }
233
// A symbolic region contributes whatever is tainted in its underlying symbol.
234 if (const SymbolicRegion *SR = dyn_cast<SymbolicRegion>(Reg)) {
235 std::vector<SymbolRef> TaintedRegions =
236 getTaintedSymbolsImpl(State, SR->getSymbol(), K, returnFirstOnly);
237 llvm::append_range(TaintedSymbols, TaintedRegions);
238 if (returnFirstOnly && !TaintedSymbols.empty())
239 return TaintedSymbols; // return early if needed
240 }
241
// Any sub-region also contributes the taint of its super region, found by
// recursing upward.
// NOTE(review): if an ElementRegion also matches this SubRegion cast, its
// super region is presumably visited a second time here, which could append
// duplicate symbols when returnFirstOnly is false - confirm.
242 if (const SubRegion *ER = dyn_cast<SubRegion>(Reg)) {
243 std::vector<SymbolRef> TaintedSubRegions =
244 getTaintedSymbolsImpl(State, ER->getSuperRegion(), K, returnFirstOnly);
245 llvm::append_range(TaintedSymbols, TaintedSubRegions);
246 if (returnFirstOnly && !TaintedSymbols.empty())
247 return TaintedSymbols; // return early if needed
248 }
249
250 return TaintedSymbols;
251}
252
254 SymbolRef Sym,
255 TaintTagType Kind,
256 bool returnFirstOnly) {
// Symbol form: accumulates the tainted symbols that Sym depends on. A null
// symbol yields an empty result. When returnFirstOnly is set, the function
// returns as soon as the accumulated list is non-empty.
257 std::vector<SymbolRef> TaintedSymbols;
258 if (!Sym)
259 return TaintedSymbols;
260
261 // Traverse all the symbols this symbol depends on to see if any are tainted.
262 for (SymbolRef SubSym : Sym->symbols()) {
// Sub-symbols that are not SymbolData are skipped entirely; none of the
// checks below run for them.
263 if (!isa<SymbolData>(SubSym))
264 continue;
265
// Direct hit: the TaintMap holds exactly this Kind for the sub-symbol.
266 if (const TaintTagType *Tag = State->get<TaintMap>(SubSym)) {
267 if (*Tag == Kind) {
268 TaintedSymbols.push_back(SubSym);
269 if (returnFirstOnly)
270 return TaintedSymbols; // return early if needed
271 }
272 }
273
274 if (const auto *SD = dyn_cast<SymbolDerived>(SubSym)) {
275 // If this is a SymbolDerived with a tainted parent, it's also tainted.
276 std::vector<SymbolRef> TaintedParents = getTaintedSymbolsImpl(
277 State, SD->getParentSymbol(), Kind, returnFirstOnly);
278 llvm::append_range(TaintedSymbols, TaintedParents);
279 if (returnFirstOnly && !TaintedSymbols.empty())
280 return TaintedSymbols; // return early if needed
281
282 // If this is a SymbolDerived with the same parent symbol as another
283 // tainted SymbolDerived and a region that's a sub-region of that
284 // tainted symbol, it's also tainted.
285 if (const TaintedSubRegions *Regs =
286 State->get<DerivedSymTaint>(SD->getParentSymbol())) {
287 const TypedValueRegion *R = SD->getRegion();
// The loop variable is taken by value, so each map entry is copied per
// iteration.
288 for (auto I : *Regs) {
289 // FIXME: The logic to identify tainted regions could be more
290 // complete. For example, this would not currently identify
291 // overlapping fields in a union as tainted. To identify this we can
292 // check for overlapping/nested byte offsets.
293 if (Kind == I.second && R->isSubRegionOf(I.first)) {
// The parent symbol is what gets reported, not SubSym itself. With
// returnFirstOnly off, each matching entry appends the parent again.
294 TaintedSymbols.push_back(SD->getParentSymbol());
// The emptiness test is always true here because of the push_back just
// above, so only returnFirstOnly decides this return.
295 if (returnFirstOnly && !TaintedSymbols.empty())
296 return TaintedSymbols; // return early if needed
297 }
298 }
299 }
300 }
301
302 // If memory region is tainted, data is also tainted.
303 if (const auto *SRV = dyn_cast<SymbolRegionValue>(SubSym)) {
304 std::vector<SymbolRef> TaintedRegions =
305 getTaintedSymbolsImpl(State, SRV->getRegion(), Kind, returnFirstOnly);
306 llvm::append_range(TaintedSymbols, TaintedRegions);
307 if (returnFirstOnly && !TaintedSymbols.empty())
308 return TaintedSymbols; // return early if needed
309 }
310
311 // If this is a SymbolCast from a tainted value, it's also tainted.
// NOTE(review): non-SymbolData sub-symbols are skipped at the top of the
// loop, so this branch is reachable only if a SymbolCast can also be a
// SymbolData - confirm against the SymExpr class hierarchy.
312 if (const auto *SC = dyn_cast<SymbolCast>(SubSym)) {
313 std::vector<SymbolRef> TaintedCasts =
314 getTaintedSymbolsImpl(State, SC->getOperand(), Kind, returnFirstOnly);
315 llvm::append_range(TaintedSymbols, TaintedCasts);
316 if (returnFirstOnly && !TaintedSymbols.empty())
317 return TaintedSymbols; // return early if needed
318 }
319 }
320 return TaintedSymbols;
321}
#define V(N, I)
Definition: ASTContext.h:3273
#define REGISTER_MAP_WITH_PROGRAMSTATE(Name, Key, Value)
Declares an immutable map of type NameTy, suitable for placement into the ProgramState.
#define REGISTER_MAP_FACTORY_WITH_PROGRAMSTATE(Name, Key, Value)
Declares an immutable map type Name and registers the factory for such maps in the program state,...
It wraps the AnalysisDeclContext to represent both the call stack with the help of StackFrameContext ...
Stmt - This represents one statement.
Definition: Stmt.h:84
ElementRegion is used to represent both array elements and casts.
Definition: MemRegion.h:1194
MemRegion - The root abstract class for all memory regions.
Definition: MemRegion.h:96
LLVM_ATTRIBUTE_RETURNS_NONNULL const MemRegion * getBaseRegion() const
Definition: MemRegion.cpp:1343
SVal - This represents a symbolic expression, which can be either an L-value or an R-value.
Definition: SVals.h:55
SubRegion - A region that subsets another larger region.
Definition: MemRegion.h:441
bool isSubRegionOf(const MemRegion *R) const override
Check if the region is a subregion of the given region.
Definition: MemRegion.cpp:132
Symbolic value.
Definition: SymExpr.h:30
llvm::iterator_range< symbol_iterator > symbols() const
Definition: SymExpr.h:87
Represents a cast expression.
SymbolicRegion - A special, "non-concrete" region.
Definition: MemRegion.h:775
TypedValueRegion - An abstract class representing regions having a typed value.
Definition: MemRegion.h:530
std::vector< SymbolRef > getTaintedSymbolsImpl(ProgramStateRef State, const Stmt *S, const LocationContext *LCtx, TaintTagType Kind, bool returnFirstOnly)
Definition: Taint.cpp:193
ProgramStateRef addTaint(ProgramStateRef State, const Stmt *S, const LocationContext *LCtx, TaintTagType Kind=TaintTagGeneric)
Create a new state in which the value of the statement is marked as tainted.
Definition: Taint.cpp:45
std::vector< SymbolRef > getTaintedSymbols(ProgramStateRef State, const Stmt *S, const LocationContext *LCtx, TaintTagType Kind=TaintTagGeneric)
Returns the tainted Symbols for a given Statement and state.
Definition: Taint.cpp:169
bool isTainted(ProgramStateRef State, const Stmt *S, const LocationContext *LCtx, TaintTagType Kind=TaintTagGeneric)
Check if the statement has a tainted value in the given state.
Definition: Taint.cpp:147
ProgramStateRef removeTaint(ProgramStateRef State, SVal V)
Definition: Taint.cpp:98
void printTaint(ProgramStateRef State, raw_ostream &Out, const char *nl="\n", const char *sep="")
ProgramStateRef addPartialTaint(ProgramStateRef State, SymbolRef ParentSym, const SubRegion *SubRegion, TaintTagType Kind=TaintTagGeneric)
Create a new state in a which a sub-region of a given symbol is tainted.
Definition: Taint.cpp:124
LLVM_DUMP_METHOD void dumpTaint(ProgramStateRef State)
Definition: Taint.cpp:41
The JSON file list parser is used to communicate input to InstallAPI.
const FunctionProtoType * T