clang 20.0.0git
Taint.cpp
Go to the documentation of this file.
1//=== Taint.cpp - Taint tracking and basic propagation rules. ------*- C++ -*-//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// Defines basic, non-domain-specific mechanisms for tracking tainted values.
10//
11//===----------------------------------------------------------------------===//
12
16#include <optional>
17
18using namespace clang;
19using namespace ento;
20using namespace taint;
21
22// Fully tainted symbols.
24
25// Partially tainted symbols.
28REGISTER_MAP_WITH_PROGRAMSTATE(DerivedSymTaint, SymbolRef, TaintedSubRegions)
29
30void taint::printTaint(ProgramStateRef State, raw_ostream &Out, const char *NL,
31 const char *Sep) {
32 TaintMapTy TM = State->get<TaintMap>();
33
34 if (!TM.isEmpty())
35 Out << "Tainted symbols:" << NL;
36
37 for (const auto &I : TM)
38 Out << I.first << " : " << I.second << NL;
39}
40
42 printTaint(State, llvm::errs());
43}
44
46 const LocationContext *LCtx,
47 TaintTagType Kind) {
48 return addTaint(State, State->getSVal(S, LCtx), Kind);
49}
50
52 TaintTagType Kind) {
53 SymbolRef Sym = V.getAsSymbol();
54 if (Sym)
55 return addTaint(State, Sym, Kind);
56
57 // If the SVal represents a structure, try to mass-taint all values within the
58 // structure. For now it only works efficiently on lazy compound values that
59 // were conjured during a conservative evaluation of a function - either as
60 // return values of functions that return structures or arrays by value, or as
61 // values of structures or arrays passed into the function by reference,
62 // directly or through pointer aliasing. Such lazy compound values are
63 // characterized by having exactly one binding in their captured store within
64 // their parent region, which is a conjured symbol default-bound to the base
65 // region of the parent region.
66 if (auto LCV = V.getAs<nonloc::LazyCompoundVal>()) {
67 if (std::optional<SVal> binding =
68 State->getStateManager().getStoreManager().getDefaultBinding(
69 *LCV)) {
70 if (SymbolRef Sym = binding->getAsSymbol())
71 return addPartialTaint(State, Sym, LCV->getRegion(), Kind);
72 }
73 }
74
75 const MemRegion *R = V.getAsRegion();
76 return addTaint(State, R, Kind);
77}
78
80 TaintTagType Kind) {
81 if (const SymbolicRegion *SR = dyn_cast_or_null<SymbolicRegion>(R))
82 return addTaint(State, SR->getSymbol(), Kind);
83 return State;
84}
85
87 TaintTagType Kind) {
88 // If this is a symbol cast, remove the cast before adding the taint. Taint
89 // is cast agnostic.
90 while (const SymbolCast *SC = dyn_cast<SymbolCast>(Sym))
91 Sym = SC->getOperand();
92
93 ProgramStateRef NewState = State->set<TaintMap>(Sym, Kind);
94 assert(NewState);
95 return NewState;
96}
97
99 SymbolRef Sym = V.getAsSymbol();
100 if (Sym)
101 return removeTaint(State, Sym);
102
103 const MemRegion *R = V.getAsRegion();
104 return removeTaint(State, R);
105}
106
108 if (const SymbolicRegion *SR = dyn_cast_or_null<SymbolicRegion>(R))
109 return removeTaint(State, SR->getSymbol());
110 return State;
111}
112
114 // If this is a symbol cast, remove the cast before adding the taint. Taint
115 // is cast agnostic.
116 while (const SymbolCast *SC = dyn_cast<SymbolCast>(Sym))
117 Sym = SC->getOperand();
118
119 ProgramStateRef NewState = State->remove<TaintMap>(Sym);
120 assert(NewState);
121 return NewState;
122}
123
125 SymbolRef ParentSym,
126 const SubRegion *SubRegion,
127 TaintTagType Kind) {
128 // Ignore partial taint if the entire parent symbol is already tainted.
129 if (const TaintTagType *T = State->get<TaintMap>(ParentSym))
130 if (*T == Kind)
131 return State;
132
133 // Partial taint applies if only a portion of the symbol is tainted.
135 return addTaint(State, ParentSym, Kind);
136
137 const TaintedSubRegions *SavedRegs = State->get<DerivedSymTaint>(ParentSym);
138 TaintedSubRegions::Factory &F = State->get_context<TaintedSubRegions>();
139 TaintedSubRegions Regs = SavedRegs ? *SavedRegs : F.getEmptyMap();
140
141 Regs = F.add(Regs, SubRegion, Kind);
142 ProgramStateRef NewState = State->set<DerivedSymTaint>(ParentSym, Regs);
143 assert(NewState);
144 return NewState;
145}
146
148 const LocationContext *LCtx, TaintTagType Kind) {
149 return !getTaintedSymbolsImpl(State, S, LCtx, Kind, /*ReturnFirstOnly=*/true)
150 .empty();
151}
152
154 return !getTaintedSymbolsImpl(State, V, Kind, /*ReturnFirstOnly=*/true)
155 .empty();
156}
157
159 TaintTagType K) {
160 return !getTaintedSymbolsImpl(State, Reg, K, /*ReturnFirstOnly=*/true)
161 .empty();
162}
163
165 return !getTaintedSymbolsImpl(State, Sym, Kind, /*ReturnFirstOnly=*/true)
166 .empty();
167}
168
169std::vector<SymbolRef> taint::getTaintedSymbols(ProgramStateRef State,
170 const Stmt *S,
171 const LocationContext *LCtx,
172 TaintTagType Kind) {
173 return getTaintedSymbolsImpl(State, S, LCtx, Kind, /*ReturnFirstOnly=*/false);
174}
175
176std::vector<SymbolRef> taint::getTaintedSymbols(ProgramStateRef State, SVal V,
177 TaintTagType Kind) {
178 return getTaintedSymbolsImpl(State, V, Kind, /*ReturnFirstOnly=*/false);
179}
180
181std::vector<SymbolRef> taint::getTaintedSymbols(ProgramStateRef State,
182 SymbolRef Sym,
183 TaintTagType Kind) {
184 return getTaintedSymbolsImpl(State, Sym, Kind, /*ReturnFirstOnly=*/false);
185}
186
187std::vector<SymbolRef> taint::getTaintedSymbols(ProgramStateRef State,
188 const MemRegion *Reg,
189 TaintTagType Kind) {
190 return getTaintedSymbolsImpl(State, Reg, Kind, /*ReturnFirstOnly=*/false);
191}
192
194 const Stmt *S,
195 const LocationContext *LCtx,
196 TaintTagType Kind,
197 bool returnFirstOnly) {
198 SVal val = State->getSVal(S, LCtx);
199 return getTaintedSymbolsImpl(State, val, Kind, returnFirstOnly);
200}
201
203 SVal V, TaintTagType Kind,
204 bool returnFirstOnly) {
205 if (SymbolRef Sym = V.getAsSymbol())
206 return getTaintedSymbolsImpl(State, Sym, Kind, returnFirstOnly);
207 if (const MemRegion *Reg = V.getAsRegion())
208 return getTaintedSymbolsImpl(State, Reg, Kind, returnFirstOnly);
209 return {};
210}
211
213 const MemRegion *Reg,
214 TaintTagType K,
215 bool returnFirstOnly) {
216 std::vector<SymbolRef> TaintedSymbols;
217 if (!Reg)
218 return TaintedSymbols;
219
220 // Element region (array element) is tainted if the offset is tainted.
221 if (const ElementRegion *ER = dyn_cast<ElementRegion>(Reg)) {
222 std::vector<SymbolRef> TaintedIndex =
223 getTaintedSymbolsImpl(State, ER->getIndex(), K, returnFirstOnly);
224 llvm::append_range(TaintedSymbols, TaintedIndex);
225 if (returnFirstOnly && !TaintedSymbols.empty())
226 return TaintedSymbols; // return early if needed
227 }
228
229 // Symbolic region is tainted if the corresponding symbol is tainted.
230 if (const SymbolicRegion *SR = dyn_cast<SymbolicRegion>(Reg)) {
231 std::vector<SymbolRef> TaintedRegions =
232 getTaintedSymbolsImpl(State, SR->getSymbol(), K, returnFirstOnly);
233 llvm::append_range(TaintedSymbols, TaintedRegions);
234 if (returnFirstOnly && !TaintedSymbols.empty())
235 return TaintedSymbols; // return early if needed
236 }
237
238 // Any subregion (including Element and Symbolic regions) is tainted if its
239 // super-region is tainted.
240 if (const SubRegion *ER = dyn_cast<SubRegion>(Reg)) {
241 std::vector<SymbolRef> TaintedSubRegions =
242 getTaintedSymbolsImpl(State, ER->getSuperRegion(), K, returnFirstOnly);
243 llvm::append_range(TaintedSymbols, TaintedSubRegions);
244 if (returnFirstOnly && !TaintedSymbols.empty())
245 return TaintedSymbols; // return early if needed
246 }
247
248 return TaintedSymbols;
249}
250
252 SymbolRef Sym,
253 TaintTagType Kind,
254 bool returnFirstOnly) {
255 std::vector<SymbolRef> TaintedSymbols;
256 if (!Sym)
257 return TaintedSymbols;
258
259 // Traverse all the symbols this symbol depends on to see if any are tainted.
260 for (SymbolRef SubSym : Sym->symbols()) {
261 if (!isa<SymbolData>(SubSym))
262 continue;
263
264 if (const TaintTagType *Tag = State->get<TaintMap>(SubSym)) {
265 if (*Tag == Kind) {
266 TaintedSymbols.push_back(SubSym);
267 if (returnFirstOnly)
268 return TaintedSymbols; // return early if needed
269 }
270 }
271
272 if (const auto *SD = dyn_cast<SymbolDerived>(SubSym)) {
273 // If this is a SymbolDerived with a tainted parent, it's also tainted.
274 std::vector<SymbolRef> TaintedParents = getTaintedSymbolsImpl(
275 State, SD->getParentSymbol(), Kind, returnFirstOnly);
276 llvm::append_range(TaintedSymbols, TaintedParents);
277 if (returnFirstOnly && !TaintedSymbols.empty())
278 return TaintedSymbols; // return early if needed
279
280 // If this is a SymbolDerived with the same parent symbol as another
281 // tainted SymbolDerived and a region that's a sub-region of that
282 // tainted symbol, it's also tainted.
283 if (const TaintedSubRegions *Regs =
284 State->get<DerivedSymTaint>(SD->getParentSymbol())) {
285 const TypedValueRegion *R = SD->getRegion();
286 for (auto I : *Regs) {
287 // FIXME: The logic to identify tainted regions could be more
288 // complete. For example, this would not currently identify
289 // overlapping fields in a union as tainted. To identify this we can
290 // check for overlapping/nested byte offsets.
291 if (Kind == I.second && R->isSubRegionOf(I.first)) {
292 TaintedSymbols.push_back(SD->getParentSymbol());
293 if (returnFirstOnly && !TaintedSymbols.empty())
294 return TaintedSymbols; // return early if needed
295 }
296 }
297 }
298 }
299
300 // If memory region is tainted, data is also tainted.
301 if (const auto *SRV = dyn_cast<SymbolRegionValue>(SubSym)) {
302 std::vector<SymbolRef> TaintedRegions =
303 getTaintedSymbolsImpl(State, SRV->getRegion(), Kind, returnFirstOnly);
304 llvm::append_range(TaintedSymbols, TaintedRegions);
305 if (returnFirstOnly && !TaintedSymbols.empty())
306 return TaintedSymbols; // return early if needed
307 }
308
309 // If this is a SymbolCast from a tainted value, it's also tainted.
310 if (const auto *SC = dyn_cast<SymbolCast>(SubSym)) {
311 std::vector<SymbolRef> TaintedCasts =
312 getTaintedSymbolsImpl(State, SC->getOperand(), Kind, returnFirstOnly);
313 llvm::append_range(TaintedSymbols, TaintedCasts);
314 if (returnFirstOnly && !TaintedSymbols.empty())
315 return TaintedSymbols; // return early if needed
316 }
317 }
318 return TaintedSymbols;
319}
#define V(N, I)
Definition: ASTContext.h:3341
#define REGISTER_MAP_WITH_PROGRAMSTATE(Name, Key, Value)
Declares an immutable map of type NameTy, suitable for placement into the ProgramState.
#define REGISTER_MAP_FACTORY_WITH_PROGRAMSTATE(Name, Key, Value)
Declares an immutable map type Name and registers the factory for such maps in the program state,...
It wraps the AnalysisDeclContext to represent both the call stack with the help of StackFrameContext ...
Stmt - This represents one statement.
Definition: Stmt.h:84
ElementRegion is used to represent both array elements and casts.
Definition: MemRegion.h:1199
MemRegion - The root abstract class for all memory regions.
Definition: MemRegion.h:97
LLVM_ATTRIBUTE_RETURNS_NONNULL const MemRegion * getBaseRegion() const
Definition: MemRegion.cpp:1354
SVal - This represents a symbolic expression, which can be either an L-value or an R-value.
Definition: SVals.h:55
SubRegion - A region that subsets another larger region.
Definition: MemRegion.h:446
bool isSubRegionOf(const MemRegion *R) const override
Check if the region is a subregion of the given region.
Definition: MemRegion.cpp:132
Symbolic value.
Definition: SymExpr.h:30
llvm::iterator_range< symbol_iterator > symbols() const
Definition: SymExpr.h:87
Represents a cast expression.
SymbolicRegion - A special, "non-concrete" region.
Definition: MemRegion.h:780
TypedValueRegion - An abstract class representing regions having a typed value.
Definition: MemRegion.h:535
While nonloc::CompoundVal covers a few simple use cases, nonloc::LazyCompoundVal is a more performant...
Definition: SVals.h:383
std::vector< SymbolRef > getTaintedSymbolsImpl(ProgramStateRef State, const Stmt *S, const LocationContext *LCtx, TaintTagType Kind, bool returnFirstOnly)
Definition: Taint.cpp:193
ProgramStateRef addTaint(ProgramStateRef State, const Stmt *S, const LocationContext *LCtx, TaintTagType Kind=TaintTagGeneric)
Create a new state in which the value of the statement is marked as tainted.
Definition: Taint.cpp:45
std::vector< SymbolRef > getTaintedSymbols(ProgramStateRef State, const Stmt *S, const LocationContext *LCtx, TaintTagType Kind=TaintTagGeneric)
Returns the tainted Symbols for a given Statement and state.
Definition: Taint.cpp:169
bool isTainted(ProgramStateRef State, const Stmt *S, const LocationContext *LCtx, TaintTagType Kind=TaintTagGeneric)
Check if the statement has a tainted value in the given state.
Definition: Taint.cpp:147
ProgramStateRef removeTaint(ProgramStateRef State, SVal V)
Definition: Taint.cpp:98
void printTaint(ProgramStateRef State, raw_ostream &Out, const char *nl="\n", const char *sep="")
ProgramStateRef addPartialTaint(ProgramStateRef State, SymbolRef ParentSym, const SubRegion *SubRegion, TaintTagType Kind=TaintTagGeneric)
Create a new state in a which a sub-region of a given symbol is tainted.
Definition: Taint.cpp:124
LLVM_DUMP_METHOD void dumpTaint(ProgramStateRef State)
Definition: Taint.cpp:41
The JSON file list parser is used to communicate input to InstallAPI.
const FunctionProtoType * T