clang  15.0.0git
Taint.cpp
Go to the documentation of this file.
1 //=== Taint.cpp - Taint tracking and basic propagation rules. ------*- C++ -*-//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // Defines basic, non-domain-specific mechanisms for tracking tainted values.
10 //
11 //===----------------------------------------------------------------------===//
12 
16 
17 using namespace clang;
18 using namespace ento;
19 using namespace taint;
20 
21 // Fully tainted symbols.
23 
24 // Partially tainted symbols.
25 REGISTER_MAP_FACTORY_WITH_PROGRAMSTATE(TaintedSubRegions, const SubRegion *,
27 REGISTER_MAP_WITH_PROGRAMSTATE(DerivedSymTaint, SymbolRef, TaintedSubRegions)
28 
29 void taint::printTaint(ProgramStateRef State, raw_ostream &Out, const char *NL,
30  const char *Sep) {
31  TaintMapTy TM = State->get<TaintMap>();
32 
33  if (!TM.isEmpty())
34  Out << "Tainted symbols:" << NL;
35 
36  for (const auto &I : TM)
37  Out << I.first << " : " << I.second << NL;
38 }
39 
41  printTaint(State, llvm::errs());
42 }
43 
45  const LocationContext *LCtx,
47  return addTaint(State, State->getSVal(S, LCtx), Kind);
48 }
49 
52  SymbolRef Sym = V.getAsSymbol();
53  if (Sym)
54  return addTaint(State, Sym, Kind);
55 
56  // If the SVal represents a structure, try to mass-taint all values within the
57  // structure. For now it only works efficiently on lazy compound values that
58  // were conjured during a conservative evaluation of a function - either as
59  // return values of functions that return structures or arrays by value, or as
60  // values of structures or arrays passed into the function by reference,
61  // directly or through pointer aliasing. Such lazy compound values are
62  // characterized by having exactly one binding in their captured store within
63  // their parent region, which is a conjured symbol default-bound to the base
64  // region of the parent region.
65  if (auto LCV = V.getAs<nonloc::LazyCompoundVal>()) {
66  if (Optional<SVal> binding =
67  State->getStateManager().getStoreManager().getDefaultBinding(
68  *LCV)) {
69  if (SymbolRef Sym = binding->getAsSymbol())
70  return addPartialTaint(State, Sym, LCV->getRegion(), Kind);
71  }
72  }
73 
74  const MemRegion *R = V.getAsRegion();
75  return addTaint(State, R, Kind);
76 }
77 
80  if (const SymbolicRegion *SR = dyn_cast_or_null<SymbolicRegion>(R))
81  return addTaint(State, SR->getSymbol(), Kind);
82  return State;
83 }
84 
87  // If this is a symbol cast, remove the cast before adding the taint. Taint
88  // is cast agnostic.
89  while (const SymbolCast *SC = dyn_cast<SymbolCast>(Sym))
90  Sym = SC->getOperand();
91 
92  ProgramStateRef NewState = State->set<TaintMap>(Sym, Kind);
93  assert(NewState);
94  return NewState;
95 }
96 
98  SymbolRef Sym = V.getAsSymbol();
99  if (Sym)
100  return removeTaint(State, Sym);
101 
102  const MemRegion *R = V.getAsRegion();
103  return removeTaint(State, R);
104 }
105 
107  if (const SymbolicRegion *SR = dyn_cast_or_null<SymbolicRegion>(R))
108  return removeTaint(State, SR->getSymbol());
109  return State;
110 }
111 
113  // If this is a symbol cast, remove the cast before removing the taint.
114  // Taint is cast agnostic.
115  while (const SymbolCast *SC = dyn_cast<SymbolCast>(Sym))
116  Sym = SC->getOperand();
117 
118  ProgramStateRef NewState = State->remove<TaintMap>(Sym);
119  assert(NewState);
120  return NewState;
121 }
122 
124  SymbolRef ParentSym,
125  const SubRegion *SubRegion,
126  TaintTagType Kind) {
127  // Ignore partial taint if the entire parent symbol is already tainted.
128  if (const TaintTagType *T = State->get<TaintMap>(ParentSym))
129  if (*T == Kind)
130  return State;
131 
132  // Partial taint applies if only a portion of the symbol is tainted.
133  if (SubRegion == SubRegion->getBaseRegion())
134  return addTaint(State, ParentSym, Kind);
135 
136  const TaintedSubRegions *SavedRegs = State->get<DerivedSymTaint>(ParentSym);
137  TaintedSubRegions::Factory &F = State->get_context<TaintedSubRegions>();
138  TaintedSubRegions Regs = SavedRegs ? *SavedRegs : F.getEmptyMap();
139 
140  Regs = F.add(Regs, SubRegion, Kind);
141  ProgramStateRef NewState = State->set<DerivedSymTaint>(ParentSym, Regs);
142  assert(NewState);
143  return NewState;
144 }
145 
147  const LocationContext *LCtx, TaintTagType Kind) {
148  SVal val = State->getSVal(S, LCtx);
149  return isTainted(State, val, Kind);
150 }
151 
153  if (SymbolRef Sym = V.getAsSymbol())
154  return isTainted(State, Sym, Kind);
155  if (const MemRegion *Reg = V.getAsRegion())
156  return isTainted(State, Reg, Kind);
157  return false;
158 }
159 
160 bool taint::isTainted(ProgramStateRef State, const MemRegion *Reg,
161  TaintTagType K) {
162  if (!Reg)
163  return false;
164 
165  // Element region (array element) is tainted if either the base or the offset
166  // are tainted.
167  if (const ElementRegion *ER = dyn_cast<ElementRegion>(Reg))
168  return isTainted(State, ER->getSuperRegion(), K) ||
169  isTainted(State, ER->getIndex(), K);
170 
171  if (const SymbolicRegion *SR = dyn_cast<SymbolicRegion>(Reg))
172  return isTainted(State, SR->getSymbol(), K);
173 
174  if (const SubRegion *ER = dyn_cast<SubRegion>(Reg))
175  return isTainted(State, ER->getSuperRegion(), K);
176 
177  return false;
178 }
179 
181  if (!Sym)
182  return false;
183 
184  // Traverse all the symbols this symbol depends on to see if any are tainted.
185  for (SymExpr::symbol_iterator SI = Sym->symbol_begin(),
186  SE = Sym->symbol_end();
187  SI != SE; ++SI) {
188  if (!isa<SymbolData>(*SI))
189  continue;
190 
191  if (const TaintTagType *Tag = State->get<TaintMap>(*SI)) {
192  if (*Tag == Kind)
193  return true;
194  }
195 
196  if (const auto *SD = dyn_cast<SymbolDerived>(*SI)) {
197  // If this is a SymbolDerived with a tainted parent, it's also tainted.
198  if (isTainted(State, SD->getParentSymbol(), Kind))
199  return true;
200 
201  // If this is a SymbolDerived with the same parent symbol as another
202  // tainted SymbolDerived and a region that's a sub-region of that tainted
203  // symbol, it's also tainted.
204  if (const TaintedSubRegions *Regs =
205  State->get<DerivedSymTaint>(SD->getParentSymbol())) {
206  const TypedValueRegion *R = SD->getRegion();
207  for (auto I : *Regs) {
208  // FIXME: The logic to identify tainted regions could be more
209  // complete. For example, this would not currently identify
210  // overlapping fields in a union as tainted. To identify this we can
211  // check for overlapping/nested byte offsets.
212  if (Kind == I.second && R->isSubRegionOf(I.first))
213  return true;
214  }
215  }
216  }
217 
218  // If memory region is tainted, data is also tainted.
219  if (const auto *SRV = dyn_cast<SymbolRegionValue>(*SI)) {
220  if (isTainted(State, SRV->getRegion(), Kind))
221  return true;
222  }
223 
224  // If this is a SymbolCast from a tainted value, it's also tainted.
225  if (const auto *SC = dyn_cast<SymbolCast>(*SI)) {
226  if (isTainted(State, SC->getOperand(), Kind))
227  return true;
228  }
229  }
230 
231  return false;
232 }
233 
235  BugReporterContext &BRC,
237 
238  // Find the ExplodedNode where the taint was first introduced
239  if (!isTainted(N->getState(), V) ||
240  isTainted(N->getFirstPred()->getState(), V))
241  return nullptr;
242 
243  const Stmt *S = N->getStmtForDiagnostics();
244  if (!S)
245  return nullptr;
246 
247  const LocationContext *NCtx = N->getLocationContext();
250  if (!L.isValid() || !L.asLocation().isValid())
251  return nullptr;
252 
253  return std::make_shared<PathDiagnosticEventPiece>(L, "Taint originated here");
254 }
clang::ento::PathDiagnosticLocation
Definition: PathDiagnostic.h:195
clang::ento::ExplodedNode::getLocationContext
const LocationContext * getLocationContext() const
Definition: ExplodedGraph.h:146
clang::LocationContext
It wraps the AnalysisDeclContext to represent both the call stack with the help of StackFrameContext ...
Definition: AnalysisDeclContext.h:215
clang::ento::PathDiagnosticPieceRef
std::shared_ptr< PathDiagnosticPiece > PathDiagnosticPieceRef
Definition: PathDiagnostic.h:492
clang::ento::taint::printTaint
void printTaint(ProgramStateRef State, raw_ostream &Out, const char *nl="\n", const char *sep="")
clang::ento::taint::TaintBugVisitor::VisitNode
PathDiagnosticPieceRef VisitNode(const ExplodedNode *N, BugReporterContext &BRC, PathSensitiveBugReport &BR) override
Return a diagnostic piece which should be associated with the given node.
Definition: Taint.cpp:234
clang::ento::ProgramStateRef
IntrusiveRefCntPtr< const ProgramState > ProgramStateRef
Definition: ProgramState_Fwd.h:37
clang::ento::ExplodedNode
Definition: ExplodedGraph.h:65
clang::ento::SymbolRef
const SymExpr * SymbolRef
Definition: SymExpr.h:111
llvm::Optional
Definition: LLVM.h:40
clang::ento::taint::isTainted
bool isTainted(ProgramStateRef State, const Stmt *S, const LocationContext *LCtx, TaintTagType Kind=TaintTagGeneric)
Check if the statement has a tainted value in the given state.
REGISTER_MAP_WITH_PROGRAMSTATE
#define REGISTER_MAP_WITH_PROGRAMSTATE(Name, Key, Value)
Declares an immutable map of type NameTy, suitable for placement into the ProgramState.
Definition: ProgramStateTrait.h:87
V
#define V(N, I)
Definition: ASTContext.h:3176
clang::ento::ExplodedNode::getState
const ProgramStateRef & getState() const
Definition: ExplodedGraph.h:168
clang::ento::BugReporterContext::getSourceManager
const SourceManager & getSourceManager() const
Definition: BugReporter.h:721
BugReporter.h
clang::ento::taint::addTaint
LLVM_NODISCARD ProgramStateRef addTaint(ProgramStateRef State, const Stmt *S, const LocationContext *LCtx, TaintTagType Kind=TaintTagGeneric)
Create a new state in which the value of the statement is marked as tainted.
clang::ento::PathDiagnosticLocation::isValid
bool isValid() const
Definition: PathDiagnostic.h:335
clang::ento::BugReporterContext
Definition: BugReporter.h:701
clang::ento::taint::removeTaint
LLVM_NODISCARD ProgramStateRef removeTaint(ProgramStateRef State, SVal V)
REGISTER_MAP_FACTORY_WITH_PROGRAMSTATE
REGISTER_MAP_FACTORY_WITH_PROGRAMSTATE(TaintedSubRegions, const SubRegion *, TaintTagType) void taint
Definition: Taint.cpp:25
clang::ento::taint::TaintTagType
unsigned TaintTagType
The type of taint, which helps to differentiate between different types of taint.
Definition: Taint.h:25
Taint.h
clang::ento::ExplodedNode::getFirstPred
ExplodedNode * getFirstPred()
Definition: ExplodedGraph.h:208
clang::ento::PathDiagnosticLocation::asLocation
FullSourceLoc asLocation() const
Definition: PathDiagnostic.h:339
clang::ento::taint::dumpTaint
LLVM_DUMP_METHOD void dumpTaint(ProgramStateRef State)
State
LineState State
Definition: UnwrappedLineFormatter.cpp:1126
clang::ObjCPropertyAttribute::Kind
Kind
Definition: DeclObjCCommon.h:22
clang::ento::PathSensitiveBugReport
Definition: BugReporter.h:289
clang
Definition: CalledOnceCheck.h:17
clang::Stmt
Stmt - This represents one statement.
Definition: Stmt.h:69
clang::SourceLocation::isValid
bool isValid() const
Return true if this is a valid SourceLocation object.
Definition: SourceLocation.h:110
clang::ento::PathDiagnosticLocation::createBegin
static PathDiagnosticLocation createBegin(const Decl *D, const SourceManager &SM)
Create a location for the beginning of the declaration.
Definition: PathDiagnostic.cpp:580
clang::ento::ExplodedNode::getStmtForDiagnostics
const Stmt * getStmtForDiagnostics() const
If the node's program point corresponds to a statement, retrieve that statement.
Definition: ExplodedGraph.cpp:320
ProgramStateTrait.h
clang::ento::taint::addPartialTaint
LLVM_NODISCARD ProgramStateRef addPartialTaint(ProgramStateRef State, SymbolRef ParentSym, const SubRegion *SubRegion, TaintTagType Kind=TaintTagGeneric)
Create a new state in which a sub-region of a given symbol is tainted.