clang  13.0.0git
ArrayBoundCheckerV2.cpp
Go to the documentation of this file.
1 //== ArrayBoundCheckerV2.cpp ------------------------------------*- C++ -*--==//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines ArrayBoundCheckerV2, which is a path-sensitive check
10 // which looks for an out-of-bound array element access.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "Taint.h"
15 #include "clang/AST/CharUnits.h"
24 #include "llvm/ADT/SmallString.h"
25 #include "llvm/Support/raw_ostream.h"
26 
27 using namespace clang;
28 using namespace ento;
29 using namespace taint;
30 
31 namespace {
32 class ArrayBoundCheckerV2 :
33  public Checker<check::Location> {
34  mutable std::unique_ptr<BuiltinBug> BT;
35 
36  enum OOB_Kind { OOB_Precedes, OOB_Excedes, OOB_Tainted };
37 
38  void reportOOB(CheckerContext &C, ProgramStateRef errorState, OOB_Kind kind,
39  std::unique_ptr<BugReporterVisitor> Visitor = nullptr) const;
40 
41 public:
42  void checkLocation(SVal l, bool isLoad, const Stmt*S,
43  CheckerContext &C) const;
44 };
45 
46 // FIXME: Eventually replace RegionRawOffset with this class.
47 class RegionRawOffsetV2 {
48 private:
49  const SubRegion *baseRegion;
50  SVal byteOffset;
51 
52  RegionRawOffsetV2()
53  : baseRegion(nullptr), byteOffset(UnknownVal()) {}
54 
55 public:
56  RegionRawOffsetV2(const SubRegion* base, SVal offset)
57  : baseRegion(base), byteOffset(offset) {}
58 
59  NonLoc getByteOffset() const { return byteOffset.castAs<NonLoc>(); }
60  const SubRegion *getRegion() const { return baseRegion; }
61 
62  static RegionRawOffsetV2 computeOffset(ProgramStateRef state,
63  SValBuilder &svalBuilder,
64  SVal location);
65 
66  void dump() const;
67  void dumpToStream(raw_ostream &os) const;
68 };
69 }
70 
71 static SVal computeExtentBegin(SValBuilder &svalBuilder,
72  const MemRegion *region) {
73  const MemSpaceRegion *SR = region->getMemorySpace();
74  if (SR->getKind() == MemRegion::UnknownSpaceRegionKind)
75  return UnknownVal();
76  else
77  return svalBuilder.makeZeroArrayIndex();
78 }
79 
80 // TODO: once the constraint manager is smart enough to handle non simplified
81 // symbolic expressions remove this function. Note that this can not be used in
82 // the constraint manager as is, since this does not handle overflows. It is
83 // safe to assume, however, that memory offsets will not overflow.
84 static std::pair<NonLoc, nonloc::ConcreteInt>
85 getSimplifiedOffsets(NonLoc offset, nonloc::ConcreteInt extent,
86  SValBuilder &svalBuilder) {
87  Optional<nonloc::SymbolVal> SymVal = offset.getAs<nonloc::SymbolVal>();
88  if (SymVal && SymVal->isExpression()) {
89  if (const SymIntExpr *SIE = dyn_cast<SymIntExpr>(SymVal->getSymbol())) {
90  llvm::APSInt constant =
91  APSIntType(extent.getValue()).convert(SIE->getRHS());
92  switch (SIE->getOpcode()) {
93  case BO_Mul:
94  // The constant should never be 0 here, since it the result of scaling
95  // based on the size of a type which is never 0.
96  if ((extent.getValue() % constant) != 0)
97  return std::pair<NonLoc, nonloc::ConcreteInt>(offset, extent);
98  else
99  return getSimplifiedOffsets(
100  nonloc::SymbolVal(SIE->getLHS()),
101  svalBuilder.makeIntVal(extent.getValue() / constant),
102  svalBuilder);
103  case BO_Add:
104  return getSimplifiedOffsets(
105  nonloc::SymbolVal(SIE->getLHS()),
106  svalBuilder.makeIntVal(extent.getValue() - constant), svalBuilder);
107  default:
108  break;
109  }
110  }
111  }
112 
113  return std::pair<NonLoc, nonloc::ConcreteInt>(offset, extent);
114 }
115 
116 void ArrayBoundCheckerV2::checkLocation(SVal location, bool isLoad,
117  const Stmt* LoadS,
118  CheckerContext &checkerContext) const {
119 
120  // NOTE: Instead of using ProgramState::assumeInBound(), we are prototyping
121  // some new logic here that reasons directly about memory region extents.
122  // Once that logic is more mature, we can bring it back to assumeInBound()
123  // for all clients to use.
124  //
125  // The algorithm we are using here for bounds checking is to see if the
126  // memory access is within the extent of the base region. Since we
127  // have some flexibility in defining the base region, we can achieve
128  // various levels of conservatism in our buffer overflow checking.
129  ProgramStateRef state = checkerContext.getState();
130 
131  SValBuilder &svalBuilder = checkerContext.getSValBuilder();
132  const RegionRawOffsetV2 &rawOffset =
133  RegionRawOffsetV2::computeOffset(state, svalBuilder, location);
134 
135  if (!rawOffset.getRegion())
136  return;
137 
138  NonLoc rawOffsetVal = rawOffset.getByteOffset();
139 
140  // CHECK LOWER BOUND: Is byteOffset < extent begin?
141  // If so, we are doing a load/store
142  // before the first valid offset in the memory region.
143 
144  SVal extentBegin = computeExtentBegin(svalBuilder, rawOffset.getRegion());
145 
146  if (Optional<NonLoc> NV = extentBegin.getAs<NonLoc>()) {
147  if (NV->getAs<nonloc::ConcreteInt>()) {
148  std::pair<NonLoc, nonloc::ConcreteInt> simplifiedOffsets =
149  getSimplifiedOffsets(rawOffset.getByteOffset(),
150  NV->castAs<nonloc::ConcreteInt>(),
151  svalBuilder);
152  rawOffsetVal = simplifiedOffsets.first;
153  *NV = simplifiedOffsets.second;
154  }
155 
156  SVal lowerBound = svalBuilder.evalBinOpNN(state, BO_LT, rawOffsetVal, *NV,
157  svalBuilder.getConditionType());
158 
159  Optional<NonLoc> lowerBoundToCheck = lowerBound.getAs<NonLoc>();
160  if (!lowerBoundToCheck)
161  return;
162 
163  ProgramStateRef state_precedesLowerBound, state_withinLowerBound;
164  std::tie(state_precedesLowerBound, state_withinLowerBound) =
165  state->assume(*lowerBoundToCheck);
166 
167  // Are we constrained enough to definitely precede the lower bound?
168  if (state_precedesLowerBound && !state_withinLowerBound) {
169  reportOOB(checkerContext, state_precedesLowerBound, OOB_Precedes);
170  return;
171  }
172 
173  // Otherwise, assume the constraint of the lower bound.
174  assert(state_withinLowerBound);
175  state = state_withinLowerBound;
176  }
177 
178  do {
179  // CHECK UPPER BOUND: Is byteOffset >= size(baseRegion)? If so,
180  // we are doing a load/store after the last valid offset.
181  const MemRegion *MR = rawOffset.getRegion();
182  DefinedOrUnknownSVal Size = getDynamicExtent(state, MR, svalBuilder);
183  if (!Size.getAs<NonLoc>())
184  break;
185 
186  if (Size.getAs<nonloc::ConcreteInt>()) {
187  std::pair<NonLoc, nonloc::ConcreteInt> simplifiedOffsets =
188  getSimplifiedOffsets(rawOffset.getByteOffset(),
189  Size.castAs<nonloc::ConcreteInt>(), svalBuilder);
190  rawOffsetVal = simplifiedOffsets.first;
191  Size = simplifiedOffsets.second;
192  }
193 
194  SVal upperbound = svalBuilder.evalBinOpNN(state, BO_GE, rawOffsetVal,
195  Size.castAs<NonLoc>(),
196  svalBuilder.getConditionType());
197 
198  Optional<NonLoc> upperboundToCheck = upperbound.getAs<NonLoc>();
199  if (!upperboundToCheck)
200  break;
201 
202  ProgramStateRef state_exceedsUpperBound, state_withinUpperBound;
203  std::tie(state_exceedsUpperBound, state_withinUpperBound) =
204  state->assume(*upperboundToCheck);
205 
206  // If we are under constrained and the index variables are tainted, report.
207  if (state_exceedsUpperBound && state_withinUpperBound) {
208  SVal ByteOffset = rawOffset.getByteOffset();
209  if (isTainted(state, ByteOffset)) {
210  reportOOB(checkerContext, state_exceedsUpperBound, OOB_Tainted,
211  std::make_unique<TaintBugVisitor>(ByteOffset));
212  return;
213  }
214  } else if (state_exceedsUpperBound) {
215  // If we are constrained enough to definitely exceed the upper bound,
216  // report.
217  assert(!state_withinUpperBound);
218  reportOOB(checkerContext, state_exceedsUpperBound, OOB_Excedes);
219  return;
220  }
221 
222  assert(state_withinUpperBound);
223  state = state_withinUpperBound;
224  }
225  while (false);
226 
227  checkerContext.addTransition(state);
228 }
229 
230 void ArrayBoundCheckerV2::reportOOB(
231  CheckerContext &checkerContext, ProgramStateRef errorState, OOB_Kind kind,
232  std::unique_ptr<BugReporterVisitor> Visitor) const {
233 
234  ExplodedNode *errorNode = checkerContext.generateErrorNode(errorState);
235  if (!errorNode)
236  return;
237 
238  if (!BT)
239  BT.reset(new BuiltinBug(this, "Out-of-bound access"));
240 
241  // FIXME: This diagnostics are preliminary. We should get far better
242  // diagnostics for explaining buffer overruns.
243 
244  SmallString<256> buf;
245  llvm::raw_svector_ostream os(buf);
246  os << "Out of bound memory access ";
247  switch (kind) {
248  case OOB_Precedes:
249  os << "(accessed memory precedes memory block)";
250  break;
251  case OOB_Excedes:
252  os << "(access exceeds upper limit of memory block)";
253  break;
254  case OOB_Tainted:
255  os << "(index is tainted)";
256  break;
257  }
258 
259  auto BR = std::make_unique<PathSensitiveBugReport>(*BT, os.str(), errorNode);
260  BR->addVisitor(std::move(Visitor));
261  checkerContext.emitReport(std::move(BR));
262 }
263 
264 #ifndef NDEBUG
265 LLVM_DUMP_METHOD void RegionRawOffsetV2::dump() const {
266  dumpToStream(llvm::errs());
267 }
268 
269 void RegionRawOffsetV2::dumpToStream(raw_ostream &os) const {
270  os << "raw_offset_v2{" << getRegion() << ',' << getByteOffset() << '}';
271 }
272 #endif
273 
274 // Lazily computes a value to be used by 'computeOffset'. If 'val'
275 // is unknown or undefined, we lazily substitute '0'. Otherwise,
276 // return 'val'.
277 static inline SVal getValue(SVal val, SValBuilder &svalBuilder) {
278  return val.getAs<UndefinedVal>() ? svalBuilder.makeArrayIndex(0) : val;
279 }
280 
281 // Scale a base value by a scaling factor, and return the scaled
282 // value as an SVal. Used by 'computeOffset'.
283 static inline SVal scaleValue(ProgramStateRef state,
284  NonLoc baseVal, CharUnits scaling,
285  SValBuilder &sb) {
286  return sb.evalBinOpNN(state, BO_Mul, baseVal,
287  sb.makeArrayIndex(scaling.getQuantity()),
288  sb.getArrayIndexType());
289 }
290 
291 // Add an SVal to another, treating unknown and undefined values as
292 // summing to UnknownVal. Used by 'computeOffset'.
293 static SVal addValue(ProgramStateRef state, SVal x, SVal y,
294  SValBuilder &svalBuilder) {
295  // We treat UnknownVals and UndefinedVals the same here because we
296  // only care about computing offsets.
297  if (x.isUnknownOrUndef() || y.isUnknownOrUndef())
298  return UnknownVal();
299 
300  return svalBuilder.evalBinOpNN(state, BO_Add, x.castAs<NonLoc>(),
301  y.castAs<NonLoc>(),
302  svalBuilder.getArrayIndexType());
303 }
304 
305 /// Compute a raw byte offset from a base region. Used for array bounds
306 /// checking.
307 RegionRawOffsetV2 RegionRawOffsetV2::computeOffset(ProgramStateRef state,
308  SValBuilder &svalBuilder,
309  SVal location)
310 {
311  const MemRegion *region = location.getAsRegion();
312  SVal offset = UndefinedVal();
313 
314  while (region) {
315  switch (region->getKind()) {
316  default: {
317  if (const SubRegion *subReg = dyn_cast<SubRegion>(region)) {
318  offset = getValue(offset, svalBuilder);
319  if (!offset.isUnknownOrUndef())
320  return RegionRawOffsetV2(subReg, offset);
321  }
322  return RegionRawOffsetV2();
323  }
324  case MemRegion::ElementRegionKind: {
325  const ElementRegion *elemReg = cast<ElementRegion>(region);
326  SVal index = elemReg->getIndex();
327  if (!index.getAs<NonLoc>())
328  return RegionRawOffsetV2();
329  QualType elemType = elemReg->getElementType();
330  // If the element is an incomplete type, go no further.
331  ASTContext &astContext = svalBuilder.getContext();
332  if (elemType->isIncompleteType())
333  return RegionRawOffsetV2();
334 
335  // Update the offset.
336  offset = addValue(state,
337  getValue(offset, svalBuilder),
339  index.castAs<NonLoc>(),
340  astContext.getTypeSizeInChars(elemType),
341  svalBuilder),
342  svalBuilder);
343 
344  if (offset.isUnknownOrUndef())
345  return RegionRawOffsetV2();
346 
347  region = elemReg->getSuperRegion();
348  continue;
349  }
350  }
351  }
352  return RegionRawOffsetV2();
353 }
354 
355 void ento::registerArrayBoundCheckerV2(CheckerManager &mgr) {
356  mgr.registerChecker<ArrayBoundCheckerV2>();
357 }
358 
359 bool ento::shouldRegisterArrayBoundCheckerV2(const CheckerManager &mgr) {
360  return true;
361 }
clang::ASTContext::getTypeSizeInChars
CharUnits getTypeSizeInChars(QualType T) const
Return the size of the specified (complete) type T, in characters.
Definition: ASTContext.cpp:2411
DynamicExtent.h
clang::ento::ProgramStateRef
IntrusiveRefCntPtr< const ProgramState > ProgramStateRef
Definition: ProgramState_Fwd.h:37
clang::QualType
A (possibly-)qualified type.
Definition: Type.h:661
llvm::Optional
Definition: LLVM.h:40
clang::ento::taint::isTainted
bool isTainted(ProgramStateRef State, const Stmt *S, const LocationContext *LCtx, TaintTagType Kind=TaintTagGeneric)
Check if the statement has a tainted value in the given state.
getValue
static SVal getValue(SVal val, SValBuilder &svalBuilder)
Definition: ArrayBoundCheckerV2.cpp:277
computeExtentBegin
static SVal computeExtentBegin(SValBuilder &svalBuilder, const MemRegion *region)
Definition: ArrayBoundCheckerV2.cpp:71
scaleValue
static SVal scaleValue(ProgramStateRef state, NonLoc baseVal, CharUnits scaling, SValBuilder &sb)
Definition: ArrayBoundCheckerV2.cpp:283
APSInt
llvm::APSInt APSInt
Definition: ByteCodeEmitter.cpp:18
BuiltinCheckerRegistration.h
CheckerManager.h
clang::ASTContext
Holds long-lived AST nodes (such as types and decls) that can be referred to throughout the semantic ...
Definition: ASTContext.h:187
llvm::SmallString
Definition: LLVM.h:37
x
IRgen optimization opportunities The common pattern of short x
Definition: README.txt:7
state
and static some checkers Checker The latter are built on top of the former via the Checker and CheckerVisitor and attempts to isolate them from much of the gore of the internal analysis the analyzer is basically a source code simulator that traces out possible paths of execution The state of the and the combination of state and program point is a node in an exploded which has the entry program point and initial state
Definition: README.txt:30
CharUnits.h
clang::syntax::NodeRole::Size
@ Size
clang::ento::SymIntExpr
BinarySymExprImpl< const SymExpr *, const llvm::APSInt &, SymExpr::Kind::SymIntExprKind > SymIntExpr
Represents a symbolic expression like 'x' + 3.
Definition: SymbolManager.h:407
Taint.h
BugType.h
dump
static void dump(llvm::raw_ostream &OS, StringRef FunctionName, ArrayRef< CounterExpression > Expressions, ArrayRef< CounterMappingRegion > Regions)
Definition: CoverageMappingGen.cpp:1521
APSIntType.h
CheckerContext.h
clang::ento::getDynamicExtent
DefinedOrUnknownSVal getDynamicExtent(ProgramStateRef State, const MemRegion *MR, SValBuilder &SVB)
Checker.h
ExprEngine.h
clang
Dataflow Directional Tag Classes.
Definition: CalledOnceCheck.h:17
clang::Stmt
Stmt - This represents one statement.
Definition: Stmt.h:68
getSimplifiedOffsets
static std::pair< NonLoc, nonloc::ConcreteInt > getSimplifiedOffsets(NonLoc offset, nonloc::ConcreteInt extent, SValBuilder &svalBuilder)
Definition: ArrayBoundCheckerV2.cpp:85
addValue
static SVal addValue(ProgramStateRef state, SVal x, SVal y, SValBuilder &svalBuilder)
Definition: ArrayBoundCheckerV2.cpp:293
clang::CharUnits
CharUnits - This is an opaque type for sizes expressed in character units.
Definition: CharUnits.h:38
clang::Type::isIncompleteType
bool isIncompleteType(NamedDecl **Def=nullptr) const
Types are partitioned into 3 broad categories (C99 6.2.5p1): object types, function types,...
Definition: Type.cpp:2207
clang::diag::kind
unsigned kind
All of the diagnostics that can be emitted by the frontend.
Definition: DiagnosticIDs.h:60
clang::CharUnits::getQuantity
QuantityType getQuantity() const
getQuantity - Get the raw integer representation of this quantity.
Definition: CharUnits.h:179