clang  16.0.0git
ArrayBoundCheckerV2.cpp
Go to the documentation of this file.
1 //== ArrayBoundCheckerV2.cpp ------------------------------------*- C++ -*--==//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines ArrayBoundCheckerV2, which is a path-sensitive check
10 // which looks for an out-of-bound array element access.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "clang/AST/CharUnits.h"
24 #include "llvm/ADT/SmallString.h"
25 #include "llvm/Support/raw_ostream.h"
26 
27 using namespace clang;
28 using namespace ento;
29 using namespace taint;
30 
31 namespace {
32 class ArrayBoundCheckerV2 :
33  public Checker<check::Location> {
34  mutable std::unique_ptr<BuiltinBug> BT;
35 
36  enum OOB_Kind { OOB_Precedes, OOB_Excedes, OOB_Tainted };
37 
38  void reportOOB(CheckerContext &C, ProgramStateRef errorState, OOB_Kind kind,
39  std::unique_ptr<BugReporterVisitor> Visitor = nullptr) const;
40 
41 public:
42  void checkLocation(SVal l, bool isLoad, const Stmt*S,
43  CheckerContext &C) const;
44 };
45 
46 // FIXME: Eventually replace RegionRawOffset with this class.
47 class RegionRawOffsetV2 {
48 private:
49  const SubRegion *baseRegion;
50  SVal byteOffset;
51 
52  RegionRawOffsetV2()
53  : baseRegion(nullptr), byteOffset(UnknownVal()) {}
54 
55 public:
56  RegionRawOffsetV2(const SubRegion* base, SVal offset)
57  : baseRegion(base), byteOffset(offset) {}
58 
59  NonLoc getByteOffset() const { return byteOffset.castAs<NonLoc>(); }
60  const SubRegion *getRegion() const { return baseRegion; }
61 
62  static RegionRawOffsetV2 computeOffset(ProgramStateRef state,
63  SValBuilder &svalBuilder,
64  SVal location);
65 
66  void dump() const;
67  void dumpToStream(raw_ostream &os) const;
68 };
69 }
70 
71 static SVal computeExtentBegin(SValBuilder &svalBuilder,
72  const MemRegion *region) {
73  const MemSpaceRegion *SR = region->getMemorySpace();
74  if (SR->getKind() == MemRegion::UnknownSpaceRegionKind)
75  return UnknownVal();
76  else
77  return svalBuilder.makeZeroArrayIndex();
78 }
79 
80 // TODO: once the constraint manager is smart enough to handle non simplified
81 // symbolic expressions remove this function. Note that this can not be used in
82 // the constraint manager as is, since this does not handle overflows. It is
83 // safe to assume, however, that memory offsets will not overflow.
84 static std::pair<NonLoc, nonloc::ConcreteInt>
85 getSimplifiedOffsets(NonLoc offset, nonloc::ConcreteInt extent,
86  SValBuilder &svalBuilder) {
87  Optional<nonloc::SymbolVal> SymVal = offset.getAs<nonloc::SymbolVal>();
88  if (SymVal && SymVal->isExpression()) {
89  if (const SymIntExpr *SIE = dyn_cast<SymIntExpr>(SymVal->getSymbol())) {
90  llvm::APSInt constant =
91  APSIntType(extent.getValue()).convert(SIE->getRHS());
92  switch (SIE->getOpcode()) {
93  case BO_Mul:
94  // The constant should never be 0 here, since it the result of scaling
95  // based on the size of a type which is never 0.
96  if ((extent.getValue() % constant) != 0)
97  return std::pair<NonLoc, nonloc::ConcreteInt>(offset, extent);
98  else
99  return getSimplifiedOffsets(
100  nonloc::SymbolVal(SIE->getLHS()),
101  svalBuilder.makeIntVal(extent.getValue() / constant),
102  svalBuilder);
103  case BO_Add:
104  return getSimplifiedOffsets(
105  nonloc::SymbolVal(SIE->getLHS()),
106  svalBuilder.makeIntVal(extent.getValue() - constant), svalBuilder);
107  default:
108  break;
109  }
110  }
111  }
112 
113  return std::pair<NonLoc, nonloc::ConcreteInt>(offset, extent);
114 }
115 
116 void ArrayBoundCheckerV2::checkLocation(SVal location, bool isLoad,
117  const Stmt* LoadS,
118  CheckerContext &checkerContext) const {
119 
120  // NOTE: Instead of using ProgramState::assumeInBound(), we are prototyping
121  // some new logic here that reasons directly about memory region extents.
122  // Once that logic is more mature, we can bring it back to assumeInBound()
123  // for all clients to use.
124  //
125  // The algorithm we are using here for bounds checking is to see if the
126  // memory access is within the extent of the base region. Since we
127  // have some flexibility in defining the base region, we can achieve
128  // various levels of conservatism in our buffer overflow checking.
129  ProgramStateRef state = checkerContext.getState();
130 
131  SValBuilder &svalBuilder = checkerContext.getSValBuilder();
132  const RegionRawOffsetV2 &rawOffset =
133  RegionRawOffsetV2::computeOffset(state, svalBuilder, location);
134 
135  if (!rawOffset.getRegion())
136  return;
137 
138  NonLoc rawOffsetVal = rawOffset.getByteOffset();
139 
140  // CHECK LOWER BOUND: Is byteOffset < extent begin?
141  // If so, we are doing a load/store
142  // before the first valid offset in the memory region.
143 
144  SVal extentBegin = computeExtentBegin(svalBuilder, rawOffset.getRegion());
145 
146  if (Optional<NonLoc> NV = extentBegin.getAs<NonLoc>()) {
147  if (auto ConcreteNV = NV->getAs<nonloc::ConcreteInt>()) {
148  std::pair<NonLoc, nonloc::ConcreteInt> simplifiedOffsets =
149  getSimplifiedOffsets(rawOffset.getByteOffset(), *ConcreteNV,
150  svalBuilder);
151  rawOffsetVal = simplifiedOffsets.first;
152  *NV = simplifiedOffsets.second;
153  }
154 
155  SVal lowerBound = svalBuilder.evalBinOpNN(state, BO_LT, rawOffsetVal, *NV,
156  svalBuilder.getConditionType());
157 
158  Optional<NonLoc> lowerBoundToCheck = lowerBound.getAs<NonLoc>();
159  if (!lowerBoundToCheck)
160  return;
161 
162  ProgramStateRef state_precedesLowerBound, state_withinLowerBound;
163  std::tie(state_precedesLowerBound, state_withinLowerBound) =
164  state->assume(*lowerBoundToCheck);
165 
166  // Are we constrained enough to definitely precede the lower bound?
167  if (state_precedesLowerBound && !state_withinLowerBound) {
168  reportOOB(checkerContext, state_precedesLowerBound, OOB_Precedes);
169  return;
170  }
171 
172  // Otherwise, assume the constraint of the lower bound.
173  assert(state_withinLowerBound);
174  state = state_withinLowerBound;
175  }
176 
177  do {
178  // CHECK UPPER BOUND: Is byteOffset >= size(baseRegion)? If so,
179  // we are doing a load/store after the last valid offset.
180  const MemRegion *MR = rawOffset.getRegion();
181  DefinedOrUnknownSVal Size = getDynamicExtent(state, MR, svalBuilder);
182  if (!isa<NonLoc>(Size))
183  break;
184 
185  if (auto ConcreteSize = Size.getAs<nonloc::ConcreteInt>()) {
186  std::pair<NonLoc, nonloc::ConcreteInt> simplifiedOffsets =
187  getSimplifiedOffsets(rawOffset.getByteOffset(), *ConcreteSize,
188  svalBuilder);
189  rawOffsetVal = simplifiedOffsets.first;
190  Size = simplifiedOffsets.second;
191  }
192 
193  SVal upperbound = svalBuilder.evalBinOpNN(state, BO_GE, rawOffsetVal,
194  Size.castAs<NonLoc>(),
195  svalBuilder.getConditionType());
196 
197  Optional<NonLoc> upperboundToCheck = upperbound.getAs<NonLoc>();
198  if (!upperboundToCheck)
199  break;
200 
201  ProgramStateRef state_exceedsUpperBound, state_withinUpperBound;
202  std::tie(state_exceedsUpperBound, state_withinUpperBound) =
203  state->assume(*upperboundToCheck);
204 
205  // If we are under constrained and the index variables are tainted, report.
206  if (state_exceedsUpperBound && state_withinUpperBound) {
207  SVal ByteOffset = rawOffset.getByteOffset();
208  if (isTainted(state, ByteOffset)) {
209  reportOOB(checkerContext, state_exceedsUpperBound, OOB_Tainted,
210  std::make_unique<TaintBugVisitor>(ByteOffset));
211  return;
212  }
213  } else if (state_exceedsUpperBound) {
214  // If we are constrained enough to definitely exceed the upper bound,
215  // report.
216  assert(!state_withinUpperBound);
217  reportOOB(checkerContext, state_exceedsUpperBound, OOB_Excedes);
218  return;
219  }
220 
221  assert(state_withinUpperBound);
222  state = state_withinUpperBound;
223  }
224  while (false);
225 
226  checkerContext.addTransition(state);
227 }
228 
229 void ArrayBoundCheckerV2::reportOOB(
230  CheckerContext &checkerContext, ProgramStateRef errorState, OOB_Kind kind,
231  std::unique_ptr<BugReporterVisitor> Visitor) const {
232 
233  ExplodedNode *errorNode = checkerContext.generateErrorNode(errorState);
234  if (!errorNode)
235  return;
236 
237  if (!BT)
238  BT.reset(new BuiltinBug(this, "Out-of-bound access"));
239 
240  // FIXME: This diagnostics are preliminary. We should get far better
241  // diagnostics for explaining buffer overruns.
242 
243  SmallString<256> buf;
244  llvm::raw_svector_ostream os(buf);
245  os << "Out of bound memory access ";
246  switch (kind) {
247  case OOB_Precedes:
248  os << "(accessed memory precedes memory block)";
249  break;
250  case OOB_Excedes:
251  os << "(access exceeds upper limit of memory block)";
252  break;
253  case OOB_Tainted:
254  os << "(index is tainted)";
255  break;
256  }
257 
258  auto BR = std::make_unique<PathSensitiveBugReport>(*BT, os.str(), errorNode);
259  BR->addVisitor(std::move(Visitor));
260  checkerContext.emitReport(std::move(BR));
261 }
262 
263 #ifndef NDEBUG
264 LLVM_DUMP_METHOD void RegionRawOffsetV2::dump() const {
265  dumpToStream(llvm::errs());
266 }
267 
268 void RegionRawOffsetV2::dumpToStream(raw_ostream &os) const {
269  os << "raw_offset_v2{" << getRegion() << ',' << getByteOffset() << '}';
270 }
271 #endif
272 
273 // Lazily computes a value to be used by 'computeOffset'. If 'val'
274 // is unknown or undefined, we lazily substitute '0'. Otherwise,
275 // return 'val'.
276 static inline SVal getValue(SVal val, SValBuilder &svalBuilder) {
277  return val.isUndef() ? svalBuilder.makeZeroArrayIndex() : val;
278 }
279 
280 // Scale a base value by a scaling factor, and return the scaled
281 // value as an SVal. Used by 'computeOffset'.
282 static inline SVal scaleValue(ProgramStateRef state,
283  NonLoc baseVal, CharUnits scaling,
284  SValBuilder &sb) {
285  return sb.evalBinOpNN(state, BO_Mul, baseVal,
286  sb.makeArrayIndex(scaling.getQuantity()),
287  sb.getArrayIndexType());
288 }
289 
290 // Add an SVal to another, treating unknown and undefined values as
291 // summing to UnknownVal. Used by 'computeOffset'.
292 static SVal addValue(ProgramStateRef state, SVal x, SVal y,
293  SValBuilder &svalBuilder) {
294  // We treat UnknownVals and UndefinedVals the same here because we
295  // only care about computing offsets.
296  if (x.isUnknownOrUndef() || y.isUnknownOrUndef())
297  return UnknownVal();
298 
299  return svalBuilder.evalBinOpNN(state, BO_Add, x.castAs<NonLoc>(),
300  y.castAs<NonLoc>(),
301  svalBuilder.getArrayIndexType());
302 }
303 
304 /// Compute a raw byte offset from a base region. Used for array bounds
305 /// checking.
306 RegionRawOffsetV2 RegionRawOffsetV2::computeOffset(ProgramStateRef state,
307  SValBuilder &svalBuilder,
308  SVal location)
309 {
310  const MemRegion *region = location.getAsRegion();
311  SVal offset = UndefinedVal();
312 
313  while (region) {
314  switch (region->getKind()) {
315  default: {
316  if (const SubRegion *subReg = dyn_cast<SubRegion>(region)) {
317  offset = getValue(offset, svalBuilder);
318  if (!offset.isUnknownOrUndef())
319  return RegionRawOffsetV2(subReg, offset);
320  }
321  return RegionRawOffsetV2();
322  }
323  case MemRegion::ElementRegionKind: {
324  const ElementRegion *elemReg = cast<ElementRegion>(region);
325  SVal index = elemReg->getIndex();
326  if (!isa<NonLoc>(index))
327  return RegionRawOffsetV2();
328  QualType elemType = elemReg->getElementType();
329  // If the element is an incomplete type, go no further.
330  ASTContext &astContext = svalBuilder.getContext();
331  if (elemType->isIncompleteType())
332  return RegionRawOffsetV2();
333 
334  // Update the offset.
335  offset = addValue(state,
336  getValue(offset, svalBuilder),
338  index.castAs<NonLoc>(),
339  astContext.getTypeSizeInChars(elemType),
340  svalBuilder),
341  svalBuilder);
342 
343  if (offset.isUnknownOrUndef())
344  return RegionRawOffsetV2();
345 
346  region = elemReg->getSuperRegion();
347  continue;
348  }
349  }
350  }
351  return RegionRawOffsetV2();
352 }
353 
354 void ento::registerArrayBoundCheckerV2(CheckerManager &mgr) {
355  mgr.registerChecker<ArrayBoundCheckerV2>();
356 }
357 
358 bool ento::shouldRegisterArrayBoundCheckerV2(const CheckerManager &mgr) {
359  return true;
360 }
clang::ASTContext::getTypeSizeInChars
CharUnits getTypeSizeInChars(QualType T) const
Return the size of the specified (complete) type T, in characters.
Definition: ASTContext.cpp:2515
DynamicExtent.h
clang::ento::ProgramStateRef
IntrusiveRefCntPtr< const ProgramState > ProgramStateRef
Definition: ProgramState_Fwd.h:37
clang::QualType
A (possibly-)qualified type.
Definition: Type.h:737
llvm::Optional
Definition: LLVM.h:40
clang::ento::taint::isTainted
bool isTainted(ProgramStateRef State, const Stmt *S, const LocationContext *LCtx, TaintTagType Kind=TaintTagGeneric)
Check if the statement has a tainted value in the given state.
getValue
static SVal getValue(SVal val, SValBuilder &svalBuilder)
Definition: ArrayBoundCheckerV2.cpp:276
computeExtentBegin
static SVal computeExtentBegin(SValBuilder &svalBuilder, const MemRegion *region)
Definition: ArrayBoundCheckerV2.cpp:71
scaleValue
static SVal scaleValue(ProgramStateRef state, NonLoc baseVal, CharUnits scaling, SValBuilder &sb)
Definition: ArrayBoundCheckerV2.cpp:282
APSInt
llvm::APSInt APSInt
Definition: ByteCodeEmitter.cpp:19
BuiltinCheckerRegistration.h
CheckerManager.h
clang::ASTContext
Holds long-lived AST nodes (such as types and decls) that can be referred to throughout the semantic ...
Definition: ASTContext.h:209
llvm::SmallString
Definition: LLVM.h:37
x
IRgen optimization opportunities The common pattern of short x
Definition: README.txt:7
state
and static some checkers Checker The latter are built on top of the former via the Checker and CheckerVisitor and attempts to isolate them from much of the gore of the internal analysis the analyzer is basically a source code simulator that traces out possible paths of execution The state of the and the combination of state and program point is a node in an exploded which has the entry program point and initial state
Definition: README.txt:30
CharUnits.h
clang::syntax::NodeRole::Size
@ Size
clang::ento::SymIntExpr
BinarySymExprImpl< const SymExpr *, const llvm::APSInt &, SymExpr::Kind::SymIntExprKind > SymIntExpr
Represents a symbolic expression like 'x' + 3.
Definition: SymbolManager.h:471
Taint.h
BugType.h
dump
static void dump(llvm::raw_ostream &OS, StringRef FunctionName, ArrayRef< CounterExpression > Expressions, ArrayRef< CounterMappingRegion > Regions)
Definition: CoverageMappingGen.cpp:1539
APSIntType.h
CheckerContext.h
clang::ento::getDynamicExtent
DefinedOrUnknownSVal getDynamicExtent(ProgramStateRef State, const MemRegion *MR, SValBuilder &SVB)
Checker.h
ExprEngine.h
clang
Definition: CalledOnceCheck.h:17
clang::Stmt
Stmt - This represents one statement.
Definition: Stmt.h:71
getSimplifiedOffsets
static std::pair< NonLoc, nonloc::ConcreteInt > getSimplifiedOffsets(NonLoc offset, nonloc::ConcreteInt extent, SValBuilder &svalBuilder)
Definition: ArrayBoundCheckerV2.cpp:85
addValue
static SVal addValue(ProgramStateRef state, SVal x, SVal y, SValBuilder &svalBuilder)
Definition: ArrayBoundCheckerV2.cpp:292
clang::CharUnits
CharUnits - This is an opaque type for sizes expressed in character units.
Definition: CharUnits.h:38
clang::Type::isIncompleteType
bool isIncompleteType(NamedDecl **Def=nullptr) const
Types are partitioned into 3 broad categories (C99 6.2.5p1): object types, function types,...
Definition: Type.cpp:2235
clang::diag::kind
unsigned kind
All of the diagnostics that can be emitted by the frontend.
Definition: DiagnosticIDs.h:62
clang::CharUnits::getQuantity
QuantityType getQuantity() const
getQuantity - Get the raw integer representation of this quantity.
Definition: CharUnits.h:179