clang  7.0.0svn
PointerArithChecker.cpp
Go to the documentation of this file.
1 //=== PointerArithChecker.cpp - Pointer arithmetic checker -----*- C++ -*--===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file defines PointerArithChecker, a builtin checker that checks for
11 // pointer arithmetic on locations other than array elements.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "ClangSACheckers.h"
16 #include "clang/AST/DeclCXX.h"
17 #include "clang/AST/ExprCXX.h"
22 
23 using namespace clang;
24 using namespace ento;
25 
namespace {
/// What the checker has recorded about how a memory region was allocated.
///
/// The enumerator order is significant: the underlying integer value is what
/// gets profiled for this type, so new kinds should be appended, not inserted.
enum class AllocKind {
  /// Allocated as one object (e.g. a non-array new-expression).
  SingleObject,
  /// Allocated as, or assumed to be, an array.
  Array,
  /// Nothing is assumed about the allocation.
  Unknown,
  /// Single object interpreted as an array.
  Reinterpreted
};
} // end anonymous namespace
34 
35 namespace llvm {
36 template <> struct FoldingSetTrait<AllocKind> {
37  static inline void Profile(AllocKind X, FoldingSetNodeID &ID) {
38  ID.AddInteger(static_cast<int>(X));
39  }
40 };
41 } // end namespace llvm
42 
43 namespace {
44 class PointerArithChecker
45  : public Checker<
46  check::PreStmt<BinaryOperator>, check::PreStmt<UnaryOperator>,
47  check::PreStmt<ArraySubscriptExpr>, check::PreStmt<CastExpr>,
48  check::PostStmt<CastExpr>, check::PostStmt<CXXNewExpr>,
49  check::PostStmt<CallExpr>, check::DeadSymbols> {
50  AllocKind getKindOfNewOp(const CXXNewExpr *NE, const FunctionDecl *FD) const;
51  const MemRegion *getArrayRegion(const MemRegion *Region, bool &Polymorphic,
52  AllocKind &AKind, CheckerContext &C) const;
53  const MemRegion *getPointedRegion(const MemRegion *Region,
54  CheckerContext &C) const;
55  void reportPointerArithMisuse(const Expr *E, CheckerContext &C,
56  bool PointedNeeded = false) const;
57  void initAllocIdentifiers(ASTContext &C) const;
58 
59  mutable std::unique_ptr<BuiltinBug> BT_pointerArith;
60  mutable std::unique_ptr<BuiltinBug> BT_polyArray;
61  mutable llvm::SmallSet<IdentifierInfo *, 8> AllocFunctions;
62 
63 public:
64  void checkPreStmt(const UnaryOperator *UOp, CheckerContext &C) const;
65  void checkPreStmt(const BinaryOperator *BOp, CheckerContext &C) const;
66  void checkPreStmt(const ArraySubscriptExpr *SubExpr, CheckerContext &C) const;
67  void checkPreStmt(const CastExpr *CE, CheckerContext &C) const;
68  void checkPostStmt(const CastExpr *CE, CheckerContext &C) const;
69  void checkPostStmt(const CXXNewExpr *NE, CheckerContext &C) const;
70  void checkPostStmt(const CallExpr *CE, CheckerContext &C) const;
71  void checkDeadSymbols(SymbolReaper &SR, CheckerContext &C) const;
72 };
73 } // end namespace
74 
76 
77 void PointerArithChecker::checkDeadSymbols(SymbolReaper &SR,
78  CheckerContext &C) const {
79  // TODO: intentional leak. Some information is garbage collected too early,
80  // see http://reviews.llvm.org/D14203 for further information.
81  /*ProgramStateRef State = C.getState();
82  RegionStateTy RegionStates = State->get<RegionState>();
83  for (RegionStateTy::iterator I = RegionStates.begin(), E = RegionStates.end();
84  I != E; ++I) {
85  if (!SR.isLiveRegion(I->first))
86  State = State->remove<RegionState>(I->first);
87  }
88  C.addTransition(State);*/
89 }
90 
91 AllocKind PointerArithChecker::getKindOfNewOp(const CXXNewExpr *NE,
92  const FunctionDecl *FD) const {
93  // This checker try not to assume anything about placement and overloaded
94  // new to avoid false positives.
95  if (isa<CXXMethodDecl>(FD))
96  return AllocKind::Unknown;
97  if (FD->getNumParams() != 1 || FD->isVariadic())
98  return AllocKind::Unknown;
99  if (NE->isArray())
100  return AllocKind::Array;
101 
102  return AllocKind::SingleObject;
103 }
104 
105 const MemRegion *
106 PointerArithChecker::getPointedRegion(const MemRegion *Region,
107  CheckerContext &C) const {
108  assert(Region);
110  SVal S = State->getSVal(Region);
111  return S.getAsRegion();
112 }
113 
114 /// Checks whether a region is the part of an array.
115 /// In case there is a dericed to base cast above the array element, the
116 /// Polymorphic output value is set to true. AKind output value is set to the
117 /// allocation kind of the inspected region.
118 const MemRegion *PointerArithChecker::getArrayRegion(const MemRegion *Region,
119  bool &Polymorphic,
120  AllocKind &AKind,
121  CheckerContext &C) const {
122  assert(Region);
123  while (Region->getKind() == MemRegion::Kind::CXXBaseObjectRegionKind) {
124  Region = Region->getAs<CXXBaseObjectRegion>()->getSuperRegion();
125  Polymorphic = true;
126  }
127  if (Region->getKind() == MemRegion::Kind::ElementRegionKind) {
128  Region = Region->getAs<ElementRegion>()->getSuperRegion();
129  }
130 
132  if (const AllocKind *Kind = State->get<RegionState>(Region)) {
133  AKind = *Kind;
134  if (*Kind == AllocKind::Array)
135  return Region;
136  else
137  return nullptr;
138  }
139  // When the region is symbolic and we do not have any information about it,
140  // assume that this is an array to avoid false positives.
141  if (Region->getKind() == MemRegion::Kind::SymbolicRegionKind)
142  return Region;
143 
144  // No AllocKind stored and not symbolic, assume that it points to a single
145  // object.
146  return nullptr;
147 }
148 
149 void PointerArithChecker::reportPointerArithMisuse(const Expr *E,
150  CheckerContext &C,
151  bool PointedNeeded) const {
152  SourceRange SR = E->getSourceRange();
153  if (SR.isInvalid())
154  return;
155 
157  const MemRegion *Region = C.getSVal(E).getAsRegion();
158  if (!Region)
159  return;
160  if (PointedNeeded)
161  Region = getPointedRegion(Region, C);
162  if (!Region)
163  return;
164 
165  bool IsPolymorphic = false;
167  if (const MemRegion *ArrayRegion =
168  getArrayRegion(Region, IsPolymorphic, Kind, C)) {
169  if (!IsPolymorphic)
170  return;
172  if (!BT_polyArray)
173  BT_polyArray.reset(new BuiltinBug(
174  this, "Dangerous pointer arithmetic",
175  "Pointer arithmetic on a pointer to base class is dangerous "
176  "because derived and base class may have different size."));
177  auto R = llvm::make_unique<BugReport>(*BT_polyArray,
178  BT_polyArray->getDescription(), N);
179  R->addRange(E->getSourceRange());
180  R->markInteresting(ArrayRegion);
181  C.emitReport(std::move(R));
182  }
183  return;
184  }
185 
186  if (Kind == AllocKind::Reinterpreted)
187  return;
188 
189  // We might not have enough information about symbolic regions.
190  if (Kind != AllocKind::SingleObject &&
191  Region->getKind() == MemRegion::Kind::SymbolicRegionKind)
192  return;
193 
195  if (!BT_pointerArith)
196  BT_pointerArith.reset(new BuiltinBug(this, "Dangerous pointer arithmetic",
197  "Pointer arithmetic on non-array "
198  "variables relies on memory layout, "
199  "which is dangerous."));
200  auto R = llvm::make_unique<BugReport>(*BT_pointerArith,
201  BT_pointerArith->getDescription(), N);
202  R->addRange(SR);
203  R->markInteresting(Region);
204  C.emitReport(std::move(R));
205  }
206 }
207 
208 void PointerArithChecker::initAllocIdentifiers(ASTContext &C) const {
209  if (!AllocFunctions.empty())
210  return;
211  AllocFunctions.insert(&C.Idents.get("alloca"));
212  AllocFunctions.insert(&C.Idents.get("malloc"));
213  AllocFunctions.insert(&C.Idents.get("realloc"));
214  AllocFunctions.insert(&C.Idents.get("calloc"));
215  AllocFunctions.insert(&C.Idents.get("valloc"));
216 }
217 
218 void PointerArithChecker::checkPostStmt(const CallExpr *CE,
219  CheckerContext &C) const {
221  const FunctionDecl *FD = C.getCalleeDecl(CE);
222  if (!FD)
223  return;
224  IdentifierInfo *FunI = FD->getIdentifier();
225  initAllocIdentifiers(C.getASTContext());
226  if (AllocFunctions.count(FunI) == 0)
227  return;
228 
229  SVal SV = C.getSVal(CE);
230  const MemRegion *Region = SV.getAsRegion();
231  if (!Region)
232  return;
233  // Assume that C allocation functions allocate arrays to avoid false
234  // positives.
235  // TODO: Add heuristics to distinguish alloc calls that allocates single
236  // objecs.
237  State = State->set<RegionState>(Region, AllocKind::Array);
238  C.addTransition(State);
239 }
240 
241 void PointerArithChecker::checkPostStmt(const CXXNewExpr *NE,
242  CheckerContext &C) const {
243  const FunctionDecl *FD = NE->getOperatorNew();
244  if (!FD)
245  return;
246 
247  AllocKind Kind = getKindOfNewOp(NE, FD);
248 
250  SVal AllocedVal = C.getSVal(NE);
251  const MemRegion *Region = AllocedVal.getAsRegion();
252  if (!Region)
253  return;
254  State = State->set<RegionState>(Region, Kind);
255  C.addTransition(State);
256 }
257 
258 void PointerArithChecker::checkPostStmt(const CastExpr *CE,
259  CheckerContext &C) const {
260  if (CE->getCastKind() != CastKind::CK_BitCast)
261  return;
262 
263  const Expr *CastedExpr = CE->getSubExpr();
265  SVal CastedVal = C.getSVal(CastedExpr);
266 
267  const MemRegion *Region = CastedVal.getAsRegion();
268  if (!Region)
269  return;
270 
271  // Suppress reinterpret casted hits.
272  State = State->set<RegionState>(Region, AllocKind::Reinterpreted);
273  C.addTransition(State);
274 }
275 
276 void PointerArithChecker::checkPreStmt(const CastExpr *CE,
277  CheckerContext &C) const {
278  if (CE->getCastKind() != CastKind::CK_ArrayToPointerDecay)
279  return;
280 
281  const Expr *CastedExpr = CE->getSubExpr();
283  SVal CastedVal = C.getSVal(CastedExpr);
284 
285  const MemRegion *Region = CastedVal.getAsRegion();
286  if (!Region)
287  return;
288 
289  if (const AllocKind *Kind = State->get<RegionState>(Region)) {
290  if (*Kind == AllocKind::Array || *Kind == AllocKind::Reinterpreted)
291  return;
292  }
293  State = State->set<RegionState>(Region, AllocKind::Array);
294  C.addTransition(State);
295 }
296 
297 void PointerArithChecker::checkPreStmt(const UnaryOperator *UOp,
298  CheckerContext &C) const {
299  if (!UOp->isIncrementDecrementOp() || !UOp->getType()->isPointerType())
300  return;
301  reportPointerArithMisuse(UOp->getSubExpr(), C, true);
302 }
303 
304 void PointerArithChecker::checkPreStmt(const ArraySubscriptExpr *SubsExpr,
305  CheckerContext &C) const {
306  SVal Idx = C.getSVal(SubsExpr->getIdx());
307 
308  // Indexing with 0 is OK.
309  if (Idx.isZeroConstant())
310  return;
311 
312  // Indexing vector-type expressions is also OK.
313  if (SubsExpr->getBase()->getType()->isVectorType())
314  return;
315  reportPointerArithMisuse(SubsExpr->getBase(), C);
316 }
317 
318 void PointerArithChecker::checkPreStmt(const BinaryOperator *BOp,
319  CheckerContext &C) const {
320  BinaryOperatorKind OpKind = BOp->getOpcode();
321  if (!BOp->isAdditiveOp() && OpKind != BO_AddAssign && OpKind != BO_SubAssign)
322  return;
323 
324  const Expr *Lhs = BOp->getLHS();
325  const Expr *Rhs = BOp->getRHS();
327 
328  if (Rhs->getType()->isIntegerType() && Lhs->getType()->isPointerType()) {
329  SVal RHSVal = C.getSVal(Rhs);
330  if (State->isNull(RHSVal).isConstrainedTrue())
331  return;
332  reportPointerArithMisuse(Lhs, C, !BOp->isAdditiveOp());
333  }
334  // The int += ptr; case is not valid C++.
335  if (Lhs->getType()->isIntegerType() && Rhs->getType()->isPointerType()) {
336  SVal LHSVal = C.getSVal(Lhs);
337  if (State->isNull(LHSVal).isConstrainedTrue())
338  return;
339  reportPointerArithMisuse(Rhs, C);
340  }
341 }
342 
343 void ento::registerPointerArithChecker(CheckerManager &mgr) {
344  mgr.registerChecker<PointerArithChecker>();
345 }
Represents a function declaration or definition.
Definition: Decl.h:1714
MemRegion - The root abstract class for all memory regions.
Definition: MemRegion.h:94
DominatorTree GraphTraits specialization so the DominatorTree can be iterable by generic graph iterat...
Definition: Dominators.h:30
ExplodedNode * addTransition(ProgramStateRef State=nullptr, const ProgramPointTag *Tag=nullptr)
Generates a new transition in the program state graph (ExplodedGraph).
FunctionDecl * getOperatorNew() const
Definition: ExprCXX.h:1946
Opcode getOpcode() const
Definition: Expr.h:3110
const FunctionDecl * getCalleeDecl(const CallExpr *CE) const
Get the declaration of the called function (path-sensitive).
SVal getSVal(const Stmt *S) const
Get the value of arbitrary expressions at this point in the path.
Defines the clang::Expr interface and subclasses for C++ expressions.
IdentifierInfo * getIdentifier() const
Get the identifier that names this declaration, if there is one.
Definition: Decl.h:269
One of these records is kept for each identifier that is lexed.
Holds long-lived AST nodes (such as types and decls) that can be referred to throughout the semantic ...
Definition: ASTContext.h:150
LineState State
static void Profile(AllocKind X, FoldingSetNodeID &ID)
static bool isIncrementDecrementOp(Opcode Op)
Definition: Expr.h:1852
Expr * getSubExpr()
Definition: Expr.h:2841
IdentifierTable & Idents
Definition: ASTContext.h:539
BinaryOperatorKind
A builtin binary operation expression such as "x + y" or "x <= y".
Definition: Expr.h:3069
CastExpr - Base class for type casts, including both implicit casts (ImplicitCastExpr) and explicit c...
Definition: Expr.h:2788
#define REGISTER_MAP_WITH_PROGRAMSTATE(Name, Key, Value)
Declares an immutable map of type NameTy, suitable for placement into the ProgramState.
const RegionTy * getAs() const
Definition: MemRegion.h:1180
Expr - This represents one expression.
Definition: Expr.h:106
bool isVariadic() const
Whether this function is variadic.
Definition: Decl.cpp:2620
QualType getType() const
Definition: Expr.h:128
bool isInvalid() const
UnaryOperator - This represents the unary-expression's (except sizeof and alignof), the postinc/postdec operators from postfix-expression, and various extensions.
Definition: Expr.h:1782
ExplodedNode * generateNonFatalErrorNode(ProgramStateRef State=nullptr, const ProgramPointTag *Tag=nullptr)
Generate a transition to a node that will be used to report an error.
CHECKER * registerChecker(AT... Args)
Used to register checkers.
void emitReport(std::unique_ptr< BugReport > R)
Emit the diagnostics report.
Kind getKind() const
Definition: MemRegion.h:165
Kind
IdentifierInfo & get(StringRef Name)
Return the identifier token info for the specified named identifier.
Expr * getSubExpr() const
Definition: Expr.h:1809
CastKind getCastKind() const
Definition: Expr.h:2835
const MemRegion * getAsRegion() const
Definition: SVals.cpp:151
Represents a new-expression for memory allocation and constructor calls, e.g: "new CXXNewExpr(foo)"...
Definition: ExprCXX.h:1846
SVal - This represents a symbolic expression, which can be either an L-value or an R-value...
Definition: SVals.h:76
bool isArray() const
Definition: ExprCXX.h:1951
A class responsible for cleaning up unused symbols.
bool isVectorType() const
Definition: Type.h:6191
Expr * getLHS() const
Definition: Expr.h:3113
Dataflow Directional Tag Classes.
bool isZeroConstant() const
Definition: SVals.cpp:230
ArraySubscriptExpr - [C99 6.5.2.1] Array Subscripting.
Definition: Expr.h:2195
bool isIntegerType() const
isIntegerType() does not include complex integers (a GCC extension).
Definition: Type.h:6367
const ProgramStateRef & getState() const
static bool isAdditiveOp(Opcode Opc)
Definition: Expr.h:3145
X
Add a minimal nested name specifier fixit hint to allow lookup of a tag name from an outer enclosing ...
Definition: SemaDecl.cpp:13719
Defines the C++ Decl subclasses, other than those for templates (found in DeclTemplate.h) and friends (in DeclFriend.h).
SourceRange getSourceRange() const LLVM_READONLY
SourceLocation tokens are not useful in isolation - they are low level value objects created/interpre...
Definition: Stmt.cpp:266
CallExpr - Represents a function call (C99 6.5.2.2, C++ [expr.call]).
Definition: Expr.h:2283
ElementRegion is used to represent both array elements and casts.
Definition: MemRegion.h:1076
Expr * getRHS() const
Definition: Expr.h:3115
bool isPointerType() const
Definition: Type.h:6106
A trivial tuple used to represent a source range.
unsigned getNumParams() const
Return the number of parameters this function must have based on its FunctionType.
Definition: Decl.cpp:2971