clang  6.0.0svn
PointerArithChecker.cpp
Go to the documentation of this file.
1 //=== PointerArithChecker.cpp - Pointer arithmetic checker -----*- C++ -*--===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file defines PointerArithChecker, a builtin checker that checks for
11 // pointer arithmetic on locations other than array elements.
12 //
13 //===----------------------------------------------------------------------===//
14 
#include "ClangSACheckers.h"
#include "clang/AST/DeclCXX.h"
#include "clang/AST/ExprCXX.h"
#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
#include "clang/StaticAnalyzer/Core/Checker.h"
#include "clang/StaticAnalyzer/Core/CheckerManager.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
22 
23 using namespace clang;
24 using namespace ento;
25 
namespace {
/// Classification of a memory region as recorded by this checker.
///
/// The enumerator order is significant: the FoldingSetTrait specialization
/// for this type profiles values by their underlying integer.
enum class AllocKind {
  /// Result of a plain, non-array `new`.
  SingleObject,
  /// Array `new`, a C allocation function, or an array that decayed to a
  /// pointer.
  Array,
  /// Placement or overloaded `new`; nothing is assumed about the region.
  Unknown,
  /// Single object interpreted as an array.
  Reinterpreted
};
} // end anonymous namespace
34 
35 namespace llvm {
36 template <> struct FoldingSetTrait<AllocKind> {
37  static inline void Profile(AllocKind X, FoldingSetNodeID &ID) {
38  ID.AddInteger(static_cast<int>(X));
39  }
40 };
41 } // end namespace llvm
42 
43 namespace {
44 class PointerArithChecker
45  : public Checker<
46  check::PreStmt<BinaryOperator>, check::PreStmt<UnaryOperator>,
47  check::PreStmt<ArraySubscriptExpr>, check::PreStmt<CastExpr>,
48  check::PostStmt<CastExpr>, check::PostStmt<CXXNewExpr>,
49  check::PostStmt<CallExpr>, check::DeadSymbols> {
50  AllocKind getKindOfNewOp(const CXXNewExpr *NE, const FunctionDecl *FD) const;
51  const MemRegion *getArrayRegion(const MemRegion *Region, bool &Polymorphic,
52  AllocKind &AKind, CheckerContext &C) const;
53  const MemRegion *getPointedRegion(const MemRegion *Region,
54  CheckerContext &C) const;
55  void reportPointerArithMisuse(const Expr *E, CheckerContext &C,
56  bool PointedNeeded = false) const;
57  void initAllocIdentifiers(ASTContext &C) const;
58 
59  mutable std::unique_ptr<BuiltinBug> BT_pointerArith;
60  mutable std::unique_ptr<BuiltinBug> BT_polyArray;
61  mutable llvm::SmallSet<IdentifierInfo *, 8> AllocFunctions;
62 
63 public:
64  void checkPreStmt(const UnaryOperator *UOp, CheckerContext &C) const;
65  void checkPreStmt(const BinaryOperator *BOp, CheckerContext &C) const;
66  void checkPreStmt(const ArraySubscriptExpr *SubExpr, CheckerContext &C) const;
67  void checkPreStmt(const CastExpr *CE, CheckerContext &C) const;
68  void checkPostStmt(const CastExpr *CE, CheckerContext &C) const;
69  void checkPostStmt(const CXXNewExpr *NE, CheckerContext &C) const;
70  void checkPostStmt(const CallExpr *CE, CheckerContext &C) const;
71  void checkDeadSymbols(SymbolReaper &SR, CheckerContext &C) const;
72 };
73 } // end namespace
74 
76 
77 void PointerArithChecker::checkDeadSymbols(SymbolReaper &SR,
78  CheckerContext &C) const {
79  // TODO: intentional leak. Some information is garbage collected too early,
80  // see http://reviews.llvm.org/D14203 for further information.
81  /*ProgramStateRef State = C.getState();
82  RegionStateTy RegionStates = State->get<RegionState>();
83  for (RegionStateTy::iterator I = RegionStates.begin(), E = RegionStates.end();
84  I != E; ++I) {
85  if (!SR.isLiveRegion(I->first))
86  State = State->remove<RegionState>(I->first);
87  }
88  C.addTransition(State);*/
89 }
90 
91 AllocKind PointerArithChecker::getKindOfNewOp(const CXXNewExpr *NE,
92  const FunctionDecl *FD) const {
93  // This checker try not to assume anything about placement and overloaded
94  // new to avoid false positives.
95  if (isa<CXXMethodDecl>(FD))
96  return AllocKind::Unknown;
97  if (FD->getNumParams() != 1 || FD->isVariadic())
98  return AllocKind::Unknown;
99  if (NE->isArray())
100  return AllocKind::Array;
101 
102  return AllocKind::SingleObject;
103 }
104 
105 const MemRegion *
106 PointerArithChecker::getPointedRegion(const MemRegion *Region,
107  CheckerContext &C) const {
108  assert(Region);
110  SVal S = State->getSVal(Region);
111  return S.getAsRegion();
112 }
113 
114 /// Checks whether a region is the part of an array.
115 /// In case there is a dericed to base cast above the array element, the
116 /// Polymorphic output value is set to true. AKind output value is set to the
117 /// allocation kind of the inspected region.
118 const MemRegion *PointerArithChecker::getArrayRegion(const MemRegion *Region,
119  bool &Polymorphic,
120  AllocKind &AKind,
121  CheckerContext &C) const {
122  assert(Region);
123  while (Region->getKind() == MemRegion::Kind::CXXBaseObjectRegionKind) {
124  Region = Region->getAs<CXXBaseObjectRegion>()->getSuperRegion();
125  Polymorphic = true;
126  }
127  if (Region->getKind() == MemRegion::Kind::ElementRegionKind) {
128  Region = Region->getAs<ElementRegion>()->getSuperRegion();
129  }
130 
132  if (const AllocKind *Kind = State->get<RegionState>(Region)) {
133  AKind = *Kind;
134  if (*Kind == AllocKind::Array)
135  return Region;
136  else
137  return nullptr;
138  }
139  // When the region is symbolic and we do not have any information about it,
140  // assume that this is an array to avoid false positives.
141  if (Region->getKind() == MemRegion::Kind::SymbolicRegionKind)
142  return Region;
143 
144  // No AllocKind stored and not symbolic, assume that it points to a single
145  // object.
146  return nullptr;
147 }
148 
149 void PointerArithChecker::reportPointerArithMisuse(const Expr *E,
150  CheckerContext &C,
151  bool PointedNeeded) const {
152  SourceRange SR = E->getSourceRange();
153  if (SR.isInvalid())
154  return;
155 
157  const MemRegion *Region =
158  State->getSVal(E, C.getLocationContext()).getAsRegion();
159  if (!Region)
160  return;
161  if (PointedNeeded)
162  Region = getPointedRegion(Region, C);
163  if (!Region)
164  return;
165 
166  bool IsPolymorphic = false;
168  if (const MemRegion *ArrayRegion =
169  getArrayRegion(Region, IsPolymorphic, Kind, C)) {
170  if (!IsPolymorphic)
171  return;
173  if (!BT_polyArray)
174  BT_polyArray.reset(new BuiltinBug(
175  this, "Dangerous pointer arithmetic",
176  "Pointer arithmetic on a pointer to base class is dangerous "
177  "because derived and base class may have different size."));
178  auto R = llvm::make_unique<BugReport>(*BT_polyArray,
179  BT_polyArray->getDescription(), N);
180  R->addRange(E->getSourceRange());
181  R->markInteresting(ArrayRegion);
182  C.emitReport(std::move(R));
183  }
184  return;
185  }
186 
187  if (Kind == AllocKind::Reinterpreted)
188  return;
189 
190  // We might not have enough information about symbolic regions.
191  if (Kind != AllocKind::SingleObject &&
192  Region->getKind() == MemRegion::Kind::SymbolicRegionKind)
193  return;
194 
196  if (!BT_pointerArith)
197  BT_pointerArith.reset(new BuiltinBug(this, "Dangerous pointer arithmetic",
198  "Pointer arithmetic on non-array "
199  "variables relies on memory layout, "
200  "which is dangerous."));
201  auto R = llvm::make_unique<BugReport>(*BT_pointerArith,
202  BT_pointerArith->getDescription(), N);
203  R->addRange(SR);
204  R->markInteresting(Region);
205  C.emitReport(std::move(R));
206  }
207 }
208 
209 void PointerArithChecker::initAllocIdentifiers(ASTContext &C) const {
210  if (!AllocFunctions.empty())
211  return;
212  AllocFunctions.insert(&C.Idents.get("alloca"));
213  AllocFunctions.insert(&C.Idents.get("malloc"));
214  AllocFunctions.insert(&C.Idents.get("realloc"));
215  AllocFunctions.insert(&C.Idents.get("calloc"));
216  AllocFunctions.insert(&C.Idents.get("valloc"));
217 }
218 
219 void PointerArithChecker::checkPostStmt(const CallExpr *CE,
220  CheckerContext &C) const {
222  const FunctionDecl *FD = C.getCalleeDecl(CE);
223  if (!FD)
224  return;
225  IdentifierInfo *FunI = FD->getIdentifier();
226  initAllocIdentifiers(C.getASTContext());
227  if (AllocFunctions.count(FunI) == 0)
228  return;
229 
230  SVal SV = State->getSVal(CE, C.getLocationContext());
231  const MemRegion *Region = SV.getAsRegion();
232  if (!Region)
233  return;
234  // Assume that C allocation functions allocate arrays to avoid false
235  // positives.
236  // TODO: Add heuristics to distinguish alloc calls that allocates single
237  // objecs.
238  State = State->set<RegionState>(Region, AllocKind::Array);
239  C.addTransition(State);
240 }
241 
242 void PointerArithChecker::checkPostStmt(const CXXNewExpr *NE,
243  CheckerContext &C) const {
244  const FunctionDecl *FD = NE->getOperatorNew();
245  if (!FD)
246  return;
247 
248  AllocKind Kind = getKindOfNewOp(NE, FD);
249 
251  SVal AllocedVal = State->getSVal(NE, C.getLocationContext());
252  const MemRegion *Region = AllocedVal.getAsRegion();
253  if (!Region)
254  return;
255  State = State->set<RegionState>(Region, Kind);
256  C.addTransition(State);
257 }
258 
259 void PointerArithChecker::checkPostStmt(const CastExpr *CE,
260  CheckerContext &C) const {
261  if (CE->getCastKind() != CastKind::CK_BitCast)
262  return;
263 
264  const Expr *CastedExpr = CE->getSubExpr();
266  SVal CastedVal = State->getSVal(CastedExpr, C.getLocationContext());
267 
268  const MemRegion *Region = CastedVal.getAsRegion();
269  if (!Region)
270  return;
271 
272  // Suppress reinterpret casted hits.
273  State = State->set<RegionState>(Region, AllocKind::Reinterpreted);
274  C.addTransition(State);
275 }
276 
277 void PointerArithChecker::checkPreStmt(const CastExpr *CE,
278  CheckerContext &C) const {
279  if (CE->getCastKind() != CastKind::CK_ArrayToPointerDecay)
280  return;
281 
282  const Expr *CastedExpr = CE->getSubExpr();
284  SVal CastedVal = State->getSVal(CastedExpr, C.getLocationContext());
285 
286  const MemRegion *Region = CastedVal.getAsRegion();
287  if (!Region)
288  return;
289 
290  if (const AllocKind *Kind = State->get<RegionState>(Region)) {
291  if (*Kind == AllocKind::Array || *Kind == AllocKind::Reinterpreted)
292  return;
293  }
294  State = State->set<RegionState>(Region, AllocKind::Array);
295  C.addTransition(State);
296 }
297 
298 void PointerArithChecker::checkPreStmt(const UnaryOperator *UOp,
299  CheckerContext &C) const {
300  if (!UOp->isIncrementDecrementOp() || !UOp->getType()->isPointerType())
301  return;
302  reportPointerArithMisuse(UOp->getSubExpr(), C, true);
303 }
304 
305 void PointerArithChecker::checkPreStmt(const ArraySubscriptExpr *SubsExpr,
306  CheckerContext &C) const {
308  SVal Idx = State->getSVal(SubsExpr->getIdx(), C.getLocationContext());
309 
310  // Indexing with 0 is OK.
311  if (Idx.isZeroConstant())
312  return;
313  reportPointerArithMisuse(SubsExpr->getBase(), C);
314 }
315 
316 void PointerArithChecker::checkPreStmt(const BinaryOperator *BOp,
317  CheckerContext &C) const {
318  BinaryOperatorKind OpKind = BOp->getOpcode();
319  if (!BOp->isAdditiveOp() && OpKind != BO_AddAssign && OpKind != BO_SubAssign)
320  return;
321 
322  const Expr *Lhs = BOp->getLHS();
323  const Expr *Rhs = BOp->getRHS();
325 
326  if (Rhs->getType()->isIntegerType() && Lhs->getType()->isPointerType()) {
327  SVal RHSVal = State->getSVal(Rhs, C.getLocationContext());
328  if (State->isNull(RHSVal).isConstrainedTrue())
329  return;
330  reportPointerArithMisuse(Lhs, C, !BOp->isAdditiveOp());
331  }
332  // The int += ptr; case is not valid C++.
333  if (Lhs->getType()->isIntegerType() && Rhs->getType()->isPointerType()) {
334  SVal LHSVal = State->getSVal(Lhs, C.getLocationContext());
335  if (State->isNull(LHSVal).isConstrainedTrue())
336  return;
337  reportPointerArithMisuse(Rhs, C);
338  }
339 }
340 
341 void ento::registerPointerArithChecker(CheckerManager &mgr) {
342  mgr.registerChecker<PointerArithChecker>();
343 }
FunctionDecl - An instance of this class is created to represent a function declaration or definition...
Definition: Decl.h:1698
MemRegion - The root abstract class for all memory regions.
Definition: MemRegion.h:79
DominatorTree GraphTraits specialization so the DominatorTree can be iterable by generic graph iterat...
Definition: Dominators.h:26
ExplodedNode * addTransition(ProgramStateRef State=nullptr, const ProgramPointTag *Tag=nullptr)
Generates a new transition in the program state graph (ExplodedGraph).
FunctionDecl * getOperatorNew() const
Definition: ExprCXX.h:1942
Opcode getOpcode() const
Definition: Expr.h:3026
const FunctionDecl * getCalleeDecl(const CallExpr *CE) const
Get the declaration of the called function (path-sensitive).
Defines the clang::Expr interface and subclasses for C++ expressions.
IdentifierInfo * getIdentifier() const
getIdentifier - Get the identifier that names this declaration, if there is one.
Definition: Decl.h:265
One of these records is kept for each identifier that is lexed.
Holds long-lived AST nodes (such as types and decls) that can be referred to throughout the semantic ...
Definition: ASTContext.h:149
LineState State
static void Profile(AllocKind X, FoldingSetNodeID &ID)
static bool isIncrementDecrementOp(Opcode Op)
Definition: Expr.h:1778
Expr * getSubExpr()
Definition: Expr.h:2761
IdentifierTable & Idents
Definition: ASTContext.h:537
BinaryOperatorKind
A builtin binary operation expression such as "x + y" or "x <= y".
Definition: Expr.h:2985
CastExpr - Base class for type casts, including both implicit casts (ImplicitCastExpr) and explicit c...
Definition: Expr.h:2710
#define REGISTER_MAP_WITH_PROGRAMSTATE(Name, Key, Value)
Declares an immutable map of type NameTy, suitable for placement into the ProgramState.
const RegionTy * getAs() const
Definition: MemRegion.h:1174
Expr - This represents one expression.
Definition: Expr.h:106
bool isVariadic() const
Whether this function is variadic.
Definition: Decl.cpp:2570
QualType getType() const
Definition: Expr.h:128
bool isInvalid() const
UnaryOperator - This represents the unary-expression's (except sizeof and alignof), the postinc/postdec operators from postfix-expression, and various extensions.
Definition: Expr.h:1717
ExplodedNode * generateNonFatalErrorNode(ProgramStateRef State=nullptr, const ProgramPointTag *Tag=nullptr)
Generate a transition to a node that will be used to report an error.
void emitReport(std::unique_ptr< BugReport > R)
Emit the diagnostics report.
Kind getKind() const
Definition: MemRegion.h:148
Kind
CHECKER * registerChecker()
Used to register checkers.
IdentifierInfo & get(StringRef Name)
Return the identifier token info for the specified named identifier.
Expr * getSubExpr() const
Definition: Expr.h:1744
CastKind getCastKind() const
Definition: Expr.h:2757
const MemRegion * getAsRegion() const
Definition: SVals.cpp:140
Represents a new-expression for memory allocation and constructor calls, e.g: "new CXXNewExpr(foo)"...
Definition: ExprCXX.h:1842
SVal - This represents a symbolic expression, which can be either an L-value or an R-value...
Definition: SVals.h:63
bool isArray() const
Definition: ExprCXX.h:1947
A class responsible for cleaning up unused symbols.
Expr * getLHS() const
Definition: Expr.h:3029
Dataflow Directional Tag Classes.
bool isZeroConstant() const
Definition: SVals.cpp:219
ArraySubscriptExpr - [C99 6.5.2.1] Array Subscripting.
Definition: Expr.h:2121
bool isIntegerType() const
isIntegerType() does not include complex integers (a GCC extension).
Definition: Type.h:6193
const ProgramStateRef & getState() const
static bool isAdditiveOp(Opcode Opc)
Definition: Expr.h:3061
X
Add a minimal nested name specifier fixit hint to allow lookup of a tag name from an outer enclosing ...
Definition: SemaDecl.cpp:13010
Defines the C++ Decl subclasses, other than those for templates (found in DeclTemplate.h) and friends (in DeclFriend.h).
SourceRange getSourceRange() const LLVM_READONLY
SourceLocation tokens are not useful in isolation - they are low level value objects created/interpre...
Definition: Stmt.cpp:265
CallExpr - Represents a function call (C99 6.5.2.2, C++ [expr.call]).
Definition: Expr.h:2209
ElementRegion is used to represent both array elements and casts.
Definition: MemRegion.h:1066
Expr * getRHS() const
Definition: Expr.h:3031
bool isPointerType() const
Definition: Type.h:5944
A trivial tuple used to represent a source range.
const LocationContext * getLocationContext() const
unsigned getNumParams() const
getNumParams - Return the number of parameters this function must have based on its FunctionType...
Definition: Decl.cpp:2906