clang 22.0.0git
PointerArithChecker.cpp
Go to the documentation of this file.
1//=== PointerArithChecker.cpp - Pointer arithmetic checker -----*- C++ -*--===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines PointerArithChecker, a builtin checker that checks for
10// pointer arithmetic on locations other than array elements.
11//
12//===----------------------------------------------------------------------===//
13
14#include "clang/AST/DeclCXX.h"
15#include "clang/AST/ExprCXX.h"
21#include "llvm/ADT/StringRef.h"
22
23using namespace clang;
24using namespace ento;
25
namespace {
/// Classification this checker records for a memory region.
enum class AllocKind {
  /// Produced by a non-array `new` whose operator new is a non-member,
  /// non-variadic function with exactly one parameter.
  SingleObject,
  /// Treated as array storage: array `new`, the C allocation functions, or a
  /// region that went through array-to-pointer decay.
  Array,
  /// Nothing is assumed about the allocation (e.g. a member operator new or
  /// one with extra parameters).
  Unknown,
  /// Single object interpreted as an array.
  Reinterpreted
};
} // end namespace
34
35namespace llvm {
36template <> struct FoldingSetTrait<AllocKind> {
37 static inline void Profile(AllocKind X, FoldingSetNodeID &ID) {
38 ID.AddInteger(static_cast<int>(X));
39 }
40};
41} // end namespace llvm
42
43namespace {
44class PointerArithChecker
45 : public Checker<
46 check::PreStmt<BinaryOperator>, check::PreStmt<UnaryOperator>,
47 check::PreStmt<ArraySubscriptExpr>, check::PreStmt<CastExpr>,
48 check::PostStmt<CastExpr>, check::PostStmt<CXXNewExpr>,
49 check::PostStmt<CallExpr>, check::DeadSymbols> {
50 AllocKind getKindOfNewOp(const CXXNewExpr *NE, const FunctionDecl *FD) const;
51 const MemRegion *getArrayRegion(const MemRegion *Region, bool &Polymorphic,
52 AllocKind &AKind, CheckerContext &C) const;
53 const MemRegion *getPointedRegion(const MemRegion *Region,
54 CheckerContext &C) const;
55 void reportPointerArithMisuse(const Expr *E, CheckerContext &C,
56 bool PointedNeeded = false) const;
57 void initAllocIdentifiers(ASTContext &C) const;
58
59 const BugType BT_pointerArith{this, "Dangerous pointer arithmetic"};
60 const BugType BT_polyArray{this, "Dangerous pointer arithmetic"};
61 mutable llvm::SmallPtrSet<IdentifierInfo *, 8> AllocFunctions;
62
63public:
64 void checkPreStmt(const UnaryOperator *UOp, CheckerContext &C) const;
65 void checkPreStmt(const BinaryOperator *BOp, CheckerContext &C) const;
66 void checkPreStmt(const ArraySubscriptExpr *SubExpr, CheckerContext &C) const;
67 void checkPreStmt(const CastExpr *CE, CheckerContext &C) const;
68 void checkPostStmt(const CastExpr *CE, CheckerContext &C) const;
69 void checkPostStmt(const CXXNewExpr *NE, CheckerContext &C) const;
70 void checkPostStmt(const CallExpr *CE, CheckerContext &C) const;
71 void checkDeadSymbols(SymbolReaper &SR, CheckerContext &C) const;
72};
73} // end namespace
74
// Program-state map from a memory region to the AllocKind this checker has
// recorded for it.
75REGISTER_MAP_WITH_PROGRAMSTATE(RegionState, const MemRegion *, AllocKind)
76
77static bool isArrayPlacementNew(const CXXNewExpr *NE) {
78 return NE->isArray() && NE->getNumPlacementArgs() > 0;
79}
80
82 const MemRegion *Region) {
83 while (const auto *BaseRegion = dyn_cast<CXXBaseObjectRegion>(Region)) {
84 Region = BaseRegion->getSuperRegion();
85 }
86 if (const auto *ElemRegion = dyn_cast<ElementRegion>(Region)) {
87 State = State->set<RegionState>(ElemRegion->getSuperRegion(),
88 AllocKind::Reinterpreted);
89 }
90 return State;
91}
92
93void PointerArithChecker::checkDeadSymbols(SymbolReaper &SR,
94 CheckerContext &C) const {
95 // TODO: intentional leak. Some information is garbage collected too early,
96 // see http://reviews.llvm.org/D14203 for further information.
97 /*ProgramStateRef State = C.getState();
98 RegionStateTy RegionStates = State->get<RegionState>();
99 for (const MemRegion *Reg: llvm::make_first_range(RegionStates)) {
100 if (!SR.isLiveRegion(Reg))
101 State = State->remove<RegionState>(Reg);
102 }
103 C.addTransition(State);*/
104}
105
106AllocKind PointerArithChecker::getKindOfNewOp(const CXXNewExpr *NE,
107 const FunctionDecl *FD) const {
108 // This checker try not to assume anything about placement and overloaded
109 // new to avoid false positives.
110 if (isa<CXXMethodDecl>(FD))
111 return AllocKind::Unknown;
112 if (FD->getNumParams() != 1 || FD->isVariadic())
113 return AllocKind::Unknown;
114 if (NE->isArray())
115 return AllocKind::Array;
116
117 return AllocKind::SingleObject;
118}
119
120const MemRegion *
121PointerArithChecker::getPointedRegion(const MemRegion *Region,
122 CheckerContext &C) const {
123 assert(Region);
124 ProgramStateRef State = C.getState();
125 SVal S = State->getSVal(Region);
126 return S.getAsRegion();
127}
128
129/// Checks whether a region is the part of an array.
130/// In case there is a derived to base cast above the array element, the
131/// Polymorphic output value is set to true. AKind output value is set to the
132/// allocation kind of the inspected region.
133const MemRegion *PointerArithChecker::getArrayRegion(const MemRegion *Region,
134 bool &Polymorphic,
135 AllocKind &AKind,
136 CheckerContext &C) const {
137 assert(Region);
138 while (const auto *BaseRegion = dyn_cast<CXXBaseObjectRegion>(Region)) {
139 Region = BaseRegion->getSuperRegion();
140 Polymorphic = true;
141 }
142 if (const auto *ElemRegion = dyn_cast<ElementRegion>(Region)) {
143 Region = ElemRegion->getSuperRegion();
144 }
145
146 ProgramStateRef State = C.getState();
147 if (const AllocKind *Kind = State->get<RegionState>(Region)) {
148 AKind = *Kind;
149 if (*Kind == AllocKind::Array)
150 return Region;
151 else
152 return nullptr;
153 }
154 // When the region is symbolic and we do not have any information about it,
155 // assume that this is an array to avoid false positives.
156 if (isa<SymbolicRegion>(Region))
157 return Region;
158
159 // No AllocKind stored and not symbolic, assume that it points to a single
160 // object.
161 return nullptr;
162}
163
164void PointerArithChecker::reportPointerArithMisuse(const Expr *E,
165 CheckerContext &C,
166 bool PointedNeeded) const {
167 SourceRange SR = E->getSourceRange();
168 if (SR.isInvalid())
169 return;
170
171 ProgramStateRef State = C.getState();
172 const MemRegion *Region = C.getSVal(E).getAsRegion();
173 if (!Region)
174 return;
175 if (PointedNeeded)
176 Region = getPointedRegion(Region, C);
177 if (!Region)
178 return;
179
180 bool IsPolymorphic = false;
181 AllocKind Kind = AllocKind::Unknown;
182 if (const MemRegion *ArrayRegion =
183 getArrayRegion(Region, IsPolymorphic, Kind, C)) {
184 if (!IsPolymorphic)
185 return;
186 if (ExplodedNode *N = C.generateNonFatalErrorNode()) {
187 constexpr llvm::StringLiteral Msg =
188 "Pointer arithmetic on a pointer to base class is dangerous "
189 "because derived and base class may have different size.";
190 auto R = std::make_unique<PathSensitiveBugReport>(BT_polyArray, Msg, N);
191 R->addRange(E->getSourceRange());
192 R->markInteresting(ArrayRegion);
193 C.emitReport(std::move(R));
194 }
195 return;
196 }
197
198 if (Kind == AllocKind::Reinterpreted)
199 return;
200
201 // We might not have enough information about symbolic regions.
202 if (Kind != AllocKind::SingleObject &&
203 Region->getKind() == MemRegion::Kind::SymbolicRegionKind)
204 return;
205
206 if (ExplodedNode *N = C.generateNonFatalErrorNode()) {
207 constexpr llvm::StringLiteral Msg =
208 "Pointer arithmetic on non-array variables relies on memory layout, "
209 "which is dangerous.";
210 auto R = std::make_unique<PathSensitiveBugReport>(BT_pointerArith, Msg, N);
211 R->addRange(SR);
212 R->markInteresting(Region);
213 C.emitReport(std::move(R));
214 }
215}
216
217void PointerArithChecker::initAllocIdentifiers(ASTContext &C) const {
218 if (!AllocFunctions.empty())
219 return;
220 AllocFunctions.insert(&C.Idents.get("alloca"));
221 AllocFunctions.insert(&C.Idents.get("malloc"));
222 AllocFunctions.insert(&C.Idents.get("realloc"));
223 AllocFunctions.insert(&C.Idents.get("calloc"));
224 AllocFunctions.insert(&C.Idents.get("valloc"));
225}
226
227void PointerArithChecker::checkPostStmt(const CallExpr *CE,
228 CheckerContext &C) const {
229 ProgramStateRef State = C.getState();
230 const FunctionDecl *FD = C.getCalleeDecl(CE);
231 if (!FD)
232 return;
233 IdentifierInfo *FunI = FD->getIdentifier();
234 initAllocIdentifiers(C.getASTContext());
235 if (AllocFunctions.count(FunI) == 0)
236 return;
237
238 SVal SV = C.getSVal(CE);
239 const MemRegion *Region = SV.getAsRegion();
240 if (!Region)
241 return;
242 // Assume that C allocation functions allocate arrays to avoid false
243 // positives.
244 // TODO: Add heuristics to distinguish alloc calls that allocates single
245 // objecs.
246 State = State->set<RegionState>(Region, AllocKind::Array);
247 C.addTransition(State);
248}
249
250void PointerArithChecker::checkPostStmt(const CXXNewExpr *NE,
251 CheckerContext &C) const {
252 const FunctionDecl *FD = NE->getOperatorNew();
253 if (!FD)
254 return;
255
256 AllocKind Kind = getKindOfNewOp(NE, FD);
257
258 ProgramStateRef State = C.getState();
259 SVal AllocedVal = C.getSVal(NE);
260 const MemRegion *Region = AllocedVal.getAsRegion();
261 if (!Region)
262 return;
263
264 // For array placement-new, mark the original region as reinterpreted
265 if (isArrayPlacementNew(NE)) {
266 State = markSuperRegionReinterpreted(State, Region);
267 }
268
269 State = State->set<RegionState>(Region, Kind);
270 C.addTransition(State);
271}
272
273void PointerArithChecker::checkPostStmt(const CastExpr *CE,
274 CheckerContext &C) const {
275 // Casts to `void*` happen, for instance, on placement new calls.
276 // We consider `void*` not to erase the type information about the underlying
277 // region.
278 if (CE->getCastKind() != CastKind::CK_BitCast ||
279 CE->getType()->isVoidPointerType())
280 return;
281
282 const Expr *CastedExpr = CE->getSubExpr();
283 ProgramStateRef State = C.getState();
284 SVal CastedVal = C.getSVal(CastedExpr);
285
286 const MemRegion *Region = CastedVal.getAsRegion();
287 if (!Region)
288 return;
289
290 // Suppress reinterpret casted hits.
291 State = State->set<RegionState>(Region, AllocKind::Reinterpreted);
292 C.addTransition(State);
293}
294
295void PointerArithChecker::checkPreStmt(const CastExpr *CE,
296 CheckerContext &C) const {
297 if (CE->getCastKind() != CastKind::CK_ArrayToPointerDecay)
298 return;
299
300 const Expr *CastedExpr = CE->getSubExpr();
301 ProgramStateRef State = C.getState();
302 SVal CastedVal = C.getSVal(CastedExpr);
303
304 const MemRegion *Region = CastedVal.getAsRegion();
305 if (!Region)
306 return;
307
308 if (const AllocKind *Kind = State->get<RegionState>(Region)) {
309 if (*Kind == AllocKind::Array || *Kind == AllocKind::Reinterpreted)
310 return;
311 }
312 State = State->set<RegionState>(Region, AllocKind::Array);
313 C.addTransition(State);
314}
315
316void PointerArithChecker::checkPreStmt(const UnaryOperator *UOp,
317 CheckerContext &C) const {
318 if (!UOp->isIncrementDecrementOp() || !UOp->getType()->isPointerType())
319 return;
320 reportPointerArithMisuse(UOp->getSubExpr(), C, true);
321}
322
323void PointerArithChecker::checkPreStmt(const ArraySubscriptExpr *SubsExpr,
324 CheckerContext &C) const {
325 SVal Idx = C.getSVal(SubsExpr->getIdx());
326
327 // Indexing with 0 is OK.
328 if (Idx.isZeroConstant())
329 return;
330
331 // Indexing vector-type expressions is also OK.
332 if (SubsExpr->getBase()->getType()->isVectorType())
333 return;
334 reportPointerArithMisuse(SubsExpr->getBase(), C);
335}
336
337void PointerArithChecker::checkPreStmt(const BinaryOperator *BOp,
338 CheckerContext &C) const {
339 BinaryOperatorKind OpKind = BOp->getOpcode();
340 if (!BOp->isAdditiveOp() && OpKind != BO_AddAssign && OpKind != BO_SubAssign)
341 return;
342
343 const Expr *Lhs = BOp->getLHS();
344 const Expr *Rhs = BOp->getRHS();
345 ProgramStateRef State = C.getState();
346
347 if (Rhs->getType()->isIntegerType() && Lhs->getType()->isPointerType()) {
348 SVal RHSVal = C.getSVal(Rhs);
349 if (State->isNull(RHSVal).isConstrainedTrue())
350 return;
351 reportPointerArithMisuse(Lhs, C, !BOp->isAdditiveOp());
352 }
353 // The int += ptr; case is not valid C++.
354 if (Lhs->getType()->isIntegerType() && Rhs->getType()->isPointerType()) {
355 SVal LHSVal = C.getSVal(Lhs);
356 if (State->isNull(LHSVal).isConstrainedTrue())
357 return;
358 reportPointerArithMisuse(Rhs, C);
359 }
360}
361
362void ento::registerPointerArithChecker(CheckerManager &mgr) {
363 mgr.registerChecker<PointerArithChecker>();
364}
365
366bool ento::shouldRegisterPointerArithChecker(const CheckerManager &mgr) {
367 return true;
368}
Defines the C++ Decl subclasses, other than those for templates (found in DeclTemplate....
Defines the clang::Expr interface and subclasses for C++ expressions.
#define X(type, name)
Definition Value.h:97
static ProgramStateRef markSuperRegionReinterpreted(ProgramStateRef State, const MemRegion *Region)
static bool isArrayPlacementNew(const CXXNewExpr *NE)
#define REGISTER_MAP_WITH_PROGRAMSTATE(Name, Key, Value)
Declares an immutable map of type NameTy, suitable for placement into the ProgramState.
Expr * getLHS() const
Definition Expr.h:4024
Expr * getRHS() const
Definition Expr.h:4026
static bool isAdditiveOp(Opcode Opc)
Definition Expr.h:4060
Opcode getOpcode() const
Definition Expr.h:4019
Represents a new-expression for memory allocation and constructor calls, e.g: "new CXXNewExpr(foo)".
Definition ExprCXX.h:2349
CastKind getCastKind() const
Definition Expr.h:3656
Expr * getSubExpr()
Definition Expr.h:3662
QualType getType() const
Definition Expr.h:144
bool isVariadic() const
Whether this function is variadic.
Definition Decl.cpp:3125
unsigned getNumParams() const
Return the number of parameters this function must have based on its FunctionType.
Definition Decl.cpp:3767
IdentifierInfo * getIdentifier() const
Get the identifier that names this declaration, if there is one.
Definition Decl.h:294
bool isInvalid() const
SourceRange getSourceRange() const LLVM_READONLY
SourceLocation tokens are not useful in isolation - they are low level value objects created/interpre...
Definition Stmt.cpp:334
bool isVoidPointerType() const
Definition Type.cpp:712
bool isPointerType() const
Definition TypeBase.h:8522
bool isIntegerType() const
isIntegerType() does not include complex integers (a GCC extension).
Definition TypeBase.h:8922
bool isVectorType() const
Definition TypeBase.h:8661
Expr * getSubExpr() const
Definition Expr.h:2287
static bool isIncrementDecrementOp(Opcode Op)
Definition Expr.h:2342
CHECKER * registerChecker(AT &&...Args)
Register a single-part checker (derived from Checker): construct its singleton instance,...
Simple checker classes that implement one frontend (i.e.
Definition Checker.h:553
MemRegion - The root abstract class for all memory regions.
Definition MemRegion.h:98
Kind getKind() const
Definition MemRegion.h:203
bool isZeroConstant() const
Definition SVals.cpp:257
const MemRegion * getAsRegion() const
Definition SVals.cpp:119
A class responsible for cleaning up unused symbols.
IntrusiveRefCntPtr< const ProgramState > ProgramStateRef
bool NE(InterpState &S, CodePtr OpPC)
Definition Interp.h:1260
The JSON file list parser is used to communicate input to InstallAPI.
bool isa(CodeGen::Address addr)
Definition Address.h:330
Diagnostic wrappers for TextAPI types for error reporting.
Definition Dominators.h:30
static void Profile(AllocKind X, FoldingSetNodeID &ID)