clang 23.0.0git
DereferenceChecker.cpp
Go to the documentation of this file.
1//===-- DereferenceChecker.cpp - Null dereference checker -----------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This defines NullDerefChecker, a builtin check in ExprEngine that performs
10// checks for null pointers at loads and stores.
11//
12//===----------------------------------------------------------------------===//
13
#include "clang/AST/ExprObjC.h"
#include "clang/Basic/TargetInfo.h"
#include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h"
#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
#include "clang/StaticAnalyzer/Core/Checker.h"
#include "clang/StaticAnalyzer/Core/CheckerManager.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerHelpers.h"
#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/raw_ostream.h"
24
25using namespace clang;
26using namespace ento;
27
28namespace {
29
30class DerefBugType : public BugType {
31 StringRef ArrayMsg, FieldMsg;
32
33public:
34 DerefBugType(CheckerFrontend *FE, StringRef Desc, const char *AMsg,
35 const char *FMsg = nullptr)
36 : BugType(FE, Desc), ArrayMsg(AMsg), FieldMsg(FMsg ? FMsg : AMsg) {}
37 StringRef getArrayMsg() const { return ArrayMsg; }
38 StringRef getFieldMsg() const { return FieldMsg; }
39};
40
41class DereferenceChecker
42 : public CheckerFamily<check::Location, check::Bind,
43 check::PreStmt<BinaryOperator>,
44 EventDispatcher<ImplicitNullDerefEvent>> {
45 void reportDerefBug(const DerefBugType &BT, ProgramStateRef State,
46 const Stmt *S, CheckerContext &C) const;
47
48 bool suppressReport(CheckerContext &C, const Expr *E) const;
49
50public:
51 void checkLocation(SVal location, bool isLoad, const Stmt* S,
52 CheckerContext &C) const;
53 void checkBind(SVal L, SVal V, const Stmt *S, bool AtDeclInit,
54 CheckerContext &C) const;
55 void checkPreStmt(const BinaryOperator *Op, CheckerContext &C) const;
56
57 static void AddDerefSource(raw_ostream &os,
58 SmallVectorImpl<SourceRange> &Ranges,
59 const Expr *Ex, const ProgramState *state,
60 const StackFrame *SF, bool loadedFrom = false);
61
62 CheckerFrontend NullDerefChecker, FixedDerefChecker, NullPointerArithmChecker;
63 const DerefBugType NullBug{&NullDerefChecker, "Dereference of null pointer",
64 "a null pointer dereference",
65 "a dereference of a null pointer"};
66 const DerefBugType UndefBug{&NullDerefChecker,
67 "Dereference of undefined pointer value",
68 "an undefined pointer dereference",
69 "a dereference of an undefined pointer value"};
70 const DerefBugType LabelBug{&NullDerefChecker,
71 "Dereference of the address of a label",
72 "an undefined pointer dereference",
73 "a dereference of an address of a label"};
74 const DerefBugType FixedAddressBug{&FixedDerefChecker,
75 "Dereference of a fixed address",
76 "a dereference of a fixed address"};
77 const BugType NullPointerArithmBug{
78 &NullPointerArithmChecker,
79 "Possibly undefined arithmetic operation involving a null pointer"};
80
81 StringRef getDebugTag() const override { return "DereferenceChecker"; }
82};
83
84struct ValueDescStr {
85 SmallVectorImpl<SourceRange> &Ranges;
86 const Expr *Ex;
87 const ProgramState *State;
88 const StackFrame *SF;
89 bool IsPointer;
90 ConditionTruthVal IsNull;
91};
92
93} // end anonymous namespace
94
95void DereferenceChecker::AddDerefSource(raw_ostream &os,
97 const Expr *Ex,
98 const ProgramState *state,
99 const StackFrame *SF, bool loadedFrom) {
100 Ex = Ex->IgnoreParenLValueCasts();
101 switch (Ex->getStmtClass()) {
102 default:
103 break;
104 case Stmt::DeclRefExprClass: {
105 const DeclRefExpr *DR = cast<DeclRefExpr>(Ex);
106 if (const VarDecl *VD = dyn_cast<VarDecl>(DR->getDecl())) {
107 os << " (" << (loadedFrom ? "loaded from" : "from")
108 << " variable '" << VD->getName() << "')";
109 Ranges.push_back(DR->getSourceRange());
110 }
111 break;
112 }
113 case Stmt::MemberExprClass: {
114 const MemberExpr *ME = cast<MemberExpr>(Ex);
115 os << " (" << (loadedFrom ? "loaded from" : "via")
116 << " field '" << ME->getMemberNameInfo() << "')";
117 SourceLocation L = ME->getMemberLoc();
118 Ranges.push_back(SourceRange(L, L));
119 break;
120 }
121 case Stmt::ObjCIvarRefExprClass: {
122 const ObjCIvarRefExpr *IV = cast<ObjCIvarRefExpr>(Ex);
123 os << " (" << (loadedFrom ? "loaded from" : "via")
124 << " ivar '" << IV->getDecl()->getName() << "')";
125 SourceLocation L = IV->getLocation();
126 Ranges.push_back(SourceRange(L, L));
127 break;
128 }
129 }
130}
131
132static const Expr *getDereferenceExpr(const Stmt *S, bool IsBind=false){
133 const Expr *E = nullptr;
134
135 // Walk through lvalue casts to get the original expression
136 // that syntactically caused the load.
137 if (const Expr *expr = dyn_cast<Expr>(S))
138 E = expr->IgnoreParenLValueCasts();
139
140 if (IsBind) {
141 const VarDecl *VD;
142 const Expr *Init;
143 std::tie(VD, Init) = parseAssignment(S);
144 if (VD && Init)
145 E = Init;
146 }
147 return E;
148}
149
150bool DereferenceChecker::suppressReport(CheckerContext &C,
151 const Expr *E) const {
152 // Do not report dereferences on memory that use address space #256, #257,
153 // and #258. Those address spaces are used when dereferencing address spaces
154 // relative to the GS, FS, and SS segments on x86/x86-64 targets.
155 // Dereferencing a null pointer in these address spaces is not defined
156 // as an error. All other null dereferences in other address spaces
157 // are defined as an error unless explicitly defined.
158 // See https://clang.llvm.org/docs/LanguageExtensions.html, the section
159 // "X86/X86-64 Language Extensions"
160
161 QualType Ty = E->getType();
162 if (!Ty.hasAddressSpace())
163 return false;
164 if (C.getAnalysisManager()
165 .getAnalyzerOptions()
166 .ShouldSuppressAddressSpaceDereferences)
167 return true;
168
169 const llvm::Triple::ArchType Arch =
170 C.getASTContext().getTargetInfo().getTriple().getArch();
171
172 if ((Arch == llvm::Triple::x86) || (Arch == llvm::Triple::x86_64)) {
174 case 256:
175 case 257:
176 case 258:
177 return true;
178 }
179 }
180 return false;
181}
182
183static bool isDeclRefExprToReference(const Expr *E) {
184 if (const auto *DRE = dyn_cast<DeclRefExpr>(E))
185 return DRE->getDecl()->getType()->isReferenceType();
186 return false;
187}
188
189void DereferenceChecker::reportDerefBug(const DerefBugType &BT,
190 ProgramStateRef State, const Stmt *S,
191 CheckerContext &C) const {
192 if (&BT == &FixedAddressBug) {
193 if (!FixedDerefChecker.isEnabled())
194 // Deliberately don't add a sink node if check is disabled.
195 // This situation may be valid in special cases.
196 return;
197 } else {
198 if (!NullDerefChecker.isEnabled()) {
199 C.addSink();
200 return;
201 }
202 }
203
204 // Generate an error node.
205 ExplodedNode *N = C.generateErrorNode(State);
206 if (!N)
207 return;
208
209 SmallString<100> Buf;
210 llvm::raw_svector_ostream Out(Buf);
211
212 SmallVector<SourceRange, 2> Ranges;
213
214 switch (S->getStmtClass()) {
215 case Stmt::ArraySubscriptExprClass: {
216 Out << "Array access";
217 const ArraySubscriptExpr *AE = cast<ArraySubscriptExpr>(S);
218 AddDerefSource(Out, Ranges, AE->getBase()->IgnoreParenCasts(), State.get(),
219 N->getStackFrame());
220 Out << " results in " << BT.getArrayMsg();
221 break;
222 }
223 case Stmt::ArraySectionExprClass: {
224 Out << "Array access";
225 const ArraySectionExpr *AE = cast<ArraySectionExpr>(S);
226 AddDerefSource(Out, Ranges, AE->getBase()->IgnoreParenCasts(), State.get(),
227 N->getStackFrame());
228 Out << " results in " << BT.getArrayMsg();
229 break;
230 }
231 case Stmt::UnaryOperatorClass: {
232 Out << BT.getDescription();
233 const UnaryOperator *U = cast<UnaryOperator>(S);
234 AddDerefSource(Out, Ranges, U->getSubExpr()->IgnoreParens(), State.get(),
235 N->getStackFrame(), true);
236 break;
237 }
238 case Stmt::MemberExprClass: {
239 const MemberExpr *M = cast<MemberExpr>(S);
240 if (M->isArrow() || isDeclRefExprToReference(M->getBase())) {
241 Out << "Access to field '" << M->getMemberNameInfo() << "' results in "
242 << BT.getFieldMsg();
243 AddDerefSource(Out, Ranges, M->getBase()->IgnoreParenCasts(), State.get(),
244 N->getStackFrame(), true);
245 }
246 break;
247 }
248 case Stmt::ObjCIvarRefExprClass: {
249 const ObjCIvarRefExpr *IV = cast<ObjCIvarRefExpr>(S);
250 Out << "Access to instance variable '" << *IV->getDecl() << "' results in "
251 << BT.getFieldMsg();
252 AddDerefSource(Out, Ranges, IV->getBase()->IgnoreParenCasts(), State.get(),
253 N->getStackFrame(), true);
254 break;
255 }
256 default:
257 break;
258 }
259
260 auto BR = std::make_unique<PathSensitiveBugReport>(
261 BT, Buf.empty() ? BT.getDescription() : Buf.str(), N);
262
264
265 for (const auto &R : Ranges)
266 BR->addRange(R);
267
268 C.emitReport(std::move(BR));
269}
270
271void DereferenceChecker::checkLocation(SVal l, bool isLoad, const Stmt* S,
272 CheckerContext &C) const {
273 // Check for dereference of an undefined value.
274 if (l.isUndef()) {
275 const Expr *DerefExpr = getDereferenceExpr(S);
276 if (!suppressReport(C, DerefExpr))
277 reportDerefBug(UndefBug, C.getState(), DerefExpr, C);
278 return;
279 }
280
281 DefinedOrUnknownSVal location = l.castAs<DefinedOrUnknownSVal>();
282
283 // Check for null dereferences.
284 if (!isa<Loc>(location))
285 return;
286
287 ProgramStateRef state = C.getState();
288
289 ProgramStateRef notNullState, nullState;
290 std::tie(notNullState, nullState) = state->assume(location);
291
292 if (nullState) {
293 if (!notNullState) {
294 // We know that 'location' can only be null. This is what
295 // we call an "explicit" null dereference.
296 const Expr *expr = getDereferenceExpr(S);
297 if (!suppressReport(C, expr)) {
298 reportDerefBug(NullBug, nullState, expr, C);
299 return;
300 }
301 }
302
303 // Otherwise, we have the case where the location could either be
304 // null or not-null. Record the error node as an "implicit" null
305 // dereference.
306 if (ExplodedNode *N = C.generateSink(nullState, C.getPredecessor())) {
307 ImplicitNullDerefEvent event = {l, isLoad, N, &C.getBugReporter(),
308 /*IsDirectDereference=*/true};
309 dispatchEvent(event);
310 }
311 }
312
313 if (location.isConstant()) {
314 const Expr *DerefExpr = getDereferenceExpr(S, isLoad);
315 if (!DerefExpr->getType().isVolatileQualified() &&
316 !suppressReport(C, DerefExpr))
317 reportDerefBug(FixedAddressBug, notNullState, DerefExpr, C);
318 return;
319 }
320
321 // From this point forward, we know that the location is not null.
322 C.addTransition(notNullState);
323}
324
325void DereferenceChecker::checkBind(SVal L, SVal V, const Stmt *S,
326 bool AtDeclInit, CheckerContext &C) const {
327 // If we're binding to a reference, check if the value is known to be null.
328 if (V.isUndef())
329 return;
330
331 // One should never write to label addresses.
332 if (auto Label = L.getAs<loc::GotoLabel>()) {
333 reportDerefBug(LabelBug, C.getState(), S, C);
334 return;
335 }
336
337 const MemRegion *MR = L.getAsRegion();
338 const TypedValueRegion *TVR = dyn_cast_or_null<TypedValueRegion>(MR);
339 if (!TVR)
340 return;
341
342 if (!TVR->getValueType()->isReferenceType())
343 return;
344
345 ProgramStateRef State = C.getState();
346
347 ProgramStateRef StNonNull, StNull;
348 std::tie(StNonNull, StNull) = State->assume(V.castAs<DefinedOrUnknownSVal>());
349
350 if (StNull) {
351 if (!StNonNull) {
352 const Expr *expr = getDereferenceExpr(S, /*IsBind=*/true);
353 if (!suppressReport(C, expr)) {
354 reportDerefBug(NullBug, StNull, expr, C);
355 return;
356 }
357 }
358
359 // At this point the value could be either null or non-null.
360 // Record this as an "implicit" null dereference.
361 if (ExplodedNode *N = C.generateSink(StNull, C.getPredecessor())) {
362 ImplicitNullDerefEvent event = {V, /*isLoad=*/true, N,
363 &C.getBugReporter(),
364 /*IsDirectDereference=*/true};
365 dispatchEvent(event);
366 }
367 }
368
369 if (V.isConstant()) {
370 const Expr *DerefExpr = getDereferenceExpr(S, true);
371 if (!suppressReport(C, DerefExpr))
372 reportDerefBug(FixedAddressBug, State, DerefExpr, C);
373 return;
374 }
375
376 // Unlike a regular null dereference, initializing a reference with a
377 // dereferenced null pointer does not actually cause a runtime exception in
378 // Clang's implementation of references.
379 //
380 // int &r = *p; // safe??
381 // if (p != NULL) return; // uh-oh
382 // r = 5; // trap here
383 //
384 // The standard says this is invalid as soon as we try to create a "null
385 // reference" (there is no such thing), but turning this into an assumption
386 // that 'p' is never null will not match our actual runtime behavior.
387 // So we do not record this assumption, allowing us to warn on the last line
388 // of this example.
389 //
390 // We do need to add a transition because we may have generated a sink for
391 // the "implicit" null dereference.
392 C.addTransition(State, this);
393}
394
395namespace llvm {
396template <> struct format_provider<ValueDescStr> {
397 static void format(const ValueDescStr &V, raw_ostream &Stream,
398 StringRef Style) {
399 static const char *ValueStr[2][3] = {
400 {"zero", "nonzero integer value", "probably nonzero integer value"},
401 {"null pointer", "non-null pointer", "probably non-null pointer"},
402 };
403 Stream
404 << ValueStr[V.IsPointer][V.IsNull.isConstrainedTrue()
405 ? 0
406 : (V.IsNull.isConstrainedFalse() ? 1 : 2)];
407 DereferenceChecker::AddDerefSource(Stream, V.Ranges, V.Ex, V.State, V.SF,
408 false);
409 }
410};
411} // namespace llvm
412
413void DereferenceChecker::checkPreStmt(const BinaryOperator *Op,
414 CheckerContext &C) const {
415 if (!Op->isAdditiveOp() || !NullPointerArithmChecker.isEnabled())
416 return;
417 const Expr *E1 = Op->getLHS();
418 const Expr *E2 = Op->getRHS();
419 QualType T1 = E1->getType().getCanonicalType();
420 QualType T2 = E2->getType().getCanonicalType();
421 bool T1IsPointer = T1->isPointerType();
422 bool T2IsPointer = T2->isPointerType();
423 if (T1->isIntegerType() && T2->isIntegerType())
424 return;
425 if (!T1IsPointer && !T1->isIntegerType() && !T2IsPointer &&
426 !T2->isIntegerType())
427 return;
428
429 ProgramStateRef State = C.getState();
430 ConditionTruthVal V1IsNull = State->isNull(C.getSVal(E1));
431 ConditionTruthVal V2IsNull = State->isNull(C.getSVal(E2));
432 bool IsConstrained = true;
433
434 // Check cases 'NULL + x' and 'NULL - x'
435 if (T1IsPointer && !T2IsPointer) {
436 if (!V1IsNull.isConstrainedTrue() || V2IsNull.isConstrainedTrue())
437 return;
438 IsConstrained = V2IsNull.isConstrainedFalse();
439 }
440
441 // Check case 'x + NULL'
442 if (!T1IsPointer && T2IsPointer) {
443 if (V1IsNull.isConstrainedTrue() || !V2IsNull.isConstrainedTrue())
444 return;
445 IsConstrained = V1IsNull.isConstrainedFalse();
446 }
447
448 // Check case 'NULL - p' or 'p - NULL'
449 if (T1IsPointer && T2IsPointer) {
450 if (!V1IsNull.isConstrainedTrue() && !V2IsNull.isConstrainedTrue())
451 return;
452 if (V1IsNull.isConstrainedTrue() && V2IsNull.isConstrainedTrue())
453 return;
454 IsConstrained =
455 V1IsNull.isConstrainedFalse() || V2IsNull.isConstrainedFalse();
456 }
457
458 SmallVector<SourceRange, 2> Ranges;
459 const char *OpcodeStr =
460 Op->getOpcode() == BO_Add ? "Addition" : "Subtraction";
461 const char *ResultStr = IsConstrained ? "results" : "may result";
462 ValueDescStr DerefArg1{Ranges, E1, State.get(), C.getStackFrame(),
463 T1IsPointer, V1IsNull};
464 ValueDescStr DerefArg2{Ranges, E2, State.get(), C.getStackFrame(),
465 T2IsPointer, V2IsNull};
466 std::string Msg =
467 llvm::formatv("{0} of a {1} and a {2} {3} in undefined behavior",
468 OpcodeStr, DerefArg1, DerefArg2, ResultStr);
469
470 ExplodedNode *N = C.generateErrorNode(State);
471 if (!N)
472 return;
473 auto BR =
474 std::make_unique<PathSensitiveBugReport>(NullPointerArithmBug, Msg, N);
475 if (V1IsNull.isConstrainedTrue())
477 if (V2IsNull.isConstrainedTrue())
479 for (const auto &R : Ranges)
480 BR->addRange(R);
481
482 C.emitReport(std::move(BR));
483}
484
485void ento::registerNullDereferenceChecker(CheckerManager &Mgr) {
486 Mgr.getChecker<DereferenceChecker>()->NullDerefChecker.enable(Mgr);
487}
488
489bool ento::shouldRegisterNullDereferenceChecker(const CheckerManager &) {
490 return true;
491}
492
493void ento::registerFixedAddressDereferenceChecker(CheckerManager &Mgr) {
494 Mgr.getChecker<DereferenceChecker>()->FixedDerefChecker.enable(Mgr);
495}
496
497bool ento::shouldRegisterFixedAddressDereferenceChecker(
498 const CheckerManager &) {
499 return true;
500}
501
502void ento::registerNullPointerArithmChecker(CheckerManager &Mgr) {
503 Mgr.getChecker<DereferenceChecker>()->NullPointerArithmChecker.enable(Mgr);
504}
505
506bool ento::shouldRegisterNullPointerArithmChecker(const CheckerManager &) {
507 return true;
508}
#define V(N, I)
static const Expr * getDereferenceExpr(const Stmt *S, bool IsBind=false)
static bool isDeclRefExprToReference(const Expr *E)
Expr * getBase()
Get base of the array section.
Definition Expr.h:7297
Expr * getLHS() const
Definition Expr.h:4091
Expr * getRHS() const
Definition Expr.h:4093
static bool isAdditiveOp(Opcode Opc)
Definition Expr.h:4127
Opcode getOpcode() const
Definition Expr.h:4086
ValueDecl * getDecl()
Definition Expr.h:1341
This represents one expression.
Definition Expr.h:112
Expr * IgnoreParenCasts() LLVM_READONLY
Skip past any parentheses and casts which might surround this expression until reaching a fixed point...
Definition Expr.cpp:3102
Expr * IgnoreParenLValueCasts() LLVM_READONLY
Skip past any parentheses and lvalue casts which might surround this expression until reaching a fixe...
Definition Expr.cpp:3114
Expr * IgnoreParens() LLVM_READONLY
Skip past any parentheses which might surround this expression until reaching a fixed point.
Definition Expr.cpp:3093
QualType getType() const
Definition Expr.h:144
SourceLocation getMemberLoc() const
getMemberLoc - Return the location of the "member", in X->F, it is the location of 'F'.
Definition Expr.h:3556
Expr * getBase() const
Definition Expr.h:3444
DeclarationNameInfo getMemberNameInfo() const
Retrieve the member declaration name info.
Definition Expr.h:3544
bool isArrow() const
Definition Expr.h:3551
StringRef getName() const
Get the name of identifier for this declaration as a StringRef.
Definition Decl.h:301
SourceLocation getLocation() const
Definition ExprObjC.h:623
ObjCIvarDecl * getDecl()
Definition ExprObjC.h:610
const Expr * getBase() const
Definition ExprObjC.h:614
bool isVolatileQualified() const
Determine whether this type is volatile-qualified.
Definition TypeBase.h:8529
LangAS getAddressSpace() const
Return the address space of this type.
Definition TypeBase.h:8571
QualType getCanonicalType() const
Definition TypeBase.h:8497
bool hasAddressSpace() const
Check if this type has any address space qualifier.
Definition TypeBase.h:8566
It represents a stack frame of the call stack.
Stmt - This represents one statement.
Definition Stmt.h:86
StmtClass getStmtClass() const
Definition Stmt.h:1503
SourceRange getSourceRange() const LLVM_READONLY
SourceLocation tokens are not useful in isolation - they are low level value objects created/interpre...
Definition Stmt.cpp:343
bool isPointerType() const
Definition TypeBase.h:8682
bool isIntegerType() const
isIntegerType() does not include complex integers (a GCC extension).
Definition TypeBase.h:9092
bool isReferenceType() const
Definition TypeBase.h:8706
Expr * getSubExpr() const
Definition Expr.h:2288
Represents a variable declaration or definition.
Definition Decl.h:924
Checker families (where a single backend class implements multiple related frontends) should derive f...
Definition Checker.h:581
CHECKER * getChecker(AT &&...Args)
If the the singleton instance of a checker class is not yet constructed, then construct it (with the ...
bool isConstrainedFalse() const
Return true if the constraint is perfectly constrained to 'false'.
bool isConstrainedTrue() const
Return true if the constraint is perfectly constrained to 'true'.
const StackFrame * getStackFrame() const
ProgramState - This class encapsulates:
bool isUndef() const
Definition SVals.h:107
bool isConstant() const
Definition SVals.cpp:245
std::optional< T > getAs() const
Convert to the specified SVal type, returning std::nullopt if this SVal is not of the desired type.
Definition SVals.h:87
const MemRegion * getAsRegion() const
Definition SVals.cpp:119
T castAs() const
Convert to the specified SVal type, asserting that this SVal is of the desired type.
Definition SVals.h:83
virtual QualType getValueType() const =0
Defines the clang::TargetInfo interface.
const internal::VariadicDynCastAllOfMatcher< Stmt, Expr > expr
Matches expressions.
const Expr * getDerefExpr(const Stmt *S)
Given that expression S represents a pointer that would be dereferenced, try to find a sub-expression...
bool trackExpressionValue(const ExplodedNode *N, const Expr *E, PathSensitiveBugReport &R, TrackingOptions Opts={})
Attempts to add visitors to track expression value back to its point of origin.
IntrusiveRefCntPtr< const ProgramState > ProgramStateRef
std::pair< const clang::VarDecl *, const clang::Expr * > parseAssignment(const Stmt *S)
The JSON file list parser is used to communicate input to InstallAPI.
bool isa(CodeGen::Address addr)
Definition Address.h:330
unsigned toTargetAddressSpace(LangAS AS)
U cast(CodeGen::Address addr)
Definition Address.h:327
static void format(const ValueDescStr &V, raw_ostream &Stream, StringRef Style)