clang 23.0.0git
ExplodedGraph.cpp
Go to the documentation of this file.
1//===- ExplodedGraph.cpp - Local, Path-Sens. "Exploded Graph" -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the template classes ExplodedNode and ExplodedGraph,
10// which represent a path-sensitive, intra-procedural "exploded graph."
11//
12//===----------------------------------------------------------------------===//
13
15#include "clang/AST/Expr.h"
16#include "clang/AST/ExprObjC.h"
17#include "clang/AST/ParentMap.h"
18#include "clang/AST/Stmt.h"
21#include "clang/Basic/LLVM.h"
25#include "llvm/ADT/DenseSet.h"
26#include "llvm/ADT/FoldingSet.h"
27#include "llvm/ADT/PointerUnion.h"
28#include <cassert>
29#include <memory>
30#include <optional>
31
32using namespace clang;
33using namespace ento;
34
35//===----------------------------------------------------------------------===//
36// Cleanup.
37//===----------------------------------------------------------------------===//
38
40
42
43//===----------------------------------------------------------------------===//
44// Node reclamation.
45//===----------------------------------------------------------------------===//
46
52
53bool ExplodedGraph::shouldCollect(const ExplodedNode *node) {
54 // First, we only consider nodes for reclamation of the following
55 // conditions apply:
56 //
57 // (1) 1 predecessor (that has one successor)
58 // (2) 1 successor (that has one predecessor)
59 //
60 // If a node has no successor it is on the "frontier", while a node
61 // with no predecessor is a root.
62 //
63 // After these prerequisites, we discard all "filler" nodes that
64 // are used only for intermediate processing, and are not essential
65 // for analyzer history:
66 //
67 // (a) PreStmtPurgeDeadSymbols
68 //
69 // We then discard all other nodes where *all* of the following conditions
70 // apply:
71 //
72 // (3) The ProgramPoint is for a PostStmt, but not a PostStore.
73 // (4) There is no 'tag' for the ProgramPoint.
74 // (5) The 'store' is the same as the predecessor.
75 // (6) The 'GDM' is the same as the predecessor.
76 // (7) The StackFrame is the same as the predecessor.
77 // (8) Expressions that are *not* lvalue expressions.
78 // (9) The PostStmt isn't for a non-consumed Stmt or Expr.
79 // (10) The successor is neither a CallExpr StmtPoint nor a CallEnter or
80 // PreImplicitCall (so that we would be able to find it when retrying a
81 // call with no inlining).
82 // FIXME: It may be safe to reclaim PreCall and PostCall nodes as well.
83
84 // Conditions 1 and 2.
85 if (node->pred_size() != 1 || node->succ_size() != 1)
86 return false;
87
88 const ExplodedNode *pred = *(node->pred_begin());
89 if (pred->succ_size() != 1)
90 return false;
91
92 const ExplodedNode *succ = *(node->succ_begin());
93 if (succ->pred_size() != 1)
94 return false;
95
96 // Now reclaim any nodes that are (by definition) not essential to
97 // analysis history and are not consulted by any client code.
98 ProgramPoint progPoint = node->getLocation();
99 if (progPoint.getAs<PreStmtPurgeDeadSymbols>())
100 return !progPoint.getTag();
101
102 // Condition 3.
103 if (!progPoint.getAs<PostStmt>() || progPoint.getAs<PostStore>())
104 return false;
105
106 // Condition 4.
107 if (progPoint.getTag())
108 return false;
109
110 // Conditions 5, 6, and 7.
111 ProgramStateRef state = node->getState();
112 ProgramStateRef pred_state = pred->getState();
113 if (state->store != pred_state->store || state->GDM != pred_state->GDM ||
114 progPoint.getStackFrame() != pred->getStackFrame())
115 return false;
116
117 // All further checks require expressions. As per #3, we know that we have
118 // a PostStmt.
119 const Expr *Ex = dyn_cast<Expr>(progPoint.castAs<PostStmt>().getStmt());
120 if (!Ex)
121 return false;
122
123 // Condition 8.
124 // Do not collect nodes for "interesting" lvalue expressions since they are
125 // used extensively for generating path diagnostics.
127 return false;
128
129 // Condition 9.
130 // Do not collect nodes for non-consumed Stmt or Expr to ensure precise
131 // diagnostic generation; specifically, so that we could anchor arrows
132 // pointing to the beginning of statements (as written in code).
133 const ParentMap &PM = progPoint.getStackFrame()->getParentMap();
134 if (!PM.isConsumedExpr(Ex))
135 return false;
136
137 // Condition 10.
138 const ProgramPoint SuccLoc = succ->getLocation();
139 if (std::optional<StmtPoint> SP = SuccLoc.getAs<StmtPoint>())
140 if (CallEvent::isCallStmt(SP->getStmt()))
141 return false;
142
143 // Condition 10, continuation.
144 if (SuccLoc.getAs<CallEnter>() || SuccLoc.getAs<PreImplicitCall>())
145 return false;
146
147 return true;
148}
149
150void ExplodedGraph::collectNode(ExplodedNode *node) {
151 // Removing a node means:
152 // (a) changing the predecessors successor to the successor of this node
153 // (b) changing the successors predecessor to the predecessor of this node
154 // (c) Putting 'node' onto freeNodes.
155 assert(node->pred_size() == 1 || node->succ_size() == 1);
156 ExplodedNode *pred = *(node->pred_begin());
157 ExplodedNode *succ = *(node->succ_begin());
158 pred->replaceSuccessor(succ);
159 succ->replacePredecessor(pred);
160 FreeNodes.push_back(node);
161 Nodes.RemoveNode(node);
162 --NumNodes;
163 node->~ExplodedNode();
164}
165
167 if (ChangedNodes.empty())
168 return;
169
170 // Only periodically reclaim nodes so that we can build up a set of
171 // nodes that meet the reclamation criteria. Freshly created nodes
172 // by definition have no successor, and thus cannot be reclaimed (see below).
173 assert(ReclaimCounter > 0);
174 if (--ReclaimCounter != 0)
175 return;
177
178 for (const auto node : ChangedNodes)
179 if (shouldCollect(node))
180 collectNode(node);
181 ChangedNodes.clear();
182}
183
184//===----------------------------------------------------------------------===//
185// ExplodedNode.
186//===----------------------------------------------------------------------===//
187
188// An NodeGroup's storage type is actually very much like a TinyPtrVector:
189// it can be either a pointer to a single ExplodedNode, or a pointer to a
190// BumpVector allocated with the ExplodedGraph's allocator. This allows the
191// common case of single-node NodeGroups to be implemented with no extra memory.
192//
193// Consequently, each of the NodeGroup methods have up to four cases to handle:
194// 1. The flag is set and this group does not actually contain any nodes.
195// 2. The group is empty, in which case the storage value is null.
196// 3. The group contains a single node.
197// 4. The group contains more than one node.
199using GroupStorage = llvm::PointerUnion<ExplodedNode *, ExplodedNodeVector *>;
200
202 assert(!V->isSink());
203 Preds.addNode(V, G);
204 V->Succs.addNode(this, G);
205}
206
207void ExplodedNode::NodeGroup::replaceNode(ExplodedNode *node) {
208 assert(!getFlag());
209
210 GroupStorage &Storage = reinterpret_cast<GroupStorage&>(P);
211 assert(isa<ExplodedNode *>(Storage));
212 Storage = node;
213 assert(isa<ExplodedNode *>(Storage));
214}
215
216void ExplodedNode::NodeGroup::addNode(ExplodedNode *N, ExplodedGraph &G) {
217 assert(!getFlag());
218
219 GroupStorage &Storage = reinterpret_cast<GroupStorage&>(P);
220 if (Storage.isNull()) {
221 Storage = N;
222 assert(isa<ExplodedNode *>(Storage));
223 return;
224 }
225
226 ExplodedNodeVector *V = dyn_cast<ExplodedNodeVector *>(Storage);
227
228 if (!V) {
229 // Switch from single-node to multi-node representation.
230 auto *Old = cast<ExplodedNode *>(Storage);
231
233 V = new (G.getAllocator()) ExplodedNodeVector(Ctx, 4);
234 V->push_back(Old, Ctx);
235
236 Storage = V;
237 assert(!getFlag());
238 assert(isa<ExplodedNodeVector *>(Storage));
239 }
240
241 V->push_back(N, G.getNodeAllocator());
242}
243
244unsigned ExplodedNode::NodeGroup::size() const {
245 if (getFlag())
246 return 0;
247
248 const GroupStorage &Storage = reinterpret_cast<const GroupStorage &>(P);
249 if (Storage.isNull())
250 return 0;
251 if (ExplodedNodeVector *V = dyn_cast<ExplodedNodeVector *>(Storage))
252 return V->size();
253 return 1;
254}
255
256ExplodedNode * const *ExplodedNode::NodeGroup::begin() const {
257 if (getFlag())
258 return nullptr;
259
260 const GroupStorage &Storage = reinterpret_cast<const GroupStorage &>(P);
261 if (Storage.isNull())
262 return nullptr;
263 if (ExplodedNodeVector *V = dyn_cast<ExplodedNodeVector *>(Storage))
264 return V->begin();
265 return Storage.getAddrOfPtr1();
266}
267
268ExplodedNode * const *ExplodedNode::NodeGroup::end() const {
269 if (getFlag())
270 return nullptr;
271
272 const GroupStorage &Storage = reinterpret_cast<const GroupStorage &>(P);
273 if (Storage.isNull())
274 return nullptr;
275 if (ExplodedNodeVector *V = dyn_cast<ExplodedNodeVector *>(Storage))
276 return V->end();
277 return Storage.getAddrOfPtr1() + 1;
278}
279
281 return pred_size() == 1 && succ_size() == 1 &&
282 getFirstPred()->getState()->getID() == getState()->getID() &&
283 getFirstPred()->succ_size() == 1;
284}
285
288 if (auto BEP = P.getAs<BlockEntrance>())
289 return BEP->getBlock();
290
291 // Find the node's current statement in the CFG.
292 // FIXME: getStmtForDiagnostics() does nasty things in order to provide
293 // a valid statement for body farms, do we need this behavior here?
294 if (const Stmt *S = getStmtForDiagnostics())
296 S);
297
298 return nullptr;
299}
300
301static const StackFrame *
304 const StackFrame *ParentSF = SF->getParent();
305 assert(ParentSF && "We don't start analysis from autosynthesized code");
306 while (ParentSF->getAnalysisDeclContext()->isBodyAutosynthesized()) {
307 SF = ParentSF;
308 ParentSF = SF->getParent();
309 assert(ParentSF && "We don't start analysis from autosynthesized code");
310 }
311 return SF;
312}
313
315 // We cannot place diagnostics on autosynthesized code.
316 // Put them onto the call site through which we jumped into autosynthesized
317 // code for the first time.
318 const StackFrame *SF = getStackFrame();
320 // It must be a stack frame because we only autosynthesize functions.
322 }
323 // Otherwise, see if the node's program point directly points to a statement.
324 // FIXME: Refactor into a ProgramPoint method?
326 if (auto SP = P.getAs<StmtPoint>())
327 return SP->getStmt();
328 if (auto BE = P.getAs<BlockEdge>())
329 return BE->getSrc()->getTerminatorStmt();
330 if (auto CE = P.getAs<CallEnter>())
331 return CE->getCallExpr();
332 if (auto CEE = P.getAs<CallExitEnd>())
333 return CEE->getCalleeStackFrame()->getCallSite();
334 if (auto PIPP = P.getAs<PostInitializer>())
335 return PIPP->getInitializer()->getInit();
336 if (auto CEB = P.getAs<CallExitBegin>())
337 return CEB->getReturnStmt();
338 if (auto FEP = P.getAs<FunctionExitPoint>())
339 return FEP->getStmt();
340 if (auto LE = P.getAs<LifetimeEnd>())
341 return LE->getTriggerStmt();
342
343 return nullptr;
344}
345
347 for (const ExplodedNode *N = getFirstSucc(); N; N = N->getFirstSucc()) {
348 if (N->getLocation().isPurgeKind())
349 continue;
350 if (const Stmt *S = N->getStmtForDiagnostics()) {
351 // Check if the statement is '?' or '&&'/'||'. These are "merges",
352 // not actual statement points.
353 switch (S->getStmtClass()) {
354 case Stmt::ChooseExprClass:
355 case Stmt::BinaryConditionalOperatorClass:
356 case Stmt::ConditionalOperatorClass:
357 continue;
358 case Stmt::BinaryOperatorClass: {
359 BinaryOperatorKind Op = cast<BinaryOperator>(S)->getOpcode();
360 if (Op == BO_LAnd || Op == BO_LOr)
361 continue;
362 break;
363 }
364 default:
365 break;
366 }
367 // We found the statement, so return it.
368 return S;
369 }
370 }
371
372 return nullptr;
373}
374
376 for (const ExplodedNode *N = getFirstPred(); N; N = N->getFirstPred())
377 if (const Stmt *S = N->getStmtForDiagnostics(); S && !isa<CompoundStmt>(S))
378 return S;
379
380 return nullptr;
381}
382
384 if (const Stmt *S = getStmtForDiagnostics())
385 return S;
386
388}
389
391 ProgramStateRef State,
392 bool IsSink,
393 bool* IsNew) {
394 // Profile 'State' to determine if we already have an existing node.
395 llvm::FoldingSetNodeID profile;
396 void *InsertPos = nullptr;
397
398 NodeTy::Profile(profile, L, State, IsSink);
399 NodeTy* V = Nodes.FindNodeOrInsertPos(profile, InsertPos);
400
401 if (!V) {
402 if (!FreeNodes.empty()) {
403 V = FreeNodes.back();
404 FreeNodes.pop_back();
405 }
406 else {
407 // Allocate a new node.
408 V = getAllocator().Allocate<NodeTy>();
409 }
410
411 ++NumNodes;
412 new (V) NodeTy(L, State, NumNodes, IsSink);
413
415 ChangedNodes.push_back(V);
416
417 // Insert the node into the node set and return it.
418 Nodes.InsertNode(V, InsertPos);
419
420 if (IsNew) *IsNew = true;
421 }
422 else
423 if (IsNew) *IsNew = false;
424
425 return V;
426}
427
429 ProgramStateRef State,
430 int64_t Id,
431 bool IsSink) {
432 NodeTy *V = getAllocator().Allocate<NodeTy>();
433 new (V) NodeTy(L, State, Id, IsSink);
434 return V;
435}
436
437std::unique_ptr<ExplodedGraph>
439 InterExplodedGraphMap *ForwardMap,
440 InterExplodedGraphMap *InverseMap) const {
441 // FIXME: The two-pass algorithm of this function (which was introduced in
442 // 2008) is terribly overcomplicated and should be replaced by a single
443 // (backward) pass.
444
445 if (Nodes.empty())
446 return nullptr;
447
448 using Pass1Ty = llvm::DenseSet<const ExplodedNode *>;
449 Pass1Ty Pass1;
450
451 using Pass2Ty = InterExplodedGraphMap;
452 InterExplodedGraphMap Pass2Scratch;
453 Pass2Ty &Pass2 = ForwardMap ? *ForwardMap : Pass2Scratch;
454
456
457 // ===- Pass 1 (reverse DFS) -===
458 for (const auto Sink : Sinks)
459 if (Sink)
460 WL1.push_back(Sink);
461
462 // Process the first worklist until it is empty.
463 while (!WL1.empty()) {
464 const ExplodedNode *N = WL1.pop_back_val();
465
466 // Have we already visited this node? If so, continue to the next one.
467 if (!Pass1.insert(N).second)
468 continue;
469
470 // If this is the root enqueue it to the second worklist.
471 if (N->Preds.empty()) {
472 assert(N == getRoot() && "Found non-root node with no predecessors!");
473 WL2.push_back(N);
474 continue;
475 }
476
477 // Visit our predecessors and enqueue them.
478 WL1.append(N->Preds.begin(), N->Preds.end());
479 }
480
481 // We didn't hit the root? Return with a null pointer for the new graph.
482 if (WL2.empty())
483 return nullptr;
484
485 assert(WL2.size() == 1 && "There must be only one root!");
486
487 // Create an empty graph.
488 std::unique_ptr<ExplodedGraph> G = std::make_unique<ExplodedGraph>();
489
490 // ===- Pass 2 (forward DFS to construct the new graph) -===
491 while (!WL2.empty()) {
492 const ExplodedNode *N = WL2.pop_back_val();
493
494 auto [Place, Inserted] = Pass2.try_emplace(N);
495
496 // Skip this node if we have already processed it.
497 if (!Inserted)
498 continue;
499
500 // Create the corresponding node in the new graph and record the mapping
501 // from the old node to the new node.
502 ExplodedNode *NewN = G->createUncachedNode(N->getLocation(), N->State,
503 N->getID(), N->isSink());
504 Place->second = NewN;
505
506 // Also record the reverse mapping from the new node to the old node.
507 if (InverseMap) (*InverseMap)[NewN] = N;
508
509 // If this node is the root, designate it as such in the graph.
510 if (N->Preds.empty()) {
511 assert(N == getRoot());
512 G->designateAsRoot(NewN);
513 }
514
515 // In the case that some of the intended predecessors of NewN have already
516 // been created, we should hook them up as predecessors.
517
518 // Walk through the predecessors of 'N' and hook up their corresponding
519 // nodes in the new graph (if any) to the freshly created node.
520 for (const ExplodedNode *Pred : N->Preds) {
521 Pass2Ty::iterator PI = Pass2.find(Pred);
522 if (PI == Pass2.end())
523 continue;
524
525 NewN->addPredecessor(const_cast<ExplodedNode *>(PI->second), *G);
526 }
527
528 // In the case that some of the intended successors of NewN have already
529 // been created, we should hook them up as successors. Otherwise, enqueue
530 // the new nodes from the original graph that should have nodes created
531 // in the new graph.
532 for (const ExplodedNode *Succ : N->Succs) {
533 Pass2Ty::iterator PI = Pass2.find(Succ);
534 if (PI != Pass2.end()) {
535 const_cast<ExplodedNode *>(PI->second)->addPredecessor(NewN, *G);
536 continue;
537 }
538
539 // Enqueue nodes to the worklist that were marked during pass 1.
540 if (Pass1.count(Succ))
541 WL2.push_back(Succ);
542 }
543 }
544
545 return G;
546}
#define V(N, I)
static const StackFrame * findTopAutosynthesizedParentStackFrame(const StackFrame *SF)
llvm::PointerUnion< ExplodedNode *, ExplodedNodeVector * > GroupStorage
BumpVector< ExplodedNode * > ExplodedNodeVector
Forward-declares and imports various common LLVM datatypes that clang wants to use unqualified.
Represents a single basic block in a source-level CFG.
Definition CFG.h:652
const CFGBlock * getBlock(const Stmt *S) const
Returns the CFGBlock the specified Stmt* appears in.
Represents a point when we begin processing an inlined call.
Represents a point when we start the call exit sequence (for inlined call).
Represents a point when we finish the call exit sequence (for inlined call).
This represents one expression.
Definition Expr.h:112
bool isLValue() const
isLValue - True if this expression is an "l-value" according to the rules of the current language.
Definition Expr.h:284
Represents a point when the lifetime of an automatic object ends.
bool isConsumedExpr(Expr *E) const
Represents a program point after a store evaluation.
Represents a program point just before an implicit call event.
Represents a point after we ran remove dead bindings BEFORE processing the given statement.
const ProgramPointTag * getTag() const
bool isPurgeKind()
Is this a program point corresponding to purge/removal of dead symbols and bindings.
T castAs() const
Convert to the specified ProgramPoint type, asserting that this ProgramPoint is of the desired type.
const StackFrame * getStackFrame() const
std::optional< T > getAs() const
Convert to the specified ProgramPoint type, returning std::nullopt if this ProgramPoint is not of the...
It represents a stack frame of the call stack.
const ParentMap & getParentMap() const
LLVM_ATTRIBUTE_RETURNS_NONNULL AnalysisDeclContext * getAnalysisDeclContext() const
const Expr * getCallSite() const
const StackFrame * getParent() const
It might return null.
const Stmt * getStmt() const
Stmt - This represents one statement.
Definition Stmt.h:86
static bool isCallStmt(const Stmt *S)
Returns true if this is a statement is a function or method call of some kind.
std::unique_ptr< ExplodedGraph > trim(ArrayRef< const NodeTy * > Nodes, InterExplodedGraphMap *ForwardMap=nullptr, InterExplodedGraphMap *InverseMap=nullptr) const
Creates a trimmed version of the graph that only contains paths leading to the given nodes.
BumpVectorContext & getNodeAllocator()
unsigned ReclaimCounter
Counter to determine when to reclaim nodes.
NodeVector ChangedNodes
A list of recently allocated nodes that can potentially be recycled.
int64_t NumNodes
NumNodes - The number of nodes in the graph.
unsigned ReclaimNodeInterval
Determines how often nodes are reclaimed.
void reclaimRecentlyAllocatedNodes()
Reclaim "uninteresting" nodes created since the last time this method was called.
static bool isInterestingLValueExpr(const Expr *Ex)
Returns true if nodes for the given expression kind are always kept around.
llvm::BumpPtrAllocator & getAllocator()
ExplodedNode * getNode(const ProgramPoint &L, ProgramStateRef State, bool IsSink=false, bool *IsNew=nullptr)
Retrieve the node associated with a (Location, State) pair, where the 'Location' is a ProgramPoint in...
ExplodedNode * getRoot() const
Get the root node of the graph.
ExplodedNode * createUncachedNode(const ProgramPoint &L, ProgramStateRef State, int64_t Id, bool IsSink=false)
Create a node for a (Location, State) pair, but don't store it for deduplication later.
llvm::FoldingSet< ExplodedNode > Nodes
Nodes - The nodes in the graph.
NodeVector FreeNodes
A list of nodes that can be reused.
const CFGBlock * getCFGBlock() const
const ProgramStateRef & getState() const
const Stmt * getStmtForDiagnostics() const
If the node's program point corresponds to a statement, retrieve that statement.
bool isTrivial() const
The node is trivial if it has only one successor, only one predecessor, it's predecessor has only one...
const Stmt * getPreviousStmtForDiagnostics() const
Find the statement that was executed immediately before this node.
ProgramPoint getLocation() const
getLocation - Returns the edge associated with the given node.
void addPredecessor(ExplodedNode *V, ExplodedGraph &G)
addPredeccessor - Adds a predecessor to the current node, and in tandem add this node as a successor ...
ExplodedNode * getFirstSucc()
unsigned pred_size() const
static void Profile(llvm::FoldingSetNodeID &ID, const ProgramPoint &Loc, const ProgramStateRef &state, bool IsSink)
ExplodedNode(const ProgramPoint &loc, ProgramStateRef state, int64_t Id, bool IsSink)
const Stmt * getNextStmtForDiagnostics() const
Find the next statement that was executed on this node's execution path.
const Stmt * getCurrentOrPreviousStmtForDiagnostics() const
Find the statement that was executed at or immediately before this node.
ExplodedNode * getFirstPred()
unsigned succ_size() const
const StackFrame * getStackFrame() const
llvm::DenseMap< const ExplodedNode *, const ExplodedNode * > InterExplodedGraphMap
IntrusiveRefCntPtr< const ProgramState > ProgramStateRef
The JSON file list parser is used to communicate input to InstallAPI.
bool isa(CodeGen::Address addr)
Definition Address.h:330
U cast(CodeGen::Address addr)
Definition Address.h:327