clang 22.0.0git
ExplodedGraph.cpp
Go to the documentation of this file.
1//===- ExplodedGraph.cpp - Local, Path-Sens. "Exploded Graph" -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the template classes ExplodedNode and ExplodedGraph,
10// which represent a path-sensitive, intra-procedural "exploded graph."
11//
12//===----------------------------------------------------------------------===//
13
15#include "clang/AST/Expr.h"
16#include "clang/AST/ExprObjC.h"
17#include "clang/AST/ParentMap.h"
18#include "clang/AST/Stmt.h"
21#include "clang/Basic/LLVM.h"
25#include "llvm/ADT/DenseSet.h"
26#include "llvm/ADT/FoldingSet.h"
27#include "llvm/ADT/PointerUnion.h"
28#include <cassert>
29#include <memory>
30#include <optional>
31
32using namespace clang;
33using namespace ento;
34
35//===----------------------------------------------------------------------===//
36// Cleanup.
37//===----------------------------------------------------------------------===//
38
40
42
43//===----------------------------------------------------------------------===//
44// Node reclamation.
45//===----------------------------------------------------------------------===//
46
52
53bool ExplodedGraph::shouldCollect(const ExplodedNode *node) {
54 // First, we only consider nodes for reclamation of the following
55 // conditions apply:
56 //
57 // (1) 1 predecessor (that has one successor)
58 // (2) 1 successor (that has one predecessor)
59 //
60 // If a node has no successor it is on the "frontier", while a node
61 // with no predecessor is a root.
62 //
63 // After these prerequisites, we discard all "filler" nodes that
64 // are used only for intermediate processing, and are not essential
65 // for analyzer history:
66 //
67 // (a) PreStmtPurgeDeadSymbols
68 //
69 // We then discard all other nodes where *all* of the following conditions
70 // apply:
71 //
72 // (3) The ProgramPoint is for a PostStmt, but not a PostStore.
73 // (4) There is no 'tag' for the ProgramPoint.
74 // (5) The 'store' is the same as the predecessor.
75 // (6) The 'GDM' is the same as the predecessor.
76 // (7) The LocationContext is the same as the predecessor.
77 // (8) Expressions that are *not* lvalue expressions.
78 // (9) The PostStmt isn't for a non-consumed Stmt or Expr.
79 // (10) The successor is neither a CallExpr StmtPoint nor a CallEnter or
80 // PreImplicitCall (so that we would be able to find it when retrying a
81 // call with no inlining).
82 // FIXME: It may be safe to reclaim PreCall and PostCall nodes as well.
83
84 // Conditions 1 and 2.
85 if (node->pred_size() != 1 || node->succ_size() != 1)
86 return false;
87
88 const ExplodedNode *pred = *(node->pred_begin());
89 if (pred->succ_size() != 1)
90 return false;
91
92 const ExplodedNode *succ = *(node->succ_begin());
93 if (succ->pred_size() != 1)
94 return false;
95
96 // Now reclaim any nodes that are (by definition) not essential to
97 // analysis history and are not consulted by any client code.
98 ProgramPoint progPoint = node->getLocation();
99 if (progPoint.getAs<PreStmtPurgeDeadSymbols>())
100 return !progPoint.getTag();
101
102 // Condition 3.
103 if (!progPoint.getAs<PostStmt>() || progPoint.getAs<PostStore>())
104 return false;
105
106 // Condition 4.
107 if (progPoint.getTag())
108 return false;
109
110 // Conditions 5, 6, and 7.
111 ProgramStateRef state = node->getState();
112 ProgramStateRef pred_state = pred->getState();
113 if (state->store != pred_state->store || state->GDM != pred_state->GDM ||
114 progPoint.getLocationContext() != pred->getLocationContext())
115 return false;
116
117 // All further checks require expressions. As per #3, we know that we have
118 // a PostStmt.
119 const Expr *Ex = dyn_cast<Expr>(progPoint.castAs<PostStmt>().getStmt());
120 if (!Ex)
121 return false;
122
123 // Condition 8.
124 // Do not collect nodes for "interesting" lvalue expressions since they are
125 // used extensively for generating path diagnostics.
127 return false;
128
129 // Condition 9.
130 // Do not collect nodes for non-consumed Stmt or Expr to ensure precise
131 // diagnostic generation; specifically, so that we could anchor arrows
132 // pointing to the beginning of statements (as written in code).
133 const ParentMap &PM = progPoint.getLocationContext()->getParentMap();
134 if (!PM.isConsumedExpr(Ex))
135 return false;
136
137 // Condition 10.
138 const ProgramPoint SuccLoc = succ->getLocation();
139 if (std::optional<StmtPoint> SP = SuccLoc.getAs<StmtPoint>())
140 if (CallEvent::isCallStmt(SP->getStmt()))
141 return false;
142
143 // Condition 10, continuation.
144 if (SuccLoc.getAs<CallEnter>() || SuccLoc.getAs<PreImplicitCall>())
145 return false;
146
147 return true;
148}
149
150void ExplodedGraph::collectNode(ExplodedNode *node) {
151 // Removing a node means:
152 // (a) changing the predecessors successor to the successor of this node
153 // (b) changing the successors predecessor to the predecessor of this node
154 // (c) Putting 'node' onto freeNodes.
155 assert(node->pred_size() == 1 || node->succ_size() == 1);
156 ExplodedNode *pred = *(node->pred_begin());
157 ExplodedNode *succ = *(node->succ_begin());
158 pred->replaceSuccessor(succ);
159 succ->replacePredecessor(pred);
160 FreeNodes.push_back(node);
161 Nodes.RemoveNode(node);
162 --NumNodes;
163 node->~ExplodedNode();
164}
165
167 if (ChangedNodes.empty())
168 return;
169
170 // Only periodically reclaim nodes so that we can build up a set of
171 // nodes that meet the reclamation criteria. Freshly created nodes
172 // by definition have no successor, and thus cannot be reclaimed (see below).
173 assert(ReclaimCounter > 0);
174 if (--ReclaimCounter != 0)
175 return;
177
178 for (const auto node : ChangedNodes)
179 if (shouldCollect(node))
180 collectNode(node);
181 ChangedNodes.clear();
182}
183
184//===----------------------------------------------------------------------===//
185// ExplodedNode.
186//===----------------------------------------------------------------------===//
187
188// An NodeGroup's storage type is actually very much like a TinyPtrVector:
189// it can be either a pointer to a single ExplodedNode, or a pointer to a
190// BumpVector allocated with the ExplodedGraph's allocator. This allows the
191// common case of single-node NodeGroups to be implemented with no extra memory.
192//
193// Consequently, each of the NodeGroup methods have up to four cases to handle:
194// 1. The flag is set and this group does not actually contain any nodes.
195// 2. The group is empty, in which case the storage value is null.
196// 3. The group contains a single node.
197// 4. The group contains more than one node.
199using GroupStorage = llvm::PointerUnion<ExplodedNode *, ExplodedNodeVector *>;
200
202 assert(!V->isSink());
203 Preds.addNode(V, G);
204 V->Succs.addNode(this, G);
205}
206
207void ExplodedNode::NodeGroup::replaceNode(ExplodedNode *node) {
208 assert(!getFlag());
209
210 GroupStorage &Storage = reinterpret_cast<GroupStorage&>(P);
211 assert(isa<ExplodedNode *>(Storage));
212 Storage = node;
213 assert(isa<ExplodedNode *>(Storage));
214}
215
216void ExplodedNode::NodeGroup::addNode(ExplodedNode *N, ExplodedGraph &G) {
217 assert(!getFlag());
218
219 GroupStorage &Storage = reinterpret_cast<GroupStorage&>(P);
220 if (Storage.isNull()) {
221 Storage = N;
222 assert(isa<ExplodedNode *>(Storage));
223 return;
224 }
225
226 ExplodedNodeVector *V = dyn_cast<ExplodedNodeVector *>(Storage);
227
228 if (!V) {
229 // Switch from single-node to multi-node representation.
230 auto *Old = cast<ExplodedNode *>(Storage);
231
233 V = new (G.getAllocator()) ExplodedNodeVector(Ctx, 4);
234 V->push_back(Old, Ctx);
235
236 Storage = V;
237 assert(!getFlag());
238 assert(isa<ExplodedNodeVector *>(Storage));
239 }
240
241 V->push_back(N, G.getNodeAllocator());
242}
243
244unsigned ExplodedNode::NodeGroup::size() const {
245 if (getFlag())
246 return 0;
247
248 const GroupStorage &Storage = reinterpret_cast<const GroupStorage &>(P);
249 if (Storage.isNull())
250 return 0;
251 if (ExplodedNodeVector *V = dyn_cast<ExplodedNodeVector *>(Storage))
252 return V->size();
253 return 1;
254}
255
256ExplodedNode * const *ExplodedNode::NodeGroup::begin() const {
257 if (getFlag())
258 return nullptr;
259
260 const GroupStorage &Storage = reinterpret_cast<const GroupStorage &>(P);
261 if (Storage.isNull())
262 return nullptr;
263 if (ExplodedNodeVector *V = dyn_cast<ExplodedNodeVector *>(Storage))
264 return V->begin();
265 return Storage.getAddrOfPtr1();
266}
267
268ExplodedNode * const *ExplodedNode::NodeGroup::end() const {
269 if (getFlag())
270 return nullptr;
271
272 const GroupStorage &Storage = reinterpret_cast<const GroupStorage &>(P);
273 if (Storage.isNull())
274 return nullptr;
275 if (ExplodedNodeVector *V = dyn_cast<ExplodedNodeVector *>(Storage))
276 return V->end();
277 return Storage.getAddrOfPtr1() + 1;
278}
279
281 return pred_size() == 1 && succ_size() == 1 &&
282 getFirstPred()->getState()->getID() == getState()->getID() &&
283 getFirstPred()->succ_size() == 1;
284}
285
288 if (auto BEP = P.getAs<BlockEntrance>())
289 return BEP->getBlock();
290
291 // Find the node's current statement in the CFG.
292 // FIXME: getStmtForDiagnostics() does nasty things in order to provide
293 // a valid statement for body farms, do we need this behavior here?
294 if (const Stmt *S = getStmtForDiagnostics())
295 return getLocationContext()
297 ->getCFGStmtMap()
298 ->getBlock(S);
299
300 return nullptr;
301}
302
303static const LocationContext *
306 const LocationContext *ParentLC = LC->getParent();
307 assert(ParentLC && "We don't start analysis from autosynthesized code");
308 while (ParentLC->getAnalysisDeclContext()->isBodyAutosynthesized()) {
309 LC = ParentLC;
310 ParentLC = LC->getParent();
311 assert(ParentLC && "We don't start analysis from autosynthesized code");
312 }
313 return LC;
314}
315
317 // We cannot place diagnostics on autosynthesized code.
318 // Put them onto the call site through which we jumped into autosynthesized
319 // code for the first time.
322 // It must be a stack frame because we only autosynthesize functions.
324 ->getCallSite();
325 }
326 // Otherwise, see if the node's program point directly points to a statement.
327 // FIXME: Refactor into a ProgramPoint method?
329 if (auto SP = P.getAs<StmtPoint>())
330 return SP->getStmt();
331 if (auto BE = P.getAs<BlockEdge>())
332 return BE->getSrc()->getTerminatorStmt();
333 if (auto CE = P.getAs<CallEnter>())
334 return CE->getCallExpr();
335 if (auto CEE = P.getAs<CallExitEnd>())
336 return CEE->getCalleeContext()->getCallSite();
337 if (auto PIPP = P.getAs<PostInitializer>())
338 return PIPP->getInitializer()->getInit();
339 if (auto CEB = P.getAs<CallExitBegin>())
340 return CEB->getReturnStmt();
341 if (auto FEP = P.getAs<FunctionExitPoint>())
342 return FEP->getStmt();
343
344 return nullptr;
345}
346
348 for (const ExplodedNode *N = getFirstSucc(); N; N = N->getFirstSucc()) {
349 if (N->getLocation().isPurgeKind())
350 continue;
351 if (const Stmt *S = N->getStmtForDiagnostics()) {
352 // Check if the statement is '?' or '&&'/'||'. These are "merges",
353 // not actual statement points.
354 switch (S->getStmtClass()) {
355 case Stmt::ChooseExprClass:
356 case Stmt::BinaryConditionalOperatorClass:
357 case Stmt::ConditionalOperatorClass:
358 continue;
359 case Stmt::BinaryOperatorClass: {
360 BinaryOperatorKind Op = cast<BinaryOperator>(S)->getOpcode();
361 if (Op == BO_LAnd || Op == BO_LOr)
362 continue;
363 break;
364 }
365 default:
366 break;
367 }
368 // We found the statement, so return it.
369 return S;
370 }
371 }
372
373 return nullptr;
374}
375
377 for (const ExplodedNode *N = getFirstPred(); N; N = N->getFirstPred())
378 if (const Stmt *S = N->getStmtForDiagnostics(); S && !isa<CompoundStmt>(S))
379 return S;
380
381 return nullptr;
382}
383
385 if (const Stmt *S = getStmtForDiagnostics())
386 return S;
387
389}
390
392 ProgramStateRef State,
393 bool IsSink,
394 bool* IsNew) {
395 // Profile 'State' to determine if we already have an existing node.
396 llvm::FoldingSetNodeID profile;
397 void *InsertPos = nullptr;
398
399 NodeTy::Profile(profile, L, State, IsSink);
400 NodeTy* V = Nodes.FindNodeOrInsertPos(profile, InsertPos);
401
402 if (!V) {
403 if (!FreeNodes.empty()) {
404 V = FreeNodes.back();
405 FreeNodes.pop_back();
406 }
407 else {
408 // Allocate a new node.
409 V = getAllocator().Allocate<NodeTy>();
410 }
411
412 ++NumNodes;
413 new (V) NodeTy(L, State, NumNodes, IsSink);
414
416 ChangedNodes.push_back(V);
417
418 // Insert the node into the node set and return it.
419 Nodes.InsertNode(V, InsertPos);
420
421 if (IsNew) *IsNew = true;
422 }
423 else
424 if (IsNew) *IsNew = false;
425
426 return V;
427}
428
430 ProgramStateRef State,
431 int64_t Id,
432 bool IsSink) {
433 NodeTy *V = getAllocator().Allocate<NodeTy>();
434 new (V) NodeTy(L, State, Id, IsSink);
435 return V;
436}
437
438std::unique_ptr<ExplodedGraph>
440 InterExplodedGraphMap *ForwardMap,
441 InterExplodedGraphMap *InverseMap) const {
442 // FIXME: The two-pass algorithm of this function (which was introduced in
443 // 2008) is terribly overcomplicated and should be replaced by a single
444 // (backward) pass.
445
446 if (Nodes.empty())
447 return nullptr;
448
449 using Pass1Ty = llvm::DenseSet<const ExplodedNode *>;
450 Pass1Ty Pass1;
451
452 using Pass2Ty = InterExplodedGraphMap;
453 InterExplodedGraphMap Pass2Scratch;
454 Pass2Ty &Pass2 = ForwardMap ? *ForwardMap : Pass2Scratch;
455
457
458 // ===- Pass 1 (reverse DFS) -===
459 for (const auto Sink : Sinks)
460 if (Sink)
461 WL1.push_back(Sink);
462
463 // Process the first worklist until it is empty.
464 while (!WL1.empty()) {
465 const ExplodedNode *N = WL1.pop_back_val();
466
467 // Have we already visited this node? If so, continue to the next one.
468 if (!Pass1.insert(N).second)
469 continue;
470
471 // If this is the root enqueue it to the second worklist.
472 if (N->Preds.empty()) {
473 assert(N == getRoot() && "Found non-root node with no predecessors!");
474 WL2.push_back(N);
475 continue;
476 }
477
478 // Visit our predecessors and enqueue them.
479 WL1.append(N->Preds.begin(), N->Preds.end());
480 }
481
482 // We didn't hit the root? Return with a null pointer for the new graph.
483 if (WL2.empty())
484 return nullptr;
485
486 assert(WL2.size() == 1 && "There must be only one root!");
487
488 // Create an empty graph.
489 std::unique_ptr<ExplodedGraph> G = std::make_unique<ExplodedGraph>();
490
491 // ===- Pass 2 (forward DFS to construct the new graph) -===
492 while (!WL2.empty()) {
493 const ExplodedNode *N = WL2.pop_back_val();
494
495 auto [Place, Inserted] = Pass2.try_emplace(N);
496
497 // Skip this node if we have already processed it.
498 if (!Inserted)
499 continue;
500
501 // Create the corresponding node in the new graph and record the mapping
502 // from the old node to the new node.
503 ExplodedNode *NewN = G->createUncachedNode(N->getLocation(), N->State,
504 N->getID(), N->isSink());
505 Place->second = NewN;
506
507 // Also record the reverse mapping from the new node to the old node.
508 if (InverseMap) (*InverseMap)[NewN] = N;
509
510 // If this node is the root, designate it as such in the graph.
511 if (N->Preds.empty()) {
512 assert(N == getRoot());
513 G->designateAsRoot(NewN);
514 }
515
516 // In the case that some of the intended predecessors of NewN have already
517 // been created, we should hook them up as predecessors.
518
519 // Walk through the predecessors of 'N' and hook up their corresponding
520 // nodes in the new graph (if any) to the freshly created node.
521 for (const ExplodedNode *Pred : N->Preds) {
522 Pass2Ty::iterator PI = Pass2.find(Pred);
523 if (PI == Pass2.end())
524 continue;
525
526 NewN->addPredecessor(const_cast<ExplodedNode *>(PI->second), *G);
527 }
528
529 // In the case that some of the intended successors of NewN have already
530 // been created, we should hook them up as successors. Otherwise, enqueue
531 // the new nodes from the original graph that should have nodes created
532 // in the new graph.
533 for (const ExplodedNode *Succ : N->Succs) {
534 Pass2Ty::iterator PI = Pass2.find(Succ);
535 if (PI != Pass2.end()) {
536 const_cast<ExplodedNode *>(PI->second)->addPredecessor(NewN, *G);
537 continue;
538 }
539
540 // Enqueue nodes to the worklist that were marked during pass 1.
541 if (Pass1.count(Succ))
542 WL2.push_back(Succ);
543 }
544 }
545
546 return G;
547}
#define V(N, I)
llvm::PointerUnion< ExplodedNode *, ExplodedNodeVector * > GroupStorage
BumpVector< ExplodedNode * > ExplodedNodeVector
static const LocationContext * findTopAutosynthesizedParentContext(const LocationContext *LC)
Forward-declares and imports various common LLVM datatypes that clang wants to use unqualified.
Represents a single basic block in a source-level CFG.
Definition CFG.h:605
const CFGBlock * getBlock(const Stmt *S) const
Returns the CFGBlock the specified Stmt* appears in.
Represents a point when we begin processing an inlined call.
Represents a point when we start the call exit sequence (for inlined call).
Represents a point when we finish the call exit sequence (for inlined call).
This represents one expression.
Definition Expr.h:112
bool isLValue() const
isLValue - True if this expression is an "l-value" according to the rules of the current language.
Definition Expr.h:284
It wraps the AnalysisDeclContext to represent both the call stack with the help of StackFrameContext ...
const ParentMap & getParentMap() const
LLVM_ATTRIBUTE_RETURNS_NONNULL AnalysisDeclContext * getAnalysisDeclContext() const
const LocationContext * getParent() const
It might return null.
bool isConsumedExpr(Expr *E) const
Represents a program point after a store evaluation.
Represents a program point just before an implicit call event.
Represents a point after we ran remove dead bindings BEFORE processing the given statement.
const ProgramPointTag * getTag() const
bool isPurgeKind()
Is this a program point corresponding to purge/removal of dead symbols and bindings.
T castAs() const
Convert to the specified ProgramPoint type, asserting that this ProgramPoint is of the desired type.
std::optional< T > getAs() const
Convert to the specified ProgramPoint type, returning std::nullopt if this ProgramPoint is not of the...
const LocationContext * getLocationContext() const
const Stmt * getStmt() const
Stmt - This represents one statement.
Definition Stmt.h:85
static bool isCallStmt(const Stmt *S)
Returns true if this is a statement is a function or method call of some kind.
std::unique_ptr< ExplodedGraph > trim(ArrayRef< const NodeTy * > Nodes, InterExplodedGraphMap *ForwardMap=nullptr, InterExplodedGraphMap *InverseMap=nullptr) const
Creates a trimmed version of the graph that only contains paths leading to the given nodes.
BumpVectorContext & getNodeAllocator()
unsigned ReclaimCounter
Counter to determine when to reclaim nodes.
NodeVector ChangedNodes
A list of recently allocated nodes that can potentially be recycled.
int64_t NumNodes
NumNodes - The number of nodes in the graph.
unsigned ReclaimNodeInterval
Determines how often nodes are reclaimed.
void reclaimRecentlyAllocatedNodes()
Reclaim "uninteresting" nodes created since the last time this method was called.
static bool isInterestingLValueExpr(const Expr *Ex)
Returns true if nodes for the given expression kind are always kept around.
llvm::BumpPtrAllocator & getAllocator()
ExplodedNode * getNode(const ProgramPoint &L, ProgramStateRef State, bool IsSink=false, bool *IsNew=nullptr)
Retrieve the node associated with a (Location, State) pair, where the 'Location' is a ProgramPoint in...
ExplodedNode * getRoot() const
Get the root node of the graph.
ExplodedNode * createUncachedNode(const ProgramPoint &L, ProgramStateRef State, int64_t Id, bool IsSink=false)
Create a node for a (Location, State) pair, but don't store it for deduplication later.
llvm::FoldingSet< ExplodedNode > Nodes
Nodes - The nodes in the graph.
NodeVector FreeNodes
A list of nodes that can be reused.
const CFGBlock * getCFGBlock() const
const ProgramStateRef & getState() const
const Stmt * getStmtForDiagnostics() const
If the node's program point corresponds to a statement, retrieve that statement.
bool isTrivial() const
The node is trivial if it has only one successor, only one predecessor, it's predecessor has only one...
const Stmt * getPreviousStmtForDiagnostics() const
Find the statement that was executed immediately before this node.
ProgramPoint getLocation() const
getLocation - Returns the edge associated with the given node.
void addPredecessor(ExplodedNode *V, ExplodedGraph &G)
addPredeccessor - Adds a predecessor to the current node, and in tandem add this node as a successor ...
ExplodedNode * getFirstSucc()
unsigned pred_size() const
static void Profile(llvm::FoldingSetNodeID &ID, const ProgramPoint &Loc, const ProgramStateRef &state, bool IsSink)
ExplodedNode(const ProgramPoint &loc, ProgramStateRef state, int64_t Id, bool IsSink)
const Stmt * getNextStmtForDiagnostics() const
Find the next statement that was executed on this node's execution path.
const LocationContext * getLocationContext() const
const Stmt * getCurrentOrPreviousStmtForDiagnostics() const
Find the statement that was executed at or immediately before this node.
ExplodedNode * getFirstPred()
unsigned succ_size() const
llvm::DenseMap< const ExplodedNode *, const ExplodedNode * > InterExplodedGraphMap
IntrusiveRefCntPtr< const ProgramState > ProgramStateRef
The JSON file list parser is used to communicate input to InstallAPI.
bool isa(CodeGen::Address addr)
Definition Address.h:330
U cast(CodeGen::Address addr)
Definition Address.h:327