clang  10.0.0svn
ExplodedGraph.cpp
Go to the documentation of this file.
1 //===- ExplodedGraph.cpp - Local, Path-Sens. "Exploded Graph" -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines the template classes ExplodedNode and ExplodedGraph,
10 // which represent a path-sensitive, intra-procedural "exploded graph."
11 //
12 //===----------------------------------------------------------------------===//
13 
15 #include "clang/AST/Expr.h"
16 #include "clang/AST/ExprObjC.h"
17 #include "clang/AST/ParentMap.h"
18 #include "clang/AST/Stmt.h"
22 #include "clang/Basic/LLVM.h"
26 #include "llvm/ADT/DenseSet.h"
27 #include "llvm/ADT/FoldingSet.h"
28 #include "llvm/ADT/Optional.h"
29 #include "llvm/ADT/PointerUnion.h"
30 #include "llvm/ADT/SmallVector.h"
31 #include "llvm/Support/Casting.h"
32 #include <cassert>
33 #include <memory>
34 
35 using namespace clang;
36 using namespace ento;
37 
38 //===----------------------------------------------------------------------===//
39 // Cleanup.
40 //===----------------------------------------------------------------------===//
41 
43 
45 
46 //===----------------------------------------------------------------------===//
47 // Node reclamation.
48 //===----------------------------------------------------------------------===//
49 
51  if (!Ex->isLValue())
52  return false;
53  return isa<DeclRefExpr>(Ex) ||
54  isa<MemberExpr>(Ex) ||
55  isa<ObjCIvarRefExpr>(Ex);
56 }
57 
58 bool ExplodedGraph::shouldCollect(const ExplodedNode *node) {
59  // First, we only consider nodes for reclamation of the following
60  // conditions apply:
61  //
62  // (1) 1 predecessor (that has one successor)
63  // (2) 1 successor (that has one predecessor)
64  //
65  // If a node has no successor it is on the "frontier", while a node
66  // with no predecessor is a root.
67  //
68  // After these prerequisites, we discard all "filler" nodes that
69  // are used only for intermediate processing, and are not essential
70  // for analyzer history:
71  //
72  // (a) PreStmtPurgeDeadSymbols
73  //
74  // We then discard all other nodes where *all* of the following conditions
75  // apply:
76  //
77  // (3) The ProgramPoint is for a PostStmt, but not a PostStore.
78  // (4) There is no 'tag' for the ProgramPoint.
79  // (5) The 'store' is the same as the predecessor.
80  // (6) The 'GDM' is the same as the predecessor.
81  // (7) The LocationContext is the same as the predecessor.
82  // (8) Expressions that are *not* lvalue expressions.
83  // (9) The PostStmt isn't for a non-consumed Stmt or Expr.
84  // (10) The successor is neither a CallExpr StmtPoint nor a CallEnter or
85  // PreImplicitCall (so that we would be able to find it when retrying a
86  // call with no inlining).
87  // FIXME: It may be safe to reclaim PreCall and PostCall nodes as well.
88 
89  // Conditions 1 and 2.
90  if (node->pred_size() != 1 || node->succ_size() != 1)
91  return false;
92 
93  const ExplodedNode *pred = *(node->pred_begin());
94  if (pred->succ_size() != 1)
95  return false;
96 
97  const ExplodedNode *succ = *(node->succ_begin());
98  if (succ->pred_size() != 1)
99  return false;
100 
101  // Now reclaim any nodes that are (by definition) not essential to
102  // analysis history and are not consulted by any client code.
103  ProgramPoint progPoint = node->getLocation();
104  if (progPoint.getAs<PreStmtPurgeDeadSymbols>())
105  return !progPoint.getTag();
106 
107  // Condition 3.
108  if (!progPoint.getAs<PostStmt>() || progPoint.getAs<PostStore>())
109  return false;
110 
111  // Condition 4.
112  if (progPoint.getTag())
113  return false;
114 
115  // Conditions 5, 6, and 7.
116  ProgramStateRef state = node->getState();
117  ProgramStateRef pred_state = pred->getState();
118  if (state->store != pred_state->store || state->GDM != pred_state->GDM ||
119  progPoint.getLocationContext() != pred->getLocationContext())
120  return false;
121 
122  // All further checks require expressions. As per #3, we know that we have
123  // a PostStmt.
124  const Expr *Ex = dyn_cast<Expr>(progPoint.castAs<PostStmt>().getStmt());
125  if (!Ex)
126  return false;
127 
128  // Condition 8.
129  // Do not collect nodes for "interesting" lvalue expressions since they are
130  // used extensively for generating path diagnostics.
131  if (isInterestingLValueExpr(Ex))
132  return false;
133 
134  // Condition 9.
135  // Do not collect nodes for non-consumed Stmt or Expr to ensure precise
136  // diagnostic generation; specifically, so that we could anchor arrows
137  // pointing to the beginning of statements (as written in code).
138  const ParentMap &PM = progPoint.getLocationContext()->getParentMap();
139  if (!PM.isConsumedExpr(Ex))
140  return false;
141 
142  // Condition 10.
143  const ProgramPoint SuccLoc = succ->getLocation();
144  if (Optional<StmtPoint> SP = SuccLoc.getAs<StmtPoint>())
145  if (CallEvent::isCallStmt(SP->getStmt()))
146  return false;
147 
148  // Condition 10, continuation.
149  if (SuccLoc.getAs<CallEnter>() || SuccLoc.getAs<PreImplicitCall>())
150  return false;
151 
152  return true;
153 }
154 
155 void ExplodedGraph::collectNode(ExplodedNode *node) {
156  // Removing a node means:
157  // (a) changing the predecessors successor to the successor of this node
158  // (b) changing the successors predecessor to the predecessor of this node
159  // (c) Putting 'node' onto freeNodes.
160  assert(node->pred_size() == 1 || node->succ_size() == 1);
161  ExplodedNode *pred = *(node->pred_begin());
162  ExplodedNode *succ = *(node->succ_begin());
163  pred->replaceSuccessor(succ);
164  succ->replacePredecessor(pred);
165  FreeNodes.push_back(node);
166  Nodes.RemoveNode(node);
167  --NumNodes;
168  node->~ExplodedNode();
169 }
170 
172  if (ChangedNodes.empty())
173  return;
174 
175  // Only periodically reclaim nodes so that we can build up a set of
176  // nodes that meet the reclamation criteria. Freshly created nodes
177  // by definition have no successor, and thus cannot be reclaimed (see below).
178  assert(ReclaimCounter > 0);
179  if (--ReclaimCounter != 0)
180  return;
182 
183  for (const auto node : ChangedNodes)
184  if (shouldCollect(node))
185  collectNode(node);
186  ChangedNodes.clear();
187 }
188 
189 //===----------------------------------------------------------------------===//
190 // ExplodedNode.
191 //===----------------------------------------------------------------------===//
192 
193 // An NodeGroup's storage type is actually very much like a TinyPtrVector:
194 // it can be either a pointer to a single ExplodedNode, or a pointer to a
195 // BumpVector allocated with the ExplodedGraph's allocator. This allows the
196 // common case of single-node NodeGroups to be implemented with no extra memory.
197 //
198 // Consequently, each of the NodeGroup methods have up to four cases to handle:
199 // 1. The flag is set and this group does not actually contain any nodes.
200 // 2. The group is empty, in which case the storage value is null.
201 // 3. The group contains a single node.
202 // 4. The group contains more than one node.
204 using GroupStorage = llvm::PointerUnion<ExplodedNode *, ExplodedNodeVector *>;
205 
207  assert(!V->isSink());
208  Preds.addNode(V, G);
209  V->Succs.addNode(this, G);
210 }
211 
212 void ExplodedNode::NodeGroup::replaceNode(ExplodedNode *node) {
213  assert(!getFlag());
214 
215  GroupStorage &Storage = reinterpret_cast<GroupStorage&>(P);
216  assert(Storage.is<ExplodedNode *>());
217  Storage = node;
218  assert(Storage.is<ExplodedNode *>());
219 }
220 
221 void ExplodedNode::NodeGroup::addNode(ExplodedNode *N, ExplodedGraph &G) {
222  assert(!getFlag());
223 
224  GroupStorage &Storage = reinterpret_cast<GroupStorage&>(P);
225  if (Storage.isNull()) {
226  Storage = N;
227  assert(Storage.is<ExplodedNode *>());
228  return;
229  }
230 
231  ExplodedNodeVector *V = Storage.dyn_cast<ExplodedNodeVector *>();
232 
233  if (!V) {
234  // Switch from single-node to multi-node representation.
235  ExplodedNode *Old = Storage.get<ExplodedNode *>();
236 
238  V = G.getAllocator().Allocate<ExplodedNodeVector>();
239  new (V) ExplodedNodeVector(Ctx, 4);
240  V->push_back(Old, Ctx);
241 
242  Storage = V;
243  assert(!getFlag());
244  assert(Storage.is<ExplodedNodeVector *>());
245  }
246 
247  V->push_back(N, G.getNodeAllocator());
248 }
249 
250 unsigned ExplodedNode::NodeGroup::size() const {
251  if (getFlag())
252  return 0;
253 
254  const GroupStorage &Storage = reinterpret_cast<const GroupStorage &>(P);
255  if (Storage.isNull())
256  return 0;
257  if (ExplodedNodeVector *V = Storage.dyn_cast<ExplodedNodeVector *>())
258  return V->size();
259  return 1;
260 }
261 
262 ExplodedNode * const *ExplodedNode::NodeGroup::begin() const {
263  if (getFlag())
264  return nullptr;
265 
266  const GroupStorage &Storage = reinterpret_cast<const GroupStorage &>(P);
267  if (Storage.isNull())
268  return nullptr;
269  if (ExplodedNodeVector *V = Storage.dyn_cast<ExplodedNodeVector *>())
270  return V->begin();
271  return Storage.getAddrOfPtr1();
272 }
273 
274 ExplodedNode * const *ExplodedNode::NodeGroup::end() const {
275  if (getFlag())
276  return nullptr;
277 
278  const GroupStorage &Storage = reinterpret_cast<const GroupStorage &>(P);
279  if (Storage.isNull())
280  return nullptr;
281  if (ExplodedNodeVector *V = Storage.dyn_cast<ExplodedNodeVector *>())
282  return V->end();
283  return Storage.getAddrOfPtr1() + 1;
284 }
285 
287  return pred_size() == 1 && succ_size() == 1 &&
288  getFirstPred()->getState()->getID() == getState()->getID() &&
289  getFirstPred()->succ_size() == 1;
290 }
291 
293  ProgramPoint P = getLocation();
294  if (auto BEP = P.getAs<BlockEntrance>())
295  return BEP->getBlock();
296 
297  // Find the node's current statement in the CFG.
298  // FIXME: getStmtForDiagnostics() does nasty things in order to provide
299  // a valid statement for body farms, do we need this behavior here?
300  if (const Stmt *S = getStmtForDiagnostics())
301  return getLocationContext()
302  ->getAnalysisDeclContext()
303  ->getCFGStmtMap()
304  ->getBlock(S);
305 
306  return nullptr;
307 }
308 
309 static const LocationContext *
312  const LocationContext *ParentLC = LC->getParent();
313  assert(ParentLC && "We don't start analysis from autosynthesized code");
314  while (ParentLC->getAnalysisDeclContext()->isBodyAutosynthesized()) {
315  LC = ParentLC;
316  ParentLC = LC->getParent();
317  assert(ParentLC && "We don't start analysis from autosynthesized code");
318  }
319  return LC;
320 }
321 
323  // We cannot place diagnostics on autosynthesized code.
324  // Put them onto the call site through which we jumped into autosynthesized
325  // code for the first time.
326  const LocationContext *LC = getLocationContext();
328  // It must be a stack frame because we only autosynthesize functions.
329  return cast<StackFrameContext>(findTopAutosynthesizedParentContext(LC))
330  ->getCallSite();
331  }
332  // Otherwise, see if the node's program point directly points to a statement.
333  // FIXME: Refactor into a ProgramPoint method?
334  ProgramPoint P = getLocation();
335  if (auto SP = P.getAs<StmtPoint>())
336  return SP->getStmt();
337  if (auto BE = P.getAs<BlockEdge>())
338  return BE->getSrc()->getTerminatorStmt();
339  if (auto CE = P.getAs<CallEnter>())
340  return CE->getCallExpr();
341  if (auto CEE = P.getAs<CallExitEnd>())
342  return CEE->getCalleeContext()->getCallSite();
343  if (auto PIPP = P.getAs<PostInitializer>())
344  return PIPP->getInitializer()->getInit();
345  if (auto CEB = P.getAs<CallExitBegin>())
346  return CEB->getReturnStmt();
347  if (auto FEP = P.getAs<FunctionExitPoint>())
348  return FEP->getStmt();
349 
350  return nullptr;
351 }
352 
354  for (const ExplodedNode *N = getFirstSucc(); N; N = N->getFirstSucc()) {
355  if (const Stmt *S = N->getStmtForDiagnostics()) {
356  // Check if the statement is '?' or '&&'/'||'. These are "merges",
357  // not actual statement points.
358  switch (S->getStmtClass()) {
359  case Stmt::ChooseExprClass:
360  case Stmt::BinaryConditionalOperatorClass:
361  case Stmt::ConditionalOperatorClass:
362  continue;
363  case Stmt::BinaryOperatorClass: {
364  BinaryOperatorKind Op = cast<BinaryOperator>(S)->getOpcode();
365  if (Op == BO_LAnd || Op == BO_LOr)
366  continue;
367  break;
368  }
369  default:
370  break;
371  }
372  // We found the statement, so return it.
373  return S;
374  }
375  }
376 
377  return nullptr;
378 }
379 
381  for (const ExplodedNode *N = getFirstPred(); N; N = N->getFirstPred())
382  if (const Stmt *S = N->getStmtForDiagnostics())
383  return S;
384 
385  return nullptr;
386 }
387 
389  if (const Stmt *S = getStmtForDiagnostics())
390  return S;
391 
392  return getPreviousStmtForDiagnostics();
393 }
394 
397  bool IsSink,
398  bool* IsNew) {
399  // Profile 'State' to determine if we already have an existing node.
400  llvm::FoldingSetNodeID profile;
401  void *InsertPos = nullptr;
402 
403  NodeTy::Profile(profile, L, State, IsSink);
404  NodeTy* V = Nodes.FindNodeOrInsertPos(profile, InsertPos);
405 
406  if (!V) {
407  if (!FreeNodes.empty()) {
408  V = FreeNodes.back();
409  FreeNodes.pop_back();
410  }
411  else {
412  // Allocate a new node.
413  V = (NodeTy*) getAllocator().Allocate<NodeTy>();
414  }
415 
416  ++NumNodes;
417  new (V) NodeTy(L, State, NumNodes, IsSink);
418 
420  ChangedNodes.push_back(V);
421 
422  // Insert the node into the node set and return it.
423  Nodes.InsertNode(V, InsertPos);
424 
425  if (IsNew) *IsNew = true;
426  }
427  else
428  if (IsNew) *IsNew = false;
429 
430  return V;
431 }
432 
435  int64_t Id,
436  bool IsSink) {
437  NodeTy *V = (NodeTy *) getAllocator().Allocate<NodeTy>();
438  new (V) NodeTy(L, State, Id, IsSink);
439  return V;
440 }
441 
442 std::unique_ptr<ExplodedGraph>
444  InterExplodedGraphMap *ForwardMap,
445  InterExplodedGraphMap *InverseMap) const {
446  if (Nodes.empty())
447  return nullptr;
448 
449  using Pass1Ty = llvm::DenseSet<const ExplodedNode *>;
450  Pass1Ty Pass1;
451 
452  using Pass2Ty = InterExplodedGraphMap;
453  InterExplodedGraphMap Pass2Scratch;
454  Pass2Ty &Pass2 = ForwardMap ? *ForwardMap : Pass2Scratch;
455 
457 
458  // ===- Pass 1 (reverse DFS) -===
459  for (const auto Sink : Sinks)
460  if (Sink)
461  WL1.push_back(Sink);
462 
463  // Process the first worklist until it is empty.
464  while (!WL1.empty()) {
465  const ExplodedNode *N = WL1.pop_back_val();
466 
467  // Have we already visited this node? If so, continue to the next one.
468  if (!Pass1.insert(N).second)
469  continue;
470 
471  // If this is a root enqueue it to the second worklist.
472  if (N->Preds.empty()) {
473  WL2.push_back(N);
474  continue;
475  }
476 
477  // Visit our predecessors and enqueue them.
478  WL1.append(N->Preds.begin(), N->Preds.end());
479  }
480 
481  // We didn't hit a root? Return with a null pointer for the new graph.
482  if (WL2.empty())
483  return nullptr;
484 
485  // Create an empty graph.
486  std::unique_ptr<ExplodedGraph> G = MakeEmptyGraph();
487 
488  // ===- Pass 2 (forward DFS to construct the new graph) -===
489  while (!WL2.empty()) {
490  const ExplodedNode *N = WL2.pop_back_val();
491 
492  // Skip this node if we have already processed it.
493  if (Pass2.find(N) != Pass2.end())
494  continue;
495 
496  // Create the corresponding node in the new graph and record the mapping
497  // from the old node to the new node.
498  ExplodedNode *NewN = G->createUncachedNode(N->getLocation(), N->State,
499  N->getID(), N->isSink());
500  Pass2[N] = NewN;
501 
502  // Also record the reverse mapping from the new node to the old node.
503  if (InverseMap) (*InverseMap)[NewN] = N;
504 
505  // If this node is a root, designate it as such in the graph.
506  if (N->Preds.empty())
507  G->addRoot(NewN);
508 
509  // In the case that some of the intended predecessors of NewN have already
510  // been created, we should hook them up as predecessors.
511 
512  // Walk through the predecessors of 'N' and hook up their corresponding
513  // nodes in the new graph (if any) to the freshly created node.
514  for (ExplodedNode::pred_iterator I = N->Preds.begin(), E = N->Preds.end();
515  I != E; ++I) {
516  Pass2Ty::iterator PI = Pass2.find(*I);
517  if (PI == Pass2.end())
518  continue;
519 
520  NewN->addPredecessor(const_cast<ExplodedNode *>(PI->second), *G);
521  }
522 
523  // In the case that some of the intended successors of NewN have already
524  // been created, we should hook them up as successors. Otherwise, enqueue
525  // the new nodes from the original graph that should have nodes created
526  // in the new graph.
527  for (ExplodedNode::succ_iterator I = N->Succs.begin(), E = N->Succs.end();
528  I != E; ++I) {
529  Pass2Ty::iterator PI = Pass2.find(*I);
530  if (PI != Pass2.end()) {
531  const_cast<ExplodedNode *>(PI->second)->addPredecessor(NewN, *G);
532  continue;
533  }
534 
535  // Enqueue nodes to the worklist that were marked during pass 1.
536  if (Pass1.count(*I))
537  WL2.push_back(*I);
538  }
539  }
540 
541  return G;
542 }
unsigned ReclaimCounter
Counter to determine when to reclaim nodes.
const Stmt * getStmtForDiagnostics() const
If the node&#39;s program point corresponds to a statement, retrieve that statement.
const Stmt * getCurrentOrPreviousStmtForDiagnostics() const
Find the statement that was executed at or immediately before this node.
Stmt - This represents one statement.
Definition: Stmt.h:66
NodeVector FreeNodes
A list of nodes that can be reused.
Represents a point when we begin processing an inlined call.
Definition: ProgramPoint.h:630
StringRef P
const CFGBlock * getCFGBlock() const
Represents a point after we ran remove dead bindings BEFORE processing the given statement.
Definition: ProgramPoint.h:473
Represents a program point just before an implicit call event.
Definition: ProgramPoint.h:583
const ProgramStateRef & getState() const
bool isConsumedExpr(Expr *E) const
Definition: ParentMap.cpp:171
int64_t NumNodes
NumNodes - The number of nodes in the graph.
NodeVector ChangedNodes
A list of recently allocated nodes that can potentially be recycled.
LineState State
succ_iterator succ_begin()
Represents a program point after a store evaluation.
Definition: ProgramPoint.h:431
std::unique_ptr< ExplodedGraph > trim(ArrayRef< const NodeTy *> Nodes, InterExplodedGraphMap *ForwardMap=nullptr, InterExplodedGraphMap *InverseMap=nullptr) const
Creates a trimmed version of the graph that only contains paths leading to the given nodes...
void addPredecessor(ExplodedNode *V, ExplodedGraph &G)
addPredeccessor - Adds a predecessor to the current node, and in tandem add this node as a successor ...
i32 captured_struct **param SharedsTy A type which contains references the shared variables *param Shareds Context with the list of shared variables from the p *TaskFunction *param Data Additional data for task generation like final * state
Represents a point when we start the call exit sequence (for inlined call).
Definition: ProgramPoint.h:668
BinaryOperatorKind
unsigned succ_size() const
Forward-declares and imports various common LLVM datatypes that clang wants to use unqualified...
const LocationContext * getLocationContext() const
std::unique_ptr< ExplodedGraph > MakeEmptyGraph() const
const LocationContext * getParent() const
ExplodedNode * getFirstPred()
unsigned pred_size() const
Represents a single basic block in a source-level CFG.
Definition: CFG.h:576
Represents a point when we finish the call exit sequence (for inlined call).
Definition: ProgramPoint.h:688
This represents one expression.
Definition: Expr.h:108
int Id
Definition: ASTDiff.cpp:190
ExplodedNode * getFirstSucc()
#define V(N, I)
Definition: ASTContext.h:2921
ExplodedNode *const * succ_iterator
static const LocationContext * findTopAutosynthesizedParentContext(const LocationContext *LC)
bool isBodyAutosynthesized() const
Checks if the body of the Decl is generated by the BodyFarm.
ExplodedNode * createUncachedNode(const ProgramPoint &L, ProgramStateRef State, int64_t Id, bool IsSink=false)
Create a node for a (Location, State) pair, but don&#39;t store it for deduplication later.
static void Profile(llvm::FoldingSetNodeID &ID, const ProgramPoint &Loc, const ProgramStateRef &state, bool IsSink)
ExplodedNode * getNode(const ProgramPoint &L, ProgramStateRef State, bool IsSink=false, bool *IsNew=nullptr)
Retrieve the node associated with a (Location,State) pair, where the &#39;Location&#39; is a ProgramPoint in ...
llvm::PointerUnion< ExplodedNode *, ExplodedNodeVector * > GroupStorage
unsigned ReclaimNodeInterval
Determines how often nodes are reclaimed.
T castAs() const
Convert to the specified ProgramPoint type, asserting that this ProgramPoint is of the desired type...
Definition: ProgramPoint.h:140
static bool isCallStmt(const Stmt *S)
Returns true if this is a statement is a function or method call of some kind.
Definition: CallEvent.cpp:456
llvm::FoldingSet< ExplodedNode > Nodes
Nodes - The nodes in the graph.
ProgramPoint getLocation() const
getLocation - Returns the edge associated with the given node.
const Stmt * getPreviousStmtForDiagnostics() const
Find the statement that was executed immediately before this node.
bool isLValue() const
isLValue - True if this expression is an "l-value" according to the rules of the current language...
Definition: Expr.h:258
void push_back(const_reference Elt, BumpVectorContext &C)
Definition: BumpVector.h:159
Dataflow Directional Tag Classes.
BumpVectorContext & getNodeAllocator()
BumpVector< ExplodedNode * > ExplodedNodeVector
void reclaimRecentlyAllocatedNodes()
Reclaim "uninteresting" nodes created since the last time this method was called. ...
bool isTrivial() const
The node is trivial if it has only one successor, only one predecessor, it&#39;s predecessor has only one...
const ProgramPointTag * getTag() const
Definition: ProgramPoint.h:177
llvm::DenseMap< const ExplodedNode *, const ExplodedNode * > InterExplodedGraphMap
const LocationContext * getLocationContext() const
Definition: ProgramPoint.h:179
const ParentMap & getParentMap() const
pred_iterator pred_begin()
llvm::BumpPtrAllocator & getAllocator()
const Stmt * getNextStmtForDiagnostics() const
Find the next statement that was executed on this node&#39;s execution path.
RangeSelector node(std::string ID)
Selects a node, including trailing semicolon (for non-expression statements).
Optional< T > getAs() const
Convert to the specified ProgramPoint type, returning None if this ProgramPoint is not of the desired...
Definition: ProgramPoint.h:151
AnalysisDeclContext * getAnalysisDeclContext() const
ExplodedNode *const * pred_iterator
static bool isInterestingLValueExpr(const Expr *Ex)
Returns true if nodes for the given expression kind are always kept around.