clang 18.0.0git
DataflowAnalysisContext.h
Go to the documentation of this file.
1//===-- DataflowAnalysisContext.h -------------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines a DataflowAnalysisContext class that owns objects that
10// encompass the state of a program and stores context that is used during
11// dataflow analysis.
12//
13//===----------------------------------------------------------------------===//
14
15#ifndef LLVM_CLANG_ANALYSIS_FLOWSENSITIVE_DATAFLOWANALYSISCONTEXT_H
16#define LLVM_CLANG_ANALYSIS_FLOWSENSITIVE_DATAFLOWANALYSISCONTEXT_H
17
18#include "clang/AST/Decl.h"
19#include "clang/AST/Expr.h"
26#include "llvm/ADT/DenseMap.h"
27#include "llvm/ADT/DenseSet.h"
28#include "llvm/ADT/SetVector.h"
29#include "llvm/Support/Compiler.h"
30#include <cassert>
31#include <memory>
32#include <optional>
33#include <type_traits>
34#include <utility>
35#include <vector>
36
37namespace clang {
38namespace dataflow {
39class Logger;
40
41/// Skip past nodes that the CFG does not emit. These nodes are invisible to
42/// flow-sensitive analysis, and should be ignored as they will effectively not
43/// exist.
44///
45/// * `ParenExpr` - The CFG takes the operator precedence into account, but
46/// otherwise omits the node afterwards.
47///
48/// * `ExprWithCleanups` - The CFG will generate the appropriate calls to
49/// destructors and then omit the node.
50///
51const Expr &ignoreCFGOmittedNodes(const Expr &E);
52const Stmt &ignoreCFGOmittedNodes(const Stmt &S);
53
54/// A set of `FieldDecl *`. Use `SmallSetVector` to guarantee deterministic
55/// iteration order.
57
58/// Returns the set of all fields in the type.
60
61/// Returns whether `Fields` and `FieldLocs` contain the same fields.
62bool containsSameFields(const FieldSet &Fields,
63 const RecordStorageLocation::FieldToLoc &FieldLocs);
64
66 /// The maximum depth to analyze. A value of zero is equivalent to disabling
67 /// context-sensitive analysis entirely.
68 unsigned Depth = 2;
69};
70
71/// Owns objects that encompass the state of a program and stores context that
72/// is used during dataflow analysis.
74public:
75 struct Options {
76 /// Options for analyzing function bodies when present in the translation
77 /// unit, or empty to disable context-sensitive analysis. Note that this is
78 /// fundamentally limited: some constructs, such as recursion, are
79 /// explicitly unsupported.
80 std::optional<ContextSensitiveOptions> ContextSensitiveOpts;
81
82 /// If provided, analysis details will be recorded here.
83 /// (This is always non-null within an AnalysisContext, the framework
84 /// provides a fallback no-op logger).
85 Logger *Log = nullptr;
86 };
87
88 /// Constructs a dataflow analysis context.
89 ///
90 /// Requirements:
91 ///
92 /// `S` must not be null.
93 DataflowAnalysisContext(std::unique_ptr<Solver> S,
94 Options Opts = Options{
95 /*ContextSensitiveOpts=*/std::nullopt,
96 /*Logger=*/nullptr});
98
99 /// Sets a callback that returns the names and types of the synthetic fields
100 /// to add to a `RecordStorageLocation` of a given type.
101 /// Typically, this is called from the constructor of a `DataflowAnalysis`
102 ///
103 /// To maintain the invariant that all `RecordStorageLocation`s of a given
104 /// type have the same fields:
105 /// * The callback must always return the same result for a given type
106 /// * `setSyntheticFieldCallback()` must be called before any
107 // `RecordStorageLocation`s are created.
109 std::function<llvm::StringMap<QualType>(QualType)> CB) {
110 assert(!RecordStorageLocationCreated);
111 SyntheticFieldCallback = CB;
112 }
113
114 /// Returns a new storage location appropriate for `Type`.
115 ///
116 /// A null `Type` is interpreted as the pointee type of `std::nullptr_t`.
118
119 /// Creates a `RecordStorageLocation` for the given type and with the given
120 /// fields.
121 ///
122 /// Requirements:
123 ///
124 /// `FieldLocs` must contain exactly the fields returned by
125 /// `getModeledFields(Type)`.
126 /// `SyntheticFields` must contain exactly the fields returned by
127 /// `getSyntheticFields(Type)`.
131
132 /// Returns a stable storage location for `D`.
134
135 /// Returns a stable storage location for `E`.
137
138 /// Returns a pointer value that represents a null pointer. Calls with
139 /// `PointeeType` that are canonically equivalent will return the same result.
140 /// A null `PointeeType` can be used for the pointee of `std::nullptr_t`.
142
143 /// Adds `Constraint` to current and future flow conditions in this context.
144 ///
145 /// Invariants must contain only flow-insensitive information, i.e. facts that
146 /// are true on all paths through the program.
147 /// Information can be added eagerly (when analysis begins), or lazily (e.g.
148 /// when values are first used). The analysis must be careful that the same
149 /// information is added regardless of which order blocks are analyzed in.
150 void addInvariant(const Formula &Constraint);
151
152 /// Adds `Constraint` to the flow condition identified by `Token`.
153 void addFlowConditionConstraint(Atom Token, const Formula &Constraint);
154
155 /// Creates a new flow condition with the same constraints as the flow
156 /// condition identified by `Token` and returns its token.
158
159 /// Creates a new flow condition that represents the disjunction of the flow
160 /// conditions identified by `FirstToken` and `SecondToken`, and returns its
161 /// token.
162 Atom joinFlowConditions(Atom FirstToken, Atom SecondToken);
163
164 /// Returns true if the constraints of the flow condition identified by
165 /// `Token` imply that `F` is true.
166 /// Returns false if the flow condition does not imply `F` or if the solver
167 /// times out.
168 bool flowConditionImplies(Atom Token, const Formula &F);
169
170 /// Returns true if the constraints of the flow condition identified by
171 /// `Token` still allow `F` to be true.
172 /// Returns false if the flow condition implies that `F` is false or if the
173 /// solver times out.
174 bool flowConditionAllows(Atom Token, const Formula &F);
175
176 /// Returns true if `Val1` is equivalent to `Val2`.
177 /// Note: This function doesn't take into account constraints on `Val1` and
178 /// `Val2` imposed by the flow condition.
179 bool equivalentFormulas(const Formula &Val1, const Formula &Val2);
180
181 LLVM_DUMP_METHOD void dumpFlowCondition(Atom Token,
182 llvm::raw_ostream &OS = llvm::dbgs());
183
184 /// Returns the `ControlFlowContext` registered for `F`, if any. Otherwise,
185 /// returns null.
187
188 const Options &getOptions() { return Opts; }
189
190 Arena &arena() { return *A; }
191
192 /// Returns the outcome of satisfiability checking on `Constraints`.
193 ///
194 /// Flow conditions are not incorporated, so they may need to be manually
195 /// included in `Constraints` to provide contextually-accurate results, e.g.
196 /// if any definitions or relationships of the values in `Constraints` have
197 /// been stored in flow conditions.
198 Solver::Result querySolver(llvm::SetVector<const Formula *> Constraints);
199
200 /// Returns the fields of `Type`, limited to the set of fields modeled by this
201 /// context.
203
204 /// Returns the names and types of the synthetic fields for the given record
205 /// type.
206 llvm::StringMap<QualType> getSyntheticFields(QualType Type) {
207 assert(Type->isRecordType());
208 if (SyntheticFieldCallback)
209 return SyntheticFieldCallback(Type);
210 return {};
211 }
212
213private:
214 friend class Environment;
215
216 struct NullableQualTypeDenseMapInfo : private llvm::DenseMapInfo<QualType> {
217 static QualType getEmptyKey() {
218 // Allow a NULL `QualType` by using a different value as the empty key.
219 return QualType::getFromOpaquePtr(reinterpret_cast<Type *>(1));
220 }
221
222 using DenseMapInfo::getHashValue;
223 using DenseMapInfo::getTombstoneKey;
224 using DenseMapInfo::isEqual;
225 };
226
227 // Extends the set of modeled field declarations.
228 void addModeledFields(const FieldSet &Fields);
229
230 /// Adds all constraints of the flow condition identified by `Token` and all
231 /// of its transitive dependencies to `Constraints`.
232 void
233 addTransitiveFlowConditionConstraints(Atom Token,
234 llvm::SetVector<const Formula *> &Out);
235
236 /// Returns true if the solver is able to prove that there is a satisfying
237 /// assignment for `Constraints`.
238 bool isSatisfiable(llvm::SetVector<const Formula *> Constraints) {
239 return querySolver(std::move(Constraints)).getStatus() ==
241 }
242
243 /// Returns true if the solver is able to prove that there is no satisfying
244 /// assignment for `Constraints`
245 bool isUnsatisfiable(llvm::SetVector<const Formula *> Constraints) {
246 return querySolver(std::move(Constraints)).getStatus() ==
248 }
249
250 std::unique_ptr<Solver> S;
251 std::unique_ptr<Arena> A;
252
253 // Maps from program declarations and statements to storage locations that are
254 // assigned to them. These assignments are global (aggregated across all basic
255 // blocks) and are used to produce stable storage locations when the same
256 // basic blocks are evaluated multiple times. The storage locations that are
257 // in scope for a particular basic block are stored in `Environment`.
258 llvm::DenseMap<const ValueDecl *, StorageLocation *> DeclToLoc;
259 llvm::DenseMap<const Expr *, StorageLocation *> ExprToLoc;
260
261 // Null pointer values, keyed by the canonical pointee type.
262 //
263 // FIXME: The pointer values are indexed by the pointee types which are
264 // required to initialize the `PointeeLoc` field in `PointerValue`. Consider
265 // creating a type-independent `NullPointerValue` without a `PointeeLoc`
266 // field.
267 llvm::DenseMap<QualType, PointerValue *, NullableQualTypeDenseMapInfo>
268 NullPointerVals;
269
270 Options Opts;
271
272 // Flow conditions are tracked symbolically: each unique flow condition is
273 // associated with a fresh symbolic variable (token), bound to the clause that
274 // defines the flow condition. Conceptually, each binding corresponds to an
275 // "iff" of the form `FC <=> (C1 ^ C2 ^ ...)` where `FC` is a flow condition
276 // token (an atomic boolean) and `Ci`s are the set of constraints in the flow
277 // flow condition clause. The set of constraints (C1 ^ C2 ^ ...) are stored in
278 // the `FlowConditionConstraints` map, keyed by the token of the flow
279 // condition.
280 //
281 // Flow conditions depend on other flow conditions if they are created using
282 // `forkFlowCondition` or `joinFlowConditions`. The graph of flow condition
283 // dependencies is stored in the `FlowConditionDeps` map.
284 llvm::DenseMap<Atom, llvm::DenseSet<Atom>> FlowConditionDeps;
285 llvm::DenseMap<Atom, const Formula *> FlowConditionConstraints;
286 const Formula *Invariant = nullptr;
287
288 llvm::DenseMap<const FunctionDecl *, ControlFlowContext> FunctionContexts;
289
290 // Fields modeled by environments covered by this context.
291 FieldSet ModeledFields;
292
293 std::unique_ptr<Logger> LogOwner; // If created via flags.
294
295 std::function<llvm::StringMap<QualType>(QualType)> SyntheticFieldCallback;
296
297 /// Has any `RecordStorageLocation` been created yet?
298 bool RecordStorageLocationCreated = false;
299};
300
301} // namespace dataflow
302} // namespace clang
303
304#endif // LLVM_CLANG_ANALYSIS_FLOWSENSITIVE_DATAFLOWANALYSISCONTEXT_H
Allows QualTypes to be sorted and hence used in maps and sets.
This represents one expression.
Definition: Expr.h:110
Represents a function declaration or definition.
Definition: Decl.h:1957
A (possibly-)qualified type.
Definition: Type.h:736
static QualType getFromOpaquePtr(const void *Ptr)
Definition: Type.h:785
Token - This structure provides full information about a lexed token.
Definition: Token.h:35
The base class of the type hierarchy.
Definition: Type.h:1602
bool isRecordType() const
Definition: Type.h:7123
Represent the declaration of a variable (in which case it is an lvalue) a function (in which case it ...
Definition: Decl.h:704
The Arena owns the objects that model data within an analysis.
Definition: Arena.h:21
Holds CFG and other derived context that is needed to perform dataflow analysis.
Owns objects that encompass the state of a program and stores context that is used during dataflow an...
void setSyntheticFieldCallback(std::function< llvm::StringMap< QualType >(QualType)> CB)
Sets a callback that returns the names and types of the synthetic fields to add to a RecordStorageLoc...
Atom joinFlowConditions(Atom FirstToken, Atom SecondToken)
Creates a new flow condition that represents the disjunction of the flow conditions identified by Fir...
void addFlowConditionConstraint(Atom Token, const Formula &Constraint)
Adds Constraint to the flow condition identified by Token.
Atom forkFlowCondition(Atom Token)
Creates a new flow condition with the same constraints as the flow condition identified by Token and ...
bool equivalentFormulas(const Formula &Val1, const Formula &Val2)
Returns true if Val1 is equivalent to Val2.
StorageLocation & getStableStorageLocation(const ValueDecl &D)
Returns a stable storage location for D.
bool flowConditionImplies(Atom Token, const Formula &F)
Returns true if the constraints of the flow condition identified by Token imply that F is true.
Solver::Result querySolver(llvm::SetVector< const Formula * > Constraints)
Returns the outcome of satisfiability checking on Constraints.
bool flowConditionAllows(Atom Token, const Formula &F)
Returns true if the constraints of the flow condition identified by Token still allow F to be true.
PointerValue & getOrCreateNullPointerValue(QualType PointeeType)
Returns a pointer value that represents a null pointer.
void addInvariant(const Formula &Constraint)
Adds Constraint to current and future flow conditions in this context.
llvm::StringMap< QualType > getSyntheticFields(QualType Type)
Returns the names and types of the synthetic fields for the given record type.
StorageLocation & createStorageLocation(QualType Type)
Returns a new storage location appropriate for Type.
FieldSet getModeledFields(QualType Type)
Returns the fields of Type, limited to the set of fields modeled by this context.
LLVM_DUMP_METHOD void dumpFlowCondition(Atom Token, llvm::raw_ostream &OS=llvm::dbgs())
RecordStorageLocation & createRecordStorageLocation(QualType Type, RecordStorageLocation::FieldToLoc FieldLocs, RecordStorageLocation::SyntheticFieldMap SyntheticFields)
Creates a RecordStorageLocation for the given type and with the given fields.
const ControlFlowContext * getControlFlowContext(const FunctionDecl *F)
Returns the ControlFlowContext registered for F, if any.
Holds the state of the program (store and heap) at a given program point.
A logger is notified as the analysis progresses.
Definition: Logger.h:27
Models a symbolic pointer. Specifically, any value of type T*.
Definition: Value.h:168
A storage location for a record (struct, class, or union).
llvm::DenseMap< const ValueDecl *, StorageLocation * > FieldToLoc
llvm::StringMap< StorageLocation * > SyntheticFieldMap
Base class for elements of the local variable store and of the heap.
Atom
Identifies an atomic boolean variable such as "V1".
Definition: Formula.h:34
llvm::SmallSetVector< const FieldDecl *, 4 > FieldSet
A set of FieldDecl *.
FieldSet getObjectFields(QualType Type)
Returns the set of all fields in the type.
const Expr & ignoreCFGOmittedNodes(const Expr &E)
Skip past nodes that the CFG does not emit.
bool containsSameFields(const FieldSet &Fields, const RecordStorageLocation::FieldToLoc &FieldLocs)
Returns whether Fields and FieldLocs contain the same fields.
unsigned Depth
The maximum depth to analyze.
Logger * Log
If provided, analysis details will be recorded here.
std::optional< ContextSensitiveOptions > ContextSensitiveOpts
Options for analyzing function bodies when present in the translation unit, or empty to disable conte...
Status getStatus() const
Returns the status of satisfiability checking on the queried boolean formula.
Definition: Solver.h:64
@ Unsatisfiable
Indicates that there is no satisfying assignment for a boolean formula.
@ Satisfiable
Indicates that there exists a satisfying assignment for a boolean formula.