clang 23.0.0git
DataflowAnalysisContext.h
Go to the documentation of this file.
1//===-- DataflowAnalysisContext.h -------------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines a DataflowAnalysisContext class that owns objects that
10// encompass the state of a program and stores context that is used during
11// dataflow analysis.
12//
13//===----------------------------------------------------------------------===//
14
15#ifndef LLVM_CLANG_ANALYSIS_FLOWSENSITIVE_DATAFLOWANALYSISCONTEXT_H
16#define LLVM_CLANG_ANALYSIS_FLOWSENSITIVE_DATAFLOWANALYSISCONTEXT_H
17
18#include "clang/AST/Decl.h"
19#include "clang/AST/Expr.h"
20#include "clang/AST/Type.h"
28#include "llvm/ADT/DenseMap.h"
29#include "llvm/ADT/DenseSet.h"
30#include "llvm/ADT/SetVector.h"
31#include "llvm/Support/Compiler.h"
32#include <cassert>
33#include <memory>
34#include <optional>
35
36namespace clang {
37namespace dataflow {
38class Logger;
39
41 /// The maximum depth to analyze. A value of zero is equivalent to disabling
42 /// context-sensitive analysis entirely.
43 unsigned Depth = 2;
44};
45
46/// A simple representation of essential elements of the logical context used in
47/// environments. Designed for import/export for applications requiring
48/// serialization support.
50 // Global invariant that applies for all definitions in the context.
52 // Flow-condition tokens in the context.
53 llvm::DenseMap<Atom, const Formula *> TokenDefs;
54 // Dependencies between flow-condition definitions.
55 llvm::DenseMap<Atom, llvm::DenseSet<Atom>> TokenDeps;
56};
57
58/// Owns objects that encompass the state of a program and stores context that
59/// is used during dataflow analysis.
61public:
62 struct Options {
63 /// Options for analyzing function bodies when present in the translation
64 /// unit, or empty to disable context-sensitive analysis. Note that this is
65 /// fundamentally limited: some constructs, such as recursion, are
66 /// explicitly unsupported.
67 std::optional<ContextSensitiveOptions> ContextSensitiveOpts;
68
69 /// If provided, analysis details will be recorded here.
70 /// (This is always non-null within an AnalysisContext, the framework
71 /// provides a fallback no-op logger).
72 Logger *Log = nullptr;
73 };
74
75 /// Constructs a dataflow analysis context.
76 ///
77 /// Requirements:
78 ///
79 /// `S` must not be null.
80 DataflowAnalysisContext(std::unique_ptr<Solver> S,
81 Options Opts = Options{
82 /*ContextSensitiveOpts=*/std::nullopt,
83 /*Logger=*/nullptr})
84 : DataflowAnalysisContext(*S, std::move(S), Opts) {}
85
86 /// Constructs a dataflow analysis context.
87 ///
88 /// Requirements:
89 ///
90 /// `S` must outlive the `DataflowAnalysisContext`.
92 /*ContextSensitiveOpts=*/std::nullopt,
93 /*Logger=*/nullptr})
94 : DataflowAnalysisContext(S, nullptr, Opts) {}
95
97
98 /// Sets a callback that returns the names and types of the synthetic fields
99 /// to add to a `RecordStorageLocation` of a given type.
100 /// Typically, this is called from the constructor of a `DataflowAnalysis`
101 ///
102 /// The field types returned by the callback may not have reference type.
103 ///
104 /// To maintain the invariant that all `RecordStorageLocation`s of a given
105 /// type have the same fields:
106 /// * The callback must always return the same result for a given type
107 /// * `setSyntheticFieldCallback()` must be called before any
108 // `RecordStorageLocation`s are created.
110 std::function<llvm::StringMap<QualType>(QualType)> CB) {
111 assert(!RecordStorageLocationCreated);
112 SyntheticFieldCallback = CB;
113 }
114
115 /// Returns a new storage location appropriate for `Type`.
116 ///
117 /// A null `Type` is interpreted as the pointee type of `std::nullptr_t`.
119
120 /// Creates a `RecordStorageLocation` for the given type and with the given
121 /// fields.
122 ///
123 /// Requirements:
124 ///
125 /// `FieldLocs` must contain exactly the fields returned by
126 /// `getModeledFields(Type)`.
127 /// `SyntheticFields` must contain exactly the fields returned by
128 /// `getSyntheticFields(Type)`.
132
133 /// Returns a stable storage location for `D`.
135
136 /// Returns a stable storage location for `E`.
138
139 /// Returns a pointer value that represents a null pointer. Calls with
140 /// `PointeeType` that are canonically equivalent will return the same result.
141 /// A null `PointeeType` can be used for the pointee of `std::nullptr_t`.
143
144 /// Adds `Constraint` to current and future flow conditions in this context.
145 ///
146 /// Invariants must contain only flow-insensitive information, i.e. facts that
147 /// are true on all paths through the program.
148 /// Information can be added eagerly (when analysis begins), or lazily (e.g.
149 /// when values are first used). The analysis must be careful that the same
150 /// information is added regardless of which order blocks are analyzed in.
151 void addInvariant(const Formula &Constraint);
152
153 /// Adds `Constraint` to the flow condition identified by `Token`.
154 void addFlowConditionConstraint(Atom Token, const Formula &Constraint);
155
156 /// Adds `Deps` to the dependencies of the flow condition identified by
157 /// `Token`. Intended for use in deserializing contexts. The formula alone
158 /// doesn't have enough information to indicate its deps.
159 void addFlowConditionDeps(Atom Token, const llvm::DenseSet<Atom> &Deps) {
160 // Avoid creating an entry for `Token` with an empty set.
161 if (!Deps.empty())
162 FlowConditionDeps[Token].insert(Deps.begin(), Deps.end());
163 }
164
165 /// Creates a new flow condition with the same constraints as the flow
166 /// condition identified by `Token` and returns its token.
168
169 /// Creates a new flow condition that represents the disjunction of the flow
170 /// conditions identified by `FirstToken` and `SecondToken`, and returns its
171 /// token.
172 Atom joinFlowConditions(Atom FirstToken, Atom SecondToken);
173
174 /// Returns true if the constraints of the flow condition identified by
175 /// `Token` imply that `F` is true.
176 /// Returns false if the flow condition does not imply `F` or if the solver
177 /// times out.
178 bool flowConditionImplies(Atom Token, const Formula &F);
179
180 /// Returns true if the constraints of the flow condition identified by
181 /// `Token` still allow `F` to be true.
182 /// Returns false if the flow condition implies that `F` is false or if the
183 /// solver times out.
184 bool flowConditionAllows(Atom Token, const Formula &F);
185
186 /// Returns true if `Val1` is equivalent to `Val2`.
187 /// Note: This function doesn't take into account constraints on `Val1` and
188 /// `Val2` imposed by the flow condition.
189 bool equivalentFormulas(const Formula &Val1, const Formula &Val2);
190
191 LLVM_DUMP_METHOD void dumpFlowCondition(Atom Token,
192 llvm::raw_ostream &OS = llvm::dbgs());
193
194 /// Returns the `AdornedCFG` registered for `F`, if any. Otherwise,
195 /// returns null.
196 const AdornedCFG *getAdornedCFG(const FunctionDecl *F);
197
198 const Options &getOptions() { return Opts; }
199
200 Arena &arena() { return *A; }
201
202 /// Returns the outcome of satisfiability checking on `Constraints`.
203 ///
204 /// Flow conditions are not incorporated, so they may need to be manually
205 /// included in `Constraints` to provide contextually-accurate results, e.g.
206 /// if any definitions or relationships of the values in `Constraints` have
207 /// been stored in flow conditions.
208 Solver::Result querySolver(llvm::SetVector<const Formula *> Constraints);
209
210 /// Returns the fields of `Type`, limited to the set of fields modeled by this
211 /// context. The returned reference is valid for the lifetime of the context,
212 /// or until `addModeledFields()` is called.
214
215 /// Returns the names and types of the synthetic fields for the given record
216 /// type.
217 llvm::StringMap<QualType> getSyntheticFields(QualType Type) {
218 assert(Type->isRecordType());
219 if (SyntheticFieldCallback) {
220 llvm::StringMap<QualType> Result = SyntheticFieldCallback(Type);
221 // Synthetic fields are not allowed to have reference type.
222 assert([&Result] {
223 for (const auto &Entry : Result)
224 if (Entry.getValue()->isReferenceType())
225 return false;
226 return true;
227 }());
228 return Result;
229 }
230 return {};
231 }
232
233 /// Export the logical-context portions of `AC`, limited to the given target
234 /// flow-condition tokens.
236 exportLogicalContext(llvm::DenseSet<dataflow::Atom> TargetTokens) const;
237
238 /// Initializes this context's "logical" components with `LC`.
240
241private:
242 friend class Environment;
243
244 struct NullableQualTypeDenseMapInfo : private llvm::DenseMapInfo<QualType> {
245 static QualType getEmptyKey() {
246 // Allow a NULL `QualType` by using a different value as the empty key.
247 return QualType::getFromOpaquePtr(reinterpret_cast<Type *>(1));
248 }
249
250 using DenseMapInfo::getHashValue;
251 using DenseMapInfo::getTombstoneKey;
252 using DenseMapInfo::isEqual;
253 };
254
255 /// `S` is the solver to use. `OwnedSolver` may be:
256 /// * Null (in which case `S` is non-onwed and must outlive this object), or
257 /// * Non-null (in which case it must refer to `S`, and the
258 /// `DataflowAnalysisContext will take ownership of `OwnedSolver`).
259 DataflowAnalysisContext(Solver &S, std::unique_ptr<Solver> &&OwnedSolver,
260 Options Opts);
261
262 /// Computes the transitive closure of dependencies of (flow-condition)
263 /// `Tokens`. That is, the set of flow-condition tokens reachable from
264 /// `Tokens` in the dependency graph.
265 llvm::DenseSet<Atom> collectDependencies(llvm::DenseSet<Atom> Tokens) const;
266
267 /// Computes and returns the fields of `Type`, limited to the set of fields
268 /// modeled by this context.
269 FieldSet computeModeledFields(QualType Type);
270
271 /// Extends the set of modeled field declarations.
272 void addModeledFields(const FieldSet &Fields);
273
274 /// Adds all constraints of the flow condition identified by `Token` and all
275 /// of its transitive dependencies to `Constraints`.
276 void
277 addTransitiveFlowConditionConstraints(Atom Token,
278 llvm::SetVector<const Formula *> &Out);
279
280 /// Returns true if the solver is able to prove that there is a satisfying
281 /// assignment for `Constraints`.
282 bool isSatisfiable(llvm::SetVector<const Formula *> Constraints) {
283 return querySolver(std::move(Constraints)).getStatus() ==
285 }
286
287 /// Returns true if the solver is able to prove that there is no satisfying
288 /// assignment for `Constraints`
289 bool isUnsatisfiable(llvm::SetVector<const Formula *> Constraints) {
290 return querySolver(std::move(Constraints)).getStatus() ==
292 }
293
294 Solver &S;
295 std::unique_ptr<Solver> OwnedSolver;
296 std::unique_ptr<Arena> A;
297
298 // Maps from program declarations and statements to storage locations that are
299 // assigned to them. These assignments are global (aggregated across all basic
300 // blocks) and are used to produce stable storage locations when the same
301 // basic blocks are evaluated multiple times. The storage locations that are
302 // in scope for a particular basic block are stored in `Environment`.
303 llvm::DenseMap<const ValueDecl *, StorageLocation *> DeclToLoc;
304 llvm::DenseMap<const Expr *, StorageLocation *> ExprToLoc;
305
306 // Null pointer values, keyed by the canonical pointee type.
307 //
308 // FIXME: The pointer values are indexed by the pointee types which are
309 // required to initialize the `PointeeLoc` field in `PointerValue`. Consider
310 // creating a type-independent `NullPointerValue` without a `PointeeLoc`
311 // field.
312 llvm::DenseMap<QualType, PointerValue *, NullableQualTypeDenseMapInfo>
313 NullPointerVals;
314
315 Options Opts;
316
317 // Flow conditions are tracked symbolically: each unique flow condition is
318 // associated with a fresh symbolic variable (token), bound to the clause that
319 // defines the flow condition. Conceptually, each binding corresponds to an
320 // "iff" of the form `FC <=> (C1 ^ C2 ^ ...)` where `FC` is a flow condition
321 // token (an atomic boolean) and `Ci`s are the set of constraints in the flow
322 // flow condition clause. The set of constraints (C1 ^ C2 ^ ...) are stored in
323 // the `FlowConditionConstraints` map, keyed by the token of the flow
324 // condition.
325 //
326 // Flow conditions depend on other flow conditions if they are created using
327 // `forkFlowCondition` or `joinFlowConditions`. The graph of flow condition
328 // dependencies is stored in the `FlowConditionDeps` map.
329 llvm::DenseMap<Atom, llvm::DenseSet<Atom>> FlowConditionDeps;
330 llvm::DenseMap<Atom, const Formula *> FlowConditionConstraints;
331 const Formula *Invariant = nullptr;
332
333 llvm::DenseMap<const FunctionDecl *, AdornedCFG> FunctionContexts;
334
335 // Fields (from any record Type) modeled by environments using this context.
336 // The set may only contain fields that are referenced in the scope of
337 // the environments (but it is up to the environment what is relevant to
338 // model).
339 FieldSet ModeledFields;
340
341 // A lazily-computed and cached version of ModeledFields that is split by
342 // record Type.
343 llvm::DenseMap<QualType, std::unique_ptr<FieldSet>> CachedModeledFields;
344
345 std::unique_ptr<Logger> LogOwner; // If created via flags.
346
347 std::function<llvm::StringMap<QualType>(QualType)> SyntheticFieldCallback;
348
349 /// Has any `RecordStorageLocation` been created yet?
350 bool RecordStorageLocationCreated = false;
351};
352
353} // namespace dataflow
354} // namespace clang
355
356#endif // LLVM_CLANG_ANALYSIS_FLOWSENSITIVE_DATAFLOWANALYSISCONTEXT_H
Allows QualTypes to be sorted and hence used in maps and sets.
C Language Family Type Representation.
This represents one expression.
Definition Expr.h:112
Represents a function declaration or definition.
Definition Decl.h:2000
A (possibly-)qualified type.
Definition TypeBase.h:937
static QualType getFromOpaquePtr(const void *Ptr)
Definition TypeBase.h:986
Token - This structure provides full information about a lexed token.
Definition Token.h:36
The base class of the type hierarchy.
Definition TypeBase.h:1839
bool isRecordType() const
Definition TypeBase.h:8752
Represent the declaration of a variable (in which case it is an lvalue) a function (in which case it ...
Definition Decl.h:712
Holds CFG with additional information derived from it that is needed to perform dataflow analysis.
Definition AdornedCFG.h:47
The Arena owns the objects that model data within an analysis.
Definition Arena.h:21
Owns objects that encompass the state of a program and stores context that is used during dataflow an...
void setSyntheticFieldCallback(std::function< llvm::StringMap< QualType >(QualType)> CB)
Sets a callback that returns the names and types of the synthetic fields to add to a RecordStorageLoc...
const AdornedCFG * getAdornedCFG(const FunctionDecl *F)
Returns the AdornedCFG registered for F, if any.
DataflowAnalysisContext(std::unique_ptr< Solver > S, Options Opts=Options{ std::nullopt, nullptr})
Constructs a dataflow analysis context.
Atom joinFlowConditions(Atom FirstToken, Atom SecondToken)
Creates a new flow condition that represents the disjunction of the flow conditions identified by Fir...
void addFlowConditionConstraint(Atom Token, const Formula &Constraint)
Adds Constraint to the flow condition identified by Token.
Atom forkFlowCondition(Atom Token)
Creates a new flow condition with the same constraints as the flow condition identified by Token and ...
bool equivalentFormulas(const Formula &Val1, const Formula &Val2)
Returns true if Val1 is equivalent to Val2.
StorageLocation & getStableStorageLocation(const ValueDecl &D)
Returns a stable storage location for D.
bool flowConditionImplies(Atom Token, const Formula &F)
Returns true if the constraints of the flow condition identified by Token imply that F is true.
Solver::Result querySolver(llvm::SetVector< const Formula * > Constraints)
Returns the outcome of satisfiability checking on Constraints.
bool flowConditionAllows(Atom Token, const Formula &F)
Returns true if the constraints of the flow condition identified by Token still allow F to be true.
const FieldSet & getModeledFields(QualType Type)
Returns the fields of Type, limited to the set of fields modeled by this context.
PointerValue & getOrCreateNullPointerValue(QualType PointeeType)
Returns a pointer value that represents a null pointer.
void addInvariant(const Formula &Constraint)
Adds Constraint to current and future flow conditions in this context.
llvm::StringMap< QualType > getSyntheticFields(QualType Type)
Returns the names and types of the synthetic fields for the given record type.
StorageLocation & createStorageLocation(QualType Type)
Returns a new storage location appropriate for Type.
void addFlowConditionDeps(Atom Token, const llvm::DenseSet< Atom > &Deps)
Adds Deps to the dependencies of the flow condition identified by Token.
SimpleLogicalContext exportLogicalContext(llvm::DenseSet< dataflow::Atom > TargetTokens) const
Export the logical-context portions of AC, limited to the given target flow-condition tokens.
LLVM_DUMP_METHOD void dumpFlowCondition(Atom Token, llvm::raw_ostream &OS=llvm::dbgs())
void initLogicalContext(SimpleLogicalContext LC)
Initializes this context's "logical" components with LC.
DataflowAnalysisContext(Solver &S, Options Opts=Options{ std::nullopt, nullptr})
Constructs a dataflow analysis context.
RecordStorageLocation & createRecordStorageLocation(QualType Type, RecordStorageLocation::FieldToLoc FieldLocs, RecordStorageLocation::SyntheticFieldMap SyntheticFields)
Creates a RecordStorageLocation for the given type and with the given fields.
A logger is notified as the analysis progresses.
Definition Logger.h:27
Models a symbolic pointer. Specifically, any value of type T*.
Definition Value.h:170
A storage location for a record (struct, class, or union).
llvm::DenseMap< const ValueDecl *, StorageLocation * > FieldToLoc
llvm::StringMap< StorageLocation * > SyntheticFieldMap
An interface for a SAT solver that can be used by dataflow analyses.
Definition Solver.h:28
Base class for elements of the local variable store and of the heap.
Dataflow Directional Tag Classes.
Definition AdornedCFG.h:29
Atom
Identifies an atomic boolean variable such as "V1".
Definition Formula.h:34
llvm::SmallSetVector< const FieldDecl *, 4 > FieldSet
A set of FieldDecl *.
Definition ASTOps.h:42
The JSON file list parser is used to communicate input to InstallAPI.
nullptr
This class represents a compute construct, representing a 'Kind' of ‘parallel’, 'serial',...
@ Result
The result type of a method or function.
Definition TypeBase.h:905
@ Invariant
The parameter is invariant: must match exactly.
Definition DeclObjC.h:555
int const char * function
Definition c++config.h:31
unsigned Depth
The maximum depth to analyze.
Logger * Log
If provided, analysis details will be recorded here.
std::optional< ContextSensitiveOptions > ContextSensitiveOpts
Options for analyzing function bodies when present in the translation unit, or empty to disable conte...
A simple representation of essential elements of the logical context used in environments.
llvm::DenseMap< Atom, const Formula * > TokenDefs
llvm::DenseMap< Atom, llvm::DenseSet< Atom > > TokenDeps
Status getStatus() const
Returns the status of satisfiability checking on the queried boolean formula.
Definition Solver.h:64
@ Unsatisfiable
Indicates that there is no satisfying assignment for a boolean formula.
Definition Solver.h:38
@ Satisfiable
Indicates that there exists a satisfying assignment for a boolean formula.
Definition Solver.h:34