clang 19.0.0git
DataflowAnalysisContext.h
Go to the documentation of this file.
1//===-- DataflowAnalysisContext.h -------------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines a DataflowAnalysisContext class that owns objects that
10// encompass the state of a program and stores context that is used during
11// dataflow analysis.
12//
13//===----------------------------------------------------------------------===//
14
15#ifndef LLVM_CLANG_ANALYSIS_FLOWSENSITIVE_DATAFLOWANALYSISCONTEXT_H
16#define LLVM_CLANG_ANALYSIS_FLOWSENSITIVE_DATAFLOWANALYSISCONTEXT_H
17
18#include "clang/AST/Decl.h"
19#include "clang/AST/Expr.h"
27#include "llvm/ADT/DenseMap.h"
28#include "llvm/ADT/DenseSet.h"
29#include "llvm/ADT/SetVector.h"
30#include "llvm/Support/Compiler.h"
31#include <cassert>
32#include <memory>
33#include <optional>
34
35namespace clang {
36namespace dataflow {
37class Logger;
38
40 /// The maximum depth to analyze. A value of zero is equivalent to disabling
41 /// context-sensitive analysis entirely.
42 unsigned Depth = 2;
43};
44
45/// Owns objects that encompass the state of a program and stores context that
46/// is used during dataflow analysis.
48public:
49 struct Options {
50 /// Options for analyzing function bodies when present in the translation
51 /// unit, or empty to disable context-sensitive analysis. Note that this is
52 /// fundamentally limited: some constructs, such as recursion, are
53 /// explicitly unsupported.
54 std::optional<ContextSensitiveOptions> ContextSensitiveOpts;
55
56 /// If provided, analysis details will be recorded here.
57 /// (This is always non-null within an AnalysisContext, the framework
58 /// provides a fallback no-op logger).
59 Logger *Log = nullptr;
60 };
61
62 /// Constructs a dataflow analysis context.
63 ///
64 /// Requirements:
65 ///
66 /// `S` must not be null.
67 DataflowAnalysisContext(std::unique_ptr<Solver> S,
68 Options Opts = Options{
69 /*ContextSensitiveOpts=*/std::nullopt,
70 /*Logger=*/nullptr});
72
73 /// Sets a callback that returns the names and types of the synthetic fields
74 /// to add to a `RecordStorageLocation` of a given type.
75 /// Typically, this is called from the constructor of a `DataflowAnalysis`
76 ///
77 /// The field types returned by the callback may not have reference type.
78 ///
79 /// To maintain the invariant that all `RecordStorageLocation`s of a given
80 /// type have the same fields:
81 /// * The callback must always return the same result for a given type
82 /// * `setSyntheticFieldCallback()` must be called before any
83 // `RecordStorageLocation`s are created.
85 std::function<llvm::StringMap<QualType>(QualType)> CB) {
86 assert(!RecordStorageLocationCreated);
87 SyntheticFieldCallback = CB;
88 }
89
90 /// Returns a new storage location appropriate for `Type`.
91 ///
92 /// A null `Type` is interpreted as the pointee type of `std::nullptr_t`.
94
95 /// Creates a `RecordStorageLocation` for the given type and with the given
96 /// fields.
97 ///
98 /// Requirements:
99 ///
100 /// `FieldLocs` must contain exactly the fields returned by
101 /// `getModeledFields(Type)`.
102 /// `SyntheticFields` must contain exactly the fields returned by
103 /// `getSyntheticFields(Type)`.
107
108 /// Returns a stable storage location for `D`.
110
111 /// Returns a stable storage location for `E`.
113
114 /// Returns a pointer value that represents a null pointer. Calls with
115 /// `PointeeType` that are canonically equivalent will return the same result.
116 /// A null `PointeeType` can be used for the pointee of `std::nullptr_t`.
118
119 /// Adds `Constraint` to current and future flow conditions in this context.
120 ///
121 /// Invariants must contain only flow-insensitive information, i.e. facts that
122 /// are true on all paths through the program.
123 /// Information can be added eagerly (when analysis begins), or lazily (e.g.
124 /// when values are first used). The analysis must be careful that the same
125 /// information is added regardless of which order blocks are analyzed in.
126 void addInvariant(const Formula &Constraint);
127
128 /// Adds `Constraint` to the flow condition identified by `Token`.
129 void addFlowConditionConstraint(Atom Token, const Formula &Constraint);
130
131 /// Creates a new flow condition with the same constraints as the flow
132 /// condition identified by `Token` and returns its token.
134
135 /// Creates a new flow condition that represents the disjunction of the flow
136 /// conditions identified by `FirstToken` and `SecondToken`, and returns its
137 /// token.
138 Atom joinFlowConditions(Atom FirstToken, Atom SecondToken);
139
140 /// Returns true if the constraints of the flow condition identified by
141 /// `Token` imply that `F` is true.
142 /// Returns false if the flow condition does not imply `F` or if the solver
143 /// times out.
144 bool flowConditionImplies(Atom Token, const Formula &F);
145
146 /// Returns true if the constraints of the flow condition identified by
147 /// `Token` still allow `F` to be true.
148 /// Returns false if the flow condition implies that `F` is false or if the
149 /// solver times out.
150 bool flowConditionAllows(Atom Token, const Formula &F);
151
152 /// Returns true if `Val1` is equivalent to `Val2`.
153 /// Note: This function doesn't take into account constraints on `Val1` and
154 /// `Val2` imposed by the flow condition.
155 bool equivalentFormulas(const Formula &Val1, const Formula &Val2);
156
157 LLVM_DUMP_METHOD void dumpFlowCondition(Atom Token,
158 llvm::raw_ostream &OS = llvm::dbgs());
159
160 /// Returns the `AdornedCFG` registered for `F`, if any. Otherwise,
161 /// returns null.
162 const AdornedCFG *getAdornedCFG(const FunctionDecl *F);
163
164 const Options &getOptions() { return Opts; }
165
166 Arena &arena() { return *A; }
167
168 /// Returns the outcome of satisfiability checking on `Constraints`.
169 ///
170 /// Flow conditions are not incorporated, so they may need to be manually
171 /// included in `Constraints` to provide contextually-accurate results, e.g.
172 /// if any definitions or relationships of the values in `Constraints` have
173 /// been stored in flow conditions.
174 Solver::Result querySolver(llvm::SetVector<const Formula *> Constraints);
175
176 /// Returns the fields of `Type`, limited to the set of fields modeled by this
177 /// context.
179
180 /// Returns the names and types of the synthetic fields for the given record
181 /// type.
182 llvm::StringMap<QualType> getSyntheticFields(QualType Type) {
183 assert(Type->isRecordType());
184 if (SyntheticFieldCallback) {
185 llvm::StringMap<QualType> Result = SyntheticFieldCallback(Type);
186 // Synthetic fields are not allowed to have reference type.
187 assert([&Result] {
188 for (const auto &Entry : Result)
189 if (Entry.getValue()->isReferenceType())
190 return false;
191 return true;
192 }());
193 return Result;
194 }
195 return {};
196 }
197
198private:
199 friend class Environment;
200
201 struct NullableQualTypeDenseMapInfo : private llvm::DenseMapInfo<QualType> {
202 static QualType getEmptyKey() {
203 // Allow a NULL `QualType` by using a different value as the empty key.
204 return QualType::getFromOpaquePtr(reinterpret_cast<Type *>(1));
205 }
206
207 using DenseMapInfo::getHashValue;
208 using DenseMapInfo::getTombstoneKey;
209 using DenseMapInfo::isEqual;
210 };
211
212 // Extends the set of modeled field declarations.
213 void addModeledFields(const FieldSet &Fields);
214
215 /// Adds all constraints of the flow condition identified by `Token` and all
216 /// of its transitive dependencies to `Constraints`.
217 void
218 addTransitiveFlowConditionConstraints(Atom Token,
219 llvm::SetVector<const Formula *> &Out);
220
221 /// Returns true if the solver is able to prove that there is a satisfying
222 /// assignment for `Constraints`.
223 bool isSatisfiable(llvm::SetVector<const Formula *> Constraints) {
224 return querySolver(std::move(Constraints)).getStatus() ==
226 }
227
228 /// Returns true if the solver is able to prove that there is no satisfying
229 /// assignment for `Constraints`
230 bool isUnsatisfiable(llvm::SetVector<const Formula *> Constraints) {
231 return querySolver(std::move(Constraints)).getStatus() ==
233 }
234
235 std::unique_ptr<Solver> S;
236 std::unique_ptr<Arena> A;
237
238 // Maps from program declarations and statements to storage locations that are
239 // assigned to them. These assignments are global (aggregated across all basic
240 // blocks) and are used to produce stable storage locations when the same
241 // basic blocks are evaluated multiple times. The storage locations that are
242 // in scope for a particular basic block are stored in `Environment`.
243 llvm::DenseMap<const ValueDecl *, StorageLocation *> DeclToLoc;
244 llvm::DenseMap<const Expr *, StorageLocation *> ExprToLoc;
245
246 // Null pointer values, keyed by the canonical pointee type.
247 //
248 // FIXME: The pointer values are indexed by the pointee types which are
249 // required to initialize the `PointeeLoc` field in `PointerValue`. Consider
250 // creating a type-independent `NullPointerValue` without a `PointeeLoc`
251 // field.
252 llvm::DenseMap<QualType, PointerValue *, NullableQualTypeDenseMapInfo>
253 NullPointerVals;
254
255 Options Opts;
256
257 // Flow conditions are tracked symbolically: each unique flow condition is
258 // associated with a fresh symbolic variable (token), bound to the clause that
259 // defines the flow condition. Conceptually, each binding corresponds to an
260 // "iff" of the form `FC <=> (C1 ^ C2 ^ ...)` where `FC` is a flow condition
261 // token (an atomic boolean) and `Ci`s are the set of constraints in the flow
262 // flow condition clause. The set of constraints (C1 ^ C2 ^ ...) are stored in
263 // the `FlowConditionConstraints` map, keyed by the token of the flow
264 // condition.
265 //
266 // Flow conditions depend on other flow conditions if they are created using
267 // `forkFlowCondition` or `joinFlowConditions`. The graph of flow condition
268 // dependencies is stored in the `FlowConditionDeps` map.
269 llvm::DenseMap<Atom, llvm::DenseSet<Atom>> FlowConditionDeps;
270 llvm::DenseMap<Atom, const Formula *> FlowConditionConstraints;
271 const Formula *Invariant = nullptr;
272
273 llvm::DenseMap<const FunctionDecl *, AdornedCFG> FunctionContexts;
274
275 // Fields modeled by environments covered by this context.
276 FieldSet ModeledFields;
277
278 std::unique_ptr<Logger> LogOwner; // If created via flags.
279
280 std::function<llvm::StringMap<QualType>(QualType)> SyntheticFieldCallback;
281
282 /// Has any `RecordStorageLocation` been created yet?
283 bool RecordStorageLocationCreated = false;
284};
285
286} // namespace dataflow
287} // namespace clang
288
289#endif // LLVM_CLANG_ANALYSIS_FLOWSENSITIVE_DATAFLOWANALYSISCONTEXT_H
Allows QualTypes to be sorted and hence used in maps and sets.
This represents one expression.
Definition: Expr.h:110
Represents a function declaration or definition.
Definition: Decl.h:1971
A (possibly-)qualified type.
Definition: Type.h:738
static QualType getFromOpaquePtr(const void *Ptr)
Definition: Type.h:787
Token - This structure provides full information about a lexed token.
Definition: Token.h:36
The base class of the type hierarchy.
Definition: Type.h:1607
bool isRecordType() const
Definition: Type.h:7496
Represent the declaration of a variable (in which case it is an lvalue) a function (in which case it ...
Definition: Decl.h:706
Holds CFG with additional information derived from it that is needed to perform dataflow analysis.
Definition: AdornedCFG.h:32
The Arena owns the objects that model data within an analysis.
Definition: Arena.h:21
Owns objects that encompass the state of a program and stores context that is used during dataflow an...
void setSyntheticFieldCallback(std::function< llvm::StringMap< QualType >(QualType)> CB)
Sets a callback that returns the names and types of the synthetic fields to add to a RecordStorageLoc...
const AdornedCFG * getAdornedCFG(const FunctionDecl *F)
Returns the AdornedCFG registered for F, if any.
Atom joinFlowConditions(Atom FirstToken, Atom SecondToken)
Creates a new flow condition that represents the disjunction of the flow conditions identified by Fir...
void addFlowConditionConstraint(Atom Token, const Formula &Constraint)
Adds Constraint to the flow condition identified by Token.
Atom forkFlowCondition(Atom Token)
Creates a new flow condition with the same constraints as the flow condition identified by Token and ...
bool equivalentFormulas(const Formula &Val1, const Formula &Val2)
Returns true if Val1 is equivalent to Val2.
StorageLocation & getStableStorageLocation(const ValueDecl &D)
Returns a stable storage location for D.
bool flowConditionImplies(Atom Token, const Formula &F)
Returns true if the constraints of the flow condition identified by Token imply that F is true.
Solver::Result querySolver(llvm::SetVector< const Formula * > Constraints)
Returns the outcome of satisfiability checking on Constraints.
bool flowConditionAllows(Atom Token, const Formula &F)
Returns true if the constraints of the flow condition identified by Token still allow F to be true.
PointerValue & getOrCreateNullPointerValue(QualType PointeeType)
Returns a pointer value that represents a null pointer.
void addInvariant(const Formula &Constraint)
Adds Constraint to current and future flow conditions in this context.
llvm::StringMap< QualType > getSyntheticFields(QualType Type)
Returns the names and types of the synthetic fields for the given record type.
StorageLocation & createStorageLocation(QualType Type)
Returns a new storage location appropriate for Type.
FieldSet getModeledFields(QualType Type)
Returns the fields of Type, limited to the set of fields modeled by this context.
LLVM_DUMP_METHOD void dumpFlowCondition(Atom Token, llvm::raw_ostream &OS=llvm::dbgs())
RecordStorageLocation & createRecordStorageLocation(QualType Type, RecordStorageLocation::FieldToLoc FieldLocs, RecordStorageLocation::SyntheticFieldMap SyntheticFields)
Creates a RecordStorageLocation for the given type and with the given fields.
Holds the state of the program (store and heap) at a given program point.
A logger is notified as the analysis progresses.
Definition: Logger.h:27
Models a symbolic pointer. Specifically, any value of type T*.
Definition: Value.h:172
A storage location for a record (struct, class, or union).
llvm::DenseMap< const ValueDecl *, StorageLocation * > FieldToLoc
llvm::StringMap< StorageLocation * > SyntheticFieldMap
Base class for elements of the local variable store and of the heap.
Atom
Identifies an atomic boolean variable such as "V1".
Definition: Formula.h:34
llvm::SmallSetVector< const FieldDecl *, 4 > FieldSet
A set of FieldDecl *.
Definition: ASTOps.h:41
The JSON file list parser is used to communicate input to InstallAPI.
@ Result
The result type of a method or function.
unsigned Depth
The maximum depth to analyze.
Logger * Log
If provided, analysis details will be recorded here.
std::optional< ContextSensitiveOptions > ContextSensitiveOpts
Options for analyzing function bodies when present in the translation unit, or empty to disable conte...
Status getStatus() const
Returns the status of satisfiability checking on the queried boolean formula.
Definition: Solver.h:64
@ Unsatisfiable
Indicates that there is no satisfying assignment for a boolean formula.
@ Satisfiable
Indicates that there exists a satisfying assignment for a boolean formula.