clang 18.0.0git
DataflowAnalysis.h
Go to the documentation of this file.
1//===- DataflowAnalysis.h ---------------------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines base types and functions for building dataflow analyses
10// that run over Control-Flow Graphs (CFGs).
11//
12//===----------------------------------------------------------------------===//
13
14#ifndef LLVM_CLANG_ANALYSIS_FLOWSENSITIVE_DATAFLOWANALYSIS_H
15#define LLVM_CLANG_ANALYSIS_FLOWSENSITIVE_DATAFLOWANALYSIS_H
16
17#include <iterator>
18#include <optional>
19#include <type_traits>
20#include <utility>
21#include <vector>
22
24#include "clang/Analysis/CFG.h"
31#include "llvm/ADT/STLExtras.h"
32#include "llvm/ADT/STLFunctionalExtras.h"
33#include "llvm/ADT/SmallVector.h"
34#include "llvm/Support/Errc.h"
35#include "llvm/Support/Error.h"
36
37namespace clang {
38namespace dataflow {
39
40/// Base class template for dataflow analyses built on a single lattice type.
41///
42/// Requirements:
43///
44/// `Derived` must be derived from a specialization of this class template and
45/// must provide the following public members:
46/// * `LatticeT initialElement()` - returns a lattice element that models the
47/// initial state of a basic block;
48/// * `void transfer(const CFGElement &, LatticeT &, Environment &)` - applies
49/// the analysis transfer function for a given CFG element and lattice
50/// element.
51///
52/// `Derived` can optionally provide the following members:
53/// * `void transferBranch(bool Branch, const Stmt *Stmt, TypeErasedLattice &E,
54/// Environment &Env)` - applies the analysis transfer
55/// function for a given edge from a CFG block of a conditional statement.
56///
57/// `Derived` can optionally override the following members:
58/// * `bool merge(QualType, const Value &, const Value &, Value &,
59/// Environment &)` - joins distinct values. This could be a strict
60/// lattice join or a more general widening operation.
61///
62/// `LatticeT` is a bounded join-semilattice that is used by `Derived` and must
63/// provide the following public members:
64/// * `LatticeJoinEffect join(const LatticeT &)` - joins the object and the
65/// argument by computing their least upper bound, modifies the object if
66/// necessary, and returns an effect indicating whether any changes were
67/// made to it;
68/// FIXME: make it `static LatticeT join(const LatticeT&, const LatticeT&)`
69/// * `bool operator==(const LatticeT &) const` - returns true if and only if
70/// the object is equal to the argument.
71///
72/// `LatticeT` can optionally provide the following members:
73/// * `LatticeJoinEffect widen(const LatticeT &Previous)` - replaces the
74/// lattice element with an approximation that can reach a fixed point more
75/// quickly than iterated application of the transfer function alone. The
76/// previous value is provided to inform the choice of widened value. The
77/// function must also serve as a comparison operation, by indicating whether
78/// the widened value is equivalent to the previous value with the returned
79/// `LatticeJoinEffect`.
80template <typename Derived, typename LatticeT>
82public:
83 /// Bounded join-semilattice that is used in the analysis.
84 using Lattice = LatticeT;
85
86 explicit DataflowAnalysis(ASTContext &Context) : Context(Context) {}
87
88 explicit DataflowAnalysis(ASTContext &Context,
90 : TypeErasedDataflowAnalysis(Options), Context(Context) {}
91
92 ASTContext &getASTContext() final { return Context; }
93
95 return {static_cast<Derived *>(this)->initialElement()};
96 }
97
99 const TypeErasedLattice &E2) final {
100 // FIXME: change the signature of join() to avoid copying here.
101 Lattice L1 = llvm::any_cast<const Lattice &>(E1.Value);
102 const Lattice &L2 = llvm::any_cast<const Lattice &>(E2.Value);
103 L1.join(L2);
104 return {std::move(L1)};
105 }
106
108 const TypeErasedLattice &Previous) final {
109 Lattice &C = llvm::any_cast<Lattice &>(Current.Value);
110 const Lattice &P = llvm::any_cast<const Lattice &>(Previous.Value);
111 return widenInternal(Rank0{}, C, P);
112 }
113
115 const TypeErasedLattice &E2) final {
116 const Lattice &L1 = llvm::any_cast<const Lattice &>(E1.Value);
117 const Lattice &L2 = llvm::any_cast<const Lattice &>(E2.Value);
118 return L1 == L2;
119 }
120
122 Environment &Env) final {
123 Lattice &L = llvm::any_cast<Lattice &>(E.Value);
124 static_cast<Derived *>(this)->transfer(Element, L, Env);
125 }
126
127 void transferBranchTypeErased(bool Branch, const Stmt *Stmt,
128 TypeErasedLattice &E, Environment &Env) final {
129 transferBranchInternal(Rank0{}, *static_cast<Derived *>(this), Branch, Stmt,
130 E, Env);
131 }
132
133private:
134 // These `Rank` structs are used for template metaprogramming to choose
135 // between overloads.
136 struct Rank1 {};
137 struct Rank0 : Rank1 {};
138
139 // The first-choice implementation: use `widen` when it is available.
140 template <typename T>
141 static auto widenInternal(Rank0, T &Current, const T &Prev)
142 -> decltype(Current.widen(Prev)) {
143 return Current.widen(Prev);
144 }
145
146 // The second-choice implementation: `widen` is unavailable. Widening is
147 // merged with equality checking, so when widening is unimplemented, we
148 // default to equality checking.
149 static LatticeJoinEffect widenInternal(Rank1, const Lattice &Current,
150 const Lattice &Prev) {
151 return Prev == Current ? LatticeJoinEffect::Unchanged
153 }
154
155 // The first-choice implementation: `transferBranch` is implemented.
156 template <typename Analysis>
157 static auto transferBranchInternal(Rank0, Analysis &A, bool Branch,
158 const Stmt *Stmt, TypeErasedLattice &L,
159 Environment &Env)
160 -> std::void_t<decltype(A.transferBranch(
161 Branch, Stmt, std::declval<LatticeT &>(), Env))> {
162 A.transferBranch(Branch, Stmt, llvm::any_cast<Lattice &>(L.Value), Env);
163 }
164
165 // The second-choice implementation: `transferBranch` is unimplemented. No-op.
166 template <typename Analysis>
167 static void transferBranchInternal(Rank1, Analysis &A, bool, const Stmt *,
168 TypeErasedLattice &, Environment &) {}
169
170 ASTContext &Context;
171};
172
173// Model of the program at a given program point.
174template <typename LatticeT> struct DataflowAnalysisState {
175 // Model of a program property.
176 LatticeT Lattice;
177
178 // Model of the state of the program (store and heap).
180};
181
182/// Performs dataflow analysis and returns a mapping from basic block IDs to
183/// dataflow analysis states that model the respective basic blocks. The
184/// returned vector, if any, will have the same size as the number of CFG
185/// blocks, with indices corresponding to basic block IDs. Returns an error if
186/// the dataflow analysis cannot be performed successfully. Otherwise, calls
187/// `PostVisitCFG` on each CFG element with the final analysis results at that
188/// program point.
189template <typename AnalysisT>
190llvm::Expected<std::vector<
191 std::optional<DataflowAnalysisState<typename AnalysisT::Lattice>>>>
193 const ControlFlowContext &CFCtx, AnalysisT &Analysis,
194 const Environment &InitEnv,
195 std::function<void(const CFGElement &, const DataflowAnalysisState<
196 typename AnalysisT::Lattice> &)>
197 PostVisitCFG = nullptr) {
198 std::function<void(const CFGElement &,
200 PostVisitCFGClosure = nullptr;
201 if (PostVisitCFG) {
202 PostVisitCFGClosure = [&PostVisitCFG](
203 const CFGElement &Element,
204 const TypeErasedDataflowAnalysisState &State) {
205 auto *Lattice =
206 llvm::any_cast<typename AnalysisT::Lattice>(&State.Lattice.Value);
207 // FIXME: we should not be copying the environment here!
208 // Ultimately the PostVisitCFG only gets a const reference anyway.
210 *Lattice, State.Env.fork()});
211 };
212 }
213
214 auto TypeErasedBlockStates = runTypeErasedDataflowAnalysis(
215 CFCtx, Analysis, InitEnv, PostVisitCFGClosure);
216 if (!TypeErasedBlockStates)
217 return TypeErasedBlockStates.takeError();
218
219 std::vector<std::optional<DataflowAnalysisState<typename AnalysisT::Lattice>>>
221 BlockStates.reserve(TypeErasedBlockStates->size());
222
223 llvm::transform(
224 std::move(*TypeErasedBlockStates), std::back_inserter(BlockStates),
225 [](auto &OptState) {
226 return llvm::transformOptional(
227 std::move(OptState), [](TypeErasedDataflowAnalysisState &&State) {
229 llvm::any_cast<typename AnalysisT::Lattice>(
230 std::move(State.Lattice.Value)),
231 std::move(State.Env)};
232 });
233 });
234 return std::move(BlockStates);
235}
236
237/// Runs a dataflow analysis over the given function and then runs `Diagnoser`
238/// over the results. Returns a list of diagnostics for `FuncDecl` or an
239/// error. Currently, errors can occur (at least) because the analysis requires
240/// too many iterations over the CFG or the SAT solver times out.
241///
242/// The default value of `MaxSATIterations` was chosen based on the following
243/// observations:
244/// - Non-pathological calls to the solver typically require only a few hundred
245/// iterations.
246/// - This limit is still low enough to keep runtimes acceptable (on typical
247/// machines) in cases where we hit the limit.
248template <typename AnalysisT, typename Diagnostic>
250 const FunctionDecl &FuncDecl, ASTContext &ASTCtx,
251 llvm::function_ref<llvm::SmallVector<Diagnostic>(
252 const CFGElement &, ASTContext &,
254 Diagnoser,
255 std::int64_t MaxSATIterations = 1'000'000'000) {
258 if (!Context)
259 return Context.takeError();
260
261 auto OwnedSolver = std::make_unique<WatchedLiteralsSolver>(MaxSATIterations);
262 const WatchedLiteralsSolver *Solver = OwnedSolver.get();
263 DataflowAnalysisContext AnalysisContext(std::move(OwnedSolver));
264 Environment Env(AnalysisContext, FuncDecl);
265 AnalysisT Analysis(ASTCtx);
267 if (llvm::Error Err =
269 *Context, Analysis, Env,
270 [&ASTCtx, &Diagnoser, &Diagnostics](
271 const CFGElement &Elt,
272 const TypeErasedDataflowAnalysisState &State) mutable {
273 auto EltDiagnostics = Diagnoser(
274 Elt, ASTCtx,
276 llvm::any_cast<const typename AnalysisT::Lattice &>(
277 State.Lattice.Value),
278 State.Env));
279 llvm::move(EltDiagnostics, std::back_inserter(Diagnostics));
280 })
281 .takeError())
282 return std::move(Err);
283
284 if (Solver->reachedLimit())
285 return llvm::createStringError(llvm::errc::interrupted,
286 "SAT solver timed out");
287
288 return Diagnostics;
289}
290
291/// Abstract base class for dataflow "models": reusable analysis components that
292/// model a particular aspect of program semantics in the `Environment`. For
293/// example, a model may capture a type and its related functions.
295public:
296 /// Return value indicates whether the model processed the `Element`.
297 virtual bool transfer(const CFGElement &Element, Environment &Env) = 0;
298};
299
300} // namespace dataflow
301} // namespace clang
302
303#endif // LLVM_CLANG_ANALYSIS_FLOWSENSITIVE_DATAFLOWANALYSIS_H
Defines the clang::ASTContext interface.
StringRef P
const Environment & Env
Definition: HTMLLogger.cpp:144
llvm::ArrayRef< std::optional< TypeErasedDataflowAnalysisState > > BlockStates
Stores the state of a CFG block if it has been evaluated by the analysis.
const ControlFlowContext & CFCtx
Contains the CFG being analyzed.
const Environment & InitEnv
Initial state to start the analysis.
TypeErasedDataflowAnalysis & Analysis
The analysis to be run.
StateNode * Previous
Holds long-lived AST nodes (such as types and decls) that can be referred to throughout the semantic ...
Definition: ASTContext.h:182
Represents a top-level expression in a basic block.
Definition: CFG.h:55
Represents a function declaration or definition.
Definition: Decl.h:1957
Stmt - This represents one statement.
Definition: Stmt.h:84
Holds CFG and other derived context that is needed to perform dataflow analysis.
static llvm::Expected< ControlFlowContext > build(const FunctionDecl &Func)
Builds a ControlFlowContext from a FunctionDecl.
Owns objects that encompass the state of a program and stores context that is used during dataflow an...
Base class template for dataflow analyses built on a single lattice type.
LatticeJoinEffect widenTypeErased(TypeErasedLattice &Current, const TypeErasedLattice &Previous) final
Chooses a lattice element that approximates the current element at a program point,...
ASTContext & getASTContext() final
Returns the ASTContext that is used by the analysis.
DataflowAnalysis(ASTContext &Context, DataflowAnalysisOptions Options)
bool isEqualTypeErased(const TypeErasedLattice &E1, const TypeErasedLattice &E2) final
Returns true if and only if the two given type-erased lattice elements are equal.
DataflowAnalysis(ASTContext &Context)
TypeErasedLattice joinTypeErased(const TypeErasedLattice &E1, const TypeErasedLattice &E2) final
Joins two type-erased lattice elements by computing their least upper bound.
void transferBranchTypeErased(bool Branch, const Stmt *Stmt, TypeErasedLattice &E, Environment &Env) final
Applies the analysis transfer function for a given edge from a CFG block of a conditional statement.
TypeErasedLattice typeErasedInitialElement() final
Returns a type-erased lattice element that models the initial state of a basic block.
LatticeT Lattice
Bounded join-semilattice that is used in the analysis.
void transferTypeErased(const CFGElement &Element, TypeErasedLattice &E, Environment &Env) final
Applies the analysis transfer function for a given control flow graph element and type-erased lattice...
Abstract base class for dataflow "models": reusable analysis components that model a particular aspec...
virtual bool transfer(const CFGElement &Element, Environment &Env)=0
Return value indicates whether the model processed the Element.
Supplements Environment with non-standard comparison and join operations.
Holds the state of the program (store and heap) at a given program point.
An interface for a SAT solver that can be used by dataflow analyses.
Definition: Solver.h:28
Type-erased base class for dataflow analyses built on a single lattice type.
A SAT solver that is an implementation of Algorithm D from Knuth's The Art of Computer Programming Vo...
llvm::Expected< std::vector< std::optional< TypeErasedDataflowAnalysisState > > > runTypeErasedDataflowAnalysis(const ControlFlowContext &CFCtx, TypeErasedDataflowAnalysis &Analysis, const Environment &InitEnv, std::function< void(const CFGElement &, const TypeErasedDataflowAnalysisState &)> PostVisitCFG=nullptr)
Performs dataflow analysis and returns a mapping from basic block IDs to dataflow analysis states tha...
llvm::Expected< std::vector< std::optional< DataflowAnalysisState< typename AnalysisT::Lattice > > > > runDataflowAnalysis(const ControlFlowContext &CFCtx, AnalysisT &Analysis, const Environment &InitEnv, std::function< void(const CFGElement &, const DataflowAnalysisState< typename AnalysisT::Lattice > &)> PostVisitCFG=nullptr)
Performs dataflow analysis and returns a mapping from basic block IDs to dataflow analysis states tha...
void transfer(const StmtToEnvMap &StmtToEnv, const Stmt &S, Environment &Env)
Evaluates S and updates Env accordingly.
Definition: Transfer.cpp:826
LatticeJoinEffect
Effect indicating whether a lattice join operation resulted in a new value.
llvm::Expected< llvm::SmallVector< Diagnostic > > diagnoseFunction(const FunctionDecl &FuncDecl, ASTContext &ASTCtx, llvm::function_ref< llvm::SmallVector< Diagnostic >(const CFGElement &, ASTContext &, const TransferStateForDiagnostics< typename AnalysisT::Lattice > &)> Diagnoser, std::int64_t MaxSATIterations=1 '000 '000 '000)
Runs a dataflow analysis over the given function and then runs Diagnoser over the results.
A read-only version of TransferState.
Definition: MatchSwitch.h:55
Type-erased model of the program at a given program point.
Type-erased lattice element container.