clang 20.0.0git
DataflowAnalysis.h
Go to the documentation of this file.
1//===- DataflowAnalysis.h ---------------------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines base types and functions for building dataflow analyses
10// that run over Control-Flow Graphs (CFGs).
11//
12//===----------------------------------------------------------------------===//
13
14#ifndef LLVM_CLANG_ANALYSIS_FLOWSENSITIVE_DATAFLOWANALYSIS_H
15#define LLVM_CLANG_ANALYSIS_FLOWSENSITIVE_DATAFLOWANALYSIS_H
16
17#include <iterator>
18#include <optional>
19#include <type_traits>
20#include <utility>
21#include <vector>
22
24#include "clang/Analysis/CFG.h"
31#include "llvm/ADT/STLExtras.h"
32#include "llvm/ADT/STLFunctionalExtras.h"
33#include "llvm/ADT/SmallVector.h"
34#include "llvm/Support/Errc.h"
35#include "llvm/Support/Error.h"
36
37namespace clang {
38namespace dataflow {
39
40/// Base class template for dataflow analyses built on a single lattice type.
41///
42/// Requirements:
43///
44/// `Derived` must be derived from a specialization of this class template and
45/// must provide the following public members:
46/// * `LatticeT initialElement()` - returns a lattice element that models the
47/// initial state of a basic block;
48/// * `void transfer(const CFGElement &, LatticeT &, Environment &)` - applies
49/// the analysis transfer function for a given CFG element and lattice
50/// element.
51///
52/// `Derived` can optionally provide the following members:
53/// * `void transferBranch(bool Branch, const Stmt *Stmt, TypeErasedLattice &E,
54/// Environment &Env)` - applies the analysis transfer
55/// function for a given edge from a CFG block of a conditional statement.
56///
57/// `Derived` can optionally override the virtual functions in the
58/// `Environment::ValueModel` interface (which is an indirect base class of
59/// this class).
60///
61/// `LatticeT` is a bounded join-semilattice that is used by `Derived` and must
62/// provide the following public members:
63/// * `LatticeJoinEffect join(const LatticeT &)` - joins the object and the
64/// argument by computing their least upper bound, modifies the object if
65/// necessary, and returns an effect indicating whether any changes were
66/// made to it;
67/// FIXME: make it `static LatticeT join(const LatticeT&, const LatticeT&)`
68/// * `bool operator==(const LatticeT &) const` - returns true if and only if
69/// the object is equal to the argument.
70///
71/// `LatticeT` can optionally provide the following members:
72/// * `LatticeJoinEffect widen(const LatticeT &Previous)` - replaces the
73/// lattice element with an approximation that can reach a fixed point more
74/// quickly than iterated application of the transfer function alone. The
75/// previous value is provided to inform the choice of widened value. The
76/// function must also serve as a comparison operation, by indicating whether
77/// the widened value is equivalent to the previous value with the returned
78/// `LatticeJoinEffect`.
79template <typename Derived, typename LatticeT>
81public:
82 /// Bounded join-semilattice that is used in the analysis.
83 using Lattice = LatticeT;
84
85 explicit DataflowAnalysis(ASTContext &Context) : Context(Context) {}
86
87 explicit DataflowAnalysis(ASTContext &Context,
89 : TypeErasedDataflowAnalysis(Options), Context(Context) {}
90
91 ASTContext &getASTContext() final { return Context; }
92
94 return {static_cast<Derived *>(this)->initialElement()};
95 }
96
98 const TypeErasedLattice &E2) final {
99 // FIXME: change the signature of join() to avoid copying here.
100 Lattice L1 = llvm::any_cast<const Lattice &>(E1.Value);
101 const Lattice &L2 = llvm::any_cast<const Lattice &>(E2.Value);
102 L1.join(L2);
103 return {std::move(L1)};
104 }
105
107 const TypeErasedLattice &Previous) final {
108 Lattice &C = llvm::any_cast<Lattice &>(Current.Value);
109 const Lattice &P = llvm::any_cast<const Lattice &>(Previous.Value);
110 return widenInternal(Rank0{}, C, P);
111 }
112
114 const TypeErasedLattice &E2) final {
115 const Lattice &L1 = llvm::any_cast<const Lattice &>(E1.Value);
116 const Lattice &L2 = llvm::any_cast<const Lattice &>(E2.Value);
117 return L1 == L2;
118 }
119
121 Environment &Env) final {
122 Lattice &L = llvm::any_cast<Lattice &>(E.Value);
123 static_cast<Derived *>(this)->transfer(Element, L, Env);
124 }
125
126 void transferBranchTypeErased(bool Branch, const Stmt *Stmt,
128 transferBranchInternal(Rank0{}, *static_cast<Derived *>(this), Branch, Stmt,
129 E, Env);
130 }
131
132private:
133 // These `Rank` structs are used for template metaprogramming to choose
134 // between overloads.
135 struct Rank1 {};
136 struct Rank0 : Rank1 {};
137
138 // The first-choice implementation: use `widen` when it is available.
139 template <typename T>
140 static auto widenInternal(Rank0, T &Current, const T &Prev)
141 -> decltype(Current.widen(Prev)) {
142 return Current.widen(Prev);
143 }
144
145 // The second-choice implementation: `widen` is unavailable. Widening is
146 // merged with equality checking, so when widening is unimplemented, we
147 // default to equality checking.
148 static LatticeJoinEffect widenInternal(Rank1, const Lattice &Current,
149 const Lattice &Prev) {
150 return Prev == Current ? LatticeJoinEffect::Unchanged
152 }
153
154 // The first-choice implementation: `transferBranch` is implemented.
155 template <typename Analysis>
156 static auto transferBranchInternal(Rank0, Analysis &A, bool Branch,
157 const Stmt *Stmt, TypeErasedLattice &L,
158 Environment &Env)
159 -> std::void_t<decltype(A.transferBranch(
160 Branch, Stmt, std::declval<LatticeT &>(), Env))> {
161 A.transferBranch(Branch, Stmt, llvm::any_cast<Lattice &>(L.Value), Env);
162 }
163
164 // The second-choice implementation: `transferBranch` is unimplemented. No-op.
165 template <typename Analysis>
166 static void transferBranchInternal(Rank1, Analysis &A, bool, const Stmt *,
167 TypeErasedLattice &, Environment &) {}
168
169 ASTContext &Context;
170};
171
172// Model of the program at a given program point.
173template <typename LatticeT> struct DataflowAnalysisState {
174 // Model of a program property.
175 LatticeT Lattice;
176
177 // Model of the state of the program (store and heap).
179};
180
181/// A callback to be called with the state before or after visiting a CFG
182/// element.
183template <typename AnalysisT>
184using CFGEltCallback = std::function<void(
185 const CFGElement &,
187
188/// A pair of callbacks to be called with the state before and after visiting a
189/// CFG element.
190/// Either or both of the callbacks may be null.
191template <typename AnalysisT> struct CFGEltCallbacks {
194};
195
196/// A callback for performing diagnosis on a CFG element, called with the state
197/// before or after visiting that CFG element. Returns a list of diagnostics
198/// to emit (if any).
199template <typename AnalysisT, typename Diagnostic>
200using DiagnosisCallback = llvm::function_ref<llvm::SmallVector<Diagnostic>(
201 const CFGElement &, ASTContext &,
203
204/// A pair of callbacks for performing diagnosis on a CFG element, called with
205/// the state before and after visiting that CFG element.
206/// Either or both of the callbacks may be null.
207template <typename AnalysisT, typename Diagnostic> struct DiagnosisCallbacks {
210};
211
212/// Default for the maximum number of SAT solver iterations during analysis.
213inline constexpr std::int64_t kDefaultMaxSATIterations = 1'000'000'000;
214
215/// Default for the maximum number of block visits during analysis.
216inline constexpr std::int32_t kDefaultMaxBlockVisits = 20'000;
217
218/// Performs dataflow analysis and returns a mapping from basic block IDs to
219/// dataflow analysis states that model the respective basic blocks. The
220/// returned vector, if any, will have the same size as the number of CFG
221/// blocks, with indices corresponding to basic block IDs. Returns an error if
222/// the dataflow analysis cannot be performed successfully. Otherwise, calls
223/// `PostAnalysisCallbacks` on each CFG element with the final analysis results
224/// before and after that program point.
225///
226/// `MaxBlockVisits` caps the number of block visits during analysis. See
227/// `runTypeErasedDataflowAnalysis` for a full description. The default value is
228/// essentially arbitrary -- large enough to accommodate what seems like any
229/// reasonable CFG, but still small enough to limit the cost of hitting the
230/// limit.
231template <typename AnalysisT>
232llvm::Expected<std::vector<
233 std::optional<DataflowAnalysisState<typename AnalysisT::Lattice>>>>
235 const Environment &InitEnv,
236 CFGEltCallbacks<AnalysisT> PostAnalysisCallbacks,
237 std::int32_t MaxBlockVisits = kDefaultMaxBlockVisits) {
238 CFGEltCallbacksTypeErased TypeErasedCallbacks;
239 if (PostAnalysisCallbacks.Before) {
240 TypeErasedCallbacks.Before =
241 [&PostAnalysisCallbacks](const CFGElement &Element,
242 const TypeErasedDataflowAnalysisState &State) {
243 auto *Lattice =
244 llvm::any_cast<typename AnalysisT::Lattice>(&State.Lattice.Value);
245 // FIXME: we should not be copying the environment here!
246 // Ultimately the `CFGEltCallback` only gets a const reference anyway.
247 PostAnalysisCallbacks.Before(
249 *Lattice, State.Env.fork()});
250 };
251 }
252 if (PostAnalysisCallbacks.After) {
253 TypeErasedCallbacks.After =
254 [&PostAnalysisCallbacks](const CFGElement &Element,
255 const TypeErasedDataflowAnalysisState &State) {
256 auto *Lattice =
257 llvm::any_cast<typename AnalysisT::Lattice>(&State.Lattice.Value);
258 // FIXME: we should not be copying the environment here!
259 // Ultimately the `CFGEltCallback` only gets a const reference anyway.
260 PostAnalysisCallbacks.After(
262 *Lattice, State.Env.fork()});
263 };
264 }
265
266 auto TypeErasedBlockStates = runTypeErasedDataflowAnalysis(
267 ACFG, Analysis, InitEnv, TypeErasedCallbacks, MaxBlockVisits);
268 if (!TypeErasedBlockStates)
269 return TypeErasedBlockStates.takeError();
270
271 std::vector<std::optional<DataflowAnalysisState<typename AnalysisT::Lattice>>>
273 BlockStates.reserve(TypeErasedBlockStates->size());
274
275 llvm::transform(
276 std::move(*TypeErasedBlockStates), std::back_inserter(BlockStates),
277 [](auto &OptState) {
278 return llvm::transformOptional(
279 std::move(OptState), [](TypeErasedDataflowAnalysisState &&State) {
281 llvm::any_cast<typename AnalysisT::Lattice>(
282 std::move(State.Lattice.Value)),
283 std::move(State.Env)};
284 });
285 });
286 return std::move(BlockStates);
287}
288
289/// Overload that takes only one post-analysis callback, which is run on the
290/// state after visiting the `CFGElement`. This is provided for backwards
291/// compatibility; new callers should call the overload taking `CFGEltCallbacks`
292/// instead.
293template <typename AnalysisT>
294llvm::Expected<std::vector<
295 std::optional<DataflowAnalysisState<typename AnalysisT::Lattice>>>>
297 const AdornedCFG &ACFG, AnalysisT &Analysis, const Environment &InitEnv,
298 CFGEltCallback<AnalysisT> PostAnalysisCallbackAfterElt = nullptr,
299 std::int32_t MaxBlockVisits = kDefaultMaxBlockVisits) {
301 {nullptr, PostAnalysisCallbackAfterElt},
302 MaxBlockVisits);
303}
304
305// Create an analysis class that is derived from `DataflowAnalysis`. This is an
306// SFINAE adapter that allows us to call two different variants of constructor
307// (either with or without the optional `Environment` parameter).
308// FIXME: Make all classes derived from `DataflowAnalysis` take an `Environment`
309// parameter in their constructor so that we can get rid of this abomination.
310template <typename AnalysisT>
312 -> decltype(AnalysisT(ASTCtx, Env)) {
313 return AnalysisT(ASTCtx, Env);
314}
315template <typename AnalysisT>
316auto createAnalysis(ASTContext &ASTCtx, Environment &Env)
317 -> decltype(AnalysisT(ASTCtx)) {
318 return AnalysisT(ASTCtx);
319}
320
321/// Runs a dataflow analysis over the given function and then runs `Diagnoser`
322/// over the results. Returns a list of diagnostics for `FuncDecl` or an
323/// error. Currently, errors can occur (at least) because the analysis requires
324/// too many iterations over the CFG or the SAT solver times out.
325///
326/// The default value of `MaxSATIterations` was chosen based on the following
327/// observations:
328/// - Non-pathological calls to the solver typically require only a few hundred
329/// iterations.
330/// - This limit is still low enough to keep runtimes acceptable (on typical
331/// machines) in cases where we hit the limit.
332///
333/// `MaxBlockVisits` caps the number of block visits during analysis. See
334/// `runDataflowAnalysis` for a full description and explanation of the default
335/// value.
336template <typename AnalysisT, typename Diagnostic>
338diagnoseFunction(const FunctionDecl &FuncDecl, ASTContext &ASTCtx,
340 std::int64_t MaxSATIterations = kDefaultMaxSATIterations,
341 std::int32_t MaxBlockVisits = kDefaultMaxBlockVisits) {
343 if (!Context)
344 return Context.takeError();
345
346 auto Solver = std::make_unique<WatchedLiteralsSolver>(MaxSATIterations);
347 DataflowAnalysisContext AnalysisContext(*Solver);
348 Environment Env(AnalysisContext, FuncDecl);
349 AnalysisT Analysis = createAnalysis<AnalysisT>(ASTCtx, Env);
351 CFGEltCallbacksTypeErased PostAnalysisCallbacks;
352 if (Diagnoser.Before) {
353 PostAnalysisCallbacks.Before =
354 [&ASTCtx, &Diagnoser,
355 &Diagnostics](const CFGElement &Elt,
356 const TypeErasedDataflowAnalysisState &State) mutable {
357 auto EltDiagnostics = Diagnoser.Before(
358 Elt, ASTCtx,
360 llvm::any_cast<const typename AnalysisT::Lattice &>(
361 State.Lattice.Value),
362 State.Env));
363 llvm::move(EltDiagnostics, std::back_inserter(Diagnostics));
364 };
365 }
366 if (Diagnoser.After) {
367 PostAnalysisCallbacks.After =
368 [&ASTCtx, &Diagnoser,
369 &Diagnostics](const CFGElement &Elt,
370 const TypeErasedDataflowAnalysisState &State) mutable {
371 auto EltDiagnostics = Diagnoser.After(
372 Elt, ASTCtx,
374 llvm::any_cast<const typename AnalysisT::Lattice &>(
375 State.Lattice.Value),
376 State.Env));
377 llvm::move(EltDiagnostics, std::back_inserter(Diagnostics));
378 };
379 }
380 if (llvm::Error Err =
382 PostAnalysisCallbacks, MaxBlockVisits)
383 .takeError())
384 return std::move(Err);
385
386 if (Solver->reachedLimit())
387 return llvm::createStringError(llvm::errc::interrupted,
388 "SAT solver timed out");
389
390 return Diagnostics;
391}
392
393/// Overload that takes only one diagnosis callback, which is run on the state
394/// after visiting the `CFGElement`. This is provided for backwards
395/// compatibility; new callers should call the overload taking
396/// `DiagnosisCallbacks` instead.
397template <typename AnalysisT, typename Diagnostic>
399diagnoseFunction(const FunctionDecl &FuncDecl, ASTContext &ASTCtx,
401 std::int64_t MaxSATIterations = kDefaultMaxSATIterations,
402 std::int32_t MaxBlockVisits = kDefaultMaxBlockVisits) {
403 DiagnosisCallbacks<AnalysisT, Diagnostic> Callbacks = {nullptr, Diagnoser};
404 return diagnoseFunction(FuncDecl, ASTCtx, Callbacks, MaxSATIterations,
405 MaxBlockVisits);
406}
407
408/// Abstract base class for dataflow "models": reusable analysis components that
409/// model a particular aspect of program semantics in the `Environment`. For
410/// example, a model may capture a type and its related functions.
412public:
413 /// Return value indicates whether the model processed the `Element`.
414 virtual bool transfer(const CFGElement &Element, Environment &Env) = 0;
415};
416
417} // namespace dataflow
418} // namespace clang
419
420#endif // LLVM_CLANG_ANALYSIS_FLOWSENSITIVE_DATAFLOWANALYSIS_H
Defines the clang::ASTContext interface.
StringRef P
Expr * E
const Environment & Env
Definition: HTMLLogger.cpp:148
llvm::ArrayRef< std::optional< TypeErasedDataflowAnalysisState > > BlockStates
Stores the state of a CFG block if it has been evaluated by the analysis.
const Environment & InitEnv
Initial state to start the analysis.
TypeErasedDataflowAnalysis & Analysis
The analysis to be run.
StateNode * Previous
Holds long-lived AST nodes (such as types and decls) that can be referred to throughout the semantic ...
Definition: ASTContext.h:186
Represents a top-level expression in a basic block.
Definition: CFG.h:55
Represents a function declaration or definition.
Definition: Decl.h:1932
Stmt - This represents one statement.
Definition: Stmt.h:84
Holds CFG with additional information derived from it that is needed to perform dataflow analysis.
Definition: AdornedCFG.h:32
static llvm::Expected< AdornedCFG > build(const FunctionDecl &Func)
Builds an AdornedCFG from a FunctionDecl.
Definition: AdornedCFG.cpp:129
Owns objects that encompass the state of a program and stores context that is used during dataflow an...
Base class template for dataflow analyses built on a single lattice type.
LatticeJoinEffect widenTypeErased(TypeErasedLattice &Current, const TypeErasedLattice &Previous) final
Chooses a lattice element that approximates the current element at a program point,...
ASTContext & getASTContext() final
Returns the ASTContext that is used by the analysis.
DataflowAnalysis(ASTContext &Context, DataflowAnalysisOptions Options)
bool isEqualTypeErased(const TypeErasedLattice &E1, const TypeErasedLattice &E2) final
Returns true if and only if the two given type-erased lattice elements are equal.
DataflowAnalysis(ASTContext &Context)
TypeErasedLattice joinTypeErased(const TypeErasedLattice &E1, const TypeErasedLattice &E2) final
Joins two type-erased lattice elements by computing their least upper bound.
void transferBranchTypeErased(bool Branch, const Stmt *Stmt, TypeErasedLattice &E, Environment &Env) final
Applies the analysis transfer function for a given edge from a CFG block of a conditional statement.
TypeErasedLattice typeErasedInitialElement() final
Returns a type-erased lattice element that models the initial state of a basic block.
LatticeT Lattice
Bounded join-semilattice that is used in the analysis.
void transferTypeErased(const CFGElement &Element, TypeErasedLattice &E, Environment &Env) final
Applies the analysis transfer function for a given control flow graph element and type-erased lattice...
Abstract base class for dataflow "models": reusable analysis components that model a particular aspec...
virtual bool transfer(const CFGElement &Element, Environment &Env)=0
Return value indicates whether the model processed the Element.
Supplements Environment with non-standard comparison and join operations.
Holds the state of the program (store and heap) at a given program point.
An interface for a SAT solver that can be used by dataflow analyses.
Definition: Solver.h:28
virtual bool reachedLimit() const =0
Type-erased base class for dataflow analyses built on a single lattice type.
void transfer(const StmtToEnvMap &StmtToEnv, const Stmt &S, Environment &Env, Environment::ValueModel &Model)
Evaluates S and updates Env accordingly.
Definition: Transfer.cpp:891
llvm::Expected< std::vector< std::optional< DataflowAnalysisState< typename AnalysisT::Lattice > > > > runDataflowAnalysis(const AdornedCFG &ACFG, AnalysisT &Analysis, const Environment &InitEnv, CFGEltCallbacks< AnalysisT > PostAnalysisCallbacks, std::int32_t MaxBlockVisits=kDefaultMaxBlockVisits)
Performs dataflow analysis and returns a mapping from basic block IDs to dataflow analysis states tha...
llvm::function_ref< llvm::SmallVector< Diagnostic >(const CFGElement &, ASTContext &, const TransferStateForDiagnostics< typename AnalysisT::Lattice > &)> DiagnosisCallback
A callback for performing diagnosis on a CFG element, called with the state before or after visiting ...
constexpr std::int32_t kDefaultMaxBlockVisits
Default for the maximum number of block visits during analysis.
llvm::Expected< std::vector< std::optional< TypeErasedDataflowAnalysisState > > > runTypeErasedDataflowAnalysis(const AdornedCFG &ACFG, TypeErasedDataflowAnalysis &Analysis, const Environment &InitEnv, const CFGEltCallbacksTypeErased &PostAnalysisCallbacks, std::int32_t MaxBlockVisits)
Performs dataflow analysis and returns a mapping from basic block IDs to dataflow analysis states tha...
llvm::Expected< llvm::SmallVector< Diagnostic > > diagnoseFunction(const FunctionDecl &FuncDecl, ASTContext &ASTCtx, DiagnosisCallbacks< AnalysisT, Diagnostic > Diagnoser, std::int64_t MaxSATIterations=kDefaultMaxSATIterations, std::int32_t MaxBlockVisits=kDefaultMaxBlockVisits)
Runs a dataflow analysis over the given function and then runs Diagnoser over the results.
std::function< void(const CFGElement &, const DataflowAnalysisState< typename AnalysisT::Lattice > &)> CFGEltCallback
A callback to be called with the state before or after visiting a CFG element.
LatticeEffect LatticeJoinEffect
auto createAnalysis(ASTContext &ASTCtx, Environment &Env) -> decltype(AnalysisT(ASTCtx, Env))
LatticeEffect
Effect indicating whether a lattice operation resulted in a new value.
constexpr std::int64_t kDefaultMaxSATIterations
Default for the maximum number of SAT solver iterations during analysis.
The JSON file list parser is used to communicate input to InstallAPI.
const FunctionProtoType * T
A pair of callbacks to be called with the state before and after visiting a CFG element.
A pair of callbacks to be called with the state before and after visiting a CFG element.
CFGEltCallback< AnalysisT > After
CFGEltCallback< AnalysisT > Before
A pair of callbacks for performing diagnosis on a CFG element, called with the state before and after...
DiagnosisCallback< AnalysisT, Diagnostic > Before
DiagnosisCallback< AnalysisT, Diagnostic > After
A read-only version of TransferState.
Definition: MatchSwitch.h:55
Type-erased model of the program at a given program point.
Type-erased lattice element container.