clang 22.0.0git
DataflowAnalysisContext.cpp
Go to the documentation of this file.
1//===-- DataflowAnalysisContext.cpp -----------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines a DataflowAnalysisContext class that owns objects that
10// encompass the state of a program and stores context that is used during
11// dataflow analysis.
12//
13//===----------------------------------------------------------------------===//
14
21#include "llvm/ADT/SetOperations.h"
22#include "llvm/ADT/SetVector.h"
23#include "llvm/Support/CommandLine.h"
24#include "llvm/Support/Debug.h"
25#include "llvm/Support/FileSystem.h"
26#include "llvm/Support/Path.h"
27#include "llvm/Support/raw_ostream.h"
28#include <cassert>
29#include <memory>
30#include <string>
31#include <utility>
32#include <vector>
33
34static llvm::cl::opt<std::string> DataflowLog(
35 "dataflow-log", llvm::cl::Hidden, llvm::cl::ValueOptional,
36 llvm::cl::desc("Emit log of dataflow analysis. With no arg, writes textual "
37 "log to stderr. With an arg, writes HTML logs under the "
38 "specified directory (one per analyzed function)."));
39
40namespace clang {
41namespace dataflow {
42
44 // During context-sensitive analysis, a struct may be allocated in one
45 // function, but its field accessed in a function lower in the stack than
46 // the allocation. Since we only collect fields used in the function where
47 // the allocation occurs, we can't apply that filter when performing
48 // context-sensitive analysis. But, this only applies to storage locations,
49 // since field access is not allowed to fail. In contrast, field *values*
50 // don't need this allowance, since the API allows for uninitialized fields.
51 if (Opts.ContextSensitiveOpts)
52 return getObjectFields(Type);
53
54 return llvm::set_intersection(getObjectFields(Type), ModeledFields);
55}
56
57void DataflowAnalysisContext::addModeledFields(const FieldSet &Fields) {
58 ModeledFields.set_union(Fields);
59}
60
62 if (!Type.isNull() && Type->isRecordType()) {
63 llvm::DenseMap<const ValueDecl *, StorageLocation *> FieldLocs;
64 for (const FieldDecl *Field : getModeledFields(Type))
65 if (Field->getType()->isReferenceType())
66 FieldLocs.insert({Field, nullptr});
67 else
68 FieldLocs.insert({Field, &createStorageLocation(
69 Field->getType().getNonReferenceType())});
70
72 for (const auto &Entry : getSyntheticFields(Type))
73 SyntheticFields.insert(
74 {Entry.getKey(),
75 &createStorageLocation(Entry.getValue().getNonReferenceType())});
76
77 return createRecordStorageLocation(Type, std::move(FieldLocs),
78 std::move(SyntheticFields));
79 }
81}
82
83// Returns the keys for a given `StringMap`.
84// Can't use `StringSet` as the return type as it doesn't support `operator==`.
85template <typename T>
86static llvm::DenseSet<llvm::StringRef> getKeys(const llvm::StringMap<T> &Map) {
87 return llvm::DenseSet<llvm::StringRef>(llvm::from_range, Map.keys());
88}
89
93 assert(Type->isRecordType());
94 assert(containsSameFields(getModeledFields(Type), FieldLocs));
95 assert(getKeys(getSyntheticFields(Type)) == getKeys(SyntheticFields));
96
97 RecordStorageLocationCreated = true;
98 return arena().create<RecordStorageLocation>(Type, std::move(FieldLocs),
99 std::move(SyntheticFields));
100}
101
104 if (auto *Loc = DeclToLoc.lookup(&D))
105 return *Loc;
107 DeclToLoc[&D] = &Loc;
108 return Loc;
109}
110
113 const Expr &CanonE = ignoreCFGOmittedNodes(E);
114
115 if (auto *Loc = ExprToLoc.lookup(&CanonE))
116 return *Loc;
117 auto &Loc = createStorageLocation(CanonE.getType());
118 ExprToLoc[&CanonE] = &Loc;
119 return Loc;
120}
121
124 auto CanonicalPointeeType =
125 PointeeType.isNull() ? PointeeType : PointeeType.getCanonicalType();
126 auto Res = NullPointerVals.try_emplace(CanonicalPointeeType, nullptr);
127 if (Res.second) {
128 auto &PointeeLoc = createStorageLocation(CanonicalPointeeType);
129 Res.first->second = &arena().create<PointerValue>(PointeeLoc);
130 }
131 return *Res.first->second;
132}
133
135 if (Invariant == nullptr)
136 Invariant = &Constraint;
137 else
138 Invariant = &arena().makeAnd(*Invariant, Constraint);
139}
140
142 Atom Token, const Formula &Constraint) {
143 auto Res = FlowConditionConstraints.try_emplace(Token, &Constraint);
144 if (!Res.second) {
145 Res.first->second =
146 &arena().makeAnd(*Res.first->second, Constraint);
147 }
148}
149
151 Atom ForkToken = arena().makeFlowConditionToken();
152 FlowConditionDeps[ForkToken].insert(Token);
153 addFlowConditionConstraint(ForkToken, arena().makeAtomRef(Token));
154 return ForkToken;
155}
156
157Atom
159 Atom SecondToken) {
161 auto &TokenDeps = FlowConditionDeps[Token];
162 TokenDeps.insert(FirstToken);
163 TokenDeps.insert(SecondToken);
165 arena().makeOr(arena().makeAtomRef(FirstToken),
166 arena().makeAtomRef(SecondToken)));
167 return Token;
168}
169
171 llvm::SetVector<const Formula *> Constraints) {
172 return S.solve(Constraints.getArrayRef());
173}
174
176 const Formula &F) {
177 if (F.isLiteral(true))
178 return true;
179
180 // Returns true if and only if truth assignment of the flow condition implies
181 // that `F` is also true. We prove whether or not this property holds by
182 // reducing the problem to satisfiability checking. In other words, we attempt
183 // to show that assuming `F` is false makes the constraints induced by the
184 // flow condition unsatisfiable.
185 llvm::SetVector<const Formula *> Constraints;
186 Constraints.insert(&arena().makeAtomRef(Token));
187 Constraints.insert(&arena().makeNot(F));
188 addTransitiveFlowConditionConstraints(Token, Constraints);
189 return isUnsatisfiable(std::move(Constraints));
190}
191
193 const Formula &F) {
194 if (F.isLiteral(false))
195 return false;
196
197 llvm::SetVector<const Formula *> Constraints;
198 Constraints.insert(&arena().makeAtomRef(Token));
199 Constraints.insert(&F);
200 addTransitiveFlowConditionConstraints(Token, Constraints);
201 return isSatisfiable(std::move(Constraints));
202}
203
205 const Formula &Val2) {
206 llvm::SetVector<const Formula *> Constraints;
207 Constraints.insert(&arena().makeNot(arena().makeEquals(Val1, Val2)));
208 return isUnsatisfiable(std::move(Constraints));
209}
210
211llvm::DenseSet<Atom> DataflowAnalysisContext::collectDependencies(
212 llvm::DenseSet<Atom> Tokens) const {
213 // Use a worklist algorithm, with `Remaining` holding the worklist and
214 // `Tokens` tracking which atoms have already been added to the worklist.
215 std::vector<Atom> Remaining(Tokens.begin(), Tokens.end());
216 while (!Remaining.empty()) {
217 Atom CurrentToken = Remaining.back();
218 Remaining.pop_back();
219 if (auto DepsIt = FlowConditionDeps.find(CurrentToken);
220 DepsIt != FlowConditionDeps.end())
221 for (Atom A : DepsIt->second)
222 if (Tokens.insert(A).second)
223 Remaining.push_back(A);
224 }
225
226 return Tokens;
227}
228
229void DataflowAnalysisContext::addTransitiveFlowConditionConstraints(
230 Atom Token, llvm::SetVector<const Formula *> &Constraints) {
231 llvm::DenseSet<Atom> AddedTokens;
232 std::vector<Atom> Remaining = {Token};
233
234 if (Invariant)
235 Constraints.insert(Invariant);
236 // Define all the flow conditions that might be referenced in constraints.
237 while (!Remaining.empty()) {
238 auto Token = Remaining.back();
239 Remaining.pop_back();
240 if (!AddedTokens.insert(Token).second)
241 continue;
242
243 auto ConstraintsIt = FlowConditionConstraints.find(Token);
244 if (ConstraintsIt == FlowConditionConstraints.end()) {
245 // The flow condition is unconstrained. Just add the atom directly, which
246 // is equivalent to asserting it is true.
247 Constraints.insert(&arena().makeAtomRef(Token));
248 } else {
249 // Bind flow condition token via `iff` to its set of constraints:
250 // FC <=> (C1 ^ C2 ^ ...), where Ci are constraints
251 Constraints.insert(&arena().makeEquals(arena().makeAtomRef(Token),
252 *ConstraintsIt->second));
253 }
254
255 if (auto DepsIt = FlowConditionDeps.find(Token);
256 DepsIt != FlowConditionDeps.end())
257 for (Atom A : DepsIt->second)
258 Remaining.push_back(A);
259 }
260}
261
262static void getReferencedAtoms(const Formula &F,
263 llvm::DenseSet<dataflow::Atom> &Refs) {
264 switch (F.kind()) {
265 case Formula::AtomRef:
266 Refs.insert(F.getAtom());
267 break;
268 case Formula::Literal:
269 break;
270 case Formula::Not:
271 getReferencedAtoms(*F.operands()[0], Refs);
272 break;
273 case Formula::And:
274 case Formula::Or:
275 case Formula::Implies:
276 case Formula::Equal:
277 ArrayRef<const Formula *> Operands = F.operands();
278 getReferencedAtoms(*Operands[0], Refs);
279 getReferencedAtoms(*Operands[1], Refs);
280 break;
281 }
282}
283
285 llvm::DenseSet<dataflow::Atom> TargetTokens) const {
287
288 // Copy `Invariant` even if it is null, to initialize the field.
289 LC.Invariant = Invariant;
290 if (Invariant != nullptr)
291 getReferencedAtoms(*Invariant, TargetTokens);
292
293 llvm::DenseSet<dataflow::Atom> Dependencies =
294 collectDependencies(std::move(TargetTokens));
295
296 for (dataflow::Atom Token : Dependencies) {
297 // Only process the token if it is constrained. Unconstrained tokens don't
298 // have dependencies.
299 const Formula *Constraints = FlowConditionConstraints.lookup(Token);
300 if (Constraints == nullptr)
301 continue;
302 LC.TokenDefs[Token] = Constraints;
303
304 if (auto DepsIt = FlowConditionDeps.find(Token);
305 DepsIt != FlowConditionDeps.end())
306 LC.TokenDeps[Token] = DepsIt->second;
307 }
308
309 return LC;
310}
311
313 Invariant = LC.Invariant;
314 FlowConditionConstraints = std::move(LC.TokenDefs);
315 // TODO: The dependencies in `LC.TokenDeps` can be reconstructed from
316 // `LC.TokenDefs`. Give the caller the option to reconstruct, rather than
317 // providing them directly, to save caller space (memory/disk).
318 FlowConditionDeps = std::move(LC.TokenDeps);
319}
320
321static void printAtomList(const llvm::SmallVector<Atom> &Atoms,
322 llvm::raw_ostream &OS) {
323 OS << "(";
324 for (size_t i = 0; i < Atoms.size(); ++i) {
325 OS << Atoms[i];
326 if (i + 1 < Atoms.size())
327 OS << ", ";
328 }
329 OS << ")\n";
330}
331
333 llvm::raw_ostream &OS) {
334 llvm::SetVector<const Formula *> Constraints;
335 Constraints.insert(&arena().makeAtomRef(Token));
336 addTransitiveFlowConditionConstraints(Token, Constraints);
337
338 OS << "Flow condition token: " << Token << "\n";
340 llvm::SetVector<const Formula *> OriginalConstraints = Constraints;
341 simplifyConstraints(Constraints, arena(), &Info);
342 if (!Constraints.empty()) {
343 OS << "Constraints:\n";
344 for (const auto *Constraint : Constraints) {
345 Constraint->print(OS);
346 OS << "\n";
347 }
348 }
349 if (!Info.TrueAtoms.empty()) {
350 OS << "True atoms: ";
351 printAtomList(Info.TrueAtoms, OS);
352 }
353 if (!Info.FalseAtoms.empty()) {
354 OS << "False atoms: ";
355 printAtomList(Info.FalseAtoms, OS);
356 }
357 if (!Info.EquivalentAtoms.empty()) {
358 OS << "Equivalent atoms:\n";
360 printAtomList(Class, OS);
361 }
362
363 OS << "\nFlow condition constraints before simplification:\n";
364 for (const auto *Constraint : OriginalConstraints) {
365 Constraint->print(OS);
366 OS << "\n";
367 }
368}
369
370const AdornedCFG *
372 // Canonicalize the key:
373 F = F->getDefinition();
374 if (F == nullptr)
375 return nullptr;
376 auto It = FunctionContexts.find(F);
377 if (It != FunctionContexts.end())
378 return &It->second;
379
381 auto ACFG = AdornedCFG::build(*F);
382 // FIXME: Handle errors.
383 assert(ACFG);
384 auto Result = FunctionContexts.insert({F, std::move(*ACFG)});
385 return &Result.first->second;
386 }
387
388 return nullptr;
389}
390
391static std::unique_ptr<Logger> makeLoggerFromCommandLine() {
392 if (DataflowLog.empty())
393 return Logger::textual(llvm::errs());
394
395 llvm::StringRef Dir = DataflowLog;
396 if (auto EC = llvm::sys::fs::create_directories(Dir))
397 llvm::errs() << "Failed to create log dir: " << EC.message() << "\n";
398 // All analysis runs within a process will log to the same directory.
399 // Share a counter so they don't all overwrite each other's 0.html.
400 // (Don't share a logger, it's not threadsafe).
401 static std::atomic<unsigned> Counter = {0};
402 auto StreamFactory =
403 [Dir(Dir.str())]() mutable -> std::unique_ptr<llvm::raw_ostream> {
405 llvm::sys::path::append(File,
406 std::to_string(Counter.fetch_add(1)) + ".html");
407 std::error_code EC;
408 auto OS = std::make_unique<llvm::raw_fd_ostream>(File, EC);
409 if (EC) {
410 llvm::errs() << "Failed to create log " << File << ": " << EC.message()
411 << "\n";
412 return std::make_unique<llvm::raw_null_ostream>();
413 }
414 return OS;
415 };
416 return Logger::html(std::move(StreamFactory));
417}
418
420 Solver &S, std::unique_ptr<Solver> &&OwnedSolver, Options Opts)
421 : S(S), OwnedSolver(std::move(OwnedSolver)), A(std::make_unique<Arena>()),
422 Opts(Opts) {
423 // If the -dataflow-log command-line flag was set, synthesize a logger.
424 // This is ugly but provides a uniform method for ad-hoc debugging dataflow-
425 // based tools.
426 if (Opts.Log == nullptr) {
427 if (DataflowLog.getNumOccurrences() > 0) {
428 LogOwner = makeLoggerFromCommandLine();
429 this->Opts.Log = LogOwner.get();
430 // FIXME: if the flag is given a value, write an HTML log to a file.
431 } else {
432 this->Opts.Log = &Logger::null();
433 }
434 }
435}
436
// Defaulted destructor, defined out of line in this translation unit.
// NOTE(review): presumably kept here so that owning members such as the
// `std::unique_ptr`s set up in the constructor are destroyed where their
// pointee types are complete -- confirm against the class declaration.
DataflowAnalysisContext::~DataflowAnalysisContext() = default;
438
439} // namespace dataflow
440} // namespace clang
static llvm::cl::opt< std::string > DataflowLog("dataflow-log", llvm::cl::Hidden, llvm::cl::ValueOptional, llvm::cl::desc("Emit log of dataflow analysis. With no arg, writes textual " "log to stderr. With an arg, writes HTML logs under the " "specified directory (one per analyzed function)."))
This represents one expression.
Definition Expr.h:112
QualType getType() const
Definition Expr.h:144
Represents a member of a struct/union/class.
Definition Decl.h:3157
Represents a function declaration or definition.
Definition Decl.h:1999
bool doesThisDeclarationHaveABody() const
Returns whether this specific declaration of the function has a body.
Definition Decl.h:2325
FunctionDecl * getDefinition()
Get the definition for this declaration.
Definition Decl.h:2281
A (possibly-)qualified type.
Definition TypeBase.h:937
bool isNull() const
Return true if this QualType doesn't point to a type yet.
Definition TypeBase.h:1004
QualType getNonReferenceType() const
If Type is a reference type (e.g., const int&), returns the type that the reference refers to ("const...
Definition TypeBase.h:8470
QualType getCanonicalType() const
Definition TypeBase.h:8337
Token - This structure provides full information about a lexed token.
Definition Token.h:36
The base class of the type hierarchy.
Definition TypeBase.h:1833
bool isRecordType() const
Definition TypeBase.h:8649
Represent the declaration of a variable (in which case it is an lvalue) a function (in which case it ...
Definition Decl.h:711
QualType getType() const
Definition Decl.h:722
Holds CFG with additional information derived from it that is needed to perform dataflow analysis.
Definition AdornedCFG.h:47
static llvm::Expected< AdornedCFG > build(const FunctionDecl &Func)
Builds an AdornedCFG from a FunctionDecl.
Atom makeFlowConditionToken()
Creates a fresh flow condition and returns a token that identifies it.
Definition Arena.h:124
const Formula & makeAnd(const Formula &LHS, const Formula &RHS)
Returns a formula for the conjunction of LHS and RHS.
Definition Arena.cpp:41
std::enable_if_t< std::is_base_of< StorageLocation, T >::value, T & > create(Args &&...args)
Creates a T (some subclass of StorageLocation), forwarding args to the constructor,...
Definition Arena.h:36
const AdornedCFG * getAdornedCFG(const FunctionDecl *F)
Returns the AdornedCFG registered for F, if any.
DataflowAnalysisContext(std::unique_ptr< Solver > S, Options Opts=Options{ std::nullopt, nullptr})
Constructs a dataflow analysis context.
Atom joinFlowConditions(Atom FirstToken, Atom SecondToken)
Creates a new flow condition that represents the disjunction of the flow conditions identified by Fir...
void addFlowConditionConstraint(Atom Token, const Formula &Constraint)
Adds Constraint to the flow condition identified by Token.
Atom forkFlowCondition(Atom Token)
Creates a new flow condition with the same constraints as the flow condition identified by Token and ...
bool equivalentFormulas(const Formula &Val1, const Formula &Val2)
Returns true if Val1 is equivalent to Val2.
StorageLocation & getStableStorageLocation(const ValueDecl &D)
Returns a stable storage location for D.
bool flowConditionImplies(Atom Token, const Formula &F)
Returns true if the constraints of the flow condition identified by Token imply that F is true.
Solver::Result querySolver(llvm::SetVector< const Formula * > Constraints)
Returns the outcome of satisfiability checking on Constraints.
bool flowConditionAllows(Atom Token, const Formula &F)
Returns true if the constraints of the flow condition identified by Token still allow F to be true.
PointerValue & getOrCreateNullPointerValue(QualType PointeeType)
Returns a pointer value that represents a null pointer.
void addInvariant(const Formula &Constraint)
Adds Constraint to current and future flow conditions in this context.
llvm::StringMap< QualType > getSyntheticFields(QualType Type)
Returns the names and types of the synthetic fields for the given record type.
StorageLocation & createStorageLocation(QualType Type)
Returns a new storage location appropriate for Type.
SimpleLogicalContext exportLogicalContext(llvm::DenseSet< dataflow::Atom > TargetTokens) const
Export the logical-context portions of AC, limited to the given target flow-condition tokens.
FieldSet getModeledFields(QualType Type)
Returns the fields of Type, limited to the set of fields modeled by this context.
LLVM_DUMP_METHOD void dumpFlowCondition(Atom Token, llvm::raw_ostream &OS=llvm::dbgs())
void initLogicalContext(SimpleLogicalContext LC)
Initializes this context's "logical" components with LC.
RecordStorageLocation & createRecordStorageLocation(QualType Type, RecordStorageLocation::FieldToLoc FieldLocs, RecordStorageLocation::SyntheticFieldMap SyntheticFields)
Creates a RecordStorageLocation for the given type and with the given fields.
ArrayRef< const Formula * > operands() const
Definition Formula.h:82
bool isLiteral(bool b) const
Definition Formula.h:78
Atom getAtom() const
Definition Formula.h:68
@ Equal
True if LHS and RHS have the same truth value.
Definition Formula.h:64
@ Implies
True if LHS is false or RHS is true.
Definition Formula.h:63
@ AtomRef
A reference to an atomic boolean variable.
Definition Formula.h:54
@ Literal
Constant true or false.
Definition Formula.h:56
@ Or
True if either LHS or RHS is true.
Definition Formula.h:62
@ And
True if LHS and RHS are both true.
Definition Formula.h:61
Kind kind() const
Definition Formula.h:66
static std::unique_ptr< Logger > textual(llvm::raw_ostream &)
A logger that simply writes messages to the specified ostream in real time.
Definition Logger.cpp:107
static std::unique_ptr< Logger > html(std::function< std::unique_ptr< llvm::raw_ostream >()>)
A logger that builds an HTML UI to inspect the analysis results.
Models a symbolic pointer. Specifically, any value of type T*.
Definition Value.h:170
A storage location for a record (struct, class, or union).
llvm::DenseMap< const ValueDecl *, StorageLocation * > FieldToLoc
llvm::StringMap< StorageLocation * > SyntheticFieldMap
A storage location that is not subdivided further for the purposes of abstract interpretation.
Base class for elements of the local variable store and of the heap.
static void getReferencedAtoms(const Formula &F, llvm::DenseSet< dataflow::Atom > &Refs)
Atom
Identifies an atomic boolean variable such as "V1".
Definition Formula.h:34
static void printAtomList(const llvm::SmallVector< Atom > &Atoms, llvm::raw_ostream &OS)
void simplifyConstraints(llvm::SetVector< const Formula * > &Constraints, Arena &arena, SimplifyConstraintsInfo *Info=nullptr)
Simplifies a set of constraints (implicitly connected by "and") in a way that does not change satisfi...
const Expr & ignoreCFGOmittedNodes(const Expr &E)
Skip past nodes that the CFG does not emit.
Definition ASTOps.cpp:35
FieldSet getObjectFields(QualType Type)
Returns the set of all fields in the type.
Definition ASTOps.cpp:74
static std::unique_ptr< Logger > makeLoggerFromCommandLine()
static llvm::DenseSet< llvm::StringRef > getKeys(const llvm::StringMap< T > &Map)
bool containsSameFields(const FieldSet &Fields, const RecordStorageLocation::FieldToLoc &FieldLocs)
Returns whether Fields and FieldLocs contain the same fields.
Definition ASTOps.cpp:80
llvm::SmallSetVector< const FieldDecl *, 4 > FieldSet
A set of FieldDecl *.
Definition ASTOps.h:43
The JSON file list parser is used to communicate input to InstallAPI.
@ Result
The result type of a method or function.
Definition TypeBase.h:905
@ Invariant
The parameter is invariant: must match exactly.
Definition DeclObjC.h:555
@ Class
The "class" keyword introduces the elaborated-type-specifier.
Definition TypeBase.h:5874
A simple representation of essential elements of the logical context used in environments.
llvm::DenseMap< Atom, const Formula * > TokenDefs
llvm::DenseMap< Atom, llvm::DenseSet< Atom > > TokenDeps
Information on the way a set of constraints was simplified.
llvm::SmallVector< Atom > TrueAtoms
Atoms that the original constraints imply must be true.
llvm::SmallVector< llvm::SmallVector< Atom > > EquivalentAtoms
List of equivalence classes of atoms.
llvm::SmallVector< Atom > FalseAtoms
Atoms that the original constraints imply must be false.