clang 19.0.0git
DataflowAnalysisContext.cpp
Go to the documentation of this file.
1//===-- DataflowAnalysisContext.cpp -----------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines a DataflowAnalysisContext class that owns objects that
10// encompass the state of a program and stores context that is used during
11// dataflow analysis.
12//
13//===----------------------------------------------------------------------===//
14
16#include "clang/AST/ExprCXX.h"
23#include "llvm/ADT/SetOperations.h"
24#include "llvm/ADT/SetVector.h"
25#include "llvm/Support/CommandLine.h"
26#include "llvm/Support/Debug.h"
27#include "llvm/Support/FileSystem.h"
28#include "llvm/Support/Path.h"
29#include "llvm/Support/raw_ostream.h"
30#include <cassert>
31#include <memory>
32#include <string>
33#include <utility>
34#include <vector>
35
36static llvm::cl::opt<std::string> DataflowLog(
37 "dataflow-log", llvm::cl::Hidden, llvm::cl::ValueOptional,
38 llvm::cl::desc("Emit log of dataflow analysis. With no arg, writes textual "
39 "log to stderr. With an arg, writes HTML logs under the "
40 "specified directory (one per analyzed function)."));
41
42namespace clang {
43namespace dataflow {
44
46 // During context-sensitive analysis, a struct may be allocated in one
47 // function, but its field accessed in a function lower in the stack than
48 // the allocation. Since we only collect fields used in the function where
49 // the allocation occurs, we can't apply that filter when performing
50 // context-sensitive analysis. But, this only applies to storage locations,
51 // since field access it not allowed to fail. In contrast, field *values*
52 // don't need this allowance, since the API allows for uninitialized fields.
53 if (Opts.ContextSensitiveOpts)
54 return getObjectFields(Type);
55
56 return llvm::set_intersection(getObjectFields(Type), ModeledFields);
57}
58
59void DataflowAnalysisContext::addModeledFields(const FieldSet &Fields) {
60 ModeledFields.set_union(Fields);
61}
62
64 if (!Type.isNull() && Type->isRecordType()) {
65 llvm::DenseMap<const ValueDecl *, StorageLocation *> FieldLocs;
66 for (const FieldDecl *Field : getModeledFields(Type))
67 if (Field->getType()->isReferenceType())
68 FieldLocs.insert({Field, nullptr});
69 else
70 FieldLocs.insert({Field, &createStorageLocation(
71 Field->getType().getNonReferenceType())});
72
74 for (const auto &Entry : getSyntheticFields(Type))
75 SyntheticFields.insert(
76 {Entry.getKey(),
77 &createStorageLocation(Entry.getValue().getNonReferenceType())});
78
79 return createRecordStorageLocation(Type, std::move(FieldLocs),
80 std::move(SyntheticFields));
81 }
82 return arena().create<ScalarStorageLocation>(Type);
83}
84
85// Returns the keys for a given `StringMap`.
86// Can't use `StringSet` as the return type as it doesn't support `operator==`.
87template <typename T>
88static llvm::DenseSet<llvm::StringRef> getKeys(const llvm::StringMap<T> &Map) {
89 return llvm::DenseSet<llvm::StringRef>(Map.keys().begin(), Map.keys().end());
90}
91
92RecordStorageLocation &DataflowAnalysisContext::createRecordStorageLocation(
95 assert(Type->isRecordType());
96 assert(containsSameFields(getModeledFields(Type), FieldLocs));
97 assert(getKeys(getSyntheticFields(Type)) == getKeys(SyntheticFields));
98
99 RecordStorageLocationCreated = true;
100 return arena().create<RecordStorageLocation>(Type, std::move(FieldLocs),
101 std::move(SyntheticFields));
102}
103
105DataflowAnalysisContext::getStableStorageLocation(const ValueDecl &D) {
106 if (auto *Loc = DeclToLoc.lookup(&D))
107 return *Loc;
108 auto &Loc = createStorageLocation(D.getType().getNonReferenceType());
109 DeclToLoc[&D] = &Loc;
110 return Loc;
111}
112
114DataflowAnalysisContext::getStableStorageLocation(const Expr &E) {
115 const Expr &CanonE = ignoreCFGOmittedNodes(E);
116
117 if (auto *Loc = ExprToLoc.lookup(&CanonE))
118 return *Loc;
119 auto &Loc = createStorageLocation(CanonE.getType());
120 ExprToLoc[&CanonE] = &Loc;
121 return Loc;
122}
123
125DataflowAnalysisContext::getOrCreateNullPointerValue(QualType PointeeType) {
126 auto CanonicalPointeeType =
127 PointeeType.isNull() ? PointeeType : PointeeType.getCanonicalType();
128 auto Res = NullPointerVals.try_emplace(CanonicalPointeeType, nullptr);
129 if (Res.second) {
130 auto &PointeeLoc = createStorageLocation(CanonicalPointeeType);
131 Res.first->second = &arena().create<PointerValue>(PointeeLoc);
132 }
133 return *Res.first->second;
134}
135
136void DataflowAnalysisContext::addInvariant(const Formula &Constraint) {
137 if (Invariant == nullptr)
138 Invariant = &Constraint;
139 else
140 Invariant = &arena().makeAnd(*Invariant, Constraint);
141}
142
143void DataflowAnalysisContext::addFlowConditionConstraint(
144 Atom Token, const Formula &Constraint) {
145 auto Res = FlowConditionConstraints.try_emplace(Token, &Constraint);
146 if (!Res.second) {
147 Res.first->second =
148 &arena().makeAnd(*Res.first->second, Constraint);
149 }
150}
151
152Atom DataflowAnalysisContext::forkFlowCondition(Atom Token) {
153 Atom ForkToken = arena().makeFlowConditionToken();
154 FlowConditionDeps[ForkToken].insert(Token);
155 addFlowConditionConstraint(ForkToken, arena().makeAtomRef(Token));
156 return ForkToken;
157}
158
159Atom
160DataflowAnalysisContext::joinFlowConditions(Atom FirstToken,
161 Atom SecondToken) {
162 Atom Token = arena().makeFlowConditionToken();
163 FlowConditionDeps[Token].insert(FirstToken);
164 FlowConditionDeps[Token].insert(SecondToken);
165 addFlowConditionConstraint(Token,
166 arena().makeOr(arena().makeAtomRef(FirstToken),
167 arena().makeAtomRef(SecondToken)));
168 return Token;
169}
170
171Solver::Result DataflowAnalysisContext::querySolver(
172 llvm::SetVector<const Formula *> Constraints) {
173 return S.solve(Constraints.getArrayRef());
174}
175
176bool DataflowAnalysisContext::flowConditionImplies(Atom Token,
177 const Formula &F) {
178 if (F.isLiteral(true))
179 return true;
180
181 // Returns true if and only if truth assignment of the flow condition implies
182 // that `F` is also true. We prove whether or not this property holds by
183 // reducing the problem to satisfiability checking. In other words, we attempt
184 // to show that assuming `F` is false makes the constraints induced by the
185 // flow condition unsatisfiable.
186 llvm::SetVector<const Formula *> Constraints;
187 Constraints.insert(&arena().makeAtomRef(Token));
188 Constraints.insert(&arena().makeNot(F));
189 addTransitiveFlowConditionConstraints(Token, Constraints);
190 return isUnsatisfiable(std::move(Constraints));
191}
192
193bool DataflowAnalysisContext::flowConditionAllows(Atom Token,
194 const Formula &F) {
195 if (F.isLiteral(false))
196 return false;
197
198 llvm::SetVector<const Formula *> Constraints;
199 Constraints.insert(&arena().makeAtomRef(Token));
200 Constraints.insert(&F);
201 addTransitiveFlowConditionConstraints(Token, Constraints);
202 return isSatisfiable(std::move(Constraints));
203}
204
205bool DataflowAnalysisContext::equivalentFormulas(const Formula &Val1,
206 const Formula &Val2) {
207 llvm::SetVector<const Formula *> Constraints;
208 Constraints.insert(&arena().makeNot(arena().makeEquals(Val1, Val2)));
209 return isUnsatisfiable(std::move(Constraints));
210}
211
212void DataflowAnalysisContext::addTransitiveFlowConditionConstraints(
213 Atom Token, llvm::SetVector<const Formula *> &Constraints) {
214 llvm::DenseSet<Atom> AddedTokens;
215 std::vector<Atom> Remaining = {Token};
216
217 if (Invariant)
218 Constraints.insert(Invariant);
219 // Define all the flow conditions that might be referenced in constraints.
220 while (!Remaining.empty()) {
221 auto Token = Remaining.back();
222 Remaining.pop_back();
223 if (!AddedTokens.insert(Token).second)
224 continue;
225
226 auto ConstraintsIt = FlowConditionConstraints.find(Token);
227 if (ConstraintsIt == FlowConditionConstraints.end()) {
228 Constraints.insert(&arena().makeAtomRef(Token));
229 } else {
230 // Bind flow condition token via `iff` to its set of constraints:
231 // FC <=> (C1 ^ C2 ^ ...), where Ci are constraints
232 Constraints.insert(&arena().makeEquals(arena().makeAtomRef(Token),
233 *ConstraintsIt->second));
234 }
235
236 if (auto DepsIt = FlowConditionDeps.find(Token);
237 DepsIt != FlowConditionDeps.end())
238 for (Atom A : DepsIt->second)
239 Remaining.push_back(A);
240 }
241}
242
243static void printAtomList(const llvm::SmallVector<Atom> &Atoms,
244 llvm::raw_ostream &OS) {
245 OS << "(";
246 for (size_t i = 0; i < Atoms.size(); ++i) {
247 OS << Atoms[i];
248 if (i + 1 < Atoms.size())
249 OS << ", ";
250 }
251 OS << ")\n";
252}
253
254void DataflowAnalysisContext::dumpFlowCondition(Atom Token,
255 llvm::raw_ostream &OS) {
256 llvm::SetVector<const Formula *> Constraints;
257 Constraints.insert(&arena().makeAtomRef(Token));
258 addTransitiveFlowConditionConstraints(Token, Constraints);
259
260 OS << "Flow condition token: " << Token << "\n";
262 llvm::SetVector<const Formula *> OriginalConstraints = Constraints;
263 simplifyConstraints(Constraints, arena(), &Info);
264 if (!Constraints.empty()) {
265 OS << "Constraints:\n";
266 for (const auto *Constraint : Constraints) {
267 Constraint->print(OS);
268 OS << "\n";
269 }
270 }
271 if (!Info.TrueAtoms.empty()) {
272 OS << "True atoms: ";
273 printAtomList(Info.TrueAtoms, OS);
274 }
275 if (!Info.FalseAtoms.empty()) {
276 OS << "False atoms: ";
277 printAtomList(Info.FalseAtoms, OS);
278 }
279 if (!Info.EquivalentAtoms.empty()) {
280 OS << "Equivalent atoms:\n";
282 printAtomList(Class, OS);
283 }
284
285 OS << "\nFlow condition constraints before simplification:\n";
286 for (const auto *Constraint : OriginalConstraints) {
287 Constraint->print(OS);
288 OS << "\n";
289 }
290}
291
292const AdornedCFG *
293DataflowAnalysisContext::getAdornedCFG(const FunctionDecl *F) {
294 // Canonicalize the key:
295 F = F->getDefinition();
296 if (F == nullptr)
297 return nullptr;
298 auto It = FunctionContexts.find(F);
299 if (It != FunctionContexts.end())
300 return &It->second;
301
303 auto ACFG = AdornedCFG::build(*F);
304 // FIXME: Handle errors.
305 assert(ACFG);
306 auto Result = FunctionContexts.insert({F, std::move(*ACFG)});
307 return &Result.first->second;
308 }
309
310 return nullptr;
311}
312
313static std::unique_ptr<Logger> makeLoggerFromCommandLine() {
314 if (DataflowLog.empty())
315 return Logger::textual(llvm::errs());
316
317 llvm::StringRef Dir = DataflowLog;
318 if (auto EC = llvm::sys::fs::create_directories(Dir))
319 llvm::errs() << "Failed to create log dir: " << EC.message() << "\n";
320 // All analysis runs within a process will log to the same directory.
321 // Share a counter so they don't all overwrite each other's 0.html.
322 // (Don't share a logger, it's not threadsafe).
323 static std::atomic<unsigned> Counter = {0};
324 auto StreamFactory =
325 [Dir(Dir.str())]() mutable -> std::unique_ptr<llvm::raw_ostream> {
327 llvm::sys::path::append(File,
328 std::to_string(Counter.fetch_add(1)) + ".html");
329 std::error_code EC;
330 auto OS = std::make_unique<llvm::raw_fd_ostream>(File, EC);
331 if (EC) {
332 llvm::errs() << "Failed to create log " << File << ": " << EC.message()
333 << "\n";
334 return std::make_unique<llvm::raw_null_ostream>();
335 }
336 return OS;
337 };
338 return Logger::html(std::move(StreamFactory));
339}
340
341DataflowAnalysisContext::DataflowAnalysisContext(
342 Solver &S, std::unique_ptr<Solver> &&OwnedSolver, Options Opts)
343 : S(S), OwnedSolver(std::move(OwnedSolver)), A(std::make_unique<Arena>()),
344 Opts(Opts) {
345 // If the -dataflow-log command-line flag was set, synthesize a logger.
346 // This is ugly but provides a uniform method for ad-hoc debugging dataflow-
347 // based tools.
348 if (Opts.Log == nullptr) {
349 if (DataflowLog.getNumOccurrences() > 0) {
350 LogOwner = makeLoggerFromCommandLine();
351 this->Opts.Log = LogOwner.get();
352 // FIXME: if the flag is given a value, write an HTML log to a file.
353 } else {
354 this->Opts.Log = &Logger::null();
355 }
356 }
357}
358
359DataflowAnalysisContext::~DataflowAnalysisContext() = default;
360
361} // namespace dataflow
362} // namespace clang
static llvm::cl::opt< std::string > DataflowLog("dataflow-log", llvm::cl::Hidden, llvm::cl::ValueOptional, llvm::cl::desc("Emit log of dataflow analysis. With no arg, writes textual " "log to stderr. With an arg, writes HTML logs under the " "specified directory (one per analyzed function)."))
Defines the clang::Expr interface and subclasses for C++ expressions.
SourceLocation Loc
Definition: SemaObjC.cpp:758
This represents one expression.
Definition: Expr.h:110
QualType getType() const
Definition: Expr.h:142
Represents a member of a struct/union/class.
Definition: Decl.h:3059
Represents a function declaration or definition.
Definition: Decl.h:1971
bool doesThisDeclarationHaveABody() const
Returns whether this specific declaration of the function has a body.
Definition: Decl.h:2297
FunctionDecl * getDefinition()
Get the definition for this declaration.
Definition: Decl.h:2253
A (possibly-)qualified type.
Definition: Type.h:940
bool isNull() const
Return true if this QualType doesn't point to a type yet.
Definition: Type.h:1007
QualType getNonReferenceType() const
If Type is a reference type (e.g., const int&), returns the type that the reference refers to ("const...
Definition: Type.h:7573
QualType getCanonicalType() const
Definition: Type.h:7424
Token - This structure provides full information about a lexed token.
Definition: Token.h:36
The base class of the type hierarchy.
Definition: Type.h:1827
bool isRecordType() const
Definition: Type.h:7719
Represent the declaration of a variable (in which case it is an lvalue) a function (in which case it ...
Definition: Decl.h:706
QualType getType() const
Definition: Decl.h:717
Holds CFG with additional information derived from it that is needed to perform dataflow analysis.
Definition: AdornedCFG.h:32
llvm::StringMap< QualType > getSyntheticFields(QualType Type)
Returns the names and types of the synthetic fields for the given record type.
StorageLocation & createStorageLocation(QualType Type)
Returns a new storage location appropriate for Type.
FieldSet getModeledFields(QualType Type)
Returns the fields of Type, limited to the set of fields modeled by this context.
RecordStorageLocation & createRecordStorageLocation(QualType Type, RecordStorageLocation::FieldToLoc FieldLocs, RecordStorageLocation::SyntheticFieldMap SyntheticFields)
Creates a RecordStorageLocation for the given type and with the given fields.
bool isLiteral(bool b) const
Definition: Formula.h:78
static Logger & null()
Returns a dummy logger that does nothing.
Definition: Logger.cpp:16
Models a symbolic pointer. Specifically, any value of type T*.
Definition: Value.h:170
A storage location for a record (struct, class, or union).
llvm::DenseMap< const ValueDecl *, StorageLocation * > FieldToLoc
llvm::StringMap< StorageLocation * > SyntheticFieldMap
A storage location that is not subdivided further for the purposes of abstract interpretation.
Base class for elements of the local variable store and of the heap.
Atom
Identifies an atomic boolean variable such as "V1".
Definition: Formula.h:34
static void printAtomList(const llvm::SmallVector< Atom > &Atoms, llvm::raw_ostream &OS)
void simplifyConstraints(llvm::SetVector< const Formula * > &Constraints, Arena &arena, SimplifyConstraintsInfo *Info=nullptr)
Simplifies a set of constraints (implicitly connected by "and") in a way that does not change satisfi...
const Expr & ignoreCFGOmittedNodes(const Expr &E)
Skip past nodes that the CFG does not emit.
Definition: ASTOps.cpp:34
FieldSet getObjectFields(QualType Type)
Returns the set of all fields in the type.
Definition: ASTOps.cpp:74
static std::unique_ptr< Logger > makeLoggerFromCommandLine()
static llvm::DenseSet< llvm::StringRef > getKeys(const llvm::StringMap< T > &Map)
bool containsSameFields(const FieldSet &Fields, const RecordStorageLocation::FieldToLoc &FieldLocs)
Returns whether Fields and FieldLocs contain the same fields.
Definition: ASTOps.cpp:80
The JSON file list parser is used to communicate input to InstallAPI.
@ Result
The result type of a method or function.
@ Invariant
The parameter is invariant: must match exactly.
@ Class
The "class" keyword introduces the elaborated-type-specifier.
std::optional< ContextSensitiveOptions > ContextSensitiveOpts
Options for analyzing function bodies when present in the translation unit, or empty to disable conte...
Information on the way a set of constraints was simplified.
llvm::SmallVector< Atom > TrueAtoms
Atoms that the original constraints imply must be true.
llvm::SmallVector< llvm::SmallVector< Atom > > EquivalentAtoms
List of equivalence classes of atoms.
llvm::SmallVector< Atom > FalseAtoms
Atoms that the original constraints imply must be false.