clang 19.0.0git
DataflowAnalysisContext.cpp
Go to the documentation of this file.
1//===-- DataflowAnalysisContext.cpp -----------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines a DataflowAnalysisContext class that owns objects that
10// encompass the state of a program and stores context that is used during
11// dataflow analysis.
12//
13//===----------------------------------------------------------------------===//
14
16#include "clang/AST/ExprCXX.h"
22#include "llvm/ADT/SetOperations.h"
23#include "llvm/ADT/SetVector.h"
24#include "llvm/Support/CommandLine.h"
25#include "llvm/Support/Debug.h"
26#include "llvm/Support/FileSystem.h"
27#include "llvm/Support/Path.h"
28#include "llvm/Support/raw_ostream.h"
29#include <cassert>
30#include <memory>
31#include <string>
32#include <utility>
33#include <vector>
34
35static llvm::cl::opt<std::string> DataflowLog(
36 "dataflow-log", llvm::cl::Hidden, llvm::cl::ValueOptional,
37 llvm::cl::desc("Emit log of dataflow analysis. With no arg, writes textual "
38 "log to stderr. With an arg, writes HTML logs under the "
39 "specified directory (one per analyzed function)."));
40
41namespace clang {
42namespace dataflow {
43
45 // During context-sensitive analysis, a struct may be allocated in one
46 // function, but its field accessed in a function lower in the stack than
47 // the allocation. Since we only collect fields used in the function where
48 // the allocation occurs, we can't apply that filter when performing
49 // context-sensitive analysis. But, this only applies to storage locations,
50 // since field access it not allowed to fail. In contrast, field *values*
51 // don't need this allowance, since the API allows for uninitialized fields.
52 if (Opts.ContextSensitiveOpts)
53 return getObjectFields(Type);
54
55 return llvm::set_intersection(getObjectFields(Type), ModeledFields);
56}
57
58void DataflowAnalysisContext::addModeledFields(const FieldSet &Fields) {
59 ModeledFields.set_union(Fields);
60}
61
63 if (!Type.isNull() && Type->isRecordType()) {
64 llvm::DenseMap<const ValueDecl *, StorageLocation *> FieldLocs;
65 for (const FieldDecl *Field : getModeledFields(Type))
66 if (Field->getType()->isReferenceType())
67 FieldLocs.insert({Field, nullptr});
68 else
69 FieldLocs.insert({Field, &createStorageLocation(
70 Field->getType().getNonReferenceType())});
71
73 for (const auto &Entry : getSyntheticFields(Type))
74 SyntheticFields.insert(
75 {Entry.getKey(),
76 &createStorageLocation(Entry.getValue().getNonReferenceType())});
77
78 return createRecordStorageLocation(Type, std::move(FieldLocs),
79 std::move(SyntheticFields));
80 }
81 return arena().create<ScalarStorageLocation>(Type);
82}
83
84// Returns the keys for a given `StringMap`.
85// Can't use `StringSet` as the return type as it doesn't support `operator==`.
86template <typename T>
87static llvm::DenseSet<llvm::StringRef> getKeys(const llvm::StringMap<T> &Map) {
88 return llvm::DenseSet<llvm::StringRef>(Map.keys().begin(), Map.keys().end());
89}
90
91RecordStorageLocation &DataflowAnalysisContext::createRecordStorageLocation(
94 assert(Type->isRecordType());
95 assert(containsSameFields(getModeledFields(Type), FieldLocs));
96 assert(getKeys(getSyntheticFields(Type)) == getKeys(SyntheticFields));
97
98 RecordStorageLocationCreated = true;
99 return arena().create<RecordStorageLocation>(Type, std::move(FieldLocs),
100 std::move(SyntheticFields));
101}
102
104DataflowAnalysisContext::getStableStorageLocation(const ValueDecl &D) {
105 if (auto *Loc = DeclToLoc.lookup(&D))
106 return *Loc;
107 auto &Loc = createStorageLocation(D.getType().getNonReferenceType());
108 DeclToLoc[&D] = &Loc;
109 return Loc;
110}
111
113DataflowAnalysisContext::getStableStorageLocation(const Expr &E) {
114 const Expr &CanonE = ignoreCFGOmittedNodes(E);
115
116 if (auto *Loc = ExprToLoc.lookup(&CanonE))
117 return *Loc;
118 auto &Loc = createStorageLocation(CanonE.getType());
119 ExprToLoc[&CanonE] = &Loc;
120 return Loc;
121}
122
124DataflowAnalysisContext::getOrCreateNullPointerValue(QualType PointeeType) {
125 auto CanonicalPointeeType =
126 PointeeType.isNull() ? PointeeType : PointeeType.getCanonicalType();
127 auto Res = NullPointerVals.try_emplace(CanonicalPointeeType, nullptr);
128 if (Res.second) {
129 auto &PointeeLoc = createStorageLocation(CanonicalPointeeType);
130 Res.first->second = &arena().create<PointerValue>(PointeeLoc);
131 }
132 return *Res.first->second;
133}
134
135void DataflowAnalysisContext::addInvariant(const Formula &Constraint) {
136 if (Invariant == nullptr)
137 Invariant = &Constraint;
138 else
139 Invariant = &arena().makeAnd(*Invariant, Constraint);
140}
141
142void DataflowAnalysisContext::addFlowConditionConstraint(
143 Atom Token, const Formula &Constraint) {
144 auto Res = FlowConditionConstraints.try_emplace(Token, &Constraint);
145 if (!Res.second) {
146 Res.first->second =
147 &arena().makeAnd(*Res.first->second, Constraint);
148 }
149}
150
151Atom DataflowAnalysisContext::forkFlowCondition(Atom Token) {
152 Atom ForkToken = arena().makeFlowConditionToken();
153 FlowConditionDeps[ForkToken].insert(Token);
154 addFlowConditionConstraint(ForkToken, arena().makeAtomRef(Token));
155 return ForkToken;
156}
157
158Atom
159DataflowAnalysisContext::joinFlowConditions(Atom FirstToken,
160 Atom SecondToken) {
161 Atom Token = arena().makeFlowConditionToken();
162 FlowConditionDeps[Token].insert(FirstToken);
163 FlowConditionDeps[Token].insert(SecondToken);
164 addFlowConditionConstraint(Token,
165 arena().makeOr(arena().makeAtomRef(FirstToken),
166 arena().makeAtomRef(SecondToken)));
167 return Token;
168}
169
170Solver::Result DataflowAnalysisContext::querySolver(
171 llvm::SetVector<const Formula *> Constraints) {
172 return S->solve(Constraints.getArrayRef());
173}
174
175bool DataflowAnalysisContext::flowConditionImplies(Atom Token,
176 const Formula &F) {
177 if (F.isLiteral(true))
178 return true;
179
180 // Returns true if and only if truth assignment of the flow condition implies
181 // that `F` is also true. We prove whether or not this property holds by
182 // reducing the problem to satisfiability checking. In other words, we attempt
183 // to show that assuming `F` is false makes the constraints induced by the
184 // flow condition unsatisfiable.
185 llvm::SetVector<const Formula *> Constraints;
186 Constraints.insert(&arena().makeAtomRef(Token));
187 Constraints.insert(&arena().makeNot(F));
188 addTransitiveFlowConditionConstraints(Token, Constraints);
189 return isUnsatisfiable(std::move(Constraints));
190}
191
192bool DataflowAnalysisContext::flowConditionAllows(Atom Token,
193 const Formula &F) {
194 if (F.isLiteral(false))
195 return false;
196
197 llvm::SetVector<const Formula *> Constraints;
198 Constraints.insert(&arena().makeAtomRef(Token));
199 Constraints.insert(&F);
200 addTransitiveFlowConditionConstraints(Token, Constraints);
201 return isSatisfiable(std::move(Constraints));
202}
203
204bool DataflowAnalysisContext::equivalentFormulas(const Formula &Val1,
205 const Formula &Val2) {
206 llvm::SetVector<const Formula *> Constraints;
207 Constraints.insert(&arena().makeNot(arena().makeEquals(Val1, Val2)));
208 return isUnsatisfiable(std::move(Constraints));
209}
210
211void DataflowAnalysisContext::addTransitiveFlowConditionConstraints(
212 Atom Token, llvm::SetVector<const Formula *> &Constraints) {
213 llvm::DenseSet<Atom> AddedTokens;
214 std::vector<Atom> Remaining = {Token};
215
216 if (Invariant)
217 Constraints.insert(Invariant);
218 // Define all the flow conditions that might be referenced in constraints.
219 while (!Remaining.empty()) {
220 auto Token = Remaining.back();
221 Remaining.pop_back();
222 if (!AddedTokens.insert(Token).second)
223 continue;
224
225 auto ConstraintsIt = FlowConditionConstraints.find(Token);
226 if (ConstraintsIt == FlowConditionConstraints.end()) {
227 Constraints.insert(&arena().makeAtomRef(Token));
228 } else {
229 // Bind flow condition token via `iff` to its set of constraints:
230 // FC <=> (C1 ^ C2 ^ ...), where Ci are constraints
231 Constraints.insert(&arena().makeEquals(arena().makeAtomRef(Token),
232 *ConstraintsIt->second));
233 }
234
235 if (auto DepsIt = FlowConditionDeps.find(Token);
236 DepsIt != FlowConditionDeps.end())
237 for (Atom A : DepsIt->second)
238 Remaining.push_back(A);
239 }
240}
241
242static void printAtomList(const llvm::SmallVector<Atom> &Atoms,
243 llvm::raw_ostream &OS) {
244 OS << "(";
245 for (size_t i = 0; i < Atoms.size(); ++i) {
246 OS << Atoms[i];
247 if (i + 1 < Atoms.size())
248 OS << ", ";
249 }
250 OS << ")\n";
251}
252
253void DataflowAnalysisContext::dumpFlowCondition(Atom Token,
254 llvm::raw_ostream &OS) {
255 llvm::SetVector<const Formula *> Constraints;
256 Constraints.insert(&arena().makeAtomRef(Token));
257 addTransitiveFlowConditionConstraints(Token, Constraints);
258
259 OS << "Flow condition token: " << Token << "\n";
261 llvm::SetVector<const Formula *> OriginalConstraints = Constraints;
262 simplifyConstraints(Constraints, arena(), &Info);
263 if (!Constraints.empty()) {
264 OS << "Constraints:\n";
265 for (const auto *Constraint : Constraints) {
266 Constraint->print(OS);
267 OS << "\n";
268 }
269 }
270 if (!Info.TrueAtoms.empty()) {
271 OS << "True atoms: ";
272 printAtomList(Info.TrueAtoms, OS);
273 }
274 if (!Info.FalseAtoms.empty()) {
275 OS << "False atoms: ";
276 printAtomList(Info.FalseAtoms, OS);
277 }
278 if (!Info.EquivalentAtoms.empty()) {
279 OS << "Equivalent atoms:\n";
281 printAtomList(Class, OS);
282 }
283
284 OS << "\nFlow condition constraints before simplification:\n";
285 for (const auto *Constraint : OriginalConstraints) {
286 Constraint->print(OS);
287 OS << "\n";
288 }
289}
290
291const ControlFlowContext *
292DataflowAnalysisContext::getControlFlowContext(const FunctionDecl *F) {
293 // Canonicalize the key:
294 F = F->getDefinition();
295 if (F == nullptr)
296 return nullptr;
297 auto It = FunctionContexts.find(F);
298 if (It != FunctionContexts.end())
299 return &It->second;
300
302 auto CFCtx = ControlFlowContext::build(*F);
303 // FIXME: Handle errors.
304 assert(CFCtx);
305 auto Result = FunctionContexts.insert({F, std::move(*CFCtx)});
306 return &Result.first->second;
307 }
308
309 return nullptr;
310}
311
312static std::unique_ptr<Logger> makeLoggerFromCommandLine() {
313 if (DataflowLog.empty())
314 return Logger::textual(llvm::errs());
315
316 llvm::StringRef Dir = DataflowLog;
317 if (auto EC = llvm::sys::fs::create_directories(Dir))
318 llvm::errs() << "Failed to create log dir: " << EC.message() << "\n";
319 // All analysis runs within a process will log to the same directory.
320 // Share a counter so they don't all overwrite each other's 0.html.
321 // (Don't share a logger, it's not threadsafe).
322 static std::atomic<unsigned> Counter = {0};
323 auto StreamFactory =
324 [Dir(Dir.str())]() mutable -> std::unique_ptr<llvm::raw_ostream> {
326 llvm::sys::path::append(File,
327 std::to_string(Counter.fetch_add(1)) + ".html");
328 std::error_code EC;
329 auto OS = std::make_unique<llvm::raw_fd_ostream>(File, EC);
330 if (EC) {
331 llvm::errs() << "Failed to create log " << File << ": " << EC.message()
332 << "\n";
333 return std::make_unique<llvm::raw_null_ostream>();
334 }
335 return OS;
336 };
337 return Logger::html(std::move(StreamFactory));
338}
339
340DataflowAnalysisContext::DataflowAnalysisContext(std::unique_ptr<Solver> S,
341 Options Opts)
342 : S(std::move(S)), A(std::make_unique<Arena>()), Opts(Opts) {
343 assert(this->S != nullptr);
344 // If the -dataflow-log command-line flag was set, synthesize a logger.
345 // This is ugly but provides a uniform method for ad-hoc debugging dataflow-
346 // based tools.
347 if (Opts.Log == nullptr) {
348 if (DataflowLog.getNumOccurrences() > 0) {
349 LogOwner = makeLoggerFromCommandLine();
350 this->Opts.Log = LogOwner.get();
351 // FIXME: if the flag is given a value, write an HTML log to a file.
352 } else {
353 this->Opts.Log = &Logger::null();
354 }
355 }
356}
357
359
360} // namespace dataflow
361} // namespace clang
362
363using namespace clang;
364
366 const Expr *Current = &E;
367 if (auto *EWC = dyn_cast<ExprWithCleanups>(Current)) {
368 Current = EWC->getSubExpr();
369 assert(Current != nullptr);
370 }
371 Current = Current->IgnoreParens();
372 assert(Current != nullptr);
373 return *Current;
374}
375
377 if (auto *E = dyn_cast<Expr>(&S))
378 return ignoreCFGOmittedNodes(*E);
379 return S;
380}
381
382// FIXME: Does not precisely handle non-virtual diamond inheritance. A single
383// field decl will be modeled for all instances of the inherited field.
387 !Type->isRecordType())
388 return;
389
390 for (const FieldDecl *Field : Type->getAsRecordDecl()->fields())
391 Fields.insert(Field);
392 if (auto *CXXRecord = Type->getAsCXXRecordDecl())
393 for (const CXXBaseSpecifier &Base : CXXRecord->bases())
394 getFieldsFromClassHierarchy(Base.getType(), Fields);
395}
396
397/// Gets the set of all fields in the type.
399 FieldSet Fields;
401 return Fields;
402}
403
405 const clang::dataflow::FieldSet &Fields,
407 if (Fields.size() != FieldLocs.size())
408 return false;
409 for ([[maybe_unused]] auto [Field, Loc] : FieldLocs)
410 if (!Fields.contains(cast_or_null<FieldDecl>(Field)))
411 return false;
412 return true;
413}
static llvm::cl::opt< std::string > DataflowLog("dataflow-log", llvm::cl::Hidden, llvm::cl::ValueOptional, llvm::cl::desc("Emit log of dataflow analysis. With no arg, writes textual " "log to stderr. With an arg, writes HTML logs under the " "specified directory (one per analyzed function)."))
static void getFieldsFromClassHierarchy(QualType Type, clang::dataflow::FieldSet &Fields)
Defines the clang::Expr interface and subclasses for C++ expressions.
const ControlFlowContext & CFCtx
Contains the CFG being analyzed.
Represents a base class of a C++ class.
Definition: DeclCXX.h:146
This represents one expression.
Definition: Expr.h:110
QualType getType() const
Definition: Expr.h:142
Represents a member of a struct/union/class.
Definition: Decl.h:3025
Represents a function declaration or definition.
Definition: Decl.h:1959
bool doesThisDeclarationHaveABody() const
Returns whether this specific declaration of the function has a body.
Definition: Decl.h:2270
FunctionDecl * getDefinition()
Get the definition for this declaration.
Definition: Decl.h:2226
A (possibly-)qualified type.
Definition: Type.h:737
bool isNull() const
Return true if this QualType doesn't point to a type yet.
Definition: Type.h:804
QualType getNonReferenceType() const
If Type is a reference type (e.g., const int&), returns the type that the reference refers to ("const...
Definition: Type.h:7101
QualType getCanonicalType() const
Definition: Type.h:6953
field_range fields() const
Definition: Decl.h:4339
Stmt - This represents one statement.
Definition: Stmt.h:84
Token - This structure provides full information about a lexed token.
Definition: Token.h:36
The base class of the type hierarchy.
Definition: Type.h:1606
CXXRecordDecl * getAsCXXRecordDecl() const
Retrieves the CXXRecordDecl that this type refers to, either because the type is a RecordType or beca...
Definition: Type.cpp:1819
bool isDependentType() const
Whether this type is a dependent type, meaning that its definition somehow depends on a template para...
Definition: Type.h:2419
bool isIncompleteType(NamedDecl **Def=nullptr) const
Types are partitioned into 3 broad categories (C99 6.2.5p1): object types, function types,...
Definition: Type.cpp:2299
bool isRecordType() const
Definition: Type.h:7243
RecordDecl * getAsRecordDecl() const
Retrieves the RecordDecl this type refers to.
Definition: Type.cpp:1823
Represent the declaration of a variable (in which case it is an lvalue) a function (in which case it ...
Definition: Decl.h:706
QualType getType() const
Definition: Decl.h:717
The Arena owns the objects that model data within an analysis.
Definition: Arena.h:21
Holds CFG and other derived context that is needed to perform dataflow analysis.
llvm::StringMap< QualType > getSyntheticFields(QualType Type)
Returns the names and types of the synthetic fields for the given record type.
StorageLocation & createStorageLocation(QualType Type)
Returns a new storage location appropriate for Type.
FieldSet getModeledFields(QualType Type)
Returns the fields of Type, limited to the set of fields modeled by this context.
RecordStorageLocation & createRecordStorageLocation(QualType Type, RecordStorageLocation::FieldToLoc FieldLocs, RecordStorageLocation::SyntheticFieldMap SyntheticFields)
Creates a RecordStorageLocation for the given type and with the given fields.
bool isLiteral(bool b) const
Definition: Formula.h:78
static Logger & null()
Returns a dummy logger that does nothing.
Definition: Logger.cpp:16
Models a symbolic pointer. Specifically, any value of type T*.
Definition: Value.h:172
A storage location for a record (struct, class, or union).
llvm::DenseMap< const ValueDecl *, StorageLocation * > FieldToLoc
llvm::StringMap< StorageLocation * > SyntheticFieldMap
A storage location that is not subdivided further for the purposes of abstract interpretation.
Base class for elements of the local variable store and of the heap.
Atom
Identifies an atomic boolean variable such as "V1".
Definition: Formula.h:34
static void printAtomList(const llvm::SmallVector< Atom > &Atoms, llvm::raw_ostream &OS)
void simplifyConstraints(llvm::SetVector< const Formula * > &Constraints, Arena &arena, SimplifyConstraintsInfo *Info=nullptr)
Simplifies a set of constraints (implicitly connected by "and") in a way that does not change satisfi...
FieldSet getObjectFields(QualType Type)
Returns the set of all fields in the type.
const Expr & ignoreCFGOmittedNodes(const Expr &E)
Skip past nodes that the CFG does not emit.
static std::unique_ptr< Logger > makeLoggerFromCommandLine()
static llvm::DenseSet< llvm::StringRef > getKeys(const llvm::StringMap< T > &Map)
bool containsSameFields(const FieldSet &Fields, const RecordStorageLocation::FieldToLoc &FieldLocs)
Returns whether Fields and FieldLocs contain the same fields.
The JSON file list parser is used to communicate input to InstallAPI.
@ Result
The result type of a method or function.
@ Invariant
The parameter is invariant: must match exactly.
@ Class
The "class" keyword introduces the elaborated-type-specifier.
Definition: Format.h:5304
Logger * Log
If provided, analysis details will be recorded here.
std::optional< ContextSensitiveOptions > ContextSensitiveOpts
Options for analyzing function bodies when present in the translation unit, or empty to disable conte...
Information on the way a set of constraints was simplified.
llvm::SmallVector< Atom > TrueAtoms
Atoms that the original constraints imply must be true.
llvm::SmallVector< llvm::SmallVector< Atom > > EquivalentAtoms
List of equivalence classes of atoms.
llvm::SmallVector< Atom > FalseAtoms
Atoms that the original constraints imply must be false.