clang 23.0.0git
PointerFlowExtractor.cpp
Go to the documentation of this file.
1//===- PointerFlowExtractor.cpp -------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
12#include "clang/AST/Decl.h"
13#include "clang/AST/DeclCXX.h"
14#include "clang/AST/Expr.h"
15#include "clang/AST/ExprCXX.h"
16#include "clang/AST/Stmt.h"
17#include "clang/AST/TypeBase.h"
25#include "llvm/ADT/STLExtras.h"
26#include "llvm/ADT/STLFunctionalExtras.h"
27#include "llvm/ADT/Sequence.h"
28#include "llvm/Support/Error.h"
29#include <memory>
30#include <optional>
31
32namespace clang::ssaf {
34} // namespace clang::ssaf
35
36namespace {
37using namespace clang;
38using namespace ssaf;
39
40class PointerFlowMatcher {
41public:
42 EdgeSet Results;
43 ASTContext &Ctx;
44 TUSummaryExtractor &Extractor;
45
46 PointerFlowMatcher(ASTContext &Ctx, TUSummaryExtractor &Extractor)
47 : Ctx(Ctx), Extractor(Extractor) {}
48
49 llvm::Error matches(const DynTypedNode &DynNode, const NamedDecl *RootDecl);
50
51 llvm::Error matchesInitializerList(const ValueDecl *Base,
52 const Expr *InitExpr,
53 unsigned ArrayElementIndirectLevel = 0);
54
55 llvm::Error matchesStmt(const Stmt *S, const NamedDecl *RootDecl);
56
57 llvm::Error matchesDecl(const Decl *D, const NamedDecl *RootDecl);
58
59private:
60 std::function<EntityId(const EntityName &)> AddEntity;
61
62 Expected<EntityPointerLevelSet> toEPL(const NamedDecl *N,
63 bool IsRet = false) const;
64
65 Expected<EntityPointerLevelSet> toEPL(const Expr *N) const;
66
67 llvm::Error addEdges(Expected<EntityPointerLevelSet> &&LHS,
68 Expected<EntityPointerLevelSet> &&RHS);
69
70 template <typename ParmsProvider, typename ArgsProvider>
71 llvm::Error matchesArgsWithParams(unsigned ArgIdxStart, ParmsProvider *PP,
72 ArgsProvider *AP) {
73 unsigned ArgIdx = ArgIdxStart;
74
75 for (unsigned ParmIdx = 0;
76 ParmIdx < PP->getNumParams() && ArgIdx < AP->getNumArgs();
77 ++ArgIdx, ++ParmIdx) {
78 if (const ParmVarDecl *PD = PP->getParamDecl(ParmIdx);
79 PD && hasPtrOrArrType(PD)) {
80 if (auto Err = addEdges(toEPL(PD), toEPL(AP->getArg(ArgIdx))))
81 return Err;
82 }
83 }
84 return llvm::Error::success();
85 }
86};
87
88Expected<EntityPointerLevelSet> PointerFlowMatcher::toEPL(const NamedDecl *N,
89 bool IsRet) const {
90 auto Ret = createEntityPointerLevel(N, Extractor, IsRet);
91
92 if (Ret)
93 return EntityPointerLevelSet{*Ret};
94 return Ret.takeError();
95}
96
97Expected<EntityPointerLevelSet> PointerFlowMatcher::toEPL(const Expr *N) const {
98 return translateEntityPointerLevel(N, Ctx, Extractor);
99}
100
101llvm::Error
102PointerFlowMatcher::addEdges(Expected<EntityPointerLevelSet> &&LHS,
103 Expected<EntityPointerLevelSet> &&RHS) {
104 if (!LHS && !RHS)
105 return llvm::joinErrors(LHS.takeError(), RHS.takeError());
106 if (!LHS)
107 return LHS.takeError();
108 if (!RHS)
109 return RHS.takeError();
110 for (auto L : *LHS)
111 Results[L].insert(RHS->begin(), RHS->end());
112 return llvm::Error::success();
113}
114
115/// Match and extract pointer flow.
116/// The extraction function 'XF' can be described by the following rules:
117///
118/// XF(l = r) := add edge "toEPL(l) -> toEPL(r))"
119/// XF(foo(a, b, ...)) := XF(Param_1 = a), XF(Param_2 = b), ...
120/// XF(return e;) := XF(FunRet = e), where 'FunRet' is the return
121/// entity of the enclosing
122/// function
123/// XF(ctor(a, ...) : x1(y1), ... {...})
124/// := XF(Param_1 = a), ...,
125/// XF(x1 = y1), ...,
126/// ctor's body will be visited separately.
127/// XF(T var = e) := XF(var = e)
128/// XF(T var = init-list) := see \ref
129/// PointerFlowMatcher::matchInitializerList
130llvm::Error PointerFlowMatcher::matches(const DynTypedNode &DynNode,
131 const NamedDecl *RootDecl) {
132 if (const Stmt *S = DynNode.get<Stmt>())
133 return matchesStmt(S, RootDecl);
134 if (const Decl *D = DynNode.get<Decl>())
135 return matchesDecl(D, RootDecl);
136 return llvm::Error::success();
137}
138
139llvm::Error PointerFlowMatcher::matchesStmt(const Stmt *S,
140 const NamedDecl *RootDecl) {
141 // Match 'p = q' whenever it has pointer or array type:
142 if (const auto *BO = dyn_cast<BinaryOperator>(S);
143 BO && BO->getOpcode() == BO_Assign && hasPtrOrArrType(BO)) {
144 return addEdges(toEPL(BO->getLHS()), toEPL(BO->getRHS()));
145 }
146
147 // Match arg-to-param passing (in CallExpr) for any pointer type argument:
148 if (const auto *CE = dyn_cast<CallExpr>(S)) {
149 const FunctionDecl *FD = CE->getDirectCallee();
150
151 if (!FD)
152 return llvm::Error::success();
153
154 unsigned ArgIdx = 0;
155
157 if (auto *MD = dyn_cast<CXXMethodDecl>(FD);
158 MD && !MD->isExplicitObjectMemberFunction())
159 ArgIdx = 1;
160 return matchesArgsWithParams(ArgIdx, FD, CE);
161 }
162 // Match arg-to-param passing (in CXXConstructExpr) for any pointer type
163 // argument:
164 if (const auto *CCE = dyn_cast<CXXConstructExpr>(S)) {
165 return matchesArgsWithParams(/*ArgIdxStart=*/0, CCE->getConstructor(), CCE);
166 }
167 if (const auto *RS = dyn_cast<ReturnStmt>(S)) {
168 const Expr *RetExpr = RS->getRetValue();
169 if (!RetExpr || !hasPtrOrArrType(RetExpr))
170 return llvm::Error::success();
171 return addEdges(toEPL(RootDecl, true), toEPL(RetExpr));
172 }
173 return llvm::Error::success();
174}
175
176llvm::Error PointerFlowMatcher::matchesDecl(const Decl *D,
177 const NamedDecl *RootDecl) {
178 const Expr *InitExpr = nullptr;
179
180 if (const auto *VD = dyn_cast<ValueDecl>(D)) {
181 if (const auto *Var = dyn_cast<VarDecl>(VD))
182 InitExpr = Var->getInit();
183 if (const auto *Fd = dyn_cast<FieldDecl>(VD))
184 InitExpr = Fd->getInClassInitializer();
185
186 // Match initializer-list:
187 if (auto *InitLst = dyn_cast_or_null<InitListExpr>(InitExpr))
188 return matchesInitializerList(VD, InitLst);
189
190 // Match initializers to variables/fields of a pointer type:
191 if (InitExpr && hasPtrOrArrType(VD))
192 return addEdges(toEPL(VD), toEPL(InitExpr));
193 }
194
195 // Match C++ constructor member-initializers:
196 if (const auto *CtorD = dyn_cast<CXXConstructorDecl>(D)) {
197 for (auto *E : CtorD->inits()) {
198 if (E->isDelegatingInitializer())
199 return matches(DynTypedNode::create(*E->getInit()), RootDecl);
200 if (const FieldDecl *FD = E->getMember(); FD && hasPtrOrArrType(FD)) {
201 if (auto Err = addEdges(toEPL(E->getMember()), toEPL(E->getInit())))
202 return Err;
203 }
204 }
205 }
206 return llvm::Error::success();
207}
208
209// Helper function for matchInitializerList that handles record:
210llvm::Error matchInitializerListForRecordDecl(PointerFlowMatcher &Matcher,
211 const RecordDecl *RecordTy,
212 const InitListExpr *ILE) {
213 if (auto *CXXRD = dyn_cast<CXXRecordDecl>(RecordTy))
214 if (CXXRD->getNumBases() != 0) {
215 // FIXME: support this:
216 return makeErrAtNode(
217 Matcher.Ctx, ILE,
218 "attempt to create pointer assignment edges between "
219 "CXXRecordDecls with base classes and initializer-lists");
220 }
221 // Handle union:
222 if (RecordTy->isUnion()) {
224
225 if (!InitField)
226 return llvm::Error::success();
227 assert(!ILE->inits().empty());
228 return Matcher.matchesInitializerList(InitField, ILE->getInit(0));
229 }
230 // Handle struct/class:
231 ILE = ILE->isSemanticForm() ? ILE : ILE->getSemanticForm();
232
233 auto FieldIter = RecordTy->field_begin();
234
235 assert(RecordTy->getNumFields() >= ILE->getNumInits());
236 for (auto *Init : ILE->inits())
237 if (auto Err = Matcher.matchesInitializerList(*(FieldIter++), Init))
238 return Err;
239 return llvm::Error::success();
240}
241
242// Helper function for matchInitializerList that handles array:
243llvm::Error matchInitializerListForArray(PointerFlowMatcher &Matcher,
244 const ValueDecl *Array,
245 const InitListExpr *ILE,
246 unsigned ArrayIndirectLevel = 0) {
247 for (auto *E : ILE->inits())
248 if (auto Err =
249 Matcher.matchesInitializerList(Array, E, ArrayIndirectLevel + 1))
250 return Err;
251 return llvm::Error::success();
252}
253
254/// Match initializer lists of the form 'Var = {a, b, c, ...}':
255///
256/// If 'Var' is a struct/union:
257/// XF(Var = {a, b, c, ...}) := XF(Var.field_1 = a)
258/// XF(Var.field_2 = b)
259/// ...
260/// If 'Var' is an array:
261/// XF(Var = {a, b, c, ...}) := XF(*Var = a)
262/// XF(*Var = b)
263/// ...
264///
265/// The process is recursive: 'a', 'b', 'c', ... may themselves be
266/// initializer lists. We therefore use \p ArrayElementIndirectLevel to keep
267/// track of the pointer level the left-hand side.
268llvm::Error
269PointerFlowMatcher::matchesInitializerList(const ValueDecl *Base,
270 const Expr *InitExpr,
271 unsigned ArrayElementIndirectLevel) {
272 const InitListExpr *ILE = dyn_cast<InitListExpr>(InitExpr);
273
274 if (!ILE) {
275 if (!hasPtrOrArrType(InitExpr))
276 return llvm::Error::success();
277
278 auto BaseEPL = toEPL(Base);
279
280 if (!BaseEPL)
281 return BaseEPL.takeError();
282
283 // Apply ArrayElementIndirectLevel to BaseEPL
284 auto R = llvm::map_range(*BaseEPL, [&ArrayElementIndirectLevel](
285 const EntityPointerLevel &EPL) {
286 EntityPointerLevel Result = EPL;
287 for ([[maybe_unused]] auto Ignored : llvm::seq(ArrayElementIndirectLevel))
289 return Result;
290 });
291 return addEdges(EntityPointerLevelSet{R.begin(), R.end()}, toEPL(InitExpr));
292 }
293 // Note that `Base`'s type is NOT the real LHS type when
294 // ArrayElementIndirectLevel > 0:
295 QualType Type = InitExpr->getType();
296
297 if (auto *RD = Type->getAsRecordDecl())
298 return matchInitializerListForRecordDecl(*this, RD, ILE);
299 if (Type->isArrayType())
300 return matchInitializerListForArray(*this, Base, ILE,
301 ArrayElementIndirectLevel);
302 // Must be the case of using a initializer-list for a scalar:
303 return matchesInitializerList(Base, ILE->getInit(0));
304}
305
306class PointerFlowTUSummaryExtractor : public TUSummaryExtractor {
307public:
308 PointerFlowTUSummaryExtractor(TUSummaryBuilder &Builder)
309 : TUSummaryExtractor(Builder) {}
310
311 Expected<std::unique_ptr<PointerFlowEntitySummary>>
312 extractEntitySummary(const NamedDecl *Contributor, ASTContext &Ctx,
313 TUSummaryExtractor &Extractor) {
314 PointerFlowMatcher Matcher(Ctx, Extractor);
315 auto MatchAction = [&Matcher, &Contributor](const DynTypedNode &Node) {
316 auto Err = Matcher.matches(Node, Contributor);
317
318 if (Err)
319 llvm::report_fatal_error(std::move(Err));
320 };
321
322 findMatchesIn(Contributor, MatchAction);
323 return std::make_unique<PointerFlowEntitySummary>(
324 buildPointerFlowEntitySummary(std::move(Matcher.Results)));
325 }
326
327 void HandleTranslationUnit(ASTContext &Ctx) override {
328 std::vector<const NamedDecl *> Contributors;
329
330 findContributors(Ctx, Contributors);
331 for (auto *CD : Contributors) {
332 auto EntitySummary = extractEntitySummary(CD, Ctx, *this);
333
334 if (!EntitySummary)
335 llvm::reportFatalInternalError(EntitySummary.takeError());
336 assert(*EntitySummary);
337 if ((*EntitySummary)->empty())
338 continue;
339
340 std::optional<EntityId> ContributorId = addEntity(CD);
341 if (!ContributorId)
342 llvm::reportFatalInternalError(makeEntityNameErr(Ctx, CD));
343
344 [[maybe_unused]] auto [_, InsertionSucceeded] =
345 SummaryBuilder.addSummary(*ContributorId, std::move(*EntitySummary));
346
347 assert(InsertionSucceeded && "duplicated contributor extraction");
348 }
349 }
350};
351} // namespace
352
353// NOLINTNEXTLINE(misc-use-internal-linkage)
355
356static TUSummaryExtractorRegistry::Add<PointerFlowTUSummaryExtractor>
358 "Extract pointer flow information");
Defines the clang::ASTContext interface.
static TUSummaryExtractorRegistry::Add< CallGraphExtractor > RegisterExtractor(CallGraphSummary::Name, "Extracts static call-graph information")
Defines the C++ Decl subclasses, other than those for templates (found in DeclTemplate....
Defines the clang::Expr interface and subclasses for C++ expressions.
Result
Implement __builtin_bit_cast and related operations.
llvm::json::Array Array
volatile int PointerFlowTUSummaryExtractorAnchorSource
C Language Family Type Representation.
const T * get() const
Retrieve the stored node as type T.
QualType getType() const
Definition Expr.h:144
FieldDecl * getInitializedFieldInUnion()
If this initializes a union, specifies which field in the union to initialize.
Definition Expr.h:5429
unsigned getNumInits() const
Definition Expr.h:5335
bool isSemanticForm() const
Definition Expr.h:5465
InitListExpr * getSemanticForm() const
Definition Expr.h:5466
const Expr * getInit(unsigned Init) const
Definition Expr.h:5357
ArrayRef< Expr * > inits() const
Definition Expr.h:5355
This represents a decl that may have a name.
Definition Decl.h:274
unsigned getNumFields() const
Returns the number of fields (non-static data members) in this record.
Definition Decl.h:4559
field_iterator field_begin() const
Definition Decl.cpp:5270
bool isUnion() const
Definition Decl.h:3946
static constexpr llvm::StringLiteral Name
Definition PointerFlow.h:35
DynTypedNode DynTypedNode
bool InitField(InterpState &S, CodePtr OpPC, uint32_t I)
1) Pops the value from the stack 2) Peeks a pointer from the stack 3) Pushes the value to field I of ...
Definition Interp.h:1873
PRESERVE_NONE bool Ret(InterpState &S, CodePtr &PC)
Definition Interp.h:258
llvm::Error makeErrAtNode(clang::ASTContext &Ctx, const NodeTy *N, llvm::StringRef Fmt, const Ts &...Args)
std::map< EntityPointerLevel, EntityPointerLevelSet > EdgeSet
Maps each source node to its destination nodes:
Definition PointerFlow.h:23
EntityPointerLevel incrementPointerLevel(const EntityPointerLevel &E)
An EntityPointerLevel is associated with a level of the declared pointer/array type of an entity.
void findContributors(ASTContext &Ctx, std::vector< const NamedDecl * > &Contributors)
Find all contributors in an AST.
llvm::Error makeEntityNameErr(clang::ASTContext &Ctx, const clang::NamedDecl *D)
void findMatchesIn(const NamedDecl *Contributor, llvm::function_ref< void(const DynTypedNode &)> MatchActionRef)
Perform "MatchAction" on each Stmt and Decl belonging to the Contributor.
PointerFlowEntitySummary buildPointerFlowEntitySummary(EdgeSet Edges)
bool hasPtrOrArrType(const DeclOrExpr *E)
bool matches(const til::SExpr *E1, const til::SExpr *E2)
The JSON file list parser is used to communicate input to InstallAPI.
bool isa(CodeGen::Address addr)
Definition Address.h:330
@ Type
The name was classified as a type.
Definition Sema.h:564
int const char * function
Definition c++config.h:31