clang 23.0.0git
PointerFlowExtractor.cpp
Go to the documentation of this file.
1//===- PointerFlowExtractor.cpp -------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
12#include "clang/AST/Decl.h"
13#include "clang/AST/DeclCXX.h"
14#include "clang/AST/Expr.h"
15#include "clang/AST/ExprCXX.h"
16#include "clang/AST/Stmt.h"
17#include "clang/AST/TypeBase.h"
26#include "llvm/ADT/STLExtras.h"
27#include "llvm/ADT/STLFunctionalExtras.h"
28#include "llvm/ADT/Sequence.h"
29#include "llvm/Support/Error.h"
30#include <memory>
31#include <optional>
32
33namespace clang::ssaf {
35} // namespace clang::ssaf
36
37namespace {
38using namespace clang;
39using namespace ssaf;
40
41class PointerFlowMatcher {
42public:
43 EdgeSet Results;
44 ASTContext &Ctx;
45
46 PointerFlowMatcher(ASTContext &Ctx,
47 std::function<EntityId(const EntityName &)> AddEntity)
48 : Ctx(Ctx), AddEntity(std::move(AddEntity)) {}
49
50 llvm::Error matches(const DynTypedNode &DynNode, const NamedDecl *RootDecl);
51
52 llvm::Error matchesInitializerList(const ValueDecl *Base,
53 const Expr *InitExpr,
54 unsigned ArrayElementIndirectLevel = 0);
55
56 llvm::Error matchesStmt(const Stmt *S, const NamedDecl *RootDecl);
57
58 llvm::Error matchesDecl(const Decl *D, const NamedDecl *RootDecl);
59
60private:
61 std::function<EntityId(const EntityName &)> AddEntity;
62
63 Expected<EntityPointerLevelSet> toEPL(const NamedDecl *N,
64 bool IsRet = false) const;
65
66 Expected<EntityPointerLevelSet> toEPL(const Expr *N) const;
67
68 llvm::Error addEdges(Expected<EntityPointerLevelSet> &&LHS,
69 Expected<EntityPointerLevelSet> &&RHS);
70
71 template <typename ParmsProvider, typename ArgsProvider>
72 llvm::Error matchesArgsWithParams(unsigned ArgIdxStart, ParmsProvider *PP,
73 ArgsProvider *AP) {
74 unsigned ArgIdx = ArgIdxStart;
75
76 for (unsigned ParmIdx = 0;
77 ParmIdx < PP->getNumParams() && ArgIdx < AP->getNumArgs();
78 ++ArgIdx, ++ParmIdx) {
79 if (const ParmVarDecl *PD = PP->getParamDecl(ParmIdx);
80 PD && hasPtrOrArrType(PD)) {
81 if (auto Err = addEdges(toEPL(PD), toEPL(AP->getArg(ArgIdx))))
82 return Err;
83 }
84 }
85 return llvm::Error::success();
86 }
87};
88
89Expected<EntityPointerLevelSet> PointerFlowMatcher::toEPL(const NamedDecl *N,
90 bool IsRet) const {
91 auto Ret = createEntityPointerLevel(N, AddEntity, IsRet);
92
93 if (Ret)
94 return EntityPointerLevelSet{*Ret};
95 return Ret.takeError();
96}
97
98Expected<EntityPointerLevelSet> PointerFlowMatcher::toEPL(const Expr *N) const {
99 return translateEntityPointerLevel(N, Ctx, AddEntity);
100}
101
102llvm::Error
103PointerFlowMatcher::addEdges(Expected<EntityPointerLevelSet> &&LHS,
104 Expected<EntityPointerLevelSet> &&RHS) {
105 if (!LHS && !RHS)
106 return llvm::joinErrors(LHS.takeError(), RHS.takeError());
107 if (!LHS)
108 return LHS.takeError();
109 if (!RHS)
110 return RHS.takeError();
111 for (auto L : *LHS)
112 Results[L].insert(RHS->begin(), RHS->end());
113 return llvm::Error::success();
114}
115
116/// Match and extract pointer flow.
117/// The extraction function 'XF' can be described by the following rules:
118///
119/// XF(l = r) := add edge "toEPL(l) -> toEPL(r))"
120/// XF(foo(a, b, ...)) := XF(Param_1 = a), XF(Param_2 = b), ...
121/// XF(return e;) := XF(FunRet = e), where 'FunRet' is the return
122/// entity of the enclosing
123/// function
124/// XF(ctor(a, ...) : x1(y1), ... {...})
125/// := XF(Param_1 = a), ...,
126/// XF(x1 = y1), ...,
127/// ctor's body will be visited separately.
128/// XF(T var = e) := XF(var = e)
129/// XF(T var = init-list) := see \ref
130/// PointerFlowMatcher::matchInitializerList
131llvm::Error PointerFlowMatcher::matches(const DynTypedNode &DynNode,
132 const NamedDecl *RootDecl) {
133 if (const Stmt *S = DynNode.get<Stmt>())
134 return matchesStmt(S, RootDecl);
135 if (const Decl *D = DynNode.get<Decl>())
136 return matchesDecl(D, RootDecl);
137 return llvm::Error::success();
138}
139
140llvm::Error PointerFlowMatcher::matchesStmt(const Stmt *S,
141 const NamedDecl *RootDecl) {
142 // Match 'p = q' whenever it has pointer or array type:
143 if (const auto *BO = dyn_cast<BinaryOperator>(S);
144 BO && BO->getOpcode() == BO_Assign && hasPtrOrArrType(BO)) {
145 return addEdges(toEPL(BO->getLHS()), toEPL(BO->getRHS()));
146 }
147
148 // Match arg-to-param passing (in CallExpr) for any pointer type argument:
149 if (const auto *CE = dyn_cast<CallExpr>(S)) {
150 const FunctionDecl *FD = CE->getDirectCallee();
151
152 if (!FD)
153 return llvm::Error::success();
154
155 unsigned ArgIdx = 0;
156
158 if (auto *MD = dyn_cast<CXXMethodDecl>(FD);
159 MD && !MD->isExplicitObjectMemberFunction())
160 ArgIdx = 1;
161 return matchesArgsWithParams(ArgIdx, FD, CE);
162 }
163 // Match arg-to-param passing (in CXXConstructExpr) for any pointer type
164 // argument:
165 if (const auto *CCE = dyn_cast<CXXConstructExpr>(S)) {
166 return matchesArgsWithParams(/*ArgIdxStart=*/0, CCE->getConstructor(), CCE);
167 }
168 if (const auto *RS = dyn_cast<ReturnStmt>(S)) {
169 const Expr *RetExpr = RS->getRetValue();
170 if (!RetExpr || !hasPtrOrArrType(RetExpr))
171 return llvm::Error::success();
172 return addEdges(toEPL(RootDecl, true), toEPL(RetExpr));
173 }
174 return llvm::Error::success();
175}
176
177llvm::Error PointerFlowMatcher::matchesDecl(const Decl *D,
178 const NamedDecl *RootDecl) {
179 const Expr *InitExpr = nullptr;
180
181 if (const auto *VD = dyn_cast<ValueDecl>(D)) {
182 if (const auto *Var = dyn_cast<VarDecl>(VD))
183 InitExpr = Var->getInit();
184 if (const auto *Fd = dyn_cast<FieldDecl>(VD))
185 InitExpr = Fd->getInClassInitializer();
186
187 // Match initializer-list:
188 if (auto *InitLst = dyn_cast_or_null<InitListExpr>(InitExpr))
189 return matchesInitializerList(VD, InitLst);
190
191 // Match initializers to variables/fields of a pointer type:
192 if (InitExpr && hasPtrOrArrType(VD))
193 return addEdges(toEPL(VD), toEPL(InitExpr));
194 }
195
196 // Match C++ constructor member-initializers:
197 if (const auto *CtorD = dyn_cast<CXXConstructorDecl>(D)) {
198 for (auto *E : CtorD->inits()) {
199 if (E->isDelegatingInitializer())
200 return matches(DynTypedNode::create(*E->getInit()), RootDecl);
201 if (const FieldDecl *FD = E->getMember(); FD && hasPtrOrArrType(FD)) {
202 if (auto Err = addEdges(toEPL(E->getMember()), toEPL(E->getInit())))
203 return Err;
204 }
205 }
206 }
207 return llvm::Error::success();
208}
209
210// Helper function for matchInitializerList that handles record:
211llvm::Error matchInitializerListForRecordDecl(PointerFlowMatcher &Matcher,
212 const RecordDecl *RecordTy,
213 const InitListExpr *ILE) {
214 if (auto *CXXRD = dyn_cast<CXXRecordDecl>(RecordTy))
215 if (CXXRD->getNumBases() != 0) {
216 // FIXME: support this:
217 return makeErrAtNode(
218 Matcher.Ctx, ILE,
219 "attempt to create pointer assignment edges between "
220 "CXXRecordDecls with base classes and initializer-lists");
221 }
222 // Handle union:
223 if (RecordTy->isUnion()) {
225
226 if (!InitField)
227 return llvm::Error::success();
228 assert(!ILE->inits().empty());
229 return Matcher.matchesInitializerList(InitField, ILE->getInit(0));
230 }
231 // Handle struct/class:
232 ILE = ILE->isSemanticForm() ? ILE : ILE->getSemanticForm();
233
234 auto FieldIter = RecordTy->field_begin();
235
236 assert(RecordTy->getNumFields() >= ILE->getNumInits());
237 for (auto *Init : ILE->inits())
238 if (auto Err = Matcher.matchesInitializerList(*(FieldIter++), Init))
239 return Err;
240 return llvm::Error::success();
241}
242
243// Helper function for matchInitializerList that handles array:
244llvm::Error matchInitializerListForArray(PointerFlowMatcher &Matcher,
245 const ValueDecl *Array,
246 const InitListExpr *ILE,
247 unsigned ArrayIndirectLevel = 0) {
248 for (auto *E : ILE->inits())
249 if (auto Err =
250 Matcher.matchesInitializerList(Array, E, ArrayIndirectLevel + 1))
251 return Err;
252 return llvm::Error::success();
253}
254
255/// Match initializer lists of the form 'Var = {a, b, c, ...}':
256///
257/// If 'Var' is a struct/union:
258/// XF(Var = {a, b, c, ...}) := XF(Var.field_1 = a)
259/// XF(Var.field_2 = b)
260/// ...
261/// If 'Var' is an array:
262/// XF(Var = {a, b, c, ...}) := XF(*Var = a)
263/// XF(*Var = b)
264/// ...
265///
266/// The process is recursive: 'a', 'b', 'c', ... may themselves be
267/// initializer lists. We therefore use \p ArrayElementIndirectLevel to keep
268/// track of the pointer level the left-hand side.
269llvm::Error
270PointerFlowMatcher::matchesInitializerList(const ValueDecl *Base,
271 const Expr *InitExpr,
272 unsigned ArrayElementIndirectLevel) {
273 const InitListExpr *ILE = dyn_cast<InitListExpr>(InitExpr);
274
275 if (!ILE) {
276 if (!hasPtrOrArrType(InitExpr))
277 return llvm::Error::success();
278
279 auto BaseEPL = toEPL(Base);
280
281 if (!BaseEPL)
282 return BaseEPL.takeError();
283
284 // Apply ArrayElementIndirectLevel to BaseEPL
285 auto R = llvm::map_range(*BaseEPL, [&ArrayElementIndirectLevel](
286 const EntityPointerLevel &EPL) {
287 EntityPointerLevel Result = EPL;
288 for ([[maybe_unused]] auto Ignored : llvm::seq(ArrayElementIndirectLevel))
290 return Result;
291 });
292 return addEdges(EntityPointerLevelSet{R.begin(), R.end()}, toEPL(InitExpr));
293 }
294 // Note that `Base`'s type is NOT the real LHS type when
295 // ArrayElementIndirectLevel > 0:
296 QualType Type = InitExpr->getType();
297
298 if (auto *RD = Type->getAsRecordDecl())
299 return matchInitializerListForRecordDecl(*this, RD, ILE);
300 if (Type->isArrayType())
301 return matchInitializerListForArray(*this, Base, ILE,
302 ArrayElementIndirectLevel);
303 // Must be the case of using a initializer-list for a scalar:
304 return matchesInitializerList(Base, ILE->getInit(0));
305}
306
307class PointerFlowTUSummaryExtractor : public TUSummaryExtractor {
308public:
309 PointerFlowTUSummaryExtractor(TUSummaryBuilder &Builder)
310 : TUSummaryExtractor(Builder) {}
311
312 EntityId addEntity(const EntityName &EN) {
313 return SummaryBuilder.addEntity(EN);
314 }
315
316 Expected<std::unique_ptr<PointerFlowEntitySummary>>
317 extractEntitySummary(const NamedDecl *Contributor, ASTContext &Ctx) {
318 PointerFlowMatcher Matcher(
319 Ctx, [this](const EntityName &EN) { return addEntity(EN); });
320 auto MatchAction = [&Matcher, &Contributor](const DynTypedNode &Node) {
321 auto Err = Matcher.matches(Node, Contributor);
322
323 if (Err)
324 llvm::report_fatal_error(std::move(Err));
325 };
326
327 findMatchesIn(Contributor, MatchAction);
328 return std::make_unique<PointerFlowEntitySummary>(
329 buildPointerFlowEntitySummary(std::move(Matcher.Results)));
330 }
331
332 void HandleTranslationUnit(ASTContext &Ctx) override {
333 std::vector<const NamedDecl *> Contributors;
334
335 findContributors(Ctx, Contributors);
336 for (auto *CD : Contributors) {
337 auto EntitySummary = extractEntitySummary(CD, Ctx);
338
339 if (!EntitySummary)
340 llvm::reportFatalInternalError(EntitySummary.takeError());
341 assert(*EntitySummary);
342 if ((*EntitySummary)->empty())
343 continue;
344
345 auto ContributorName = getEntityName(CD);
346
347 if (!ContributorName)
348 llvm::reportFatalInternalError(makeEntityNameErr(Ctx, CD));
349
350 auto [_, InsertionSucceeded] = SummaryBuilder.addSummary(
351 addEntity(*ContributorName), std::move(*EntitySummary));
352
353 assert(InsertionSucceeded && "duplicated contributor extraction");
354 }
355 }
356};
357} // namespace
358
// NOTE(review): two lines appear to be missing from this listing: the
// definition that the NOLINT below applies to (presumably the
// PointerFlowTUSummaryExtractorAnchorSource anchor variable) and the
// `RegisterExtractor(<summary name>,` line of the registration that follows.
// Confirm against the upstream file and restore both before building.
359// NOLINTNEXTLINE(misc-use-internal-linkage)
361
362static TUSummaryExtractorRegistry::Add<PointerFlowTUSummaryExtractor>
364 "Extract pointer flow information");
Defines the clang::ASTContext interface.
static TUSummaryExtractorRegistry::Add< CallGraphExtractor > RegisterExtractor(CallGraphSummary::Name, "Extracts static call-graph information")
Defines the C++ Decl subclasses, other than those for templates (found in DeclTemplate....
Defines the clang::Expr interface and subclasses for C++ expressions.
Result
Implement __builtin_bit_cast and related operations.
llvm::json::Array Array
volatile int PointerFlowTUSummaryExtractorAnchorSource
C Language Family Type Representation.
const T * get() const
Retrieve the stored node as type T.
QualType getType() const
Definition Expr.h:144
FieldDecl * getInitializedFieldInUnion()
If this initializes a union, specifies which field in the union to initialize.
Definition Expr.h:5426
unsigned getNumInits() const
Definition Expr.h:5332
bool isSemanticForm() const
Definition Expr.h:5466
InitListExpr * getSemanticForm() const
Definition Expr.h:5467
const Expr * getInit(unsigned Init) const
Definition Expr.h:5354
ArrayRef< Expr * > inits() const
Definition Expr.h:5352
This represents a decl that may have a name.
Definition Decl.h:274
unsigned getNumFields() const
Returns the number of fields (non-static data members) in this record.
Definition Decl.h:4559
field_iterator field_begin() const
Definition Decl.cpp:5270
bool isUnion() const
Definition Decl.h:3946
static constexpr llvm::StringLiteral Name
Definition PointerFlow.h:35
DynTypedNode DynTypedNode
bool InitField(InterpState &S, CodePtr OpPC, uint32_t I)
1) Pops the value from the stack 2) Peeks a pointer from the stack 3) Pushes the value to field I of ...
Definition Interp.h:1856
PRESERVE_NONE bool Ret(InterpState &S, CodePtr &PC)
Definition Interp.h:258
llvm::Error makeErrAtNode(clang::ASTContext &Ctx, const NodeTy *N, llvm::StringRef Fmt, const Ts &...Args)
std::map< EntityPointerLevel, EntityPointerLevelSet > EdgeSet
Maps each source node to its destination nodes:
Definition PointerFlow.h:23
EntityPointerLevel incrementPointerLevel(const EntityPointerLevel &E)
An EntityPointerLevel is associated with a level of the declared pointer/array type of an entity.
void findContributors(ASTContext &Ctx, std::vector< const NamedDecl * > &Contributors)
Find all contributors in an AST.
llvm::Error makeEntityNameErr(clang::ASTContext &Ctx, const clang::NamedDecl *D)
void findMatchesIn(const NamedDecl *Contributor, llvm::function_ref< void(const DynTypedNode &)> MatchActionRef)
Perform "MatchAction" on each Stmt and Decl belonging to the Contributor.
std::optional< EntityName > getEntityName(const Decl *D)
Maps a declaration to an EntityName.
PointerFlowEntitySummary buildPointerFlowEntitySummary(EdgeSet Edges)
bool hasPtrOrArrType(const DeclOrExpr *E)
bool matches(const til::SExpr *E1, const til::SExpr *E2)
The JSON file list parser is used to communicate input to InstallAPI.
bool isa(CodeGen::Address addr)
Definition Address.h:330
@ Type
The name was classified as a type.
Definition Sema.h:564
int const char * function
Definition c++config.h:31