clang 23.0.0git
PointerFlowExtractor.cpp
Go to the documentation of this file.
1//===- PointerFlowExtractor.cpp -------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
12#include "clang/AST/Decl.h"
13#include "clang/AST/DeclCXX.h"
14#include "clang/AST/Expr.h"
15#include "clang/AST/ExprCXX.h"
16#include "clang/AST/Stmt.h"
17#include "clang/AST/TypeBase.h"
25#include "llvm/ADT/STLExtras.h"
26#include "llvm/ADT/STLFunctionalExtras.h"
27#include "llvm/ADT/Sequence.h"
28#include "llvm/Support/Debug.h"
29#include "llvm/Support/Error.h"
30#include <memory>
31#include <optional>
32
33namespace clang::ssaf {
35} // namespace clang::ssaf
36
37namespace {
38using namespace clang;
39using namespace ssaf;
40
41class PointerFlowMatcher {
42public:
43 EdgeSet Results;
44 ASTContext &Ctx;
45 TUSummaryExtractor &Extractor;
46
47 PointerFlowMatcher(ASTContext &Ctx, TUSummaryExtractor &Extractor)
48 : Ctx(Ctx), Extractor(Extractor) {}
49
50 llvm::Error matches(const DynTypedNode &DynNode, const NamedDecl *RootDecl);
51
52 llvm::Error matchesInitializerList(const ValueDecl *Base,
53 const Expr *InitExpr,
54 unsigned ArrayElementIndirectLevel = 0);
55
56 llvm::Error matchesStmt(const Stmt *S, const NamedDecl *RootDecl);
57
58 llvm::Error matchesDecl(const Decl *D, const NamedDecl *RootDecl);
59
60private:
61 std::function<EntityId(const EntityName &)> AddEntity;
62
63 Expected<EntityPointerLevelSet> toEPL(const NamedDecl *N,
64 bool IsRet = false) const;
65
66 Expected<EntityPointerLevelSet> toEPL(const Expr *N) const;
67
68 llvm::Error addEdges(Expected<EntityPointerLevelSet> &&LHS,
69 Expected<EntityPointerLevelSet> &&RHS);
70
71 template <typename ParmsProvider, typename ArgsProvider>
72 llvm::Error matchesArgsWithParams(unsigned ArgIdxStart, ParmsProvider *PP,
73 ArgsProvider *AP) {
74 unsigned ArgIdx = ArgIdxStart;
75
76 for (unsigned ParmIdx = 0;
77 ParmIdx < PP->getNumParams() && ArgIdx < AP->getNumArgs();
78 ++ArgIdx, ++ParmIdx) {
79 if (const ParmVarDecl *PD = PP->getParamDecl(ParmIdx);
80 PD && hasPtrOrArrType(PD)) {
81 if (auto Err = addEdges(toEPL(PD), toEPL(AP->getArg(ArgIdx))))
82 return Err;
83 }
84 }
85 return llvm::Error::success();
86 }
87};
88
89Expected<EntityPointerLevelSet> PointerFlowMatcher::toEPL(const NamedDecl *N,
90 bool IsRet) const {
91 auto Ret = createEntityPointerLevel(N, Extractor, IsRet);
92
93 if (Ret)
94 return EntityPointerLevelSet{*Ret};
95 return Ret.takeError();
96}
97
98Expected<EntityPointerLevelSet> PointerFlowMatcher::toEPL(const Expr *N) const {
99 return translateEntityPointerLevel(N, Ctx, Extractor);
100}
101
102llvm::Error
103PointerFlowMatcher::addEdges(Expected<EntityPointerLevelSet> &&LHS,
104 Expected<EntityPointerLevelSet> &&RHS) {
105 if (!LHS && !RHS)
106 return llvm::joinErrors(LHS.takeError(), RHS.takeError());
107 if (!LHS)
108 return LHS.takeError();
109 if (!RHS)
110 return RHS.takeError();
111 if (RHS->empty())
112 return llvm::Error::success();
113 for (auto L : *LHS)
114 Results[L].insert(RHS->begin(), RHS->end());
115 return llvm::Error::success();
116}
117
118/// Match and extract pointer flow.
119/// The extraction function 'XF' can be described by the following rules:
120///
121/// XF(l = r) := add edge "toEPL(l) -> toEPL(r))"
122/// XF(foo(a, b, ...)) := XF(Param_1 = a), XF(Param_2 = b), ...
123/// XF(return e;) := XF(FunRet = e), where 'FunRet' is the return
124/// entity of the enclosing
125/// function
126/// XF(ctor(a, ...) : x1(y1), ... {...})
127/// := XF(Param_1 = a), ...,
128/// XF(x1 = y1), ...,
129/// ctor's body will be visited separately.
130/// XF(T var = e) := XF(var = e)
131/// XF(T var = init-list) := see \ref
132/// PointerFlowMatcher::matchInitializerList
133llvm::Error PointerFlowMatcher::matches(const DynTypedNode &DynNode,
134 const NamedDecl *RootDecl) {
135 if (const Stmt *S = DynNode.get<Stmt>())
136 return matchesStmt(S, RootDecl);
137 if (const Decl *D = DynNode.get<Decl>())
138 return matchesDecl(D, RootDecl);
139 return llvm::Error::success();
140}
141
142llvm::Error PointerFlowMatcher::matchesStmt(const Stmt *S,
143 const NamedDecl *RootDecl) {
144 // Match 'p = q' whenever it has pointer or array type:
145 if (const auto *BO = dyn_cast<BinaryOperator>(S);
146 BO && BO->getOpcode() == BO_Assign && hasPtrOrArrType(BO)) {
147 return addEdges(toEPL(BO->getLHS()), toEPL(BO->getRHS()));
148 }
149
150 // Match arg-to-param passing (in CallExpr) for any pointer type argument:
151 if (const auto *CE = dyn_cast<CallExpr>(S)) {
152 const FunctionDecl *FD = CE->getDirectCallee();
153
154 if (!FD)
155 return llvm::Error::success();
156
157 unsigned ArgIdx = 0;
158
160 if (auto *MD = dyn_cast<CXXMethodDecl>(FD);
161 MD && !MD->isExplicitObjectMemberFunction())
162 ArgIdx = 1;
163 return matchesArgsWithParams(ArgIdx, FD, CE);
164 }
165 // Match arg-to-param passing (in CXXConstructExpr) for any pointer type
166 // argument:
167 if (const auto *CCE = dyn_cast<CXXConstructExpr>(S)) {
168 return matchesArgsWithParams(/*ArgIdxStart=*/0, CCE->getConstructor(), CCE);
169 }
170 if (const auto *RS = dyn_cast<ReturnStmt>(S)) {
171 const Expr *RetExpr = RS->getRetValue();
172 if (!RetExpr || !hasPtrOrArrType(RetExpr))
173 return llvm::Error::success();
174 return addEdges(toEPL(RootDecl, true), toEPL(RetExpr));
175 }
176 return llvm::Error::success();
177}
178
179llvm::Error PointerFlowMatcher::matchesDecl(const Decl *D,
180 const NamedDecl *RootDecl) {
181 const Expr *InitExpr = nullptr;
182
183 if (const auto *VD = dyn_cast<ValueDecl>(D)) {
184 if (const auto *Var = dyn_cast<VarDecl>(VD))
185 InitExpr = Var->getInit();
186 if (const auto *Fd = dyn_cast<FieldDecl>(VD))
187 InitExpr = Fd->getInClassInitializer();
188
189 // Match initializer-list:
190 if (auto *InitLst = dyn_cast_or_null<InitListExpr>(InitExpr))
191 return matchesInitializerList(VD, InitLst);
192
193 // Match initializers to variables/fields of a pointer type:
194 if (InitExpr && hasPtrOrArrType(VD))
195 return addEdges(toEPL(VD), toEPL(InitExpr));
196 }
197
198 // Match C++ constructor member-initializers:
199 if (const auto *CtorD = dyn_cast<CXXConstructorDecl>(D)) {
200 for (auto *E : CtorD->inits()) {
201 if (E->isDelegatingInitializer())
202 return matches(DynTypedNode::create(*E->getInit()), RootDecl);
203 if (const FieldDecl *FD = E->getMember(); FD && hasPtrOrArrType(FD)) {
204 if (auto Err = addEdges(toEPL(E->getMember()), toEPL(E->getInit())))
205 return Err;
206 }
207 }
208 }
209 return llvm::Error::success();
210}
211
212// Helper function for matchInitializerList that handles record:
213llvm::Error matchInitializerListForRecordDecl(PointerFlowMatcher &Matcher,
214 const RecordDecl *RecordTy,
215 const InitListExpr *ILE) {
216 if (auto *CXXRD = dyn_cast<CXXRecordDecl>(RecordTy))
217 if (CXXRD->getNumBases() != 0) {
218 // FIXME: support this:
219 return makeErrAtNode(
220 Matcher.Ctx, ILE,
221 "attempt to create pointer assignment edges between "
222 "CXXRecordDecls with base classes and initializer-lists");
223 }
224 // Handle union:
225 if (RecordTy->isUnion()) {
227
228 if (!InitField || ILE->inits().empty())
229 return llvm::Error::success();
230 return Matcher.matchesInitializerList(InitField, ILE->getInit(0));
231 }
232 // Handle struct/class:
233 ILE = ILE->isSemanticForm() ? ILE : ILE->getSemanticForm();
234
235 auto FieldIter = RecordTy->field_begin();
236
237 assert(RecordTy->getNumFields() >= ILE->getNumInits());
238 for (auto *Init : ILE->inits())
239 if (auto Err = Matcher.matchesInitializerList(*(FieldIter++), Init))
240 return Err;
241 return llvm::Error::success();
242}
243
244// Helper function for matchInitializerList that handles array:
245llvm::Error matchInitializerListForArray(PointerFlowMatcher &Matcher,
246 const ValueDecl *Array,
247 const InitListExpr *ILE,
248 unsigned ArrayIndirectLevel = 0) {
249 for (auto *E : ILE->inits())
250 if (auto Err =
251 Matcher.matchesInitializerList(Array, E, ArrayIndirectLevel + 1))
252 return Err;
253 return llvm::Error::success();
254}
255
256/// Match initializer lists of the form 'Var = {a, b, c, ...}':
257///
258/// If 'Var' is a struct/union:
259/// XF(Var = {a, b, c, ...}) := XF(Var.field_1 = a)
260/// XF(Var.field_2 = b)
261/// ...
262/// If 'Var' is an array:
263/// XF(Var = {a, b, c, ...}) := XF(*Var = a)
264/// XF(*Var = b)
265/// ...
266///
267/// The process is recursive: 'a', 'b', 'c', ... may themselves be
268/// initializer lists. We therefore use \p ArrayElementIndirectLevel to keep
269/// track of the pointer level the left-hand side.
270llvm::Error
271PointerFlowMatcher::matchesInitializerList(const ValueDecl *Base,
272 const Expr *InitExpr,
273 unsigned ArrayElementIndirectLevel) {
274 const InitListExpr *ILE = dyn_cast<InitListExpr>(InitExpr);
275
276 if (!ILE) {
277 if (!hasPtrOrArrType(InitExpr))
278 return llvm::Error::success();
279
280 auto BaseEPL = toEPL(Base);
281
282 if (!BaseEPL)
283 return BaseEPL.takeError();
284
285 // Apply ArrayElementIndirectLevel to BaseEPL
286 auto R = llvm::map_range(*BaseEPL, [&ArrayElementIndirectLevel](
287 const EntityPointerLevel &EPL) {
288 EntityPointerLevel Result = EPL;
289 for ([[maybe_unused]] auto Ignored : llvm::seq(ArrayElementIndirectLevel))
291 return Result;
292 });
293 return addEdges(EntityPointerLevelSet{R.begin(), R.end()}, toEPL(InitExpr));
294 }
295 // Note that `Base`'s type is NOT the real LHS type when
296 // ArrayElementIndirectLevel > 0:
297 QualType Type = InitExpr->getType();
298
299 if (auto *RD = Type->getAsRecordDecl())
300 return matchInitializerListForRecordDecl(*this, RD, ILE);
301 if (Type->isArrayType())
302 return matchInitializerListForArray(*this, Base, ILE,
303 ArrayElementIndirectLevel);
304
305 // Must be the case of using a initializer-list for a scalar.
306 // The initializer-list can be either singleton or empty:
307 if (ILE->getNumInits() == 0)
308 return llvm::Error::success();
309 return matchesInitializerList(Base, ILE->getInit(0));
310}
311
312class PointerFlowTUSummaryExtractor : public TUSummaryExtractor {
313public:
314 PointerFlowTUSummaryExtractor(TUSummaryBuilder &Builder)
315 : TUSummaryExtractor(Builder) {}
316
317 /// \return a non-null unique pointer to a PointerFlowEntitySummary
318 std::unique_ptr<PointerFlowEntitySummary>
319 extractEntitySummary(const NamedDecl *Contributor, ASTContext &Ctx,
320 TUSummaryExtractor &Extractor) {
321 PointerFlowMatcher Matcher(Ctx, Extractor);
322 auto MatchAction = [&Matcher, &Contributor](const DynTypedNode &Node) {
323 auto Err = Matcher.matches(Node, Contributor);
324
325 if (Err)
326 logWarningFromError(std::move(Err));
327 };
328
329 findMatchesIn(Contributor, MatchAction);
330 return std::make_unique<PointerFlowEntitySummary>(
331 buildPointerFlowEntitySummary(std::move(Matcher.Results)));
332 }
333
334 void HandleTranslationUnit(ASTContext &Ctx) override {
335 std::vector<const NamedDecl *> Contributors;
336
337 findContributors(Ctx, Contributors);
338 for (auto *CD : Contributors) {
339 auto EntitySummary = extractEntitySummary(CD, Ctx, *this);
340
341 assert(EntitySummary);
342 if (EntitySummary->empty())
343 continue;
344
345 std::optional<EntityId> ContributorId = addEntity(CD);
346 if (!ContributorId) {
348 continue;
349 }
350
351 [[maybe_unused]] auto [_, InsertionSucceeded] =
352 SummaryBuilder.addSummary(*ContributorId, std::move(EntitySummary));
353
354 assert(InsertionSucceeded && "duplicated contributor extraction");
355 }
356 }
357};
358} // namespace
359
namespace clang::ssaf {
// Anchor symbol with external linkage (hence the lint suppression below).
// NOTE(review): presumably referenced from another translation unit so that
// this file, and the static registration in it, is kept by the linker -
// confirm against the code that reads it.
// NOLINTNEXTLINE(misc-use-internal-linkage)
volatile int PointerFlowExtractorAnchorSource = 0;
} // namespace clang::ssaf
364
365static TUSummaryExtractorRegistry::Add<PointerFlowTUSummaryExtractor>
367 "Extract pointer flow information");
Defines the clang::ASTContext interface.
static TUSummaryExtractorRegistry::Add< CallGraphExtractor > RegisterExtractor(CallGraphSummary::Name, "Extracts static call-graph information")
Defines the C++ Decl subclasses, other than those for templates (found in DeclTemplate....
Defines the clang::Expr interface and subclasses for C++ expressions.
Result
Implement __builtin_bit_cast and related operations.
llvm::json::Array Array
C Language Family Type Representation.
const T * get() const
Retrieve the stored node as type T.
QualType getType() const
Definition Expr.h:144
FieldDecl * getInitializedFieldInUnion()
If this initializes a union, specifies which field in the union to initialize.
Definition Expr.h:5429
unsigned getNumInits() const
Definition Expr.h:5335
bool isSemanticForm() const
Definition Expr.h:5465
InitListExpr * getSemanticForm() const
Definition Expr.h:5466
const Expr * getInit(unsigned Init) const
Definition Expr.h:5357
ArrayRef< Expr * > inits() const
Definition Expr.h:5355
This represents a decl that may have a name.
Definition Decl.h:274
unsigned getNumFields() const
Returns the number of fields (non-static data members) in this record.
Definition Decl.h:4563
field_iterator field_begin() const
Definition Decl.cpp:5269
bool isUnion() const
Definition Decl.h:3950
static constexpr llvm::StringLiteral Name
Definition PointerFlow.h:36
DynTypedNode DynTypedNode
bool InitField(InterpState &S, CodePtr OpPC, uint32_t I)
1) Pops the value from the stack 2) Peeks a pointer from the stack 3) Pushes the value to field I of ...
Definition Interp.h:1927
PRESERVE_NONE bool Ret(InterpState &S, CodePtr &PC)
Definition Interp.h:260
void logWarningFromError(llvm::Error Err)
Log a warning from an llvm::Error.
llvm::Error makeErrAtNode(clang::ASTContext &Ctx, const NodeTy *N, llvm::StringRef Fmt, const Ts &...Args)
std::map< EntityPointerLevel, EntityPointerLevelSet > EdgeSet
Maps each LHS pointer (source / assignee) to the set of RHS pointers (destinations / assigned values)...
Definition PointerFlow.h:24
EntityPointerLevel incrementPointerLevel(const EntityPointerLevel &E)
An EntityPointerLevel is associated with a level of the declared pointer/array type of an entity.
void findContributors(ASTContext &Ctx, std::vector< const NamedDecl * > &Contributors)
Find all contributors in an AST.
llvm::Error makeEntityNameErr(clang::ASTContext &Ctx, const clang::NamedDecl *D)
void findMatchesIn(const NamedDecl *Contributor, llvm::function_ref< void(const DynTypedNode &)> MatchActionRef)
Perform "MatchAction" on each Stmt and Decl belonging to the Contributor.
PointerFlowEntitySummary buildPointerFlowEntitySummary(EdgeSet Edges)
volatile int PointerFlowExtractorAnchorSource
bool hasPtrOrArrType(const DeclOrExpr *E)
bool matches(const til::SExpr *E1, const til::SExpr *E2)
The JSON file list parser is used to communicate input to InstallAPI.
bool isa(CodeGen::Address addr)
Definition Address.h:330
@ Type
The name was classified as a type.
Definition Sema.h:564
int const char * function
Definition c++config.h:31