//===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This provides a class for OpenMP runtime code generation.
//
//===----------------------------------------------------------------------===//

#include "CGOpenMPRuntime.h"
#include "ABIInfoImpl.h"
#include "CGCXXABI.h"
#include "CGCleanup.h"
#include "CGDebugInfo.h"
#include "CGRecordLayout.h"
#include "CodeGenFunction.h"
#include "TargetInfo.h"
#include "clang/AST/APValue.h"
#include "clang/AST/Attr.h"
#include "clang/AST/Decl.h"
#include "clang/AST/OpenMPClause.h"
#include "clang/AST/StmtOpenMP.h"
#include "clang/AST/StmtVisitor.h"
#include "clang/Basic/BitmaskEnum.h"
#include "clang/Basic/FileManager.h"
#include "clang/Basic/OpenMPKinds.h"
#include "clang/Basic/SourceManager.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Bitcode/BitcodeReader.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
#include <cstdint>
#include <numeric>
#include <optional>

using namespace clang;
using namespace CodeGen;
using namespace llvm::omp;

namespace {
/// Base class for handling code generation inside OpenMP regions.
class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
public:
  /// Kinds of OpenMP regions used in codegen.
  enum CGOpenMPRegionKind {
    /// Region with outlined function for standalone 'parallel'
    /// directive.
    ParallelOutlinedRegion,
    /// Region with outlined function for standalone 'task' directive.
    TaskOutlinedRegion,
    /// Region for constructs that do not require function outlining,
    /// like 'for', 'sections', 'atomic' etc. directives.
    InlinedRegion,
    /// Region with outlined function for standalone 'target' directive.
    TargetRegion,
  };

  CGOpenMPRegionInfo(const CapturedStmt &CS,
                     const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
        CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}

  CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
        Kind(Kind), HasCancel(HasCancel) {}

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  virtual const VarDecl *getThreadIDVariable() const = 0;

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;

  /// Get an LValue for the current ThreadID variable.
  /// \return LValue for thread id variable. This LValue always has type int32*.
  virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);

  virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}

  CGOpenMPRegionKind getRegionKind() const { return RegionKind; }

  OpenMPDirectiveKind getDirectiveKind() const { return Kind; }

  bool hasCancel() const { return HasCancel; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return Info->getKind() == CR_OpenMP;
  }

  ~CGOpenMPRegionInfo() override = default;

protected:
  CGOpenMPRegionKind RegionKind;
  RegionCodeGenTy CodeGen;
  OpenMPDirectiveKind Kind;
  bool HasCancel;
};

/// API for captured statement code generation in OpenMP constructs.
class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
                             const RegionCodeGenTy &CodeGen,
                             OpenMPDirectiveKind Kind, bool HasCancel,
                             StringRef HelperName)
      : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
                           HasCancel),
        ThreadIDVar(ThreadIDVar), HelperName(HelperName) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               ParallelOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  StringRef HelperName;
};

/// API for captured statement code generation in OpenMP constructs.
class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  class UntiedTaskActionTy final : public PrePostActionTy {
    bool Untied;
    const VarDecl *PartIDVar;
    const RegionCodeGenTy UntiedCodeGen;
    llvm::SwitchInst *UntiedSwitch = nullptr;

  public:
    UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar,
                       const RegionCodeGenTy &UntiedCodeGen)
        : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {}
    void Enter(CodeGenFunction &CGF) override {
      if (Untied) {
        // Emit task switching point.
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        llvm::Value *Res =
            CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation());
        llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done.");
        UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB);
        CGF.EmitBlock(DoneBB);
        CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(0),
                              CGF.Builder.GetInsertBlock());
        emitUntiedSwitch(CGF);
      }
    }
    void emitUntiedSwitch(CodeGenFunction &CGF) const {
      if (Untied) {
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              PartIdLVal);
        UntiedCodeGen(CGF);
        CodeGenFunction::JumpDest CurPoint =
            CGF.getJumpDestInCurrentScope(".untied.next.");
        CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              CGF.Builder.GetInsertBlock());
        CGF.EmitBranchThroughCleanup(CurPoint);
        CGF.EmitBlock(CurPoint.getBlock());
      }
    }
    unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); }
  };
  CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
                                 const VarDecl *ThreadIDVar,
                                 const RegionCodeGenTy &CodeGen,
                                 OpenMPDirectiveKind Kind, bool HasCancel,
                                 const UntiedTaskActionTy &Action)
      : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
        ThreadIDVar(ThreadIDVar), Action(Action) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return ".omp_outlined."; }

  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    Action.emitUntiedSwitch(CGF);
  }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               TaskOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  /// Action for emitting code for untied tasks.
  const UntiedTaskActionTy &Action;
};
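
// Illustrative sketch (not part of the original source): inside the outlined
// task function, the untied-task action above effectively builds
//   switch (*part_id) {            // default branches to ".untied.done."
//   case 0: goto untied_jmp0;      // initial entry into the task body
//   case 1: goto untied_jmp1;      // resume after the first switching point
//   ...
//   }
// Each switching point stores the next case index into *part_id and
// re-enqueues the task (via the __kmpc_omp_task call supplied as
// UntiedCodeGen) before leaving the function.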

/// API for inlined captured statement code generation in OpenMP
/// constructs.
class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
public:
  CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
                            const RegionCodeGenTy &CodeGen,
                            OpenMPDirectiveKind Kind, bool HasCancel)
      : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
        OldCSI(OldCSI),
        OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}

  // Retrieve the value of the context parameter.
  llvm::Value *getContextValue() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getContextValue();
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  void setContextValue(llvm::Value *V) override {
    if (OuterRegionInfo) {
      OuterRegionInfo->setContextValue(V);
      return;
    }
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  /// Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->lookup(VD);
    // If there is no outer outlined region, there is no need to look the
    // variable up in a list of captured variables; we can use the original
    // one.
    return nullptr;
  }

  FieldDecl *getThisFieldDecl() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThisFieldDecl();
    return nullptr;
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariable();
    return nullptr;
  }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariableLValue(CGF);
    llvm_unreachable("No LValue for inlined OpenMP construct");
  }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override {
    if (auto *OuterRegionInfo = getOldCSI())
      return OuterRegionInfo->getHelperName();
    llvm_unreachable("No helper name for inlined OpenMP construct");
  }

  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      OuterRegionInfo->emitUntiedSwitch(CGF);
  }

  CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
  }

  ~CGOpenMPInlinedRegionInfo() override = default;

private:
  /// CodeGen info about outer OpenMP region.
  CodeGenFunction::CGCapturedStmtInfo *OldCSI;
  CGOpenMPRegionInfo *OuterRegionInfo;
};

/// API for captured statement code generation in OpenMP target
/// constructs. For these captures, implicit parameters are used instead of
/// the captured fields. The name of the target region has to be unique in a
/// given application, so it is provided by the client, because only the
/// client has the information to generate it.
class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo {
public:
  CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
                           const RegionCodeGenTy &CodeGen, StringRef HelperName)
      : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
                           /*HasCancel=*/false),
        HelperName(HelperName) {}

  /// This is unused for target regions because each starts executing
  /// with a single thread.
  const VarDecl *getThreadIDVariable() const override { return nullptr; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;
  }

private:
  StringRef HelperName;
};

static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) {
  llvm_unreachable("No codegen for expressions");
}
/// API for generation of expressions captured in an innermost OpenMP
/// region.
class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo {
public:
  CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS)
      : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen,
                                  OMPD_unknown,
                                  /*HasCancel=*/false),
        PrivScope(CGF) {
    // Make sure the globals captured in the provided statement are local by
    // using the privatization logic. We assume the same variable is not
    // captured more than once.
    for (const auto &C : CS.captures()) {
      if (!C.capturesVariable() && !C.capturesVariableByCopy())
        continue;

      const VarDecl *VD = C.getCapturedVar();
      if (VD->isLocalVarDeclOrParm())
        continue;

      DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
                      /*RefersToEnclosingVariableOrCapture=*/false,
                      VD->getType().getNonReferenceType(), VK_LValue,
                      C.getLocation());
      PrivScope.addPrivate(VD, CGF.EmitLValue(&DRE).getAddress());
    }
    (void)PrivScope.Privatize();
  }

  /// Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD))
      return FD;
    return nullptr;
  }

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override {
    llvm_unreachable("No body for expressions");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    llvm_unreachable("No thread id for expressions");
  }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override {
    llvm_unreachable("No helper name for expressions");
  }

  static bool classof(const CGCapturedStmtInfo *Info) { return false; }

private:
  /// Private scope to capture global variables.
  CodeGenFunction::OMPPrivateScope PrivScope;
};

/// RAII for emitting code of OpenMP constructs.
class InlinedOpenMPRegionRAII {
  CodeGenFunction &CGF;
  llvm::DenseMap<const ValueDecl *, FieldDecl *> LambdaCaptureFields;
  FieldDecl *LambdaThisCaptureField = nullptr;
  const CodeGen::CGBlockInfo *BlockInfo = nullptr;
  bool NoInheritance = false;

public:
  /// Constructs region for combined constructs.
  /// \param CodeGen Code generation sequence for combined directives. Includes
  /// a list of functions used for code generation of implicitly inlined
  /// regions.
  InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
                          OpenMPDirectiveKind Kind, bool HasCancel,
                          bool NoInheritance = true)
      : CGF(CGF), NoInheritance(NoInheritance) {
    // Start emission for the construct.
    CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
        CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
    if (NoInheritance) {
      std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
      LambdaThisCaptureField = CGF.LambdaThisCaptureField;
      CGF.LambdaThisCaptureField = nullptr;
      BlockInfo = CGF.BlockInfo;
      CGF.BlockInfo = nullptr;
    }
  }

  ~InlinedOpenMPRegionRAII() {
    // Restore original CapturedStmtInfo only if we're done with code emission.
    auto *OldCSI =
        cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
    delete CGF.CapturedStmtInfo;
    CGF.CapturedStmtInfo = OldCSI;
    if (NoInheritance) {
      std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
      CGF.LambdaThisCaptureField = LambdaThisCaptureField;
      CGF.BlockInfo = BlockInfo;
    }
  }
};
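
// Illustrative usage (not part of the original source): emitting the body of
// an inlined construct such as 'critical' looks roughly like
//   InlinedOpenMPRegionRAII Region(CGF, CodeGen, OMPD_critical, HasCancel);
//   CGF.EmitStmt(Body); // emitted under the inlined-region CapturedStmtInfo
// with the previous CapturedStmtInfo restored when Region goes out of scope.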

/// Values for bit flags used in the ident_t to describe the fields.
/// All enumerated elements are named and described in accordance with the code
/// from https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
enum OpenMPLocationFlags : unsigned {
  /// Use trampoline for internal microtask.
  OMP_IDENT_IMD = 0x01,
  /// Use c-style ident structure.
  OMP_IDENT_KMPC = 0x02,
  /// Atomic reduction option for kmpc_reduce.
  OMP_ATOMIC_REDUCE = 0x10,
  /// Explicit 'barrier' directive.
  OMP_IDENT_BARRIER_EXPL = 0x20,
  /// Implicit barrier in code.
  OMP_IDENT_BARRIER_IMPL = 0x40,
  /// Implicit barrier in 'for' directive.
  OMP_IDENT_BARRIER_IMPL_FOR = 0x40,
  /// Implicit barrier in 'sections' directive.
  OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0,
  /// Implicit barrier in 'single' directive.
  OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140,
  /// Call of __kmp_for_static_init for static loop.
  OMP_IDENT_WORK_LOOP = 0x200,
  /// Call of __kmp_for_static_init for sections.
  OMP_IDENT_WORK_SECTIONS = 0x400,
  /// Call of __kmp_for_static_init for distribute.
  OMP_IDENT_WORK_DISTRIBUTE = 0x800,
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE)
};
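
// Illustrative example (not part of the original source): the flags for the
// implicit barrier at the end of a worksharing loop combine as
//   OMP_IDENT_KMPC | OMP_IDENT_BARRIER_IMPL_FOR   // 0x02 | 0x40 == 0x42
// Note that OMP_IDENT_BARRIER_IMPL_SECTIONS (0xC0) and
// OMP_IDENT_BARRIER_IMPL_SINGLE (0x140) both contain the
// OMP_IDENT_BARRIER_IMPL (0x40) bit rather than being independent flags.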

/// Describes ident structure that describes a source location.
/// All descriptions are taken from
/// https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
/// Original structure:
/// typedef struct ident {
///   kmp_int32 reserved_1;  /**< might be used in Fortran;
///                               see above */
///   kmp_int32 flags;       /**< also f.flags; KMP_IDENT_xxx flags;
///                               KMP_IDENT_KMPC identifies this union
///                               member */
///   kmp_int32 reserved_2;  /**< not really used in Fortran any more;
///                               see above */
///#if USE_ITT_BUILD
///                          /* but currently used for storing
///                             region-specific ITT */
///                          /* contextual information. */
///#endif /* USE_ITT_BUILD */
///   kmp_int32 reserved_3;  /**< source[4] in Fortran, do not use for
///                               C++ */
///   char const *psource;   /**< String describing the source location.
///                               The string is composed of semi-colon separated
///                               fields which describe the source file,
///                               the function and a pair of line numbers that
///                               delimit the construct.
///                           */
/// } ident_t;
enum IdentFieldIndex {
  /// might be used in Fortran
  IdentField_Reserved_1,
  /// OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
  IdentField_Flags,
  /// Not really used in Fortran any more
  IdentField_Reserved_2,
  /// Source[4] in Fortran, do not use for C++
  IdentField_Reserved_3,
  /// String describing the source location. The string is composed of
  /// semi-colon separated fields which describe the source file, the function
  /// and a pair of line numbers that delimit the construct.
  IdentField_PSource
};
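
// Illustrative example (not part of the original source): for a construct in
// function 'foo' at line 10, column 3 of test.c, the psource field built by
// getIdentStringFromSourceLocation() below holds the semicolon-separated
// string ";test.c;foo;10;3;;".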

/// Schedule types for 'omp for' loops (these enumerators are taken from
/// the enum sched_type in kmp.h).
enum OpenMPSchedType {
  /// Lower bound for default (unordered) versions.
  OMP_sch_lower = 32,
  OMP_sch_static_chunked = 33,
  OMP_sch_static = 34,
  OMP_sch_dynamic_chunked = 35,
  OMP_sch_guided_chunked = 36,
  OMP_sch_runtime = 37,
  OMP_sch_auto = 38,
  /// static with chunk adjustment (e.g., simd)
  OMP_sch_static_balanced_chunked = 45,
  /// Lower bound for 'ordered' versions.
  OMP_ord_lower = 64,
  OMP_ord_static_chunked = 65,
  OMP_ord_static = 66,
  OMP_ord_dynamic_chunked = 67,
  OMP_ord_guided_chunked = 68,
  OMP_ord_runtime = 69,
  OMP_ord_auto = 70,
  OMP_sch_default = OMP_sch_static,
  /// dist_schedule types
  OMP_dist_sch_static_chunked = 91,
  OMP_dist_sch_static = 92,
  /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
  /// Set if the monotonic schedule modifier was present.
  OMP_sch_modifier_monotonic = (1 << 29),
  /// Set if the nonmonotonic schedule modifier was present.
  OMP_sch_modifier_nonmonotonic = (1 << 30),
};
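
// Illustrative example (not part of the original source): for
//   #pragma omp for schedule(nonmonotonic: dynamic)
// the schedule value passed to the runtime is the base enumerator with the
// modifier bit OR'ed in:
//   OMP_sch_dynamic_chunked | OMP_sch_modifier_nonmonotonic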

/// A basic class for pre|post-action for advanced codegen sequence for OpenMP
/// region.
class CleanupTy final : public EHScopeStack::Cleanup {
  PrePostActionTy *Action;

public:
  explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {}
  void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
    if (!CGF.HaveInsertPoint())
      return;
    Action->Exit(CGF);
  }
};

} // anonymous namespace

void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const {
  CodeGenFunction::RunCleanupsScope Scope(CGF);
  if (PrePostAction) {
    CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction);
    Callback(CodeGen, CGF, *PrePostAction);
  } else {
    PrePostActionTy Action;
    Callback(CodeGen, CGF, Action);
  }
}

/// Check if the combiner is a call to a UDR combiner and, if so, return the
/// UDR decl used for the reduction.
static const OMPDeclareReductionDecl *
getReductionInit(const Expr *ReductionOp) {
  if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
    if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
      if (const auto *DRE =
              dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
        if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl()))
          return DRD;
  return nullptr;
}

static void emitInitWithReductionInitializer(CodeGenFunction &CGF,
                                             const OMPDeclareReductionDecl *DRD,
                                             const Expr *InitOp,
                                             Address Private, Address Original,
                                             QualType Ty) {
  if (DRD->getInitializer()) {
    std::pair<llvm::Function *, llvm::Function *> Reduction =
        CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
    const auto *CE = cast<CallExpr>(InitOp);
    const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee());
    const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
    const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
    const auto *LHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr());
    const auto *RHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr());
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()), Private);
    PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()), Original);
    (void)PrivateScope.Privatize();
    RValue Func = RValue::get(Reduction.second);
    CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
    CGF.EmitIgnoredExpr(InitOp);
  } else {
    llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty);
    std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"});
    auto *GV = new llvm::GlobalVariable(
        CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
        llvm::GlobalValue::PrivateLinkage, Init, Name);
    LValue LV = CGF.MakeNaturalAlignRawAddrLValue(GV, Ty);
    RValue InitRVal;
    switch (CGF.getEvaluationKind(Ty)) {
    case TEK_Scalar:
      InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation());
      break;
    case TEK_Complex:
      InitRVal =
          RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation()));
      break;
    case TEK_Aggregate: {
      OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_LValue);
      CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, LV);
      CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
                           /*IsInitializer=*/false);
      return;
    }
    }
    OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_PRValue);
    CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
    CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
                         /*IsInitializer=*/false);
  }
}
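
// Illustrative example (not part of the original source): given a UDR such as
//   #pragma omp declare reduction(merge : T : omp_out += omp_in) initializer(omp_priv = T(0))
// the block above binds omp_priv to the Private address and omp_orig to the
// Original address, then emits the initializer expression InitOp.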

/// Emit initialization of arrays of complex types.
/// \param DestAddr Address of the array.
/// \param Type Type of array.
/// \param Init Initial expression of array.
/// \param SrcAddr Address of the original array.
static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
                                 QualType Type, bool EmitDeclareReductionInit,
                                 const Expr *Init,
                                 const OMPDeclareReductionDecl *DRD,
                                 Address SrcAddr = Address::invalid()) {
  // Perform element-by-element initialization.
  QualType ElementTy;

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
  if (DRD)
    SrcAddr = SrcAddr.withElementType(DestAddr.getElementType());

  llvm::Value *SrcBegin = nullptr;
  if (DRD)
    SrcBegin = SrcAddr.emitRawPointer(CGF);
  llvm::Value *DestBegin = DestAddr.emitRawPointer(CGF);
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *DestEnd =
      CGF.Builder.CreateGEP(DestAddr.getElementType(), DestBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  llvm::PHINode *SrcElementPHI = nullptr;
  Address SrcElementCurrent = Address::invalid();
  if (DRD) {
    SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2,
                                          "omp.arraycpy.srcElementPast");
    SrcElementPHI->addIncoming(SrcBegin, EntryBB);
    SrcElementCurrent =
        Address(SrcElementPHI, SrcAddr.getElementType(),
                SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
  }
  llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
      DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(DestBegin, EntryBB);
  Address DestElementCurrent =
      Address(DestElementPHI, DestAddr.getElementType(),
              DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  {
    CodeGenFunction::RunCleanupsScope InitScope(CGF);
    if (EmitDeclareReductionInit) {
      emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent,
                                       SrcElementCurrent, ElementTy);
    } else
      CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
                           /*IsInitializer=*/false);
  }

  if (DRD) {
    // Shift the address forward by one element.
    llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32(
        SrcAddr.getElementType(), SrcElementPHI, /*Idx0=*/1,
        "omp.arraycpy.dest.element");
    SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock());
  }

  // Shift the address forward by one element.
  llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32(
      DestAddr.getElementType(), DestElementPHI, /*Idx0=*/1,
      "omp.arraycpy.dest.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}

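// Illustrative sketch (not part of the original source) of the loop structure
// the routine above emits, written as C-like pseudocode:
//   if (dest == dest_end) goto omp.arrayinit.done;
//   do {                                   // omp.arrayinit.body
//     *dest = init;                        // or the UDR initializer
//     ++dest; if (src) ++src;
//   } while (dest != dest_end);
//                                          // omp.arrayinit.done
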
LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) {
  return CGF.EmitOMPSharedLValue(E);
}

LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF,
                                            const Expr *E) {
  if (const auto *OASE = dyn_cast<ArraySectionExpr>(E))
    return CGF.EmitArraySectionExpr(OASE, /*IsLowerBound=*/false);
  return LValue();
}

void ReductionCodeGen::emitAggregateInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, Address SharedAddr,
    const OMPDeclareReductionDecl *DRD) {
  // Emit VarDecl with copy init for arrays.
  // Get the address of the original variable captured in current
  // captured region.
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  bool EmitDeclareReductionInit =
      DRD && (DRD->getInitializer() || !PrivateVD->hasInit());
  EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(),
                       EmitDeclareReductionInit,
                       EmitDeclareReductionInit ? ClausesData[N].ReductionOp
                                                : PrivateVD->getInit(),
                       DRD, SharedAddr);
}

ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds,
                                   ArrayRef<const Expr *> Origs,
                                   ArrayRef<const Expr *> Privates,
                                   ArrayRef<const Expr *> ReductionOps) {
  ClausesData.reserve(Shareds.size());
  SharedAddresses.reserve(Shareds.size());
  Sizes.reserve(Shareds.size());
  BaseDecls.reserve(Shareds.size());
  const auto *IOrig = Origs.begin();
  const auto *IPriv = Privates.begin();
  const auto *IRed = ReductionOps.begin();
  for (const Expr *Ref : Shareds) {
    ClausesData.emplace_back(Ref, *IOrig, *IPriv, *IRed);
    std::advance(IOrig, 1);
    std::advance(IPriv, 1);
    std::advance(IRed, 1);
  }
}

void ReductionCodeGen::emitSharedOrigLValue(CodeGenFunction &CGF, unsigned N) {
  assert(SharedAddresses.size() == N && OrigAddresses.size() == N &&
         "Number of generated lvalues must be exactly N.");
  LValue First = emitSharedLValue(CGF, ClausesData[N].Shared);
  LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Shared);
  SharedAddresses.emplace_back(First, Second);
  if (ClausesData[N].Shared == ClausesData[N].Ref) {
    OrigAddresses.emplace_back(First, Second);
  } else {
    LValue First = emitSharedLValue(CGF, ClausesData[N].Ref);
    LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref);
    OrigAddresses.emplace_back(First, Second);
  }
}

void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) {
  QualType PrivateType = getPrivateType(N);
  bool AsArraySection = isa<ArraySectionExpr>(ClausesData[N].Ref);
  if (!PrivateType->isVariablyModifiedType()) {
    Sizes.emplace_back(
        CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType()),
        nullptr);
    return;
  }
  llvm::Value *Size;
  llvm::Value *SizeInChars;
  auto *ElemType = OrigAddresses[N].first.getAddress().getElementType();
  auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType);
  if (AsArraySection) {
    Size = CGF.Builder.CreatePtrDiff(ElemType,
                                     OrigAddresses[N].second.getPointer(CGF),
                                     OrigAddresses[N].first.getPointer(CGF));
    Size = CGF.Builder.CreateNUWAdd(
        Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
    SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf);
  } else {
    SizeInChars =
        CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType());
    Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf);
  }
  Sizes.emplace_back(SizeInChars, Size);
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}
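
// Illustrative note (not part of the original source): for a reduction over
// an array section such as a[lb:len], the code above computes
//   Size        = (ub_pointer - lb_pointer) + 1   // element count
//   SizeInChars = Size * sizeof(element)
// and stores the pair in Sizes[N] for later allocation of the private copy.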

void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N,
                                         llvm::Value *Size) {
  QualType PrivateType = getPrivateType(N);
  if (!PrivateType->isVariablyModifiedType()) {
    assert(!Size && !Sizes[N].second &&
           "Size should be nullptr for non-variably modified reduction "
           "items.");
    return;
  }
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}

void ReductionCodeGen::emitInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, Address SharedAddr,
    llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) {
  assert(SharedAddresses.size() > N && "No variable was generated");
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  if (CGF.getContext().getAsArrayType(PrivateVD->getType())) {
    if (DRD && DRD->getInitializer())
      (void)DefaultInit(CGF);
    emitAggregateInitialization(CGF, N, PrivateAddr, SharedAddr, DRD);
  } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
    (void)DefaultInit(CGF);
    QualType SharedType = SharedAddresses[N].first.getType();
    emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp,
                                     PrivateAddr, SharedAddr, SharedType);
  } else if (!DefaultInit(CGF) && PrivateVD->hasInit() &&
             !CGF.isTrivialInitializer(PrivateVD->getInit())) {
    CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr,
                         PrivateVD->getType().getQualifiers(),
                         /*IsInitializer=*/false);
  }
}

bool ReductionCodeGen::needCleanups(unsigned N) {
  QualType PrivateType = getPrivateType(N);
  QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
  return DTorKind != QualType::DK_none;
}

void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N,
                                    Address PrivateAddr) {
  QualType PrivateType = getPrivateType(N);
  QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
  if (needCleanups(N)) {
    PrivateAddr =
        PrivateAddr.withElementType(CGF.ConvertTypeForMem(PrivateType));
    CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType);
  }
}

static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          LValue BaseLV) {
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    if (const auto *PtrTy = BaseTy->getAs<PointerType>()) {
      BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(), PtrTy);
    } else {
      LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(), BaseTy);
      BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal);
    }
    BaseTy = BaseTy->getPointeeType();
  }
  return CGF.MakeAddrLValue(
      BaseLV.getAddress().withElementType(CGF.ConvertTypeForMem(ElTy)),
      BaseLV.getType(), BaseLV.getBaseInfo(),
      CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType()));
}

static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          Address OriginalBaseAddress, llvm::Value *Addr) {
  Address Tmp = Address::invalid();
  Address TopTmp = Address::invalid();
  Address MostTopTmp = Address::invalid();
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    Tmp = CGF.CreateMemTemp(BaseTy);
    if (TopTmp.isValid())
      CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
    else
      MostTopTmp = Tmp;
    TopTmp = Tmp;
    BaseTy = BaseTy->getPointeeType();
  }

  if (Tmp.isValid()) {
    Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        Addr, Tmp.getElementType());
    CGF.Builder.CreateStore(Addr, Tmp);
    return MostTopTmp;
  }

  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Addr, OriginalBaseAddress.getType());
  return OriginalBaseAddress.withPointer(Addr, NotKnownNonNull);
}

static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) {
  const VarDecl *OrigVD = nullptr;
  if (const auto *OASE = dyn_cast<ArraySectionExpr>(Ref)) {
    const Expr *Base = OASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempOASE = dyn_cast<ArraySectionExpr>(Base))
      Base = TempOASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
      Base = TempASE->getBase()->IgnoreParenImpCasts();
    DE = cast<DeclRefExpr>(Base);
    OrigVD = cast<VarDecl>(DE->getDecl());
  } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) {
    const Expr *Base = ASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
      Base = TempASE->getBase()->IgnoreParenImpCasts();
    DE = cast<DeclRefExpr>(Base);
    OrigVD = cast<VarDecl>(DE->getDecl());
  }
  return OrigVD;
}

Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N,
                                               Address PrivateAddr) {
  const DeclRefExpr *DE;
  if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) {
    BaseDecls.emplace_back(OrigVD);
    LValue OriginalBaseLValue = CGF.EmitLValue(DE);
    LValue BaseLValue =
        loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(),
                    OriginalBaseLValue);
    Address SharedAddr = SharedAddresses[N].first.getAddress();
    llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff(
        SharedAddr.getElementType(), BaseLValue.getPointer(CGF),
        SharedAddr.emitRawPointer(CGF));
    llvm::Value *PrivatePointer =
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            PrivateAddr.emitRawPointer(CGF), SharedAddr.getType());
    llvm::Value *Ptr = CGF.Builder.CreateGEP(
        SharedAddr.getElementType(), PrivatePointer, Adjustment);
    return castToBase(CGF, OrigVD->getType(),
                      SharedAddresses[N].first.getType(),
                      OriginalBaseLValue.getAddress(), Ptr);
  }
  BaseDecls.emplace_back(
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl()));
  return PrivateAddr;
}
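
// Illustrative note (not part of the original source): for
//   #pragma omp parallel for reduction(+ : a[1:n])
// Adjustment above is the element offset of the base of 'a' relative to the
// start of the section, so the GEP rebases the private copy to an address
// that can stand in for the original base declaration in the generated
// reduction code.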

bool ReductionCodeGen::usesReductionInitializer(unsigned N) const {
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  return DRD && DRD->getInitializer();
}

LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
  return CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(getThreadIDVariable()),
      getThreadIDVariable()->getType()->castAs<PointerType>());
}

void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt *S) {
  if (!CGF.HaveInsertPoint())
    return;
  // 1.2.2 OpenMP Language Terminology
  // Structured block - An executable statement with a single entry at the
  // top and a single exit at the bottom.
  // The point of exit cannot be a branch out of the structured block.
  // longjmp() and throw() must not violate the entry/exit criteria.
  CGF.EHStack.pushTerminate();
  if (S)
    CGF.EmitStmt(S);
  CodeGen(CGF);
  CGF.EHStack.popTerminate();
}

LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
    CodeGenFunction &CGF) {
  return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
                            getThreadIDVariable()->getType(),
                            AlignmentSource::Decl);
}

static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
                                       QualType FieldTy) {
  auto *Field = FieldDecl::Create(
      C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
      C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
      /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
  Field->setAccess(AS_public);
  DC->addDecl(Field);
  return Field;
}

CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM)
    : CGM(CGM), OMPBuilder(CGM.getModule()) {
  KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);
  llvm::OpenMPIRBuilderConfig Config(
      CGM.getLangOpts().OpenMPIsTargetDevice, isGPU(),
      CGM.getLangOpts().OpenMPOffloadMandatory,
      /*HasRequiresReverseOffload*/ false, /*HasRequiresUnifiedAddress*/ false,
      hasRequiresUnifiedSharedMemory(), /*HasRequiresDynamicAllocators*/ false);
  Config.setDefaultTargetAS(
      CGM.getContext().getTargetInfo().getTargetAddressSpace(LangAS::Default));
  Config.setRuntimeCC(CGM.getRuntimeCC());

  OMPBuilder.setConfig(Config);
  OMPBuilder.initialize();
  OMPBuilder.loadOffloadInfoMetadata(*CGM.getFileSystem(),
                                     CGM.getLangOpts().OpenMPIsTargetDevice
                                         ? CGM.getLangOpts().OMPHostIRFile
                                         : StringRef{});

  // The user forces the compiler to behave as if omp requires
  // unified_shared_memory was given.
  if (CGM.getLangOpts().OpenMPForceUSM) {
    HasRequiresUnifiedSharedMemory = true;
    OMPBuilder.Config.setHasRequiresUnifiedSharedMemory(true);
  }
}

void CGOpenMPRuntime::clear() {
  InternalVars.clear();
  // Clean non-target variable declarations possibly used only in debug info.
  for (const auto &Data : EmittedNonTargetVariables) {
    if (!Data.getValue().pointsToAliveValue())
      continue;
    auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue());
    if (!GV)
      continue;
    if (!GV->isDeclaration() || GV->getNumUses() > 0)
      continue;
    GV->eraseFromParent();
  }
}

std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const {
  return OMPBuilder.createPlatformSpecificName(Parts);
}

static llvm::Function *
emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,
                          const Expr *CombinerInitializer, const VarDecl *In,
                          const VarDecl *Out, bool IsCombiner) {
  // void .omp_combiner.(Ty *in, Ty *out);
  ASTContext &C = CGM.getContext();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  FunctionArgList Args;
  ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(),
                               /*Id=*/nullptr, PtrTy, ImplicitParamKind::Other);
  ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(),
                              /*Id=*/nullptr, PtrTy, ImplicitParamKind::Other);
  Args.push_back(&OmpOutParm);
  Args.push_back(&OmpInParm);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName(
      {IsCombiner ? "omp_combiner" : "omp_initializer", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  if (CGM.getCodeGenOpts().OptimizationLevel != 0) {
    Fn->removeFnAttr(llvm::Attribute::NoInline);
    Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
    Fn->addFnAttr(llvm::Attribute::AlwaysInline);
  }
  CodeGenFunction CGF(CGM);
  // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
  // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(),
                    Out->getLocation());
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm);
  Scope.addPrivate(
      In, CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>())
              .getAddress());
  Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm);
  Scope.addPrivate(
      Out, CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>())
               .getAddress());
  (void)Scope.Privatize();
  if (!IsCombiner && Out->hasInit() &&
      !CGF.isTrivialInitializer(Out->getInit())) {
    CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out),
                         Out->getType().getQualifiers(),
                         /*IsInitializer=*/true);
  }
  if (CombinerInitializer)
    CGF.EmitIgnoredExpr(CombinerInitializer);
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}
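
// Illustrative example (not part of the original source): for
//   #pragma omp declare reduction(mymin : int : omp_out = omp_in < omp_out ? omp_in : omp_out)
// the combiner emitted above behaves like
//   static void .omp_combiner.(int *restrict omp_out, int *restrict omp_in) {
//     *omp_out = *omp_in < *omp_out ? *omp_in : *omp_out;
//   }
// with omp_in/omp_out privatized to the pointees of the two parameters.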

void CGOpenMPRuntime::emitUserDefinedReduction(
    CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) {
  if (UDRMap.count(D) > 0)
    return;
  llvm::Function *Combiner = emitCombinerOrInitializer(
      CGM, D->getType(), D->getCombiner(),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerIn())->getDecl()),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerOut())->getDecl()),
      /*IsCombiner=*/true);
  llvm::Function *Initializer = nullptr;
  if (const Expr *Init = D->getInitializer()) {
    Initializer = emitCombinerOrInitializer(
        CGM, D->getType(),
        D->getInitializerKind() == OMPDeclareReductionInitKind::Call ? Init
                                                                     : nullptr,
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()),
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()),
        /*IsCombiner=*/false);
  }
  UDRMap.try_emplace(D, Combiner, Initializer);
  if (CGF)
    FunctionUDRMap[CGF->CurFn].push_back(D);
}

std::pair<llvm::Function *, llvm::Function *>
CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) {
  auto I = UDRMap.find(D);
  if (I != UDRMap.end())
    return I->second;
  emitUserDefinedReduction(/*CGF=*/nullptr, D);
  return UDRMap.lookup(D);
}

namespace {
// Temporary RAII solution to perform a push/pop stack event on the OpenMP IR
// Builder if one is present.
struct PushAndPopStackRAII {
  PushAndPopStackRAII(llvm::OpenMPIRBuilder *OMPBuilder, CodeGenFunction &CGF,
                      bool HasCancel, llvm::omp::Directive Kind)
      : OMPBuilder(OMPBuilder) {
    if (!OMPBuilder)
      return;

    // The following callback is the crucial part of clang's cleanup process.
    //
    // NOTE:
    // Once the OpenMPIRBuilder is used to create parallel regions (and
    // similar), the cancellation destination (Dest below) is determined via
    // IP. That means if we have variables to finalize we split the block at IP,
    // use the new block (=BB) as destination to build a JumpDest (via
    // getJumpDestInCurrentScope(BB)) which then is fed to
    // EmitBranchThroughCleanup. Furthermore, there will not be the need
    // to push & pop a FinalizationInfo object.
    // The FiniCB will still be needed but at the point where the
    // OpenMPIRBuilder is asked to construct a parallel (or similar) construct.
    auto FiniCB = [&CGF](llvm::OpenMPIRBuilder::InsertPointTy IP) {
      assert(IP.getBlock()->end() == IP.getPoint() &&
             "Clang CG should cause non-terminated block!");
      CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
      CGF.Builder.restoreIP(IP);
      CodeGenFunction::JumpDest Dest =
          CGF.getOMPCancelDestination(OMPD_parallel);
      CGF.EmitBranchThroughCleanup(Dest);
      return llvm::Error::success();
    };

    // TODO: Remove this once we emit parallel regions through the
    // OpenMPIRBuilder as it can do this setup internally.
    llvm::OpenMPIRBuilder::FinalizationInfo FI({FiniCB, Kind, HasCancel});
    OMPBuilder->pushFinalizationCB(std::move(FI));
  }
  ~PushAndPopStackRAII() {
    if (OMPBuilder)
      OMPBuilder->popFinalizationCB();
  }
  llvm::OpenMPIRBuilder *OMPBuilder;
};
} // namespace

static llvm::Function *emitParallelOrTeamsOutlinedFunction(
    CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) {
  assert(ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 *");
  CodeGenFunction CGF(CGM, true);
  bool HasCancel = false;
  if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D))
    HasCancel = OPD->hasCancel();
  else if (const auto *OPD = dyn_cast<OMPTargetParallelDirective>(&D))
    HasCancel = OPD->hasCancel();
  else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D))
    HasCancel = OPSD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD =
               dyn_cast<OMPTeamsDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD =
               dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();

  // TODO: Temporarily inform the OpenMPIRBuilder, if any, about the new
  // parallel region to make cancellation barriers work properly.
  llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
  PushAndPopStackRAII PSR(&OMPBuilder, CGF, HasCancel, InnermostKind);
  CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
                                    HasCancel, OutlinedHelperName);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  return CGF.GenerateOpenMPCapturedStmtFunction(*CS, D);
}
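
// Illustrative note (not part of the original source): the outlined function
// produced here follows the libomp microtask convention, roughly
//   void <helper>.omp_outlined.(kmp_int32 *global_tid, kmp_int32 *bound_tid,
//                               /* captured variables */...);
// which the runtime later invokes for each thread of the team via
// __kmpc_fork_call.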

std::string CGOpenMPRuntime::getOutlinedHelperName(StringRef Name) const {
  std::string Suffix = getName({"omp_outlined"});
  return (Name + Suffix).str();
}

std::string CGOpenMPRuntime::getOutlinedHelperName(CodeGenFunction &CGF) const {
  return getOutlinedHelperName(CGF.CurFn->getName());
}

std::string CGOpenMPRuntime::getReductionFuncName(StringRef Name) const {
  std::string Suffix = getName({"omp", "reduction", "reduction_func"});
  return (Name + Suffix).str();
}

llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const RegionCodeGenTy &CodeGen) {
  const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel);
  return emitParallelOrTeamsOutlinedFunction(
      CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(CGF),
      CodeGen);
}

llvm::Function *CGOpenMPRuntime::emitTeamsOutlinedFunction(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const RegionCodeGenTy &CodeGen) {
  const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams);
  return emitParallelOrTeamsOutlinedFunction(
      CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(CGF),
      CodeGen);
}

llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF,
                                              PrePostActionTy &) {
    llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc());
    llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getBeginLoc());
    llvm::Value *TaskArgs[] = {
        UpLoc, ThreadID,
        CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar),
                                    TaskTVar->getType()->castAs<PointerType>())
            .getPointer(CGF)};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_omp_task),
                        TaskArgs);
  };
  CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar,
                                                            UntiedCodeGen);
  CodeGen.setAction(Action);
  assert(!ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 for tasks");
  const OpenMPDirectiveKind Region =
      isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop
                                                      : OMPD_task;
  const CapturedStmt *CS = D.getCapturedStmt(Region);
  bool HasCancel = false;
  if (const auto *TD = dyn_cast<OMPTaskDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPParallelMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();

  CodeGenFunction CGF(CGM, true);
  CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
                                        InnermostKind, HasCancel, Action);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  llvm::Function *Res = CGF.GenerateCapturedStmtFunction(*CS);
  if (!Tied)
    NumberOfParts = Action.getNumberOfParts();
  return Res;
}

void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF,
                                             bool AtCurrentPoint) {
  auto &Elem = OpenMPLocThreadIDMap[CGF.CurFn];
  assert(!Elem.ServiceInsertPt && "Insert point is set already.");

  llvm::Value *Undef = llvm::UndefValue::get(CGF.Int32Ty);
  if (AtCurrentPoint) {
    Elem.ServiceInsertPt = new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt",
                                                 CGF.Builder.GetInsertBlock());
  } else {
    Elem.ServiceInsertPt = new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt");
    Elem.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt->getIterator());
  }
}

void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) {
  auto &Elem = OpenMPLocThreadIDMap[CGF.CurFn];
  if (Elem.ServiceInsertPt) {
    llvm::Instruction *Ptr = Elem.ServiceInsertPt;
    Elem.ServiceInsertPt = nullptr;
    Ptr->eraseFromParent();
  }
}

static StringRef getIdentStringFromSourceLocation(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  SmallString<128> &Buffer) {
  llvm::raw_svector_ostream OS(Buffer);
  // Build debug location
  PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
  OS << ";";
  if (auto *DbgInfo = CGF.getDebugInfo())
    OS << DbgInfo->remapDIPath(PLoc.getFilename());
  else
    OS << PLoc.getFilename();
  OS << ";";
  if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
    OS << FD->getQualifiedNameAsString();
  OS << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
  return OS.str();
}

llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
                                                 SourceLocation Loc,
                                                 unsigned Flags, bool EmitLoc) {
  uint32_t SrcLocStrSize;
  llvm::Constant *SrcLocStr;
  if ((!EmitLoc && CGM.getCodeGenOpts().getDebugInfo() ==
                       llvm::codegenoptions::NoDebugInfo) ||
      Loc.isInvalid()) {
    SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize);
  } else {
    std::string FunctionName;
    std::string FileName;
    if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
      FunctionName = FD->getQualifiedNameAsString();
    PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
    if (auto *DbgInfo = CGF.getDebugInfo())
      FileName = DbgInfo->remapDIPath(PLoc.getFilename());
    else
      FileName = PLoc.getFilename();
    unsigned Line = PLoc.getLine();
    unsigned Column = PLoc.getColumn();
    SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(FunctionName, FileName, Line,
                                                Column, SrcLocStrSize);
  }
  unsigned Reserved2Flags = getDefaultLocationReserved2Flags();
  return OMPBuilder.getOrCreateIdent(
      SrcLocStr, SrcLocStrSize, llvm::omp::IdentFlag(Flags), Reserved2Flags);
}

llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
                                          SourceLocation Loc) {
  assert(CGF.CurFn && "No function in current CodeGenFunction.");
  // If the OpenMPIRBuilder is used we need to use it for all thread id calls as
  // the clang invariants used below might be broken.
  if (CGM.getLangOpts().OpenMPIRBuilder) {
    SmallString<128> Buffer;
    OMPBuilder.updateToLocation(CGF.Builder.saveIP());
    uint32_t SrcLocStrSize;
    auto *SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(
        getIdentStringFromSourceLocation(CGF, Loc, Buffer), SrcLocStrSize);
    return OMPBuilder.getOrCreateThreadID(
        OMPBuilder.getOrCreateIdent(SrcLocStr, SrcLocStrSize));
  }

  llvm::Value *ThreadID = nullptr;
  // Check whether we've already cached a load of the thread id in this
  // function.
  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
  if (I != OpenMPLocThreadIDMap.end()) {
    ThreadID = I->second.ThreadID;
    if (ThreadID != nullptr)
      return ThreadID;
  }
  // If exceptions are enabled, do not use parameter to avoid possible crash.
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    if (OMPRegionInfo->getThreadIDVariable()) {
      // Check if this is an outlined function with thread id passed as
      // argument.
      LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
      llvm::BasicBlock *TopBlock = CGF.AllocaInsertPt->getParent();
      if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions ||
          !CGF.getLangOpts().CXXExceptions ||
          CGF.Builder.GetInsertBlock() == TopBlock ||
          !isa<llvm::Instruction>(LVal.getPointer(CGF)) ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              TopBlock ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              CGF.Builder.GetInsertBlock()) {
        ThreadID = CGF.EmitLoadOfScalar(LVal, Loc);
        // If value loaded in entry block, cache it and use it everywhere in
        // function.
        if (CGF.Builder.GetInsertBlock() == TopBlock)
          OpenMPLocThreadIDMap[CGF.CurFn].ThreadID = ThreadID;
        return ThreadID;
      }
    }
  }

  // This is not an outlined function region - need to call __kmpc_int32
  // kmpc_global_thread_num(ident_t *loc).
  // Generate thread id value and cache this value for use across the
  // function.
  auto &Elem = OpenMPLocThreadIDMap[CGF.CurFn];
  if (!Elem.ServiceInsertPt)
    setLocThreadIdInsertPt(CGF);
  CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
  CGF.Builder.SetInsertPoint(Elem.ServiceInsertPt);

  llvm::CallInst *Call = CGF.Builder.CreateCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___kmpc_global_thread_num),
      emitUpdateLocation(CGF, Loc));
  Call->setCallingConv(CGF.getRuntimeCC());
  Elem.ThreadID = Call;
  return Call;
}

void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) {
  assert(CGF.CurFn && "No function in current CodeGenFunction.");
  if (OpenMPLocThreadIDMap.count(CGF.CurFn)) {
    clearLocThreadIdInsertPt(CGF);
    OpenMPLocThreadIDMap.erase(CGF.CurFn);
  }
  if (auto I = FunctionUDRMap.find(CGF.CurFn); I != FunctionUDRMap.end()) {
    for (const auto *D : I->second)
      UDRMap.erase(D);
    FunctionUDRMap.erase(I);
  }
  if (auto I = FunctionUDMMap.find(CGF.CurFn); I != FunctionUDMMap.end()) {
    for (const auto *D : I->second)
      UDMMap.erase(D);
    FunctionUDMMap.erase(I);
  }
  LastprivateConditionalToTypes.erase(CGF.CurFn);
  FunctionToUntiedTaskStackMap.erase(CGF.CurFn);
}

llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() {
  return OMPBuilder.IdentPtr;
}

static llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseKind
convertDeviceClause(const VarDecl *VD) {
  std::optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
      OMPDeclareTargetDeclAttr::getDeviceType(VD);
  if (!DevTy)
    return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseNone;

  switch ((int)*DevTy) { // Avoid -Wcovered-switch-default
  case OMPDeclareTargetDeclAttr::DT_Host:
    return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseHost;
    break;
  case OMPDeclareTargetDeclAttr::DT_NoHost:
    return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseNoHost;
    break;
  case OMPDeclareTargetDeclAttr::DT_Any:
    return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseAny;
    break;
  default:
    return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseNone;
    break;
  }
}

static llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind
convertCaptureClause(const VarDecl *VD) {
  std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> MapType =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  if (!MapType)
    return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryNone;
  switch ((int)*MapType) { // Avoid -Wcovered-switch-default
  case OMPDeclareTargetDeclAttr::MapTypeTy::MT_To:
    return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryTo;
    break;
  case OMPDeclareTargetDeclAttr::MapTypeTy::MT_Enter:
    return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryEnter;
    break;
  case OMPDeclareTargetDeclAttr::MapTypeTy::MT_Link:
    return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryLink;
    break;
  default:
    return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryNone;
    break;
  }
}

static llvm::TargetRegionEntryInfo getEntryInfoFromPresumedLoc(
    CodeGenModule &CGM, llvm::OpenMPIRBuilder &OMPBuilder,
    SourceLocation BeginLoc, llvm::StringRef ParentName = "") {

  auto FileInfoCallBack = [&]() {
    SourceManager &SM = CGM.getContext().getSourceManager();
    PresumedLoc PLoc = SM.getPresumedLoc(BeginLoc);

    if (!CGM.getFileSystem()->exists(PLoc.getFilename()))
      PLoc = SM.getPresumedLoc(BeginLoc, /*UseLineDirectives=*/false);

    return std::pair<std::string, uint64_t>(PLoc.getFilename(), PLoc.getLine());
  };

  return OMPBuilder.getTargetEntryUniqueInfo(FileInfoCallBack,
                                             *CGM.getFileSystem(), ParentName);
}
1557
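/// Return the address to use when referencing a declare-target variable;
/// depending on the capture kind this may be an OMPIRBuilder-managed
/// reference pointer rather than the variable itself.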
1558ConstantAddress CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) {
1559 auto AddrOfGlobal = [&VD, this]() { return CGM.GetAddrOfGlobal(VD); };
1560
1561 auto LinkageForVariable = [&VD, this]() {
1562 return CGM.getLLVMLinkageVarDefinition(VD);
1563 };
1564
1565 std::vector<llvm::GlobalVariable *> GeneratedRefs;
1566
1567 llvm::Type *LlvmPtrTy = CGM.getTypes().ConvertTypeForMem(
1568 CGM.getContext().getPointerType(VD->getType()));
1569 llvm::Constant *addr = OMPBuilder.getAddrOfDeclareTargetVar(
1570 convertCaptureClause(VD), convertDeviceClause(VD),
1571 VD->hasDefinition(CGM.getContext()) == VarDecl::DeclarationOnly,
1572 VD->isExternallyVisible(),
1573 getEntryInfoFromPresumedLoc(CGM, OMPBuilder,
1574 VD->getCanonicalDecl()->getBeginLoc()),
1575 CGM.getMangledName(VD), GeneratedRefs, CGM.getLangOpts().OpenMPSimd,
1576 CGM.getLangOpts().OMPTargetTriples, LlvmPtrTy, AddrOfGlobal,
1577 LinkageForVariable);
1578
1579 if (!addr)
1580 return ConstantAddress::invalid();
1581 return ConstantAddress(addr, LlvmPtrTy, CGM.getContext().getDeclAlign(VD));
1582}
1583
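/// Return (lazily creating) the internal "<mangled-name>.cache." global that
/// __kmpc_threadprivate_cached uses to cache per-thread copies.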
1584llvm::Constant *
1585CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
1586 assert(!CGM.getLangOpts().OpenMPUseTLS ||
1587 !CGM.getContext().getTargetInfo().isTLSSupported());
1588 // Lookup the entry, lazily creating it if necessary.
1589 std::string Suffix = getName({"cache", ""});
1590 return OMPBuilder.getOrCreateInternalVariable(
1591 CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix).str());
1592}
1593
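/// Return the address of the thread-private copy of VD, either directly via
/// native TLS or via a __kmpc_threadprivate_cached call.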
1594Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
1595 const VarDecl *VD,
1596 Address VDAddr,
1597 SourceLocation Loc) {
1598 if (CGM.getLangOpts().OpenMPUseTLS &&
1599 CGM.getContext().getTargetInfo().isTLSSupported())
1600 return VDAddr;
1601
1602 llvm::Type *VarTy = VDAddr.getElementType();
1603 llvm::Value *Args[] = {
1604 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
1605 CGF.Builder.CreatePointerCast(VDAddr.emitRawPointer(CGF), CGM.Int8PtrTy),
1606 CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)),
1607 getOrCreateThreadPrivateCache(VD)};
1608 return Address(
1609 CGF.EmitRuntimeCall(
1610 OMPBuilder.getOrCreateRuntimeFunction(
1611 CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
1612 Args),
1613 CGF.Int8Ty, VDAddr.getAlignment());
1614}
1615
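/// Register the ctor/copy-ctor/dtor of a threadprivate variable with the
/// runtime, forcing runtime initialization first.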
1616void CGOpenMPRuntime::emitThreadPrivateVarInit(
1617 CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor,
1618 llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
1619 // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime
1620 // library.
1621 llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc);
1622 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
1623 CGM.getModule(), OMPRTL___kmpc_global_thread_num),
1624 OMPLoc);
1625 // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
1626 // to register constructor/destructor for variable.
1627 llvm::Value *Args[] = {
1628 OMPLoc,
1629 CGF.Builder.CreatePointerCast(VDAddr.emitRawPointer(CGF), CGM.VoidPtrTy),
1630 Ctor, CopyCtor, Dtor};
1631 CGF.EmitRuntimeCall(
1632 OMPBuilder.getOrCreateRuntimeFunction(
1633 CGM.getModule(), OMPRTL___kmpc_threadprivate_register),
1634 Args);
1635}
1636
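/// Emit (once per variable) the constructor/destructor helpers for a
/// threadprivate definition and register them, either inline in CGF or in a
/// synthesized global initializer when no CGF is given.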
1637llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
1638 const VarDecl *VD, Address VDAddr, SourceLocation Loc,
1639 bool PerformInit, CodeGenFunction *CGF) {
1640 if (CGM.getLangOpts().OpenMPUseTLS &&
1641 CGM.getContext().getTargetInfo().isTLSSupported())
1642 return nullptr;
1643
1644 VD = VD->getDefinition(CGM.getContext());
1645 if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) {
1646 QualType ASTTy = VD->getType();
1647
1648 llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
1649 const Expr *Init = VD->getAnyInitializer();
1650 if (CGM.getLangOpts().CPlusPlus && PerformInit) {
1651 // Generate function that re-emits the declaration's initializer into the
1652 // threadprivate copy of the variable VD
1653 CodeGenFunction CtorCGF(CGM);
1654 FunctionArgList Args;
1655 ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
1656 /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
1657 ImplicitParamKind::Other);
1658 Args.push_back(&Dst);
1659
1660 const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
1661 CGM.getContext().VoidPtrTy, Args);
1662 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
1663 std::string Name = getName({"__kmpc_global_ctor_", ""});
1664 llvm::Function *Fn =
1665 CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
1666 CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
1667 Args, Loc, Loc);
1668 llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar(
1669 CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
1670 CGM.getContext().VoidPtrTy, Dst.getLocation());
1671 Address Arg(ArgVal, CtorCGF.ConvertTypeForMem(ASTTy),
1672 VDAddr.getAlignment());
1673 CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
1674 /*IsInitializer=*/true);
1675 ArgVal = CtorCGF.EmitLoadOfScalar(
1676 CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
1677 CGM.getContext().VoidPtrTy, Dst.getLocation());
1678 CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
1679 CtorCGF.FinishFunction();
1680 Ctor = Fn;
1681 }
1682 if (VD->getType().isDestructedType() != QualType::DK_none) {
1683 // Generate function that emits destructor call for the threadprivate copy
1684 // of the variable VD
1685 CodeGenFunction DtorCGF(CGM);
1686 FunctionArgList Args;
1687 ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
1688 /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
1689 ImplicitParamKind::Other);
1690 Args.push_back(&Dst);
1691
1692 const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
1693 CGM.getContext().VoidTy, Args);
1694 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
1695 std::string Name = getName({"__kmpc_global_dtor_", ""});
1696 llvm::Function *Fn =
1697 CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
1698 auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
1699 DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
1700 Loc, Loc);
1701 // Create a scope with an artificial location for the body of this function.
1702 auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
1703 llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar(
1704 DtorCGF.GetAddrOfLocalVar(&Dst),
1705 /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation());
1706 DtorCGF.emitDestroy(
1707 Address(ArgVal, DtorCGF.Int8Ty, VDAddr.getAlignment()), ASTTy,
1708 DtorCGF.getDestroyer(ASTTy.isDestructedType()),
1709 DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
1710 DtorCGF.FinishFunction();
1711 Dtor = Fn;
1712 }
1713 // Do not emit init function if it is not required.
1714 if (!Ctor && !Dtor)
1715 return nullptr;
1716
1717 // Copying constructor for the threadprivate variable.
1718 // Must be NULL: the parameter is reserved by the runtime, which currently
1719 // asserts that it is always NULL.
1720 CopyCtor = llvm::Constant::getNullValue(CGM.DefaultPtrTy);
1721 if (Ctor == nullptr) {
1722 Ctor = llvm::Constant::getNullValue(CGM.DefaultPtrTy);
1723 }
1724 if (Dtor == nullptr) {
1725 Dtor = llvm::Constant::getNullValue(CGM.DefaultPtrTy);
1726 }
1727 if (!CGF) {
1728 auto *InitFunctionTy =
1729 llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
1730 std::string Name = getName({"__omp_threadprivate_init_", ""});
1731 llvm::Function *InitFunction = CGM.CreateGlobalInitOrCleanUpFunction(
1732 InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction());
1733 CodeGenFunction InitCGF(CGM);
1734 FunctionArgList ArgList;
1735 InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
1736 CGM.getTypes().arrangeNullaryFunction(), ArgList,
1737 Loc, Loc);
1738 emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
1739 InitCGF.FinishFunction();
1740 return InitFunction;
1741 }
1742 emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
1743 }
1744 return nullptr;
1745}
1746
1747void CGOpenMPRuntime::emitDeclareTargetFunction(const FunctionDecl *FD,
1748 llvm::GlobalValue *GV) {
1749 std::optional<OMPDeclareTargetDeclAttr *> ActiveAttr =
1750 OMPDeclareTargetDeclAttr::getActiveAttr(FD);
1751
1752 // We only need to handle active 'indirect' declare target functions.
1753 if (!ActiveAttr || !(*ActiveAttr)->getIndirect())
1754 return;
1755
1756 // Get a mangled name to store the new device global in.
1757 llvm::TargetRegionEntryInfo EntryInfo = getEntryInfoFromPresumedLoc(
1758 CGM, OMPBuilder, FD->getCanonicalDecl()->getBeginLoc(), FD->getName());
1759 SmallString<128> Name;
1760 OMPBuilder.OffloadInfoManager.getTargetRegionEntryFnName(Name, EntryInfo);
1761
1762 // We need to generate a new global to hold the address of the indirectly
1763 // called device function. Doing this allows us to keep the visibility and
1764 // linkage of the associated function unchanged while allowing the runtime to
1765 // access its value.
1766 llvm::GlobalValue *Addr = GV;
1767 if (CGM.getLangOpts().OpenMPIsTargetDevice) {
1768 llvm::PointerType *FnPtrTy = llvm::PointerType::get(
1769 CGM.getLLVMContext(),
1770 CGM.getModule().getDataLayout().getProgramAddressSpace());
1771 Addr = new llvm::GlobalVariable(
1772 CGM.getModule(), FnPtrTy,
1773 /*isConstant=*/true, llvm::GlobalValue::ExternalLinkage, GV, Name,
1774 nullptr, llvm::GlobalValue::NotThreadLocal,
1775 CGM.getModule().getDataLayout().getDefaultGlobalsAddressSpace());
1776 Addr->setVisibility(llvm::GlobalValue::ProtectedVisibility);
1777 }
1778
1779 // Register the indirect Vtable:
1780 // This is similar to OMPTargetGlobalVarEntryIndirect, except that the
1781 // size field refers to the size of memory pointed to, not the size of
1782 // the pointer symbol itself (which is implicitly the size of a pointer).
1783 OMPBuilder.OffloadInfoManager.registerDeviceGlobalVarEntryInfo(
1784 Name, Addr, CGM.GetTargetTypeStoreSize(CGM.VoidPtrTy).getQuantity(),
1785 llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryIndirect,
1786 llvm::GlobalValue::WeakODRLinkage);
1787}
1788
1789void CGOpenMPRuntime::registerVTableOffloadEntry(llvm::GlobalVariable *VTable,
1790 const VarDecl *VD) {
1791 // TODO: add logic to avoid duplicate vtable registrations per
1792 // translation unit; though for external linkage, this should no
1793 // longer be an issue - or at least we can avoid the issue by
1794 // checking for an existing offloading entry. But, perhaps the
1795 // better approach is to defer emission of the vtables and offload
1796 // entries until later (by tracking a list of items that need to be
1797 // emitted).
1798
1799 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
1800
1801 // Generate a new externally visible global to point to the
1802 // internally visible vtable. Doing this allows us to keep the
1803 // visibility and linkage of the associated vtable unchanged while
1804 // allowing the runtime to access its value. The externally
1805 // visible global var needs to be emitted with a unique mangled
1806 // name that won't conflict with similarly named (internal)
1807 // vtables in other translation units.
1808
1809 // Register vtable with source location of dynamic object in map
1810 // clause.
1811 llvm::TargetRegionEntryInfo EntryInfo = getEntryInfoFromPresumedLoc(
1812 CGM, OMPBuilder, VD->getCanonicalDecl()->getBeginLoc(),
1813 VTable->getName());
1814
1815 llvm::GlobalVariable *Addr = VTable;
1816 SmallString<128> AddrName;
1817 OMPBuilder.OffloadInfoManager.getTargetRegionEntryFnName(AddrName, EntryInfo);
1818 AddrName.append("addr");
1819
1820 if (CGM.getLangOpts().OpenMPIsTargetDevice) {
1821 Addr = new llvm::GlobalVariable(
1822 CGM.getModule(), VTable->getType(),
1823 /*isConstant=*/true, llvm::GlobalValue::ExternalLinkage, VTable,
1824 AddrName,
1825 /*InsertBefore=*/nullptr, llvm::GlobalValue::NotThreadLocal,
1826 CGM.getModule().getDataLayout().getDefaultGlobalsAddressSpace());
1827 Addr->setVisibility(llvm::GlobalValue::ProtectedVisibility);
1828 }
1829 OMPBuilder.OffloadInfoManager.registerDeviceGlobalVarEntryInfo(
1830 AddrName, VTable,
1831 CGM.getDataLayout().getTypeAllocSize(VTable->getInitializer()->getType()),
1832 llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryIndirectVTable,
1833 llvm::GlobalValue::WeakODRLinkage);
1834}
1835
1836static void emitAndRegisterVTable(CodeGenModule &CGM,
1837 CXXRecordDecl *CXXRecord,
1838 const VarDecl *VD) {
1839 // Register C++ VTable to OpenMP Offload Entry if it's a new
1840 // CXXRecordDecl.
1841 if (CXXRecord && CXXRecord->isDynamicClass() &&
1842 !CGM.getOpenMPRuntime().VTableDeclMap.contains(CXXRecord)) {
1843 auto Res = CGM.getOpenMPRuntime().VTableDeclMap.try_emplace(CXXRecord, VD);
1844 if (Res.second) {
1845 CGM.EmitVTable(CXXRecord);
1846 CodeGenVTables VTables = CGM.getVTables();
1847 llvm::GlobalVariable *VTablesAddr = VTables.GetAddrOfVTable(CXXRecord);
1848 assert(VTablesAddr && "Expected non-null VTable address");
1849 CGM.getOpenMPRuntime().registerVTableOffloadEntry(VTablesAddr, VD);
1850 // Emit VTable for all the fields containing dynamic CXXRecord
1851 for (const FieldDecl *Field : CXXRecord->fields()) {
1852 if (CXXRecordDecl *RecordDecl = Field->getType()->getAsCXXRecordDecl())
1853 emitAndRegisterVTable(CGM, RecordDecl, VD);
1854 }
1855 // Emit VTable for all dynamic parent class
1856 for (CXXBaseSpecifier &Base : CXXRecord->bases()) {
1857 if (CXXRecordDecl *BaseDecl = Base.getType()->getAsCXXRecordDecl())
1858 emitAndRegisterVTable(CGM, BaseDecl, VD);
1859 }
1860 }
1861 }
1862}
1863
1865 // Register VTables by scanning the map clauses of the OpenMP target region.
1866 // Get CXXRecordDecl and VarDecl from Expr.
1867 auto GetVTableDecl = [](const Expr *E) {
1868 QualType VDTy = E->getType();
1869 CXXRecordDecl *CXXRecord = nullptr;
1870 if (const auto *RefType = VDTy->getAs<LValueReferenceType>())
1871 VDTy = RefType->getPointeeType();
1872 if (VDTy->isPointerType())
1873 CXXRecord = VDTy->getPointeeType()->getAsCXXRecordDecl();
1874 else
1875 CXXRecord = VDTy->getAsCXXRecordDecl();
1876
1877 const VarDecl *VD = nullptr;
1878 if (auto *DRE = dyn_cast<DeclRefExpr>(E)) {
1879 VD = cast<VarDecl>(DRE->getDecl());
1880 } else if (auto *MRE = dyn_cast<MemberExpr>(E)) {
1881 if (auto *BaseDRE = dyn_cast<DeclRefExpr>(MRE->getBase())) {
1882 if (auto *BaseVD = dyn_cast<VarDecl>(BaseDRE->getDecl()))
1883 VD = BaseVD;
1884 }
1885 }
1886 return std::pair<CXXRecordDecl *, const VarDecl *>(CXXRecord, VD);
1887 };
1888 // Collect VTable from OpenMP map clause.
1889 for (const auto *C : D.getClausesOfKind<OMPMapClause>()) {
1890 for (const auto *E : C->varlist()) {
1891 auto DeclPair = GetVTableDecl(E);
1892 // Ensure VD is not null
1893 if (DeclPair.second)
1894 emitAndRegisterVTable(CGM, DeclPair.first, DeclPair.second);
1895 }
1896 }
1897}
1898
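/// Create a compiler-generated ("artificial") threadprivate variable, using
/// native TLS when available and __kmpc_threadprivate_cached otherwise.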
1899Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF,
1900 QualType VarType,
1901 StringRef Name) {
1902 std::string Suffix = getName({"artificial", ""});
1903 llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType);
1904 llvm::GlobalVariable *GAddr = OMPBuilder.getOrCreateInternalVariable(
1905 VarLVType, Twine(Name).concat(Suffix).str());
1906 if (CGM.getLangOpts().OpenMP && CGM.getLangOpts().OpenMPUseTLS &&
1907 CGM.getTarget().isTLSSupported()) {
1908 GAddr->setThreadLocal(/*Val=*/true);
1909 return Address(GAddr, GAddr->getValueType(),
1910 CGM.getContext().getTypeAlignInChars(VarType));
1911 }
1912 std::string CacheSuffix = getName({"cache", ""});
1913 llvm::Value *Args[] = {
1914 emitUpdateLocation(CGF, SourceLocation()),
1915 getThreadID(CGF, SourceLocation()),
1916 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy),
1917 CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy,
1918 /*isSigned=*/false),
1919 OMPBuilder.getOrCreateInternalVariable(
1920 CGM.VoidPtrPtrTy,
1921 Twine(Name).concat(Suffix).concat(CacheSuffix).str())};
1922 return Address(
1923 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
1924 CGF.EmitRuntimeCall(
1925 OMPBuilder.getOrCreateRuntimeFunction(
1926 CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
1927 Args),
1928 CGF.Builder.getPtrTy(0)),
1929 VarLVType, CGM.getContext().getTypeAlignInChars(VarType));
1930}
1931
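/// Emit ThenGen or ElseGen guarded by an OpenMP 'if' clause condition,
/// folding away the dead arm when the condition is a constant.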
1932void CGOpenMPRuntime::emitIfClause(CodeGenFunction &CGF, const Expr *Cond,
1933 const RegionCodeGenTy &ThenGen,
1934 const RegionCodeGenTy &ElseGen) {
1935 CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());
1936
1937 // If the condition constant folds and can be elided, try to avoid emitting
1938 // the condition and the dead arm of the if/else.
1939 bool CondConstant;
1940 if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
1941 if (CondConstant)
1942 ThenGen(CGF);
1943 else
1944 ElseGen(CGF);
1945 return;
1946 }
1947
1948 // Otherwise, the condition did not fold, or we couldn't elide it. Just
1949 // emit the conditional branch.
1950 llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then");
1951 llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else");
1952 llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end");
1953 CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0);
1954
1955 // Emit the 'then' code.
1956 CGF.EmitBlock(ThenBlock);
1957 ThenGen(CGF);
1958 CGF.EmitBranch(ContBlock);
1959 // Emit the 'else' code if present.
1960 // There is no need to emit line number for unconditional branch.
1961 (void)ApplyDebugLocation::CreateEmpty(CGF);
1962 CGF.EmitBlock(ElseBlock);
1963 ElseGen(CGF);
1964 // There is no need to emit line number for unconditional branch.
1965 (void)ApplyDebugLocation::CreateEmpty(CGF);
1966 CGF.EmitBranch(ContBlock);
1967 // Emit the continuation block for code after the if.
1968 CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
1969}
1970
1971void CGOpenMPRuntime::emitParallelCall(
1972 CodeGenFunction &CGF, SourceLocation Loc, llvm::Function *OutlinedFn,
1973 ArrayRef<llvm::Value *> CapturedVars, const Expr *IfCond,
1974 llvm::Value *NumThreads, OpenMPNumThreadsClauseModifier NumThreadsModifier,
1975 OpenMPSeverityClauseKind Severity, const Expr *Message) {
1976 if (!CGF.HaveInsertPoint())
1977 return;
1978 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
1979 auto &M = CGM.getModule();
1980 auto &&ThenGen = [&M, OutlinedFn, CapturedVars, RTLoc,
1981 this](CodeGenFunction &CGF, PrePostActionTy &) {
1982 // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
1983 llvm::Value *Args[] = {
1984 RTLoc,
1985 CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
1986 OutlinedFn};
1987 llvm::SmallVector<llvm::Value *, 16> RealArgs;
1988 RealArgs.append(std::begin(Args), std::end(Args));
1989 RealArgs.append(CapturedVars.begin(), CapturedVars.end());
1990
1991 llvm::FunctionCallee RTLFn =
1992 OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_fork_call);
1993 CGF.EmitRuntimeCall(RTLFn, RealArgs);
1994 };
1995 auto &&ElseGen = [&M, OutlinedFn, CapturedVars, RTLoc, Loc,
1996 this](CodeGenFunction &CGF, PrePostActionTy &) {
1997 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
1998 llvm::Value *ThreadID = RT.getThreadID(CGF, Loc);
1999 // Build calls:
2000 // __kmpc_serialized_parallel(&Loc, GTid);
2001 llvm::Value *Args[] = {RTLoc, ThreadID};
2002 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2003 M, OMPRTL___kmpc_serialized_parallel),
2004 Args);
2005
2006 // OutlinedFn(&GTid, &zero_bound, CapturedStruct);
2007 Address ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc);
2008 RawAddress ZeroAddrBound =
2009 CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
2010 /*Name=*/".bound.zero.addr");
2011 CGF.Builder.CreateStore(CGF.Builder.getInt32(/*C*/ 0), ZeroAddrBound);
2012 llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
2013 // ThreadId for serialized parallels is 0.
2014 OutlinedFnArgs.push_back(ThreadIDAddr.emitRawPointer(CGF));
2015 OutlinedFnArgs.push_back(ZeroAddrBound.getPointer());
2016 OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
2017
2018 // Ensure we do not inline the function. This is trivially true for the ones
2019 // passed to __kmpc_fork_call but the ones called in serialized regions
2020 // could be inlined. This is not perfect, but it is closer to the invariant
2021 // we want, namely, every data environment starts with a new function.
2022 // TODO: We should pass the if condition to the runtime function and do the
2023 // handling there. Much cleaner code.
2024 OutlinedFn->removeFnAttr(llvm::Attribute::AlwaysInline);
2025 OutlinedFn->addFnAttr(llvm::Attribute::NoInline);
2026 RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);
2027
2028 // __kmpc_end_serialized_parallel(&Loc, GTid);
2029 llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID};
2030 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2031 M, OMPRTL___kmpc_end_serialized_parallel),
2032 EndArgs);
2033 };
2034 if (IfCond) {
2035 emitIfClause(CGF, IfCond, ThenGen, ElseGen);
2036 } else {
2037 RegionCodeGenTy ThenRCG(ThenGen);
2038 ThenRCG(CGF);
2039 }
2040}
2041
2042// If we're inside an (outlined) parallel region, use the region info's
2043// thread-ID variable (it is passed as the first argument of the outlined
2044// function, "kmp_int32 *gtid"). Otherwise, if we're not inside a parallel
2045// region but in a regular serial code region, get the thread ID by calling
2046// kmp_int32 __kmpc_global_thread_num(ident_t *loc), stash it in a temporary and
2047// return the address of that temp.
2048Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
2049 SourceLocation Loc) {
2050 if (auto *OMPRegionInfo =
2051 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
2052 if (OMPRegionInfo->getThreadIDVariable())
2053 return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress();
2054
2055 llvm::Value *ThreadID = getThreadID(CGF, Loc);
2056 QualType Int32Ty =
2057 CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
2058 Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
2059 CGF.EmitStoreOfScalar(ThreadID,
2060 CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty));
2061
2062 return ThreadIDTemp;
2063}
2064
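/// Return the internal global lock variable (of kmp_critical_name type) that
/// identifies the given named 'critical' region.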
2065llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
2066 std::string Prefix = Twine("gomp_critical_user_", CriticalName).str();
2067 std::string Name = getName({Prefix, "var"});
2068 return OMPBuilder.getOrCreateInternalVariable(KmpCriticalNameTy, Name);
2069}
2070
2071namespace {
2072/// Common pre(post)-action for different OpenMP constructs.
2073class CommonActionTy final : public PrePostActionTy {
2074 llvm::FunctionCallee EnterCallee;
2075 ArrayRef<llvm::Value *> EnterArgs;
2076 llvm::FunctionCallee ExitCallee;
2077 ArrayRef<llvm::Value *> ExitArgs;
2078 bool Conditional;
2079 llvm::BasicBlock *ContBlock = nullptr;
2080
2081public:
2082 CommonActionTy(llvm::FunctionCallee EnterCallee,
2083 ArrayRef<llvm::Value *> EnterArgs,
2084 llvm::FunctionCallee ExitCallee,
2085 ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false)
2086 : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),
2087 ExitArgs(ExitArgs), Conditional(Conditional) {}
2088 void Enter(CodeGenFunction &CGF) override {
2089 llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs);
2090 if (Conditional) {
2091 llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes);
2092 auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
2093 ContBlock = CGF.createBasicBlock("omp_if.end");
2094 // Generate the branch (If-stmt)
2095 CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
2096 CGF.EmitBlock(ThenBlock);
2097 }
2098 }
2099 void Done(CodeGenFunction &CGF) {
2100 // Emit the rest of blocks/branches
2101 CGF.EmitBranch(ContBlock);
2102 CGF.EmitBlock(ContBlock, true);
2103 }
2104 void Exit(CodeGenFunction &CGF) override {
2105 CGF.EmitRuntimeCall(ExitCallee, ExitArgs);
2106 }
2107};
2108} // anonymous namespace
2109
2110void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF,
2111 StringRef CriticalName,
2112 const RegionCodeGenTy &CriticalOpGen,
2113 SourceLocation Loc, const Expr *Hint) {
2114 // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]);
2115 // CriticalOpGen();
2116 // __kmpc_end_critical(ident_t *, gtid, Lock);
2117 // Prepare arguments and build a call to __kmpc_critical
2118 if (!CGF.HaveInsertPoint())
2119 return;
2120 llvm::FunctionCallee RuntimeFcn = OMPBuilder.getOrCreateRuntimeFunction(
2121 CGM.getModule(),
2122 Hint ? OMPRTL___kmpc_critical_with_hint : OMPRTL___kmpc_critical);
2123 llvm::Value *LockVar = getCriticalRegionLock(CriticalName);
2124 unsigned LockVarArgIdx = 2;
2125 if (cast<llvm::GlobalVariable>(LockVar)->getAddressSpace() !=
2126 RuntimeFcn.getFunctionType()
2127 ->getParamType(LockVarArgIdx)
2128 ->getPointerAddressSpace())
2129 LockVar = CGF.Builder.CreateAddrSpaceCast(
2130 LockVar, RuntimeFcn.getFunctionType()->getParamType(LockVarArgIdx));
2131 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2132 LockVar};
2133 llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args),
2134 std::end(Args));
2135 if (Hint) {
2136 EnterArgs.push_back(CGF.Builder.CreateIntCast(
2137 CGF.EmitScalarExpr(Hint), CGM.Int32Ty, /*isSigned=*/false));
2138 }
2139 CommonActionTy Action(RuntimeFcn, EnterArgs,
2140 OMPBuilder.getOrCreateRuntimeFunction(
2141 CGM.getModule(), OMPRTL___kmpc_end_critical),
2142 Args);
2143 CriticalOpGen.setAction(Action);
2144 emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
2145}
2146
2147void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
2148 const RegionCodeGenTy &MasterOpGen,
2149 SourceLocation Loc) {
2150 if (!CGF.HaveInsertPoint())
2151 return;
2152 // if(__kmpc_master(ident_t *, gtid)) {
2153 // MasterOpGen();
2154 // __kmpc_end_master(ident_t *, gtid);
2155 // }
2156 // Prepare arguments and build a call to __kmpc_master
2157 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2158 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2159 CGM.getModule(), OMPRTL___kmpc_master),
2160 Args,
2161 OMPBuilder.getOrCreateRuntimeFunction(
2162 CGM.getModule(), OMPRTL___kmpc_end_master),
2163 Args,
2164 /*Conditional=*/true);
2165 MasterOpGen.setAction(Action);
2166 emitInlinedDirective(CGF, OMPD_master, MasterOpGen);
2167 Action.Done(CGF);
2168}
2169
2170void CGOpenMPRuntime::emitMaskedRegion(CodeGenFunction &CGF,
2171 const RegionCodeGenTy &MaskedOpGen,
2172 SourceLocation Loc, const Expr *Filter) {
2173 if (!CGF.HaveInsertPoint())
2174 return;
2175 // if(__kmpc_masked(ident_t *, gtid, filter)) {
2176 // MaskedOpGen();
2177 // __kmpc_end_masked(ident_t *, gtid);
2178 // }
2179 // Prepare arguments and build a call to __kmpc_masked
2180 llvm::Value *FilterVal = Filter
2181 ? CGF.EmitScalarExpr(Filter, CGF.Int32Ty)
2182 : llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/0);
2183 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2184 FilterVal};
2185 llvm::Value *ArgsEnd[] = {emitUpdateLocation(CGF, Loc),
2186 getThreadID(CGF, Loc)};
2187 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2188 CGM.getModule(), OMPRTL___kmpc_masked),
2189 Args,
2190 OMPBuilder.getOrCreateRuntimeFunction(
2191 CGM.getModule(), OMPRTL___kmpc_end_masked),
2192 ArgsEnd,
2193 /*Conditional=*/true);
2194 MaskedOpGen.setAction(Action);
2195 emitInlinedDirective(CGF, OMPD_masked, MaskedOpGen);
2196 Action.Done(CGF);
2197}
2198
2199void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
2200 SourceLocation Loc) {
2201 if (!CGF.HaveInsertPoint())
2202 return;
2203 if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2204 OMPBuilder.createTaskyield(CGF.Builder);
2205 } else {
2206 // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
2207 llvm::Value *Args[] = {
2208 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2209 llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
2210 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2211 CGM.getModule(), OMPRTL___kmpc_omp_taskyield),
2212 Args);
2213 }
2214
2215 if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
2216 Region->emitUntiedSwitch(CGF);
2217}
2218
2219void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
2220 const RegionCodeGenTy &TaskgroupOpGen,
2221 SourceLocation Loc) {
2222 if (!CGF.HaveInsertPoint())
2223 return;
2224 // __kmpc_taskgroup(ident_t *, gtid);
2225 // TaskgroupOpGen();
2226 // __kmpc_end_taskgroup(ident_t *, gtid);
2227 // Prepare arguments and build a call to __kmpc_taskgroup
2228 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2229 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2230 CGM.getModule(), OMPRTL___kmpc_taskgroup),
2231 Args,
2232 OMPBuilder.getOrCreateRuntimeFunction(
2233 CGM.getModule(), OMPRTL___kmpc_end_taskgroup),
2234 Args);
2235 TaskgroupOpGen.setAction(Action);
2236 emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen);
2237}
2238
2239/// Given an array of pointers to variables, project the address of a
2240/// given variable.
2241static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array,
2242 unsigned Index, const VarDecl *Var) {
2243 // Pull out the pointer to the variable.
2244 Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index);
2245 llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr);
2246
2247 llvm::Type *ElemTy = CGF.ConvertTypeForMem(Var->getType());
2248 return Address(Ptr, ElemTy, CGF.getContext().getDeclAlign(Var));
2249}
2250
2251static llvm::Value *emitCopyprivateCopyFunction(
2252 CodeGenModule &CGM, llvm::Type *ArgsElemType,
2253 ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
2254 ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps,
2255 SourceLocation Loc) {
2256 ASTContext &C = CGM.getContext();
2257 // void copy_func(void *LHSArg, void *RHSArg);
2258 FunctionArgList Args;
2259 ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
2260 ImplicitParamKind::Other);
2261 ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
2262 ImplicitParamKind::Other);
2263 Args.push_back(&LHSArg);
2264 Args.push_back(&RHSArg);
2265 const auto &CGFI =
2266 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
2267 std::string Name =
2268 CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"});
2269 auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
2270 llvm::GlobalValue::InternalLinkage, Name,
2271 &CGM.getModule());
2272 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
2273 Fn->setDoesNotRecurse();
2274 CodeGenFunction CGF(CGM);
2275 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
2276 // Dest = (void*[n])(LHSArg);
2277 // Src = (void*[n])(RHSArg);
2278 Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2279 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
2280 CGF.Builder.getPtrTy(0)),
2281 ArgsElemType, CGF.getPointerAlign());
2282 Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2283 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
2284 CGF.Builder.getPtrTy(0)),
2285 ArgsElemType, CGF.getPointerAlign());
2286 // *(Type0*)Dst[0] = *(Type0*)Src[0];
2287 // *(Type1*)Dst[1] = *(Type1*)Src[1];
2288 // ...
2289 // *(Typen*)Dst[n] = *(Typen*)Src[n];
2290 for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
2291 const auto *DestVar =
2292 cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
2293 Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);
2294
2295 const auto *SrcVar =
2296 cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
2297 Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);
2298
2299 const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
2300 QualType Type = VD->getType();
2301 CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);
2302 }
2303 CGF.FinishFunction();
2304 return Fn;
2305}
2306
2307void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
2308 const RegionCodeGenTy &SingleOpGen,
2309 SourceLocation Loc,
2310 ArrayRef<const Expr *> CopyprivateVars,
2311 ArrayRef<const Expr *> SrcExprs,
2312 ArrayRef<const Expr *> DstExprs,
2313 ArrayRef<const Expr *> AssignmentOps) {
2314 if (!CGF.HaveInsertPoint())
2315 return;
2316 assert(CopyprivateVars.size() == SrcExprs.size() &&
2317 CopyprivateVars.size() == DstExprs.size() &&
2318 CopyprivateVars.size() == AssignmentOps.size());
2319 ASTContext &C = CGM.getContext();
2320 // int32 did_it = 0;
2321 // if(__kmpc_single(ident_t *, gtid)) {
2322 // SingleOpGen();
2323 // __kmpc_end_single(ident_t *, gtid);
2324 // did_it = 1;
2325 // }
2326 // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
2327 // <copy_func>, did_it);
2328
2329 Address DidIt = Address::invalid();
2330 if (!CopyprivateVars.empty()) {
2331 // int32 did_it = 0;
2332 QualType KmpInt32Ty =
2333 C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
2334 DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
2335 CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
2336 }
2337 // Prepare arguments and build a call to __kmpc_single
2338 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2339 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2340 CGM.getModule(), OMPRTL___kmpc_single),
2341 Args,
2342 OMPBuilder.getOrCreateRuntimeFunction(
2343 CGM.getModule(), OMPRTL___kmpc_end_single),
2344 Args,
2345 /*Conditional=*/true);
2346 SingleOpGen.setAction(Action);
2347 emitInlinedDirective(CGF, OMPD_single, SingleOpGen);
2348 if (DidIt.isValid()) {
2349 // did_it = 1;
2350 CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
2351 }
2352 Action.Done(CGF);
2353 // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
2354 // <copy_func>, did_it);
2355 if (DidIt.isValid()) {
2356 llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
2357 QualType CopyprivateArrayTy = C.getConstantArrayType(
2358 C.VoidPtrTy, ArraySize, nullptr, ArraySizeModifier::Normal,
2359 /*IndexTypeQuals=*/0);
2360 // Create a list of all private variables for copyprivate.
2361 Address CopyprivateList =
2362 CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
2363 for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
2364 Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I);
2365 CGF.Builder.CreateStore(
2366 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2367 CGF.EmitLValue(CopyprivateVars[I]).getPointer(CGF),
2368 CGF.VoidPtrTy),
2369 Elem);
2370 }
2371 // Build function that copies private values from single region to all other
2372 // threads in the corresponding parallel region.
2373 llvm::Value *CpyFn = emitCopyprivateCopyFunction(
2374 CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy), CopyprivateVars,
2375 SrcExprs, DstExprs, AssignmentOps, Loc);
2376 llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy);
2377 Address CL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2378 CopyprivateList, CGF.VoidPtrTy, CGF.Int8Ty);
2379 llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt);
2380 llvm::Value *Args[] = {
2381 emitUpdateLocation(CGF, Loc), // ident_t *<loc>
2382 getThreadID(CGF, Loc), // i32 <gtid>
2383 BufSize, // size_t <buf_size>
2384 CL.emitRawPointer(CGF), // void *<copyprivate list>
2385 CpyFn, // void (*) (void *, void *) <copy_func>
2386 DidItVal // i32 did_it
2387 };
2388 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2389 CGM.getModule(), OMPRTL___kmpc_copyprivate),
2390 Args);
2391 }
2392}
2393
2394void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF,
2395 const RegionCodeGenTy &OrderedOpGen,
2396 SourceLocation Loc, bool IsThreads) {
2397 if (!CGF.HaveInsertPoint())
2398 return;
2399 // __kmpc_ordered(ident_t *, gtid);
2400 // OrderedOpGen();
2401 // __kmpc_end_ordered(ident_t *, gtid);
2402 // Prepare arguments and build a call to __kmpc_ordered
2403 if (IsThreads) {
2404 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2405 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2406 CGM.getModule(), OMPRTL___kmpc_ordered),
2407 Args,
2408 OMPBuilder.getOrCreateRuntimeFunction(
2409 CGM.getModule(), OMPRTL___kmpc_end_ordered),
2410 Args);
2411 OrderedOpGen.setAction(Action);
2412 emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
2413 return;
2414 }
2415 emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
2416}
2417
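/// Select the ident_t flags encoding which construct an implicit or explicit
/// barrier belongs to.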
2418static unsigned getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) {
2419 unsigned Flags;
2420 if (Kind == OMPD_for)
2421 Flags = OMP_IDENT_BARRIER_IMPL_FOR;
2422 else if (Kind == OMPD_sections)
2423 Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS;
2424 else if (Kind == OMPD_single)
2425 Flags = OMP_IDENT_BARRIER_IMPL_SINGLE;
2426 else if (Kind == OMPD_barrier)
2427 Flags = OMP_IDENT_BARRIER_EXPL;
2428 else
2429 Flags = OMP_IDENT_BARRIER_IMPL;
2430 return Flags;
2431}
2432
2433void CGOpenMPRuntime::getDefaultScheduleAndChunk(
2434 CodeGenFunction &CGF, const OMPLoopDirective &S,
2435 OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const {
2436 // Check if the loop directive is actually a doacross loop directive. In this
2437 // case, choose a 'schedule(static, 1)'.
2438 if (llvm::any_of(
2439 S.getClausesOfKind<OMPOrderedClause>(),
2440 [](const OMPOrderedClause *C) { return C->getNumForLoops(); })) {
2441 ScheduleKind = OMPC_SCHEDULE_static;
2442 // Chunk size is 1 in this case.
2443 llvm::APInt ChunkSize(32, 1);
2444 ChunkExpr = IntegerLiteral::Create(
2445 CGF.getContext(), ChunkSize,
2446 CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
2447 SourceLocation());
2448 }
2449}
2450
2451void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
2452 OpenMPDirectiveKind Kind, bool EmitChecks,
2453 bool ForceSimpleCall) {
2454 // Check if we should use the OMPBuilder
2455 auto *OMPRegionInfo =
2456 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo);
2457 if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2458 llvm::OpenMPIRBuilder::InsertPointTy AfterIP =
2459 cantFail(OMPBuilder.createBarrier(CGF.Builder, Kind, ForceSimpleCall,
2460 EmitChecks));
2461 CGF.Builder.restoreIP(AfterIP);
2462 return;
2463 }
2464
2465 if (!CGF.HaveInsertPoint())
2466 return;
2467 // Build call __kmpc_cancel_barrier(loc, thread_id);
2468 // Build call __kmpc_barrier(loc, thread_id);
2469 unsigned Flags = getDefaultFlagsForBarriers(Kind);
2470 // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc,
2471 // thread_id);
2472 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
2473 getThreadID(CGF, Loc)};
2474 if (OMPRegionInfo) {
2475 if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) {
2476 llvm::Value *Result = CGF.EmitRuntimeCall(
2477 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
2478 OMPRTL___kmpc_cancel_barrier),
2479 Args);
2480 if (EmitChecks) {
2481 // if (__kmpc_cancel_barrier()) {
2482 // exit from construct;
2483 // }
2484 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
2485 llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
2486 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
2487 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
2488 CGF.EmitBlock(ExitBB);
2489 // exit from construct;
2490 CodeGenFunction::JumpDest CancelDestination =
2491 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
2492 CGF.EmitBranchThroughCleanup(CancelDestination);
2493 CGF.EmitBlock(ContBB, /*IsFinished=*/true);
2494 }
2495 return;
2496 }
2497 }
2498 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2499 CGM.getModule(), OMPRTL___kmpc_barrier),
2500 Args);
2501}
2502
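/// Emit a call to __kmpc_error for the 'error' directive; severity 2 denotes
/// a fatal error, 1 a warning.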
2503void CGOpenMPRuntime::emitErrorCall(CodeGenFunction &CGF, SourceLocation Loc,
2504 Expr *ME, bool IsFatal) {
2505 llvm::Value *MVL = ME ? CGF.EmitScalarExpr(ME)
2506 : llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
2507 // Build call void __kmpc_error(ident_t *loc, int severity, const char
2508 // *message)
2509 llvm::Value *Args[] = {
2510 emitUpdateLocation(CGF, Loc, /*Flags=*/0, /*GenLoc=*/true),
2511 llvm::ConstantInt::get(CGM.Int32Ty, IsFatal ? 2 : 1),
2512 CGF.Builder.CreatePointerCast(MVL, CGM.Int8PtrTy)};
2513 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2514 CGM.getModule(), OMPRTL___kmpc_error),
2515 Args);
2516}
2517
2518/// Map the OpenMP loop schedule to the runtime enumeration.
2519static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
2520 bool Chunked, bool Ordered) {
2521 switch (ScheduleKind) {
2522 case OMPC_SCHEDULE_static:
2523 return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked)
2524 : (Ordered ? OMP_ord_static : OMP_sch_static);
2525 case OMPC_SCHEDULE_dynamic:
2526 return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked;
2527 case OMPC_SCHEDULE_guided:
2528 return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked;
2529 case OMPC_SCHEDULE_runtime:
2530 return Ordered ? OMP_ord_runtime : OMP_sch_runtime;
2531 case OMPC_SCHEDULE_auto:
2532 return Ordered ? OMP_ord_auto : OMP_sch_auto;
2533 case OMPC_SCHEDULE_unknown:
2534 assert(!Chunked && "chunk was specified but schedule kind not known");
2535 return Ordered ? OMP_ord_static : OMP_sch_static;
2536 }
2537 llvm_unreachable("Unexpected runtime schedule");
2538}
2539
2540/// Map the OpenMP distribute schedule to the runtime enumeration.
2541static OpenMPSchedType
2542getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) {
2543 // only static is allowed for dist_schedule
2544 return Chunked ? OMP_dist_sch_static_chunked : OMP_dist_sch_static;
2545}
2546
2547bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
2548 bool Chunked) const {
2549 OpenMPSchedType Schedule =
2550 getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2551 return Schedule == OMP_sch_static;
2552}
2553
2554bool CGOpenMPRuntime::isStaticNonchunked(
2555 OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
2556 OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2557 return Schedule == OMP_dist_sch_static;
2558}
2559
2560bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind,
2561 bool Chunked) const {
2562 OpenMPSchedType Schedule =
2563 getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2564 return Schedule == OMP_sch_static_chunked;
2565}
2566
2567bool CGOpenMPRuntime::isStaticChunked(
2568 OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
2569 OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2570 return Schedule == OMP_dist_sch_static_chunked;
2571}
2572
2573bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
2574 OpenMPSchedType Schedule =
2575 getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
2576 assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
2577 return Schedule != OMP_sch_static;
2578}
2579
2580static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule,
2581 OpenMPScheduleClauseModifier M1,
2582 OpenMPScheduleClauseModifier M2) {
2583 int Modifier = 0;
2584 switch (M1) {
2585 case OMPC_SCHEDULE_MODIFIER_monotonic:
2586 Modifier = OMP_sch_modifier_monotonic;
2587 break;
2588 case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2589 Modifier = OMP_sch_modifier_nonmonotonic;
2590 break;
2591 case OMPC_SCHEDULE_MODIFIER_simd:
2592 if (Schedule == OMP_sch_static_chunked)
2593 Schedule = OMP_sch_static_balanced_chunked;
2594 break;
2595 case OMPC_SCHEDULE_MODIFIER_last:
2596 case OMPC_SCHEDULE_MODIFIER_unknown:
2597 break;
2598 }
2599 switch (M2) {
2600 case OMPC_SCHEDULE_MODIFIER_monotonic:
2601 Modifier = OMP_sch_modifier_monotonic;
2602 break;
2603 case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2604 Modifier = OMP_sch_modifier_nonmonotonic;
2605 break;
2606 case OMPC_SCHEDULE_MODIFIER_simd:
2607 if (Schedule == OMP_sch_static_chunked)
2608 Schedule = OMP_sch_static_balanced_chunked;
2609 break;
2610 case OMPC_SCHEDULE_MODIFIER_last:
2611 case OMPC_SCHEDULE_MODIFIER_unknown:
2612 break;
2613 }
2614 // OpenMP 5.0, 2.9.2 Worksharing-Loop Construct, Description.
2615 // If the static schedule kind is specified or if the ordered clause is
2616 // specified, and if the nonmonotonic modifier is not specified, the effect is
2617 // as if the monotonic modifier is specified. Otherwise, unless the monotonic
2618 // modifier is specified, the effect is as if the nonmonotonic modifier is
2619 // specified.
2620 if (CGM.getLangOpts().OpenMP >= 50 && Modifier == 0) {
2621 if (!(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static ||
2622 Schedule == OMP_sch_static_balanced_chunked ||
2623 Schedule == OMP_ord_static_chunked || Schedule == OMP_ord_static ||
2624 Schedule == OMP_dist_sch_static_chunked ||
2625 Schedule == OMP_dist_sch_static))
2626 Modifier = OMP_sch_modifier_nonmonotonic;
2627 }
2628 return Schedule | Modifier;
2629}
2630
2631void CGOpenMPRuntime::emitForDispatchInit(
2632 CodeGenFunction &CGF, SourceLocation Loc,
2633 const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
2634 bool Ordered, const DispatchRTInput &DispatchValues) {
2635 if (!CGF.HaveInsertPoint())
2636 return;
2637 OpenMPSchedType Schedule = getRuntimeSchedule(
2638 ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered);
2639 assert(Ordered ||
2640 (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
2641 Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked &&
2642 Schedule != OMP_sch_static_balanced_chunked));
2643 // Call __kmpc_dispatch_init(
2644 // ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
2645 // kmp_int[32|64] lower, kmp_int[32|64] upper,
2646 // kmp_int[32|64] stride, kmp_int[32|64] chunk);
2647
2648 // If the Chunk was not specified in the clause - use default value 1.
2649 llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk
2650 : CGF.Builder.getIntN(IVSize, 1);
2651 llvm::Value *Args[] = {
2652 emitUpdateLocation(CGF, Loc),
2653 getThreadID(CGF, Loc),
2654 CGF.Builder.getInt32(addMonoNonMonoModifier(
2655 CGM, Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type
2656 DispatchValues.LB, // Lower
2657 DispatchValues.UB, // Upper
2658 CGF.Builder.getIntN(IVSize, 1), // Stride
2659 Chunk // Chunk
2660 };
2661 CGF.EmitRuntimeCall(OMPBuilder.createDispatchInitFunction(IVSize, IVSigned),
2662 Args);
2663}
2664
2665void CGOpenMPRuntime::emitForDispatchDeinit(CodeGenFunction &CGF,
2666 SourceLocation Loc) {
2667 if (!CGF.HaveInsertPoint())
2668 return;
2669 // Call __kmpc_dispatch_deinit(ident_t *loc, kmp_int32 tid);
2670 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2671 CGF.EmitRuntimeCall(OMPBuilder.createDispatchDeinitFunction(), Args);
2672}
2673
2674static void emitForStaticInitCall(
2675 CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId,
2676 llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule,
2677 OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2,
2678 const CGOpenMPRuntime::StaticRTInput &Values) {
2679 if (!CGF.HaveInsertPoint())
2680 return;
2681
2682 assert(!Values.Ordered);
2683 assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
2684 Schedule == OMP_sch_static_balanced_chunked ||
2685 Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked ||
2686 Schedule == OMP_dist_sch_static ||
2687 Schedule == OMP_dist_sch_static_chunked);
2688
2689 // Call __kmpc_for_static_init(
2690 // ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
2691 // kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
2692 // kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
2693 // kmp_int[32|64] incr, kmp_int[32|64] chunk);
2694 llvm::Value *Chunk = Values.Chunk;
2695 if (Chunk == nullptr) {
2696 assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static ||
2697 Schedule == OMP_dist_sch_static) &&
2698 "expected static non-chunked schedule");
2699 // If the Chunk was not specified in the clause - use default value 1.
2700 Chunk = CGF.Builder.getIntN(Values.IVSize, 1);
2701 } else {
2702 assert((Schedule == OMP_sch_static_chunked ||
2703 Schedule == OMP_sch_static_balanced_chunked ||
2704 Schedule == OMP_ord_static_chunked ||
2705 Schedule == OMP_dist_sch_static_chunked) &&
2706 "expected static chunked schedule");
2707 }
2708 llvm::Value *Args[] = {
2709 UpdateLocation,
2710 ThreadId,
2711 CGF.Builder.getInt32(addMonoNonMonoModifier(CGF.CGM, Schedule, M1,
2712 M2)), // Schedule type
2713 Values.IL.emitRawPointer(CGF), // &isLastIter
2714 Values.LB.emitRawPointer(CGF), // &LB
2715 Values.UB.emitRawPointer(CGF), // &UB
2716 Values.ST.emitRawPointer(CGF), // &Stride
2717 CGF.Builder.getIntN(Values.IVSize, 1), // Incr
2718 Chunk // Chunk
2719 };
2720 CGF.EmitRuntimeCall(ForStaticInitFunction, Args);
2721}
2722
2723void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF,
2724 SourceLocation Loc,
2725 OpenMPDirectiveKind DKind,
2726 const OpenMPScheduleTy &ScheduleKind,
2727 const StaticRTInput &Values) {
2728 OpenMPSchedType ScheduleNum = getRuntimeSchedule(
2729 ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered);
2730 assert((isOpenMPWorksharingDirective(DKind) || (DKind == OMPD_loop)) &&
2731 "Expected loop-based or sections-based directive.");
2732 llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc,
2733 isOpenMPLoopDirective(DKind)
2734 ? OMP_IDENT_WORK_LOOP
2735 : OMP_IDENT_WORK_SECTIONS);
2736 llvm::Value *ThreadId = getThreadID(CGF, Loc);
2737 llvm::FunctionCallee StaticInitFunction =
2738 OMPBuilder.createForStaticInitFunction(Values.IVSize, Values.IVSigned,
2739 false);
2740 auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
2741 emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
2742 ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values);
2743}
2744
2745void CGOpenMPRuntime::emitDistributeStaticInit(
2746 CodeGenFunction &CGF, SourceLocation Loc,
2747 OpenMPDistScheduleClauseKind SchedKind,
2748 const CGOpenMPRuntime::StaticRTInput &Values) {
2749 OpenMPSchedType ScheduleNum =
2750 getRuntimeSchedule(SchedKind, Values.Chunk != nullptr);
2751 llvm::Value *UpdatedLocation =
2752 emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE);
2753 llvm::Value *ThreadId = getThreadID(CGF, Loc);
2754 llvm::FunctionCallee StaticInitFunction;
2755 bool isGPUDistribute =
2756 CGM.getLangOpts().OpenMPIsTargetDevice && CGM.getTriple().isGPU();
2757 StaticInitFunction = OMPBuilder.createForStaticInitFunction(
2758 Values.IVSize, Values.IVSigned, isGPUDistribute);
2759
2760 emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
2761 ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown,
2762 OMPC_SCHEDULE_MODIFIER_unknown, Values);
2763}
2764
2765void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF,
2766 SourceLocation Loc,
2767 OpenMPDirectiveKind DKind) {
2768 assert((DKind == OMPD_distribute || DKind == OMPD_for ||
2769 DKind == OMPD_sections) &&
2770 "Expected distribute, for, or sections directive kind");
2771 if (!CGF.HaveInsertPoint())
2772 return;
2773 // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
2774 llvm::Value *Args[] = {
2775 emitUpdateLocation(CGF, Loc,
2776 isOpenMPDistributeDirective(DKind) ||
2777 (DKind == OMPD_target_teams_loop)
2778 ? OMP_IDENT_WORK_DISTRIBUTE
2779 : isOpenMPLoopDirective(DKind)
2780 ? OMP_IDENT_WORK_LOOP
2781 : OMP_IDENT_WORK_SECTIONS),
2782 getThreadID(CGF, Loc)};
2783 auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
2784 if (isOpenMPDistributeDirective(DKind) &&
2785 CGM.getLangOpts().OpenMPIsTargetDevice && CGM.getTriple().isGPU())
2786 CGF.EmitRuntimeCall(
2787 OMPBuilder.getOrCreateRuntimeFunction(
2788 CGM.getModule(), OMPRTL___kmpc_distribute_static_fini),
2789 Args);
2790 else
2791 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2792 CGM.getModule(), OMPRTL___kmpc_for_static_fini),
2793 Args);
2794}
2795
2796void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
2797 SourceLocation Loc,
2798 unsigned IVSize,
2799 bool IVSigned) {
2800 if (!CGF.HaveInsertPoint())
2801 return;
2802 // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
2803 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2804 CGF.EmitRuntimeCall(OMPBuilder.createDispatchFiniFunction(IVSize, IVSigned),
2805 Args);
2806}
2807
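/// Emit a __kmpc_dispatch_next_* call and convert the returned kmp_int32
/// into a boolean "another chunk is available" flag.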
2808llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,
2809 SourceLocation Loc, unsigned IVSize,
2810 bool IVSigned, Address IL,
2811 Address LB, Address UB,
2812 Address ST) {
2813 // Call __kmpc_dispatch_next(
2814 // ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
2815 // kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
2816 // kmp_int[32|64] *p_stride);
2817 llvm::Value *Args[] = {
2818 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2819 IL.emitRawPointer(CGF), // &isLastIter
2820 LB.emitRawPointer(CGF), // &Lower
2821 UB.emitRawPointer(CGF), // &Upper
2822 ST.emitRawPointer(CGF) // &Stride
2823 };
2824 llvm::Value *Call = CGF.EmitRuntimeCall(
2825 OMPBuilder.createDispatchNextFunction(IVSize, IVSigned), Args);
2826 return CGF.EmitScalarConversion(
2827 Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1),
2828 CGF.getContext().BoolTy, Loc);
2829}
2830
2831llvm::Value *CGOpenMPRuntime::emitMessageClause(CodeGenFunction &CGF,
2832 const Expr *Message,
2833 SourceLocation Loc) {
2834 if (!Message)
2835 return llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
2836 return CGF.EmitScalarExpr(Message);
2837}
2838
2839llvm::Value *
2840CGOpenMPRuntime::emitSeverityClause(OpenMPSeverityClauseKind Severity,
2841 SourceLocation Loc) {
2842 // OpenMP 6.0, 10.4: "If no severity clause is specified then the effect is
2843 // as if sev-level is fatal."
2844 return llvm::ConstantInt::get(CGM.Int32Ty,
2845 Severity == OMPC_SEVERITY_warning ? 1 : 2);
2846}
2847
2848void CGOpenMPRuntime::emitNumThreadsClause(
2849 CodeGenFunction &CGF, llvm::Value *NumThreads, SourceLocation Loc,
2850 OpenMPNumThreadsClauseModifier Modifier, OpenMPSeverityClauseKind Severity,
2851 SourceLocation SeverityLoc, const Expr *Message,
2852 SourceLocation MessageLoc) {
2853 if (!CGF.HaveInsertPoint())
2854 return;
2855 llvm::SmallVector<llvm::Value *, 4> Args(
2856 {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2857 CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)});
2858 // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
2859 // or __kmpc_push_num_threads_strict(&loc, global_tid, num_threads, severity,
2860 // message) if strict modifier is used.
2861 RuntimeFunction FnID = OMPRTL___kmpc_push_num_threads;
2862 if (Modifier == OMPC_NUMTHREADS_strict) {
2863 FnID = OMPRTL___kmpc_push_num_threads_strict;
2864 Args.push_back(emitSeverityClause(Severity, SeverityLoc));
2865 Args.push_back(emitMessageClause(CGF, Message, MessageLoc));
2866 }
2867 CGF.EmitRuntimeCall(
2868 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), FnID), Args);
2869}
2870
2871void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF,
2872 ProcBindKind ProcBind,
2873 SourceLocation Loc) {
2874 if (!CGF.HaveInsertPoint())
2875 return;
2876 assert(ProcBind != OMP_PROC_BIND_unknown && "Unsupported proc_bind value.");
2877 // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind)
2878 llvm::Value *Args[] = {
2879 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2880 llvm::ConstantInt::get(CGM.IntTy, unsigned(ProcBind), /*isSigned=*/true)};
2881 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2882 CGM.getModule(), OMPRTL___kmpc_push_proc_bind),
2883 Args);
2884}
2885
2886void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
2887 SourceLocation Loc, llvm::AtomicOrdering AO) {
2888 if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2889 OMPBuilder.createFlush(CGF.Builder);
2890 } else {
2891 if (!CGF.HaveInsertPoint())
2892 return;
2893 // Build call void __kmpc_flush(ident_t *loc)
2894 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2895 CGM.getModule(), OMPRTL___kmpc_flush),
2896 emitUpdateLocation(CGF, Loc));
2897 }
2898}
2899
2900namespace {
2901/// Indexes of fields for type kmp_task_t.
2902enum KmpTaskTFields {
2903 /// List of shared variables.
2904 KmpTaskTShareds,
2905 /// Task routine.
2906 KmpTaskTRoutine,
2907 /// Partition id for the untied tasks.
2908 KmpTaskTPartId,
2909 /// Function with call of destructors for private variables.
2910 Data1,
2911 /// Task priority.
2912 Data2,
2913 /// (Taskloops only) Lower bound.
2914 KmpTaskTLowerBound,
2915 /// (Taskloops only) Upper bound.
2916 KmpTaskTUpperBound,
2917 /// (Taskloops only) Stride.
2918 KmpTaskTStride,
2919 /// (Taskloops only) Is last iteration flag.
2920 KmpTaskTLastIter,
2921 /// (Taskloops only) Reduction data.
2922 KmpTaskTReductions,
2923};
2924} // anonymous namespace
2925
2926void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
2927 // If we are in simd mode or there are no entries, we don't need to do
2928 // anything.
2929 if (CGM.getLangOpts().OpenMPSimd || OMPBuilder.OffloadInfoManager.empty())
2930 return;
2931
2932 llvm::OpenMPIRBuilder::EmitMetadataErrorReportFunctionTy &&ErrorReportFn =
2933 [this](llvm::OpenMPIRBuilder::EmitMetadataErrorKind Kind,
2934 const llvm::TargetRegionEntryInfo &EntryInfo) -> void {
2935 SourceLocation Loc;
2936 if (Kind != llvm::OpenMPIRBuilder::EMIT_MD_GLOBAL_VAR_LINK_ERROR) {
2937 for (auto I = CGM.getContext().getSourceManager().fileinfo_begin(),
2938 E = CGM.getContext().getSourceManager().fileinfo_end();
2939 I != E; ++I) {
2940 if (I->getFirst().getUniqueID().getDevice() == EntryInfo.DeviceID &&
2941 I->getFirst().getUniqueID().getFile() == EntryInfo.FileID) {
2942 Loc = CGM.getContext().getSourceManager().translateFileLineCol(
2943 I->getFirst(), EntryInfo.Line, 1);
2944 break;
2945 }
2946 }
2947 }
2948 switch (Kind) {
2949 case llvm::OpenMPIRBuilder::EMIT_MD_TARGET_REGION_ERROR: {
2950 CGM.getDiags().Report(Loc,
2951 diag::err_target_region_offloading_entry_incorrect)
2952 << EntryInfo.ParentName;
2953 } break;
2954 case llvm::OpenMPIRBuilder::EMIT_MD_DECLARE_TARGET_ERROR: {
2955 CGM.getDiags().Report(
2956 Loc, diag::err_target_var_offloading_entry_incorrect_with_parent)
2957 << EntryInfo.ParentName;
2958 } break;
2959 case llvm::OpenMPIRBuilder::EMIT_MD_GLOBAL_VAR_LINK_ERROR: {
2960 CGM.getDiags().Report(diag::err_target_var_offloading_entry_incorrect);
2961 } break;
2962 case llvm::OpenMPIRBuilder::EMIT_MD_GLOBAL_VAR_INDIRECT_ERROR: {
2963 unsigned DiagID = CGM.getDiags().getCustomDiagID(
2964 DiagnosticsEngine::Error, "Offloading entry for indirect declare "
2965 "target variable is incorrect: the "
2966 "address is invalid.");
2967 CGM.getDiags().Report(DiagID);
2968 } break;
2969 }
2970 };
2971
2972 OMPBuilder.createOffloadEntriesAndInfoMetadata(ErrorReportFn);
2973}
2974
2975void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) {
2976 if (!KmpRoutineEntryPtrTy) {
2977 // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.
2978 ASTContext &C = CGM.getContext();
2979 QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};
2980 FunctionProtoType::ExtProtoInfo EPI;
2981 KmpRoutineEntryPtrQTy = C.getPointerType(
2982 C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI));
2983 KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy);
2984 }
2985}
2986
2987namespace {
2988struct PrivateHelpersTy {
2989 PrivateHelpersTy(const Expr *OriginalRef, const VarDecl *Original,
2990 const VarDecl *PrivateCopy, const VarDecl *PrivateElemInit)
2991 : OriginalRef(OriginalRef), Original(Original), PrivateCopy(PrivateCopy),
2992 PrivateElemInit(PrivateElemInit) {}
2993 PrivateHelpersTy(const VarDecl *Original) : Original(Original) {}
2994 const Expr *OriginalRef = nullptr;
2995 const VarDecl *Original = nullptr;
2996 const VarDecl *PrivateCopy = nullptr;
2997 const VarDecl *PrivateElemInit = nullptr;
2998 bool isLocalPrivate() const {
2999 return !OriginalRef && !PrivateCopy && !PrivateElemInit;
3000 }
3001};
3002typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy;
3003} // anonymous namespace
3004
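/// Return true if VD carries an 'omp allocate' attribute that requires a
/// non-default allocator, so its storage must come from the runtime.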
3005static bool isAllocatableDecl(const VarDecl *VD) {
3006 const VarDecl *CVD = VD->getCanonicalDecl();
3007 if (!CVD->hasAttr<OMPAllocateDeclAttr>())
3008 return false;
3009 const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
3010 // Use the default allocation.
3011 return !(AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc &&
3012 !AA->getAllocator());
3013}
3014
3015static RecordDecl *
3016createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) {
3017 if (!Privates.empty()) {
3018 ASTContext &C = CGM.getContext();
3019 // Build struct .kmp_privates_t. {
3020 // /* private vars */
3021 // };
3022 RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t");
3023 RD->startDefinition();
3024 for (const auto &Pair : Privates) {
3025 const VarDecl *VD = Pair.second.Original;
3026 QualType Type = VD->getType().getNonReferenceType();
3027 // If the private variable is a local variable with lvalue ref type,
3028 // allocate the pointer instead of the pointee type.
3029 if (Pair.second.isLocalPrivate()) {
3030 if (VD->getType()->isLValueReferenceType())
3031 Type = C.getPointerType(Type);
3032 if (isAllocatableDecl(VD))
3033 Type = C.getPointerType(Type);
3034 }
3035 FieldDecl *FD = addFieldToRecordDecl(C, RD, Type);
3036 if (VD->hasAttrs()) {
3037 for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()),
3038 E(VD->getAttrs().end());
3039 I != E; ++I)
3040 FD->addAttr(*I);
3041 }
3042 }
3043 RD->completeDefinition();
3044 return RD;
3045 }
3046 return nullptr;
3047}
3048
3049static RecordDecl *
3050createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind,
3051 QualType KmpInt32Ty,
3052 QualType KmpRoutineEntryPointerQTy) {
3053 ASTContext &C = CGM.getContext();
3054 // Build struct kmp_task_t {
3055 // void * shareds;
3056 // kmp_routine_entry_t routine;
3057 // kmp_int32 part_id;
3058 // kmp_cmplrdata_t data1;
3059 // kmp_cmplrdata_t data2;
3060 // For taskloops additional fields:
3061 // kmp_uint64 lb;
3062 // kmp_uint64 ub;
3063 // kmp_int64 st;
3064 // kmp_int32 liter;
3065 // void * reductions;
3066 // };
3067 RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TagTypeKind::Union);
3068 UD->startDefinition();
3069 addFieldToRecordDecl(C, UD, KmpInt32Ty);
3070 addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy);
3071 UD->completeDefinition();
3072 CanQualType KmpCmplrdataTy = C.getCanonicalTagType(UD);
3073 RecordDecl *RD = C.buildImplicitRecord("kmp_task_t");
3074 RD->startDefinition();
3075 addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3076 addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
3077 addFieldToRecordDecl(C, RD, KmpInt32Ty);
3078 addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
3079 addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
3080 if (isOpenMPTaskLoopDirective(Kind)) {
3081 QualType KmpUInt64Ty =
3082 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
3083 QualType KmpInt64Ty =
3084 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
3085 addFieldToRecordDecl(C, RD, KmpUInt64Ty);
3086 addFieldToRecordDecl(C, RD, KmpUInt64Ty);
3087 addFieldToRecordDecl(C, RD, KmpInt64Ty);
3088 addFieldToRecordDecl(C, RD, KmpInt32Ty);
3089 addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3090 }
3091 RD->completeDefinition();
3092 return RD;
3093}
3094
3095static RecordDecl *
3096createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy,
3097 ArrayRef<PrivateDataTy> Privates) {
3098 ASTContext &C = CGM.getContext();
3099 // Build struct kmp_task_t_with_privates {
3100 // kmp_task_t task_data;
3101 // .kmp_privates_t. privates;
3102 // };
3103 RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates");
3104 RD->startDefinition();
3105 addFieldToRecordDecl(C, RD, KmpTaskTQTy);
3106 if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates))
3107 addFieldToRecordDecl(C, RD, C.getCanonicalTagType(PrivateRD));
3108 RD->completeDefinition();
3109 return RD;
3110}
3111
3112/// Emit a proxy function which accepts kmp_task_t as the second
3113/// argument.
3114/// \code
3115/// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
3116/// TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt,
3117/// For taskloops:
3118/// tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
3119/// tt->reductions, tt->shareds);
3120/// return 0;
3121/// }
3122/// \endcode
3123static llvm::Function *
3124emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
3125 OpenMPDirectiveKind Kind, QualType KmpInt32Ty,
3126 QualType KmpTaskTWithPrivatesPtrQTy,
3127 QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
3128 QualType SharedsPtrTy, llvm::Function *TaskFunction,
3129 llvm::Value *TaskPrivatesMap) {
3130 ASTContext &C = CGM.getContext();
3131 FunctionArgList Args;
3132 ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
3133 ImplicitParamKind::Other);
3134 ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3135 KmpTaskTWithPrivatesPtrQTy.withRestrict(),
3136 ImplicitParamKind::Other);
3137 Args.push_back(&GtidArg);
3138 Args.push_back(&TaskTypeArg);
3139 const auto &TaskEntryFnInfo =
3140 CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
3141 llvm::FunctionType *TaskEntryTy =
3142 CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
3143 std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""});
3144 auto *TaskEntry = llvm::Function::Create(
3145 TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
3146 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo);
3147 TaskEntry->setDoesNotRecurse();
3148 CodeGenFunction CGF(CGM);
3149 CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args,
3150 Loc, Loc);
3151
3152 // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map,
3153 // tt,
3154 // For taskloops:
3155 // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
3156 // tt->task_data.shareds);
3157 llvm::Value *GtidParam = CGF.EmitLoadOfScalar(
3158 CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc);
3159 LValue TDBase = CGF.EmitLoadOfPointerLValue(
3160 CGF.GetAddrOfLocalVar(&TaskTypeArg),
3161 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3162 const auto *KmpTaskTWithPrivatesQTyRD =
3163 KmpTaskTWithPrivatesQTy->castAsRecordDecl();
3164 LValue Base =
3165 CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
3166 const auto *KmpTaskTQTyRD = KmpTaskTQTy->castAsRecordDecl();
3167 auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
3168 LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
3169 llvm::Value *PartidParam = PartIdLVal.getPointer(CGF);
3170
3171 auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
3172 LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
3173 llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3174 CGF.EmitLoadOfScalar(SharedsLVal, Loc),
3175 CGF.ConvertTypeForMem(SharedsPtrTy));
3176
3177 auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
3178 llvm::Value *PrivatesParam;
3179 if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
3180 LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
3181 PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3182 PrivatesLVal.getPointer(CGF), CGF.VoidPtrTy);
3183 } else {
3184 PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
3185 }
3186
3187 llvm::Value *CommonArgs[] = {
3188 GtidParam, PartidParam, PrivatesParam, TaskPrivatesMap,
3189 CGF.Builder
3190 .CreatePointerBitCastOrAddrSpaceCast(TDBase.getAddress(),
3191 CGF.VoidPtrTy, CGF.Int8Ty)
3192 .emitRawPointer(CGF)};
3193 SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs),
3194 std::end(CommonArgs));
3195 if (isOpenMPTaskLoopDirective(Kind)) {
3196 auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound);
3197 LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI);
3198 llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc);
3199 auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound);
3200 LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI);
3201 llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc);
3202 auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride);
3203 LValue StLVal = CGF.EmitLValueForField(Base, *StFI);
3204 llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc);
3205 auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
3206 LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
3207 llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc);
3208 auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions);
3209 LValue RLVal = CGF.EmitLValueForField(Base, *RFI);
3210 llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc);
3211 CallArgs.push_back(LBParam);
3212 CallArgs.push_back(UBParam);
3213 CallArgs.push_back(StParam);
3214 CallArgs.push_back(LIParam);
3215 CallArgs.push_back(RParam);
3216 }
3217 CallArgs.push_back(SharedsParam);
3218
3219 CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction,
3220 CallArgs);
3221 CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)),
3222 CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty));
3223 CGF.FinishFunction();
3224 return TaskEntry;
3225}
3226
3227static llvm::Function *emitDestructorsFunction(CodeGenModule &CGM,
3228 SourceLocation Loc,
3229 QualType KmpInt32Ty,
3230 QualType KmpTaskTWithPrivatesPtrQTy,
3231 QualType KmpTaskTWithPrivatesQTy) {
3232 ASTContext &C = CGM.getContext();
3233 FunctionArgList Args;
3234 ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
3235 ImplicitParamKind::Other);
3236 ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3237 KmpTaskTWithPrivatesPtrQTy.withRestrict(),
3238 ImplicitParamKind::Other);
3239 Args.push_back(&GtidArg);
3240 Args.push_back(&TaskTypeArg);
3241 const auto &DestructorFnInfo =
3242 CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
3243 llvm::FunctionType *DestructorFnTy =
3244 CGM.getTypes().GetFunctionType(DestructorFnInfo);
3245 std::string Name =
3246 CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""});
3247 auto *DestructorFn =
3248 llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage,
3249 Name, &CGM.getModule());
3250 CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn,
3251 DestructorFnInfo);
3252 DestructorFn->setDoesNotRecurse();
3253 CodeGenFunction CGF(CGM);
3254 CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo,
3255 Args, Loc, Loc);
3256
3257 LValue Base = CGF.EmitLoadOfPointerLValue(
3258 CGF.GetAddrOfLocalVar(&TaskTypeArg),
3259 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3260 const auto *KmpTaskTWithPrivatesQTyRD =
3261 KmpTaskTWithPrivatesQTy->castAsRecordDecl();
3262 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
3263 Base = CGF.EmitLValueForField(Base, *FI);
3264 for (const auto *Field : FI->getType()->castAsRecordDecl()->fields()) {
3265 if (QualType::DestructionKind DtorKind =
3266 Field->getType().isDestructedType()) {
3267 LValue FieldLValue = CGF.EmitLValueForField(Base, Field);
3268 CGF.pushDestroy(DtorKind, FieldLValue.getAddress(), Field->getType());
3269 }
3270 }
3271 CGF.FinishFunction();
3272 return DestructorFn;
3273}
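// The emitted function is roughly equivalent to (illustrative sketch):
//   kmp_int32 .omp_task_destructor.(kmp_int32 gtid,
//                                   kmp_task_t_with_privates *tt) {
//     // run the destructor of each destructible field of tt->privates
//   }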
3274
3275/// Emit a privates mapping function for correct handling of private and
3276/// firstprivate variables.
3277/// \code
3278/// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1>
3279/// **noalias priv1,..., <tyn> **noalias privn) {
3280/// *priv1 = &.privates.priv1;
3281/// ...;
3282/// *privn = &.privates.privn;
3283/// }
3284/// \endcode
3285static llvm::Value *
3286emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
3287 const OMPTaskDataTy &Data, QualType PrivatesQTy,
3288 ArrayRef<PrivateDataTy> Privates) {
3289 ASTContext &C = CGM.getContext();
3290 FunctionArgList Args;
3291 ImplicitParamDecl TaskPrivatesArg(
3292 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3293 C.getPointerType(PrivatesQTy).withConst().withRestrict(),
3294 ImplicitParamKind::Other);
3295 Args.push_back(&TaskPrivatesArg);
3296 llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, unsigned> PrivateVarsPos;
3297 unsigned Counter = 1;
3298 for (const Expr *E : Data.PrivateVars) {
3299 Args.push_back(ImplicitParamDecl::Create(
3300 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3301 C.getPointerType(C.getPointerType(E->getType()))
3302 .withConst()
3303 .withRestrict(),
3304 ImplicitParamKind::Other));
3305 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3306 PrivateVarsPos[VD] = Counter;
3307 ++Counter;
3308 }
3309 for (const Expr *E : Data.FirstprivateVars) {
3310 Args.push_back(ImplicitParamDecl::Create(
3311 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3312 C.getPointerType(C.getPointerType(E->getType()))
3313 .withConst()
3314 .withRestrict(),
3315 ImplicitParamKind::Other));
3316 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3317 PrivateVarsPos[VD] = Counter;
3318 ++Counter;
3319 }
3320 for (const Expr *E : Data.LastprivateVars) {
3321 Args.push_back(ImplicitParamDecl::Create(
3322 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3323 C.getPointerType(C.getPointerType(E->getType()))
3324 .withConst()
3325 .withRestrict(),
3326 ImplicitParamKind::Other));
3327 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3328 PrivateVarsPos[VD] = Counter;
3329 ++Counter;
3330 }
3331 for (const VarDecl *VD : Data.PrivateLocals) {
3332 QualType Ty = VD->getType().getNonReferenceType();
3333 if (VD->getType()->isLValueReferenceType())
3334 Ty = C.getPointerType(Ty);
3335 if (isAllocatableDecl(VD))
3336 Ty = C.getPointerType(Ty);
3337 Args.push_back(ImplicitParamDecl::Create(
3338 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3339 C.getPointerType(C.getPointerType(Ty)).withConst().withRestrict(),
3340 ImplicitParamKind::Other));
3341 PrivateVarsPos[VD] = Counter;
3342 ++Counter;
3343 }
3344 const auto &TaskPrivatesMapFnInfo =
3345 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
3346 llvm::FunctionType *TaskPrivatesMapTy =
3347 CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo);
3348 std::string Name =
3349 CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""});
3350 auto *TaskPrivatesMap = llvm::Function::Create(
3351 TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name,
3352 &CGM.getModule());
3353 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap,
3354 TaskPrivatesMapFnInfo);
3355 if (CGM.getCodeGenOpts().OptimizationLevel != 0) {
3356 TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline);
3357 TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone);
3358 TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline);
3359 }
3360 CodeGenFunction CGF(CGM);
3361 CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap,
3362 TaskPrivatesMapFnInfo, Args, Loc, Loc);
3363
3364 // *privi = &.privates.privi;
3365 LValue Base = CGF.EmitLoadOfPointerLValue(
3366 CGF.GetAddrOfLocalVar(&TaskPrivatesArg),
3367 TaskPrivatesArg.getType()->castAs<PointerType>());
3368 const auto *PrivatesQTyRD = PrivatesQTy->castAsRecordDecl();
3369 Counter = 0;
3370 for (const FieldDecl *Field : PrivatesQTyRD->fields()) {
3371 LValue FieldLVal = CGF.EmitLValueForField(Base, Field);
3372 const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]];
3373 LValue RefLVal =
3374 CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType());
3375 LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue(
3376 RefLVal.getAddress(), RefLVal.getType()->castAs<PointerType>());
3377 CGF.EmitStoreOfScalar(FieldLVal.getPointer(CGF), RefLoadLVal);
3378 ++Counter;
3379 }
3380 CGF.FinishFunction();
3381 return TaskPrivatesMap;
3382}
3383
3384/// Emit initialization for private variables in task-based directives.
3385static void emitPrivatesInit(CodeGenFunction &CGF,
3386 const OMPExecutableDirective &D,
3387 Address KmpTaskSharedsPtr, LValue TDBase,
3388 const RecordDecl *KmpTaskTWithPrivatesQTyRD,
3389 QualType SharedsTy, QualType SharedsPtrTy,
3390 const OMPTaskDataTy &Data,
3391 ArrayRef<PrivateDataTy> Privates, bool ForDup) {
3392 ASTContext &C = CGF.getContext();
3393 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
3394 LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI);
3395 OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind())
3396 ? OMPD_taskloop
3397 : OMPD_task;
3398 const CapturedStmt &CS = *D.getCapturedStmt(Kind);
3399 CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS);
3400 LValue SrcBase;
3401 bool IsTargetTask =
3402 isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) ||
3403 isOpenMPTargetExecutionDirective(D.getDirectiveKind());
3404 // For target-based directives, skip the 4 firstprivate arrays BasePointersArray,
3405 // PointersArray, SizesArray, and MappersArray. The original variables for
3406 // these arrays are not captured and we get their addresses explicitly.
3407 if ((!IsTargetTask && !Data.FirstprivateVars.empty() && ForDup) ||
3408 (IsTargetTask && KmpTaskSharedsPtr.isValid())) {
3409 SrcBase = CGF.MakeAddrLValue(
3410 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3411 KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy),
3412 CGF.ConvertTypeForMem(SharedsTy)),
3413 SharedsTy);
3414 }
3415 FI = FI->getType()->castAsRecordDecl()->field_begin();
3416 for (const PrivateDataTy &Pair : Privates) {
3417 // Do not initialize private locals.
3418 if (Pair.second.isLocalPrivate()) {
3419 ++FI;
3420 continue;
3421 }
3422 const VarDecl *VD = Pair.second.PrivateCopy;
3423 const Expr *Init = VD->getAnyInitializer();
3424 if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) &&
3425 !CGF.isTrivialInitializer(Init)))) {
3426 LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
3427 if (const VarDecl *Elem = Pair.second.PrivateElemInit) {
3428 const VarDecl *OriginalVD = Pair.second.Original;
3429 // Check if the variable is the target-based BasePointersArray,
3430 // PointersArray, SizesArray, or MappersArray.
3431 LValue SharedRefLValue;
3432 QualType Type = PrivateLValue.getType();
3433 const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD);
3434 if (IsTargetTask && !SharedField) {
3435 assert(isa<ImplicitParamDecl>(OriginalVD) &&
3436 isa<CapturedDecl>(OriginalVD->getDeclContext()) &&
3437 cast<CapturedDecl>(OriginalVD->getDeclContext())
3438 ->getNumParams() == 0 &&
3439 isa<TranslationUnitDecl>(
3440 cast<CapturedDecl>(OriginalVD->getDeclContext())
3441 ->getDeclContext()) &&
3442 "Expected artificial target data variable.");
3443 SharedRefLValue =
3444 CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type);
3445 } else if (ForDup) {
3446 SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField);
3447 SharedRefLValue = CGF.MakeAddrLValue(
3448 SharedRefLValue.getAddress().withAlignment(
3449 C.getDeclAlign(OriginalVD)),
3450 SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl),
3451 SharedRefLValue.getTBAAInfo());
3452 } else if (CGF.LambdaCaptureFields.count(
3453 Pair.second.Original->getCanonicalDecl()) > 0 ||
3454 isa_and_nonnull<BlockDecl>(CGF.CurCodeDecl)) {
3455 SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
3456 } else {
3457 // Processing for implicitly captured variables.
3458 InlinedOpenMPRegionRAII Region(
3459 CGF, [](CodeGenFunction &, PrePostActionTy &) {}, OMPD_unknown,
3460 /*HasCancel=*/false, /*NoInheritance=*/true);
3461 SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
3462 }
3463 if (Type->isArrayType()) {
3464 // Initialize firstprivate array.
3465 if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) {
3466 // Perform simple memcpy.
3467 CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type);
3468 } else {
3469 // Initialize firstprivate array using element-by-element
3470 // initialization.
3471 CGF.EmitOMPAggregateAssign(
3472 PrivateLValue.getAddress(), SharedRefLValue.getAddress(), Type,
3473 [&CGF, Elem, Init, &CapturesInfo](Address DestElement,
3474 Address SrcElement) {
3475 // Clean up any temporaries needed by the initialization.
3476 CodeGenFunction::OMPPrivateScope InitScope(CGF);
3477 InitScope.addPrivate(Elem, SrcElement);
3478 (void)InitScope.Privatize();
3479 // Emit initialization for single element.
3480 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(
3481 CGF, &CapturesInfo);
3482 CGF.EmitAnyExprToMem(Init, DestElement,
3483 Init->getType().getQualifiers(),
3484 /*IsInitializer=*/false);
3485 });
3486 }
3487 } else {
3488 CodeGenFunction::OMPPrivateScope InitScope(CGF);
3489 InitScope.addPrivate(Elem, SharedRefLValue.getAddress());
3490 (void)InitScope.Privatize();
3491 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
3492 CGF.EmitExprAsInit(Init, VD, PrivateLValue,
3493 /*capturedByInit=*/false);
3494 }
3495 } else {
3496 CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
3497 }
3498 }
3499 ++FI;
3500 }
3501}
3502
3503/// Check if a duplication function is required for taskloops.
3504static bool checkInitIsRequired(CodeGenFunction &CGF,
3505 ArrayRef<PrivateDataTy> Privates) {
3506 bool InitRequired = false;
3507 for (const PrivateDataTy &Pair : Privates) {
3508 if (Pair.second.isLocalPrivate())
3509 continue;
3510 const VarDecl *VD = Pair.second.PrivateCopy;
3511 const Expr *Init = VD->getAnyInitializer();
3512 InitRequired = InitRequired || (isa_and_nonnull<CXXConstructExpr>(Init) &&
3513 !CGF.isTrivialInitializer(Init));
3514 if (InitRequired)
3515 break;
3516 }
3517 return InitRequired;
3518}
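// Illustrative example (not from this file): given
//   struct S { S(); S(const S &); };
//   S s;
//   #pragma omp taskloop firstprivate(s)
// the non-trivial copy constructor makes checkInitIsRequired() return true;
// a firstprivate 'int' alone would not.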
3519
3520
3521/// Emit task_dup function (for initialization of
3522/// private/firstprivate/lastprivate vars and last_iter flag)
3523/// \code
3524/// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int
3525/// lastpriv) {
3526/// // setup lastprivate flag
3527/// task_dst->last = lastpriv;
3528/// // could be constructor calls here...
3529/// }
3530/// \endcode
3531static llvm::Value *
3532emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc,
3533 const OMPExecutableDirective &D,
3534 QualType KmpTaskTWithPrivatesPtrQTy,
3535 const RecordDecl *KmpTaskTWithPrivatesQTyRD,
3536 const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy,
3537 QualType SharedsPtrTy, const OMPTaskDataTy &Data,
3538 ArrayRef<PrivateDataTy> Privates, bool WithLastIter) {
3539 ASTContext &C = CGM.getContext();
3540 FunctionArgList Args;
3541 ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3542 KmpTaskTWithPrivatesPtrQTy,
3543 ImplicitParamKind::Other);
3544 ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3545 KmpTaskTWithPrivatesPtrQTy,
3546 ImplicitParamKind::Other);
3547 ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy,
3548 ImplicitParamKind::Other);
3549 Args.push_back(&DstArg);
3550 Args.push_back(&SrcArg);
3551 Args.push_back(&LastprivArg);
3552 const auto &TaskDupFnInfo =
3553 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
3554 llvm::FunctionType *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo);
3555 std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_dup", ""});
3556 auto *TaskDup = llvm::Function::Create(
3557 TaskDupTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
3558 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo);
3559 TaskDup->setDoesNotRecurse();
3560 CodeGenFunction CGF(CGM);
3561 CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc,
3562 Loc);
3563
3564 LValue TDBase = CGF.EmitLoadOfPointerLValue(
3565 CGF.GetAddrOfLocalVar(&DstArg),
3566 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3567 // task_dst->liter = lastpriv;
3568 if (WithLastIter) {
3569 auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
3570 LValue Base = CGF.EmitLValueForField(
3571 TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
3572 LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
3573 llvm::Value *Lastpriv = CGF.EmitLoadOfScalar(
3574 CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc);
3575 CGF.EmitStoreOfScalar(Lastpriv, LILVal);
3576 }
3577
3578 // Emit initial values for private copies (if any).
3579 assert(!Privates.empty());
3580 Address KmpTaskSharedsPtr = Address::invalid();
3581 if (!Data.FirstprivateVars.empty()) {
3582 LValue TDBase = CGF.EmitLoadOfPointerLValue(
3583 CGF.GetAddrOfLocalVar(&SrcArg),
3584 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3585 LValue Base = CGF.EmitLValueForField(
3586 TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
3587 KmpTaskSharedsPtr = Address(
3588 CGF.EmitLoadOfScalar(CGF.EmitLValueForField(
3589 Base, *std::next(KmpTaskTQTyRD->field_begin(),
3590 KmpTaskTShareds)),
3591 Loc),
3592 CGF.Int8Ty, CGM.getNaturalTypeAlignment(SharedsTy));
3593 }
3594 emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD,
3595 SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true);
3596 CGF.FinishFunction();
3597 return TaskDup;
3598}
3599
3600/// Checks if a destructor function is required to be generated.
3601/// \return true if cleanups are required, false otherwise.
3602static bool
3603checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD,
3604 ArrayRef<PrivateDataTy> Privates) {
3605 for (const PrivateDataTy &P : Privates) {
3606 if (P.second.isLocalPrivate())
3607 continue;
3608 QualType Ty = P.second.Original->getType().getNonReferenceType();
3609 if (Ty.isDestructedType())
3610 return true;
3611 }
3612 return false;
3613}
3614
3615namespace {
3616/// Loop generator for OpenMP iterator expression.
3617class OMPIteratorGeneratorScope final
3618 : public CodeGenFunction::OMPPrivateScope {
3619 CodeGenFunction &CGF;
3620 const OMPIteratorExpr *E = nullptr;
3621 SmallVector<CodeGenFunction::JumpDest, 4> ContDests;
3622 SmallVector<CodeGenFunction::JumpDest, 4> ExitDests;
3623 OMPIteratorGeneratorScope() = delete;
3624 OMPIteratorGeneratorScope(OMPIteratorGeneratorScope &) = delete;
3625
3626public:
3627 OMPIteratorGeneratorScope(CodeGenFunction &CGF, const OMPIteratorExpr *E)
3628 : CodeGenFunction::OMPPrivateScope(CGF), CGF(CGF), E(E) {
3629 if (!E)
3630 return;
3631 SmallVector<llvm::Value *, 4> Uppers;
3632 for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
3633 Uppers.push_back(CGF.EmitScalarExpr(E->getHelper(I).Upper));
3634 const auto *VD = cast<VarDecl>(E->getIteratorDecl(I));
3635 addPrivate(VD, CGF.CreateMemTemp(VD->getType(), VD->getName()));
3636 const OMPIteratorHelperData &HelperData = E->getHelper(I);
3637 addPrivate(
3638 HelperData.CounterVD,
3639 CGF.CreateMemTemp(HelperData.CounterVD->getType(), "counter.addr"));
3640 }
3641 Privatize();
3642
3643 for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
3644 const OMPIteratorHelperData &HelperData = E->getHelper(I);
3645 LValue CLVal =
3646 CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(HelperData.CounterVD),
3647 HelperData.CounterVD->getType());
3648 // Counter = 0;
3649 CGF.EmitStoreOfScalar(
3650 llvm::ConstantInt::get(CLVal.getAddress().getElementType(), 0),
3651 CLVal);
3652 CodeGenFunction::JumpDest &ContDest =
3653 ContDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.cont"));
3654 CodeGenFunction::JumpDest &ExitDest =
3655 ExitDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.exit"));
3656 // N = <number-of-iterations>;
3657 llvm::Value *N = Uppers[I];
3658 // cont:
3659 // if (Counter < N) goto body; else goto exit;
3660 CGF.EmitBlock(ContDest.getBlock());
3661 auto *CVal =
3662 CGF.EmitLoadOfScalar(CLVal, HelperData.CounterVD->getLocation());
3663 llvm::Value *Cmp =
3664 HelperData.CounterVD->getType()->isSignedIntegerOrEnumerationType()
3665 ? CGF.Builder.CreateICmpSLT(CVal, N)
3666 : CGF.Builder.CreateICmpULT(CVal, N);
3667 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("iter.body");
3668 CGF.Builder.CreateCondBr(Cmp, BodyBB, ExitDest.getBlock());
3669 // body:
3670 CGF.EmitBlock(BodyBB);
3671 // Iteri = Begini + Counter * Stepi;
3672 CGF.EmitIgnoredExpr(HelperData.Update);
3673 }
3674 }
3675 ~OMPIteratorGeneratorScope() {
3676 if (!E)
3677 return;
3678 for (unsigned I = E->numOfIterators(); I > 0; --I) {
3679 // Counter = Counter + 1;
3680 const OMPIteratorHelperData &HelperData = E->getHelper(I - 1);
3681 CGF.EmitIgnoredExpr(HelperData.CounterUpdate);
3682 // goto cont;
3683 CGF.EmitBranchThroughCleanup(ContDests[I - 1]);
3684 // exit:
3685 CGF.EmitBlock(ExitDests[I - 1].getBlock(), /*IsFinished=*/I == 1);
3686 }
3687 }
3688};
3689} // namespace
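// Illustrative input (not from this file) that OMPIteratorGeneratorScope
// expands:
//   #pragma omp task depend(iterator(i = 0:n), in : a[i])
// The scope privatizes 'i' and its helper counter and emits a loop of the
// form 'for (counter = 0; counter < n; ++counter) { i = begin + counter *
// step; <body>; }' around the code generated inside it.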
3690
3691static std::pair<llvm::Value *, llvm::Value *>
3692getPointerAndSize(CodeGenFunction &CGF, const Expr *E) {
3693 const auto *OASE = dyn_cast<OMPArrayShapingExpr>(E);
3694 llvm::Value *Addr;
3695 if (OASE) {
3696 const Expr *Base = OASE->getBase();
3697 Addr = CGF.EmitScalarExpr(Base);
3698 } else {
3699 Addr = CGF.EmitLValue(E).getPointer(CGF);
3700 }
3701 llvm::Value *SizeVal;
3702 QualType Ty = E->getType();
3703 if (OASE) {
3704 SizeVal = CGF.getTypeSize(OASE->getBase()->getType()->getPointeeType());
3705 for (const Expr *SE : OASE->getDimensions()) {
3706 llvm::Value *Sz = CGF.EmitScalarExpr(SE);
3707 Sz = CGF.EmitScalarConversion(
3708 Sz, SE->getType(), CGF.getContext().getSizeType(), SE->getExprLoc());
3709 SizeVal = CGF.Builder.CreateNUWMul(SizeVal, Sz);
3710 }
3711 } else if (const auto *ASE =
3712 dyn_cast<ArraySectionExpr>(E->IgnoreParenImpCasts())) {
3713 LValue UpAddrLVal = CGF.EmitArraySectionExpr(ASE, /*IsLowerBound=*/false);
3714 Address UpAddrAddress = UpAddrLVal.getAddress();
3715 llvm::Value *UpAddr = CGF.Builder.CreateConstGEP1_32(
3716 UpAddrAddress.getElementType(), UpAddrAddress.emitRawPointer(CGF),
3717 /*Idx0=*/1);
3718 llvm::Value *LowIntPtr = CGF.Builder.CreatePtrToInt(Addr, CGF.SizeTy);
3719 llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGF.SizeTy);
3720 SizeVal = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr);
3721 } else {
3722 SizeVal = CGF.getTypeSize(Ty);
3723 }
3724 return std::make_pair(Addr, SizeVal);
3725}
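// Illustrative results (assumed examples): for an array-shaping expression
// '([n][m])ptr' the size is n * m * sizeof(*ptr); for an array section
// 'a[1:4]' it is the byte distance from &a[1] to one past &a[4]; otherwise
// it is simply the size of the expression's type.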
3726
3727/// Builds kmp_depend_info, if it is not built yet, and builds flags type.
3728static void getKmpAffinityType(ASTContext &C, QualType &KmpTaskAffinityInfoTy) {
3729 QualType FlagsTy = C.getIntTypeForBitwidth(32, /*Signed=*/false);
3730 if (KmpTaskAffinityInfoTy.isNull()) {
3731 RecordDecl *KmpAffinityInfoRD =
3732 C.buildImplicitRecord("kmp_task_affinity_info_t");
3733 KmpAffinityInfoRD->startDefinition();
3734 addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getIntPtrType());
3735 addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getSizeType());
3736 addFieldToRecordDecl(C, KmpAffinityInfoRD, FlagsTy);
3737 KmpAffinityInfoRD->completeDefinition();
3738 KmpTaskAffinityInfoTy = C.getCanonicalTagType(KmpAffinityInfoRD);
3739 }
3740}
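// C-level view of the record built above (a sketch; the authoritative
// definition lives in the runtime's kmp.h):
//   typedef struct {
//     intptr_t base_addr;
//     size_t len;
//     uint32_t flags;
//   } kmp_task_affinity_info_t;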
3741
3742CGOpenMPRuntime::TaskResultTy
3743CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
3744 const OMPExecutableDirective &D,
3745 llvm::Function *TaskFunction, QualType SharedsTy,
3746 Address Shareds, const OMPTaskDataTy &Data) {
3747 ASTContext &C = CGM.getContext();
3748 SmallVector<PrivateDataTy, 4> Privates;
3749 // Aggregate privates and sort them by alignment in descending order.
3750 const auto *I = Data.PrivateCopies.begin();
3751 for (const Expr *E : Data.PrivateVars) {
3752 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3753 Privates.emplace_back(
3754 C.getDeclAlign(VD),
3755 PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
3756 /*PrivateElemInit=*/nullptr));
3757 ++I;
3758 }
3759 I = Data.FirstprivateCopies.begin();
3760 const auto *IElemInitRef = Data.FirstprivateInits.begin();
3761 for (const Expr *E : Data.FirstprivateVars) {
3762 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3763 Privates.emplace_back(
3764 C.getDeclAlign(VD),
3765 PrivateHelpersTy(
3766 E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
3767 cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl())));
3768 ++I;
3769 ++IElemInitRef;
3770 }
3771 I = Data.LastprivateCopies.begin();
3772 for (const Expr *E : Data.LastprivateVars) {
3773 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3774 Privates.emplace_back(
3775 C.getDeclAlign(VD),
3776 PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
3777 /*PrivateElemInit=*/nullptr));
3778 ++I;
3779 }
3780 for (const VarDecl *VD : Data.PrivateLocals) {
3781 if (isAllocatableDecl(VD))
3782 Privates.emplace_back(CGM.getPointerAlign(), PrivateHelpersTy(VD));
3783 else
3784 Privates.emplace_back(C.getDeclAlign(VD), PrivateHelpersTy(VD));
3785 }
3786 llvm::stable_sort(Privates,
3787 [](const PrivateDataTy &L, const PrivateDataTy &R) {
3788 return L.first > R.first;
3789 });
3790 QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
3791 // Build type kmp_routine_entry_t (if not built yet).
3792 emitKmpRoutineEntryT(KmpInt32Ty);
3793 // Build type kmp_task_t (if not built yet).
3794 if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) {
3795 if (SavedKmpTaskloopTQTy.isNull()) {
3796 SavedKmpTaskloopTQTy = C.getCanonicalTagType(createKmpTaskTRecordDecl(
3797 CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
3798 }
3799 KmpTaskTQTy = SavedKmpTaskloopTQTy;
3800 } else {
3801 assert((D.getDirectiveKind() == OMPD_task ||
3802 isOpenMPTargetExecutionDirective(D.getDirectiveKind()) ||
3803 isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) &&
3804 "Expected taskloop, task or target directive");
3805 if (SavedKmpTaskTQTy.isNull()) {
3806 SavedKmpTaskTQTy = C.getCanonicalTagType(createKmpTaskTRecordDecl(
3807 CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
3808 }
3809 KmpTaskTQTy = SavedKmpTaskTQTy;
3810 }
3811 const auto *KmpTaskTQTyRD = KmpTaskTQTy->castAsRecordDecl();
3812 // Build particular struct kmp_task_t for the given task.
3813 const RecordDecl *KmpTaskTWithPrivatesQTyRD =
3814 createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates);
3815 CanQualType KmpTaskTWithPrivatesQTy =
3816 C.getCanonicalTagType(KmpTaskTWithPrivatesQTyRD);
3817 QualType KmpTaskTWithPrivatesPtrQTy =
3818 C.getPointerType(KmpTaskTWithPrivatesQTy);
3819 llvm::Type *KmpTaskTWithPrivatesPtrTy = CGF.Builder.getPtrTy(0);
3820 llvm::Value *KmpTaskTWithPrivatesTySize =
3821 CGF.getTypeSize(KmpTaskTWithPrivatesQTy);
3822 QualType SharedsPtrTy = C.getPointerType(SharedsTy);
3823
3824 // Emit initial values for private copies (if any).
3825 llvm::Value *TaskPrivatesMap = nullptr;
3826 llvm::Type *TaskPrivatesMapTy =
3827 std::next(TaskFunction->arg_begin(), 3)->getType();
3828 if (!Privates.empty()) {
3829 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
3830 TaskPrivatesMap =
3831 emitTaskPrivateMappingFunction(CGM, Loc, Data, FI->getType(), Privates);
3832 TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3833 TaskPrivatesMap, TaskPrivatesMapTy);
3834 } else {
3835 TaskPrivatesMap = llvm::ConstantPointerNull::get(
3836 cast<llvm::PointerType>(TaskPrivatesMapTy));
3837 }
3838 // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
3839 // kmp_task_t *tt);
3840 llvm::Function *TaskEntry = emitProxyTaskFunction(
3841 CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
3842 KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction,
3843 TaskPrivatesMap);
3844
3845 // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
3846 // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
3847 // kmp_routine_entry_t *task_entry);
3848 // Task flags. Format is taken from
3849 // https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h,
3850 // description of kmp_tasking_flags struct.
3851 enum {
3852 TiedFlag = 0x1,
3853 FinalFlag = 0x2,
3854 DestructorsFlag = 0x8,
3855 PriorityFlag = 0x20,
3856 DetachableFlag = 0x40,
3857 FreeAgentFlag = 0x80,
3858 TransparentFlag = 0x100,
3859 };
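 // Illustrative combination (not in the source): a tied task with a priority
 // clause and destructible privates is allocated with
 // flags = TiedFlag | PriorityFlag | DestructorsFlag, i.e. 0x29.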
3860 unsigned Flags = Data.Tied ? TiedFlag : 0;
3861 bool NeedsCleanup = false;
3862 if (!Privates.empty()) {
3863 NeedsCleanup =
3864 checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD, Privates);
3865 if (NeedsCleanup)
3866 Flags = Flags | DestructorsFlag;
3867 }
3868 if (const auto *Clause = D.getSingleClause<OMPThreadsetClause>()) {
3869 OpenMPThreadsetKind Kind = Clause->getThreadsetKind();
3870 if (Kind == OMPC_THREADSET_omp_pool)
3871 Flags = Flags | FreeAgentFlag;
3872 }
3873 if (D.getSingleClause<OMPTransparentClause>())
3874 Flags |= TransparentFlag;
3875
3876 if (Data.Priority.getInt())
3877 Flags = Flags | PriorityFlag;
3878 if (D.hasClausesOfKind<OMPDetachClause>())
3879 Flags = Flags | DetachableFlag;
3880 llvm::Value *TaskFlags =
3881 Data.Final.getPointer()
3882 ? CGF.Builder.CreateSelect(Data.Final.getPointer(),
3883 CGF.Builder.getInt32(FinalFlag),
3884 CGF.Builder.getInt32(/*C=*/0))
3885 : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0);
3886 TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
3887 llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy));
3888 SmallVector<llvm::Value *, 8> AllocArgs = {emitUpdateLocation(CGF, Loc),
3889 getThreadID(CGF, Loc), TaskFlags, KmpTaskTWithPrivatesTySize,
3890 SharedsSize, CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3891 TaskEntry, KmpRoutineEntryPtrTy)};
3892 llvm::Value *NewTask;
3893 if (D.hasClausesOfKind<OMPNowaitClause>()) {
3894 // Check if we have any device clause associated with the directive.
3895 const Expr *Device = nullptr;
3896 if (auto *C = D.getSingleClause<OMPDeviceClause>())
3897 Device = C->getDevice();
3898 // Emit device ID if any, otherwise use the default value.
3899 llvm::Value *DeviceID;
3900 if (Device)
3901 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
3902 CGF.Int64Ty, /*isSigned=*/true);
3903 else
3904 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
3905 AllocArgs.push_back(DeviceID);
3906 NewTask = CGF.EmitRuntimeCall(
3907 OMPBuilder.getOrCreateRuntimeFunction(
3908 CGM.getModule(), OMPRTL___kmpc_omp_target_task_alloc),
3909 AllocArgs);
3910 } else {
3911 NewTask =
3912 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
3913 CGM.getModule(), OMPRTL___kmpc_omp_task_alloc),
3914 AllocArgs);
3915 }
3916 // Emit detach clause initialization.
3917 // evt = (typeof(evt))__kmpc_task_allow_completion_event(loc, tid,
3918 // task_descriptor);
3919 if (const auto *DC = D.getSingleClause<OMPDetachClause>()) {
3920 const Expr *Evt = DC->getEventHandler()->IgnoreParenImpCasts();
3921 LValue EvtLVal = CGF.EmitLValue(Evt);
3922
3923 // Build kmp_event_t *__kmpc_task_allow_completion_event(ident_t *loc_ref,
3924 // int gtid, kmp_task_t *task);
3925 llvm::Value *Loc = emitUpdateLocation(CGF, DC->getBeginLoc());
3926 llvm::Value *Tid = getThreadID(CGF, DC->getBeginLoc());
3927 Tid = CGF.Builder.CreateIntCast(Tid, CGF.IntTy, /*isSigned=*/false);
3928 llvm::Value *EvtVal = CGF.EmitRuntimeCall(
3929 OMPBuilder.getOrCreateRuntimeFunction(
3930 CGM.getModule(), OMPRTL___kmpc_task_allow_completion_event),
3931 {Loc, Tid, NewTask});
3932 EvtVal = CGF.EmitScalarConversion(EvtVal, C.VoidPtrTy, Evt->getType(),
3933 Evt->getExprLoc());
3934 CGF.EmitStoreOfScalar(EvtVal, EvtLVal);
3935 }
3936 // Process affinity clauses.
3937 if (D.hasClausesOfKind<OMPAffinityClause>()) {
3938 // Process list of affinity data.
3939 ASTContext &C = CGM.getContext();
3940 Address AffinitiesArray = Address::invalid();
3941 // Calculate number of elements to form the array of affinity data.
3942 llvm::Value *NumOfElements = nullptr;
3943 unsigned NumAffinities = 0;
3944 for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
3945 if (const Expr *Modifier = C->getModifier()) {
3946 const auto *IE = cast<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts());
3947 for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
3948 llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
3949 Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
3950 NumOfElements =
3951 NumOfElements ? CGF.Builder.CreateNUWMul(NumOfElements, Sz) : Sz;
3952 }
3953 } else {
3954 NumAffinities += C->varlist_size();
3955 }
3956 }
3957 getKmpAffinityType(CGM.getContext(), KmpTaskAffinityInfoTy);
3958 // Field ids in the kmp_task_affinity_info record.
3959 enum RTLAffinityInfoFieldsTy { BaseAddr, Len, Flags };
3960
3961 QualType KmpTaskAffinityInfoArrayTy;
3962 if (NumOfElements) {
3963 NumOfElements = CGF.Builder.CreateNUWAdd(
3964 llvm::ConstantInt::get(CGF.SizeTy, NumAffinities), NumOfElements);
3965 auto *OVE = new (C) OpaqueValueExpr(
3966 Loc,
3967 C.getIntTypeForBitwidth(C.getTypeSize(C.getSizeType()), /*Signed=*/0),
3968 VK_PRValue);
3969 CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE,
3970 RValue::get(NumOfElements));
3971 KmpTaskAffinityInfoArrayTy = C.getVariableArrayType(
3972 KmpTaskAffinityInfoTy, OVE, ArraySizeModifier::Normal,
3973 /*IndexTypeQuals=*/0);
3974 // Properly emit variable-sized array.
3975 auto *PD = ImplicitParamDecl::Create(C, KmpTaskAffinityInfoArrayTy,
3976 ImplicitParamKind::Other);
3977 CGF.EmitVarDecl(*PD);
3978 AffinitiesArray = CGF.GetAddrOfLocalVar(PD);
3979 NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
3980 /*isSigned=*/false);
3981 } else {
3982 KmpTaskAffinityInfoArrayTy = C.getConstantArrayType(
3983 KmpTaskAffinityInfoTy,
3984 llvm::APInt(C.getTypeSize(C.getSizeType()), NumAffinities), nullptr,
3985 ArraySizeModifier::Normal, /*IndexTypeQuals=*/0);
3986 AffinitiesArray =
3987 CGF.CreateMemTemp(KmpTaskAffinityInfoArrayTy, ".affs.arr.addr");
3988 AffinitiesArray = CGF.Builder.CreateConstArrayGEP(AffinitiesArray, 0);
3989 NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumAffinities,
3990 /*isSigned=*/false);
3991 }
3992
3993 const auto *KmpAffinityInfoRD = KmpTaskAffinityInfoTy->getAsRecordDecl();
3994 // Fill array by elements without iterators.
3995 unsigned Pos = 0;
3996 bool HasIterator = false;
3997 for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
3998 if (C->getModifier()) {
3999 HasIterator = true;
4000 continue;
4001 }
4002 for (const Expr *E : C->varlist()) {
4003 llvm::Value *Addr;
4004 llvm::Value *Size;
4005 std::tie(Addr, Size) = getPointerAndSize(CGF, E);
4006 LValue Base =
4007 CGF.MakeAddrLValue(CGF.Builder.CreateConstGEP(AffinitiesArray, Pos),
4008 KmpTaskAffinityInfoTy);
4009 // affs[i].base_addr = &<Affinities[i].second>;
4010 LValue BaseAddrLVal = CGF.EmitLValueForField(
4011 Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
4012 CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
4013 BaseAddrLVal);
4014 // affs[i].len = sizeof(<Affinities[i].second>);
4015 LValue LenLVal = CGF.EmitLValueForField(
4016 Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
4017 CGF.EmitStoreOfScalar(Size, LenLVal);
4018 ++Pos;
4019 }
4020 }
4021 LValue PosLVal;
4022 if (HasIterator) {
4023 PosLVal = CGF.MakeAddrLValue(
4024 CGF.CreateMemTemp(C.getSizeType(), "affs.counter.addr"),
4025 C.getSizeType());
4026 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
4027 }
4028 // Process elements with iterators.
4029 for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
4030 const Expr *Modifier = C->getModifier();
4031 if (!Modifier)
4032 continue;
4033 OMPIteratorGeneratorScope IteratorScope(
4034 CGF, cast_or_null<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts()));
4035 for (const Expr *E : C->varlist()) {
4036 llvm::Value *Addr;
4037 llvm::Value *Size;
4038 std::tie(Addr, Size) = getPointerAndSize(CGF, E);
4039 llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4040 LValue Base =
4041 CGF.MakeAddrLValue(CGF.Builder.CreateGEP(CGF, AffinitiesArray, Idx),
4042 KmpTaskAffinityInfoTy);
4043 // affs[i].base_addr = &<Affinities[i].second>;
4044 LValue BaseAddrLVal = CGF.EmitLValueForField(
4045 Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
4046 CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
4047 BaseAddrLVal);
4048 // affs[i].len = sizeof(<Affinities[i].second>);
4049 LValue LenLVal = CGF.EmitLValueForField(
4050 Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
4051 CGF.EmitStoreOfScalar(Size, LenLVal);
4052 Idx = CGF.Builder.CreateNUWAdd(
4053 Idx, llvm::ConstantInt::get(Idx->getType(), 1));
4054 CGF.EmitStoreOfScalar(Idx, PosLVal);
4055 }
4056 }
4057 // Call to kmp_int32 __kmpc_omp_reg_task_with_affinity(ident_t *loc_ref,
4058 // kmp_int32 gtid, kmp_task_t *new_task, kmp_int32
4059 // naffins, kmp_task_affinity_info_t *affin_list);
4060 llvm::Value *LocRef = emitUpdateLocation(CGF, Loc);
4061 llvm::Value *GTid = getThreadID(CGF, Loc);
4062 llvm::Value *AffinListPtr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4063 AffinitiesArray.emitRawPointer(CGF), CGM.VoidPtrTy);
4064 // FIXME: Emit the function and ignore its result for now unless the
4065 // runtime function is properly implemented.
4066 (void)CGF.EmitRuntimeCall(
4067 OMPBuilder.getOrCreateRuntimeFunction(
4068 CGM.getModule(), OMPRTL___kmpc_omp_reg_task_with_affinity),
4069 {LocRef, GTid, NewTask, NumOfElements, AffinListPtr});
4070 }
4071 llvm::Value *NewTaskNewTaskTTy =
4072 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4073 NewTask, KmpTaskTWithPrivatesPtrTy);
4074 LValue Base = CGF.MakeNaturalAlignRawAddrLValue(NewTaskNewTaskTTy,
4075 KmpTaskTWithPrivatesQTy);
4076 LValue TDBase =
4077 CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin());
4078 // Fill the data in the resulting kmp_task_t record.
4079 // Copy shareds if there are any.
4080 Address KmpTaskSharedsPtr = Address::invalid();
4081 if (!SharedsTy->castAsRecordDecl()->field_empty()) {
4082 KmpTaskSharedsPtr = Address(
4083 CGF.EmitLoadOfScalar(
4084 CGF.EmitLValueForField(
4085 TDBase,
4086 *std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds)),
4087 Loc),
4088 CGF.Int8Ty, CGM.getNaturalTypeAlignment(SharedsTy));
4089 LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy);
4090 LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy);
4091 CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap);
4092 }
4093 // Emit initial values for private copies (if any).
4094 TaskResultTy Result;
4095 if (!Privates.empty()) {
4096 emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD,
4097 SharedsTy, SharedsPtrTy, Data, Privates,
4098 /*ForDup=*/false);
4099 if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
4100 (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) {
4101 Result.TaskDupFn = emitTaskDupFunction(
4102 CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD,
4103 KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates,
4104 /*WithLastIter=*/!Data.LastprivateVars.empty());
4105 }
4106 }
4107 // Fields of union "kmp_cmplrdata_t" for destructors and priority.
4108 enum { Priority = 0, Destructors = 1 };
4109 // Provide pointer to function with destructors for privates.
4110 auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1);
4111 const auto *KmpCmplrdataUD = (*FI)->getType()->castAsRecordDecl();
4112 assert(KmpCmplrdataUD->isUnion());
4113 if (NeedsCleanup) {
4114 llvm::Value *DestructorFn = emitDestructorsFunction(
4115 CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
4116 KmpTaskTWithPrivatesQTy);
4117 LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI);
4118 LValue DestructorsLV = CGF.EmitLValueForField(
4119 Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors));
4120 CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4121 DestructorFn, KmpRoutineEntryPtrTy),
4122 DestructorsLV);
4123 }
4124 // Set priority.
4125 if (Data.Priority.getInt()) {
4126 LValue Data2LV = CGF.EmitLValueForField(
4127 TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2));
4128 LValue PriorityLV = CGF.EmitLValueForField(
4129 Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority));
4130 CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV);
4131 }
4132 Result.NewTask = NewTask;
4133 Result.TaskEntry = TaskEntry;
4134 Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy;
4135 Result.TDBase = TDBase;
4136 Result.KmpTaskTQTyRD = KmpTaskTQTyRD;
4137 return Result;
4138}
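// Net effect of emitTaskInit as a C-level sketch (illustrative only; the
// actual callee depends on the nowait/device handling above):
//   kmp_task_t *t = __kmpc_omp_task_alloc(&loc, gtid, flags,
//                                         sizeof(kmp_task_t_with_privates),
//                                         sizeof(shareds), .omp_task_entry.);
//   memcpy(t->shareds, &captured_shareds, sizeof(shareds));
//   // ...then privates init and priority/destructor fields as emitted above.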
4139
4140/// Translates internal dependency kind into the runtime kind.
4141static RTLDependenceKindTy translateDependencyKind(OpenMPDependClauseKind K) {
4142 RTLDependenceKindTy DepKind;
4143 switch (K) {
4144 case OMPC_DEPEND_in:
4145 DepKind = RTLDependenceKindTy::DepIn;
4146 break;
4147 // Out and InOut dependencies must use the same code.
4148 case OMPC_DEPEND_out:
4149 case OMPC_DEPEND_inout:
4150 DepKind = RTLDependenceKindTy::DepInOut;
4151 break;
4152 case OMPC_DEPEND_mutexinoutset:
4153 DepKind = RTLDependenceKindTy::DepMutexInOutSet;
4154 break;
4155 case OMPC_DEPEND_inoutset:
4156 DepKind = RTLDependenceKindTy::DepInOutSet;
4157 break;
4158 case OMPC_DEPEND_outallmemory:
4159 DepKind = RTLDependenceKindTy::DepOmpAllMem;
4160 break;
4161 case OMPC_DEPEND_source:
4162 case OMPC_DEPEND_sink:
4163 case OMPC_DEPEND_depobj:
4164 case OMPC_DEPEND_inoutallmemory:
4165 case OMPC_DEPEND_unknown:
4166 llvm_unreachable("Unknown task dependence type");
4167 }
4168 return DepKind;
4169}
4170
4171/// Builds kmp_depend_info, if it is not built yet, and builds flags type.
4172static void getDependTypes(ASTContext &C, QualType &KmpDependInfoTy,
4173 QualType &FlagsTy) {
4174 FlagsTy = C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false);
4175 if (KmpDependInfoTy.isNull()) {
4176 RecordDecl *KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info");
4177 KmpDependInfoRD->startDefinition();
4178 addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType());
4179 addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType());
4180 addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy);
4181 KmpDependInfoRD->completeDefinition();
4182 KmpDependInfoTy = C.getCanonicalTagType(KmpDependInfoRD);
4183 }
4184}
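// C-level view of kmp_depend_info as built above (a sketch; the flags width
// mirrors the host 'bool' size, typically 8 bits):
//   typedef struct {
//     intptr_t base_addr;
//     size_t len;
//     uint8_t flags;
//   } kmp_depend_info_t;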
4185
4186std::pair<llvm::Value *, LValue>
4187CGOpenMPRuntime::getDepobjElements(CodeGenFunction &CGF, LValue DepobjLVal,
4188 SourceLocation Loc) {
4189 ASTContext &C = CGM.getContext();
4190 QualType FlagsTy;
4191 getDependTypes(C, KmpDependInfoTy, FlagsTy);
4192 auto *KmpDependInfoRD = KmpDependInfoTy->castAsRecordDecl();
4193 QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
4194 LValue Base = CGF.EmitLoadOfPointerLValue(
4195 DepobjLVal.getAddress().withElementType(
4196 CGF.ConvertTypeForMem(KmpDependInfoPtrTy)),
4197 KmpDependInfoPtrTy->castAs<PointerType>());
4198 Address DepObjAddr = CGF.Builder.CreateGEP(
4199 CGF, Base.getAddress(),
4200 llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
4201 LValue NumDepsBase = CGF.MakeAddrLValue(
4202 DepObjAddr, KmpDependInfoTy, Base.getBaseInfo(), Base.getTBAAInfo());
4203 // NumDeps = deps[i].base_addr;
4204 LValue BaseAddrLVal = CGF.EmitLValueForField(
4205 NumDepsBase,
4206 *std::next(KmpDependInfoRD->field_begin(),
4207 static_cast<unsigned int>(RTLDependInfoFields::BaseAddr)));
4208 llvm::Value *NumDeps = CGF.EmitLoadOfScalar(BaseAddrLVal, Loc);
4209 return std::make_pair(NumDeps, Base);
4210}
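// Storage convention assumed here (illustrative): a depobj's array carries
// one extra leading element whose base_addr field stores the element count,
// i.e. NumDeps == deps[-1].base_addr and the entries are deps[0..NumDeps-1].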
4211
4212static void emitDependData(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
4213 llvm::PointerUnion<unsigned *, LValue *> Pos,
4214 const OMPTaskDataTy::DependData &Data,
4215 Address DependenciesArray) {
4216 CodeGenModule &CGM = CGF.CGM;
4217 ASTContext &C = CGM.getContext();
4218 QualType FlagsTy;
4219 getDependTypes(C, KmpDependInfoTy, FlagsTy);
4220 auto *KmpDependInfoRD = KmpDependInfoTy->castAsRecordDecl();
4221 llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
4222
4223 OMPIteratorGeneratorScope IteratorScope(
4224 CGF, cast_or_null<OMPIteratorExpr>(
4225 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
4226 : nullptr));
4227 for (const Expr *E : Data.DepExprs) {
4228 llvm::Value *Addr;
4229 llvm::Value *Size;
4230
4231 // The expression will be a nullptr in the 'omp_all_memory' case.
4232 if (E) {
4233 std::tie(Addr, Size) = getPointerAndSize(CGF, E);
4234 Addr = CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy);
4235 } else {
4236 Addr = llvm::ConstantInt::get(CGF.IntPtrTy, 0);
4237 Size = llvm::ConstantInt::get(CGF.SizeTy, 0);
4238 }
4239 LValue Base;
4240 if (unsigned *P = dyn_cast<unsigned *>(Pos)) {
4241 Base = CGF.MakeAddrLValue(
4242 CGF.Builder.CreateConstGEP(DependenciesArray, *P), KmpDependInfoTy);
4243 } else {
4244 assert(E && "Expected a non-null expression");
4245 LValue &PosLVal = *cast<LValue *>(Pos);
4246 llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4247 Base = CGF.MakeAddrLValue(
4248 CGF.Builder.CreateGEP(CGF, DependenciesArray, Idx), KmpDependInfoTy);
4249 }
4250 // deps[i].base_addr = &<Dependencies[i].second>;
4251 LValue BaseAddrLVal = CGF.EmitLValueForField(
4252 Base,
4253 *std::next(KmpDependInfoRD->field_begin(),
4254 static_cast<unsigned int>(RTLDependInfoFields::BaseAddr)));
4255 CGF.EmitStoreOfScalar(Addr, BaseAddrLVal);
4256 // deps[i].len = sizeof(<Dependencies[i].second>);
4257 LValue LenLVal = CGF.EmitLValueForField(
4258 Base, *std::next(KmpDependInfoRD->field_begin(),
4259 static_cast<unsigned int>(RTLDependInfoFields::Len)));
4260 CGF.EmitStoreOfScalar(Size, LenLVal);
4261 // deps[i].flags = <Dependencies[i].first>;
4262 RTLDependenceKindTy DepKind = translateDependencyKind(Data.DepKind);
4263 LValue FlagsLVal = CGF.EmitLValueForField(
4264 Base,
4265 *std::next(KmpDependInfoRD->field_begin(),
4266 static_cast<unsigned int>(RTLDependInfoFields::Flags)));
4267 CGF.EmitStoreOfScalar(
4268 llvm::ConstantInt::get(LLVMFlagsTy, static_cast<unsigned int>(DepKind)),
4269 FlagsLVal);
4270 if (unsigned *P = dyn_cast<unsigned *>(Pos)) {
4271 ++(*P);
4272 } else {
4273 LValue &PosLVal = *cast<LValue *>(Pos);
4274 llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4275 Idx = CGF.Builder.CreateNUWAdd(Idx,
4276 llvm::ConstantInt::get(Idx->getType(), 1));
4277 CGF.EmitStoreOfScalar(Idx, PosLVal);
4278 }
4279 }
4280}
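// Each entry written above corresponds to (illustrative C view):
//   deps[i].base_addr = (intptr_t)&expr; // 0 for 'omp_all_memory'
//   deps[i].len = sizeof(expr);          // 0 for 'omp_all_memory'
//   deps[i].flags = <runtime dependence kind>;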
4281
4282SmallVector<llvm::Value *, 4> CGOpenMPRuntime::emitDepobjElementsSizes(
4283 CodeGenFunction &CGF, QualType &KmpDependInfoTy,
4284 const OMPTaskDataTy::DependData &Data) {
4285 assert(Data.DepKind == OMPC_DEPEND_depobj &&
4286 "Expected depobj dependency kind.");
4287 SmallVector<llvm::Value *, 4> Sizes;
4288 SmallVector<LValue, 4> SizeLVals;
4289 ASTContext &C = CGF.getContext();
4290 {
4291 OMPIteratorGeneratorScope IteratorScope(
4292 CGF, cast_or_null<OMPIteratorExpr>(
4293 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
4294 : nullptr));
4295 for (const Expr *E : Data.DepExprs) {
4296 llvm::Value *NumDeps;
4297 LValue Base;
4298 LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
4299 std::tie(NumDeps, Base) =
4300 getDepobjElements(CGF, DepobjLVal, E->getExprLoc());
4301 LValue NumLVal = CGF.MakeAddrLValue(
4302 CGF.CreateMemTemp(C.getUIntPtrType(), "depobj.size.addr"),
4303 C.getUIntPtrType());
4304 CGF.Builder.CreateStore(llvm::ConstantInt::get(CGF.IntPtrTy, 0),
4305 NumLVal.getAddress());
4306 llvm::Value *PrevVal = CGF.EmitLoadOfScalar(NumLVal, E->getExprLoc());
4307 llvm::Value *Add = CGF.Builder.CreateNUWAdd(PrevVal, NumDeps);
4308 CGF.EmitStoreOfScalar(Add, NumLVal);
4309 SizeLVals.push_back(NumLVal);
4310 }
4311 }
4312 for (unsigned I = 0, E = SizeLVals.size(); I < E; ++I) {
4313 llvm::Value *Size =
4314 CGF.EmitLoadOfScalar(SizeLVals[I], Data.DepExprs[I]->getExprLoc());
4315 Sizes.push_back(Size);
4316 }
4317 return Sizes;
4318}
4319
4320void CGOpenMPRuntime::emitDepobjElements(CodeGenFunction &CGF,
4321 QualType &KmpDependInfoTy,
4322 LValue PosLVal,
4323 const OMPTaskDataTy::DependData &Data,
4324 Address DependenciesArray) {
4325 assert(Data.DepKind == OMPC_DEPEND_depobj &&
4326 "Expected depobj dependency kind.");
4327 llvm::Value *ElSize = CGF.getTypeSize(KmpDependInfoTy);
4328 {
4329 OMPIteratorGeneratorScope IteratorScope(
4330 CGF, cast_or_null<OMPIteratorExpr>(
4331 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
4332 : nullptr));
4333 for (const Expr *E : Data.DepExprs) {
4334 llvm::Value *NumDeps;
4335 LValue Base;
4336 LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
4337 std::tie(NumDeps, Base) =
4338 getDepobjElements(CGF, DepobjLVal, E->getExprLoc());
4339
4340 // memcopy dependency data.
4341 llvm::Value *Size = CGF.Builder.CreateNUWMul(
4342 ElSize,
4343 CGF.Builder.CreateIntCast(NumDeps, CGF.SizeTy, /*isSigned=*/false));
4344 llvm::Value *Pos = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4345 Address DepAddr = CGF.Builder.CreateGEP(CGF, DependenciesArray, Pos);
4346 CGF.Builder.CreateMemCpy(DepAddr, Base.getAddress(), Size);
4347
4348 // Increase pos.
4349 // pos += size;
4350 llvm::Value *Add = CGF.Builder.CreateNUWAdd(Pos, NumDeps);
4351 CGF.EmitStoreOfScalar(Add, PosLVal);
4352 }
4353 }
4354}
4355
4356std::pair<llvm::Value *, Address> CGOpenMPRuntime::emitDependClause(
4357 CodeGenFunction &CGF, ArrayRef<OMPTaskDataTy::DependData> Dependencies,
4358 SourceLocation Loc) {
4359 if (llvm::all_of(Dependencies, [](const OMPTaskDataTy::DependData &D) {
4360 return D.DepExprs.empty();
4361 }))
4362 return std::make_pair(nullptr, Address::invalid());
4363 // Process list of dependencies.
4364 ASTContext &C = CGM.getContext();
4365 Address DependenciesArray = Address::invalid();
4366 llvm::Value *NumOfElements = nullptr;
4367 unsigned NumDependencies = std::accumulate(
4368 Dependencies.begin(), Dependencies.end(), 0,
4369 [](unsigned V, const OMPTaskDataTy::DependData &D) {
4370 return D.DepKind == OMPC_DEPEND_depobj
4371 ? V
4372 : (V + (D.IteratorExpr ? 0 : D.DepExprs.size()));
4373 });
4374 QualType FlagsTy;
4375 getDependTypes(C, KmpDependInfoTy, FlagsTy);
4376 bool HasDepobjDeps = false;
4377 bool HasRegularWithIterators = false;
4378 llvm::Value *NumOfDepobjElements = llvm::ConstantInt::get(CGF.IntPtrTy, 0);
4379 llvm::Value *NumOfRegularWithIterators =
4380 llvm::ConstantInt::get(CGF.IntPtrTy, 0);
4381 // Calculate number of depobj dependencies and regular deps with the
4382 // iterators.
4383 for (const OMPTaskDataTy::DependData &D : Dependencies) {
4384 if (D.DepKind == OMPC_DEPEND_depobj) {
4385 SmallVector<llvm::Value *, 4> Sizes =
4386 emitDepobjElementsSizes(CGF, KmpDependInfoTy, D);
4387 for (llvm::Value *Size : Sizes) {
4388 NumOfDepobjElements =
4389 CGF.Builder.CreateNUWAdd(NumOfDepobjElements, Size);
4390 }
4391 HasDepobjDeps = true;
4392 continue;
4393 }
4394 // Include number of iterations, if any.
4395
4396 if (const auto *IE = cast_or_null<OMPIteratorExpr>(D.IteratorExpr)) {
4397 llvm::Value *ClauseIteratorSpace =
4398 llvm::ConstantInt::get(CGF.IntPtrTy, 1);
4399 for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
4400 llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
4401 Sz = CGF.Builder.CreateIntCast(Sz, CGF.IntPtrTy, /*isSigned=*/false);
4402 ClauseIteratorSpace = CGF.Builder.CreateNUWMul(Sz, ClauseIteratorSpace);
4403 }
4404 llvm::Value *NumClauseDeps = CGF.Builder.CreateNUWMul(
4405 ClauseIteratorSpace,
4406 llvm::ConstantInt::get(CGF.IntPtrTy, D.DepExprs.size()));
4407 NumOfRegularWithIterators =
4408 CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumClauseDeps);
4409 HasRegularWithIterators = true;
4410 continue;
4411 }
4412 }
4413
4414 QualType KmpDependInfoArrayTy;
4415 if (HasDepobjDeps || HasRegularWithIterators) {
4416 NumOfElements = llvm::ConstantInt::get(CGM.IntPtrTy, NumDependencies,
4417 /*isSigned=*/false);
4418 if (HasDepobjDeps) {
4419 NumOfElements =
4420 CGF.Builder.CreateNUWAdd(NumOfDepobjElements, NumOfElements);
4421 }
4422 if (HasRegularWithIterators) {
4423 NumOfElements =
4424 CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumOfElements);
4425 }
4426 auto *OVE = new (C) OpaqueValueExpr(
4427 Loc, C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0),
4428 VK_PRValue);
4429 CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE,
4430 RValue::get(NumOfElements));
4431 KmpDependInfoArrayTy =
4432 C.getVariableArrayType(KmpDependInfoTy, OVE, ArraySizeModifier::Normal,
4433 /*IndexTypeQuals=*/0);
4434 // CGF.EmitVariablyModifiedType(KmpDependInfoArrayTy);
4435 // Properly emit variable-sized array.
4436 auto *PD = ImplicitParamDecl::Create(C, KmpDependInfoArrayTy,
4437 ImplicitParamKind::Other);
4438 CGF.EmitVarDecl(*PD);
4439 DependenciesArray = CGF.GetAddrOfLocalVar(PD);
4440 NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
4441 /*isSigned=*/false);
4442 } else {
4443 KmpDependInfoArrayTy = C.getConstantArrayType(
4444 KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies), nullptr,
4445 ArraySizeModifier::Normal, /*IndexTypeQuals=*/0);
4446 DependenciesArray =
4447 CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr");
4448 DependenciesArray = CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0);
4449 NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumDependencies,
4450 /*isSigned=*/false);
4451 }
4452 unsigned Pos = 0;
4453 for (const OMPTaskDataTy::DependData &Dep : Dependencies) {
4454 if (Dep.DepKind == OMPC_DEPEND_depobj || Dep.IteratorExpr)
4455 continue;
4456 emitDependData(CGF, KmpDependInfoTy, &Pos, Dep, DependenciesArray);
4457 }
4458 // Copy regular dependencies with iterators.
4459 LValue PosLVal = CGF.MakeAddrLValue(
4460 CGF.CreateMemTemp(C.getSizeType(), "dep.counter.addr"), C.getSizeType());
4461 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
4462 for (const OMPTaskDataTy::DependData &Dep : Dependencies) {
4463 if (Dep.DepKind == OMPC_DEPEND_depobj || !Dep.IteratorExpr)
4464 continue;
4465 emitDependData(CGF, KmpDependInfoTy, &PosLVal, Dep, DependenciesArray);
4466 }
4467 // Copy final depobj arrays without iterators.
4468 if (HasDepobjDeps) {
4469 for (const OMPTaskDataTy::DependData &Dep : Dependencies) {
4470 if (Dep.DepKind != OMPC_DEPEND_depobj)
4471 continue;
4472 emitDepobjElements(CGF, KmpDependInfoTy, PosLVal, Dep, DependenciesArray);
4473 }
4474 }
4475 DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4476 DependenciesArray, CGF.VoidPtrTy, CGF.Int8Ty);
4477 return std::make_pair(NumOfElements, DependenciesArray);
4478}
4479
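// Builds the dependency array for a standalone 'omp depobj' construct. One
// extra leading kmp_depend_info element is allocated, and its base_addr field
// stores the number of dependencies so that later 'update' and 'destroy'
// clauses can recover the array size; the pointer returned below is advanced
// past that header element.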
4480Address CGOpenMPRuntime::emitDepobjDependClause(
4481 CodeGenFunction &CGF, const OMPTaskDataTy::DependData &Dependencies,
4482 SourceLocation Loc) {
4483 if (Dependencies.DepExprs.empty())
4484 return Address::invalid();
4485 // Process list of dependencies.
4486 ASTContext &C = CGM.getContext();
4487 Address DependenciesArray = Address::invalid();
4488 unsigned NumDependencies = Dependencies.DepExprs.size();
4489 QualType FlagsTy;
4490 getDependTypes(C, KmpDependInfoTy, FlagsTy);
4491 auto *KmpDependInfoRD = KmpDependInfoTy->castAsRecordDecl();
4492
4493 llvm::Value *Size;
4494 // Define type kmp_depend_info[<Dependencies.size()>];
4495 // For depobj reserve one extra element to store the number of elements.
4496 // This is required to handle the depobj(x) update(in) construct.
4497 // kmp_depend_info[<Dependencies.size()>] deps;
4498 llvm::Value *NumDepsVal;
4499 CharUnits Align = C.getTypeAlignInChars(KmpDependInfoTy);
4500 if (const auto *IE =
4501 cast_or_null<OMPIteratorExpr>(Dependencies.IteratorExpr)) {
4502 NumDepsVal = llvm::ConstantInt::get(CGF.SizeTy, 1);
4503 for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
4504 llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
4505 Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
4506 NumDepsVal = CGF.Builder.CreateNUWMul(NumDepsVal, Sz);
4507 }
4508 Size = CGF.Builder.CreateNUWAdd(llvm::ConstantInt::get(CGF.SizeTy, 1),
4509 NumDepsVal);
4510 CharUnits SizeInBytes =
4511 C.getTypeSizeInChars(KmpDependInfoTy).alignTo(Align);
4512 llvm::Value *RecSize = CGM.getSize(SizeInBytes);
4513 Size = CGF.Builder.CreateNUWMul(Size, RecSize);
4514 NumDepsVal =
4515 CGF.Builder.CreateIntCast(NumDepsVal, CGF.IntPtrTy, /*isSigned=*/false);
4516 } else {
4517 QualType KmpDependInfoArrayTy = C.getConstantArrayType(
4518 KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies + 1),
4519 nullptr, ArraySizeModifier::Normal, /*IndexTypeQuals=*/0);
4520 CharUnits Sz = C.getTypeSizeInChars(KmpDependInfoArrayTy);
4521 Size = CGM.getSize(Sz.alignTo(Align));
4522 NumDepsVal = llvm::ConstantInt::get(CGF.IntPtrTy, NumDependencies);
4523 }
4524 // Need to allocate on the dynamic memory.
4525 llvm::Value *ThreadID = getThreadID(CGF, Loc);
4526 // Use default allocator.
4527 llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
4528 llvm::Value *Args[] = {ThreadID, Size, Allocator};
4529
4530 llvm::Value *Addr =
4531 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
4532 CGM.getModule(), OMPRTL___kmpc_alloc),
4533 Args, ".dep.arr.addr");
4534 llvm::Type *KmpDependInfoLlvmTy = CGF.ConvertTypeForMem(KmpDependInfoTy);
4535 Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4536 Addr, CGF.Builder.getPtrTy(0));
4537 DependenciesArray = Address(Addr, KmpDependInfoLlvmTy, Align);
4538 // Write number of elements in the first element of array for depobj.
4539 LValue Base = CGF.MakeAddrLValue(DependenciesArray, KmpDependInfoTy);
4540 // deps[i].base_addr = NumDependencies;
4541 LValue BaseAddrLVal = CGF.EmitLValueForField(
4542 Base,
4543 *std::next(KmpDependInfoRD->field_begin(),
4544 static_cast<unsigned int>(RTLDependInfoFields::BaseAddr)));
4545 CGF.EmitStoreOfScalar(NumDepsVal, BaseAddrLVal);
4546 llvm::PointerUnion<unsigned *, LValue *> Pos;
4547 unsigned Idx = 1;
4548 LValue PosLVal;
4549 if (Dependencies.IteratorExpr) {
4550 PosLVal = CGF.MakeAddrLValue(
4551 CGF.CreateMemTemp(C.getSizeType(), "iterator.counter.addr"),
4552 C.getSizeType());
4553 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Idx), PosLVal,
4554 /*IsInit=*/true);
4555 Pos = &PosLVal;
4556 } else {
4557 Pos = &Idx;
4558 }
4559 emitDependData(CGF, KmpDependInfoTy, Pos, Dependencies, DependenciesArray);
4560 DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4561 CGF.Builder.CreateConstGEP(DependenciesArray, 1), CGF.VoidPtrTy,
4562 CGF.Int8Ty);
4563 return DependenciesArray;
4564}
4565
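// Destroys a depobj: steps the stored pointer back over the hidden header
// element emitted by emitDepobjDependClause and releases the allocation with
// __kmpc_free.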
4566void CGOpenMPRuntime::emitDestroyClause(CodeGenFunction &CGF, LValue DepobjLVal,
4567 SourceLocation Loc) {
4568 ASTContext &C = CGM.getContext();
4569 QualType FlagsTy;
4570 getDependTypes(C, KmpDependInfoTy, FlagsTy);
4571 LValue Base = CGF.EmitLoadOfPointerLValue(DepobjLVal.getAddress(),
4572 C.VoidPtrTy.castAs<PointerType>());
4573 QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
4574 Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4575 Base.getAddress(), CGF.ConvertTypeForMem(KmpDependInfoPtrTy),
4576 CGF.ConvertTypeForMem(KmpDependInfoTy));
4577 llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
4578 Addr.getElementType(), Addr.emitRawPointer(CGF),
4579 llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
4580 DepObjAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(DepObjAddr,
4581 CGF.VoidPtrTy);
4582 llvm::Value *ThreadID = getThreadID(CGF, Loc);
4583 // Use default allocator.
4584 llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
4585 llvm::Value *Args[] = {ThreadID, DepObjAddr, Allocator};
4586
4587 // _kmpc_free(gtid, addr, nullptr);
4588 (void)CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
4589 CGM.getModule(), OMPRTL___kmpc_free),
4590 Args);
4591}
4592
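// Implements 'omp depobj(x) update(<kind>)': iterates over every
// kmp_depend_info element referenced by the depobj and rewrites its flags
// field with the translated dependency kind.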
4593void CGOpenMPRuntime::emitUpdateClause(CodeGenFunction &CGF, LValue DepobjLVal,
4594 OpenMPDependClauseKind NewDepKind,
4595 SourceLocation Loc) {
4596 ASTContext &C = CGM.getContext();
4597 QualType FlagsTy;
4598 getDependTypes(C, KmpDependInfoTy, FlagsTy);
4599 auto *KmpDependInfoRD = KmpDependInfoTy->castAsRecordDecl();
4600 llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
4601 llvm::Value *NumDeps;
4602 LValue Base;
4603 std::tie(NumDeps, Base) = getDepobjElements(CGF, DepobjLVal, Loc);
4604
4605 Address Begin = Base.getAddress();
4606 // Cast from pointer to array type to pointer to single element.
4607 llvm::Value *End = CGF.Builder.CreateGEP(Begin.getElementType(),
4608 Begin.emitRawPointer(CGF), NumDeps);
4609 // The basic structure here is a while-do loop.
4610 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.body");
4611 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.done");
4612 llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
4613 CGF.EmitBlock(BodyBB);
4614 llvm::PHINode *ElementPHI =
4615 CGF.Builder.CreatePHI(Begin.getType(), 2, "omp.elementPast");
4616 ElementPHI->addIncoming(Begin.emitRawPointer(CGF), EntryBB);
4617 Begin = Begin.withPointer(ElementPHI, KnownNonNull);
4618 Base = CGF.MakeAddrLValue(Begin, KmpDependInfoTy, Base.getBaseInfo(),
4619 Base.getTBAAInfo());
4620 // deps[i].flags = NewDepKind;
4621 RTLDependenceKindTy DepKind = translateDependencyKind(NewDepKind);
4622 LValue FlagsLVal = CGF.EmitLValueForField(
4623 Base, *std::next(KmpDependInfoRD->field_begin(),
4624 static_cast<unsigned int>(RTLDependInfoFields::Flags)));
4625 CGF.EmitStoreOfScalar(
4626 llvm::ConstantInt::get(LLVMFlagsTy, static_cast<unsigned int>(DepKind)),
4627 FlagsLVal);
4628
4629 // Shift the address forward by one element.
4630 llvm::Value *ElementNext =
4631 CGF.Builder.CreateConstGEP(Begin, /*Index=*/1, "omp.elementNext")
4632 .emitRawPointer(CGF);
4633 ElementPHI->addIncoming(ElementNext, CGF.Builder.GetInsertBlock());
4634 llvm::Value *IsEmpty =
4635 CGF.Builder.CreateICmpEQ(ElementNext, End, "omp.isempty");
4636 CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
4637 // Done.
4638 CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
4639}
4640
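// In outline, for '#pragma omp task if(<cond>) depend(...)' the code below
// emits roughly:
//   if (<cond>) {
//     __kmpc_omp_task_with_deps(loc, gtid, new_task, ndeps, deps, 0, NULL);
//   } else {
//     __kmpc_omp_taskwait_deps_51(...); // only when dependences are present
//     __kmpc_omp_task_begin_if0(loc, gtid, new_task);
//     proxy_task_entry(gtid, new_task); // serialized task body
//     __kmpc_omp_task_complete_if0(loc, gtid, new_task);
//   }
// Without dependences the then-branch degenerates to plain __kmpc_omp_task.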
4641void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
4642 const OMPExecutableDirective &D,
4643 llvm::Function *TaskFunction,
4644 QualType SharedsTy, Address Shareds,
4645 const Expr *IfCond,
4646 const OMPTaskDataTy &Data) {
4647 if (!CGF.HaveInsertPoint())
4648 return;
4649
4650 TaskResultTy Result =
4651 emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
4652 llvm::Value *NewTask = Result.NewTask;
4653 llvm::Function *TaskEntry = Result.TaskEntry;
4654 llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy;
4655 LValue TDBase = Result.TDBase;
4656 const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD;
4657 // Process list of dependences.
4658 Address DependenciesArray = Address::invalid();
4659 llvm::Value *NumOfElements;
4660 std::tie(NumOfElements, DependenciesArray) =
4661 emitDependClause(CGF, Data.Dependences, Loc);
4662
4663 // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
4664 // libcall.
4665 // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
4666 // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
4667 // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence
4668 // list is not empty
4669 llvm::Value *ThreadID = getThreadID(CGF, Loc);
4670 llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
4671 llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask };
4672 llvm::Value *DepTaskArgs[7];
4673 if (!Data.Dependences.empty()) {
4674 DepTaskArgs[0] = UpLoc;
4675 DepTaskArgs[1] = ThreadID;
4676 DepTaskArgs[2] = NewTask;
4677 DepTaskArgs[3] = NumOfElements;
4678 DepTaskArgs[4] = DependenciesArray.emitRawPointer(CGF);
4679 DepTaskArgs[5] = CGF.Builder.getInt32(0);
4680 DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
4681 }
4682 auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, &TaskArgs,
4683 &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) {
4684 if (!Data.Tied) {
4685 auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
4686 LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI);
4687 CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal);
4688 }
4689 if (!Data.Dependences.empty()) {
4690 CGF.EmitRuntimeCall(
4691 OMPBuilder.getOrCreateRuntimeFunction(
4692 CGM.getModule(), OMPRTL___kmpc_omp_task_with_deps),
4693 DepTaskArgs);
4694 } else {
4695 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
4696 CGM.getModule(), OMPRTL___kmpc_omp_task),
4697 TaskArgs);
4698 }
4699 // Check if the parent region is untied and build a return for the untied task.
4700 if (auto *Region =
4701 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
4702 Region->emitUntiedSwitch(CGF);
4703 };
4704
4705 llvm::Value *DepWaitTaskArgs[7];
4706 if (!Data.Dependences.empty()) {
4707 DepWaitTaskArgs[0] = UpLoc;
4708 DepWaitTaskArgs[1] = ThreadID;
4709 DepWaitTaskArgs[2] = NumOfElements;
4710 DepWaitTaskArgs[3] = DependenciesArray.emitRawPointer(CGF);
4711 DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
4712 DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
4713 DepWaitTaskArgs[6] =
4714 llvm::ConstantInt::get(CGF.Int32Ty, Data.HasNowaitClause);
4715 }
4716 auto &M = CGM.getModule();
4717 auto &&ElseCodeGen = [this, &M, &TaskArgs, ThreadID, NewTaskNewTaskTTy,
4718 TaskEntry, &Data, &DepWaitTaskArgs,
4719 Loc](CodeGenFunction &CGF, PrePostActionTy &) {
4720 CodeGenFunction::RunCleanupsScope LocalScope(CGF);
4721 // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
4722 // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
4723 // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
4724 // is specified.
4725 if (!Data.Dependences.empty())
4726 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
4727 M, OMPRTL___kmpc_omp_taskwait_deps_51),
4728 DepWaitTaskArgs);
4729 // Call proxy_task_entry(gtid, new_task);
4730 auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy,
4731 Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
4732 Action.Enter(CGF);
4733 llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
4734 CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry,
4735 OutlinedFnArgs);
4736 };
4737
4738 // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
4739 // kmp_task_t *new_task);
4740 // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
4741 // kmp_task_t *new_task);
4742 RegionCodeGenTy RCG(CodeGen);
4743 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
4744 M, OMPRTL___kmpc_omp_task_begin_if0),
4745 TaskArgs,
4746 OMPBuilder.getOrCreateRuntimeFunction(
4747 M, OMPRTL___kmpc_omp_task_complete_if0),
4748 TaskArgs);
4749 RCG.setAction(Action);
4750 RCG(CGF);
4751 };
4752
4753 if (IfCond) {
4754 emitIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen);
4755 } else {
4756 RegionCodeGenTy ThenRCG(ThenCodeGen);
4757 ThenRCG(CGF);
4758 }
4759}
4760
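// The 'sched' and 'grainsize' arguments of __kmpc_taskloop below encode the
// schedule clause: grainsize(g) lowers to (Grainsize, g), num_tasks(n) to
// (NumTasks, n), and no clause to (NoSchedule, 0); Data.Schedule carries the
// expression, and its int flag distinguishes num_tasks from grainsize.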
4761void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
4762 const OMPLoopDirective &D,
4763 llvm::Function *TaskFunction,
4764 QualType SharedsTy, Address Shareds,
4765 const Expr *IfCond,
4766 const OMPTaskDataTy &Data) {
4767 if (!CGF.HaveInsertPoint())
4768 return;
4769 TaskResultTy Result =
4770 emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
4771 // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
4772 // libcall.
4773 // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
4774 // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
4775 // sched, kmp_uint64 grainsize, void *task_dup);
4776 llvm::Value *ThreadID = getThreadID(CGF, Loc);
4777 llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
4778 llvm::Value *IfVal;
4779 if (IfCond) {
4780 IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy,
4781 /*isSigned=*/true);
4782 } else {
4783 IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1);
4784 }
4785
4786 LValue LBLVal = CGF.EmitLValueForField(
4787 Result.TDBase,
4788 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound));
4789 const auto *LBVar =
4790 cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl());
4791 CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(), LBLVal.getQuals(),
4792 /*IsInitializer=*/true);
4793 LValue UBLVal = CGF.EmitLValueForField(
4794 Result.TDBase,
4795 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound));
4796 const auto *UBVar =
4797 cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl());
4798 CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(), UBLVal.getQuals(),
4799 /*IsInitializer=*/true);
4800 LValue StLVal = CGF.EmitLValueForField(
4801 Result.TDBase,
4802 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride));
4803 const auto *StVar =
4804 cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl());
4805 CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(), StLVal.getQuals(),
4806 /*IsInitializer=*/true);
4807 // Store reductions address.
4808 LValue RedLVal = CGF.EmitLValueForField(
4809 Result.TDBase,
4810 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions));
4811 if (Data.Reductions) {
4812 CGF.EmitStoreOfScalar(Data.Reductions, RedLVal);
4813 } else {
4814 CGF.EmitNullInitialization(RedLVal.getAddress(),
4815 CGF.getContext().VoidPtrTy);
4816 }
4817 enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 };
4818 llvm::SmallVector<llvm::Value *, 12> TaskArgs{
4819 UpLoc,
4820 ThreadID,
4821 Result.NewTask,
4822 IfVal,
4823 LBLVal.getPointer(CGF),
4824 UBLVal.getPointer(CGF),
4825 CGF.EmitLoadOfScalar(StLVal, Loc),
4826 llvm::ConstantInt::getSigned(
4827 CGF.IntTy, 1), // Always 1 because taskgroup emitted by the compiler
4828 llvm::ConstantInt::getSigned(
4829 CGF.IntTy, Data.Schedule.getPointer()
4830 ? Data.Schedule.getInt() ? NumTasks : Grainsize
4831 : NoSchedule),
4832 Data.Schedule.getPointer()
4833 ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty,
4834 /*isSigned=*/false)
4835 : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0)};
4836 if (Data.HasModifier)
4837 TaskArgs.push_back(llvm::ConstantInt::get(CGF.Int32Ty, 1));
4838
4839 TaskArgs.push_back(Result.TaskDupFn
4840 ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4841 Result.TaskDupFn, CGF.VoidPtrTy)
4842 : llvm::ConstantPointerNull::get(CGF.VoidPtrTy));
4843 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
4844 CGM.getModule(), Data.HasModifier
4845 ? OMPRTL___kmpc_taskloop_5
4846 : OMPRTL___kmpc_taskloop),
4847 TaskArgs);
4848}
4849
4850/// Emit reduction operation for each element of array (required for
4851/// array sections) LHS op = RHS.
4852/// \param Type Type of array.
4853/// \param LHSVar Variable on the left side of the reduction operation
4854/// (references element of array in original variable).
4855/// \param RHSVar Variable on the right side of the reduction operation
4856/// (references element of array in original variable).
4857/// \param RedOpGen Generator of reduction operation with use of LHSVar and
4858/// RHSVar.
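/// The emitted IR is a pointer-bump do-while loop over both arrays: LHSVar
/// and RHSVar are privatized to the current element addresses, RedOpGen is
/// invoked for each element pair, and the walk ends when the advanced LHS
/// pointer reaches the array end.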
4859static void EmitOMPAggregateReduction(
4860 CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar,
4861 const VarDecl *RHSVar,
4862 const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *,
4863 const Expr *, const Expr *)> &RedOpGen,
4864 const Expr *XExpr = nullptr, const Expr *EExpr = nullptr,
4865 const Expr *UpExpr = nullptr) {
4866 // Perform element-by-element initialization.
4867 QualType ElementTy;
4868 Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar);
4869 Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar);
4870
4871 // Drill down to the base element type on both arrays.
4872 const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
4873 llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr);
4874
4875 llvm::Value *RHSBegin = RHSAddr.emitRawPointer(CGF);
4876 llvm::Value *LHSBegin = LHSAddr.emitRawPointer(CGF);
4877 // Cast from pointer to array type to pointer to single element.
4878 llvm::Value *LHSEnd =
4879 CGF.Builder.CreateGEP(LHSAddr.getElementType(), LHSBegin, NumElements);
4880 // The basic structure here is a while-do loop.
4881 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arraycpy.body");
4882 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arraycpy.done");
4883 llvm::Value *IsEmpty =
4884 CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty");
4885 CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
4886
4887 // Enter the loop body, making that address the current address.
4888 llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
4889 CGF.EmitBlock(BodyBB);
4890
4891 CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);
4892
4893 llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI(
4894 RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast");
4895 RHSElementPHI->addIncoming(RHSBegin, EntryBB);
4896 Address RHSElementCurrent(
4897 RHSElementPHI, RHSAddr.getElementType(),
4898 RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));
4899
4900 llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI(
4901 LHSBegin->getType(), 2, "omp.arraycpy.destElementPast");
4902 LHSElementPHI->addIncoming(LHSBegin, EntryBB);
4903 Address LHSElementCurrent(
4904 LHSElementPHI, LHSAddr.getElementType(),
4905 LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));
4906
4907 // Emit copy.
4908 CodeGenFunction::OMPPrivateScope Scope(CGF);
4909 Scope.addPrivate(LHSVar, LHSElementCurrent);
4910 Scope.addPrivate(RHSVar, RHSElementCurrent);
4911 Scope.Privatize();
4912 RedOpGen(CGF, XExpr, EExpr, UpExpr);
4913 Scope.ForceCleanup();
4914
4915 // Shift the address forward by one element.
4916 llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32(
4917 LHSAddr.getElementType(), LHSElementPHI, /*Idx0=*/1,
4918 "omp.arraycpy.dest.element");
4919 llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32(
4920 RHSAddr.getElementType(), RHSElementPHI, /*Idx0=*/1,
4921 "omp.arraycpy.src.element");
4922 // Check whether we've reached the end.
4923 llvm::Value *Done =
4924 CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done");
4925 CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
4926 LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock());
4927 RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock());
4928
4929 // Done.
4930 CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
4931}
4932
4933/// Emit reduction combiner. If the combiner is a simple expression emit it as
4934/// is, otherwise consider it as combiner of UDR decl and emit it as a call of
4935/// UDR combiner function.
4936static void emitReductionCombiner(CodeGenFunction &CGF,
4937 const Expr *ReductionOp) {
4938 if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
4939 if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
4940 if (const auto *DRE =
4941 dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
4942 if (const auto *DRD =
4943 dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) {
4944 std::pair<llvm::Function *, llvm::Function *> Reduction =
4945 CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
4946 RValue Func = RValue::get(Reduction.first);
4947 CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
4948 CGF.EmitIgnoredExpr(ReductionOp);
4949 return;
4950 }
4951 CGF.EmitIgnoredExpr(ReductionOp);
4952}
4953
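// Note: the void* argument arrays interleave an extra slot after each
// variably modified private that carries the VLA size; the loop below reads
// that slot to re-materialize the VLA type before privatizing the elements.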
4954llvm::Function *CGOpenMPRuntime::emitReductionFunction(
4955 StringRef ReducerName, SourceLocation Loc, llvm::Type *ArgsElemType,
4956 ArrayRef<const Expr *> Privates, ArrayRef<const Expr *> LHSExprs,
4957 ArrayRef<const Expr *> RHSExprs, ArrayRef<const Expr *> ReductionOps) {
4958 ASTContext &C = CGM.getContext();
4959
4960 // void reduction_func(void *LHSArg, void *RHSArg);
4961 FunctionArgList Args;
4962 ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
4963 ImplicitParamKind::Other);
4964 ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
4965 ImplicitParamKind::Other);
4966 Args.push_back(&LHSArg);
4967 Args.push_back(&RHSArg);
4968 const auto &CGFI =
4969 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
4970 std::string Name = getReductionFuncName(ReducerName);
4971 auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
4972 llvm::GlobalValue::InternalLinkage, Name,
4973 &CGM.getModule());
4974 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
4975 Fn->setDoesNotRecurse();
4976 CodeGenFunction CGF(CGM);
4977 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
4978
4979 // Dst = (void*[n])(LHSArg);
4980 // Src = (void*[n])(RHSArg);
4981 Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4982 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
4983 CGF.Builder.getPtrTy(0)),
4984 ArgsElemType, CGF.getPointerAlign());
4985 Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4986 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
4987 CGF.Builder.getPtrTy(0)),
4988 ArgsElemType, CGF.getPointerAlign());
4989
4990 // ...
4991 // *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
4992 // ...
4993 CodeGenFunction::OMPPrivateScope Scope(CGF);
4994 const auto *IPriv = Privates.begin();
4995 unsigned Idx = 0;
4996 for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) {
4997 const auto *RHSVar =
4998 cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl());
4999 Scope.addPrivate(RHSVar, emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar));
5000 const auto *LHSVar =
5001 cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl());
5002 Scope.addPrivate(LHSVar, emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar));
5003 QualType PrivTy = (*IPriv)->getType();
5004 if (PrivTy->isVariablyModifiedType()) {
5005 // Get array size and emit VLA type.
5006 ++Idx;
5007 Address Elem = CGF.Builder.CreateConstArrayGEP(LHS, Idx);
5008 llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem);
5009 const VariableArrayType *VLA =
5010 CGF.getContext().getAsVariableArrayType(PrivTy);
5011 const auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr());
5012 CodeGenFunction::OpaqueValueMapping OpaqueMap(
5013 CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy)));
5014 CGF.EmitVariablyModifiedType(PrivTy);
5015 }
5016 }
5017 Scope.Privatize();
5018 IPriv = Privates.begin();
5019 const auto *ILHS = LHSExprs.begin();
5020 const auto *IRHS = RHSExprs.begin();
5021 for (const Expr *E : ReductionOps) {
5022 if ((*IPriv)->getType()->isArrayType()) {
5023 // Emit reduction for array section.
5024 const auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
5025 const auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
5026 EmitOMPAggregateReduction(
5027 CGF, (*IPriv)->getType(), LHSVar, RHSVar,
5028 [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
5029 emitReductionCombiner(CGF, E);
5030 });
5031 } else {
5032 // Emit reduction for array subscript or single variable.
5033 emitReductionCombiner(CGF, E);
5034 }
5035 ++IPriv;
5036 ++ILHS;
5037 ++IRHS;
5038 }
5039 Scope.ForceCleanup();
5040 CGF.FinishFunction();
5041 return Fn;
5042}
5043
5044void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF,
5045 const Expr *ReductionOp,
5046 const Expr *PrivateRef,
5047 const DeclRefExpr *LHS,
5048 const DeclRefExpr *RHS) {
5049 if (PrivateRef->getType()->isArrayType()) {
5050 // Emit reduction for array section.
5051 const auto *LHSVar = cast<VarDecl>(LHS->getDecl());
5052 const auto *RHSVar = cast<VarDecl>(RHS->getDecl());
5053 EmitOMPAggregateReduction(
5054 CGF, PrivateRef->getType(), LHSVar, RHSVar,
5055 [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
5056 emitReductionCombiner(CGF, ReductionOp);
5057 });
5058 } else {
5059 // Emit reduction for array subscript or single variable.
5060 emitReductionCombiner(CGF, ReductionOp);
5061 }
5062}
5063
5064static std::string generateUniqueName(CodeGenModule &CGM,
5065 llvm::StringRef Prefix, const Expr *Ref);
5066
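// Reduction over a privatized list item: partial values are combined into an
// internal shared variable inside a named critical section, with
// __kmpc_barrier calls fencing the initialization, combine, and broadcast
// phases (see the step-by-step comment below).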
5067void CGOpenMPRuntime::emitPrivateReduction(
5068 CodeGenFunction &CGF, SourceLocation Loc, const Expr *Privates,
5069 const Expr *LHSExprs, const Expr *RHSExprs, const Expr *ReductionOps) {
5070
5071 // Create a shared global variable (__shared_reduction_var) to accumulate the
5072 // final result.
5073 //
5074 // Call __kmpc_barrier to synchronize threads before initialization.
5075 //
5076 // The master thread (thread_id == 0) initializes __shared_reduction_var
5077 // with the identity value or initializer.
5078 //
5079 // Call __kmpc_barrier to synchronize before combining.
5080 // For each i:
5081 // - Thread enters critical section.
5082 // - Reads its private value from LHSExprs[i].
5083 // - Updates __shared_reduction_var[i] = RedOp_i(__shared_reduction_var[i],
5084 // Privates[i]).
5085 // - Exits critical section.
5086 //
5087 // Call __kmpc_barrier after combining.
5088 //
5089 // Each thread copies __shared_reduction_var[i] back to RHSExprs[i].
5090 //
5091 // Final __kmpc_barrier to synchronize after broadcasting
5092 QualType PrivateType = Privates->getType();
5093 llvm::Type *LLVMType = CGF.ConvertTypeForMem(PrivateType);
5094
5095 const OMPDeclareReductionDecl *UDR = getReductionInit(ReductionOps);
5096 std::string ReductionVarNameStr;
5097 if (const auto *DRE = dyn_cast<DeclRefExpr>(Privates->IgnoreParenCasts()))
5098 ReductionVarNameStr =
5099 generateUniqueName(CGM, DRE->getDecl()->getNameAsString(), Privates);
5100 else
5101 ReductionVarNameStr = "unnamed_priv_var";
5102
5103 // Create an internal shared variable
5104 std::string SharedName =
5105 CGM.getOpenMPRuntime().getName({"internal_pivate_", ReductionVarNameStr});
5106 llvm::GlobalVariable *SharedVar = OMPBuilder.getOrCreateInternalVariable(
5107 LLVMType, ".omp.reduction." + SharedName);
5108
5109 SharedVar->setAlignment(
5110 llvm::MaybeAlign(CGF.getContext().getTypeAlign(PrivateType) / 8));
5111
5112 Address SharedResult =
5113 CGF.MakeNaturalAlignRawAddrLValue(SharedVar, PrivateType).getAddress();
5114
5115 llvm::Value *ThreadId = getThreadID(CGF, Loc);
5116 llvm::Value *BarrierLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
5117 llvm::Value *BarrierArgs[] = {BarrierLoc, ThreadId};
5118
5119 llvm::BasicBlock *InitBB = CGF.createBasicBlock("init");
5120 llvm::BasicBlock *InitEndBB = CGF.createBasicBlock("init.end");
5121
5122 llvm::Value *IsWorker = CGF.Builder.CreateICmpEQ(
5123 ThreadId, llvm::ConstantInt::get(ThreadId->getType(), 0));
5124 CGF.Builder.CreateCondBr(IsWorker, InitBB, InitEndBB);
5125
5126 CGF.EmitBlock(InitBB);
5127
5128 auto EmitSharedInit = [&]() {
5129 if (UDR) { // Check if it's a User-Defined Reduction
5130 if (const Expr *UDRInitExpr = UDR->getInitializer()) {
5131 std::pair<llvm::Function *, llvm::Function *> FnPair =
5132 getUserDefinedReduction(UDR);
5133 llvm::Function *InitializerFn = FnPair.second;
5134 if (InitializerFn) {
5135 if (const auto *CE =
5136 dyn_cast<CallExpr>(UDRInitExpr->IgnoreParenImpCasts())) {
5137 const auto *OutDRE = cast<DeclRefExpr>(
5138 cast<UnaryOperator>(CE->getArg(0)->IgnoreParenImpCasts())
5139 ->getSubExpr());
5140 const VarDecl *OutVD = cast<VarDecl>(OutDRE->getDecl());
5141
5142 CodeGenFunction::OMPPrivateScope LocalScope(CGF);
5143 LocalScope.addPrivate(OutVD, SharedResult);
5144
5145 (void)LocalScope.Privatize();
5146 if (const auto *OVE = dyn_cast<OpaqueValueExpr>(
5147 CE->getCallee()->IgnoreParenImpCasts())) {
5148 CodeGenFunction::OpaqueValueMapping OpaqueMap(
5149 CGF, OVE, RValue::get(InitializerFn));
5150 CGF.EmitIgnoredExpr(CE);
5151 } else {
5152 CGF.EmitAnyExprToMem(UDRInitExpr, SharedResult,
5153 PrivateType.getQualifiers(),
5154 /*IsInitializer=*/true);
5155 }
5156 } else {
5157 CGF.EmitAnyExprToMem(UDRInitExpr, SharedResult,
5158 PrivateType.getQualifiers(),
5159 /*IsInitializer=*/true);
5160 }
5161 } else {
5162 CGF.EmitAnyExprToMem(UDRInitExpr, SharedResult,
5163 PrivateType.getQualifiers(),
5164 /*IsInitializer=*/true);
5165 }
5166 } else {
5167 // EmitNullInitialization handles default construction for C++ classes
5168 // and zeroing for scalars, which is a reasonable default.
5169 CGF.EmitNullInitialization(SharedResult, PrivateType);
5170 }
5171 return; // UDR initialization handled
5172 }
5173 if (const auto *DRE = dyn_cast<DeclRefExpr>(Privates)) {
5174 if (const auto *VD = dyn_cast<VarDecl>(DRE->getDecl())) {
5175 if (const Expr *InitExpr = VD->getInit()) {
5176 CGF.EmitAnyExprToMem(InitExpr, SharedResult,
5177 PrivateType.getQualifiers(), true);
5178 return;
5179 }
5180 }
5181 }
5182 CGF.EmitNullInitialization(SharedResult, PrivateType);
5183 };
5184 EmitSharedInit();
5185 CGF.Builder.CreateBr(InitEndBB);
5186 CGF.EmitBlock(InitEndBB);
5187
5188 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
5189 CGM.getModule(), OMPRTL___kmpc_barrier),
5190 BarrierArgs);
5191
5192 const Expr *ReductionOp = ReductionOps;
5193 const OMPDeclareReductionDecl *CurrentUDR = getReductionInit(ReductionOp);
5194 LValue SharedLV = CGF.MakeAddrLValue(SharedResult, PrivateType);
5195 LValue LHSLV = CGF.EmitLValue(Privates);
5196
5197 auto EmitCriticalReduction = [&](auto ReductionGen) {
5198 std::string CriticalName = getName({"reduction_critical"});
5199 emitCriticalRegion(CGF, CriticalName, ReductionGen, Loc);
5200 };
5201
5202 if (CurrentUDR) {
5203 // Handle user-defined reduction.
5204 auto ReductionGen = [&](CodeGenFunction &CGF, PrePostActionTy &Action) {
5205 Action.Enter(CGF);
5206 std::pair<llvm::Function *, llvm::Function *> FnPair =
5207 getUserDefinedReduction(CurrentUDR);
5208 if (FnPair.first) {
5209 if (const auto *CE = dyn_cast<CallExpr>(ReductionOp)) {
5210 const auto *OutDRE = cast<DeclRefExpr>(
5211 cast<UnaryOperator>(CE->getArg(0)->IgnoreParenImpCasts())
5212 ->getSubExpr());
5213 const auto *InDRE = cast<DeclRefExpr>(
5214 cast<UnaryOperator>(CE->getArg(1)->IgnoreParenImpCasts())
5215 ->getSubExpr());
5216 CodeGenFunction::OMPPrivateScope LocalScope(CGF);
5217 LocalScope.addPrivate(cast<VarDecl>(OutDRE->getDecl()),
5218 SharedLV.getAddress());
5219 LocalScope.addPrivate(cast<VarDecl>(InDRE->getDecl()),
5220 LHSLV.getAddress());
5221 (void)LocalScope.Privatize();
5222 emitReductionCombiner(CGF, ReductionOp);
5223 }
5224 }
5225 };
5226 EmitCriticalReduction(ReductionGen);
5227 } else {
5228 // Handle built-in reduction operations.
5229#ifndef NDEBUG
5230 const Expr *ReductionClauseExpr = ReductionOp->IgnoreParenCasts();
5231 if (const auto *Cleanup = dyn_cast<ExprWithCleanups>(ReductionClauseExpr))
5232 ReductionClauseExpr = Cleanup->getSubExpr()->IgnoreParenCasts();
5233
5234 const Expr *AssignRHS = nullptr;
5235 if (const auto *BinOp = dyn_cast<BinaryOperator>(ReductionClauseExpr)) {
5236 if (BinOp->getOpcode() == BO_Assign)
5237 AssignRHS = BinOp->getRHS();
5238 } else if (const auto *OpCall =
5239 dyn_cast<CXXOperatorCallExpr>(ReductionClauseExpr)) {
5240 if (OpCall->getOperator() == OO_Equal)
5241 AssignRHS = OpCall->getArg(1);
5242 }
5243
5244 assert(AssignRHS &&
5245 "Private Variable Reduction : Invalid ReductionOp expression");
5246#endif
5247
5248 auto ReductionGen = [&](CodeGenFunction &CGF, PrePostActionTy &Action) {
5249 Action.Enter(CGF);
5250 const auto *OmpOutDRE =
5251 dyn_cast<DeclRefExpr>(LHSExprs->IgnoreParenImpCasts());
5252 const auto *OmpInDRE =
5253 dyn_cast<DeclRefExpr>(RHSExprs->IgnoreParenImpCasts());
5254 assert(
5255 OmpOutDRE && OmpInDRE &&
5256 "Private Variable Reduction : LHSExpr/RHSExpr must be DeclRefExprs");
5257 const VarDecl *OmpOutVD = cast<VarDecl>(OmpOutDRE->getDecl());
5258 const VarDecl *OmpInVD = cast<VarDecl>(OmpInDRE->getDecl());
5259 CodeGenFunction::OMPPrivateScope LocalScope(CGF);
5260 LocalScope.addPrivate(OmpOutVD, SharedLV.getAddress());
5261 LocalScope.addPrivate(OmpInVD, LHSLV.getAddress());
5262 (void)LocalScope.Privatize();
5263 // Emit the actual reduction operation
5264 CGF.EmitIgnoredExpr(ReductionOp);
5265 };
5266 EmitCriticalReduction(ReductionGen);
5267 }
5268
5269 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
5270 CGM.getModule(), OMPRTL___kmpc_barrier),
5271 BarrierArgs);
5272
5273 // Broadcast final result
5274 bool IsAggregate = PrivateType->isAggregateType();
5275 LValue SharedLV1 = CGF.MakeAddrLValue(SharedResult, PrivateType);
5276 llvm::Value *FinalResultVal = nullptr;
5277 Address FinalResultAddr = Address::invalid();
5278
5279 if (IsAggregate)
5280 FinalResultAddr = SharedResult;
5281 else
5282 FinalResultVal = CGF.EmitLoadOfScalar(SharedLV1, Loc);
5283
5284 LValue TargetLHSLV = CGF.EmitLValue(RHSExprs);
5285 if (IsAggregate) {
5286 CGF.EmitAggregateCopy(TargetLHSLV,
5287 CGF.MakeAddrLValue(FinalResultAddr, PrivateType),
5288 PrivateType, AggValueSlot::DoesNotOverlap, false);
5289 } else {
5290 CGF.EmitStoreOfScalar(FinalResultVal, TargetLHSLV);
5291 }
5292 // Final synchronization barrier
5293 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
5294 CGM.getModule(), OMPRTL___kmpc_barrier),
5295 BarrierArgs);
5296
5297 // Combiner with original list item
5298 auto OriginalListCombiner = [&](CodeGenFunction &CGF,
5299 PrePostActionTy &Action) {
5300 Action.Enter(CGF);
5301 emitSingleReductionCombiner(CGF, ReductionOps, Privates,
5302 cast<DeclRefExpr>(LHSExprs),
5303 cast<DeclRefExpr>(RHSExprs));
5304 };
5305 EmitCriticalReduction(OriginalListCombiner);
5306}
5307
5308void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
5309 ArrayRef<const Expr *> OrgPrivates,
5310 ArrayRef<const Expr *> OrgLHSExprs,
5311 ArrayRef<const Expr *> OrgRHSExprs,
5312 ArrayRef<const Expr *> OrgReductionOps,
5313 ReductionOptionsTy Options) {
5314 if (!CGF.HaveInsertPoint())
5315 return;
5316
5317 bool WithNowait = Options.WithNowait;
5318 bool SimpleReduction = Options.SimpleReduction;
5319
5320 // The following code should be emitted for the reduction:
5321 //
5322 // static kmp_critical_name lock = { 0 };
5323 //
5324 // void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
5325 // *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
5326 // ...
5327 // *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1],
5328 // *(Type<n>-1*)rhs[<n>-1]);
5329 // }
5330 //
5331 // ...
5332 // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
5333 // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
5334 // RedList, reduce_func, &<lock>)) {
5335 // case 1:
5336 // ...
5337 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
5338 // ...
5339 // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
5340 // break;
5341 // case 2:
5342 // ...
5343 // Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
5344 // ...
5345 // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);]
5346 // break;
5347 // default:;
5348 // }
5349 //
5350 // If SimpleReduction is true, only the following code is generated:
5351 // ...
5352 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
5353 // ...
5354
5355 ASTContext &C = CGM.getContext();
5356
5357 if (SimpleReduction) {
5358 CodeGenFunction::RunCleanupsScope Scope(CGF);
5359 const auto *IPriv = OrgPrivates.begin();
5360 const auto *ILHS = OrgLHSExprs.begin();
5361 const auto *IRHS = OrgRHSExprs.begin();
5362 for (const Expr *E : OrgReductionOps) {
5363 emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
5364 cast<DeclRefExpr>(*IRHS));
5365 ++IPriv;
5366 ++ILHS;
5367 ++IRHS;
5368 }
5369 return;
5370 }
5371
5372 // Filter out shared reduction variables based on IsPrivateVarReduction flag.
5373 // Only keep entries where the corresponding variable is not private.
5374 SmallVector<const Expr *> FilteredPrivates, FilteredLHSExprs,
5375 FilteredRHSExprs, FilteredReductionOps;
5376 for (unsigned I : llvm::seq<unsigned>(
5377 std::min(OrgReductionOps.size(), OrgLHSExprs.size()))) {
5378 if (!Options.IsPrivateVarReduction[I]) {
5379 FilteredPrivates.emplace_back(OrgPrivates[I]);
5380 FilteredLHSExprs.emplace_back(OrgLHSExprs[I]);
5381 FilteredRHSExprs.emplace_back(OrgRHSExprs[I]);
5382 FilteredReductionOps.emplace_back(OrgReductionOps[I]);
5383 }
5384 }
5385 // Wrap filtered vectors in ArrayRef for downstream shared reduction
5386 // processing.
5387 ArrayRef<const Expr *> Privates = FilteredPrivates;
5388 ArrayRef<const Expr *> LHSExprs = FilteredLHSExprs;
5389 ArrayRef<const Expr *> RHSExprs = FilteredRHSExprs;
5390 ArrayRef<const Expr *> ReductionOps = FilteredReductionOps;
5391
5392 // 1. Build a list of reduction variables.
5393 // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
5394 auto Size = RHSExprs.size();
5395 for (const Expr *E : Privates) {
5396 if (E->getType()->isVariablyModifiedType())
5397 // Reserve place for array size.
5398 ++Size;
5399 }
5400 llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size);
5401 QualType ReductionArrayTy = C.getConstantArrayType(
5402 C.VoidPtrTy, ArraySize, nullptr, ArraySizeModifier::Normal,
5403 /*IndexTypeQuals=*/0);
5404 RawAddress ReductionList =
5405 CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
5406 const auto *IPriv = Privates.begin();
5407 unsigned Idx = 0;
5408 for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) {
5409 Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
5410 CGF.Builder.CreateStore(
5411 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5412 CGF.EmitLValue(RHSExprs[I]).getPointer(CGF), CGF.VoidPtrTy),
5413 Elem);
5414 if ((*IPriv)->getType()->isVariablyModifiedType()) {
5415 // Store array size.
5416 ++Idx;
5417 Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
5418 llvm::Value *Size = CGF.Builder.CreateIntCast(
5419 CGF.getVLASize(
5420 CGF.getContext().getAsVariableArrayType((*IPriv)->getType()))
5421 .NumElts,
5422 CGF.SizeTy, /*isSigned=*/false);
5423 CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy),
5424 Elem);
5425 }
5426 }
5427
5428 // 2. Emit reduce_func().
5429 llvm::Function *ReductionFn = emitReductionFunction(
5430 CGF.CurFn->getName(), Loc, CGF.ConvertTypeForMem(ReductionArrayTy),
5431 Privates, LHSExprs, RHSExprs, ReductionOps);
5432
5433 // 3. Create static kmp_critical_name lock = { 0 };
5434 std::string Name = getName({"reduction"});
5435 llvm::Value *Lock = getCriticalRegionLock(Name);
5436
5437 // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
5438 // RedList, reduce_func, &<lock>);
5439 llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
5440 llvm::Value *ThreadId = getThreadID(CGF, Loc);
5441 llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy);
5442 llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5443 ReductionList.getPointer(), CGF.VoidPtrTy);
5444 llvm::Value *Args[] = {
5445 IdentTLoc, // ident_t *<loc>
5446 ThreadId, // i32 <gtid>
5447 CGF.Builder.getInt32(RHSExprs.size()), // i32 <n>
5448 ReductionArrayTySize, // size_type sizeof(RedList)
5449 RL, // void *RedList
5450 ReductionFn, // void (*) (void *, void *) <reduce_func>
5451 Lock // kmp_critical_name *&<lock>
5452 };
5453 llvm::Value *Res = CGF.EmitRuntimeCall(
5454 OMPBuilder.getOrCreateRuntimeFunction(
5455 CGM.getModule(),
5456 WithNowait ? OMPRTL___kmpc_reduce_nowait : OMPRTL___kmpc_reduce),
5457 Args);
5458
5459 // 5. Build switch(res)
5460 llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(".omp.reduction.default");
5461 llvm::SwitchInst *SwInst =
5462 CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2);
5463
5464 // 6. Build case 1:
5465 // ...
5466 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
5467 // ...
5468 // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
5469 // break;
5470 llvm::BasicBlock *Case1BB = CGF.createBasicBlock(".omp.reduction.case1");
5471 SwInst->addCase(CGF.Builder.getInt32(1), Case1BB);
5472 CGF.EmitBlock(Case1BB);
5473
5474 // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
5475 llvm::Value *EndArgs[] = {
5476 IdentTLoc, // ident_t *<loc>
5477 ThreadId, // i32 <gtid>
5478 Lock // kmp_critical_name *&<lock>
5479 };
5480 auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps](
5481 CodeGenFunction &CGF, PrePostActionTy &Action) {
5482 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
5483 const auto *IPriv = Privates.begin();
5484 const auto *ILHS = LHSExprs.begin();
5485 const auto *IRHS = RHSExprs.begin();
5486 for (const Expr *E : ReductionOps) {
5487 RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
5488 cast<DeclRefExpr>(*IRHS));
5489 ++IPriv;
5490 ++ILHS;
5491 ++IRHS;
5492 }
5493 };
5494 RegionCodeGenTy RCG(CodeGen);
5495 CommonActionTy Action(
5496 nullptr, {},
5497 OMPBuilder.getOrCreateRuntimeFunction(
5498 CGM.getModule(), WithNowait ? OMPRTL___kmpc_end_reduce_nowait
5499 : OMPRTL___kmpc_end_reduce),
5500 EndArgs);
5501 RCG.setAction(Action);
5502 RCG(CGF);
5503
5504 CGF.EmitBranch(DefaultBB);
5505
5506 // 7. Build case 2:
5507 // ...
5508 // Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
5509 // ...
5510 // break;
5511 llvm::BasicBlock *Case2BB = CGF.createBasicBlock(".omp.reduction.case2");
5512 SwInst->addCase(CGF.Builder.getInt32(2), Case2BB);
5513 CGF.EmitBlock(Case2BB);
5514
5515 auto &&AtomicCodeGen = [Loc, Privates, LHSExprs, RHSExprs, ReductionOps](
5516 CodeGenFunction &CGF, PrePostActionTy &Action) {
5517 const auto *ILHS = LHSExprs.begin();
5518 const auto *IRHS = RHSExprs.begin();
5519 const auto *IPriv = Privates.begin();
5520 for (const Expr *E : ReductionOps) {
5521 const Expr *XExpr = nullptr;
5522 const Expr *EExpr = nullptr;
5523 const Expr *UpExpr = nullptr;
5524 BinaryOperatorKind BO = BO_Comma;
5525 if (const auto *BO = dyn_cast<BinaryOperator>(E)) {
5526 if (BO->getOpcode() == BO_Assign) {
5527 XExpr = BO->getLHS();
5528 UpExpr = BO->getRHS();
5529 }
5530 }
5531 // Try to emit update expression as a simple atomic.
5532 const Expr *RHSExpr = UpExpr;
5533 if (RHSExpr) {
5534 // Analyze RHS part of the whole expression.
5535 if (const auto *ACO = dyn_cast<AbstractConditionalOperator>(
5536 RHSExpr->IgnoreParenImpCasts())) {
5537 // If this is a conditional operator, analyze its condition for
5538 // min/max reduction operator.
5539 RHSExpr = ACO->getCond();
5540 }
5541 if (const auto *BORHS =
5542 dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) {
5543 EExpr = BORHS->getRHS();
5544 BO = BORHS->getOpcode();
5545 }
5546 }
5547 if (XExpr) {
5548 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
5549 auto &&AtomicRedGen = [BO, VD,
5550 Loc](CodeGenFunction &CGF, const Expr *XExpr,
5551 const Expr *EExpr, const Expr *UpExpr) {
5552 LValue X = CGF.EmitLValue(XExpr);
5553 RValue E;
5554 if (EExpr)
5555 E = CGF.EmitAnyExpr(EExpr);
5556 CGF.EmitOMPAtomicSimpleUpdateExpr(
5557 X, E, BO, /*IsXLHSInRHSPart=*/true,
5558 llvm::AtomicOrdering::Monotonic, Loc,
5559 [&CGF, UpExpr, VD, Loc](RValue XRValue) {
5560 CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
5561 Address LHSTemp = CGF.CreateMemTemp(VD->getType());
5562 CGF.emitOMPSimpleStore(
5563 CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue,
5564 VD->getType().getNonReferenceType(), Loc);
5565 PrivateScope.addPrivate(VD, LHSTemp);
5566 (void)PrivateScope.Privatize();
5567 return CGF.EmitAnyExpr(UpExpr);
5568 });
5569 };
5570 if ((*IPriv)->getType()->isArrayType()) {
5571 // Emit atomic reduction for array section.
5572 const auto *RHSVar =
5573 cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
5574 EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar,
5575 AtomicRedGen, XExpr, EExpr, UpExpr);
5576 } else {
5577 // Emit atomic reduction for array subscript or single variable.
5578 AtomicRedGen(CGF, XExpr, EExpr, UpExpr);
5579 }
5580 } else {
5581 // Emit as a critical region.
5582 auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *,
5583 const Expr *, const Expr *) {
5584 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
5585 std::string Name = RT.getName({"atomic_reduction"});
5586 RT.emitCriticalRegion(
5587 CGF, Name,
5588 [=](CodeGenFunction &CGF, PrePostActionTy &Action) {
5589 Action.Enter(CGF);
5590 emitReductionCombiner(CGF, E);
5591 },
5592 Loc);
5593 };
5594 if ((*IPriv)->getType()->isArrayType()) {
5595 const auto *LHSVar =
5596 cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
5597 const auto *RHSVar =
5598 cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
5599 EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar,
5600 CritRedGen);
5601 } else {
5602 CritRedGen(CGF, nullptr, nullptr, nullptr);
5603 }
5604 }
5605 ++ILHS;
5606 ++IRHS;
5607 ++IPriv;
5608 }
5609 };
5610 RegionCodeGenTy AtomicRCG(AtomicCodeGen);
5611 if (!WithNowait) {
5612 // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>);
5613 llvm::Value *EndArgs[] = {
5614 IdentTLoc, // ident_t *<loc>
5615 ThreadId, // i32 <gtid>
5616 Lock // kmp_critical_name *&<lock>
5617 };
5618 CommonActionTy Action(nullptr, {},
5619 OMPBuilder.getOrCreateRuntimeFunction(
5620 CGM.getModule(), OMPRTL___kmpc_end_reduce),
5621 EndArgs);
5622 AtomicRCG.setAction(Action);
5623 AtomicRCG(CGF);
5624 } else {
5625 AtomicRCG(CGF);
5626 }
5627
5628 CGF.EmitBranch(DefaultBB);
5629 CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
5630 assert(OrgLHSExprs.size() == OrgPrivates.size() &&
5631 "PrivateVarReduction: Privates size mismatch");
5632 assert(OrgLHSExprs.size() == OrgReductionOps.size() &&
5633 "PrivateVarReduction: ReductionOps size mismatch");
5634 for (unsigned I : llvm::seq<unsigned>(
5635 std::min(OrgReductionOps.size(), OrgLHSExprs.size()))) {
5636 if (Options.IsPrivateVarReduction[I])
5637 emitPrivateReduction(CGF, Loc, OrgPrivates[I], OrgLHSExprs[I],
5638 OrgRHSExprs[I], OrgReductionOps[I]);
5639 }
5640}
5641
5642/// Generates unique name for artificial threadprivate variables.
5643/// Format is: <Prefix> "." <Decl_mangled_name> "_" "<Decl_start_loc_raw_enc>"
5644static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix,
5645 const Expr *Ref) {
5646 SmallString<256> Buffer;
5647 llvm::raw_svector_ostream Out(Buffer);
5648 const clang::DeclRefExpr *DE;
5649 const VarDecl *D = ::getBaseDecl(Ref, DE);
5650 if (!D)
5651 D = cast<VarDecl>(cast<DeclRefExpr>(Ref)->getDecl());
5652 D = D->getCanonicalDecl();
5653 std::string Name = CGM.getOpenMPRuntime().getName(
5654 {D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(D)});
5655 Out << Prefix << Name << "_"
5656 << D->getCanonicalDecl()->getBeginLoc().getRawEncoding();
5657 return std::string(Out.str());
5658}
5659
5660/// Emits reduction initializer function:
5661/// \code
5662/// void @.red_init(void* %arg, void* %orig) {
5663/// %0 = bitcast void* %arg to <type>*
5664/// store <type> <init>, <type>* %0
5665/// ret void
5666/// }
5667/// \endcode
5668static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM,
5669 SourceLocation Loc,
5670 ReductionCodeGen &RCG, unsigned N) {
5671 ASTContext &C = CGM.getContext();
5672 QualType VoidPtrTy = C.VoidPtrTy;
5673 VoidPtrTy.addRestrict();
5674 FunctionArgList Args;
5675 ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
5676 ImplicitParamKind::Other);
5677 ImplicitParamDecl ParamOrig(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
5678 ImplicitParamKind::Other);
5679 Args.emplace_back(&Param);
5680 Args.emplace_back(&ParamOrig);
5681 const auto &FnInfo =
5682 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5683 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
5684 std::string Name = CGM.getOpenMPRuntime().getName({"red_init", ""});
5685 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
5686 Name, &CGM.getModule());
5687 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
5688 Fn->setDoesNotRecurse();
5689 CodeGenFunction CGF(CGM);
5690 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
5691 QualType PrivateType = RCG.getPrivateType(N);
5692 Address PrivateAddr = CGF.EmitLoadOfPointer(
5693 CGF.GetAddrOfLocalVar(&Param).withElementType(CGF.Builder.getPtrTy(0)),
5694 C.getPointerType(PrivateType)->castAs<PointerType>());
5695 llvm::Value *Size = nullptr;
5696 // If the size of the reduction item is non-constant, load it from global
5697 // threadprivate variable.
5698 if (RCG.getSizes(N).second) {
5699 Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
5700 CGF, CGM.getContext().getSizeType(),
5701 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
5702 Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
5703 CGM.getContext().getSizeType(), Loc);
5704 }
5705 RCG.emitAggregateType(CGF, N, Size);
5706 Address OrigAddr = Address::invalid();
5707 // If the initializer uses the initializer from the declare reduction
5708 // construct, emit a pointer to the address of the original reduction item
5709 // (required by the reduction initializer).
5710 if (RCG.usesReductionInitializer(N)) {
5711 Address SharedAddr = CGF.GetAddrOfLocalVar(&ParamOrig);
5712 OrigAddr = CGF.EmitLoadOfPointer(
5713 SharedAddr,
5714 CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr());
5715 }
5716 // Emit the initializer:
5717 // %0 = bitcast void* %arg to <type>*
5718 // store <type> <init>, <type>* %0
5719 RCG.emitInitialization(CGF, N, PrivateAddr, OrigAddr,
5720 [](CodeGenFunction &) { return false; });
5721 CGF.FinishFunction();
5722 return Fn;
5723}
5724
5725/// Emits reduction combiner function:
5726/// \code
5727/// void @.red_comb(void* %arg0, void* %arg1) {
5728/// %lhs = bitcast void* %arg0 to <type>*
5729/// %rhs = bitcast void* %arg1 to <type>*
5730/// %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs)
5731/// store <type> %2, <type>* %lhs
5732/// ret void
5733/// }
5734/// \endcode
5735static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM,
5736 SourceLocation Loc,
5737 ReductionCodeGen &RCG, unsigned N,
5738 const Expr *ReductionOp,
5739 const Expr *LHS, const Expr *RHS,
5740 const Expr *PrivateRef) {
5741 ASTContext &C = CGM.getContext();
5742 const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl());
5743 const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl());
5744 FunctionArgList Args;
5745 ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
5746 C.VoidPtrTy, ImplicitParamKind::Other);
5747 ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
5748 ImplicitParamKind::Other);
5749 Args.emplace_back(&ParamInOut);
5750 Args.emplace_back(&ParamIn);
5751 const auto &FnInfo =
5752 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5753 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
5754 std::string Name = CGM.getOpenMPRuntime().getName({"red_comb", ""});
5755 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
5756 Name, &CGM.getModule());
5757 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
5758 Fn->setDoesNotRecurse();
5759 CodeGenFunction CGF(CGM);
5760 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
5761 llvm::Value *Size = nullptr;
5762 // If the size of the reduction item is non-constant, load it from global
5763 // threadprivate variable.
5764 if (RCG.getSizes(N).second) {
5765 Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
5766 CGF, CGM.getContext().getSizeType(),
5767 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
5768 Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
5769 CGM.getContext().getSizeType(), Loc);
5770 }
5771 RCG.emitAggregateType(CGF, N, Size);
5772 // Remap lhs and rhs variables to the addresses of the function arguments.
5773 // %lhs = bitcast void* %arg0 to <type>*
5774 // %rhs = bitcast void* %arg1 to <type>*
5775 CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
5776 PrivateScope.addPrivate(
5777 LHSVD,
5778 // Pull out the pointer to the variable.
5779 CGF.EmitLoadOfPointer(
5780 CGF.GetAddrOfLocalVar(&ParamInOut)
5781 .withElementType(CGF.Builder.getPtrTy(0)),
5782 C.getPointerType(LHSVD->getType())->castAs<PointerType>()));
5783 PrivateScope.addPrivate(
5784 RHSVD,
5785 // Pull out the pointer to the variable.
5786 CGF.EmitLoadOfPointer(
5787 CGF.GetAddrOfLocalVar(&ParamIn).withElementType(
5788 CGF.Builder.getPtrTy(0)),
5789 C.getPointerType(RHSVD->getType())->castAs<PointerType>()));
5790 PrivateScope.Privatize();
5791 // Emit the combiner body:
5792 // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs)
5793 // store <type> %2, <type>* %lhs
5794 CGM.getOpenMPRuntime().emitSingleReductionCombiner(
5795 CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS),
5796 cast<DeclRefExpr>(RHS));
5797 CGF.FinishFunction();
5798 return Fn;
5799}
5800
5801/// Emits reduction finalizer function:
5802/// \code
5803/// void @.red_fini(void* %arg) {
5804/// %0 = bitcast void* %arg to <type>*
5805/// <destroy>(<type>* %0)
5806/// ret void
5807/// }
5808/// \endcode
5809static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM,
5810 SourceLocation Loc,
5811 ReductionCodeGen &RCG, unsigned N) {
5812 if (!RCG.needCleanups(N))
5813 return nullptr;
5814 ASTContext &C = CGM.getContext();
5815 FunctionArgList Args;
5816 ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
5817 ImplicitParamKind::Other);
5818 Args.emplace_back(&Param);
5819 const auto &FnInfo =
5820 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5821 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
5822 std::string Name = CGM.getOpenMPRuntime().getName({"red_fini", ""});
5823 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
5824 Name, &CGM.getModule());
5825 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
5826 Fn->setDoesNotRecurse();
5827 CodeGenFunction CGF(CGM);
5828 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
5829 Address PrivateAddr = CGF.EmitLoadOfPointer(
5830 CGF.GetAddrOfLocalVar(&Param), C.VoidPtrTy.castAs<PointerType>());
5831 llvm::Value *Size = nullptr;
5832 // If the size of the reduction item is non-constant, load it from global
5833 // threadprivate variable.
5834 if (RCG.getSizes(N).second) {
5835 Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
5836 CGF, CGM.getContext().getSizeType(),
5837 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
5838 Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
5839 CGM.getContext().getSizeType(), Loc);
5840 }
5841 RCG.emitAggregateType(CGF, N, Size);
5842 // Emit the finalizer body:
5843 // <destroy>(<type>* %0)
5844 RCG.emitCleanups(CGF, N, PrivateAddr);
5845 CGF.FinishFunction(Loc);
5846 return Fn;
5847}
5848
5849llvm::Value *CGOpenMPRuntime::emitTaskReductionInit(
5850 CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
5851 ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
5852 if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty())
5853 return nullptr;
5854
5855 // Build typedef struct:
5856 // kmp_taskred_input {
5857 // void *reduce_shar; // shared reduction item
5858 // void *reduce_orig; // original reduction item used for initialization
5859 // size_t reduce_size; // size of data item
5860 // void *reduce_init; // data initialization routine
5861 // void *reduce_fini; // data finalization routine
5862 // void *reduce_comb; // data combiner routine
5863 // kmp_task_red_flags_t flags; // flags for additional info from compiler
5864 // } kmp_taskred_input_t;
5865 ASTContext &C = CGM.getContext();
5866 RecordDecl *RD = C.buildImplicitRecord("kmp_taskred_input_t");
5867 RD->startDefinition();
5868 const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
5869 const FieldDecl *OrigFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
5870 const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType());
5871 const FieldDecl *InitFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
5872 const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
5873 const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
5874 const FieldDecl *FlagsFD = addFieldToRecordDecl(
5875 C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false));
5876 RD->completeDefinition();
5877 CanQualType RDType = C.getCanonicalTagType(RD);
5878 unsigned Size = Data.ReductionVars.size();
5879 llvm::APInt ArraySize(/*numBits=*/64, Size);
5880 QualType ArrayRDType =
5881 C.getConstantArrayType(RDType, ArraySize, nullptr,
5882 ArraySizeModifier::Normal, /*IndexTypeQuals=*/0);
5883 // kmp_task_red_input_t .rd_input.[Size];
5884 RawAddress TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input.");
5885 ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionOrigs,
5886 Data.ReductionCopies, Data.ReductionOps);
5887 for (unsigned Cnt = 0; Cnt < Size; ++Cnt) {
5888 // kmp_task_red_input_t &ElemLVal = .rd_input.[Cnt];
5889 llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0),
5890 llvm::ConstantInt::get(CGM.SizeTy, Cnt)};
5891 llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP(
5892 TaskRedInput.getElementType(), TaskRedInput.getPointer(), Idxs,
5893 /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc,
5894 ".rd_input.gep.");
5895 LValue ElemLVal = CGF.MakeNaturalAlignRawAddrLValue(GEP, RDType);
5896 // ElemLVal.reduce_shar = &Shareds[Cnt];
5897 LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD);
5898 RCG.emitSharedOrigLValue(CGF, Cnt);
5899 llvm::Value *Shared = RCG.getSharedLValue(Cnt).getPointer(CGF);
5900 CGF.EmitStoreOfScalar(Shared, SharedLVal);
5901 // ElemLVal.reduce_orig = &Origs[Cnt];
5902 LValue OrigLVal = CGF.EmitLValueForField(ElemLVal, OrigFD);
5903 llvm::Value *Orig = RCG.getOrigLValue(Cnt).getPointer(CGF);
5904 CGF.EmitStoreOfScalar(Orig, OrigLVal);
5905 RCG.emitAggregateType(CGF, Cnt);
5906 llvm::Value *SizeValInChars;
5907 llvm::Value *SizeVal;
5908 std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt);
5909 // We use delayed creation/initialization for VLAs and array sections. It
5910 // is required because the runtime does not provide a way to pass the sizes
5911 // of VLAs/array sections to the initializer/combiner/finalizer functions.
5912 // Instead, threadprivate global variables are used to store these values,
5913 // and the generated functions read them from there.
5914 bool DelayedCreation = !!SizeVal;
5915 SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy,
5916 /*isSigned=*/false);
5917 LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD);
5918 CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal);
5919 // ElemLVal.reduce_init = init;
5920 LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD);
5921 llvm::Value *InitAddr = emitReduceInitFunction(CGM, Loc, RCG, Cnt);
5922 CGF.EmitStoreOfScalar(InitAddr, InitLVal);
5923 // ElemLVal.reduce_fini = fini;
5924 LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD);
5925 llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt);
5926 llvm::Value *FiniAddr =
5927 Fini ? Fini : llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
5928 CGF.EmitStoreOfScalar(FiniAddr, FiniLVal);
5929 // ElemLVal.reduce_comb = comb;
5930 LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD);
5931 llvm::Value *CombAddr = emitReduceCombFunction(
5932 CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt],
5933 RHSExprs[Cnt], Data.ReductionCopies[Cnt]);
5934 CGF.EmitStoreOfScalar(CombAddr, CombLVal);
5935 // ElemLVal.flags = 0;
5936 LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD);
5937 if (DelayedCreation) {
5938 CGF.EmitStoreOfScalar(
5939 llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*isSigned=*/true),
5940 FlagsLVal);
5941 } else
5942 CGF.EmitNullInitialization(FlagsLVal.getAddress(), FlagsLVal.getType());
5943 }
5944 if (Data.IsReductionWithTaskMod) {
5945 // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int
5946 // is_ws, int num, void *data);
5947 llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
5948 llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
5949 CGM.IntTy, /*isSigned=*/true);
5950 llvm::Value *Args[] = {
5951 IdentTLoc, GTid,
5952 llvm::ConstantInt::get(CGM.IntTy, Data.IsWorksharingReduction ? 1 : 0,
5953 /*isSigned=*/true),
5954 llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
5955 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5956 TaskRedInput.getPointer(), CGM.VoidPtrTy)};
5957 return CGF.EmitRuntimeCall(
5958 OMPBuilder.getOrCreateRuntimeFunction(
5959 CGM.getModule(), OMPRTL___kmpc_taskred_modifier_init),
5960 Args);
5961 }
5962 // Build call void *__kmpc_taskred_init(int gtid, int num_data, void *data);
5963 llvm::Value *Args[] = {
5964 CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
5965 /*isSigned=*/true),
5966 llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
5967 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskRedInput.getPointer(),
5968 CGM.VoidPtrTy)};
5969 return CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
5970 CGM.getModule(), OMPRTL___kmpc_taskred_init),
5971 Args);
5972}
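// Illustrative lowering (a sketch, not emitted verbatim): for
//   #pragma omp taskgroup task_reduction(+ : x)
// the loop above fills one kmp_taskred_input_t element (reduce_shar and
// reduce_orig point at x, reduce_size = sizeof(x), init/fini/comb hold the
// generated helper functions, flags = 0 for a fixed-size item) and the
// emitted call is roughly:
//   void *tg = __kmpc_taskred_init(gtid, /*num_data=*/1, rd_input);
// The returned pointer is later passed to getTaskReductionItem() as
// ReductionsPtr.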
5973
5974void CGOpenMPRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
5975 SourceLocation Loc,
5976 bool IsWorksharingReduction) {
5977 // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int
5978 // is_ws, int num, void *data);
5979 llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
5980 llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
5981 CGM.IntTy, /*isSigned=*/true);
5982 llvm::Value *Args[] = {IdentTLoc, GTid,
5983 llvm::ConstantInt::get(CGM.IntTy,
5984 IsWorksharingReduction ? 1 : 0,
5985 /*isSigned=*/true)};
5986 (void)CGF.EmitRuntimeCall(
5987 OMPBuilder.getOrCreateRuntimeFunction(
5988 CGM.getModule(), OMPRTL___kmpc_task_reduction_modifier_fini),
5989 Args);
5990}
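// Usage sketch: for a worksharing reduction with the task modifier, e.g.
//   #pragma omp for reduction(task, + : x)
// the matching init call is emitted with is_ws = 1, and this function closes
// the region with, roughly:
//   __kmpc_task_reduction_modifier_fini(&loc, gtid, /*is_ws=*/1);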
5991
5992void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
5993 SourceLocation Loc,
5994 ReductionCodeGen &RCG,
5995 unsigned N) {
5996 auto Sizes = RCG.getSizes(N);
5997 // Emit the threadprivate global variable if the size of the reduction
5998 // item is non-constant (i.e. Sizes.second != nullptr).
5999 if (Sizes.second) {
6000 llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy,
6001 /*isSigned=*/false);
6002 Address SizeAddr = getAddrOfArtificialThreadPrivate(
6003 CGF, CGM.getContext().getSizeType(),
6004 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
6005 CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false);
6006 }
6007}
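// Illustrative pairing for the delayed-creation path: with a VLA item, e.g.
//   int n = ...; int a[n];
//   #pragma omp taskgroup task_reduction(+ : a[:n])
// the item size is not a compile-time constant, so this fixup stores
// n * sizeof(int) into the artificial threadprivate variable named via
// generateUniqueName(CGM, "reduction_size", ...); the generated
// init/fini/comb helpers load it back from there (see the
// RCG.getSizes(N).second checks above).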
6008
6009Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF,
6010 SourceLocation Loc,
6011 llvm::Value *ReductionsPtr,
6012 LValue SharedLVal) {
6013 // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
6014 // *d);
6015 llvm::Value *Args[] = {CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
6016 CGM.IntTy,
6017 /*isSigned=*/true),
6018 ReductionsPtr,
6019 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
6020 SharedLVal.getPointer(CGF), CGM.VoidPtrTy)};
6021 return Address(
6022 CGF.EmitRuntimeCall(
6023 OMPBuilder.getOrCreateRuntimeFunction(
6024 CGM.getModule(), OMPRTL___kmpc_task_reduction_get_th_data),
6025 Args),
6026 CGF.Int8Ty, SharedLVal.getAlignment());
6027}
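// Usage sketch: inside a participating task, accesses to the reduction item
// are routed through the thread-specific copy, roughly:
//   T *priv = (T *)__kmpc_task_reduction_get_th_data(gtid, tg, &shared);
// where tg is the value produced by __kmpc_taskred_init /
// __kmpc_taskred_modifier_init above.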
6028
6029void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF, SourceLocation Loc,
6030 const OMPTaskDataTy &Data) {
6031 if (!CGF.HaveInsertPoint())
6032 return;
6033
6034 if (CGF.CGM.getLangOpts().OpenMPIRBuilder && Data.Dependences.empty()) {
6035 // TODO: Need to support taskwait with dependences in the OpenMPIRBuilder.
6036 OMPBuilder.createTaskwait(CGF.Builder);
6037 } else {
6038 llvm::Value *ThreadID = getThreadID(CGF, Loc);
6039 llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
6040 auto &M = CGM.getModule();
6041 Address DependenciesArray = Address::invalid();
6042 llvm::Value *NumOfElements;
6043 std::tie(NumOfElements, DependenciesArray) =
6044 emitDependClause(CGF, Data.Dependences, Loc);
6045 if (!Data.Dependences.empty()) {
6046 llvm::Value *DepWaitTaskArgs[7];
6047 DepWaitTaskArgs[0] = UpLoc;
6048 DepWaitTaskArgs[1] = ThreadID;
6049 DepWaitTaskArgs[2] = NumOfElements;
6050 DepWaitTaskArgs[3] = DependenciesArray.emitRawPointer(CGF);
6051 DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
6052 DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
6053 DepWaitTaskArgs[6] =
6054 llvm::ConstantInt::get(CGF.Int32Ty, Data.HasNowaitClause);
6055
6056 CodeGenFunction::RunCleanupsScope LocalScope(CGF);
6057
6058 // Build void __kmpc_omp_taskwait_deps_51(ident_t *, kmp_int32 gtid,
6059 // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
6060 // ndeps_noalias, kmp_depend_info_t *noalias_dep_list,
6061 // kmp_int32 has_no_wait) when dependence info is specified.
6062 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
6063 M, OMPRTL___kmpc_omp_taskwait_deps_51),
6064 DepWaitTaskArgs);
6065
6066 } else {
6067
6068 // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
6069 // global_tid);
6070 llvm::Value *Args[] = {UpLoc, ThreadID};
6071 // Ignore return result until untied tasks are supported.
6072 CGF.EmitRuntimeCall(
6073 OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_taskwait),
6074 Args);
6075 }
6076 }
6077
6078 if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
6079 Region->emitUntiedSwitch(CGF);
6080}
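// Illustrative lowerings: a plain
//   #pragma omp taskwait
// becomes __kmpc_omp_taskwait(&loc, gtid), while
//   #pragma omp taskwait depend(in : x)
// takes the dependence branch above and becomes, roughly:
//   __kmpc_omp_taskwait_deps_51(&loc, gtid, /*ndeps=*/1, dep_list,
//                               /*ndeps_noalias=*/0,
//                               /*noalias_dep_list=*/nullptr,
//                               /*has_no_wait=*/0);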
6081
6082void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF,
6083 OpenMPDirectiveKind InnerKind,
6084 const RegionCodeGenTy &CodeGen,
6085 bool HasCancel) {
6086 if (!CGF.HaveInsertPoint())
6087 return;
6088 InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel,
6089 InnerKind != OMPD_critical &&
6090 InnerKind != OMPD_master &&
6091 InnerKind != OMPD_masked);
6092 CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);
6093}
6094
6095namespace {
6096enum RTCancelKind {
6097 CancelNoreq = 0,
6098 CancelParallel = 1,
6099 CancelLoop = 2,
6100 CancelSections = 3,
6101 CancelTaskgroup = 4
6102};
6103} // anonymous namespace
6104
6105static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) {
6106 RTCancelKind CancelKind = CancelNoreq;
6107 if (CancelRegion == OMPD_parallel)
6108 CancelKind = CancelParallel;
6109 else if (CancelRegion == OMPD_for)
6110 CancelKind = CancelLoop;
6111 else if (CancelRegion == OMPD_sections)
6112 CancelKind = CancelSections;
6113 else {
6114 assert(CancelRegion == OMPD_taskgroup);
6115 CancelKind = CancelTaskgroup;
6116 }
6117 return CancelKind;
6118}
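// Example mapping: '#pragma omp cancel for' yields CancelLoop (2) and
// '#pragma omp cancel taskgroup' yields CancelTaskgroup (4); the enumerator
// value is passed directly as the kmp_int32 cncl_kind argument of
// __kmpc_cancel and __kmpc_cancellationpoint below.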
6119
6120void CGOpenMPRuntime::emitCancellationPointCall(
6121 CodeGenFunction &CGF, SourceLocation Loc,
6122 OpenMPDirectiveKind CancelRegion) {
6123 if (!CGF.HaveInsertPoint())
6124 return;
6125 // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
6126 // global_tid, kmp_int32 cncl_kind);
6127 if (auto *OMPRegionInfo =
6128 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
6129 // For 'cancellation point taskgroup', the task region info may not have a
6130 // cancel. This may instead happen in another adjacent task.
6131 if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) {
6132 llvm::Value *Args[] = {
6133 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
6134 CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
6135 // Ignore return result until untied tasks are supported.
6136 llvm::Value *Result = CGF.EmitRuntimeCall(
6137 OMPBuilder.getOrCreateRuntimeFunction(
6138 CGM.getModule(), OMPRTL___kmpc_cancellationpoint),
6139 Args);
6140 // if (__kmpc_cancellationpoint()) {
6141 // call i32 @__kmpc_cancel_barrier( // for parallel cancellation only
6142 // exit from construct;
6143 // }
6144 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
6145 llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
6146 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
6147 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
6148 CGF.EmitBlock(ExitBB);
6149 if (CancelRegion == OMPD_parallel)
6150 emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
6151 // exit from construct;
6152 CodeGenFunction::JumpDest CancelDest =
6153 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
6154 CGF.EmitBranchThroughCleanup(CancelDest);
6155 CGF.EmitBlock(ContBB, /*IsFinished=*/true);
6156 }
6157 }
6158}
6159
6160void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc,
6161 const Expr *IfCond,
6162 OpenMPDirectiveKind CancelRegion) {
6163 if (!CGF.HaveInsertPoint())
6164 return;
6165 // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
6166 // kmp_int32 cncl_kind);
6167 auto &M = CGM.getModule();
6168 if (auto *OMPRegionInfo =
6169 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
6170 auto &&ThenGen = [this, &M, Loc, CancelRegion,
6171 OMPRegionInfo](CodeGenFunction &CGF, PrePostActionTy &) {
6172 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
6173 llvm::Value *Args[] = {
6174 RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc),
6175 CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
6176 // Ignore return result until untied tasks are supported.
6177 llvm::Value *Result = CGF.EmitRuntimeCall(
6178 OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_cancel), Args);
6179 // if (__kmpc_cancel()) {
6180 // call i32 @__kmpc_cancel_barrier( // for parallel cancellation only
6181 // exit from construct;
6182 // }
6183 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
6184 llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
6185 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
6186 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
6187 CGF.EmitBlock(ExitBB);
6188 if (CancelRegion == OMPD_parallel)
6189 RT.emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
6190 // exit from construct;
6191 CodeGenFunction::JumpDest CancelDest =
6192 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
6193 CGF.EmitBranchThroughCleanup(CancelDest);
6194 CGF.EmitBlock(ContBB, /*IsFinished=*/true);
6195 };
6196 if (IfCond) {
6197 emitIfClause(CGF, IfCond, ThenGen,
6198 [](CodeGenFunction &, PrePostActionTy &) {});
6199 } else {
6200 RegionCodeGenTy ThenRCG(ThenGen);
6201 ThenRCG(CGF);
6202 }
6203 }
6204}
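// A sketch of the control flow emitted by ThenGen for
// '#pragma omp cancel parallel' (block names as created above):
//   %res = call i32 @__kmpc_cancel(ptr @loc, i32 %gtid, i32 1)
//   %cmp = icmp ne i32 %res, 0
//   br i1 %cmp, label %.cancel.exit, label %.cancel.continue
// .cancel.exit:                 ; barrier for parallel cancellation only
//   call i32 @__kmpc_cancel_barrier(ptr @loc, i32 %gtid)
//   br label %<cancel destination of the construct>
// .cancel.continue: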
6205
6206namespace {
6207/// Cleanup action for uses_allocators support.
6208class OMPUsesAllocatorsActionTy final : public PrePostActionTy {
6209 ArrayRef<std::pair<const Expr *, const Expr *>> Allocators;
6210
6211public:
6212 OMPUsesAllocatorsActionTy(
6213 ArrayRef<std::pair<const Expr *, const Expr *>> Allocators)
6214 : Allocators(Allocators) {}
6215 void Enter(CodeGenFunction &CGF) override {
6216 if (!CGF.HaveInsertPoint())
6217 return;
6218 for (const auto &AllocatorData : Allocators) {
6219 CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsInit(
6220 CGF, AllocatorData.first, AllocatorData.second);
6221 }
6222 }
6223 void Exit(CodeGenFunction &CGF) override {
6224 if (!CGF.HaveInsertPoint())
6225 return;
6226 for (const auto &AllocatorData : Allocators) {
6227 CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsFini(CGF,
6228 AllocatorData.first);
6229 }
6230 }
6231};
6232} // namespace
6233
6234void CGOpenMPRuntime::emitTargetOutlinedFunction(
6235 const OMPExecutableDirective &D, StringRef ParentName,
6236 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
6237 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
6238 assert(!ParentName.empty() && "Invalid target entry parent name!");
6239 HasEmittedTargetRegion = true;
6240 SmallVector<std::pair<const Expr *, const Expr *>, 4> Allocators;
6241 for (const auto *C : D.getClausesOfKind<OMPUsesAllocatorsClause>()) {
6242 for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
6243 const OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
6244 if (!D.AllocatorTraits)
6245 continue;
6246 Allocators.emplace_back(D.Allocator, D.AllocatorTraits);
6247 }
6248 }
6249 OMPUsesAllocatorsActionTy UsesAllocatorAction(Allocators);
6250 CodeGen.setAction(UsesAllocatorAction);
6251 emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
6252 IsOffloadEntry, CodeGen);
6253}
6254
6255void CGOpenMPRuntime::emitUsesAllocatorsInit(CodeGenFunction &CGF,
6256 const Expr *Allocator,
6257 const Expr *AllocatorTraits) {
6258 llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
6259 ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
6260 // Use default memspace handle.
6261 llvm::Value *MemSpaceHandle = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
6262 llvm::Value *NumTraits = llvm::ConstantInt::get(
6263 CGF.IntTy, cast<ConstantArrayType>(
6264 AllocatorTraits->getType()->getAsArrayTypeUnsafe())
6265 ->getSize()
6266 .getLimitedValue());
6267 LValue AllocatorTraitsLVal = CGF.EmitLValue(AllocatorTraits);
6268 Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
6269 AllocatorTraitsLVal.getAddress(), CGF.VoidPtrPtrTy, CGF.VoidPtrTy);
6270 AllocatorTraitsLVal = CGF.MakeAddrLValue(Addr, CGF.getContext().VoidPtrTy,
6271 AllocatorTraitsLVal.getBaseInfo(),
6272 AllocatorTraitsLVal.getTBAAInfo());
6273 llvm::Value *Traits = Addr.emitRawPointer(CGF);
6274
6275 llvm::Value *AllocatorVal =
6276 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
6277 CGM.getModule(), OMPRTL___kmpc_init_allocator),
6278 {ThreadId, MemSpaceHandle, NumTraits, Traits});
6279 // Store to allocator.
6280 CGF.EmitVarDecl(*cast<VarDecl>(
6281 cast<DeclRefExpr>(Allocator->IgnoreParenImpCasts())->getDecl()));
6282 LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
6283 AllocatorVal =
6284 CGF.EmitScalarConversion(AllocatorVal, CGF.getContext().VoidPtrTy,
6285 Allocator->getType(), Allocator->getExprLoc());
6286 CGF.EmitStoreOfScalar(AllocatorVal, AllocatorLVal);
6287}
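// Illustrative example (allocator/trait names from the OpenMP API): for
//   omp_allocator_handle_t my_alloc;
//   const omp_alloctrait_t my_traits[1] = {{omp_atk_alignment, 64}};
//   #pragma omp target uses_allocators(my_alloc(my_traits))
// entry to the target region emits, roughly:
//   my_alloc = __kmpc_init_allocator(gtid, /*memspace=*/nullptr,
//                                    /*ntraits=*/1, traits);
// and emitUsesAllocatorsFini() below pairs it with
//   __kmpc_destroy_allocator(gtid, my_alloc);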
6288
6289void CGOpenMPRuntime::emitUsesAllocatorsFini(CodeGenFunction &CGF,
6290 const Expr *Allocator) {
6291 llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
6292 ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
6293 LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
6294 llvm::Value *AllocatorVal =
6295 CGF.EmitLoadOfScalar(AllocatorLVal, Allocator->getExprLoc());
6296 AllocatorVal = CGF.EmitScalarConversion(AllocatorVal, Allocator->getType(),
6297 CGF.getContext().VoidPtrTy,
6298 Allocator->getExprLoc());
6299 (void)CGF.EmitRuntimeCall(
6300 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
6301 OMPRTL___kmpc_destroy_allocator),
6302 {ThreadId, AllocatorVal});
6303}
6304
6305void CGOpenMPRuntime::computeMinAndMaxThreadsAndTeams(
6306 const OMPExecutableDirective &D, CodeGenFunction &CGF,
6307 llvm::OpenMPIRBuilder::TargetKernelDefaultAttrs &Attrs) {
6308 assert(Attrs.MaxTeams.size() == 1 && Attrs.MaxThreads.size() == 1 &&
6309 "invalid default attrs structure");
6310 int32_t &MaxTeamsVal = Attrs.MaxTeams.front();
6311 int32_t &MaxThreadsVal = Attrs.MaxThreads.front();
6312
6313 getNumTeamsExprForTargetDirective(CGF, D, Attrs.MinTeams, MaxTeamsVal);
6314 getNumThreadsExprForTargetDirective(CGF, D, MaxThreadsVal,
6315 /*UpperBoundOnly=*/true);
6316
6317 for (auto *C : D.getClausesOfKind<OMPXAttributeClause>()) {
6318 for (auto *A : C->getAttrs()) {
6319 int32_t AttrMinThreadsVal = 1, AttrMaxThreadsVal = -1;
6320 int32_t AttrMinBlocksVal = 1, AttrMaxBlocksVal = -1;
6321 if (auto *Attr = dyn_cast<CUDALaunchBoundsAttr>(A))
6322 CGM.handleCUDALaunchBoundsAttr(nullptr, Attr, &AttrMaxThreadsVal,
6323 &AttrMinBlocksVal, &AttrMaxBlocksVal);
6324 else if (auto *Attr = dyn_cast<AMDGPUFlatWorkGroupSizeAttr>(A))
6325 CGM.handleAMDGPUFlatWorkGroupSizeAttr(
6326 nullptr, Attr, /*ReqdWGS=*/nullptr, &AttrMinThreadsVal,
6327 &AttrMaxThreadsVal);
6328 else
6329 continue;
6330
6331 Attrs.MinThreads = std::max(Attrs.MinThreads, AttrMinThreadsVal);
6332 if (AttrMaxThreadsVal > 0)
6333 MaxThreadsVal = MaxThreadsVal > 0
6334 ? std::min(MaxThreadsVal, AttrMaxThreadsVal)
6335 : AttrMaxThreadsVal;
6336 Attrs.MinTeams = std::max(Attrs.MinTeams, AttrMinBlocksVal);
6337 if (AttrMaxBlocksVal > 0)
6338 MaxTeamsVal = MaxTeamsVal > 0 ? std::min(MaxTeamsVal, AttrMaxBlocksVal)
6339 : AttrMaxBlocksVal;
6340 }
6341 }
6342}
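// Worked example of the clamping above (attribute values are illustrative):
// 'ompx_attribute(__launch_bounds__(128, 2))' makes handleCUDALaunchBoundsAttr
// report AttrMaxThreadsVal = 128 and AttrMinBlocksVal = 2; with a
// clause-derived MaxThreadsVal of 256 the result is min(256, 128) = 128,
// and Attrs.MinTeams is raised to max(Attrs.MinTeams, 2).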
6343
6344void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper(
6345 const OMPExecutableDirective &D, StringRef ParentName,
6346 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
6347 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
6348
6349 llvm::TargetRegionEntryInfo EntryInfo =
6350 getEntryInfoFromPresumedLoc(CGM, OMPBuilder, D.getBeginLoc(), ParentName);
6351
6352 CodeGenFunction CGF(CGM, true);
6353 llvm::OpenMPIRBuilder::FunctionGenCallback &&GenerateOutlinedFunction =
6354 [&CGF, &D, &CodeGen](StringRef EntryFnName) {
6355 const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
6356
6357 CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName);
6358 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6359 return CGF.GenerateOpenMPCapturedStmtFunction(CS, D);
6360 };
6361
6362 cantFail(OMPBuilder.emitTargetRegionFunction(
6363 EntryInfo, GenerateOutlinedFunction, IsOffloadEntry, OutlinedFn,
6364 OutlinedFnID));
6365
6366 if (!OutlinedFn)
6367 return;
6368
6369 CGM.getTargetCodeGenInfo().setTargetAttributes(nullptr, OutlinedFn, CGM);
6370
6371 for (auto *C : D.getClausesOfKind<OMPXAttributeClause>()) {
6372 for (auto *A : C->getAttrs()) {
6373 if (auto *Attr = dyn_cast<AMDGPUWavesPerEUAttr>(A))
6374 CGM.handleAMDGPUWavesPerEUAttr(OutlinedFn, Attr);
6375 }
6376 }
6377 registerVTable(D);
6378}
6379
6380/// Checks if the expression is constant or does not have non-trivial function
6381/// calls.
6382static bool isTrivial(ASTContext &Ctx, const Expr * E) {
6383 // We can skip constant expressions.
6384 // We can skip expressions with trivial calls or simple expressions.
6385 return (E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) ||
6386 !E->hasNonTrivialCall(Ctx)) &&
6387 !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true);
6388}
6389
6390const Stmt *CGOpenMPRuntime::getSingleCompoundChild(ASTContext &Ctx,
6391 const Stmt *Body) {
6392 const Stmt *Child = Body->IgnoreContainers();
6393 while (const auto *C = dyn_cast_or_null<CompoundStmt>(Child)) {
6394 Child = nullptr;
6395 for (const Stmt *S : C->body()) {
6396 if (const auto *E = dyn_cast<Expr>(S)) {
6397 if (isTrivial(Ctx, E))
6398 continue;
6399 }
6400 // Some of the statements can be ignored.
6401 if (isa<NullStmt>(S) || isa<OMPFlushDirective>(S) ||
6402 isa<OMPBarrierDirective>(S) || isa<OMPTaskyieldDirective>(S))
6403 continue;
6404 // Analyze declarations.
6405 if (const auto *DS = dyn_cast<DeclStmt>(S)) {
6406 if (llvm::all_of(DS->decls(), [](const Decl *D) {
6407 if (isa<EmptyDecl>(D) || isa<DeclContext>(D) ||
6408 isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) ||
6409 isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) ||
6410 isa<UsingDirectiveDecl>(D) ||
6411 isa<OMPDeclareReductionDecl>(D) ||
6412 isa<OMPThreadPrivateDecl>(D) || isa<OMPAllocateDecl>(D))
6413 return true;
6414 const auto *VD = dyn_cast<VarDecl>(D);
6415 if (!VD)
6416 return false;
6417 return VD->hasGlobalStorage() || !VD->isUsed();
6418 }))
6419 continue;
6420 }
6421 // Found multiple children, so there is no single child to return.
6422 if (Child)
6423 return nullptr;
6424 Child = S;
6425 }
6426 if (Child)
6427 Child = Child->IgnoreContainers();
6428 }
6429 return Child;
6430}
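// Example: for a body like
//   { ; int unused; #pragma omp teams ... }
// the null statement and the unused local declaration are skipped, so the
// teams directive is returned as the single child; a second non-ignorable
// statement would make this return nullptr instead.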
6431
6432const Expr *CGOpenMPRuntime::getNumTeamsExprForTargetDirective(
6433 CodeGenFunction &CGF, const OMPExecutableDirective &D, int32_t &MinTeamsVal,
6434 int32_t &MaxTeamsVal) {
6435
6436 OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
6437 assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
6438 "Expected target-based executable directive.");
6439 switch (DirectiveKind) {
6440 case OMPD_target: {
6441 const auto *CS = D.getInnermostCapturedStmt();
6442 const auto *Body =
6443 CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
6444 const Stmt *ChildStmt =
6445 CGOpenMPRuntime::getSingleCompoundChild(CGF.getContext(), Body);
6446 if (const auto *NestedDir =
6447 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
6448 if (isOpenMPTeamsDirective(NestedDir->getDirectiveKind())) {
6449 if (NestedDir->hasClausesOfKind<OMPNumTeamsClause>()) {
6450 const Expr *NumTeams = NestedDir->getSingleClause<OMPNumTeamsClause>()
6451 ->getNumTeams()
6452 .front();
6453 if (NumTeams->isIntegerConstantExpr(CGF.getContext()))
6454 if (auto Constant =
6455 NumTeams->getIntegerConstantExpr(CGF.getContext()))
6456 MinTeamsVal = MaxTeamsVal = Constant->getExtValue();
6457 return NumTeams;
6458 }
6459 MinTeamsVal = MaxTeamsVal = 0;
6460 return nullptr;
6461 }
6462 MinTeamsVal = MaxTeamsVal = 1;
6463 return nullptr;
6464 }
6465 // A value of -1 is used to signal that no teams region needs to be emitted.
6466 MinTeamsVal = MaxTeamsVal = -1;
6467 return nullptr;
6468 }
6469 case OMPD_target_teams_loop:
6470 case OMPD_target_teams:
6471 case OMPD_target_teams_distribute:
6472 case OMPD_target_teams_distribute_simd:
6473 case OMPD_target_teams_distribute_parallel_for:
6474 case OMPD_target_teams_distribute_parallel_for_simd: {
6475 if (D.hasClausesOfKind<OMPNumTeamsClause>()) {
6476 const Expr *NumTeams =
6477 D.getSingleClause<OMPNumTeamsClause>()->getNumTeams().front();
6478 if (NumTeams->isIntegerConstantExpr(CGF.getContext()))
6479 if (auto Constant = NumTeams->getIntegerConstantExpr(CGF.getContext()))
6480 MinTeamsVal = MaxTeamsVal = Constant->getExtValue();
6481 return NumTeams;
6482 }
6483 MinTeamsVal = MaxTeamsVal = 0;
6484 return nullptr;
6485 }
6486 case OMPD_target_parallel:
6487 case OMPD_target_parallel_for:
6488 case OMPD_target_parallel_for_simd:
6489 case OMPD_target_parallel_loop:
6490 case OMPD_target_simd:
6491 MinTeamsVal = MaxTeamsVal = 1;
6492 return nullptr;
6493 case OMPD_parallel:
6494 case OMPD_for:
6495 case OMPD_parallel_for:
6496 case OMPD_parallel_loop:
6497 case OMPD_parallel_master:
6498 case OMPD_parallel_sections:
6499 case OMPD_for_simd:
6500 case OMPD_parallel_for_simd:
6501 case OMPD_cancel:
6502 case OMPD_cancellation_point:
6503 case OMPD_ordered:
6504 case OMPD_threadprivate:
6505 case OMPD_allocate:
6506 case OMPD_task:
6507 case OMPD_simd:
6508 case OMPD_tile:
6509 case OMPD_unroll:
6510 case OMPD_sections:
6511 case OMPD_section:
6512 case OMPD_single:
6513 case OMPD_master:
6514 case OMPD_critical:
6515 case OMPD_taskyield:
6516 case OMPD_barrier:
6517 case OMPD_taskwait:
6518 case OMPD_taskgroup:
6519 case OMPD_atomic:
6520 case OMPD_flush:
6521 case OMPD_depobj:
6522 case OMPD_scan:
6523 case OMPD_teams:
6524 case OMPD_target_data:
6525 case OMPD_target_exit_data:
6526 case OMPD_target_enter_data:
6527 case OMPD_distribute:
6528 case OMPD_distribute_simd:
6529 case OMPD_distribute_parallel_for:
6530 case OMPD_distribute_parallel_for_simd:
6531 case OMPD_teams_distribute:
6532 case OMPD_teams_distribute_simd:
6533 case OMPD_teams_distribute_parallel_for:
6534 case OMPD_teams_distribute_parallel_for_simd:
6535 case OMPD_target_update:
6536 case OMPD_declare_simd:
6537 case OMPD_declare_variant:
6538 case OMPD_begin_declare_variant:
6539 case OMPD_end_declare_variant:
6540 case OMPD_declare_target:
6541 case OMPD_end_declare_target:
6542 case OMPD_declare_reduction:
6543 case OMPD_declare_mapper:
6544 case OMPD_taskloop:
6545 case OMPD_taskloop_simd:
6546 case OMPD_master_taskloop:
6547 case OMPD_master_taskloop_simd:
6548 case OMPD_parallel_master_taskloop:
6549 case OMPD_parallel_master_taskloop_simd:
6550 case OMPD_requires:
6551 case OMPD_metadirective:
6552 case OMPD_unknown:
6553 break;
6554 default:
6555 break;
6556 }
6557 llvm_unreachable("Unexpected directive kind.");
6558}
6559
6560llvm::Value *CGOpenMPRuntime::emitNumTeamsForTargetDirective(
6561 CodeGenFunction &CGF, const OMPExecutableDirective &D) {
6562 assert(!CGF.getLangOpts().OpenMPIsTargetDevice &&
6563 "Clauses associated with the teams directive expected to be emitted "
6564 "only for the host!");
6565 CGBuilderTy &Bld = CGF.Builder;
6566 int32_t MinNT = -1, MaxNT = -1;
6567 const Expr *NumTeams =
6568 getNumTeamsExprForTargetDirective(CGF, D, MinNT, MaxNT);
6569 if (NumTeams != nullptr) {
6570 OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
6571
6572 switch (DirectiveKind) {
6573 case OMPD_target: {
6574 const auto *CS = D.getInnermostCapturedStmt();
6575 CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6576 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6577 llvm::Value *NumTeamsVal = CGF.EmitScalarExpr(NumTeams,
6578 /*IgnoreResultAssign*/ true);
6579 return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
6580 /*isSigned=*/true);
6581 }
6582 case OMPD_target_teams:
6583 case OMPD_target_teams_distribute:
6584 case OMPD_target_teams_distribute_simd:
6585 case OMPD_target_teams_distribute_parallel_for:
6586 case OMPD_target_teams_distribute_parallel_for_simd: {
6587 CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF);
6588 llvm::Value *NumTeamsVal = CGF.EmitScalarExpr(NumTeams,
6589 /*IgnoreResultAssign*/ true);
6590 return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
6591 /*isSigned=*/true);
6592 }
6593 default:
6594 break;
6595 }
6596 }
6597
6598 assert(MinNT == MaxNT && "Num teams ranges require handling here.");
6599 return llvm::ConstantInt::getSigned(CGF.Int32Ty, MinNT);
6600}
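// Illustrative results: for
//   #pragma omp target teams num_teams(4)
// the helper above yields MinNT == MaxNT == 4 and the emitted num_teams
// expression (i32 4) is returned; for a bare '#pragma omp target' with no
// nested teams directive, MinNT == MaxNT == -1 and i32 -1 is returned,
// signalling that no teams region is emitted.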
6601
6602/// Check for a num threads constant value (stored in \p UpperBound), or an
6603/// expression (stored in \p E). If the value is conditional (via an if-clause),
6604/// store the condition in \p CondVal. If \p E or \p CondVal is nullptr, the
6605/// corresponding expression evaluation is not performed.
6606static void getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS,
6607 const Expr **E, int32_t &UpperBound,
6608 bool UpperBoundOnly, llvm::Value **CondVal) {
6609 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6610 CGF.getContext(), CS->getCapturedStmt());
6611 const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child);
6612 if (!Dir)
6613 return;
6614
6615 if (isOpenMPParallelDirective(Dir->getDirectiveKind())) {
6616 // Handle the if clause. If an if clause is present, the number of threads
6617 // is calculated as <cond> ? (<numthreads> ? <numthreads> : 0) : 1.
6618 if (CondVal && Dir->hasClausesOfKind<OMPIfClause>()) {
6619 CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6620 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6621 const OMPIfClause *IfClause = nullptr;
6622 for (const auto *C : Dir->getClausesOfKind<OMPIfClause>()) {
6623 if (C->getNameModifier() == OMPD_unknown ||
6624 C->getNameModifier() == OMPD_parallel) {
6625 IfClause = C;
6626 break;
6627 }
6628 }
6629 if (IfClause) {
6630 const Expr *CondExpr = IfClause->getCondition();
6631 bool Result;
6632 if (CondExpr->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
6633 if (!Result) {
6634 UpperBound = 1;
6635 return;
6636 }
6637 } else {
6638 CodeGenFunction::LexicalScope Scope(CGF, CondExpr->getSourceRange());
6639 if (const auto *PreInit =
6640 cast_or_null<DeclStmt>(IfClause->getPreInitStmt())) {
6641 for (const auto *I : PreInit->decls()) {
6642 if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
6643 CGF.EmitVarDecl(cast<VarDecl>(*I));
6644 } else {
6645 CodeGenFunction::AutoVarEmission Emission =
6646 CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
6647 CGF.EmitAutoVarCleanups(Emission);
6648 }
6649 }
6650 *CondVal = CGF.EvaluateExprAsBool(CondExpr);
6651 }
6652 }
6653 }
6654 }
6655 // Check the value of the num_threads clause only if the if clause was not
6656 // specified or did not evaluate to false.
6657 if (Dir->hasClausesOfKind<OMPNumThreadsClause>()) {
6658 CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6659 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6660 const auto *NumThreadsClause =
6661 Dir->getSingleClause<OMPNumThreadsClause>();
6662 const Expr *NTExpr = NumThreadsClause->getNumThreads();
6663 if (NTExpr->isIntegerConstantExpr(CGF.getContext()))
6664 if (auto Constant = NTExpr->getIntegerConstantExpr(CGF.getContext()))
6665 UpperBound =
6666 UpperBound
6667 ? Constant->getZExtValue()
6668 : std::min(UpperBound,
6669 static_cast<int32_t>(Constant->getZExtValue()));
6670 // If we haven't found an upper bound, remember that we saw a
6671 // thread-limiting clause.
6672 if (UpperBound == -1)
6673 UpperBound = 0;
6674 if (!E)
6675 return;
6676 CodeGenFunction::LexicalScope Scope(CGF, NTExpr->getSourceRange());
6677 if (const auto *PreInit =
6678 cast_or_null<DeclStmt>(NumThreadsClause->getPreInitStmt())) {
6679 for (const auto *I : PreInit->decls()) {
6680 if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
6681 CGF.EmitVarDecl(cast<VarDecl>(*I));
6682 } else {
6683 CodeGenFunction::AutoVarEmission Emission =
6684 CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
6685 CGF.EmitAutoVarCleanups(Emission);
6686 }
6687 }
6688 }
6689 *E = NTExpr;
6690 }
6691 return;
6692 }
6693 if (isOpenMPSimdDirective(Dir->getDirectiveKind()))
6694 UpperBound = 1;
6695}
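// Worked example of the formula above: for
//   #pragma omp target
//   #pragma omp parallel if(c) num_threads(n)
// with a non-constant condition, *CondVal receives the emitted i1 for c and
// *E receives n, so the final count is lowered as c ? (n ? n : 0) : 1 (see
// emitNumThreadsForTargetDirective below).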
6696
6697const Expr *CGOpenMPRuntime::getNumThreadsExprForTargetDirective(
6698 CodeGenFunction &CGF, const OMPExecutableDirective &D, int32_t &UpperBound,
6699 bool UpperBoundOnly, llvm::Value **CondVal, const Expr **ThreadLimitExpr) {
6700 assert((!CGF.getLangOpts().OpenMPIsTargetDevice || UpperBoundOnly) &&
6701 "Clauses associated with the teams directive expected to be emitted "
6702 "only for the host!");
6703 OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
6704 assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
6705 "Expected target-based executable directive.");
6706
6707 const Expr *NT = nullptr;
6708 const Expr **NTPtr = UpperBoundOnly ? nullptr : &NT;
6709
6710 auto CheckForConstExpr = [&](const Expr *E, const Expr **EPtr) {
6711 if (E->isIntegerConstantExpr(CGF.getContext())) {
6712 if (auto Constant = E->getIntegerConstantExpr(CGF.getContext()))
6713 UpperBound = UpperBound ? Constant->getZExtValue()
6714 : std::min(UpperBound,
6715 int32_t(Constant->getZExtValue()));
6716 }
6717 // If we haven't found an upper bound, remember that we saw a
6718 // thread-limiting clause.
6719 if (UpperBound == -1)
6720 UpperBound = 0;
6721 if (EPtr)
6722 *EPtr = E;
6723 };
6724
6725 auto ReturnSequential = [&]() {
6726 UpperBound = 1;
6727 return NT;
6728 };
6729
6730 switch (DirectiveKind) {
6731 case OMPD_target: {
6732 const CapturedStmt *CS = D.getInnermostCapturedStmt();
6733 getNumThreads(CGF, CS, NTPtr, UpperBound, UpperBoundOnly, CondVal);
6734 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6735 CGF.getContext(), CS->getCapturedStmt());
6736 // TODO: The standard is not clear how to resolve two thread limit clauses,
6737 // let's pick the teams one if it's present, otherwise the target one.
6738 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6739 if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
6740 if (const auto *TLC = Dir->getSingleClause<OMPThreadLimitClause>()) {
6741 ThreadLimitClause = TLC;
6742 if (ThreadLimitExpr) {
6743 CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6744 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6745 CodeGenFunction::LexicalScope Scope(
6746 CGF,
6747 ThreadLimitClause->getThreadLimit().front()->getSourceRange());
6748 if (const auto *PreInit =
6749 cast_or_null<DeclStmt>(ThreadLimitClause->getPreInitStmt())) {
6750 for (const auto *I : PreInit->decls()) {
6751 if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
6752 CGF.EmitVarDecl(cast<VarDecl>(*I));
6753 } else {
6754 CodeGenFunction::AutoVarEmission Emission =
6755 CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
6756 CGF.EmitAutoVarCleanups(Emission);
6757 }
6758 }
6759 }
6760 }
6761 }
6762 }
6763 if (ThreadLimitClause)
6764 CheckForConstExpr(ThreadLimitClause->getThreadLimit().front(),
6765 ThreadLimitExpr);
6766 if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
6767 if (isOpenMPTeamsDirective(Dir->getDirectiveKind()) &&
6768 !isOpenMPDistributeDirective(Dir->getDirectiveKind())) {
6769 CS = Dir->getInnermostCapturedStmt();
6770 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6771 CGF.getContext(), CS->getCapturedStmt());
6772 Dir = dyn_cast_or_null<OMPExecutableDirective>(Child);
6773 }
6774 if (Dir && isOpenMPParallelDirective(Dir->getDirectiveKind())) {
6775 CS = Dir->getInnermostCapturedStmt();
6776 getNumThreads(CGF, CS, NTPtr, UpperBound, UpperBoundOnly, CondVal);
6777 } else if (Dir && isOpenMPSimdDirective(Dir->getDirectiveKind()))
6778 return ReturnSequential();
6779 }
6780 return NT;
6781 }
6782 case OMPD_target_teams: {
6783 if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
6784 CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
6785 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6786 CheckForConstExpr(ThreadLimitClause->getThreadLimit().front(),
6787 ThreadLimitExpr);
6788 }
6789 const CapturedStmt *CS = D.getInnermostCapturedStmt();
6790 getNumThreads(CGF, CS, NTPtr, UpperBound, UpperBoundOnly, CondVal);
6791 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6792 CGF.getContext(), CS->getCapturedStmt());
6793 if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
6794 if (Dir->getDirectiveKind() == OMPD_distribute) {
6795 CS = Dir->getInnermostCapturedStmt();
6796 getNumThreads(CGF, CS, NTPtr, UpperBound, UpperBoundOnly, CondVal);
6797 }
6798 }
6799 return NT;
6800 }
6801 case OMPD_target_teams_distribute:
6802 if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
6803 CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
6804 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6805 CheckForConstExpr(ThreadLimitClause->getThreadLimit().front(),
6806 ThreadLimitExpr);
6807 }
6808 getNumThreads(CGF, D.getInnermostCapturedStmt(), NTPtr, UpperBound,
6809 UpperBoundOnly, CondVal);
6810 return NT;
6811 case OMPD_target_teams_loop:
6812 case OMPD_target_parallel_loop:
6813 case OMPD_target_parallel:
6814 case OMPD_target_parallel_for:
6815 case OMPD_target_parallel_for_simd:
6816 case OMPD_target_teams_distribute_parallel_for:
6817 case OMPD_target_teams_distribute_parallel_for_simd: {
6818 if (CondVal && D.hasClausesOfKind<OMPIfClause>()) {
6819 const OMPIfClause *IfClause = nullptr;
6820 for (const auto *C : D.getClausesOfKind<OMPIfClause>()) {
6821 if (C->getNameModifier() == OMPD_unknown ||
6822 C->getNameModifier() == OMPD_parallel) {
6823 IfClause = C;
6824 break;
6825 }
6826 }
6827 if (IfClause) {
6828 const Expr *Cond = IfClause->getCondition();
6829 bool Result;
6830 if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
6831 if (!Result)
6832 return ReturnSequential();
6833 } else {
6834 CodeGenFunction::LexicalScope Scope(CGF, Cond->getSourceRange());
6835 *CondVal = CGF.EvaluateExprAsBool(Cond);
6836 }
6837 }
6838 }
6839 if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
6840 CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
6841 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6842 CheckForConstExpr(ThreadLimitClause->getThreadLimit().front(),
6843 ThreadLimitExpr);
6844 }
6845 if (D.hasClausesOfKind<OMPNumThreadsClause>()) {
6846 CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
6847 const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>();
6848 CheckForConstExpr(NumThreadsClause->getNumThreads(), nullptr);
6849 return NumThreadsClause->getNumThreads();
6850 }
6851 return NT;
6852 }
6853 case OMPD_target_teams_distribute_simd:
6854 case OMPD_target_simd:
6855 return ReturnSequential();
6856 default:
6857 break;
6858 }
6859 llvm_unreachable("Unsupported directive kind.");
6860}
6861
6862llvm::Value *CGOpenMPRuntime::emitNumThreadsForTargetDirective(
6863 CodeGenFunction &CGF, const OMPExecutableDirective &D) {
6864 llvm::Value *NumThreadsVal = nullptr;
6865 llvm::Value *CondVal = nullptr;
6866 llvm::Value *ThreadLimitVal = nullptr;
6867 const Expr *ThreadLimitExpr = nullptr;
6868 int32_t UpperBound = -1;
6869
6870 const Expr *NT = getNumThreadsExprForTargetDirective(
6871 CGF, D, UpperBound, /* UpperBoundOnly */ false, &CondVal,
6872 &ThreadLimitExpr);
6873
6874 // Thread limit expressions are used below, emit them.
6875 if (ThreadLimitExpr) {
6876 ThreadLimitVal =
6877 CGF.EmitScalarExpr(ThreadLimitExpr, /*IgnoreResultAssign=*/true);
6878 ThreadLimitVal = CGF.Builder.CreateIntCast(ThreadLimitVal, CGF.Int32Ty,
6879 /*isSigned=*/false);
6880 }
6881
6882 // Generate the num threads expression.
6883 if (UpperBound == 1) {
6884 NumThreadsVal = CGF.Builder.getInt32(UpperBound);
6885 } else if (NT) {
6886 NumThreadsVal = CGF.EmitScalarExpr(NT, /*IgnoreResultAssign=*/true);
6887 NumThreadsVal = CGF.Builder.CreateIntCast(NumThreadsVal, CGF.Int32Ty,
6888 /*isSigned=*/false);
6889 } else if (ThreadLimitVal) {
6890 // If we do not have a num threads value but a thread limit, replace the
6891 // former with the latter; the thread limit expression was already handled.
6892 NumThreadsVal = ThreadLimitVal;
6893 ThreadLimitVal = nullptr;
6894 } else {
6895 // Default to "0" which means runtime choice.
6896 assert(!ThreadLimitVal && "Default not applicable with thread limit value");
6897 NumThreadsVal = CGF.Builder.getInt32(0);
6898 }
6899
6900 // Handle the if clause. If an if clause is present, the number of threads
6901 // is calculated as <cond> ? (<numthreads> ? <numthreads> : 0) : 1.
6902 if (CondVal) {
6903 CodeGenFunction::RunCleanupsScope Scope(CGF);
6904 NumThreadsVal = CGF.Builder.CreateSelect(CondVal, NumThreadsVal,
6905 CGF.Builder.getInt32(1));
6906 }
6907
6908 // If the thread limit and num teams expression were present, take the
6909 // minimum.
6910 if (ThreadLimitVal) {
6911 NumThreadsVal = CGF.Builder.CreateSelect(
6912 CGF.Builder.CreateICmpULT(ThreadLimitVal, NumThreadsVal),
6913 ThreadLimitVal, NumThreadsVal);
6914 }
6915
6916 return NumThreadsVal;
6917}
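// A sketch of the combined clamping: for
//   #pragma omp target teams thread_limit(64)
// containing '#pragma omp parallel num_threads(n)', the function emits,
// roughly:
//   %nt = select i1 (icmp ult i32 64, %n), i32 64, i32 %n
// i.e. min(thread_limit, num_threads), with the if-clause select applied
// beforehand when present.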
6918
6919namespace {
6920LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
6921
6922// Utility to handle information from clauses associated with a given
6923// construct that use mappable expressions (e.g. 'map' clause, 'to' clause).
6924// It provides a convenient interface to obtain the information and generate
6925// code for that information.
6926class MappableExprsHandler {
6927public:
6928 /// Custom comparator for attach-pointer expressions that compares them by
6929 /// complexity (i.e. their component-depth) first, then by the order in which
6930 /// they were computed by collectAttachPtrExprInfo(), if they are semantically
6931 /// different.
6932 struct AttachPtrExprComparator {
6933 const MappableExprsHandler &Handler;
6934 // Cache of previous equality comparison results.
6935 mutable llvm::DenseMap<std::pair<const Expr *, const Expr *>, bool>
6936 CachedEqualityComparisons;
6937
6938 AttachPtrExprComparator(const MappableExprsHandler &H) : Handler(H) {}
6939 AttachPtrExprComparator() = delete;
6940
6941 // Return true iff LHS is "less than" RHS.
6942 bool operator()(const Expr *LHS, const Expr *RHS) const {
6943 if (LHS == RHS)
6944 return false;
6945
6946 // First, compare by complexity (depth)
6947 const auto ItLHS = Handler.AttachPtrComponentDepthMap.find(LHS);
6948 const auto ItRHS = Handler.AttachPtrComponentDepthMap.find(RHS);
6949
6950 std::optional<size_t> DepthLHS =
6951 (ItLHS != Handler.AttachPtrComponentDepthMap.end()) ? ItLHS->second
6952 : std::nullopt;
6953 std::optional<size_t> DepthRHS =
6954 (ItRHS != Handler.AttachPtrComponentDepthMap.end()) ? ItRHS->second
6955 : std::nullopt;
6956
6957 // std::nullopt (no attach pointer) has lowest complexity
6958 if (!DepthLHS.has_value() && !DepthRHS.has_value()) {
6959 // Both have same complexity, now check semantic equality
6960 if (areEqual(LHS, RHS))
6961 return false;
6962 // Different semantically, compare by computation order
6963 return wasComputedBefore(LHS, RHS);
6964 }
6965 if (!DepthLHS.has_value())
6966 return true; // LHS has lower complexity
6967 if (!DepthRHS.has_value())
6968 return false; // RHS has lower complexity
6969
6970 // Both have values, compare by depth (lower depth = lower complexity)
6971 if (DepthLHS.value() != DepthRHS.value())
6972 return DepthLHS.value() < DepthRHS.value();
6973
6974 // Same complexity, now check semantic equality
6975 if (areEqual(LHS, RHS))
6976 return false;
6977 // Different semantically, compare by computation order
6978 return wasComputedBefore(LHS, RHS);
6979 }
6980
6981 public:
6982 /// Return true if \p LHS and \p RHS are semantically equal. Uses pre-cached
6983 /// results, if available, otherwise does a recursive semantic comparison.
6984 bool areEqual(const Expr *LHS, const Expr *RHS) const {
6985 // Check cache first for faster lookup
6986 const auto CachedResultIt = CachedEqualityComparisons.find({LHS, RHS});
6987 if (CachedResultIt != CachedEqualityComparisons.end())
6988 return CachedResultIt->second;
6989
6990 bool ComparisonResult = areSemanticallyEqual(LHS, RHS);
6991
6992 // Cache the result for future lookups (both orders since semantic
6993 // equality is commutative)
6994 CachedEqualityComparisons[{LHS, RHS}] = ComparisonResult;
6995 CachedEqualityComparisons[{RHS, LHS}] = ComparisonResult;
6996 return ComparisonResult;
6997 }
6998
6999 /// Compare the two attach-ptr expressions by their computation order.
7000 /// Returns true iff LHS was computed before RHS by
7001 /// collectAttachPtrExprInfo().
7002 bool wasComputedBefore(const Expr *LHS, const Expr *RHS) const {
7003 const size_t &OrderLHS = Handler.AttachPtrComputationOrderMap.at(LHS);
7004 const size_t &OrderRHS = Handler.AttachPtrComputationOrderMap.at(RHS);
7005
7006 return OrderLHS < OrderRHS;
7007 }
7008
7009 private:
7010 /// Helper function to compare attach-pointer expressions semantically.
7011 /// This function handles various expression types that can be part of an
7012 /// attach-pointer.
7013 /// TODO: Not urgent, but we should ideally return true when comparing
7014 /// `p[10]`, `*(p + 10)`, `*(p + 5 + 5)`, `p[10:1]` etc.
7015 bool areSemanticallyEqual(const Expr *LHS, const Expr *RHS) const {
7016 if (LHS == RHS)
7017 return true;
7018
7019 // If only one is null, they aren't equal
7020 if (!LHS || !RHS)
7021 return false;
7022
7023 ASTContext &Ctx = Handler.CGF.getContext();
7024 // Strip away parentheses and no-op casts to get to the core expression
7025 LHS = LHS->IgnoreParenNoopCasts(Ctx);
7026 RHS = RHS->IgnoreParenNoopCasts(Ctx);
7027
7028 // Direct pointer comparison of the underlying expressions
7029 if (LHS == RHS)
7030 return true;
7031
7032 // Check if the expression classes match
7033 if (LHS->getStmtClass() != RHS->getStmtClass())
7034 return false;
7035
7036 // Handle DeclRefExpr (variable references)
7037 if (const auto *LD = dyn_cast<DeclRefExpr>(LHS)) {
7038 const auto *RD = dyn_cast<DeclRefExpr>(RHS);
7039 if (!RD)
7040 return false;
7041 return LD->getDecl()->getCanonicalDecl() ==
7042 RD->getDecl()->getCanonicalDecl();
7043 }
7044
7045 // Handle ArraySubscriptExpr (array indexing like a[i])
7046 if (const auto *LA = dyn_cast<ArraySubscriptExpr>(LHS)) {
7047 const auto *RA = dyn_cast<ArraySubscriptExpr>(RHS);
7048 if (!RA)
7049 return false;
7050 return areSemanticallyEqual(LA->getBase(), RA->getBase()) &&
7051 areSemanticallyEqual(LA->getIdx(), RA->getIdx());
7052 }
7053
7054 // Handle MemberExpr (member access like s.m or p->m)
7055 if (const auto *LM = dyn_cast<MemberExpr>(LHS)) {
7056 const auto *RM = dyn_cast<MemberExpr>(RHS);
7057 if (!RM)
7058 return false;
7059 if (LM->getMemberDecl()->getCanonicalDecl() !=
7060 RM->getMemberDecl()->getCanonicalDecl())
7061 return false;
7062 return areSemanticallyEqual(LM->getBase(), RM->getBase());
7063 }
7064
7065 // Handle UnaryOperator (unary operations like *p, &x, etc.)
7066 if (const auto *LU = dyn_cast<UnaryOperator>(LHS)) {
7067 const auto *RU = dyn_cast<UnaryOperator>(RHS);
7068 if (!RU)
7069 return false;
7070 if (LU->getOpcode() != RU->getOpcode())
7071 return false;
7072 return areSemanticallyEqual(LU->getSubExpr(), RU->getSubExpr());
7073 }
7074
7075 // Handle BinaryOperator (binary operations like p + offset)
7076 if (const auto *LB = dyn_cast<BinaryOperator>(LHS)) {
7077 const auto *RB = dyn_cast<BinaryOperator>(RHS);
7078 if (!RB)
7079 return false;
7080 if (LB->getOpcode() != RB->getOpcode())
7081 return false;
7082 return areSemanticallyEqual(LB->getLHS(), RB->getLHS()) &&
7083 areSemanticallyEqual(LB->getRHS(), RB->getRHS());
7084 }
7085
7086 // Handle ArraySectionExpr (array sections like a[0:1])
7087 // Attach pointers should not contain array-sections, but currently we
7088 // don't emit an error.
7089 if (const auto *LAS = dyn_cast<ArraySectionExpr>(LHS)) {
7090 const auto *RAS = dyn_cast<ArraySectionExpr>(RHS);
7091 if (!RAS)
7092 return false;
7093 return areSemanticallyEqual(LAS->getBase(), RAS->getBase()) &&
7094 areSemanticallyEqual(LAS->getLowerBound(),
7095 RAS->getLowerBound()) &&
7096 areSemanticallyEqual(LAS->getLength(), RAS->getLength());
7097 }
7098
7099 // Handle CastExpr (explicit casts)
7100 if (const auto *LC = dyn_cast<CastExpr>(LHS)) {
7101 const auto *RC = dyn_cast<CastExpr>(RHS);
7102 if (!RC)
7103 return false;
7104 if (LC->getCastKind() != RC->getCastKind())
7105 return false;
7106 return areSemanticallyEqual(LC->getSubExpr(), RC->getSubExpr());
7107 }
7108
7109 // Handle CXXThisExpr (this pointer)
7110 if (isa<CXXThisExpr>(LHS) && isa<CXXThisExpr>(RHS))
7111 return true;
7112
7113 // Handle IntegerLiteral (integer constants)
7114 if (const auto *LI = dyn_cast<IntegerLiteral>(LHS)) {
7115 const auto *RI = dyn_cast<IntegerLiteral>(RHS);
7116 if (!RI)
7117 return false;
7118 return LI->getValue() == RI->getValue();
7119 }
7120
7121 // Handle CharacterLiteral (character constants)
7122 if (const auto *LC = dyn_cast<CharacterLiteral>(LHS)) {
7123 const auto *RC = dyn_cast<CharacterLiteral>(RHS);
7124 if (!RC)
7125 return false;
7126 return LC->getValue() == RC->getValue();
7127 }
7128
7129 // Handle FloatingLiteral (floating point constants)
7130 if (const auto *LF = dyn_cast<FloatingLiteral>(LHS)) {
7131 const auto *RF = dyn_cast<FloatingLiteral>(RHS);
7132 if (!RF)
7133 return false;
7134 // Use bitwise comparison for floating point literals
7135 return LF->getValue().bitwiseIsEqual(RF->getValue());
7136 }
7137
7138 // Handle StringLiteral (string constants)
7139 if (const auto *LS = dyn_cast<StringLiteral>(LHS)) {
7140 const auto *RS = dyn_cast<StringLiteral>(RHS);
7141 if (!RS)
7142 return false;
7143 return LS->getString() == RS->getString();
7144 }
7145
7146 // Handle CXXNullPtrLiteralExpr (nullptr)
7147 if (isa<CXXNullPtrLiteralExpr>(LHS) && isa<CXXNullPtrLiteralExpr>(RHS))
7148 return true;
7149
7150 // Handle CXXBoolLiteralExpr (true/false)
7151 if (const auto *LB = dyn_cast<CXXBoolLiteralExpr>(LHS)) {
7152 const auto *RB = dyn_cast<CXXBoolLiteralExpr>(RHS);
7153 if (!RB)
7154 return false;
7155 return LB->getValue() == RB->getValue();
7156 }
7157
7158 // Fallback for other forms - use the existing comparison method
7159 return Expr::isSameComparisonOperand(LHS, RHS);
7160 }
7161 };
7162
7163 /// Get the offset of the OMP_MAP_MEMBER_OF field.
7164 static unsigned getFlagMemberOffset() {
7165 unsigned Offset = 0;
7166 for (uint64_t Remain =
7167 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
7168 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF);
7169 !(Remain & 1); Remain = Remain >> 1)
7170 Offset++;
7171 return Offset;
7172 }
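// Worked example: OMP_MAP_MEMBER_OF occupies the high 16 bits of the 64-bit
// flag word, so its lowest set bit is bit 48 and this loop returns 48; the
// OpenMPIRBuilder then encodes a member position P, roughly, as
// ((uint64_t)P + 1) << 48.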
7173
7174 /// Class that holds debugging information for a data mapping to be passed to
7175 /// the runtime library.
7176 class MappingExprInfo {
7177 /// The variable declaration used for the data mapping.
7178 const ValueDecl *MapDecl = nullptr;
7179 /// The original expression used in the map clause, or null if there is
7180 /// none.
7181 const Expr *MapExpr = nullptr;
7182
7183 public:
7184 MappingExprInfo(const ValueDecl *MapDecl, const Expr *MapExpr = nullptr)
7185 : MapDecl(MapDecl), MapExpr(MapExpr) {}
7186
7187 const ValueDecl *getMapDecl() const { return MapDecl; }
7188 const Expr *getMapExpr() const { return MapExpr; }
7189 };
7190
7191 using DeviceInfoTy = llvm::OpenMPIRBuilder::DeviceInfoTy;
7192 using MapBaseValuesArrayTy = llvm::OpenMPIRBuilder::MapValuesArrayTy;
7193 using MapValuesArrayTy = llvm::OpenMPIRBuilder::MapValuesArrayTy;
7194 using MapFlagsArrayTy = llvm::OpenMPIRBuilder::MapFlagsArrayTy;
7195 using MapDimArrayTy = llvm::OpenMPIRBuilder::MapDimArrayTy;
7196 using MapNonContiguousArrayTy =
7197 llvm::OpenMPIRBuilder::MapNonContiguousArrayTy;
7198 using MapExprsArrayTy = SmallVector<MappingExprInfo, 4>;
7199 using MapValueDeclsArrayTy = SmallVector<const ValueDecl *, 4>;
7200 using MapData =
7201 std::tuple<OMPClauseMappableExprCommon::MappableExprComponentListRef,
7202 OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>,
7203 bool /*IsImplicit*/, const ValueDecl *, const Expr *>;
7204 using MapDataArrayTy = SmallVector<MapData, 4>;
7205
7206 /// This structure contains combined information generated for mappable
7207 /// clauses, including base pointers, pointers, sizes, map types, user-defined
7208 /// mappers, and non-contiguous information.
7209 struct MapCombinedInfoTy : llvm::OpenMPIRBuilder::MapInfosTy {
7210 MapExprsArrayTy Exprs;
7211 MapValueDeclsArrayTy Mappers;
7212 MapValueDeclsArrayTy DevicePtrDecls;
7213
7214 /// Append arrays in \a CurInfo.
7215 void append(MapCombinedInfoTy &CurInfo) {
7216 Exprs.append(CurInfo.Exprs.begin(), CurInfo.Exprs.end());
7217 DevicePtrDecls.append(CurInfo.DevicePtrDecls.begin(),
7218 CurInfo.DevicePtrDecls.end());
7219 Mappers.append(CurInfo.Mappers.begin(), CurInfo.Mappers.end());
7220 llvm::OpenMPIRBuilder::MapInfosTy::append(CurInfo);
7221 }
7222 };
7223
7224 /// Map between a struct and its lowest & highest elements which have been
7225 /// mapped.
7226 /// [ValueDecl *] --> {LE(FieldIndex, Pointer),
7227 /// HE(FieldIndex, Pointer)}
7228 struct StructRangeInfoTy {
7229 MapCombinedInfoTy PreliminaryMapData;
7230 std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> LowestElem = {
7231 0, Address::invalid()};
7232 std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> HighestElem = {
7233 0, Address::invalid()};
7234 Address Base = Address::invalid();
7235 Address LB = Address::invalid();
7236 bool IsArraySection = false;
7237 bool HasCompleteRecord = false;
7238 };
7239
7240 /// A struct to store the attach pointer and pointee information, to be used
7241 /// when emitting an attach entry.
7242 struct AttachInfoTy {
7243 Address AttachPtrAddr = Address::invalid();
7244 Address AttachPteeAddr = Address::invalid();
7245 const ValueDecl *AttachPtrDecl = nullptr;
7246 const Expr *AttachMapExpr = nullptr;
7247
7248 bool isValid() const {
7249 return AttachPtrAddr.isValid() && AttachPteeAddr.isValid();
7250 }
7251 };
7252
7253 /// Check if there's any component list where the attach pointer expression
7254 /// matches the given captured variable.
7255 bool hasAttachEntryForCapturedVar(const ValueDecl *VD) const {
7256 for (const auto &AttachEntry : AttachPtrExprMap) {
7257 if (AttachEntry.second) {
7258 // Check if the attach pointer expression is a DeclRefExpr that
7259 // references the captured variable
7260 if (const auto *DRE = dyn_cast<DeclRefExpr>(AttachEntry.second))
7261 if (DRE->getDecl() == VD)
7262 return true;
7263 }
7264 }
7265 return false;
7266 }
7267
7268 /// Get the previously-cached attach pointer for a component list, if-any.
7269 const Expr *getAttachPtrExpr(
7270 OMPClauseMappableExprCommon::MappableExprComponentListRef Components)
7271 const {
7272 const auto It = AttachPtrExprMap.find(Components);
7273 if (It != AttachPtrExprMap.end())
7274 return It->second;
7275
7276 return nullptr;
7277 }
7278
7279private:
7280 /// Kind that defines how a device pointer has to be returned.
7281 struct MapInfo {
7282 OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
7283 OpenMPMapClauseKind MapType = OMPC_MAP_unknown;
7284 ArrayRef<OpenMPMapModifierKind> MapModifiers;
7285 ArrayRef<OpenMPMotionModifierKind> MotionModifiers;
7286 bool ReturnDevicePointer = false;
7287 bool IsImplicit = false;
7288 const ValueDecl *Mapper = nullptr;
7289 const Expr *VarRef = nullptr;
7290 bool ForDeviceAddr = false;
7291
7292 MapInfo() = default;
7293 MapInfo(
7294 OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
7295 OpenMPMapClauseKind MapType,
7296 ArrayRef<OpenMPMapModifierKind> MapModifiers,
7297 ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
7298 bool ReturnDevicePointer, bool IsImplicit,
7299 const ValueDecl *Mapper = nullptr, const Expr *VarRef = nullptr,
7300 bool ForDeviceAddr = false)
7301 : Components(Components), MapType(MapType), MapModifiers(MapModifiers),
7302 MotionModifiers(MotionModifiers),
7303 ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit),
7304 Mapper(Mapper), VarRef(VarRef), ForDeviceAddr(ForDeviceAddr) {}
7305 };
7306
7307 /// The target directive from which the mappable clauses were extracted. It
7308 /// is either an executable directive or a user-defined mapper directive.
7309 llvm::PointerUnion<const OMPExecutableDirective *,
7310 const OMPDeclareMapperDecl *>
7311 CurDir;
7312
7313 /// Function the directive is being generated for.
7314 CodeGenFunction &CGF;
7315
7316 /// Set of all first private variables in the current directive.
7317 /// bool data is set to true if the variable is implicitly marked as
7318 /// firstprivate, false otherwise.
7319 llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, bool> FirstPrivateDecls;
7320
7321 /// Set of defaultmap clause kinds that use firstprivate behavior.
7322 llvm::SmallSet<OpenMPDefaultmapClauseKind, 4> DefaultmapFirstprivateKinds;
7323
7324 /// Map between device pointer declarations and their expression components.
7325 /// The key value for declarations in 'this' is null.
7326 llvm::DenseMap<
7327 const ValueDecl *,
7328 SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
7329 DevPointersMap;
7330
7331 /// Map between device addr declarations and their expression components.
7332 /// The key value for declarations in 'this' is null.
7333 llvm::DenseMap<
7334 const ValueDecl *,
7335 SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
7336 HasDevAddrsMap;
7337
7338 /// Map between lambda declarations and their map type.
7339 llvm::DenseMap<const ValueDecl *, const OMPMapClause *> LambdasMap;
7340
7341 /// Map from component lists to their attach pointer expressions.
7342 llvm::DenseMap<OMPClauseMappableExprCommon::MappableExprComponentListRef,
7343 const Expr *>
7344 AttachPtrExprMap;
7345
7346 /// Map from attach pointer expressions to their component depth. This can
7347 /// be used to order attach-ptr expressions by increasing/decreasing depth.
7348 /// The component-depth of `nullptr` (i.e. no attach-ptr) is
7349 /// `std::nullopt`.
7350 /// TODO: Not urgent, but we should ideally use the number of pointer
7351 /// dereferences in an expr as an indicator of its complexity, instead of the
7352 /// component-depth. That would be needed for us to treat `p[1]`, `*(p + 10)`,
7353 /// `*(p + 5 + 5)` together.
7354 llvm::DenseMap<const Expr *, std::optional<size_t>>
7355 AttachPtrComponentDepthMap = {{nullptr, std::nullopt}};
7356
7357 /// Map from attach pointer expressions to the order in which they were
7358 /// computed by collectAttachPtrExprInfo().
7359 llvm::DenseMap<const Expr *, size_t> AttachPtrComputationOrderMap = {
7360 {nullptr, 0}};
7361
7362 /// An instance of attach-ptr-expr comparator that can be used throughout the
7363 /// lifetime of this handler.
7364 AttachPtrExprComparator AttachPtrComparator;
7365
7366 llvm::Value *getExprTypeSize(const Expr *E) const {
7367 QualType ExprTy = E->getType().getCanonicalType();
7368
7369 // Calculate the size for array shaping expression.
7370 if (const auto *OAE = dyn_cast<OMPArrayShapingExpr>(E)) {
7371 llvm::Value *Size =
7372 CGF.getTypeSize(OAE->getBase()->getType()->getPointeeType());
7373 for (const Expr *SE : OAE->getDimensions()) {
7374 llvm::Value *Sz = CGF.EmitScalarExpr(SE);
7375 Sz = CGF.EmitScalarConversion(Sz, SE->getType(),
7376 CGF.getContext().getSizeType(),
7377 SE->getExprLoc());
7378 Size = CGF.Builder.CreateNUWMul(Size, Sz);
7379 }
7380 return Size;
7381 }
7382
7383 // Reference types are ignored for mapping purposes.
7384 if (const auto *RefTy = ExprTy->getAs<ReferenceType>())
7385 ExprTy = RefTy->getPointeeType().getCanonicalType();
7386
7387 // Given that an array section is considered a built-in type, we need to
7388 // do the calculation based on the length of the section instead of relying
7389 // on CGF.getTypeSize(E->getType()).
7390 if (const auto *OAE = dyn_cast<ArraySectionExpr>(E)) {
7391 QualType BaseTy = ArraySectionExpr::getBaseOriginalType(
7392 OAE->getBase()->IgnoreParenImpCasts())
7393 .getCanonicalType();
7394
7395 // If there is no length associated with the expression and no lower bound
7396 // is specified either, that means we are using the whole length of the
7397 // base.
7398 if (!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
7399 !OAE->getLowerBound())
7400 return CGF.getTypeSize(BaseTy);
7401
7402 llvm::Value *ElemSize;
7403 if (const auto *PTy = BaseTy->getAs<PointerType>()) {
7404 ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType());
7405 } else {
7406 const auto *ATy = cast<ArrayType>(BaseTy.getTypePtr());
7407 assert(ATy && "Expecting array type if not a pointer type.");
7408 ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType());
7409 }
7410
7411 // If we don't have a length at this point, that is because we have an
7412 // array section with a single element.
7413 if (!OAE->getLength() && OAE->getColonLocFirst().isInvalid())
7414 return ElemSize;
7415
7416 if (const Expr *LenExpr = OAE->getLength()) {
7417 llvm::Value *LengthVal = CGF.EmitScalarExpr(LenExpr);
7418 LengthVal = CGF.EmitScalarConversion(LengthVal, LenExpr->getType(),
7419 CGF.getContext().getSizeType(),
7420 LenExpr->getExprLoc());
7421 return CGF.Builder.CreateNUWMul(LengthVal, ElemSize);
7422 }
7423 assert(!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
7424 OAE->getLowerBound() && "expected array_section[lb:].");
7425 // Size = sizetype - lb * elemtype;
7426 llvm::Value *LengthVal = CGF.getTypeSize(BaseTy);
7427 llvm::Value *LBVal = CGF.EmitScalarExpr(OAE->getLowerBound());
7428 LBVal = CGF.EmitScalarConversion(LBVal, OAE->getLowerBound()->getType(),
7429 CGF.getContext().getSizeType(),
7430 OAE->getLowerBound()->getExprLoc());
7431 LBVal = CGF.Builder.CreateNUWMul(LBVal, ElemSize);
7432 llvm::Value *Cmp = CGF.Builder.CreateICmpUGT(LengthVal, LBVal);
7433 llvm::Value *TrueVal = CGF.Builder.CreateNUWSub(LengthVal, LBVal);
7434 LengthVal = CGF.Builder.CreateSelect(
7435 Cmp, TrueVal, llvm::ConstantInt::get(CGF.SizeTy, 0));
7436 return LengthVal;
7437 }
7438 return CGF.getTypeSize(ExprTy);
7439 }
7440
7441 /// Return the corresponding bits for a given map clause modifier. Add
7442 /// a flag marking the map as a pointer if requested. Add a flag marking the
7443 /// map as the first one of a series of maps that relate to the same map
7444 /// expression.
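/// For example, 'map(always, close, tofrom: x)' yields
/// TO | FROM | ALWAYS | CLOSE, with PTR_AND_OBJ and/or TARGET_PARAM
/// added on top when requested via the corresponding arguments.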
7445 OpenMPOffloadMappingFlags getMapTypeBits(
7446 OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
7447 ArrayRef<OpenMPMotionModifierKind> MotionModifiers, bool IsImplicit,
7448 bool AddPtrFlag, bool AddIsTargetParamFlag, bool IsNonContiguous) const {
7449 OpenMPOffloadMappingFlags Bits =
7450 IsImplicit ? OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT
7451 : OpenMPOffloadMappingFlags::OMP_MAP_NONE;
7452 switch (MapType) {
7453 case OMPC_MAP_alloc:
7454 case OMPC_MAP_release:
7455 // alloc and release are the default behavior in the runtime library; if
7456 // we don't pass any bits for alloc/release, that is what the runtime
7457 // will do. Therefore, we don't need to signal anything for these two
7458 // type modifiers.
7459 break;
7460 case OMPC_MAP_to:
7461 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_TO;
7462 break;
7463 case OMPC_MAP_from:
7464 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_FROM;
7465 break;
7466 case OMPC_MAP_tofrom:
7467 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_TO |
7468 OpenMPOffloadMappingFlags::OMP_MAP_FROM;
7469 break;
7470 case OMPC_MAP_delete:
7471 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_DELETE;
7472 break;
7473 case OMPC_MAP_unknown:
7474 llvm_unreachable("Unexpected map type!");
7475 }
7476 if (AddPtrFlag)
7477 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ;
7478 if (AddIsTargetParamFlag)
7479 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;
7480 if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_always))
7481 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_ALWAYS;
7482 if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_close))
7483 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_CLOSE;
7484 if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_present) ||
7485 llvm::is_contained(MotionModifiers, OMPC_MOTION_MODIFIER_present))
7486 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_PRESENT;
7487 if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_ompx_hold))
7488 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD;
7489 if (IsNonContiguous)
7490 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_NON_CONTIG;
7491 return Bits;
7492 }
7493
7494 /// Return true if the provided expression is a final array section. A
7495 /// final array section is one whose length can't be proved to be one.
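/// E.g., 'a[2:1]' is not a final array section (its length is the
/// constant 1), while 'a[2:n]' and 'a[2:]' over an array with more than
/// one element are.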
7496 bool isFinalArraySectionExpression(const Expr *E) const {
7497 const auto *OASE = dyn_cast<ArraySectionExpr>(E);
7498
7499 // It is not an array section and therefore not a unity-size one.
7500 if (!OASE)
7501 return false;
7502
7503 // An array section with no colon always refers to a single element.
7504 if (OASE->getColonLocFirst().isInvalid())
7505 return false;
7506
7507 const Expr *Length = OASE->getLength();
7508
7509 // If we don't have a length, we have to check if the array has size 1
7510 // for this dimension. Also, we should always expect a length if the
7511 // base type is a pointer.
7512 if (!Length) {
7513 QualType BaseQTy = ArraySectionExpr::getBaseOriginalType(
7514 OASE->getBase()->IgnoreParenImpCasts())
7515 .getCanonicalType();
7516 if (const auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr()))
7517 return ATy->getSExtSize() != 1;
7518 // If we don't have a constant dimension length, we have to consider
7519 // the current section as having any size, so it is not necessarily
7520 // unitary. If it happens to be unity-sized, that's the user's fault.
7521 return true;
7522 }
7523
7524 // Check if the length evaluates to 1.
7525 Expr::EvalResult Result;
7526 if (!Length->EvaluateAsInt(Result, CGF.getContext()))
7527 return true; // Can have a size greater than 1.
7528
7529 llvm::APSInt ConstLength = Result.Val.getInt();
7530 return ConstLength.getSExtValue() != 1;
7531 }
7532
7533 /// Emit an attach entry into \p CombinedInfo, using the information from \p
7534 /// AttachInfo. For example, for a map of form `int *p; ... map(p[1:10])`,
7535 /// an attach entry has the following form:
7536 /// &p, &p[1], sizeof(void*), ATTACH
7537 void emitAttachEntry(CodeGenFunction &CGF, MapCombinedInfoTy &CombinedInfo,
7538 const AttachInfoTy &AttachInfo) const {
7539 assert(AttachInfo.isValid() &&
7540 "Expected valid attach pointer/pointee information!");
7541
7542 // Size is the size of the pointer itself - use the pointer size, not
7543 // the size of BaseDecl.
7544 llvm::Value *PointerSize = CGF.Builder.CreateIntCast(
7545 llvm::ConstantInt::get(
7546 CGF.CGM.SizeTy, CGF.getContext()
7547 .getTypeSizeInChars(CGF.getContext().VoidPtrTy)
7548 .getQuantity()),
7549 CGF.Int64Ty, /*isSigned=*/true);
7550
7551 CombinedInfo.Exprs.emplace_back(AttachInfo.AttachPtrDecl,
7552 AttachInfo.AttachMapExpr);
7553 CombinedInfo.BasePointers.push_back(
7554 AttachInfo.AttachPtrAddr.emitRawPointer(CGF));
7555 CombinedInfo.DevicePtrDecls.push_back(nullptr);
7556 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
7557 CombinedInfo.Pointers.push_back(
7558 AttachInfo.AttachPteeAddr.emitRawPointer(CGF));
7559 CombinedInfo.Sizes.push_back(PointerSize);
7560 CombinedInfo.Types.push_back(OpenMPOffloadMappingFlags::OMP_MAP_ATTACH);
7561 CombinedInfo.Mappers.push_back(nullptr);
7562 CombinedInfo.NonContigInfo.Dims.push_back(1);
7563 }
7564
7565 /// A helper class to copy structures with overlapped elements, i.e. those
7566 /// which have mappings of both "s" and "s.mem". Consecutive elements that
7567 /// are not explicitly copied have mapping nodes synthesized for them,
7568 /// taking care to avoid generating zero-sized copies.
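/// E.g., for
/// struct T { int a; int b; int c; } t;
/// mapped together with an overlapping map of 't.b', the whole-struct
/// copy is synthesized as two chunks, [&t.a, &t.b) and [&t.c, one past
/// t.c), so the separately mapped 't.b' is not clobbered.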
7569 class CopyOverlappedEntryGaps {
7570 CodeGenFunction &CGF;
7571 MapCombinedInfoTy &CombinedInfo;
7572 OpenMPOffloadMappingFlags Flags = OpenMPOffloadMappingFlags::OMP_MAP_NONE;
7573 const ValueDecl *MapDecl = nullptr;
7574 const Expr *MapExpr = nullptr;
7575 Address BP = Address::invalid();
7576 bool IsNonContiguous = false;
7577 uint64_t DimSize = 0;
7578 // These elements track the position as the struct is iterated over
7579 // (in order of increasing element address).
7580 const RecordDecl *LastParent = nullptr;
7581 uint64_t Cursor = 0;
7582 unsigned LastIndex = -1u;
7583 Address LB = Address::invalid();
7584
7585 public:
7586 CopyOverlappedEntryGaps(CodeGenFunction &CGF,
7587 MapCombinedInfoTy &CombinedInfo,
7588 OpenMPOffloadMappingFlags Flags,
7589 const ValueDecl *MapDecl, const Expr *MapExpr,
7590 Address BP, Address LB, bool IsNonContiguous,
7591 uint64_t DimSize)
7592 : CGF(CGF), CombinedInfo(CombinedInfo), Flags(Flags), MapDecl(MapDecl),
7593 MapExpr(MapExpr), BP(BP), IsNonContiguous(IsNonContiguous),
7594 DimSize(DimSize), LB(LB) {}
7595
7596 void processField(
7597 const OMPClauseMappableExprCommon::MappableComponent &MC,
7598 const FieldDecl *FD,
7599 llvm::function_ref<LValue(CodeGenFunction &, const MemberExpr *)>
7600 EmitMemberExprBase) {
7601 const RecordDecl *RD = FD->getParent();
7602 const ASTRecordLayout &RL = CGF.getContext().getASTRecordLayout(RD);
7603 uint64_t FieldOffset = RL.getFieldOffset(FD->getFieldIndex());
7604 uint64_t FieldSize =
7605 CGF.getContext().getTypeSize(FD->getType());
7606 Address ComponentLB = Address::invalid();
7607
7608 if (FD->getType()->isLValueReferenceType()) {
7609 const auto *ME = cast<MemberExpr>(MC.getAssociatedExpression());
7610 LValue BaseLVal = EmitMemberExprBase(CGF, ME);
7611 ComponentLB =
7612 CGF.EmitLValueForFieldInitialization(BaseLVal, FD).getAddress();
7613 } else {
7614 ComponentLB =
7615 CGF.EmitOMPSharedLValue(MC.getAssociatedExpression()).getAddress();
7616 }
7617
7618 if (!LastParent)
7619 LastParent = RD;
7620 if (FD->getParent() == LastParent) {
7621 if (FD->getFieldIndex() != LastIndex + 1)
7622 copyUntilField(FD, ComponentLB);
7623 } else {
7624 LastParent = FD->getParent();
7625 if (((int64_t)FieldOffset - (int64_t)Cursor) > 0)
7626 copyUntilField(FD, ComponentLB);
7627 }
7628 Cursor = FieldOffset + FieldSize;
7629 LastIndex = FD->getFieldIndex();
7630 LB = CGF.Builder.CreateConstGEP(ComponentLB, 1);
7631 }
7632
7633 void copyUntilField(const FieldDecl *FD, Address ComponentLB) {
7634 llvm::Value *ComponentLBPtr = ComponentLB.emitRawPointer(CGF);
7635 llvm::Value *LBPtr = LB.emitRawPointer(CGF);
7636 llvm::Value *Size =
7637 CGF.Builder.CreatePtrDiff(CGF.Int8Ty, ComponentLBPtr, LBPtr);
7638 copySizedChunk(LBPtr, Size);
7639 }
7640
7641 void copyUntilEnd(Address HB) {
7642 if (LastParent) {
7643 const ASTRecordLayout &RL =
7644 CGF.getContext().getASTRecordLayout(LastParent);
7645 if ((uint64_t)CGF.getContext().toBits(RL.getSize()) <= Cursor)
7646 return;
7647 }
7648 llvm::Value *LBPtr = LB.emitRawPointer(CGF);
7649 llvm::Value *Size = CGF.Builder.CreatePtrDiff(
7650 CGF.Int8Ty, CGF.Builder.CreateConstGEP(HB, 1).emitRawPointer(CGF),
7651 LBPtr);
7652 copySizedChunk(LBPtr, Size);
7653 }
7654
7655 void copySizedChunk(llvm::Value *Base, llvm::Value *Size) {
7656 CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
7657 CombinedInfo.BasePointers.push_back(BP.emitRawPointer(CGF));
7658 CombinedInfo.DevicePtrDecls.push_back(nullptr);
7659 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
7660 CombinedInfo.Pointers.push_back(Base);
7661 CombinedInfo.Sizes.push_back(
7662 CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
7663 CombinedInfo.Types.push_back(Flags);
7664 CombinedInfo.Mappers.push_back(nullptr);
7665 CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize : 1);
7666 }
7667 };
7668
7669 /// Generate the base pointers, section pointers, sizes, map type bits, and
7670 /// user-defined mappers (all included in \a CombinedInfo) for the provided
7671 /// map type, map or motion modifiers, and expression components.
7672 /// \a IsFirstComponent should be set to true if the provided set of
7673 /// components is the first associated with a capture.
7674 void generateInfoForComponentList(
7675 OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
7676 ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
7677 OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
7678 MapCombinedInfoTy &CombinedInfo,
7679 MapCombinedInfoTy &StructBaseCombinedInfo,
7680 StructRangeInfoTy &PartialStruct, AttachInfoTy &AttachInfo,
7681 bool IsFirstComponentList, bool IsImplicit,
7682 bool GenerateAllInfoForClauses, const ValueDecl *Mapper = nullptr,
7683 bool ForDeviceAddr = false, const ValueDecl *BaseDecl = nullptr,
7684 const Expr *MapExpr = nullptr,
7685 ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
7686 OverlappedElements = {}) const {
7687
7688 // The following summarizes what has to be generated for each map and the
7689 // types below. The generated information is expressed in this order:
7690 // base pointer, section pointer, size, flags
7691 // (to add to the ones that come from the map type and modifier).
7692 // Entries annotated with (+) are only generated for "target" constructs,
7693 // and only if the variable at the beginning of the expression is used in
7694 // the region.
7695 //
7696 // double d;
7697 // int i[100];
7698 // float *p;
7699 // int **a = &i;
7700 //
7701 // struct S1 {
7702 // int i;
7703 // float f[50];
7704 // }
7705 // struct S2 {
7706 // int i;
7707 // float f[50];
7708 // S1 s;
7709 // double *p;
7710 // double *&pref;
7711 // struct S2 *ps;
7712 // int &ref;
7713 // }
7714 // S2 s;
7715 // S2 *ps;
7716 //
7717 // map(d)
7718 // &d, &d, sizeof(double), TARGET_PARAM | TO | FROM
7719 //
7720 // map(i)
7721 // &i, &i, 100*sizeof(int), TARGET_PARAM | TO | FROM
7722 //
7723 // map(i[1:23])
7724 // &i(=&i[0]), &i[1], 23*sizeof(int), TARGET_PARAM | TO | FROM
7725 //
7726 // map(p)
7727 // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM
7728 //
7729 // map(p[1:24])
7730 // p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM // map pointee
7731 // &p, &p[1], sizeof(void*), ATTACH // attach pointer/pointee, if both
7732 // // are present, and either is new
7733 //
7734 // map(([22])p)
7735 // p, p, 22*sizeof(float), TARGET_PARAM | TO | FROM
7736 // &p, p, sizeof(void*), ATTACH
7737 //
7738 // map((*a)[0:3])
7739 // a, a, 0, TARGET_PARAM | IMPLICIT // (+)
7740 // (*a)[0], &(*a)[0], 3 * sizeof(int), TO | FROM
7741 // &(*a), &(*a)[0], sizeof(void*), ATTACH
7742 // (+) Only on target, if a is used in the region
7743 // Note: Since the attach base-pointer is `*a`, which is not a scalar
7744 // variable, it doesn't determine the clause on `a`. `a` is mapped using
7745 // a zero-length-array-section map by generateDefaultMapInfo, if it is
7746 // referenced in the target region, because it is a pointer.
7747 //
7748 // map(**a)
7749 // a, a, 0, TARGET_PARAM | IMPLICIT // (+)
7750 // &(*a)[0], &(*a)[0], sizeof(int), TO | FROM
7751 // &(*a), &(*a)[0], sizeof(void*), ATTACH
7752 // (+) Only on target, if a is used in the region
7753 //
7754 // map(s)
7755 // FIXME: This needs to also imply map(ref_ptr_ptee: s.ref), since the
7756 // effect is supposed to be same as if the user had a map for every element
7757 // of the struct. We currently do a shallow-map of s.
7758 // &s, &s, sizeof(S2), TARGET_PARAM | TO | FROM
7759 //
7760 // map(s.i)
7761 // &s, &(s.i), sizeof(int), TARGET_PARAM | TO | FROM
7762 //
7763 // map(s.s.f)
7764 // &s, &(s.s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
7765 //
7766 // map(s.p)
7767 // &s, &(s.p), sizeof(double*), TARGET_PARAM | TO | FROM
7768 //
7769 // map(to: s.p[:22])
7770 // &s, &(s.p), sizeof(double*), TARGET_PARAM | IMPLICIT // (+)
7771 // &(s.p[0]), &(s.p[0]), 22 * sizeof(double), TO
7772 // &(s.p), &(s.p[0]), sizeof(void*), ATTACH
7773 //
7774 // map(to: s.ref)
7775 // &s, &(ptr(s.ref)), sizeof(int*), TARGET_PARAM (*)
7776 // &s, &(ptee(s.ref)), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
7777 // (*) alloc space for struct members, only this is a target parameter.
7778 // (**) map the pointer (nothing to be mapped in this example) (the compiler
7779 // optimizes this entry out, same in the examples below)
7780 // (***) map the pointee (map: to)
7781 // Note: ptr(s.ref) represents the referring pointer of s.ref
7782 // ptee(s.ref) represents the referenced pointee of s.ref
7783 //
7784 // map(to: s.pref)
7785 // &s, &(ptr(s.pref)), sizeof(double**), TARGET_PARAM
7786 // &s, &(ptee(s.pref)), sizeof(double*), MEMBER_OF(1) | PTR_AND_OBJ | TO
7787 //
7788 // map(to: s.pref[:22])
7789 // &s, &(ptr(s.pref)), sizeof(double**), TARGET_PARAM | IMPLICIT // (+)
7790 // &s, &(ptee(s.pref)), sizeof(double*), MEMBER_OF(1) | PTR_AND_OBJ | TO |
7791 // FROM | IMPLICIT // (+)
7792 // &(ptee(s.pref)[0]), &(ptee(s.pref)[0]), 22 * sizeof(double), TO
7793 // &(ptee(s.pref)), &(ptee(s.pref)[0]), sizeof(void*), ATTACH
7794 //
7795 // map(s.ps)
7796 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM
7797 //
7798 // map(from: s.ps->s.i)
7799 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM | IMPLICIT // (+)
7800 // &(s.ps[0]), &(s.ps->s.i), sizeof(int), FROM
7801 // &(s.ps), &(s.ps->s.i), sizeof(void*), ATTACH
7802 //
7803 // map(to: s.ps->ps)
7804 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM | IMPLICIT // (+)
7805 // &(s.ps[0]), &(s.ps->ps), sizeof(S2*), TO
7806 // &(s.ps), &(s.ps->ps), sizeof(void*), ATTACH
7807 //
7808 // map(s.ps->ps->ps)
7809 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM | IMPLICIT // (+)
7810 // &(s.ps->ps[0]), &(s.ps->ps->ps), sizeof(S2*), TO
7811 // &(s.ps->ps), &(s.ps->ps->ps), sizeof(void*), ATTACH
7812 //
7813 // map(to: s.ps->ps->s.f[:22])
7814 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM | IMPLICIT // (+)
7815 // &(s.ps->ps[0]), &(s.ps->ps->s.f[0]), 22*sizeof(float), TO
7816 // &(s.ps->ps), &(s.ps->ps->s.f[0]), sizeof(void*), ATTACH
7817 //
7818 // map(ps)
7819 // &ps, &ps, sizeof(S2*), TARGET_PARAM | TO | FROM
7820 //
7821 // map(ps->i)
7822 // ps, &(ps->i), sizeof(int), TARGET_PARAM | TO | FROM
7823 // &ps, &(ps->i), sizeof(void*), ATTACH
7824 //
7825 // map(ps->s.f)
7826 // ps, &(ps->s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
7827 // &ps, &(ps->s.f[0]), sizeof(ps), ATTACH
7828 //
7829 // map(from: ps->p)
7830 // ps, &(ps->p), sizeof(double*), TARGET_PARAM | FROM
7831 // &ps, &(ps->p), sizeof(ps), ATTACH
7832 //
7833 // map(to: ps->p[:22])
7834 // ps, &(ps[0]), 0, TARGET_PARAM | IMPLICIT // (+)
7835 // &(ps->p[0]), &(ps->p[0]), 22*sizeof(double), TO
7836 // &(ps->p), &(ps->p[0]), sizeof(void*), ATTACH
7837 //
7838 // map(ps->ps)
7839 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM | TO | FROM
7840 // &ps, &(ps->ps), sizeof(ps), ATTACH
7841 //
7842 // map(from: ps->ps->s.i)
7843 // ps, &(ps[0]), 0, TARGET_PARAM | IMPLICIT // (+)
7844 // &(ps->ps[0]), &(ps->ps->s.i), sizeof(int), FROM
7845 // &(ps->ps), &(ps->ps->s.i), sizeof(void*), ATTACH
7846 //
7847 // map(from: ps->ps->ps)
7848 // ps, &ps[0], 0, TARGET_PARAM | IMPLICIT // (+)
7849 // &(ps->ps[0]), &(ps->ps->ps), sizeof(S2*), FROM
7850 // &(ps->ps), &(ps->ps->ps), sizeof(void*), ATTACH
7851 //
7852 // map(ps->ps->ps->ps)
7853 // ps, &ps[0], 0, TARGET_PARAM | IMPLICIT // (+)
7854 // &(ps->ps->ps[0]), &(ps->ps->ps->ps), sizeof(S2*), FROM
7855 // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(void*), ATTACH
7856 //
7857 // map(to: ps->ps->ps->s.f[:22])
7858 // ps, &ps[0], 0, TARGET_PARAM | IMPLICIT // (+)
7859 // &(ps->ps->ps[0]), &(ps->ps->ps->s.f[0]), 22*sizeof(float), TO
7860 // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), sizeof(void*), ATTACH
7861 //
7862 // map(to: s.f[:22]) map(from: s.p[:33])
7863 // On target, and if s is used in the region:
7864 //
7865 // &s, &(s.f[0]), 50*sizeof(float) +
7866 // sizeof(struct S1) +
7867 // sizeof(double*) (**), TARGET_PARAM
7868 // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | TO
7869 // &s, &(s.p), sizeof(double*), MEMBER_OF(1) | TO |
7870 // FROM | IMPLICIT
7871 // &(s.p[0]), &(s.p[0]), 33*sizeof(double), FROM
7872 // &(s.p), &(s.p[0]), sizeof(void*), ATTACH
7873 // (**) allocate contiguous space needed to fit all mapped members even if
7874 // we allocate space for members not mapped (in this example,
7875 // s.f[22..49] and s.s are not mapped, yet we must allocate space for
7876 // them as well because they fall between &s.f[0] and &s.p)
7877 //
7878 // On other constructs, and, if s is not used in the region, on target:
7879 // &s, &(s.f[0]), 22*sizeof(float), TO
7880 // &(s.p[0]), &(s.p[0]), 33*sizeof(double), FROM
7881 // &(s.p), &(s.p[0]), sizeof(void*), ATTACH
7882 //
7883 // map(from: s.f[:22]) map(to: ps->p[:33])
7884 // &s, &(s.f[0]), 22*sizeof(float), TARGET_PARAM | FROM
7885 // &ps[0], &ps[0], 0, TARGET_PARAM | IMPLICIT // (+)
7886 // &(ps->p[0]), &(ps->p[0]), 33*sizeof(double), TO
7887 // &(ps->p), &(ps->p[0]), sizeof(void*), ATTACH
7888 //
7889 // map(from: s.f[:22], s.s) map(to: ps->p[:33])
7890 // &s, &(s.f[0]), 50*sizeof(float) +
7891 // sizeof(struct S1), TARGET_PARAM
7892 // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | FROM
7893 // &s, &(s.s), sizeof(struct S1), MEMBER_OF(1) | FROM
7894 // ps, &ps[0], 0, TARGET_PARAM | IMPLICIT // (+)
7895 // &(ps->p[0]), &(ps->p[0]), 33*sizeof(double), TO
7896 // &(ps->p), &(ps->p[0]), sizeof(void*), ATTACH
7897 //
7898 // map(p[:100], p)
7899 // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM
7900 // p, &p[0], 100*sizeof(float), TO | FROM
7901 // &p, &p[0], sizeof(float*), ATTACH
7902
7903 // Track if the map information being generated is the first for a capture.
7904 bool IsCaptureFirstInfo = IsFirstComponentList;
7905 // When the variable is on a declare target link or in a to clause with
7906 // unified memory, a reference is needed to hold the host/device address
7907 // of the variable.
7908 bool RequiresReference = false;
7909
7910 // Scan the components from the base to the complete expression.
7911 auto CI = Components.rbegin();
7912 auto CE = Components.rend();
7913 auto I = CI;
7914
7915 // Track if the map information being generated is the first for a list of
7916 // components.
7917 bool IsExpressionFirstInfo = true;
7918 bool FirstPointerInComplexData = false;
7919 Address BP = Address::invalid();
7920 Address FinalLowestElem = Address::invalid();
7921 const Expr *AssocExpr = I->getAssociatedExpression();
7922 const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr);
7923 const auto *OASE = dyn_cast<ArraySectionExpr>(AssocExpr);
7924 const auto *OAShE = dyn_cast<OMPArrayShapingExpr>(AssocExpr);
7925
7926 // Get the pointer-attachment base-pointer for the given list, if any.
7927 const Expr *AttachPtrExpr = getAttachPtrExpr(Components);
7928 auto [AttachPtrAddr, AttachPteeBaseAddr] =
7929 getAttachPtrAddrAndPteeBaseAddr(AttachPtrExpr, CGF);
7930
7931 bool HasAttachPtr = AttachPtrExpr != nullptr;
7932 bool FirstComponentIsForAttachPtr = AssocExpr == AttachPtrExpr;
7933 bool SeenAttachPtr = FirstComponentIsForAttachPtr;
7934
7935 if (FirstComponentIsForAttachPtr) {
7936 // No need to process AttachPtr here. It will be processed at the end
7937 // after we have computed the pointee's address.
7938 ++I;
7939 } else if (isa<MemberExpr>(AssocExpr)) {
7940 // The base is the 'this' pointer. The content of the pointer is going
7941 // to be the base of the field being mapped.
7942 BP = CGF.LoadCXXThisAddress();
7943 } else if ((AE && isa<CXXThisExpr>(AE->getBase()->IgnoreParenImpCasts())) ||
7944 (OASE &&
7945 isa<CXXThisExpr>(OASE->getBase()->IgnoreParenImpCasts()))) {
7946 BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress();
7947 } else if (OAShE &&
7948 isa<CXXThisExpr>(OAShE->getBase()->IgnoreParenCasts())) {
7949 BP = Address(
7950 CGF.EmitScalarExpr(OAShE->getBase()),
7951 CGF.ConvertTypeForMem(OAShE->getBase()->getType()->getPointeeType()),
7952 CGF.getContext().getTypeAlignInChars(OAShE->getBase()->getType()));
7953 } else {
7954 // The base is the reference to the variable.
7955 // BP = &Var.
7956 BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress();
7957 if (const auto *VD =
7958 dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) {
7959 if (std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
7960 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) {
7961 if ((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
7962 ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
7963 *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
7964 CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())) {
7965 RequiresReference = true;
7966 BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
7967 }
7968 }
7969 }
7970
7971 // If the variable is a pointer and is being dereferenced (i.e. is not
7972 // the last component), the base has to be the pointer itself, not its
7973 // reference. References are ignored for mapping purposes.
7974 QualType Ty =
7975 I->getAssociatedDeclaration()->getType().getNonReferenceType();
7976 if (Ty->isAnyPointerType() && std::next(I) != CE) {
7977 // No need to generate individual map information for the pointer, it
7978 // can be associated with the combined storage if shared memory mode is
7979 // active or the base declaration is not a global variable.
7980 const auto *VD = dyn_cast<VarDecl>(I->getAssociatedDeclaration());
7981 if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
7982 !VD || VD->hasLocalStorage() || HasAttachPtr)
7983 BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
7984 else
7985 FirstPointerInComplexData = true;
7986 ++I;
7987 }
7988 }
7989
7990 // Track whether a component of the list should be marked as MEMBER_OF some
7991 // combined entry (for partial structs). Only the first PTR_AND_OBJ entry
7992 // in a component list should be marked as MEMBER_OF; all subsequent entries
7993 // do not belong to the base struct. E.g.
7994 // struct S2 s;
7995 // s.ps->ps->ps->f[:]
7996 // (1) (2) (3) (4)
7997 // ps(1) is a member pointer, ps(2) is a pointee of ps(1), so it is a
7998 // PTR_AND_OBJ entry; the PTR is ps(1), so MEMBER_OF the base struct. ps(3)
7999 // is the pointee of ps(2), which is not a member of struct s, so it should not
8000 // be marked as such (it is still PTR_AND_OBJ).
8001 // The variable is initialized to false so that PTR_AND_OBJ entries which
8002 // are not struct members are not considered (e.g. array of pointers to
8003 // data).
8004 bool ShouldBeMemberOf = false;
8005
8006 // Variable keeping track of whether or not we have encountered a component
8007 // in the component list which is a member expression. Useful when we have a
8008 // pointer or a final array section, in which case it is the previous
8009 // component in the list which tells us whether we have a member expression.
8010 // E.g. X.f[:]
8011 // While processing the final array section "[:]" it is "f" which tells us
8012 // whether we are dealing with a member of a declared struct.
8013 const MemberExpr *EncounteredME = nullptr;
8014
8015 // Track the total number of dimensions. Start from one for the dummy
8016 // dimension.
8017 uint64_t DimSize = 1;
8018
8019 // Detects non-contiguous updates due to strided accesses.
8020 // Sets the 'IsNonContiguous' flag so that the 'MapType' bits are set
8021 // correctly when generating information to be passed to the runtime. The
8022 // flag is set to true if any array section has a constant stride that is
8023 // not equal to one; sections whose stride is not a constant expression
8024 // are not flagged here.
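// E.g., map(arr[0:8:2]) is detected as non-contiguous here, while
// map(arr[0:8]) and map(arr[0:8:1]) are not.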
8025 bool IsNonContiguous =
8026 CombinedInfo.NonContigInfo.IsNonContiguous ||
8027 any_of(Components, [&](const auto &Component) {
8028 const auto *OASE =
8029 dyn_cast<ArraySectionExpr>(Component.getAssociatedExpression());
8030 if (!OASE)
8031 return false;
8032
8033 const Expr *StrideExpr = OASE->getStride();
8034 if (!StrideExpr)
8035 return false;
8036
8037 const auto Constant =
8038 StrideExpr->getIntegerConstantExpr(CGF.getContext());
8039 if (!Constant)
8040 return false;
8041
8042 return !Constant->isOne();
8043 });
8044
8045 bool IsPrevMemberReference = false;
8046
8047 bool IsPartialMapped =
8048 !PartialStruct.PreliminaryMapData.BasePointers.empty();
8049
8050 // We need to check if we will be encountering any member expressions
8051 // (MEs). If we do not encounter any ME it means we will be mapping the
8052 // whole struct. In that case we need to skip adding an entry for the
8053 // struct to the CombinedInfo list and instead add an entry to the
8054 // StructBaseCombinedInfo list only when generating all info for clauses.
8055 bool IsMappingWholeStruct = true;
8056 if (!GenerateAllInfoForClauses) {
8057 IsMappingWholeStruct = false;
8058 } else {
8059 for (auto TempI = I; TempI != CE; ++TempI) {
8060 const MemberExpr *PossibleME =
8061 dyn_cast<MemberExpr>(TempI->getAssociatedExpression());
8062 if (PossibleME) {
8063 IsMappingWholeStruct = false;
8064 break;
8065 }
8066 }
8067 }
8068
8069 bool SeenFirstNonBinOpExprAfterAttachPtr = false;
8070 for (; I != CE; ++I) {
8071 // If we have a valid attach-ptr, we skip processing all components until
8072 // after the attach-ptr.
8073 if (HasAttachPtr && !SeenAttachPtr) {
8074 SeenAttachPtr = I->getAssociatedExpression() == AttachPtrExpr;
8075 continue;
8076 }
8077
8078 // After finding the attach pointer, skip binary operators, to move
8079 // past expressions like (p + 10) in a map like map(*(p + 10)), where
8080 // p is the attach-ptr.
8081 if (HasAttachPtr && !SeenFirstNonBinOpExprAfterAttachPtr) {
8082 const auto *BO = dyn_cast<BinaryOperator>(I->getAssociatedExpression());
8083 if (BO)
8084 continue;
8085
8086 // Found the first non-binary-operator component after the attach-ptr.
8087 SeenFirstNonBinOpExprAfterAttachPtr = true;
8088 BP = AttachPteeBaseAddr;
8089 }
8090
8091 // If the current component is a member of a struct (parent struct), mark it.
8092 if (!EncounteredME) {
8093 EncounteredME = dyn_cast<MemberExpr>(I->getAssociatedExpression());
8094 // If we encounter a PTR_AND_OBJ entry from now on it should be marked
8095 // as MEMBER_OF the parent struct.
8096 if (EncounteredME) {
8097 ShouldBeMemberOf = true;
8098 // Do not emit as a complex pointer if this is actually not an
8099 // array-like expression.
8100 if (FirstPointerInComplexData) {
8101 QualType Ty = std::prev(I)
8102 ->getAssociatedDeclaration()
8103 ->getType()
8104 .getNonReferenceType();
8105 BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
8106 FirstPointerInComplexData = false;
8107 }
8108 }
8109 }
8110
8111 auto Next = std::next(I);
8112
8113 // We need to generate the addresses and sizes if this is the last
8114 // component, if the component is a pointer or if it is an array section
8115 // whose length can't be proved to be one. If this is a pointer, it
8116 // becomes the base address for the following components.
8117
8118 // A final array section is one whose length can't be proved to be one.
8119 // If the map item is non-contiguous then we don't treat any array section
8120 // as final array section.
8121 bool IsFinalArraySection =
8122 !IsNonContiguous &&
8123 isFinalArraySectionExpression(I->getAssociatedExpression());
8124
8125 // If we have a declaration for the mapping use that, otherwise use
8126 // the base declaration of the map clause.
8127 const ValueDecl *MapDecl = (I->getAssociatedDeclaration())
8128 ? I->getAssociatedDeclaration()
8129 : BaseDecl;
8130 MapExpr = (I->getAssociatedExpression()) ? I->getAssociatedExpression()
8131 : MapExpr;
8132
8133 // Get information on whether the element is a pointer. We have to
8134 // treat array sections specially, given that they are built-in
8135 // types.
8136 const auto *OASE =
8137 dyn_cast<ArraySectionExpr>(I->getAssociatedExpression());
8138 const auto *OAShE =
8139 dyn_cast<OMPArrayShapingExpr>(I->getAssociatedExpression());
8140 const auto *UO = dyn_cast<UnaryOperator>(I->getAssociatedExpression());
8141 const auto *BO = dyn_cast<BinaryOperator>(I->getAssociatedExpression());
8142 bool IsPointer =
8143 OAShE ||
8144 (OASE && ArraySectionExpr::getBaseOriginalType(OASE)
8145 .getCanonicalType()
8146 ->isAnyPointerType()) ||
8147 I->getAssociatedExpression()->getType()->isAnyPointerType();
8148 bool IsMemberReference = isa<MemberExpr>(I->getAssociatedExpression()) &&
8149 MapDecl &&
8150 MapDecl->getType()->isLValueReferenceType();
8151 bool IsNonDerefPointer = IsPointer &&
8152 !(UO && UO->getOpcode() != UO_Deref) && !BO &&
8153 !IsNonContiguous;
8154
8155 if (OASE)
8156 ++DimSize;
8157
8158 if (Next == CE || IsMemberReference || IsNonDerefPointer ||
8159 IsFinalArraySection) {
8160 // If this is not the last component, we expect the pointer to be
8161 // associated with an array expression or member expression.
8162 assert((Next == CE ||
8163 isa<MemberExpr>(Next->getAssociatedExpression()) ||
8164 isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) ||
8165 isa<ArraySectionExpr>(Next->getAssociatedExpression()) ||
8166 isa<OMPArrayShapingExpr>(Next->getAssociatedExpression()) ||
8167 isa<UnaryOperator>(Next->getAssociatedExpression()) ||
8168 isa<BinaryOperator>(Next->getAssociatedExpression())) &&
8169 "Unexpected expression");
8170
8171 Address LB = Address::invalid();
8172 Address LowestElem = Address::invalid();
8173 auto &&EmitMemberExprBase = [](CodeGenFunction &CGF,
8174 const MemberExpr *E) {
8175 const Expr *BaseExpr = E->getBase();
8176 // If this is s.x, emit s as an lvalue. If it is s->x, emit s as a
8177 // scalar.
8178 LValue BaseLV;
8179 if (E->isArrow()) {
8180 LValueBaseInfo BaseInfo;
8181 TBAAAccessInfo TBAAInfo;
8182 Address Addr =
8183 CGF.EmitPointerWithAlignment(BaseExpr, &BaseInfo, &TBAAInfo);
8184 QualType PtrTy = BaseExpr->getType()->getPointeeType();
8185 BaseLV = CGF.MakeAddrLValue(Addr, PtrTy, BaseInfo, TBAAInfo);
8186 } else {
8187 BaseLV = CGF.EmitOMPSharedLValue(BaseExpr);
8188 }
8189 return BaseLV;
8190 };
8191 if (OAShE) {
8192 LowestElem = LB =
8193 Address(CGF.EmitScalarExpr(OAShE->getBase()),
8194 CGF.ConvertTypeForMem(
8195 OAShE->getBase()->getType()->getPointeeType()),
8196 CGF.getContext().getTypeAlignInChars(
8197 OAShE->getBase()->getType()));
8198 } else if (IsMemberReference) {
8199 const auto *ME = cast<MemberExpr>(I->getAssociatedExpression());
8200 LValue BaseLVal = EmitMemberExprBase(CGF, ME);
8201 LowestElem = CGF.EmitLValueForFieldInitialization(
8202 BaseLVal, cast<FieldDecl>(MapDecl))
8203 .getAddress();
8204 LB = CGF.EmitLoadOfReferenceLValue(LowestElem, MapDecl->getType())
8205 .getAddress();
8206 } else {
8207 LowestElem = LB =
8208 CGF.EmitOMPSharedLValue(I->getAssociatedExpression())
8209 .getAddress();
8210 }
8211
8212 // Save the final LowestElem, to use it as the pointee in attach maps,
8213 // if emitted.
8214 if (Next == CE)
8215 FinalLowestElem = LowestElem;
8216
8217 // If this component is a pointer inside the base struct then we don't
8218 // need to create any entry for it - it will be combined with the object
8219 // it is pointing to into a single PTR_AND_OBJ entry.
8220 bool IsMemberPointerOrAddr =
8221 EncounteredME &&
8222 (((IsPointer || ForDeviceAddr) &&
8223 I->getAssociatedExpression() == EncounteredME) ||
8224 (IsPrevMemberReference && !IsPointer) ||
8225 (IsMemberReference && Next != CE &&
8226 !Next->getAssociatedExpression()->getType()->isPointerType()));
8227 if (!OverlappedElements.empty() && Next == CE) {
8228 // Handle base element with the info for overlapped elements.
8229 assert(!PartialStruct.Base.isValid() && "The base element is set.");
8230 assert(!IsPointer &&
8231 "Unexpected base element with the pointer type.");
8232 // Mark the whole struct as the struct that requires allocation on the
8233 // device.
8234 PartialStruct.LowestElem = {0, LowestElem};
8235 CharUnits TypeSize = CGF.getContext().getTypeSizeInChars(
8236 I->getAssociatedExpression()->getType());
8237 Address HB = CGF.Builder.CreateConstGEP(
8238 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
8239 LowestElem, CGF.VoidPtrTy, CGF.Int8Ty),
8240 TypeSize.getQuantity() - 1);
8241 PartialStruct.HighestElem = {
8242 std::numeric_limits<decltype(
8243 PartialStruct.HighestElem.first)>::max(),
8244 HB};
8245 PartialStruct.Base = BP;
8246 PartialStruct.LB = LB;
8247 assert(
8248 PartialStruct.PreliminaryMapData.BasePointers.empty() &&
8249 "Overlapped elements must be used only once for the variable.");
8250 std::swap(PartialStruct.PreliminaryMapData, CombinedInfo);
8251 // Emit data for non-overlapped data.
8252 OpenMPOffloadMappingFlags Flags =
8253 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF |
8254 getMapTypeBits(MapType, MapModifiers, MotionModifiers, IsImplicit,
8255 /*AddPtrFlag=*/false,
8256 /*AddIsTargetParamFlag=*/false, IsNonContiguous);
8257 CopyOverlappedEntryGaps CopyGaps(CGF, CombinedInfo, Flags, MapDecl,
8258 MapExpr, BP, LB, IsNonContiguous,
8259 DimSize);
8260 // Do bitcopy of all non-overlapped structure elements.
8261 for (OMPClauseMappableExprCommon::MappableExprComponentListRef
8262 Component : OverlappedElements) {
8263 for (const OMPClauseMappableExprCommon::MappableComponent &MC :
8264 Component) {
8265 if (const ValueDecl *VD = MC.getAssociatedDeclaration()) {
8266 if (const auto *FD = dyn_cast<FieldDecl>(VD)) {
8267 CopyGaps.processField(MC, FD, EmitMemberExprBase);
8268 }
8269 }
8270 }
8271 }
8272 CopyGaps.copyUntilEnd(HB);
8273 break;
8274 }
8275 llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression());
8276 // Skip adding an entry in the CurInfo of this combined entry if the
8277 // whole struct is currently being mapped. The struct needs to be added
8278 // in the first position before any data internal to the struct is being
8279 // mapped.
8280 // Skip adding an entry in the CurInfo of this combined entry if the
8281 // PartialStruct.PreliminaryMapData.BasePointers have already been mapped.
8282 if ((!IsMemberPointerOrAddr && !IsPartialMapped) ||
8283 (Next == CE && MapType != OMPC_MAP_unknown)) {
8284 if (!IsMappingWholeStruct) {
8285 CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
8286 CombinedInfo.BasePointers.push_back(BP.emitRawPointer(CGF));
8287 CombinedInfo.DevicePtrDecls.push_back(nullptr);
8288 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
8289 CombinedInfo.Pointers.push_back(LB.emitRawPointer(CGF));
8290 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
8291 Size, CGF.Int64Ty, /*isSigned=*/true));
8292 CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
8293 : 1);
8294 } else {
8295 StructBaseCombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
8296 StructBaseCombinedInfo.BasePointers.push_back(
8297 BP.emitRawPointer(CGF));
8298 StructBaseCombinedInfo.DevicePtrDecls.push_back(nullptr);
8299 StructBaseCombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
8300 StructBaseCombinedInfo.Pointers.push_back(LB.emitRawPointer(CGF));
8301 StructBaseCombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
8302 Size, CGF.Int64Ty, /*isSigned=*/true));
8303 StructBaseCombinedInfo.NonContigInfo.Dims.push_back(
8304 IsNonContiguous ? DimSize : 1);
8305 }
8306
8307 // If Mapper is valid, the last component inherits the mapper.
8308 bool HasMapper = Mapper && Next == CE;
8309 if (!IsMappingWholeStruct)
8310 CombinedInfo.Mappers.push_back(HasMapper ? Mapper : nullptr);
8311 else
8312 StructBaseCombinedInfo.Mappers.push_back(HasMapper ? Mapper
8313 : nullptr);
8314
8315 // We need to add a pointer flag for each map that comes from the
8316 // same expression except for the first one. We also need to signal
8317 // this map is the first one that relates to the current capture
8318 // (there is a set of entries for each capture).
8319 OpenMPOffloadMappingFlags Flags = getMapTypeBits(
8320 MapType, MapModifiers, MotionModifiers, IsImplicit,
8321 !IsExpressionFirstInfo || RequiresReference ||
8322 FirstPointerInComplexData || IsMemberReference,
8323 IsCaptureFirstInfo && !RequiresReference, IsNonContiguous);
8324
8325 if (!IsExpressionFirstInfo || IsMemberReference) {
8326 // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as well,
8327 // then we reset the TO/FROM/ALWAYS/DELETE/CLOSE flags.
8328 if (IsPointer || (IsMemberReference && Next != CE))
8329 Flags &= ~(OpenMPOffloadMappingFlags::OMP_MAP_TO |
8330 OpenMPOffloadMappingFlags::OMP_MAP_FROM |
8331 OpenMPOffloadMappingFlags::OMP_MAP_ALWAYS |
8332 OpenMPOffloadMappingFlags::OMP_MAP_DELETE |
8333 OpenMPOffloadMappingFlags::OMP_MAP_CLOSE);
8334
8335 if (ShouldBeMemberOf) {
8336 // Set placeholder value MEMBER_OF=FFFF to indicate that the flag
8337 // should be later updated with the correct value of MEMBER_OF.
8338 Flags |= OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF;
8339 // From now on, all subsequent PTR_AND_OBJ entries should not be
8340 // marked as MEMBER_OF.
8341 ShouldBeMemberOf = false;
8342 }
8343 }
8344
8345 if (!IsMappingWholeStruct)
8346 CombinedInfo.Types.push_back(Flags);
8347 else
8348 StructBaseCombinedInfo.Types.push_back(Flags);
8349 }
8350
8351 // If we have encountered a member expression so far, keep track of the
8352 // mapped member. If the parent is "*this", then the value declaration
8353 // is nullptr.
8354 if (EncounteredME) {
8355 const auto *FD = cast<FieldDecl>(EncounteredME->getMemberDecl());
8356 unsigned FieldIndex = FD->getFieldIndex();
8357
8358 // Update info about the lowest and highest elements for this struct
8359 if (!PartialStruct.Base.isValid()) {
8360 PartialStruct.LowestElem = {FieldIndex, LowestElem};
8361 if (IsFinalArraySection && OASE) {
8362 Address HB =
8363 CGF.EmitArraySectionExpr(OASE, /*IsLowerBound=*/false)
8364 .getAddress();
8365 PartialStruct.HighestElem = {FieldIndex, HB};
8366 } else {
8367 PartialStruct.HighestElem = {FieldIndex, LowestElem};
8368 }
8369 PartialStruct.Base = BP;
8370 PartialStruct.LB = BP;
8371 } else if (FieldIndex < PartialStruct.LowestElem.first) {
8372 PartialStruct.LowestElem = {FieldIndex, LowestElem};
8373 } else if (FieldIndex > PartialStruct.HighestElem.first) {
8374 if (IsFinalArraySection && OASE) {
8375 Address HB =
8376 CGF.EmitArraySectionExpr(OASE, /*IsLowerBound=*/false)
8377 .getAddress();
8378 PartialStruct.HighestElem = {FieldIndex, HB};
8379 } else {
8380 PartialStruct.HighestElem = {FieldIndex, LowestElem};
8381 }
8382 }
8383 }
8384
8385 // Need to emit combined struct for array sections.
8386 if (IsFinalArraySection || IsNonContiguous)
8387 PartialStruct.IsArraySection = true;
8388
8389 // If we have a final array section, we are done with this expression.
8390 if (IsFinalArraySection)
8391 break;
8392
8393 // The pointer becomes the base for the next element.
8394 if (Next != CE)
8395 BP = IsMemberReference ? LowestElem : LB;
8396 if (!IsPartialMapped)
8397 IsExpressionFirstInfo = false;
8398 IsCaptureFirstInfo = false;
8399 FirstPointerInComplexData = false;
8400 IsPrevMemberReference = IsMemberReference;
8401 } else if (FirstPointerInComplexData) {
8402 QualType Ty = Components.rbegin()
8403 ->getAssociatedDeclaration()
8404 ->getType()
8405 .getNonReferenceType();
8406 BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
8407 FirstPointerInComplexData = false;
8408 }
8409 }
8410 // If we ran into the whole component, allocate the space for the whole
8411 // record.
8412 if (!EncounteredME)
8413 PartialStruct.HasCompleteRecord = true;
8414
8415 // Populate ATTACH information for later processing by emitAttachEntry.
8416 if (shouldEmitAttachEntry(AttachPtrExpr, BaseDecl, CGF, CurDir)) {
8417 AttachInfo.AttachPtrAddr = AttachPtrAddr;
8418 AttachInfo.AttachPteeAddr = FinalLowestElem;
8419 AttachInfo.AttachPtrDecl = BaseDecl;
8420 AttachInfo.AttachMapExpr = MapExpr;
8421 }
8422
8423 if (!IsNonContiguous)
8424 return;
8425
8426 const ASTContext &Context = CGF.getContext();
8427
8428 // To support strides in array sections, we need to initialize the first
8429 // dimension size as 1, the first offset as 0, and the first count as 1.
8430 MapValuesArrayTy CurOffsets = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 0)};
8431 MapValuesArrayTy CurCounts = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)};
8432 MapValuesArrayTy CurStrides;
8433 MapValuesArrayTy DimSizes{llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)};
8434 uint64_t ElementTypeSize;
8435
8436 // Collect Size information for each dimension and get the element size as
8437 // the first Stride. For example, for `int arr[10][10]`, the DimSizes
8438 // should be [10, 10] and the first stride is 4 bytes.
8439 for (const OMPClauseMappableExprCommon::MappableComponent &Component :
8440 Components) {
8441 const Expr *AssocExpr = Component.getAssociatedExpression();
8442 const auto *OASE = dyn_cast<ArraySectionExpr>(AssocExpr);
8443
8444 if (!OASE)
8445 continue;
8446
8447 QualType Ty = ArraySectionExpr::getBaseOriginalType(OASE->getBase());
8448 auto *CAT = Context.getAsConstantArrayType(Ty);
8449 auto *VAT = Context.getAsVariableArrayType(Ty);
8450
8451 // We need all the dimension sizes except for the last dimension.
8452 assert((VAT || CAT || &Component == &*Components.begin()) &&
8453 "Should be either ConstantArray or VariableArray if not the "
8454 "first Component");
8455
8456 // Get element size if CurStrides is empty.
8457 if (CurStrides.empty()) {
8458 const Type *ElementType = nullptr;
8459 if (CAT)
8460 ElementType = CAT->getElementType().getTypePtr();
8461 else if (VAT)
8462 ElementType = VAT->getElementType().getTypePtr();
8463 else if (&Component == &*Components.begin()) {
8464 // If the base is a raw pointer (e.g. T *data with data[a:b:c]),
8465 // there was no earlier CAT/VAT/array handling to establish
8466 // ElementType. Capture the pointee type now so that subsequent
8467 // components (offset/length/stride) have a concrete element type to
8468 // work with. This makes pointer-backed sections behave consistently
8469 // with CAT/VAT/array bases.
8470 if (const auto *PtrType = Ty->getAs<PointerType>())
8471 ElementType = PtrType->getPointeeType().getTypePtr();
8472 } else {
8473 // Any component after the first should never have a raw pointer type;
8474 // by this point, ElementType must already be known (set above or in
8475 // prior array / CAT / VAT handling).
8476 assert(!Ty->isPointerType() &&
8477 "Non-first components should not be raw pointers");
8478 }
8479
8480 // At this stage, if ElementType was a base pointer and we are in the
8481 // first iteration, it has been computed.
8482 if (ElementType) {
8483 // For the case that having pointer as base, we need to remove one
8484 // level of indirection.
8485 if (&Component != &*Components.begin())
8486 ElementType = ElementType->getPointeeOrArrayElementType();
8487 ElementTypeSize =
8488 Context.getTypeSizeInChars(ElementType).getQuantity();
8489 CurStrides.push_back(
8490 llvm::ConstantInt::get(CGF.Int64Ty, ElementTypeSize));
8491 }
8492 }
8493 // Get the dimension value, except for the last dimension, since we
8494 // don't need it.
8495 if (DimSizes.size() < Components.size() - 1) {
8496 if (CAT)
8497 DimSizes.push_back(
8498 llvm::ConstantInt::get(CGF.Int64Ty, CAT->getZExtSize()));
8499 else if (VAT)
8500 DimSizes.push_back(CGF.Builder.CreateIntCast(
8501 CGF.EmitScalarExpr(VAT->getSizeExpr()), CGF.Int64Ty,
8502 /*IsSigned=*/false));
8503 }
8504 }
8505
8506 // Skip the dummy dimension since we already have its information.
8507 auto *DI = DimSizes.begin() + 1;
8508 // Product of dimensions.
8509 llvm::Value *DimProd =
8510 llvm::ConstantInt::get(CGF.CGM.Int64Ty, ElementTypeSize);
8511
8512 // Collect info for non-contiguous maps. Notice that offset, count, and
8513 // stride are only meaningful for array sections, so we insert a null for
8514 // anything other than an array section.
8515 // Also, the sizes of the offset, count, and stride arrays are not the
8516 // same as those of pointers, base_pointers, sizes, or dims. Instead,
8517 // they match the number of non-contiguous declarations in the target
8518 // update to/from clause.
8519 for (const OMPClauseMappableExprCommon::MappableComponent &Component :
8520 Components) {
8521 const Expr *AssocExpr = Component.getAssociatedExpression();
8522
8523 if (const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr)) {
8524 llvm::Value *Offset = CGF.Builder.CreateIntCast(
8525 CGF.EmitScalarExpr(AE->getIdx()), CGF.Int64Ty,
8526 /*isSigned=*/false);
8527 CurOffsets.push_back(Offset);
8528 CurCounts.push_back(llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/1));
8529 CurStrides.push_back(CurStrides.back());
8530 continue;
8531 }
8532
8533 const auto *OASE = dyn_cast<ArraySectionExpr>(AssocExpr);
8534
8535 if (!OASE)
8536 continue;
8537
8538 // Offset
8539 const Expr *OffsetExpr = OASE->getLowerBound();
8540 llvm::Value *Offset = nullptr;
8541 if (!OffsetExpr) {
8542 // If offset is absent, then we just set it to zero.
8543 Offset = llvm::ConstantInt::get(CGF.Int64Ty, 0);
8544 } else {
8545 Offset = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(OffsetExpr),
8546 CGF.Int64Ty,
8547 /*isSigned=*/false);
8548 }
8549 CurOffsets.push_back(Offset);
8550
8551 // Count
8552 const Expr *CountExpr = OASE->getLength();
8553 llvm::Value *Count = nullptr;
8554 if (!CountExpr) {
8555 // In Clang, once a higher dimension is an array section, we construct
8556 // all the lower dimensions as array sections; however, for a case like
8557 // arr[0:2][2], Clang constructs the inner dimension as an array section
8558 // even though it actually is not in array-section form per the spec.
8559 if (!OASE->getColonLocFirst().isValid() &&
8560 !OASE->getColonLocSecond().isValid()) {
8561 Count = llvm::ConstantInt::get(CGF.Int64Ty, 1);
8562 } else {
8563 // OpenMP 5.0, 2.1.5 Array Sections, Description.
8564 // When the length is absent it defaults to ⌈(size −
8565 // lower-bound)/stride⌉, where size is the size of the array
8566 // dimension.
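// E.g., for a dimension of size 10 with lower-bound 2 and stride 2, the
// count is (10 - 2) / 2 = 4 elements: indices 2, 4, 6, and 8.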
8567 const Expr *StrideExpr = OASE->getStride();
8568 llvm::Value *Stride =
8569 StrideExpr
8570 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr),
8571 CGF.Int64Ty, /*isSigned=*/false)
8572 : nullptr;
8573 if (Stride)
8574 Count = CGF.Builder.CreateUDiv(
8575 CGF.Builder.CreateNUWSub(*DI, Offset), Stride);
8576 else
8577 Count = CGF.Builder.CreateNUWSub(*DI, Offset);
8578 }
8579 } else {
8580 Count = CGF.EmitScalarExpr(CountExpr);
8581 }
8582 Count = CGF.Builder.CreateIntCast(Count, CGF.Int64Ty, /*isSigned=*/false);
8583 CurCounts.push_back(Count);
8584
8585 // Stride_n' = Stride_n * (D_0 * D_1 ... * D_n-1) * Unit size
8586 // Take `int arr[5][5][5]` and `arr[0:2:2][1:2:1][0:2:2]` as an example:
8587 // Offset Count Stride
8588 // D0 0 1 4 (int) <- dummy dimension
8589 // D1 0 2 8 (2 * (1) * 4)
8590 // D2 1 2 20 (1 * (1 * 5) * 4)
8591 // D3 0 2 200 (2 * (1 * 5 * 4) * 4)
8592 const Expr *StrideExpr = OASE->getStride();
8593 llvm::Value *Stride =
8594 StrideExpr
8595 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr),
8596 CGF.Int64Ty, /*isSigned=*/false)
8597 : nullptr;
8598 DimProd = CGF.Builder.CreateNUWMul(DimProd, *(DI - 1));
8599 if (Stride)
8600 CurStrides.push_back(CGF.Builder.CreateNUWMul(DimProd, Stride));
8601 else
8602 CurStrides.push_back(DimProd);
8603 if (DI != DimSizes.end())
8604 ++DI;
8605 }
8606
8607 CombinedInfo.NonContigInfo.Offsets.push_back(CurOffsets);
8608 CombinedInfo.NonContigInfo.Counts.push_back(CurCounts);
8609 CombinedInfo.NonContigInfo.Strides.push_back(CurStrides);
8610 }
8611
8612 /// Return the adjusted map modifiers if the declaration a capture refers to
8613 /// appears in a first-private clause. This is expected to be used only with
8614 /// directives that start with 'target'.
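/// E.g., a first-private capture of 'int *p' is mapped with
/// TO | PTR_AND_OBJ, while a first-private capture of a non-pointer
/// variable is mapped with PRIVATE | TO.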
8615 OpenMPOffloadMappingFlags
8616 getMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap) const {
8617 assert(Cap.capturesVariable() && "Expected capture by reference only!");
8618
8619 // A first private variable captured by reference will use only the
8620 // 'private ptr' and 'map to' flag. Return the right flags if the captured
8621 // declaration is known as first-private in this handler.
8622 if (FirstPrivateDecls.count(Cap.getCapturedVar())) {
8623 if (Cap.getCapturedVar()->getType()->isAnyPointerType())
8624 return OpenMPOffloadMappingFlags::OMP_MAP_TO |
8625 OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ;
8626 return OpenMPOffloadMappingFlags::OMP_MAP_PRIVATE |
8627 OpenMPOffloadMappingFlags::OMP_MAP_TO;
8628 }
8629 auto I = LambdasMap.find(Cap.getCapturedVar()->getCanonicalDecl());
8630 if (I != LambdasMap.end())
8631 // For map(to: lambda): use the user-specified map type.
8632 return getMapTypeBits(
8633 I->getSecond()->getMapType(), I->getSecond()->getMapTypeModifiers(),
8634 /*MotionModifiers=*/{}, I->getSecond()->isImplicit(),
8635 /*AddPtrFlag=*/false,
8636 /*AddIsTargetParamFlag=*/false,
8637 /*isNonContiguous=*/false);
8638 return OpenMPOffloadMappingFlags::OMP_MAP_TO |
8639 OpenMPOffloadMappingFlags::OMP_MAP_FROM;
8640 }
8641
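/// Flatten the layout of \p RD into \p Layout in increasing field-index
/// order, recursing into its non-virtual and virtual bases. Empty bases
/// and bitfields are skipped.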
8642 void getPlainLayout(const CXXRecordDecl *RD,
8643 llvm::SmallVectorImpl<const FieldDecl *> &Layout,
8644 bool AsBase) const {
8645 const CGRecordLayout &RL = CGF.getTypes().getCGRecordLayout(RD);
8646
8647 llvm::StructType *St =
8648 AsBase ? RL.getBaseSubobjectLLVMType() : RL.getLLVMType();
8649
8650 unsigned NumElements = St->getNumElements();
8651 llvm::SmallVector<
8652 llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>, 4>
8653 RecordLayout(NumElements);
8654
8655 // Fill bases.
8656 for (const auto &I : RD->bases()) {
8657 if (I.isVirtual())
8658 continue;
8659
8660 QualType BaseTy = I.getType();
8661 const auto *Base = BaseTy->getAsCXXRecordDecl();
8662 // Ignore empty bases.
8663 if (isEmptyRecordForLayout(CGF.getContext(), BaseTy) ||
8664 CGF.getContext()
8665 .getASTRecordLayout(Base)
8666 .getNonVirtualSize()
8667 .isZero())
8668 continue;
8669
8670 unsigned FieldIndex = RL.getNonVirtualBaseLLVMFieldNo(Base);
8671 RecordLayout[FieldIndex] = Base;
8672 }
8673 // Fill in virtual bases.
8674 for (const auto &I : RD->vbases()) {
8675 QualType BaseTy = I.getType();
8676 // Ignore empty bases.
8677 if (isEmptyRecordForLayout(CGF.getContext(), BaseTy))
8678 continue;
8679
8680 const auto *Base = BaseTy->getAsCXXRecordDecl();
8681 unsigned FieldIndex = RL.getVirtualBaseIndex(Base);
8682 if (RecordLayout[FieldIndex])
8683 continue;
8684 RecordLayout[FieldIndex] = Base;
8685 }
8686 // Fill in all the fields.
8687 assert(!RD->isUnion() && "Unexpected union.");
8688 for (const auto *Field : RD->fields()) {
8689 // Fill in non-bitfields. (Bitfields always use a zero pattern, which we
8690 // will fill in later.)
8691 if (!Field->isBitField() &&
8692 !isEmptyFieldForLayout(CGF.getContext(), Field)) {
8693 unsigned FieldIndex = RL.getLLVMFieldNo(Field);
8694 RecordLayout[FieldIndex] = Field;
8695 }
8696 }
8697 for (const llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>
8698 &Data : RecordLayout) {
8699 if (Data.isNull())
8700 continue;
8701 if (const auto *Base = dyn_cast<const CXXRecordDecl *>(Data))
8702 getPlainLayout(Base, Layout, /*AsBase=*/true);
8703 else
8704 Layout.push_back(cast<const FieldDecl *>(Data));
8705 }
8706 }
8707
8708 /// Returns the address corresponding to \p PointerExpr.
8709 static Address getAttachPtrAddr(const Expr *PointerExpr,
8710 CodeGenFunction &CGF) {
8711 assert(PointerExpr && "Cannot get addr from null attach-ptr expr");
8712 Address AttachPtrAddr = Address::invalid();
8713
8714 if (auto *DRE = dyn_cast<DeclRefExpr>(PointerExpr)) {
8715 // If the pointer is a variable, we can use its address directly.
8716 AttachPtrAddr = CGF.EmitLValue(DRE).getAddress();
8717 } else if (auto *OASE = dyn_cast<ArraySectionExpr>(PointerExpr)) {
8718 AttachPtrAddr =
8719 CGF.EmitArraySectionExpr(OASE, /*IsLowerBound=*/true).getAddress();
8720 } else if (auto *ASE = dyn_cast<ArraySubscriptExpr>(PointerExpr)) {
8721 AttachPtrAddr = CGF.EmitLValue(ASE).getAddress();
8722 } else if (auto *ME = dyn_cast<MemberExpr>(PointerExpr)) {
8723 AttachPtrAddr = CGF.EmitMemberExpr(ME).getAddress();
8724 } else if (auto *UO = dyn_cast<UnaryOperator>(PointerExpr)) {
8725 assert(UO->getOpcode() == UO_Deref &&
8726 "Unexpected unary-operator on attach-ptr-expr");
8727 AttachPtrAddr = CGF.EmitLValue(UO).getAddress();
8728 }
8729 assert(AttachPtrAddr.isValid() &&
8730 "Failed to get address for attach pointer expression");
8731 return AttachPtrAddr;
8732 }
8733
8734 /// Get the address of the attach pointer, and a load from it, to get the
8735 /// pointee base address.
8736 /// \return A pair containing AttachPtrAddr and AttachPteeBaseAddr. The pair
8737 /// contains invalid addresses if \p AttachPtrExpr is null.
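/// E.g., for 'int *p; ... map(p[1:10])', the attach-ptr expr is 'p', so
/// this returns {&p, p}: the address of the pointer, and its loaded
/// value, which is the base address of the pointee.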
8738 static std::pair<Address, Address>
8739 getAttachPtrAddrAndPteeBaseAddr(const Expr *AttachPtrExpr,
8740 CodeGenFunction &CGF) {
8741
8742 if (!AttachPtrExpr)
8743 return {Address::invalid(), Address::invalid()};
8744
8745 Address AttachPtrAddr = getAttachPtrAddr(AttachPtrExpr, CGF);
8746 assert(AttachPtrAddr.isValid() && "Invalid attach pointer addr");
8747
8748 QualType AttachPtrType =
8749 AttachPtrExpr->getType().getNonReferenceType().getCanonicalType();
8750
8752 Address AttachPteeBaseAddr = CGF.EmitLoadOfPointer(
8753 AttachPtrAddr, AttachPtrType->castAs<PointerType>());
8754 assert(AttachPteeBaseAddr.isValid() && "Invalid attach pointee base addr");
8755
8756 return {AttachPtrAddr, AttachPteeBaseAddr};
8757 }
8758
8759 /// Returns whether an attach entry should be emitted for a map on
8760 /// \p MapBaseDecl on the directive \p CurDir.
8761 static bool
8762 shouldEmitAttachEntry(const Expr *PointerExpr, const ValueDecl *MapBaseDecl,
8763 CodeGenFunction &CGF,
8764 llvm::PointerUnion<const OMPExecutableDirective *,
8765 const OMPDeclareMapperDecl *>
8766 CurDir) {
8767 if (!PointerExpr)
8768 return false;
8769
8770 // Pointer attachment is needed at map-entering time or for declare
8771 // mappers.
8772 return isa<const OMPDeclareMapperDecl *>(CurDir) ||
8773 isOpenMPTargetMapEnteringDirective(
8774 cast<const OMPExecutableDirective *>(CurDir)
8775 ->getDirectiveKind());
8776 }
8777
8778 /// Computes the attach-ptr expr for \p Components, and updates various maps
8779 /// with the information.
8780 /// It internally calls OMPClauseMappableExprCommon::findAttachPtrExpr()
8781 /// with the OpenMPDirectiveKind extracted from \p CurDir.
8782 /// It updates AttachPtrComputationOrderMap, AttachPtrComponentDepthMap, and
8783 /// AttachPtrExprMap.
8784 void collectAttachPtrExprInfo(
8785 OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
8786 llvm::PointerUnion<const OMPExecutableDirective *,
8787 const OMPDeclareMapperDecl *>
8788 CurDir) {
8789
8790 OpenMPDirectiveKind CurDirectiveID =
8791 isa<const OMPDeclareMapperDecl *>(CurDir)
8792 ? OMPD_declare_mapper
8793 : cast<const OMPExecutableDirective *>(CurDir)->getDirectiveKind();
8794
8795 const auto &[AttachPtrExpr, Depth] =
8796 OMPClauseMappableExprCommon::findAttachPtrExpr(Components,
8797 CurDirectiveID);
8798
8799 AttachPtrComputationOrderMap.try_emplace(
8800 AttachPtrExpr, AttachPtrComputationOrderMap.size());
8801 AttachPtrComponentDepthMap.try_emplace(AttachPtrExpr, Depth);
8802 AttachPtrExprMap.try_emplace(Components, AttachPtrExpr);
8803 }
8804
8805 /// Generate all the base pointers, section pointers, sizes, map types, and
8806 /// mappers for the extracted mappable expressions (all included in \a
8807 /// CombinedInfo). Also, for each item that relates to a device pointer, a
8808 /// pair of the relevant declaration and index where it occurs is appended to
8809 /// the device pointers info array.
8810 void generateAllInfoForClauses(
8811 ArrayRef<const OMPClause *> Clauses, MapCombinedInfoTy &CombinedInfo,
8812 llvm::OpenMPIRBuilder &OMPBuilder,
8813 const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
8814 llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
8815 // We have to process the component lists that relate to the same
8816 // declaration in a single chunk so that we can generate the map flags
8817 // correctly. Therefore, we organize all lists in a map.
8818 enum MapKind { Present, Allocs, Other, Total };
8819 llvm::MapVector<CanonicalDeclPtr<const Decl>,
8820 SmallVector<SmallVector<MapInfo, 8>, 4>>
8821 Info;
8822
8823 // Helper function to fill the information map for the different supported
8824 // clauses.
8825 auto &&InfoGen =
8826 [&Info, &SkipVarSet](
8827 const ValueDecl *D, MapKind Kind,
8828 OMPClauseMappableExprCommon::MappableExprComponentListRef L,
8829 OpenMPMapClauseKind MapType,
8830 ArrayRef<OpenMPMapModifierKind> MapModifiers,
8831 ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
8832 bool ReturnDevicePointer, bool IsImplicit, const ValueDecl *Mapper,
8833 const Expr *VarRef = nullptr, bool ForDeviceAddr = false) {
8834 if (SkipVarSet.contains(D))
8835 return;
8836 auto It = Info.try_emplace(D, Total).first;
8837 It->second[Kind].emplace_back(
8838 L, MapType, MapModifiers, MotionModifiers, ReturnDevicePointer,
8839 IsImplicit, Mapper, VarRef, ForDeviceAddr);
8840 };
8841
8842 for (const auto *Cl : Clauses) {
8843 const auto *C = dyn_cast<OMPMapClause>(Cl);
8844 if (!C)
8845 continue;
8846 MapKind Kind = Other;
8847 if (llvm::is_contained(C->getMapTypeModifiers(),
8848 OMPC_MAP_MODIFIER_present))
8849 Kind = Present;
8850 else if (C->getMapType() == OMPC_MAP_alloc)
8851 Kind = Allocs;
8852 const auto *EI = C->getVarRefs().begin();
8853 for (const auto L : C->component_lists()) {
8854 const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
8855 InfoGen(std::get<0>(L), Kind, std::get<1>(L), C->getMapType(),
8856 C->getMapTypeModifiers(), {},
8857 /*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(L),
8858 E);
8859 ++EI;
8860 }
8861 }
8862 for (const auto *Cl : Clauses) {
8863 const auto *C = dyn_cast<OMPToClause>(Cl);
8864 if (!C)
8865 continue;
8866 MapKind Kind = Other;
8867 if (llvm::is_contained(C->getMotionModifiers(),
8868 OMPC_MOTION_MODIFIER_present))
8869 Kind = Present;
8870 if (llvm::is_contained(C->getMotionModifiers(),
8871 OMPC_MOTION_MODIFIER_iterator)) {
8872 if (auto *IteratorExpr = dyn_cast<OMPIteratorExpr>(
8873 C->getIteratorModifier()->IgnoreParenImpCasts())) {
8874 const auto *VD = cast<VarDecl>(IteratorExpr->getIteratorDecl(0));
8875 CGF.EmitVarDecl(*VD);
8876 }
8877 }
8878
8879 const auto *EI = C->getVarRefs().begin();
8880 for (const auto L : C->component_lists()) {
8881 InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_to, {},
8882 C->getMotionModifiers(), /*ReturnDevicePointer=*/false,
8883 C->isImplicit(), std::get<2>(L), *EI);
8884 ++EI;
8885 }
8886 }
8887 for (const auto *Cl : Clauses) {
8888 const auto *C = dyn_cast<OMPFromClause>(Cl);
8889 if (!C)
8890 continue;
8891 MapKind Kind = Other;
8892 if (llvm::is_contained(C->getMotionModifiers(),
8893 OMPC_MOTION_MODIFIER_present))
8894 Kind = Present;
8895 if (llvm::is_contained(C->getMotionModifiers(),
8896 OMPC_MOTION_MODIFIER_iterator)) {
8897 if (auto *IteratorExpr = dyn_cast<OMPIteratorExpr>(
8898 C->getIteratorModifier()->IgnoreParenImpCasts())) {
8899 const auto *VD = cast<VarDecl>(IteratorExpr->getIteratorDecl(0));
8900 CGF.EmitVarDecl(*VD);
8901 }
8902 }
8903
8904 const auto *EI = C->getVarRefs().begin();
8905 for (const auto L : C->component_lists()) {
8906 InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_from, {},
8907 C->getMotionModifiers(),
8908 /*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(L),
8909 *EI);
8910 ++EI;
8911 }
8912 }
8913
8914 // Look at the use_device_ptr and use_device_addr clauses information
8915 // and mark the existing map entries as such. If there is no map
8916 // information for an entry in the use_device_ptr or use_device_addr
8917 // list, we defer its emission until all the maps for the same VD have
8918 // been handled, and then create one with map type 'return_param' and a
8919 // zero-size section. It is the user's fault if that entry was not
8920 // mapped before.
8921 MapCombinedInfoTy UseDeviceDataCombinedInfo;
8922
8923 auto &&UseDeviceDataCombinedInfoGen =
8924 [&UseDeviceDataCombinedInfo](const ValueDecl *VD, llvm::Value *Ptr,
8925 CodeGenFunction &CGF, bool IsDevAddr) {
8926 UseDeviceDataCombinedInfo.Exprs.push_back(VD);
8927 UseDeviceDataCombinedInfo.BasePointers.emplace_back(Ptr);
8928 UseDeviceDataCombinedInfo.DevicePtrDecls.emplace_back(VD);
8929 UseDeviceDataCombinedInfo.DevicePointers.emplace_back(
8930 IsDevAddr ? DeviceInfoTy::Address : DeviceInfoTy::Pointer);
8931 // FIXME: For use_device_addr on array-sections, this should
8932 // be the starting address of the section.
8933 // e.g. int *p;
8934 // ... use_device_addr(p[3])
8935 // &p[0], &p[3], /*size=*/0, RETURN_PARAM
8936 UseDeviceDataCombinedInfo.Pointers.push_back(Ptr);
8937 UseDeviceDataCombinedInfo.Sizes.push_back(
8938 llvm::Constant::getNullValue(CGF.Int64Ty));
8939 UseDeviceDataCombinedInfo.Types.push_back(
8940 OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM);
8941 UseDeviceDataCombinedInfo.Mappers.push_back(nullptr);
8942 };
8943
8944 auto &&MapInfoGen =
8945 [&UseDeviceDataCombinedInfoGen](
8946 CodeGenFunction &CGF, const Expr *IE, const ValueDecl *VD,
8947 OMPClauseMappableExprCommon::MappableExprComponentListRef
8948 Components,
8949 bool IsDevAddr, bool IEIsAttachPtrForDevAddr = false) {
8950 // We didn't find any match in our map information - generate a zero
8951 // size array section.
8952 llvm::Value *Ptr;
8953 if (IsDevAddr && !IEIsAttachPtrForDevAddr) {
8954 if (IE->isGLValue())
8955 Ptr = CGF.EmitLValue(IE).getPointer(CGF);
8956 else
8957 Ptr = CGF.EmitScalarExpr(IE);
8958 } else {
8959 Ptr = CGF.EmitLoadOfScalar(CGF.EmitLValue(IE), IE->getExprLoc());
8960 }
8961 bool TreatDevAddrAsDevPtr = IEIsAttachPtrForDevAddr;
8962 // For the purpose of address-translation, treat something like the
8963 // following:
8964 // int *p;
8965 // ... use_device_addr(p[1])
8966 // equivalent to
8967 // ... use_device_ptr(p)
8968 UseDeviceDataCombinedInfoGen(VD, Ptr, CGF, /*IsDevAddr=*/IsDevAddr &&
8969 !TreatDevAddrAsDevPtr);
8970 };
8971
8972 auto &&IsMapInfoExist = [&Info, this](CodeGenFunction &CGF,
8973 const ValueDecl *VD, const Expr *IE,
8974 const Expr *DesiredAttachPtrExpr,
8975 bool IsDevAddr) -> bool {
8976 // We potentially have map information for this declaration already.
8977 // Look for the first set of components that refer to it. If found,
8978 // return true.
8979 // If the first component is a member expression, we have to look into
8980 // 'this', which maps to null in the map of map information. Otherwise
8981 // look directly for the information.
8982 auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);
8983 if (It != Info.end()) {
8984 bool Found = false;
8985 for (auto &Data : It->second) {
8986 MapInfo *CI = nullptr;
8987 // We potentially have multiple maps for the same decl. We need to
8988 // only consider those for which the attach-ptr matches the desired
8989 // attach-ptr.
8990 auto *It = llvm::find_if(Data, [&](const MapInfo &MI) {
8991 if (MI.Components.back().getAssociatedDeclaration() != VD)
8992 return false;
8993
8994 const Expr *MapAttachPtr = getAttachPtrExpr(MI.Components);
8995 bool Match = AttachPtrComparator.areEqual(MapAttachPtr,
8996 DesiredAttachPtrExpr);
8997 return Match;
8998 });
8999
9000 if (It != Data.end())
9001 CI = &*It;
9002
9003 if (CI) {
9004 if (IsDevAddr) {
9005 CI->ForDeviceAddr = true;
9006 CI->ReturnDevicePointer = true;
9007 Found = true;
9008 break;
9009 } else {
9010 auto PrevCI = std::next(CI->Components.rbegin());
9011 const auto *VarD = dyn_cast<VarDecl>(VD);
9012 const Expr *AttachPtrExpr = getAttachPtrExpr(CI->Components);
9013 if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
9014 isa<MemberExpr>(IE) ||
9015 !VD->getType().getNonReferenceType()->isPointerType() ||
9016 PrevCI == CI->Components.rend() ||
9017 isa<MemberExpr>(PrevCI->getAssociatedExpression()) || !VarD ||
9018 VarD->hasLocalStorage() ||
9019 (isa_and_nonnull<DeclRefExpr>(AttachPtrExpr) &&
9020 VD == cast<DeclRefExpr>(AttachPtrExpr)->getDecl())) {
9021 CI->ForDeviceAddr = IsDevAddr;
9022 CI->ReturnDevicePointer = true;
9023 Found = true;
9024 break;
9025 }
9026 }
9027 }
9028 }
9029 return Found;
9030 }
9031 return false;
9032 };
9033
9034 // Look at the use_device_ptr clause information and mark the existing map
9035 // entries as such. If there is no map information for an entry in the
9036 // use_device_ptr list, we create one with map type 'alloc' and zero size
9037 // section. It is the user's fault if that was not mapped before. If there is
9038 // no map information and the pointer is a struct member, then we defer the
9039 // emission of that entry until the whole struct has been processed.
9040 for (const auto *Cl : Clauses) {
9041 const auto *C = dyn_cast<OMPUseDevicePtrClause>(Cl);
9042 if (!C)
9043 continue;
9044 for (const auto L : C->component_lists()) {
9045 OMPClauseMappableExprCommon::MappableExprComponentListRef Components =
9046 std::get<1>(L);
9047 assert(!Components.empty() &&
9048 "Not expecting empty list of components!");
9049 const ValueDecl *VD = Components.back().getAssociatedDeclaration();
9051 const Expr *IE = Components.back().getAssociatedExpression();
9052 // For use_device_ptr, we match an existing map clause if its attach-ptr
9053 // is same as the use_device_ptr operand. e.g.
9054 // map expr | use_device_ptr expr | current behavior
9055 // ---------|---------------------|-----------------
9056 // p[1] | p | match
9057 // ps->a | ps | match
9058 // p | p | no match
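// Illustration (hypothetical user code): with
//   int *p;
//   #pragma omp target data map(p[1]) use_device_ptr(p)
// the map of p[1] has attach-ptr `p`, matching the use_device_ptr
// operand, so that map entry is flagged to return the device pointer.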
9059 const Expr *UDPOperandExpr =
9060 Components.front().getAssociatedExpression();
9061 if (IsMapInfoExist(CGF, VD, IE,
9062 /*DesiredAttachPtrExpr=*/UDPOperandExpr,
9063 /*IsDevAddr=*/false))
9064 continue;
9065 MapInfoGen(CGF, IE, VD, Components, /*IsDevAddr=*/false);
9066 }
9067 }
9068
9069 llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>, 4> Processed;
9070 for (const auto *Cl : Clauses) {
9071 const auto *C = dyn_cast<OMPUseDeviceAddrClause>(Cl);
9072 if (!C)
9073 continue;
9074 for (const auto L : C->component_lists()) {
9075 OMPClauseMappableExprCommon::MappableExprComponentListRef Components =
9076 std::get<1>(L);
9077 assert(!std::get<1>(L).empty() &&
9078 "Not expecting empty list of components!");
9079 const ValueDecl *VD = std::get<1>(L).back().getAssociatedDeclaration();
9080 if (!Processed.insert(VD).second)
9081 continue;
9083 // For use_device_addr, we match an existing map clause if the
9084 // use_device_addr operand's attach-ptr matches the map operand's
9085 // attach-ptr.
9086 // We could also restrict matching to cases where there is a full
9087 // match between the map/use_device_addr clause exprs, but that may be
9088 // unnecessary.
9089 //
9090 // map expr | use_device_addr expr | current | possible restrictive/
9091 // | | behavior | safer behavior
9092 // ---------|----------------------|-----------|-----------------------
9093 // p | p | match | match
9094 // p[0] | p[0] | match | match
9095 // p[0:1] | p[0] | match | no match
9096 // p[0:1] | p[2:1] | match | no match
9097 // p[1] | p[0] | match | no match
9098 // ps->a | ps->b | match | no match
9099 // p | p[0] | no match | no match
9100 // pp | pp[0][0] | no match | no match
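// Illustration (hypothetical user code): with
//   int *p;
//   #pragma omp target data map(p[0:1]) use_device_addr(p[0])
// both operands have attach-ptr `p`, so the existing map entry is
// reused for the use_device_addr lookup.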
9101 const Expr *UDAAttachPtrExpr = getAttachPtrExpr(Components);
9102 const Expr *IE = std::get<1>(L).back().getAssociatedExpression();
9103 assert((!UDAAttachPtrExpr || UDAAttachPtrExpr == IE) &&
9104 "use_device_addr operand has an attach-ptr, but does not match "
9105 "last component's expr.");
9106 if (IsMapInfoExist(CGF, VD, IE,
9107 /*DesiredAttachPtrExpr=*/UDAAttachPtrExpr,
9108 /*IsDevAddr=*/true))
9109 continue;
9110 MapInfoGen(CGF, IE, VD, Components,
9111 /*IsDevAddr=*/true,
9112 /*IEIsAttachPtrForDevAddr=*/UDAAttachPtrExpr != nullptr);
9113 }
9114 }
9115
9116 for (const auto &Data : Info) {
9117 MapCombinedInfoTy CurInfo;
9118 const Decl *D = Data.first;
9119 const ValueDecl *VD = cast_or_null<ValueDecl>(D);
9120 // Group component lists by their AttachPtrExpr and process them in order
9121 // of increasing complexity (nullptr first, then simple expressions like
9122 // p, then more complex ones like p[0], etc.)
9123 //
9124 // This is similar to how generateInfoForCaptureFromClauseInfo handles
9125 // grouping for target constructs.
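// Illustration (hypothetical clauses): for
//   map(ps, ps->d, ps->pt->e)
// the groups are processed as {ps} (attach-ptr: nullptr), then
// {ps->d} (attach-ptr: ps), then {ps->pt->e} (attach-ptr: ps->pt).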
9126 SmallVector<std::pair<const Expr *, MapInfo>, 16> AttachPtrMapInfoPairs;
9127
9128 // First, collect all MapData entries with their attach-ptr exprs.
9129 for (const auto &M : Data.second) {
9130 for (const MapInfo &L : M) {
9131 assert(!L.Components.empty() &&
9132 "Not expecting declaration with no component lists.");
9133
9134 const Expr *AttachPtrExpr = getAttachPtrExpr(L.Components);
9135 AttachPtrMapInfoPairs.emplace_back(AttachPtrExpr, L);
9136 }
9137 }
9138
9139 // Next, sort by increasing order of their complexity.
9140 llvm::stable_sort(AttachPtrMapInfoPairs,
9141 [this](const auto &LHS, const auto &RHS) {
9142 return AttachPtrComparator(LHS.first, RHS.first);
9143 });
9144
9145 // And finally, process them all in order, grouping those with
9146 // equivalent attach-ptr exprs together.
9147 auto *It = AttachPtrMapInfoPairs.begin();
9148 while (It != AttachPtrMapInfoPairs.end()) {
9149 const Expr *AttachPtrExpr = It->first;
9150
9151 SmallVector<MapInfo, 8> GroupLists;
9152 while (It != AttachPtrMapInfoPairs.end() &&
9153 (It->first == AttachPtrExpr ||
9154 AttachPtrComparator.areEqual(It->first, AttachPtrExpr))) {
9155 GroupLists.push_back(It->second);
9156 ++It;
9157 }
9158 assert(!GroupLists.empty() && "GroupLists should not be empty");
9159
9160 StructRangeInfoTy PartialStruct;
9161 AttachInfoTy AttachInfo;
9162 MapCombinedInfoTy GroupCurInfo;
9163 // Current group's struct base information:
9164 MapCombinedInfoTy GroupStructBaseCurInfo;
9165 for (const MapInfo &L : GroupLists) {
9166 // Remember the current base pointer index.
9167 unsigned CurrentBasePointersIdx = GroupCurInfo.BasePointers.size();
9168 unsigned StructBasePointersIdx =
9169 GroupStructBaseCurInfo.BasePointers.size();
9170
9171 GroupCurInfo.NonContigInfo.IsNonContiguous =
9172 L.Components.back().isNonContiguous();
9173 generateInfoForComponentList(
9174 L.MapType, L.MapModifiers, L.MotionModifiers, L.Components,
9175 GroupCurInfo, GroupStructBaseCurInfo, PartialStruct, AttachInfo,
9176 /*IsFirstComponentList=*/false, L.IsImplicit,
9177 /*GenerateAllInfoForClauses*/ true, L.Mapper, L.ForDeviceAddr, VD,
9178 L.VarRef, /*OverlappedElements*/ {});
9179
9180 // If this entry relates to a device pointer, set the relevant
9181 // declaration and add the 'return pointer' flag.
9182 if (L.ReturnDevicePointer) {
9183 // Check whether a value was added to either GroupCurInfo or
9184 // GroupStructBaseCurInfo and error if no value was added to either
9185 // of them:
9186 assert((CurrentBasePointersIdx < GroupCurInfo.BasePointers.size() ||
9187 StructBasePointersIdx <
9188 GroupStructBaseCurInfo.BasePointers.size()) &&
9189 "Unexpected number of mapped base pointers.");
9190
9191 // Choose a base pointer index which is always valid:
9192 const ValueDecl *RelevantVD =
9193 L.Components.back().getAssociatedDeclaration();
9194 assert(RelevantVD &&
9195 "No relevant declaration related with device pointer??");
9196
9197 // If GroupStructBaseCurInfo has been updated this iteration then
9198 // work on the first new entry added to it i.e. make sure that when
9199 // multiple values are added to any of the lists, the first value
9200 // added is being modified by the assignments below (not the last
9201 // value added).
9202 if (StructBasePointersIdx <
9203 GroupStructBaseCurInfo.BasePointers.size()) {
9204 GroupStructBaseCurInfo.DevicePtrDecls[StructBasePointersIdx] =
9205 RelevantVD;
9206 GroupStructBaseCurInfo.DevicePointers[StructBasePointersIdx] =
9207 L.ForDeviceAddr ? DeviceInfoTy::Address
9208 : DeviceInfoTy::Pointer;
9209 GroupStructBaseCurInfo.Types[StructBasePointersIdx] |=
9210 OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM;
9211 } else {
9212 GroupCurInfo.DevicePtrDecls[CurrentBasePointersIdx] = RelevantVD;
9213 GroupCurInfo.DevicePointers[CurrentBasePointersIdx] =
9214 L.ForDeviceAddr ? DeviceInfoTy::Address
9215 : DeviceInfoTy::Pointer;
9216 GroupCurInfo.Types[CurrentBasePointersIdx] |=
9217 OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM;
9218 }
9219 }
9220 }
9221
9222 // Unify entries in one list making sure the struct mapping precedes the
9223 // individual fields:
9224 MapCombinedInfoTy GroupUnionCurInfo;
9225 GroupUnionCurInfo.append(GroupStructBaseCurInfo);
9226 GroupUnionCurInfo.append(GroupCurInfo);
9227
9228 // If there is an entry in PartialStruct it means we have a struct with
9229 // individual members mapped. Emit an extra combined entry.
9230 if (PartialStruct.Base.isValid()) {
9231 // Prepend a synthetic dimension of length 1 to represent the
9232 // aggregated struct object. Using 1 (not 0) is important: 0 produced
9233 // an incorrect non-contiguous descriptor (DimSize==1), causing the
9234 // non-contiguous motion-clause path to be skipped. Using 1 also
9235 // preserves the correct rank, so that targetDataUpdate() computes
9236 // DimSize == 2 for cases like strided array sections originating
9237 // from user-defined mappers (e.g. s.data[0:8:2]).
9238 GroupUnionCurInfo.NonContigInfo.Dims.insert(
9239 GroupUnionCurInfo.NonContigInfo.Dims.begin(), 1);
9240 emitCombinedEntry(
9241 CurInfo, GroupUnionCurInfo.Types, PartialStruct, AttachInfo,
9242 /*IsMapThis=*/!VD, OMPBuilder, VD,
9243 /*OffsetForMemberOfFlag=*/CombinedInfo.BasePointers.size(),
9244 /*NotTargetParams=*/true);
9245 }
9246
9247 // Append this group's results to the overall CurInfo in the correct
9248 // order: combined-entry -> original-field-entries -> attach-entry
9249 CurInfo.append(GroupUnionCurInfo);
9250 if (AttachInfo.isValid())
9251 emitAttachEntry(CGF, CurInfo, AttachInfo);
9252 }
9253
9254 // We need to append the results of this capture to what we already have.
9255 CombinedInfo.append(CurInfo);
9256 }
9257 // Append data for use_device_ptr/addr clauses.
9258 CombinedInfo.append(UseDeviceDataCombinedInfo);
9259 }
9260
9261public:
9262 MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF)
9263 : CurDir(&Dir), CGF(CGF), AttachPtrComparator(*this) {
9264 // Extract firstprivate clause information.
9265 for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>())
9266 for (const auto *D : C->varlist())
9267 FirstPrivateDecls.try_emplace(
9268 cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl()), C->isImplicit());
9269 // Extract implicit firstprivates from uses_allocators clauses.
9270 for (const auto *C : Dir.getClausesOfKind<OMPUsesAllocatorsClause>()) {
9271 for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
9272 OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
9273 if (const auto *DRE = dyn_cast_or_null<DeclRefExpr>(D.AllocatorTraits))
9274 FirstPrivateDecls.try_emplace(cast<VarDecl>(DRE->getDecl()),
9275 /*Implicit=*/true);
9276 else if (const auto *VD = dyn_cast<VarDecl>(
9277 cast<DeclRefExpr>(D.Allocator->IgnoreParenImpCasts())
9278 ->getDecl()))
9279 FirstPrivateDecls.try_emplace(VD, /*Implicit=*/true);
9280 }
9281 }
9282 // Extract defaultmap clause information.
9283 for (const auto *C : Dir.getClausesOfKind<OMPDefaultmapClause>())
9284 if (C->getDefaultmapModifier() == OMPC_DEFAULTMAP_MODIFIER_firstprivate)
9285 DefaultmapFirstprivateKinds.insert(C->getDefaultmapKind());
9286 // Extract device pointer clause information.
9287 for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>())
9288 for (auto L : C->component_lists())
9289 DevPointersMap[std::get<0>(L)].push_back(std::get<1>(L));
9290 // Extract device addr clause information.
9291 for (const auto *C : Dir.getClausesOfKind<OMPHasDeviceAddrClause>())
9292 for (auto L : C->component_lists())
9293 HasDevAddrsMap[std::get<0>(L)].push_back(std::get<1>(L));
9294 // Extract map information.
9295 for (const auto *C : Dir.getClausesOfKind<OMPMapClause>()) {
9296 if (C->getMapType() != OMPC_MAP_to)
9297 continue;
9298 for (auto L : C->component_lists()) {
9299 const ValueDecl *VD = std::get<0>(L);
9300 const auto *RD = VD ? VD->getType()
9301 .getCanonicalType()
9302 .getNonReferenceType()
9303 ->getAsCXXRecordDecl()
9304 : nullptr;
9305 if (RD && RD->isLambda())
9306 LambdasMap.try_emplace(std::get<0>(L), C);
9307 }
9308 }
9309
9310 auto CollectAttachPtrExprsForClauseComponents = [this](const auto *C) {
9311 for (auto L : C->component_lists()) {
9312 OMPClauseMappableExprCommon::MappableExprComponentListRef Components =
9313 std::get<1>(L);
9314 if (!Components.empty())
9315 collectAttachPtrExprInfo(Components, CurDir);
9316 }
9317 };
9318
9319 // Populate the AttachPtrExprMap for all component lists from map-related
9320 // clauses.
9321 for (const auto *C : Dir.getClausesOfKind<OMPMapClause>())
9322 CollectAttachPtrExprsForClauseComponents(C);
9323 for (const auto *C : Dir.getClausesOfKind<OMPToClause>())
9324 CollectAttachPtrExprsForClauseComponents(C);
9325 for (const auto *C : Dir.getClausesOfKind<OMPFromClause>())
9326 CollectAttachPtrExprsForClauseComponents(C);
9327 for (const auto *C : Dir.getClausesOfKind<OMPUseDevicePtrClause>())
9328 CollectAttachPtrExprsForClauseComponents(C);
9329 for (const auto *C : Dir.getClausesOfKind<OMPUseDeviceAddrClause>())
9330 CollectAttachPtrExprsForClauseComponents(C);
9331 for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>())
9332 CollectAttachPtrExprsForClauseComponents(C);
9333 for (const auto *C : Dir.getClausesOfKind<OMPHasDeviceAddrClause>())
9334 CollectAttachPtrExprsForClauseComponents(C);
9335 }
9336
9337 /// Constructor for the declare mapper directive.
9338 MappableExprsHandler(const OMPDeclareMapperDecl &Dir, CodeGenFunction &CGF)
9339 : CurDir(&Dir), CGF(CGF), AttachPtrComparator(*this) {}
9340
9341 /// Generate code for the combined entry if we have a partially mapped struct
9342 /// and take care of the mapping flags of the arguments corresponding to
9343 /// individual struct members.
9344 /// If a valid \p AttachInfo exists, its pointee addr will be updated to point
9345 /// to the combined-entry's begin address, if emitted.
9346 /// \p PartialStruct contains attach base-pointer information.
9349 void emitCombinedEntry(MapCombinedInfoTy &CombinedInfo,
9350 MapFlagsArrayTy &CurTypes,
9351 const StructRangeInfoTy &PartialStruct,
9352 AttachInfoTy &AttachInfo, bool IsMapThis,
9353 llvm::OpenMPIRBuilder &OMPBuilder, const ValueDecl *VD,
9354 unsigned OffsetForMemberOfFlag,
9355 bool NotTargetParams) const {
9356 if (CurTypes.size() == 1 &&
9357 ((CurTypes.back() & OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF) !=
9358 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF) &&
9359 !PartialStruct.IsArraySection)
9360 return;
9361 Address LBAddr = PartialStruct.LowestElem.second;
9362 Address HBAddr = PartialStruct.HighestElem.second;
9363 if (PartialStruct.HasCompleteRecord) {
9364 LBAddr = PartialStruct.LB;
9365 HBAddr = PartialStruct.LB;
9366 }
9367 CombinedInfo.Exprs.push_back(VD);
9368 // Base is the base of the struct
9369 CombinedInfo.BasePointers.push_back(PartialStruct.Base.emitRawPointer(CGF));
9370 CombinedInfo.DevicePtrDecls.push_back(nullptr);
9371 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
9372 // Pointer is the address of the lowest element
9373 llvm::Value *LB = LBAddr.emitRawPointer(CGF);
9374 const CXXMethodDecl *MD =
9375 CGF.CurFuncDecl ? dyn_cast<CXXMethodDecl>(CGF.CurFuncDecl) : nullptr;
9376 const CXXRecordDecl *RD = MD ? MD->getParent() : nullptr;
9377 bool HasBaseClass = RD && IsMapThis ? RD->getNumBases() > 0 : false;
9378 // There should not be a mapper for a combined entry.
9379 if (HasBaseClass) {
9380 // OpenMP 5.2 148:21:
9381 // If the target construct is within a class non-static member function,
9382 // and a variable is an accessible data member of the object for which the
9383 // non-static data member function is invoked, the variable is treated as
9384 // if the this[:1] expression had appeared in a map clause with a map-type
9385 // of tofrom.
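// Illustration (hypothetical user code):
//   struct S {
//     int a;
//     void f() {
//       #pragma omp target   // 'a' below is treated as if
//       a += 1;              // map(tofrom: this[:1]) appeared
//     }
//   };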
9386 // Emit this[:1]
9387 CombinedInfo.Pointers.push_back(PartialStruct.Base.emitRawPointer(CGF));
9388 QualType Ty = MD->getFunctionObjectParameterType();
9389 llvm::Value *Size =
9390 CGF.Builder.CreateIntCast(CGF.getTypeSize(Ty), CGF.Int64Ty,
9391 /*isSigned=*/true);
9392 CombinedInfo.Sizes.push_back(Size);
9393 } else {
9394 CombinedInfo.Pointers.push_back(LB);
9395 // Size is (addr of {highest+1} element) - (addr of lowest element)
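// e.g. (illustrative) for map(s.a, s.c) over fields a, b, c, this is
// (&s.c + 1) - &s.a, so the combined entry also covers b.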
9396 llvm::Value *HB = HBAddr.emitRawPointer(CGF);
9397 llvm::Value *HAddr = CGF.Builder.CreateConstGEP1_32(
9398 HBAddr.getElementType(), HB, /*Idx0=*/1);
9399 llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(LB, CGF.VoidPtrTy);
9400 llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy);
9401 llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CGF.Int8Ty, CHAddr, CLAddr);
9402 llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.Int64Ty,
9403 /*isSigned=*/false);
9404 CombinedInfo.Sizes.push_back(Size);
9405 }
9406 CombinedInfo.Mappers.push_back(nullptr);
9407 // Map type is always TARGET_PARAM when generating info for captures.
9408 CombinedInfo.Types.push_back(
9409 NotTargetParams ? OpenMPOffloadMappingFlags::OMP_MAP_NONE
9410 : !PartialStruct.PreliminaryMapData.BasePointers.empty()
9411 ? OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ
9412 : OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM);
9413 // If any element has the present modifier, then make sure the runtime
9414 // doesn't attempt to allocate the struct.
9415 if (CurTypes.end() !=
9416 llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) {
9417 return static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
9418 Type & OpenMPOffloadMappingFlags::OMP_MAP_PRESENT);
9419 }))
9420 CombinedInfo.Types.back() |= OpenMPOffloadMappingFlags::OMP_MAP_PRESENT;
9421 // Remove TARGET_PARAM flag from the first element
9422 (*CurTypes.begin()) &= ~OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;
9423 // If any element has the ompx_hold modifier, then make sure the runtime
9424 // uses the hold reference count for the struct as a whole so that it won't
9425 // be unmapped by an extra dynamic reference count decrement. Add it to all
9426 // elements as well so the runtime knows which reference count to check
9427 // when determining whether it's time for device-to-host transfers of
9428 // individual elements.
9429 if (CurTypes.end() !=
9430 llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) {
9431 return static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
9432 Type & OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD);
9433 })) {
9434 CombinedInfo.Types.back() |= OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD;
9435 for (auto &M : CurTypes)
9436 M |= OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD;
9437 }
9438
9439 // All other current entries will be MEMBER_OF the combined entry
9440 // (except for PTR_AND_OBJ entries which do not have a placeholder value
9441 // 0xFFFF in the MEMBER_OF field, or ATTACH entries since they are expected
9442 // to be handled by themselves, after all other maps).
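// Illustration (assumes the usual MEMBER_OF encoding): if the combined
// entry lands at index 3 of the final argument arrays, then
// getMemberOfFlag(3) encodes "member of entry 4" (1-based) in the
// MEMBER_OF bits of each member's map type.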
9443 OpenMPOffloadMappingFlags MemberOfFlag = OMPBuilder.getMemberOfFlag(
9444 OffsetForMemberOfFlag + CombinedInfo.BasePointers.size() - 1);
9445 for (auto &M : CurTypes)
9446 OMPBuilder.setCorrectMemberOfFlag(M, MemberOfFlag);
9447
9448 // When we are emitting a combined entry, if AttachInfo is valid, update
9449 // the pointee address to point to the begin address of the combined
9450 // entry. Note that this means only one attachment per combined-entry
9451 // will be done. So, for instance, if we have multiple maps like:
9452 // S *ps;
9453 // ... map(ps->a, ps->b)
9454 // we still get a single ATTACH entry, like:
9458 //
9459 // &ps[0], &ps->a, sizeof(ps->a to ps->b), ALLOC // combined-entry
9460 // &ps[0], &ps->a, sizeof(ps->a), TO | FROM
9461 // &ps[0], &ps->b, sizeof(ps->b), TO | FROM
9462 // &ps, &ps->a, sizeof(void*), ATTACH // Use combined-entry's LB
9463 if (AttachInfo.isValid())
9464 AttachInfo.AttachPteeAddr = LBAddr;
9465 }
9466
9467 /// Generate all the base pointers, section pointers, sizes, map types, and
9468 /// mappers for the extracted mappable expressions (all included in \a
9469 /// CombinedInfo). Also, for each item that relates with a device pointer, a
9470 /// pair of the relevant declaration and index where it occurs is appended to
9471 /// the device pointers info array.
9472 void generateAllInfo(
9473 MapCombinedInfoTy &CombinedInfo, llvm::OpenMPIRBuilder &OMPBuilder,
9474 const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
9475 llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
9476 assert(isa<const OMPExecutableDirective *>(CurDir) &&
9477 "Expect a executable directive");
9478 const auto *CurExecDir = cast<const OMPExecutableDirective *>(CurDir);
9479 generateAllInfoForClauses(CurExecDir->clauses(), CombinedInfo, OMPBuilder,
9480 SkipVarSet);
9481 }
9482
9483 /// Generate all the base pointers, section pointers, sizes, map types, and
9484 /// mappers for the extracted map clauses of user-defined mapper (all included
9485 /// in \a CombinedInfo).
9486 void generateAllInfoForMapper(MapCombinedInfoTy &CombinedInfo,
9487 llvm::OpenMPIRBuilder &OMPBuilder) const {
9488 assert(isa<const OMPDeclareMapperDecl *>(CurDir) &&
9489 "Expect a declare mapper directive");
9490 const auto *CurMapperDir = cast<const OMPDeclareMapperDecl *>(CurDir);
9491 generateAllInfoForClauses(CurMapperDir->clauses(), CombinedInfo,
9492 OMPBuilder);
9493 }
9494
9495 /// Emit capture info for lambdas for variables captured by reference.
9496 void generateInfoForLambdaCaptures(
9497 const ValueDecl *VD, llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
9498 llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers) const {
9499 QualType VDType = VD->getType().getCanonicalType().getNonReferenceType();
9500 const auto *RD = VDType->getAsCXXRecordDecl();
9501 if (!RD || !RD->isLambda())
9502 return;
9503 Address VDAddr(Arg, CGF.ConvertTypeForMem(VDType),
9504 CGF.getContext().getDeclAlign(VD));
9505 LValue VDLVal = CGF.MakeAddrLValue(VDAddr, VDType);
9506 llvm::DenseMap<const ValueDecl *, FieldDecl *> Captures;
9507 FieldDecl *ThisCapture = nullptr;
9508 RD->getCaptureFields(Captures, ThisCapture);
9509 if (ThisCapture) {
9510 LValue ThisLVal =
9511 CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture);
9512 LValue ThisLValVal = CGF.EmitLValueForField(VDLVal, ThisCapture);
9513 LambdaPointers.try_emplace(ThisLVal.getPointer(CGF),
9514 VDLVal.getPointer(CGF));
9515 CombinedInfo.Exprs.push_back(VD);
9516 CombinedInfo.BasePointers.push_back(ThisLVal.getPointer(CGF));
9517 CombinedInfo.DevicePtrDecls.push_back(nullptr);
9518 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
9519 CombinedInfo.Pointers.push_back(ThisLValVal.getPointer(CGF));
9520 CombinedInfo.Sizes.push_back(
9521 CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy),
9522 CGF.Int64Ty, /*isSigned=*/true));
9523 CombinedInfo.Types.push_back(
9524 OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ |
9525 OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
9526 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF |
9527 OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT);
9528 CombinedInfo.Mappers.push_back(nullptr);
9529 }
9530 for (const LambdaCapture &LC : RD->captures()) {
9531 if (!LC.capturesVariable())
9532 continue;
9533 const VarDecl *VD = cast<VarDecl>(LC.getCapturedVar());
9534 if (LC.getCaptureKind() != LCK_ByRef && !VD->getType()->isPointerType())
9535 continue;
9536 auto It = Captures.find(VD);
9537 assert(It != Captures.end() && "Found lambda capture without field.");
9538 LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second);
9539 if (LC.getCaptureKind() == LCK_ByRef) {
9540 LValue VarLValVal = CGF.EmitLValueForField(VDLVal, It->second);
9541 LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
9542 VDLVal.getPointer(CGF));
9543 CombinedInfo.Exprs.push_back(VD);
9544 CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
9545 CombinedInfo.DevicePtrDecls.push_back(nullptr);
9546 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
9547 CombinedInfo.Pointers.push_back(VarLValVal.getPointer(CGF));
9548 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
9549 CGF.getTypeSize(
9550 VD->getType().getCanonicalType().getNonReferenceType()),
9551 CGF.Int64Ty, /*isSigned=*/true));
9552 } else {
9553 RValue VarRVal = CGF.EmitLoadOfLValue(VarLVal, RD->getLocation());
9554 LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
9555 VDLVal.getPointer(CGF));
9556 CombinedInfo.Exprs.push_back(VD);
9557 CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
9558 CombinedInfo.DevicePtrDecls.push_back(nullptr);
9559 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
9560 CombinedInfo.Pointers.push_back(VarRVal.getScalarVal());
9561 CombinedInfo.Sizes.push_back(llvm::ConstantInt::get(CGF.Int64Ty, 0));
9562 }
9563 CombinedInfo.Types.push_back(
9564 OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ |
9565 OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
9566 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF |
9567 OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT);
9568 CombinedInfo.Mappers.push_back(nullptr);
9569 }
9570 }
9571
9572 /// Set correct indices for lambdas captures.
9573 void adjustMemberOfForLambdaCaptures(
9574 llvm::OpenMPIRBuilder &OMPBuilder,
9575 const llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers,
9576 MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
9577 MapFlagsArrayTy &Types) const {
9578 for (unsigned I = 0, E = Types.size(); I < E; ++I) {
9579 // Set correct member_of idx for all implicit lambda captures.
9580 if (Types[I] != (OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ |
9581 OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
9582 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF |
9583 OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT))
9584 continue;
9585 llvm::Value *BasePtr = LambdaPointers.lookup(BasePointers[I]);
9586 assert(BasePtr && "Unable to find base lambda address.");
9587 int TgtIdx = -1;
9588 for (unsigned J = I; J > 0; --J) {
9589 unsigned Idx = J - 1;
9590 if (Pointers[Idx] != BasePtr)
9591 continue;
9592 TgtIdx = Idx;
9593 break;
9594 }
9595 assert(TgtIdx != -1 && "Unable to find parent lambda.");
9596 // All other current entries will be MEMBER_OF the combined entry
9597 // (except for PTR_AND_OBJ entries which do not have a placeholder value
9598 // 0xFFFF in the MEMBER_OF field).
9599 OpenMPOffloadMappingFlags MemberOfFlag =
9600 OMPBuilder.getMemberOfFlag(TgtIdx);
9601 OMPBuilder.setCorrectMemberOfFlag(Types[I], MemberOfFlag);
9602 }
9603 }
9604
9605 /// Populate component lists for non-lambda captured variables from map,
9606 /// is_device_ptr and has_device_addr clause info.
9607 void populateComponentListsForNonLambdaCaptureFromClauses(
9608 const ValueDecl *VD, MapDataArrayTy &DeclComponentLists,
9609 SmallVectorImpl<
9610 SmallVector<OMPClauseMappableExprCommon::MappableComponent, 8>>
9611 &StorageForImplicitlyAddedComponentLists) const {
9612 if (VD && LambdasMap.count(VD))
9613 return;
9614
9615 // For member fields list in is_device_ptr, store it in
9616 // DeclComponentLists for generating components info.
9617 static const OpenMPMapModifierKind Unknown = OMPC_MAP_MODIFIER_unknown;
9618 auto It = DevPointersMap.find(VD);
9619 if (It != DevPointersMap.end())
9620 for (const auto &MCL : It->second)
9621 DeclComponentLists.emplace_back(MCL, OMPC_MAP_to, Unknown,
9622 /*IsImplicit=*/true, nullptr,
9623 nullptr);
9624 auto I = HasDevAddrsMap.find(VD);
9625 if (I != HasDevAddrsMap.end())
9626 for (const auto &MCL : I->second)
9627 DeclComponentLists.emplace_back(MCL, OMPC_MAP_tofrom, Unknown,
9628 /*IsImplicit=*/true, nullptr,
9629 nullptr);
9630 assert(isa<const OMPExecutableDirective *>(CurDir) &&
9631 "Expect a executable directive");
9632 const auto *CurExecDir = cast<const OMPExecutableDirective *>(CurDir);
9633 for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
9634 const auto *EI = C->getVarRefs().begin();
9635 for (const auto L : C->decl_component_lists(VD)) {
9636 const ValueDecl *VDecl, *Mapper;
9637 // The Expression is not correct if the mapping is implicit
9638 const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
9639 OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
9640 std::tie(VDecl, Components, Mapper) = L;
9641 assert(VDecl == VD && "We got information for the wrong declaration??");
9642 assert(!Components.empty() &&
9643 "Not expecting declaration with no component lists.");
9644 DeclComponentLists.emplace_back(Components, C->getMapType(),
9645 C->getMapTypeModifiers(),
9646 C->isImplicit(), Mapper, E);
9647 ++EI;
9648 }
9649 }
9650
9651 // For the target construct, if there's a map with a base-pointer that's
9652 // a member of an implicitly captured struct or of the current class,
9653 // we need to emit an implicit map on the pointer.
9654 if (isOpenMPTargetExecutionDirective(CurExecDir->getDirectiveKind()))
9655 addImplicitMapForAttachPtrBaseIfMemberOfCapturedVD(
9656 VD, DeclComponentLists, StorageForImplicitlyAddedComponentLists);
9657
9658 llvm::stable_sort(DeclComponentLists, [](const MapData &LHS,
9659 const MapData &RHS) {
9660 ArrayRef<OpenMPMapModifierKind> MapModifiers = std::get<2>(LHS);
9661 OpenMPMapClauseKind MapType = std::get<1>(RHS);
9662 bool HasPresent =
9663 llvm::is_contained(MapModifiers, clang::OMPC_MAP_MODIFIER_present);
9664 bool HasAllocs = MapType == OMPC_MAP_alloc;
9665 MapModifiers = std::get<2>(RHS);
9666 MapType = std::get<1>(LHS);
9667 bool HasPresentR =
9668 llvm::is_contained(MapModifiers, clang::OMPC_MAP_MODIFIER_present);
9669 bool HasAllocsR = MapType == OMPC_MAP_alloc;
9670 return (HasPresent && !HasPresentR) || (HasAllocs && !HasAllocsR);
9671 });
9672 }
9673
9674 /// On a target construct, if there's an implicit map on a struct, or that of
9675 /// this[:], and an explicit map with a member of that struct/class as the
9676 /// base-pointer, we need to make sure that base-pointer is implicitly mapped,
9677 /// to make sure we don't map the full struct/class. For example:
9678 ///
9679 /// \code
9680 /// struct S {
9681 /// int dummy[10000];
9682 /// int *p;
9683 /// void f1() {
9684 /// #pragma omp target map(p[0:1])
9685 /// (void)this;
9686 /// }
9687 /// }; S s;
9688 ///
9689 /// void f2() {
9690 /// #pragma omp target map(s.p[0:10])
9691 /// (void)s;
9692 /// }
9693 /// \endcode
9694 ///
9695 /// Only `this->p` and `s.p` should be mapped in the two cases above.
9696 //
9697 // OpenMP 6.0: 7.9.6 map clause, pg 285
9698 // If a list item with an implicitly determined data-mapping attribute does
9699 // not have any corresponding storage in the device data environment prior to
9700 // a task encountering the construct associated with the map clause, and one
9701 // or more contiguous parts of the original storage are either list items or
9702 // base pointers to list items that are explicitly mapped on the construct,
9703 // only those parts of the original storage will have corresponding storage in
9704 // the device data environment as a result of the map clauses on the
9705 // construct.
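// Illustration: in f2() above, `s` has an implicitly determined map and
// `s.p` is the base pointer of the explicitly mapped `s.p[0:10]`, so only
// the storage for `s.p` (not all of `s`) gets a device copy.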
9706 void addImplicitMapForAttachPtrBaseIfMemberOfCapturedVD(
9707 const ValueDecl *CapturedVD, MapDataArrayTy &DeclComponentLists,
9708 SmallVectorImpl<
9709 SmallVector<OMPClauseMappableExprCommon::MappableComponent, 8>>
9710 &ComponentVectorStorage) const {
9711 bool IsThisCapture = CapturedVD == nullptr;
9712
9713 for (const auto &ComponentsAndAttachPtr : AttachPtrExprMap) {
9714 OMPClauseMappableExprCommon::MappableExprComponentListRef
9715 ComponentsWithAttachPtr = ComponentsAndAttachPtr.first;
9716 const Expr *AttachPtrExpr = ComponentsAndAttachPtr.second;
9717 if (!AttachPtrExpr)
9718 continue;
9719
9720 const auto *ME = dyn_cast<MemberExpr>(AttachPtrExpr);
9721 if (!ME)
9722 continue;
9723
9724 const Expr *Base = ME->getBase()->IgnoreParenImpCasts();
9725
9726 // If we are handling a "this" capture, then we are looking for
9727 // attach-ptrs of form `this->p`, either explicitly or implicitly.
9728 if (IsThisCapture && !ME->isImplicitCXXThis() && !isa<CXXThisExpr>(Base))
9729 continue;
9730
9735 // For non-this captures, we are looking for attach-ptrs of the form
9736 // `s.p`.
9738 if (!IsThisCapture && (ME->isArrow() || !isa<DeclRefExpr>(Base) ||
9739 cast<DeclRefExpr>(Base)->getDecl() != CapturedVD))
9740 continue;
9741
9742 // Check if we have an existing map on either:
9743 // this[:], s, this->p, or s.p, in which case, we don't need to add
9744 // an implicit one for the attach-ptr s.p/this->p.
9745 bool FoundExistingMap = false;
9746 for (const MapData &ExistingL : DeclComponentLists) {
9747 OMPClauseMappableExprCommon::MappableExprComponentListRef
9748 ExistingComponents = std::get<0>(ExistingL);
9749
9750 if (ExistingComponents.empty())
9751 continue;
9752
9754 const auto &FirstComponent = ExistingComponents.front();
9755 const Expr *FirstExpr = FirstComponent.getAssociatedExpression();
9756
9757 if (!FirstExpr)
9758 continue;
9759
9760 // First check if we have a map like map(this->p) or map(s.p).
9761 if (AttachPtrComparator.areEqual(FirstExpr, AttachPtrExpr)) {
9762 FoundExistingMap = true;
9763 break;
9764 }
9765
9766 // Check if we have a map like this[0:1]
9767 if (IsThisCapture) {
9768 if (const auto *OASE = dyn_cast<ArraySectionExpr>(FirstExpr)) {
9769 if (isa<CXXThisExpr>(OASE->getBase()->IgnoreParenImpCasts())) {
9770 FoundExistingMap = true;
9771 break;
9772 }
9773 }
9774 continue;
9775 }
9776
9777 // When the attach-ptr is something like `s.p`, check if
9778 // `s` itself is mapped explicitly.
9779 if (const auto *DRE = dyn_cast<DeclRefExpr>(FirstExpr)) {
9780 if (DRE->getDecl() == CapturedVD) {
9781 FoundExistingMap = true;
9782 break;
9783 }
9784 }
9785 }
9786
9787 if (FoundExistingMap)
9788 continue;
9789
9790 // If no base map is found, we need to create an implicit map for the
9791 // attach-pointer expr.
9792
9793 ComponentVectorStorage.emplace_back();
9794 auto &AttachPtrComponents = ComponentVectorStorage.back();
9795
9797 bool SeenAttachPtrComponent = false;
9798 // For creating a map on the attach-ptr `s.p/this->p`, we copy all
9799 // components from the component-list which has `s.p/this->p`
9800 // as the attach-ptr, starting from the component which matches
9801 // `s.p/this->p`. This way, we'll have component-lists of
9802 // `s.p` -> `s`, and `this->p` -> `this`.
9803 for (size_t i = 0; i < ComponentsWithAttachPtr.size(); ++i) {
9804 const auto &Component = ComponentsWithAttachPtr[i];
9805 const Expr *ComponentExpr = Component.getAssociatedExpression();
9806
9807 if (!SeenAttachPtrComponent && ComponentExpr != AttachPtrExpr)
9808 continue;
9809 SeenAttachPtrComponent = true;
9810
9811 AttachPtrComponents.emplace_back(Component.getAssociatedExpression(),
9812 Component.getAssociatedDeclaration(),
9813 Component.isNonContiguous());
9814 }
9815 assert(!AttachPtrComponents.empty() &&
9816 "Could not populate component-lists for mapping attach-ptr");
9817
9818 DeclComponentLists.emplace_back(
9819 AttachPtrComponents, OMPC_MAP_tofrom, Unknown,
9820 /*IsImplicit=*/true, /*mapper=*/nullptr, AttachPtrExpr);
9821 }
9822 }
9823
9824 /// For a capture that has an associated clause, generate the base pointers,
9825 /// section pointers, sizes, map types, and mappers (all included in
9826 /// \a CurCaptureVarInfo).
9827 void generateInfoForCaptureFromClauseInfo(
9828 const MapDataArrayTy &DeclComponentListsFromClauses,
9829 const CapturedStmt::Capture *Cap, llvm::Value *Arg,
9830 MapCombinedInfoTy &CurCaptureVarInfo, llvm::OpenMPIRBuilder &OMPBuilder,
9831 unsigned OffsetForMemberOfFlag) const {
9832 assert(!Cap->capturesVariableArrayType() &&
9833 "Not expecting to generate map info for a variable array type!");
9834
9835 // We need to know when we are generating information for the first component.
9836 const ValueDecl *VD = Cap->capturesThis()
9837 ? nullptr
9838 : Cap->getCapturedVar()->getCanonicalDecl();
9839
9840 // For map(to: lambda): skip here; it is processed in
9841 // generateDefaultMapInfo.
9842 if (LambdasMap.count(VD))
9843 return;
9844
9845 // If this declaration appears in an is_device_ptr clause, we just have to
9846 // pass the pointer by value. If it is a reference to a declaration, we just
9847 // pass its value.
9848 if (VD && (DevPointersMap.count(VD) || HasDevAddrsMap.count(VD))) {
9849 CurCaptureVarInfo.Exprs.push_back(VD);
9850 CurCaptureVarInfo.BasePointers.emplace_back(Arg);
9851 CurCaptureVarInfo.DevicePtrDecls.emplace_back(VD);
9852 CurCaptureVarInfo.DevicePointers.emplace_back(DeviceInfoTy::Pointer);
9853 CurCaptureVarInfo.Pointers.push_back(Arg);
9854 CurCaptureVarInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
9855 CGF.getTypeSize(CGF.getContext().VoidPtrTy), CGF.Int64Ty,
9856 /*isSigned=*/true));
9857 CurCaptureVarInfo.Types.push_back(
9858 OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
9859 OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM);
9860 CurCaptureVarInfo.Mappers.push_back(nullptr);
9861 return;
9862 }
9863
9864 auto GenerateInfoForComponentLists =
9865 [&](ArrayRef<MapData> DeclComponentListsFromClauses,
9866 bool IsEligibleForTargetParamFlag) {
9867 MapCombinedInfoTy CurInfoForComponentLists;
9868 StructRangeInfoTy PartialStruct;
9869 AttachInfoTy AttachInfo;
9870
9871 if (DeclComponentListsFromClauses.empty())
9872 return;
9873
9874 generateInfoForCaptureFromComponentLists(
9875 VD, DeclComponentListsFromClauses, CurInfoForComponentLists,
9876 PartialStruct, AttachInfo, IsEligibleForTargetParamFlag);
9877
9878 // If there is an entry in PartialStruct it means we have a
9879 // struct with individual members mapped. Emit an extra combined
9880 // entry.
9881 if (PartialStruct.Base.isValid()) {
9882 CurCaptureVarInfo.append(PartialStruct.PreliminaryMapData);
9883 emitCombinedEntry(
9884 CurCaptureVarInfo, CurInfoForComponentLists.Types,
9885 PartialStruct, AttachInfo, Cap->capturesThis(), OMPBuilder,
9886 /*VD=*/nullptr, OffsetForMemberOfFlag,
9887 /*NotTargetParams*/ !IsEligibleForTargetParamFlag);
9888 }
9889
9890 // We do the appends to get the entries in the following order:
9891 // combined-entry -> individual-field-entries -> attach-entry,
9892 CurCaptureVarInfo.append(CurInfoForComponentLists);
9893 if (AttachInfo.isValid())
9894 emitAttachEntry(CGF, CurCaptureVarInfo, AttachInfo);
9895 };
9896
9897 // Group component lists by their AttachPtrExpr and process them in order
9898 // of increasing complexity (nullptr first, then simple expressions like p,
9899 // then more complex ones like p[0], etc.)
9900 //
9901 // This ensures that we:
9902 // * handle maps that can contribute towards setting the kernel argument,
9903 // (e.g. map(ps), or map(ps[0])), before any that cannot (e.g. ps->pt->d).
9904 // * allocate a single contiguous storage for all exprs with the same
9905 // captured var and having the same attach-ptr.
9906 //
9907 // Example: The map clauses below should be handled grouped together based
9908 // on their attachable-base-pointers:
9909 // map-clause | attachable-base-pointer
9910 // --------------------------+------------------------
9911 // map(p, ps) | nullptr
9912 // map(p[0]) | p
9913 // map(p[0]->b, p[0]->c) | p[0]
9914 // map(ps->d, ps->e, ps->pt) | ps
9915 // map(ps->pt->d, ps->pt->e) | ps->pt
9916
9917 // First, collect all MapData entries with their attach-ptr exprs.
9918 SmallVector<std::pair<const Expr *, MapData>, 16> AttachPtrMapDataPairs;
9919
9920 for (const MapData &L : DeclComponentListsFromClauses) {
9921 OMPClauseMappableExprCommon::MappableExprComponentListRef Components =
9922 std::get<0>(L);
9923 const Expr *AttachPtrExpr = getAttachPtrExpr(Components);
9924 AttachPtrMapDataPairs.emplace_back(AttachPtrExpr, L);
9925 }
9926
9927 // Next, sort by increasing order of their complexity.
9928 llvm::stable_sort(AttachPtrMapDataPairs,
9929 [this](const auto &LHS, const auto &RHS) {
9930 return AttachPtrComparator(LHS.first, RHS.first);
9931 });
9932
9933 bool NoDefaultMappingDoneForVD = CurCaptureVarInfo.BasePointers.empty();
9934 bool IsFirstGroup = true;
9935
9936 // And finally, process them all in order, grouping those with
9937 // equivalent attach-ptr exprs together.
9938 auto *It = AttachPtrMapDataPairs.begin();
9939 while (It != AttachPtrMapDataPairs.end()) {
9940 const Expr *AttachPtrExpr = It->first;
9941
9942 MapDataArrayTy GroupLists;
9943 while (It != AttachPtrMapDataPairs.end() &&
9944 (It->first == AttachPtrExpr ||
9945 AttachPtrComparator.areEqual(It->first, AttachPtrExpr))) {
9946 GroupLists.push_back(It->second);
9947 ++It;
9948 }
9949 assert(!GroupLists.empty() && "GroupLists should not be empty");
9950
9951 // Determine if this group of component-lists is eligible for the
9952 // TARGET_PARAM flag. Only the first group processed should be eligible,
9953 // and only if no default mapping was done.
9954 bool IsEligibleForTargetParamFlag =
9955 IsFirstGroup && NoDefaultMappingDoneForVD;
9956
9957 GenerateInfoForComponentLists(GroupLists, IsEligibleForTargetParamFlag);
9958 IsFirstGroup = false;
9959 }
9960 }
9961
9962 /// Generate the base pointers, section pointers, sizes, map types, and
9963 /// mappers associated to \a DeclComponentLists for a given capture
9964 /// \a VD (all included in \a CurComponentListInfo).
9965 void generateInfoForCaptureFromComponentLists(
9966 const ValueDecl *VD, ArrayRef<MapData> DeclComponentLists,
9967 MapCombinedInfoTy &CurComponentListInfo, StructRangeInfoTy &PartialStruct,
9968 AttachInfoTy &AttachInfo, bool IsListEligibleForTargetParamFlag) const {
9969 // Find overlapping elements (including the offset from the base element).
9970 llvm::SmallDenseMap<
9971 const MapData *,
9972 llvm::SmallVector<
9973 OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>,
9974 4>
9975 OverlappedData;
9976 size_t Count = 0;
9977 for (const MapData &L : DeclComponentLists) {
9978 OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
9979 OpenMPMapClauseKind MapType;
9980 ArrayRef<OpenMPMapModifierKind> MapModifiers;
9981 bool IsImplicit;
9982 const ValueDecl *Mapper;
9983 const Expr *VarRef;
9984 std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
9985 L;
9986 ++Count;
9987 for (const MapData &L1 : ArrayRef(DeclComponentLists).slice(Count)) {
9988 OMPClauseMappableExprCommon::MappableExprComponentListRef Components1;
9989 std::tie(Components1, MapType, MapModifiers, IsImplicit, Mapper,
9990 VarRef) = L1;
9991 auto CI = Components.rbegin();
9992 auto CE = Components.rend();
9993 auto SI = Components1.rbegin();
9994 auto SE = Components1.rend();
9995 for (; CI != CE && SI != SE; ++CI, ++SI) {
9996 if (CI->getAssociatedExpression()->getStmtClass() !=
9997 SI->getAssociatedExpression()->getStmtClass())
9998 break;
9999 // Are we dealing with different variables/fields?
10000 if (CI->getAssociatedDeclaration() != SI->getAssociatedDeclaration())
10001 break;
10002 }
10003 // Found overlapping if, at least for one component, reached the head
10004 // of the components list.
10005 if (CI == CE || SI == SE) {
10006 // Ignore it if it is the same component.
10007 if (CI == CE && SI == SE)
10008 continue;
10009 const auto It = (SI == SE) ? CI : SI;
10010 // If one component is a pointer and another one is a kind of
10011 // dereference of this pointer (array subscript, section, dereference,
10012 // etc.), it is not an overlapping.
10013 // Same, if one component is a base and another component is a
10014 // dereferenced pointer memberexpr with the same base.
10015 if (!isa<MemberExpr>(It->getAssociatedExpression()) ||
10016 (std::prev(It)->getAssociatedDeclaration() &&
10017 std::prev(It)
10018 ->getAssociatedDeclaration()
10019 ->getType()
10020 ->isPointerType()) ||
10021 (It->getAssociatedDeclaration() &&
10022 It->getAssociatedDeclaration()->getType()->isPointerType() &&
10023 std::next(It) != CE && std::next(It) != SE))
10024 continue;
10025 const MapData &BaseData = CI == CE ? L : L1;
10026 OMPClauseMappableExprCommon::MappableExprComponentListRef SubData =
10027 SI == SE ? Components : Components1;
10028 OverlappedData[&BaseData].push_back(SubData);
10029 }
10030 }
10031 }
10032 // Sort the overlapped elements for each item.
10033 llvm::SmallVector<const FieldDecl *, 4> Layout;
10034 if (!OverlappedData.empty()) {
10035 const Type *BaseType = VD->getType().getCanonicalType().getTypePtr();
10036 const Type *OrigType = BaseType->getPointeeOrArrayElementType();
10037 while (BaseType != OrigType) {
10038 BaseType = OrigType->getCanonicalTypeInternal().getTypePtr();
10039 OrigType = BaseType->getPointeeOrArrayElementType();
10040 }
10041
10042 if (const auto *CRD = BaseType->getAsCXXRecordDecl())
10043 getPlainLayout(CRD, Layout, /*AsBase=*/false);
10044 else {
10045 const auto *RD = BaseType->getAsRecordDecl();
10046 Layout.append(RD->field_begin(), RD->field_end());
10047 }
10048 }
10049 for (auto &Pair : OverlappedData) {
10050 llvm::stable_sort(
10051 Pair.getSecond(),
10052 [&Layout](
10053 OMPClauseMappableExprCommon::MappableExprComponentListRef First,
10054 OMPClauseMappableExprCommon::MappableExprComponentListRef
10055 Second) {
10056 auto CI = First.rbegin();
10057 auto CE = First.rend();
10058 auto SI = Second.rbegin();
10059 auto SE = Second.rend();
10060 for (; CI != CE && SI != SE; ++CI, ++SI) {
10061 if (CI->getAssociatedExpression()->getStmtClass() !=
10062 SI->getAssociatedExpression()->getStmtClass())
10063 break;
10064 // Are we dealing with different variables/fields?
10065 if (CI->getAssociatedDeclaration() !=
10066 SI->getAssociatedDeclaration())
10067 break;
10068 }
10069
10070 // Lists contain the same elements.
10071 if (CI == CE && SI == SE)
10072 return false;
10073
10074 // List with less elements is less than list with more elements.
10075 if (CI == CE || SI == SE)
10076 return CI == CE;
10077
10078 const auto *FD1 = cast<FieldDecl>(CI->getAssociatedDeclaration());
10079 const auto *FD2 = cast<FieldDecl>(SI->getAssociatedDeclaration());
10080 if (FD1->getParent() == FD2->getParent())
10081 return FD1->getFieldIndex() < FD2->getFieldIndex();
10082 const auto *It =
10083 llvm::find_if(Layout, [FD1, FD2](const FieldDecl *FD) {
10084 return FD == FD1 || FD == FD2;
10085 });
10086 return *It == FD1;
10087 });
10088 }
10089
10090 // The mapping flags depend on which list for the capture is generated
10091 // first. Go through all of the elements with overlapped elements first.
10092 bool AddTargetParamFlag = IsListEligibleForTargetParamFlag;
10093 MapCombinedInfoTy StructBaseCombinedInfo;
10094 for (const auto &Pair : OverlappedData) {
10095 const MapData &L = *Pair.getFirst();
10096 OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
10097 OpenMPMapClauseKind MapType;
10098 ArrayRef<OpenMPMapModifierKind> MapModifiers;
10099 bool IsImplicit;
10100 const ValueDecl *Mapper;
10101 const Expr *VarRef;
10102 std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
10103 L;
10104 ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
10105 OverlappedComponents = Pair.getSecond();
10106 generateInfoForComponentList(
10107 MapType, MapModifiers, {}, Components, CurComponentListInfo,
10108 StructBaseCombinedInfo, PartialStruct, AttachInfo, AddTargetParamFlag,
10109 IsImplicit, /*GenerateAllInfoForClauses*/ false, Mapper,
10110 /*ForDeviceAddr=*/false, VD, VarRef, OverlappedComponents);
10111 AddTargetParamFlag = false;
10112 }
10113 // Go through other elements without overlapped elements.
10114 for (const MapData &L : DeclComponentLists) {
10115 OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
10116 OpenMPMapClauseKind MapType;
10117 ArrayRef<OpenMPMapModifierKind> MapModifiers;
10118 bool IsImplicit;
10119 const ValueDecl *Mapper;
10120 const Expr *VarRef;
10121 std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
10122 L;
10123 auto It = OverlappedData.find(&L);
10124 if (It == OverlappedData.end())
10125 generateInfoForComponentList(
10126 MapType, MapModifiers, {}, Components, CurComponentListInfo,
10127 StructBaseCombinedInfo, PartialStruct, AttachInfo,
10128 AddTargetParamFlag, IsImplicit, /*GenerateAllInfoForClauses*/ false,
10129 Mapper, /*ForDeviceAddr=*/false, VD, VarRef,
10130 /*OverlappedElements*/ {});
10131 AddTargetParamFlag = false;
10132 }
10133 }
10134
10135 /// Check if a variable should be treated as firstprivate due to explicit
10136 /// firstprivate clause or defaultmap(firstprivate:...).
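/// For illustration (hypothetical directive):
/// \code
/// int x; int *p;
/// #pragma omp target defaultmap(firstprivate : scalar)
/// \endcode
/// Here `x` is effectively firstprivate, while `p` is not, since pointers
/// fall under the separate defaultmap(firstprivate : pointer) category.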
10137 bool isEffectivelyFirstprivate(const VarDecl *VD, QualType Type) const {
10138 // Check explicit firstprivate clauses (not implicit from defaultmap)
10139 auto I = FirstPrivateDecls.find(VD);
10140 if (I != FirstPrivateDecls.end() && !I->getSecond())
10141 return true; // Explicit firstprivate only
10142
10143 // Check defaultmap(firstprivate:scalar) for scalar types
10144 if (DefaultmapFirstprivateKinds.count(OMPC_DEFAULTMAP_scalar)) {
10145 if (Type->isScalarType())
10146 return true;
10147 }
10148
10149 // Check defaultmap(firstprivate:pointer) for pointer types
10150 if (DefaultmapFirstprivateKinds.count(OMPC_DEFAULTMAP_pointer)) {
10151 if (Type->isAnyPointerType())
10152 return true;
10153 }
10154
10155 // Check defaultmap(firstprivate:aggregate) for aggregate types
10156 if (DefaultmapFirstprivateKinds.count(OMPC_DEFAULTMAP_aggregate)) {
10157 if (Type->isAggregateType())
10158 return true;
10159 }
10160
10161 // Check defaultmap(firstprivate:all) for all types
10162 return DefaultmapFirstprivateKinds.count(OMPC_DEFAULTMAP_all);
10163 }
10164
10165 /// Generate the default map information for a given capture \a CI,
10166 /// record field declaration \a RI and captured value \a CV.
10167 void generateDefaultMapInfo(const CapturedStmt::Capture &CI,
10168 const FieldDecl &RI, llvm::Value *CV,
10169 MapCombinedInfoTy &CombinedInfo) const {
10170 bool IsImplicit = true;
10171 // Do the default mapping.
10172 if (CI.capturesThis()) {
10173 CombinedInfo.Exprs.push_back(nullptr);
10174 CombinedInfo.BasePointers.push_back(CV);
10175 CombinedInfo.DevicePtrDecls.push_back(nullptr);
10176 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
10177 CombinedInfo.Pointers.push_back(CV);
10178 const auto *PtrTy = cast<PointerType>(RI.getType().getTypePtr());
10179 CombinedInfo.Sizes.push_back(
10180 CGF.Builder.CreateIntCast(CGF.getTypeSize(PtrTy->getPointeeType()),
10181 CGF.Int64Ty, /*isSigned=*/true));
10182 // Default map type.
10183 CombinedInfo.Types.push_back(OpenMPOffloadMappingFlags::OMP_MAP_TO |
10184 OpenMPOffloadMappingFlags::OMP_MAP_FROM);
10185 } else if (CI.capturesVariableByCopy()) {
10186 const VarDecl *VD = CI.getCapturedVar();
10187 CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
10188 CombinedInfo.BasePointers.push_back(CV);
10189 CombinedInfo.DevicePtrDecls.push_back(nullptr);
10190 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
10191 CombinedInfo.Pointers.push_back(CV);
10192 bool IsFirstprivate =
10193 isEffectivelyFirstprivate(VD, RI.getType().getNonReferenceType());
10194
10195 if (!RI.getType()->isAnyPointerType()) {
10196 // We have to signal to the runtime that this capture is passed by value
10197 // and is not a pointer.
10198 CombinedInfo.Types.push_back(
10199 OpenMPOffloadMappingFlags::OMP_MAP_LITERAL);
10200 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
10201 CGF.getTypeSize(RI.getType()), CGF.Int64Ty, /*isSigned=*/true));
10202 } else if (IsFirstprivate) {
10203 // Firstprivate pointers should be passed by value (as literals)
10204 // without performing a present table lookup at runtime.
10205 CombinedInfo.Types.push_back(
10206 OpenMPOffloadMappingFlags::OMP_MAP_LITERAL);
10207 // Use zero size for pointer literals (just passing the pointer value)
10208 CombinedInfo.Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
10209 } else {
10210 // Pointers are implicitly mapped with a zero size and no flags
10211 // (other than first map that is added for all implicit maps).
10212 CombinedInfo.Types.push_back(OpenMPOffloadMappingFlags::OMP_MAP_NONE);
10213 CombinedInfo.Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
10214 }
10215 auto I = FirstPrivateDecls.find(VD);
10216 if (I != FirstPrivateDecls.end())
10217 IsImplicit = I->getSecond();
10218 } else {
10219 assert(CI.capturesVariable() && "Expected captured reference.");
10220 const auto *PtrTy = cast<ReferenceType>(RI.getType().getTypePtr());
10221 QualType ElementType = PtrTy->getPointeeType();
10222 const VarDecl *VD = CI.getCapturedVar();
10223 bool IsFirstprivate = isEffectivelyFirstprivate(VD, ElementType);
10224 CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
10225 CombinedInfo.BasePointers.push_back(CV);
10226 CombinedInfo.DevicePtrDecls.push_back(nullptr);
10227 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
10228
10229 // For firstprivate pointers, pass by value instead of dereferencing
10230 if (IsFirstprivate && ElementType->isAnyPointerType()) {
10231 // Treat as a literal value (pass the pointer value itself)
10232 CombinedInfo.Pointers.push_back(CV);
10233 // Use zero size for pointer literals
10234 CombinedInfo.Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
10235 CombinedInfo.Types.push_back(
10236 OpenMPOffloadMappingFlags::OMP_MAP_LITERAL);
10237 } else {
10238 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
10239 CGF.getTypeSize(ElementType), CGF.Int64Ty, /*isSigned=*/true));
10240 // The default map type for a scalar/complex type is 'to' because by
10241 // default the value doesn't have to be retrieved. For an aggregate
10242 // type, the default is 'tofrom'.
10243 CombinedInfo.Types.push_back(getMapModifiersForPrivateClauses(CI));
10244 CombinedInfo.Pointers.push_back(CV);
10245 }
10246 auto I = FirstPrivateDecls.find(VD);
10247 if (I != FirstPrivateDecls.end())
10248 IsImplicit = I->getSecond();
10249 }
10250 // Every default map produces a single argument which is a target parameter.
10251 CombinedInfo.Types.back() |=
10252 OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;
10253
10254 // Add flag stating this is an implicit map.
10255 if (IsImplicit)
10256 CombinedInfo.Types.back() |= OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT;
10257
10258 // No user-defined mapper for default mapping.
10259 CombinedInfo.Mappers.push_back(nullptr);
10260 }
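// As an illustration, for a target region with no explicit map clauses, e.g.
// \code
// int x; int arr[10];
// #pragma omp target
// { x++; arr[0]++; }
// \endcode
// the scalar `x` is passed as a by-value literal while the aggregate `arr`
// gets an implicit 'tofrom' map; both entries also carry the
// OMP_MAP_TARGET_PARAM and OMP_MAP_IMPLICIT flags set above.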
10261};
10262} // anonymous namespace
10263
10264// Try to extract the base declaration from a `this->x` expression if possible.
10265static ValueDecl *getDeclFromThisExpr(const Expr *E) {
10266 if (!E)
10267 return nullptr;
10268
10269 if (const auto *OASE = dyn_cast<ArraySectionExpr>(E->IgnoreParenCasts()))
10270 if (const MemberExpr *ME =
10271 dyn_cast<MemberExpr>(OASE->getBase()->IgnoreParenImpCasts()))
10272 return ME->getMemberDecl();
10273 return nullptr;
10274}
10275
10276/// Emit a string constant containing the names of the values mapped to the
10277/// offloading runtime library.
10278static llvm::Constant *
10279emitMappingInformation(CodeGenFunction &CGF, llvm::OpenMPIRBuilder &OMPBuilder,
10280 MappableExprsHandler::MappingExprInfo &MapExprs) {
10281
10282 uint32_t SrcLocStrSize;
10283 if (!MapExprs.getMapDecl() && !MapExprs.getMapExpr())
10284 return OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize);
10285
10286 SourceLocation Loc;
10287 if (!MapExprs.getMapDecl() && MapExprs.getMapExpr()) {
10288 if (const ValueDecl *VD = getDeclFromThisExpr(MapExprs.getMapExpr()))
10289 Loc = VD->getLocation();
10290 else
10291 Loc = MapExprs.getMapExpr()->getExprLoc();
10292 } else {
10293 Loc = MapExprs.getMapDecl()->getLocation();
10294 }
10295
10296 std::string ExprName;
10297 if (MapExprs.getMapExpr()) {
10298 PrintingPolicy P(CGF.getContext().getLangOpts());
10299 llvm::raw_string_ostream OS(ExprName);
10300 MapExprs.getMapExpr()->printPretty(OS, nullptr, P);
10301 } else {
10302 ExprName = MapExprs.getMapDecl()->getNameAsString();
10303 }
10304
10305 std::string FileName;
10306 PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
10307 if (auto *DbgInfo = CGF.getDebugInfo())
10308 FileName = DbgInfo->remapDIPath(PLoc.getFilename());
10309 else
10310 FileName = PLoc.getFilename();
10311 return OMPBuilder.getOrCreateSrcLocStr(FileName, ExprName, PLoc.getLine(),
10312 PLoc.getColumn(), SrcLocStrSize);
10313}
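// As an illustration, when debug info is enabled an entry such as
// \code
// #pragma omp target map(tofrom : a[0:n]) // named "a[0:n]"
// \endcode
// is named after the pretty-printed map expression, whereas an implicitly
// mapped declaration just uses its name; either way the presumed file, line,
// and column computed above are attached.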
10314/// Emit the arrays used to pass the captures and map information to the
10315/// offloading runtime library. If there is no map or capture information,
10316/// return nullptr by reference.
10317static void emitOffloadingArraysAndArgs(
10318 CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
10319 CGOpenMPRuntime::TargetDataInfo &Info, llvm::OpenMPIRBuilder &OMPBuilder,
10320 bool IsNonContiguous = false, bool ForEndCall = false) {
10321 CodeGenModule &CGM = CGF.CGM;
10322
10323 using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
10324 InsertPointTy AllocaIP(CGF.AllocaInsertPt->getParent(),
10325 CGF.AllocaInsertPt->getIterator());
10326 InsertPointTy CodeGenIP(CGF.Builder.GetInsertBlock(),
10327 CGF.Builder.GetInsertPoint());
10328
10329 auto DeviceAddrCB = [&](unsigned int I, llvm::Value *NewDecl) {
10330 if (const ValueDecl *DevVD = CombinedInfo.DevicePtrDecls[I]) {
10331 Info.CaptureDeviceAddrMap.try_emplace(DevVD, NewDecl);
10332 }
10333 };
10334
10335 auto CustomMapperCB = [&](unsigned int I) {
10336 llvm::Function *MFunc = nullptr;
10337 if (CombinedInfo.Mappers[I]) {
10338 Info.HasMapper = true;
10339 MFunc = CGF.CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc(
10340 cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I]));
10341 }
10342 return MFunc;
10343 };
10344 cantFail(OMPBuilder.emitOffloadingArraysAndArgs(
10345 AllocaIP, CodeGenIP, Info, Info.RTArgs, CombinedInfo, CustomMapperCB,
10346 IsNonContiguous, ForEndCall, DeviceAddrCB));
10347}
10348
10349/// Check for inner distribute directive.
10350static const OMPExecutableDirective *
10351getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) {
10352 const auto *CS = D.getInnermostCapturedStmt();
10353 const auto *Body =
10354 CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
10355 const Stmt *ChildStmt =
10356 CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);
10357
10358 if (const auto *NestedDir =
10359 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
10360 OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind();
10361 switch (D.getDirectiveKind()) {
10362 case OMPD_target:
10363 // For now, treat 'target' with nested 'teams loop' as if it's
10364 // distributed (target teams distribute).
10365 if (isOpenMPDistributeDirective(DKind) || DKind == OMPD_teams_loop)
10366 return NestedDir;
10367 if (DKind == OMPD_teams) {
10368 Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(
10369 /*IgnoreCaptured=*/true);
10370 if (!Body)
10371 return nullptr;
10372 ChildStmt = CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);
10373 if (const auto *NND =
10374 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
10375 DKind = NND->getDirectiveKind();
10376 if (isOpenMPDistributeDirective(DKind))
10377 return NND;
10378 }
10379 }
10380 return nullptr;
10381 case OMPD_target_teams:
10382 if (isOpenMPDistributeDirective(DKind))
10383 return NestedDir;
10384 return nullptr;
10385 case OMPD_target_parallel:
10386 case OMPD_target_simd:
10387 case OMPD_target_parallel_for:
10388 case OMPD_target_parallel_for_simd:
10389 return nullptr;
10390 case OMPD_target_teams_distribute:
10391 case OMPD_target_teams_distribute_simd:
10392 case OMPD_target_teams_distribute_parallel_for:
10393 case OMPD_target_teams_distribute_parallel_for_simd:
10394 case OMPD_parallel:
10395 case OMPD_for:
10396 case OMPD_parallel_for:
10397 case OMPD_parallel_master:
10398 case OMPD_parallel_sections:
10399 case OMPD_for_simd:
10400 case OMPD_parallel_for_simd:
10401 case OMPD_cancel:
10402 case OMPD_cancellation_point:
10403 case OMPD_ordered:
10404 case OMPD_threadprivate:
10405 case OMPD_allocate:
10406 case OMPD_task:
10407 case OMPD_simd:
10408 case OMPD_tile:
10409 case OMPD_unroll:
10410 case OMPD_sections:
10411 case OMPD_section:
10412 case OMPD_single:
10413 case OMPD_master:
10414 case OMPD_critical:
10415 case OMPD_taskyield:
10416 case OMPD_barrier:
10417 case OMPD_taskwait:
10418 case OMPD_taskgroup:
10419 case OMPD_atomic:
10420 case OMPD_flush:
10421 case OMPD_depobj:
10422 case OMPD_scan:
10423 case OMPD_teams:
10424 case OMPD_target_data:
10425 case OMPD_target_exit_data:
10426 case OMPD_target_enter_data:
10427 case OMPD_distribute:
10428 case OMPD_distribute_simd:
10429 case OMPD_distribute_parallel_for:
10430 case OMPD_distribute_parallel_for_simd:
10431 case OMPD_teams_distribute:
10432 case OMPD_teams_distribute_simd:
10433 case OMPD_teams_distribute_parallel_for:
10434 case OMPD_teams_distribute_parallel_for_simd:
10435 case OMPD_target_update:
10436 case OMPD_declare_simd:
10437 case OMPD_declare_variant:
10438 case OMPD_begin_declare_variant:
10439 case OMPD_end_declare_variant:
10440 case OMPD_declare_target:
10441 case OMPD_end_declare_target:
10442 case OMPD_declare_reduction:
10443 case OMPD_declare_mapper:
10444 case OMPD_taskloop:
10445 case OMPD_taskloop_simd:
10446 case OMPD_master_taskloop:
10447 case OMPD_master_taskloop_simd:
10448 case OMPD_parallel_master_taskloop:
10449 case OMPD_parallel_master_taskloop_simd:
10450 case OMPD_requires:
10451 case OMPD_metadirective:
10452 case OMPD_unknown:
10453 default:
10454 llvm_unreachable("Unexpected directive.");
10455 }
10456 }
10457
10458 return nullptr;
10459}
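// As an illustration, in
// \code
// #pragma omp target
// #pragma omp teams
// #pragma omp distribute parallel for
// for (int i = 0; i < N; ++i) { /* ... */ }
// \endcode
// the 'distribute parallel for' directive is found through the nested 'teams'
// region, while a plain 'target parallel' region yields nullptr.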
10460
10461/// Emit the user-defined mapper function. The code generation follows the
10462/// pattern in the example below.
10463/// \code
10464/// void .omp_mapper.<type_name>.<mapper_id>.(void *rt_mapper_handle,
10465/// void *base, void *begin,
10466/// int64_t size, int64_t type,
10467/// void *name = nullptr) {
10468/// // Allocate space for an array section first or add a base/begin for
10469/// // pointer dereference.
10470/// if ((size > 1 || (base != begin && maptype.IsPtrAndObj)) &&
10471/// !maptype.IsDelete)
10472/// __tgt_push_mapper_component(rt_mapper_handle, base, begin,
10473/// size*sizeof(Ty), clearToFromMember(type));
10474/// // Map members.
10475/// for (unsigned i = 0; i < size; i++) {
10476/// // For each component specified by this mapper:
10477/// for (auto c : begin[i]->all_components) {
10478/// if (c.hasMapper())
10479/// (*c.Mapper())(rt_mapper_handle, c.arg_base, c.arg_begin, c.arg_size,
10480/// c.arg_type, c.arg_name);
10481/// else
10482/// __tgt_push_mapper_component(rt_mapper_handle, c.arg_base,
10483/// c.arg_begin, c.arg_size, c.arg_type,
10484/// c.arg_name);
10485/// }
10486/// }
10487/// // Delete the array section.
10488/// if (size > 1 && maptype.IsDelete)
10489/// __tgt_push_mapper_component(rt_mapper_handle, base, begin,
10490/// size*sizeof(Ty), clearToFromMember(type));
10491/// }
10492/// \endcode
10493void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D,
10494 CodeGenFunction *CGF) {
10495 if (UDMMap.count(D) > 0)
10496 return;
10497 ASTContext &C = CGM.getContext();
10498 QualType Ty = D->getType();
10499 auto *MapperVarDecl =
10500 cast<VarDecl>(cast<DeclRefExpr>(D->getMapperVarRef())->getDecl());
10501 CharUnits ElementSize = C.getTypeSizeInChars(Ty);
10502 llvm::Type *ElemTy = CGM.getTypes().ConvertTypeForMem(Ty);
10503
10504 CodeGenFunction MapperCGF(CGM);
10505 MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
10506 auto PrivatizeAndGenMapInfoCB =
10507 [&](llvm::OpenMPIRBuilder::InsertPointTy CodeGenIP, llvm::Value *PtrPHI,
10508 llvm::Value *BeginArg) -> llvm::OpenMPIRBuilder::MapInfosTy & {
10509 MapperCGF.Builder.restoreIP(CodeGenIP);
10510
10511 // Privatize the declared variable of mapper to be the current array
10512 // element.
10513 Address PtrCurrent(
10514 PtrPHI, ElemTy,
10515 Address(BeginArg, MapperCGF.VoidPtrTy, CGM.getPointerAlign())
10516 .getAlignment()
10517 .alignmentOfArrayElement(ElementSize));
10518 CodeGenFunction::OMPPrivateScope Scope(MapperCGF);
10519 Scope.addPrivate(MapperVarDecl, PtrCurrent);
10520 (void)Scope.Privatize();
10521
10522 // Get map clause information.
10523 MappableExprsHandler MEHandler(*D, MapperCGF);
10524 MEHandler.generateAllInfoForMapper(CombinedInfo, OMPBuilder);
10525
10526 auto FillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) {
10527 return emitMappingInformation(MapperCGF, OMPBuilder, MapExpr);
10528 };
10529 if (CGM.getCodeGenOpts().getDebugInfo() !=
10530 llvm::codegenoptions::NoDebugInfo) {
10531 CombinedInfo.Names.resize(CombinedInfo.Exprs.size());
10532 llvm::transform(CombinedInfo.Exprs, CombinedInfo.Names.begin(),
10533 FillInfoMap);
10534 }
10535
10536 return CombinedInfo;
10537 };
10538
10539 auto CustomMapperCB = [&](unsigned I) {
10540 llvm::Function *MapperFunc = nullptr;
10541 if (CombinedInfo.Mappers[I]) {
10542 // Call the corresponding mapper function.
10543 MapperFunc = getOrCreateUserDefinedMapperFunc(
10544 cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I]));
10545 assert(MapperFunc && "Expect a valid mapper function is available.");
10546 }
10547 return MapperFunc;
10548 };
10549
10550 SmallString<64> TyStr;
10551 llvm::raw_svector_ostream Out(TyStr);
10552 CGM.getCXXABI().getMangleContext().mangleCanonicalTypeName(Ty, Out);
10553 std::string Name = getName({"omp_mapper", TyStr, D->getName()});
10554
10555 llvm::Function *NewFn = cantFail(OMPBuilder.emitUserDefinedMapper(
10556 PrivatizeAndGenMapInfoCB, ElemTy, Name, CustomMapperCB));
10557 UDMMap.try_emplace(D, NewFn);
10558 if (CGF)
10559 FunctionUDMMap[CGF->CurFn].push_back(D);
10560}
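// As an illustration, a mapper declared roughly as
// \code
// struct Vec { int len; double *data; };
// #pragma omp declare mapper(Vec v) map(v, v.data[0 : v.len])
// \endcode
// is lowered to an ".omp_mapper.*" function following the pattern above and
// cached in UDMMap for later lookup.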
10561
10562llvm::Function *CGOpenMPRuntime::getOrCreateUserDefinedMapperFunc(
10563 const OMPDeclareMapperDecl *D) {
10564 auto I = UDMMap.find(D);
10565 if (I != UDMMap.end())
10566 return I->second;
10567 emitUserDefinedMapper(D);
10568 return UDMMap.lookup(D);
10569}
10570
10571llvm::Value *CGOpenMPRuntime::emitTargetNumIterationsCall(
10572 CodeGenFunction &CGF, const OMPExecutableDirective &D,
10573 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
10574 const OMPLoopDirective &D)>
10575 SizeEmitter) {
10576 OpenMPDirectiveKind Kind = D.getDirectiveKind();
10577 const OMPExecutableDirective *TD = &D;
10578 // Get nested teams distribute kind directive, if any. For now, treat
10579 // 'target_teams_loop' as if it's really a target_teams_distribute.
10580 if ((!isOpenMPDistributeDirective(Kind) || !isOpenMPTeamsDirective(Kind)) &&
10581 Kind != OMPD_target_teams_loop)
10582 TD = getNestedDistributeDirective(CGM.getContext(), D);
10583 if (!TD)
10584 return llvm::ConstantInt::get(CGF.Int64Ty, 0);
10585
10586 const auto *LD = cast<OMPLoopDirective>(TD);
10587 if (llvm::Value *NumIterations = SizeEmitter(CGF, *LD))
10588 return NumIterations;
10589 return llvm::ConstantInt::get(CGF.Int64Ty, 0);
10590}
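// As an illustration, for
// \code
// #pragma omp target teams distribute
// for (int i = 0; i < 1024; ++i) { /* ... */ }
// \endcode
// SizeEmitter yields the trip count (1024 here), which is forwarded to the
// runtime; the constant 0 returned otherwise means the count is unknown.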
10591
10592static void
10593emitTargetCallFallback(CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn,
10594 const OMPExecutableDirective &D,
10595 llvm::SmallVectorImpl<llvm::Value *> &CapturedVars,
10596 bool RequiresOuterTask, const CapturedStmt &CS,
10597 bool OffloadingMandatory, CodeGenFunction &CGF) {
10598 if (OffloadingMandatory) {
10599 CGF.Builder.CreateUnreachable();
10600 } else {
10601 if (RequiresOuterTask) {
10602 CapturedVars.clear();
10603 CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
10604 }
10605 OMPRuntime->emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn,
10606 CapturedVars);
10607 }
10608}
10609
10610static llvm::Value *emitDeviceID(
10611 llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
10612 CodeGenFunction &CGF) {
10613 // Emit device ID if any.
10614 llvm::Value *DeviceID;
10615 if (Device.getPointer()) {
10616 assert((Device.getInt() == OMPC_DEVICE_unknown ||
10617 Device.getInt() == OMPC_DEVICE_device_num) &&
10618 "Expected device_num modifier.");
10619 llvm::Value *DevVal = CGF.EmitScalarExpr(Device.getPointer());
10620 DeviceID =
10621 CGF.Builder.CreateIntCast(DevVal, CGF.Int64Ty, /*isSigned=*/true);
10622 } else {
10623 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
10624 }
10625 return DeviceID;
10626}
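// As an illustration:
// \code
// #pragma omp target device(2) // DeviceID = (int64_t)2
// #pragma omp target           // DeviceID = OMP_DEVICEID_UNDEF
// \endcode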
10627
10628static std::pair<llvm::Value *, OMPDynGroupprivateFallbackType>
10629emitDynCGroupMem(const OMPExecutableDirective &D, CodeGenFunction &CGF) {
10630 llvm::Value *DynGP = CGF.Builder.getInt32(0);
10631 auto DynGPFallback = OMPDynGroupprivateFallbackType::Abort;
10632
10633 if (auto *DynGPClause = D.getSingleClause<OMPDynGroupprivateClause>()) {
10634 CodeGenFunction::RunCleanupsScope DynGPScope(CGF);
10635 llvm::Value *DynGPVal =
10636 CGF.EmitScalarExpr(DynGPClause->getSize(), /*IgnoreResultAssign=*/true);
10637 DynGP = CGF.Builder.CreateIntCast(DynGPVal, CGF.Int32Ty,
10638 /*isSigned=*/false);
10639 auto FallbackModifier = DynGPClause->getDynGroupprivateFallbackModifier();
10640 switch (FallbackModifier) {
10641 case OMPC_DYN_GROUPPRIVATE_FALLBACK_abort:
10642 DynGPFallback = OMPDynGroupprivateFallbackType::Abort;
10643 break;
10644 case OMPC_DYN_GROUPPRIVATE_FALLBACK_null:
10645 DynGPFallback = OMPDynGroupprivateFallbackType::Null;
10646 break;
10647 case OMPC_DYN_GROUPPRIVATE_FALLBACK_default_mem:
10649 // This is the default for dyn_groupprivate.
10650 DynGPFallback = OMPDynGroupprivateFallbackType::DefaultMem;
10651 break;
10652 default:
10653 llvm_unreachable("Unknown fallback modifier for OpenMP dyn_groupprivate");
10654 }
10655 } else if (auto *OMPXDynCGClause =
10656 D.getSingleClause<OMPXDynCGroupMemClause>()) {
10657 CodeGenFunction::RunCleanupsScope DynCGMemScope(CGF);
10658 llvm::Value *DynCGMemVal = CGF.EmitScalarExpr(OMPXDynCGClause->getSize(),
10659 /*IgnoreResultAssign=*/true);
10660 DynGP = CGF.Builder.CreateIntCast(DynCGMemVal, CGF.Int32Ty,
10661 /*isSigned=*/false);
10662 }
10663 return {DynGP, DynGPFallback};
10664}
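// As an illustration, the extension clause in
// \code
// #pragma omp target ompx_dyn_cgroup_mem(1024)
// \endcode
// requests 1024 bytes of dynamic contention-group memory, while a
// dyn_groupprivate clause can additionally select the fallback used when the
// request cannot be satisfied (abort, null, or default_mem).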
10665
10666static void genMapInfoForCaptures(
10667 MappableExprsHandler &MEHandler, CodeGenFunction &CGF,
10668 const CapturedStmt &CS, llvm::SmallVectorImpl<llvm::Value *> &CapturedVars,
10669 llvm::OpenMPIRBuilder &OMPBuilder,
10670 llvm::DenseSet<CanonicalDeclPtr<const Decl>> &MappedVarSet,
10671 MappableExprsHandler::MapCombinedInfoTy &CombinedInfo) {
10672
10673 llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers;
10674 auto RI = CS.getCapturedRecordDecl()->field_begin();
10675 auto *CV = CapturedVars.begin();
10676 for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),
10677 CE = CS.capture_end();
10678 CI != CE; ++CI, ++RI, ++CV) {
10679 MappableExprsHandler::MapCombinedInfoTy CurInfo;
10680
10681 // VLA sizes are passed to the outlined region by copy and do not have map
10682 // information associated.
10683 if (CI->capturesVariableArrayType()) {
10684 CurInfo.Exprs.push_back(nullptr);
10685 CurInfo.BasePointers.push_back(*CV);
10686 CurInfo.DevicePtrDecls.push_back(nullptr);
10687 CurInfo.DevicePointers.push_back(
10688 MappableExprsHandler::DeviceInfoTy::None);
10689 CurInfo.Pointers.push_back(*CV);
10690 CurInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
10691 CGF.getTypeSize(RI->getType()), CGF.Int64Ty, /*isSigned=*/true));
10692 // Copy to the device as an argument. No need to retrieve it.
10693 CurInfo.Types.push_back(OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
10694 OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM |
10695 OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT);
10696 CurInfo.Mappers.push_back(nullptr);
10697 } else {
10698 const ValueDecl *CapturedVD =
10699 CI->capturesThis() ? nullptr
10700 : CI->getCapturedVar()->getCanonicalDecl();
10701 bool HasEntryWithCVAsAttachPtr = false;
10702 if (CapturedVD)
10703 HasEntryWithCVAsAttachPtr =
10704 MEHandler.hasAttachEntryForCapturedVar(CapturedVD);
10705
10706 // Populate component lists for the captured variable from clauses.
10707 MappableExprsHandler::MapDataArrayTy DeclComponentLists;
10710 StorageForImplicitlyAddedComponentLists;
10711 MEHandler.populateComponentListsForNonLambdaCaptureFromClauses(
10712 CapturedVD, DeclComponentLists,
10713 StorageForImplicitlyAddedComponentLists);
10714
10715 // OpenMP 6.0, 15.8, target construct, restrictions:
10716 // * A list item in a map clause that is specified on a target construct
10717 // must have a base variable or base pointer.
10718 //
10719 // Map clauses on a target construct must either have a base pointer, or a
10720 // base-variable. So, if we don't have a base-pointer, that means that it
10721 // must have a base-variable, i.e. we have a map like `map(s)`, `map(s.x)`
10722 // etc. In such cases, we do not need to handle default map generation
10723 // for `s`.
10724 bool HasEntryWithoutAttachPtr =
10725 llvm::any_of(DeclComponentLists, [&](const auto &MapData) {
10726 OMPClauseMappableExprCommon::MappableExprComponentListRef
10727 Components = std::get<0>(MapData);
10728 return !MEHandler.getAttachPtrExpr(Components);
10729 });
10730
10731 // Generate default map info first if there's no direct map with CV as
10732 // the base-variable, or attach pointer.
10733 if (DeclComponentLists.empty() ||
10734 (!HasEntryWithCVAsAttachPtr && !HasEntryWithoutAttachPtr))
10735 MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurInfo);
10736
10737 // If we have any information in the map clause, we use it, otherwise we
10738 // just do a default mapping.
10739 MEHandler.generateInfoForCaptureFromClauseInfo(
10740 DeclComponentLists, CI, *CV, CurInfo, OMPBuilder,
10741 /*OffsetForMemberOfFlag=*/CombinedInfo.BasePointers.size());
10742
10743 if (!CI->capturesThis())
10744 MappedVarSet.insert(CI->getCapturedVar());
10745 else
10746 MappedVarSet.insert(nullptr);
10747
10748 // Generate correct mapping for variables captured by reference in
10749 // lambdas.
10750 if (CI->capturesVariable())
10751 MEHandler.generateInfoForLambdaCaptures(CI->getCapturedVar(), *CV,
10752 CurInfo, LambdaPointers);
10753 }
10754 // We expect to have at least an element of information for this capture.
10755 assert(!CurInfo.BasePointers.empty() &&
10756 "Non-existing map pointer for capture!");
10757 assert(CurInfo.BasePointers.size() == CurInfo.Pointers.size() &&
10758 CurInfo.BasePointers.size() == CurInfo.Sizes.size() &&
10759 CurInfo.BasePointers.size() == CurInfo.Types.size() &&
10760 CurInfo.BasePointers.size() == CurInfo.Mappers.size() &&
10761 "Inconsistent map information sizes!");
10762
10763 // We need to append the results of this capture to what we already have.
10764 CombinedInfo.append(CurInfo);
10765 }
10766 // Adjust MEMBER_OF flags for the lambdas captures.
10767 MEHandler.adjustMemberOfForLambdaCaptures(
10768 OMPBuilder, LambdaPointers, CombinedInfo.BasePointers,
10769 CombinedInfo.Pointers, CombinedInfo.Types);
10770}
10771static void
10772genMapInfo(MappableExprsHandler &MEHandler, CodeGenFunction &CGF,
10773 MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
10774 llvm::OpenMPIRBuilder &OMPBuilder,
10775 const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkippedVarSet =
10776 llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) {
10777
10778 CodeGenModule &CGM = CGF.CGM;
10779 // Map any list items in a map clause that were not captured, because they
10780 // weren't referenced within the construct.
10781 MEHandler.generateAllInfo(CombinedInfo, OMPBuilder, SkippedVarSet);
10782
10783 auto FillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) {
10784 return emitMappingInformation(CGF, OMPBuilder, MapExpr);
10785 };
10786 if (CGM.getCodeGenOpts().getDebugInfo() !=
10787 llvm::codegenoptions::NoDebugInfo) {
10788 CombinedInfo.Names.resize(CombinedInfo.Exprs.size());
10789 llvm::transform(CombinedInfo.Exprs, CombinedInfo.Names.begin(),
10790 FillInfoMap);
10791 }
10792}
10793
10794static void genMapInfo(const OMPExecutableDirective &D, CodeGenFunction &CGF,
10795 const CapturedStmt &CS,
10796 llvm::SmallVectorImpl<llvm::Value *> &CapturedVars,
10797 llvm::OpenMPIRBuilder &OMPBuilder,
10798 MappableExprsHandler::MapCombinedInfoTy &CombinedInfo) {
10799 // Get mappable expression information.
10800 MappableExprsHandler MEHandler(D, CGF);
10801 llvm::DenseSet<CanonicalDeclPtr<const Decl>> MappedVarSet;
10802
10803 genMapInfoForCaptures(MEHandler, CGF, CS, CapturedVars, OMPBuilder,
10804 MappedVarSet, CombinedInfo);
10805 genMapInfo(MEHandler, CGF, CombinedInfo, OMPBuilder, MappedVarSet);
10806}
10807
10808template <typename ClauseTy>
10809static void
10810emitClauseForBareTargetDirective(CodeGenFunction &CGF,
10811 const OMPExecutableDirective &D,
10812 llvm::SmallVectorImpl<llvm::Value *> &Values) {
10813 const auto *C = D.getSingleClause<ClauseTy>();
10814 assert(!C->varlist_empty() &&
10815 "ompx_bare requires explicit num_teams and thread_limit");
10817 for (auto *E : C->varlist()) {
10818 llvm::Value *V = CGF.EmitScalarExpr(E);
10819 Values.push_back(
10820 CGF.Builder.CreateIntCast(V, CGF.Int32Ty, /*isSigned=*/true));
10821 }
10822}
10823
10824static void emitTargetCallKernelLaunch(
10825 CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn,
10826 const OMPExecutableDirective &D,
10827 llvm::SmallVectorImpl<llvm::Value *> &CapturedVars, bool RequiresOuterTask,
10828 const CapturedStmt &CS, bool OffloadingMandatory,
10829 llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
10830 llvm::Value *OutlinedFnID, CodeGenFunction::OMPTargetDataInfo &InputInfo,
10831 llvm::Value *&MapTypesArray, llvm::Value *&MapNamesArray,
10832 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
10833 const OMPLoopDirective &D)>
10834 SizeEmitter,
10835 CodeGenFunction &CGF, CodeGenModule &CGM) {
10836 llvm::OpenMPIRBuilder &OMPBuilder = OMPRuntime->getOMPBuilder();
10837
10838 // Fill up the arrays with all the captured variables.
10839 MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
10840 CGOpenMPRuntime::TargetDataInfo Info;
10841 genMapInfo(D, CGF, CS, CapturedVars, OMPBuilder, CombinedInfo);
10842
10843 emitOffloadingArraysAndArgs(CGF, CombinedInfo, Info, OMPBuilder,
10844 /*IsNonContiguous=*/true, /*ForEndCall=*/false);
10845
10846 InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
10847 InputInfo.BasePointersArray = Address(Info.RTArgs.BasePointersArray,
10848 CGF.VoidPtrTy, CGM.getPointerAlign());
10849 InputInfo.PointersArray =
10850 Address(Info.RTArgs.PointersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
10851 InputInfo.SizesArray =
10852 Address(Info.RTArgs.SizesArray, CGF.Int64Ty, CGM.getPointerAlign());
10853 InputInfo.MappersArray =
10854 Address(Info.RTArgs.MappersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
10855 MapTypesArray = Info.RTArgs.MapTypesArray;
10856 MapNamesArray = Info.RTArgs.MapNamesArray;
10857
10858 auto &&ThenGen = [&OMPRuntime, OutlinedFn, &D, &CapturedVars,
10859 RequiresOuterTask, &CS, OffloadingMandatory, Device,
10860 OutlinedFnID, &InputInfo, &MapTypesArray, &MapNamesArray,
10861 SizeEmitter](CodeGenFunction &CGF, PrePostActionTy &) {
10862 bool IsReverseOffloading = Device.getInt() == OMPC_DEVICE_ancestor;
10863
10864 if (IsReverseOffloading) {
10865 // Reverse offloading is not supported, so just execute on the host.
10866 // FIXME: This fallback solution is incorrect since it ignores the
10867 // OMP_TARGET_OFFLOAD environment variable. Instead it would be better to
10868 // assert here and ensure SEMA emits an error.
10869 emitTargetCallFallback(OMPRuntime, OutlinedFn, D, CapturedVars,
10870 RequiresOuterTask, CS, OffloadingMandatory, CGF);
10871 return;
10872 }
10873
10874 bool HasNoWait = D.hasClausesOfKind<OMPNowaitClause>();
10875 unsigned NumTargetItems = InputInfo.NumberOfTargetItems;
10876
10877 llvm::Value *BasePointersArray =
10878 InputInfo.BasePointersArray.emitRawPointer(CGF);
10879 llvm::Value *PointersArray = InputInfo.PointersArray.emitRawPointer(CGF);
10880 llvm::Value *SizesArray = InputInfo.SizesArray.emitRawPointer(CGF);
10881 llvm::Value *MappersArray = InputInfo.MappersArray.emitRawPointer(CGF);
10882
10883 auto &&EmitTargetCallFallbackCB =
10884 [&OMPRuntime, OutlinedFn, &D, &CapturedVars, RequiresOuterTask, &CS,
10885 OffloadingMandatory, &CGF](llvm::OpenMPIRBuilder::InsertPointTy IP)
10886 -> llvm::OpenMPIRBuilder::InsertPointTy {
10887 CGF.Builder.restoreIP(IP);
10888 emitTargetCallFallback(OMPRuntime, OutlinedFn, D, CapturedVars,
10889 RequiresOuterTask, CS, OffloadingMandatory, CGF);
10890 return CGF.Builder.saveIP();
10891 };
10892
10893 bool IsBare = D.hasClausesOfKind<OMPXBareClause>();
10894 llvm::SmallVector<llvm::Value *, 3> NumTeams;
10895 llvm::SmallVector<llvm::Value *, 3> NumThreads;
10896 if (IsBare) {
10897 emitClauseForBareTargetDirective<OMPNumTeamsClause>(CGF, D, NumTeams);
10898 emitClauseForBareTargetDirective<OMPThreadLimitClause>(CGF, D,
10899 NumThreads);
10900 } else {
10901 NumTeams.push_back(OMPRuntime->emitNumTeamsForTargetDirective(CGF, D));
10902 NumThreads.push_back(
10903 OMPRuntime->emitNumThreadsForTargetDirective(CGF, D));
10904 }
10905
10906 llvm::Value *DeviceID = emitDeviceID(Device, CGF);
10907 llvm::Value *RTLoc = OMPRuntime->emitUpdateLocation(CGF, D.getBeginLoc());
10908 llvm::Value *NumIterations =
10909 OMPRuntime->emitTargetNumIterationsCall(CGF, D, SizeEmitter);
10910 auto [DynCGroupMem, DynCGroupMemFallback] = emitDynCGroupMem(D, CGF);
10911 llvm::OpenMPIRBuilder::InsertPointTy AllocaIP(
10912 CGF.AllocaInsertPt->getParent(), CGF.AllocaInsertPt->getIterator());
10913
10914 llvm::OpenMPIRBuilder::TargetDataRTArgs RTArgs(
10915 BasePointersArray, PointersArray, SizesArray, MapTypesArray,
10916 nullptr /* MapTypesArrayEnd */, MappersArray, MapNamesArray);
10917
10918 llvm::OpenMPIRBuilder::TargetKernelArgs Args(
10919 NumTargetItems, RTArgs, NumIterations, NumTeams, NumThreads,
10920 DynCGroupMem, HasNoWait, DynCGroupMemFallback);
10921
10922 llvm::OpenMPIRBuilder::InsertPointTy AfterIP =
10923 cantFail(OMPRuntime->getOMPBuilder().emitKernelLaunch(
10924 CGF.Builder, OutlinedFnID, EmitTargetCallFallbackCB, Args, DeviceID,
10925 RTLoc, AllocaIP));
10926 CGF.Builder.restoreIP(AfterIP);
10927 };
10928
10929 if (RequiresOuterTask)
10930 CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
10931 else
10932 OMPRuntime->emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
10933}
10934
10935static void
10936emitTargetCallElse(CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn,
10937 const OMPExecutableDirective &D,
10938 llvm::SmallVectorImpl<llvm::Value *> &CapturedVars,
10939 bool RequiresOuterTask, const CapturedStmt &CS,
10940 bool OffloadingMandatory, CodeGenFunction &CGF) {
10941
10942 // Notify that the host version must be executed.
10943 auto &&ElseGen =
10944 [&OMPRuntime, OutlinedFn, &D, &CapturedVars, RequiresOuterTask, &CS,
10945 OffloadingMandatory](CodeGenFunction &CGF, PrePostActionTy &) {
10946 emitTargetCallFallback(OMPRuntime, OutlinedFn, D, CapturedVars,
10947 RequiresOuterTask, CS, OffloadingMandatory, CGF);
10948 };
10949
10950 if (RequiresOuterTask) {
10951 CodeGenFunction::OMPTargetDataInfo InputInfo;
10952 CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo);
10953 } else {
10954 OMPRuntime->emitInlinedDirective(CGF, D.getDirectiveKind(), ElseGen);
10955 }
10956}
10957
10958void CGOpenMPRuntime::emitTargetCall(
10959 CodeGenFunction &CGF, const OMPExecutableDirective &D,
10960 llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
10961 llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
10962 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
10963 const OMPLoopDirective &D)>
10964 SizeEmitter) {
10965 if (!CGF.HaveInsertPoint())
10966 return;
10967
10968 const bool OffloadingMandatory = !CGM.getLangOpts().OpenMPIsTargetDevice &&
10969 CGM.getLangOpts().OpenMPOffloadMandatory;
10970
10971 assert((OffloadingMandatory || OutlinedFn) && "Invalid outlined function!");
10972
10973 const bool RequiresOuterTask =
10974 D.hasClausesOfKind<OMPDependClause>() ||
10975 D.hasClausesOfKind<OMPNowaitClause>() ||
10976 D.hasClausesOfKind<OMPInReductionClause>() ||
10977 (CGM.getLangOpts().OpenMP >= 51 &&
10978 needsTaskBasedThreadLimit(D.getDirectiveKind()) &&
10979 D.hasClausesOfKind<OMPThreadLimitClause>());
10980 llvm::SmallVector<llvm::Value *, 16> CapturedVars;
10981 const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
10982 auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF,
10983 PrePostActionTy &) {
10984 CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
10985 };
10986 emitInlinedDirective(CGF, OMPD_unknown, ArgsCodegen);
10987
10989 llvm::Value *MapTypesArray = nullptr;
10990 llvm::Value *MapNamesArray = nullptr;
10991
10992 auto &&TargetThenGen = [this, OutlinedFn, &D, &CapturedVars,
10993 RequiresOuterTask, &CS, OffloadingMandatory, Device,
10994 OutlinedFnID, &InputInfo, &MapTypesArray,
10995 &MapNamesArray, SizeEmitter](CodeGenFunction &CGF,
10996 PrePostActionTy &) {
10997 emitTargetCallKernelLaunch(this, OutlinedFn, D, CapturedVars,
10998 RequiresOuterTask, CS, OffloadingMandatory,
10999 Device, OutlinedFnID, InputInfo, MapTypesArray,
11000 MapNamesArray, SizeEmitter, CGF, CGM);
11001 };
11002
11003 auto &&TargetElseGen =
11004 [this, OutlinedFn, &D, &CapturedVars, RequiresOuterTask, &CS,
11005 OffloadingMandatory](CodeGenFunction &CGF, PrePostActionTy &) {
11006 emitTargetCallElse(this, OutlinedFn, D, CapturedVars, RequiresOuterTask,
11007 CS, OffloadingMandatory, CGF);
11008 };
11009
11010 // If we have a target function ID it means that we need to support
11011 // offloading; otherwise, just execute on the host. We need to execute on the
11012 // host regardless of the conditional in the if clause if, e.g., the user does
11013 // not specify target triples.
11014 if (OutlinedFnID) {
11015 if (IfCond) {
11016 emitIfClause(CGF, IfCond, TargetThenGen, TargetElseGen);
11017 } else {
11018 RegionCodeGenTy ThenRCG(TargetThenGen);
11019 ThenRCG(CGF);
11020 }
11021 } else {
11022 RegionCodeGenTy ElseRCG(TargetElseGen);
11023 ElseRCG(CGF);
11024 }
11025}
11026
11027void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
11028 StringRef ParentName) {
11029 if (!S)
11030 return;
11031
11032 // Register the vtable on the device for target data and target directives.
11033 // Add this block here since scanForTargetRegionsFunctions ignores
11034 // target data by checking if S is an executable directive (target).
11035 if (auto *E = dyn_cast<OMPExecutableDirective>(S);
11036 E && isOpenMPTargetDataManagementDirective(E->getDirectiveKind())) {
11037 // No need to check whether this is a device compile, since
11038 // scanForTargetRegionsFunctions is currently only called
11039 // during device compilation.
11040 registerVTable(*E);
11041 }
11042
11043 // Codegen OMP target directives that offload compute to the device.
11044 bool RequiresDeviceCodegen =
11045 isa<OMPExecutableDirective>(S) &&
11046 isOpenMPTargetExecutionDirective(
11047 cast<OMPExecutableDirective>(S)->getDirectiveKind());
11048
11049 if (RequiresDeviceCodegen) {
11050 const auto &E = *cast<OMPExecutableDirective>(S);
11051
11052 llvm::TargetRegionEntryInfo EntryInfo = getEntryInfoFromPresumedLoc(
11053 CGM, OMPBuilder, E.getBeginLoc(), ParentName);
11054
11055 // Is this a target region that should not be emitted as an entry point? If
11056 // so just signal we are done with this target region.
11057 if (!OMPBuilder.OffloadInfoManager.hasTargetRegionEntryInfo(EntryInfo))
11058 return;
11059
11060 switch (E.getDirectiveKind()) {
11061 case OMPD_target:
11062 CodeGenFunction::EmitOMPTargetDeviceFunction(CGM, ParentName,
11063 cast<OMPTargetDirective>(E));
11064 break;
11065 case OMPD_target_parallel:
11066 CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
11067 CGM, ParentName, cast<OMPTargetParallelDirective>(E));
11068 break;
11069 case OMPD_target_teams:
11070 CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
11071 CGM, ParentName, cast<OMPTargetTeamsDirective>(E));
11072 break;
11073 case OMPD_target_teams_distribute:
11074 CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction(
11075 CGM, ParentName, cast<OMPTargetTeamsDistributeDirective>(E));
11076 break;
11077 case OMPD_target_teams_distribute_simd:
11078 CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction(
11079 CGM, ParentName, cast<OMPTargetTeamsDistributeSimdDirective>(E));
11080 break;
11081 case OMPD_target_parallel_for:
11082 CodeGenFunction::EmitOMPTargetParallelForDeviceFunction(
11083 CGM, ParentName, cast<OMPTargetParallelForDirective>(E));
11084 break;
11085 case OMPD_target_parallel_for_simd:
11086 CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction(
11087 CGM, ParentName, cast<OMPTargetParallelForSimdDirective>(E));
11088 break;
11089 case OMPD_target_simd:
11090 CodeGenFunction::EmitOMPTargetSimdDeviceFunction(
11091 CGM, ParentName, cast<OMPTargetSimdDirective>(E));
11092 break;
11093 case OMPD_target_teams_distribute_parallel_for:
11094 CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction(
11095 CGM, ParentName,
11096 cast<OMPTargetTeamsDistributeParallelForDirective>(E));
11097 break;
11098 case OMPD_target_teams_distribute_parallel_for_simd:
11099 CodeGenFunction::
11100 EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(
11101 CGM, ParentName,
11102 cast<OMPTargetTeamsDistributeParallelForSimdDirective>(E));
11103 break;
11104 case OMPD_target_teams_loop:
11105 CodeGenFunction::EmitOMPTargetTeamsGenericLoopDeviceFunction(
11106 CGM, ParentName, cast<OMPTargetTeamsGenericLoopDirective>(E));
11107 break;
11108 case OMPD_target_parallel_loop:
11109 CodeGenFunction::EmitOMPTargetParallelGenericLoopDeviceFunction(
11110 CGM, ParentName, cast<OMPTargetParallelGenericLoopDirective>(E));
11111 break;
11112 case OMPD_parallel:
11113 case OMPD_for:
11114 case OMPD_parallel_for:
11115 case OMPD_parallel_master:
11116 case OMPD_parallel_sections:
11117 case OMPD_for_simd:
11118 case OMPD_parallel_for_simd:
11119 case OMPD_cancel:
11120 case OMPD_cancellation_point:
11121 case OMPD_ordered:
11122 case OMPD_threadprivate:
11123 case OMPD_allocate:
11124 case OMPD_task:
11125 case OMPD_simd:
11126 case OMPD_tile:
11127 case OMPD_unroll:
11128 case OMPD_sections:
11129 case OMPD_section:
11130 case OMPD_single:
11131 case OMPD_master:
11132 case OMPD_critical:
11133 case OMPD_taskyield:
11134 case OMPD_barrier:
11135 case OMPD_taskwait:
11136 case OMPD_taskgroup:
11137 case OMPD_atomic:
11138 case OMPD_flush:
11139 case OMPD_depobj:
11140 case OMPD_scan:
11141 case OMPD_teams:
11142 case OMPD_target_data:
11143 case OMPD_target_exit_data:
11144 case OMPD_target_enter_data:
11145 case OMPD_distribute:
11146 case OMPD_distribute_simd:
11147 case OMPD_distribute_parallel_for:
11148 case OMPD_distribute_parallel_for_simd:
11149 case OMPD_teams_distribute:
11150 case OMPD_teams_distribute_simd:
11151 case OMPD_teams_distribute_parallel_for:
11152 case OMPD_teams_distribute_parallel_for_simd:
11153 case OMPD_target_update:
11154 case OMPD_declare_simd:
11155 case OMPD_declare_variant:
11156 case OMPD_begin_declare_variant:
11157 case OMPD_end_declare_variant:
11158 case OMPD_declare_target:
11159 case OMPD_end_declare_target:
11160 case OMPD_declare_reduction:
11161 case OMPD_declare_mapper:
11162 case OMPD_taskloop:
11163 case OMPD_taskloop_simd:
11164 case OMPD_master_taskloop:
11165 case OMPD_master_taskloop_simd:
11166 case OMPD_parallel_master_taskloop:
11167 case OMPD_parallel_master_taskloop_simd:
11168 case OMPD_requires:
11169 case OMPD_metadirective:
11170 case OMPD_unknown:
11171 default:
11172 llvm_unreachable("Unknown target directive for OpenMP device codegen.");
11173 }
11174 return;
11175 }
11176
11177 if (const auto *E = dyn_cast<OMPExecutableDirective>(S)) {
11178 if (!E->hasAssociatedStmt() || !E->getAssociatedStmt())
11179 return;
11180
11181 scanForTargetRegionsFunctions(E->getRawStmt(), ParentName);
11182 return;
11183 }
11184
11185 // If this is a lambda function, look into its body.
11186 if (const auto *L = dyn_cast<LambdaExpr>(S))
11187 S = L->getBody();
11188
11189 // Keep looking for target regions recursively.
11190 for (const Stmt *II : S->children())
11191 scanForTargetRegionsFunctions(II, ParentName);
11192}
11193
11194static bool isAssumedToBeNotEmitted(const ValueDecl *VD, bool IsDevice) {
11195 std::optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
11196 OMPDeclareTargetDeclAttr::getDeviceType(VD);
11197 if (!DevTy)
11198 return false;
11199 // Do not emit device_type(nohost) functions for the host.
11200 if (!IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_NoHost)
11201 return true;
11202 // Do not emit device_type(host) functions for the device.
11203 if (IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_Host)
11204 return true;
11205 return false;
11206}
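// As an illustration:
// \code
// #pragma omp begin declare target device_type(nohost)
// void gpu_only(); // not emitted in host compilations
// #pragma omp end declare target
// \endcode
// Symmetrically, a device_type(host) function is skipped on the device.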
11207
11208bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) {
11209 // If emitting code for the host, we do not process FD here. Instead we do
11210 // the normal code generation.
11211 if (!CGM.getLangOpts().OpenMPIsTargetDevice) {
11212 if (const auto *FD = dyn_cast<FunctionDecl>(GD.getDecl()))
11213 if (isAssumedToBeNotEmitted(cast<ValueDecl>(FD),
11214 CGM.getLangOpts().OpenMPIsTargetDevice))
11215 return true;
11216 return false;
11217 }
11218
11219 const ValueDecl *VD = cast<ValueDecl>(GD.getDecl());
11220 // Try to detect target regions in the function.
11221 if (const auto *FD = dyn_cast<FunctionDecl>(VD)) {
11222 StringRef Name = CGM.getMangledName(GD);
11223 scanForTargetRegionsFunctions(FD->getBody(), Name);
11224 if (isAssumedToBeNotEmitted(cast<ValueDecl>(FD),
11225 CGM.getLangOpts().OpenMPIsTargetDevice))
11226 return true;
11227 }
11228
11229 // Do not emit the function if it is not marked as declare target.
11230 return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) &&
11231 AlreadyEmittedTargetDecls.count(VD) == 0;
11232}
11233
11234bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
11235 if (isAssumedToBeNotEmitted(cast<ValueDecl>(GD.getDecl()),
11236 CGM.getLangOpts().OpenMPIsTargetDevice))
11237 return true;
11238
11239 if (!CGM.getLangOpts().OpenMPIsTargetDevice)
11240 return false;
11241
11242 // Check if there are Ctors/Dtors in this declaration and look for target
11243 // regions in it. We use the complete variant to produce the kernel name
11244 // mangling.
11245 QualType RDTy = cast<VarDecl>(GD.getDecl())->getType();
11246 if (const auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) {
11247 for (const CXXConstructorDecl *Ctor : RD->ctors()) {
11248 StringRef ParentName =
11249 CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete));
11250 scanForTargetRegionsFunctions(Ctor->getBody(), ParentName);
11251 }
11252 if (const CXXDestructorDecl *Dtor = RD->getDestructor()) {
11253 StringRef ParentName =
11254 CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete));
11255 scanForTargetRegionsFunctions(Dtor->getBody(), ParentName);
11256 }
11257 }
11258
11259 // Do not emit the variable if it is not marked as declare target.
11260 std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
11261 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(
11262 cast<VarDecl>(GD.getDecl()));
11263 if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
11264 ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
11265 *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
11266 HasRequiresUnifiedSharedMemory)) {
11267 DeferredGlobalVariables.insert(cast<VarDecl>(GD.getDecl()));
11268 return true;
11269 }
11270 return false;
11271}
11272
11273void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD,
11274 llvm::Constant *Addr) {
11275 if (CGM.getLangOpts().OMPTargetTriples.empty() &&
11276 !CGM.getLangOpts().OpenMPIsTargetDevice)
11277 return;
11278
11279 std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
11280 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
11281
11282 // If this is an 'extern' declaration we defer to the canonical definition and
11283 // do not emit an offloading entry.
11284 if (Res && *Res != OMPDeclareTargetDeclAttr::MT_Link &&
11285 VD->hasExternalStorage())
11286 return;
11287
11288 if (!Res) {
11289 if (CGM.getLangOpts().OpenMPIsTargetDevice) {
11290 // Register non-target variables being emitted in device code (debug info
11291 // may cause this).
11292 StringRef VarName = CGM.getMangledName(VD);
11293 EmittedNonTargetVariables.try_emplace(VarName, Addr);
11294 }
11295 return;
11296 }
11297
11298 auto AddrOfGlobal = [&VD, this]() { return CGM.GetAddrOfGlobal(VD); };
11299 auto LinkageForVariable = [&VD, this]() {
11300 return CGM.getLLVMLinkageVarDefinition(VD);
11301 };
11302
11303 std::vector<llvm::GlobalVariable *> GeneratedRefs;
11304 OMPBuilder.registerTargetGlobalVariable(
11305 convertCaptureClause(VD), convertDeviceClause(VD),
11306 VD->hasDefinition(CGM.getContext()) == VarDecl::DeclarationOnly,
11307 VD->isExternallyVisible(),
11308 getEntryInfoFromPresumedLoc(CGM, OMPBuilder,
11309 VD->getCanonicalDecl()->getBeginLoc()),
11310 CGM.getMangledName(VD), GeneratedRefs, CGM.getLangOpts().OpenMPSimd,
11311 CGM.getLangOpts().OMPTargetTriples, AddrOfGlobal, LinkageForVariable,
11312 CGM.getTypes().ConvertTypeForMem(
11313 CGM.getContext().getPointerType(VD->getType())),
11314 Addr);
11315
11316 for (auto *ref : GeneratedRefs)
11317 CGM.addCompilerUsedGlobal(ref);
11318}
11319
11320bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) {
11321 if (isa<FunctionDecl>(GD.getDecl()) ||
11322 isa<OMPDeclareReductionDecl>(GD.getDecl()))
11323 return emitTargetFunctions(GD);
11324
11325 return emitTargetGlobalVariable(GD);
11326}
11327
11328void CGOpenMPRuntime::emitDeferredTargetDecls() const {
11329 for (const VarDecl *VD : DeferredGlobalVariables) {
11330 std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
11331 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
11332 if (!Res)
11333 continue;
11334 if ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
11335 *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
11336 !HasRequiresUnifiedSharedMemory) {
11337 CGM.EmitGlobal(VD);
11338 } else {
11339 assert((*Res == OMPDeclareTargetDeclAttr::MT_Link ||
11340 ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
11341 *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
11342 HasRequiresUnifiedSharedMemory)) &&
11343 "Expected link clause or to clause with unified memory.");
11344 (void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
11345 }
11346 }
11347}
11348
11349void CGOpenMPRuntime::adjustTargetSpecificDataForLambdas(
11350 CodeGenFunction &CGF, const OMPExecutableDirective &D) const {
11351 assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) &&
11352 " Expected target-based directive.");
11353}
11354
11355void CGOpenMPRuntime::processRequiresDirective(const OMPRequiresDecl *D) {
11356 for (const OMPClause *Clause : D->clauselists()) {
11357 if (Clause->getClauseKind() == OMPC_unified_shared_memory) {
11358 HasRequiresUnifiedSharedMemory = true;
11359 OMPBuilder.Config.setHasRequiresUnifiedSharedMemory(true);
11360 } else if (const auto *AC =
11361 dyn_cast<OMPAtomicDefaultMemOrderClause>(Clause)) {
11362 switch (AC->getAtomicDefaultMemOrderKind()) {
11363 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_acq_rel:
11364 RequiresAtomicOrdering = llvm::AtomicOrdering::AcquireRelease;
11365 break;
11366 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_seq_cst:
11367 RequiresAtomicOrdering = llvm::AtomicOrdering::SequentiallyConsistent;
11368 break;
11369 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_relaxed:
11370 RequiresAtomicOrdering = llvm::AtomicOrdering::Monotonic;
11371 break;
11372 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_unknown:
11373 break;
11374 }
11375 }
11376 }
11377}
11378
11379llvm::AtomicOrdering CGOpenMPRuntime::getDefaultMemoryOrdering() const {
11380 return RequiresAtomicOrdering;
11381}
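// As an illustration, a translation unit containing
// \code
// #pragma omp requires atomic_default_mem_order(seq_cst)
// \endcode
// makes this query return llvm::AtomicOrdering::SequentiallyConsistent.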
11382
11383bool CGOpenMPRuntime::hasAllocateAttributeForGlobalVar(const VarDecl *VD,
11384 LangAS &AS) {
11385 if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())
11386 return false;
11387 const auto *A = VD->getAttr<OMPAllocateDeclAttr>();
11388 switch (A->getAllocatorType()) {
11389 case OMPAllocateDeclAttr::OMPNullMemAlloc:
11390 case OMPAllocateDeclAttr::OMPDefaultMemAlloc:
11391 // Not supported, fallback to the default mem space.
11392 case OMPAllocateDeclAttr::OMPLargeCapMemAlloc:
11393 case OMPAllocateDeclAttr::OMPCGroupMemAlloc:
11394 case OMPAllocateDeclAttr::OMPHighBWMemAlloc:
11395 case OMPAllocateDeclAttr::OMPLowLatMemAlloc:
11396 case OMPAllocateDeclAttr::OMPThreadMemAlloc:
11397 case OMPAllocateDeclAttr::OMPConstMemAlloc:
11398 case OMPAllocateDeclAttr::OMPPTeamMemAlloc:
11399 AS = LangAS::Default;
11400 return true;
11401 case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc:
11402 llvm_unreachable("Expected predefined allocator for the variables with the "
11403 "static storage.");
11404 }
11405 return false;
11406}
11407
11408bool CGOpenMPRuntime::hasRequiresUnifiedSharedMemory() const {
11409 return HasRequiresUnifiedSharedMemory;
11410}
11411
11412CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII(
11413 CodeGenModule &CGM)
11414 : CGM(CGM) {
11415 if (CGM.getLangOpts().OpenMPIsTargetDevice) {
11416 SavedShouldMarkAsGlobal = CGM.getOpenMPRuntime().ShouldMarkAsGlobal;
11417 CGM.getOpenMPRuntime().ShouldMarkAsGlobal = false;
11418 }
11419}
11420
11421CGOpenMPRuntime::DisableAutoDeclareTargetRAII::~DisableAutoDeclareTargetRAII() {
11422 if (CGM.getLangOpts().OpenMPIsTargetDevice)
11423 CGM.getOpenMPRuntime().ShouldMarkAsGlobal = SavedShouldMarkAsGlobal;
11424}
11425
11426bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) {
11427 if (!CGM.getLangOpts().OpenMPIsTargetDevice || !ShouldMarkAsGlobal)
11428 return true;
11429
11430 const auto *D = cast<FunctionDecl>(GD.getDecl());
11431 // Do not emit the function if it is marked as declare target, as it was
11432 // already emitted.
11433 if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(D)) {
11434 if (D->hasBody() && AlreadyEmittedTargetDecls.count(D) == 0) {
11435 if (auto *F = dyn_cast_or_null<llvm::Function>(
11436 CGM.GetGlobalValue(CGM.getMangledName(GD))))
11437 return !F->isDeclaration();
11438 return false;
11439 }
11440 return true;
11441 }
11442
11443 return !AlreadyEmittedTargetDecls.insert(D).second;
11444}
11445
11446void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF,
11447 const OMPExecutableDirective &D,
11448 SourceLocation Loc,
11449 llvm::Function *OutlinedFn,
11450 ArrayRef<llvm::Value *> CapturedVars) {
11451 if (!CGF.HaveInsertPoint())
11452 return;
11453
11454 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
11455 CodeGenFunction::RunCleanupsScope Scope(CGF);
11456
11457 // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn);
11458 llvm::Value *Args[] = {
11459 RTLoc,
11460 CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
11461 OutlinedFn};
11462 llvm::SmallVector<llvm::Value *, 16> RealArgs;
11463 RealArgs.append(std::begin(Args), std::end(Args));
11464 RealArgs.append(CapturedVars.begin(), CapturedVars.end());
11465
11466 llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
11467 CGM.getModule(), OMPRTL___kmpc_fork_teams);
11468 CGF.EmitRuntimeCall(RTLFn, RealArgs);
11469}
11470
11471void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
11472 const Expr *NumTeams,
11473 const Expr *ThreadLimit,
11474 SourceLocation Loc) {
11475 if (!CGF.HaveInsertPoint())
11476 return;
11477
11478 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
11479
11480 llvm::Value *NumTeamsVal =
11481 NumTeams
11482 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams),
11483 CGF.CGM.Int32Ty, /* isSigned = */ true)
11484 : CGF.Builder.getInt32(0);
11485
11486 llvm::Value *ThreadLimitVal =
11487 ThreadLimit
11488 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
11489 CGF.CGM.Int32Ty, /* isSigned = */ true)
11490 : CGF.Builder.getInt32(0);
11491
11492 // Build call __kmpc_push_num_teams(&loc, global_tid, num_teams, thread_limit)
11493 llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal,
11494 ThreadLimitVal};
11495 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
11496 CGM.getModule(), OMPRTL___kmpc_push_num_teams),
11497 PushNumTeamsArgs);
11498}
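// As an illustration, '#pragma omp teams num_teams(4) thread_limit(64)'
// lowers to a call along the lines of
// \code
// __kmpc_push_num_teams(&loc, gtid, 4, 64);
// \endcode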
11499
11500void CGOpenMPRuntime::emitThreadLimitClause(CodeGenFunction &CGF,
11501 const Expr *ThreadLimit,
11502 SourceLocation Loc) {
11503 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
11504 llvm::Value *ThreadLimitVal =
11505 ThreadLimit
11506 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
11507 CGF.CGM.Int32Ty, /* isSigned = */ true)
11508 : CGF.Builder.getInt32(0);
11509
11510 // Build call __kmpc_set_thread_limit(&loc, global_tid, thread_limit)
11511 llvm::Value *ThreadLimitArgs[] = {RTLoc, getThreadID(CGF, Loc),
11512 ThreadLimitVal};
11513 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
11514 CGM.getModule(), OMPRTL___kmpc_set_thread_limit),
11515 ThreadLimitArgs);
11516}
11517
11518void CGOpenMPRuntime::emitTargetDataCalls(
11519 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
11520 const Expr *Device, const RegionCodeGenTy &CodeGen,
11521 CGOpenMPRuntime::TargetDataInfo &Info) {
11522 if (!CGF.HaveInsertPoint())
11523 return;
11524
11525 // Action used to replace the default codegen action and turn privatization
11526 // off.
11527 PrePostActionTy NoPrivAction;
11528
11529 using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
11530
11531 llvm::Value *IfCondVal = nullptr;
11532 if (IfCond)
11533 IfCondVal = CGF.EvaluateExprAsBool(IfCond);
11534
11535 // Emit device ID if any.
11536 llvm::Value *DeviceID = nullptr;
11537 if (Device) {
11538 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
11539 CGF.Int64Ty, /*isSigned=*/true);
11540 } else {
11541 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
11542 }
11543
11544 // Fill up the arrays with all the mapped variables.
11545 MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
11546 auto GenMapInfoCB =
11547 [&](InsertPointTy CodeGenIP) -> llvm::OpenMPIRBuilder::MapInfosTy & {
11548 CGF.Builder.restoreIP(CodeGenIP);
11549 // Get map clause information.
11550 MappableExprsHandler MEHandler(D, CGF);
11551 MEHandler.generateAllInfo(CombinedInfo, OMPBuilder);
11552
11553 auto FillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) {
11554 return emitMappingInformation(CGF, OMPBuilder, MapExpr);
11555 };
11556 if (CGM.getCodeGenOpts().getDebugInfo() !=
11557 llvm::codegenoptions::NoDebugInfo) {
11558 CombinedInfo.Names.resize(CombinedInfo.Exprs.size());
11559 llvm::transform(CombinedInfo.Exprs, CombinedInfo.Names.begin(),
11560 FillInfoMap);
11561 }
11562
11563 return CombinedInfo;
11564 };
11565 using BodyGenTy = llvm::OpenMPIRBuilder::BodyGenTy;
11566 auto BodyCB = [&](InsertPointTy CodeGenIP, BodyGenTy BodyGenType) {
11567 CGF.Builder.restoreIP(CodeGenIP);
11568 switch (BodyGenType) {
11569 case BodyGenTy::Priv:
11570 if (!Info.CaptureDeviceAddrMap.empty())
11571 CodeGen(CGF);
11572 break;
11573 case BodyGenTy::DupNoPriv:
11574 if (!Info.CaptureDeviceAddrMap.empty()) {
11575 CodeGen.setAction(NoPrivAction);
11576 CodeGen(CGF);
11577 }
11578 break;
11579 case BodyGenTy::NoPriv:
11580 if (Info.CaptureDeviceAddrMap.empty()) {
11581 CodeGen.setAction(NoPrivAction);
11582 CodeGen(CGF);
11583 }
11584 break;
11585 }
11586 return InsertPointTy(CGF.Builder.GetInsertBlock(),
11587 CGF.Builder.GetInsertPoint());
11588 };
11589
11590 auto DeviceAddrCB = [&](unsigned int I, llvm::Value *NewDecl) {
11591 if (const ValueDecl *DevVD = CombinedInfo.DevicePtrDecls[I]) {
11592 Info.CaptureDeviceAddrMap.try_emplace(DevVD, NewDecl);
11593 }
11594 };
11595
11596 auto CustomMapperCB = [&](unsigned int I) {
11597 llvm::Function *MFunc = nullptr;
11598 if (CombinedInfo.Mappers[I]) {
11599 Info.HasMapper = true;
11600 MFunc = CGF.CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc(
11601 cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I]));
11602 }
11603 return MFunc;
11604 };
11605
11606 // Source location for the ident struct
11607 llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
11608
11609 InsertPointTy AllocaIP(CGF.AllocaInsertPt->getParent(),
11610 CGF.AllocaInsertPt->getIterator());
11611 InsertPointTy CodeGenIP(CGF.Builder.GetInsertBlock(),
11612 CGF.Builder.GetInsertPoint());
11613 llvm::OpenMPIRBuilder::LocationDescription OmpLoc(CodeGenIP);
11614 llvm::OpenMPIRBuilder::InsertPointTy AfterIP =
11615 cantFail(OMPBuilder.createTargetData(
11616 OmpLoc, AllocaIP, CodeGenIP, DeviceID, IfCondVal, Info, GenMapInfoCB,
11617 CustomMapperCB,
11618 /*MapperFunc=*/nullptr, BodyCB, DeviceAddrCB, RTLoc));
11619 CGF.Builder.restoreIP(AfterIP);
11620}
11621
11622void CGOpenMPRuntime::emitTargetDataStandAloneCall(
11623 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
11624 const Expr *Device) {
11625 if (!CGF.HaveInsertPoint())
11626 return;
11627
11628 assert((isa<OMPTargetEnterDataDirective>(D) ||
11629 isa<OMPTargetExitDataDirective>(D) ||
11630 isa<OMPTargetUpdateDirective>(D)) &&
11631 "Expecting either target enter, exit data, or update directives.");
11632
11633 CodeGenFunction::OMPTargetDataInfo InputInfo;
11634 llvm::Value *MapTypesArray = nullptr;
11635 llvm::Value *MapNamesArray = nullptr;
11636 // Generate the code for the opening of the data environment.
11637 auto &&ThenGen = [this, &D, Device, &InputInfo, &MapTypesArray,
11638 &MapNamesArray](CodeGenFunction &CGF, PrePostActionTy &) {
11639 // Emit device ID if any.
11640 llvm::Value *DeviceID = nullptr;
11641 if (Device) {
11642 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
11643 CGF.Int64Ty, /*isSigned=*/true);
11644 } else {
11645 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
11646 }
11647
11648 // Emit the number of elements in the offloading arrays.
11649 llvm::Constant *PointerNum =
11650 CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);
11651
11652 // Source location for the ident struct
11653 llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
11654
11655 SmallVector<llvm::Value *, 13> OffloadingArgs(
11656 {RTLoc, DeviceID, PointerNum,
11657 InputInfo.BasePointersArray.emitRawPointer(CGF),
11658 InputInfo.PointersArray.emitRawPointer(CGF),
11659 InputInfo.SizesArray.emitRawPointer(CGF), MapTypesArray, MapNamesArray,
11660 InputInfo.MappersArray.emitRawPointer(CGF)});
11661
11662 // Select the right runtime function call for each standalone
11663 // directive.
11664 const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
11665 RuntimeFunction RTLFn;
11666 switch (D.getDirectiveKind()) {
11667 case OMPD_target_enter_data:
11668 RTLFn = HasNowait ? OMPRTL___tgt_target_data_begin_nowait_mapper
11669 : OMPRTL___tgt_target_data_begin_mapper;
11670 break;
11671 case OMPD_target_exit_data:
11672 RTLFn = HasNowait ? OMPRTL___tgt_target_data_end_nowait_mapper
11673 : OMPRTL___tgt_target_data_end_mapper;
11674 break;
11675 case OMPD_target_update:
11676 RTLFn = HasNowait ? OMPRTL___tgt_target_data_update_nowait_mapper
11677 : OMPRTL___tgt_target_data_update_mapper;
11678 break;
11679 case OMPD_parallel:
11680 case OMPD_for:
11681 case OMPD_parallel_for:
11682 case OMPD_parallel_master:
11683 case OMPD_parallel_sections:
11684 case OMPD_for_simd:
11685 case OMPD_parallel_for_simd:
11686 case OMPD_cancel:
11687 case OMPD_cancellation_point:
11688 case OMPD_ordered:
11689 case OMPD_threadprivate:
11690 case OMPD_allocate:
11691 case OMPD_task:
11692 case OMPD_simd:
11693 case OMPD_tile:
11694 case OMPD_unroll:
11695 case OMPD_sections:
11696 case OMPD_section:
11697 case OMPD_single:
11698 case OMPD_master:
11699 case OMPD_critical:
11700 case OMPD_taskyield:
11701 case OMPD_barrier:
11702 case OMPD_taskwait:
11703 case OMPD_taskgroup:
11704 case OMPD_atomic:
11705 case OMPD_flush:
11706 case OMPD_depobj:
11707 case OMPD_scan:
11708 case OMPD_teams:
11709 case OMPD_target_data:
11710 case OMPD_distribute:
11711 case OMPD_distribute_simd:
11712 case OMPD_distribute_parallel_for:
11713 case OMPD_distribute_parallel_for_simd:
11714 case OMPD_teams_distribute:
11715 case OMPD_teams_distribute_simd:
11716 case OMPD_teams_distribute_parallel_for:
11717 case OMPD_teams_distribute_parallel_for_simd:
11718 case OMPD_declare_simd:
11719 case OMPD_declare_variant:
11720 case OMPD_begin_declare_variant:
11721 case OMPD_end_declare_variant:
11722 case OMPD_declare_target:
11723 case OMPD_end_declare_target:
11724 case OMPD_declare_reduction:
11725 case OMPD_declare_mapper:
11726 case OMPD_taskloop:
11727 case OMPD_taskloop_simd:
11728 case OMPD_master_taskloop:
11729 case OMPD_master_taskloop_simd:
11730 case OMPD_parallel_master_taskloop:
11731 case OMPD_parallel_master_taskloop_simd:
11732 case OMPD_target:
11733 case OMPD_target_simd:
11734 case OMPD_target_teams_distribute:
11735 case OMPD_target_teams_distribute_simd:
11736 case OMPD_target_teams_distribute_parallel_for:
11737 case OMPD_target_teams_distribute_parallel_for_simd:
11738 case OMPD_target_teams:
11739 case OMPD_target_parallel:
11740 case OMPD_target_parallel_for:
11741 case OMPD_target_parallel_for_simd:
11742 case OMPD_requires:
11743 case OMPD_metadirective:
11744 case OMPD_unknown:
11745 default:
11746 llvm_unreachable("Unexpected standalone target data directive.");
11747 break;
11748 }
11749 if (HasNowait) {
11750 OffloadingArgs.push_back(llvm::Constant::getNullValue(CGF.Int32Ty));
11751 OffloadingArgs.push_back(llvm::Constant::getNullValue(CGF.VoidPtrTy));
11752 OffloadingArgs.push_back(llvm::Constant::getNullValue(CGF.Int32Ty));
11753 OffloadingArgs.push_back(llvm::Constant::getNullValue(CGF.VoidPtrTy));
11754 }
11755 CGF.EmitRuntimeCall(
11756 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), RTLFn),
11757 OffloadingArgs);
11758 };
11759
11760 auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
11761 &MapNamesArray](CodeGenFunction &CGF,
11762 PrePostActionTy &) {
11763 // Fill up the arrays with all the mapped variables.
11764 MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
11765 CGOpenMPRuntime::TargetDataInfo Info;
11766 MappableExprsHandler MEHandler(D, CGF);
11767 genMapInfo(MEHandler, CGF, CombinedInfo, OMPBuilder);
11768 emitOffloadingArraysAndArgs(CGF, CombinedInfo, Info, OMPBuilder,
11769 /*IsNonContiguous=*/true, /*ForEndCall=*/false);
11770
11771 bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() ||
11772 D.hasClausesOfKind<OMPNowaitClause>();
11773
11774 InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
11775 InputInfo.BasePointersArray = Address(Info.RTArgs.BasePointersArray,
11776 CGF.VoidPtrTy, CGM.getPointerAlign());
11777 InputInfo.PointersArray = Address(Info.RTArgs.PointersArray, CGF.VoidPtrTy,
11778 CGM.getPointerAlign());
11779 InputInfo.SizesArray =
11780 Address(Info.RTArgs.SizesArray, CGF.Int64Ty, CGM.getPointerAlign());
11781 InputInfo.MappersArray =
11782 Address(Info.RTArgs.MappersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
11783 MapTypesArray = Info.RTArgs.MapTypesArray;
11784 MapNamesArray = Info.RTArgs.MapNamesArray;
11785 if (RequiresOuterTask)
11786 CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
11787 else
11788 emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
11789 };
11790
11791 if (IfCond) {
11792 emitIfClause(CGF, IfCond, TargetThenGen,
11793 [](CodeGenFunction &CGF, PrePostActionTy &) {});
11794 } else {
11795 RegionCodeGenTy ThenRCG(TargetThenGen);
11796 ThenRCG(CGF);
11797 }
11798}
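// Example (illustrative sketch, not part of the runtime): for a standalone
// data directive such as
//   #pragma omp target update to(x) nowait
// the switch above picks __tgt_target_data_update_nowait_mapper, the nowait
// path appends four null task-dependence arguments, and the presence of a
// 'nowait' or 'depend' clause routes codegen through an outer task via
// EmitOMPTargetTaskBasedDirective.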
11799
11800namespace {
11801/// Kind of parameter in a function with 'declare simd' directive.
11802enum ParamKindTy {
11803 Linear,
11804 LinearRef,
11805 LinearUVal,
11806 LinearVal,
11807 Uniform,
11808 Vector,
11809};
11810/// Attribute set of the parameter.
11811struct ParamAttrTy {
11812 ParamKindTy Kind = Vector;
11813 llvm::APSInt StrideOrArg;
11814 llvm::APSInt Alignment;
11815 bool HasVarStride = false;
11816};
11817} // namespace
11818
11819static unsigned evaluateCDTSize(const FunctionDecl *FD,
11820 ArrayRef<ParamAttrTy> ParamAttrs) {
11821 // Every vector variant of a SIMD-enabled function has a vector length (VLEN).
11822 // If OpenMP clause "simdlen" is used, the VLEN is the value of the argument
11823 // of that clause. The VLEN value must be a power of 2.
11824 // Otherwise, the notion of the function's "characteristic data type" (CDT)
11825 // is used to compute the vector length.
11826 // CDT is defined in the following order:
11827 // a) For a non-void function, the CDT is the return type.
11828 // b) If the function has any non-uniform, non-linear parameters, then the
11829 // CDT is the type of the first such parameter.
11830 // c) If the CDT determined by a) or b) above is a struct, union, or class
11831 // type which is passed by value (except for the type that maps to the
11832 // built-in complex data type), the characteristic data type is int.
11833 // d) If none of the above three cases is applicable, the CDT is int.
11834 // The VLEN is then determined based on the CDT and the size of vector
11835 // register of that ISA for which current vector version is generated. The
11836 // VLEN is computed using the formula below:
11837 // VLEN = sizeof(vector_register) / sizeof(CDT),
11838 // where the vector register size is specified in section 3.2.1 (Registers
11839 // and the Stack Frame) of the original AMD64 ABI document.
11840 QualType RetType = FD->getReturnType();
11841 if (RetType.isNull())
11842 return 0;
11843 ASTContext &C = FD->getASTContext();
11844 QualType CDT;
11845 if (!RetType.isNull() && !RetType->isVoidType()) {
11846 CDT = RetType;
11847 } else {
11848 unsigned Offset = 0;
11849 if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) {
11850 if (ParamAttrs[Offset].Kind == Vector)
11851 CDT = C.getPointerType(C.getCanonicalTagType(MD->getParent()));
11852 ++Offset;
11853 }
11854 if (CDT.isNull()) {
11855 for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
11856 if (ParamAttrs[I + Offset].Kind == Vector) {
11857 CDT = FD->getParamDecl(I)->getType();
11858 break;
11859 }
11860 }
11861 }
11862 }
11863 if (CDT.isNull())
11864 CDT = C.IntTy;
11865 CDT = CDT->getCanonicalTypeUnqualified();
11866 if (CDT->isRecordType() || CDT->isUnionType())
11867 CDT = C.IntTy;
11868 return C.getTypeSize(CDT);
11869}
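// Example (illustrative sketch): for
//   #pragma omp declare simd
//   double add(double a, double b);
// the CDT is the non-void return type 'double' (case a), so an ISA with
// 128-bit vector registers yields VLEN = 128 / 64 = 2 lanes.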
11870
11871/// Mangle the parameter part of the vector function name according to the
11872/// parameters' OpenMP classification. The mangling scheme is defined in
11873/// section 4.5 of the AAVFABI (2021Q1).
11874static std::string mangleVectorParameters(ArrayRef<ParamAttrTy> ParamAttrs) {
11875 SmallString<256> Buffer;
11876 llvm::raw_svector_ostream Out(Buffer);
11877 for (const auto &ParamAttr : ParamAttrs) {
11878 switch (ParamAttr.Kind) {
11879 case Linear:
11880 Out << 'l';
11881 break;
11882 case LinearRef:
11883 Out << 'R';
11884 break;
11885 case LinearUVal:
11886 Out << 'U';
11887 break;
11888 case LinearVal:
11889 Out << 'L';
11890 break;
11891 case Uniform:
11892 Out << 'u';
11893 break;
11894 case Vector:
11895 Out << 'v';
11896 break;
11897 }
11898 if (ParamAttr.HasVarStride)
11899 Out << "s" << ParamAttr.StrideOrArg;
11900 else if (ParamAttr.Kind == Linear || ParamAttr.Kind == LinearRef ||
11901 ParamAttr.Kind == LinearUVal || ParamAttr.Kind == LinearVal) {
11902 // Don't print the step value if it is not present or if it is
11903 // equal to 1.
11904 if (ParamAttr.StrideOrArg < 0)
11905 Out << 'n' << -ParamAttr.StrideOrArg;
11906 else if (ParamAttr.StrideOrArg != 1)
11907 Out << ParamAttr.StrideOrArg;
11908 }
11909
11910 if (!!ParamAttr.Alignment)
11911 Out << 'a' << ParamAttr.Alignment;
11912 }
11913
11914 return std::string(Out.str());
11915}
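// Example (illustrative sketch): for
//   #pragma omp declare simd uniform(n) linear(i : 4) aligned(p : 32)
//   void foo(int *p, int i, int n);
// the parameter part mangles as "va32l4u": 'v' plus 'a32' for the vector,
// 32-byte-aligned pointer p, 'l4' for linear i with step 4, and 'u' for the
// uniform n.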
11916
11917static void
11918emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn,
11919 const llvm::APSInt &VLENVal,
11920 ArrayRef<ParamAttrTy> ParamAttrs,
11921 OMPDeclareSimdDeclAttr::BranchStateTy State) {
11922 struct ISADataTy {
11923 char ISA;
11924 unsigned VecRegSize;
11925 };
11926 ISADataTy ISAData[] = {
11927 {
11928 'b', 128
11929 }, // SSE
11930 {
11931 'c', 256
11932 }, // AVX
11933 {
11934 'd', 256
11935 }, // AVX2
11936 {
11937 'e', 512
11938 }, // AVX512
11939 };
11940 llvm::SmallVector<char, 2> Masked;
11941 switch (State) {
11942 case OMPDeclareSimdDeclAttr::BS_Undefined:
11943 Masked.push_back('N');
11944 Masked.push_back('M');
11945 break;
11946 case OMPDeclareSimdDeclAttr::BS_Notinbranch:
11947 Masked.push_back('N');
11948 break;
11949 case OMPDeclareSimdDeclAttr::BS_Inbranch:
11950 Masked.push_back('M');
11951 break;
11952 }
11953 for (char Mask : Masked) {
11954 for (const ISADataTy &Data : ISAData) {
11955 SmallString<256> Buffer;
11956 llvm::raw_svector_ostream Out(Buffer);
11957 Out << "_ZGV" << Data.ISA << Mask;
11958 if (!VLENVal) {
11959 unsigned NumElts = evaluateCDTSize(FD, ParamAttrs);
11960 assert(NumElts && "Non-zero simdlen/cdtsize expected");
11961 Out << llvm::APSInt::getUnsigned(Data.VecRegSize / NumElts);
11962 } else {
11963 Out << VLENVal;
11964 }
11965 Out << mangleVectorParameters(ParamAttrs);
11966 Out << '_' << Fn->getName();
11967 Fn->addFnAttr(Out.str());
11968 }
11969 }
11970}
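// Example (illustrative sketch): for 'double add(double, double)' with no
// 'simdlen' and an undefined branch state, this adds attributes from
// "_ZGVbN2vv_add" / "_ZGVbM2vv_add" for SSE (128 / 64 = 2 lanes) up to
// "_ZGVeN8vv_add" / "_ZGVeM8vv_add" for AVX512 (512 / 64 = 8 lanes).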
11971
11972// These are the functions needed to mangle the names of the
11973// vector functions generated by the compiler, according to the rules
11974// defined in the "Vector Function ABI specifications for AArch64",
11975// available at
11976// https://developer.arm.com/products/software-development-tools/hpc/arm-compiler-for-hpc/vector-function-abi.
11977
11978/// Maps To Vector (MTV), as defined in 4.1.1 of the AAVFABI (2021Q1).
11979static bool getAArch64MTV(QualType QT, ParamKindTy Kind) {
11980 QT = QT.getCanonicalType();
11981
11982 if (QT->isVoidType())
11983 return false;
11984
11985 if (Kind == ParamKindTy::Uniform)
11986 return false;
11987
11988 if (Kind == ParamKindTy::LinearUVal || Kind == ParamKindTy::LinearRef)
11989 return false;
11990
11991 if ((Kind == ParamKindTy::Linear || Kind == ParamKindTy::LinearVal) &&
11992 !QT->isReferenceType())
11993 return false;
11994
11995 return true;
11996}
11997
11998/// Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI.
11999static bool getAArch64PBV(QualType QT, ASTContext &C) {
12000 QT = QT.getCanonicalType();
12001 unsigned Size = C.getTypeSize(QT);
12002
12003 // Only scalars and complex types at most 16 bytes wide set PBV to true.
12004 if (Size != 8 && Size != 16 && Size != 32 && Size != 64 && Size != 128)
12005 return false;
12006
12007 if (QT->isFloatingType())
12008 return true;
12009
12010 if (QT->isIntegerType())
12011 return true;
12012
12013 if (QT->isPointerType())
12014 return true;
12015
12016 // TODO: Add support for complex types (section 3.1.2, item 2).
12017
12018 return false;
12019}
12020
12021/// Computes the lane size (LS) of a return type or of an input parameter,
12022/// as defined by `LS(P)` in 3.2.1 of the AAVFABI.
12023/// TODO: Add support for references, section 3.2.1, item 1.
12024static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C) {
12025 if (!getAArch64MTV(QT, Kind) && QT.getCanonicalType()->isPointerType()) {
12026 QualType PTy = QT.getCanonicalType()->getPointeeType();
12027 if (getAArch64PBV(PTy, C))
12028 return C.getTypeSize(PTy);
12029 }
12030 if (getAArch64PBV(QT, C))
12031 return C.getTypeSize(QT);
12032
12033 return C.getTypeSize(C.getUIntPtrType());
12034}
12035
12036// Get Narrowest Data Size (NDS) and Widest Data Size (WDS) from the
12037// signature of the scalar function, as defined in 3.2.2 of the
12038// AAVFABI.
12039static std::tuple<unsigned, unsigned, bool>
12040getNDSWDS(const FunctionDecl *FD, ArrayRef<ParamAttrTy> ParamAttrs) {
12041 QualType RetType = FD->getReturnType().getCanonicalType();
12042
12043 ASTContext &C = FD->getASTContext();
12044
12045 bool OutputBecomesInput = false;
12046
12047 llvm::SmallVector<unsigned, 8> Sizes;
12048 if (!RetType->isVoidType()) {
12049 Sizes.push_back(getAArch64LS(RetType, ParamKindTy::Vector, C));
12050 if (!getAArch64PBV(RetType, C) && getAArch64MTV(RetType, {}))
12051 OutputBecomesInput = true;
12052 }
12053 for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
12054 QualType QT = FD->getParamDecl(I)->getType().getCanonicalType();
12055 Sizes.push_back(getAArch64LS(QT, ParamAttrs[I].Kind, C));
12056 }
12057
12058 assert(!Sizes.empty() && "Unable to determine NDS and WDS.");
12059 // The LS of a function parameter / return value can only be a power
12060 // of 2, starting from 8 bits, up to 128.
12061 assert(llvm::all_of(Sizes,
12062 [](unsigned Size) {
12063 return Size == 8 || Size == 16 || Size == 32 ||
12064 Size == 64 || Size == 128;
12065 }) &&
12066 "Invalid size");
12067
12068 return std::make_tuple(*llvm::min_element(Sizes), *llvm::max_element(Sizes),
12069 OutputBecomesInput);
12070}
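// Example (illustrative sketch): for 'double foo(float x, short y)' with both
// parameters classified as vector, the lane sizes are {64, 32, 16}, so
// NDS = 16 and WDS = 64.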
12071
12072// Function used to add the attribute. The parameter `VLEN` is
12073// templated to allow the use of "x" when targeting scalable functions
12074// for SVE.
12075template <typename T>
12076static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix,
12077 char ISA, StringRef ParSeq,
12078 StringRef MangledName, bool OutputBecomesInput,
12079 llvm::Function *Fn) {
12080 SmallString<256> Buffer;
12081 llvm::raw_svector_ostream Out(Buffer);
12082 Out << Prefix << ISA << LMask << VLEN;
12083 if (OutputBecomesInput)
12084 Out << "v";
12085 Out << ParSeq << "_" << MangledName;
12086 Fn->addFnAttr(Out.str());
12087}
12088
12089// Helper function to generate the Advanced SIMD names depending on
12090// the value of the NDS when simdlen is not present.
12091static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask,
12092 StringRef Prefix, char ISA,
12093 StringRef ParSeq, StringRef MangledName,
12094 bool OutputBecomesInput,
12095 llvm::Function *Fn) {
12096 switch (NDS) {
12097 case 8:
12098 addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
12099 OutputBecomesInput, Fn);
12100 addAArch64VectorName(16, Mask, Prefix, ISA, ParSeq, MangledName,
12101 OutputBecomesInput, Fn);
12102 break;
12103 case 16:
12104 addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
12105 OutputBecomesInput, Fn);
12106 addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
12107 OutputBecomesInput, Fn);
12108 break;
12109 case 32:
12110 addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
12111 OutputBecomesInput, Fn);
12112 addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
12113 OutputBecomesInput, Fn);
12114 break;
12115 case 64:
12116 case 128:
12117 addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
12118 OutputBecomesInput, Fn);
12119 break;
12120 default:
12121 llvm_unreachable("Scalar type is too wide.");
12122 }
12123}
12124
12125/// Emit vector function attributes for AArch64, as defined in the AAVFABI.
12126static void emitAArch64DeclareSimdFunction(
12127 CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN,
12128 ArrayRef<ParamAttrTy> ParamAttrs,
12129 OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName,
12130 char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc) {
12131
12132 // Get basic data for building the vector signature.
12133 const auto Data = getNDSWDS(FD, ParamAttrs);
12134 const unsigned NDS = std::get<0>(Data);
12135 const unsigned WDS = std::get<1>(Data);
12136 const bool OutputBecomesInput = std::get<2>(Data);
12137
12138 // Check the values provided via `simdlen` by the user.
12139 // 1. A `simdlen(1)` doesn't produce vector signatures,
12140 if (UserVLEN == 1) {
12141 CGM.getDiags().Report(SLoc, diag::warn_simdlen_1_no_effect);
12142 return;
12143 }
12144
12145 // 2. Section 3.3.1, item 1: user input must be a power of 2 for
12146 // Advanced SIMD output.
12147 if (ISA == 'n' && UserVLEN && !llvm::isPowerOf2_32(UserVLEN)) {
12148 CGM.getDiags().Report(SLoc, diag::warn_simdlen_requires_power_of_2);
12149 return;
12150 }
12151
12152 // 3. Section 3.4.1: SVE fixed lengths must obey the architectural
12153 // limits.
12154 if (ISA == 's' && UserVLEN != 0) {
12155 if ((UserVLEN * WDS > 2048) || (UserVLEN * WDS % 128 != 0)) {
12156 CGM.getDiags().Report(SLoc, diag::warn_simdlen_must_fit_lanes) << WDS;
12157 return;
12158 }
12159 }
12160
12161 // Sort out parameter sequence.
12162 const std::string ParSeq = mangleVectorParameters(ParamAttrs);
12163 StringRef Prefix = "_ZGV";
12164 // Generate simdlen from user input (if any).
12165 if (UserVLEN) {
12166 if (ISA == 's') {
12167 // SVE generates only a masked function.
12168 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
12169 OutputBecomesInput, Fn);
12170 } else {
12171 assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
12172 // Advanced SIMD generates one or two functions, depending on
12173 // the `[not]inbranch` clause.
12174 switch (State) {
12175 case OMPDeclareSimdDeclAttr::BS_Undefined:
12176 addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
12177 OutputBecomesInput, Fn);
12178 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
12179 OutputBecomesInput, Fn);
12180 break;
12181 case OMPDeclareSimdDeclAttr::BS_Notinbranch:
12182 addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
12183 OutputBecomesInput, Fn);
12184 break;
12185 case OMPDeclareSimdDeclAttr::BS_Inbranch:
12186 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
12187 OutputBecomesInput, Fn);
12188 break;
12189 }
12190 }
12191 } else {
12192 // If no user simdlen is provided, follow the AAVFABI rules for
12193 // generating the vector length.
12194 if (ISA == 's') {
12195 // SVE, section 3.4.1, item 1.
12196 addAArch64VectorName("x", "M", Prefix, ISA, ParSeq, MangledName,
12197 OutputBecomesInput, Fn);
12198 } else {
12199 assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
12200 // Advanced SIMD, Section 3.3.1 of the AAVFABI, generates one or
12201 // two vector names depending on the use of the clause
12202 // `[not]inbranch`.
12203 switch (State) {
12204 case OMPDeclareSimdDeclAttr::BS_Undefined:
12205 addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
12206 OutputBecomesInput, Fn);
12207 addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
12208 OutputBecomesInput, Fn);
12209 break;
12210 case OMPDeclareSimdDeclAttr::BS_Notinbranch:
12211 addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
12212 OutputBecomesInput, Fn);
12213 break;
12214 case OMPDeclareSimdDeclAttr::BS_Inbranch:
12215 addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
12216 OutputBecomesInput, Fn);
12217 break;
12218 }
12219 }
12220 }
12221}
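// Example (illustrative sketch): for 'double bar(double x)' without 'simdlen'
// (NDS = 64), Advanced SIMD emits "_ZGVnN2v_bar" (plus the masked
// "_ZGVnM2v_bar" when no '[not]inbranch' clause is given), while SVE emits
// the scalable "_ZGVsMxv_bar".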
12222
12223void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD,
12224 llvm::Function *Fn) {
12225 ASTContext &C = CGM.getContext();
12226 FD = FD->getMostRecentDecl();
12227 while (FD) {
12228 // Map params to their positions in function decl.
12229 llvm::DenseMap<const Decl *, unsigned> ParamPositions;
12230 if (isa<CXXMethodDecl>(FD))
12231 ParamPositions.try_emplace(FD, 0);
12232 unsigned ParamPos = ParamPositions.size();
12233 for (const ParmVarDecl *P : FD->parameters()) {
12234 ParamPositions.try_emplace(P->getCanonicalDecl(), ParamPos);
12235 ++ParamPos;
12236 }
12237 for (const auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) {
12238 llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size());
12239 // Mark uniform parameters.
12240 for (const Expr *E : Attr->uniforms()) {
12241 E = E->IgnoreParenImpCasts();
12242 unsigned Pos;
12243 if (isa<CXXThisExpr>(E)) {
12244 Pos = ParamPositions[FD];
12245 } else {
12246 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
12247 ->getCanonicalDecl();
12248 auto It = ParamPositions.find(PVD);
12249 assert(It != ParamPositions.end() && "Function parameter not found");
12250 Pos = It->second;
12251 }
12252 ParamAttrs[Pos].Kind = Uniform;
12253 }
12254 // Get alignment info.
12255 auto *NI = Attr->alignments_begin();
12256 for (const Expr *E : Attr->aligneds()) {
12257 E = E->IgnoreParenImpCasts();
12258 unsigned Pos;
12259 QualType ParmTy;
12260 if (isa<CXXThisExpr>(E)) {
12261 Pos = ParamPositions[FD];
12262 ParmTy = E->getType();
12263 } else {
12264 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
12265 ->getCanonicalDecl();
12266 auto It = ParamPositions.find(PVD);
12267 assert(It != ParamPositions.end() && "Function parameter not found");
12268 Pos = It->second;
12269 ParmTy = PVD->getType();
12270 }
12271 ParamAttrs[Pos].Alignment =
12272 (*NI)
12273 ? (*NI)->EvaluateKnownConstInt(C)
12274 : llvm::APSInt::getUnsigned(
12275 C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy))
12276 .getQuantity());
12277 ++NI;
12278 }
12279 // Mark linear parameters.
12280 auto *SI = Attr->steps_begin();
12281 auto *MI = Attr->modifiers_begin();
12282 for (const Expr *E : Attr->linears()) {
12283 E = E->IgnoreParenImpCasts();
12284 unsigned Pos;
12285 bool IsReferenceType = false;
12286 // Rescaling factor needed to compute the linear parameter
12287 // value in the mangled name.
12288 unsigned PtrRescalingFactor = 1;
12289 if (isa<CXXThisExpr>(E)) {
12290 Pos = ParamPositions[FD];
12291 auto *P = cast<PointerType>(E->getType());
12292 PtrRescalingFactor = CGM.getContext()
12293 .getTypeSizeInChars(P->getPointeeType())
12294 .getQuantity();
12295 } else {
12296 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
12297 ->getCanonicalDecl();
12298 auto It = ParamPositions.find(PVD);
12299 assert(It != ParamPositions.end() && "Function parameter not found");
12300 Pos = It->second;
12301 if (auto *P = dyn_cast<PointerType>(PVD->getType()))
12302 PtrRescalingFactor = CGM.getContext()
12303 .getTypeSizeInChars(P->getPointeeType())
12304 .getQuantity();
12305 else if (PVD->getType()->isReferenceType()) {
12306 IsReferenceType = true;
12307 PtrRescalingFactor =
12308 CGM.getContext()
12309 .getTypeSizeInChars(PVD->getType().getNonReferenceType())
12310 .getQuantity();
12311 }
12312 }
12313 ParamAttrTy &ParamAttr = ParamAttrs[Pos];
12314 if (*MI == OMPC_LINEAR_ref)
12315 ParamAttr.Kind = LinearRef;
12316 else if (*MI == OMPC_LINEAR_uval)
12317 ParamAttr.Kind = LinearUVal;
12318 else if (IsReferenceType)
12319 ParamAttr.Kind = LinearVal;
12320 else
12321 ParamAttr.Kind = Linear;
12322 // Assuming a stride of 1, for `linear` without modifiers.
12323 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(1);
12324 if (*SI) {
12325 Expr::EvalResult Result;
12326 if (!(*SI)->EvaluateAsInt(Result, C, Expr::SE_AllowSideEffects)) {
12327 if (const auto *DRE =
12328 cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) {
12329 if (const auto *StridePVD =
12330 dyn_cast<ParmVarDecl>(DRE->getDecl())) {
12331 ParamAttr.HasVarStride = true;
12332 auto It = ParamPositions.find(StridePVD->getCanonicalDecl());
12333 assert(It != ParamPositions.end() &&
12334 "Function parameter not found");
12335 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(It->second);
12336 }
12337 }
12338 } else {
12339 ParamAttr.StrideOrArg = Result.Val.getInt();
12340 }
12341 }
12342 // If we are using a linear clause on a pointer, we need to
12343 // rescale the value of linear_step with the byte size of the
12344 // pointee type.
12345 if (!ParamAttr.HasVarStride &&
12346 (ParamAttr.Kind == Linear || ParamAttr.Kind == LinearRef))
12347 ParamAttr.StrideOrArg = ParamAttr.StrideOrArg * PtrRescalingFactor;
12348 ++SI;
12349 ++MI;
12350 }
12351 llvm::APSInt VLENVal;
12352 SourceLocation ExprLoc;
12353 const Expr *VLENExpr = Attr->getSimdlen();
12354 if (VLENExpr) {
12355 VLENVal = VLENExpr->EvaluateKnownConstInt(C);
12356 ExprLoc = VLENExpr->getExprLoc();
12357 }
12358 OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState();
12359 if (CGM.getTriple().isX86()) {
12360 emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State);
12361 } else if (CGM.getTriple().getArch() == llvm::Triple::aarch64) {
12362 unsigned VLEN = VLENVal.getExtValue();
12363 StringRef MangledName = Fn->getName();
12364 if (CGM.getTarget().hasFeature("sve"))
12365 emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
12366 MangledName, 's', 128, Fn, ExprLoc);
12367 else if (CGM.getTarget().hasFeature("neon"))
12368 emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
12369 MangledName, 'n', 128, Fn, ExprLoc);
12370 }
12371 }
12372 FD = FD->getPreviousDecl();
12373 }
12374}
12375
12376namespace {
12377/// Cleanup action for doacross support.
12378class DoacrossCleanupTy final : public EHScopeStack::Cleanup {
12379public:
12380 static const int DoacrossFinArgs = 2;
12381
12382private:
12383 llvm::FunctionCallee RTLFn;
12384 llvm::Value *Args[DoacrossFinArgs];
12385
12386public:
12387 DoacrossCleanupTy(llvm::FunctionCallee RTLFn,
12388 ArrayRef<llvm::Value *> CallArgs)
12389 : RTLFn(RTLFn) {
12390 assert(CallArgs.size() == DoacrossFinArgs);
12391 std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
12392 }
12393 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
12394 if (!CGF.HaveInsertPoint())
12395 return;
12396 CGF.EmitRuntimeCall(RTLFn, Args);
12397 }
12398};
12399} // namespace
12400
12401void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF,
12402 const OMPLoopDirective &D,
12403 ArrayRef<Expr *> NumIterations) {
12404 if (!CGF.HaveInsertPoint())
12405 return;
12406
12407 ASTContext &C = CGM.getContext();
12408 QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
12409 RecordDecl *RD;
12410 if (KmpDimTy.isNull()) {
12411 // Build struct kmp_dim { // loop bounds info casted to kmp_int64
12412 // kmp_int64 lo; // lower
12413 // kmp_int64 up; // upper
12414 // kmp_int64 st; // stride
12415 // };
12416 RD = C.buildImplicitRecord("kmp_dim");
12417 RD->startDefinition();
12418 addFieldToRecordDecl(C, RD, Int64Ty);
12419 addFieldToRecordDecl(C, RD, Int64Ty);
12420 addFieldToRecordDecl(C, RD, Int64Ty);
12421 RD->completeDefinition();
12422 KmpDimTy = C.getCanonicalTagType(RD);
12423 } else {
12424 RD = KmpDimTy->castAsRecordDecl();
12425 }
12426 llvm::APInt Size(/*numBits=*/32, NumIterations.size());
12427 QualType ArrayTy = C.getConstantArrayType(KmpDimTy, Size, nullptr,
12428 ArraySizeModifier::Normal, 0);
12429
12430 Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
12431 CGF.EmitNullInitialization(DimsAddr, ArrayTy);
12432 enum { LowerFD = 0, UpperFD, StrideFD };
12433 // Fill dims with data.
12434 for (unsigned I = 0, E = NumIterations.size(); I < E; ++I) {
12435 LValue DimsLVal = CGF.MakeAddrLValue(
12436 CGF.Builder.CreateConstArrayGEP(DimsAddr, I), KmpDimTy);
12437 // dims.upper = num_iterations;
12438 LValue UpperLVal = CGF.EmitLValueForField(
12439 DimsLVal, *std::next(RD->field_begin(), UpperFD));
12440 llvm::Value *NumIterVal = CGF.EmitScalarConversion(
12441 CGF.EmitScalarExpr(NumIterations[I]), NumIterations[I]->getType(),
12442 Int64Ty, NumIterations[I]->getExprLoc());
12443 CGF.EmitStoreOfScalar(NumIterVal, UpperLVal);
12444 // dims.stride = 1;
12445 LValue StrideLVal = CGF.EmitLValueForField(
12446 DimsLVal, *std::next(RD->field_begin(), StrideFD));
12447 CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1),
12448 StrideLVal);
12449 }
12450
12451 // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid,
12452 // kmp_int32 num_dims, struct kmp_dim * dims);
12453 llvm::Value *Args[] = {
12454 emitUpdateLocation(CGF, D.getBeginLoc()),
12455 getThreadID(CGF, D.getBeginLoc()),
12456 llvm::ConstantInt::getSigned(CGM.Int32Ty, NumIterations.size()),
12457 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
12458 CGF.Builder.CreateConstArrayGEP(DimsAddr, 0).emitRawPointer(CGF),
12459 CGM.VoidPtrTy)};
12460
12461 llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
12462 CGM.getModule(), OMPRTL___kmpc_doacross_init);
12463 CGF.EmitRuntimeCall(RTLFn, Args);
12464 llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = {
12465 emitUpdateLocation(CGF, D.getEndLoc()), getThreadID(CGF, D.getEndLoc())};
12466 llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
12467 CGM.getModule(), OMPRTL___kmpc_doacross_fini);
12468 CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
12469 llvm::ArrayRef(FiniArgs));
12470}
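// Example (illustrative sketch): for
//   #pragma omp for ordered(2)
// this allocates a two-element kmp_dim array, stores each loop's iteration
// count in the 'up' field (with 'lo' = 0 and 'st' = 1), calls
// __kmpc_doacross_init(&loc, gtid, /*num_dims=*/2, &dims[0]), and registers
// __kmpc_doacross_fini as a cleanup for region exit.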
12471
12472template <typename T>
12473static void EmitDoacrossOrdered(CodeGenFunction &CGF, CodeGenModule &CGM,
12474 const T *C, llvm::Value *ULoc,
12475 llvm::Value *ThreadID) {
12476 QualType Int64Ty =
12477 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
12478 llvm::APInt Size(/*numBits=*/32, C->getNumLoops());
12479 QualType ArrayTy = CGM.getContext().getConstantArrayType(
12480 Int64Ty, Size, nullptr, ArraySizeModifier::Normal, 0);
12481 Address CntAddr = CGF.CreateMemTemp(ArrayTy, ".cnt.addr");
12482 for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) {
12483 const Expr *CounterVal = C->getLoopData(I);
12484 assert(CounterVal);
12485 llvm::Value *CntVal = CGF.EmitScalarConversion(
12486 CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty,
12487 CounterVal->getExprLoc());
12488 CGF.EmitStoreOfScalar(CntVal, CGF.Builder.CreateConstArrayGEP(CntAddr, I),
12489 /*Volatile=*/false, Int64Ty);
12490 }
12491 llvm::Value *Args[] = {
12492 ULoc, ThreadID,
12493 CGF.Builder.CreateConstArrayGEP(CntAddr, 0).emitRawPointer(CGF)};
12494 llvm::FunctionCallee RTLFn;
12495 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
12496 OMPDoacrossKind<T> ODK;
12497 if (ODK.isSource(C)) {
12498 RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
12499 OMPRTL___kmpc_doacross_post);
12500 } else {
12501 assert(ODK.isSink(C) && "Expect sink modifier.");
12502 RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
12503 OMPRTL___kmpc_doacross_wait);
12504 }
12505 CGF.EmitRuntimeCall(RTLFn, Args);
12506}
12507
12508void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
12509 const OMPDependClause *C) {
12510 return EmitDoacrossOrdered<OMPDependClause>(
12511 CGF, CGM, C, emitUpdateLocation(CGF, C->getBeginLoc()),
12512 getThreadID(CGF, C->getBeginLoc()));
12513}
12514
12515void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
12516 const OMPDoacrossClause *C) {
12517 return EmitDoacrossOrdered<OMPDoacrossClause>(
12518 CGF, CGM, C, emitUpdateLocation(CGF, C->getBeginLoc()),
12519 getThreadID(CGF, C->getBeginLoc()));
12520}
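// Example (illustrative sketch): within such an ordered(n) loop,
//   #pragma omp ordered depend(source)      lowers to __kmpc_doacross_post,
//   #pragma omp ordered depend(sink : i-1)  lowers to __kmpc_doacross_wait,
// with the loop counters converted to kmp_int64; the OpenMP 5.2 'doacross'
// clause takes the same path via the second overload.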
12521
12522void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, SourceLocation Loc,
12523 llvm::FunctionCallee Callee,
12524 ArrayRef<llvm::Value *> Args) const {
12525 assert(Loc.isValid() && "Outlined function call location must be valid.");
12526 auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
12527
12528 if (auto *Fn = dyn_cast<llvm::Function>(Callee.getCallee())) {
12529 if (Fn->doesNotThrow()) {
12530 CGF.EmitNounwindRuntimeCall(Fn, Args);
12531 return;
12532 }
12533 }
12534 CGF.EmitRuntimeCall(Callee, Args);
12535}
12536
12537void CGOpenMPRuntime::emitOutlinedFunctionCall(
12538 CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn,
12539 ArrayRef<llvm::Value *> Args) const {
12540 emitCall(CGF, Loc, OutlinedFn, Args);
12541}
12542
12543void CGOpenMPRuntime::emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) {
12544 if (const auto *FD = dyn_cast<FunctionDecl>(D))
12545 if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(FD))
12546 HasEmittedDeclareTargetRegion = true;
12547}
12548
12549Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF,
12550 const VarDecl *NativeParam,
12551 const VarDecl *TargetParam) const {
12552 return CGF.GetAddrOfLocalVar(NativeParam);
12553}
12554
12555/// Return the allocator value from the expression, or a null allocator
12556/// (the default when no allocator is specified).
12557static llvm::Value *getAllocatorVal(CodeGenFunction &CGF,
12558 const Expr *Allocator) {
12559 llvm::Value *AllocVal;
12560 if (Allocator) {
12561 AllocVal = CGF.EmitScalarExpr(Allocator);
12562 // According to the standard, the original allocator type is an enum
12563 // (integer). Convert to pointer type, if required.
12564 AllocVal = CGF.EmitScalarConversion(AllocVal, Allocator->getType(),
12565 CGF.getContext().VoidPtrTy,
12566 Allocator->getExprLoc());
12567 } else {
12568 // If no allocator specified, it defaults to the null allocator.
12569 AllocVal = llvm::Constant::getNullValue(
12570 CGF.CGM.getTypes().ConvertType(CGF.getContext().VoidPtrTy));
12571 }
12572 return AllocVal;
12573}
12574
12575/// Return the alignment from an allocate directive if present.
12576static llvm::Value *getAlignmentValue(CodeGenModule &CGM, const VarDecl *VD) {
12577 std::optional<CharUnits> AllocateAlignment = CGM.getOMPAllocateAlignment(VD);
12578
12579 if (!AllocateAlignment)
12580 return nullptr;
12581
12582 return llvm::ConstantInt::get(CGM.SizeTy, AllocateAlignment->getQuantity());
12583}
12584
12585Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF,
12586 const VarDecl *VD) {
12587 if (!VD)
12588 return Address::invalid();
12589 Address UntiedAddr = Address::invalid();
12590 Address UntiedRealAddr = Address::invalid();
12591 auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
12592 if (It != FunctionToUntiedTaskStackMap.end()) {
12593 const UntiedLocalVarsAddressesMap &UntiedData =
12594 UntiedLocalVarsStack[It->second];
12595 auto I = UntiedData.find(VD);
12596 if (I != UntiedData.end()) {
12597 UntiedAddr = I->second.first;
12598 UntiedRealAddr = I->second.second;
12599 }
12600 }
12601 const VarDecl *CVD = VD->getCanonicalDecl();
12602 if (CVD->hasAttr<OMPAllocateDeclAttr>()) {
12603 // Use the default allocation.
12604 if (!isAllocatableDecl(VD))
12605 return UntiedAddr;
12606 llvm::Value *Size;
12607 CharUnits Align = CGM.getContext().getDeclAlign(CVD);
12608 if (CVD->getType()->isVariablyModifiedType()) {
12609 Size = CGF.getTypeSize(CVD->getType());
12610 // Align the size: ((size + align - 1) / align) * align
12611 Size = CGF.Builder.CreateNUWAdd(
12612 Size, CGM.getSize(Align - CharUnits::fromQuantity(1)));
12613 Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align));
12614 Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align));
12615 } else {
12616 CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType());
12617 Size = CGM.getSize(Sz.alignTo(Align));
12618 }
12619 llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc());
12620 const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
12621 const Expr *Allocator = AA->getAllocator();
12622 llvm::Value *AllocVal = getAllocatorVal(CGF, Allocator);
12623 llvm::Value *Alignment = getAlignmentValue(CGM, CVD);
12624 SmallVector<llvm::Value *, 4> Args;
12625 Args.push_back(ThreadID);
12626 if (Alignment)
12627 Args.push_back(Alignment);
12628 Args.push_back(Size);
12629 Args.push_back(AllocVal);
12630 llvm::omp::RuntimeFunction FnID =
12631 Alignment ? OMPRTL___kmpc_aligned_alloc : OMPRTL___kmpc_alloc;
12632 llvm::Value *Addr = CGF.EmitRuntimeCall(
12633 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), FnID), Args,
12634 getName({CVD->getName(), ".void.addr"}));
12635 llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
12636 CGM.getModule(), OMPRTL___kmpc_free);
12637 QualType Ty = CGM.getContext().getPointerType(CVD->getType());
12638 Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
12639 Addr, CGF.ConvertTypeForMem(Ty), getName({CVD->getName(), ".addr"}));
12640 if (UntiedAddr.isValid())
12641 CGF.EmitStoreOfScalar(Addr, UntiedAddr, /*Volatile=*/false, Ty);
12642
12643 // Cleanup action for allocate support.
12644 class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup {
12645 llvm::FunctionCallee RTLFn;
12646 SourceLocation::UIntTy LocEncoding;
12647 Address Addr;
12648 const Expr *AllocExpr;
12649
12650 public:
12651 OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn,
12652 SourceLocation::UIntTy LocEncoding, Address Addr,
12653 const Expr *AllocExpr)
12654 : RTLFn(RTLFn), LocEncoding(LocEncoding), Addr(Addr),
12655 AllocExpr(AllocExpr) {}
12656 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
12657 if (!CGF.HaveInsertPoint())
12658 return;
12659 llvm::Value *Args[3];
12660 Args[0] = CGF.CGM.getOpenMPRuntime().getThreadID(
12661 CGF, SourceLocation::getFromRawEncoding(LocEncoding));
12662 Args[1] = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
12663 Addr.emitRawPointer(CGF), CGF.VoidPtrTy);
12664 llvm::Value *AllocVal = getAllocatorVal(CGF, AllocExpr);
12665 Args[2] = AllocVal;
12666 CGF.EmitRuntimeCall(RTLFn, Args);
12667 }
12668 };
12669 Address VDAddr =
12670 UntiedRealAddr.isValid()
12671 ? UntiedRealAddr
12672 : Address(Addr, CGF.ConvertTypeForMem(CVD->getType()), Align);
12673 CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(
12674 NormalAndEHCleanup, FiniRTLFn, CVD->getLocation().getRawEncoding(),
12675 VDAddr, Allocator);
12676 if (UntiedRealAddr.isValid())
12677 if (auto *Region =
12678 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
12679 Region->emitUntiedSwitch(CGF);
12680 return VDAddr;
12681 }
12682 return UntiedAddr;
12683}
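// Example (illustrative sketch): for
//   int a;
//   #pragma omp allocate(a) allocator(omp_high_bw_mem_alloc)
// this emits roughly
//   void *p = __kmpc_alloc(gtid, sizeof(int), allocator);
// and pushes a cleanup calling __kmpc_free(gtid, p, allocator);
// __kmpc_aligned_alloc is used instead when an 'align' modifier is present.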
12684
12685bool CGOpenMPRuntime::isLocalVarInUntiedTask(CodeGenFunction &CGF,
12686 const VarDecl *VD) const {
12687 auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
12688 if (It == FunctionToUntiedTaskStackMap.end())
12689 return false;
12690 return UntiedLocalVarsStack[It->second].count(VD) > 0;
12691}
12692
12693CGOpenMPRuntime::NontemporalDeclsRAII::NontemporalDeclsRAII(
12694 CodeGenModule &CGM, const OMPLoopDirective &S)
12695 : CGM(CGM), NeedToPush(S.hasClausesOfKind<OMPNontemporalClause>()) {
12696 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
12697 if (!NeedToPush)
12698 return;
12699 NontemporalDeclsSet &DS =
12700 CGM.getOpenMPRuntime().NontemporalDeclsStack.emplace_back();
12701 for (const auto *C : S.getClausesOfKind<OMPNontemporalClause>()) {
12702 for (const Stmt *Ref : C->private_refs()) {
12703 const auto *SimpleRefExpr = cast<Expr>(Ref)->IgnoreParenImpCasts();
12704 const ValueDecl *VD;
12705 if (const auto *DRE = dyn_cast<DeclRefExpr>(SimpleRefExpr)) {
12706 VD = DRE->getDecl();
12707 } else {
12708 const auto *ME = cast<MemberExpr>(SimpleRefExpr);
12709 assert((ME->isImplicitCXXThis() ||
12710 isa<CXXThisExpr>(ME->getBase()->IgnoreParenImpCasts())) &&
12711 "Expected member of current class.");
12712 VD = ME->getMemberDecl();
12713 }
12714 DS.insert(VD);
12715 }
12716 }
12717}
12718
12719CGOpenMPRuntime::NontemporalDeclsRAII::~NontemporalDeclsRAII() {
12720 if (!NeedToPush)
12721 return;
12722 CGM.getOpenMPRuntime().NontemporalDeclsStack.pop_back();
12723}
12724
12725CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::UntiedTaskLocalDeclsRAII(
12726 CodeGenFunction &CGF,
12727 const llvm::MapVector<CanonicalDeclPtr<const VarDecl>,
12728 std::pair<Address, Address>> &LocalVars)
12729 : CGM(CGF.CGM), NeedToPush(!LocalVars.empty()) {
12730 if (!NeedToPush)
12731 return;
12732 CGM.getOpenMPRuntime().FunctionToUntiedTaskStackMap.try_emplace(
12733 CGF.CurFn, CGM.getOpenMPRuntime().UntiedLocalVarsStack.size());
12734 CGM.getOpenMPRuntime().UntiedLocalVarsStack.push_back(LocalVars);
12735}
12736
12737CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::~UntiedTaskLocalDeclsRAII() {
12738 if (!NeedToPush)
12739 return;
12740 CGM.getOpenMPRuntime().UntiedLocalVarsStack.pop_back();
12741}
12742
12743bool CGOpenMPRuntime::isNontemporalDecl(const ValueDecl *VD) const {
12744 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
12745
12746 return llvm::any_of(
12747 CGM.getOpenMPRuntime().NontemporalDeclsStack,
12748 [VD](const NontemporalDeclsSet &Set) { return Set.contains(VD); });
12749}
12750
12751void CGOpenMPRuntime::LastprivateConditionalRAII::tryToDisableInnerAnalysis(
12752 const OMPExecutableDirective &S,
12753 llvm::DenseSet<CanonicalDeclPtr<const Decl>> &NeedToAddForLPCsAsDisabled)
12754 const {
12755 llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToCheckForLPCs;
12756 // Vars in target/task regions must be excluded completely.
12757 if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()) ||
12758 isOpenMPTaskingDirective(S.getDirectiveKind())) {
12759 SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
12760 getOpenMPCaptureRegions(CaptureRegions, S.getDirectiveKind());
12761 const CapturedStmt *CS = S.getCapturedStmt(CaptureRegions.front());
12762 for (const CapturedStmt::Capture &Cap : CS->captures()) {
12763 if (Cap.capturesVariable() || Cap.capturesVariableByCopy())
12764 NeedToCheckForLPCs.insert(Cap.getCapturedVar());
12765 }
12766 }
12767 // Exclude vars in private clauses.
12768 for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
12769 for (const Expr *Ref : C->varlist()) {
12770 if (!Ref->getType()->isScalarType())
12771 continue;
12772 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12773 if (!DRE)
12774 continue;
12775 NeedToCheckForLPCs.insert(DRE->getDecl());
12776 }
12777 }
12778 for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
12779 for (const Expr *Ref : C->varlist()) {
12780 if (!Ref->getType()->isScalarType())
12781 continue;
12782 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12783 if (!DRE)
12784 continue;
12785 NeedToCheckForLPCs.insert(DRE->getDecl());
12786 }
12787 }
12788 for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
12789 for (const Expr *Ref : C->varlist()) {
12790 if (!Ref->getType()->isScalarType())
12791 continue;
12792 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12793 if (!DRE)
12794 continue;
12795 NeedToCheckForLPCs.insert(DRE->getDecl());
12796 }
12797 }
12798 for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
12799 for (const Expr *Ref : C->varlist()) {
12800 if (!Ref->getType()->isScalarType())
12801 continue;
12802 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12803 if (!DRE)
12804 continue;
12805 NeedToCheckForLPCs.insert(DRE->getDecl());
12806 }
12807 }
12808 for (const auto *C : S.getClausesOfKind<OMPLinearClause>()) {
12809 for (const Expr *Ref : C->varlist()) {
12810 if (!Ref->getType()->isScalarType())
12811 continue;
12812 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12813 if (!DRE)
12814 continue;
12815 NeedToCheckForLPCs.insert(DRE->getDecl());
12816 }
12817 }
12818 for (const Decl *VD : NeedToCheckForLPCs) {
12819 for (const LastprivateConditionalData &Data :
12820 llvm::reverse(CGM.getOpenMPRuntime().LastprivateConditionalStack)) {
12821 if (Data.DeclToUniqueName.count(VD) > 0) {
12822 if (!Data.Disabled)
12823 NeedToAddForLPCsAsDisabled.insert(VD);
12824 break;
12825 }
12826 }
12827 }
12828}
12829
12830CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
12831 CodeGenFunction &CGF, const OMPExecutableDirective &S, LValue IVLVal)
12832 : CGM(CGF.CGM),
12833 Action((CGM.getLangOpts().OpenMP >= 50 &&
12834 llvm::any_of(S.getClausesOfKind<OMPLastprivateClause>(),
12835 [](const OMPLastprivateClause *C) {
12836 return C->getKind() ==
12837 OMPC_LASTPRIVATE_conditional;
12838 }))
12839 ? ActionToDo::PushAsLastprivateConditional
12840 : ActionToDo::DoNotPush) {
12841 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
12842 if (CGM.getLangOpts().OpenMP < 50 || Action == ActionToDo::DoNotPush)
12843 return;
12844 assert(Action == ActionToDo::PushAsLastprivateConditional &&
12845 "Expected a push action.");
12847 CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
12848 for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
12849 if (C->getKind() != OMPC_LASTPRIVATE_conditional)
12850 continue;
12851
12852 for (const Expr *Ref : C->varlist()) {
12853 Data.DeclToUniqueName.insert(std::make_pair(
12854 cast<DeclRefExpr>(Ref->IgnoreParenImpCasts())->getDecl(),
12855 SmallString<16>(generateUniqueName(CGM, "pl_cond", Ref))));
12856 }
12857 }
12858 Data.IVLVal = IVLVal;
12859 Data.Fn = CGF.CurFn;
12860}
12861
12862CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
12863 CodeGenFunction &CGF, const OMPExecutableDirective &S)
12864 : CGM(CGF.CGM), Action(ActionToDo::DoNotPush) {
12865 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
12866 if (CGM.getLangOpts().OpenMP < 50)
12867 return;
12868 llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToAddForLPCsAsDisabled;
12869 tryToDisableInnerAnalysis(S, NeedToAddForLPCsAsDisabled);
12870 if (!NeedToAddForLPCsAsDisabled.empty()) {
12871 Action = ActionToDo::DisableLastprivateConditional;
12872 LastprivateConditionalData &Data =
12873 CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
12874 for (const Decl *VD : NeedToAddForLPCsAsDisabled)
12875 Data.DeclToUniqueName.try_emplace(VD);
12876 Data.Fn = CGF.CurFn;
12877 Data.Disabled = true;
12878 }
12879}
12880
12881CGOpenMPRuntime::LastprivateConditionalRAII
12882CGOpenMPRuntime::LastprivateConditionalRAII::disable(
12883 CodeGenFunction &CGF, const OMPExecutableDirective &S) {
12884 return LastprivateConditionalRAII(CGF, S);
12885}
12886
12887CGOpenMPRuntime::LastprivateConditionalRAII::~LastprivateConditionalRAII() {
12888 if (CGM.getLangOpts().OpenMP < 50)
12889 return;
12890 if (Action == ActionToDo::DisableLastprivateConditional) {
12891 assert(CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
12892 "Expected list of disabled private vars.");
12893 CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
12894 }
12895 if (Action == ActionToDo::PushAsLastprivateConditional) {
12896 assert(
12897 !CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
12898 "Expected list of lastprivate conditional vars.");
12899 CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
12900 }
12901}
12902
12903Address CGOpenMPRuntime::emitLastprivateConditionalInit(CodeGenFunction &CGF,
12904 const VarDecl *VD) {
12905 ASTContext &C = CGM.getContext();
12906 auto I = LastprivateConditionalToTypes.try_emplace(CGF.CurFn).first;
12907 QualType NewType;
12908 const FieldDecl *VDField;
12909 const FieldDecl *FiredField;
12910 LValue BaseLVal;
12911 auto VI = I->getSecond().find(VD);
12912 if (VI == I->getSecond().end()) {
12913 RecordDecl *RD = C.buildImplicitRecord("lasprivate.conditional");
12914 RD->startDefinition();
12915 VDField = addFieldToRecordDecl(C, RD, VD->getType().getNonReferenceType());
12916 FiredField = addFieldToRecordDecl(C, RD, C.CharTy);
12917 RD->completeDefinition();
12918 NewType = C.getCanonicalTagType(RD);
12919 Address Addr = CGF.CreateMemTemp(NewType, C.getDeclAlign(VD), VD->getName());
12920 BaseLVal = CGF.MakeAddrLValue(Addr, NewType, AlignmentSource::Decl);
12921 I->getSecond().try_emplace(VD, NewType, VDField, FiredField, BaseLVal);
12922 } else {
12923 NewType = std::get<0>(VI->getSecond());
12924 VDField = std::get<1>(VI->getSecond());
12925 FiredField = std::get<2>(VI->getSecond());
12926 BaseLVal = std::get<3>(VI->getSecond());
12927 }
12928 LValue FiredLVal =
12929 CGF.EmitLValueForField(BaseLVal, FiredField);
12930 CGF.EmitStoreOfScalar(
12931 llvm::ConstantInt::getNullValue(CGF.ConvertTypeForMem(C.CharTy)),
12932 FiredLVal);
12933 return CGF.EmitLValueForField(BaseLVal, VDField).getAddress();
12934}
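// Illustrative note (a sketch): the private copy is wrapped in the implicit
//   struct { <decl type> Value; char Fired; };
// built above, so inner regions can atomically set Fired to record that the
// value was written and must be considered for the final lastprivate update.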
12935
12936namespace {
12937/// Checks if the lastprivate conditional variable is referenced in LHS.
12938class LastprivateConditionalRefChecker final
12939 : public ConstStmtVisitor<LastprivateConditionalRefChecker, bool> {
12940 ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM;
12941 const Expr *FoundE = nullptr;
12942 const Decl *FoundD = nullptr;
12943 StringRef UniqueDeclName;
12944 LValue IVLVal;
12945 llvm::Function *FoundFn = nullptr;
12946 SourceLocation Loc;
12947
12948public:
12949 bool VisitDeclRefExpr(const DeclRefExpr *E) {
12950 for (const CGOpenMPRuntime::LastprivateConditionalData &D :
12951 llvm::reverse(LPM)) {
12952 auto It = D.DeclToUniqueName.find(E->getDecl());
12953 if (It == D.DeclToUniqueName.end())
12954 continue;
12955 if (D.Disabled)
12956 return false;
12957 FoundE = E;
12958 FoundD = E->getDecl()->getCanonicalDecl();
12959 UniqueDeclName = It->second;
12960 IVLVal = D.IVLVal;
12961 FoundFn = D.Fn;
12962 break;
12963 }
12964 return FoundE == E;
12965 }
12966 bool VisitMemberExpr(const MemberExpr *E) {
12967 if (!CodeGenFunction::IsWrappedCXXThis(E->getBase()))
12968 return false;
12969 for (const CGOpenMPRuntime::LastprivateConditionalData &D :
12970 llvm::reverse(LPM)) {
12971 auto It = D.DeclToUniqueName.find(E->getMemberDecl());
12972 if (It == D.DeclToUniqueName.end())
12973 continue;
12974 if (D.Disabled)
12975 return false;
12976 FoundE = E;
12977 FoundD = E->getMemberDecl()->getCanonicalDecl();
12978 UniqueDeclName = It->second;
12979 IVLVal = D.IVLVal;
12980 FoundFn = D.Fn;
12981 break;
12982 }
12983 return FoundE == E;
12984 }
12985 bool VisitStmt(const Stmt *S) {
12986 for (const Stmt *Child : S->children()) {
12987 if (!Child)
12988 continue;
12989 if (const auto *E = dyn_cast<Expr>(Child))
12990 if (!E->isGLValue())
12991 continue;
12992 if (Visit(Child))
12993 return true;
12994 }
12995 return false;
12996 }
12997 explicit LastprivateConditionalRefChecker(
12998 ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM)
12999 : LPM(LPM) {}
13000 std::tuple<const Expr *, const Decl *, StringRef, LValue, llvm::Function *>
13001 getFoundData() const {
13002 return std::make_tuple(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn);
13003 }
13004};
13005} // namespace
13006
13007void CGOpenMPRuntime::emitLastprivateConditionalUpdate(CodeGenFunction &CGF,
13008 LValue IVLVal,
13009 StringRef UniqueDeclName,
13010 LValue LVal,
13011 SourceLocation Loc) {
13012 // Last updated loop counter for the lastprivate conditional var.
13013 // int<xx> last_iv = 0;
13014 llvm::Type *LLIVTy = CGF.ConvertTypeForMem(IVLVal.getType());
13015 llvm::Constant *LastIV = OMPBuilder.getOrCreateInternalVariable(
13016 LLIVTy, getName({UniqueDeclName, "iv"}));
13017 cast<llvm::GlobalVariable>(LastIV)->setAlignment(
13018 IVLVal.getAlignment().getAsAlign());
13019 LValue LastIVLVal =
13020 CGF.MakeNaturalAlignRawAddrLValue(LastIV, IVLVal.getType());
13021
13022 // Last value of the lastprivate conditional.
13023 // decltype(priv_a) last_a;
13024 llvm::GlobalVariable *Last = OMPBuilder.getOrCreateInternalVariable(
13025 CGF.ConvertTypeForMem(LVal.getType()), UniqueDeclName);
13026 cast<llvm::GlobalVariable>(Last)->setAlignment(
13027 LVal.getAlignment().getAsAlign());
13028 LValue LastLVal =
13029 CGF.MakeRawAddrLValue(Last, LVal.getType(), LVal.getAlignment());
13030
13031 // Global loop counter. Required to handle inner parallel-for regions.
13032 // iv
13033 llvm::Value *IVVal = CGF.EmitLoadOfScalar(IVLVal, Loc);
13034
13035 // #pragma omp critical(a)
13036 // if (last_iv <= iv) {
13037 // last_iv = iv;
13038 // last_a = priv_a;
13039 // }
13040 auto &&CodeGen = [&LastIVLVal, &IVLVal, IVVal, &LVal, &LastLVal,
13041 Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
13042 Action.Enter(CGF);
13043 llvm::Value *LastIVVal = CGF.EmitLoadOfScalar(LastIVLVal, Loc);
13044 // Check if the variable was updated (last_iv <= iv) and, if so, store the
13045 // new value in the global var.
13046 llvm::Value *CmpRes;
13047 if (IVLVal.getType()->isSignedIntegerType()) {
13048 CmpRes = CGF.Builder.CreateICmpSLE(LastIVVal, IVVal);
13049 } else {
13050 assert(IVLVal.getType()->isUnsignedIntegerType() &&
13051 "Loop iteration variable must be integer.");
13052 CmpRes = CGF.Builder.CreateICmpULE(LastIVVal, IVVal);
13053 }
13054 llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lp_cond_then");
13055 llvm::BasicBlock *ExitBB = CGF.createBasicBlock("lp_cond_exit");
13056 CGF.Builder.CreateCondBr(CmpRes, ThenBB, ExitBB);
13057 // {
13058 CGF.EmitBlock(ThenBB);
13059
13060 // last_iv = iv;
13061 CGF.EmitStoreOfScalar(IVVal, LastIVLVal);
13062
13063 // last_a = priv_a;
13064 switch (CGF.getEvaluationKind(LVal.getType())) {
13065 case TEK_Scalar: {
13066 llvm::Value *PrivVal = CGF.EmitLoadOfScalar(LVal, Loc);
13067 CGF.EmitStoreOfScalar(PrivVal, LastLVal);
13068 break;
13069 }
13070 case TEK_Complex: {
13071 CodeGenFunction::ComplexPairTy PrivVal = CGF.EmitLoadOfComplex(LVal, Loc);
13072 CGF.EmitStoreOfComplex(PrivVal, LastLVal, /*isInit=*/false);
13073 break;
13074 }
13075 case TEK_Aggregate:
13076 llvm_unreachable(
13077 "Aggregates are not supported in lastprivate conditional.");
13078 }
13079 // }
13080 CGF.EmitBranch(ExitBB);
13081 // There is no need to emit line number for unconditional branch.
13082 (void)ApplyDebugLocation::CreateEmpty(CGF);
13083 CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
13084 };
13085
13086 if (CGM.getLangOpts().OpenMPSimd) {
13087 // Do not emit as a critical region as no parallel region could be emitted.
13088 RegionCodeGenTy ThenRCG(CodeGen);
13089 ThenRCG(CGF);
13090 } else {
13091 emitCriticalRegion(CGF, UniqueDeclName, CodeGen, Loc);
13092 }
13093}
13094
13095void CGOpenMPRuntime::checkAndEmitLastprivateConditional(CodeGenFunction &CGF,
13096 const Expr *LHS) {
13097 if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
13098 return;
13099 LastprivateConditionalRefChecker Checker(LastprivateConditionalStack);
13100 if (!Checker.Visit(LHS))
13101 return;
13102 const Expr *FoundE;
13103 const Decl *FoundD;
13104 StringRef UniqueDeclName;
13105 LValue IVLVal;
13106 llvm::Function *FoundFn;
13107 std::tie(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn) =
13108 Checker.getFoundData();
13109 if (FoundFn != CGF.CurFn) {
13110 // Special codegen for inner parallel regions.
13111 // ((struct.lastprivate.conditional*)&priv_a)->Fired = 1;
13112 auto It = LastprivateConditionalToTypes[FoundFn].find(FoundD);
13113 assert(It != LastprivateConditionalToTypes[FoundFn].end() &&
13114 "Lastprivate conditional is not found in outer region.");
13115 QualType StructTy = std::get<0>(It->getSecond());
13116 const FieldDecl *FiredDecl = std::get<2>(It->getSecond());
13117 LValue PrivLVal = CGF.EmitLValue(FoundE);
13118 Address StructAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
13119 PrivLVal.getAddress(),
13120 CGF.ConvertTypeForMem(CGF.getContext().getPointerType(StructTy)),
13121 CGF.ConvertTypeForMem(StructTy));
13122 LValue BaseLVal =
13123 CGF.MakeAddrLValue(StructAddr, StructTy, AlignmentSource::Decl);
13124 LValue FiredLVal = CGF.EmitLValueForField(BaseLVal, FiredDecl);
13125 CGF.EmitAtomicStore(RValue::get(llvm::ConstantInt::get(
13126 CGF.ConvertTypeForMem(FiredDecl->getType()), 1)),
13127 FiredLVal, llvm::AtomicOrdering::Unordered,
13128 /*IsVolatile=*/true, /*isInit=*/false);
13129 return;
13130 }
13131
13132 // Private address of the lastprivate conditional in the current context.
13133 // priv_a
13134 LValue LVal = CGF.EmitLValue(FoundE);
13135 emitLastprivateConditionalUpdate(CGF, IVLVal, UniqueDeclName, LVal,
13136 FoundE->getExprLoc());
13137}
13138
13139void CGOpenMPRuntime::checkAndEmitSharedLastprivateConditional(
13140 CodeGenFunction &CGF, const OMPExecutableDirective &D,
13141 const llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> &IgnoredDecls) {
13142 if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
13143 return;
13144 auto Range = llvm::reverse(LastprivateConditionalStack);
13145 auto It = llvm::find_if(
13146 Range, [](const LastprivateConditionalData &D) { return !D.Disabled; });
13147 if (It == Range.end() || It->Fn != CGF.CurFn)
13148 return;
13149 auto LPCI = LastprivateConditionalToTypes.find(It->Fn);
13150 assert(LPCI != LastprivateConditionalToTypes.end() &&
13151 "Lastprivates must be registered already.");
13153 getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind());
13154 const CapturedStmt *CS = D.getCapturedStmt(CaptureRegions.back());
13155 for (const auto &Pair : It->DeclToUniqueName) {
13156 const auto *VD = cast<VarDecl>(Pair.first->getCanonicalDecl());
13157 if (!CS->capturesVariable(VD) || IgnoredDecls.contains(VD))
13158 continue;
13159 auto I = LPCI->getSecond().find(Pair.first);
13160 assert(I != LPCI->getSecond().end() &&
13161 "Lastprivate must be rehistered already.");
13162 // bool Cmp = priv_a.Fired != 0;
13163 LValue BaseLVal = std::get<3>(I->getSecond());
13164 LValue FiredLVal =
13165 CGF.EmitLValueForField(BaseLVal, std::get<2>(I->getSecond()));
13166 llvm::Value *Res = CGF.EmitLoadOfScalar(FiredLVal, D.getBeginLoc());
13167 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Res);
13168 llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lpc.then");
13169 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("lpc.done");
13170 // if (Cmp) {
13171 CGF.Builder.CreateCondBr(Cmp, ThenBB, DoneBB);
13172 CGF.EmitBlock(ThenBB);
13173 Address Addr = CGF.GetAddrOfLocalVar(VD);
13174 LValue LVal;
13175 if (VD->getType()->isReferenceType())
13176 LVal = CGF.EmitLoadOfReferenceLValue(Addr, VD->getType(),
13177 AlignmentSource::Decl);
13178 else
13179 LVal = CGF.MakeAddrLValue(Addr, VD->getType().getNonReferenceType(),
13180 AlignmentSource::Decl);
13181 emitLastprivateConditionalUpdate(CGF, It->IVLVal, Pair.second, LVal,
13182 D.getBeginLoc());
13183 auto AL = ApplyDebugLocation::CreateArtificial(CGF);
13184 CGF.EmitBlock(DoneBB, /*IsFinal=*/true);
13185 // }
13186 }
13187}
13188
13189void CGOpenMPRuntime::emitLastprivateConditionalFinalUpdate(
13190 CodeGenFunction &CGF, LValue PrivLVal, const VarDecl *VD,
13191 SourceLocation Loc) {
13192 if (CGF.getLangOpts().OpenMP < 50)
13193 return;
13194 auto It = LastprivateConditionalStack.back().DeclToUniqueName.find(VD);
13195 assert(It != LastprivateConditionalStack.back().DeclToUniqueName.end() &&
13196 "Unknown lastprivate conditional variable.");
13197 StringRef UniqueName = It->second;
13198 llvm::GlobalVariable *GV = CGM.getModule().getNamedGlobal(UniqueName);
13199 // The variable was not updated in the region - exit.
13200 if (!GV)
13201 return;
13202 LValue LPLVal = CGF.MakeRawAddrLValue(
13203 GV, PrivLVal.getType().getNonReferenceType(), PrivLVal.getAlignment());
13204 llvm::Value *Res = CGF.EmitLoadOfScalar(LPLVal, Loc);
13205 CGF.EmitStoreOfScalar(Res, PrivLVal);
13206}
13207
13208llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction(
13209 CodeGenFunction &CGF, const OMPExecutableDirective &D,
13210 const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
13211 const RegionCodeGenTy &CodeGen) {
13212 llvm_unreachable("Not supported in SIMD-only mode");
13213}
13214
13215llvm::Function *CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction(
13216 CodeGenFunction &CGF, const OMPExecutableDirective &D,
13217 const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
13218 const RegionCodeGenTy &CodeGen) {
13219 llvm_unreachable("Not supported in SIMD-only mode");
13220}
13221
13222llvm::Function *CGOpenMPSIMDRuntime::emitTaskOutlinedFunction(
13223 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
13224 const VarDecl *PartIDVar, const VarDecl *TaskTVar,
13225 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
13226 bool Tied, unsigned &NumberOfParts) {
13227 llvm_unreachable("Not supported in SIMD-only mode");
13228}
13229
13230void CGOpenMPSIMDRuntime::emitParallelCall(
13231 CodeGenFunction &CGF, SourceLocation Loc, llvm::Function *OutlinedFn,
13232 ArrayRef<llvm::Value *> CapturedVars, const Expr *IfCond,
13233 llvm::Value *NumThreads, OpenMPNumThreadsClauseModifier NumThreadsModifier,
13234 OpenMPSeverityClauseKind Severity, const Expr *Message) {
13235 llvm_unreachable("Not supported in SIMD-only mode");
13236}
13237
13238void CGOpenMPSIMDRuntime::emitCriticalRegion(
13239 CodeGenFunction &CGF, StringRef CriticalName,
13240 const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc,
13241 const Expr *Hint) {
13242 llvm_unreachable("Not supported in SIMD-only mode");
13243}
13244
13245void CGOpenMPSIMDRuntime::emitMasterRegion(CodeGenFunction &CGF,
13246 const RegionCodeGenTy &MasterOpGen,
13247 SourceLocation Loc) {
13248 llvm_unreachable("Not supported in SIMD-only mode");
13249}
13250
13251void CGOpenMPSIMDRuntime::emitMaskedRegion(CodeGenFunction &CGF,
13252 const RegionCodeGenTy &MasterOpGen,
13253 SourceLocation Loc,
13254 const Expr *Filter) {
13255 llvm_unreachable("Not supported in SIMD-only mode");
13256}
13257
13258void CGOpenMPSIMDRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
13259 SourceLocation Loc) {
13260 llvm_unreachable("Not supported in SIMD-only mode");
13261}
13262
13263void CGOpenMPSIMDRuntime::emitTaskgroupRegion(
13264 CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen,
13265 SourceLocation Loc) {
13266 llvm_unreachable("Not supported in SIMD-only mode");
13267}
13268
13269void CGOpenMPSIMDRuntime::emitSingleRegion(
13270 CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen,
13271 SourceLocation Loc, ArrayRef<const Expr *> CopyprivateVars,
13272 ArrayRef<const Expr *> DestExprs, ArrayRef<const Expr *> SrcExprs,
13273 ArrayRef<const Expr *> AssignmentOps) {
13274 llvm_unreachable("Not supported in SIMD-only mode");
13275}
13276
13277void CGOpenMPSIMDRuntime::emitOrderedRegion(CodeGenFunction &CGF,
13278 const RegionCodeGenTy &OrderedOpGen,
13279 SourceLocation Loc,
13280 bool IsThreads) {
13281 llvm_unreachable("Not supported in SIMD-only mode");
13282}
13283
13284void CGOpenMPSIMDRuntime::emitBarrierCall(CodeGenFunction &CGF,
13285 SourceLocation Loc,
13286 OpenMPDirectiveKind Kind,
13287 bool EmitChecks,
13288 bool ForceSimpleCall) {
13289 llvm_unreachable("Not supported in SIMD-only mode");
13290}
13291
13292void CGOpenMPSIMDRuntime::emitForDispatchInit(
13293 CodeGenFunction &CGF, SourceLocation Loc,
13294 const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
13295 bool Ordered, const DispatchRTInput &DispatchValues) {
13296 llvm_unreachable("Not supported in SIMD-only mode");
13297}
13298
13299void CGOpenMPSIMDRuntime::emitForDispatchDeinit(CodeGenFunction &CGF,
13300 SourceLocation Loc) {
13301 llvm_unreachable("Not supported in SIMD-only mode");
13302}
13303
13304void CGOpenMPSIMDRuntime::emitForStaticInit(
13305 CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind,
13306 const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) {
13307 llvm_unreachable("Not supported in SIMD-only mode");
13308}
13309
13310void CGOpenMPSIMDRuntime::emitDistributeStaticInit(
13311 CodeGenFunction &CGF, SourceLocation Loc,
13312 OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) {
13313 llvm_unreachable("Not supported in SIMD-only mode");
13314}
13315
13316void CGOpenMPSIMDRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
13317 SourceLocation Loc,
13318 unsigned IVSize,
13319 bool IVSigned) {
13320 llvm_unreachable("Not supported in SIMD-only mode");
13321}
13322
13323void CGOpenMPSIMDRuntime::emitForStaticFinish(CodeGenFunction &CGF,
13324 SourceLocation Loc,
13325 OpenMPDirectiveKind DKind) {
13326 llvm_unreachable("Not supported in SIMD-only mode");
13327}
13328
13329llvm::Value *CGOpenMPSIMDRuntime::emitForNext(CodeGenFunction &CGF,
13330 SourceLocation Loc,
13331 unsigned IVSize, bool IVSigned,
13332 Address IL, Address LB,
13333 Address UB, Address ST) {
13334 llvm_unreachable("Not supported in SIMD-only mode");
13335}
13336
13337void CGOpenMPSIMDRuntime::emitNumThreadsClause(
13338 CodeGenFunction &CGF, llvm::Value *NumThreads, SourceLocation Loc,
13339 OpenMPNumThreadsClauseModifier Modifier, OpenMPSeverityClauseKind Severity,
13340 SourceLocation SeverityLoc, const Expr *Message,
13341 SourceLocation MessageLoc) {
13342 llvm_unreachable("Not supported in SIMD-only mode");
13343}
13344
13345void CGOpenMPSIMDRuntime::emitProcBindClause(CodeGenFunction &CGF,
13346 ProcBindKind ProcBind,
13347 SourceLocation Loc) {
13348 llvm_unreachable("Not supported in SIMD-only mode");
13349}
13350
13351Address CGOpenMPSIMDRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
13352 const VarDecl *VD,
13353 Address VDAddr,
13354 SourceLocation Loc) {
13355 llvm_unreachable("Not supported in SIMD-only mode");
13356}
13357
13358llvm::Function *CGOpenMPSIMDRuntime::emitThreadPrivateVarDefinition(
13359 const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit,
13360 CodeGenFunction *CGF) {
13361 llvm_unreachable("Not supported in SIMD-only mode");
13362}
13363
13364Address CGOpenMPSIMDRuntime::getAddrOfArtificialThreadPrivate(
13365 CodeGenFunction &CGF, QualType VarType, StringRef Name) {
13366 llvm_unreachable("Not supported in SIMD-only mode");
13367}
13368
13369void CGOpenMPSIMDRuntime::emitFlush(CodeGenFunction &CGF,
13370 ArrayRef<const Expr *> Vars,
13371 SourceLocation Loc,
13372 llvm::AtomicOrdering AO) {
13373 llvm_unreachable("Not supported in SIMD-only mode");
13374}
13375
13376void CGOpenMPSIMDRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
13377 const OMPExecutableDirective &D,
13378 llvm::Function *TaskFunction,
13379 QualType SharedsTy, Address Shareds,
13380 const Expr *IfCond,
13381 const OMPTaskDataTy &Data) {
13382 llvm_unreachable("Not supported in SIMD-only mode");
13383}
13384
13385void CGOpenMPSIMDRuntime::emitTaskLoopCall(
13386 CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D,
13387 llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds,
13388 const Expr *IfCond, const OMPTaskDataTy &Data) {
13389 llvm_unreachable("Not supported in SIMD-only mode");
13390}
13391
13392void CGOpenMPSIMDRuntime::emitReduction(
13393 CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates,
13394 ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
13395 ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) {
13396 assert(Options.SimpleReduction && "Only simple reduction is expected.");
13397 CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs,
13398 ReductionOps, Options);
13399}
13400
13401llvm::Value *CGOpenMPSIMDRuntime::emitTaskReductionInit(
13402 CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
13403 ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
13404 llvm_unreachable("Not supported in SIMD-only mode");
13405}
13406
13407void CGOpenMPSIMDRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
13408 SourceLocation Loc,
13409 bool IsWorksharingReduction) {
13410 llvm_unreachable("Not supported in SIMD-only mode");
13411}
13412
13413void CGOpenMPSIMDRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
13414 SourceLocation Loc,
13415 ReductionCodeGen &RCG,
13416 unsigned N) {
13417 llvm_unreachable("Not supported in SIMD-only mode");
13418}
13419
13420Address CGOpenMPSIMDRuntime::getTaskReductionItem(CodeGenFunction &CGF,
13421 SourceLocation Loc,
13422 llvm::Value *ReductionsPtr,
13423 LValue SharedLVal) {
13424 llvm_unreachable("Not supported in SIMD-only mode");
13425}
13426
13427void CGOpenMPSIMDRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
13428 SourceLocation Loc,
13429 const OMPTaskDataTy &Data) {
13430 llvm_unreachable("Not supported in SIMD-only mode");
13431}
13432
13433void CGOpenMPSIMDRuntime::emitCancellationPointCall(
13434 CodeGenFunction &CGF, SourceLocation Loc,
13435 OpenMPDirectiveKind CancelRegion) {
13436 llvm_unreachable("Not supported in SIMD-only mode");
13437}
13438
13439void CGOpenMPSIMDRuntime::emitCancelCall(CodeGenFunction &CGF,
13440 SourceLocation Loc, const Expr *IfCond,
13441 OpenMPDirectiveKind CancelRegion) {
13442 llvm_unreachable("Not supported in SIMD-only mode");
13443}
13444
13445void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction(
13446 const OMPExecutableDirective &D, StringRef ParentName,
13447 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
13448 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
13449 llvm_unreachable("Not supported in SIMD-only mode");
13450}
13451
13452void CGOpenMPSIMDRuntime::emitTargetCall(
13453 CodeGenFunction &CGF, const OMPExecutableDirective &D,
13454 llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
13455 llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
13456 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
13457 const OMPLoopDirective &D)>
13458 SizeEmitter) {
13459 llvm_unreachable("Not supported in SIMD-only mode");
13460}
13461
13462bool CGOpenMPSIMDRuntime::emitTargetFunctions(GlobalDecl GD) {
13463 llvm_unreachable("Not supported in SIMD-only mode");
13464}
13465
13466bool CGOpenMPSIMDRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
13467 llvm_unreachable("Not supported in SIMD-only mode");
13468}
13469
13470bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) {
13471 return false;
13472}
13473
13474void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF,
13475 const OMPExecutableDirective &D,
13476 SourceLocation Loc,
13477 llvm::Function *OutlinedFn,
13478 ArrayRef<llvm::Value *> CapturedVars) {
13479 llvm_unreachable("Not supported in SIMD-only mode");
13480}
13481
13482void CGOpenMPSIMDRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
13483 const Expr *NumTeams,
13484 const Expr *ThreadLimit,
13485 SourceLocation Loc) {
13486 llvm_unreachable("Not supported in SIMD-only mode");
13487}
13488
13489void CGOpenMPSIMDRuntime::emitTargetDataCalls(
13490 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
13491 const Expr *Device, const RegionCodeGenTy &CodeGen,
13492 CGOpenMPRuntime::TargetDataInfo &Info) {
13493 llvm_unreachable("Not supported in SIMD-only mode");
13494}
13495
13496void CGOpenMPSIMDRuntime::emitTargetDataStandAloneCall(
13497 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
13498 const Expr *Device) {
13499 llvm_unreachable("Not supported in SIMD-only mode");
13500}
13501
13502void CGOpenMPSIMDRuntime::emitDoacrossInit(CodeGenFunction &CGF,
13503 const OMPLoopDirective &D,
13504 ArrayRef<Expr *> NumIterations) {
13505 llvm_unreachable("Not supported in SIMD-only mode");
13506}
13507
13508void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
13509 const OMPDependClause *C) {
13510 llvm_unreachable("Not supported in SIMD-only mode");
13511}
13512
13513void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
13514 const OMPDoacrossClause *C) {
13515 llvm_unreachable("Not supported in SIMD-only mode");
13516}
13517
13518const VarDecl *
13519CGOpenMPSIMDRuntime::translateParameter(const FieldDecl *FD,
13520 const VarDecl *NativeParam) const {
13521 llvm_unreachable("Not supported in SIMD-only mode");
13522}
13523
13524Address
13525CGOpenMPSIMDRuntime::getParameterAddress(CodeGenFunction &CGF,
13526 const VarDecl *NativeParam,
13527 const VarDecl *TargetParam) const {
13528 llvm_unreachable("Not supported in SIMD-only mode");
13529}
static llvm::Value * emitCopyprivateCopyFunction(CodeGenModule &CGM, llvm::Type *ArgsElemType, ArrayRef< const Expr * > CopyprivateVars, ArrayRef< const Expr * > DestExprs, ArrayRef< const Expr * > SrcExprs, ArrayRef< const Expr * > AssignmentOps, SourceLocation Loc)
static StringRef getIdentStringFromSourceLocation(CodeGenFunction &CGF, SourceLocation Loc, SmallString< 128 > &Buffer)
static void emitOffloadingArraysAndArgs(CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo, CGOpenMPRuntime::TargetDataInfo &Info, llvm::OpenMPIRBuilder &OMPBuilder, bool IsNonContiguous=false, bool ForEndCall=false)
Emit the arrays used to pass the captures and map information to the offloading runtime library.
static RecordDecl * createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy, ArrayRef< PrivateDataTy > Privates)
static void emitInitWithReductionInitializer(CodeGenFunction &CGF, const OMPDeclareReductionDecl *DRD, const Expr *InitOp, Address Private, Address Original, QualType Ty)
static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy, Address OriginalBaseAddress, llvm::Value *Addr)
static void emitPrivatesInit(CodeGenFunction &CGF, const OMPExecutableDirective &D, Address KmpTaskSharedsPtr, LValue TDBase, const RecordDecl *KmpTaskTWithPrivatesQTyRD, QualType SharedsTy, QualType SharedsPtrTy, const OMPTaskDataTy &Data, ArrayRef< PrivateDataTy > Privates, bool ForDup)
Emit initialization for private variables in task-based directives.
static void emitClauseForBareTargetDirective(CodeGenFunction &CGF, const OMPExecutableDirective &D, llvm::SmallVectorImpl< llvm::Value * > &Values)
static llvm::Value * emitDestructorsFunction(CodeGenModule &CGM, SourceLocation Loc, QualType KmpInt32Ty, QualType KmpTaskTWithPrivatesPtrQTy, QualType KmpTaskTWithPrivatesQTy)
static unsigned evaluateCDTSize(const FunctionDecl *FD, ArrayRef< ParamAttrTy > ParamAttrs)
static void EmitOMPAggregateReduction(CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar, const VarDecl *RHSVar, const llvm::function_ref< void(CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *)> &RedOpGen, const Expr *XExpr=nullptr, const Expr *EExpr=nullptr, const Expr *UpExpr=nullptr)
Emit reduction operation for each element of array (required for array sections) LHS op = RHS.
static void emitTargetCallFallback(CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn, const OMPExecutableDirective &D, llvm::SmallVectorImpl< llvm::Value * > &CapturedVars, bool RequiresOuterTask, const CapturedStmt &CS, bool OffloadingMandatory, CodeGenFunction &CGF)
static llvm::Value * emitReduceInitFunction(CodeGenModule &CGM, SourceLocation Loc, ReductionCodeGen &RCG, unsigned N)
Emits reduction initializer function:
static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion)
static void emitDependData(CodeGenFunction &CGF, QualType &KmpDependInfoTy, llvm::PointerUnion< unsigned *, LValue * > Pos, const OMPTaskDataTy::DependData &Data, Address DependenciesArray)
static llvm::Value * emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc, const OMPTaskDataTy &Data, QualType PrivatesQTy, ArrayRef< PrivateDataTy > Privates)
Emit a privates mapping function for correct handling of private and firstprivate variables.
static llvm::Value * emitReduceCombFunction(CodeGenModule &CGM, SourceLocation Loc, ReductionCodeGen &RCG, unsigned N, const Expr *ReductionOp, const Expr *LHS, const Expr *RHS, const Expr *PrivateRef)
Emits reduction combiner function:
static RecordDecl * createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef< PrivateDataTy > Privates)
static llvm::Value * getAllocatorVal(CodeGenFunction &CGF, const Expr *Allocator)
Return allocator value from expression, or return a null allocator (default when no allocator specifi...
static llvm::Function * emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc, OpenMPDirectiveKind Kind, QualType KmpInt32Ty, QualType KmpTaskTWithPrivatesPtrQTy, QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy, QualType SharedsPtrTy, llvm::Function *TaskFunction, llvm::Value *TaskPrivatesMap)
Emit a proxy function which accepts kmp_task_t as the second argument.
static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix, char ISA, StringRef ParSeq, StringRef MangledName, bool OutputBecomesInput, llvm::Function *Fn)
static bool isAllocatableDecl(const VarDecl *VD)
static llvm::Value * getAlignmentValue(CodeGenModule &CGM, const VarDecl *VD)
Return the alignment from an allocate directive if present.
static void emitTargetCallKernelLaunch(CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn, const OMPExecutableDirective &D, llvm::SmallVectorImpl< llvm::Value * > &CapturedVars, bool RequiresOuterTask, const CapturedStmt &CS, bool OffloadingMandatory, llvm::PointerIntPair< const Expr *, 2, OpenMPDeviceClauseModifier > Device, llvm::Value *OutlinedFnID, CodeGenFunction::OMPTargetDataInfo &InputInfo, llvm::Value *&MapTypesArray, llvm::Value *&MapNamesArray, llvm::function_ref< llvm::Value *(CodeGenFunction &CGF, const OMPLoopDirective &D)> SizeEmitter, CodeGenFunction &CGF, CodeGenModule &CGM)
static llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind convertCaptureClause(const VarDecl *VD)
static std::tuple< unsigned, unsigned, bool > getNDSWDS(const FunctionDecl *FD, ArrayRef< ParamAttrTy > ParamAttrs)
static const OMPExecutableDirective * getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D)
Check for inner distribute directive.
static std::pair< llvm::Value *, llvm::Value * > getPointerAndSize(CodeGenFunction &CGF, const Expr *E)
static const VarDecl * getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE)
static bool isTrivial(ASTContext &Ctx, const Expr *E)
Checks if the expression is constant or does not have non-trivial function calls.
static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind, bool Chunked, bool Ordered)
Map the OpenMP loop schedule to the runtime enumeration.
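As a hedged illustration of that mapping (the OMP_sch_* enumerator names follow libomp's sched_type convention and are an assumption here, not a quote of this file; the real helper also accounts for the ordered clause):

// Sketch: clause schedule kind -> runtime schedule enumeration.
static OpenMPSchedType sketchRuntimeSchedule(OpenMPScheduleClauseKind Kind,
                                             bool Chunked) {
  switch (Kind) {
  case OMPC_SCHEDULE_static:
    return Chunked ? OMP_sch_static_chunked : OMP_sch_static;
  case OMPC_SCHEDULE_dynamic:
    return OMP_sch_dynamic_chunked;
  case OMPC_SCHEDULE_guided:
    return OMP_sch_guided_chunked;
  case OMPC_SCHEDULE_runtime:
    return OMP_sch_runtime;
  case OMPC_SCHEDULE_auto:
    return OMP_sch_auto;
  default: // no schedule clause: default to static
    return Chunked ? OMP_sch_static_chunked : OMP_sch_static;
  }
}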
static void getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS, const Expr **E, int32_t &UpperBound, bool UpperBoundOnly, llvm::Value **CondVal)
Check for a num threads constant value (stored in UpperBound), or expression (stored in E).
static llvm::Value * emitDeviceID(llvm::PointerIntPair< const Expr *, 2, OpenMPDeviceClauseModifier > Device, CodeGenFunction &CGF)
static const OMPDeclareReductionDecl * getReductionInit(const Expr *ReductionOp)
Check if the combiner is a call to UDR combiner and if it is so return the UDR decl used for reductio...
static bool checkInitIsRequired(CodeGenFunction &CGF, ArrayRef< PrivateDataTy > Privates)
Check if duplication function is required for taskloops.
static bool checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD, ArrayRef< PrivateDataTy > Privates)
Checks if destructor function is required to be generated.
static llvm::TargetRegionEntryInfo getEntryInfoFromPresumedLoc(CodeGenModule &CGM, llvm::OpenMPIRBuilder &OMPBuilder, SourceLocation BeginLoc, llvm::StringRef ParentName="")
static void genMapInfo(MappableExprsHandler &MEHandler, CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo, llvm::OpenMPIRBuilder &OMPBuilder, const llvm::DenseSet< CanonicalDeclPtr< const Decl > > &SkippedVarSet=llvm::DenseSet< CanonicalDeclPtr< const Decl > >())
static void emitForStaticInitCall(CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId, llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule, OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2, const CGOpenMPRuntime::StaticRTInput &Values)
static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy, LValue BaseLV)
static void getKmpAffinityType(ASTContext &C, QualType &KmpTaskAffinityInfoTy)
Builds kmp_task_affinity_info, if it is not built yet, and builds flags type.
static llvm::Constant * emitMappingInformation(CodeGenFunction &CGF, llvm::OpenMPIRBuilder &OMPBuilder, MappableExprsHandler::MappingExprInfo &MapExprs)
Emit a string constant containing the names of the values mapped to the offloading runtime library.
static void getDependTypes(ASTContext &C, QualType &KmpDependInfoTy, QualType &FlagsTy)
Builds kmp_depend_info, if it is not built yet, and builds flags type.
static llvm::Value * emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc, const OMPExecutableDirective &D, QualType KmpTaskTWithPrivatesPtrQTy, const RecordDecl *KmpTaskTWithPrivatesQTyRD, const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy, QualType SharedsPtrTy, const OMPTaskDataTy &Data, ArrayRef< PrivateDataTy > Privates, bool WithLastIter)
Emit task_dup function (for initialization of private/firstprivate/lastprivate vars and last_iter fla...
static std::pair< llvm::Value *, OMPDynGroupprivateFallbackType > emitDynCGroupMem(const OMPExecutableDirective &D, CodeGenFunction &CGF)
static llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseKind convertDeviceClause(const VarDecl *VD)
static llvm::Value * emitReduceFiniFunction(CodeGenModule &CGM, SourceLocation Loc, ReductionCodeGen &RCG, unsigned N)
Emits reduction finalizer function:
static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr, QualType Type, bool EmitDeclareReductionInit, const Expr *Init, const OMPDeclareReductionDecl *DRD, Address SrcAddr=Address::invalid())
Emit initialization of arrays of complex types.
static bool getAArch64PBV(QualType QT, ASTContext &C)
Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI.
static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C)
Computes the lane size (LS) of a return type or of an input parameter, as defined by LS(P) in 3....
static void EmitDoacrossOrdered(CodeGenFunction &CGF, CodeGenModule &CGM, const T *C, llvm::Value *ULoc, llvm::Value *ThreadID)
static RTLDependenceKindTy translateDependencyKind(OpenMPDependClauseKind K)
Translates internal dependency kind into the runtime kind.
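For orientation, a sketch of such a translation; the flag-bit values mirror libomp's kmp_depend_info flags and should be read as an assumption, not a quote of this file:

// Sketch: OpenMP 'depend' kinds -> runtime dependence flag bits.
static unsigned sketchDependencyKind(OpenMPDependClauseKind K) {
  switch (K) {
  case OMPC_DEPEND_in:
    return 0x1; // DepIn
  case OMPC_DEPEND_out:
  case OMPC_DEPEND_inout:
    return 0x3; // DepInOut: plain 'out' is strengthened to inout
  case OMPC_DEPEND_mutexinoutset:
    return 0x4; // DepMutexInOutSet
  case OMPC_DEPEND_inoutset:
    return 0x8; // DepInOutSet
  default:
    return 0x0; // DepUnknown
  }
}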
static void emitTargetCallElse(CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn, const OMPExecutableDirective &D, llvm::SmallVectorImpl< llvm::Value * > &CapturedVars, bool RequiresOuterTask, const CapturedStmt &CS, bool OffloadingMandatory, CodeGenFunction &CGF)
static llvm::Function * emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty, const Expr *CombinerInitializer, const VarDecl *In, const VarDecl *Out, bool IsCombiner)
static void emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn, const llvm::APSInt &VLENVal, ArrayRef< ParamAttrTy > ParamAttrs, OMPDeclareSimdDeclAttr::BranchStateTy State)
static void emitReductionCombiner(CodeGenFunction &CGF, const Expr *ReductionOp)
Emit reduction combiner.
static std::string mangleVectorParameters(ArrayRef< ParamAttrTy > ParamAttrs)
Mangle the parameter part of the vector function name according to their OpenMP classification.
static std::string generateUniqueName(CodeGenModule &CGM, llvm::StringRef Prefix, const Expr *Ref)
static llvm::Function * emitParallelOrTeamsOutlinedFunction(CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS, const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen)
static void emitAArch64DeclareSimdFunction(CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN, ArrayRef< ParamAttrTy > ParamAttrs, OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName, char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc)
Emit vector function attributes for AArch64, as defined in the AAVFABI.
static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array, unsigned Index, const VarDecl *Var)
Given an array of pointers to variables, project the address of a given variable.
static bool isAssumedToBeNotEmitted(const ValueDecl *VD, bool IsDevice)
static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask, StringRef Prefix, char ISA, StringRef ParSeq, StringRef MangledName, bool OutputBecomesInput, llvm::Function *Fn)
static FieldDecl * addFieldToRecordDecl(ASTContext &C, DeclContext *DC, QualType FieldTy)
static ValueDecl * getDeclFromThisExpr(const Expr *E)
static void genMapInfoForCaptures(MappableExprsHandler &MEHandler, CodeGenFunction &CGF, const CapturedStmt &CS, llvm::SmallVectorImpl< llvm::Value * > &CapturedVars, llvm::OpenMPIRBuilder &OMPBuilder, llvm::DenseSet< CanonicalDeclPtr< const Decl > > &MappedVarSet, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo)
static RecordDecl * createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind, QualType KmpInt32Ty, QualType KmpRoutineEntryPointerQTy)
static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule, OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2)
static bool getAArch64MTV(QualType QT, ParamKindTy Kind)
Maps To Vector (MTV), as defined in 4.1.1 of the AAVFABI (2021Q1).
This file defines OpenMP AST classes for clauses.
Defines some OpenMP-specific enums and functions.
Defines the SourceManager interface.
This file defines OpenMP AST classes for executable directives and clauses.
This represents clause 'affinity' in the 'pragma omp task'-based directives.
static std::pair< const Expr *, std::optional< size_t > > findAttachPtrExpr(MappableExprComponentListRef Components, OpenMPDirectiveKind CurDirKind)
Find the attach pointer expression from a list of mappable expression components.
static QualType getComponentExprElementType(const Expr *Exp)
Get the type of an element of a ComponentList Expr Exp.
ArrayRef< MappableComponent > MappableExprComponentListRef
This represents implicit clause 'depend' for the 'pragma omp task' directive.
This represents 'detach' clause in the 'pragma omp task' directive.
This represents 'device' clause in the 'pragma omp ...' directive.
This represents the 'doacross' clause for the 'pragma omp ordered' directive.
This represents 'dyn_groupprivate' clause in 'pragma omp target ...' and 'pragma omp teams ....
This represents clause 'map' in the 'pragma omp ...' directives.
This represents clause 'nontemporal' in the 'pragma omp ...' directives.
This represents 'num_teams' clause in the 'pragma omp ...' directive.
This represents 'thread_limit' clause in the 'pragma omp ...' directive.
This represents clause 'uses_allocators' in the 'pragma omp target'-based directives.
This represents 'ompx_attribute' clause in a directive that might generate an outlined function.
This represents 'ompx_bare' clause in the 'pragma omp target teams ...' directive.
This represents 'ompx_dyn_cgroup_mem' clause in the 'pragma omp target ...' directive.
Holds long-lived AST nodes (such as types and decls) that can be referred to throughout the semantic ...
Definition ASTContext.h:220
SourceManager & getSourceManager()
Definition ASTContext.h:851
const ConstantArrayType * getAsConstantArrayType(QualType T) const
CharUnits getTypeAlignInChars(QualType T) const
Return the ABI-specified alignment of a (complete) type T, in characters.
const ASTRecordLayout & getASTRecordLayout(const RecordDecl *D) const
Get or compute information about the layout of the specified record (struct/union/class) D,...
QualType getPointerType(QualType T) const
Return the uniqued reference to the type for a pointer to the specified type.
CanQualType VoidPtrTy
QualType getConstantArrayType(QualType EltTy, const llvm::APInt &ArySize, const Expr *SizeExpr, ArraySizeModifier ASM, unsigned IndexTypeQuals) const
Return the unique reference to the type for a constant array of the specified element type.
const LangOptions & getLangOpts() const
Definition ASTContext.h:944
CanQualType BoolTy
QualType getIntTypeForBitwidth(unsigned DestWidth, unsigned Signed) const
getIntTypeForBitwidth - sets integer QualTy according to specified details: bitwidth,...
CharUnits getDeclAlign(const Decl *D, bool ForAlignof=false) const
Return a conservative estimate of the alignment of the specified decl D.
int64_t toBits(CharUnits CharSize) const
Convert a size in characters to a size in bits.
const ArrayType * getAsArrayType(QualType T) const
Type Query functions.
uint64_t getTypeSize(QualType T) const
Return the size of the specified (complete) type T, in bits.
CharUnits getTypeSizeInChars(QualType T) const
Return the size of the specified (complete) type T, in characters.
static bool hasSameType(QualType T1, QualType T2)
Determine whether the given types T1 and T2 are equivalent.
const VariableArrayType * getAsVariableArrayType(QualType T) const
QualType getSizeType() const
Return the unique type for "size_t" (C99 7.17), defined in <stddef.h>.
unsigned getTypeAlign(QualType T) const
Return the ABI-specified alignment of a (complete) type T, in bits.
CharUnits getSize() const
getSize - Get the record size in characters.
uint64_t getFieldOffset(unsigned FieldNo) const
getFieldOffset - Get the offset of the given field index, in bits.
CharUnits getNonVirtualSize() const
getNonVirtualSize - Get the non-virtual size (in chars) of an object, which is the size of the object...
static QualType getBaseOriginalType(const Expr *Base)
Return original type of the base expression for array section.
Definition Expr.cpp:5270
Represents an array type, per C99 6.7.5.2 - Array Declarators.
Definition TypeBase.h:3723
Attr - This represents one attribute.
Definition Attr.h:46
Represents a base class of a C++ class.
Definition DeclCXX.h:146
Represents a C++ constructor within a class.
Definition DeclCXX.h:2604
Represents a C++ destructor within a class.
Definition DeclCXX.h:2869
const CXXRecordDecl * getParent() const
Return the parent of this method declaration, which is the class in which this method is defined.
Definition DeclCXX.h:2255
QualType getFunctionObjectParameterType() const
Definition DeclCXX.h:2279
Represents a C++ struct/union/class.
Definition DeclCXX.h:258
base_class_range bases()
Definition DeclCXX.h:608
bool isLambda() const
Determine whether this class describes a lambda function object.
Definition DeclCXX.h:1018
void getCaptureFields(llvm::DenseMap< const ValueDecl *, FieldDecl * > &Captures, FieldDecl *&ThisCapture) const
For a closure type, retrieve the mapping from captured variables and this to the non-static data memb...
Definition DeclCXX.cpp:1784
unsigned getNumBases() const
Retrieves the number of base classes of this class.
Definition DeclCXX.h:602
base_class_range vbases()
Definition DeclCXX.h:625
capture_const_range captures() const
Definition DeclCXX.h:1097
ctor_range ctors() const
Definition DeclCXX.h:670
CXXDestructorDecl * getDestructor() const
Returns the destructor decl for this class.
Definition DeclCXX.cpp:2121
CanProxy< U > castAs() const
A wrapper class around a pointer that always points to its canonical declaration.
Describes the capture of either a variable, or 'this', or variable-length array type.
Definition Stmt.h:3942
bool capturesVariableByCopy() const
Determine whether this capture handles a variable by copy.
Definition Stmt.h:3976
VarDecl * getCapturedVar() const
Retrieve the declaration of the variable being captured.
Definition Stmt.cpp:1386
bool capturesVariableArrayType() const
Determine whether this capture handles a variable-length array type.
Definition Stmt.h:3982
bool capturesThis() const
Determine whether this capture handles the C++ 'this' pointer.
Definition Stmt.h:3970
bool capturesVariable() const
Determine whether this capture handles a variable (by reference).
Definition Stmt.h:3973
This captures a statement into a function.
Definition Stmt.h:3929
const Capture * const_capture_iterator
Definition Stmt.h:4063
capture_iterator capture_end() const
Retrieve an iterator pointing past the end of the sequence of captures.
Definition Stmt.h:4080
const RecordDecl * getCapturedRecordDecl() const
Retrieve the record declaration for captured variables.
Definition Stmt.h:4050
Stmt * getCapturedStmt()
Retrieve the statement being captured.
Definition Stmt.h:4033
bool capturesVariable(const VarDecl *Var) const
True if this variable has been captured.
Definition Stmt.cpp:1512
capture_iterator capture_begin()
Retrieve an iterator pointing to the first capture.
Definition Stmt.h:4075
capture_range captures()
Definition Stmt.h:4067
CharUnits - This is an opaque type for sizes expressed in character units.
Definition CharUnits.h:38
bool isZero() const
isZero - Test whether the quantity equals zero.
Definition CharUnits.h:122
llvm::Align getAsAlign() const
getAsAlign - Returns Quantity as a valid llvm::Align. Beware: llvm::Align assumes power of two 8-bit b...
Definition CharUnits.h:189
QuantityType getQuantity() const
getQuantity - Get the raw integer representation of this quantity.
Definition CharUnits.h:185
CharUnits alignmentOfArrayElement(CharUnits elementSize) const
Given that this is the alignment of the first element of an array, return the minimum alignment of an...
Definition CharUnits.h:214
static CharUnits fromQuantity(QuantityType Quantity)
fromQuantity - Construct a CharUnits quantity from a raw integer type.
Definition CharUnits.h:63
CharUnits alignTo(const CharUnits &Align) const
alignTo - Returns the next integer (mod 2**64) that is greater than or equal to this quantity and is ...
Definition CharUnits.h:201
Like RawAddress, an abstract representation of an aligned address, but the pointer contained in this ...
Definition Address.h:128
static Address invalid()
Definition Address.h:176
llvm::Value * emitRawPointer(CodeGenFunction &CGF) const
Return the pointer contained in this class after authenticating it and adding offset to it if necessa...
Definition Address.h:253
CharUnits getAlignment() const
Definition Address.h:194
llvm::Type * getElementType() const
Return the type of the values stored in this address.
Definition Address.h:209
Address withPointer(llvm::Value *NewPointer, KnownNonNull_t IsKnownNonNull) const
Return address with different pointer, but same element type and alignment.
Definition Address.h:261
Address withElementType(llvm::Type *ElemTy) const
Return address with different element type, but same pointer and alignment.
Definition Address.h:276
bool isValid() const
Definition Address.h:177
llvm::PointerType * getType() const
Return the type of the pointer value.
Definition Address.h:204
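A short usage sketch of the Address API above (sketchStoreNull is a hypothetical helper, not from this file):

// Store a null value of the slot's element type, then view the slot as i8.
void sketchStoreNull(CodeGenFunction &CGF, Address Slot) {
  assert(Slot.isValid());
  llvm::Type *ElemTy = Slot.getElementType();
  CGF.Builder.CreateStore(llvm::Constant::getNullValue(ElemTy), Slot);
  Address AsBytes = Slot.withElementType(CGF.Builder.getInt8Ty());
  (void)AsBytes; // same pointer and alignment, element type i8
}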
static ApplyDebugLocation CreateArtificial(CodeGenFunction &CGF)
Apply TemporaryLocation if it is valid.
static ApplyDebugLocation CreateDefaultArtificial(CodeGenFunction &CGF, SourceLocation TemporaryLocation)
Apply TemporaryLocation if it is valid.
static ApplyDebugLocation CreateEmpty(CodeGenFunction &CGF)
Set the IRBuilder to not attach debug locations.
llvm::StoreInst * CreateStore(llvm::Value *Val, Address Addr, bool IsVolatile=false)
Definition CGBuilder.h:140
Address CreateGEP(CodeGenFunction &CGF, Address Addr, llvm::Value *Index, const llvm::Twine &Name="")
Definition CGBuilder.h:296
Address CreatePointerBitCastOrAddrSpaceCast(Address Addr, llvm::Type *Ty, llvm::Type *ElementTy, const llvm::Twine &Name="")
Definition CGBuilder.h:207
Address CreateConstArrayGEP(Address Addr, uint64_t Index, const llvm::Twine &Name="")
Given addr = [n x T]* ... produce name = getelementptr inbounds addr, i64 0, i64 index where i64 is a...
Definition CGBuilder.h:245
llvm::LoadInst * CreateLoad(Address Addr, const llvm::Twine &Name="")
Definition CGBuilder.h:112
llvm::CallInst * CreateMemCpy(Address Dest, Address Src, llvm::Value *Size, bool IsVolatile=false)
Definition CGBuilder.h:369
Address CreateConstGEP(Address Addr, uint64_t Index, const llvm::Twine &Name="")
Given addr = T* ... produce name = getelementptr inbounds addr, i64 index where i64 is actually the t...
Definition CGBuilder.h:282
Address CreateAddrSpaceCast(Address Addr, llvm::Type *Ty, llvm::Type *ElementTy, const llvm::Twine &Name="")
Definition CGBuilder.h:193
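A usage sketch combining the CGBuilder helpers above (sketchCopyElem is a hypothetical helper, not from this file):

// Copy element I of a constant array into a destination slot.
void sketchCopyElem(CodeGenFunction &CGF, Address Array, Address Dest,
                    uint64_t I) {
  Address Elem = CGF.Builder.CreateConstArrayGEP(Array, I);
  llvm::Value *V = CGF.Builder.CreateLoad(Elem);
  CGF.Builder.CreateStore(V, Dest);
}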
CGFunctionInfo - Class to encapsulate the information about a function definition.
static LastprivateConditionalRAII disable(CodeGenFunction &CGF, const OMPExecutableDirective &S)
NontemporalDeclsRAII(CodeGenModule &CGM, const OMPLoopDirective &S)
Struct that keeps all the relevant information that should be kept throughout a 'target data' region.
llvm::DenseMap< const ValueDecl *, llvm::Value * > CaptureDeviceAddrMap
Map between a declaration of a capture and the corresponding new llvm address where the runtime r...
UntiedTaskLocalDeclsRAII(CodeGenFunction &CGF, const llvm::MapVector< CanonicalDeclPtr< const VarDecl >, std::pair< Address, Address > > &LocalVars)
virtual Address emitThreadIDAddress(CodeGenFunction &CGF, SourceLocation Loc)
Emits address of the word in memory where the current thread id is stored.
llvm::StringSet ThreadPrivateWithDefinition
Set of threadprivate variables with the generated initializer.
virtual void emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, const OMPExecutableDirective &D, llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds, const Expr *IfCond, const OMPTaskDataTy &Data)
Emit task region for the task directive.
void createOffloadEntriesAndInfoMetadata()
Creates all the offload entries in the current compilation unit along with the associated metadata.
const Expr * getNumTeamsExprForTargetDirective(CodeGenFunction &CGF, const OMPExecutableDirective &D, int32_t &MinTeamsVal, int32_t &MaxTeamsVal)
Emit the number of teams for a target directive.
virtual Address getAddrOfThreadPrivate(CodeGenFunction &CGF, const VarDecl *VD, Address VDAddr, SourceLocation Loc)
Returns address of the threadprivate variable for the current thread.
void emitDeferredTargetDecls() const
Emit deferred declare target variables marked for deferred emission.
virtual llvm::Value * emitForNext(CodeGenFunction &CGF, SourceLocation Loc, unsigned IVSize, bool IVSigned, Address IL, Address LB, Address UB, Address ST)
Call __kmpc_dispatch_next( ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter, kmp_int[32|64] *p_lowe...
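For reference, the runtime entry point this brief names has roughly the following shape; the _4 suffix denotes the 32-bit signed induction-variable variant, and the prototype is assumed from the libomp convention (4u/8/8u siblings exist):

extern "C" kmp_int32 __kmpc_dispatch_next_4(ident_t *loc, kmp_int32 gtid,
                                            kmp_int32 *p_lastiter,
                                            kmp_int32 *p_lower,
                                            kmp_int32 *p_upper,
                                            kmp_int32 *p_stride);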
bool markAsGlobalTarget(GlobalDecl GD)
Marks the declaration as already emitted for the device code and returns true if it was marked alrea...
virtual void emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc, llvm::Function *OutlinedFn, ArrayRef< llvm::Value * > CapturedVars, const Expr *IfCond, llvm::Value *NumThreads, OpenMPNumThreadsClauseModifier NumThreadsModifier=OMPC_NUMTHREADS_unknown, OpenMPSeverityClauseKind Severity=OMPC_SEVERITY_fatal, const Expr *Message=nullptr)
Emits code for parallel or serial call of the OutlinedFn with variables captured in a record which ad...
llvm::SmallDenseSet< CanonicalDeclPtr< const Decl > > NontemporalDeclsSet
virtual void emitTargetDataStandAloneCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, const Expr *Device)
Emit the data mapping/movement code associated with the directive D that should be of the form 'targe...
virtual void emitNumThreadsClause(CodeGenFunction &CGF, llvm::Value *NumThreads, SourceLocation Loc, OpenMPNumThreadsClauseModifier Modifier=OMPC_NUMTHREADS_unknown, OpenMPSeverityClauseKind Severity=OMPC_SEVERITY_fatal, SourceLocation SeverityLoc=SourceLocation(), const Expr *Message=nullptr, SourceLocation MessageLoc=SourceLocation())
Emits call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_threads) ...
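For reference, the prototype spelled out in the brief (assumed from libomp):

extern "C" void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
                                        kmp_int32 num_threads);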
QualType SavedKmpTaskloopTQTy
Saved kmp_task_t for taskloop-based directive.
virtual void emitSingleRegion(CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen, SourceLocation Loc, ArrayRef< const Expr * > CopyprivateVars, ArrayRef< const Expr * > DestExprs, ArrayRef< const Expr * > SrcExprs, ArrayRef< const Expr * > AssignmentOps)
Emits a single region.
virtual bool emitTargetGlobal(GlobalDecl GD)
Emit the global GD if it is meaningful for the target.
void setLocThreadIdInsertPt(CodeGenFunction &CGF, bool AtCurrentPoint=false)
std::string getOutlinedHelperName(StringRef Name) const
Get the function name of an outlined region.
bool HasEmittedDeclareTargetRegion
Flag for keeping track of whether a device routine has been emitted.
llvm::Constant * getOrCreateThreadPrivateCache(const VarDecl *VD)
If the specified mangled name is not in the module, create and return threadprivate cache object.
virtual Address getTaskReductionItem(CodeGenFunction &CGF, SourceLocation Loc, llvm::Value *ReductionsPtr, LValue SharedLVal)
Get the address of void * type of the private copy of the reduction item specified by the SharedLVal...
virtual void emitForDispatchDeinit(CodeGenFunction &CGF, SourceLocation Loc)
This is used for non-static scheduled types and when the ordered clause is present on the loop constr...
void emitCall(CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee Callee, ArrayRef< llvm::Value * > Args={}) const
Emits Callee function call with arguments Args with location Loc.
virtual void getDefaultScheduleAndChunk(CodeGenFunction &CGF, const OMPLoopDirective &S, OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const
Choose default schedule type and chunk value for the schedule clause.
virtual std::pair< llvm::Function *, llvm::Function * > getUserDefinedReduction(const OMPDeclareReductionDecl *D)
Get combiner/initializer for the specified user-defined reduction, if any.
virtual bool isGPU() const
Returns true if the current target is a GPU.
static const Stmt * getSingleCompoundChild(ASTContext &Ctx, const Stmt *Body)
Checks if the Body is the CompoundStmt and returns its child statement iff there is only one that is ...
virtual void emitDeclareTargetFunction(const FunctionDecl *FD, llvm::GlobalValue *GV)
Emit code for handling declare target functions in the runtime.
bool HasRequiresUnifiedSharedMemory
Flag for keeping track of whether a requires unified_shared_memory directive is present.
llvm::Value * emitUpdateLocation(CodeGenFunction &CGF, SourceLocation Loc, unsigned Flags=0, bool EmitLoc=false)
Emits object of ident_t type with info for source location.
bool isLocalVarInUntiedTask(CodeGenFunction &CGF, const VarDecl *VD) const
Returns true if the variable is a local variable in untied task.
virtual void emitTeamsCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, SourceLocation Loc, llvm::Function *OutlinedFn, ArrayRef< llvm::Value * > CapturedVars)
Emits code for teams call of the OutlinedFn with variables captured in a record which address is stor...
virtual void emitCancellationPointCall(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind CancelRegion)
Emit code for 'cancellation point' construct.
virtual llvm::Function * emitThreadPrivateVarDefinition(const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit, CodeGenFunction *CGF=nullptr)
Emit a code for initialization of threadprivate variable.
virtual ConstantAddress getAddrOfDeclareTargetVar(const VarDecl *VD)
Returns the address of the variable marked as declare target with link clause OR as declare target wi...
llvm::Function * getOrCreateUserDefinedMapperFunc(const OMPDeclareMapperDecl *D)
Get the function for the specified user-defined mapper.
OpenMPLocThreadIDMapTy OpenMPLocThreadIDMap
virtual void functionFinished(CodeGenFunction &CGF)
Cleans up references to the objects in finished function.
virtual llvm::Function * emitTeamsOutlinedFunction(CodeGenFunction &CGF, const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen)
Emits outlined function for the specified OpenMP teams directive D.
QualType KmpTaskTQTy
Type typedef struct kmp_task { void *shareds; /**< pointer to block of pointers to shared vars */ k...
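Untangling the truncated brief above, the descriptor it describes is approximately the following; field order is taken from libomp's kmp_task_t, and the trailing members should be read as a sketch rather than the authoritative layout:

typedef struct kmp_task {
  void *shareds;                   // pointer to block of pointers to shared vars
  kmp_routine_entry_t routine;     // task entry point
  kmp_int32 part_id;               // part id for untied tasks
  kmp_routine_entry_t destructors; // destructor thunk, when needed
} kmp_task_t;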
llvm::OpenMPIRBuilder OMPBuilder
An OpenMP-IR-Builder instance.
virtual void emitDoacrossInit(CodeGenFunction &CGF, const OMPLoopDirective &D, ArrayRef< Expr * > NumIterations)
Emit initialization for doacross loop nesting support.
virtual void adjustTargetSpecificDataForLambdas(CodeGenFunction &CGF, const OMPExecutableDirective &D) const
Adjust some parameters for the target-based directives, like addresses of the variables captured by r...
virtual void emitTargetDataCalls(CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, const Expr *Device, const RegionCodeGenTy &CodeGen, CGOpenMPRuntime::TargetDataInfo &Info)
Emit the target data mapping code associated with D.
virtual unsigned getDefaultLocationReserved2Flags() const
Returns additional flags that can be stored in reserved_2 field of the default location.
virtual Address getParameterAddress(CodeGenFunction &CGF, const VarDecl *NativeParam, const VarDecl *TargetParam) const
Gets the address of the native argument basing on the address of the target-specific parameter.
void emitUsesAllocatorsFini(CodeGenFunction &CGF, const Expr *Allocator)
Destroys user defined allocators specified in the uses_allocators clause.
QualType KmpTaskAffinityInfoTy
Type typedef struct kmp_task_affinity_info { kmp_intptr_t base_addr; size_t len; struct { bool flag1 ...
void emitPrivateReduction(CodeGenFunction &CGF, SourceLocation Loc, const Expr *Privates, const Expr *LHSExprs, const Expr *RHSExprs, const Expr *ReductionOps)
Emits code for private variable reduction.
llvm::Value * emitNumTeamsForTargetDirective(CodeGenFunction &CGF, const OMPExecutableDirective &D)
virtual void emitTargetOutlinedFunctionHelper(const OMPExecutableDirective &D, StringRef ParentName, llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, bool IsOffloadEntry, const RegionCodeGenTy &CodeGen)
Helper to emit outlined function for 'target' directive.
void scanForTargetRegionsFunctions(const Stmt *S, StringRef ParentName)
Start scanning from statement S and emit all target regions found along the way.
SmallVector< llvm::Value *, 4 > emitDepobjElementsSizes(CodeGenFunction &CGF, QualType &KmpDependInfoTy, const OMPTaskDataTy::DependData &Data)
virtual llvm::Value * emitMessageClause(CodeGenFunction &CGF, const Expr *Message, SourceLocation Loc)
virtual void emitTaskgroupRegion(CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen, SourceLocation Loc)
Emit a taskgroup region.
llvm::DenseMap< llvm::Function *, llvm::DenseMap< CanonicalDeclPtr< const Decl >, std::tuple< QualType, const FieldDecl *, const FieldDecl *, LValue > > > LastprivateConditionalToTypes
Maps local variables marked as lastprivate conditional to their internal types.
virtual bool emitTargetGlobalVariable(GlobalDecl GD)
Emit the global variable if it is a valid device global variable.
virtual void emitNumTeamsClause(CodeGenFunction &CGF, const Expr *NumTeams, const Expr *ThreadLimit, SourceLocation Loc)
Emits call to void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_teams,...
bool hasRequiresUnifiedSharedMemory() const
Return whether the unified_shared_memory has been specified.
virtual Address getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF, QualType VarType, StringRef Name)
Creates artificial threadprivate variable with name Name and type VarType.
void emitUserDefinedMapper(const OMPDeclareMapperDecl *D, CodeGenFunction *CGF=nullptr)
Emit the function for the user defined mapper construct.
bool HasEmittedTargetRegion
Flag for keeping track of whether a target region has been emitted.
void emitDepobjElements(CodeGenFunction &CGF, QualType &KmpDependInfoTy, LValue PosLVal, const OMPTaskDataTy::DependData &Data, Address DependenciesArray)
std::string getReductionFuncName(StringRef Name) const
Get the function name of a reduction function.
virtual void processRequiresDirective(const OMPRequiresDecl *D)
Perform check on requires decl to ensure that target architecture supports unified addressing.
llvm::DenseSet< CanonicalDeclPtr< const Decl > > AlreadyEmittedTargetDecls
List of the emitted declarations.
virtual llvm::Value * emitTaskReductionInit(CodeGenFunction &CGF, SourceLocation Loc, ArrayRef< const Expr * > LHSExprs, ArrayRef< const Expr * > RHSExprs, const OMPTaskDataTy &Data)
Emit a code for initialization of task reduction clause.
llvm::Value * getThreadID(CodeGenFunction &CGF, SourceLocation Loc)
Gets thread id value for the current thread.
void emitUpdateClause(CodeGenFunction &CGF, LValue DepobjLVal, OpenMPDependClauseKind NewDepKind, SourceLocation Loc)
Updates the dependency kind in the specified depobj object.
virtual void emitLastprivateConditionalFinalUpdate(CodeGenFunction &CGF, LValue PrivLVal, const VarDecl *VD, SourceLocation Loc)
Gets the address of the global copy used for lastprivate conditional update, if any.
llvm::MapVector< CanonicalDeclPtr< const VarDecl >, std::pair< Address, Address > > UntiedLocalVarsAddressesMap
virtual void emitErrorCall(CodeGenFunction &CGF, SourceLocation Loc, Expr *ME, bool IsFatal)
Emit __kmpc_error call for the error directive: extern void __kmpc_error(ident_t *loc, int severity,...
void clearLocThreadIdInsertPt(CodeGenFunction &CGF)
virtual void emitTaskyieldCall(CodeGenFunction &CGF, SourceLocation Loc)
Emits code for a taskyield directive.
std::string getName(ArrayRef< StringRef > Parts) const
Get the platform-specific name separator.
void computeMinAndMaxThreadsAndTeams(const OMPExecutableDirective &D, CodeGenFunction &CGF, llvm::OpenMPIRBuilder::TargetKernelDefaultAttrs &Attrs)
Helper to determine the min/max number of threads/teams for D.
virtual void emitFlush(CodeGenFunction &CGF, ArrayRef< const Expr * > Vars, SourceLocation Loc, llvm::AtomicOrdering AO)
Emit flush of the variables specified in 'omp flush' directive.
virtual void emitTaskwaitCall(CodeGenFunction &CGF, SourceLocation Loc, const OMPTaskDataTy &Data)
Emit code for 'taskwait' directive.
virtual void emitProcBindClause(CodeGenFunction &CGF, llvm::omp::ProcBindKind ProcBind, SourceLocation Loc)
Emit call to void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid, int proc_bind) to generate...
void emitLastprivateConditionalUpdate(CodeGenFunction &CGF, LValue IVLVal, StringRef UniqueDeclName, LValue LVal, SourceLocation Loc)
Emit update for lastprivate conditional data.
virtual void emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D, llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds, const Expr *IfCond, const OMPTaskDataTy &Data)
Emit task region for the taskloop directive.
virtual void emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind Kind, bool EmitChecks=true, bool ForceSimpleCall=false)
Emit an implicit/explicit barrier for OpenMP threads.
static unsigned getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind)
Returns default flags for the barriers depending on the directive, for which this barrier is going to ...
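For reference, the barrier itself lowers to a single libomp call, with the flags from this helper folded into the ident_t location argument (call shape assumed from libomp):

extern "C" void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid);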
virtual bool emitTargetFunctions(GlobalDecl GD)
Emit the target regions enclosed in GD function definition or the function itself in case it is a val...
TaskResultTy emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc, const OMPExecutableDirective &D, llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds, const OMPTaskDataTy &Data)
Emit task region for the task directive.
llvm::Value * emitTargetNumIterationsCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, llvm::function_ref< llvm::Value *(CodeGenFunction &CGF, const OMPLoopDirective &D)> SizeEmitter)
Return the trip count of loops associated with constructs 'target teams distribute' and 'teams dist...
llvm::StringMap< llvm::AssertingVH< llvm::GlobalVariable >, llvm::BumpPtrAllocator > InternalVars
An ordered map of auto-generated variables to their unique names.
virtual void emitDistributeStaticInit(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values)
llvm::SmallVector< UntiedLocalVarsAddressesMap, 4 > UntiedLocalVarsStack
virtual void emitForStaticFinish(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind)
Call the appropriate runtime routine to notify that we finished all the work with current loop.
virtual void emitThreadLimitClause(CodeGenFunction &CGF, const Expr *ThreadLimit, SourceLocation Loc)
Emits call to void __kmpc_set_thread_limit(ident_t *loc, kmp_int32 global_tid, kmp_int32 thread_limit)...
void emitIfClause(CodeGenFunction &CGF, const Expr *Cond, const RegionCodeGenTy &ThenGen, const RegionCodeGenTy &ElseGen)
Emits code for OpenMP 'if' clause using specified CodeGen function.
Address emitDepobjDependClause(CodeGenFunction &CGF, const OMPTaskDataTy::DependData &Dependencies, SourceLocation Loc)
Emits list of dependencies based on the provided data (array of dependence/expression pairs) for depob...
bool isNontemporalDecl(const ValueDecl *VD) const
Checks if the VD variable is marked as nontemporal declaration in current context.
virtual llvm::Function * emitParallelOutlinedFunction(CodeGenFunction &CGF, const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen)
Emits outlined function for the specified OpenMP parallel directive D.
const Expr * getNumThreadsExprForTargetDirective(CodeGenFunction &CGF, const OMPExecutableDirective &D, int32_t &UpperBound, bool UpperBoundOnly, llvm::Value **CondExpr=nullptr, const Expr **ThreadLimitExpr=nullptr)
Check for a number of threads upper bound constant value (stored in UpperBound), or expression (retur...
virtual void registerVTableOffloadEntry(llvm::GlobalVariable *VTable, const VarDecl *VD)
Register VTable to OpenMP offload entry.
virtual llvm::Value * emitSeverityClause(OpenMPSeverityClauseKind Severity, SourceLocation Loc)
llvm::SmallVector< LastprivateConditionalData, 4 > LastprivateConditionalStack
Stack for list of addresses of declarations in current context marked as lastprivate conditional.
virtual void emitForStaticInit(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind, const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values)
Call the appropriate runtime routine to initialize it before start of loop.
virtual void emitDeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn)
Marks function Fn with properly mangled versions of vector functions.
llvm::AtomicOrdering getDefaultMemoryOrdering() const
Gets default memory ordering as specified in requires directive.
virtual bool isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind, bool Chunked) const
Check if the specified ScheduleKind is static non-chunked.
virtual void emitAndRegisterVTable(CodeGenModule &CGM, CXXRecordDecl *CXXRecord, const VarDecl *VD)
Emit and register VTable for the C++ class in OpenMP offload entry.
llvm::Value * getCriticalRegionLock(StringRef CriticalName)
Returns corresponding lock object for the specified critical region name.
virtual void emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc, const Expr *IfCond, OpenMPDirectiveKind CancelRegion)
Emit code for 'cancel' construct.
QualType SavedKmpTaskTQTy
Saved kmp_task_t for task directive.
virtual void emitMasterRegion(CodeGenFunction &CGF, const RegionCodeGenTy &MasterOpGen, SourceLocation Loc)
Emits a master region.
virtual llvm::Function * emitTaskOutlinedFunction(const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, const VarDecl *PartIDVar, const VarDecl *TaskTVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen, bool Tied, unsigned &NumberOfParts)
Emits outlined function for the OpenMP task directive D.
llvm::DenseMap< llvm::Function *, unsigned > FunctionToUntiedTaskStackMap
Maps function to the position of the untied task locals stack.
void emitDestroyClause(CodeGenFunction &CGF, LValue DepobjLVal, SourceLocation Loc)
Emits the code to destroy the dependency object provided in depobj directive.
virtual void emitTaskReductionFixups(CodeGenFunction &CGF, SourceLocation Loc, ReductionCodeGen &RCG, unsigned N)
Required to resolve existing problems in the runtime.
llvm::ArrayType * KmpCriticalNameTy
Type kmp_critical_name, originally defined as typedef kmp_int32 kmp_critical_name[8];.
virtual void emitDoacrossOrdered(CodeGenFunction &CGF, const OMPDependClause *C)
Emit code for doacross ordered directive with 'depend' clause.
llvm::DenseMap< const OMPDeclareMapperDecl *, llvm::Function * > UDMMap
Map from the user-defined mapper declaration to its corresponding functions.
virtual void checkAndEmitLastprivateConditional(CodeGenFunction &CGF, const Expr *LHS)
Checks if the provided LVal is lastprivate conditional and emits the code to update the value of the ...
std::pair< llvm::Value *, LValue > getDepobjElements(CodeGenFunction &CGF, LValue DepobjLVal, SourceLocation Loc)
Returns the number of the elements and the address of the depobj dependency array.
llvm::SmallDenseSet< const VarDecl * > DeferredGlobalVariables
List of variables that can become declare target implicitly and, thus, must be emitted.
void emitUsesAllocatorsInit(CodeGenFunction &CGF, const Expr *Allocator, const Expr *AllocatorTraits)
Initializes user defined allocators specified in the uses_allocators clauses.
virtual void registerVTable(const OMPExecutableDirective &D)
Emit code for registering vtable by scanning through map clause in OpenMP target region.
llvm::Type * KmpRoutineEntryPtrTy
Type typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *);.
llvm::Type * getIdentTyPointerTy()
Returns pointer to ident_t type.
void emitSingleReductionCombiner(CodeGenFunction &CGF, const Expr *ReductionOp, const Expr *PrivateRef, const DeclRefExpr *LHS, const DeclRefExpr *RHS)
Emits single reduction combiner.
llvm::OpenMPIRBuilder & getOMPBuilder()
virtual void emitTargetOutlinedFunction(const OMPExecutableDirective &D, StringRef ParentName, llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, bool IsOffloadEntry, const RegionCodeGenTy &CodeGen)
Emit outlined function for 'target' directive.
virtual void emitCriticalRegion(CodeGenFunction &CGF, StringRef CriticalName, const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc, const Expr *Hint=nullptr)
Emits a critical region.
virtual void emitForOrderedIterationEnd(CodeGenFunction &CGF, SourceLocation Loc, unsigned IVSize, bool IVSigned)
Call the appropriate runtime routine to notify that we finished iteration of the ordered loop with th...
virtual void emitOutlinedFunctionCall(CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn, ArrayRef< llvm::Value * > Args={}) const
Emits call of the outlined function with the provided arguments, translating these arguments to corre...
llvm::Value * emitNumThreadsForTargetDirective(CodeGenFunction &CGF, const OMPExecutableDirective &D)
Emit an expression that denotes the number of threads a target region shall use.
void emitThreadPrivateVarInit(CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor, llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc)
Emits initialization code for the threadprivate variables.
virtual void emitUserDefinedReduction(CodeGenFunction *CGF, const OMPDeclareReductionDecl *D)
Emit code for the specified user defined reduction construct.
virtual void checkAndEmitSharedLastprivateConditional(CodeGenFunction &CGF, const OMPExecutableDirective &D, const llvm::DenseSet< CanonicalDeclPtr< const VarDecl > > &IgnoredDecls)
Checks if the lastprivate conditional was updated in inner region and writes the value.
QualType KmpDimTy
struct kmp_dim { // loop bounds info casted to kmp_int64 kmp_int64 lo; // lower kmp_int64 up; // uppe...
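Reconstructed from the truncated brief above, a sketch of the doacross bounds record:

struct kmp_dim {  // loop bounds info casted to kmp_int64
  kmp_int64 lo;   // lower
  kmp_int64 up;   // upper
  kmp_int64 st;   // stride
};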
virtual void emitInlinedDirective(CodeGenFunction &CGF, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen, bool HasCancel=false)
Emit code for the directive that does not require outlining.
virtual void registerTargetGlobalVariable(const VarDecl *VD, llvm::Constant *Addr)
Checks if the provided global decl GD is a declare target variable and registers it when emitting cod...
virtual void emitFunctionProlog(CodeGenFunction &CGF, const Decl *D)
Emits OpenMP-specific function prolog.
void emitKmpRoutineEntryT(QualType KmpInt32Ty)
Build type kmp_routine_entry_t (if not built yet).
virtual bool isStaticChunked(OpenMPScheduleClauseKind ScheduleKind, bool Chunked) const
Check if the specified ScheduleKind is static chunked.
virtual void emitTargetCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond, llvm::PointerIntPair< const Expr *, 2, OpenMPDeviceClauseModifier > Device, llvm::function_ref< llvm::Value *(CodeGenFunction &CGF, const OMPLoopDirective &D)> SizeEmitter)
Emit the target offloading code associated with D.
virtual bool hasAllocateAttributeForGlobalVar(const VarDecl *VD, LangAS &AS)
Checks if the variable has associated OMPAllocateDeclAttr attribute with the predefined allocator and...
llvm::AtomicOrdering RequiresAtomicOrdering
Atomic ordering from the omp requires directive.
virtual void emitReduction(CodeGenFunction &CGF, SourceLocation Loc, ArrayRef< const Expr * > Privates, ArrayRef< const Expr * > LHSExprs, ArrayRef< const Expr * > RHSExprs, ArrayRef< const Expr * > ReductionOps, ReductionOptionsTy Options)
Emit a code for reduction clause.
std::pair< llvm::Value *, Address > emitDependClause(CodeGenFunction &CGF, ArrayRef< OMPTaskDataTy::DependData > Dependencies, SourceLocation Loc)
Emits list of dependencies based on the provided data (array of dependence/expression pairs).
llvm::StringMap< llvm::WeakTrackingVH > EmittedNonTargetVariables
List of the global variables with their addresses that should not be emitted for the target.
virtual bool isDynamic(OpenMPScheduleClauseKind ScheduleKind) const
Check if the specified ScheduleKind is dynamic.
Address emitLastprivateConditionalInit(CodeGenFunction &CGF, const VarDecl *VD)
Create specialized alloca to handle lastprivate conditionals.
virtual void emitOrderedRegion(CodeGenFunction &CGF, const RegionCodeGenTy &OrderedOpGen, SourceLocation Loc, bool IsThreads)
Emit an ordered region.
virtual Address getAddressOfLocalVariable(CodeGenFunction &CGF, const VarDecl *VD)
Gets the OpenMP-specific address of the local variable.
virtual void emitTaskReductionFini(CodeGenFunction &CGF, SourceLocation Loc, bool IsWorksharingReduction)
Emits the following code for reduction clause with task modifier:
virtual void emitMaskedRegion(CodeGenFunction &CGF, const RegionCodeGenTy &MaskedOpGen, SourceLocation Loc, const Expr *Filter=nullptr)
Emits a masked region.
QualType KmpDependInfoTy
Type typedef struct kmp_depend_info { kmp_intptr_t base_addr; size_t len; struct { bool in:1; bool ou...
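Reconstructed from the truncated brief above; the exact set of flag bits is an assumption based on libomp:

typedef struct kmp_depend_info {
  kmp_intptr_t base_addr;
  size_t len;
  struct {
    bool in : 1;
    bool out : 1;
    bool mtx : 1;
  } flags;
} kmp_depend_info_t;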
llvm::Function * emitReductionFunction(StringRef ReducerName, SourceLocation Loc, llvm::Type *ArgsElemType, ArrayRef< const Expr * > Privates, ArrayRef< const Expr * > LHSExprs, ArrayRef< const Expr * > RHSExprs, ArrayRef< const Expr * > ReductionOps)
Emits reduction function.
virtual void emitForDispatchInit(CodeGenFunction &CGF, SourceLocation Loc, const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned, bool Ordered, const DispatchRTInput &DispatchValues)
Call the appropriate runtime routine to initialize it before start of loop.
Address getTaskReductionItem(CodeGenFunction &CGF, SourceLocation Loc, llvm::Value *ReductionsPtr, LValue SharedLVal) override
Get the address of void * type of the private copy of the reduction item specified by the SharedLVal...
void emitCriticalRegion(CodeGenFunction &CGF, StringRef CriticalName, const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc, const Expr *Hint=nullptr) override
Emits a critical region.
void emitDistributeStaticInit(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) override
void emitForStaticInit(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind, const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) override
Call the appropriate runtime routine to initialize it before start of loop.
bool emitTargetGlobalVariable(GlobalDecl GD) override
Emit the global variable if it is a valid device global variable.
llvm::Value * emitForNext(CodeGenFunction &CGF, SourceLocation Loc, unsigned IVSize, bool IVSigned, Address IL, Address LB, Address UB, Address ST) override
Call __kmpc_dispatch_next( ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter, kmp_int[32|64] *p_lowe...
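For reference, a sketch of the 32-bit entry point behind emitForNext, as declared in the LLVM OpenMP runtime; the 64-bit variant substitutes kmp_int64 for the bound/stride parameters (exact spelling assumed from the runtime headers):

// Returns nonzero while another chunk of iterations is available.
kmp_int32 __kmpc_dispatch_next_4(ident_t *loc, kmp_int32 gtid,
                                 kmp_int32 *p_last, kmp_int32 *p_lb,
                                 kmp_int32 *p_ub, kmp_int32 *p_st);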
llvm::Function * emitThreadPrivateVarDefinition(const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit, CodeGenFunction *CGF=nullptr) override
Emit code for the initialization of a threadprivate variable.
void emitTargetDataStandAloneCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, const Expr *Device) override
Emit the data mapping/movement code associated with the directive D that should be of the form 'targe...
llvm::Function * emitTeamsOutlinedFunction(CodeGenFunction &CGF, const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) override
Emits outlined function for the specified OpenMP teams directive D.
void emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc, llvm::Function *OutlinedFn, ArrayRef< llvm::Value * > CapturedVars, const Expr *IfCond, llvm::Value *NumThreads, OpenMPNumThreadsClauseModifier NumThreadsModifier=OMPC_NUMTHREADS_unknown, OpenMPSeverityClauseKind Severity=OMPC_SEVERITY_fatal, const Expr *Message=nullptr) override
Emits code for parallel or serial call of the OutlinedFn with variables captured in a record whose ad...
void emitReduction(CodeGenFunction &CGF, SourceLocation Loc, ArrayRef< const Expr * > Privates, ArrayRef< const Expr * > LHSExprs, ArrayRef< const Expr * > RHSExprs, ArrayRef< const Expr * > ReductionOps, ReductionOptionsTy Options) override
Emit code for the reduction clause.
void emitFlush(CodeGenFunction &CGF, ArrayRef< const Expr * > Vars, SourceLocation Loc, llvm::AtomicOrdering AO) override
Emit flush of the variables specified in 'omp flush' directive.
void emitDoacrossOrdered(CodeGenFunction &CGF, const OMPDependClause *C) override
Emit code for doacross ordered directive with 'depend' clause.
void emitTaskyieldCall(CodeGenFunction &CGF, SourceLocation Loc) override
Emits code for a 'taskyield' directive.
Address getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF, QualType VarType, StringRef Name) override
Creates artificial threadprivate variable with name Name and type VarType.
Address getAddrOfThreadPrivate(CodeGenFunction &CGF, const VarDecl *VD, Address VDAddr, SourceLocation Loc) override
Returns address of the threadprivate variable for the current thread.
void emitSingleRegion(CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen, SourceLocation Loc, ArrayRef< const Expr * > CopyprivateVars, ArrayRef< const Expr * > DestExprs, ArrayRef< const Expr * > SrcExprs, ArrayRef< const Expr * > AssignmentOps) override
Emits a single region.
void emitTaskReductionFixups(CodeGenFunction &CGF, SourceLocation Loc, ReductionCodeGen &RCG, unsigned N) override
Required to resolve existing problems in the runtime.
llvm::Function * emitParallelOutlinedFunction(CodeGenFunction &CGF, const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) override
Emits outlined function for the specified OpenMP parallel directive D.
void emitCancellationPointCall(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind CancelRegion) override
Emit code for 'cancellation point' construct.
void emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind Kind, bool EmitChecks=true, bool ForceSimpleCall=false) override
Emit an implicit/explicit barrier for OpenMP threads.
Address getParameterAddress(CodeGenFunction &CGF, const VarDecl *NativeParam, const VarDecl *TargetParam) const override
Gets the address of the native argument based on the address of the target-specific parameter.
void emitTeamsCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, SourceLocation Loc, llvm::Function *OutlinedFn, ArrayRef< llvm::Value * > CapturedVars) override
Emits code for a teams call of the OutlinedFn with variables captured in a record whose address is stor...
void emitForOrderedIterationEnd(CodeGenFunction &CGF, SourceLocation Loc, unsigned IVSize, bool IVSigned) override
Call the appropriate runtime routine to notify that we finished iteration of the ordered loop with th...
bool emitTargetGlobal(GlobalDecl GD) override
Emit the global GD if it is meaningful for the target.
void emitTaskReductionFini(CodeGenFunction &CGF, SourceLocation Loc, bool IsWorksharingReduction) override
Emits the following code for reduction clause with task modifier:
void emitOrderedRegion(CodeGenFunction &CGF, const RegionCodeGenTy &OrderedOpGen, SourceLocation Loc, bool IsThreads) override
Emit an ordered region.
void emitForStaticFinish(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind) override
Call the appropriate runtime routine to notify that we finished all the work with the current loop.
llvm::Value * emitTaskReductionInit(CodeGenFunction &CGF, SourceLocation Loc, ArrayRef< const Expr * > LHSExprs, ArrayRef< const Expr * > RHSExprs, const OMPTaskDataTy &Data) override
Emit code for the initialization of a task reduction clause.
void emitProcBindClause(CodeGenFunction &CGF, llvm::omp::ProcBindKind ProcBind, SourceLocation Loc) override
Emit call to void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid, int proc_bind) to generate...
void emitTargetOutlinedFunction(const OMPExecutableDirective &D, StringRef ParentName, llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) override
Emit the outlined function for the 'target' directive.
void emitMasterRegion(CodeGenFunction &CGF, const RegionCodeGenTy &MasterOpGen, SourceLocation Loc) override
Emits a master region.
void emitNumTeamsClause(CodeGenFunction &CGF, const Expr *NumTeams, const Expr *ThreadLimit, SourceLocation Loc) override
Emits call to void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_teams,...
void emitForDispatchDeinit(CodeGenFunction &CGF, SourceLocation Loc) override
This is used for non-static scheduled types and when the ordered clause is present on the loop constr...
const VarDecl * translateParameter(const FieldDecl *FD, const VarDecl *NativeParam) const override
Translates the native parameter of the outlined function if this is required for the target.
void emitNumThreadsClause(CodeGenFunction &CGF, llvm::Value *NumThreads, SourceLocation Loc, OpenMPNumThreadsClauseModifier Modifier=OMPC_NUMTHREADS_unknown, OpenMPSeverityClauseKind Severity=OMPC_SEVERITY_fatal, SourceLocation SeverityLoc=SourceLocation(), const Expr *Message=nullptr, SourceLocation MessageLoc=SourceLocation()) override
Emits call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_threads) ...
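For reference, sketches of the runtime entry points named in the 'push' descriptions above, as declared in the LLVM OpenMP runtime (exact prototypes assumed from the runtime's kmp.h):

void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
                             kmp_int32 num_threads);
void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid,
                           kmp_int32 num_teams, kmp_int32 num_threads);
void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
                           int proc_bind);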
void emitMaskedRegion(CodeGenFunction &CGF, const RegionCodeGenTy &MaskedOpGen, SourceLocation Loc, const Expr *Filter=nullptr) override
Emits a masked region.
void emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, const OMPExecutableDirective &D, llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds, const Expr *IfCond, const OMPTaskDataTy &Data) override
Emit task region for the task directive.
void emitTargetCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond, llvm::PointerIntPair< const Expr *, 2, OpenMPDeviceClauseModifier > Device, llvm::function_ref< llvm::Value *(CodeGenFunction &CGF, const OMPLoopDirective &D)> SizeEmitter) override
Emit the target offloading code associated with D.
bool emitTargetFunctions(GlobalDecl GD) override
Emit the target regions enclosed in GD function definition or the function itself in case it is a val...
void emitDoacrossInit(CodeGenFunction &CGF, const OMPLoopDirective &D, ArrayRef< Expr * > NumIterations) override
Emit initialization for doacross loop nesting support.
void emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc, const Expr *IfCond, OpenMPDirectiveKind CancelRegion) override
Emit code for 'cancel' construct.
void emitTaskwaitCall(CodeGenFunction &CGF, SourceLocation Loc, const OMPTaskDataTy &Data) override
Emit code for 'taskwait' directive.
void emitTaskgroupRegion(CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen, SourceLocation Loc) override
Emit a taskgroup region.
void emitTargetDataCalls(CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, const Expr *Device, const RegionCodeGenTy &CodeGen, CGOpenMPRuntime::TargetDataInfo &Info) override
Emit the target data mapping code associated with D.
void emitForDispatchInit(CodeGenFunction &CGF, SourceLocation Loc, const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned, bool Ordered, const DispatchRTInput &DispatchValues) override
This is used for non static scheduled types and when the ordered clause is present on the loop constr...
llvm::Function * emitTaskOutlinedFunction(const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, const VarDecl *PartIDVar, const VarDecl *TaskTVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen, bool Tied, unsigned &NumberOfParts) override
Emits outlined function for the OpenMP task directive D.
void emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D, llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds, const Expr *IfCond, const OMPTaskDataTy &Data) override
Emit task region for the taskloop directive.
unsigned getNonVirtualBaseLLVMFieldNo(const CXXRecordDecl *RD) const
llvm::StructType * getLLVMType() const
Return the "complete object" LLVM type associated with this record.
llvm::StructType * getBaseSubobjectLLVMType() const
Return the "base subobject" LLVM type associated with this record.
unsigned getLLVMFieldNo(const FieldDecl *FD) const
Return llvm::StructType element number that corresponds to the field FD.
unsigned getVirtualBaseIndex(const CXXRecordDecl *base) const
Return the LLVM field index corresponding to the given virtual base.
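A minimal sketch of how these CGRecordLayout accessors are typically combined; fieldIndexFor is a hypothetical helper, not part of clang:

// Hypothetical helper: map an AST field to its LLVM struct element index.
unsigned fieldIndexFor(CodeGen::CodeGenModule &CGM, const FieldDecl *FD) {
  const CGRecordLayout &RL =
      CGM.getTypes().getCGRecordLayout(FD->getParent());
  return RL.getLLVMFieldNo(FD); // element number within RL.getLLVMType()
}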
API for captured statement code generation.
virtual void EmitBody(CodeGenFunction &CGF, const Stmt *S)
Emit the captured statement body.
virtual const FieldDecl * lookup(const VarDecl *VD) const
Lookup the captured field decl for a variable.
RAII for correct setting/restoring of CapturedStmtInfo.
The scope used to remap some variables as private in the OpenMP loop body (or other captured region e...
bool Privatize()
Privatizes local variables previously registered as private.
bool addPrivate(const VarDecl *LocalVD, Address Addr)
Registers LocalVD variable as a private with Addr as the address of the corresponding private variabl...
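A minimal sketch of the usual privatization pattern built on these two members (assumes CGF, VD, and PrivAddr are in scope):

CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
PrivateScope.addPrivate(VD, PrivAddr); // remap VD to its private copy
(void)PrivateScope.Privatize();        // activate the registered remappings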
An RAII object to set (and then clear) a mapping for an OpaqueValueExpr.
Enters a new scope for capturing cleanups, all of which will be executed once the scope is exited.
CodeGenFunction - This class organizes the per-function state that is used while generating LLVM code...
LValue EmitLoadOfReferenceLValue(LValue RefLVal)
Definition CGExpr.cpp:3284
void EmitBranchOnBoolExpr(const Expr *Cond, llvm::BasicBlock *TrueBlock, llvm::BasicBlock *FalseBlock, uint64_t TrueCount, Stmt::Likelihood LH=Stmt::LH_None, const Expr *ConditionalOp=nullptr, const VarDecl *ConditionalDecl=nullptr)
EmitBranchOnBoolExpr - Emit a branch on a boolean condition (e.g.
void emitDestroy(Address addr, QualType type, Destroyer *destroyer, bool useEHCleanupForArray)
emitDestroy - Immediately perform the destruction of the given object.
Definition CGDecl.cpp:2394
JumpDest getJumpDestInCurrentScope(llvm::BasicBlock *Target)
The given basic block lies in the current EH scope, but may be a target of a potentially scope-crossi...
static void EmitOMPTargetParallelDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetParallelDirective &S)
void EmitNullInitialization(Address DestPtr, QualType Ty)
EmitNullInitialization - Generate code to set a value of the given type to null. If the type contains...
CGCapturedStmtInfo * CapturedStmtInfo
ComplexPairTy EmitLoadOfComplex(LValue src, SourceLocation loc)
EmitLoadOfComplex - Load a complex number from the specified l-value.
static void EmitOMPTargetDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetDirective &S)
Emit device code for the target directive.
static void EmitOMPTargetTeamsDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetTeamsDirective &S)
Emit device code for the target teams directive.
static void EmitOMPTargetTeamsDistributeDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetTeamsDistributeDirective &S)
Emit device code for the target teams distribute directive.
llvm::BasicBlock * createBasicBlock(const Twine &name="", llvm::Function *parent=nullptr, llvm::BasicBlock *before=nullptr)
createBasicBlock - Create an LLVM basic block.
const LangOptions & getLangOpts() const
AutoVarEmission EmitAutoVarAlloca(const VarDecl &var)
EmitAutoVarAlloca - Emit the alloca and debug information for a local variable.
Definition CGDecl.cpp:1482
void pushDestroy(QualType::DestructionKind dtorKind, Address addr, QualType type)
pushDestroy - Push the standard destructor for the given type as at least a normal cleanup.
Definition CGDecl.cpp:2278
Address EmitLoadOfPointer(Address Ptr, const PointerType *PtrTy, LValueBaseInfo *BaseInfo=nullptr, TBAAAccessInfo *TBAAInfo=nullptr)
Load a pointer with type PtrTy stored at address Ptr.
Definition CGExpr.cpp:3293
void EmitBranchThroughCleanup(JumpDest Dest)
EmitBranchThroughCleanup - Emit a branch from the current insert block through the normal cleanup han...
const Decl * CurCodeDecl
CurCodeDecl - This is the inner-most code context, which includes blocks.
Destroyer * getDestroyer(QualType::DestructionKind destructionKind)
Definition CGDecl.cpp:2251
llvm::AssertingVH< llvm::Instruction > AllocaInsertPt
AllocaInsertPoint - This is an instruction in the entry block before which we prefer to insert alloca...
void EmitAggregateAssign(LValue Dest, LValue Src, QualType EltTy)
Emit an aggregate assignment.
JumpDest ReturnBlock
ReturnBlock - Unified return block.
void EmitAggregateCopy(LValue Dest, LValue Src, QualType EltTy, AggValueSlot::Overlap_t MayOverlap, bool isVolatile=false)
EmitAggregateCopy - Emit an aggregate copy.
LValue EmitLValueForField(LValue Base, const FieldDecl *Field, bool IsInBounds=true)
Definition CGExpr.cpp:5563
RawAddress CreateDefaultAlignTempAlloca(llvm::Type *Ty, const Twine &Name="tmp")
CreateDefaultAlignTempAlloca - This creates an alloca with the default ABI alignment of the given L...
Definition CGExpr.cpp:177
void GenerateOpenMPCapturedVars(const CapturedStmt &S, SmallVectorImpl< llvm::Value * > &CapturedVars)
void EmitIgnoredExpr(const Expr *E)
EmitIgnoredExpr - Emit an expression in a context which ignores the result.
Definition CGExpr.cpp:245
RValue EmitLoadOfLValue(LValue V, SourceLocation Loc)
EmitLoadOfLValue - Given an expression that represents a value lvalue, this method emits the address ...
Definition CGExpr.cpp:2406
LValue EmitArraySectionExpr(const ArraySectionExpr *E, bool IsLowerBound=true)
Definition CGExpr.cpp:5108
LValue EmitOMPSharedLValue(const Expr *E)
Emits the lvalue for the expression with possibly captured variable.
void StartFunction(GlobalDecl GD, QualType RetTy, llvm::Function *Fn, const CGFunctionInfo &FnInfo, const FunctionArgList &Args, SourceLocation Loc=SourceLocation(), SourceLocation StartLoc=SourceLocation())
Emit code for the start of a function.
void EmitOMPCopy(QualType OriginalType, Address DestAddr, Address SrcAddr, const VarDecl *DestVD, const VarDecl *SrcVD, const Expr *Copy)
Emit proper copying of data from one variable to another.
llvm::Value * EvaluateExprAsBool(const Expr *E)
EvaluateExprAsBool - Perform the usual unary conversions on the specified expression and compare the ...
Definition CGExpr.cpp:226
JumpDest getOMPCancelDestination(OpenMPDirectiveKind Kind)
llvm::Value * emitArrayLength(const ArrayType *arrayType, QualType &baseType, Address &addr)
emitArrayLength - Compute the length of an array, even if it's a VLA, and drill down to the base elem...
void EmitOMPAggregateAssign(Address DestAddr, Address SrcAddr, QualType OriginalType, const llvm::function_ref< void(Address, Address)> CopyGen)
Perform element by element copying of arrays with type OriginalType from SrcAddr to DestAddr using co...
bool HaveInsertPoint() const
HaveInsertPoint - True if an insertion point is defined.
llvm::Value * getTypeSize(QualType Ty)
Returns calculated size of the specified type.
LValue MakeRawAddrLValue(llvm::Value *V, QualType T, CharUnits Alignment, AlignmentSource Source=AlignmentSource::Type)
Same as MakeAddrLValue above except that the pointer is known to be unsigned.
LValue EmitLValueForFieldInitialization(LValue Base, const FieldDecl *Field)
EmitLValueForFieldInitialization - Like EmitLValueForField, except that if the Field is a reference,...
Definition CGExpr.cpp:5737
VlaSizePair getVLASize(const VariableArrayType *vla)
Returns an LLVM value that corresponds to the size, in non-variably-sized elements,...
llvm::CallInst * EmitNounwindRuntimeCall(llvm::FunctionCallee callee, const Twine &name="")
llvm::Value * EmitLoadOfScalar(Address Addr, bool Volatile, QualType Ty, SourceLocation Loc, AlignmentSource Source=AlignmentSource::Type, bool isNontemporal=false)
EmitLoadOfScalar - Load a scalar value from an address, taking care to appropriately convert from the...
void EmitStoreOfComplex(ComplexPairTy V, LValue dest, bool isInit)
EmitStoreOfComplex - Store a complex number into the specified l-value.
const Decl * CurFuncDecl
CurFuncDecl - Holds the Decl for the current outermost non-closure context.
void EmitAutoVarCleanups(const AutoVarEmission &emission)
Definition CGDecl.cpp:2202
void EmitStoreThroughLValue(RValue Src, LValue Dst, bool isInit=false)
EmitStoreThroughLValue - Store the specified rvalue into the specified lvalue, where both are guarant...
Definition CGExpr.cpp:2642
LValue EmitLoadOfPointerLValue(Address Ptr, const PointerType *PtrTy)
Definition CGExpr.cpp:3303
void EmitAnyExprToMem(const Expr *E, Address Location, Qualifiers Quals, bool IsInitializer)
EmitAnyExprToMem - Emits the code necessary to evaluate an arbitrary expression into the given memory...
Definition CGExpr.cpp:296
bool needsEHCleanup(QualType::DestructionKind kind)
Determines whether an EH cleanup is required to destroy a type with the given destruction kind.
llvm::DenseMap< const ValueDecl *, FieldDecl * > LambdaCaptureFields
llvm::CallInst * EmitRuntimeCall(llvm::FunctionCallee callee, const Twine &name="")
llvm::Type * ConvertTypeForMem(QualType T)
static void EmitOMPTargetTeamsDistributeParallelForDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetTeamsDistributeParallelForDirective &S)
static void EmitOMPTargetParallelForSimdDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetParallelForSimdDirective &S)
Emit device code for the target parallel for simd directive.
CodeGenTypes & getTypes() const
static TypeEvaluationKind getEvaluationKind(QualType T)
getEvaluationKind - Return the TypeEvaluationKind of QualType T.
void EmitOMPTargetTaskBasedDirective(const OMPExecutableDirective &S, const RegionCodeGenTy &BodyGen, OMPTargetDataInfo &InputInfo)
Address EmitPointerWithAlignment(const Expr *Addr, LValueBaseInfo *BaseInfo=nullptr, TBAAAccessInfo *TBAAInfo=nullptr, KnownNonNull_t IsKnownNonNull=NotKnownNonNull)
EmitPointerWithAlignment - Given an expression with a pointer type, emit the value and compute our be...
Definition CGExpr.cpp:1575
static void EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetTeamsDistributeParallelForSimdDirective &S)
Emit device code for the target teams distribute parallel for simd directive.
void EmitBranch(llvm::BasicBlock *Block)
EmitBranch - Emit a branch to the specified basic block from the current insert block,...
Definition CGStmt.cpp:676
llvm::Function * GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S, const OMPExecutableDirective &D)
RawAddress CreateMemTemp(QualType T, const Twine &Name="tmp", RawAddress *Alloca=nullptr)
CreateMemTemp - Create a temporary memory object of the given type, with appropriate alignment and cas...
Definition CGExpr.cpp:189
void EmitVarDecl(const VarDecl &D)
EmitVarDecl - Emit a local variable declaration.
Definition CGDecl.cpp:203
llvm::Value * EmitCheckedInBoundsGEP(llvm::Type *ElemTy, llvm::Value *Ptr, ArrayRef< llvm::Value * > IdxList, bool SignedIndices, bool IsSubtraction, SourceLocation Loc, const Twine &Name="")
Same as IRBuilder::CreateInBoundsGEP, but additionally emits a check to detect undefined behavior whe...
static void EmitOMPTargetParallelGenericLoopDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetParallelGenericLoopDirective &S)
Emit device code for the target parallel loop directive.
llvm::Value * EmitScalarExpr(const Expr *E, bool IgnoreResultAssign=false)
EmitScalarExpr - Emit the computation of the specified expression of LLVM scalar type,...
static bool IsWrappedCXXThis(const Expr *E)
Check if E is a C++ "this" pointer wrapped in value-preserving casts.
Definition CGExpr.cpp:1633
LValue MakeAddrLValue(Address Addr, QualType T, AlignmentSource Source=AlignmentSource::Type)
void FinishFunction(SourceLocation EndLoc=SourceLocation())
FinishFunction - Complete IR generation of the current function.
void EmitAtomicStore(RValue rvalue, LValue lvalue, bool isInit)
static void EmitOMPTargetSimdDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetSimdDirective &S)
Emit device code for the target simd directive.
static void EmitOMPTargetParallelForDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetParallelForDirective &S)
Emit device code for the target parallel for directive.
Address GetAddrOfLocalVar(const VarDecl *VD)
GetAddrOfLocalVar - Return the address of a local variable.
bool ConstantFoldsToSimpleInteger(const Expr *Cond, bool &Result, bool AllowLabels=false)
ConstantFoldsToSimpleInteger - If the specified expression does not fold to a constant,...
static void EmitOMPTargetTeamsGenericLoopDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetTeamsGenericLoopDirective &S)
Emit device code for the target teams loop directive.
LValue EmitMemberExpr(const MemberExpr *E)
Definition CGExpr.cpp:5373
std::pair< llvm::Value *, llvm::Value * > ComplexPairTy
Address ReturnValue
ReturnValue - The temporary alloca to hold the return value.
LValue EmitLValue(const Expr *E, KnownNonNull_t IsKnownNonNull=NotKnownNonNull)
EmitLValue - Emit code to compute a designator that specifies the location of the expression.
Definition CGExpr.cpp:1691
void incrementProfileCounter(const Stmt *S, llvm::Value *StepV=nullptr)
Increment the profiler's counter for the given statement by StepV.
static void EmitOMPTargetTeamsDistributeSimdDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetTeamsDistributeSimdDirective &S)
Emit device code for the target teams distribute simd directive.
llvm::Value * EmitScalarConversion(llvm::Value *Src, QualType SrcTy, QualType DstTy, SourceLocation Loc)
Emit a conversion from the specified type to the specified destination type, both of which are LLVM s...
void EmitVariablyModifiedType(QualType Ty)
EmitVLASize - Capture all the sizes for the VLA expressions in the given variably-modified type and s...
bool isTrivialInitializer(const Expr *Init)
Determine whether the given initializer is trivial in the sense that it requires no code to be genera...
Definition CGDecl.cpp:1807
void EmitStoreOfScalar(llvm::Value *Value, Address Addr, bool Volatile, QualType Ty, AlignmentSource Source=AlignmentSource::Type, bool isInit=false, bool isNontemporal=false)
EmitStoreOfScalar - Store a scalar value to an address, taking care to appropriately convert from the...
void EmitBlock(llvm::BasicBlock *BB, bool IsFinished=false)
EmitBlock - Emit the given block.
Definition CGStmt.cpp:656
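A minimal sketch of the block-emission pattern these helpers support, e.g. guarding a region on an 'if' clause condition (assumes CGF and IfCond are in scope):

llvm::BasicBlock *ThenBB = CGF.createBasicBlock("omp.if.then");
llvm::BasicBlock *ContBB = CGF.createBasicBlock("omp.if.end");
CGF.EmitBranchOnBoolExpr(IfCond, ThenBB, ContBB, /*TrueCount=*/0);
CGF.EmitBlock(ThenBB);
// ... emit the guarded region here ...
CGF.EmitBranch(ContBB);
CGF.EmitBlock(ContBB, /*IsFinished=*/true);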
void EmitExprAsInit(const Expr *init, const ValueDecl *D, LValue lvalue, bool capturedByInit)
EmitExprAsInit - Emits the code necessary to initialize a location in memory with the given initializ...
Definition CGDecl.cpp:2092
LValue MakeNaturalAlignRawAddrLValue(llvm::Value *V, QualType T)
This class organizes the cross-function state that is used while generating LLVM code.
void SetInternalFunctionAttributes(GlobalDecl GD, llvm::Function *F, const CGFunctionInfo &FI)
Set the attributes on the LLVM function for the given decl and function info.
llvm::Module & getModule() const
const IntrusiveRefCntPtr< llvm::vfs::FileSystem > & getFileSystem() const
DiagnosticsEngine & getDiags() const
const LangOptions & getLangOpts() const
CharUnits getNaturalTypeAlignment(QualType T, LValueBaseInfo *BaseInfo=nullptr, TBAAAccessInfo *TBAAInfo=nullptr, bool forPointeeType=false)
CGOpenMPRuntime & getOpenMPRuntime()
Return a reference to the configured OpenMP runtime.
TBAAAccessInfo getTBAAInfoForSubobject(LValue Base, QualType AccessType)
getTBAAInfoForSubobject - Get TBAA information for an access with a given base lvalue.
ASTContext & getContext() const
const CodeGenOptions & getCodeGenOpts() const
StringRef getMangledName(GlobalDecl GD)
std::optional< CharUnits > getOMPAllocateAlignment(const VarDecl *VD)
Return the alignment specified in an allocate directive, if present.
Definition CGDecl.cpp:2944
llvm::Constant * EmitNullConstant(QualType T)
Return the result of value-initializing the given type, i.e.
llvm::Type * ConvertType(QualType T)
ConvertType - Convert type T into a llvm::Type.
llvm::FunctionType * GetFunctionType(const CGFunctionInfo &Info)
GetFunctionType - Get the LLVM function type for the given function info.
Definition CGCall.cpp:1702
const CGFunctionInfo & arrangeBuiltinFunctionDeclaration(QualType resultType, const FunctionArgList &args)
A builtin function is a freestanding function using the default C conventions.
Definition CGCall.cpp:740
const CGRecordLayout & getCGRecordLayout(const RecordDecl *)
getCGRecordLayout - Return record layout info for the given record decl.
llvm::GlobalVariable * GetAddrOfVTable(const CXXRecordDecl *RD)
GetAddrOfVTable - Get the address of the VTable for the given record decl.
Definition CGVTables.cpp:41
A specialization of Address that requires the address to be an LLVM Constant.
Definition Address.h:296
static ConstantAddress invalid()
Definition Address.h:304
void pushTerminate()
Push a terminate handler on the stack.
void popTerminate()
Pops a terminate handler off the stack.
Definition CGCleanup.h:639
FunctionArgList - Type for representing both the decl and type of parameters to a function.
Definition CGCall.h:375
LValue - This represents an lvalue reference.
Definition CGValue.h:183
CharUnits getAlignment() const
Definition CGValue.h:355
llvm::Value * getPointer(CodeGenFunction &CGF) const
const Qualifiers & getQuals() const
Definition CGValue.h:350
Address getAddress() const
Definition CGValue.h:373
LValueBaseInfo getBaseInfo() const
Definition CGValue.h:358
QualType getType() const
Definition CGValue.h:303
TBAAAccessInfo getTBAAInfo() const
Definition CGValue.h:347
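A minimal sketch tying the LValue accessors above together (assumes CGF, Addr, Ty, and Loc are in scope):

LValue LV = CGF.MakeAddrLValue(Addr, Ty);  // wrap an Address as an lvalue
llvm::Value *Ptr = LV.getPointer(CGF);     // underlying pointer value
CharUnits Align = LV.getAlignment();       // alignment recorded in the lvalue
RValue RV = CGF.EmitLoadOfLValue(LV, Loc); // load it back as an rvalue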
A basic class for pre/post-actions for advanced codegen sequences for OpenMP regions.
virtual void Enter(CodeGenFunction &CGF)
RValue - This trivial value class is used to represent the result of an expression that is evaluated.
Definition CGValue.h:42
static RValue get(llvm::Value *V)
Definition CGValue.h:99
static RValue getComplex(llvm::Value *V1, llvm::Value *V2)
Definition CGValue.h:109
llvm::Value * getScalarVal() const
getScalarVal() - Return the Value* of this scalar value.
Definition CGValue.h:72
An abstract representation of an aligned address.
Definition Address.h:42
llvm::Type * getElementType() const
Return the type of the values stored in this address.
Definition Address.h:77
llvm::Value * getPointer() const
Definition Address.h:66
static RawAddress invalid()
Definition Address.h:61
Class intended to support codegen of all kinds of reduction clauses.
LValue getSharedLValue(unsigned N) const
Returns LValue for the reduction item.
const Expr * getRefExpr(unsigned N) const
Returns the reference expression of the reduction item.
LValue getOrigLValue(unsigned N) const
Returns LValue for the original reduction item.
bool needCleanups(unsigned N)
Returns true if the private copy requires cleanups.
void emitAggregateType(CodeGenFunction &CGF, unsigned N)
Emits the code for the variably-modified type, if required.
const VarDecl * getBaseDecl(unsigned N) const
Returns the base declaration of the reduction item.
QualType getPrivateType(unsigned N) const
Return the type of the private item.
bool usesReductionInitializer(unsigned N) const
Returns true if the initialization of the reduction item uses initializer from declare reduction cons...
void emitSharedOrigLValue(CodeGenFunction &CGF, unsigned N)
Emits lvalue for the shared and original reduction item.
void emitInitialization(CodeGenFunction &CGF, unsigned N, Address PrivateAddr, Address SharedAddr, llvm::function_ref< bool(CodeGenFunction &)> DefaultInit)
Performs initialization of the private copy for the reduction item.
std::pair< llvm::Value *, llvm::Value * > getSizes(unsigned N) const
Returns the size of the reduction item (in chars and total number of elements in the item),...
ReductionCodeGen(ArrayRef< const Expr * > Shareds, ArrayRef< const Expr * > Origs, ArrayRef< const Expr * > Privates, ArrayRef< const Expr * > ReductionOps)
void emitCleanups(CodeGenFunction &CGF, unsigned N, Address PrivateAddr)
Emits cleanup code for the reduction item.
Address adjustPrivateAddress(CodeGenFunction &CGF, unsigned N, Address PrivateAddr)
Adjusts PrivateAddr to be used instead of the original variable address in normal operations.
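A minimal sketch of driving ReductionCodeGen over the reduction items, based only on the signatures listed above (assumes CGF, the clause expression arrays, and per-item PrivAddr/SharedAddr are in scope):

ReductionCodeGen RCG(Shareds, Origs, Privates, ReductionOps);
for (unsigned N = 0, E = Privates.size(); N < E; ++N) {
  RCG.emitSharedOrigLValue(CGF, N); // lvalues for shared/original items
  RCG.emitAggregateType(CGF, N);    // sizes for variably-modified types
  RCG.emitInitialization(CGF, N, PrivAddr, SharedAddr,
                         [](CodeGenFunction &) { return false; });
  if (RCG.needCleanups(N))
    RCG.emitCleanups(CGF, N, PrivAddr); // destroy the private copy
}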
Class providing a way to call the simple version of codegen for an OpenMP region, or an advanced version with possibl...
void operator()(CodeGenFunction &CGF) const
void setAction(PrePostActionTy &Action) const
ConstStmtVisitor - This class implements a simple visitor for Stmt subclasses.
DeclContext - This is used only as base class of specific decl types that can act as declaration cont...
Definition DeclBase.h:1449
void addDecl(Decl *D)
Add the declaration D into this context.
A reference to a declared variable, function, enum, etc.
Definition Expr.h:1270
ValueDecl * getDecl()
Definition Expr.h:1338
Decl - This represents one declaration (or definition), e.g.
Definition DeclBase.h:86
T * getAttr() const
Definition DeclBase.h:573
bool hasAttrs() const
Definition DeclBase.h:518
ASTContext & getASTContext() const LLVM_READONLY
Definition DeclBase.cpp:546
void addAttr(Attr *A)
virtual Stmt * getBody() const
getBody - If this Decl represents a declaration for a body of code, such as a function or method defi...
Definition DeclBase.h:1087
llvm::iterator_range< specific_attr_iterator< T > > specific_attrs() const
Definition DeclBase.h:559
SourceLocation getLocation() const
Definition DeclBase.h:439
DeclContext * getDeclContext()
Definition DeclBase.h:448
AttrVec & getAttrs()
Definition DeclBase.h:524
bool hasAttr() const
Definition DeclBase.h:577
virtual Decl * getCanonicalDecl()
Retrieves the "canonical" declaration of the given declaration.
Definition DeclBase.h:978
SourceLocation getBeginLoc() const LLVM_READONLY
Definition Decl.h:831
DiagnosticBuilder Report(SourceLocation Loc, unsigned DiagID)
Issue the message to the client.
This represents one expression.
Definition Expr.h:112
bool isIntegerConstantExpr(const ASTContext &Ctx) const
bool isGLValue() const
Definition Expr.h:287
Expr * IgnoreParenNoopCasts(const ASTContext &Ctx) LLVM_READONLY
Skip past any parentheses and casts which do not change the value (including ptr->int casts of the sa...
Definition Expr.cpp:3116
@ SE_AllowSideEffects
Allow any unmodeled side effect.
Definition Expr.h:674
@ SE_AllowUndefinedBehavior
Allow UB that we can give a value, but not arbitrary unmodeled side effects.
Definition Expr.h:672
Expr * IgnoreParenCasts() LLVM_READONLY
Skip past any parentheses and casts which might surround this expression until reaching a fixed point...
Definition Expr.cpp:3094
llvm::APSInt EvaluateKnownConstInt(const ASTContext &Ctx) const
EvaluateKnownConstInt - Call EvaluateAsRValue and return the folded integer.
Expr * IgnoreParenImpCasts() LLVM_READONLY
Skip past any parentheses and implicit casts which might surround this expression until reaching a fi...
Definition Expr.cpp:3089
bool isEvaluatable(const ASTContext &Ctx, SideEffectsKind AllowSideEffects=SE_NoSideEffects) const
isEvaluatable - Call EvaluateAsRValue to see if this expression can be constant folded without side-e...
std::optional< llvm::APSInt > getIntegerConstantExpr(const ASTContext &Ctx) const
getIntegerConstantExpr - Return the value if this expression is a valid integer constant expression.
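A minimal sketch of the usual folding idiom built on this accessor (assumes E and Ctx are in scope):

if (std::optional<llvm::APSInt> CI = E->getIntegerConstantExpr(Ctx)) {
  // E is a valid integer constant expression; use *CI directly
  // instead of emitting code to evaluate E at run time.
}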
bool HasSideEffects(const ASTContext &Ctx, bool IncludePossibleEffects=true) const
HasSideEffects - This routine returns true for all those expressions which have any effect other than...
Definition Expr.cpp:3669
bool EvaluateAsBooleanCondition(bool &Result, const ASTContext &Ctx, bool InConstantContext=false) const
EvaluateAsBooleanCondition - Return true if this is a constant which we can fold and convert to a boo...
SourceLocation getExprLoc() const LLVM_READONLY
getExprLoc - Return the preferred location for the arrow when diagnosing a problem with a generic exp...
Definition Expr.cpp:276
static bool isSameComparisonOperand(const Expr *E1, const Expr *E2)
Checks that the two Expr's will refer to the same value as a comparison operand.
Definition Expr.cpp:4299
QualType getType() const
Definition Expr.h:144
bool hasNonTrivialCall(const ASTContext &Ctx) const
Determine whether this expression involves a call to any function that is not trivial.
Definition Expr.cpp:4035
Represents a member of a struct/union/class.
Definition Decl.h:3160
unsigned getFieldIndex() const
Returns the index of this field within its record, as appropriate for passing to ASTRecordLayout::get...
Definition Decl.h:3245
const RecordDecl * getParent() const
Returns the parent of this field declaration, which is the struct in which this field is defined.
Definition Decl.h:3396
static FieldDecl * Create(const ASTContext &C, DeclContext *DC, SourceLocation StartLoc, SourceLocation IdLoc, const IdentifierInfo *Id, QualType T, TypeSourceInfo *TInfo, Expr *BW, bool Mutable, InClassInitStyle InitStyle)
Definition Decl.cpp:4700
Represents a function declaration or definition.
Definition Decl.h:2000
const ParmVarDecl * getParamDecl(unsigned i) const
Definition Decl.h:2797
QualType getReturnType() const
Definition Decl.h:2845
ArrayRef< ParmVarDecl * > parameters() const
Definition Decl.h:2774
FunctionDecl * getCanonicalDecl() override
Retrieves the "canonical" declaration of the given declaration.
Definition Decl.cpp:3747
FunctionDecl * getMostRecentDecl()
Returns the most recent (re)declaration of this declaration.
unsigned getNumParams() const
Return the number of parameters this function must have based on its FunctionType.
Definition Decl.cpp:3826
FunctionDecl * getPreviousDecl()
Return the previous declaration of this declaration or NULL if this is the first declaration.
GlobalDecl - represents a global declaration.
Definition GlobalDecl.h:57
const Decl * getDecl() const
Definition GlobalDecl.h:106
static ImplicitParamDecl * Create(ASTContext &C, DeclContext *DC, SourceLocation IdLoc, IdentifierInfo *Id, QualType T, ImplicitParamKind ParamKind)
Create implicit parameter.
Definition Decl.cpp:5597
static IntegerLiteral * Create(const ASTContext &C, const llvm::APInt &V, QualType type, SourceLocation l)
Returns a new integer literal with value 'V' and type 'type'.
Definition Expr.cpp:974
An lvalue reference type, per C++11 [dcl.ref].
Definition TypeBase.h:3618
MemberExpr - [C99 6.5.2.3] Structure and Union Members.
Definition Expr.h:3364
ValueDecl * getMemberDecl() const
Retrieve the member declaration to which this expression refers.
Definition Expr.h:3447
Expr * getBase() const
Definition Expr.h:3441
StringRef getName() const
Get the name of identifier for this declaration as a StringRef.
Definition Decl.h:301
bool isExternallyVisible() const
Definition Decl.h:433
const Stmt * getPreInitStmt() const
Get pre-initialization statement for the clause.
This is a basic class for representing single OpenMP clause.
This represents the '#pragma omp declare mapper ...' directive.
Definition DeclOpenMP.h:349
Expr * getMapperVarRef()
Get the variable declared in the mapper.
Definition DeclOpenMP.h:411
This represents the '#pragma omp declare reduction ...' directive.
Definition DeclOpenMP.h:239
Expr * getInitializer()
Get initializer expression (if specified) of the declare reduction construct.
Definition DeclOpenMP.h:300
Expr * getInitPriv()
Get Priv variable of the initializer.
Definition DeclOpenMP.h:311
Expr * getCombinerOut()
Get Out variable of the combiner.
Definition DeclOpenMP.h:288
Expr * getCombinerIn()
Get In variable of the combiner.
Definition DeclOpenMP.h:285
Expr * getCombiner()
Get combiner expression of the declare reduction construct.
Definition DeclOpenMP.h:282
Expr * getInitOrig()
Get Orig variable of the initializer.
Definition DeclOpenMP.h:308
OMPDeclareReductionInitKind getInitializerKind() const
Get initializer kind.
Definition DeclOpenMP.h:303
This represents the 'if' clause in the '#pragma omp ...' directive.
Expr * getCondition() const
Returns condition.
OMPIteratorHelperData & getHelper(unsigned I)
Fetches helper data for the specified iteration space.
Definition Expr.cpp:5478
unsigned numOfIterators() const
Returns number of iterator definitions.
Definition ExprOpenMP.h:275
This represents the 'num_threads' clause in the '#pragma omp ...' directive.
This represents the '#pragma omp requires ...' directive.
Definition DeclOpenMP.h:479
clauselist_range clauselists()
Definition DeclOpenMP.h:504
This represents the 'threadset' clause in the '#pragma omp task ...' directive.
OpaqueValueExpr - An expression referring to an opaque object of a fixed type and value class.
Definition Expr.h:1178
Represents a parameter to a function.
Definition Decl.h:1790
PointerType - C99 6.7.5.1 - Pointer Declarators.
Definition TypeBase.h:3329
Represents an unpacked "presumed" location which can be presented to the user.
unsigned getColumn() const
Return the presumed column number of this location.
const char * getFilename() const
Return the presumed filename of this location.
unsigned getLine() const
Return the presumed line number of this location.
A (possibly-)qualified type.
Definition TypeBase.h:937
void addRestrict()
Add the restrict qualifier to this QualType.
Definition TypeBase.h:1172
QualType withRestrict() const
Definition TypeBase.h:1175
bool isNull() const
Return true if this QualType doesn't point to a type yet.
Definition TypeBase.h:1004
const Type * getTypePtr() const
Retrieves a pointer to the underlying (unqualified) type.
Definition TypeBase.h:8292
Qualifiers getQualifiers() const
Retrieve the set of qualifiers applied to this type.
Definition TypeBase.h:8332
QualType getNonReferenceType() const
If Type is a reference type (e.g., const int&), returns the type that the reference refers to ("const...
Definition TypeBase.h:8477
QualType getCanonicalType() const
Definition TypeBase.h:8344
DestructionKind isDestructedType() const
Returns a nonzero value if objects of this type require non-trivial work to clean up after.
Definition TypeBase.h:1545
Represents a struct/union/class.
Definition Decl.h:4324
field_iterator field_end() const
Definition Decl.h:4530
field_range fields() const
Definition Decl.h:4527
virtual void completeDefinition()
Note that the definition of this type is now complete.
Definition Decl.cpp:5286
bool field_empty() const
Definition Decl.h:4535
field_iterator field_begin() const
Definition Decl.cpp:5270
Scope - A scope is a transient data structure that is used while parsing the program.
Definition Scope.h:41
Encodes a location in the source.
static SourceLocation getFromRawEncoding(UIntTy Encoding)
Turn a raw encoding of a SourceLocation object into a real SourceLocation.
bool isValid() const
Return true if this is a valid SourceLocation object.
UIntTy getRawEncoding() const
When a SourceLocation itself cannot be used, this returns an (opaque) 32-bit integer encoding for it.
This class handles loading and caching of source files into memory.
PresumedLoc getPresumedLoc(SourceLocation Loc, bool UseLineDirectives=true) const
Returns the "presumed" location of a SourceLocation specifies.
Stmt - This represents one statement.
Definition Stmt.h:86
child_range children()
Definition Stmt.cpp:299
StmtClass getStmtClass() const
Definition Stmt.h:1485
SourceRange getSourceRange() const LLVM_READONLY
SourceLocation tokens are not useful in isolation - they are low level value objects created/interpre...
Definition Stmt.cpp:338
Stmt * IgnoreContainers(bool IgnoreCaptured=false)
Skip no-op (attributed, compound) container stmts and skip captured stmt at the top,...
Definition Stmt.cpp:205
SourceLocation getBeginLoc() const LLVM_READONLY
Definition Stmt.cpp:350
void startDefinition()
Starts the definition of this tag declaration.
Definition Decl.cpp:4906
bool isUnion() const
Definition Decl.h:3925
The base class of the type hierarchy.
Definition TypeBase.h:1833
bool isVoidType() const
Definition TypeBase.h:8891
const Type * getPointeeOrArrayElementType() const
If this is a pointer type, return the pointee type.
Definition TypeBase.h:9071
bool isSignedIntegerType() const
Return true if this is an integer type that is signed, according to C99 6.2.5p4 [char,...
Definition Type.cpp:2206
CXXRecordDecl * getAsCXXRecordDecl() const
Retrieves the CXXRecordDecl that this type refers to, either because the type is a RecordType or beca...
Definition Type.h:26
RecordDecl * getAsRecordDecl() const
Retrieves the RecordDecl this type refers to.
Definition Type.h:41
bool isArrayType() const
Definition TypeBase.h:8628
bool isPointerType() const
Definition TypeBase.h:8529
CanQualType getCanonicalTypeUnqualified() const
bool isIntegerType() const
isIntegerType() does not include complex integers (a GCC extension).
Definition TypeBase.h:8935
const T * castAs() const
Member-template castAs<specific type>.
Definition TypeBase.h:9178
bool isReferenceType() const
Definition TypeBase.h:8553
QualType getPointeeType() const
If this is a pointer, ObjC object pointer, or block pointer, this returns the respective pointee.
Definition Type.cpp:753
bool isLValueReferenceType() const
Definition TypeBase.h:8557
bool isAggregateType() const
Determines whether the type is a C++ aggregate type or C aggregate or union type.
Definition Type.cpp:2412
RecordDecl * castAsRecordDecl() const
Definition Type.h:48
QualType getCanonicalTypeInternal() const
Definition TypeBase.h:3120
const Type * getBaseElementTypeUnsafe() const
Get the base element type of this type, potentially discarding type qualifiers.
Definition TypeBase.h:9064
bool isVariablyModifiedType() const
Whether this type is a variably-modified type (C99 6.7.5).
Definition TypeBase.h:2801
const ArrayType * getAsArrayTypeUnsafe() const
A variant of getAs<> for array types which silently discards qualifiers from the outermost type.
Definition TypeBase.h:9164
bool isFloatingType() const
Definition Type.cpp:2305
bool isUnsignedIntegerType() const
Return true if this is an integer type that is unsigned, according to C99 6.2.5p6 [which returns true...
Definition Type.cpp:2254
bool isAnyPointerType() const
Definition TypeBase.h:8537
const T * getAs() const
Member-template getAs<specific type>'.
Definition TypeBase.h:9111
bool isRecordType() const
Definition TypeBase.h:8656
bool isUnionType() const
Definition Type.cpp:719
Represent the declaration of a variable (in which case it is an lvalue) a function (in which case it ...
Definition Decl.h:712
QualType getType() const
Definition Decl.h:723
Represents a variable declaration or definition.
Definition Decl.h:926
VarDecl * getCanonicalDecl() override
Retrieves the "canonical" declaration of the given declaration.
Definition Decl.cpp:2268
VarDecl * getDefinition(ASTContext &)
Get the real (not just tentative) definition for this declaration.
Definition Decl.cpp:2377
const Expr * getInit() const
Definition Decl.h:1368
bool hasExternalStorage() const
Returns true if a variable has extern or private_extern storage.
Definition Decl.h:1217
@ DeclarationOnly
This declaration is only a declaration.
Definition Decl.h:1295
DefinitionKind hasDefinition(ASTContext &) const
Check whether this variable is defined in this translation unit.
Definition Decl.cpp:2386
bool isLocalVarDeclOrParm() const
Similar to isLocalVarDecl but also includes parameters.
Definition Decl.h:1262
const Expr * getAnyInitializer() const
Get the initializer for this variable, no matter which declaration it is attached to.
Definition Decl.h:1358
Represents a C array with a specified size that is not an integer-constant-expression.
Definition TypeBase.h:3967
Expr * getSizeExpr() const
Definition TypeBase.h:3981
specific_attr_iterator - Iterates over a subrange of an AttrVec, only providing attributes that are o...
Definition SPIR.cpp:35
bool isEmptyRecordForLayout(const ASTContext &Context, QualType T)
isEmptyRecordForLayout - Return true iff a structure contains only empty base classes (per isEmptyRec...
@ Type
The l-value was considered opaque, so the alignment was determined from a type.
Definition CGValue.h:155
@ Decl
The l-value was an access to a declared entity or something equivalently strong, like the address of ...
Definition CGValue.h:146
bool isEmptyFieldForLayout(const ASTContext &Context, const FieldDecl *FD)
isEmptyFieldForLayout - Return true iff the field is "empty", that is, either a zero-width bit-field ...
ComparisonResult
Indicates the result of a tentative comparison.
The JSON file list parser is used to communicate input to InstallAPI.
bool isOpenMPWorksharingDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a worksharing directive.
CanQual< Type > CanQualType
Represents a canonical, potentially-qualified type.
bool needsTaskBasedThreadLimit(OpenMPDirectiveKind DKind)
Checks if the specified target directive, combined or not, needs task based thread_limit.
@ Match
This is not an overload because the signature exactly matches an existing declaration.
Definition Sema.h:817
@ Ctor_Complete
Complete object ctor.
Definition ABI.h:25
Privates[]
This class represents the 'transparent' clause in the '#pragma omp task' directive.
bool isa(CodeGen::Address addr)
Definition Address.h:330
if(T->getSizeExpr()) TRY_TO(TraverseStmt(const_cast< Expr * >(T -> getSizeExpr())))
bool isOpenMPTargetDataManagementDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a target data offload directive.
static bool classof(const OMPClause *T)
@ Conditional
A conditional (?:) operator.
Definition Sema.h:668
@ ICIS_NoInit
No in-class initializer.
Definition Specifiers.h:272
bool isOpenMPDistributeDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a distribute directive.
@ LCK_ByRef
Capturing by reference.
Definition Lambda.h:37
LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE()
@ Private
'private' clause, allowed on 'parallel', 'serial', 'loop', 'parallel loop', and 'serial loop' constru...
@ Vector
'vector' clause, allowed on 'loop', Combined, and 'routine' directives.
@ Reduction
'reduction' clause, allowed on Parallel, Serial, Loop, and the combined constructs.
@ Present
'present' clause, allowed on Compute and Combined constructs, plus 'data' and 'declare'.
OpenMPScheduleClauseModifier
OpenMP modifiers for 'schedule' clause.
Definition OpenMPKinds.h:39
@ OMPC_SCHEDULE_MODIFIER_last
Definition OpenMPKinds.h:44
@ OMPC_SCHEDULE_MODIFIER_unknown
Definition OpenMPKinds.h:40
@ AS_public
Definition Specifiers.h:124
This class represents a compute construct, representing a 'Kind' of ‘parallel’, 'serial',...
@ CR_OpenMP
bool isOpenMPParallelDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a parallel-kind directive.
OpenMPDistScheduleClauseKind
OpenMP attributes for 'dist_schedule' clause.
Expr * Cond
bool isOpenMPTaskingDirective(OpenMPDirectiveKind Kind)
Checks if the specified directive kind is one of tasking directives - task, taskloop,...
bool isOpenMPTargetExecutionDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a target code offload directive.
@ OMPC_DYN_GROUPPRIVATE_FALLBACK_unknown
@ Result
The result type of a method or function.
Definition TypeBase.h:905
bool isOpenMPTeamsDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a teams-kind directive.
const FunctionProtoType * T
OpenMPDependClauseKind
OpenMP attributes for 'depend' clause.
Definition OpenMPKinds.h:55
@ OMPC_DEPEND_unknown
Definition OpenMPKinds.h:59
@ Dtor_Complete
Complete object dtor.
Definition ABI.h:36
@ Union
The "union" keyword.
Definition TypeBase.h:5901
bool isOpenMPTargetMapEnteringDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a map-entering target directive.
@ Type
The name was classified as a type.
Definition Sema.h:563
bool isOpenMPLoopDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a directive with an associated loop construct.
OpenMPSeverityClauseKind
OpenMP attributes for 'severity' clause.
LangAS
Defines the address space values used by the address space qualifier of QualType.
llvm::omp::Directive OpenMPDirectiveKind
OpenMP directives.
Definition OpenMPKinds.h:25
bool isOpenMPSimdDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a simd directive.
@ VK_PRValue
A pr-value expression (in the C++11 taxonomy) produces a temporary value.
Definition Specifiers.h:135
@ VK_LValue
An l-value expression is a reference to an object with independent storage.
Definition Specifiers.h:139
for(const auto &A :T->param_types())
void getOpenMPCaptureRegions(llvm::SmallVectorImpl< OpenMPDirectiveKind > &CaptureRegions, OpenMPDirectiveKind DKind)
Return the captured regions of an OpenMP directive.
OpenMPNumThreadsClauseModifier
@ OMPC_ATOMIC_DEFAULT_MEM_ORDER_unknown
U cast(CodeGen::Address addr)
Definition Address.h:327
@ OMPC_DEVICE_unknown
Definition OpenMPKinds.h:51
OpenMPMapModifierKind
OpenMP modifier kind for 'map' clause.
Definition OpenMPKinds.h:79
@ OMPC_MAP_MODIFIER_unknown
Definition OpenMPKinds.h:80
@ Other
Other implicit parameter.
Definition Decl.h:1746
OpenMPScheduleClauseKind
OpenMP attributes for 'schedule' clause.
Definition OpenMPKinds.h:31
@ OMPC_SCHEDULE_unknown
Definition OpenMPKinds.h:35
bool isOpenMPTaskLoopDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a taskloop directive.
OpenMPThreadsetKind
OpenMP modifiers for 'threadset' clause.
OpenMPMapClauseKind
OpenMP mapping kind for 'map' clause.
Definition OpenMPKinds.h:71
@ OMPC_MAP_unknown
Definition OpenMPKinds.h:75
unsigned long uint64_t
Diagnostic wrappers for TextAPI types for error reporting.
Definition Dominators.h:30
#define false
Definition stdbool.h:26
Data for list of allocators.
Expr * AllocatorTraits
Allocator traits.
Struct with the values to be passed to the dispatch runtime function.
llvm::Value * Chunk
Chunk size specified using 'schedule' clause (nullptr if chunk was not specified)
Maps the expression for the lastprivate variable to the global copy used to store new value because o...
Struct with the values to be passed to the static runtime function.
bool IVSigned
Sign of the iteration variable.
Address UB
Address of the output variable in which the upper iteration number is returned.
Address IL
Address of the output variable in which the flag of the last iteration is returned.
llvm::Value * Chunk
Value of the chunk for the static_chunked scheduled loop.
unsigned IVSize
Size of the iteration variable in bits.
Address ST
Address of the output variable in which the stride value is returned, necessary to generate the stati...
bool Ordered
true if loop is ordered, false otherwise.
Address LB
Address of the output variable in which the lower iteration number is returned.
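A minimal sketch of packaging these fields for the static-init entry points, assuming the constructor takes the members in the order listed above and that RT is the CGOpenMPRuntime instance (e.g. from CGM.getOpenMPRuntime()):

CGOpenMPRuntime::StaticRTInput Values(/*IVSize=*/32, /*IVSigned=*/true,
                                      /*Ordered=*/false, IL, LB, UB, ST,
                                      /*Chunk=*/nullptr);
RT.emitForStaticInit(CGF, Loc, llvm::omp::OMPD_for, ScheduleKind, Values);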
A jump destination is an abstract label, branching to which may require a jump out through normal cle...
llvm::IntegerType * Int8Ty
i8, i16, i32, and i64
llvm::CallingConv::ID getRuntimeCC() const
SmallVector< const Expr *, 4 > DepExprs
EvalResult is a struct with detailed info about an evaluated expression.
Definition Expr.h:645
Extra information about a function prototype.
Definition TypeBase.h:5354
Expr * CounterUpdate
Updater for the internal counter: ++CounterVD;.
Definition ExprOpenMP.h:121
Scheduling data for loop-based OpenMP directives.
OpenMPScheduleClauseModifier M2
OpenMPScheduleClauseModifier M1
OpenMPScheduleClauseKind Schedule
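A minimal sketch of filling this scheduling descriptor for 'schedule(nonmonotonic: dynamic)'; the OMPC_SCHEDULE_dynamic and OMPC_SCHEDULE_MODIFIER_nonmonotonic enumerators are assumed from OpenMPKinds.h:

OpenMPScheduleTy SchedTy;
SchedTy.Schedule = OMPC_SCHEDULE_dynamic;         // 'dynamic' schedule kind
SchedTy.M1 = OMPC_SCHEDULE_MODIFIER_nonmonotonic; // first modifier
SchedTy.M2 = OMPC_SCHEDULE_MODIFIER_unknown;      // no second modifier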
Describes how types, statements, expressions, and declarations should be printed.