clang 23.0.0git
CGOpenMPRuntime.cpp
Go to the documentation of this file.
1//===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This provides a class for OpenMP runtime code generation.
10//
11//===----------------------------------------------------------------------===//
12
13#include "CGOpenMPRuntime.h"
14#include "ABIInfoImpl.h"
15#include "CGCXXABI.h"
16#include "CGCleanup.h"
17#include "CGDebugInfo.h"
18#include "CGRecordLayout.h"
19#include "CodeGenFunction.h"
20#include "TargetInfo.h"
21#include "clang/AST/APValue.h"
22#include "clang/AST/Attr.h"
23#include "clang/AST/Decl.h"
31#include "llvm/ADT/ArrayRef.h"
32#include "llvm/ADT/SmallSet.h"
33#include "llvm/ADT/SmallVector.h"
34#include "llvm/ADT/StringExtras.h"
35#include "llvm/Bitcode/BitcodeReader.h"
36#include "llvm/IR/Constants.h"
37#include "llvm/IR/DerivedTypes.h"
38#include "llvm/IR/GlobalValue.h"
39#include "llvm/IR/InstrTypes.h"
40#include "llvm/IR/Value.h"
41#include "llvm/Support/AtomicOrdering.h"
42#include "llvm/Support/raw_ostream.h"
43#include <cassert>
44#include <cstdint>
45#include <numeric>
46#include <optional>
47
48using namespace clang;
49using namespace CodeGen;
50using namespace llvm::omp;
51
52namespace {
53/// Base class for handling code generation inside OpenMP regions.
54class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
55public:
56 /// Kinds of OpenMP regions used in codegen.
57 enum CGOpenMPRegionKind {
58 /// Region with outlined function for standalone 'parallel'
59 /// directive.
60 ParallelOutlinedRegion,
61 /// Region with outlined function for standalone 'task' directive.
62 TaskOutlinedRegion,
63 /// Region for constructs that do not require function outlining,
64 /// like 'for', 'sections', 'atomic' etc. directives.
65 InlinedRegion,
66 /// Region with outlined function for standalone 'target' directive.
67 TargetRegion,
68 };
69
70 CGOpenMPRegionInfo(const CapturedStmt &CS,
71 const CGOpenMPRegionKind RegionKind,
72 const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
73 bool HasCancel)
74 : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
75 CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}
76
77 CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
78 const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
79 bool HasCancel)
80 : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
81 Kind(Kind), HasCancel(HasCancel) {}
82
83 /// Get a variable or parameter for storing global thread id
84 /// inside OpenMP construct.
85 virtual const VarDecl *getThreadIDVariable() const = 0;
86
87 /// Emit the captured statement body.
88 void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;
89
90 /// Get an LValue for the current ThreadID variable.
91 /// \return LValue for thread id variable. This LValue always has type int32*.
92 virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);
93
94 virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}
95
96 CGOpenMPRegionKind getRegionKind() const { return RegionKind; }
97
98 OpenMPDirectiveKind getDirectiveKind() const { return Kind; }
99
100 bool hasCancel() const { return HasCancel; }
101
102 static bool classof(const CGCapturedStmtInfo *Info) {
103 return Info->getKind() == CR_OpenMP;
104 }
105
106 ~CGOpenMPRegionInfo() override = default;
107
108protected:
109 CGOpenMPRegionKind RegionKind;
110 RegionCodeGenTy CodeGen;
112 bool HasCancel;
113};
114
115/// API for captured statement code generation in OpenMP constructs.
116class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo {
117public:
118 CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
119 const RegionCodeGenTy &CodeGen,
120 OpenMPDirectiveKind Kind, bool HasCancel,
121 StringRef HelperName)
122 : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
123 HasCancel),
124 ThreadIDVar(ThreadIDVar), HelperName(HelperName) {
125 assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
126 }
127
128 /// Get a variable or parameter for storing global thread id
129 /// inside OpenMP construct.
130 const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }
131
132 /// Get the name of the capture helper.
133 StringRef getHelperName() const override { return HelperName; }
134
135 static bool classof(const CGCapturedStmtInfo *Info) {
136 return CGOpenMPRegionInfo::classof(Info) &&
137 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
138 ParallelOutlinedRegion;
139 }
140
141private:
142 /// A variable or parameter storing global thread id for OpenMP
143 /// constructs.
144 const VarDecl *ThreadIDVar;
145 StringRef HelperName;
146};
147
148/// API for captured statement code generation in OpenMP constructs.
149class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo {
150public:
151 class UntiedTaskActionTy final : public PrePostActionTy {
152 bool Untied;
153 const VarDecl *PartIDVar;
154 const RegionCodeGenTy UntiedCodeGen;
155 llvm::SwitchInst *UntiedSwitch = nullptr;
156
157 public:
158 UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar,
159 const RegionCodeGenTy &UntiedCodeGen)
160 : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {}
161 void Enter(CodeGenFunction &CGF) override {
162 if (Untied) {
163 // Emit task switching point.
164 LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
165 CGF.GetAddrOfLocalVar(PartIDVar),
166 PartIDVar->getType()->castAs<PointerType>());
167 llvm::Value *Res =
168 CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation());
169 llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done.");
170 UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB);
171 CGF.EmitBlock(DoneBB);
173 CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
174 UntiedSwitch->addCase(CGF.Builder.getInt32(0),
175 CGF.Builder.GetInsertBlock());
176 emitUntiedSwitch(CGF);
177 }
178 }
179 void emitUntiedSwitch(CodeGenFunction &CGF) const {
180 if (Untied) {
181 LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
182 CGF.GetAddrOfLocalVar(PartIDVar),
183 PartIDVar->getType()->castAs<PointerType>());
184 CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
185 PartIdLVal);
186 UntiedCodeGen(CGF);
187 CodeGenFunction::JumpDest CurPoint =
188 CGF.getJumpDestInCurrentScope(".untied.next.");
190 CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
191 UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
192 CGF.Builder.GetInsertBlock());
193 CGF.EmitBranchThroughCleanup(CurPoint);
194 CGF.EmitBlock(CurPoint.getBlock());
195 }
196 }
197 unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); }
198 };
199 CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
200 const VarDecl *ThreadIDVar,
201 const RegionCodeGenTy &CodeGen,
202 OpenMPDirectiveKind Kind, bool HasCancel,
203 const UntiedTaskActionTy &Action)
204 : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
205 ThreadIDVar(ThreadIDVar), Action(Action) {
206 assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
207 }
208
209 /// Get a variable or parameter for storing global thread id
210 /// inside OpenMP construct.
211 const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }
212
213 /// Get an LValue for the current ThreadID variable.
214 LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;
215
216 /// Get the name of the capture helper.
217 StringRef getHelperName() const override { return ".omp_outlined."; }
218
219 void emitUntiedSwitch(CodeGenFunction &CGF) override {
220 Action.emitUntiedSwitch(CGF);
221 }
222
223 static bool classof(const CGCapturedStmtInfo *Info) {
224 return CGOpenMPRegionInfo::classof(Info) &&
225 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
226 TaskOutlinedRegion;
227 }
228
229private:
230 /// A variable or parameter storing global thread id for OpenMP
231 /// constructs.
232 const VarDecl *ThreadIDVar;
233 /// Action for emitting code for untied tasks.
234 const UntiedTaskActionTy &Action;
235};
236
237/// API for inlined captured statement code generation in OpenMP
238/// constructs.
239class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
240public:
241 CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
242 const RegionCodeGenTy &CodeGen,
243 OpenMPDirectiveKind Kind, bool HasCancel)
244 : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
245 OldCSI(OldCSI),
246 OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}
247
248 // Retrieve the value of the context parameter.
249 llvm::Value *getContextValue() const override {
250 if (OuterRegionInfo)
251 return OuterRegionInfo->getContextValue();
252 llvm_unreachable("No context value for inlined OpenMP region");
253 }
254
255 void setContextValue(llvm::Value *V) override {
256 if (OuterRegionInfo) {
257 OuterRegionInfo->setContextValue(V);
258 return;
259 }
260 llvm_unreachable("No context value for inlined OpenMP region");
261 }
262
263 /// Lookup the captured field decl for a variable.
264 const FieldDecl *lookup(const VarDecl *VD) const override {
265 if (OuterRegionInfo)
266 return OuterRegionInfo->lookup(VD);
267 // If there is no outer outlined region,no need to lookup in a list of
268 // captured variables, we can use the original one.
269 return nullptr;
270 }
271
272 FieldDecl *getThisFieldDecl() const override {
273 if (OuterRegionInfo)
274 return OuterRegionInfo->getThisFieldDecl();
275 return nullptr;
276 }
277
278 /// Get a variable or parameter for storing global thread id
279 /// inside OpenMP construct.
280 const VarDecl *getThreadIDVariable() const override {
281 if (OuterRegionInfo)
282 return OuterRegionInfo->getThreadIDVariable();
283 return nullptr;
284 }
285
286 /// Get an LValue for the current ThreadID variable.
287 LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override {
288 if (OuterRegionInfo)
289 return OuterRegionInfo->getThreadIDVariableLValue(CGF);
290 llvm_unreachable("No LValue for inlined OpenMP construct");
291 }
292
293 /// Get the name of the capture helper.
294 StringRef getHelperName() const override {
295 if (auto *OuterRegionInfo = getOldCSI())
296 return OuterRegionInfo->getHelperName();
297 llvm_unreachable("No helper name for inlined OpenMP construct");
298 }
299
300 void emitUntiedSwitch(CodeGenFunction &CGF) override {
301 if (OuterRegionInfo)
302 OuterRegionInfo->emitUntiedSwitch(CGF);
303 }
304
305 CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }
306
307 static bool classof(const CGCapturedStmtInfo *Info) {
308 return CGOpenMPRegionInfo::classof(Info) &&
309 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
310 }
311
312 ~CGOpenMPInlinedRegionInfo() override = default;
313
314private:
315 /// CodeGen info about outer OpenMP region.
316 CodeGenFunction::CGCapturedStmtInfo *OldCSI;
317 CGOpenMPRegionInfo *OuterRegionInfo;
318};
319
320/// API for captured statement code generation in OpenMP target
321/// constructs. For this captures, implicit parameters are used instead of the
322/// captured fields. The name of the target region has to be unique in a given
323/// application so it is provided by the client, because only the client has
324/// the information to generate that.
325class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo {
326public:
327 CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
328 const RegionCodeGenTy &CodeGen, StringRef HelperName)
329 : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
330 /*HasCancel=*/false),
331 HelperName(HelperName) {}
332
333 /// This is unused for target regions because each starts executing
334 /// with a single thread.
335 const VarDecl *getThreadIDVariable() const override { return nullptr; }
336
337 /// Get the name of the capture helper.
338 StringRef getHelperName() const override { return HelperName; }
339
340 static bool classof(const CGCapturedStmtInfo *Info) {
341 return CGOpenMPRegionInfo::classof(Info) &&
342 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;
343 }
344
345private:
346 StringRef HelperName;
347};
348
349static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) {
350 llvm_unreachable("No codegen for expressions");
351}
352/// API for generation of expressions captured in a innermost OpenMP
353/// region.
354class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo {
355public:
356 CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS)
357 : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen,
358 OMPD_unknown,
359 /*HasCancel=*/false),
360 PrivScope(CGF) {
361 // Make sure the globals captured in the provided statement are local by
362 // using the privatization logic. We assume the same variable is not
363 // captured more than once.
364 for (const auto &C : CS.captures()) {
365 if (!C.capturesVariable() && !C.capturesVariableByCopy())
366 continue;
367
368 const VarDecl *VD = C.getCapturedVar();
369 if (VD->isLocalVarDeclOrParm())
370 continue;
371
372 DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
373 /*RefersToEnclosingVariableOrCapture=*/false,
374 VD->getType().getNonReferenceType(), VK_LValue,
375 C.getLocation());
376 PrivScope.addPrivate(VD, CGF.EmitLValue(&DRE).getAddress());
377 }
378 (void)PrivScope.Privatize();
379 }
380
381 /// Lookup the captured field decl for a variable.
382 const FieldDecl *lookup(const VarDecl *VD) const override {
383 if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD))
384 return FD;
385 return nullptr;
386 }
387
388 /// Emit the captured statement body.
389 void EmitBody(CodeGenFunction &CGF, const Stmt *S) override {
390 llvm_unreachable("No body for expressions");
391 }
392
393 /// Get a variable or parameter for storing global thread id
394 /// inside OpenMP construct.
395 const VarDecl *getThreadIDVariable() const override {
396 llvm_unreachable("No thread id for expressions");
397 }
398
399 /// Get the name of the capture helper.
400 StringRef getHelperName() const override {
401 llvm_unreachable("No helper name for expressions");
402 }
403
404 static bool classof(const CGCapturedStmtInfo *Info) { return false; }
405
406private:
407 /// Private scope to capture global variables.
408 CodeGenFunction::OMPPrivateScope PrivScope;
409};
410
411/// RAII for emitting code of OpenMP constructs.
412class InlinedOpenMPRegionRAII {
413 CodeGenFunction &CGF;
414 llvm::DenseMap<const ValueDecl *, FieldDecl *> LambdaCaptureFields;
415 FieldDecl *LambdaThisCaptureField = nullptr;
416 const CodeGen::CGBlockInfo *BlockInfo = nullptr;
417 bool NoInheritance = false;
418
419public:
420 /// Constructs region for combined constructs.
421 /// \param CodeGen Code generation sequence for combined directives. Includes
422 /// a list of functions used for code generation of implicitly inlined
423 /// regions.
424 InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
425 OpenMPDirectiveKind Kind, bool HasCancel,
426 bool NoInheritance = true)
427 : CGF(CGF), NoInheritance(NoInheritance) {
428 // Start emission for the construct.
429 CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
430 CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
431 if (NoInheritance) {
432 std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
433 LambdaThisCaptureField = CGF.LambdaThisCaptureField;
434 CGF.LambdaThisCaptureField = nullptr;
435 BlockInfo = CGF.BlockInfo;
436 CGF.BlockInfo = nullptr;
437 }
438 }
439
440 ~InlinedOpenMPRegionRAII() {
441 // Restore original CapturedStmtInfo only if we're done with code emission.
442 auto *OldCSI =
443 cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
444 delete CGF.CapturedStmtInfo;
445 CGF.CapturedStmtInfo = OldCSI;
446 if (NoInheritance) {
447 std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
448 CGF.LambdaThisCaptureField = LambdaThisCaptureField;
449 CGF.BlockInfo = BlockInfo;
450 }
451 }
452};
453
454/// Values for bit flags used in the ident_t to describe the fields.
455/// All enumeric elements are named and described in accordance with the code
456/// from https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
457enum OpenMPLocationFlags : unsigned {
458 /// Use trampoline for internal microtask.
459 OMP_IDENT_IMD = 0x01,
460 /// Use c-style ident structure.
461 OMP_IDENT_KMPC = 0x02,
462 /// Atomic reduction option for kmpc_reduce.
463 OMP_ATOMIC_REDUCE = 0x10,
464 /// Explicit 'barrier' directive.
465 OMP_IDENT_BARRIER_EXPL = 0x20,
466 /// Implicit barrier in code.
467 OMP_IDENT_BARRIER_IMPL = 0x40,
468 /// Implicit barrier in 'for' directive.
469 OMP_IDENT_BARRIER_IMPL_FOR = 0x40,
470 /// Implicit barrier in 'sections' directive.
471 OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0,
472 /// Implicit barrier in 'single' directive.
473 OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140,
474 /// Call of __kmp_for_static_init for static loop.
475 OMP_IDENT_WORK_LOOP = 0x200,
476 /// Call of __kmp_for_static_init for sections.
477 OMP_IDENT_WORK_SECTIONS = 0x400,
478 /// Call of __kmp_for_static_init for distribute.
479 OMP_IDENT_WORK_DISTRIBUTE = 0x800,
480 LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE)
481};
482
483/// Describes ident structure that describes a source location.
484/// All descriptions are taken from
485/// https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
486/// Original structure:
487/// typedef struct ident {
488/// kmp_int32 reserved_1; /**< might be used in Fortran;
489/// see above */
490/// kmp_int32 flags; /**< also f.flags; KMP_IDENT_xxx flags;
491/// KMP_IDENT_KMPC identifies this union
492/// member */
493/// kmp_int32 reserved_2; /**< not really used in Fortran any more;
494/// see above */
495///#if USE_ITT_BUILD
496/// /* but currently used for storing
497/// region-specific ITT */
498/// /* contextual information. */
499///#endif /* USE_ITT_BUILD */
500/// kmp_int32 reserved_3; /**< source[4] in Fortran, do not use for
501/// C++ */
502/// char const *psource; /**< String describing the source location.
503/// The string is composed of semi-colon separated
504// fields which describe the source file,
505/// the function and a pair of line numbers that
506/// delimit the construct.
507/// */
508/// } ident_t;
509enum IdentFieldIndex {
510 /// might be used in Fortran
511 IdentField_Reserved_1,
512 /// OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
513 IdentField_Flags,
514 /// Not really used in Fortran any more
515 IdentField_Reserved_2,
516 /// Source[4] in Fortran, do not use for C++
517 IdentField_Reserved_3,
518 /// String describing the source location. The string is composed of
519 /// semi-colon separated fields which describe the source file, the function
520 /// and a pair of line numbers that delimit the construct.
521 IdentField_PSource
522};
523
524/// Schedule types for 'omp for' loops (these enumerators are taken from
525/// the enum sched_type in kmp.h).
526enum OpenMPSchedType {
527 /// Lower bound for default (unordered) versions.
528 OMP_sch_lower = 32,
529 OMP_sch_static_chunked = 33,
530 OMP_sch_static = 34,
531 OMP_sch_dynamic_chunked = 35,
532 OMP_sch_guided_chunked = 36,
533 OMP_sch_runtime = 37,
534 OMP_sch_auto = 38,
535 /// static with chunk adjustment (e.g., simd)
536 OMP_sch_static_balanced_chunked = 45,
537 /// Lower bound for 'ordered' versions.
538 OMP_ord_lower = 64,
539 OMP_ord_static_chunked = 65,
540 OMP_ord_static = 66,
541 OMP_ord_dynamic_chunked = 67,
542 OMP_ord_guided_chunked = 68,
543 OMP_ord_runtime = 69,
544 OMP_ord_auto = 70,
545 OMP_sch_default = OMP_sch_static,
546 /// dist_schedule types
547 OMP_dist_sch_static_chunked = 91,
548 OMP_dist_sch_static = 92,
549 /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
550 /// Set if the monotonic schedule modifier was present.
551 OMP_sch_modifier_monotonic = (1 << 29),
552 /// Set if the nonmonotonic schedule modifier was present.
553 OMP_sch_modifier_nonmonotonic = (1 << 30),
554};
555
556/// A basic class for pre|post-action for advanced codegen sequence for OpenMP
557/// region.
558class CleanupTy final : public EHScopeStack::Cleanup {
559 PrePostActionTy *Action;
560
561public:
562 explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {}
563 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
564 if (!CGF.HaveInsertPoint())
565 return;
566 Action->Exit(CGF);
567 }
568};
569
570} // anonymous namespace
571
574 if (PrePostAction) {
575 CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction);
576 Callback(CodeGen, CGF, *PrePostAction);
577 } else {
578 PrePostActionTy Action;
579 Callback(CodeGen, CGF, Action);
580 }
581}
582
583/// Check if the combiner is a call to UDR combiner and if it is so return the
584/// UDR decl used for reduction.
585static const OMPDeclareReductionDecl *
586getReductionInit(const Expr *ReductionOp) {
587 if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
588 if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
589 if (const auto *DRE =
590 dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
591 if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl()))
592 return DRD;
593 return nullptr;
594}
595
597 const OMPDeclareReductionDecl *DRD,
598 const Expr *InitOp,
599 Address Private, Address Original,
600 QualType Ty) {
601 if (DRD->getInitializer()) {
602 std::pair<llvm::Function *, llvm::Function *> Reduction =
604 const auto *CE = cast<CallExpr>(InitOp);
605 const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee());
606 const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
607 const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
608 const auto *LHSDRE =
609 cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr());
610 const auto *RHSDRE =
611 cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr());
612 CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
613 PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()), Private);
614 PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()), Original);
615 (void)PrivateScope.Privatize();
618 CGF.EmitIgnoredExpr(InitOp);
619 } else {
620 llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty);
621 std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"});
622 auto *GV = new llvm::GlobalVariable(
623 CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
624 llvm::GlobalValue::PrivateLinkage, Init, Name);
625 LValue LV = CGF.MakeNaturalAlignRawAddrLValue(GV, Ty);
626 RValue InitRVal;
627 switch (CGF.getEvaluationKind(Ty)) {
628 case TEK_Scalar:
629 InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation());
630 break;
631 case TEK_Complex:
632 InitRVal =
634 break;
635 case TEK_Aggregate: {
636 OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_LValue);
637 CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, LV);
638 CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
639 /*IsInitializer=*/false);
640 return;
641 }
642 }
643 OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_PRValue);
644 CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
645 CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
646 /*IsInitializer=*/false);
647 }
648}
649
650/// Emit initialization of arrays of complex types.
651/// \param DestAddr Address of the array.
652/// \param Type Type of array.
653/// \param Init Initial expression of array.
654/// \param SrcAddr Address of the original array.
656 QualType Type, bool EmitDeclareReductionInit,
657 const Expr *Init,
658 const OMPDeclareReductionDecl *DRD,
659 Address SrcAddr = Address::invalid()) {
660 // Perform element-by-element initialization.
661 QualType ElementTy;
662
663 // Drill down to the base element type on both arrays.
664 const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
665 llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
666 if (DRD)
667 SrcAddr = SrcAddr.withElementType(DestAddr.getElementType());
668
669 llvm::Value *SrcBegin = nullptr;
670 if (DRD)
671 SrcBegin = SrcAddr.emitRawPointer(CGF);
672 llvm::Value *DestBegin = DestAddr.emitRawPointer(CGF);
673 // Cast from pointer to array type to pointer to single element.
674 llvm::Value *DestEnd =
675 CGF.Builder.CreateGEP(DestAddr.getElementType(), DestBegin, NumElements);
676 // The basic structure here is a while-do loop.
677 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
678 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
679 llvm::Value *IsEmpty =
680 CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
681 CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
682
683 // Enter the loop body, making that address the current address.
684 llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
685 CGF.EmitBlock(BodyBB);
686
687 CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);
688
689 llvm::PHINode *SrcElementPHI = nullptr;
690 Address SrcElementCurrent = Address::invalid();
691 if (DRD) {
692 SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2,
693 "omp.arraycpy.srcElementPast");
694 SrcElementPHI->addIncoming(SrcBegin, EntryBB);
695 SrcElementCurrent =
696 Address(SrcElementPHI, SrcAddr.getElementType(),
697 SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
698 }
699 llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
700 DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
701 DestElementPHI->addIncoming(DestBegin, EntryBB);
702 Address DestElementCurrent =
703 Address(DestElementPHI, DestAddr.getElementType(),
704 DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));
705
706 // Emit copy.
707 {
709 if (EmitDeclareReductionInit) {
710 emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent,
711 SrcElementCurrent, ElementTy);
712 } else
713 CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
714 /*IsInitializer=*/false);
715 }
716
717 if (DRD) {
718 // Shift the address forward by one element.
719 llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32(
720 SrcAddr.getElementType(), SrcElementPHI, /*Idx0=*/1,
721 "omp.arraycpy.dest.element");
722 SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock());
723 }
724
725 // Shift the address forward by one element.
726 llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32(
727 DestAddr.getElementType(), DestElementPHI, /*Idx0=*/1,
728 "omp.arraycpy.dest.element");
729 // Check whether we've reached the end.
730 llvm::Value *Done =
731 CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
732 CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
733 DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());
734
735 // Done.
736 CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
737}
738
739LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) {
740 return CGF.EmitOMPSharedLValue(E);
741}
742
743LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF,
744 const Expr *E) {
745 if (const auto *OASE = dyn_cast<ArraySectionExpr>(E))
746 return CGF.EmitArraySectionExpr(OASE, /*IsLowerBound=*/false);
747 return LValue();
748}
749
750void ReductionCodeGen::emitAggregateInitialization(
751 CodeGenFunction &CGF, unsigned N, Address PrivateAddr, Address SharedAddr,
752 const OMPDeclareReductionDecl *DRD) {
753 // Emit VarDecl with copy init for arrays.
754 // Get the address of the original variable captured in current
755 // captured region.
756 const auto *PrivateVD =
757 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
758 bool EmitDeclareReductionInit =
759 DRD && (DRD->getInitializer() || !PrivateVD->hasInit());
760 EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(),
761 EmitDeclareReductionInit,
762 EmitDeclareReductionInit ? ClausesData[N].ReductionOp
763 : PrivateVD->getInit(),
764 DRD, SharedAddr);
765}
766
770 ArrayRef<const Expr *> ReductionOps) {
771 ClausesData.reserve(Shareds.size());
772 SharedAddresses.reserve(Shareds.size());
773 Sizes.reserve(Shareds.size());
774 BaseDecls.reserve(Shareds.size());
775 const auto *IOrig = Origs.begin();
776 const auto *IPriv = Privates.begin();
777 const auto *IRed = ReductionOps.begin();
778 for (const Expr *Ref : Shareds) {
779 ClausesData.emplace_back(Ref, *IOrig, *IPriv, *IRed);
780 std::advance(IOrig, 1);
781 std::advance(IPriv, 1);
782 std::advance(IRed, 1);
783 }
784}
785
787 assert(SharedAddresses.size() == N && OrigAddresses.size() == N &&
788 "Number of generated lvalues must be exactly N.");
789 LValue First = emitSharedLValue(CGF, ClausesData[N].Shared);
790 LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Shared);
791 SharedAddresses.emplace_back(First, Second);
792 if (ClausesData[N].Shared == ClausesData[N].Ref) {
793 OrigAddresses.emplace_back(First, Second);
794 } else {
795 LValue First = emitSharedLValue(CGF, ClausesData[N].Ref);
796 LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref);
797 OrigAddresses.emplace_back(First, Second);
798 }
799}
800
802 QualType PrivateType = getPrivateType(N);
803 bool AsArraySection = isa<ArraySectionExpr>(ClausesData[N].Ref);
804 if (!PrivateType->isVariablyModifiedType()) {
805 Sizes.emplace_back(
806 CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType()),
807 nullptr);
808 return;
809 }
810 llvm::Value *Size;
811 llvm::Value *SizeInChars;
812 auto *ElemType = OrigAddresses[N].first.getAddress().getElementType();
813 auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType);
814 if (AsArraySection) {
815 Size = CGF.Builder.CreatePtrDiff(ElemType,
816 OrigAddresses[N].second.getPointer(CGF),
817 OrigAddresses[N].first.getPointer(CGF));
818 Size = CGF.Builder.CreateNUWAdd(
819 Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
820 SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf);
821 } else {
822 SizeInChars =
823 CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType());
824 Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf);
825 }
826 Sizes.emplace_back(SizeInChars, Size);
828 CGF,
830 CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
831 RValue::get(Size));
832 CGF.EmitVariablyModifiedType(PrivateType);
833}
834
836 llvm::Value *Size) {
837 QualType PrivateType = getPrivateType(N);
838 if (!PrivateType->isVariablyModifiedType()) {
839 assert(!Size && !Sizes[N].second &&
840 "Size should be nullptr for non-variably modified reduction "
841 "items.");
842 return;
843 }
845 CGF,
847 CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
848 RValue::get(Size));
849 CGF.EmitVariablyModifiedType(PrivateType);
850}
851
853 CodeGenFunction &CGF, unsigned N, Address PrivateAddr, Address SharedAddr,
854 llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) {
855 assert(SharedAddresses.size() > N && "No variable was generated");
856 const auto *PrivateVD =
857 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
858 const OMPDeclareReductionDecl *DRD =
859 getReductionInit(ClausesData[N].ReductionOp);
860 if (CGF.getContext().getAsArrayType(PrivateVD->getType())) {
861 if (DRD && DRD->getInitializer())
862 (void)DefaultInit(CGF);
863 emitAggregateInitialization(CGF, N, PrivateAddr, SharedAddr, DRD);
864 } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
865 (void)DefaultInit(CGF);
866 QualType SharedType = SharedAddresses[N].first.getType();
867 emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp,
868 PrivateAddr, SharedAddr, SharedType);
869 } else if (!DefaultInit(CGF) && PrivateVD->hasInit() &&
870 !CGF.isTrivialInitializer(PrivateVD->getInit())) {
871 CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr,
872 PrivateVD->getType().getQualifiers(),
873 /*IsInitializer=*/false);
874 }
875}
876
878 QualType PrivateType = getPrivateType(N);
879 QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
880 return DTorKind != QualType::DK_none;
881}
882
884 Address PrivateAddr) {
885 QualType PrivateType = getPrivateType(N);
886 QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
887 if (needCleanups(N)) {
888 PrivateAddr =
889 PrivateAddr.withElementType(CGF.ConvertTypeForMem(PrivateType));
890 CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType);
891 }
892}
893
894static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
895 LValue BaseLV) {
896 BaseTy = BaseTy.getNonReferenceType();
897 while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
898 !CGF.getContext().hasSameType(BaseTy, ElTy)) {
899 if (const auto *PtrTy = BaseTy->getAs<PointerType>()) {
900 BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(), PtrTy);
901 } else {
902 LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(), BaseTy);
903 BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal);
904 }
905 BaseTy = BaseTy->getPointeeType();
906 }
907 return CGF.MakeAddrLValue(
908 BaseLV.getAddress().withElementType(CGF.ConvertTypeForMem(ElTy)),
909 BaseLV.getType(), BaseLV.getBaseInfo(),
910 CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType()));
911}
912
914 Address OriginalBaseAddress, llvm::Value *Addr) {
916 Address TopTmp = Address::invalid();
917 Address MostTopTmp = Address::invalid();
918 BaseTy = BaseTy.getNonReferenceType();
919 while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
920 !CGF.getContext().hasSameType(BaseTy, ElTy)) {
921 Tmp = CGF.CreateMemTemp(BaseTy);
922 if (TopTmp.isValid())
923 CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
924 else
925 MostTopTmp = Tmp;
926 TopTmp = Tmp;
927 BaseTy = BaseTy->getPointeeType();
928 }
929
930 if (Tmp.isValid()) {
932 Addr, Tmp.getElementType());
933 CGF.Builder.CreateStore(Addr, Tmp);
934 return MostTopTmp;
935 }
936
938 Addr, OriginalBaseAddress.getType());
939 return OriginalBaseAddress.withPointer(Addr, NotKnownNonNull);
940}
941
942static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) {
943 const VarDecl *OrigVD = nullptr;
944 if (const auto *OASE = dyn_cast<ArraySectionExpr>(Ref)) {
945 const Expr *Base = OASE->getBase()->IgnoreParenImpCasts();
946 while (const auto *TempOASE = dyn_cast<ArraySectionExpr>(Base))
947 Base = TempOASE->getBase()->IgnoreParenImpCasts();
948 while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
949 Base = TempASE->getBase()->IgnoreParenImpCasts();
951 OrigVD = cast<VarDecl>(DE->getDecl());
952 } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) {
953 const Expr *Base = ASE->getBase()->IgnoreParenImpCasts();
954 while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
955 Base = TempASE->getBase()->IgnoreParenImpCasts();
957 OrigVD = cast<VarDecl>(DE->getDecl());
958 }
959 return OrigVD;
960}
961
963 Address PrivateAddr) {
964 const DeclRefExpr *DE;
965 if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) {
966 BaseDecls.emplace_back(OrigVD);
967 LValue OriginalBaseLValue = CGF.EmitLValue(DE);
968 LValue BaseLValue =
969 loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(),
970 OriginalBaseLValue);
971 Address SharedAddr = SharedAddresses[N].first.getAddress();
972 llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff(
973 SharedAddr.getElementType(), BaseLValue.getPointer(CGF),
974 SharedAddr.emitRawPointer(CGF));
975 llvm::Value *PrivatePointer =
977 PrivateAddr.emitRawPointer(CGF), SharedAddr.getType());
978 llvm::Value *Ptr = CGF.Builder.CreateGEP(
979 SharedAddr.getElementType(), PrivatePointer, Adjustment);
980 return castToBase(CGF, OrigVD->getType(),
981 SharedAddresses[N].first.getType(),
982 OriginalBaseLValue.getAddress(), Ptr);
983 }
984 BaseDecls.emplace_back(
985 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl()));
986 return PrivateAddr;
987}
988
990 const OMPDeclareReductionDecl *DRD =
991 getReductionInit(ClausesData[N].ReductionOp);
992 return DRD && DRD->getInitializer();
993}
994
995LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
996 return CGF.EmitLoadOfPointerLValue(
997 CGF.GetAddrOfLocalVar(getThreadIDVariable()),
998 getThreadIDVariable()->getType()->castAs<PointerType>());
999}
1000
1001void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt *S) {
1002 if (!CGF.HaveInsertPoint())
1003 return;
1004 // 1.2.2 OpenMP Language Terminology
1005 // Structured block - An executable statement with a single entry at the
1006 // top and a single exit at the bottom.
1007 // The point of exit cannot be a branch out of the structured block.
1008 // longjmp() and throw() must not violate the entry/exit criteria.
1009 CGF.EHStack.pushTerminate();
1010 if (S)
1012 CodeGen(CGF);
1013 CGF.EHStack.popTerminate();
1014}
1015
1016LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
1017 CodeGenFunction &CGF) {
1018 return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
1019 getThreadIDVariable()->getType(),
1021}
1022
1024 QualType FieldTy) {
1025 auto *Field = FieldDecl::Create(
1026 C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
1027 C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
1028 /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
1029 Field->setAccess(AS_public);
1030 DC->addDecl(Field);
1031 return Field;
1032}
1033
1035 : CGM(CGM), OMPBuilder(CGM.getModule()) {
1036 KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);
1037 llvm::OpenMPIRBuilderConfig Config(
1038 CGM.getLangOpts().OpenMPIsTargetDevice, isGPU(),
1039 CGM.getLangOpts().OpenMPOffloadMandatory,
1040 /*HasRequiresReverseOffload*/ false, /*HasRequiresUnifiedAddress*/ false,
1041 hasRequiresUnifiedSharedMemory(), /*HasRequiresDynamicAllocators*/ false);
1042 Config.setDefaultTargetAS(
1043 CGM.getContext().getTargetInfo().getTargetAddressSpace(LangAS::Default));
1044 Config.setRuntimeCC(CGM.getRuntimeCC());
1045
1046 OMPBuilder.setConfig(Config);
1047 OMPBuilder.initialize();
1048 OMPBuilder.loadOffloadInfoMetadata(*CGM.getFileSystem(),
1049 CGM.getLangOpts().OpenMPIsTargetDevice
1050 ? CGM.getLangOpts().OMPHostIRFile
1051 : StringRef{});
1052
1053 // The user forces the compiler to behave as if omp requires
1054 // unified_shared_memory was given.
1055 if (CGM.getLangOpts().OpenMPForceUSM) {
1057 OMPBuilder.Config.setHasRequiresUnifiedSharedMemory(true);
1058 }
1059}
1060
1062 InternalVars.clear();
1063 // Clean non-target variable declarations possibly used only in debug info.
1064 for (const auto &Data : EmittedNonTargetVariables) {
1065 if (!Data.getValue().pointsToAliveValue())
1066 continue;
1067 auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue());
1068 if (!GV)
1069 continue;
1070 if (!GV->isDeclaration() || GV->getNumUses() > 0)
1071 continue;
1072 GV->eraseFromParent();
1073 }
1074}
1075
1077 return OMPBuilder.createPlatformSpecificName(Parts);
1078}
1079
1080static llvm::Function *
1082 const Expr *CombinerInitializer, const VarDecl *In,
1083 const VarDecl *Out, bool IsCombiner) {
1084 // void .omp_combiner.(Ty *in, Ty *out);
1085 ASTContext &C = CGM.getContext();
1086 QualType PtrTy = C.getPointerType(Ty).withRestrict();
1087 FunctionArgList Args;
1088 ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(),
1089 /*Id=*/nullptr, PtrTy, ImplicitParamKind::Other);
1090 ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(),
1091 /*Id=*/nullptr, PtrTy, ImplicitParamKind::Other);
1092 Args.push_back(&OmpOutParm);
1093 Args.push_back(&OmpInParm);
1094 const CGFunctionInfo &FnInfo =
1095 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
1096 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
1097 std::string Name = CGM.getOpenMPRuntime().getName(
1098 {IsCombiner ? "omp_combiner" : "omp_initializer", ""});
1099 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
1100 Name, &CGM.getModule());
1101 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
1102 if (!CGM.getCodeGenOpts().SampleProfileFile.empty())
1103 Fn->addFnAttr("sample-profile-suffix-elision-policy", "selected");
1104 if (CGM.getCodeGenOpts().OptimizationLevel != 0) {
1105 Fn->removeFnAttr(llvm::Attribute::NoInline);
1106 Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
1107 Fn->addFnAttr(llvm::Attribute::AlwaysInline);
1108 }
1109 CodeGenFunction CGF(CGM);
1110 // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
1111 // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
1112 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(),
1113 Out->getLocation());
1115 Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm);
1116 Scope.addPrivate(
1117 In, CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>())
1118 .getAddress());
1119 Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm);
1120 Scope.addPrivate(
1121 Out, CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>())
1122 .getAddress());
1123 (void)Scope.Privatize();
1124 if (!IsCombiner && Out->hasInit() &&
1125 !CGF.isTrivialInitializer(Out->getInit())) {
1126 CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out),
1127 Out->getType().getQualifiers(),
1128 /*IsInitializer=*/true);
1129 }
1130 if (CombinerInitializer)
1131 CGF.EmitIgnoredExpr(CombinerInitializer);
1132 Scope.ForceCleanup();
1133 CGF.FinishFunction();
1134 return Fn;
1135}
1136
1139 if (UDRMap.count(D) > 0)
1140 return;
1141 llvm::Function *Combiner = emitCombinerOrInitializer(
1142 CGM, D->getType(), D->getCombiner(),
1145 /*IsCombiner=*/true);
1146 llvm::Function *Initializer = nullptr;
1147 if (const Expr *Init = D->getInitializer()) {
1149 CGM, D->getType(),
1151 : nullptr,
1154 /*IsCombiner=*/false);
1155 }
1156 UDRMap.try_emplace(D, Combiner, Initializer);
1157 if (CGF)
1158 FunctionUDRMap[CGF->CurFn].push_back(D);
1159}
1160
1161std::pair<llvm::Function *, llvm::Function *>
1163 auto I = UDRMap.find(D);
1164 if (I != UDRMap.end())
1165 return I->second;
1166 emitUserDefinedReduction(/*CGF=*/nullptr, D);
1167 return UDRMap.lookup(D);
1168}
1169
1170namespace {
1171// Temporary RAII solution to perform a push/pop stack event on the OpenMP IR
1172// Builder if one is present.
1173struct PushAndPopStackRAII {
1174 PushAndPopStackRAII(llvm::OpenMPIRBuilder *OMPBuilder, CodeGenFunction &CGF,
1175 bool HasCancel, llvm::omp::Directive Kind)
1176 : OMPBuilder(OMPBuilder) {
1177 if (!OMPBuilder)
1178 return;
1179
1180 // The following callback is the crucial part of clangs cleanup process.
1181 //
1182 // NOTE:
1183 // Once the OpenMPIRBuilder is used to create parallel regions (and
1184 // similar), the cancellation destination (Dest below) is determined via
1185 // IP. That means if we have variables to finalize we split the block at IP,
1186 // use the new block (=BB) as destination to build a JumpDest (via
1187 // getJumpDestInCurrentScope(BB)) which then is fed to
1188 // EmitBranchThroughCleanup. Furthermore, there will not be the need
1189 // to push & pop an FinalizationInfo object.
1190 // The FiniCB will still be needed but at the point where the
1191 // OpenMPIRBuilder is asked to construct a parallel (or similar) construct.
1192 auto FiniCB = [&CGF](llvm::OpenMPIRBuilder::InsertPointTy IP) {
1193 assert(IP.getBlock()->end() == IP.getPoint() &&
1194 "Clang CG should cause non-terminated block!");
1195 CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
1196 CGF.Builder.restoreIP(IP);
1198 CGF.getOMPCancelDestination(OMPD_parallel);
1199 CGF.EmitBranchThroughCleanup(Dest);
1200 return llvm::Error::success();
1201 };
1202
1203 // TODO: Remove this once we emit parallel regions through the
1204 // OpenMPIRBuilder as it can do this setup internally.
1205 llvm::OpenMPIRBuilder::FinalizationInfo FI({FiniCB, Kind, HasCancel});
1206 OMPBuilder->pushFinalizationCB(std::move(FI));
1207 }
1208 ~PushAndPopStackRAII() {
1209 if (OMPBuilder)
1210 OMPBuilder->popFinalizationCB();
1211 }
1212 llvm::OpenMPIRBuilder *OMPBuilder;
1213};
1214} // namespace
1215
1217 CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS,
1218 const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
1219 const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) {
1220 assert(ThreadIDVar->getType()->isPointerType() &&
1221 "thread id variable must be of type kmp_int32 *");
1222 CodeGenFunction CGF(CGM, true);
1223 bool HasCancel = false;
1224 if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D))
1225 HasCancel = OPD->hasCancel();
1226 else if (const auto *OPD = dyn_cast<OMPTargetParallelDirective>(&D))
1227 HasCancel = OPD->hasCancel();
1228 else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D))
1229 HasCancel = OPSD->hasCancel();
1230 else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
1231 HasCancel = OPFD->hasCancel();
1232 else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D))
1233 HasCancel = OPFD->hasCancel();
1234 else if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D))
1235 HasCancel = OPFD->hasCancel();
1236 else if (const auto *OPFD =
1237 dyn_cast<OMPTeamsDistributeParallelForDirective>(&D))
1238 HasCancel = OPFD->hasCancel();
1239 else if (const auto *OPFD =
1240 dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D))
1241 HasCancel = OPFD->hasCancel();
1242
1243 // TODO: Temporarily inform the OpenMPIRBuilder, if any, about the new
1244 // parallel region to make cancellation barriers work properly.
1245 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
1246 PushAndPopStackRAII PSR(&OMPBuilder, CGF, HasCancel, InnermostKind);
1247 CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
1248 HasCancel, OutlinedHelperName);
1249 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
1250 return CGF.GenerateOpenMPCapturedStmtFunction(*CS, D);
1251}
1252
1253std::string CGOpenMPRuntime::getOutlinedHelperName(StringRef Name) const {
1254 std::string Suffix = getName({"omp_outlined"});
1255 return (Name + Suffix).str();
1256}
1257
1259 return getOutlinedHelperName(CGF.CurFn->getName());
1260}
1261
1262std::string CGOpenMPRuntime::getReductionFuncName(StringRef Name) const {
1263 std::string Suffix = getName({"omp", "reduction", "reduction_func"});
1264 return (Name + Suffix).str();
1265}
1266
1269 const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
1270 const RegionCodeGenTy &CodeGen) {
1271 const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel);
1273 CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(CGF),
1274 CodeGen);
1275}
1276
1279 const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
1280 const RegionCodeGenTy &CodeGen) {
1281 const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams);
1283 CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(CGF),
1284 CodeGen);
1285}
1286
1288 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1289 const VarDecl *PartIDVar, const VarDecl *TaskTVar,
1290 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
1291 bool Tied, unsigned &NumberOfParts) {
1292 auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF,
1293 PrePostActionTy &) {
1294 llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc());
1295 llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getBeginLoc());
1296 llvm::Value *TaskArgs[] = {
1297 UpLoc, ThreadID,
1298 CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar),
1299 TaskTVar->getType()->castAs<PointerType>())
1300 .getPointer(CGF)};
1301 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
1302 CGM.getModule(), OMPRTL___kmpc_omp_task),
1303 TaskArgs);
1304 };
1305 CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar,
1306 UntiedCodeGen);
1307 CodeGen.setAction(Action);
1308 assert(!ThreadIDVar->getType()->isPointerType() &&
1309 "thread id variable must be of type kmp_int32 for tasks");
1310 const OpenMPDirectiveKind Region =
1311 isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop
1312 : OMPD_task;
1313 const CapturedStmt *CS = D.getCapturedStmt(Region);
1314 bool HasCancel = false;
1315 if (const auto *TD = dyn_cast<OMPTaskDirective>(&D))
1316 HasCancel = TD->hasCancel();
1317 else if (const auto *TD = dyn_cast<OMPTaskLoopDirective>(&D))
1318 HasCancel = TD->hasCancel();
1319 else if (const auto *TD = dyn_cast<OMPMasterTaskLoopDirective>(&D))
1320 HasCancel = TD->hasCancel();
1321 else if (const auto *TD = dyn_cast<OMPParallelMasterTaskLoopDirective>(&D))
1322 HasCancel = TD->hasCancel();
1323
1324 CodeGenFunction CGF(CGM, true);
1325 CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
1326 InnermostKind, HasCancel, Action);
1327 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
1328 llvm::Function *Res = CGF.GenerateCapturedStmtFunction(*CS);
1329 if (!Tied)
1330 NumberOfParts = Action.getNumberOfParts();
1331 return Res;
1332}
1333
1335 bool AtCurrentPoint) {
1336 auto &Elem = OpenMPLocThreadIDMap[CGF.CurFn];
1337 assert(!Elem.ServiceInsertPt && "Insert point is set already.");
1338
1339 llvm::Value *Undef = llvm::UndefValue::get(CGF.Int32Ty);
1340 if (AtCurrentPoint) {
1341 Elem.ServiceInsertPt = new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt",
1342 CGF.Builder.GetInsertBlock());
1343 } else {
1344 Elem.ServiceInsertPt = new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt");
1345 Elem.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt->getIterator());
1346 }
1347}
1348
1350 auto &Elem = OpenMPLocThreadIDMap[CGF.CurFn];
1351 if (Elem.ServiceInsertPt) {
1352 llvm::Instruction *Ptr = Elem.ServiceInsertPt;
1353 Elem.ServiceInsertPt = nullptr;
1354 Ptr->eraseFromParent();
1355 }
1356}
1357
1359 SourceLocation Loc,
1360 SmallString<128> &Buffer) {
1361 llvm::raw_svector_ostream OS(Buffer);
1362 // Build debug location
1364 OS << ";";
1365 if (auto *DbgInfo = CGF.getDebugInfo())
1366 OS << DbgInfo->remapDIPath(PLoc.getFilename());
1367 else
1368 OS << PLoc.getFilename();
1369 OS << ";";
1370 if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
1371 OS << FD->getQualifiedNameAsString();
1372 OS << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
1373 return OS.str();
1374}
1375
1377 SourceLocation Loc,
1378 unsigned Flags, bool EmitLoc) {
1379 uint32_t SrcLocStrSize;
1380 llvm::Constant *SrcLocStr;
1381 if ((!EmitLoc && CGM.getCodeGenOpts().getDebugInfo() ==
1382 llvm::codegenoptions::NoDebugInfo) ||
1383 Loc.isInvalid()) {
1384 SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize);
1385 } else {
1386 std::string FunctionName;
1387 std::string FileName;
1388 if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
1389 FunctionName = FD->getQualifiedNameAsString();
1391 if (auto *DbgInfo = CGF.getDebugInfo())
1392 FileName = DbgInfo->remapDIPath(PLoc.getFilename());
1393 else
1394 FileName = PLoc.getFilename();
1395 unsigned Line = PLoc.getLine();
1396 unsigned Column = PLoc.getColumn();
1397 SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(FunctionName, FileName, Line,
1398 Column, SrcLocStrSize);
1399 }
1400 unsigned Reserved2Flags = getDefaultLocationReserved2Flags();
1401 return OMPBuilder.getOrCreateIdent(
1402 SrcLocStr, SrcLocStrSize, llvm::omp::IdentFlag(Flags), Reserved2Flags);
1403}
1404
1406 SourceLocation Loc) {
1407 assert(CGF.CurFn && "No function in current CodeGenFunction.");
1408 // If the OpenMPIRBuilder is used we need to use it for all thread id calls as
1409 // the clang invariants used below might be broken.
1410 if (CGM.getLangOpts().OpenMPIRBuilder) {
1411 SmallString<128> Buffer;
1412 OMPBuilder.updateToLocation(CGF.Builder.saveIP());
1413 uint32_t SrcLocStrSize;
1414 auto *SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(
1415 getIdentStringFromSourceLocation(CGF, Loc, Buffer), SrcLocStrSize);
1416 return OMPBuilder.getOrCreateThreadID(
1417 OMPBuilder.getOrCreateIdent(SrcLocStr, SrcLocStrSize));
1418 }
1419
1420 llvm::Value *ThreadID = nullptr;
1421 // Check whether we've already cached a load of the thread id in this
1422 // function.
1423 auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
1424 if (I != OpenMPLocThreadIDMap.end()) {
1425 ThreadID = I->second.ThreadID;
1426 if (ThreadID != nullptr)
1427 return ThreadID;
1428 }
1429 // If exceptions are enabled, do not use parameter to avoid possible crash.
1430 if (auto *OMPRegionInfo =
1431 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
1432 if (OMPRegionInfo->getThreadIDVariable()) {
1433 // Check if this an outlined function with thread id passed as argument.
1434 LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
1435 llvm::BasicBlock *TopBlock = CGF.AllocaInsertPt->getParent();
1436 if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions ||
1437 !CGF.getLangOpts().CXXExceptions ||
1438 CGF.Builder.GetInsertBlock() == TopBlock ||
1439 !isa<llvm::Instruction>(LVal.getPointer(CGF)) ||
1440 cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
1441 TopBlock ||
1442 cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
1443 CGF.Builder.GetInsertBlock()) {
1444 ThreadID = CGF.EmitLoadOfScalar(LVal, Loc);
1445 // If value loaded in entry block, cache it and use it everywhere in
1446 // function.
1447 if (CGF.Builder.GetInsertBlock() == TopBlock)
1448 OpenMPLocThreadIDMap[CGF.CurFn].ThreadID = ThreadID;
1449 return ThreadID;
1450 }
1451 }
1452 }
1453
1454 // This is not an outlined function region - need to call __kmpc_int32
1455 // kmpc_global_thread_num(ident_t *loc).
1456 // Generate thread id value and cache this value for use across the
1457 // function.
1458 auto &Elem = OpenMPLocThreadIDMap[CGF.CurFn];
1459 if (!Elem.ServiceInsertPt)
1461 CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
1462 CGF.Builder.SetInsertPoint(Elem.ServiceInsertPt);
1464 llvm::CallInst *Call = CGF.Builder.CreateCall(
1465 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
1466 OMPRTL___kmpc_global_thread_num),
1467 emitUpdateLocation(CGF, Loc));
1468 Call->setCallingConv(CGF.getRuntimeCC());
1469 Elem.ThreadID = Call;
1470 return Call;
1471}
1472
1474 assert(CGF.CurFn && "No function in current CodeGenFunction.");
1475 if (OpenMPLocThreadIDMap.count(CGF.CurFn)) {
1477 OpenMPLocThreadIDMap.erase(CGF.CurFn);
1478 }
1479 if (auto I = FunctionUDRMap.find(CGF.CurFn); I != FunctionUDRMap.end()) {
1480 for (const auto *D : I->second)
1481 UDRMap.erase(D);
1482 FunctionUDRMap.erase(I);
1483 }
1484 if (auto I = FunctionUDMMap.find(CGF.CurFn); I != FunctionUDMMap.end()) {
1485 for (const auto *D : I->second)
1486 UDMMap.erase(D);
1487 FunctionUDMMap.erase(I);
1488 }
1491}
1492
1494 return OMPBuilder.IdentPtr;
1495}
1496
1497static llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseKind
1499 std::optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
1500 OMPDeclareTargetDeclAttr::getDeviceType(VD);
1501 if (!DevTy)
1502 return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseNone;
1503
1504 switch ((int)*DevTy) { // Avoid -Wcovered-switch-default
1505 case OMPDeclareTargetDeclAttr::DT_Host:
1506 return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseHost;
1507 break;
1508 case OMPDeclareTargetDeclAttr::DT_NoHost:
1509 return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseNoHost;
1510 break;
1511 case OMPDeclareTargetDeclAttr::DT_Any:
1512 return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseAny;
1513 break;
1514 default:
1515 return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseNone;
1516 break;
1517 }
1518}
1519
1520static llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind
1522 std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> MapType =
1523 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
1524 if (!MapType)
1525 return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryNone;
1526 switch ((int)*MapType) { // Avoid -Wcovered-switch-default
1527 case OMPDeclareTargetDeclAttr::MapTypeTy::MT_To:
1528 return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryTo;
1529 break;
1530 case OMPDeclareTargetDeclAttr::MapTypeTy::MT_Enter:
1531 return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryEnter;
1532 break;
1533 case OMPDeclareTargetDeclAttr::MapTypeTy::MT_Link:
1534 return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryLink;
1535 break;
1536 default:
1537 return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryNone;
1538 break;
1539 }
1540}
1541
1542static llvm::TargetRegionEntryInfo getEntryInfoFromPresumedLoc(
1543 CodeGenModule &CGM, llvm::OpenMPIRBuilder &OMPBuilder,
1544 SourceLocation BeginLoc, llvm::StringRef ParentName = "") {
1545
1546 auto FileInfoCallBack = [&]() {
1548 PresumedLoc PLoc = SM.getPresumedLoc(BeginLoc);
1549
1550 if (!CGM.getFileSystem()->exists(PLoc.getFilename()))
1551 PLoc = SM.getPresumedLoc(BeginLoc, /*UseLineDirectives=*/false);
1552
1553 return std::pair<std::string, uint64_t>(PLoc.getFilename(), PLoc.getLine());
1554 };
1555
1556 return OMPBuilder.getTargetEntryUniqueInfo(FileInfoCallBack,
1557 *CGM.getFileSystem(), ParentName);
1558}
1559
1561 auto AddrOfGlobal = [&VD, this]() { return CGM.GetAddrOfGlobal(VD); };
1562
1563 auto LinkageForVariable = [&VD, this]() {
1564 return CGM.getLLVMLinkageVarDefinition(VD);
1565 };
1566
1567 std::vector<llvm::GlobalVariable *> GeneratedRefs;
1568
1569 llvm::Type *LlvmPtrTy = CGM.getTypes().ConvertTypeForMem(
1570 CGM.getContext().getPointerType(VD->getType()));
1571 llvm::Constant *addr = OMPBuilder.getAddrOfDeclareTargetVar(
1573 VD->hasDefinition(CGM.getContext()) == VarDecl::DeclarationOnly,
1574 VD->isExternallyVisible(),
1576 VD->getCanonicalDecl()->getBeginLoc()),
1577 CGM.getMangledName(VD), GeneratedRefs, CGM.getLangOpts().OpenMPSimd,
1578 CGM.getLangOpts().OMPTargetTriples, LlvmPtrTy, AddrOfGlobal,
1579 LinkageForVariable);
1580
1581 if (!addr)
1582 return ConstantAddress::invalid();
1583 return ConstantAddress(addr, LlvmPtrTy, CGM.getContext().getDeclAlign(VD));
1584}
1585
1586llvm::Constant *
1588 assert(!CGM.getLangOpts().OpenMPUseTLS ||
1589 !CGM.getContext().getTargetInfo().isTLSSupported());
1590 // Lookup the entry, lazily creating it if necessary.
1591 std::string Suffix = getName({"cache", ""});
1592 return OMPBuilder.getOrCreateInternalVariable(
1593 CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix).str());
1594}
1595
1597 const VarDecl *VD,
1598 Address VDAddr,
1599 SourceLocation Loc) {
1600 if (CGM.getLangOpts().OpenMPUseTLS &&
1601 CGM.getContext().getTargetInfo().isTLSSupported())
1602 return VDAddr;
1603
1604 llvm::Type *VarTy = VDAddr.getElementType();
1605 llvm::Value *Args[] = {
1606 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
1607 CGF.Builder.CreatePointerCast(VDAddr.emitRawPointer(CGF), CGM.Int8PtrTy),
1608 CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)),
1610 return Address(
1611 CGF.EmitRuntimeCall(
1612 OMPBuilder.getOrCreateRuntimeFunction(
1613 CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
1614 Args),
1615 CGF.Int8Ty, VDAddr.getAlignment());
1616}
1617
1619 CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor,
1620 llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
1621 // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime
1622 // library.
1623 llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc);
1624 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
1625 CGM.getModule(), OMPRTL___kmpc_global_thread_num),
1626 OMPLoc);
1627 // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
1628 // to register constructor/destructor for variable.
1629 llvm::Value *Args[] = {
1630 OMPLoc,
1631 CGF.Builder.CreatePointerCast(VDAddr.emitRawPointer(CGF), CGM.VoidPtrTy),
1632 Ctor, CopyCtor, Dtor};
1633 CGF.EmitRuntimeCall(
1634 OMPBuilder.getOrCreateRuntimeFunction(
1635 CGM.getModule(), OMPRTL___kmpc_threadprivate_register),
1636 Args);
1637}
1638
1640 const VarDecl *VD, Address VDAddr, SourceLocation Loc,
1641 bool PerformInit, CodeGenFunction *CGF) {
1642 if (CGM.getLangOpts().OpenMPUseTLS &&
1643 CGM.getContext().getTargetInfo().isTLSSupported())
1644 return nullptr;
1645
1646 VD = VD->getDefinition(CGM.getContext());
1647 if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) {
1648 QualType ASTTy = VD->getType();
1649
1650 llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
1651 const Expr *Init = VD->getAnyInitializer();
1652 if (CGM.getLangOpts().CPlusPlus && PerformInit) {
1653 // Generate function that re-emits the declaration's initializer into the
1654 // threadprivate copy of the variable VD
1655 CodeGenFunction CtorCGF(CGM);
1656 FunctionArgList Args;
1657 ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
1658 /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
1660 Args.push_back(&Dst);
1661
1662 const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
1663 CGM.getContext().VoidPtrTy, Args);
1664 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
1665 std::string Name = getName({"__kmpc_global_ctor_", ""});
1666 llvm::Function *Fn =
1667 CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
1668 CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
1669 Args, Loc, Loc);
1670 llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar(
1671 CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
1672 CGM.getContext().VoidPtrTy, Dst.getLocation());
1673 Address Arg(ArgVal, CtorCGF.ConvertTypeForMem(ASTTy),
1674 VDAddr.getAlignment());
1675 CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
1676 /*IsInitializer=*/true);
1677 ArgVal = CtorCGF.EmitLoadOfScalar(
1678 CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
1679 CGM.getContext().VoidPtrTy, Dst.getLocation());
1680 CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
1681 CtorCGF.FinishFunction();
1682 Ctor = Fn;
1683 }
1685 // Generate function that emits destructor call for the threadprivate copy
1686 // of the variable VD
1687 CodeGenFunction DtorCGF(CGM);
1688 FunctionArgList Args;
1689 ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
1690 /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
1692 Args.push_back(&Dst);
1693
1694 const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
1695 CGM.getContext().VoidTy, Args);
1696 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
1697 std::string Name = getName({"__kmpc_global_dtor_", ""});
1698 llvm::Function *Fn =
1699 CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
1700 auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
1701 DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
1702 Loc, Loc);
1703 // Create a scope with an artificial location for the body of this function.
1704 auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
1705 llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar(
1706 DtorCGF.GetAddrOfLocalVar(&Dst),
1707 /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation());
1708 DtorCGF.emitDestroy(
1709 Address(ArgVal, DtorCGF.Int8Ty, VDAddr.getAlignment()), ASTTy,
1710 DtorCGF.getDestroyer(ASTTy.isDestructedType()),
1711 DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
1712 DtorCGF.FinishFunction();
1713 Dtor = Fn;
1714 }
1715 // Do not emit init function if it is not required.
1716 if (!Ctor && !Dtor)
1717 return nullptr;
1718
1719 // Copying constructor for the threadprivate variable.
1720 // Must be NULL - reserved by runtime, but currently it requires that this
1721 // parameter is always NULL. Otherwise it fires assertion.
1722 CopyCtor = llvm::Constant::getNullValue(CGM.DefaultPtrTy);
1723 if (Ctor == nullptr) {
1724 Ctor = llvm::Constant::getNullValue(CGM.DefaultPtrTy);
1725 }
1726 if (Dtor == nullptr) {
1727 Dtor = llvm::Constant::getNullValue(CGM.DefaultPtrTy);
1728 }
1729 if (!CGF) {
1730 auto *InitFunctionTy =
1731 llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
1732 std::string Name = getName({"__omp_threadprivate_init_", ""});
1733 llvm::Function *InitFunction = CGM.CreateGlobalInitOrCleanUpFunction(
1734 InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction());
1735 CodeGenFunction InitCGF(CGM);
1736 FunctionArgList ArgList;
1737 InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
1738 CGM.getTypes().arrangeNullaryFunction(), ArgList,
1739 Loc, Loc);
1740 emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
1741 InitCGF.FinishFunction();
1742 return InitFunction;
1743 }
1744 emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
1745 }
1746 return nullptr;
1747}
1748
1750 llvm::GlobalValue *GV) {
1751 std::optional<OMPDeclareTargetDeclAttr *> ActiveAttr =
1752 OMPDeclareTargetDeclAttr::getActiveAttr(FD);
1753
1754 // We only need to handle active 'indirect' declare target functions.
1755 if (!ActiveAttr || !(*ActiveAttr)->getIndirect())
1756 return;
1757
1758 // Get a mangled name to store the new device global in.
1759 llvm::TargetRegionEntryInfo EntryInfo = getEntryInfoFromPresumedLoc(
1761 SmallString<128> Name;
1762 OMPBuilder.OffloadInfoManager.getTargetRegionEntryFnName(Name, EntryInfo);
1763
1764 // We need to generate a new global to hold the address of the indirectly
1765 // called device function. Doing this allows us to keep the visibility and
1766 // linkage of the associated function unchanged while allowing the runtime to
1767 // access its value.
1768 llvm::GlobalValue *Addr = GV;
1769 if (CGM.getLangOpts().OpenMPIsTargetDevice) {
1770 llvm::PointerType *FnPtrTy = llvm::PointerType::get(
1771 CGM.getLLVMContext(),
1772 CGM.getModule().getDataLayout().getProgramAddressSpace());
1773 Addr = new llvm::GlobalVariable(
1774 CGM.getModule(), FnPtrTy,
1775 /*isConstant=*/true, llvm::GlobalValue::ExternalLinkage, GV, Name,
1776 nullptr, llvm::GlobalValue::NotThreadLocal,
1777 CGM.getModule().getDataLayout().getDefaultGlobalsAddressSpace());
1778 Addr->setVisibility(llvm::GlobalValue::ProtectedVisibility);
1779 }
1780
1781 // Register the indirect Vtable:
1782 // This is similar to OMPTargetGlobalVarEntryIndirect, except that the
1783 // size field refers to the size of memory pointed to, not the size of
1784 // the pointer symbol itself (which is implicitly the size of a pointer).
1785 OMPBuilder.OffloadInfoManager.registerDeviceGlobalVarEntryInfo(
1786 Name, Addr, CGM.GetTargetTypeStoreSize(CGM.VoidPtrTy).getQuantity(),
1787 llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryIndirect,
1788 llvm::GlobalValue::WeakODRLinkage);
1789}
1790
1791void CGOpenMPRuntime::registerVTableOffloadEntry(llvm::GlobalVariable *VTable,
1792 const VarDecl *VD) {
1793 // TODO: add logic to avoid duplicate vtable registrations per
1794 // translation unit; though for external linkage, this should no
1795 // longer be an issue - or at least we can avoid the issue by
1796 // checking for an existing offloading entry. But, perhaps the
1797 // better approach is to defer emission of the vtables and offload
1798 // entries until later (by tracking a list of items that need to be
1799 // emitted).
1800
1801 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
1802
1803 // Generate a new externally visible global to point to the
1804 // internally visible vtable. Doing this allows us to keep the
1805 // visibility and linkage of the associated vtable unchanged while
1806 // allowing the runtime to access its value. The externally
1807 // visible global var needs to be emitted with a unique mangled
1808 // name that won't conflict with similarly named (internal)
1809 // vtables in other translation units.
1810
1811 // Register vtable with source location of dynamic object in map
1812 // clause.
1813 llvm::TargetRegionEntryInfo EntryInfo = getEntryInfoFromPresumedLoc(
1815 VTable->getName());
1816
1817 llvm::GlobalVariable *Addr = VTable;
1818 SmallString<128> AddrName;
1819 OMPBuilder.OffloadInfoManager.getTargetRegionEntryFnName(AddrName, EntryInfo);
1820 AddrName.append("addr");
1821
1822 if (CGM.getLangOpts().OpenMPIsTargetDevice) {
1823 Addr = new llvm::GlobalVariable(
1824 CGM.getModule(), VTable->getType(),
1825 /*isConstant=*/true, llvm::GlobalValue::ExternalLinkage, VTable,
1826 AddrName,
1827 /*InsertBefore=*/nullptr, llvm::GlobalValue::NotThreadLocal,
1828 CGM.getModule().getDataLayout().getDefaultGlobalsAddressSpace());
1829 Addr->setVisibility(llvm::GlobalValue::ProtectedVisibility);
1830 }
1831 OMPBuilder.OffloadInfoManager.registerDeviceGlobalVarEntryInfo(
1832 AddrName, VTable,
1833 CGM.getDataLayout().getTypeAllocSize(VTable->getInitializer()->getType()),
1834 llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryIndirectVTable,
1835 llvm::GlobalValue::WeakODRLinkage);
1836}
1837
1840 const VarDecl *VD) {
1841 // Register C++ VTable to OpenMP Offload Entry if it's a new
1842 // CXXRecordDecl.
1843 if (CXXRecord && CXXRecord->isDynamicClass() &&
1844 !CGM.getOpenMPRuntime().VTableDeclMap.contains(CXXRecord)) {
1845 auto Res = CGM.getOpenMPRuntime().VTableDeclMap.try_emplace(CXXRecord, VD);
1846 if (Res.second) {
1847 CGM.EmitVTable(CXXRecord);
1848 CodeGenVTables VTables = CGM.getVTables();
1849 llvm::GlobalVariable *VTablesAddr = VTables.GetAddrOfVTable(CXXRecord);
1850 assert(VTablesAddr && "Expected non-null VTable address");
1851 CGM.getOpenMPRuntime().registerVTableOffloadEntry(VTablesAddr, VD);
1852 // Emit VTable for all the fields containing dynamic CXXRecord
1853 for (const FieldDecl *Field : CXXRecord->fields()) {
1854 if (CXXRecordDecl *RecordDecl = Field->getType()->getAsCXXRecordDecl())
1856 }
1857 // Emit VTable for all dynamic parent class
1858 for (CXXBaseSpecifier &Base : CXXRecord->bases()) {
1859 if (CXXRecordDecl *BaseDecl = Base.getType()->getAsCXXRecordDecl())
1860 emitAndRegisterVTable(CGM, BaseDecl, VD);
1861 }
1862 }
1863 }
1864}
1865
1867 // Register VTable by scanning through the map clause of OpenMP target region.
1868 // Get CXXRecordDecl and VarDecl from Expr.
1869 auto GetVTableDecl = [](const Expr *E) {
1870 QualType VDTy = E->getType();
1871 CXXRecordDecl *CXXRecord = nullptr;
1872 if (const auto *RefType = VDTy->getAs<LValueReferenceType>())
1873 VDTy = RefType->getPointeeType();
1874 if (VDTy->isPointerType())
1876 else
1877 CXXRecord = VDTy->getAsCXXRecordDecl();
1878
1879 const VarDecl *VD = nullptr;
1880 if (auto *DRE = dyn_cast<DeclRefExpr>(E)) {
1881 VD = cast<VarDecl>(DRE->getDecl());
1882 } else if (auto *MRE = dyn_cast<MemberExpr>(E)) {
1883 if (auto *BaseDRE = dyn_cast<DeclRefExpr>(MRE->getBase())) {
1884 if (auto *BaseVD = dyn_cast<VarDecl>(BaseDRE->getDecl()))
1885 VD = BaseVD;
1886 }
1887 }
1888 return std::pair<CXXRecordDecl *, const VarDecl *>(CXXRecord, VD);
1889 };
1890 // Collect VTable from OpenMP map clause.
1891 for (const auto *C : D.getClausesOfKind<OMPMapClause>()) {
1892 for (const auto *E : C->varlist()) {
1893 auto DeclPair = GetVTableDecl(E);
1894 // Ensure VD is not null
1895 if (DeclPair.second)
1896 emitAndRegisterVTable(CGM, DeclPair.first, DeclPair.second);
1897 }
1898 }
1899}
1900
1902 QualType VarType,
1903 StringRef Name) {
1904 std::string Suffix = getName({"artificial", ""});
1905 llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType);
1906 llvm::GlobalVariable *GAddr = OMPBuilder.getOrCreateInternalVariable(
1907 VarLVType, Twine(Name).concat(Suffix).str());
1908 if (CGM.getLangOpts().OpenMP && CGM.getLangOpts().OpenMPUseTLS &&
1909 CGM.getTarget().isTLSSupported()) {
1910 GAddr->setThreadLocal(/*Val=*/true);
1911 return Address(GAddr, GAddr->getValueType(),
1912 CGM.getContext().getTypeAlignInChars(VarType));
1913 }
1914 std::string CacheSuffix = getName({"cache", ""});
1915 llvm::Value *Args[] = {
1918 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy),
1919 CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy,
1920 /*isSigned=*/false),
1921 OMPBuilder.getOrCreateInternalVariable(
1922 CGM.VoidPtrPtrTy,
1923 Twine(Name).concat(Suffix).concat(CacheSuffix).str())};
1924 return Address(
1926 CGF.EmitRuntimeCall(
1927 OMPBuilder.getOrCreateRuntimeFunction(
1928 CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
1929 Args),
1930 CGF.Builder.getPtrTy(0)),
1931 VarLVType, CGM.getContext().getTypeAlignInChars(VarType));
1932}
1933
1935 const RegionCodeGenTy &ThenGen,
1936 const RegionCodeGenTy &ElseGen) {
1937 CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());
1938
1939 // If the condition constant folds and can be elided, try to avoid emitting
1940 // the condition and the dead arm of the if/else.
1941 bool CondConstant;
1942 if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
1943 if (CondConstant)
1944 ThenGen(CGF);
1945 else
1946 ElseGen(CGF);
1947 return;
1948 }
1949
1950 // Otherwise, the condition did not fold, or we couldn't elide it. Just
1951 // emit the conditional branch.
1952 llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then");
1953 llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else");
1954 llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end");
1955 CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0);
1956
1957 // Emit the 'then' code.
1958 CGF.EmitBlock(ThenBlock);
1959 ThenGen(CGF);
1960 CGF.EmitBranch(ContBlock);
1961 // Emit the 'else' code if present.
1962 // There is no need to emit line number for unconditional branch.
1964 CGF.EmitBlock(ElseBlock);
1965 ElseGen(CGF);
1966 // There is no need to emit line number for unconditional branch.
1968 CGF.EmitBranch(ContBlock);
1969 // Emit the continuation block for code after the if.
1970 CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
1971}
1972
1974 CodeGenFunction &CGF, SourceLocation Loc, llvm::Function *OutlinedFn,
1975 ArrayRef<llvm::Value *> CapturedVars, const Expr *IfCond,
1976 llvm::Value *NumThreads, OpenMPNumThreadsClauseModifier NumThreadsModifier,
1977 OpenMPSeverityClauseKind Severity, const Expr *Message) {
1978 if (!CGF.HaveInsertPoint())
1979 return;
1980 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
1981 auto &M = CGM.getModule();
1982 auto &&ThenGen = [&M, OutlinedFn, CapturedVars, RTLoc,
1983 this](CodeGenFunction &CGF, PrePostActionTy &) {
1984 // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
1985 llvm::Value *Args[] = {
1986 RTLoc,
1987 CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
1988 OutlinedFn};
1990 RealArgs.append(std::begin(Args), std::end(Args));
1991 RealArgs.append(CapturedVars.begin(), CapturedVars.end());
1992
1993 llvm::FunctionCallee RTLFn =
1994 OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_fork_call);
1995 CGF.EmitRuntimeCall(RTLFn, RealArgs);
1996 };
1997 auto &&ElseGen = [&M, OutlinedFn, CapturedVars, RTLoc, Loc,
1998 this](CodeGenFunction &CGF, PrePostActionTy &) {
2000 llvm::Value *ThreadID = RT.getThreadID(CGF, Loc);
2001 // Build calls:
2002 // __kmpc_serialized_parallel(&Loc, GTid);
2003 llvm::Value *Args[] = {RTLoc, ThreadID};
2004 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2005 M, OMPRTL___kmpc_serialized_parallel),
2006 Args);
2007
2008 // OutlinedFn(&GTid, &zero_bound, CapturedStruct);
2009 Address ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc);
2010 RawAddress ZeroAddrBound =
2012 /*Name=*/".bound.zero.addr");
2013 CGF.Builder.CreateStore(CGF.Builder.getInt32(/*C*/ 0), ZeroAddrBound);
2015 // ThreadId for serialized parallels is 0.
2016 OutlinedFnArgs.push_back(ThreadIDAddr.emitRawPointer(CGF));
2017 OutlinedFnArgs.push_back(ZeroAddrBound.getPointer());
2018 OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
2019
2020 // Ensure we do not inline the function. This is trivially true for the ones
2021 // passed to __kmpc_fork_call but the ones called in serialized regions
2022 // could be inlined. This is not a perfect but it is closer to the invariant
2023 // we want, namely, every data environment starts with a new function.
2024 // TODO: We should pass the if condition to the runtime function and do the
2025 // handling there. Much cleaner code.
2026 OutlinedFn->removeFnAttr(llvm::Attribute::AlwaysInline);
2027 OutlinedFn->addFnAttr(llvm::Attribute::NoInline);
2028 RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);
2029
2030 // __kmpc_end_serialized_parallel(&Loc, GTid);
2031 llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID};
2032 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2033 M, OMPRTL___kmpc_end_serialized_parallel),
2034 EndArgs);
2035 };
2036 if (IfCond) {
2037 emitIfClause(CGF, IfCond, ThenGen, ElseGen);
2038 } else {
2039 RegionCodeGenTy ThenRCG(ThenGen);
2040 ThenRCG(CGF);
2041 }
2042}
2043
2044// If we're inside an (outlined) parallel region, use the region info's
2045// thread-ID variable (it is passed in a first argument of the outlined function
2046// as "kmp_int32 *gtid"). Otherwise, if we're not inside parallel region, but in
2047// regular serial code region, get thread ID by calling kmp_int32
2048// kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary and
2049// return the address of that temp.
2051 SourceLocation Loc) {
2052 if (auto *OMPRegionInfo =
2053 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
2054 if (OMPRegionInfo->getThreadIDVariable())
2055 return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress();
2056
2057 llvm::Value *ThreadID = getThreadID(CGF, Loc);
2058 QualType Int32Ty =
2059 CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
2060 Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
2061 CGF.EmitStoreOfScalar(ThreadID,
2062 CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty));
2063
2064 return ThreadIDTemp;
2065}
2066
2067llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
2068 std::string Prefix = Twine("gomp_critical_user_", CriticalName).str();
2069 std::string Name = getName({Prefix, "var"});
2070 return OMPBuilder.getOrCreateInternalVariable(KmpCriticalNameTy, Name);
2071}
2072
2073namespace {
2074/// Common pre(post)-action for different OpenMP constructs.
2075class CommonActionTy final : public PrePostActionTy {
2076 llvm::FunctionCallee EnterCallee;
2077 ArrayRef<llvm::Value *> EnterArgs;
2078 llvm::FunctionCallee ExitCallee;
2079 ArrayRef<llvm::Value *> ExitArgs;
2080 bool Conditional;
2081 llvm::BasicBlock *ContBlock = nullptr;
2082
2083public:
2084 CommonActionTy(llvm::FunctionCallee EnterCallee,
2085 ArrayRef<llvm::Value *> EnterArgs,
2086 llvm::FunctionCallee ExitCallee,
2087 ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false)
2088 : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),
2089 ExitArgs(ExitArgs), Conditional(Conditional) {}
2090 void Enter(CodeGenFunction &CGF) override {
2091 llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs);
2092 if (Conditional) {
2093 llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes);
2094 auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
2095 ContBlock = CGF.createBasicBlock("omp_if.end");
2096 // Generate the branch (If-stmt)
2097 CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
2098 CGF.EmitBlock(ThenBlock);
2099 }
2100 }
2101 void Done(CodeGenFunction &CGF) {
2102 // Emit the rest of blocks/branches
2103 CGF.EmitBranch(ContBlock);
2104 CGF.EmitBlock(ContBlock, true);
2105 }
2106 void Exit(CodeGenFunction &CGF) override {
2107 CGF.EmitRuntimeCall(ExitCallee, ExitArgs);
2108 }
2109};
2110} // anonymous namespace
2111
2113 StringRef CriticalName,
2114 const RegionCodeGenTy &CriticalOpGen,
2115 SourceLocation Loc, const Expr *Hint) {
2116 // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]);
2117 // CriticalOpGen();
2118 // __kmpc_end_critical(ident_t *, gtid, Lock);
2119 // Prepare arguments and build a call to __kmpc_critical
2120 if (!CGF.HaveInsertPoint())
2121 return;
2122 llvm::FunctionCallee RuntimeFcn = OMPBuilder.getOrCreateRuntimeFunction(
2123 CGM.getModule(),
2124 Hint ? OMPRTL___kmpc_critical_with_hint : OMPRTL___kmpc_critical);
2125 llvm::Value *LockVar = getCriticalRegionLock(CriticalName);
2126 unsigned LockVarArgIdx = 2;
2127 if (cast<llvm::GlobalVariable>(LockVar)->getAddressSpace() !=
2128 RuntimeFcn.getFunctionType()
2129 ->getParamType(LockVarArgIdx)
2130 ->getPointerAddressSpace())
2131 LockVar = CGF.Builder.CreateAddrSpaceCast(
2132 LockVar, RuntimeFcn.getFunctionType()->getParamType(LockVarArgIdx));
2133 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2134 LockVar};
2135 llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args),
2136 std::end(Args));
2137 if (Hint) {
2138 EnterArgs.push_back(CGF.Builder.CreateIntCast(
2139 CGF.EmitScalarExpr(Hint), CGM.Int32Ty, /*isSigned=*/false));
2140 }
2141 CommonActionTy Action(RuntimeFcn, EnterArgs,
2142 OMPBuilder.getOrCreateRuntimeFunction(
2143 CGM.getModule(), OMPRTL___kmpc_end_critical),
2144 Args);
2145 CriticalOpGen.setAction(Action);
2146 emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
2147}
2148
2150 const RegionCodeGenTy &MasterOpGen,
2151 SourceLocation Loc) {
2152 if (!CGF.HaveInsertPoint())
2153 return;
2154 // if(__kmpc_master(ident_t *, gtid)) {
2155 // MasterOpGen();
2156 // __kmpc_end_master(ident_t *, gtid);
2157 // }
2158 // Prepare arguments and build a call to __kmpc_master
2159 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2160 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2161 CGM.getModule(), OMPRTL___kmpc_master),
2162 Args,
2163 OMPBuilder.getOrCreateRuntimeFunction(
2164 CGM.getModule(), OMPRTL___kmpc_end_master),
2165 Args,
2166 /*Conditional=*/true);
2167 MasterOpGen.setAction(Action);
2168 emitInlinedDirective(CGF, OMPD_master, MasterOpGen);
2169 Action.Done(CGF);
2170}
2171
2173 const RegionCodeGenTy &MaskedOpGen,
2174 SourceLocation Loc, const Expr *Filter) {
2175 if (!CGF.HaveInsertPoint())
2176 return;
2177 // if(__kmpc_masked(ident_t *, gtid, filter)) {
2178 // MaskedOpGen();
2179 // __kmpc_end_masked(iden_t *, gtid);
2180 // }
2181 // Prepare arguments and build a call to __kmpc_masked
2182 llvm::Value *FilterVal = Filter
2183 ? CGF.EmitScalarExpr(Filter, CGF.Int32Ty)
2184 : llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/0);
2185 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2186 FilterVal};
2187 llvm::Value *ArgsEnd[] = {emitUpdateLocation(CGF, Loc),
2188 getThreadID(CGF, Loc)};
2189 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2190 CGM.getModule(), OMPRTL___kmpc_masked),
2191 Args,
2192 OMPBuilder.getOrCreateRuntimeFunction(
2193 CGM.getModule(), OMPRTL___kmpc_end_masked),
2194 ArgsEnd,
2195 /*Conditional=*/true);
2196 MaskedOpGen.setAction(Action);
2197 emitInlinedDirective(CGF, OMPD_masked, MaskedOpGen);
2198 Action.Done(CGF);
2199}
2200
2202 SourceLocation Loc) {
2203 if (!CGF.HaveInsertPoint())
2204 return;
2205 if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2206 OMPBuilder.createTaskyield(CGF.Builder);
2207 } else {
2208 // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
2209 llvm::Value *Args[] = {
2210 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2211 llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
2212 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2213 CGM.getModule(), OMPRTL___kmpc_omp_taskyield),
2214 Args);
2215 }
2216
2217 if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
2218 Region->emitUntiedSwitch(CGF);
2219}
2220
2222 const RegionCodeGenTy &TaskgroupOpGen,
2223 SourceLocation Loc) {
2224 if (!CGF.HaveInsertPoint())
2225 return;
2226 // __kmpc_taskgroup(ident_t *, gtid);
2227 // TaskgroupOpGen();
2228 // __kmpc_end_taskgroup(ident_t *, gtid);
2229 // Prepare arguments and build a call to __kmpc_taskgroup
2230 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2231 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2232 CGM.getModule(), OMPRTL___kmpc_taskgroup),
2233 Args,
2234 OMPBuilder.getOrCreateRuntimeFunction(
2235 CGM.getModule(), OMPRTL___kmpc_end_taskgroup),
2236 Args);
2237 TaskgroupOpGen.setAction(Action);
2238 emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen);
2239}
2240
2241/// Given an array of pointers to variables, project the address of a
2242/// given variable.
2244 unsigned Index, const VarDecl *Var) {
2245 // Pull out the pointer to the variable.
2246 Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index);
2247 llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr);
2248
2249 llvm::Type *ElemTy = CGF.ConvertTypeForMem(Var->getType());
2250 return Address(Ptr, ElemTy, CGF.getContext().getDeclAlign(Var));
2251}
2252
2254 CodeGenModule &CGM, llvm::Type *ArgsElemType,
2255 ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
2256 ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps,
2257 SourceLocation Loc) {
2258 ASTContext &C = CGM.getContext();
2259 // void copy_func(void *LHSArg, void *RHSArg);
2260 FunctionArgList Args;
2261 ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
2263 ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
2265 Args.push_back(&LHSArg);
2266 Args.push_back(&RHSArg);
2267 const auto &CGFI =
2268 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
2269 std::string Name =
2270 CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"});
2271 auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
2272 llvm::GlobalValue::InternalLinkage, Name,
2273 &CGM.getModule());
2275 if (!CGM.getCodeGenOpts().SampleProfileFile.empty())
2276 Fn->addFnAttr("sample-profile-suffix-elision-policy", "selected");
2277 Fn->setDoesNotRecurse();
2278 CodeGenFunction CGF(CGM);
2279 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
2280 // Dest = (void*[n])(LHSArg);
2281 // Src = (void*[n])(RHSArg);
2283 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
2284 CGF.Builder.getPtrTy(0)),
2285 ArgsElemType, CGF.getPointerAlign());
2287 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
2288 CGF.Builder.getPtrTy(0)),
2289 ArgsElemType, CGF.getPointerAlign());
2290 // *(Type0*)Dst[0] = *(Type0*)Src[0];
2291 // *(Type1*)Dst[1] = *(Type1*)Src[1];
2292 // ...
2293 // *(Typen*)Dst[n] = *(Typen*)Src[n];
2294 for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
2295 const auto *DestVar =
2296 cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
2297 Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);
2298
2299 const auto *SrcVar =
2300 cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
2301 Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);
2302
2303 const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
2304 QualType Type = VD->getType();
2305 CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);
2306 }
2307 CGF.FinishFunction();
2308 return Fn;
2309}
2310
2312 const RegionCodeGenTy &SingleOpGen,
2313 SourceLocation Loc,
2314 ArrayRef<const Expr *> CopyprivateVars,
2315 ArrayRef<const Expr *> SrcExprs,
2316 ArrayRef<const Expr *> DstExprs,
2317 ArrayRef<const Expr *> AssignmentOps) {
2318 if (!CGF.HaveInsertPoint())
2319 return;
2320 assert(CopyprivateVars.size() == SrcExprs.size() &&
2321 CopyprivateVars.size() == DstExprs.size() &&
2322 CopyprivateVars.size() == AssignmentOps.size());
2323 ASTContext &C = CGM.getContext();
2324 // int32 did_it = 0;
2325 // if(__kmpc_single(ident_t *, gtid)) {
2326 // SingleOpGen();
2327 // __kmpc_end_single(ident_t *, gtid);
2328 // did_it = 1;
2329 // }
2330 // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
2331 // <copy_func>, did_it);
2332
2333 Address DidIt = Address::invalid();
2334 if (!CopyprivateVars.empty()) {
2335 // int32 did_it = 0;
2336 QualType KmpInt32Ty =
2337 C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
2338 DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
2339 CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
2340 }
2341 // Prepare arguments and build a call to __kmpc_single
2342 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2343 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2344 CGM.getModule(), OMPRTL___kmpc_single),
2345 Args,
2346 OMPBuilder.getOrCreateRuntimeFunction(
2347 CGM.getModule(), OMPRTL___kmpc_end_single),
2348 Args,
2349 /*Conditional=*/true);
2350 SingleOpGen.setAction(Action);
2351 emitInlinedDirective(CGF, OMPD_single, SingleOpGen);
2352 if (DidIt.isValid()) {
2353 // did_it = 1;
2354 CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
2355 }
2356 Action.Done(CGF);
2357 // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
2358 // <copy_func>, did_it);
2359 if (DidIt.isValid()) {
2360 llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
2361 QualType CopyprivateArrayTy = C.getConstantArrayType(
2362 C.VoidPtrTy, ArraySize, nullptr, ArraySizeModifier::Normal,
2363 /*IndexTypeQuals=*/0);
2364 // Create a list of all private variables for copyprivate.
2365 Address CopyprivateList =
2366 CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
2367 for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
2368 Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I);
2369 CGF.Builder.CreateStore(
2371 CGF.EmitLValue(CopyprivateVars[I]).getPointer(CGF),
2372 CGF.VoidPtrTy),
2373 Elem);
2374 }
2375 // Build function that copies private values from single region to all other
2376 // threads in the corresponding parallel region.
2377 llvm::Value *CpyFn = emitCopyprivateCopyFunction(
2378 CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy), CopyprivateVars,
2379 SrcExprs, DstExprs, AssignmentOps, Loc);
2380 llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy);
2382 CopyprivateList, CGF.VoidPtrTy, CGF.Int8Ty);
2383 llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt);
2384 llvm::Value *Args[] = {
2385 emitUpdateLocation(CGF, Loc), // ident_t *<loc>
2386 getThreadID(CGF, Loc), // i32 <gtid>
2387 BufSize, // size_t <buf_size>
2388 CL.emitRawPointer(CGF), // void *<copyprivate list>
2389 CpyFn, // void (*) (void *, void *) <copy_func>
2390 DidItVal // i32 did_it
2391 };
2392 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2393 CGM.getModule(), OMPRTL___kmpc_copyprivate),
2394 Args);
2395 }
2396}
2397
2399 const RegionCodeGenTy &OrderedOpGen,
2400 SourceLocation Loc, bool IsThreads) {
2401 if (!CGF.HaveInsertPoint())
2402 return;
2403 // __kmpc_ordered(ident_t *, gtid);
2404 // OrderedOpGen();
2405 // __kmpc_end_ordered(ident_t *, gtid);
2406 // Prepare arguments and build a call to __kmpc_ordered
2407 if (IsThreads) {
2408 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2409 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2410 CGM.getModule(), OMPRTL___kmpc_ordered),
2411 Args,
2412 OMPBuilder.getOrCreateRuntimeFunction(
2413 CGM.getModule(), OMPRTL___kmpc_end_ordered),
2414 Args);
2415 OrderedOpGen.setAction(Action);
2416 emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
2417 return;
2418 }
2419 emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
2420}
2421
2423 unsigned Flags;
2424 if (Kind == OMPD_for)
2425 Flags = OMP_IDENT_BARRIER_IMPL_FOR;
2426 else if (Kind == OMPD_sections)
2427 Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS;
2428 else if (Kind == OMPD_single)
2429 Flags = OMP_IDENT_BARRIER_IMPL_SINGLE;
2430 else if (Kind == OMPD_barrier)
2431 Flags = OMP_IDENT_BARRIER_EXPL;
2432 else
2433 Flags = OMP_IDENT_BARRIER_IMPL;
2434 return Flags;
2435}
2436
2438 CodeGenFunction &CGF, const OMPLoopDirective &S,
2439 OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const {
2440 // Check if the loop directive is actually a doacross loop directive. In this
2441 // case choose static, 1 schedule.
2442 if (llvm::any_of(
2443 S.getClausesOfKind<OMPOrderedClause>(),
2444 [](const OMPOrderedClause *C) { return C->getNumForLoops(); })) {
2445 ScheduleKind = OMPC_SCHEDULE_static;
2446 // Chunk size is 1 in this case.
2447 llvm::APInt ChunkSize(32, 1);
2448 ChunkExpr = IntegerLiteral::Create(
2449 CGF.getContext(), ChunkSize,
2450 CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
2451 SourceLocation());
2452 }
2453}
2454
2456 OpenMPDirectiveKind Kind, bool EmitChecks,
2457 bool ForceSimpleCall) {
2458 // Check if we should use the OMPBuilder
2459 auto *OMPRegionInfo =
2460 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo);
2461 if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2462 llvm::OpenMPIRBuilder::InsertPointTy AfterIP =
2463 cantFail(OMPBuilder.createBarrier(CGF.Builder, Kind, ForceSimpleCall,
2464 EmitChecks));
2465 CGF.Builder.restoreIP(AfterIP);
2466 return;
2467 }
2468
2469 if (!CGF.HaveInsertPoint())
2470 return;
2471 // Build call __kmpc_cancel_barrier(loc, thread_id);
2472 // Build call __kmpc_barrier(loc, thread_id);
2473 unsigned Flags = getDefaultFlagsForBarriers(Kind);
2474 // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc,
2475 // thread_id);
2476 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
2477 getThreadID(CGF, Loc)};
2478 if (OMPRegionInfo) {
2479 if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) {
2480 llvm::Value *Result = CGF.EmitRuntimeCall(
2481 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
2482 OMPRTL___kmpc_cancel_barrier),
2483 Args);
2484 if (EmitChecks) {
2485 // if (__kmpc_cancel_barrier()) {
2486 // exit from construct;
2487 // }
2488 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
2489 llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
2490 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
2491 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
2492 CGF.EmitBlock(ExitBB);
2493 // exit from construct;
2494 CodeGenFunction::JumpDest CancelDestination =
2495 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
2496 CGF.EmitBranchThroughCleanup(CancelDestination);
2497 CGF.EmitBlock(ContBB, /*IsFinished=*/true);
2498 }
2499 return;
2500 }
2501 }
2502 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2503 CGM.getModule(), OMPRTL___kmpc_barrier),
2504 Args);
2505}
2506
2508 Expr *ME, bool IsFatal) {
2509 llvm::Value *MVL = ME ? CGF.EmitScalarExpr(ME)
2510 : llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
2511 // Build call void __kmpc_error(ident_t *loc, int severity, const char
2512 // *message)
2513 llvm::Value *Args[] = {
2514 emitUpdateLocation(CGF, Loc, /*Flags=*/0, /*GenLoc=*/true),
2515 llvm::ConstantInt::get(CGM.Int32Ty, IsFatal ? 2 : 1),
2516 CGF.Builder.CreatePointerCast(MVL, CGM.Int8PtrTy)};
2517 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2518 CGM.getModule(), OMPRTL___kmpc_error),
2519 Args);
2520}
2521
2522/// Map the OpenMP loop schedule to the runtime enumeration.
2523static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
2524 bool Chunked, bool Ordered) {
2525 switch (ScheduleKind) {
2526 case OMPC_SCHEDULE_static:
2527 return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked)
2528 : (Ordered ? OMP_ord_static : OMP_sch_static);
2529 case OMPC_SCHEDULE_dynamic:
2530 return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked;
2531 case OMPC_SCHEDULE_guided:
2532 return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked;
2533 case OMPC_SCHEDULE_runtime:
2534 return Ordered ? OMP_ord_runtime : OMP_sch_runtime;
2535 case OMPC_SCHEDULE_auto:
2536 return Ordered ? OMP_ord_auto : OMP_sch_auto;
2538 assert(!Chunked && "chunk was specified but schedule kind not known");
2539 return Ordered ? OMP_ord_static : OMP_sch_static;
2540 }
2541 llvm_unreachable("Unexpected runtime schedule");
2542}
2543
2544/// Map the OpenMP distribute schedule to the runtime enumeration.
2545static OpenMPSchedType
2547 // only static is allowed for dist_schedule
2548 return Chunked ? OMP_dist_sch_static_chunked : OMP_dist_sch_static;
2549}
2550
2552 bool Chunked) const {
2553 OpenMPSchedType Schedule =
2554 getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2555 return Schedule == OMP_sch_static;
2556}
2557
2559 OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
2560 OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2561 return Schedule == OMP_dist_sch_static;
2562}
2563
2565 bool Chunked) const {
2566 OpenMPSchedType Schedule =
2567 getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2568 return Schedule == OMP_sch_static_chunked;
2569}
2570
2572 OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
2573 OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2574 return Schedule == OMP_dist_sch_static_chunked;
2575}
2576
2578 OpenMPSchedType Schedule =
2579 getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
2580 assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
2581 return Schedule != OMP_sch_static;
2582}
2583
2584static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule,
2587 int Modifier = 0;
2588 switch (M1) {
2589 case OMPC_SCHEDULE_MODIFIER_monotonic:
2590 Modifier = OMP_sch_modifier_monotonic;
2591 break;
2592 case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2593 Modifier = OMP_sch_modifier_nonmonotonic;
2594 break;
2595 case OMPC_SCHEDULE_MODIFIER_simd:
2596 if (Schedule == OMP_sch_static_chunked)
2597 Schedule = OMP_sch_static_balanced_chunked;
2598 break;
2601 break;
2602 }
2603 switch (M2) {
2604 case OMPC_SCHEDULE_MODIFIER_monotonic:
2605 Modifier = OMP_sch_modifier_monotonic;
2606 break;
2607 case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2608 Modifier = OMP_sch_modifier_nonmonotonic;
2609 break;
2610 case OMPC_SCHEDULE_MODIFIER_simd:
2611 if (Schedule == OMP_sch_static_chunked)
2612 Schedule = OMP_sch_static_balanced_chunked;
2613 break;
2616 break;
2617 }
2618 // OpenMP 5.0, 2.9.2 Worksharing-Loop Construct, Desription.
2619 // If the static schedule kind is specified or if the ordered clause is
2620 // specified, and if the nonmonotonic modifier is not specified, the effect is
2621 // as if the monotonic modifier is specified. Otherwise, unless the monotonic
2622 // modifier is specified, the effect is as if the nonmonotonic modifier is
2623 // specified.
2624 if (CGM.getLangOpts().OpenMP >= 50 && Modifier == 0) {
2625 if (!(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static ||
2626 Schedule == OMP_sch_static_balanced_chunked ||
2627 Schedule == OMP_ord_static_chunked || Schedule == OMP_ord_static ||
2628 Schedule == OMP_dist_sch_static_chunked ||
2629 Schedule == OMP_dist_sch_static))
2630 Modifier = OMP_sch_modifier_nonmonotonic;
2631 }
2632 return Schedule | Modifier;
2633}
2634
2637 const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
2638 bool Ordered, const DispatchRTInput &DispatchValues) {
2639 if (!CGF.HaveInsertPoint())
2640 return;
2641 OpenMPSchedType Schedule = getRuntimeSchedule(
2642 ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered);
2643 assert(Ordered ||
2644 (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
2645 Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked &&
2646 Schedule != OMP_sch_static_balanced_chunked));
2647 // Call __kmpc_dispatch_init(
2648 // ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
2649 // kmp_int[32|64] lower, kmp_int[32|64] upper,
2650 // kmp_int[32|64] stride, kmp_int[32|64] chunk);
2651
2652 // If the Chunk was not specified in the clause - use default value 1.
2653 llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk
2654 : CGF.Builder.getIntN(IVSize, 1);
2655 llvm::Value *Args[] = {
2656 emitUpdateLocation(CGF, Loc),
2657 getThreadID(CGF, Loc),
2658 CGF.Builder.getInt32(addMonoNonMonoModifier(
2659 CGM, Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type
2660 DispatchValues.LB, // Lower
2661 DispatchValues.UB, // Upper
2662 CGF.Builder.getIntN(IVSize, 1), // Stride
2663 Chunk // Chunk
2664 };
2665 CGF.EmitRuntimeCall(OMPBuilder.createDispatchInitFunction(IVSize, IVSigned),
2666 Args);
2667}
2668
2670 SourceLocation Loc) {
2671 if (!CGF.HaveInsertPoint())
2672 return;
2673 // Call __kmpc_dispatch_deinit(ident_t *loc, kmp_int32 tid);
2674 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2675 CGF.EmitRuntimeCall(OMPBuilder.createDispatchDeinitFunction(), Args);
2676}
2677
2679 CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId,
2680 llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule,
2682 const CGOpenMPRuntime::StaticRTInput &Values) {
2683 if (!CGF.HaveInsertPoint())
2684 return;
2685
2686 assert(!Values.Ordered);
2687 assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
2688 Schedule == OMP_sch_static_balanced_chunked ||
2689 Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked ||
2690 Schedule == OMP_dist_sch_static ||
2691 Schedule == OMP_dist_sch_static_chunked);
2692
2693 // Call __kmpc_for_static_init(
2694 // ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
2695 // kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
2696 // kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
2697 // kmp_int[32|64] incr, kmp_int[32|64] chunk);
2698 llvm::Value *Chunk = Values.Chunk;
2699 if (Chunk == nullptr) {
2700 assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static ||
2701 Schedule == OMP_dist_sch_static) &&
2702 "expected static non-chunked schedule");
2703 // If the Chunk was not specified in the clause - use default value 1.
2704 Chunk = CGF.Builder.getIntN(Values.IVSize, 1);
2705 } else {
2706 assert((Schedule == OMP_sch_static_chunked ||
2707 Schedule == OMP_sch_static_balanced_chunked ||
2708 Schedule == OMP_ord_static_chunked ||
2709 Schedule == OMP_dist_sch_static_chunked) &&
2710 "expected static chunked schedule");
2711 }
2712 llvm::Value *Args[] = {
2713 UpdateLocation,
2714 ThreadId,
2715 CGF.Builder.getInt32(addMonoNonMonoModifier(CGF.CGM, Schedule, M1,
2716 M2)), // Schedule type
2717 Values.IL.emitRawPointer(CGF), // &isLastIter
2718 Values.LB.emitRawPointer(CGF), // &LB
2719 Values.UB.emitRawPointer(CGF), // &UB
2720 Values.ST.emitRawPointer(CGF), // &Stride
2721 CGF.Builder.getIntN(Values.IVSize, 1), // Incr
2722 Chunk // Chunk
2723 };
2724 CGF.EmitRuntimeCall(ForStaticInitFunction, Args);
2725}
2726
2728 SourceLocation Loc,
2729 OpenMPDirectiveKind DKind,
2730 const OpenMPScheduleTy &ScheduleKind,
2731 const StaticRTInput &Values) {
2732 OpenMPSchedType ScheduleNum = getRuntimeSchedule(
2733 ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered);
2734 assert((isOpenMPWorksharingDirective(DKind) || (DKind == OMPD_loop)) &&
2735 "Expected loop-based or sections-based directive.");
2736 llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc,
2738 ? OMP_IDENT_WORK_LOOP
2739 : OMP_IDENT_WORK_SECTIONS);
2740 llvm::Value *ThreadId = getThreadID(CGF, Loc);
2741 llvm::FunctionCallee StaticInitFunction =
2742 OMPBuilder.createForStaticInitFunction(Values.IVSize, Values.IVSigned,
2743 false);
2745 emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
2746 ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values);
2747}
2748
2752 const CGOpenMPRuntime::StaticRTInput &Values) {
2753 OpenMPSchedType ScheduleNum =
2754 getRuntimeSchedule(SchedKind, Values.Chunk != nullptr);
2755 llvm::Value *UpdatedLocation =
2756 emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE);
2757 llvm::Value *ThreadId = getThreadID(CGF, Loc);
2758 llvm::FunctionCallee StaticInitFunction;
2759 bool isGPUDistribute =
2760 CGM.getLangOpts().OpenMPIsTargetDevice && CGM.getTriple().isGPU();
2761 StaticInitFunction = OMPBuilder.createForStaticInitFunction(
2762 Values.IVSize, Values.IVSigned, isGPUDistribute);
2763
2764 emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
2765 ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown,
2767}
2768
2770 SourceLocation Loc,
2771 OpenMPDirectiveKind DKind) {
2772 assert((DKind == OMPD_distribute || DKind == OMPD_for ||
2773 DKind == OMPD_sections) &&
2774 "Expected distribute, for, or sections directive kind");
2775 if (!CGF.HaveInsertPoint())
2776 return;
2777 // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
2778 llvm::Value *Args[] = {
2779 emitUpdateLocation(CGF, Loc,
2781 (DKind == OMPD_target_teams_loop)
2782 ? OMP_IDENT_WORK_DISTRIBUTE
2783 : isOpenMPLoopDirective(DKind)
2784 ? OMP_IDENT_WORK_LOOP
2785 : OMP_IDENT_WORK_SECTIONS),
2786 getThreadID(CGF, Loc)};
2788 if (isOpenMPDistributeDirective(DKind) &&
2789 CGM.getLangOpts().OpenMPIsTargetDevice && CGM.getTriple().isGPU())
2790 CGF.EmitRuntimeCall(
2791 OMPBuilder.getOrCreateRuntimeFunction(
2792 CGM.getModule(), OMPRTL___kmpc_distribute_static_fini),
2793 Args);
2794 else
2795 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2796 CGM.getModule(), OMPRTL___kmpc_for_static_fini),
2797 Args);
2798}
2799
2801 SourceLocation Loc,
2802 unsigned IVSize,
2803 bool IVSigned) {
2804 if (!CGF.HaveInsertPoint())
2805 return;
2806 // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
2807 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2808 CGF.EmitRuntimeCall(OMPBuilder.createDispatchFiniFunction(IVSize, IVSigned),
2809 Args);
2810}
2811
2813 SourceLocation Loc, unsigned IVSize,
2814 bool IVSigned, Address IL,
2815 Address LB, Address UB,
2816 Address ST) {
2817 // Call __kmpc_dispatch_next(
2818 // ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
2819 // kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
2820 // kmp_int[32|64] *p_stride);
2821 llvm::Value *Args[] = {
2822 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2823 IL.emitRawPointer(CGF), // &isLastIter
2824 LB.emitRawPointer(CGF), // &Lower
2825 UB.emitRawPointer(CGF), // &Upper
2826 ST.emitRawPointer(CGF) // &Stride
2827 };
2828 llvm::Value *Call = CGF.EmitRuntimeCall(
2829 OMPBuilder.createDispatchNextFunction(IVSize, IVSigned), Args);
2830 return CGF.EmitScalarConversion(
2831 Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1),
2832 CGF.getContext().BoolTy, Loc);
2833}
2834
2836 const Expr *Message,
2837 SourceLocation Loc) {
2838 if (!Message)
2839 return llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
2840 return CGF.EmitScalarExpr(Message);
2841}
2842
2843llvm::Value *
2845 SourceLocation Loc) {
2846 // OpenMP 6.0, 10.4: "If no severity clause is specified then the effect is
2847 // as if sev-level is fatal."
2848 return llvm::ConstantInt::get(CGM.Int32Ty,
2849 Severity == OMPC_SEVERITY_warning ? 1 : 2);
2850}
2851
2853 CodeGenFunction &CGF, llvm::Value *NumThreads, SourceLocation Loc,
2855 SourceLocation SeverityLoc, const Expr *Message,
2856 SourceLocation MessageLoc) {
2857 if (!CGF.HaveInsertPoint())
2858 return;
2860 {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2861 CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)});
2862 // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
2863 // or __kmpc_push_num_threads_strict(&loc, global_tid, num_threads, severity,
2864 // messsage) if strict modifier is used.
2865 RuntimeFunction FnID = OMPRTL___kmpc_push_num_threads;
2866 if (Modifier == OMPC_NUMTHREADS_strict) {
2867 FnID = OMPRTL___kmpc_push_num_threads_strict;
2868 Args.push_back(emitSeverityClause(Severity, SeverityLoc));
2869 Args.push_back(emitMessageClause(CGF, Message, MessageLoc));
2870 }
2871 CGF.EmitRuntimeCall(
2872 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), FnID), Args);
2873}
2874
2876 ProcBindKind ProcBind,
2877 SourceLocation Loc) {
2878 if (!CGF.HaveInsertPoint())
2879 return;
2880 assert(ProcBind != OMP_PROC_BIND_unknown && "Unsupported proc_bind value.");
2881 // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind)
2882 llvm::Value *Args[] = {
2883 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2884 llvm::ConstantInt::get(CGM.IntTy, unsigned(ProcBind), /*isSigned=*/true)};
2885 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2886 CGM.getModule(), OMPRTL___kmpc_push_proc_bind),
2887 Args);
2888}
2889
2891 SourceLocation Loc, llvm::AtomicOrdering AO) {
2892 if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2893 OMPBuilder.createFlush(CGF.Builder);
2894 } else {
2895 if (!CGF.HaveInsertPoint())
2896 return;
2897 // Build call void __kmpc_flush(ident_t *loc)
2898 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2899 CGM.getModule(), OMPRTL___kmpc_flush),
2900 emitUpdateLocation(CGF, Loc));
2901 }
2902}
2903
2904namespace {
2905/// Indexes of fields for type kmp_task_t.
2906enum KmpTaskTFields {
2907 /// List of shared variables.
2908 KmpTaskTShareds,
2909 /// Task routine.
2910 KmpTaskTRoutine,
2911 /// Partition id for the untied tasks.
2912 KmpTaskTPartId,
2913 /// Function with call of destructors for private variables.
2914 Data1,
2915 /// Task priority.
2916 Data2,
2917 /// (Taskloops only) Lower bound.
2918 KmpTaskTLowerBound,
2919 /// (Taskloops only) Upper bound.
2920 KmpTaskTUpperBound,
2921 /// (Taskloops only) Stride.
2922 KmpTaskTStride,
2923 /// (Taskloops only) Is last iteration flag.
2924 KmpTaskTLastIter,
2925 /// (Taskloops only) Reduction data.
2926 KmpTaskTReductions,
2927};
2928} // anonymous namespace
2929
2931 // If we are in simd mode or there are no entries, we don't need to do
2932 // anything.
2933 if (CGM.getLangOpts().OpenMPSimd || OMPBuilder.OffloadInfoManager.empty())
2934 return;
2935
2936 llvm::OpenMPIRBuilder::EmitMetadataErrorReportFunctionTy &&ErrorReportFn =
2937 [this](llvm::OpenMPIRBuilder::EmitMetadataErrorKind Kind,
2938 const llvm::TargetRegionEntryInfo &EntryInfo) -> void {
2939 SourceLocation Loc;
2940 if (Kind != llvm::OpenMPIRBuilder::EMIT_MD_GLOBAL_VAR_LINK_ERROR) {
2941 for (auto I = CGM.getContext().getSourceManager().fileinfo_begin(),
2942 E = CGM.getContext().getSourceManager().fileinfo_end();
2943 I != E; ++I) {
2944 if (I->getFirst().getUniqueID().getDevice() == EntryInfo.DeviceID &&
2945 I->getFirst().getUniqueID().getFile() == EntryInfo.FileID) {
2946 Loc = CGM.getContext().getSourceManager().translateFileLineCol(
2947 I->getFirst(), EntryInfo.Line, 1);
2948 break;
2949 }
2950 }
2951 }
2952 switch (Kind) {
2953 case llvm::OpenMPIRBuilder::EMIT_MD_TARGET_REGION_ERROR: {
2954 CGM.getDiags().Report(Loc,
2955 diag::err_target_region_offloading_entry_incorrect)
2956 << EntryInfo.ParentName;
2957 } break;
2958 case llvm::OpenMPIRBuilder::EMIT_MD_DECLARE_TARGET_ERROR: {
2959 CGM.getDiags().Report(
2960 Loc, diag::err_target_var_offloading_entry_incorrect_with_parent)
2961 << EntryInfo.ParentName;
2962 } break;
2963 case llvm::OpenMPIRBuilder::EMIT_MD_GLOBAL_VAR_LINK_ERROR: {
2964 CGM.getDiags().Report(diag::err_target_var_offloading_entry_incorrect);
2965 } break;
2966 case llvm::OpenMPIRBuilder::EMIT_MD_GLOBAL_VAR_INDIRECT_ERROR: {
2967 unsigned DiagID = CGM.getDiags().getCustomDiagID(
2968 DiagnosticsEngine::Error, "Offloading entry for indirect declare "
2969 "target variable is incorrect: the "
2970 "address is invalid.");
2971 CGM.getDiags().Report(DiagID);
2972 } break;
2973 }
2974 };
2975
2976 OMPBuilder.createOffloadEntriesAndInfoMetadata(ErrorReportFn);
2977}
2978
2980 if (!KmpRoutineEntryPtrTy) {
2981 // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.
2982 ASTContext &C = CGM.getContext();
2983 QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};
2985 KmpRoutineEntryPtrQTy = C.getPointerType(
2986 C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI));
2987 KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy);
2988 }
2989}
2990
2991namespace {
2992struct PrivateHelpersTy {
2993 PrivateHelpersTy(const Expr *OriginalRef, const VarDecl *Original,
2994 const VarDecl *PrivateCopy, const VarDecl *PrivateElemInit)
2995 : OriginalRef(OriginalRef), Original(Original), PrivateCopy(PrivateCopy),
2996 PrivateElemInit(PrivateElemInit) {}
2997 PrivateHelpersTy(const VarDecl *Original) : Original(Original) {}
2998 const Expr *OriginalRef = nullptr;
2999 const VarDecl *Original = nullptr;
3000 const VarDecl *PrivateCopy = nullptr;
3001 const VarDecl *PrivateElemInit = nullptr;
3002 bool isLocalPrivate() const {
3003 return !OriginalRef && !PrivateCopy && !PrivateElemInit;
3004 }
3005};
3006typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy;
3007} // anonymous namespace
3008
3009static bool isAllocatableDecl(const VarDecl *VD) {
3010 const VarDecl *CVD = VD->getCanonicalDecl();
3011 if (!CVD->hasAttr<OMPAllocateDeclAttr>())
3012 return false;
3013 const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
3014 // Use the default allocation.
3015 return !(AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc &&
3016 !AA->getAllocator());
3017}
3018
3019static RecordDecl *
3021 if (!Privates.empty()) {
3022 ASTContext &C = CGM.getContext();
3023 // Build struct .kmp_privates_t. {
3024 // /* private vars */
3025 // };
3026 RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t");
3027 RD->startDefinition();
3028 for (const auto &Pair : Privates) {
3029 const VarDecl *VD = Pair.second.Original;
3031 // If the private variable is a local variable with lvalue ref type,
3032 // allocate the pointer instead of the pointee type.
3033 if (Pair.second.isLocalPrivate()) {
3034 if (VD->getType()->isLValueReferenceType())
3035 Type = C.getPointerType(Type);
3036 if (isAllocatableDecl(VD))
3037 Type = C.getPointerType(Type);
3038 }
3040 if (VD->hasAttrs()) {
3041 for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()),
3042 E(VD->getAttrs().end());
3043 I != E; ++I)
3044 FD->addAttr(*I);
3045 }
3046 }
3047 RD->completeDefinition();
3048 return RD;
3049 }
3050 return nullptr;
3051}
3052
3053static RecordDecl *
3055 QualType KmpInt32Ty,
3056 QualType KmpRoutineEntryPointerQTy) {
3057 ASTContext &C = CGM.getContext();
3058 // Build struct kmp_task_t {
3059 // void * shareds;
3060 // kmp_routine_entry_t routine;
3061 // kmp_int32 part_id;
3062 // kmp_cmplrdata_t data1;
3063 // kmp_cmplrdata_t data2;
3064 // For taskloops additional fields:
3065 // kmp_uint64 lb;
3066 // kmp_uint64 ub;
3067 // kmp_int64 st;
3068 // kmp_int32 liter;
3069 // void * reductions;
3070 // };
3071 RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TagTypeKind::Union);
3072 UD->startDefinition();
3073 addFieldToRecordDecl(C, UD, KmpInt32Ty);
3074 addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy);
3075 UD->completeDefinition();
3076 CanQualType KmpCmplrdataTy = C.getCanonicalTagType(UD);
3077 RecordDecl *RD = C.buildImplicitRecord("kmp_task_t");
3078 RD->startDefinition();
3079 addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3080 addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
3081 addFieldToRecordDecl(C, RD, KmpInt32Ty);
3082 addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
3083 addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
3084 if (isOpenMPTaskLoopDirective(Kind)) {
3085 QualType KmpUInt64Ty =
3086 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
3087 QualType KmpInt64Ty =
3088 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
3089 addFieldToRecordDecl(C, RD, KmpUInt64Ty);
3090 addFieldToRecordDecl(C, RD, KmpUInt64Ty);
3091 addFieldToRecordDecl(C, RD, KmpInt64Ty);
3092 addFieldToRecordDecl(C, RD, KmpInt32Ty);
3093 addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3094 }
3095 RD->completeDefinition();
3096 return RD;
3097}
3098
3099static RecordDecl *
3102 ASTContext &C = CGM.getContext();
3103 // Build struct kmp_task_t_with_privates {
3104 // kmp_task_t task_data;
3105 // .kmp_privates_t. privates;
3106 // };
3107 RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates");
3108 RD->startDefinition();
3109 addFieldToRecordDecl(C, RD, KmpTaskTQTy);
3110 if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates))
3111 addFieldToRecordDecl(C, RD, C.getCanonicalTagType(PrivateRD));
3112 RD->completeDefinition();
3113 return RD;
3114}
3115
3116/// Emit a proxy function which accepts kmp_task_t as the second
3117/// argument.
3118/// \code
3119/// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
3120/// TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt,
3121/// For taskloops:
3122/// tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
3123/// tt->reductions, tt->shareds);
3124/// return 0;
3125/// }
3126/// \endcode
3127static llvm::Function *
3129 OpenMPDirectiveKind Kind, QualType KmpInt32Ty,
3130 QualType KmpTaskTWithPrivatesPtrQTy,
3131 QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
3132 QualType SharedsPtrTy, llvm::Function *TaskFunction,
3133 llvm::Value *TaskPrivatesMap) {
3134 ASTContext &C = CGM.getContext();
3135 FunctionArgList Args;
3136 ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
3138 ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3139 KmpTaskTWithPrivatesPtrQTy.withRestrict(),
3141 Args.push_back(&GtidArg);
3142 Args.push_back(&TaskTypeArg);
3143 const auto &TaskEntryFnInfo =
3144 CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
3145 llvm::FunctionType *TaskEntryTy =
3146 CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
3147 std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""});
3148 auto *TaskEntry = llvm::Function::Create(
3149 TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
3150 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo);
3151 if (!CGM.getCodeGenOpts().SampleProfileFile.empty())
3152 TaskEntry->addFnAttr("sample-profile-suffix-elision-policy", "selected");
3153 TaskEntry->setDoesNotRecurse();
3154 CodeGenFunction CGF(CGM);
3155 CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args,
3156 Loc, Loc);
3157
3158 // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map,
3159 // tt,
3160 // For taskloops:
3161 // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
3162 // tt->task_data.shareds);
3163 llvm::Value *GtidParam = CGF.EmitLoadOfScalar(
3164 CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc);
3165 LValue TDBase = CGF.EmitLoadOfPointerLValue(
3166 CGF.GetAddrOfLocalVar(&TaskTypeArg),
3167 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3168 const auto *KmpTaskTWithPrivatesQTyRD =
3169 KmpTaskTWithPrivatesQTy->castAsRecordDecl();
3170 LValue Base =
3171 CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
3172 const auto *KmpTaskTQTyRD = KmpTaskTQTy->castAsRecordDecl();
3173 auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
3174 LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
3175 llvm::Value *PartidParam = PartIdLVal.getPointer(CGF);
3176
3177 auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
3178 LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
3179 llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3180 CGF.EmitLoadOfScalar(SharedsLVal, Loc),
3181 CGF.ConvertTypeForMem(SharedsPtrTy));
3182
3183 auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
3184 llvm::Value *PrivatesParam;
3185 if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
3186 LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
3187 PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3188 PrivatesLVal.getPointer(CGF), CGF.VoidPtrTy);
3189 } else {
3190 PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
3191 }
3192
3193 llvm::Value *CommonArgs[] = {
3194 GtidParam, PartidParam, PrivatesParam, TaskPrivatesMap,
3195 CGF.Builder
3196 .CreatePointerBitCastOrAddrSpaceCast(TDBase.getAddress(),
3197 CGF.VoidPtrTy, CGF.Int8Ty)
3198 .emitRawPointer(CGF)};
3199 SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs),
3200 std::end(CommonArgs));
3201 if (isOpenMPTaskLoopDirective(Kind)) {
3202 auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound);
3203 LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI);
3204 llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc);
3205 auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound);
3206 LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI);
3207 llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc);
3208 auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride);
3209 LValue StLVal = CGF.EmitLValueForField(Base, *StFI);
3210 llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc);
3211 auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
3212 LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
3213 llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc);
3214 auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions);
3215 LValue RLVal = CGF.EmitLValueForField(Base, *RFI);
3216 llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc);
3217 CallArgs.push_back(LBParam);
3218 CallArgs.push_back(UBParam);
3219 CallArgs.push_back(StParam);
3220 CallArgs.push_back(LIParam);
3221 CallArgs.push_back(RParam);
3222 }
3223 CallArgs.push_back(SharedsParam);
3224
3225 CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction,
3226 CallArgs);
3227 CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)),
3228 CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty));
3229 CGF.FinishFunction();
3230 return TaskEntry;
3231}
3232
3234 SourceLocation Loc,
3235 QualType KmpInt32Ty,
3236 QualType KmpTaskTWithPrivatesPtrQTy,
3237 QualType KmpTaskTWithPrivatesQTy) {
3238 ASTContext &C = CGM.getContext();
3239 FunctionArgList Args;
3240 ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
3242 ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3243 KmpTaskTWithPrivatesPtrQTy.withRestrict(),
3245 Args.push_back(&GtidArg);
3246 Args.push_back(&TaskTypeArg);
3247 const auto &DestructorFnInfo =
3248 CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
3249 llvm::FunctionType *DestructorFnTy =
3250 CGM.getTypes().GetFunctionType(DestructorFnInfo);
3251 std::string Name =
3252 CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""});
3253 auto *DestructorFn =
3254 llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage,
3255 Name, &CGM.getModule());
3256 CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn,
3257 DestructorFnInfo);
3258 if (!CGM.getCodeGenOpts().SampleProfileFile.empty())
3259 DestructorFn->addFnAttr("sample-profile-suffix-elision-policy", "selected");
3260 DestructorFn->setDoesNotRecurse();
3261 CodeGenFunction CGF(CGM);
3262 CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo,
3263 Args, Loc, Loc);
3264
3265 LValue Base = CGF.EmitLoadOfPointerLValue(
3266 CGF.GetAddrOfLocalVar(&TaskTypeArg),
3267 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3268 const auto *KmpTaskTWithPrivatesQTyRD =
3269 KmpTaskTWithPrivatesQTy->castAsRecordDecl();
3270 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
3271 Base = CGF.EmitLValueForField(Base, *FI);
3272 for (const auto *Field : FI->getType()->castAsRecordDecl()->fields()) {
3273 if (QualType::DestructionKind DtorKind =
3274 Field->getType().isDestructedType()) {
3275 LValue FieldLValue = CGF.EmitLValueForField(Base, Field);
3276 CGF.pushDestroy(DtorKind, FieldLValue.getAddress(), Field->getType());
3277 }
3278 }
3279 CGF.FinishFunction();
3280 return DestructorFn;
3281}
3282
3283/// Emit a privates mapping function for correct handling of private and
3284/// firstprivate variables.
3285/// \code
3286/// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1>
3287/// **noalias priv1,..., <tyn> **noalias privn) {
3288/// *priv1 = &.privates.priv1;
3289/// ...;
3290/// *privn = &.privates.privn;
3291/// }
3292/// \endcode
3293static llvm::Value *
3295 const OMPTaskDataTy &Data, QualType PrivatesQTy,
3297 ASTContext &C = CGM.getContext();
3298 FunctionArgList Args;
3299 ImplicitParamDecl TaskPrivatesArg(
3300 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3301 C.getPointerType(PrivatesQTy).withConst().withRestrict(),
3303 Args.push_back(&TaskPrivatesArg);
3304 llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, unsigned> PrivateVarsPos;
3305 unsigned Counter = 1;
3306 for (const Expr *E : Data.PrivateVars) {
3307 Args.push_back(ImplicitParamDecl::Create(
3308 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3309 C.getPointerType(C.getPointerType(E->getType()))
3310 .withConst()
3311 .withRestrict(),
3313 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3314 PrivateVarsPos[VD] = Counter;
3315 ++Counter;
3316 }
3317 for (const Expr *E : Data.FirstprivateVars) {
3318 Args.push_back(ImplicitParamDecl::Create(
3319 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3320 C.getPointerType(C.getPointerType(E->getType()))
3321 .withConst()
3322 .withRestrict(),
3324 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3325 PrivateVarsPos[VD] = Counter;
3326 ++Counter;
3327 }
3328 for (const Expr *E : Data.LastprivateVars) {
3329 Args.push_back(ImplicitParamDecl::Create(
3330 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3331 C.getPointerType(C.getPointerType(E->getType()))
3332 .withConst()
3333 .withRestrict(),
3335 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3336 PrivateVarsPos[VD] = Counter;
3337 ++Counter;
3338 }
3339 for (const VarDecl *VD : Data.PrivateLocals) {
3341 if (VD->getType()->isLValueReferenceType())
3342 Ty = C.getPointerType(Ty);
3343 if (isAllocatableDecl(VD))
3344 Ty = C.getPointerType(Ty);
3345 Args.push_back(ImplicitParamDecl::Create(
3346 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3347 C.getPointerType(C.getPointerType(Ty)).withConst().withRestrict(),
3349 PrivateVarsPos[VD] = Counter;
3350 ++Counter;
3351 }
3352 const auto &TaskPrivatesMapFnInfo =
3353 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
3354 llvm::FunctionType *TaskPrivatesMapTy =
3355 CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo);
3356 std::string Name =
3357 CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""});
3358 auto *TaskPrivatesMap = llvm::Function::Create(
3359 TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name,
3360 &CGM.getModule());
3361 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap,
3362 TaskPrivatesMapFnInfo);
3363 if (!CGM.getCodeGenOpts().SampleProfileFile.empty())
3364 TaskPrivatesMap->addFnAttr("sample-profile-suffix-elision-policy",
3365 "selected");
3366 if (CGM.getCodeGenOpts().OptimizationLevel != 0) {
3367 TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline);
3368 TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone);
3369 TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline);
3370 }
3371 CodeGenFunction CGF(CGM);
3372 CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap,
3373 TaskPrivatesMapFnInfo, Args, Loc, Loc);
3374
3375 // *privi = &.privates.privi;
3376 LValue Base = CGF.EmitLoadOfPointerLValue(
3377 CGF.GetAddrOfLocalVar(&TaskPrivatesArg),
3378 TaskPrivatesArg.getType()->castAs<PointerType>());
3379 const auto *PrivatesQTyRD = PrivatesQTy->castAsRecordDecl();
3380 Counter = 0;
3381 for (const FieldDecl *Field : PrivatesQTyRD->fields()) {
3382 LValue FieldLVal = CGF.EmitLValueForField(Base, Field);
3383 const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]];
3384 LValue RefLVal =
3385 CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType());
3386 LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue(
3387 RefLVal.getAddress(), RefLVal.getType()->castAs<PointerType>());
3388 CGF.EmitStoreOfScalar(FieldLVal.getPointer(CGF), RefLoadLVal);
3389 ++Counter;
3390 }
3391 CGF.FinishFunction();
3392 return TaskPrivatesMap;
3393}
3394
3395/// Emit initialization for private variables in task-based directives.
3397 const OMPExecutableDirective &D,
3398 Address KmpTaskSharedsPtr, LValue TDBase,
3399 const RecordDecl *KmpTaskTWithPrivatesQTyRD,
3400 QualType SharedsTy, QualType SharedsPtrTy,
3401 const OMPTaskDataTy &Data,
3402 ArrayRef<PrivateDataTy> Privates, bool ForDup) {
3403 ASTContext &C = CGF.getContext();
3404 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
3405 LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI);
3406 OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind())
3407 ? OMPD_taskloop
3408 : OMPD_task;
3409 const CapturedStmt &CS = *D.getCapturedStmt(Kind);
3410 CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS);
3411 LValue SrcBase;
3412 bool IsTargetTask =
3413 isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) ||
3414 isOpenMPTargetExecutionDirective(D.getDirectiveKind());
3415 // For target-based directives skip 4 firstprivate arrays BasePointersArray,
3416 // PointersArray, SizesArray, and MappersArray. The original variables for
3417 // these arrays are not captured and we get their addresses explicitly.
3418 if ((!IsTargetTask && !Data.FirstprivateVars.empty() && ForDup) ||
3419 (IsTargetTask && KmpTaskSharedsPtr.isValid())) {
3420 SrcBase = CGF.MakeAddrLValue(
3422 KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy),
3423 CGF.ConvertTypeForMem(SharedsTy)),
3424 SharedsTy);
3425 }
3426 FI = FI->getType()->castAsRecordDecl()->field_begin();
3427 for (const PrivateDataTy &Pair : Privates) {
3428 // Do not initialize private locals.
3429 if (Pair.second.isLocalPrivate()) {
3430 ++FI;
3431 continue;
3432 }
3433 const VarDecl *VD = Pair.second.PrivateCopy;
3434 const Expr *Init = VD->getAnyInitializer();
3435 if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) &&
3436 !CGF.isTrivialInitializer(Init)))) {
3437 LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
3438 if (const VarDecl *Elem = Pair.second.PrivateElemInit) {
3439 const VarDecl *OriginalVD = Pair.second.Original;
3440 // Check if the variable is the target-based BasePointersArray,
3441 // PointersArray, SizesArray, or MappersArray.
3442 LValue SharedRefLValue;
3443 QualType Type = PrivateLValue.getType();
3444 const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD);
3445 if (IsTargetTask && !SharedField) {
3446 assert(isa<ImplicitParamDecl>(OriginalVD) &&
3447 isa<CapturedDecl>(OriginalVD->getDeclContext()) &&
3448 cast<CapturedDecl>(OriginalVD->getDeclContext())
3449 ->getNumParams() == 0 &&
3451 cast<CapturedDecl>(OriginalVD->getDeclContext())
3452 ->getDeclContext()) &&
3453 "Expected artificial target data variable.");
3454 SharedRefLValue =
3455 CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type);
3456 } else if (ForDup) {
3457 SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField);
3458 SharedRefLValue = CGF.MakeAddrLValue(
3459 SharedRefLValue.getAddress().withAlignment(
3460 C.getDeclAlign(OriginalVD)),
3461 SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl),
3462 SharedRefLValue.getTBAAInfo());
3463 } else if (CGF.LambdaCaptureFields.count(
3464 Pair.second.Original->getCanonicalDecl()) > 0 ||
3465 isa_and_nonnull<BlockDecl>(CGF.CurCodeDecl)) {
3466 SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
3467 } else {
3468 // Processing for implicitly captured variables.
3469 InlinedOpenMPRegionRAII Region(
3470 CGF, [](CodeGenFunction &, PrePostActionTy &) {}, OMPD_unknown,
3471 /*HasCancel=*/false, /*NoInheritance=*/true);
3472 SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
3473 }
3474 if (Type->isArrayType()) {
3475 // Initialize firstprivate array.
3477 // Perform simple memcpy.
3478 CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type);
3479 } else {
3480 // Initialize firstprivate array using element-by-element
3481 // initialization.
3483 PrivateLValue.getAddress(), SharedRefLValue.getAddress(), Type,
3484 [&CGF, Elem, Init, &CapturesInfo](Address DestElement,
3485 Address SrcElement) {
3486 // Clean up any temporaries needed by the initialization.
3487 CodeGenFunction::OMPPrivateScope InitScope(CGF);
3488 InitScope.addPrivate(Elem, SrcElement);
3489 (void)InitScope.Privatize();
3490 // Emit initialization for single element.
3491 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(
3492 CGF, &CapturesInfo);
3493 CGF.EmitAnyExprToMem(Init, DestElement,
3494 Init->getType().getQualifiers(),
3495 /*IsInitializer=*/false);
3496 });
3497 }
3498 } else {
3499 CodeGenFunction::OMPPrivateScope InitScope(CGF);
3500 InitScope.addPrivate(Elem, SharedRefLValue.getAddress());
3501 (void)InitScope.Privatize();
3502 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
3503 CGF.EmitExprAsInit(Init, VD, PrivateLValue,
3504 /*capturedByInit=*/false);
3505 }
3506 } else {
3507 CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
3508 }
3509 }
3510 ++FI;
3511 }
3512}
3513
3514/// Check if duplication function is required for taskloops.
3517 bool InitRequired = false;
3518 for (const PrivateDataTy &Pair : Privates) {
3519 if (Pair.second.isLocalPrivate())
3520 continue;
3521 const VarDecl *VD = Pair.second.PrivateCopy;
3522 const Expr *Init = VD->getAnyInitializer();
3523 InitRequired = InitRequired || (isa_and_nonnull<CXXConstructExpr>(Init) &&
3525 if (InitRequired)
3526 break;
3527 }
3528 return InitRequired;
3529}
3530
3531
3532/// Emit task_dup function (for initialization of
3533/// private/firstprivate/lastprivate vars and last_iter flag)
3534/// \code
3535/// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int
3536/// lastpriv) {
3537/// // setup lastprivate flag
3538/// task_dst->last = lastpriv;
3539/// // could be constructor calls here...
3540/// }
3541/// \endcode
3542static llvm::Value *
3544 const OMPExecutableDirective &D,
3545 QualType KmpTaskTWithPrivatesPtrQTy,
3546 const RecordDecl *KmpTaskTWithPrivatesQTyRD,
3547 const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy,
3548 QualType SharedsPtrTy, const OMPTaskDataTy &Data,
3549 ArrayRef<PrivateDataTy> Privates, bool WithLastIter) {
3550 ASTContext &C = CGM.getContext();
3551 FunctionArgList Args;
3552 ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3553 KmpTaskTWithPrivatesPtrQTy,
3555 ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3556 KmpTaskTWithPrivatesPtrQTy,
3558 ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy,
3560 Args.push_back(&DstArg);
3561 Args.push_back(&SrcArg);
3562 Args.push_back(&LastprivArg);
3563 const auto &TaskDupFnInfo =
3564 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
3565 llvm::FunctionType *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo);
3566 std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_dup", ""});
3567 auto *TaskDup = llvm::Function::Create(
3568 TaskDupTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
3569 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo);
3570 if (!CGM.getCodeGenOpts().SampleProfileFile.empty())
3571 TaskDup->addFnAttr("sample-profile-suffix-elision-policy", "selected");
3572 TaskDup->setDoesNotRecurse();
3573 CodeGenFunction CGF(CGM);
3574 CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc,
3575 Loc);
3576
3577 LValue TDBase = CGF.EmitLoadOfPointerLValue(
3578 CGF.GetAddrOfLocalVar(&DstArg),
3579 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3580 // task_dst->liter = lastpriv;
3581 if (WithLastIter) {
3582 auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
3583 LValue Base = CGF.EmitLValueForField(
3584 TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
3585 LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
3586 llvm::Value *Lastpriv = CGF.EmitLoadOfScalar(
3587 CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc);
3588 CGF.EmitStoreOfScalar(Lastpriv, LILVal);
3589 }
3590
3591 // Emit initial values for private copies (if any).
3592 assert(!Privates.empty());
3593 Address KmpTaskSharedsPtr = Address::invalid();
3594 if (!Data.FirstprivateVars.empty()) {
3595 LValue TDBase = CGF.EmitLoadOfPointerLValue(
3596 CGF.GetAddrOfLocalVar(&SrcArg),
3597 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3598 LValue Base = CGF.EmitLValueForField(
3599 TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
3600 KmpTaskSharedsPtr = Address(
3602 Base, *std::next(KmpTaskTQTyRD->field_begin(),
3603 KmpTaskTShareds)),
3604 Loc),
3605 CGF.Int8Ty, CGM.getNaturalTypeAlignment(SharedsTy));
3606 }
3607 emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD,
3608 SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true);
3609 CGF.FinishFunction();
3610 return TaskDup;
3611}
3612
3613/// Checks if destructor function is required to be generated.
3614/// \return true if cleanups are required, false otherwise.
3615static bool
3616checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD,
3618 for (const PrivateDataTy &P : Privates) {
3619 if (P.second.isLocalPrivate())
3620 continue;
3621 QualType Ty = P.second.Original->getType().getNonReferenceType();
3622 if (Ty.isDestructedType())
3623 return true;
3624 }
3625 return false;
3626}
3627
3628namespace {
3629/// Loop generator for OpenMP iterator expression.
3630class OMPIteratorGeneratorScope final
3632 CodeGenFunction &CGF;
3633 const OMPIteratorExpr *E = nullptr;
3634 SmallVector<CodeGenFunction::JumpDest, 4> ContDests;
3635 SmallVector<CodeGenFunction::JumpDest, 4> ExitDests;
3636 OMPIteratorGeneratorScope() = delete;
3637 OMPIteratorGeneratorScope(OMPIteratorGeneratorScope &) = delete;
3638
3639public:
3640 OMPIteratorGeneratorScope(CodeGenFunction &CGF, const OMPIteratorExpr *E)
3641 : CodeGenFunction::OMPPrivateScope(CGF), CGF(CGF), E(E) {
3642 if (!E)
3643 return;
3644 SmallVector<llvm::Value *, 4> Uppers;
3645 for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
3646 Uppers.push_back(CGF.EmitScalarExpr(E->getHelper(I).Upper));
3647 const auto *VD = cast<VarDecl>(E->getIteratorDecl(I));
3648 addPrivate(VD, CGF.CreateMemTemp(VD->getType(), VD->getName()));
3649 const OMPIteratorHelperData &HelperData = E->getHelper(I);
3650 addPrivate(
3651 HelperData.CounterVD,
3652 CGF.CreateMemTemp(HelperData.CounterVD->getType(), "counter.addr"));
3653 }
3654 Privatize();
3655
3656 for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
3657 const OMPIteratorHelperData &HelperData = E->getHelper(I);
3658 LValue CLVal =
3659 CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(HelperData.CounterVD),
3660 HelperData.CounterVD->getType());
3661 // Counter = 0;
3662 CGF.EmitStoreOfScalar(
3663 llvm::ConstantInt::get(CLVal.getAddress().getElementType(), 0),
3664 CLVal);
3665 CodeGenFunction::JumpDest &ContDest =
3666 ContDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.cont"));
3667 CodeGenFunction::JumpDest &ExitDest =
3668 ExitDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.exit"));
3669 // N = <number-of_iterations>;
3670 llvm::Value *N = Uppers[I];
3671 // cont:
3672 // if (Counter < N) goto body; else goto exit;
3673 CGF.EmitBlock(ContDest.getBlock());
3674 auto *CVal =
3675 CGF.EmitLoadOfScalar(CLVal, HelperData.CounterVD->getLocation());
3676 llvm::Value *Cmp =
3677 HelperData.CounterVD->getType()->isSignedIntegerOrEnumerationType()
3678 ? CGF.Builder.CreateICmpSLT(CVal, N)
3679 : CGF.Builder.CreateICmpULT(CVal, N);
3680 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("iter.body");
3681 CGF.Builder.CreateCondBr(Cmp, BodyBB, ExitDest.getBlock());
3682 // body:
3683 CGF.EmitBlock(BodyBB);
3684 // Iteri = Begini + Counter * Stepi;
3685 CGF.EmitIgnoredExpr(HelperData.Update);
3686 }
3687 }
3688 ~OMPIteratorGeneratorScope() {
3689 if (!E)
3690 return;
3691 for (unsigned I = E->numOfIterators(); I > 0; --I) {
3692 // Counter = Counter + 1;
3693 const OMPIteratorHelperData &HelperData = E->getHelper(I - 1);
3694 CGF.EmitIgnoredExpr(HelperData.CounterUpdate);
3695 // goto cont;
3696 CGF.EmitBranchThroughCleanup(ContDests[I - 1]);
3697 // exit:
3698 CGF.EmitBlock(ExitDests[I - 1].getBlock(), /*IsFinished=*/I == 1);
3699 }
3700 }
3701};
3702} // namespace
3703
3704static std::pair<llvm::Value *, llvm::Value *>
3706 const auto *OASE = dyn_cast<OMPArrayShapingExpr>(E);
3707 llvm::Value *Addr;
3708 if (OASE) {
3709 const Expr *Base = OASE->getBase();
3710 Addr = CGF.EmitScalarExpr(Base);
3711 } else {
3712 Addr = CGF.EmitLValue(E).getPointer(CGF);
3713 }
3714 llvm::Value *SizeVal;
3715 QualType Ty = E->getType();
3716 if (OASE) {
3717 SizeVal = CGF.getTypeSize(OASE->getBase()->getType()->getPointeeType());
3718 for (const Expr *SE : OASE->getDimensions()) {
3719 llvm::Value *Sz = CGF.EmitScalarExpr(SE);
3720 Sz = CGF.EmitScalarConversion(
3721 Sz, SE->getType(), CGF.getContext().getSizeType(), SE->getExprLoc());
3722 SizeVal = CGF.Builder.CreateNUWMul(SizeVal, Sz);
3723 }
3724 } else if (const auto *ASE =
3725 dyn_cast<ArraySectionExpr>(E->IgnoreParenImpCasts())) {
3726 LValue UpAddrLVal = CGF.EmitArraySectionExpr(ASE, /*IsLowerBound=*/false);
3727 Address UpAddrAddress = UpAddrLVal.getAddress();
3728 llvm::Value *UpAddr = CGF.Builder.CreateConstGEP1_32(
3729 UpAddrAddress.getElementType(), UpAddrAddress.emitRawPointer(CGF),
3730 /*Idx0=*/1);
3731 llvm::Value *LowIntPtr = CGF.Builder.CreatePtrToInt(Addr, CGF.SizeTy);
3732 llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGF.SizeTy);
3733 SizeVal = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr);
3734 } else {
3735 SizeVal = CGF.getTypeSize(Ty);
3736 }
3737 return std::make_pair(Addr, SizeVal);
3738}
3739
3740/// Builds kmp_depend_info, if it is not built yet, and builds flags type.
3741static void getKmpAffinityType(ASTContext &C, QualType &KmpTaskAffinityInfoTy) {
3742 QualType FlagsTy = C.getIntTypeForBitwidth(32, /*Signed=*/false);
3743 if (KmpTaskAffinityInfoTy.isNull()) {
3744 RecordDecl *KmpAffinityInfoRD =
3745 C.buildImplicitRecord("kmp_task_affinity_info_t");
3746 KmpAffinityInfoRD->startDefinition();
3747 addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getIntPtrType());
3748 addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getSizeType());
3749 addFieldToRecordDecl(C, KmpAffinityInfoRD, FlagsTy);
3750 KmpAffinityInfoRD->completeDefinition();
3751 KmpTaskAffinityInfoTy = C.getCanonicalTagType(KmpAffinityInfoRD);
3752 }
3753}
3754
3757 const OMPExecutableDirective &D,
3758 llvm::Function *TaskFunction, QualType SharedsTy,
3759 Address Shareds, const OMPTaskDataTy &Data) {
3760 ASTContext &C = CGM.getContext();
3762 // Aggregate privates and sort them by the alignment.
3763 const auto *I = Data.PrivateCopies.begin();
3764 for (const Expr *E : Data.PrivateVars) {
3765 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3766 Privates.emplace_back(
3767 C.getDeclAlign(VD),
3768 PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
3769 /*PrivateElemInit=*/nullptr));
3770 ++I;
3771 }
3772 I = Data.FirstprivateCopies.begin();
3773 const auto *IElemInitRef = Data.FirstprivateInits.begin();
3774 for (const Expr *E : Data.FirstprivateVars) {
3775 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3776 Privates.emplace_back(
3777 C.getDeclAlign(VD),
3778 PrivateHelpersTy(
3779 E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
3780 cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl())));
3781 ++I;
3782 ++IElemInitRef;
3783 }
3784 I = Data.LastprivateCopies.begin();
3785 for (const Expr *E : Data.LastprivateVars) {
3786 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3787 Privates.emplace_back(
3788 C.getDeclAlign(VD),
3789 PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
3790 /*PrivateElemInit=*/nullptr));
3791 ++I;
3792 }
3793 for (const VarDecl *VD : Data.PrivateLocals) {
3794 if (isAllocatableDecl(VD))
3795 Privates.emplace_back(CGM.getPointerAlign(), PrivateHelpersTy(VD));
3796 else
3797 Privates.emplace_back(C.getDeclAlign(VD), PrivateHelpersTy(VD));
3798 }
3799 llvm::stable_sort(Privates,
3800 [](const PrivateDataTy &L, const PrivateDataTy &R) {
3801 return L.first > R.first;
3802 });
3803 QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
3804 // Build type kmp_routine_entry_t (if not built yet).
3805 emitKmpRoutineEntryT(KmpInt32Ty);
3806 // Build type kmp_task_t (if not built yet).
3807 if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) {
3808 if (SavedKmpTaskloopTQTy.isNull()) {
3809 SavedKmpTaskloopTQTy = C.getCanonicalTagType(createKmpTaskTRecordDecl(
3810 CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
3811 }
3813 } else {
3814 assert((D.getDirectiveKind() == OMPD_task ||
3815 isOpenMPTargetExecutionDirective(D.getDirectiveKind()) ||
3816 isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) &&
3817 "Expected taskloop, task or target directive");
3818 if (SavedKmpTaskTQTy.isNull()) {
3819 SavedKmpTaskTQTy = C.getCanonicalTagType(createKmpTaskTRecordDecl(
3820 CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
3821 }
3823 }
3824 const auto *KmpTaskTQTyRD = KmpTaskTQTy->castAsRecordDecl();
3825 // Build particular struct kmp_task_t for the given task.
3826 const RecordDecl *KmpTaskTWithPrivatesQTyRD =
3828 CanQualType KmpTaskTWithPrivatesQTy =
3829 C.getCanonicalTagType(KmpTaskTWithPrivatesQTyRD);
3830 QualType KmpTaskTWithPrivatesPtrQTy =
3831 C.getPointerType(KmpTaskTWithPrivatesQTy);
3832 llvm::Type *KmpTaskTWithPrivatesPtrTy = CGF.Builder.getPtrTy(0);
3833 llvm::Value *KmpTaskTWithPrivatesTySize =
3834 CGF.getTypeSize(KmpTaskTWithPrivatesQTy);
3835 QualType SharedsPtrTy = C.getPointerType(SharedsTy);
3836
3837 // Emit initial values for private copies (if any).
3838 llvm::Value *TaskPrivatesMap = nullptr;
3839 llvm::Type *TaskPrivatesMapTy =
3840 std::next(TaskFunction->arg_begin(), 3)->getType();
3841 if (!Privates.empty()) {
3842 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
3843 TaskPrivatesMap =
3844 emitTaskPrivateMappingFunction(CGM, Loc, Data, FI->getType(), Privates);
3845 TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3846 TaskPrivatesMap, TaskPrivatesMapTy);
3847 } else {
3848 TaskPrivatesMap = llvm::ConstantPointerNull::get(
3849 cast<llvm::PointerType>(TaskPrivatesMapTy));
3850 }
3851 // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
3852 // kmp_task_t *tt);
3853 llvm::Function *TaskEntry = emitProxyTaskFunction(
3854 CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
3855 KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction,
3856 TaskPrivatesMap);
3857
3858 // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
3859 // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
3860 // kmp_routine_entry_t *task_entry);
3861 // Task flags. Format is taken from
3862 // https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h,
3863 // description of kmp_tasking_flags struct.
3864 enum {
3865 TiedFlag = 0x1,
3866 FinalFlag = 0x2,
3867 DestructorsFlag = 0x8,
3868 PriorityFlag = 0x20,
3869 DetachableFlag = 0x40,
3870 FreeAgentFlag = 0x80,
3871 TransparentFlag = 0x100,
3872 };
3873 unsigned Flags = Data.Tied ? TiedFlag : 0;
3874 bool NeedsCleanup = false;
3875 if (!Privates.empty()) {
3876 NeedsCleanup =
3877 checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD, Privates);
3878 if (NeedsCleanup)
3879 Flags = Flags | DestructorsFlag;
3880 }
3881 if (const auto *Clause = D.getSingleClause<OMPThreadsetClause>()) {
3882 OpenMPThreadsetKind Kind = Clause->getThreadsetKind();
3883 if (Kind == OMPC_THREADSET_omp_pool)
3884 Flags = Flags | FreeAgentFlag;
3885 }
3886 if (D.getSingleClause<OMPTransparentClause>())
3887 Flags |= TransparentFlag;
3888
3889 if (Data.Priority.getInt())
3890 Flags = Flags | PriorityFlag;
3891 if (D.hasClausesOfKind<OMPDetachClause>())
3892 Flags = Flags | DetachableFlag;
3893 llvm::Value *TaskFlags =
3894 Data.Final.getPointer()
3895 ? CGF.Builder.CreateSelect(Data.Final.getPointer(),
3896 CGF.Builder.getInt32(FinalFlag),
3897 CGF.Builder.getInt32(/*C=*/0))
3898 : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0);
3899 TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
3900 llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy));
3902 getThreadID(CGF, Loc), TaskFlags, KmpTaskTWithPrivatesTySize,
3904 TaskEntry, KmpRoutineEntryPtrTy)};
3905 llvm::Value *NewTask;
3906 if (D.hasClausesOfKind<OMPNowaitClause>()) {
3907 // Check if we have any device clause associated with the directive.
3908 const Expr *Device = nullptr;
3909 if (auto *C = D.getSingleClause<OMPDeviceClause>())
3910 Device = C->getDevice();
3911 // Emit device ID if any otherwise use default value.
3912 llvm::Value *DeviceID;
3913 if (Device)
3914 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
3915 CGF.Int64Ty, /*isSigned=*/true);
3916 else
3917 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
3918 AllocArgs.push_back(DeviceID);
3919 NewTask = CGF.EmitRuntimeCall(
3920 OMPBuilder.getOrCreateRuntimeFunction(
3921 CGM.getModule(), OMPRTL___kmpc_omp_target_task_alloc),
3922 AllocArgs);
3923 } else {
3924 NewTask =
3925 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
3926 CGM.getModule(), OMPRTL___kmpc_omp_task_alloc),
3927 AllocArgs);
3928 }
3929 // Emit detach clause initialization.
3930 // evt = (typeof(evt))__kmpc_task_allow_completion_event(loc, tid,
3931 // task_descriptor);
3932 if (const auto *DC = D.getSingleClause<OMPDetachClause>()) {
3933 const Expr *Evt = DC->getEventHandler()->IgnoreParenImpCasts();
3934 LValue EvtLVal = CGF.EmitLValue(Evt);
3935
3936 // Build kmp_event_t *__kmpc_task_allow_completion_event(ident_t *loc_ref,
3937 // int gtid, kmp_task_t *task);
3938 llvm::Value *Loc = emitUpdateLocation(CGF, DC->getBeginLoc());
3939 llvm::Value *Tid = getThreadID(CGF, DC->getBeginLoc());
3940 Tid = CGF.Builder.CreateIntCast(Tid, CGF.IntTy, /*isSigned=*/false);
3941 llvm::Value *EvtVal = CGF.EmitRuntimeCall(
3942 OMPBuilder.getOrCreateRuntimeFunction(
3943 CGM.getModule(), OMPRTL___kmpc_task_allow_completion_event),
3944 {Loc, Tid, NewTask});
3945 EvtVal = CGF.EmitScalarConversion(EvtVal, C.VoidPtrTy, Evt->getType(),
3946 Evt->getExprLoc());
3947 CGF.EmitStoreOfScalar(EvtVal, EvtLVal);
3948 }
3949 // Process affinity clauses.
3950 if (D.hasClausesOfKind<OMPAffinityClause>()) {
3951 // Process list of affinity data.
3952 ASTContext &C = CGM.getContext();
3953 Address AffinitiesArray = Address::invalid();
3954 // Calculate number of elements to form the array of affinity data.
3955 llvm::Value *NumOfElements = nullptr;
3956 unsigned NumAffinities = 0;
3957 for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
3958 if (const Expr *Modifier = C->getModifier()) {
3959 const auto *IE = cast<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts());
3960 for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
3961 llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
3962 Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
3963 NumOfElements =
3964 NumOfElements ? CGF.Builder.CreateNUWMul(NumOfElements, Sz) : Sz;
3965 }
3966 } else {
3967 NumAffinities += C->varlist_size();
3968 }
3969 }
3971 // Fields ids in kmp_task_affinity_info record.
3972 enum RTLAffinityInfoFieldsTy { BaseAddr, Len, Flags };
3973
3974 QualType KmpTaskAffinityInfoArrayTy;
3975 if (NumOfElements) {
3976 NumOfElements = CGF.Builder.CreateNUWAdd(
3977 llvm::ConstantInt::get(CGF.SizeTy, NumAffinities), NumOfElements);
3978 auto *OVE = new (C) OpaqueValueExpr(
3979 Loc,
3980 C.getIntTypeForBitwidth(C.getTypeSize(C.getSizeType()), /*Signed=*/0),
3981 VK_PRValue);
3982 CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE,
3983 RValue::get(NumOfElements));
3984 KmpTaskAffinityInfoArrayTy = C.getVariableArrayType(
3986 /*IndexTypeQuals=*/0);
3987 // Properly emit variable-sized array.
3988 auto *PD = ImplicitParamDecl::Create(C, KmpTaskAffinityInfoArrayTy,
3990 CGF.EmitVarDecl(*PD);
3991 AffinitiesArray = CGF.GetAddrOfLocalVar(PD);
3992 NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
3993 /*isSigned=*/false);
3994 } else {
3995 KmpTaskAffinityInfoArrayTy = C.getConstantArrayType(
3997 llvm::APInt(C.getTypeSize(C.getSizeType()), NumAffinities), nullptr,
3998 ArraySizeModifier::Normal, /*IndexTypeQuals=*/0);
3999 AffinitiesArray =
4000 CGF.CreateMemTemp(KmpTaskAffinityInfoArrayTy, ".affs.arr.addr");
4001 AffinitiesArray = CGF.Builder.CreateConstArrayGEP(AffinitiesArray, 0);
4002 NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumAffinities,
4003 /*isSigned=*/false);
4004 }
4005
4006 const auto *KmpAffinityInfoRD = KmpTaskAffinityInfoTy->getAsRecordDecl();
4007 // Fill array by elements without iterators.
4008 unsigned Pos = 0;
4009 bool HasIterator = false;
4010 for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
4011 if (C->getModifier()) {
4012 HasIterator = true;
4013 continue;
4014 }
4015 for (const Expr *E : C->varlist()) {
4016 llvm::Value *Addr;
4017 llvm::Value *Size;
4018 std::tie(Addr, Size) = getPointerAndSize(CGF, E);
4019 LValue Base =
4020 CGF.MakeAddrLValue(CGF.Builder.CreateConstGEP(AffinitiesArray, Pos),
4022 // affs[i].base_addr = &<Affinities[i].second>;
4023 LValue BaseAddrLVal = CGF.EmitLValueForField(
4024 Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
4025 CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
4026 BaseAddrLVal);
4027 // affs[i].len = sizeof(<Affinities[i].second>);
4028 LValue LenLVal = CGF.EmitLValueForField(
4029 Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
4030 CGF.EmitStoreOfScalar(Size, LenLVal);
4031 ++Pos;
4032 }
4033 }
4034 LValue PosLVal;
4035 if (HasIterator) {
4036 PosLVal = CGF.MakeAddrLValue(
4037 CGF.CreateMemTemp(C.getSizeType(), "affs.counter.addr"),
4038 C.getSizeType());
4039 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
4040 }
4041 // Process elements with iterators.
4042 for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
4043 const Expr *Modifier = C->getModifier();
4044 if (!Modifier)
4045 continue;
4046 OMPIteratorGeneratorScope IteratorScope(
4047 CGF, cast_or_null<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts()));
4048 for (const Expr *E : C->varlist()) {
4049 llvm::Value *Addr;
4050 llvm::Value *Size;
4051 std::tie(Addr, Size) = getPointerAndSize(CGF, E);
4052 llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4053 LValue Base =
4054 CGF.MakeAddrLValue(CGF.Builder.CreateGEP(CGF, AffinitiesArray, Idx),
4056 // affs[i].base_addr = &<Affinities[i].second>;
4057 LValue BaseAddrLVal = CGF.EmitLValueForField(
4058 Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
4059 CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
4060 BaseAddrLVal);
4061 // affs[i].len = sizeof(<Affinities[i].second>);
4062 LValue LenLVal = CGF.EmitLValueForField(
4063 Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
4064 CGF.EmitStoreOfScalar(Size, LenLVal);
4065 Idx = CGF.Builder.CreateNUWAdd(
4066 Idx, llvm::ConstantInt::get(Idx->getType(), 1));
4067 CGF.EmitStoreOfScalar(Idx, PosLVal);
4068 }
4069 }
4070 // Call to kmp_int32 __kmpc_omp_reg_task_with_affinity(ident_t *loc_ref,
4071 // kmp_int32 gtid, kmp_task_t *new_task, kmp_int32
4072 // naffins, kmp_task_affinity_info_t *affin_list);
4073 llvm::Value *LocRef = emitUpdateLocation(CGF, Loc);
4074 llvm::Value *GTid = getThreadID(CGF, Loc);
4075 llvm::Value *AffinListPtr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4076 AffinitiesArray.emitRawPointer(CGF), CGM.VoidPtrTy);
4077 // FIXME: Emit the function and ignore its result for now unless the
4078 // runtime function is properly implemented.
4079 (void)CGF.EmitRuntimeCall(
4080 OMPBuilder.getOrCreateRuntimeFunction(
4081 CGM.getModule(), OMPRTL___kmpc_omp_reg_task_with_affinity),
4082 {LocRef, GTid, NewTask, NumOfElements, AffinListPtr});
4083 }
4084 llvm::Value *NewTaskNewTaskTTy =
4086 NewTask, KmpTaskTWithPrivatesPtrTy);
4087 LValue Base = CGF.MakeNaturalAlignRawAddrLValue(NewTaskNewTaskTTy,
4088 KmpTaskTWithPrivatesQTy);
4089 LValue TDBase =
4090 CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin());
4091 // Fill the data in the resulting kmp_task_t record.
4092 // Copy shareds if there are any.
4093 Address KmpTaskSharedsPtr = Address::invalid();
4094 if (!SharedsTy->castAsRecordDecl()->field_empty()) {
4095 KmpTaskSharedsPtr = Address(
4096 CGF.EmitLoadOfScalar(
4098 TDBase,
4099 *std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds)),
4100 Loc),
4101 CGF.Int8Ty, CGM.getNaturalTypeAlignment(SharedsTy));
4102 LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy);
4103 LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy);
4104 CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap);
4105 }
4106 // Emit initial values for private copies (if any).
4108 if (!Privates.empty()) {
4109 emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD,
4110 SharedsTy, SharedsPtrTy, Data, Privates,
4111 /*ForDup=*/false);
4112 if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
4113 (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) {
4114 Result.TaskDupFn = emitTaskDupFunction(
4115 CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD,
4116 KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates,
4117 /*WithLastIter=*/!Data.LastprivateVars.empty());
4118 }
4119 }
4120 // Fields of union "kmp_cmplrdata_t" for destructors and priority.
4121 enum { Priority = 0, Destructors = 1 };
4122 // Provide pointer to function with destructors for privates.
4123 auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1);
4124 const auto *KmpCmplrdataUD = (*FI)->getType()->castAsRecordDecl();
4125 assert(KmpCmplrdataUD->isUnion());
4126 if (NeedsCleanup) {
4127 llvm::Value *DestructorFn = emitDestructorsFunction(
4128 CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
4129 KmpTaskTWithPrivatesQTy);
4130 LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI);
4131 LValue DestructorsLV = CGF.EmitLValueForField(
4132 Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors));
4134 DestructorFn, KmpRoutineEntryPtrTy),
4135 DestructorsLV);
4136 }
4137 // Set priority.
4138 if (Data.Priority.getInt()) {
4139 LValue Data2LV = CGF.EmitLValueForField(
4140 TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2));
4141 LValue PriorityLV = CGF.EmitLValueForField(
4142 Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority));
4143 CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV);
4144 }
4145 Result.NewTask = NewTask;
4146 Result.TaskEntry = TaskEntry;
4147 Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy;
4148 Result.TDBase = TDBase;
4149 Result.KmpTaskTQTyRD = KmpTaskTQTyRD;
4150 return Result;
4151}
4152
4153/// Translates internal dependency kind into the runtime kind.
4155 RTLDependenceKindTy DepKind;
4156 switch (K) {
4157 case OMPC_DEPEND_in:
4158 DepKind = RTLDependenceKindTy::DepIn;
4159 break;
4160 // Out and InOut dependencies must use the same code.
4161 case OMPC_DEPEND_out:
4162 case OMPC_DEPEND_inout:
4163 DepKind = RTLDependenceKindTy::DepInOut;
4164 break;
4165 case OMPC_DEPEND_mutexinoutset:
4166 DepKind = RTLDependenceKindTy::DepMutexInOutSet;
4167 break;
4168 case OMPC_DEPEND_inoutset:
4169 DepKind = RTLDependenceKindTy::DepInOutSet;
4170 break;
4171 case OMPC_DEPEND_outallmemory:
4172 DepKind = RTLDependenceKindTy::DepOmpAllMem;
4173 break;
4174 case OMPC_DEPEND_source:
4175 case OMPC_DEPEND_sink:
4176 case OMPC_DEPEND_depobj:
4177 case OMPC_DEPEND_inoutallmemory:
4179 llvm_unreachable("Unknown task dependence type");
4180 }
4181 return DepKind;
4182}
4183
4184/// Builds kmp_depend_info, if it is not built yet, and builds flags type.
4185static void getDependTypes(ASTContext &C, QualType &KmpDependInfoTy,
4186 QualType &FlagsTy) {
4187 FlagsTy = C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false);
4188 if (KmpDependInfoTy.isNull()) {
4189 RecordDecl *KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info");
4190 KmpDependInfoRD->startDefinition();
4191 addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType());
4192 addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType());
4193 addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy);
4194 KmpDependInfoRD->completeDefinition();
4195 KmpDependInfoTy = C.getCanonicalTagType(KmpDependInfoRD);
4196 }
4197}
4198
4199std::pair<llvm::Value *, LValue>
4201 SourceLocation Loc) {
4202 ASTContext &C = CGM.getContext();
4203 QualType FlagsTy;
4204 getDependTypes(C, KmpDependInfoTy, FlagsTy);
4205 auto *KmpDependInfoRD = KmpDependInfoTy->castAsRecordDecl();
4206 QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
4208 DepobjLVal.getAddress().withElementType(
4209 CGF.ConvertTypeForMem(KmpDependInfoPtrTy)),
4210 KmpDependInfoPtrTy->castAs<PointerType>());
4211 Address DepObjAddr = CGF.Builder.CreateGEP(
4212 CGF, Base.getAddress(),
4213 llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
4214 LValue NumDepsBase = CGF.MakeAddrLValue(
4215 DepObjAddr, KmpDependInfoTy, Base.getBaseInfo(), Base.getTBAAInfo());
4216 // NumDeps = deps[i].base_addr;
4217 LValue BaseAddrLVal = CGF.EmitLValueForField(
4218 NumDepsBase,
4219 *std::next(KmpDependInfoRD->field_begin(),
4220 static_cast<unsigned int>(RTLDependInfoFields::BaseAddr)));
4221 llvm::Value *NumDeps = CGF.EmitLoadOfScalar(BaseAddrLVal, Loc);
4222 return std::make_pair(NumDeps, Base);
4223}
4224
4225static void emitDependData(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
4226 llvm::PointerUnion<unsigned *, LValue *> Pos,
4228 Address DependenciesArray) {
4229 CodeGenModule &CGM = CGF.CGM;
4230 ASTContext &C = CGM.getContext();
4231 QualType FlagsTy;
4232 getDependTypes(C, KmpDependInfoTy, FlagsTy);
4233 auto *KmpDependInfoRD = KmpDependInfoTy->castAsRecordDecl();
4234 llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
4235
4236 OMPIteratorGeneratorScope IteratorScope(
4237 CGF, cast_or_null<OMPIteratorExpr>(
4238 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
4239 : nullptr));
4240 for (const Expr *E : Data.DepExprs) {
4241 llvm::Value *Addr;
4242 llvm::Value *Size;
4243
4244 // The expression will be a nullptr in the 'omp_all_memory' case.
4245 if (E) {
4246 std::tie(Addr, Size) = getPointerAndSize(CGF, E);
4247 Addr = CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy);
4248 } else {
4249 Addr = llvm::ConstantInt::get(CGF.IntPtrTy, 0);
4250 Size = llvm::ConstantInt::get(CGF.SizeTy, 0);
4251 }
4252 LValue Base;
4253 if (unsigned *P = dyn_cast<unsigned *>(Pos)) {
4254 Base = CGF.MakeAddrLValue(
4255 CGF.Builder.CreateConstGEP(DependenciesArray, *P), KmpDependInfoTy);
4256 } else {
4257 assert(E && "Expected a non-null expression");
4258 LValue &PosLVal = *cast<LValue *>(Pos);
4259 llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4260 Base = CGF.MakeAddrLValue(
4261 CGF.Builder.CreateGEP(CGF, DependenciesArray, Idx), KmpDependInfoTy);
4262 }
4263 // deps[i].base_addr = &<Dependencies[i].second>;
4264 LValue BaseAddrLVal = CGF.EmitLValueForField(
4265 Base,
4266 *std::next(KmpDependInfoRD->field_begin(),
4267 static_cast<unsigned int>(RTLDependInfoFields::BaseAddr)));
4268 CGF.EmitStoreOfScalar(Addr, BaseAddrLVal);
4269 // deps[i].len = sizeof(<Dependencies[i].second>);
4270 LValue LenLVal = CGF.EmitLValueForField(
4271 Base, *std::next(KmpDependInfoRD->field_begin(),
4272 static_cast<unsigned int>(RTLDependInfoFields::Len)));
4273 CGF.EmitStoreOfScalar(Size, LenLVal);
4274 // deps[i].flags = <Dependencies[i].first>;
4275 RTLDependenceKindTy DepKind = translateDependencyKind(Data.DepKind);
4276 LValue FlagsLVal = CGF.EmitLValueForField(
4277 Base,
4278 *std::next(KmpDependInfoRD->field_begin(),
4279 static_cast<unsigned int>(RTLDependInfoFields::Flags)));
4281 llvm::ConstantInt::get(LLVMFlagsTy, static_cast<unsigned int>(DepKind)),
4282 FlagsLVal);
4283 if (unsigned *P = dyn_cast<unsigned *>(Pos)) {
4284 ++(*P);
4285 } else {
4286 LValue &PosLVal = *cast<LValue *>(Pos);
4287 llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4288 Idx = CGF.Builder.CreateNUWAdd(Idx,
4289 llvm::ConstantInt::get(Idx->getType(), 1));
4290 CGF.EmitStoreOfScalar(Idx, PosLVal);
4291 }
4292 }
4293}
4294
4298 assert(Data.DepKind == OMPC_DEPEND_depobj &&
4299 "Expected depobj dependency kind.");
4301 SmallVector<LValue, 4> SizeLVals;
4302 ASTContext &C = CGF.getContext();
4303 {
4304 OMPIteratorGeneratorScope IteratorScope(
4305 CGF, cast_or_null<OMPIteratorExpr>(
4306 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
4307 : nullptr));
4308 for (const Expr *E : Data.DepExprs) {
4309 llvm::Value *NumDeps;
4310 LValue Base;
4311 LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
4312 std::tie(NumDeps, Base) =
4313 getDepobjElements(CGF, DepobjLVal, E->getExprLoc());
4314 LValue NumLVal = CGF.MakeAddrLValue(
4315 CGF.CreateMemTemp(C.getUIntPtrType(), "depobj.size.addr"),
4316 C.getUIntPtrType());
4317 CGF.Builder.CreateStore(llvm::ConstantInt::get(CGF.IntPtrTy, 0),
4318 NumLVal.getAddress());
4319 llvm::Value *PrevVal = CGF.EmitLoadOfScalar(NumLVal, E->getExprLoc());
4320 llvm::Value *Add = CGF.Builder.CreateNUWAdd(PrevVal, NumDeps);
4321 CGF.EmitStoreOfScalar(Add, NumLVal);
4322 SizeLVals.push_back(NumLVal);
4323 }
4324 }
4325 for (unsigned I = 0, E = SizeLVals.size(); I < E; ++I) {
4326 llvm::Value *Size =
4327 CGF.EmitLoadOfScalar(SizeLVals[I], Data.DepExprs[I]->getExprLoc());
4328 Sizes.push_back(Size);
4329 }
4330 return Sizes;
4331}
4332
4335 LValue PosLVal,
4337 Address DependenciesArray) {
4338 assert(Data.DepKind == OMPC_DEPEND_depobj &&
4339 "Expected depobj dependency kind.");
4340 llvm::Value *ElSize = CGF.getTypeSize(KmpDependInfoTy);
4341 {
4342 OMPIteratorGeneratorScope IteratorScope(
4343 CGF, cast_or_null<OMPIteratorExpr>(
4344 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
4345 : nullptr));
4346 for (const Expr *E : Data.DepExprs) {
4347 llvm::Value *NumDeps;
4348 LValue Base;
4349 LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
4350 std::tie(NumDeps, Base) =
4351 getDepobjElements(CGF, DepobjLVal, E->getExprLoc());
4352
4353 // memcopy dependency data.
4354 llvm::Value *Size = CGF.Builder.CreateNUWMul(
4355 ElSize,
4356 CGF.Builder.CreateIntCast(NumDeps, CGF.SizeTy, /*isSigned=*/false));
4357 llvm::Value *Pos = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4358 Address DepAddr = CGF.Builder.CreateGEP(CGF, DependenciesArray, Pos);
4359 CGF.Builder.CreateMemCpy(DepAddr, Base.getAddress(), Size);
4360
4361 // Increase pos.
4362 // pos += size;
4363 llvm::Value *Add = CGF.Builder.CreateNUWAdd(Pos, NumDeps);
4364 CGF.EmitStoreOfScalar(Add, PosLVal);
4365 }
4366 }
4367}
4368
4369std::pair<llvm::Value *, Address> CGOpenMPRuntime::emitDependClause(
4371 SourceLocation Loc) {
4372 if (llvm::all_of(Dependencies, [](const OMPTaskDataTy::DependData &D) {
4373 return D.DepExprs.empty();
4374 }))
4375 return std::make_pair(nullptr, Address::invalid());
4376 // Process list of dependencies.
4377 ASTContext &C = CGM.getContext();
4378 Address DependenciesArray = Address::invalid();
4379 llvm::Value *NumOfElements = nullptr;
4380 unsigned NumDependencies = std::accumulate(
4381 Dependencies.begin(), Dependencies.end(), 0,
4382 [](unsigned V, const OMPTaskDataTy::DependData &D) {
4383 return D.DepKind == OMPC_DEPEND_depobj
4384 ? V
4385 : (V + (D.IteratorExpr ? 0 : D.DepExprs.size()));
4386 });
4387 QualType FlagsTy;
4388 getDependTypes(C, KmpDependInfoTy, FlagsTy);
4389 bool HasDepobjDeps = false;
4390 bool HasRegularWithIterators = false;
4391 llvm::Value *NumOfDepobjElements = llvm::ConstantInt::get(CGF.IntPtrTy, 0);
4392 llvm::Value *NumOfRegularWithIterators =
4393 llvm::ConstantInt::get(CGF.IntPtrTy, 0);
4394 // Calculate number of depobj dependencies and regular deps with the
4395 // iterators.
4396 for (const OMPTaskDataTy::DependData &D : Dependencies) {
4397 if (D.DepKind == OMPC_DEPEND_depobj) {
4400 for (llvm::Value *Size : Sizes) {
4401 NumOfDepobjElements =
4402 CGF.Builder.CreateNUWAdd(NumOfDepobjElements, Size);
4403 }
4404 HasDepobjDeps = true;
4405 continue;
4406 }
4407 // Include number of iterations, if any.
4408
4409 if (const auto *IE = cast_or_null<OMPIteratorExpr>(D.IteratorExpr)) {
4410 llvm::Value *ClauseIteratorSpace =
4411 llvm::ConstantInt::get(CGF.IntPtrTy, 1);
4412 for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
4413 llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
4414 Sz = CGF.Builder.CreateIntCast(Sz, CGF.IntPtrTy, /*isSigned=*/false);
4415 ClauseIteratorSpace = CGF.Builder.CreateNUWMul(Sz, ClauseIteratorSpace);
4416 }
4417 llvm::Value *NumClauseDeps = CGF.Builder.CreateNUWMul(
4418 ClauseIteratorSpace,
4419 llvm::ConstantInt::get(CGF.IntPtrTy, D.DepExprs.size()));
4420 NumOfRegularWithIterators =
4421 CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumClauseDeps);
4422 HasRegularWithIterators = true;
4423 continue;
4424 }
4425 }
4426
4427 QualType KmpDependInfoArrayTy;
4428 if (HasDepobjDeps || HasRegularWithIterators) {
4429 NumOfElements = llvm::ConstantInt::get(CGM.IntPtrTy, NumDependencies,
4430 /*isSigned=*/false);
4431 if (HasDepobjDeps) {
4432 NumOfElements =
4433 CGF.Builder.CreateNUWAdd(NumOfDepobjElements, NumOfElements);
4434 }
4435 if (HasRegularWithIterators) {
4436 NumOfElements =
4437 CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumOfElements);
4438 }
4439 auto *OVE = new (C) OpaqueValueExpr(
4440 Loc, C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0),
4441 VK_PRValue);
4442 CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE,
4443 RValue::get(NumOfElements));
4444 KmpDependInfoArrayTy =
4445 C.getVariableArrayType(KmpDependInfoTy, OVE, ArraySizeModifier::Normal,
4446 /*IndexTypeQuals=*/0);
4447 // CGF.EmitVariablyModifiedType(KmpDependInfoArrayTy);
4448 // Properly emit variable-sized array.
4449 auto *PD = ImplicitParamDecl::Create(C, KmpDependInfoArrayTy,
4451 CGF.EmitVarDecl(*PD);
4452 DependenciesArray = CGF.GetAddrOfLocalVar(PD);
4453 NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
4454 /*isSigned=*/false);
4455 } else {
4456 KmpDependInfoArrayTy = C.getConstantArrayType(
4457 KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies), nullptr,
4458 ArraySizeModifier::Normal, /*IndexTypeQuals=*/0);
4459 DependenciesArray =
4460 CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr");
4461 DependenciesArray = CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0);
4462 NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumDependencies,
4463 /*isSigned=*/false);
4464 }
4465 unsigned Pos = 0;
4466 for (const OMPTaskDataTy::DependData &Dep : Dependencies) {
4467 if (Dep.DepKind == OMPC_DEPEND_depobj || Dep.IteratorExpr)
4468 continue;
4469 emitDependData(CGF, KmpDependInfoTy, &Pos, Dep, DependenciesArray);
4470 }
4471 // Copy regular dependencies with iterators.
4472 LValue PosLVal = CGF.MakeAddrLValue(
4473 CGF.CreateMemTemp(C.getSizeType(), "dep.counter.addr"), C.getSizeType());
4474 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
4475 for (const OMPTaskDataTy::DependData &Dep : Dependencies) {
4476 if (Dep.DepKind == OMPC_DEPEND_depobj || !Dep.IteratorExpr)
4477 continue;
4478 emitDependData(CGF, KmpDependInfoTy, &PosLVal, Dep, DependenciesArray);
4479 }
4480 // Copy final depobj arrays without iterators.
4481 if (HasDepobjDeps) {
4482 for (const OMPTaskDataTy::DependData &Dep : Dependencies) {
4483 if (Dep.DepKind != OMPC_DEPEND_depobj)
4484 continue;
4485 emitDepobjElements(CGF, KmpDependInfoTy, PosLVal, Dep, DependenciesArray);
4486 }
4487 }
4488 DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4489 DependenciesArray, CGF.VoidPtrTy, CGF.Int8Ty);
4490 return std::make_pair(NumOfElements, DependenciesArray);
4491}
4492
4494 CodeGenFunction &CGF, const OMPTaskDataTy::DependData &Dependencies,
4495 SourceLocation Loc) {
4496 if (Dependencies.DepExprs.empty())
4497 return Address::invalid();
4498 // Process list of dependencies.
4499 ASTContext &C = CGM.getContext();
4500 Address DependenciesArray = Address::invalid();
4501 unsigned NumDependencies = Dependencies.DepExprs.size();
4502 QualType FlagsTy;
4503 getDependTypes(C, KmpDependInfoTy, FlagsTy);
4504 auto *KmpDependInfoRD = KmpDependInfoTy->castAsRecordDecl();
4505
4506 llvm::Value *Size;
4507 // Define type kmp_depend_info[<Dependencies.size()>];
4508 // For depobj reserve one extra element to store the number of elements.
4509 // It is required to handle depobj(x) update(in) construct.
4510 // kmp_depend_info[<Dependencies.size()>] deps;
4511 llvm::Value *NumDepsVal;
4512 CharUnits Align = C.getTypeAlignInChars(KmpDependInfoTy);
4513 if (const auto *IE =
4514 cast_or_null<OMPIteratorExpr>(Dependencies.IteratorExpr)) {
4515 NumDepsVal = llvm::ConstantInt::get(CGF.SizeTy, 1);
4516 for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
4517 llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
4518 Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
4519 NumDepsVal = CGF.Builder.CreateNUWMul(NumDepsVal, Sz);
4520 }
4521 Size = CGF.Builder.CreateNUWAdd(llvm::ConstantInt::get(CGF.SizeTy, 1),
4522 NumDepsVal);
4523 CharUnits SizeInBytes =
4524 C.getTypeSizeInChars(KmpDependInfoTy).alignTo(Align);
4525 llvm::Value *RecSize = CGM.getSize(SizeInBytes);
4526 Size = CGF.Builder.CreateNUWMul(Size, RecSize);
4527 NumDepsVal =
4528 CGF.Builder.CreateIntCast(NumDepsVal, CGF.IntPtrTy, /*isSigned=*/false);
4529 } else {
4530 QualType KmpDependInfoArrayTy = C.getConstantArrayType(
4531 KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies + 1),
4532 nullptr, ArraySizeModifier::Normal, /*IndexTypeQuals=*/0);
4533 CharUnits Sz = C.getTypeSizeInChars(KmpDependInfoArrayTy);
4534 Size = CGM.getSize(Sz.alignTo(Align));
4535 NumDepsVal = llvm::ConstantInt::get(CGF.IntPtrTy, NumDependencies);
4536 }
4537 // Need to allocate on the dynamic memory.
4538 llvm::Value *ThreadID = getThreadID(CGF, Loc);
4539 // Use default allocator.
4540 llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
4541 llvm::Value *Args[] = {ThreadID, Size, Allocator};
4542
4543 llvm::Value *Addr =
4544 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
4545 CGM.getModule(), OMPRTL___kmpc_alloc),
4546 Args, ".dep.arr.addr");
4547 llvm::Type *KmpDependInfoLlvmTy = CGF.ConvertTypeForMem(KmpDependInfoTy);
4549 Addr, CGF.Builder.getPtrTy(0));
4550 DependenciesArray = Address(Addr, KmpDependInfoLlvmTy, Align);
4551 // Write number of elements in the first element of array for depobj.
4552 LValue Base = CGF.MakeAddrLValue(DependenciesArray, KmpDependInfoTy);
4553 // deps[i].base_addr = NumDependencies;
4554 LValue BaseAddrLVal = CGF.EmitLValueForField(
4555 Base,
4556 *std::next(KmpDependInfoRD->field_begin(),
4557 static_cast<unsigned int>(RTLDependInfoFields::BaseAddr)));
4558 CGF.EmitStoreOfScalar(NumDepsVal, BaseAddrLVal);
4559 llvm::PointerUnion<unsigned *, LValue *> Pos;
4560 unsigned Idx = 1;
4561 LValue PosLVal;
4562 if (Dependencies.IteratorExpr) {
4563 PosLVal = CGF.MakeAddrLValue(
4564 CGF.CreateMemTemp(C.getSizeType(), "iterator.counter.addr"),
4565 C.getSizeType());
4566 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Idx), PosLVal,
4567 /*IsInit=*/true);
4568 Pos = &PosLVal;
4569 } else {
4570 Pos = &Idx;
4571 }
4572 emitDependData(CGF, KmpDependInfoTy, Pos, Dependencies, DependenciesArray);
4573 DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4574 CGF.Builder.CreateConstGEP(DependenciesArray, 1), CGF.VoidPtrTy,
4575 CGF.Int8Ty);
4576 return DependenciesArray;
4577}
4578
4580 SourceLocation Loc) {
4581 ASTContext &C = CGM.getContext();
4582 QualType FlagsTy;
4583 getDependTypes(C, KmpDependInfoTy, FlagsTy);
4584 LValue Base = CGF.EmitLoadOfPointerLValue(DepobjLVal.getAddress(),
4585 C.VoidPtrTy.castAs<PointerType>());
4586 QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
4588 Base.getAddress(), CGF.ConvertTypeForMem(KmpDependInfoPtrTy),
4590 llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
4591 Addr.getElementType(), Addr.emitRawPointer(CGF),
4592 llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
4593 DepObjAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(DepObjAddr,
4594 CGF.VoidPtrTy);
4595 llvm::Value *ThreadID = getThreadID(CGF, Loc);
4596 // Use default allocator.
4597 llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
4598 llvm::Value *Args[] = {ThreadID, DepObjAddr, Allocator};
4599
4600 // _kmpc_free(gtid, addr, nullptr);
4601 (void)CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
4602 CGM.getModule(), OMPRTL___kmpc_free),
4603 Args);
4604}
4605
4607 OpenMPDependClauseKind NewDepKind,
4608 SourceLocation Loc) {
4609 ASTContext &C = CGM.getContext();
4610 QualType FlagsTy;
4611 getDependTypes(C, KmpDependInfoTy, FlagsTy);
4612 auto *KmpDependInfoRD = KmpDependInfoTy->castAsRecordDecl();
4613 llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
4614 llvm::Value *NumDeps;
4615 LValue Base;
4616 std::tie(NumDeps, Base) = getDepobjElements(CGF, DepobjLVal, Loc);
4617
4618 Address Begin = Base.getAddress();
4619 // Cast from pointer to array type to pointer to single element.
4620 llvm::Value *End = CGF.Builder.CreateGEP(Begin.getElementType(),
4621 Begin.emitRawPointer(CGF), NumDeps);
4622 // The basic structure here is a while-do loop.
4623 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.body");
4624 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.done");
4625 llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
4626 CGF.EmitBlock(BodyBB);
4627 llvm::PHINode *ElementPHI =
4628 CGF.Builder.CreatePHI(Begin.getType(), 2, "omp.elementPast");
4629 ElementPHI->addIncoming(Begin.emitRawPointer(CGF), EntryBB);
4630 Begin = Begin.withPointer(ElementPHI, KnownNonNull);
4631 Base = CGF.MakeAddrLValue(Begin, KmpDependInfoTy, Base.getBaseInfo(),
4632 Base.getTBAAInfo());
4633 // deps[i].flags = NewDepKind;
4634 RTLDependenceKindTy DepKind = translateDependencyKind(NewDepKind);
4635 LValue FlagsLVal = CGF.EmitLValueForField(
4636 Base, *std::next(KmpDependInfoRD->field_begin(),
4637 static_cast<unsigned int>(RTLDependInfoFields::Flags)));
4639 llvm::ConstantInt::get(LLVMFlagsTy, static_cast<unsigned int>(DepKind)),
4640 FlagsLVal);
4641
4642 // Shift the address forward by one element.
4643 llvm::Value *ElementNext =
4644 CGF.Builder.CreateConstGEP(Begin, /*Index=*/1, "omp.elementNext")
4645 .emitRawPointer(CGF);
4646 ElementPHI->addIncoming(ElementNext, CGF.Builder.GetInsertBlock());
4647 llvm::Value *IsEmpty =
4648 CGF.Builder.CreateICmpEQ(ElementNext, End, "omp.isempty");
4649 CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
4650 // Done.
4651 CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
4652}
4653
4655 const OMPExecutableDirective &D,
4656 llvm::Function *TaskFunction,
4657 QualType SharedsTy, Address Shareds,
4658 const Expr *IfCond,
4659 const OMPTaskDataTy &Data) {
4660 if (!CGF.HaveInsertPoint())
4661 return;
4662
4664 emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
4665 llvm::Value *NewTask = Result.NewTask;
4666 llvm::Function *TaskEntry = Result.TaskEntry;
4667 llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy;
4668 LValue TDBase = Result.TDBase;
4669 const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD;
4670 // Process list of dependences.
4671 Address DependenciesArray = Address::invalid();
4672 llvm::Value *NumOfElements;
4673 std::tie(NumOfElements, DependenciesArray) =
4674 emitDependClause(CGF, Data.Dependences, Loc);
4675
4676 // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
4677 // libcall.
4678 // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
4679 // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
4680 // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence
4681 // list is not empty
4682 llvm::Value *ThreadID = getThreadID(CGF, Loc);
4683 llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
4684 llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask };
4685 llvm::Value *DepTaskArgs[7];
4686 if (!Data.Dependences.empty()) {
4687 DepTaskArgs[0] = UpLoc;
4688 DepTaskArgs[1] = ThreadID;
4689 DepTaskArgs[2] = NewTask;
4690 DepTaskArgs[3] = NumOfElements;
4691 DepTaskArgs[4] = DependenciesArray.emitRawPointer(CGF);
4692 DepTaskArgs[5] = CGF.Builder.getInt32(0);
4693 DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
4694 }
4695 auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, &TaskArgs,
4696 &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) {
4697 if (!Data.Tied) {
4698 auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
4699 LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI);
4700 CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal);
4701 }
4702 if (!Data.Dependences.empty()) {
4703 CGF.EmitRuntimeCall(
4704 OMPBuilder.getOrCreateRuntimeFunction(
4705 CGM.getModule(), OMPRTL___kmpc_omp_task_with_deps),
4706 DepTaskArgs);
4707 } else {
4708 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
4709 CGM.getModule(), OMPRTL___kmpc_omp_task),
4710 TaskArgs);
4711 }
4712 // Check if parent region is untied and build return for untied task;
4713 if (auto *Region =
4714 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
4715 Region->emitUntiedSwitch(CGF);
4716 };
4717
4718 llvm::Value *DepWaitTaskArgs[7];
4719 if (!Data.Dependences.empty()) {
4720 DepWaitTaskArgs[0] = UpLoc;
4721 DepWaitTaskArgs[1] = ThreadID;
4722 DepWaitTaskArgs[2] = NumOfElements;
4723 DepWaitTaskArgs[3] = DependenciesArray.emitRawPointer(CGF);
4724 DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
4725 DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
4726 DepWaitTaskArgs[6] =
4727 llvm::ConstantInt::get(CGF.Int32Ty, Data.HasNowaitClause);
4728 }
4729 auto &M = CGM.getModule();
4730 auto &&ElseCodeGen = [this, &M, &TaskArgs, ThreadID, NewTaskNewTaskTTy,
4731 TaskEntry, &Data, &DepWaitTaskArgs,
4732 Loc](CodeGenFunction &CGF, PrePostActionTy &) {
4733 CodeGenFunction::RunCleanupsScope LocalScope(CGF);
4734 // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
4735 // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
4736 // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
4737 // is specified.
4738 if (!Data.Dependences.empty())
4739 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
4740 M, OMPRTL___kmpc_omp_taskwait_deps_51),
4741 DepWaitTaskArgs);
4742 // Call proxy_task_entry(gtid, new_task);
4743 auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy,
4744 Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
4745 Action.Enter(CGF);
4746 llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
4747 CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry,
4748 OutlinedFnArgs);
4749 };
4750
4751 // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
4752 // kmp_task_t *new_task);
4753 // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
4754 // kmp_task_t *new_task);
4756 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
4757 M, OMPRTL___kmpc_omp_task_begin_if0),
4758 TaskArgs,
4759 OMPBuilder.getOrCreateRuntimeFunction(
4760 M, OMPRTL___kmpc_omp_task_complete_if0),
4761 TaskArgs);
4762 RCG.setAction(Action);
4763 RCG(CGF);
4764 };
4765
4766 if (IfCond) {
4767 emitIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen);
4768 } else {
4769 RegionCodeGenTy ThenRCG(ThenCodeGen);
4770 ThenRCG(CGF);
4771 }
4772}
4773
4775 const OMPLoopDirective &D,
4776 llvm::Function *TaskFunction,
4777 QualType SharedsTy, Address Shareds,
4778 const Expr *IfCond,
4779 const OMPTaskDataTy &Data) {
4780 if (!CGF.HaveInsertPoint())
4781 return;
4783 emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
4784 // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
4785 // libcall.
4786 // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
4787 // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
4788 // sched, kmp_uint64 grainsize, void *task_dup);
4789 llvm::Value *ThreadID = getThreadID(CGF, Loc);
4790 llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
4791 llvm::Value *IfVal;
4792 if (IfCond) {
4793 IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy,
4794 /*isSigned=*/true);
4795 } else {
4796 IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1);
4797 }
4798
4799 LValue LBLVal = CGF.EmitLValueForField(
4800 Result.TDBase,
4801 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound));
4802 const auto *LBVar =
4803 cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl());
4804 CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(), LBLVal.getQuals(),
4805 /*IsInitializer=*/true);
4806 LValue UBLVal = CGF.EmitLValueForField(
4807 Result.TDBase,
4808 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound));
4809 const auto *UBVar =
4810 cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl());
4811 CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(), UBLVal.getQuals(),
4812 /*IsInitializer=*/true);
4813 LValue StLVal = CGF.EmitLValueForField(
4814 Result.TDBase,
4815 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride));
4816 const auto *StVar =
4817 cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl());
4818 CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(), StLVal.getQuals(),
4819 /*IsInitializer=*/true);
4820 // Store reductions address.
4821 LValue RedLVal = CGF.EmitLValueForField(
4822 Result.TDBase,
4823 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions));
4824 if (Data.Reductions) {
4825 CGF.EmitStoreOfScalar(Data.Reductions, RedLVal);
4826 } else {
4827 CGF.EmitNullInitialization(RedLVal.getAddress(),
4828 CGF.getContext().VoidPtrTy);
4829 }
4830 enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 };
4832 UpLoc,
4833 ThreadID,
4834 Result.NewTask,
4835 IfVal,
4836 LBLVal.getPointer(CGF),
4837 UBLVal.getPointer(CGF),
4838 CGF.EmitLoadOfScalar(StLVal, Loc),
4839 llvm::ConstantInt::getSigned(
4840 CGF.IntTy, 1), // Always 1 because taskgroup emitted by the compiler
4841 llvm::ConstantInt::getSigned(
4842 CGF.IntTy, Data.Schedule.getPointer()
4843 ? Data.Schedule.getInt() ? NumTasks : Grainsize
4844 : NoSchedule),
4845 Data.Schedule.getPointer()
4846 ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty,
4847 /*isSigned=*/false)
4848 : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0)};
4849 if (Data.HasModifier)
4850 TaskArgs.push_back(llvm::ConstantInt::get(CGF.Int32Ty, 1));
4851
4852 TaskArgs.push_back(Result.TaskDupFn
4854 Result.TaskDupFn, CGF.VoidPtrTy)
4855 : llvm::ConstantPointerNull::get(CGF.VoidPtrTy));
4856 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
4857 CGM.getModule(), Data.HasModifier
4858 ? OMPRTL___kmpc_taskloop_5
4859 : OMPRTL___kmpc_taskloop),
4860 TaskArgs);
4861}
4862
4863/// Emit reduction operation for each element of array (required for
4864/// array sections) LHS op = RHS.
4865/// \param Type Type of array.
4866/// \param LHSVar Variable on the left side of the reduction operation
4867/// (references element of array in original variable).
4868/// \param RHSVar Variable on the right side of the reduction operation
4869/// (references element of array in original variable).
4870/// \param RedOpGen Generator of reduction operation with use of LHSVar and
4871/// RHSVar.
4873 CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar,
4874 const VarDecl *RHSVar,
4875 const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *,
4876 const Expr *, const Expr *)> &RedOpGen,
4877 const Expr *XExpr = nullptr, const Expr *EExpr = nullptr,
4878 const Expr *UpExpr = nullptr) {
4879 // Perform element-by-element initialization.
4880 QualType ElementTy;
4881 Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar);
4882 Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar);
4883
4884 // Drill down to the base element type on both arrays.
4885 const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
4886 llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr);
4887
4888 llvm::Value *RHSBegin = RHSAddr.emitRawPointer(CGF);
4889 llvm::Value *LHSBegin = LHSAddr.emitRawPointer(CGF);
4890 // Cast from pointer to array type to pointer to single element.
4891 llvm::Value *LHSEnd =
4892 CGF.Builder.CreateGEP(LHSAddr.getElementType(), LHSBegin, NumElements);
4893 // The basic structure here is a while-do loop.
4894 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arraycpy.body");
4895 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arraycpy.done");
4896 llvm::Value *IsEmpty =
4897 CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty");
4898 CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
4899
4900 // Enter the loop body, making that address the current address.
4901 llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
4902 CGF.EmitBlock(BodyBB);
4903
4904 CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);
4905
4906 llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI(
4907 RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast");
4908 RHSElementPHI->addIncoming(RHSBegin, EntryBB);
4909 Address RHSElementCurrent(
4910 RHSElementPHI, RHSAddr.getElementType(),
4911 RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));
4912
4913 llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI(
4914 LHSBegin->getType(), 2, "omp.arraycpy.destElementPast");
4915 LHSElementPHI->addIncoming(LHSBegin, EntryBB);
4916 Address LHSElementCurrent(
4917 LHSElementPHI, LHSAddr.getElementType(),
4918 LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));
4919
4920 // Emit copy.
4922 Scope.addPrivate(LHSVar, LHSElementCurrent);
4923 Scope.addPrivate(RHSVar, RHSElementCurrent);
4924 Scope.Privatize();
4925 RedOpGen(CGF, XExpr, EExpr, UpExpr);
4926 Scope.ForceCleanup();
4927
4928 // Shift the address forward by one element.
4929 llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32(
4930 LHSAddr.getElementType(), LHSElementPHI, /*Idx0=*/1,
4931 "omp.arraycpy.dest.element");
4932 llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32(
4933 RHSAddr.getElementType(), RHSElementPHI, /*Idx0=*/1,
4934 "omp.arraycpy.src.element");
4935 // Check whether we've reached the end.
4936 llvm::Value *Done =
4937 CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done");
4938 CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
4939 LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock());
4940 RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock());
4941
4942 // Done.
4943 CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
4944}
4945
4946/// Emit reduction combiner. If the combiner is a simple expression emit it as
4947/// is, otherwise consider it as combiner of UDR decl and emit it as a call of
4948/// UDR combiner function.
4950 const Expr *ReductionOp) {
4951 if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
4952 if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
4953 if (const auto *DRE =
4954 dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
4955 if (const auto *DRD =
4956 dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) {
4957 std::pair<llvm::Function *, llvm::Function *> Reduction =
4961 CGF.EmitIgnoredExpr(ReductionOp);
4962 return;
4963 }
4964 CGF.EmitIgnoredExpr(ReductionOp);
4965}
4966
4968 StringRef ReducerName, SourceLocation Loc, llvm::Type *ArgsElemType,
4970 ArrayRef<const Expr *> RHSExprs, ArrayRef<const Expr *> ReductionOps) {
4971 ASTContext &C = CGM.getContext();
4972
4973 // void reduction_func(void *LHSArg, void *RHSArg);
4974 FunctionArgList Args;
4975 ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
4977 ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
4979 Args.push_back(&LHSArg);
4980 Args.push_back(&RHSArg);
4981 const auto &CGFI =
4982 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
4983 std::string Name = getReductionFuncName(ReducerName);
4984 auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
4985 llvm::GlobalValue::InternalLinkage, Name,
4986 &CGM.getModule());
4987 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
4988 if (!CGM.getCodeGenOpts().SampleProfileFile.empty())
4989 Fn->addFnAttr("sample-profile-suffix-elision-policy", "selected");
4990 Fn->setDoesNotRecurse();
4991 CodeGenFunction CGF(CGM);
4992 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
4993
4994 // Dst = (void*[n])(LHSArg);
4995 // Src = (void*[n])(RHSArg);
4997 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
4998 CGF.Builder.getPtrTy(0)),
4999 ArgsElemType, CGF.getPointerAlign());
5001 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
5002 CGF.Builder.getPtrTy(0)),
5003 ArgsElemType, CGF.getPointerAlign());
5004
5005 // ...
5006 // *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
5007 // ...
5009 const auto *IPriv = Privates.begin();
5010 unsigned Idx = 0;
5011 for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) {
5012 const auto *RHSVar =
5013 cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl());
5014 Scope.addPrivate(RHSVar, emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar));
5015 const auto *LHSVar =
5016 cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl());
5017 Scope.addPrivate(LHSVar, emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar));
5018 QualType PrivTy = (*IPriv)->getType();
5019 if (PrivTy->isVariablyModifiedType()) {
5020 // Get array size and emit VLA type.
5021 ++Idx;
5022 Address Elem = CGF.Builder.CreateConstArrayGEP(LHS, Idx);
5023 llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem);
5024 const VariableArrayType *VLA =
5025 CGF.getContext().getAsVariableArrayType(PrivTy);
5026 const auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr());
5028 CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy)));
5029 CGF.EmitVariablyModifiedType(PrivTy);
5030 }
5031 }
5032 Scope.Privatize();
5033 IPriv = Privates.begin();
5034 const auto *ILHS = LHSExprs.begin();
5035 const auto *IRHS = RHSExprs.begin();
5036 for (const Expr *E : ReductionOps) {
5037 if ((*IPriv)->getType()->isArrayType()) {
5038 // Emit reduction for array section.
5039 const auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
5040 const auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
5042 CGF, (*IPriv)->getType(), LHSVar, RHSVar,
5043 [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
5044 emitReductionCombiner(CGF, E);
5045 });
5046 } else {
5047 // Emit reduction for array subscript or single variable.
5048 emitReductionCombiner(CGF, E);
5049 }
5050 ++IPriv;
5051 ++ILHS;
5052 ++IRHS;
5053 }
5054 Scope.ForceCleanup();
5055 CGF.FinishFunction();
5056 return Fn;
5057}
5058
5060 const Expr *ReductionOp,
5061 const Expr *PrivateRef,
5062 const DeclRefExpr *LHS,
5063 const DeclRefExpr *RHS) {
5064 if (PrivateRef->getType()->isArrayType()) {
5065 // Emit reduction for array section.
5066 const auto *LHSVar = cast<VarDecl>(LHS->getDecl());
5067 const auto *RHSVar = cast<VarDecl>(RHS->getDecl());
5069 CGF, PrivateRef->getType(), LHSVar, RHSVar,
5070 [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
5071 emitReductionCombiner(CGF, ReductionOp);
5072 });
5073 } else {
5074 // Emit reduction for array subscript or single variable.
5075 emitReductionCombiner(CGF, ReductionOp);
5076 }
5077}
5078
5079static std::string generateUniqueName(CodeGenModule &CGM,
5080 llvm::StringRef Prefix, const Expr *Ref);
5081
5083 CodeGenFunction &CGF, SourceLocation Loc, const Expr *Privates,
5084 const Expr *LHSExprs, const Expr *RHSExprs, const Expr *ReductionOps) {
5085
5086 // Create a shared global variable (__shared_reduction_var) to accumulate the
5087 // final result.
5088 //
5089 // Call __kmpc_barrier to synchronize threads before initialization.
5090 //
5091 // The master thread (thread_id == 0) initializes __shared_reduction_var
5092 // with the identity value or initializer.
5093 //
5094 // Call __kmpc_barrier to synchronize before combining.
5095 // For each i:
5096 // - Thread enters critical section.
5097 // - Reads its private value from LHSExprs[i].
5098 // - Updates __shared_reduction_var[i] = RedOp_i(__shared_reduction_var[i],
5099 // Privates[i]).
5100 // - Exits critical section.
5101 //
5102 // Call __kmpc_barrier after combining.
5103 //
5104 // Each thread copies __shared_reduction_var[i] back to RHSExprs[i].
5105 //
5106 // Final __kmpc_barrier to synchronize after broadcasting
5107 QualType PrivateType = Privates->getType();
5108 llvm::Type *LLVMType = CGF.ConvertTypeForMem(PrivateType);
5109
5110 const OMPDeclareReductionDecl *UDR = getReductionInit(ReductionOps);
5111 std::string ReductionVarNameStr;
5112 if (const auto *DRE = dyn_cast<DeclRefExpr>(Privates->IgnoreParenCasts()))
5113 ReductionVarNameStr =
5114 generateUniqueName(CGM, DRE->getDecl()->getNameAsString(), Privates);
5115 else
5116 ReductionVarNameStr = "unnamed_priv_var";
5117
5118 // Create an internal shared variable
5119 std::string SharedName =
5120 CGM.getOpenMPRuntime().getName({"internal_pivate_", ReductionVarNameStr});
5121 llvm::GlobalVariable *SharedVar = OMPBuilder.getOrCreateInternalVariable(
5122 LLVMType, ".omp.reduction." + SharedName);
5123
5124 SharedVar->setAlignment(
5125 llvm::MaybeAlign(CGF.getContext().getTypeAlign(PrivateType) / 8));
5126
5127 Address SharedResult =
5128 CGF.MakeNaturalAlignRawAddrLValue(SharedVar, PrivateType).getAddress();
5129
5130 llvm::Value *ThreadId = getThreadID(CGF, Loc);
5131 llvm::Value *BarrierLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
5132 llvm::Value *BarrierArgs[] = {BarrierLoc, ThreadId};
5133
5134 llvm::BasicBlock *InitBB = CGF.createBasicBlock("init");
5135 llvm::BasicBlock *InitEndBB = CGF.createBasicBlock("init.end");
5136
5137 llvm::Value *IsWorker = CGF.Builder.CreateICmpEQ(
5138 ThreadId, llvm::ConstantInt::get(ThreadId->getType(), 0));
5139 CGF.Builder.CreateCondBr(IsWorker, InitBB, InitEndBB);
5140
5141 CGF.EmitBlock(InitBB);
5142
5143 auto EmitSharedInit = [&]() {
5144 if (UDR) { // Check if it's a User-Defined Reduction
5145 if (const Expr *UDRInitExpr = UDR->getInitializer()) {
5146 std::pair<llvm::Function *, llvm::Function *> FnPair =
5148 llvm::Function *InitializerFn = FnPair.second;
5149 if (InitializerFn) {
5150 if (const auto *CE =
5151 dyn_cast<CallExpr>(UDRInitExpr->IgnoreParenImpCasts())) {
5152 const auto *OutDRE = cast<DeclRefExpr>(
5153 cast<UnaryOperator>(CE->getArg(0)->IgnoreParenImpCasts())
5154 ->getSubExpr());
5155 const VarDecl *OutVD = cast<VarDecl>(OutDRE->getDecl());
5156
5157 CodeGenFunction::OMPPrivateScope LocalScope(CGF);
5158 LocalScope.addPrivate(OutVD, SharedResult);
5159
5160 (void)LocalScope.Privatize();
5161 if (const auto *OVE = dyn_cast<OpaqueValueExpr>(
5162 CE->getCallee()->IgnoreParenImpCasts())) {
5164 CGF, OVE, RValue::get(InitializerFn));
5165 CGF.EmitIgnoredExpr(CE);
5166 } else {
5167 CGF.EmitAnyExprToMem(UDRInitExpr, SharedResult,
5168 PrivateType.getQualifiers(),
5169 /*IsInitializer=*/true);
5170 }
5171 } else {
5172 CGF.EmitAnyExprToMem(UDRInitExpr, SharedResult,
5173 PrivateType.getQualifiers(),
5174 /*IsInitializer=*/true);
5175 }
5176 } else {
5177 CGF.EmitAnyExprToMem(UDRInitExpr, SharedResult,
5178 PrivateType.getQualifiers(),
5179 /*IsInitializer=*/true);
5180 }
5181 } else {
5182 // EmitNullInitialization handles default construction for C++ classes
5183 // and zeroing for scalars, which is a reasonable default.
5184 CGF.EmitNullInitialization(SharedResult, PrivateType);
5185 }
5186 return; // UDR initialization handled
5187 }
5188 if (const auto *DRE = dyn_cast<DeclRefExpr>(Privates)) {
5189 if (const auto *VD = dyn_cast<VarDecl>(DRE->getDecl())) {
5190 if (const Expr *InitExpr = VD->getInit()) {
5191 CGF.EmitAnyExprToMem(InitExpr, SharedResult,
5192 PrivateType.getQualifiers(), true);
5193 return;
5194 }
5195 }
5196 }
5197 CGF.EmitNullInitialization(SharedResult, PrivateType);
5198 };
5199 EmitSharedInit();
5200 CGF.Builder.CreateBr(InitEndBB);
5201 CGF.EmitBlock(InitEndBB);
5202
5203 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
5204 CGM.getModule(), OMPRTL___kmpc_barrier),
5205 BarrierArgs);
5206
5207 const Expr *ReductionOp = ReductionOps;
5208 const OMPDeclareReductionDecl *CurrentUDR = getReductionInit(ReductionOp);
5209 LValue SharedLV = CGF.MakeAddrLValue(SharedResult, PrivateType);
5210 LValue LHSLV = CGF.EmitLValue(Privates);
5211
5212 auto EmitCriticalReduction = [&](auto ReductionGen) {
5213 std::string CriticalName = getName({"reduction_critical"});
5214 emitCriticalRegion(CGF, CriticalName, ReductionGen, Loc);
5215 };
5216
5217 if (CurrentUDR) {
5218 // Handle user-defined reduction.
5219 auto ReductionGen = [&](CodeGenFunction &CGF, PrePostActionTy &Action) {
5220 Action.Enter(CGF);
5221 std::pair<llvm::Function *, llvm::Function *> FnPair =
5222 getUserDefinedReduction(CurrentUDR);
5223 if (FnPair.first) {
5224 if (const auto *CE = dyn_cast<CallExpr>(ReductionOp)) {
5225 const auto *OutDRE = cast<DeclRefExpr>(
5226 cast<UnaryOperator>(CE->getArg(0)->IgnoreParenImpCasts())
5227 ->getSubExpr());
5228 const auto *InDRE = cast<DeclRefExpr>(
5229 cast<UnaryOperator>(CE->getArg(1)->IgnoreParenImpCasts())
5230 ->getSubExpr());
5231 CodeGenFunction::OMPPrivateScope LocalScope(CGF);
5232 LocalScope.addPrivate(cast<VarDecl>(OutDRE->getDecl()),
5233 SharedLV.getAddress());
5234 LocalScope.addPrivate(cast<VarDecl>(InDRE->getDecl()),
5235 LHSLV.getAddress());
5236 (void)LocalScope.Privatize();
5237 emitReductionCombiner(CGF, ReductionOp);
5238 }
5239 }
5240 };
5241 EmitCriticalReduction(ReductionGen);
5242 } else {
5243 // Handle built-in reduction operations.
5244#ifndef NDEBUG
5245 const Expr *ReductionClauseExpr = ReductionOp->IgnoreParenCasts();
5246 if (const auto *Cleanup = dyn_cast<ExprWithCleanups>(ReductionClauseExpr))
5247 ReductionClauseExpr = Cleanup->getSubExpr()->IgnoreParenCasts();
5248
5249 const Expr *AssignRHS = nullptr;
5250 if (const auto *BinOp = dyn_cast<BinaryOperator>(ReductionClauseExpr)) {
5251 if (BinOp->getOpcode() == BO_Assign)
5252 AssignRHS = BinOp->getRHS();
5253 } else if (const auto *OpCall =
5254 dyn_cast<CXXOperatorCallExpr>(ReductionClauseExpr)) {
5255 if (OpCall->getOperator() == OO_Equal)
5256 AssignRHS = OpCall->getArg(1);
5257 }
5258
5259 assert(AssignRHS &&
5260 "Private Variable Reduction : Invalid ReductionOp expression");
5261#endif
5262
5263 auto ReductionGen = [&](CodeGenFunction &CGF, PrePostActionTy &Action) {
5264 Action.Enter(CGF);
5265 const auto *OmpOutDRE =
5266 dyn_cast<DeclRefExpr>(LHSExprs->IgnoreParenImpCasts());
5267 const auto *OmpInDRE =
5268 dyn_cast<DeclRefExpr>(RHSExprs->IgnoreParenImpCasts());
5269 assert(
5270 OmpOutDRE && OmpInDRE &&
5271 "Private Variable Reduction : LHSExpr/RHSExpr must be DeclRefExprs");
5272 const VarDecl *OmpOutVD = cast<VarDecl>(OmpOutDRE->getDecl());
5273 const VarDecl *OmpInVD = cast<VarDecl>(OmpInDRE->getDecl());
5274 CodeGenFunction::OMPPrivateScope LocalScope(CGF);
5275 LocalScope.addPrivate(OmpOutVD, SharedLV.getAddress());
5276 LocalScope.addPrivate(OmpInVD, LHSLV.getAddress());
5277 (void)LocalScope.Privatize();
5278 // Emit the actual reduction operation
5279 CGF.EmitIgnoredExpr(ReductionOp);
5280 };
5281 EmitCriticalReduction(ReductionGen);
5282 }
5283
5284 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
5285 CGM.getModule(), OMPRTL___kmpc_barrier),
5286 BarrierArgs);
5287
5288 // Broadcast final result
5289 bool IsAggregate = PrivateType->isAggregateType();
5290 LValue SharedLV1 = CGF.MakeAddrLValue(SharedResult, PrivateType);
5291 llvm::Value *FinalResultVal = nullptr;
5292 Address FinalResultAddr = Address::invalid();
5293
5294 if (IsAggregate)
5295 FinalResultAddr = SharedResult;
5296 else
5297 FinalResultVal = CGF.EmitLoadOfScalar(SharedLV1, Loc);
5298
5299 LValue TargetLHSLV = CGF.EmitLValue(RHSExprs);
5300 if (IsAggregate) {
5301 CGF.EmitAggregateCopy(TargetLHSLV,
5302 CGF.MakeAddrLValue(FinalResultAddr, PrivateType),
5303 PrivateType, AggValueSlot::DoesNotOverlap, false);
5304 } else {
5305 CGF.EmitStoreOfScalar(FinalResultVal, TargetLHSLV);
5306 }
5307 // Final synchronization barrier
5308 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
5309 CGM.getModule(), OMPRTL___kmpc_barrier),
5310 BarrierArgs);
5311
5312 // Combiner with original list item
5313 auto OriginalListCombiner = [&](CodeGenFunction &CGF,
5314 PrePostActionTy &Action) {
5315 Action.Enter(CGF);
5316 emitSingleReductionCombiner(CGF, ReductionOps, Privates,
5317 cast<DeclRefExpr>(LHSExprs),
5318 cast<DeclRefExpr>(RHSExprs));
5319 };
5320 EmitCriticalReduction(OriginalListCombiner);
5321}
5322
5324 ArrayRef<const Expr *> OrgPrivates,
5325 ArrayRef<const Expr *> OrgLHSExprs,
5326 ArrayRef<const Expr *> OrgRHSExprs,
5327 ArrayRef<const Expr *> OrgReductionOps,
5328 ReductionOptionsTy Options) {
5329 if (!CGF.HaveInsertPoint())
5330 return;
5331
5332 bool WithNowait = Options.WithNowait;
5333 bool SimpleReduction = Options.SimpleReduction;
5334
5335 // Next code should be emitted for reduction:
5336 //
5337 // static kmp_critical_name lock = { 0 };
5338 //
5339 // void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
5340 // *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
5341 // ...
5342 // *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1],
5343 // *(Type<n>-1*)rhs[<n>-1]);
5344 // }
5345 //
5346 // ...
5347 // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
5348 // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
5349 // RedList, reduce_func, &<lock>)) {
5350 // case 1:
5351 // ...
5352 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
5353 // ...
5354 // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
5355 // break;
5356 // case 2:
5357 // ...
5358 // Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
5359 // ...
5360 // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);]
5361 // break;
5362 // default:;
5363 // }
5364 //
5365 // if SimpleReduction is true, only the next code is generated:
5366 // ...
5367 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
5368 // ...
5369
5370 ASTContext &C = CGM.getContext();
5371
5372 if (SimpleReduction) {
5374 const auto *IPriv = OrgPrivates.begin();
5375 const auto *ILHS = OrgLHSExprs.begin();
5376 const auto *IRHS = OrgRHSExprs.begin();
5377 for (const Expr *E : OrgReductionOps) {
5378 emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
5379 cast<DeclRefExpr>(*IRHS));
5380 ++IPriv;
5381 ++ILHS;
5382 ++IRHS;
5383 }
5384 return;
5385 }
5386
5387 // Filter out shared reduction variables based on IsPrivateVarReduction flag.
5388 // Only keep entries where the corresponding variable is not private.
5389 SmallVector<const Expr *> FilteredPrivates, FilteredLHSExprs,
5390 FilteredRHSExprs, FilteredReductionOps;
5391 for (unsigned I : llvm::seq<unsigned>(
5392 std::min(OrgReductionOps.size(), OrgLHSExprs.size()))) {
5393 if (!Options.IsPrivateVarReduction[I]) {
5394 FilteredPrivates.emplace_back(OrgPrivates[I]);
5395 FilteredLHSExprs.emplace_back(OrgLHSExprs[I]);
5396 FilteredRHSExprs.emplace_back(OrgRHSExprs[I]);
5397 FilteredReductionOps.emplace_back(OrgReductionOps[I]);
5398 }
5399 }
5400 // Wrap filtered vectors in ArrayRef for downstream shared reduction
5401 // processing.
5402 ArrayRef<const Expr *> Privates = FilteredPrivates;
5403 ArrayRef<const Expr *> LHSExprs = FilteredLHSExprs;
5404 ArrayRef<const Expr *> RHSExprs = FilteredRHSExprs;
5405 ArrayRef<const Expr *> ReductionOps = FilteredReductionOps;
5406
5407 // 1. Build a list of reduction variables.
5408 // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
5409 auto Size = RHSExprs.size();
5410 for (const Expr *E : Privates) {
5411 if (E->getType()->isVariablyModifiedType())
5412 // Reserve place for array size.
5413 ++Size;
5414 }
5415 llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size);
5416 QualType ReductionArrayTy = C.getConstantArrayType(
5417 C.VoidPtrTy, ArraySize, nullptr, ArraySizeModifier::Normal,
5418 /*IndexTypeQuals=*/0);
5419 RawAddress ReductionList =
5420 CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
5421 const auto *IPriv = Privates.begin();
5422 unsigned Idx = 0;
5423 for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) {
5424 Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
5425 CGF.Builder.CreateStore(
5427 CGF.EmitLValue(RHSExprs[I]).getPointer(CGF), CGF.VoidPtrTy),
5428 Elem);
5429 if ((*IPriv)->getType()->isVariablyModifiedType()) {
5430 // Store array size.
5431 ++Idx;
5432 Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
5433 llvm::Value *Size = CGF.Builder.CreateIntCast(
5434 CGF.getVLASize(
5435 CGF.getContext().getAsVariableArrayType((*IPriv)->getType()))
5436 .NumElts,
5437 CGF.SizeTy, /*isSigned=*/false);
5438 CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy),
5439 Elem);
5440 }
5441 }
5442
5443 // 2. Emit reduce_func().
5444 llvm::Function *ReductionFn = emitReductionFunction(
5445 CGF.CurFn->getName(), Loc, CGF.ConvertTypeForMem(ReductionArrayTy),
5446 Privates, LHSExprs, RHSExprs, ReductionOps);
5447
5448 // 3. Create static kmp_critical_name lock = { 0 };
5449 std::string Name = getName({"reduction"});
5450 llvm::Value *Lock = getCriticalRegionLock(Name);
5451
5452 // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
5453 // RedList, reduce_func, &<lock>);
5454 llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
5455 llvm::Value *ThreadId = getThreadID(CGF, Loc);
5456 llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy);
5457 llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5458 ReductionList.getPointer(), CGF.VoidPtrTy);
5459 llvm::Value *Args[] = {
5460 IdentTLoc, // ident_t *<loc>
5461 ThreadId, // i32 <gtid>
5462 CGF.Builder.getInt32(RHSExprs.size()), // i32 <n>
5463 ReductionArrayTySize, // size_type sizeof(RedList)
5464 RL, // void *RedList
5465 ReductionFn, // void (*) (void *, void *) <reduce_func>
5466 Lock // kmp_critical_name *&<lock>
5467 };
5468 llvm::Value *Res = CGF.EmitRuntimeCall(
5469 OMPBuilder.getOrCreateRuntimeFunction(
5470 CGM.getModule(),
5471 WithNowait ? OMPRTL___kmpc_reduce_nowait : OMPRTL___kmpc_reduce),
5472 Args);
5473
5474 // 5. Build switch(res)
5475 llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(".omp.reduction.default");
5476 llvm::SwitchInst *SwInst =
5477 CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2);
5478
5479 // 6. Build case 1:
5480 // ...
5481 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
5482 // ...
5483 // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
5484 // break;
5485 llvm::BasicBlock *Case1BB = CGF.createBasicBlock(".omp.reduction.case1");
5486 SwInst->addCase(CGF.Builder.getInt32(1), Case1BB);
5487 CGF.EmitBlock(Case1BB);
5488
5489 // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
5490 llvm::Value *EndArgs[] = {
5491 IdentTLoc, // ident_t *<loc>
5492 ThreadId, // i32 <gtid>
5493 Lock // kmp_critical_name *&<lock>
5494 };
5495 auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps](
5496 CodeGenFunction &CGF, PrePostActionTy &Action) {
5498 const auto *IPriv = Privates.begin();
5499 const auto *ILHS = LHSExprs.begin();
5500 const auto *IRHS = RHSExprs.begin();
5501 for (const Expr *E : ReductionOps) {
5502 RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
5503 cast<DeclRefExpr>(*IRHS));
5504 ++IPriv;
5505 ++ILHS;
5506 ++IRHS;
5507 }
5508 };
5510 CommonActionTy Action(
5511 nullptr, {},
5512 OMPBuilder.getOrCreateRuntimeFunction(
5513 CGM.getModule(), WithNowait ? OMPRTL___kmpc_end_reduce_nowait
5514 : OMPRTL___kmpc_end_reduce),
5515 EndArgs);
5516 RCG.setAction(Action);
5517 RCG(CGF);
5518
5519 CGF.EmitBranch(DefaultBB);
5520
5521 // 7. Build case 2:
5522 // ...
5523 // Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
5524 // ...
5525 // break;
5526 llvm::BasicBlock *Case2BB = CGF.createBasicBlock(".omp.reduction.case2");
5527 SwInst->addCase(CGF.Builder.getInt32(2), Case2BB);
5528 CGF.EmitBlock(Case2BB);
5529
5530 auto &&AtomicCodeGen = [Loc, Privates, LHSExprs, RHSExprs, ReductionOps](
5531 CodeGenFunction &CGF, PrePostActionTy &Action) {
5532 const auto *ILHS = LHSExprs.begin();
5533 const auto *IRHS = RHSExprs.begin();
5534 const auto *IPriv = Privates.begin();
5535 for (const Expr *E : ReductionOps) {
5536 const Expr *XExpr = nullptr;
5537 const Expr *EExpr = nullptr;
5538 const Expr *UpExpr = nullptr;
5539 BinaryOperatorKind BO = BO_Comma;
5540 if (const auto *BO = dyn_cast<BinaryOperator>(E)) {
5541 if (BO->getOpcode() == BO_Assign) {
5542 XExpr = BO->getLHS();
5543 UpExpr = BO->getRHS();
5544 }
5545 }
5546 // Try to emit update expression as a simple atomic.
5547 const Expr *RHSExpr = UpExpr;
5548 if (RHSExpr) {
5549 // Analyze RHS part of the whole expression.
5550 if (const auto *ACO = dyn_cast<AbstractConditionalOperator>(
5551 RHSExpr->IgnoreParenImpCasts())) {
5552 // If this is a conditional operator, analyze its condition for
5553 // min/max reduction operator.
5554 RHSExpr = ACO->getCond();
5555 }
5556 if (const auto *BORHS =
5557 dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) {
5558 EExpr = BORHS->getRHS();
5559 BO = BORHS->getOpcode();
5560 }
5561 }
5562 if (XExpr) {
5563 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
5564 auto &&AtomicRedGen = [BO, VD,
5565 Loc](CodeGenFunction &CGF, const Expr *XExpr,
5566 const Expr *EExpr, const Expr *UpExpr) {
5567 LValue X = CGF.EmitLValue(XExpr);
5568 RValue E;
5569 if (EExpr)
5570 E = CGF.EmitAnyExpr(EExpr);
5571 CGF.EmitOMPAtomicSimpleUpdateExpr(
5572 X, E, BO, /*IsXLHSInRHSPart=*/true,
5573 llvm::AtomicOrdering::Monotonic, Loc,
5574 [&CGF, UpExpr, VD, Loc](RValue XRValue) {
5575 CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
5576 Address LHSTemp = CGF.CreateMemTemp(VD->getType());
5577 CGF.emitOMPSimpleStore(
5578 CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue,
5579 VD->getType().getNonReferenceType(), Loc);
5580 PrivateScope.addPrivate(VD, LHSTemp);
5581 (void)PrivateScope.Privatize();
5582 return CGF.EmitAnyExpr(UpExpr);
5583 });
5584 };
5585 if ((*IPriv)->getType()->isArrayType()) {
5586 // Emit atomic reduction for array section.
5587 const auto *RHSVar =
5588 cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
5589 EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar,
5590 AtomicRedGen, XExpr, EExpr, UpExpr);
5591 } else {
5592 // Emit atomic reduction for array subscript or single variable.
5593 AtomicRedGen(CGF, XExpr, EExpr, UpExpr);
5594 }
5595 } else {
5596 // Emit as a critical region.
5597 auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *,
5598 const Expr *, const Expr *) {
5599 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
5600 std::string Name = RT.getName({"atomic_reduction"});
5602 CGF, Name,
5603 [=](CodeGenFunction &CGF, PrePostActionTy &Action) {
5604 Action.Enter(CGF);
5605 emitReductionCombiner(CGF, E);
5606 },
5607 Loc);
5608 };
5609 if ((*IPriv)->getType()->isArrayType()) {
5610 const auto *LHSVar =
5611 cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
5612 const auto *RHSVar =
5613 cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
5614 EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar,
5615 CritRedGen);
5616 } else {
5617 CritRedGen(CGF, nullptr, nullptr, nullptr);
5618 }
5619 }
5620 ++ILHS;
5621 ++IRHS;
5622 ++IPriv;
5623 }
5624 };
5625 RegionCodeGenTy AtomicRCG(AtomicCodeGen);
5626 if (!WithNowait) {
5627 // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>);
5628 llvm::Value *EndArgs[] = {
5629 IdentTLoc, // ident_t *<loc>
5630 ThreadId, // i32 <gtid>
5631 Lock // kmp_critical_name *&<lock>
5632 };
5633 CommonActionTy Action(nullptr, {},
5634 OMPBuilder.getOrCreateRuntimeFunction(
5635 CGM.getModule(), OMPRTL___kmpc_end_reduce),
5636 EndArgs);
5637 AtomicRCG.setAction(Action);
5638 AtomicRCG(CGF);
5639 } else {
5640 AtomicRCG(CGF);
5641 }
5642
5643 CGF.EmitBranch(DefaultBB);
5644 CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
5645 assert(OrgLHSExprs.size() == OrgPrivates.size() &&
5646 "PrivateVarReduction: Privates size mismatch");
5647 assert(OrgLHSExprs.size() == OrgReductionOps.size() &&
5648 "PrivateVarReduction: ReductionOps size mismatch");
5649 for (unsigned I : llvm::seq<unsigned>(
5650 std::min(OrgReductionOps.size(), OrgLHSExprs.size()))) {
5651 if (Options.IsPrivateVarReduction[I])
5652 emitPrivateReduction(CGF, Loc, OrgPrivates[I], OrgLHSExprs[I],
5653 OrgRHSExprs[I], OrgReductionOps[I]);
5654 }
5655}
5656
5657/// Generates unique name for artificial threadprivate variables.
5658/// Format is: <Prefix> "." <Decl_mangled_name> "_" "<Decl_start_loc_raw_enc>"
5659static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix,
5660 const Expr *Ref) {
5661 SmallString<256> Buffer;
5662 llvm::raw_svector_ostream Out(Buffer);
5663 const clang::DeclRefExpr *DE;
5664 const VarDecl *D = ::getBaseDecl(Ref, DE);
5665 if (!D)
5666 D = cast<VarDecl>(cast<DeclRefExpr>(Ref)->getDecl());
5667 D = D->getCanonicalDecl();
5668 std::string Name = CGM.getOpenMPRuntime().getName(
5669 {D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(D)});
5670 Out << Prefix << Name << "_"
5672 return std::string(Out.str());
5673}
5674
5675/// Emits reduction initializer function:
5676/// \code
5677/// void @.red_init(void* %arg, void* %orig) {
5678/// %0 = bitcast void* %arg to <type>*
5679/// store <type> <init>, <type>* %0
5680/// ret void
5681/// }
5682/// \endcode
5683static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM,
5684 SourceLocation Loc,
5685 ReductionCodeGen &RCG, unsigned N) {
5686 ASTContext &C = CGM.getContext();
5687 QualType VoidPtrTy = C.VoidPtrTy;
5688 VoidPtrTy.addRestrict();
5689 FunctionArgList Args;
5690 ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
5692 ImplicitParamDecl ParamOrig(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
5694 Args.emplace_back(&Param);
5695 Args.emplace_back(&ParamOrig);
5696 const auto &FnInfo =
5697 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5698 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
5699 std::string Name = CGM.getOpenMPRuntime().getName({"red_init", ""});
5700 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
5701 Name, &CGM.getModule());
5702 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
5703 if (!CGM.getCodeGenOpts().SampleProfileFile.empty())
5704 Fn->addFnAttr("sample-profile-suffix-elision-policy", "selected");
5705 Fn->setDoesNotRecurse();
5706 CodeGenFunction CGF(CGM);
5707 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
5708 QualType PrivateType = RCG.getPrivateType(N);
5709 Address PrivateAddr = CGF.EmitLoadOfPointer(
5710 CGF.GetAddrOfLocalVar(&Param).withElementType(CGF.Builder.getPtrTy(0)),
5711 C.getPointerType(PrivateType)->castAs<PointerType>());
5712 llvm::Value *Size = nullptr;
5713 // If the size of the reduction item is non-constant, load it from global
5714 // threadprivate variable.
5715 if (RCG.getSizes(N).second) {
5717 CGF, CGM.getContext().getSizeType(),
5718 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
5719 Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
5720 CGM.getContext().getSizeType(), Loc);
5721 }
5722 RCG.emitAggregateType(CGF, N, Size);
5723 Address OrigAddr = Address::invalid();
5724 // If initializer uses initializer from declare reduction construct, emit a
5725 // pointer to the address of the original reduction item (reuired by reduction
5726 // initializer)
5727 if (RCG.usesReductionInitializer(N)) {
5728 Address SharedAddr = CGF.GetAddrOfLocalVar(&ParamOrig);
5729 OrigAddr = CGF.EmitLoadOfPointer(
5730 SharedAddr,
5731 CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr());
5732 }
5733 // Emit the initializer:
5734 // %0 = bitcast void* %arg to <type>*
5735 // store <type> <init>, <type>* %0
5736 RCG.emitInitialization(CGF, N, PrivateAddr, OrigAddr,
5737 [](CodeGenFunction &) { return false; });
5738 CGF.FinishFunction();
5739 return Fn;
5740}
5741
5742/// Emits reduction combiner function:
5743/// \code
5744/// void @.red_comb(void* %arg0, void* %arg1) {
5745/// %lhs = bitcast void* %arg0 to <type>*
5746/// %rhs = bitcast void* %arg1 to <type>*
5747/// %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs)
5748/// store <type> %2, <type>* %lhs
5749/// ret void
5750/// }
5751/// \endcode
5752static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM,
5753 SourceLocation Loc,
5754 ReductionCodeGen &RCG, unsigned N,
5755 const Expr *ReductionOp,
5756 const Expr *LHS, const Expr *RHS,
5757 const Expr *PrivateRef) {
5758 ASTContext &C = CGM.getContext();
5759 const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl());
5760 const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl());
5761 FunctionArgList Args;
5762 ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
5763 C.VoidPtrTy, ImplicitParamKind::Other);
5764 ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
5766 Args.emplace_back(&ParamInOut);
5767 Args.emplace_back(&ParamIn);
5768 const auto &FnInfo =
5769 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5770 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
5771 std::string Name = CGM.getOpenMPRuntime().getName({"red_comb", ""});
5772 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
5773 Name, &CGM.getModule());
5774 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
5775 if (!CGM.getCodeGenOpts().SampleProfileFile.empty())
5776 Fn->addFnAttr("sample-profile-suffix-elision-policy", "selected");
5777 Fn->setDoesNotRecurse();
5778 CodeGenFunction CGF(CGM);
5779 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
5780 llvm::Value *Size = nullptr;
5781 // If the size of the reduction item is non-constant, load it from global
5782 // threadprivate variable.
5783 if (RCG.getSizes(N).second) {
5785 CGF, CGM.getContext().getSizeType(),
5786 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
5787 Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
5788 CGM.getContext().getSizeType(), Loc);
5789 }
5790 RCG.emitAggregateType(CGF, N, Size);
5791 // Remap lhs and rhs variables to the addresses of the function arguments.
5792 // %lhs = bitcast void* %arg0 to <type>*
5793 // %rhs = bitcast void* %arg1 to <type>*
5794 CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
5795 PrivateScope.addPrivate(
5796 LHSVD,
5797 // Pull out the pointer to the variable.
5799 CGF.GetAddrOfLocalVar(&ParamInOut)
5800 .withElementType(CGF.Builder.getPtrTy(0)),
5801 C.getPointerType(LHSVD->getType())->castAs<PointerType>()));
5802 PrivateScope.addPrivate(
5803 RHSVD,
5804 // Pull out the pointer to the variable.
5806 CGF.GetAddrOfLocalVar(&ParamIn).withElementType(
5807 CGF.Builder.getPtrTy(0)),
5808 C.getPointerType(RHSVD->getType())->castAs<PointerType>()));
5809 PrivateScope.Privatize();
5810 // Emit the combiner body:
5811 // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs)
5812 // store <type> %2, <type>* %lhs
5814 CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS),
5815 cast<DeclRefExpr>(RHS));
5816 CGF.FinishFunction();
5817 return Fn;
5818}
5819
5820/// Emits reduction finalizer function:
5821/// \code
5822/// void @.red_fini(void* %arg) {
5823/// %0 = bitcast void* %arg to <type>*
5824/// <destroy>(<type>* %0)
5825/// ret void
5826/// }
5827/// \endcode
5828static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM,
5829 SourceLocation Loc,
5830 ReductionCodeGen &RCG, unsigned N) {
5831 if (!RCG.needCleanups(N))
5832 return nullptr;
5833 ASTContext &C = CGM.getContext();
5834 FunctionArgList Args;
5835 ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
5837 Args.emplace_back(&Param);
5838 const auto &FnInfo =
5839 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5840 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
5841 std::string Name = CGM.getOpenMPRuntime().getName({"red_fini", ""});
5842 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
5843 Name, &CGM.getModule());
5844 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
5845 if (!CGM.getCodeGenOpts().SampleProfileFile.empty())
5846 Fn->addFnAttr("sample-profile-suffix-elision-policy", "selected");
5847 Fn->setDoesNotRecurse();
5848 CodeGenFunction CGF(CGM);
5849 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
5850 Address PrivateAddr = CGF.EmitLoadOfPointer(
5851 CGF.GetAddrOfLocalVar(&Param), C.VoidPtrTy.castAs<PointerType>());
5852 llvm::Value *Size = nullptr;
5853 // If the size of the reduction item is non-constant, load it from global
5854 // threadprivate variable.
5855 if (RCG.getSizes(N).second) {
5857 CGF, CGM.getContext().getSizeType(),
5858 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
5859 Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
5860 CGM.getContext().getSizeType(), Loc);
5861 }
5862 RCG.emitAggregateType(CGF, N, Size);
5863 // Emit the finalizer body:
5864 // <destroy>(<type>* %0)
5865 RCG.emitCleanups(CGF, N, PrivateAddr);
5866 CGF.FinishFunction(Loc);
5867 return Fn;
5868}
5869
5872 ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
5873 if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty())
5874 return nullptr;
5875
5876 // Build typedef struct:
5877 // kmp_taskred_input {
5878 // void *reduce_shar; // shared reduction item
5879 // void *reduce_orig; // original reduction item used for initialization
5880 // size_t reduce_size; // size of data item
5881 // void *reduce_init; // data initialization routine
5882 // void *reduce_fini; // data finalization routine
5883 // void *reduce_comb; // data combiner routine
5884 // kmp_task_red_flags_t flags; // flags for additional info from compiler
5885 // } kmp_taskred_input_t;
5886 ASTContext &C = CGM.getContext();
5887 RecordDecl *RD = C.buildImplicitRecord("kmp_taskred_input_t");
5888 RD->startDefinition();
5889 const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
5890 const FieldDecl *OrigFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
5891 const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType());
5892 const FieldDecl *InitFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
5893 const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
5894 const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
5895 const FieldDecl *FlagsFD = addFieldToRecordDecl(
5896 C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false));
5897 RD->completeDefinition();
5898 CanQualType RDType = C.getCanonicalTagType(RD);
5899 unsigned Size = Data.ReductionVars.size();
5900 llvm::APInt ArraySize(/*numBits=*/64, Size);
5901 QualType ArrayRDType =
5902 C.getConstantArrayType(RDType, ArraySize, nullptr,
5903 ArraySizeModifier::Normal, /*IndexTypeQuals=*/0);
5904 // kmp_task_red_input_t .rd_input.[Size];
5905 RawAddress TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input.");
5906 ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionOrigs,
5907 Data.ReductionCopies, Data.ReductionOps);
5908 for (unsigned Cnt = 0; Cnt < Size; ++Cnt) {
5909 // kmp_task_red_input_t &ElemLVal = .rd_input.[Cnt];
5910 llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0),
5911 llvm::ConstantInt::get(CGM.SizeTy, Cnt)};
5912 llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP(
5913 TaskRedInput.getElementType(), TaskRedInput.getPointer(), Idxs,
5914 /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc,
5915 ".rd_input.gep.");
5916 LValue ElemLVal = CGF.MakeNaturalAlignRawAddrLValue(GEP, RDType);
5917 // ElemLVal.reduce_shar = &Shareds[Cnt];
5918 LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD);
5919 RCG.emitSharedOrigLValue(CGF, Cnt);
5920 llvm::Value *Shared = RCG.getSharedLValue(Cnt).getPointer(CGF);
5921 CGF.EmitStoreOfScalar(Shared, SharedLVal);
5922 // ElemLVal.reduce_orig = &Origs[Cnt];
5923 LValue OrigLVal = CGF.EmitLValueForField(ElemLVal, OrigFD);
5924 llvm::Value *Orig = RCG.getOrigLValue(Cnt).getPointer(CGF);
5925 CGF.EmitStoreOfScalar(Orig, OrigLVal);
5926 RCG.emitAggregateType(CGF, Cnt);
5927 llvm::Value *SizeValInChars;
5928 llvm::Value *SizeVal;
5929 std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt);
5930 // We use delayed creation/initialization for VLAs and array sections. It is
5931 // required because runtime does not provide the way to pass the sizes of
5932 // VLAs/array sections to initializer/combiner/finalizer functions. Instead
5933 // threadprivate global variables are used to store these values and use
5934 // them in the functions.
5935 bool DelayedCreation = !!SizeVal;
5936 SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy,
5937 /*isSigned=*/false);
5938 LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD);
5939 CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal);
5940 // ElemLVal.reduce_init = init;
5941 LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD);
5942 llvm::Value *InitAddr = emitReduceInitFunction(CGM, Loc, RCG, Cnt);
5943 CGF.EmitStoreOfScalar(InitAddr, InitLVal);
5944 // ElemLVal.reduce_fini = fini;
5945 LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD);
5946 llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt);
5947 llvm::Value *FiniAddr =
5948 Fini ? Fini : llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
5949 CGF.EmitStoreOfScalar(FiniAddr, FiniLVal);
5950 // ElemLVal.reduce_comb = comb;
5951 LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD);
5952 llvm::Value *CombAddr = emitReduceCombFunction(
5953 CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt],
5954 RHSExprs[Cnt], Data.ReductionCopies[Cnt]);
5955 CGF.EmitStoreOfScalar(CombAddr, CombLVal);
5956 // ElemLVal.flags = 0;
5957 LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD);
5958 if (DelayedCreation) {
5960 llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*isSigned=*/true),
5961 FlagsLVal);
5962 } else
5963 CGF.EmitNullInitialization(FlagsLVal.getAddress(), FlagsLVal.getType());
5964 }
5965 if (Data.IsReductionWithTaskMod) {
5966 // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int
5967 // is_ws, int num, void *data);
5968 llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
5969 llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
5970 CGM.IntTy, /*isSigned=*/true);
5971 llvm::Value *Args[] = {
5972 IdentTLoc, GTid,
5973 llvm::ConstantInt::get(CGM.IntTy, Data.IsWorksharingReduction ? 1 : 0,
5974 /*isSigned=*/true),
5975 llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
5977 TaskRedInput.getPointer(), CGM.VoidPtrTy)};
5978 return CGF.EmitRuntimeCall(
5979 OMPBuilder.getOrCreateRuntimeFunction(
5980 CGM.getModule(), OMPRTL___kmpc_taskred_modifier_init),
5981 Args);
5982 }
5983 // Build call void *__kmpc_taskred_init(int gtid, int num_data, void *data);
5984 llvm::Value *Args[] = {
5985 CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
5986 /*isSigned=*/true),
5987 llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
5989 CGM.VoidPtrTy)};
5990 return CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
5991 CGM.getModule(), OMPRTL___kmpc_taskred_init),
5992 Args);
5993}
5994
5996 SourceLocation Loc,
5997 bool IsWorksharingReduction) {
5998 // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int
5999 // is_ws, int num, void *data);
6000 llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
6001 llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
6002 CGM.IntTy, /*isSigned=*/true);
6003 llvm::Value *Args[] = {IdentTLoc, GTid,
6004 llvm::ConstantInt::get(CGM.IntTy,
6005 IsWorksharingReduction ? 1 : 0,
6006 /*isSigned=*/true)};
6007 (void)CGF.EmitRuntimeCall(
6008 OMPBuilder.getOrCreateRuntimeFunction(
6009 CGM.getModule(), OMPRTL___kmpc_task_reduction_modifier_fini),
6010 Args);
6011}
6012
6014 SourceLocation Loc,
6015 ReductionCodeGen &RCG,
6016 unsigned N) {
6017 auto Sizes = RCG.getSizes(N);
6018 // Emit threadprivate global variable if the type is non-constant
6019 // (Sizes.second = nullptr).
6020 if (Sizes.second) {
6021 llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy,
6022 /*isSigned=*/false);
6024 CGF, CGM.getContext().getSizeType(),
6025 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
6026 CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false);
6027 }
6028}
6029
6031 SourceLocation Loc,
6032 llvm::Value *ReductionsPtr,
6033 LValue SharedLVal) {
6034 // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
6035 // *d);
6036 llvm::Value *Args[] = {CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
6037 CGM.IntTy,
6038 /*isSigned=*/true),
6039 ReductionsPtr,
6041 SharedLVal.getPointer(CGF), CGM.VoidPtrTy)};
6042 return Address(
6043 CGF.EmitRuntimeCall(
6044 OMPBuilder.getOrCreateRuntimeFunction(
6045 CGM.getModule(), OMPRTL___kmpc_task_reduction_get_th_data),
6046 Args),
6047 CGF.Int8Ty, SharedLVal.getAlignment());
6048}
6049
6051 const OMPTaskDataTy &Data) {
6052 if (!CGF.HaveInsertPoint())
6053 return;
6054
6055 if (CGF.CGM.getLangOpts().OpenMPIRBuilder && Data.Dependences.empty()) {
6056 // TODO: Need to support taskwait with dependences in the OpenMPIRBuilder.
6057 OMPBuilder.createTaskwait(CGF.Builder);
6058 } else {
6059 llvm::Value *ThreadID = getThreadID(CGF, Loc);
6060 llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
6061 auto &M = CGM.getModule();
6062 Address DependenciesArray = Address::invalid();
6063 llvm::Value *NumOfElements;
6064 std::tie(NumOfElements, DependenciesArray) =
6065 emitDependClause(CGF, Data.Dependences, Loc);
6066 if (!Data.Dependences.empty()) {
6067 llvm::Value *DepWaitTaskArgs[7];
6068 DepWaitTaskArgs[0] = UpLoc;
6069 DepWaitTaskArgs[1] = ThreadID;
6070 DepWaitTaskArgs[2] = NumOfElements;
6071 DepWaitTaskArgs[3] = DependenciesArray.emitRawPointer(CGF);
6072 DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
6073 DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
6074 DepWaitTaskArgs[6] =
6075 llvm::ConstantInt::get(CGF.Int32Ty, Data.HasNowaitClause);
6076
6077 CodeGenFunction::RunCleanupsScope LocalScope(CGF);
6078
6079 // Build void __kmpc_omp_taskwait_deps_51(ident_t *, kmp_int32 gtid,
6080 // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
6081 // ndeps_noalias, kmp_depend_info_t *noalias_dep_list,
6082 // kmp_int32 has_no_wait); if dependence info is specified.
6083 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
6084 M, OMPRTL___kmpc_omp_taskwait_deps_51),
6085 DepWaitTaskArgs);
6086
6087 } else {
6088
6089 // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
6090 // global_tid);
6091 llvm::Value *Args[] = {UpLoc, ThreadID};
6092 // Ignore return result until untied tasks are supported.
6093 CGF.EmitRuntimeCall(
6094 OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_taskwait),
6095 Args);
6096 }
6097 }
6098
6099 if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
6100 Region->emitUntiedSwitch(CGF);
6101}
6102
6104 OpenMPDirectiveKind InnerKind,
6105 const RegionCodeGenTy &CodeGen,
6106 bool HasCancel) {
6107 if (!CGF.HaveInsertPoint())
6108 return;
6109 InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel,
6110 InnerKind != OMPD_critical &&
6111 InnerKind != OMPD_master &&
6112 InnerKind != OMPD_masked);
6113 CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);
6114}
6115
6116namespace {
6117enum RTCancelKind {
6118 CancelNoreq = 0,
6119 CancelParallel = 1,
6120 CancelLoop = 2,
6121 CancelSections = 3,
6122 CancelTaskgroup = 4
6123};
6124} // anonymous namespace
6125
6126static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) {
6127 RTCancelKind CancelKind = CancelNoreq;
6128 if (CancelRegion == OMPD_parallel)
6129 CancelKind = CancelParallel;
6130 else if (CancelRegion == OMPD_for)
6131 CancelKind = CancelLoop;
6132 else if (CancelRegion == OMPD_sections)
6133 CancelKind = CancelSections;
6134 else {
6135 assert(CancelRegion == OMPD_taskgroup);
6136 CancelKind = CancelTaskgroup;
6137 }
6138 return CancelKind;
6139}
6140
6143 OpenMPDirectiveKind CancelRegion) {
6144 if (!CGF.HaveInsertPoint())
6145 return;
6146 // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
6147 // global_tid, kmp_int32 cncl_kind);
6148 if (auto *OMPRegionInfo =
6149 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
6150 // For 'cancellation point taskgroup', the task region info may not have a
6151 // cancel. This may instead happen in another adjacent task.
6152 if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) {
6153 llvm::Value *Args[] = {
6154 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
6155 CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
6156 // Ignore return result until untied tasks are supported.
6157 llvm::Value *Result = CGF.EmitRuntimeCall(
6158 OMPBuilder.getOrCreateRuntimeFunction(
6159 CGM.getModule(), OMPRTL___kmpc_cancellationpoint),
6160 Args);
6161 // if (__kmpc_cancellationpoint()) {
6162 // call i32 @__kmpc_cancel_barrier( // for parallel cancellation only
6163 // exit from construct;
6164 // }
6165 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
6166 llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
6167 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
6168 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
6169 CGF.EmitBlock(ExitBB);
6170 if (CancelRegion == OMPD_parallel)
6171 emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
6172 // exit from construct;
6173 CodeGenFunction::JumpDest CancelDest =
6174 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
6175 CGF.EmitBranchThroughCleanup(CancelDest);
6176 CGF.EmitBlock(ContBB, /*IsFinished=*/true);
6177 }
6178 }
6179}
6180
6182 const Expr *IfCond,
6183 OpenMPDirectiveKind CancelRegion) {
6184 if (!CGF.HaveInsertPoint())
6185 return;
6186 // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
6187 // kmp_int32 cncl_kind);
6188 auto &M = CGM.getModule();
6189 if (auto *OMPRegionInfo =
6190 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
6191 auto &&ThenGen = [this, &M, Loc, CancelRegion,
6192 OMPRegionInfo](CodeGenFunction &CGF, PrePostActionTy &) {
6193 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
6194 llvm::Value *Args[] = {
6195 RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc),
6196 CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
6197 // Ignore return result until untied tasks are supported.
6198 llvm::Value *Result = CGF.EmitRuntimeCall(
6199 OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_cancel), Args);
6200 // if (__kmpc_cancel()) {
6201 // call i32 @__kmpc_cancel_barrier( // for parallel cancellation only
6202 // exit from construct;
6203 // }
6204 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
6205 llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
6206 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
6207 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
6208 CGF.EmitBlock(ExitBB);
6209 if (CancelRegion == OMPD_parallel)
6210 RT.emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
6211 // exit from construct;
6212 CodeGenFunction::JumpDest CancelDest =
6213 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
6214 CGF.EmitBranchThroughCleanup(CancelDest);
6215 CGF.EmitBlock(ContBB, /*IsFinished=*/true);
6216 };
6217 if (IfCond) {
6218 emitIfClause(CGF, IfCond, ThenGen,
6219 [](CodeGenFunction &, PrePostActionTy &) {});
6220 } else {
6221 RegionCodeGenTy ThenRCG(ThenGen);
6222 ThenRCG(CGF);
6223 }
6224 }
6225}
6226
6227namespace {
6228/// Cleanup action for uses_allocators support.
6229class OMPUsesAllocatorsActionTy final : public PrePostActionTy {
6231
6232public:
6233 OMPUsesAllocatorsActionTy(
6234 ArrayRef<std::pair<const Expr *, const Expr *>> Allocators)
6235 : Allocators(Allocators) {}
6236 void Enter(CodeGenFunction &CGF) override {
6237 if (!CGF.HaveInsertPoint())
6238 return;
6239 for (const auto &AllocatorData : Allocators) {
6241 CGF, AllocatorData.first, AllocatorData.second);
6242 }
6243 }
6244 void Exit(CodeGenFunction &CGF) override {
6245 if (!CGF.HaveInsertPoint())
6246 return;
6247 for (const auto &AllocatorData : Allocators) {
6249 AllocatorData.first);
6250 }
6251 }
6252};
6253} // namespace
6254
6256 const OMPExecutableDirective &D, StringRef ParentName,
6257 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
6258 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
6259 assert(!ParentName.empty() && "Invalid target entry parent name!");
6262 for (const auto *C : D.getClausesOfKind<OMPUsesAllocatorsClause>()) {
6263 for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
6264 const OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
6265 if (!D.AllocatorTraits)
6266 continue;
6267 Allocators.emplace_back(D.Allocator, D.AllocatorTraits);
6268 }
6269 }
6270 OMPUsesAllocatorsActionTy UsesAllocatorAction(Allocators);
6271 CodeGen.setAction(UsesAllocatorAction);
6272 emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
6273 IsOffloadEntry, CodeGen);
6274}
6275
6277 const Expr *Allocator,
6278 const Expr *AllocatorTraits) {
6279 llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
6280 ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
6281 // Use default memspace handle.
6282 llvm::Value *MemSpaceHandle = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
6283 llvm::Value *NumTraits = llvm::ConstantInt::get(
6285 AllocatorTraits->getType()->getAsArrayTypeUnsafe())
6286 ->getSize()
6287 .getLimitedValue());
6288 LValue AllocatorTraitsLVal = CGF.EmitLValue(AllocatorTraits);
6290 AllocatorTraitsLVal.getAddress(), CGF.VoidPtrPtrTy, CGF.VoidPtrTy);
6291 AllocatorTraitsLVal = CGF.MakeAddrLValue(Addr, CGF.getContext().VoidPtrTy,
6292 AllocatorTraitsLVal.getBaseInfo(),
6293 AllocatorTraitsLVal.getTBAAInfo());
6294 llvm::Value *Traits = Addr.emitRawPointer(CGF);
6295
6296 llvm::Value *AllocatorVal =
6297 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
6298 CGM.getModule(), OMPRTL___kmpc_init_allocator),
6299 {ThreadId, MemSpaceHandle, NumTraits, Traits});
6300 // Store to allocator.
6302 cast<DeclRefExpr>(Allocator->IgnoreParenImpCasts())->getDecl()));
6303 LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
6304 AllocatorVal =
6305 CGF.EmitScalarConversion(AllocatorVal, CGF.getContext().VoidPtrTy,
6306 Allocator->getType(), Allocator->getExprLoc());
6307 CGF.EmitStoreOfScalar(AllocatorVal, AllocatorLVal);
6308}
6309
6311 const Expr *Allocator) {
6312 llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
6313 ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
6314 LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
6315 llvm::Value *AllocatorVal =
6316 CGF.EmitLoadOfScalar(AllocatorLVal, Allocator->getExprLoc());
6317 AllocatorVal = CGF.EmitScalarConversion(AllocatorVal, Allocator->getType(),
6318 CGF.getContext().VoidPtrTy,
6319 Allocator->getExprLoc());
6320 (void)CGF.EmitRuntimeCall(
6321 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
6322 OMPRTL___kmpc_destroy_allocator),
6323 {ThreadId, AllocatorVal});
6324}
6325
6328 llvm::OpenMPIRBuilder::TargetKernelDefaultAttrs &Attrs) {
6329 assert(Attrs.MaxTeams.size() == 1 && Attrs.MaxThreads.size() == 1 &&
6330 "invalid default attrs structure");
6331 int32_t &MaxTeamsVal = Attrs.MaxTeams.front();
6332 int32_t &MaxThreadsVal = Attrs.MaxThreads.front();
6333
6334 getNumTeamsExprForTargetDirective(CGF, D, Attrs.MinTeams, MaxTeamsVal);
6335 getNumThreadsExprForTargetDirective(CGF, D, MaxThreadsVal,
6336 /*UpperBoundOnly=*/true);
6337
6338 for (auto *C : D.getClausesOfKind<OMPXAttributeClause>()) {
6339 for (auto *A : C->getAttrs()) {
6340 int32_t AttrMinThreadsVal = 1, AttrMaxThreadsVal = -1;
6341 int32_t AttrMinBlocksVal = 1, AttrMaxBlocksVal = -1;
6342 if (auto *Attr = dyn_cast<CUDALaunchBoundsAttr>(A))
6343 CGM.handleCUDALaunchBoundsAttr(nullptr, Attr, &AttrMaxThreadsVal,
6344 &AttrMinBlocksVal, &AttrMaxBlocksVal);
6345 else if (auto *Attr = dyn_cast<AMDGPUFlatWorkGroupSizeAttr>(A))
6346 CGM.handleAMDGPUFlatWorkGroupSizeAttr(
6347 nullptr, Attr, /*ReqdWGS=*/nullptr, &AttrMinThreadsVal,
6348 &AttrMaxThreadsVal);
6349 else
6350 continue;
6351
6352 Attrs.MinThreads = std::max(Attrs.MinThreads, AttrMinThreadsVal);
6353 if (AttrMaxThreadsVal > 0)
6354 MaxThreadsVal = MaxThreadsVal > 0
6355 ? std::min(MaxThreadsVal, AttrMaxThreadsVal)
6356 : AttrMaxThreadsVal;
6357 Attrs.MinTeams = std::max(Attrs.MinTeams, AttrMinBlocksVal);
6358 if (AttrMaxBlocksVal > 0)
6359 MaxTeamsVal = MaxTeamsVal > 0 ? std::min(MaxTeamsVal, AttrMaxBlocksVal)
6360 : AttrMaxBlocksVal;
6361 }
6362 }
6363}
6364
6366 const OMPExecutableDirective &D, StringRef ParentName,
6367 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
6368 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
6369
6370 llvm::TargetRegionEntryInfo EntryInfo =
6371 getEntryInfoFromPresumedLoc(CGM, OMPBuilder, D.getBeginLoc(), ParentName);
6372
6373 CodeGenFunction CGF(CGM, true);
6374 llvm::OpenMPIRBuilder::FunctionGenCallback &&GenerateOutlinedFunction =
6375 [&CGF, &D, &CodeGen](StringRef EntryFnName) {
6376 const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
6377
6378 CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName);
6379 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6380 return CGF.GenerateOpenMPCapturedStmtFunction(CS, D);
6381 };
6382
6383 cantFail(OMPBuilder.emitTargetRegionFunction(
6384 EntryInfo, GenerateOutlinedFunction, IsOffloadEntry, OutlinedFn,
6385 OutlinedFnID));
6386
6387 if (!OutlinedFn)
6388 return;
6389
6390 CGM.getTargetCodeGenInfo().setTargetAttributes(nullptr, OutlinedFn, CGM);
6391
6392 for (auto *C : D.getClausesOfKind<OMPXAttributeClause>()) {
6393 for (auto *A : C->getAttrs()) {
6394 if (auto *Attr = dyn_cast<AMDGPUWavesPerEUAttr>(A))
6395 CGM.handleAMDGPUWavesPerEUAttr(OutlinedFn, Attr);
6396 }
6397 }
6398 registerVTable(D);
6399}
6400
6401/// Checks if the expression is constant or does not have non-trivial function
6402/// calls.
6403static bool isTrivial(ASTContext &Ctx, const Expr * E) {
6404 // We can skip constant expressions.
6405 // We can skip expressions with trivial calls or simple expressions.
6407 !E->hasNonTrivialCall(Ctx)) &&
6408 !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true);
6409}
6410
6412 const Stmt *Body) {
6413 const Stmt *Child = Body->IgnoreContainers();
6414 while (const auto *C = dyn_cast_or_null<CompoundStmt>(Child)) {
6415 Child = nullptr;
6416 for (const Stmt *S : C->body()) {
6417 if (const auto *E = dyn_cast<Expr>(S)) {
6418 if (isTrivial(Ctx, E))
6419 continue;
6420 }
6421 // Some of the statements can be ignored.
6424 continue;
6425 // Analyze declarations.
6426 if (const auto *DS = dyn_cast<DeclStmt>(S)) {
6427 if (llvm::all_of(DS->decls(), [](const Decl *D) {
6428 if (isa<EmptyDecl>(D) || isa<DeclContext>(D) ||
6429 isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) ||
6430 isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) ||
6431 isa<UsingDirectiveDecl>(D) ||
6432 isa<OMPDeclareReductionDecl>(D) ||
6433 isa<OMPThreadPrivateDecl>(D) || isa<OMPAllocateDecl>(D))
6434 return true;
6435 const auto *VD = dyn_cast<VarDecl>(D);
6436 if (!VD)
6437 return false;
6438 return VD->hasGlobalStorage() || !VD->isUsed();
6439 }))
6440 continue;
6441 }
6442 // Found multiple children - cannot get the one child only.
6443 if (Child)
6444 return nullptr;
6445 Child = S;
6446 }
6447 if (Child)
6448 Child = Child->IgnoreContainers();
6449 }
6450 return Child;
6451}
6452
6454 CodeGenFunction &CGF, const OMPExecutableDirective &D, int32_t &MinTeamsVal,
6455 int32_t &MaxTeamsVal) {
6456
6457 OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
6458 assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
6459 "Expected target-based executable directive.");
6460 switch (DirectiveKind) {
6461 case OMPD_target: {
6462 const auto *CS = D.getInnermostCapturedStmt();
6463 const auto *Body =
6464 CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
6465 const Stmt *ChildStmt =
6467 if (const auto *NestedDir =
6468 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
6469 if (isOpenMPTeamsDirective(NestedDir->getDirectiveKind())) {
6470 if (NestedDir->hasClausesOfKind<OMPNumTeamsClause>()) {
6471 const Expr *NumTeams = NestedDir->getSingleClause<OMPNumTeamsClause>()
6472 ->getNumTeams()
6473 .front();
6474 if (NumTeams->isIntegerConstantExpr(CGF.getContext()))
6475 if (auto Constant =
6476 NumTeams->getIntegerConstantExpr(CGF.getContext()))
6477 MinTeamsVal = MaxTeamsVal = Constant->getExtValue();
6478 return NumTeams;
6479 }
6480 MinTeamsVal = MaxTeamsVal = 0;
6481 return nullptr;
6482 }
6483 MinTeamsVal = MaxTeamsVal = 1;
6484 return nullptr;
6485 }
6486 // A value of -1 is used to check if we need to emit no teams region
6487 MinTeamsVal = MaxTeamsVal = -1;
6488 return nullptr;
6489 }
6490 case OMPD_target_teams_loop:
6491 case OMPD_target_teams:
6492 case OMPD_target_teams_distribute:
6493 case OMPD_target_teams_distribute_simd:
6494 case OMPD_target_teams_distribute_parallel_for:
6495 case OMPD_target_teams_distribute_parallel_for_simd: {
6496 if (D.hasClausesOfKind<OMPNumTeamsClause>()) {
6497 const Expr *NumTeams =
6498 D.getSingleClause<OMPNumTeamsClause>()->getNumTeams().front();
6499 if (NumTeams->isIntegerConstantExpr(CGF.getContext()))
6500 if (auto Constant = NumTeams->getIntegerConstantExpr(CGF.getContext()))
6501 MinTeamsVal = MaxTeamsVal = Constant->getExtValue();
6502 return NumTeams;
6503 }
6504 MinTeamsVal = MaxTeamsVal = 0;
6505 return nullptr;
6506 }
6507 case OMPD_target_parallel:
6508 case OMPD_target_parallel_for:
6509 case OMPD_target_parallel_for_simd:
6510 case OMPD_target_parallel_loop:
6511 case OMPD_target_simd:
6512 MinTeamsVal = MaxTeamsVal = 1;
6513 return nullptr;
6514 case OMPD_parallel:
6515 case OMPD_for:
6516 case OMPD_parallel_for:
6517 case OMPD_parallel_loop:
6518 case OMPD_parallel_master:
6519 case OMPD_parallel_sections:
6520 case OMPD_for_simd:
6521 case OMPD_parallel_for_simd:
6522 case OMPD_cancel:
6523 case OMPD_cancellation_point:
6524 case OMPD_ordered:
6525 case OMPD_threadprivate:
6526 case OMPD_allocate:
6527 case OMPD_task:
6528 case OMPD_simd:
6529 case OMPD_tile:
6530 case OMPD_unroll:
6531 case OMPD_sections:
6532 case OMPD_section:
6533 case OMPD_single:
6534 case OMPD_master:
6535 case OMPD_critical:
6536 case OMPD_taskyield:
6537 case OMPD_barrier:
6538 case OMPD_taskwait:
6539 case OMPD_taskgroup:
6540 case OMPD_atomic:
6541 case OMPD_flush:
6542 case OMPD_depobj:
6543 case OMPD_scan:
6544 case OMPD_teams:
6545 case OMPD_target_data:
6546 case OMPD_target_exit_data:
6547 case OMPD_target_enter_data:
6548 case OMPD_distribute:
6549 case OMPD_distribute_simd:
6550 case OMPD_distribute_parallel_for:
6551 case OMPD_distribute_parallel_for_simd:
6552 case OMPD_teams_distribute:
6553 case OMPD_teams_distribute_simd:
6554 case OMPD_teams_distribute_parallel_for:
6555 case OMPD_teams_distribute_parallel_for_simd:
6556 case OMPD_target_update:
6557 case OMPD_declare_simd:
6558 case OMPD_declare_variant:
6559 case OMPD_begin_declare_variant:
6560 case OMPD_end_declare_variant:
6561 case OMPD_declare_target:
6562 case OMPD_end_declare_target:
6563 case OMPD_declare_reduction:
6564 case OMPD_declare_mapper:
6565 case OMPD_taskloop:
6566 case OMPD_taskloop_simd:
6567 case OMPD_master_taskloop:
6568 case OMPD_master_taskloop_simd:
6569 case OMPD_parallel_master_taskloop:
6570 case OMPD_parallel_master_taskloop_simd:
6571 case OMPD_requires:
6572 case OMPD_metadirective:
6573 case OMPD_unknown:
6574 break;
6575 default:
6576 break;
6577 }
6578 llvm_unreachable("Unexpected directive kind.");
6579}
6580
6582 CodeGenFunction &CGF, const OMPExecutableDirective &D) {
6583 assert(!CGF.getLangOpts().OpenMPIsTargetDevice &&
6584 "Clauses associated with the teams directive expected to be emitted "
6585 "only for the host!");
6586 CGBuilderTy &Bld = CGF.Builder;
6587 int32_t MinNT = -1, MaxNT = -1;
6588 const Expr *NumTeams =
6589 getNumTeamsExprForTargetDirective(CGF, D, MinNT, MaxNT);
6590 if (NumTeams != nullptr) {
6591 OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
6592
6593 switch (DirectiveKind) {
6594 case OMPD_target: {
6595 const auto *CS = D.getInnermostCapturedStmt();
6596 CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6597 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6598 llvm::Value *NumTeamsVal = CGF.EmitScalarExpr(NumTeams,
6599 /*IgnoreResultAssign*/ true);
6600 return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
6601 /*isSigned=*/true);
6602 }
6603 case OMPD_target_teams:
6604 case OMPD_target_teams_distribute:
6605 case OMPD_target_teams_distribute_simd:
6606 case OMPD_target_teams_distribute_parallel_for:
6607 case OMPD_target_teams_distribute_parallel_for_simd: {
6608 CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF);
6609 llvm::Value *NumTeamsVal = CGF.EmitScalarExpr(NumTeams,
6610 /*IgnoreResultAssign*/ true);
6611 return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
6612 /*isSigned=*/true);
6613 }
6614 default:
6615 break;
6616 }
6617 }
6618
6619 assert(MinNT == MaxNT && "Num threads ranges require handling here.");
6620 return llvm::ConstantInt::getSigned(CGF.Int32Ty, MinNT);
6621}
6622
6623/// Check for a num threads constant value (stored in \p DefaultVal), or
6624/// expression (stored in \p E). If the value is conditional (via an if-clause),
6625/// store the condition in \p CondVal. If \p E, and \p CondVal respectively, are
6626/// nullptr, no expression evaluation is perfomed.
6627static void getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS,
6628 const Expr **E, int32_t &UpperBound,
6629 bool UpperBoundOnly, llvm::Value **CondVal) {
6631 CGF.getContext(), CS->getCapturedStmt());
6632 const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child);
6633 if (!Dir)
6634 return;
6635
6636 if (isOpenMPParallelDirective(Dir->getDirectiveKind())) {
6637 // Handle if clause. If if clause present, the number of threads is
6638 // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
6639 if (CondVal && Dir->hasClausesOfKind<OMPIfClause>()) {
6640 CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6641 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6642 const OMPIfClause *IfClause = nullptr;
6643 for (const auto *C : Dir->getClausesOfKind<OMPIfClause>()) {
6644 if (C->getNameModifier() == OMPD_unknown ||
6645 C->getNameModifier() == OMPD_parallel) {
6646 IfClause = C;
6647 break;
6648 }
6649 }
6650 if (IfClause) {
6651 const Expr *CondExpr = IfClause->getCondition();
6652 bool Result;
6653 if (CondExpr->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
6654 if (!Result) {
6655 UpperBound = 1;
6656 return;
6657 }
6658 } else {
6660 if (const auto *PreInit =
6661 cast_or_null<DeclStmt>(IfClause->getPreInitStmt())) {
6662 for (const auto *I : PreInit->decls()) {
6663 if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
6664 CGF.EmitVarDecl(cast<VarDecl>(*I));
6665 } else {
6668 CGF.EmitAutoVarCleanups(Emission);
6669 }
6670 }
6671 *CondVal = CGF.EvaluateExprAsBool(CondExpr);
6672 }
6673 }
6674 }
6675 }
6676 // Check the value of num_threads clause iff if clause was not specified
6677 // or is not evaluated to false.
6678 if (Dir->hasClausesOfKind<OMPNumThreadsClause>()) {
6679 CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6680 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6681 const auto *NumThreadsClause =
6682 Dir->getSingleClause<OMPNumThreadsClause>();
6683 const Expr *NTExpr = NumThreadsClause->getNumThreads();
6684 if (NTExpr->isIntegerConstantExpr(CGF.getContext()))
6685 if (auto Constant = NTExpr->getIntegerConstantExpr(CGF.getContext()))
6686 UpperBound =
6687 UpperBound
6688 ? Constant->getZExtValue()
6689 : std::min(UpperBound,
6690 static_cast<int32_t>(Constant->getZExtValue()));
6691 // If we haven't found a upper bound, remember we saw a thread limiting
6692 // clause.
6693 if (UpperBound == -1)
6694 UpperBound = 0;
6695 if (!E)
6696 return;
6697 CodeGenFunction::LexicalScope Scope(CGF, NTExpr->getSourceRange());
6698 if (const auto *PreInit =
6699 cast_or_null<DeclStmt>(NumThreadsClause->getPreInitStmt())) {
6700 for (const auto *I : PreInit->decls()) {
6701 if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
6702 CGF.EmitVarDecl(cast<VarDecl>(*I));
6703 } else {
6706 CGF.EmitAutoVarCleanups(Emission);
6707 }
6708 }
6709 }
6710 *E = NTExpr;
6711 }
6712 return;
6713 }
6714 if (isOpenMPSimdDirective(Dir->getDirectiveKind()))
6715 UpperBound = 1;
6716}
6717
6719 CodeGenFunction &CGF, const OMPExecutableDirective &D, int32_t &UpperBound,
6720 bool UpperBoundOnly, llvm::Value **CondVal, const Expr **ThreadLimitExpr) {
6721 assert((!CGF.getLangOpts().OpenMPIsTargetDevice || UpperBoundOnly) &&
6722 "Clauses associated with the teams directive expected to be emitted "
6723 "only for the host!");
6724 OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
6725 assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
6726 "Expected target-based executable directive.");
6727
6728 const Expr *NT = nullptr;
6729 const Expr **NTPtr = UpperBoundOnly ? nullptr : &NT;
6730
6731 auto CheckForConstExpr = [&](const Expr *E, const Expr **EPtr) {
6732 if (E->isIntegerConstantExpr(CGF.getContext())) {
6733 if (auto Constant = E->getIntegerConstantExpr(CGF.getContext()))
6734 UpperBound = UpperBound ? Constant->getZExtValue()
6735 : std::min(UpperBound,
6736 int32_t(Constant->getZExtValue()));
6737 }
6738 // If we haven't found a upper bound, remember we saw a thread limiting
6739 // clause.
6740 if (UpperBound == -1)
6741 UpperBound = 0;
6742 if (EPtr)
6743 *EPtr = E;
6744 };
6745
6746 auto ReturnSequential = [&]() {
6747 UpperBound = 1;
6748 return NT;
6749 };
6750
6751 switch (DirectiveKind) {
6752 case OMPD_target: {
6753 const CapturedStmt *CS = D.getInnermostCapturedStmt();
6754 getNumThreads(CGF, CS, NTPtr, UpperBound, UpperBoundOnly, CondVal);
6756 CGF.getContext(), CS->getCapturedStmt());
6757 // TODO: The standard is not clear how to resolve two thread limit clauses,
6758 // let's pick the teams one if it's present, otherwise the target one.
6759 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6760 if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
6761 if (const auto *TLC = Dir->getSingleClause<OMPThreadLimitClause>()) {
6762 ThreadLimitClause = TLC;
6763 if (ThreadLimitExpr) {
6764 CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6765 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6767 CGF,
6768 ThreadLimitClause->getThreadLimit().front()->getSourceRange());
6769 if (const auto *PreInit =
6770 cast_or_null<DeclStmt>(ThreadLimitClause->getPreInitStmt())) {
6771 for (const auto *I : PreInit->decls()) {
6772 if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
6773 CGF.EmitVarDecl(cast<VarDecl>(*I));
6774 } else {
6777 CGF.EmitAutoVarCleanups(Emission);
6778 }
6779 }
6780 }
6781 }
6782 }
6783 }
6784 if (ThreadLimitClause)
6785 CheckForConstExpr(ThreadLimitClause->getThreadLimit().front(),
6786 ThreadLimitExpr);
6787 if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
6788 if (isOpenMPTeamsDirective(Dir->getDirectiveKind()) &&
6789 !isOpenMPDistributeDirective(Dir->getDirectiveKind())) {
6790 CS = Dir->getInnermostCapturedStmt();
6792 CGF.getContext(), CS->getCapturedStmt());
6793 Dir = dyn_cast_or_null<OMPExecutableDirective>(Child);
6794 }
6795 if (Dir && isOpenMPParallelDirective(Dir->getDirectiveKind())) {
6796 CS = Dir->getInnermostCapturedStmt();
6797 getNumThreads(CGF, CS, NTPtr, UpperBound, UpperBoundOnly, CondVal);
6798 } else if (Dir && isOpenMPSimdDirective(Dir->getDirectiveKind()))
6799 return ReturnSequential();
6800 }
6801 return NT;
6802 }
6803 case OMPD_target_teams: {
6804 if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
6805 CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
6806 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6807 CheckForConstExpr(ThreadLimitClause->getThreadLimit().front(),
6808 ThreadLimitExpr);
6809 }
6810 const CapturedStmt *CS = D.getInnermostCapturedStmt();
6811 getNumThreads(CGF, CS, NTPtr, UpperBound, UpperBoundOnly, CondVal);
6813 CGF.getContext(), CS->getCapturedStmt());
6814 if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
6815 if (Dir->getDirectiveKind() == OMPD_distribute) {
6816 CS = Dir->getInnermostCapturedStmt();
6817 getNumThreads(CGF, CS, NTPtr, UpperBound, UpperBoundOnly, CondVal);
6818 }
6819 }
6820 return NT;
6821 }
6822 case OMPD_target_teams_distribute:
6823 if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
6824 CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
6825 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6826 CheckForConstExpr(ThreadLimitClause->getThreadLimit().front(),
6827 ThreadLimitExpr);
6828 }
6829 getNumThreads(CGF, D.getInnermostCapturedStmt(), NTPtr, UpperBound,
6830 UpperBoundOnly, CondVal);
6831 return NT;
6832 case OMPD_target_teams_loop:
6833 case OMPD_target_parallel_loop:
6834 case OMPD_target_parallel:
6835 case OMPD_target_parallel_for:
6836 case OMPD_target_parallel_for_simd:
6837 case OMPD_target_teams_distribute_parallel_for:
6838 case OMPD_target_teams_distribute_parallel_for_simd: {
6839 if (CondVal && D.hasClausesOfKind<OMPIfClause>()) {
6840 const OMPIfClause *IfClause = nullptr;
6841 for (const auto *C : D.getClausesOfKind<OMPIfClause>()) {
6842 if (C->getNameModifier() == OMPD_unknown ||
6843 C->getNameModifier() == OMPD_parallel) {
6844 IfClause = C;
6845 break;
6846 }
6847 }
6848 if (IfClause) {
6849 const Expr *Cond = IfClause->getCondition();
6850 bool Result;
6851 if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
6852 if (!Result)
6853 return ReturnSequential();
6854 } else {
6856 *CondVal = CGF.EvaluateExprAsBool(Cond);
6857 }
6858 }
6859 }
6860 if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
6861 CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
6862 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6863 CheckForConstExpr(ThreadLimitClause->getThreadLimit().front(),
6864 ThreadLimitExpr);
6865 }
6866 if (D.hasClausesOfKind<OMPNumThreadsClause>()) {
6867 CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
6868 const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>();
6869 CheckForConstExpr(NumThreadsClause->getNumThreads(), nullptr);
6870 return NumThreadsClause->getNumThreads();
6871 }
6872 return NT;
6873 }
6874 case OMPD_target_teams_distribute_simd:
6875 case OMPD_target_simd:
6876 return ReturnSequential();
6877 default:
6878 break;
6879 }
6880 llvm_unreachable("Unsupported directive kind.");
6881}
6882
6884 CodeGenFunction &CGF, const OMPExecutableDirective &D) {
6885 llvm::Value *NumThreadsVal = nullptr;
6886 llvm::Value *CondVal = nullptr;
6887 llvm::Value *ThreadLimitVal = nullptr;
6888 const Expr *ThreadLimitExpr = nullptr;
6889 int32_t UpperBound = -1;
6890
6892 CGF, D, UpperBound, /* UpperBoundOnly */ false, &CondVal,
6893 &ThreadLimitExpr);
6894
6895 // Thread limit expressions are used below, emit them.
6896 if (ThreadLimitExpr) {
6897 ThreadLimitVal =
6898 CGF.EmitScalarExpr(ThreadLimitExpr, /*IgnoreResultAssign=*/true);
6899 ThreadLimitVal = CGF.Builder.CreateIntCast(ThreadLimitVal, CGF.Int32Ty,
6900 /*isSigned=*/false);
6901 }
6902
6903 // Generate the num teams expression.
6904 if (UpperBound == 1) {
6905 NumThreadsVal = CGF.Builder.getInt32(UpperBound);
6906 } else if (NT) {
6907 NumThreadsVal = CGF.EmitScalarExpr(NT, /*IgnoreResultAssign=*/true);
6908 NumThreadsVal = CGF.Builder.CreateIntCast(NumThreadsVal, CGF.Int32Ty,
6909 /*isSigned=*/false);
6910 } else if (ThreadLimitVal) {
6911 // If we do not have a num threads value but a thread limit, replace the
6912 // former with the latter. We know handled the thread limit expression.
6913 NumThreadsVal = ThreadLimitVal;
6914 ThreadLimitVal = nullptr;
6915 } else {
6916 // Default to "0" which means runtime choice.
6917 assert(!ThreadLimitVal && "Default not applicable with thread limit value");
6918 NumThreadsVal = CGF.Builder.getInt32(0);
6919 }
6920
6921 // Handle if clause. If if clause present, the number of threads is
6922 // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
6923 if (CondVal) {
6925 NumThreadsVal = CGF.Builder.CreateSelect(CondVal, NumThreadsVal,
6926 CGF.Builder.getInt32(1));
6927 }
6928
6929 // If the thread limit and num teams expression were present, take the
6930 // minimum.
6931 if (ThreadLimitVal) {
6932 NumThreadsVal = CGF.Builder.CreateSelect(
6933 CGF.Builder.CreateICmpULT(ThreadLimitVal, NumThreadsVal),
6934 ThreadLimitVal, NumThreadsVal);
6935 }
6936
6937 return NumThreadsVal;
6938}
6939
6940namespace {
6942
6943// Utility to handle information from clauses associated with a given
6944// construct that use mappable expressions (e.g. 'map' clause, 'to' clause).
6945// It provides a convenient interface to obtain the information and generate
6946// code for that information.
6947class MappableExprsHandler {
6948public:
6949 /// Custom comparator for attach-pointer expressions that compares them by
6950 /// complexity (i.e. their component-depth) first, then by the order in which
6951 /// they were computed by collectAttachPtrExprInfo(), if they are semantically
6952 /// different.
6953 struct AttachPtrExprComparator {
6954 const MappableExprsHandler &Handler;
6955 // Cache of previous equality comparison results.
6956 mutable llvm::DenseMap<std::pair<const Expr *, const Expr *>, bool>
6957 CachedEqualityComparisons;
6958
6959 AttachPtrExprComparator(const MappableExprsHandler &H) : Handler(H) {}
6960 AttachPtrExprComparator() = delete;
6961
6962 // Return true iff LHS is "less than" RHS.
6963 bool operator()(const Expr *LHS, const Expr *RHS) const {
6964 if (LHS == RHS)
6965 return false;
6966
6967 // First, compare by complexity (depth)
6968 const auto ItLHS = Handler.AttachPtrComponentDepthMap.find(LHS);
6969 const auto ItRHS = Handler.AttachPtrComponentDepthMap.find(RHS);
6970
6971 std::optional<size_t> DepthLHS =
6972 (ItLHS != Handler.AttachPtrComponentDepthMap.end()) ? ItLHS->second
6973 : std::nullopt;
6974 std::optional<size_t> DepthRHS =
6975 (ItRHS != Handler.AttachPtrComponentDepthMap.end()) ? ItRHS->second
6976 : std::nullopt;
6977
6978 // std::nullopt (no attach pointer) has lowest complexity
6979 if (!DepthLHS.has_value() && !DepthRHS.has_value()) {
6980 // Both have same complexity, now check semantic equality
6981 if (areEqual(LHS, RHS))
6982 return false;
6983 // Different semantically, compare by computation order
6984 return wasComputedBefore(LHS, RHS);
6985 }
6986 if (!DepthLHS.has_value())
6987 return true; // LHS has lower complexity
6988 if (!DepthRHS.has_value())
6989 return false; // RHS has lower complexity
6990
6991 // Both have values, compare by depth (lower depth = lower complexity)
6992 if (DepthLHS.value() != DepthRHS.value())
6993 return DepthLHS.value() < DepthRHS.value();
6994
6995 // Same complexity, now check semantic equality
6996 if (areEqual(LHS, RHS))
6997 return false;
6998 // Different semantically, compare by computation order
6999 return wasComputedBefore(LHS, RHS);
7000 }
7001
7002 public:
7003 /// Return true if \p LHS and \p RHS are semantically equal. Uses pre-cached
7004 /// results, if available, otherwise does a recursive semantic comparison.
7005 bool areEqual(const Expr *LHS, const Expr *RHS) const {
7006 // Check cache first for faster lookup
7007 const auto CachedResultIt = CachedEqualityComparisons.find({LHS, RHS});
7008 if (CachedResultIt != CachedEqualityComparisons.end())
7009 return CachedResultIt->second;
7010
7011 bool ComparisonResult = areSemanticallyEqual(LHS, RHS);
7012
7013 // Cache the result for future lookups (both orders since semantic
7014 // equality is commutative)
7015 CachedEqualityComparisons[{LHS, RHS}] = ComparisonResult;
7016 CachedEqualityComparisons[{RHS, LHS}] = ComparisonResult;
7017 return ComparisonResult;
7018 }
7019
7020 /// Compare the two attach-ptr expressions by their computation order.
7021 /// Returns true iff LHS was computed before RHS by
7022 /// collectAttachPtrExprInfo().
7023 bool wasComputedBefore(const Expr *LHS, const Expr *RHS) const {
7024 const size_t &OrderLHS = Handler.AttachPtrComputationOrderMap.at(LHS);
7025 const size_t &OrderRHS = Handler.AttachPtrComputationOrderMap.at(RHS);
7026
7027 return OrderLHS < OrderRHS;
7028 }
7029
7030 private:
7031 /// Helper function to compare attach-pointer expressions semantically.
7032 /// This function handles various expression types that can be part of an
7033 /// attach-pointer.
7034 /// TODO: Not urgent, but we should ideally return true when comparing
7035 /// `p[10]`, `*(p + 10)`, `*(p + 5 + 5)`, `p[10:1]` etc.
7036 bool areSemanticallyEqual(const Expr *LHS, const Expr *RHS) const {
7037 if (LHS == RHS)
7038 return true;
7039
7040 // If only one is null, they aren't equal
7041 if (!LHS || !RHS)
7042 return false;
7043
7044 ASTContext &Ctx = Handler.CGF.getContext();
7045 // Strip away parentheses and no-op casts to get to the core expression
7046 LHS = LHS->IgnoreParenNoopCasts(Ctx);
7047 RHS = RHS->IgnoreParenNoopCasts(Ctx);
7048
7049 // Direct pointer comparison of the underlying expressions
7050 if (LHS == RHS)
7051 return true;
7052
7053 // Check if the expression classes match
7054 if (LHS->getStmtClass() != RHS->getStmtClass())
7055 return false;
7056
7057 // Handle DeclRefExpr (variable references)
7058 if (const auto *LD = dyn_cast<DeclRefExpr>(LHS)) {
7059 const auto *RD = dyn_cast<DeclRefExpr>(RHS);
7060 if (!RD)
7061 return false;
7062 return LD->getDecl()->getCanonicalDecl() ==
7063 RD->getDecl()->getCanonicalDecl();
7064 }
7065
7066 // Handle ArraySubscriptExpr (array indexing like a[i])
7067 if (const auto *LA = dyn_cast<ArraySubscriptExpr>(LHS)) {
7068 const auto *RA = dyn_cast<ArraySubscriptExpr>(RHS);
7069 if (!RA)
7070 return false;
7071 return areSemanticallyEqual(LA->getBase(), RA->getBase()) &&
7072 areSemanticallyEqual(LA->getIdx(), RA->getIdx());
7073 }
7074
7075 // Handle MemberExpr (member access like s.m or p->m)
7076 if (const auto *LM = dyn_cast<MemberExpr>(LHS)) {
7077 const auto *RM = dyn_cast<MemberExpr>(RHS);
7078 if (!RM)
7079 return false;
7080 if (LM->getMemberDecl()->getCanonicalDecl() !=
7081 RM->getMemberDecl()->getCanonicalDecl())
7082 return false;
7083 return areSemanticallyEqual(LM->getBase(), RM->getBase());
7084 }
7085
7086 // Handle UnaryOperator (unary operations like *p, &x, etc.)
7087 if (const auto *LU = dyn_cast<UnaryOperator>(LHS)) {
7088 const auto *RU = dyn_cast<UnaryOperator>(RHS);
7089 if (!RU)
7090 return false;
7091 if (LU->getOpcode() != RU->getOpcode())
7092 return false;
7093 return areSemanticallyEqual(LU->getSubExpr(), RU->getSubExpr());
7094 }
7095
7096 // Handle BinaryOperator (binary operations like p + offset)
7097 if (const auto *LB = dyn_cast<BinaryOperator>(LHS)) {
7098 const auto *RB = dyn_cast<BinaryOperator>(RHS);
7099 if (!RB)
7100 return false;
7101 if (LB->getOpcode() != RB->getOpcode())
7102 return false;
7103 return areSemanticallyEqual(LB->getLHS(), RB->getLHS()) &&
7104 areSemanticallyEqual(LB->getRHS(), RB->getRHS());
7105 }
7106
7107 // Handle ArraySectionExpr (array sections like a[0:1])
7108 // Attach pointers should not contain array-sections, but currently we
7109 // don't emit an error.
7110 if (const auto *LAS = dyn_cast<ArraySectionExpr>(LHS)) {
7111 const auto *RAS = dyn_cast<ArraySectionExpr>(RHS);
7112 if (!RAS)
7113 return false;
7114 return areSemanticallyEqual(LAS->getBase(), RAS->getBase()) &&
7115 areSemanticallyEqual(LAS->getLowerBound(),
7116 RAS->getLowerBound()) &&
7117 areSemanticallyEqual(LAS->getLength(), RAS->getLength());
7118 }
7119
7120 // Handle CastExpr (explicit casts)
7121 if (const auto *LC = dyn_cast<CastExpr>(LHS)) {
7122 const auto *RC = dyn_cast<CastExpr>(RHS);
7123 if (!RC)
7124 return false;
7125 if (LC->getCastKind() != RC->getCastKind())
7126 return false;
7127 return areSemanticallyEqual(LC->getSubExpr(), RC->getSubExpr());
7128 }
7129
7130 // Handle CXXThisExpr (this pointer)
7131 if (isa<CXXThisExpr>(LHS) && isa<CXXThisExpr>(RHS))
7132 return true;
7133
7134 // Handle IntegerLiteral (integer constants)
7135 if (const auto *LI = dyn_cast<IntegerLiteral>(LHS)) {
7136 const auto *RI = dyn_cast<IntegerLiteral>(RHS);
7137 if (!RI)
7138 return false;
7139 return LI->getValue() == RI->getValue();
7140 }
7141
7142 // Handle CharacterLiteral (character constants)
7143 if (const auto *LC = dyn_cast<CharacterLiteral>(LHS)) {
7144 const auto *RC = dyn_cast<CharacterLiteral>(RHS);
7145 if (!RC)
7146 return false;
7147 return LC->getValue() == RC->getValue();
7148 }
7149
7150 // Handle FloatingLiteral (floating point constants)
7151 if (const auto *LF = dyn_cast<FloatingLiteral>(LHS)) {
7152 const auto *RF = dyn_cast<FloatingLiteral>(RHS);
7153 if (!RF)
7154 return false;
7155 // Use bitwise comparison for floating point literals
7156 return LF->getValue().bitwiseIsEqual(RF->getValue());
7157 }
7158
7159 // Handle StringLiteral (string constants)
7160 if (const auto *LS = dyn_cast<StringLiteral>(LHS)) {
7161 const auto *RS = dyn_cast<StringLiteral>(RHS);
7162 if (!RS)
7163 return false;
7164 return LS->getString() == RS->getString();
7165 }
7166
7167 // Handle CXXNullPtrLiteralExpr (nullptr)
7169 return true;
7170
7171 // Handle CXXBoolLiteralExpr (true/false)
7172 if (const auto *LB = dyn_cast<CXXBoolLiteralExpr>(LHS)) {
7173 const auto *RB = dyn_cast<CXXBoolLiteralExpr>(RHS);
7174 if (!RB)
7175 return false;
7176 return LB->getValue() == RB->getValue();
7177 }
7178
7179 // Fallback for other forms - use the existing comparison method
7180 return Expr::isSameComparisonOperand(LHS, RHS);
7181 }
7182 };
7183
7184 /// Get the offset of the OMP_MAP_MEMBER_OF field.
7185 static unsigned getFlagMemberOffset() {
7186 unsigned Offset = 0;
7187 for (uint64_t Remain =
7188 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
7189 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF);
7190 !(Remain & 1); Remain = Remain >> 1)
7191 Offset++;
7192 return Offset;
7193 }
7194
7195 /// Class that holds debugging information for a data mapping to be passed to
7196 /// the runtime library.
7197 class MappingExprInfo {
7198 /// The variable declaration used for the data mapping.
7199 const ValueDecl *MapDecl = nullptr;
7200 /// The original expression used in the map clause, or null if there is
7201 /// none.
7202 const Expr *MapExpr = nullptr;
7203
7204 public:
7205 MappingExprInfo(const ValueDecl *MapDecl, const Expr *MapExpr = nullptr)
7206 : MapDecl(MapDecl), MapExpr(MapExpr) {}
7207
7208 const ValueDecl *getMapDecl() const { return MapDecl; }
7209 const Expr *getMapExpr() const { return MapExpr; }
7210 };
7211
7212 using DeviceInfoTy = llvm::OpenMPIRBuilder::DeviceInfoTy;
7213 using MapBaseValuesArrayTy = llvm::OpenMPIRBuilder::MapValuesArrayTy;
7214 using MapValuesArrayTy = llvm::OpenMPIRBuilder::MapValuesArrayTy;
7215 using MapFlagsArrayTy = llvm::OpenMPIRBuilder::MapFlagsArrayTy;
7216 using MapDimArrayTy = llvm::OpenMPIRBuilder::MapDimArrayTy;
7217 using MapNonContiguousArrayTy =
7218 llvm::OpenMPIRBuilder::MapNonContiguousArrayTy;
7219 using MapExprsArrayTy = SmallVector<MappingExprInfo, 4>;
7220 using MapValueDeclsArrayTy = SmallVector<const ValueDecl *, 4>;
7221 using MapData =
7223 OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>,
7224 bool /*IsImplicit*/, const ValueDecl *, const Expr *>;
7225 using MapDataArrayTy = SmallVector<MapData, 4>;
7226
7227 /// This structure contains combined information generated for mappable
7228 /// clauses, including base pointers, pointers, sizes, map types, user-defined
7229 /// mappers, and non-contiguous information.
7230 struct MapCombinedInfoTy : llvm::OpenMPIRBuilder::MapInfosTy {
7231 MapExprsArrayTy Exprs;
7232 MapValueDeclsArrayTy Mappers;
7233 MapValueDeclsArrayTy DevicePtrDecls;
7234
7235 /// Append arrays in \a CurInfo.
7236 void append(MapCombinedInfoTy &CurInfo) {
7237 Exprs.append(CurInfo.Exprs.begin(), CurInfo.Exprs.end());
7238 DevicePtrDecls.append(CurInfo.DevicePtrDecls.begin(),
7239 CurInfo.DevicePtrDecls.end());
7240 Mappers.append(CurInfo.Mappers.begin(), CurInfo.Mappers.end());
7241 llvm::OpenMPIRBuilder::MapInfosTy::append(CurInfo);
7242 }
7243 };
7244
7245 /// Map between a struct and the its lowest & highest elements which have been
7246 /// mapped.
7247 /// [ValueDecl *] --> {LE(FieldIndex, Pointer),
7248 /// HE(FieldIndex, Pointer)}
7249 struct StructRangeInfoTy {
7250 MapCombinedInfoTy PreliminaryMapData;
7251 std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> LowestElem = {
7252 0, Address::invalid()};
7253 std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> HighestElem = {
7254 0, Address::invalid()};
7255 Address Base = Address::invalid();
7256 Address LB = Address::invalid();
7257 bool IsArraySection = false;
7258 bool HasCompleteRecord = false;
7259 };
7260
7261 /// A struct to store the attach pointer and pointee information, to be used
7262 /// when emitting an attach entry.
7263 struct AttachInfoTy {
7264 Address AttachPtrAddr = Address::invalid();
7265 Address AttachPteeAddr = Address::invalid();
7266 const ValueDecl *AttachPtrDecl = nullptr;
7267 const Expr *AttachMapExpr = nullptr;
7268
7269 bool isValid() const {
7270 return AttachPtrAddr.isValid() && AttachPteeAddr.isValid();
7271 }
7272 };
7273
7274 /// Check if there's any component list where the attach pointer expression
7275 /// matches the given captured variable.
7276 bool hasAttachEntryForCapturedVar(const ValueDecl *VD) const {
7277 for (const auto &AttachEntry : AttachPtrExprMap) {
7278 if (AttachEntry.second) {
7279 // Check if the attach pointer expression is a DeclRefExpr that
7280 // references the captured variable
7281 if (const auto *DRE = dyn_cast<DeclRefExpr>(AttachEntry.second))
7282 if (DRE->getDecl() == VD)
7283 return true;
7284 }
7285 }
7286 return false;
7287 }
7288
7289 /// Get the previously-cached attach pointer for a component list, if-any.
7290 const Expr *getAttachPtrExpr(
7292 const {
7293 const auto It = AttachPtrExprMap.find(Components);
7294 if (It != AttachPtrExprMap.end())
7295 return It->second;
7296
7297 return nullptr;
7298 }
7299
7300private:
7301 /// Kind that defines how a device pointer has to be returned.
7302 struct MapInfo {
7305 ArrayRef<OpenMPMapModifierKind> MapModifiers;
7306 ArrayRef<OpenMPMotionModifierKind> MotionModifiers;
7307 bool ReturnDevicePointer = false;
7308 bool IsImplicit = false;
7309 const ValueDecl *Mapper = nullptr;
7310 const Expr *VarRef = nullptr;
7311 bool ForDeviceAddr = false;
7312 bool HasUdpFbNullify = false;
7313
7314 MapInfo() = default;
7315 MapInfo(
7317 OpenMPMapClauseKind MapType,
7318 ArrayRef<OpenMPMapModifierKind> MapModifiers,
7319 ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
7320 bool ReturnDevicePointer, bool IsImplicit,
7321 const ValueDecl *Mapper = nullptr, const Expr *VarRef = nullptr,
7322 bool ForDeviceAddr = false, bool HasUdpFbNullify = false)
7323 : Components(Components), MapType(MapType), MapModifiers(MapModifiers),
7324 MotionModifiers(MotionModifiers),
7325 ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit),
7326 Mapper(Mapper), VarRef(VarRef), ForDeviceAddr(ForDeviceAddr),
7327 HasUdpFbNullify(HasUdpFbNullify) {}
7328 };
7329
7330 /// The target directive from where the mappable clauses were extracted. It
7331 /// is either a executable directive or a user-defined mapper directive.
7332 llvm::PointerUnion<const OMPExecutableDirective *,
7333 const OMPDeclareMapperDecl *>
7334 CurDir;
7335
7336 /// Function the directive is being generated for.
7337 CodeGenFunction &CGF;
7338
7339 /// Set of all first private variables in the current directive.
7340 /// bool data is set to true if the variable is implicitly marked as
7341 /// firstprivate, false otherwise.
7342 llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, bool> FirstPrivateDecls;
7343
7344 /// Set of defaultmap clause kinds that use firstprivate behavior.
7345 llvm::SmallSet<OpenMPDefaultmapClauseKind, 4> DefaultmapFirstprivateKinds;
7346
7347 /// Map between device pointer declarations and their expression components.
7348 /// The key value for declarations in 'this' is null.
7349 llvm::DenseMap<
7350 const ValueDecl *,
7351 SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
7352 DevPointersMap;
7353
7354 /// Map between device addr declarations and their expression components.
7355 /// The key value for declarations in 'this' is null.
7356 llvm::DenseMap<
7357 const ValueDecl *,
7358 SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
7359 HasDevAddrsMap;
7360
7361 /// Map between lambda declarations and their map type.
7362 llvm::DenseMap<const ValueDecl *, const OMPMapClause *> LambdasMap;
7363
7364 /// Map from component lists to their attach pointer expressions.
7366 const Expr *>
7367 AttachPtrExprMap;
7368
7369 /// Map from attach pointer expressions to their component depth.
7370 /// nullptr key has std::nullopt depth. This can be used to order attach-ptr
7371 /// expressions with increasing/decreasing depth.
7372 /// The component-depth of `nullptr` (i.e. no attach-ptr) is `std::nullopt`.
7373 /// TODO: Not urgent, but we should ideally use the number of pointer
7374 /// dereferences in an expr as an indicator of its complexity, instead of the
7375 /// component-depth. That would be needed for us to treat `p[1]`, `*(p + 10)`,
7376 /// `*(p + 5 + 5)` together.
7377 llvm::DenseMap<const Expr *, std::optional<size_t>>
7378 AttachPtrComponentDepthMap = {{nullptr, std::nullopt}};
7379
7380 /// Map from attach pointer expressions to the order they were computed in, in
7381 /// collectAttachPtrExprInfo().
7382 llvm::DenseMap<const Expr *, size_t> AttachPtrComputationOrderMap = {
7383 {nullptr, 0}};
7384
7385 /// An instance of attach-ptr-expr comparator that can be used throughout the
7386 /// lifetime of this handler.
7387 AttachPtrExprComparator AttachPtrComparator;
7388
7389 llvm::Value *getExprTypeSize(const Expr *E) const {
7390 QualType ExprTy = E->getType().getCanonicalType();
7391
7392 // Calculate the size for array shaping expression.
7393 if (const auto *OAE = dyn_cast<OMPArrayShapingExpr>(E)) {
7394 llvm::Value *Size =
7395 CGF.getTypeSize(OAE->getBase()->getType()->getPointeeType());
7396 for (const Expr *SE : OAE->getDimensions()) {
7397 llvm::Value *Sz = CGF.EmitScalarExpr(SE);
7398 Sz = CGF.EmitScalarConversion(Sz, SE->getType(),
7399 CGF.getContext().getSizeType(),
7400 SE->getExprLoc());
7401 Size = CGF.Builder.CreateNUWMul(Size, Sz);
7402 }
7403 return Size;
7404 }
7405
7406 // Reference types are ignored for mapping purposes.
7407 if (const auto *RefTy = ExprTy->getAs<ReferenceType>())
7408 ExprTy = RefTy->getPointeeType().getCanonicalType();
7409
7410 // Given that an array section is considered a built-in type, we need to
7411 // do the calculation based on the length of the section instead of relying
7412 // on CGF.getTypeSize(E->getType()).
7413 if (const auto *OAE = dyn_cast<ArraySectionExpr>(E)) {
7414 QualType BaseTy = ArraySectionExpr::getBaseOriginalType(
7415 OAE->getBase()->IgnoreParenImpCasts())
7417
7418 // If there is no length associated with the expression and lower bound is
7419 // not specified too, that means we are using the whole length of the
7420 // base.
7421 if (!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
7422 !OAE->getLowerBound())
7423 return CGF.getTypeSize(BaseTy);
7424
7425 llvm::Value *ElemSize;
7426 if (const auto *PTy = BaseTy->getAs<PointerType>()) {
7427 ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType());
7428 } else {
7429 const auto *ATy = cast<ArrayType>(BaseTy.getTypePtr());
7430 assert(ATy && "Expecting array type if not a pointer type.");
7431 ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType());
7432 }
7433
7434 // If we don't have a length at this point, that is because we have an
7435 // array section with a single element.
7436 if (!OAE->getLength() && OAE->getColonLocFirst().isInvalid())
7437 return ElemSize;
7438
7439 if (const Expr *LenExpr = OAE->getLength()) {
7440 llvm::Value *LengthVal = CGF.EmitScalarExpr(LenExpr);
7441 LengthVal = CGF.EmitScalarConversion(LengthVal, LenExpr->getType(),
7442 CGF.getContext().getSizeType(),
7443 LenExpr->getExprLoc());
7444 return CGF.Builder.CreateNUWMul(LengthVal, ElemSize);
7445 }
7446 assert(!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
7447 OAE->getLowerBound() && "expected array_section[lb:].");
7448 // Size = sizetype - lb * elemtype;
7449 llvm::Value *LengthVal = CGF.getTypeSize(BaseTy);
7450 llvm::Value *LBVal = CGF.EmitScalarExpr(OAE->getLowerBound());
7451 LBVal = CGF.EmitScalarConversion(LBVal, OAE->getLowerBound()->getType(),
7452 CGF.getContext().getSizeType(),
7453 OAE->getLowerBound()->getExprLoc());
7454 LBVal = CGF.Builder.CreateNUWMul(LBVal, ElemSize);
7455 llvm::Value *Cmp = CGF.Builder.CreateICmpUGT(LengthVal, LBVal);
7456 llvm::Value *TrueVal = CGF.Builder.CreateNUWSub(LengthVal, LBVal);
7457 LengthVal = CGF.Builder.CreateSelect(
7458 Cmp, TrueVal, llvm::ConstantInt::get(CGF.SizeTy, 0));
7459 return LengthVal;
7460 }
7461 return CGF.getTypeSize(ExprTy);
7462 }
7463
7464 /// Return the corresponding bits for a given map clause modifier. Add
7465 /// a flag marking the map as a pointer if requested. Add a flag marking the
7466 /// map as the first one of a series of maps that relate to the same map
7467 /// expression.
7468 OpenMPOffloadMappingFlags getMapTypeBits(
7469 OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
7470 ArrayRef<OpenMPMotionModifierKind> MotionModifiers, bool IsImplicit,
7471 bool AddPtrFlag, bool AddIsTargetParamFlag, bool IsNonContiguous) const {
7472 OpenMPOffloadMappingFlags Bits =
7473 IsImplicit ? OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT
7474 : OpenMPOffloadMappingFlags::OMP_MAP_NONE;
7475 switch (MapType) {
7476 case OMPC_MAP_alloc:
7477 case OMPC_MAP_release:
7478 // alloc and release is the default behavior in the runtime library, i.e.
7479 // if we don't pass any bits alloc/release that is what the runtime is
7480 // going to do. Therefore, we don't need to signal anything for these two
7481 // type modifiers.
7482 break;
7483 case OMPC_MAP_to:
7484 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_TO;
7485 break;
7486 case OMPC_MAP_from:
7487 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_FROM;
7488 break;
7489 case OMPC_MAP_tofrom:
7490 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_TO |
7491 OpenMPOffloadMappingFlags::OMP_MAP_FROM;
7492 break;
7493 case OMPC_MAP_delete:
7494 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_DELETE;
7495 break;
7496 case OMPC_MAP_unknown:
7497 llvm_unreachable("Unexpected map type!");
7498 }
7499 if (AddPtrFlag)
7500 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ;
7501 if (AddIsTargetParamFlag)
7502 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;
7503 if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_always))
7504 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_ALWAYS;
7505 if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_close))
7506 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_CLOSE;
7507 if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_present) ||
7508 llvm::is_contained(MotionModifiers, OMPC_MOTION_MODIFIER_present))
7509 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_PRESENT;
7510 if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_ompx_hold))
7511 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD;
7512 if (IsNonContiguous)
7513 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_NON_CONTIG;
7514 return Bits;
7515 }
7516
7517 /// Return true if the provided expression is a final array section. A
7518 /// final array section, is one whose length can't be proved to be one.
7519 bool isFinalArraySectionExpression(const Expr *E) const {
7520 const auto *OASE = dyn_cast<ArraySectionExpr>(E);
7521
7522 // It is not an array section and therefore not a unity-size one.
7523 if (!OASE)
7524 return false;
7525
7526 // An array section with no colon always refer to a single element.
7527 if (OASE->getColonLocFirst().isInvalid())
7528 return false;
7529
7530 const Expr *Length = OASE->getLength();
7531
7532 // If we don't have a length we have to check if the array has size 1
7533 // for this dimension. Also, we should always expect a length if the
7534 // base type is pointer.
7535 if (!Length) {
7536 QualType BaseQTy = ArraySectionExpr::getBaseOriginalType(
7537 OASE->getBase()->IgnoreParenImpCasts())
7539 if (const auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr()))
7540 return ATy->getSExtSize() != 1;
7541 // If we don't have a constant dimension length, we have to consider
7542 // the current section as having any size, so it is not necessarily
7543 // unitary. If it happen to be unity size, that's user fault.
7544 return true;
7545 }
7546
7547 // Check if the length evaluates to 1.
7548 Expr::EvalResult Result;
7549 if (!Length->EvaluateAsInt(Result, CGF.getContext()))
7550 return true; // Can have more that size 1.
7551
7552 llvm::APSInt ConstLength = Result.Val.getInt();
7553 return ConstLength.getSExtValue() != 1;
7554 }
7555
7556 /// Emit an attach entry into \p CombinedInfo, using the information from \p
7557 /// AttachInfo. For example, for a map of form `int *p; ... map(p[1:10])`,
7558 /// an attach entry has the following form:
7559 /// &p, &p[1], sizeof(void*), ATTACH
7560 void emitAttachEntry(CodeGenFunction &CGF, MapCombinedInfoTy &CombinedInfo,
7561 const AttachInfoTy &AttachInfo) const {
7562 assert(AttachInfo.isValid() &&
7563 "Expected valid attach pointer/pointee information!");
7564
7565 // Size is the size of the pointer itself - use pointer size, not BaseDecl
7566 // size
7567 llvm::Value *PointerSize = CGF.Builder.CreateIntCast(
7568 llvm::ConstantInt::get(
7569 CGF.CGM.SizeTy, CGF.getContext()
7571 .getQuantity()),
7572 CGF.Int64Ty, /*isSigned=*/true);
7573
7574 CombinedInfo.Exprs.emplace_back(AttachInfo.AttachPtrDecl,
7575 AttachInfo.AttachMapExpr);
7576 CombinedInfo.BasePointers.push_back(
7577 AttachInfo.AttachPtrAddr.emitRawPointer(CGF));
7578 CombinedInfo.DevicePtrDecls.push_back(nullptr);
7579 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
7580 CombinedInfo.Pointers.push_back(
7581 AttachInfo.AttachPteeAddr.emitRawPointer(CGF));
7582 CombinedInfo.Sizes.push_back(PointerSize);
7583 CombinedInfo.Types.push_back(OpenMPOffloadMappingFlags::OMP_MAP_ATTACH);
7584 CombinedInfo.Mappers.push_back(nullptr);
7585 CombinedInfo.NonContigInfo.Dims.push_back(1);
7586 }
7587
7588 /// A helper class to copy structures with overlapped elements, i.e. those
7589 /// which have mappings of both "s" and "s.mem". Consecutive elements that
7590 /// are not explicitly copied have mapping nodes synthesized for them,
7591 /// taking care to avoid generating zero-sized copies.
7592 class CopyOverlappedEntryGaps {
7593 CodeGenFunction &CGF;
7594 MapCombinedInfoTy &CombinedInfo;
7595 OpenMPOffloadMappingFlags Flags = OpenMPOffloadMappingFlags::OMP_MAP_NONE;
7596 const ValueDecl *MapDecl = nullptr;
7597 const Expr *MapExpr = nullptr;
7598 Address BP = Address::invalid();
7599 bool IsNonContiguous = false;
7600 uint64_t DimSize = 0;
7601 // These elements track the position as the struct is iterated over
7602 // (in order of increasing element address).
7603 const RecordDecl *LastParent = nullptr;
7604 uint64_t Cursor = 0;
7605 unsigned LastIndex = -1u;
7606 Address LB = Address::invalid();
7607
7608 public:
7609 CopyOverlappedEntryGaps(CodeGenFunction &CGF,
7610 MapCombinedInfoTy &CombinedInfo,
7611 OpenMPOffloadMappingFlags Flags,
7612 const ValueDecl *MapDecl, const Expr *MapExpr,
7613 Address BP, Address LB, bool IsNonContiguous,
7614 uint64_t DimSize)
7615 : CGF(CGF), CombinedInfo(CombinedInfo), Flags(Flags), MapDecl(MapDecl),
7616 MapExpr(MapExpr), BP(BP), IsNonContiguous(IsNonContiguous),
7617 DimSize(DimSize), LB(LB) {}
7618
7619 void processField(
7620 const OMPClauseMappableExprCommon::MappableComponent &MC,
7621 const FieldDecl *FD,
7622 llvm::function_ref<LValue(CodeGenFunction &, const MemberExpr *)>
7623 EmitMemberExprBase) {
7624 const RecordDecl *RD = FD->getParent();
7625 const ASTRecordLayout &RL = CGF.getContext().getASTRecordLayout(RD);
7626 uint64_t FieldOffset = RL.getFieldOffset(FD->getFieldIndex());
7627 uint64_t FieldSize =
7629 Address ComponentLB = Address::invalid();
7630
7631 if (FD->getType()->isLValueReferenceType()) {
7632 const auto *ME = cast<MemberExpr>(MC.getAssociatedExpression());
7633 LValue BaseLVal = EmitMemberExprBase(CGF, ME);
7634 ComponentLB =
7635 CGF.EmitLValueForFieldInitialization(BaseLVal, FD).getAddress();
7636 } else {
7637 ComponentLB =
7639 }
7640
7641 if (!LastParent)
7642 LastParent = RD;
7643 if (FD->getParent() == LastParent) {
7644 if (FD->getFieldIndex() != LastIndex + 1)
7645 copyUntilField(FD, ComponentLB);
7646 } else {
7647 LastParent = FD->getParent();
7648 if (((int64_t)FieldOffset - (int64_t)Cursor) > 0)
7649 copyUntilField(FD, ComponentLB);
7650 }
7651 Cursor = FieldOffset + FieldSize;
7652 LastIndex = FD->getFieldIndex();
7653 LB = CGF.Builder.CreateConstGEP(ComponentLB, 1);
7654 }
7655
7656 void copyUntilField(const FieldDecl *FD, Address ComponentLB) {
7657 llvm::Value *ComponentLBPtr = ComponentLB.emitRawPointer(CGF);
7658 llvm::Value *LBPtr = LB.emitRawPointer(CGF);
7659 llvm::Value *Size =
7660 CGF.Builder.CreatePtrDiff(CGF.Int8Ty, ComponentLBPtr, LBPtr);
7661 copySizedChunk(LBPtr, Size);
7662 }
7663
7664 void copyUntilEnd(Address HB) {
7665 if (LastParent) {
7666 const ASTRecordLayout &RL =
7667 CGF.getContext().getASTRecordLayout(LastParent);
7668 if ((uint64_t)CGF.getContext().toBits(RL.getSize()) <= Cursor)
7669 return;
7670 }
7671 llvm::Value *LBPtr = LB.emitRawPointer(CGF);
7672 llvm::Value *Size = CGF.Builder.CreatePtrDiff(
7673 CGF.Int8Ty, CGF.Builder.CreateConstGEP(HB, 1).emitRawPointer(CGF),
7674 LBPtr);
7675 copySizedChunk(LBPtr, Size);
7676 }
7677
7678 void copySizedChunk(llvm::Value *Base, llvm::Value *Size) {
7679 CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
7680 CombinedInfo.BasePointers.push_back(BP.emitRawPointer(CGF));
7681 CombinedInfo.DevicePtrDecls.push_back(nullptr);
7682 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
7683 CombinedInfo.Pointers.push_back(Base);
7684 CombinedInfo.Sizes.push_back(
7685 CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
7686 CombinedInfo.Types.push_back(Flags);
7687 CombinedInfo.Mappers.push_back(nullptr);
7688 CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize : 1);
7689 }
7690 };
7691
7692 /// Generate the base pointers, section pointers, sizes, map type bits, and
7693 /// user-defined mappers (all included in \a CombinedInfo) for the provided
7694 /// map type, map or motion modifiers, and expression components.
7695 /// \a IsFirstComponent should be set to true if the provided set of
7696 /// components is the first associated with a capture.
7697 void generateInfoForComponentList(
7698 OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
7699 ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
7701 MapCombinedInfoTy &CombinedInfo,
7702 MapCombinedInfoTy &StructBaseCombinedInfo,
7703 StructRangeInfoTy &PartialStruct, AttachInfoTy &AttachInfo,
7704 bool IsFirstComponentList, bool IsImplicit,
7705 bool GenerateAllInfoForClauses, const ValueDecl *Mapper = nullptr,
7706 bool ForDeviceAddr = false, const ValueDecl *BaseDecl = nullptr,
7707 const Expr *MapExpr = nullptr,
7708 ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
7709 OverlappedElements = {}) const {
7710
7711 // The following summarizes what has to be generated for each map and the
7712 // types below. The generated information is expressed in this order:
7713 // base pointer, section pointer, size, flags
7714 // (to add to the ones that come from the map type and modifier).
7715 // Entries annotated with (+) are only generated for "target" constructs,
7716 // and only if the variable at the beginning of the expression is used in
7717 // the region.
7718 //
7719 // double d;
7720 // int i[100];
7721 // float *p;
7722 // int **a = &i;
7723 //
7724 // struct S1 {
7725 // int i;
7726 // float f[50];
7727 // }
7728 // struct S2 {
7729 // int i;
7730 // float f[50];
7731 // S1 s;
7732 // double *p;
7733 // double *&pref;
7734 // struct S2 *ps;
7735 // int &ref;
7736 // }
7737 // S2 s;
7738 // S2 *ps;
7739 //
7740 // map(d)
7741 // &d, &d, sizeof(double), TARGET_PARAM | TO | FROM
7742 //
7743 // map(i)
7744 // &i, &i, 100*sizeof(int), TARGET_PARAM | TO | FROM
7745 //
7746 // map(i[1:23])
7747 // &i(=&i[0]), &i[1], 23*sizeof(int), TARGET_PARAM | TO | FROM
7748 //
7749 // map(p)
7750 // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM
7751 //
7752 // map(p[1:24])
7753 // p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM // map pointee
7754 // &p, &p[1], sizeof(void*), ATTACH // attach pointer/pointee, if both
7755 // // are present, and either is new
7756 //
7757 // map(([22])p)
7758 // p, p, 22*sizeof(float), TARGET_PARAM | TO | FROM
7759 // &p, p, sizeof(void*), ATTACH
7760 //
7761 // map((*a)[0:3])
7762 // a, a, 0, TARGET_PARAM | IMPLICIT // (+)
7763 // (*a)[0], &(*a)[0], 3 * sizeof(int), TO | FROM
7764 // &(*a), &(*a)[0], sizeof(void*), ATTACH
7765 // (+) Only on target, if a is used in the region
7766 // Note: Since the attach base-pointer is `*a`, which is not a scalar
7767 // variable, it doesn't determine the clause on `a`. `a` is mapped using
7768 // a zero-length-array-section map by generateDefaultMapInfo, if it is
7769 // referenced in the target region, because it is a pointer.
7770 //
7771 // map(**a)
7772 // a, a, 0, TARGET_PARAM | IMPLICIT // (+)
7773 // &(*a)[0], &(*a)[0], sizeof(int), TO | FROM
7774 // &(*a), &(*a)[0], sizeof(void*), ATTACH
7775 // (+) Only on target, if a is used in the region
7776 //
7777 // map(s)
7778 // FIXME: This needs to also imply map(ref_ptr_ptee: s.ref), since the
7779 // effect is supposed to be same as if the user had a map for every element
7780 // of the struct. We currently do a shallow-map of s.
7781 // &s, &s, sizeof(S2), TARGET_PARAM | TO | FROM
7782 //
7783 // map(s.i)
7784 // &s, &(s.i), sizeof(int), TARGET_PARAM | TO | FROM
7785 //
7786 // map(s.s.f)
7787 // &s, &(s.s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
7788 //
7789 // map(s.p)
7790 // &s, &(s.p), sizeof(double*), TARGET_PARAM | TO | FROM
7791 //
7792 // map(to: s.p[:22])
7793 // &s, &(s.p), sizeof(double*), TARGET_PARAM | IMPLICIT // (+)
7794 // &(s.p[0]), &(s.p[0]), 22 * sizeof(double*), TO | FROM
7795 // &(s.p), &(s.p[0]), sizeof(void*), ATTACH
7796 //
7797 // map(to: s.ref)
7798 // &s, &(ptr(s.ref)), sizeof(int*), TARGET_PARAM (*)
7799 // &s, &(ptee(s.ref)), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
7800 // (*) alloc space for struct members, only this is a target parameter.
7801 // (**) map the pointer (nothing to be mapped in this example) (the compiler
7802 // optimizes this entry out, same in the examples below)
7803 // (***) map the pointee (map: to)
7804 // Note: ptr(s.ref) represents the referring pointer of s.ref
7805 // ptee(s.ref) represents the referenced pointee of s.ref
7806 //
7807 // map(to: s.pref)
7808 // &s, &(ptr(s.pref)), sizeof(double**), TARGET_PARAM
7809 // &s, &(ptee(s.pref)), sizeof(double*), MEMBER_OF(1) | PTR_AND_OBJ | TO
7810 //
7811 // map(to: s.pref[:22])
7812 // &s, &(ptr(s.pref)), sizeof(double**), TARGET_PARAM | IMPLICIT // (+)
7813 // &s, &(ptee(s.pref)), sizeof(double*), MEMBER_OF(1) | PTR_AND_OBJ | TO |
7814 // FROM | IMPLICIT // (+)
7815 // &(ptee(s.pref)[0]), &(ptee(s.pref)[0]), 22 * sizeof(double), TO
7816 // &(ptee(s.pref)), &(ptee(s.pref)[0]), sizeof(void*), ATTACH
7817 //
7818 // map(s.ps)
7819 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM
7820 //
7821 // map(from: s.ps->s.i)
7822 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM | IMPLICIT // (+)
7823 // &(s.ps[0]), &(s.ps->s.i), sizeof(int), FROM
7824 // &(s.ps), &(s.ps->s.i), sizeof(void*), ATTACH
7825 //
7826 // map(to: s.ps->ps)
7827 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM | IMPLICIT // (+)
7828 // &(s.ps[0]), &(s.ps->ps), sizeof(S2*), TO
7829 // &(s.ps), &(s.ps->ps), sizeof(void*), ATTACH
7830 //
7831 // map(s.ps->ps->ps)
7832 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM | IMPLICIT // (+)
7833 // &(s.ps->ps[0]), &(s.ps->ps->ps), sizeof(S2*), TO
7834 // &(s.ps->ps), &(s.ps->ps->ps), sizeof(void*), ATTACH
7835 //
7836 // map(to: s.ps->ps->s.f[:22])
7837 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM | IMPLICIT // (+)
7838 // &(s.ps->ps[0]), &(s.ps->ps->s.f[0]), 22*sizeof(float), TO
7839 // &(s.ps->ps), &(s.ps->ps->s.f[0]), sizeof(void*), ATTACH
7840 //
7841 // map(ps)
7842 // &ps, &ps, sizeof(S2*), TARGET_PARAM | TO | FROM
7843 //
7844 // map(ps->i)
7845 // ps, &(ps->i), sizeof(int), TARGET_PARAM | TO | FROM
7846 // &ps, &(ps->i), sizeof(void*), ATTACH
7847 //
7848 // map(ps->s.f)
7849 // ps, &(ps->s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
7850 // &ps, &(ps->s.f[0]), sizeof(ps), ATTACH
7851 //
7852 // map(from: ps->p)
7853 // ps, &(ps->p), sizeof(double*), TARGET_PARAM | FROM
7854 // &ps, &(ps->p), sizeof(ps), ATTACH
7855 //
7856 // map(to: ps->p[:22])
7857 // ps, &(ps[0]), 0, TARGET_PARAM | IMPLICIT // (+)
7858 // &(ps->p[0]), &(ps->p[0]), 22*sizeof(double), TO
7859 // &(ps->p), &(ps->p[0]), sizeof(void*), ATTACH
7860 //
7861 // map(ps->ps)
7862 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM | TO | FROM
7863 // &ps, &(ps->ps), sizeof(ps), ATTACH
7864 //
7865 // map(from: ps->ps->s.i)
7866 // ps, &(ps[0]), 0, TARGET_PARAM | IMPLICIT // (+)
7867 // &(ps->ps[0]), &(ps->ps->s.i), sizeof(int), FROM
7868 // &(ps->ps), &(ps->ps->s.i), sizeof(void*), ATTACH
7869 //
7870 // map(from: ps->ps->ps)
7871 // ps, &ps[0], 0, TARGET_PARAM | IMPLICIT // (+)
7872 // &(ps->ps[0]), &(ps->ps->ps), sizeof(S2*), FROM
7873 // &(ps->ps), &(ps->ps->ps), sizeof(void*), ATTACH
7874 //
7875 // map(ps->ps->ps->ps)
7876 // ps, &ps[0], 0, TARGET_PARAM | IMPLICIT // (+)
7877 // &(ps->ps->ps[0]), &(ps->ps->ps->ps), sizeof(S2*), FROM
7878 // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(void*), ATTACH
7879 //
7880 // map(to: ps->ps->ps->s.f[:22])
7881 // ps, &ps[0], 0, TARGET_PARAM | IMPLICIT // (+)
7882 // &(ps->ps->ps[0]), &(ps->ps->ps->s.f[0]), 22*sizeof(float), TO
7883 // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), sizeof(void*), ATTACH
7884 //
7885 // map(to: s.f[:22]) map(from: s.p[:33])
7886 // On target, and if s is used in the region:
7887 //
7888 // &s, &(s.f[0]), 50*sizeof(float) +
7889 // sizeof(struct S1) +
7890 // sizeof(double*) (**), TARGET_PARAM
7891 // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | TO
7892 // &s, &(s.p), sizeof(double*), MEMBER_OF(1) | TO |
7893 // FROM | IMPLICIT
7894 // &(s.p[0]), &(s.p[0]), 33*sizeof(double), FROM
7895 // &(s.p), &(s.p[0]), sizeof(void*), ATTACH
7896 // (**) allocate contiguous space needed to fit all mapped members even if
7897 // we allocate space for members not mapped (in this example,
7898 // s.f[22..49] and s.s are not mapped, yet we must allocate space for
7899 // them as well because they fall between &s.f[0] and &s.p)
7900 //
7901 // On other constructs, and, if s is not used in the region, on target:
7902 // &s, &(s.f[0]), 22*sizeof(float), TO
7903 // &(s.p[0]), &(s.p[0]), 33*sizeof(double), FROM
7904 // &(s.p), &(s.p[0]), sizeof(void*), ATTACH
7905 //
7906 // map(from: s.f[:22]) map(to: ps->p[:33])
7907 // &s, &(s.f[0]), 22*sizeof(float), TARGET_PARAM | FROM
7908 // &ps[0], &ps[0], 0, TARGET_PARAM | IMPLICIT // (+)
7909 // &(ps->p[0]), &(ps->p[0]), 33*sizeof(double), TO
7910 // &(ps->p), &(ps->p[0]), sizeof(void*), ATTACH
7911 //
7912 // map(from: s.f[:22], s.s) map(to: ps->p[:33])
7913 // &s, &(s.f[0]), 50*sizeof(float) +
7914 // sizeof(struct S1), TARGET_PARAM
7915 // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | FROM
7916 // &s, &(s.s), sizeof(struct S1), MEMBER_OF(1) | FROM
7917 // ps, &ps[0], 0, TARGET_PARAM | IMPLICIT // (+)
7918 // &(ps->p[0]), &(ps->p[0]), 33*sizeof(double), TO
7919 // &(ps->p), &(ps->p[0]), sizeof(void*), ATTACH
7920 //
7921 // map(p[:100], p)
7922 // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM
7923 // p, &p[0], 100*sizeof(float), TO | FROM
7924 // &p, &p[0], sizeof(float*), ATTACH
7925
7926 // Track if the map information being generated is the first for a capture.
7927 bool IsCaptureFirstInfo = IsFirstComponentList;
7928 // When the variable is on a declare target link or in a to clause with
7929 // unified memory, a reference is needed to hold the host/device address
7930 // of the variable.
7931 bool RequiresReference = false;
7932
7933 // Scan the components from the base to the complete expression.
7934 auto CI = Components.rbegin();
7935 auto CE = Components.rend();
7936 auto I = CI;
7937
7938 // Track if the map information being generated is the first for a list of
7939 // components.
7940 bool IsExpressionFirstInfo = true;
7941 bool FirstPointerInComplexData = false;
7942 Address BP = Address::invalid();
7943 Address FinalLowestElem = Address::invalid();
7944 const Expr *AssocExpr = I->getAssociatedExpression();
7945 const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr);
7946 const auto *OASE = dyn_cast<ArraySectionExpr>(AssocExpr);
7947 const auto *OAShE = dyn_cast<OMPArrayShapingExpr>(AssocExpr);
7948
7949 // Get the pointer-attachment base-pointer for the given list, if any.
7950 const Expr *AttachPtrExpr = getAttachPtrExpr(Components);
7951 auto [AttachPtrAddr, AttachPteeBaseAddr] =
7952 getAttachPtrAddrAndPteeBaseAddr(AttachPtrExpr, CGF);
7953
7954 bool HasAttachPtr = AttachPtrExpr != nullptr;
7955 bool FirstComponentIsForAttachPtr = AssocExpr == AttachPtrExpr;
7956 bool SeenAttachPtr = FirstComponentIsForAttachPtr;
7957
7958 if (FirstComponentIsForAttachPtr) {
7959 // No need to process AttachPtr here. It will be processed at the end
7960 // after we have computed the pointee's address.
7961 ++I;
7962 } else if (isa<MemberExpr>(AssocExpr)) {
7963 // The base is the 'this' pointer. The content of the pointer is going
7964 // to be the base of the field being mapped.
7965 BP = CGF.LoadCXXThisAddress();
7966 } else if ((AE && isa<CXXThisExpr>(AE->getBase()->IgnoreParenImpCasts())) ||
7967 (OASE &&
7968 isa<CXXThisExpr>(OASE->getBase()->IgnoreParenImpCasts()))) {
7969 BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress();
7970 } else if (OAShE &&
7971 isa<CXXThisExpr>(OAShE->getBase()->IgnoreParenCasts())) {
7972 BP = Address(
7973 CGF.EmitScalarExpr(OAShE->getBase()),
7974 CGF.ConvertTypeForMem(OAShE->getBase()->getType()->getPointeeType()),
7975 CGF.getContext().getTypeAlignInChars(OAShE->getBase()->getType()));
7976 } else {
7977 // The base is the reference to the variable.
7978 // BP = &Var.
7979 BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress();
7980 if (const auto *VD =
7981 dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) {
7982 if (std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
7983 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) {
7984 if ((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
7985 ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
7986 *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
7988 RequiresReference = true;
7990 }
7991 }
7992 }
7993
7994 // If the variable is a pointer and is being dereferenced (i.e. is not
7995 // the last component), the base has to be the pointer itself, not its
7996 // reference. References are ignored for mapping purposes.
7997 QualType Ty =
7998 I->getAssociatedDeclaration()->getType().getNonReferenceType();
7999 if (Ty->isAnyPointerType() && std::next(I) != CE) {
8000 // No need to generate individual map information for the pointer, it
8001 // can be associated with the combined storage if shared memory mode is
8002 // active or the base declaration is not global variable.
8003 const auto *VD = dyn_cast<VarDecl>(I->getAssociatedDeclaration());
8005 !VD || VD->hasLocalStorage() || HasAttachPtr)
8006 BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
8007 else
8008 FirstPointerInComplexData = true;
8009 ++I;
8010 }
8011 }
8012
8013 // Track whether a component of the list should be marked as MEMBER_OF some
8014 // combined entry (for partial structs). Only the first PTR_AND_OBJ entry
8015 // in a component list should be marked as MEMBER_OF, all subsequent entries
8016 // do not belong to the base struct. E.g.
8017 // struct S2 s;
8018 // s.ps->ps->ps->f[:]
8019 // (1) (2) (3) (4)
8020 // ps(1) is a member pointer, ps(2) is a pointee of ps(1), so it is a
8021 // PTR_AND_OBJ entry; the PTR is ps(1), so MEMBER_OF the base struct. ps(3)
8022 // is the pointee of ps(2) which is not member of struct s, so it should not
8023 // be marked as such (it is still PTR_AND_OBJ).
8024 // The variable is initialized to false so that PTR_AND_OBJ entries which
8025 // are not struct members are not considered (e.g. array of pointers to
8026 // data).
8027 bool ShouldBeMemberOf = false;
8028
8029 // Variable keeping track of whether or not we have encountered a component
8030 // in the component list which is a member expression. Useful when we have a
8031 // pointer or a final array section, in which case it is the previous
8032 // component in the list which tells us whether we have a member expression.
8033 // E.g. X.f[:]
8034 // While processing the final array section "[:]" it is "f" which tells us
8035 // whether we are dealing with a member of a declared struct.
8036 const MemberExpr *EncounteredME = nullptr;
8037
8038 // Track for the total number of dimension. Start from one for the dummy
8039 // dimension.
8040 uint64_t DimSize = 1;
8041
8042 // Detects non-contiguous updates due to strided accesses.
8043 // Sets the 'IsNonContiguous' flag so that the 'MapType' bits are set
8044 // correctly when generating information to be passed to the runtime. The
8045 // flag is set to true if any array section has a stride not equal to 1, or
8046 // if the stride is not a constant expression (conservatively assumed
8047 // non-contiguous).
8048 bool IsNonContiguous =
8049 CombinedInfo.NonContigInfo.IsNonContiguous ||
8050 any_of(Components, [&](const auto &Component) {
8051 const auto *OASE =
8052 dyn_cast<ArraySectionExpr>(Component.getAssociatedExpression());
8053 if (!OASE)
8054 return false;
8055
8056 const Expr *StrideExpr = OASE->getStride();
8057 if (!StrideExpr)
8058 return false;
8059
8060 const auto Constant =
8061 StrideExpr->getIntegerConstantExpr(CGF.getContext());
8062 if (!Constant)
8063 return false;
8064
8065 return !Constant->isOne();
8066 });
8067
8068 bool IsPrevMemberReference = false;
8069
8070 bool IsPartialMapped =
8071 !PartialStruct.PreliminaryMapData.BasePointers.empty();
8072
8073 // We need to check if we will be encountering any MEs. If we do not
8074 // encounter any ME expression it means we will be mapping the whole struct.
8075 // In that case we need to skip adding an entry for the struct to the
8076 // CombinedInfo list and instead add an entry to the StructBaseCombinedInfo
8077 // list only when generating all info for clauses.
8078 bool IsMappingWholeStruct = true;
8079 if (!GenerateAllInfoForClauses) {
8080 IsMappingWholeStruct = false;
8081 } else {
8082 for (auto TempI = I; TempI != CE; ++TempI) {
8083 const MemberExpr *PossibleME =
8084 dyn_cast<MemberExpr>(TempI->getAssociatedExpression());
8085 if (PossibleME) {
8086 IsMappingWholeStruct = false;
8087 break;
8088 }
8089 }
8090 }
8091
8092 bool SeenFirstNonBinOpExprAfterAttachPtr = false;
8093 for (; I != CE; ++I) {
8094 // If we have a valid attach-ptr, we skip processing all components until
8095 // after the attach-ptr.
8096 if (HasAttachPtr && !SeenAttachPtr) {
8097 SeenAttachPtr = I->getAssociatedExpression() == AttachPtrExpr;
8098 continue;
8099 }
8100
8101 // After finding the attach pointer, skip binary-ops, to skip past
8102 // expressions like (p + 10), for a map like map(*(p + 10)), where p is
8103 // the attach-ptr.
8104 if (HasAttachPtr && !SeenFirstNonBinOpExprAfterAttachPtr) {
8105 const auto *BO = dyn_cast<BinaryOperator>(I->getAssociatedExpression());
8106 if (BO)
8107 continue;
8108
8109 // Found the first non-binary-operator component after attach
8110 SeenFirstNonBinOpExprAfterAttachPtr = true;
8111 BP = AttachPteeBaseAddr;
8112 }
8113
8114 // If the current component is member of a struct (parent struct) mark it.
8115 if (!EncounteredME) {
8116 EncounteredME = dyn_cast<MemberExpr>(I->getAssociatedExpression());
8117 // If we encounter a PTR_AND_OBJ entry from now on it should be marked
8118 // as MEMBER_OF the parent struct.
8119 if (EncounteredME) {
8120 ShouldBeMemberOf = true;
8121 // Do not emit as complex pointer if this is actually not array-like
8122 // expression.
8123 if (FirstPointerInComplexData) {
8124 QualType Ty = std::prev(I)
8125 ->getAssociatedDeclaration()
8126 ->getType()
8127 .getNonReferenceType();
8128 BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
8129 FirstPointerInComplexData = false;
8130 }
8131 }
8132 }
8133
8134 auto Next = std::next(I);
8135
8136 // We need to generate the addresses and sizes if this is the last
8137 // component, if the component is a pointer or if it is an array section
8138 // whose length can't be proved to be one. If this is a pointer, it
8139 // becomes the base address for the following components.
8140
8141 // A final array section, is one whose length can't be proved to be one.
8142 // If the map item is non-contiguous then we don't treat any array section
8143 // as final array section.
8144 bool IsFinalArraySection =
8145 !IsNonContiguous &&
8146 isFinalArraySectionExpression(I->getAssociatedExpression());
8147
8148 // If we have a declaration for the mapping use that, otherwise use
8149 // the base declaration of the map clause.
8150 const ValueDecl *MapDecl = (I->getAssociatedDeclaration())
8151 ? I->getAssociatedDeclaration()
8152 : BaseDecl;
8153 MapExpr = (I->getAssociatedExpression()) ? I->getAssociatedExpression()
8154 : MapExpr;
8155
8156 // Get information on whether the element is a pointer. Have to do a
8157 // special treatment for array sections given that they are built-in
8158 // types.
8159 const auto *OASE =
8160 dyn_cast<ArraySectionExpr>(I->getAssociatedExpression());
8161 const auto *OAShE =
8162 dyn_cast<OMPArrayShapingExpr>(I->getAssociatedExpression());
8163 const auto *UO = dyn_cast<UnaryOperator>(I->getAssociatedExpression());
8164 const auto *BO = dyn_cast<BinaryOperator>(I->getAssociatedExpression());
8165 bool IsPointer =
8166 OAShE ||
8169 ->isAnyPointerType()) ||
8170 I->getAssociatedExpression()->getType()->isAnyPointerType();
8171 bool IsMemberReference = isa<MemberExpr>(I->getAssociatedExpression()) &&
8172 MapDecl &&
8173 MapDecl->getType()->isLValueReferenceType();
8174 bool IsNonDerefPointer = IsPointer &&
8175 !(UO && UO->getOpcode() != UO_Deref) && !BO &&
8176 !IsNonContiguous;
8177
8178 if (OASE)
8179 ++DimSize;
8180
8181 if (Next == CE || IsMemberReference || IsNonDerefPointer ||
8182 IsFinalArraySection) {
8183 // If this is not the last component, we expect the pointer to be
8184 // associated with an array expression or member expression.
8185 assert((Next == CE ||
8186 isa<MemberExpr>(Next->getAssociatedExpression()) ||
8187 isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) ||
8188 isa<ArraySectionExpr>(Next->getAssociatedExpression()) ||
8189 isa<OMPArrayShapingExpr>(Next->getAssociatedExpression()) ||
8190 isa<UnaryOperator>(Next->getAssociatedExpression()) ||
8191 isa<BinaryOperator>(Next->getAssociatedExpression())) &&
8192 "Unexpected expression");
8193
8194 Address LB = Address::invalid();
8195 Address LowestElem = Address::invalid();
8196 auto &&EmitMemberExprBase = [](CodeGenFunction &CGF,
8197 const MemberExpr *E) {
8198 const Expr *BaseExpr = E->getBase();
8199 // If this is s.x, emit s as an lvalue. If it is s->x, emit s as a
8200 // scalar.
8201 LValue BaseLV;
8202 if (E->isArrow()) {
8203 LValueBaseInfo BaseInfo;
8204 TBAAAccessInfo TBAAInfo;
8205 Address Addr =
8206 CGF.EmitPointerWithAlignment(BaseExpr, &BaseInfo, &TBAAInfo);
8207 QualType PtrTy = BaseExpr->getType()->getPointeeType();
8208 BaseLV = CGF.MakeAddrLValue(Addr, PtrTy, BaseInfo, TBAAInfo);
8209 } else {
8210 BaseLV = CGF.EmitOMPSharedLValue(BaseExpr);
8211 }
8212 return BaseLV;
8213 };
8214 if (OAShE) {
8215 LowestElem = LB =
8216 Address(CGF.EmitScalarExpr(OAShE->getBase()),
8218 OAShE->getBase()->getType()->getPointeeType()),
8220 OAShE->getBase()->getType()));
8221 } else if (IsMemberReference) {
8222 const auto *ME = cast<MemberExpr>(I->getAssociatedExpression());
8223 LValue BaseLVal = EmitMemberExprBase(CGF, ME);
8224 LowestElem = CGF.EmitLValueForFieldInitialization(
8225 BaseLVal, cast<FieldDecl>(MapDecl))
8226 .getAddress();
8227 LB = CGF.EmitLoadOfReferenceLValue(LowestElem, MapDecl->getType())
8228 .getAddress();
8229 } else {
8230 LowestElem = LB =
8231 CGF.EmitOMPSharedLValue(I->getAssociatedExpression())
8232 .getAddress();
8233 }
8234
8235 // Save the final LowestElem, to use it as the pointee in attach maps,
8236 // if emitted.
8237 if (Next == CE)
8238 FinalLowestElem = LowestElem;
8239
8240 // If this component is a pointer inside the base struct then we don't
8241 // need to create any entry for it - it will be combined with the object
8242 // it is pointing to into a single PTR_AND_OBJ entry.
8243 bool IsMemberPointerOrAddr =
8244 EncounteredME &&
8245 (((IsPointer || ForDeviceAddr) &&
8246 I->getAssociatedExpression() == EncounteredME) ||
8247 (IsPrevMemberReference && !IsPointer) ||
8248 (IsMemberReference && Next != CE &&
8249 !Next->getAssociatedExpression()->getType()->isPointerType()));
8250 if (!OverlappedElements.empty() && Next == CE) {
8251 // Handle base element with the info for overlapped elements.
8252 assert(!PartialStruct.Base.isValid() && "The base element is set.");
8253 assert(!IsPointer &&
8254 "Unexpected base element with the pointer type.");
8255 // Mark the whole struct as the struct that requires allocation on the
8256 // device.
8257 PartialStruct.LowestElem = {0, LowestElem};
8258 CharUnits TypeSize = CGF.getContext().getTypeSizeInChars(
8259 I->getAssociatedExpression()->getType());
8260 Address HB = CGF.Builder.CreateConstGEP(
8262 LowestElem, CGF.VoidPtrTy, CGF.Int8Ty),
8263 TypeSize.getQuantity() - 1);
8264 PartialStruct.HighestElem = {
8265 std::numeric_limits<decltype(
8266 PartialStruct.HighestElem.first)>::max(),
8267 HB};
8268 PartialStruct.Base = BP;
8269 PartialStruct.LB = LB;
8270 assert(
8271 PartialStruct.PreliminaryMapData.BasePointers.empty() &&
8272 "Overlapped elements must be used only once for the variable.");
8273 std::swap(PartialStruct.PreliminaryMapData, CombinedInfo);
8274 // Emit data for non-overlapped data.
8275 OpenMPOffloadMappingFlags Flags =
8276 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF |
8277 getMapTypeBits(MapType, MapModifiers, MotionModifiers, IsImplicit,
8278 /*AddPtrFlag=*/false,
8279 /*AddIsTargetParamFlag=*/false, IsNonContiguous);
8280 CopyOverlappedEntryGaps CopyGaps(CGF, CombinedInfo, Flags, MapDecl,
8281 MapExpr, BP, LB, IsNonContiguous,
8282 DimSize);
8283 // Do bitcopy of all non-overlapped structure elements.
8285 Component : OverlappedElements) {
8286 for (const OMPClauseMappableExprCommon::MappableComponent &MC :
8287 Component) {
8288 if (const ValueDecl *VD = MC.getAssociatedDeclaration()) {
8289 if (const auto *FD = dyn_cast<FieldDecl>(VD)) {
8290 CopyGaps.processField(MC, FD, EmitMemberExprBase);
8291 }
8292 }
8293 }
8294 }
8295 CopyGaps.copyUntilEnd(HB);
8296 break;
8297 }
8298 llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression());
8299 // Skip adding an entry in the CurInfo of this combined entry if the
8300 // whole struct is currently being mapped. The struct needs to be added
8301 // in the first position before any data internal to the struct is being
8302 // mapped.
8303 // Skip adding an entry in the CurInfo of this combined entry if the
8304 // PartialStruct.PreliminaryMapData.BasePointers has been mapped.
8305 if ((!IsMemberPointerOrAddr && !IsPartialMapped) ||
8306 (Next == CE && MapType != OMPC_MAP_unknown)) {
8307 if (!IsMappingWholeStruct) {
8308 CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
8309 CombinedInfo.BasePointers.push_back(BP.emitRawPointer(CGF));
8310 CombinedInfo.DevicePtrDecls.push_back(nullptr);
8311 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
8312 CombinedInfo.Pointers.push_back(LB.emitRawPointer(CGF));
8313 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
8314 Size, CGF.Int64Ty, /*isSigned=*/true));
8315 CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
8316 : 1);
8317 } else {
8318 StructBaseCombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
8319 StructBaseCombinedInfo.BasePointers.push_back(
8320 BP.emitRawPointer(CGF));
8321 StructBaseCombinedInfo.DevicePtrDecls.push_back(nullptr);
8322 StructBaseCombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
8323 StructBaseCombinedInfo.Pointers.push_back(LB.emitRawPointer(CGF));
8324 StructBaseCombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
8325 Size, CGF.Int64Ty, /*isSigned=*/true));
8326 StructBaseCombinedInfo.NonContigInfo.Dims.push_back(
8327 IsNonContiguous ? DimSize : 1);
8328 }
8329
8330 // If Mapper is valid, the last component inherits the mapper.
8331 bool HasMapper = Mapper && Next == CE;
8332 if (!IsMappingWholeStruct)
8333 CombinedInfo.Mappers.push_back(HasMapper ? Mapper : nullptr);
8334 else
8335 StructBaseCombinedInfo.Mappers.push_back(HasMapper ? Mapper
8336 : nullptr);
8337
8338 // We need to add a pointer flag for each map that comes from the
8339 // same expression except for the first one. We also need to signal
8340 // this map is the first one that relates with the current capture
8341 // (there is a set of entries for each capture).
8342 OpenMPOffloadMappingFlags Flags = getMapTypeBits(
8343 MapType, MapModifiers, MotionModifiers, IsImplicit,
8344 !IsExpressionFirstInfo || RequiresReference ||
8345 FirstPointerInComplexData || IsMemberReference,
8346 IsCaptureFirstInfo && !RequiresReference, IsNonContiguous);
8347
8348 if (!IsExpressionFirstInfo || IsMemberReference) {
8349 // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as well,
8350 // then we reset the TO/FROM/ALWAYS/DELETE/CLOSE flags.
8351 if (IsPointer || (IsMemberReference && Next != CE))
8352 Flags &= ~(OpenMPOffloadMappingFlags::OMP_MAP_TO |
8353 OpenMPOffloadMappingFlags::OMP_MAP_FROM |
8354 OpenMPOffloadMappingFlags::OMP_MAP_ALWAYS |
8355 OpenMPOffloadMappingFlags::OMP_MAP_DELETE |
8356 OpenMPOffloadMappingFlags::OMP_MAP_CLOSE);
8357
8358 if (ShouldBeMemberOf) {
8359 // Set placeholder value MEMBER_OF=FFFF to indicate that the flag
8360 // should be later updated with the correct value of MEMBER_OF.
8361 Flags |= OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF;
8362 // From now on, all subsequent PTR_AND_OBJ entries should not be
8363 // marked as MEMBER_OF.
8364 ShouldBeMemberOf = false;
8365 }
8366 }
8367
8368 if (!IsMappingWholeStruct)
8369 CombinedInfo.Types.push_back(Flags);
8370 else
8371 StructBaseCombinedInfo.Types.push_back(Flags);
8372 }
8373
8374 // If we have encountered a member expression so far, keep track of the
8375 // mapped member. If the parent is "*this", then the value declaration
8376 // is nullptr.
8377 if (EncounteredME) {
8378 const auto *FD = cast<FieldDecl>(EncounteredME->getMemberDecl());
8379 unsigned FieldIndex = FD->getFieldIndex();
8380
8381 // Update info about the lowest and highest elements for this struct
8382 if (!PartialStruct.Base.isValid()) {
8383 PartialStruct.LowestElem = {FieldIndex, LowestElem};
8384 if (IsFinalArraySection && OASE) {
8385 Address HB =
8386 CGF.EmitArraySectionExpr(OASE, /*IsLowerBound=*/false)
8387 .getAddress();
8388 PartialStruct.HighestElem = {FieldIndex, HB};
8389 } else {
8390 PartialStruct.HighestElem = {FieldIndex, LowestElem};
8391 }
8392 PartialStruct.Base = BP;
8393 PartialStruct.LB = BP;
8394 } else if (FieldIndex < PartialStruct.LowestElem.first) {
8395 PartialStruct.LowestElem = {FieldIndex, LowestElem};
8396 } else if (FieldIndex > PartialStruct.HighestElem.first) {
8397 if (IsFinalArraySection && OASE) {
8398 Address HB =
8399 CGF.EmitArraySectionExpr(OASE, /*IsLowerBound=*/false)
8400 .getAddress();
8401 PartialStruct.HighestElem = {FieldIndex, HB};
8402 } else {
8403 PartialStruct.HighestElem = {FieldIndex, LowestElem};
8404 }
8405 }
8406 }
8407
8408 // Need to emit combined struct for array sections.
8409 if (IsFinalArraySection || IsNonContiguous)
8410 PartialStruct.IsArraySection = true;
8411
8412 // If we have a final array section, we are done with this expression.
8413 if (IsFinalArraySection)
8414 break;
8415
8416 // The pointer becomes the base for the next element.
8417 if (Next != CE)
8418 BP = IsMemberReference ? LowestElem : LB;
8419 if (!IsPartialMapped)
8420 IsExpressionFirstInfo = false;
8421 IsCaptureFirstInfo = false;
8422 FirstPointerInComplexData = false;
8423 IsPrevMemberReference = IsMemberReference;
8424 } else if (FirstPointerInComplexData) {
8425 QualType Ty = Components.rbegin()
8426 ->getAssociatedDeclaration()
8427 ->getType()
8428 .getNonReferenceType();
8429 BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
8430 FirstPointerInComplexData = false;
8431 }
8432 }
8433 // If ran into the whole component - allocate the space for the whole
8434 // record.
8435 if (!EncounteredME)
8436 PartialStruct.HasCompleteRecord = true;
8437
8438 // Populate ATTACH information for later processing by emitAttachEntry.
8439 if (shouldEmitAttachEntry(AttachPtrExpr, BaseDecl, CGF, CurDir)) {
8440 AttachInfo.AttachPtrAddr = AttachPtrAddr;
8441 AttachInfo.AttachPteeAddr = FinalLowestElem;
8442 AttachInfo.AttachPtrDecl = BaseDecl;
8443 AttachInfo.AttachMapExpr = MapExpr;
8444 }
8445
8446 if (!IsNonContiguous)
8447 return;
8448
8449 const ASTContext &Context = CGF.getContext();
8450
8451 // For supporting stride in array section, we need to initialize the first
8452 // dimension size as 1, first offset as 0, and first count as 1
8453 MapValuesArrayTy CurOffsets = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 0)};
8454 MapValuesArrayTy CurCounts = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)};
8455 MapValuesArrayTy CurStrides;
8456 MapValuesArrayTy DimSizes{llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)};
8457 uint64_t ElementTypeSize;
8458
8459 // Collect Size information for each dimension and get the element size as
8460 // the first Stride. For example, for `int arr[10][10]`, the DimSizes
8461 // should be [10, 10] and the first stride is 4 btyes.
8462 for (const OMPClauseMappableExprCommon::MappableComponent &Component :
8463 Components) {
8464 const Expr *AssocExpr = Component.getAssociatedExpression();
8465 const auto *OASE = dyn_cast<ArraySectionExpr>(AssocExpr);
8466
8467 if (!OASE)
8468 continue;
8469
8470 QualType Ty = ArraySectionExpr::getBaseOriginalType(OASE->getBase());
8471 auto *CAT = Context.getAsConstantArrayType(Ty);
8472 auto *VAT = Context.getAsVariableArrayType(Ty);
8473
8474 // We need all the dimension size except for the last dimension.
8475 assert((VAT || CAT || &Component == &*Components.begin()) &&
8476 "Should be either ConstantArray or VariableArray if not the "
8477 "first Component");
8478
8479 // Get element size if CurStrides is empty.
8480 if (CurStrides.empty()) {
8481 const Type *ElementType = nullptr;
8482 if (CAT)
8483 ElementType = CAT->getElementType().getTypePtr();
8484 else if (VAT)
8485 ElementType = VAT->getElementType().getTypePtr();
8486 else if (&Component == &*Components.begin()) {
8487 // If the base is a raw pointer (e.g. T *data with data[a:b:c]),
8488 // there was no earlier CAT/VAT/array handling to establish
8489 // ElementType. Capture the pointee type now so that subsequent
8490 // components (offset/length/stride) have a concrete element type to
8491 // work with. This makes pointer-backed sections behave consistently
8492 // with CAT/VAT/array bases.
8493 if (const auto *PtrType = Ty->getAs<PointerType>())
8494 ElementType = PtrType->getPointeeType().getTypePtr();
8495 } else {
8496 // Any component after the first should never have a raw pointer type;
8497 // by this point. ElementType must already be known (set above or in
8498 // prior array / CAT / VAT handling).
8499 assert(!Ty->isPointerType() &&
8500 "Non-first components should not be raw pointers");
8501 }
8502
8503 // At this stage, if ElementType was a base pointer and we are in the
8504 // first iteration, it has been computed.
8505 if (ElementType) {
8506 // For the case that having pointer as base, we need to remove one
8507 // level of indirection.
8508 if (&Component != &*Components.begin())
8509 ElementType = ElementType->getPointeeOrArrayElementType();
8510 ElementTypeSize =
8511 Context.getTypeSizeInChars(ElementType).getQuantity();
8512 CurStrides.push_back(
8513 llvm::ConstantInt::get(CGF.Int64Ty, ElementTypeSize));
8514 }
8515 }
8516 // Get dimension value except for the last dimension since we don't need
8517 // it.
8518 if (DimSizes.size() < Components.size() - 1) {
8519 if (CAT)
8520 DimSizes.push_back(
8521 llvm::ConstantInt::get(CGF.Int64Ty, CAT->getZExtSize()));
8522 else if (VAT)
8523 DimSizes.push_back(CGF.Builder.CreateIntCast(
8524 CGF.EmitScalarExpr(VAT->getSizeExpr()), CGF.Int64Ty,
8525 /*IsSigned=*/false));
8526 }
8527 }
8528
8529 // Skip the dummy dimension since we have already have its information.
8530 auto *DI = DimSizes.begin() + 1;
8531 // Product of dimension.
8532 llvm::Value *DimProd =
8533 llvm::ConstantInt::get(CGF.CGM.Int64Ty, ElementTypeSize);
8534
8535 // Collect info for non-contiguous. Notice that offset, count, and stride
8536 // are only meaningful for array-section, so we insert a null for anything
8537 // other than array-section.
8538 // Also, the size of offset, count, and stride are not the same as
8539 // pointers, base_pointers, sizes, or dims. Instead, the size of offset,
8540 // count, and stride are the same as the number of non-contiguous
8541 // declaration in target update to/from clause.
8542 for (const OMPClauseMappableExprCommon::MappableComponent &Component :
8543 Components) {
8544 const Expr *AssocExpr = Component.getAssociatedExpression();
8545
8546 if (const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr)) {
8547 llvm::Value *Offset = CGF.Builder.CreateIntCast(
8548 CGF.EmitScalarExpr(AE->getIdx()), CGF.Int64Ty,
8549 /*isSigned=*/false);
8550 CurOffsets.push_back(Offset);
8551 CurCounts.push_back(llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/1));
8552 CurStrides.push_back(CurStrides.back());
8553 continue;
8554 }
8555
8556 const auto *OASE = dyn_cast<ArraySectionExpr>(AssocExpr);
8557
8558 if (!OASE)
8559 continue;
8560
8561 // Offset
8562 const Expr *OffsetExpr = OASE->getLowerBound();
8563 llvm::Value *Offset = nullptr;
8564 if (!OffsetExpr) {
8565 // If offset is absent, then we just set it to zero.
8566 Offset = llvm::ConstantInt::get(CGF.Int64Ty, 0);
8567 } else {
8568 Offset = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(OffsetExpr),
8569 CGF.Int64Ty,
8570 /*isSigned=*/false);
8571 }
8572 CurOffsets.push_back(Offset);
8573
8574 // Count
8575 const Expr *CountExpr = OASE->getLength();
8576 llvm::Value *Count = nullptr;
8577 if (!CountExpr) {
8578 // In Clang, once a high dimension is an array section, we construct all
8579 // the lower dimension as array section, however, for case like
8580 // arr[0:2][2], Clang construct the inner dimension as an array section
8581 // but it actually is not in an array section form according to spec.
8582 if (!OASE->getColonLocFirst().isValid() &&
8583 !OASE->getColonLocSecond().isValid()) {
8584 Count = llvm::ConstantInt::get(CGF.Int64Ty, 1);
8585 } else {
8586 // OpenMP 5.0, 2.1.5 Array Sections, Description.
8587 // When the length is absent it defaults to ⌈(size −
8588 // lower-bound)/stride⌉, where size is the size of the array
8589 // dimension.
8590 const Expr *StrideExpr = OASE->getStride();
8591 llvm::Value *Stride =
8592 StrideExpr
8593 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr),
8594 CGF.Int64Ty, /*isSigned=*/false)
8595 : nullptr;
8596 if (Stride)
8597 Count = CGF.Builder.CreateUDiv(
8598 CGF.Builder.CreateNUWSub(*DI, Offset), Stride);
8599 else
8600 Count = CGF.Builder.CreateNUWSub(*DI, Offset);
8601 }
8602 } else {
8603 Count = CGF.EmitScalarExpr(CountExpr);
8604 }
8605 Count = CGF.Builder.CreateIntCast(Count, CGF.Int64Ty, /*isSigned=*/false);
8606 CurCounts.push_back(Count);
8607
8608 // Stride_n' = Stride_n * (D_0 * D_1 ... * D_n-1) * Unit size
8609 // Take `int arr[5][5][5]` and `arr[0:2:2][1:2:1][0:2:2]` as an example:
8610 // Offset Count Stride
8611 // D0 0 1 4 (int) <- dummy dimension
8612 // D1 0 2 8 (2 * (1) * 4)
8613 // D2 1 2 20 (1 * (1 * 5) * 4)
8614 // D3 0 2 200 (2 * (1 * 5 * 4) * 4)
8615 const Expr *StrideExpr = OASE->getStride();
8616 llvm::Value *Stride =
8617 StrideExpr
8618 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr),
8619 CGF.Int64Ty, /*isSigned=*/false)
8620 : nullptr;
8621 DimProd = CGF.Builder.CreateNUWMul(DimProd, *(DI - 1));
8622 if (Stride)
8623 CurStrides.push_back(CGF.Builder.CreateNUWMul(DimProd, Stride));
8624 else
8625 CurStrides.push_back(DimProd);
8626 if (DI != DimSizes.end())
8627 ++DI;
8628 }
8629
8630 CombinedInfo.NonContigInfo.Offsets.push_back(CurOffsets);
8631 CombinedInfo.NonContigInfo.Counts.push_back(CurCounts);
8632 CombinedInfo.NonContigInfo.Strides.push_back(CurStrides);
8633 }
8634
8635 /// Return the adjusted map modifiers if the declaration a capture refers to
8636 /// appears in a first-private clause. This is expected to be used only with
8637 /// directives that start with 'target'.
8638 OpenMPOffloadMappingFlags
8639 getMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap) const {
8640 assert(Cap.capturesVariable() && "Expected capture by reference only!");
8641
8642 // A first private variable captured by reference will use only the
8643 // 'private ptr' and 'map to' flag. Return the right flags if the captured
8644 // declaration is known as first-private in this handler.
8645 if (FirstPrivateDecls.count(Cap.getCapturedVar())) {
8646 if (Cap.getCapturedVar()->getType()->isAnyPointerType())
8647 return OpenMPOffloadMappingFlags::OMP_MAP_TO |
8648 OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ;
8649 return OpenMPOffloadMappingFlags::OMP_MAP_PRIVATE |
8650 OpenMPOffloadMappingFlags::OMP_MAP_TO;
8651 }
8652 auto I = LambdasMap.find(Cap.getCapturedVar()->getCanonicalDecl());
8653 if (I != LambdasMap.end())
8654 // for map(to: lambda): using user specified map type.
8655 return getMapTypeBits(
8656 I->getSecond()->getMapType(), I->getSecond()->getMapTypeModifiers(),
8657 /*MotionModifiers=*/{}, I->getSecond()->isImplicit(),
8658 /*AddPtrFlag=*/false,
8659 /*AddIsTargetParamFlag=*/false,
8660 /*isNonContiguous=*/false);
8661 return OpenMPOffloadMappingFlags::OMP_MAP_TO |
8662 OpenMPOffloadMappingFlags::OMP_MAP_FROM;
8663 }
8664
8665 void getPlainLayout(const CXXRecordDecl *RD,
8666 llvm::SmallVectorImpl<const FieldDecl *> &Layout,
8667 bool AsBase) const {
8668 const CGRecordLayout &RL = CGF.getTypes().getCGRecordLayout(RD);
8669
8670 llvm::StructType *St =
8671 AsBase ? RL.getBaseSubobjectLLVMType() : RL.getLLVMType();
8672
8673 unsigned NumElements = St->getNumElements();
8674 llvm::SmallVector<
8675 llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>, 4>
8676 RecordLayout(NumElements);
8677
8678 // Fill bases.
8679 for (const auto &I : RD->bases()) {
8680 if (I.isVirtual())
8681 continue;
8682
8683 QualType BaseTy = I.getType();
8684 const auto *Base = BaseTy->getAsCXXRecordDecl();
8685 // Ignore empty bases.
8686 if (isEmptyRecordForLayout(CGF.getContext(), BaseTy) ||
8687 CGF.getContext()
8688 .getASTRecordLayout(Base)
8690 .isZero())
8691 continue;
8692
8693 unsigned FieldIndex = RL.getNonVirtualBaseLLVMFieldNo(Base);
8694 RecordLayout[FieldIndex] = Base;
8695 }
8696 // Fill in virtual bases.
8697 for (const auto &I : RD->vbases()) {
8698 QualType BaseTy = I.getType();
8699 // Ignore empty bases.
8700 if (isEmptyRecordForLayout(CGF.getContext(), BaseTy))
8701 continue;
8702
8703 const auto *Base = BaseTy->getAsCXXRecordDecl();
8704 unsigned FieldIndex = RL.getVirtualBaseIndex(Base);
8705 if (RecordLayout[FieldIndex])
8706 continue;
8707 RecordLayout[FieldIndex] = Base;
8708 }
8709 // Fill in all the fields.
8710 assert(!RD->isUnion() && "Unexpected union.");
8711 for (const auto *Field : RD->fields()) {
8712 // Fill in non-bitfields. (Bitfields always use a zero pattern, which we
8713 // will fill in later.)
8714 if (!Field->isBitField() &&
8715 !isEmptyFieldForLayout(CGF.getContext(), Field)) {
8716 unsigned FieldIndex = RL.getLLVMFieldNo(Field);
8717 RecordLayout[FieldIndex] = Field;
8718 }
8719 }
8720 for (const llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>
8721 &Data : RecordLayout) {
8722 if (Data.isNull())
8723 continue;
8724 if (const auto *Base = dyn_cast<const CXXRecordDecl *>(Data))
8725 getPlainLayout(Base, Layout, /*AsBase=*/true);
8726 else
8727 Layout.push_back(cast<const FieldDecl *>(Data));
8728 }
8729 }
8730
8731 /// Returns the address corresponding to \p PointerExpr.
8732 static Address getAttachPtrAddr(const Expr *PointerExpr,
8733 CodeGenFunction &CGF) {
8734 assert(PointerExpr && "Cannot get addr from null attach-ptr expr");
8735 Address AttachPtrAddr = Address::invalid();
8736
8737 if (auto *DRE = dyn_cast<DeclRefExpr>(PointerExpr)) {
8738 // If the pointer is a variable, we can use its address directly.
8739 AttachPtrAddr = CGF.EmitLValue(DRE).getAddress();
8740 } else if (auto *OASE = dyn_cast<ArraySectionExpr>(PointerExpr)) {
8741 AttachPtrAddr =
8742 CGF.EmitArraySectionExpr(OASE, /*IsLowerBound=*/true).getAddress();
8743 } else if (auto *ASE = dyn_cast<ArraySubscriptExpr>(PointerExpr)) {
8744 AttachPtrAddr = CGF.EmitLValue(ASE).getAddress();
8745 } else if (auto *ME = dyn_cast<MemberExpr>(PointerExpr)) {
8746 AttachPtrAddr = CGF.EmitMemberExpr(ME).getAddress();
8747 } else if (auto *UO = dyn_cast<UnaryOperator>(PointerExpr)) {
8748 assert(UO->getOpcode() == UO_Deref &&
8749 "Unexpected unary-operator on attach-ptr-expr");
8750 AttachPtrAddr = CGF.EmitLValue(UO).getAddress();
8751 }
8752 assert(AttachPtrAddr.isValid() &&
8753 "Failed to get address for attach pointer expression");
8754 return AttachPtrAddr;
8755 }
8756
8757 /// Get the address of the attach pointer, and a load from it, to get the
8758 /// pointee base address.
8759 /// \return A pair containing AttachPtrAddr and AttachPteeBaseAddr. The pair
8760 /// contains invalid addresses if \p AttachPtrExpr is null.
8761 static std::pair<Address, Address>
8762 getAttachPtrAddrAndPteeBaseAddr(const Expr *AttachPtrExpr,
8763 CodeGenFunction &CGF) {
8764
8765 if (!AttachPtrExpr)
8766 return {Address::invalid(), Address::invalid()};
8767
8768 Address AttachPtrAddr = getAttachPtrAddr(AttachPtrExpr, CGF);
8769 assert(AttachPtrAddr.isValid() && "Invalid attach pointer addr");
8770
8771 QualType AttachPtrType =
8774
8775 Address AttachPteeBaseAddr = CGF.EmitLoadOfPointer(
8776 AttachPtrAddr, AttachPtrType->castAs<PointerType>());
8777 assert(AttachPteeBaseAddr.isValid() && "Invalid attach pointee base addr");
8778
8779 return {AttachPtrAddr, AttachPteeBaseAddr};
8780 }
8781
8782 /// Returns whether an attach entry should be emitted for a map on
8783 /// \p MapBaseDecl on the directive \p CurDir.
8784 static bool
8785 shouldEmitAttachEntry(const Expr *PointerExpr, const ValueDecl *MapBaseDecl,
8786 CodeGenFunction &CGF,
8787 llvm::PointerUnion<const OMPExecutableDirective *,
8788 const OMPDeclareMapperDecl *>
8789 CurDir) {
8790 if (!PointerExpr)
8791 return false;
8792
8793 // Pointer attachment is needed at map-entering time or for declare
8794 // mappers.
8795 return isa<const OMPDeclareMapperDecl *>(CurDir) ||
8798 ->getDirectiveKind());
8799 }
8800
8801 /// Computes the attach-ptr expr for \p Components, and updates various maps
8802 /// with the information.
8803 /// It internally calls OMPClauseMappableExprCommon::findAttachPtrExpr()
8804 /// with the OpenMPDirectiveKind extracted from \p CurDir.
8805 /// It updates AttachPtrComputationOrderMap, AttachPtrComponentDepthMap, and
8806 /// AttachPtrExprMap.
8807 void collectAttachPtrExprInfo(
8809 llvm::PointerUnion<const OMPExecutableDirective *,
8810 const OMPDeclareMapperDecl *>
8811 CurDir) {
8812
8813 OpenMPDirectiveKind CurDirectiveID =
8815 ? OMPD_declare_mapper
8816 : cast<const OMPExecutableDirective *>(CurDir)->getDirectiveKind();
8817
8818 const auto &[AttachPtrExpr, Depth] =
8820 CurDirectiveID);
8821
8822 AttachPtrComputationOrderMap.try_emplace(
8823 AttachPtrExpr, AttachPtrComputationOrderMap.size());
8824 AttachPtrComponentDepthMap.try_emplace(AttachPtrExpr, Depth);
8825 AttachPtrExprMap.try_emplace(Components, AttachPtrExpr);
8826 }
8827
8828 /// Generate all the base pointers, section pointers, sizes, map types, and
8829 /// mappers for the extracted mappable expressions (all included in \a
8830 /// CombinedInfo). Also, for each item that relates with a device pointer, a
8831 /// pair of the relevant declaration and index where it occurs is appended to
8832 /// the device pointers info array.
8833 void generateAllInfoForClauses(
8834 ArrayRef<const OMPClause *> Clauses, MapCombinedInfoTy &CombinedInfo,
8835 llvm::OpenMPIRBuilder &OMPBuilder,
8836 const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
8837 llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
8838 // We have to process the component lists that relate with the same
8839 // declaration in a single chunk so that we can generate the map flags
8840 // correctly. Therefore, we organize all lists in a map.
8841 enum MapKind { Present, Allocs, Other, Total };
8842 llvm::MapVector<CanonicalDeclPtr<const Decl>,
8843 SmallVector<SmallVector<MapInfo, 8>, 4>>
8844 Info;
8845
8846 // Helper function to fill the information map for the different supported
8847 // clauses.
8848 auto &&InfoGen =
8849 [&Info, &SkipVarSet](
8850 const ValueDecl *D, MapKind Kind,
8852 OpenMPMapClauseKind MapType,
8853 ArrayRef<OpenMPMapModifierKind> MapModifiers,
8854 ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
8855 bool ReturnDevicePointer, bool IsImplicit, const ValueDecl *Mapper,
8856 const Expr *VarRef = nullptr, bool ForDeviceAddr = false) {
8857 if (SkipVarSet.contains(D))
8858 return;
8859 auto It = Info.try_emplace(D, Total).first;
8860 It->second[Kind].emplace_back(
8861 L, MapType, MapModifiers, MotionModifiers, ReturnDevicePointer,
8862 IsImplicit, Mapper, VarRef, ForDeviceAddr);
8863 };
8864
8865 for (const auto *Cl : Clauses) {
8866 const auto *C = dyn_cast<OMPMapClause>(Cl);
8867 if (!C)
8868 continue;
8869 MapKind Kind = Other;
8870 if (llvm::is_contained(C->getMapTypeModifiers(),
8871 OMPC_MAP_MODIFIER_present))
8872 Kind = Present;
8873 else if (C->getMapType() == OMPC_MAP_alloc)
8874 Kind = Allocs;
8875 const auto *EI = C->getVarRefs().begin();
8876 for (const auto L : C->component_lists()) {
8877 const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
8878 InfoGen(std::get<0>(L), Kind, std::get<1>(L), C->getMapType(),
8879 C->getMapTypeModifiers(), {},
8880 /*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(L),
8881 E);
8882 ++EI;
8883 }
8884 }
8885 for (const auto *Cl : Clauses) {
8886 const auto *C = dyn_cast<OMPToClause>(Cl);
8887 if (!C)
8888 continue;
8889 MapKind Kind = Other;
8890 if (llvm::is_contained(C->getMotionModifiers(),
8891 OMPC_MOTION_MODIFIER_present))
8892 Kind = Present;
8893 if (llvm::is_contained(C->getMotionModifiers(),
8894 OMPC_MOTION_MODIFIER_iterator)) {
8895 if (auto *IteratorExpr = dyn_cast<OMPIteratorExpr>(
8896 C->getIteratorModifier()->IgnoreParenImpCasts())) {
8897 const auto *VD = cast<VarDecl>(IteratorExpr->getIteratorDecl(0));
8898 CGF.EmitVarDecl(*VD);
8899 }
8900 }
8901
8902 const auto *EI = C->getVarRefs().begin();
8903 for (const auto L : C->component_lists()) {
8904 InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_to, {},
8905 C->getMotionModifiers(), /*ReturnDevicePointer=*/false,
8906 C->isImplicit(), std::get<2>(L), *EI);
8907 ++EI;
8908 }
8909 }
8910 for (const auto *Cl : Clauses) {
8911 const auto *C = dyn_cast<OMPFromClause>(Cl);
8912 if (!C)
8913 continue;
8914 MapKind Kind = Other;
8915 if (llvm::is_contained(C->getMotionModifiers(),
8916 OMPC_MOTION_MODIFIER_present))
8917 Kind = Present;
8918 if (llvm::is_contained(C->getMotionModifiers(),
8919 OMPC_MOTION_MODIFIER_iterator)) {
8920 if (auto *IteratorExpr = dyn_cast<OMPIteratorExpr>(
8921 C->getIteratorModifier()->IgnoreParenImpCasts())) {
8922 const auto *VD = cast<VarDecl>(IteratorExpr->getIteratorDecl(0));
8923 CGF.EmitVarDecl(*VD);
8924 }
8925 }
8926
8927 const auto *EI = C->getVarRefs().begin();
8928 for (const auto L : C->component_lists()) {
8929 InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_from, {},
8930 C->getMotionModifiers(),
8931 /*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(L),
8932 *EI);
8933 ++EI;
8934 }
8935 }
8936
8937 // Look at the use_device_ptr and use_device_addr clauses information and
8938 // mark the existing map entries as such. If there is no map information for
8939 // an entry in the use_device_ptr and use_device_addr list, we create one
8940 // with map type 'return_param' and zero size section. It is the user's
8941 // fault if that was not mapped before. If there is no map information, then
8942 // we defer the emission of that entry until all the maps for the same VD
8943 // have been handled.
8944 MapCombinedInfoTy UseDeviceDataCombinedInfo;
8945
8946 auto &&UseDeviceDataCombinedInfoGen =
8947 [&UseDeviceDataCombinedInfo](const ValueDecl *VD, llvm::Value *Ptr,
8948 CodeGenFunction &CGF, bool IsDevAddr,
8949 bool HasUdpFbNullify = false) {
8950 UseDeviceDataCombinedInfo.Exprs.push_back(VD);
8951 UseDeviceDataCombinedInfo.BasePointers.emplace_back(Ptr);
8952 UseDeviceDataCombinedInfo.DevicePtrDecls.emplace_back(VD);
8953 UseDeviceDataCombinedInfo.DevicePointers.emplace_back(
8954 IsDevAddr ? DeviceInfoTy::Address : DeviceInfoTy::Pointer);
8955 // FIXME: For use_device_addr on array-sections, this should
8956 // be the starting address of the section.
8957 // e.g. int *p;
8958 // ... use_device_addr(p[3])
8959 // &p[0], &p[3], /*size=*/0, RETURN_PARAM
8960 UseDeviceDataCombinedInfo.Pointers.push_back(Ptr);
8961 UseDeviceDataCombinedInfo.Sizes.push_back(
8962 llvm::Constant::getNullValue(CGF.Int64Ty));
8963 OpenMPOffloadMappingFlags Flags =
8964 OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM;
8965 if (HasUdpFbNullify)
8966 Flags |= OpenMPOffloadMappingFlags::OMP_MAP_FB_NULLIFY;
8967 UseDeviceDataCombinedInfo.Types.push_back(Flags);
8968 UseDeviceDataCombinedInfo.Mappers.push_back(nullptr);
8969 };
8970
8971 auto &&MapInfoGen =
8972 [&UseDeviceDataCombinedInfoGen](
8973 CodeGenFunction &CGF, const Expr *IE, const ValueDecl *VD,
8975 Components,
8976 bool IsDevAddr, bool IEIsAttachPtrForDevAddr = false,
8977 bool HasUdpFbNullify = false) {
8978 // We didn't find any match in our map information - generate a zero
8979 // size array section.
8980 llvm::Value *Ptr;
8981 if (IsDevAddr && !IEIsAttachPtrForDevAddr) {
8982 if (IE->isGLValue())
8983 Ptr = CGF.EmitLValue(IE).getPointer(CGF);
8984 else
8985 Ptr = CGF.EmitScalarExpr(IE);
8986 } else {
8987 Ptr = CGF.EmitLoadOfScalar(CGF.EmitLValue(IE), IE->getExprLoc());
8988 }
8989 bool TreatDevAddrAsDevPtr = IEIsAttachPtrForDevAddr;
8990 // For the purpose of address-translation, treat something like the
8991 // following:
8992 // int *p;
8993 // ... use_device_addr(p[1])
8994 // equivalent to
8995 // ... use_device_ptr(p)
8996 UseDeviceDataCombinedInfoGen(VD, Ptr, CGF, /*IsDevAddr=*/IsDevAddr &&
8997 !TreatDevAddrAsDevPtr,
8998 HasUdpFbNullify);
8999 };
9000
9001 auto &&IsMapInfoExist =
9002 [&Info, this](CodeGenFunction &CGF, const ValueDecl *VD, const Expr *IE,
9003 const Expr *DesiredAttachPtrExpr, bool IsDevAddr,
9004 bool HasUdpFbNullify = false) -> bool {
9005 // We potentially have map information for this declaration already.
9006 // Look for the first set of components that refer to it. If found,
9007 // return true.
9008 // If the first component is a member expression, we have to look into
9009 // 'this', which maps to null in the map of map information. Otherwise
9010 // look directly for the information.
9011 auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);
9012 if (It != Info.end()) {
9013 bool Found = false;
9014 for (auto &Data : It->second) {
9015 MapInfo *CI = nullptr;
9016 // We potentially have multiple maps for the same decl. We need to
9017 // only consider those for which the attach-ptr matches the desired
9018 // attach-ptr.
9019 auto *It = llvm::find_if(Data, [&](const MapInfo &MI) {
9020 if (MI.Components.back().getAssociatedDeclaration() != VD)
9021 return false;
9022
9023 const Expr *MapAttachPtr = getAttachPtrExpr(MI.Components);
9024 bool Match = AttachPtrComparator.areEqual(MapAttachPtr,
9025 DesiredAttachPtrExpr);
9026 return Match;
9027 });
9028
9029 if (It != Data.end())
9030 CI = &*It;
9031
9032 if (CI) {
9033 if (IsDevAddr) {
9034 CI->ForDeviceAddr = true;
9035 CI->ReturnDevicePointer = true;
9036 CI->HasUdpFbNullify = HasUdpFbNullify;
9037 Found = true;
9038 break;
9039 } else {
9040 auto PrevCI = std::next(CI->Components.rbegin());
9041 const auto *VarD = dyn_cast<VarDecl>(VD);
9042 const Expr *AttachPtrExpr = getAttachPtrExpr(CI->Components);
9043 if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
9044 isa<MemberExpr>(IE) ||
9045 !VD->getType().getNonReferenceType()->isPointerType() ||
9046 PrevCI == CI->Components.rend() ||
9047 isa<MemberExpr>(PrevCI->getAssociatedExpression()) || !VarD ||
9048 VarD->hasLocalStorage() ||
9049 (isa_and_nonnull<DeclRefExpr>(AttachPtrExpr) &&
9050 VD == cast<DeclRefExpr>(AttachPtrExpr)->getDecl())) {
9051 CI->ForDeviceAddr = IsDevAddr;
9052 CI->ReturnDevicePointer = true;
9053 CI->HasUdpFbNullify = HasUdpFbNullify;
9054 Found = true;
9055 break;
9056 }
9057 }
9058 }
9059 }
9060 return Found;
9061 }
9062 return false;
9063 };
9064
9065 // Look at the use_device_ptr clause information and mark the existing map
9066 // entries as such. If there is no map information for an entry in the
9067 // use_device_ptr list, we create one with map type 'alloc' and zero size
9068 // section. It is the user fault if that was not mapped before. If there is
9069 // no map information and the pointer is a struct member, then we defer the
9070 // emission of that entry until the whole struct has been processed.
9071 for (const auto *Cl : Clauses) {
9072 const auto *C = dyn_cast<OMPUseDevicePtrClause>(Cl);
9073 if (!C)
9074 continue;
9075 bool HasUdpFbNullify =
9076 C->getFallbackModifier() == OMPC_USE_DEVICE_PTR_FALLBACK_fb_nullify;
9077 for (const auto L : C->component_lists()) {
9079 std::get<1>(L);
9080 assert(!Components.empty() &&
9081 "Not expecting empty list of components!");
9082 const ValueDecl *VD = Components.back().getAssociatedDeclaration();
9084 const Expr *IE = Components.back().getAssociatedExpression();
9085 // For use_device_ptr, we match an existing map clause if its attach-ptr
9086 // is same as the use_device_ptr operand. e.g.
9087 // map expr | use_device_ptr expr | current behavior
9088 // ---------|---------------------|-----------------
9089 // p[1] | p | match
9090 // ps->a | ps | match
9091 // p | p | no match
9092 const Expr *UDPOperandExpr =
9093 Components.front().getAssociatedExpression();
9094 if (IsMapInfoExist(CGF, VD, IE,
9095 /*DesiredAttachPtrExpr=*/UDPOperandExpr,
9096 /*IsDevAddr=*/false, HasUdpFbNullify))
9097 continue;
9098 MapInfoGen(CGF, IE, VD, Components, /*IsDevAddr=*/false,
9099 /*IEIsAttachPtrForDevAddr=*/false, HasUdpFbNullify);
9100 }
9101 }
9102
9103 llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>, 4> Processed;
9104 for (const auto *Cl : Clauses) {
9105 const auto *C = dyn_cast<OMPUseDeviceAddrClause>(Cl);
9106 if (!C)
9107 continue;
9108 for (const auto L : C->component_lists()) {
9110 std::get<1>(L);
9111 assert(!std::get<1>(L).empty() &&
9112 "Not expecting empty list of components!");
9113 const ValueDecl *VD = std::get<1>(L).back().getAssociatedDeclaration();
9114 if (!Processed.insert(VD).second)
9115 continue;
9117 // For use_device_addr, we match an existing map clause if the
9118 // use_device_addr operand's attach-ptr matches the map operand's
9119 // attach-ptr.
9120 // We chould also restrict to only match cases when there is a full
9121 // match between the map/use_device_addr clause exprs, but that may be
9122 // unnecessary.
9123 //
9124 // map expr | use_device_addr expr | current | possible restrictive/
9125 // | | behavior | safer behavior
9126 // ---------|----------------------|-----------|-----------------------
9127 // p | p | match | match
9128 // p[0] | p[0] | match | match
9129 // p[0:1] | p[0] | match | no match
9130 // p[0:1] | p[2:1] | match | no match
9131 // p[1] | p[0] | match | no match
9132 // ps->a | ps->b | match | no match
9133 // p | p[0] | no match | no match
9134 // pp | pp[0][0] | no match | no match
9135 const Expr *UDAAttachPtrExpr = getAttachPtrExpr(Components);
9136 const Expr *IE = std::get<1>(L).back().getAssociatedExpression();
9137 assert((!UDAAttachPtrExpr || UDAAttachPtrExpr == IE) &&
9138 "use_device_addr operand has an attach-ptr, but does not match "
9139 "last component's expr.");
9140 if (IsMapInfoExist(CGF, VD, IE,
9141 /*DesiredAttachPtrExpr=*/UDAAttachPtrExpr,
9142 /*IsDevAddr=*/true))
9143 continue;
9144 MapInfoGen(CGF, IE, VD, Components,
9145 /*IsDevAddr=*/true,
9146 /*IEIsAttachPtrForDevAddr=*/UDAAttachPtrExpr != nullptr);
9147 }
9148 }
9149
9150 for (const auto &Data : Info) {
9151 MapCombinedInfoTy CurInfo;
9152 const Decl *D = Data.first;
9153 const ValueDecl *VD = cast_or_null<ValueDecl>(D);
9154 // Group component lists by their AttachPtrExpr and process them in order
9155 // of increasing complexity (nullptr first, then simple expressions like
9156 // p, then more complex ones like p[0], etc.)
9157 //
9158 // This is similar to how generateInfoForCaptureFromClauseInfo handles
9159 // grouping for target constructs.
9160 SmallVector<std::pair<const Expr *, MapInfo>, 16> AttachPtrMapInfoPairs;
9161
9162 // First, collect all MapData entries with their attach-ptr exprs.
9163 for (const auto &M : Data.second) {
9164 for (const MapInfo &L : M) {
9165 assert(!L.Components.empty() &&
9166 "Not expecting declaration with no component lists.");
9167
9168 const Expr *AttachPtrExpr = getAttachPtrExpr(L.Components);
9169 AttachPtrMapInfoPairs.emplace_back(AttachPtrExpr, L);
9170 }
9171 }
9172
9173 // Next, sort by increasing order of their complexity.
9174 llvm::stable_sort(AttachPtrMapInfoPairs,
9175 [this](const auto &LHS, const auto &RHS) {
9176 return AttachPtrComparator(LHS.first, RHS.first);
9177 });
9178
9179 // And finally, process them all in order, grouping those with
9180 // equivalent attach-ptr exprs together.
9181 auto *It = AttachPtrMapInfoPairs.begin();
9182 while (It != AttachPtrMapInfoPairs.end()) {
9183 const Expr *AttachPtrExpr = It->first;
9184
9185 SmallVector<MapInfo, 8> GroupLists;
9186 while (It != AttachPtrMapInfoPairs.end() &&
9187 (It->first == AttachPtrExpr ||
9188 AttachPtrComparator.areEqual(It->first, AttachPtrExpr))) {
9189 GroupLists.push_back(It->second);
9190 ++It;
9191 }
9192 assert(!GroupLists.empty() && "GroupLists should not be empty");
9193
9194 StructRangeInfoTy PartialStruct;
9195 AttachInfoTy AttachInfo;
9196 MapCombinedInfoTy GroupCurInfo;
9197 // Current group's struct base information:
9198 MapCombinedInfoTy GroupStructBaseCurInfo;
9199 for (const MapInfo &L : GroupLists) {
9200 // Remember the current base pointer index.
9201 unsigned CurrentBasePointersIdx = GroupCurInfo.BasePointers.size();
9202 unsigned StructBasePointersIdx =
9203 GroupStructBaseCurInfo.BasePointers.size();
9204
9205 GroupCurInfo.NonContigInfo.IsNonContiguous =
9206 L.Components.back().isNonContiguous();
9207 generateInfoForComponentList(
9208 L.MapType, L.MapModifiers, L.MotionModifiers, L.Components,
9209 GroupCurInfo, GroupStructBaseCurInfo, PartialStruct, AttachInfo,
9210 /*IsFirstComponentList=*/false, L.IsImplicit,
9211 /*GenerateAllInfoForClauses*/ true, L.Mapper, L.ForDeviceAddr, VD,
9212 L.VarRef, /*OverlappedElements*/ {});
9213
9214 // If this entry relates to a device pointer, set the relevant
9215 // declaration and add the 'return pointer' flag.
9216 if (L.ReturnDevicePointer) {
9217 // Check whether a value was added to either GroupCurInfo or
9218 // GroupStructBaseCurInfo and error if no value was added to either
9219 // of them:
9220 assert((CurrentBasePointersIdx < GroupCurInfo.BasePointers.size() ||
9221 StructBasePointersIdx <
9222 GroupStructBaseCurInfo.BasePointers.size()) &&
9223 "Unexpected number of mapped base pointers.");
9224
9225 // Choose a base pointer index which is always valid:
9226 const ValueDecl *RelevantVD =
9227 L.Components.back().getAssociatedDeclaration();
9228 assert(RelevantVD &&
9229 "No relevant declaration related with device pointer??");
9230
9231 // If GroupStructBaseCurInfo has been updated this iteration then
9232 // work on the first new entry added to it i.e. make sure that when
9233 // multiple values are added to any of the lists, the first value
9234 // added is being modified by the assignments below (not the last
9235 // value added).
9236 auto SetDevicePointerInfo = [&](MapCombinedInfoTy &Info,
9237 unsigned Idx) {
9238 Info.DevicePtrDecls[Idx] = RelevantVD;
9239 Info.DevicePointers[Idx] = L.ForDeviceAddr
9240 ? DeviceInfoTy::Address
9241 : DeviceInfoTy::Pointer;
9242 Info.Types[Idx] |=
9243 OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM;
9244 if (L.HasUdpFbNullify)
9245 Info.Types[Idx] |=
9246 OpenMPOffloadMappingFlags::OMP_MAP_FB_NULLIFY;
9247 };
9248
9249 if (StructBasePointersIdx <
9250 GroupStructBaseCurInfo.BasePointers.size())
9251 SetDevicePointerInfo(GroupStructBaseCurInfo,
9252 StructBasePointersIdx);
9253 else
9254 SetDevicePointerInfo(GroupCurInfo, CurrentBasePointersIdx);
9255 }
9256 }
9257
9258 // Unify entries in one list making sure the struct mapping precedes the
9259 // individual fields:
9260 MapCombinedInfoTy GroupUnionCurInfo;
9261 GroupUnionCurInfo.append(GroupStructBaseCurInfo);
9262 GroupUnionCurInfo.append(GroupCurInfo);
9263
9264 // If there is an entry in PartialStruct it means we have a struct with
9265 // individual members mapped. Emit an extra combined entry.
9266 if (PartialStruct.Base.isValid()) {
9267 // Prepend a synthetic dimension of length 1 to represent the
9268 // aggregated struct object. Using 1 (not 0, as 0 produced an
9269 // incorrect non-contiguous descriptor (DimSize==1), causing the
9270 // non-contiguous motion clause path to be skipped.) is important:
9271 // * It preserves the correct rank so targetDataUpdate() computes
9272 // DimSize == 2 for cases like strided array sections originating
9273 // from user-defined mappers (e.g. test with s.data[0:8:2]).
9274 GroupUnionCurInfo.NonContigInfo.Dims.insert(
9275 GroupUnionCurInfo.NonContigInfo.Dims.begin(), 1);
9276 emitCombinedEntry(
9277 CurInfo, GroupUnionCurInfo.Types, PartialStruct, AttachInfo,
9278 /*IsMapThis=*/!VD, OMPBuilder, VD,
9279 /*OffsetForMemberOfFlag=*/CombinedInfo.BasePointers.size(),
9280 /*NotTargetParams=*/true);
9281 }
9282
9283 // Append this group's results to the overall CurInfo in the correct
9284 // order: combined-entry -> original-field-entries -> attach-entry
9285 CurInfo.append(GroupUnionCurInfo);
9286 if (AttachInfo.isValid())
9287 emitAttachEntry(CGF, CurInfo, AttachInfo);
9288 }
9289
9290 // We need to append the results of this capture to what we already have.
9291 CombinedInfo.append(CurInfo);
9292 }
9293 // Append data for use_device_ptr/addr clauses.
9294 CombinedInfo.append(UseDeviceDataCombinedInfo);
9295 }
9296
9297public:
9298 MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF)
9299 : CurDir(&Dir), CGF(CGF), AttachPtrComparator(*this) {
9300 // Extract firstprivate clause information.
9301 for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>())
9302 for (const auto *D : C->varlist())
9303 FirstPrivateDecls.try_emplace(
9304 cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl()), C->isImplicit());
9305 // Extract implicit firstprivates from uses_allocators clauses.
9306 for (const auto *C : Dir.getClausesOfKind<OMPUsesAllocatorsClause>()) {
9307 for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
9308 OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
9309 if (const auto *DRE = dyn_cast_or_null<DeclRefExpr>(D.AllocatorTraits))
9310 FirstPrivateDecls.try_emplace(cast<VarDecl>(DRE->getDecl()),
9311 /*Implicit=*/true);
9312 else if (const auto *VD = dyn_cast<VarDecl>(
9313 cast<DeclRefExpr>(D.Allocator->IgnoreParenImpCasts())
9314 ->getDecl()))
9315 FirstPrivateDecls.try_emplace(VD, /*Implicit=*/true);
9316 }
9317 }
9318 // Extract defaultmap clause information.
9319 for (const auto *C : Dir.getClausesOfKind<OMPDefaultmapClause>())
9320 if (C->getDefaultmapModifier() == OMPC_DEFAULTMAP_MODIFIER_firstprivate)
9321 DefaultmapFirstprivateKinds.insert(C->getDefaultmapKind());
9322 // Extract device pointer clause information.
9323 for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>())
9324 for (auto L : C->component_lists())
9325 DevPointersMap[std::get<0>(L)].push_back(std::get<1>(L));
9326 // Extract device addr clause information.
9327 for (const auto *C : Dir.getClausesOfKind<OMPHasDeviceAddrClause>())
9328 for (auto L : C->component_lists())
9329 HasDevAddrsMap[std::get<0>(L)].push_back(std::get<1>(L));
9330 // Extract map information.
9331 for (const auto *C : Dir.getClausesOfKind<OMPMapClause>()) {
9332 if (C->getMapType() != OMPC_MAP_to)
9333 continue;
9334 for (auto L : C->component_lists()) {
9335 const ValueDecl *VD = std::get<0>(L);
9336 const auto *RD = VD ? VD->getType()
9337 .getCanonicalType()
9338 .getNonReferenceType()
9339 ->getAsCXXRecordDecl()
9340 : nullptr;
9341 if (RD && RD->isLambda())
9342 LambdasMap.try_emplace(std::get<0>(L), C);
9343 }
9344 }
9345
9346 auto CollectAttachPtrExprsForClauseComponents = [this](const auto *C) {
9347 for (auto L : C->component_lists()) {
9349 std::get<1>(L);
9350 if (!Components.empty())
9351 collectAttachPtrExprInfo(Components, CurDir);
9352 }
9353 };
9354
9355 // Populate the AttachPtrExprMap for all component lists from map-related
9356 // clauses.
9357 for (const auto *C : Dir.getClausesOfKind<OMPMapClause>())
9358 CollectAttachPtrExprsForClauseComponents(C);
9359 for (const auto *C : Dir.getClausesOfKind<OMPToClause>())
9360 CollectAttachPtrExprsForClauseComponents(C);
9361 for (const auto *C : Dir.getClausesOfKind<OMPFromClause>())
9362 CollectAttachPtrExprsForClauseComponents(C);
9363 for (const auto *C : Dir.getClausesOfKind<OMPUseDevicePtrClause>())
9364 CollectAttachPtrExprsForClauseComponents(C);
9365 for (const auto *C : Dir.getClausesOfKind<OMPUseDeviceAddrClause>())
9366 CollectAttachPtrExprsForClauseComponents(C);
9367 for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>())
9368 CollectAttachPtrExprsForClauseComponents(C);
9369 for (const auto *C : Dir.getClausesOfKind<OMPHasDeviceAddrClause>())
9370 CollectAttachPtrExprsForClauseComponents(C);
9371 }
9372
9373 /// Constructor for the declare mapper directive.
9374 MappableExprsHandler(const OMPDeclareMapperDecl &Dir, CodeGenFunction &CGF)
9375 : CurDir(&Dir), CGF(CGF), AttachPtrComparator(*this) {}
9376
9377 /// Generate code for the combined entry if we have a partially mapped struct
9378 /// and take care of the mapping flags of the arguments corresponding to
9379 /// individual struct members.
9380 /// If a valid \p AttachInfo exists, its pointee addr will be updated to point
9381 /// to the combined-entry's begin address, if emitted.
9382 /// \p PartialStruct contains attach base-pointer information.
9383 /// \returns The index of the combined entry if one was added, std::nullopt
9384 /// otherwise.
9385 void emitCombinedEntry(MapCombinedInfoTy &CombinedInfo,
9386 MapFlagsArrayTy &CurTypes,
9387 const StructRangeInfoTy &PartialStruct,
9388 AttachInfoTy &AttachInfo, bool IsMapThis,
9389 llvm::OpenMPIRBuilder &OMPBuilder, const ValueDecl *VD,
9390 unsigned OffsetForMemberOfFlag,
9391 bool NotTargetParams) const {
9392 if (CurTypes.size() == 1 &&
9393 ((CurTypes.back() & OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF) !=
9394 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF) &&
9395 !PartialStruct.IsArraySection)
9396 return;
9397 Address LBAddr = PartialStruct.LowestElem.second;
9398 Address HBAddr = PartialStruct.HighestElem.second;
9399 if (PartialStruct.HasCompleteRecord) {
9400 LBAddr = PartialStruct.LB;
9401 HBAddr = PartialStruct.LB;
9402 }
9403 CombinedInfo.Exprs.push_back(VD);
9404 // Base is the base of the struct
9405 CombinedInfo.BasePointers.push_back(PartialStruct.Base.emitRawPointer(CGF));
9406 CombinedInfo.DevicePtrDecls.push_back(nullptr);
9407 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
9408 // Pointer is the address of the lowest element
9409 llvm::Value *LB = LBAddr.emitRawPointer(CGF);
9410 const CXXMethodDecl *MD =
9411 CGF.CurFuncDecl ? dyn_cast<CXXMethodDecl>(CGF.CurFuncDecl) : nullptr;
9412 const CXXRecordDecl *RD = MD ? MD->getParent() : nullptr;
9413 bool HasBaseClass = RD && IsMapThis ? RD->getNumBases() > 0 : false;
9414 // There should not be a mapper for a combined entry.
9415 if (HasBaseClass) {
9416 // OpenMP 5.2 148:21:
9417 // If the target construct is within a class non-static member function,
9418 // and a variable is an accessible data member of the object for which the
9419 // non-static data member function is invoked, the variable is treated as
9420 // if the this[:1] expression had appeared in a map clause with a map-type
9421 // of tofrom.
9422 // Emit this[:1]
9423 CombinedInfo.Pointers.push_back(PartialStruct.Base.emitRawPointer(CGF));
9424 QualType Ty = MD->getFunctionObjectParameterType();
9425 llvm::Value *Size =
9426 CGF.Builder.CreateIntCast(CGF.getTypeSize(Ty), CGF.Int64Ty,
9427 /*isSigned=*/true);
9428 CombinedInfo.Sizes.push_back(Size);
9429 } else {
9430 CombinedInfo.Pointers.push_back(LB);
9431 // Size is (addr of {highest+1} element) - (addr of lowest element)
9432 llvm::Value *HB = HBAddr.emitRawPointer(CGF);
9433 llvm::Value *HAddr = CGF.Builder.CreateConstGEP1_32(
9434 HBAddr.getElementType(), HB, /*Idx0=*/1);
9435 llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(LB, CGF.VoidPtrTy);
9436 llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy);
9437 llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CGF.Int8Ty, CHAddr, CLAddr);
9438 llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.Int64Ty,
9439 /*isSigned=*/false);
9440 CombinedInfo.Sizes.push_back(Size);
9441 }
9442 CombinedInfo.Mappers.push_back(nullptr);
9443 // Map type is always TARGET_PARAM, if generate info for captures.
9444 CombinedInfo.Types.push_back(
9445 NotTargetParams ? OpenMPOffloadMappingFlags::OMP_MAP_NONE
9446 : !PartialStruct.PreliminaryMapData.BasePointers.empty()
9447 ? OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ
9448 : OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM);
9449 // If any element has the present modifier, then make sure the runtime
9450 // doesn't attempt to allocate the struct.
9451 if (CurTypes.end() !=
9452 llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) {
9453 return static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
9454 Type & OpenMPOffloadMappingFlags::OMP_MAP_PRESENT);
9455 }))
9456 CombinedInfo.Types.back() |= OpenMPOffloadMappingFlags::OMP_MAP_PRESENT;
9457 // Remove TARGET_PARAM flag from the first element
9458 (*CurTypes.begin()) &= ~OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;
9459 // If any element has the ompx_hold modifier, then make sure the runtime
9460 // uses the hold reference count for the struct as a whole so that it won't
9461 // be unmapped by an extra dynamic reference count decrement. Add it to all
9462 // elements as well so the runtime knows which reference count to check
9463 // when determining whether it's time for device-to-host transfers of
9464 // individual elements.
9465 if (CurTypes.end() !=
9466 llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) {
9467 return static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
9468 Type & OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD);
9469 })) {
9470 CombinedInfo.Types.back() |= OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD;
9471 for (auto &M : CurTypes)
9472 M |= OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD;
9473 }
9474
9475 // All other current entries will be MEMBER_OF the combined entry
9476 // (except for PTR_AND_OBJ entries which do not have a placeholder value
9477 // 0xFFFF in the MEMBER_OF field, or ATTACH entries since they are expected
9478 // to be handled by themselves, after all other maps).
9479 OpenMPOffloadMappingFlags MemberOfFlag = OMPBuilder.getMemberOfFlag(
9480 OffsetForMemberOfFlag + CombinedInfo.BasePointers.size() - 1);
9481 for (auto &M : CurTypes)
9482 OMPBuilder.setCorrectMemberOfFlag(M, MemberOfFlag);
9483
9484 // When we are emitting a combined entry. If there were any pending
9485 // attachments to be done, we do them to the begin address of the combined
9486 // entry. Note that this means only one attachment per combined-entry will
9487 // be done. So, for instance, if we have:
9488 // S *ps;
9489 // ... map(ps->a, ps->b)
9490 // When we are emitting a combined entry. If AttachInfo is valid,
9491 // update the pointee address to point to the begin address of the combined
9492 // entry. This ensures that if we have multiple maps like:
9493 // `map(ps->a, ps->b)`, we still get a single ATTACH entry, like:
9494 //
9495 // &ps[0], &ps->a, sizeof(ps->a to ps->b), ALLOC // combined-entry
9496 // &ps[0], &ps->a, sizeof(ps->a), TO | FROM
9497 // &ps[0], &ps->b, sizeof(ps->b), TO | FROM
9498 // &ps, &ps->a, sizeof(void*), ATTACH // Use combined-entry's LB
9499 if (AttachInfo.isValid())
9500 AttachInfo.AttachPteeAddr = LBAddr;
9501 }
9502
9503 /// Generate all the base pointers, section pointers, sizes, map types, and
9504 /// mappers for the extracted mappable expressions (all included in \a
9505 /// CombinedInfo). Also, for each item that relates with a device pointer, a
9506 /// pair of the relevant declaration and index where it occurs is appended to
9507 /// the device pointers info array.
9508 void generateAllInfo(
9509 MapCombinedInfoTy &CombinedInfo, llvm::OpenMPIRBuilder &OMPBuilder,
9510 const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
9511 llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
9512 assert(isa<const OMPExecutableDirective *>(CurDir) &&
9513 "Expect a executable directive");
9514 const auto *CurExecDir = cast<const OMPExecutableDirective *>(CurDir);
9515 generateAllInfoForClauses(CurExecDir->clauses(), CombinedInfo, OMPBuilder,
9516 SkipVarSet);
9517 }
9518
9519 /// Generate all the base pointers, section pointers, sizes, map types, and
9520 /// mappers for the extracted map clauses of user-defined mapper (all included
9521 /// in \a CombinedInfo).
9522 void generateAllInfoForMapper(MapCombinedInfoTy &CombinedInfo,
9523 llvm::OpenMPIRBuilder &OMPBuilder) const {
9524 assert(isa<const OMPDeclareMapperDecl *>(CurDir) &&
9525 "Expect a declare mapper directive");
9526 const auto *CurMapperDir = cast<const OMPDeclareMapperDecl *>(CurDir);
9527 generateAllInfoForClauses(CurMapperDir->clauses(), CombinedInfo,
9528 OMPBuilder);
9529 }
9530
9531 /// Emit capture info for lambdas for variables captured by reference.
9532 void generateInfoForLambdaCaptures(
9533 const ValueDecl *VD, llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
9534 llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers) const {
9535 QualType VDType = VD->getType().getCanonicalType().getNonReferenceType();
9536 const auto *RD = VDType->getAsCXXRecordDecl();
9537 if (!RD || !RD->isLambda())
9538 return;
9539 Address VDAddr(Arg, CGF.ConvertTypeForMem(VDType),
9540 CGF.getContext().getDeclAlign(VD));
9541 LValue VDLVal = CGF.MakeAddrLValue(VDAddr, VDType);
9542 llvm::DenseMap<const ValueDecl *, FieldDecl *> Captures;
9543 FieldDecl *ThisCapture = nullptr;
9544 RD->getCaptureFields(Captures, ThisCapture);
9545 if (ThisCapture) {
9546 LValue ThisLVal =
9547 CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture);
9548 LValue ThisLValVal = CGF.EmitLValueForField(VDLVal, ThisCapture);
9549 LambdaPointers.try_emplace(ThisLVal.getPointer(CGF),
9550 VDLVal.getPointer(CGF));
9551 CombinedInfo.Exprs.push_back(VD);
9552 CombinedInfo.BasePointers.push_back(ThisLVal.getPointer(CGF));
9553 CombinedInfo.DevicePtrDecls.push_back(nullptr);
9554 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
9555 CombinedInfo.Pointers.push_back(ThisLValVal.getPointer(CGF));
9556 CombinedInfo.Sizes.push_back(
9557 CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy),
9558 CGF.Int64Ty, /*isSigned=*/true));
9559 CombinedInfo.Types.push_back(
9560 OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ |
9561 OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
9562 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF |
9563 OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT);
9564 CombinedInfo.Mappers.push_back(nullptr);
9565 }
9566 for (const LambdaCapture &LC : RD->captures()) {
9567 if (!LC.capturesVariable())
9568 continue;
9569 const VarDecl *VD = cast<VarDecl>(LC.getCapturedVar());
9570 if (LC.getCaptureKind() != LCK_ByRef && !VD->getType()->isPointerType())
9571 continue;
9572 auto It = Captures.find(VD);
9573 assert(It != Captures.end() && "Found lambda capture without field.");
9574 LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second);
9575 if (LC.getCaptureKind() == LCK_ByRef) {
9576 LValue VarLValVal = CGF.EmitLValueForField(VDLVal, It->second);
9577 LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
9578 VDLVal.getPointer(CGF));
9579 CombinedInfo.Exprs.push_back(VD);
9580 CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
9581 CombinedInfo.DevicePtrDecls.push_back(nullptr);
9582 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
9583 CombinedInfo.Pointers.push_back(VarLValVal.getPointer(CGF));
9584 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
9585 CGF.getTypeSize(
9587 CGF.Int64Ty, /*isSigned=*/true));
9588 } else {
9589 RValue VarRVal = CGF.EmitLoadOfLValue(VarLVal, RD->getLocation());
9590 LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
9591 VDLVal.getPointer(CGF));
9592 CombinedInfo.Exprs.push_back(VD);
9593 CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
9594 CombinedInfo.DevicePtrDecls.push_back(nullptr);
9595 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
9596 CombinedInfo.Pointers.push_back(VarRVal.getScalarVal());
9597 CombinedInfo.Sizes.push_back(llvm::ConstantInt::get(CGF.Int64Ty, 0));
9598 }
9599 CombinedInfo.Types.push_back(
9600 OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ |
9601 OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
9602 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF |
9603 OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT);
9604 CombinedInfo.Mappers.push_back(nullptr);
9605 }
9606 }
9607
9608 /// Set correct indices for lambdas captures.
9609 void adjustMemberOfForLambdaCaptures(
9610 llvm::OpenMPIRBuilder &OMPBuilder,
9611 const llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers,
9612 MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
9613 MapFlagsArrayTy &Types) const {
9614 for (unsigned I = 0, E = Types.size(); I < E; ++I) {
9615 // Set correct member_of idx for all implicit lambda captures.
9616 if (Types[I] != (OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ |
9617 OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
9618 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF |
9619 OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT))
9620 continue;
9621 llvm::Value *BasePtr = LambdaPointers.lookup(BasePointers[I]);
9622 assert(BasePtr && "Unable to find base lambda address.");
9623 int TgtIdx = -1;
9624 for (unsigned J = I; J > 0; --J) {
9625 unsigned Idx = J - 1;
9626 if (Pointers[Idx] != BasePtr)
9627 continue;
9628 TgtIdx = Idx;
9629 break;
9630 }
9631 assert(TgtIdx != -1 && "Unable to find parent lambda.");
9632 // All other current entries will be MEMBER_OF the combined entry
9633 // (except for PTR_AND_OBJ entries which do not have a placeholder value
9634 // 0xFFFF in the MEMBER_OF field).
9635 OpenMPOffloadMappingFlags MemberOfFlag =
9636 OMPBuilder.getMemberOfFlag(TgtIdx);
9637 OMPBuilder.setCorrectMemberOfFlag(Types[I], MemberOfFlag);
9638 }
9639 }
9640
9641 /// Populate component lists for non-lambda captured variables from map,
9642 /// is_device_ptr and has_device_addr clause info.
9643 void populateComponentListsForNonLambdaCaptureFromClauses(
9644 const ValueDecl *VD, MapDataArrayTy &DeclComponentLists,
9645 SmallVectorImpl<
9646 SmallVector<OMPClauseMappableExprCommon::MappableComponent, 8>>
9647 &StorageForImplicitlyAddedComponentLists) const {
9648 if (VD && LambdasMap.count(VD))
9649 return;
9650
9651 // For member fields list in is_device_ptr, store it in
9652 // DeclComponentLists for generating components info.
9654 auto It = DevPointersMap.find(VD);
9655 if (It != DevPointersMap.end())
9656 for (const auto &MCL : It->second)
9657 DeclComponentLists.emplace_back(MCL, OMPC_MAP_to, Unknown,
9658 /*IsImpicit = */ true, nullptr,
9659 nullptr);
9660 auto I = HasDevAddrsMap.find(VD);
9661 if (I != HasDevAddrsMap.end())
9662 for (const auto &MCL : I->second)
9663 DeclComponentLists.emplace_back(MCL, OMPC_MAP_tofrom, Unknown,
9664 /*IsImpicit = */ true, nullptr,
9665 nullptr);
9666 assert(isa<const OMPExecutableDirective *>(CurDir) &&
9667 "Expect a executable directive");
9668 const auto *CurExecDir = cast<const OMPExecutableDirective *>(CurDir);
9669 for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
9670 const auto *EI = C->getVarRefs().begin();
9671 for (const auto L : C->decl_component_lists(VD)) {
9672 const ValueDecl *VDecl, *Mapper;
9673 // The Expression is not correct if the mapping is implicit
9674 const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
9676 std::tie(VDecl, Components, Mapper) = L;
9677 assert(VDecl == VD && "We got information for the wrong declaration??");
9678 assert(!Components.empty() &&
9679 "Not expecting declaration with no component lists.");
9680 DeclComponentLists.emplace_back(Components, C->getMapType(),
9681 C->getMapTypeModifiers(),
9682 C->isImplicit(), Mapper, E);
9683 ++EI;
9684 }
9685 }
9686
9687 // For the target construct, if there's a map with a base-pointer that's
9688 // a member of an implicitly captured struct, of the current class,
9689 // we need to emit an implicit map on the pointer.
9690 if (isOpenMPTargetExecutionDirective(CurExecDir->getDirectiveKind()))
9691 addImplicitMapForAttachPtrBaseIfMemberOfCapturedVD(
9692 VD, DeclComponentLists, StorageForImplicitlyAddedComponentLists);
9693
9694 llvm::stable_sort(DeclComponentLists, [](const MapData &LHS,
9695 const MapData &RHS) {
9696 ArrayRef<OpenMPMapModifierKind> MapModifiers = std::get<2>(LHS);
9697 OpenMPMapClauseKind MapType = std::get<1>(RHS);
9698 bool HasPresent =
9699 llvm::is_contained(MapModifiers, clang::OMPC_MAP_MODIFIER_present);
9700 bool HasAllocs = MapType == OMPC_MAP_alloc;
9701 MapModifiers = std::get<2>(RHS);
9702 MapType = std::get<1>(LHS);
9703 bool HasPresentR =
9704 llvm::is_contained(MapModifiers, clang::OMPC_MAP_MODIFIER_present);
9705 bool HasAllocsR = MapType == OMPC_MAP_alloc;
9706 return (HasPresent && !HasPresentR) || (HasAllocs && !HasAllocsR);
9707 });
9708 }
9709
9710 /// On a target construct, if there's an implicit map on a struct, or that of
9711 /// this[:], and an explicit map with a member of that struct/class as the
9712 /// base-pointer, we need to make sure that base-pointer is implicitly mapped,
9713 /// to make sure we don't map the full struct/class. For example:
9714 ///
9715 /// \code
9716 /// struct S {
9717 /// int dummy[10000];
9718 /// int *p;
9719 /// void f1() {
9720 /// #pragma omp target map(p[0:1])
9721 /// (void)this;
9722 /// }
9723 /// }; S s;
9724 ///
9725 /// void f2() {
9726 /// #pragma omp target map(s.p[0:10])
9727 /// (void)s;
9728 /// }
9729 /// \endcode
9730 ///
9731 /// Only `this-p` and `s.p` should be mapped in the two cases above.
9732 //
9733 // OpenMP 6.0: 7.9.6 map clause, pg 285
9734 // If a list item with an implicitly determined data-mapping attribute does
9735 // not have any corresponding storage in the device data environment prior to
9736 // a task encountering the construct associated with the map clause, and one
9737 // or more contiguous parts of the original storage are either list items or
9738 // base pointers to list items that are explicitly mapped on the construct,
9739 // only those parts of the original storage will have corresponding storage in
9740 // the device data environment as a result of the map clauses on the
9741 // construct.
9742 void addImplicitMapForAttachPtrBaseIfMemberOfCapturedVD(
9743 const ValueDecl *CapturedVD, MapDataArrayTy &DeclComponentLists,
9744 SmallVectorImpl<
9745 SmallVector<OMPClauseMappableExprCommon::MappableComponent, 8>>
9746 &ComponentVectorStorage) const {
9747 bool IsThisCapture = CapturedVD == nullptr;
9748
9749 for (const auto &ComponentsAndAttachPtr : AttachPtrExprMap) {
9751 ComponentsWithAttachPtr = ComponentsAndAttachPtr.first;
9752 const Expr *AttachPtrExpr = ComponentsAndAttachPtr.second;
9753 if (!AttachPtrExpr)
9754 continue;
9755
9756 const auto *ME = dyn_cast<MemberExpr>(AttachPtrExpr);
9757 if (!ME)
9758 continue;
9759
9760 const Expr *Base = ME->getBase()->IgnoreParenImpCasts();
9761
9762 // If we are handling a "this" capture, then we are looking for
9763 // attach-ptrs of form `this->p`, either explicitly or implicitly.
9764 if (IsThisCapture && !ME->isImplicitCXXThis() && !isa<CXXThisExpr>(Base))
9765 continue;
9766
9767 if (!IsThisCapture && (!isa<DeclRefExpr>(Base) ||
9768 cast<DeclRefExpr>(Base)->getDecl() != CapturedVD))
9769 continue;
9770
9771 // For non-this captures, we are looking for attach-ptrs of form
9772 // `s.p`.
9773 // For non-this captures, we are looking for attach-ptrs like `s.p`.
9774 if (!IsThisCapture && (ME->isArrow() || !isa<DeclRefExpr>(Base) ||
9775 cast<DeclRefExpr>(Base)->getDecl() != CapturedVD))
9776 continue;
9777
9778 // Check if we have an existing map on either:
9779 // this[:], s, this->p, or s.p, in which case, we don't need to add
9780 // an implicit one for the attach-ptr s.p/this->p.
9781 bool FoundExistingMap = false;
9782 for (const MapData &ExistingL : DeclComponentLists) {
9784 ExistingComponents = std::get<0>(ExistingL);
9785
9786 if (ExistingComponents.empty())
9787 continue;
9788
9789 // First check if we have a map like map(this->p) or map(s.p).
9790 const auto &FirstComponent = ExistingComponents.front();
9791 const Expr *FirstExpr = FirstComponent.getAssociatedExpression();
9792
9793 if (!FirstExpr)
9794 continue;
9795
9796 // First check if we have a map like map(this->p) or map(s.p).
9797 if (AttachPtrComparator.areEqual(FirstExpr, AttachPtrExpr)) {
9798 FoundExistingMap = true;
9799 break;
9800 }
9801
9802 // Check if we have a map like this[0:1]
9803 if (IsThisCapture) {
9804 if (const auto *OASE = dyn_cast<ArraySectionExpr>(FirstExpr)) {
9805 if (isa<CXXThisExpr>(OASE->getBase()->IgnoreParenImpCasts())) {
9806 FoundExistingMap = true;
9807 break;
9808 }
9809 }
9810 continue;
9811 }
9812
9813 // When the attach-ptr is something like `s.p`, check if
9814 // `s` itself is mapped explicitly.
9815 if (const auto *DRE = dyn_cast<DeclRefExpr>(FirstExpr)) {
9816 if (DRE->getDecl() == CapturedVD) {
9817 FoundExistingMap = true;
9818 break;
9819 }
9820 }
9821 }
9822
9823 if (FoundExistingMap)
9824 continue;
9825
9826 // If no base map is found, we need to create an implicit map for the
9827 // attach-pointer expr.
9828
9829 ComponentVectorStorage.emplace_back();
9830 auto &AttachPtrComponents = ComponentVectorStorage.back();
9831
9833 bool SeenAttachPtrComponent = false;
9834 // For creating a map on the attach-ptr `s.p/this->p`, we copy all
9835 // components from the component-list which has `s.p/this->p`
9836 // as the attach-ptr, starting from the component which matches
9837 // `s.p/this->p`. This way, we'll have component-lists of
9838 // `s.p` -> `s`, and `this->p` -> `this`.
9839 for (size_t i = 0; i < ComponentsWithAttachPtr.size(); ++i) {
9840 const auto &Component = ComponentsWithAttachPtr[i];
9841 const Expr *ComponentExpr = Component.getAssociatedExpression();
9842
9843 if (!SeenAttachPtrComponent && ComponentExpr != AttachPtrExpr)
9844 continue;
9845 SeenAttachPtrComponent = true;
9846
9847 AttachPtrComponents.emplace_back(Component.getAssociatedExpression(),
9848 Component.getAssociatedDeclaration(),
9849 Component.isNonContiguous());
9850 }
9851 assert(!AttachPtrComponents.empty() &&
9852 "Could not populate component-lists for mapping attach-ptr");
9853
9854 DeclComponentLists.emplace_back(
9855 AttachPtrComponents, OMPC_MAP_tofrom, Unknown,
9856 /*IsImplicit=*/true, /*mapper=*/nullptr, AttachPtrExpr);
9857 }
9858 }
9859
9860 /// For a capture that has an associated clause, generate the base pointers,
9861 /// section pointers, sizes, map types, and mappers (all included in
9862 /// \a CurCaptureVarInfo).
9863 void generateInfoForCaptureFromClauseInfo(
9864 const MapDataArrayTy &DeclComponentListsFromClauses,
9865 const CapturedStmt::Capture *Cap, llvm::Value *Arg,
9866 MapCombinedInfoTy &CurCaptureVarInfo, llvm::OpenMPIRBuilder &OMPBuilder,
9867 unsigned OffsetForMemberOfFlag) const {
9868 assert(!Cap->capturesVariableArrayType() &&
9869 "Not expecting to generate map info for a variable array type!");
9870
9871 // We need to know when we generating information for the first component
9872 const ValueDecl *VD = Cap->capturesThis()
9873 ? nullptr
9874 : Cap->getCapturedVar()->getCanonicalDecl();
9875
9876 // for map(to: lambda): skip here, processing it in
9877 // generateDefaultMapInfo
9878 if (LambdasMap.count(VD))
9879 return;
9880
9881 // If this declaration appears in a is_device_ptr clause we just have to
9882 // pass the pointer by value. If it is a reference to a declaration, we just
9883 // pass its value.
9884 if (VD && (DevPointersMap.count(VD) || HasDevAddrsMap.count(VD))) {
9885 CurCaptureVarInfo.Exprs.push_back(VD);
9886 CurCaptureVarInfo.BasePointers.emplace_back(Arg);
9887 CurCaptureVarInfo.DevicePtrDecls.emplace_back(VD);
9888 CurCaptureVarInfo.DevicePointers.emplace_back(DeviceInfoTy::Pointer);
9889 CurCaptureVarInfo.Pointers.push_back(Arg);
9890 CurCaptureVarInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
9891 CGF.getTypeSize(CGF.getContext().VoidPtrTy), CGF.Int64Ty,
9892 /*isSigned=*/true));
9893 CurCaptureVarInfo.Types.push_back(
9894 OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
9895 OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM);
9896 CurCaptureVarInfo.Mappers.push_back(nullptr);
9897 return;
9898 }
9899
9900 auto GenerateInfoForComponentLists =
9901 [&](ArrayRef<MapData> DeclComponentListsFromClauses,
9902 bool IsEligibleForTargetParamFlag) {
9903 MapCombinedInfoTy CurInfoForComponentLists;
9904 StructRangeInfoTy PartialStruct;
9905 AttachInfoTy AttachInfo;
9906
9907 if (DeclComponentListsFromClauses.empty())
9908 return;
9909
9910 generateInfoForCaptureFromComponentLists(
9911 VD, DeclComponentListsFromClauses, CurInfoForComponentLists,
9912 PartialStruct, AttachInfo, IsEligibleForTargetParamFlag);
9913
9914 // If there is an entry in PartialStruct it means we have a
9915 // struct with individual members mapped. Emit an extra combined
9916 // entry.
9917 if (PartialStruct.Base.isValid()) {
9918 CurCaptureVarInfo.append(PartialStruct.PreliminaryMapData);
9919 emitCombinedEntry(
9920 CurCaptureVarInfo, CurInfoForComponentLists.Types,
9921 PartialStruct, AttachInfo, Cap->capturesThis(), OMPBuilder,
9922 /*VD=*/nullptr, OffsetForMemberOfFlag,
9923 /*NotTargetParams*/ !IsEligibleForTargetParamFlag);
9924 }
9925
9926 // We do the appends to get the entries in the following order:
9927 // combined-entry -> individual-field-entries -> attach-entry,
9928 CurCaptureVarInfo.append(CurInfoForComponentLists);
9929 if (AttachInfo.isValid())
9930 emitAttachEntry(CGF, CurCaptureVarInfo, AttachInfo);
9931 };
9932
9933 // Group component lists by their AttachPtrExpr and process them in order
9934 // of increasing complexity (nullptr first, then simple expressions like p,
9935 // then more complex ones like p[0], etc.)
9936 //
9937 // This ensure that we:
9938 // * handle maps that can contribute towards setting the kernel argument,
9939 // (e.g. map(ps), or map(ps[0])), before any that cannot (e.g. ps->pt->d).
9940 // * allocate a single contiguous storage for all exprs with the same
9941 // captured var and having the same attach-ptr.
9942 //
9943 // Example: The map clauses below should be handled grouped together based
9944 // on their attachable-base-pointers:
9945 // map-clause | attachable-base-pointer
9946 // --------------------------+------------------------
9947 // map(p, ps) | nullptr
9948 // map(p[0]) | p
9949 // map(p[0]->b, p[0]->c) | p[0]
9950 // map(ps->d, ps->e, ps->pt) | ps
9951 // map(ps->pt->d, ps->pt->e) | ps->pt
9952
9953 // First, collect all MapData entries with their attach-ptr exprs.
9954 SmallVector<std::pair<const Expr *, MapData>, 16> AttachPtrMapDataPairs;
9955
9956 for (const MapData &L : DeclComponentListsFromClauses) {
9958 std::get<0>(L);
9959 const Expr *AttachPtrExpr = getAttachPtrExpr(Components);
9960 AttachPtrMapDataPairs.emplace_back(AttachPtrExpr, L);
9961 }
9962
9963 // Next, sort by increasing order of their complexity.
9964 llvm::stable_sort(AttachPtrMapDataPairs,
9965 [this](const auto &LHS, const auto &RHS) {
9966 return AttachPtrComparator(LHS.first, RHS.first);
9967 });
9968
9969 bool NoDefaultMappingDoneForVD = CurCaptureVarInfo.BasePointers.empty();
9970 bool IsFirstGroup = true;
9971
9972 // And finally, process them all in order, grouping those with
9973 // equivalent attach-ptr exprs together.
9974 auto *It = AttachPtrMapDataPairs.begin();
9975 while (It != AttachPtrMapDataPairs.end()) {
9976 const Expr *AttachPtrExpr = It->first;
9977
9978 MapDataArrayTy GroupLists;
9979 while (It != AttachPtrMapDataPairs.end() &&
9980 (It->first == AttachPtrExpr ||
9981 AttachPtrComparator.areEqual(It->first, AttachPtrExpr))) {
9982 GroupLists.push_back(It->second);
9983 ++It;
9984 }
9985 assert(!GroupLists.empty() && "GroupLists should not be empty");
9986
9987 // Determine if this group of component-lists is eligible for TARGET_PARAM
9988 // flag. Only the first group processed should be eligible, and only if no
9989 // default mapping was done.
9990 bool IsEligibleForTargetParamFlag =
9991 IsFirstGroup && NoDefaultMappingDoneForVD;
9992
9993 GenerateInfoForComponentLists(GroupLists, IsEligibleForTargetParamFlag);
9994 IsFirstGroup = false;
9995 }
9996 }
9997
9998 /// Generate the base pointers, section pointers, sizes, map types, and
9999 /// mappers associated to \a DeclComponentLists for a given capture
10000 /// \a VD (all included in \a CurComponentListInfo).
10001 void generateInfoForCaptureFromComponentLists(
10002 const ValueDecl *VD, ArrayRef<MapData> DeclComponentLists,
10003 MapCombinedInfoTy &CurComponentListInfo, StructRangeInfoTy &PartialStruct,
10004 AttachInfoTy &AttachInfo, bool IsListEligibleForTargetParamFlag) const {
10005 // Find overlapping elements (including the offset from the base element).
10006 llvm::SmallDenseMap<
10007 const MapData *,
10008 llvm::SmallVector<
10010 4>
10011 OverlappedData;
10012 size_t Count = 0;
10013 for (const MapData &L : DeclComponentLists) {
10015 OpenMPMapClauseKind MapType;
10016 ArrayRef<OpenMPMapModifierKind> MapModifiers;
10017 bool IsImplicit;
10018 const ValueDecl *Mapper;
10019 const Expr *VarRef;
10020 std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
10021 L;
10022 ++Count;
10023 for (const MapData &L1 : ArrayRef(DeclComponentLists).slice(Count)) {
10025 std::tie(Components1, MapType, MapModifiers, IsImplicit, Mapper,
10026 VarRef) = L1;
10027 auto CI = Components.rbegin();
10028 auto CE = Components.rend();
10029 auto SI = Components1.rbegin();
10030 auto SE = Components1.rend();
10031 for (; CI != CE && SI != SE; ++CI, ++SI) {
10032 if (CI->getAssociatedExpression()->getStmtClass() !=
10033 SI->getAssociatedExpression()->getStmtClass())
10034 break;
10035 // Are we dealing with different variables/fields?
10036 if (CI->getAssociatedDeclaration() != SI->getAssociatedDeclaration())
10037 break;
10038 }
10039 // Found overlapping if, at least for one component, reached the head
10040 // of the components list.
10041 if (CI == CE || SI == SE) {
10042 // Ignore it if it is the same component.
10043 if (CI == CE && SI == SE)
10044 continue;
10045 const auto It = (SI == SE) ? CI : SI;
10046 // If one component is a pointer and another one is a kind of
10047 // dereference of this pointer (array subscript, section, dereference,
10048 // etc.), it is not an overlapping.
10049 // Same, if one component is a base and another component is a
10050 // dereferenced pointer memberexpr with the same base.
10051 if (!isa<MemberExpr>(It->getAssociatedExpression()) ||
10052 (std::prev(It)->getAssociatedDeclaration() &&
10053 std::prev(It)
10054 ->getAssociatedDeclaration()
10055 ->getType()
10056 ->isPointerType()) ||
10057 (It->getAssociatedDeclaration() &&
10058 It->getAssociatedDeclaration()->getType()->isPointerType() &&
10059 std::next(It) != CE && std::next(It) != SE))
10060 continue;
10061 const MapData &BaseData = CI == CE ? L : L1;
10063 SI == SE ? Components : Components1;
10064 OverlappedData[&BaseData].push_back(SubData);
10065 }
10066 }
10067 }
10068 // Sort the overlapped elements for each item.
10069 llvm::SmallVector<const FieldDecl *, 4> Layout;
10070 if (!OverlappedData.empty()) {
10071 const Type *BaseType = VD->getType().getCanonicalType().getTypePtr();
10072 const Type *OrigType = BaseType->getPointeeOrArrayElementType();
10073 while (BaseType != OrigType) {
10074 BaseType = OrigType->getCanonicalTypeInternal().getTypePtr();
10075 OrigType = BaseType->getPointeeOrArrayElementType();
10076 }
10077
10078 if (const auto *CRD = BaseType->getAsCXXRecordDecl())
10079 getPlainLayout(CRD, Layout, /*AsBase=*/false);
10080 else {
10081 const auto *RD = BaseType->getAsRecordDecl();
10082 Layout.append(RD->field_begin(), RD->field_end());
10083 }
10084 }
10085 for (auto &Pair : OverlappedData) {
10086 llvm::stable_sort(
10087 Pair.getSecond(),
10088 [&Layout](
10091 Second) {
10092 auto CI = First.rbegin();
10093 auto CE = First.rend();
10094 auto SI = Second.rbegin();
10095 auto SE = Second.rend();
10096 for (; CI != CE && SI != SE; ++CI, ++SI) {
10097 if (CI->getAssociatedExpression()->getStmtClass() !=
10098 SI->getAssociatedExpression()->getStmtClass())
10099 break;
10100 // Are we dealing with different variables/fields?
10101 if (CI->getAssociatedDeclaration() !=
10102 SI->getAssociatedDeclaration())
10103 break;
10104 }
10105
10106 // Lists contain the same elements.
10107 if (CI == CE && SI == SE)
10108 return false;
10109
10110 // List with less elements is less than list with more elements.
10111 if (CI == CE || SI == SE)
10112 return CI == CE;
10113
10114 const auto *FD1 = cast<FieldDecl>(CI->getAssociatedDeclaration());
10115 const auto *FD2 = cast<FieldDecl>(SI->getAssociatedDeclaration());
10116 if (FD1->getParent() == FD2->getParent())
10117 return FD1->getFieldIndex() < FD2->getFieldIndex();
10118 const auto *It =
10119 llvm::find_if(Layout, [FD1, FD2](const FieldDecl *FD) {
10120 return FD == FD1 || FD == FD2;
10121 });
10122 return *It == FD1;
10123 });
10124 }
10125
10126 // Associated with a capture, because the mapping flags depend on it.
10127 // Go through all of the elements with the overlapped elements.
10128 bool AddTargetParamFlag = IsListEligibleForTargetParamFlag;
10129 MapCombinedInfoTy StructBaseCombinedInfo;
10130 for (const auto &Pair : OverlappedData) {
10131 const MapData &L = *Pair.getFirst();
10133 OpenMPMapClauseKind MapType;
10134 ArrayRef<OpenMPMapModifierKind> MapModifiers;
10135 bool IsImplicit;
10136 const ValueDecl *Mapper;
10137 const Expr *VarRef;
10138 std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
10139 L;
10140 ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
10141 OverlappedComponents = Pair.getSecond();
10142 generateInfoForComponentList(
10143 MapType, MapModifiers, {}, Components, CurComponentListInfo,
10144 StructBaseCombinedInfo, PartialStruct, AttachInfo, AddTargetParamFlag,
10145 IsImplicit, /*GenerateAllInfoForClauses*/ false, Mapper,
10146 /*ForDeviceAddr=*/false, VD, VarRef, OverlappedComponents);
10147 AddTargetParamFlag = false;
10148 }
10149 // Go through other elements without overlapped elements.
10150 for (const MapData &L : DeclComponentLists) {
10152 OpenMPMapClauseKind MapType;
10153 ArrayRef<OpenMPMapModifierKind> MapModifiers;
10154 bool IsImplicit;
10155 const ValueDecl *Mapper;
10156 const Expr *VarRef;
10157 std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
10158 L;
10159 auto It = OverlappedData.find(&L);
10160 if (It == OverlappedData.end())
10161 generateInfoForComponentList(
10162 MapType, MapModifiers, {}, Components, CurComponentListInfo,
10163 StructBaseCombinedInfo, PartialStruct, AttachInfo,
10164 AddTargetParamFlag, IsImplicit, /*GenerateAllInfoForClauses*/ false,
10165 Mapper, /*ForDeviceAddr=*/false, VD, VarRef,
10166 /*OverlappedElements*/ {});
10167 AddTargetParamFlag = false;
10168 }
10169 }
10170
10171 /// Check if a variable should be treated as firstprivate due to explicit
10172 /// firstprivate clause or defaultmap(firstprivate:...).
10173 bool isEffectivelyFirstprivate(const VarDecl *VD, QualType Type) const {
10174 // Check explicit firstprivate clauses (not implicit from defaultmap)
10175 auto I = FirstPrivateDecls.find(VD);
10176 if (I != FirstPrivateDecls.end() && !I->getSecond())
10177 return true; // Explicit firstprivate only
10178
10179 // Check defaultmap(firstprivate:scalar) for scalar types
10180 if (DefaultmapFirstprivateKinds.count(OMPC_DEFAULTMAP_scalar)) {
10181 if (Type->isScalarType())
10182 return true;
10183 }
10184
10185 // Check defaultmap(firstprivate:pointer) for pointer types
10186 if (DefaultmapFirstprivateKinds.count(OMPC_DEFAULTMAP_pointer)) {
10187 if (Type->isAnyPointerType())
10188 return true;
10189 }
10190
10191 // Check defaultmap(firstprivate:aggregate) for aggregate types
10192 if (DefaultmapFirstprivateKinds.count(OMPC_DEFAULTMAP_aggregate)) {
10193 if (Type->isAggregateType())
10194 return true;
10195 }
10196
10197 // Check defaultmap(firstprivate:all) for all types
10198 return DefaultmapFirstprivateKinds.count(OMPC_DEFAULTMAP_all);
10199 }
10200
10201 /// Generate the default map information for a given capture \a CI,
10202 /// record field declaration \a RI and captured value \a CV.
10203 void generateDefaultMapInfo(const CapturedStmt::Capture &CI,
10204 const FieldDecl &RI, llvm::Value *CV,
10205 MapCombinedInfoTy &CombinedInfo) const {
10206 bool IsImplicit = true;
10207 // Do the default mapping.
10208 if (CI.capturesThis()) {
10209 CombinedInfo.Exprs.push_back(nullptr);
10210 CombinedInfo.BasePointers.push_back(CV);
10211 CombinedInfo.DevicePtrDecls.push_back(nullptr);
10212 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
10213 CombinedInfo.Pointers.push_back(CV);
10214 const auto *PtrTy = cast<PointerType>(RI.getType().getTypePtr());
10215 CombinedInfo.Sizes.push_back(
10216 CGF.Builder.CreateIntCast(CGF.getTypeSize(PtrTy->getPointeeType()),
10217 CGF.Int64Ty, /*isSigned=*/true));
10218 // Default map type.
10219 CombinedInfo.Types.push_back(OpenMPOffloadMappingFlags::OMP_MAP_TO |
10220 OpenMPOffloadMappingFlags::OMP_MAP_FROM);
10221 } else if (CI.capturesVariableByCopy()) {
10222 const VarDecl *VD = CI.getCapturedVar();
10223 CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
10224 CombinedInfo.BasePointers.push_back(CV);
10225 CombinedInfo.DevicePtrDecls.push_back(nullptr);
10226 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
10227 CombinedInfo.Pointers.push_back(CV);
10228 bool IsFirstprivate =
10229 isEffectivelyFirstprivate(VD, RI.getType().getNonReferenceType());
10230
10231 if (!RI.getType()->isAnyPointerType()) {
10232 // We have to signal to the runtime captures passed by value that are
10233 // not pointers.
10234 CombinedInfo.Types.push_back(
10235 OpenMPOffloadMappingFlags::OMP_MAP_LITERAL);
10236 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
10237 CGF.getTypeSize(RI.getType()), CGF.Int64Ty, /*isSigned=*/true));
10238 } else if (IsFirstprivate) {
10239 // Firstprivate pointers should be passed by value (as literals)
10240 // without performing a present table lookup at runtime.
10241 CombinedInfo.Types.push_back(
10242 OpenMPOffloadMappingFlags::OMP_MAP_LITERAL);
10243 // Use zero size for pointer literals (just passing the pointer value)
10244 CombinedInfo.Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
10245 } else {
10246 // Pointers are implicitly mapped with a zero size and no flags
10247 // (other than first map that is added for all implicit maps).
10248 CombinedInfo.Types.push_back(OpenMPOffloadMappingFlags::OMP_MAP_NONE);
10249 CombinedInfo.Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
10250 }
10251 auto I = FirstPrivateDecls.find(VD);
10252 if (I != FirstPrivateDecls.end())
10253 IsImplicit = I->getSecond();
10254 } else {
10255 assert(CI.capturesVariable() && "Expected captured reference.");
10256 const auto *PtrTy = cast<ReferenceType>(RI.getType().getTypePtr());
10257 QualType ElementType = PtrTy->getPointeeType();
10258 const VarDecl *VD = CI.getCapturedVar();
10259 bool IsFirstprivate = isEffectivelyFirstprivate(VD, ElementType);
10260 CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
10261 CombinedInfo.BasePointers.push_back(CV);
10262 CombinedInfo.DevicePtrDecls.push_back(nullptr);
10263 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
10264
10265 // For firstprivate pointers, pass by value instead of dereferencing
10266 if (IsFirstprivate && ElementType->isAnyPointerType()) {
10267 // Treat as a literal value (pass the pointer value itself)
10268 CombinedInfo.Pointers.push_back(CV);
10269 // Use zero size for pointer literals
10270 CombinedInfo.Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
10271 CombinedInfo.Types.push_back(
10272 OpenMPOffloadMappingFlags::OMP_MAP_LITERAL);
10273 } else {
10274 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
10275 CGF.getTypeSize(ElementType), CGF.Int64Ty, /*isSigned=*/true));
10276 // The default map type for a scalar/complex type is 'to' because by
10277 // default the value doesn't have to be retrieved. For an aggregate
10278 // type, the default is 'tofrom'.
10279 CombinedInfo.Types.push_back(getMapModifiersForPrivateClauses(CI));
10280 CombinedInfo.Pointers.push_back(CV);
10281 }
10282 auto I = FirstPrivateDecls.find(VD);
10283 if (I != FirstPrivateDecls.end())
10284 IsImplicit = I->getSecond();
10285 }
10286 // Every default map produces a single argument which is a target parameter.
10287 CombinedInfo.Types.back() |=
10288 OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;
10289
10290 // Add flag stating this is an implicit map.
10291 if (IsImplicit)
10292 CombinedInfo.Types.back() |= OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT;
10293
10294 // No user-defined mapper for default mapping.
10295 CombinedInfo.Mappers.push_back(nullptr);
10296 }
10297};
10298} // anonymous namespace
10299
10300// Try to extract the base declaration from a `this->x` expression if possible.
10302 if (!E)
10303 return nullptr;
10304
10305 if (const auto *OASE = dyn_cast<ArraySectionExpr>(E->IgnoreParenCasts()))
10306 if (const MemberExpr *ME =
10307 dyn_cast<MemberExpr>(OASE->getBase()->IgnoreParenImpCasts()))
10308 return ME->getMemberDecl();
10309 return nullptr;
10310}
10311
10312/// Emit a string constant containing the names of the values mapped to the
10313/// offloading runtime library.
10314static llvm::Constant *
10315emitMappingInformation(CodeGenFunction &CGF, llvm::OpenMPIRBuilder &OMPBuilder,
10316 MappableExprsHandler::MappingExprInfo &MapExprs) {
10317
10318 uint32_t SrcLocStrSize;
10319 if (!MapExprs.getMapDecl() && !MapExprs.getMapExpr())
10320 return OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize);
10321
10322 SourceLocation Loc;
10323 if (!MapExprs.getMapDecl() && MapExprs.getMapExpr()) {
10324 if (const ValueDecl *VD = getDeclFromThisExpr(MapExprs.getMapExpr()))
10325 Loc = VD->getLocation();
10326 else
10327 Loc = MapExprs.getMapExpr()->getExprLoc();
10328 } else {
10329 Loc = MapExprs.getMapDecl()->getLocation();
10330 }
10331
10332 std::string ExprName;
10333 if (MapExprs.getMapExpr()) {
10335 llvm::raw_string_ostream OS(ExprName);
10336 MapExprs.getMapExpr()->printPretty(OS, nullptr, P);
10337 } else {
10338 ExprName = MapExprs.getMapDecl()->getNameAsString();
10339 }
10340
10341 std::string FileName;
10343 if (auto *DbgInfo = CGF.getDebugInfo())
10344 FileName = DbgInfo->remapDIPath(PLoc.getFilename());
10345 else
10346 FileName = PLoc.getFilename();
10347 return OMPBuilder.getOrCreateSrcLocStr(FileName, ExprName, PLoc.getLine(),
10348 PLoc.getColumn(), SrcLocStrSize);
10349}
10350/// Emit the arrays used to pass the captures and map information to the
10351/// offloading runtime library. If there is no map or capture information,
10352/// return nullptr by reference.
10354 CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
10355 CGOpenMPRuntime::TargetDataInfo &Info, llvm::OpenMPIRBuilder &OMPBuilder,
10356 bool IsNonContiguous = false, bool ForEndCall = false) {
10357 CodeGenModule &CGM = CGF.CGM;
10358
10359 using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
10360 InsertPointTy AllocaIP(CGF.AllocaInsertPt->getParent(),
10361 CGF.AllocaInsertPt->getIterator());
10362 InsertPointTy CodeGenIP(CGF.Builder.GetInsertBlock(),
10363 CGF.Builder.GetInsertPoint());
10364
10365 auto DeviceAddrCB = [&](unsigned int I, llvm::Value *NewDecl) {
10366 if (const ValueDecl *DevVD = CombinedInfo.DevicePtrDecls[I]) {
10367 Info.CaptureDeviceAddrMap.try_emplace(DevVD, NewDecl);
10368 }
10369 };
10370
10371 auto CustomMapperCB = [&](unsigned int I) {
10372 llvm::Function *MFunc = nullptr;
10373 if (CombinedInfo.Mappers[I]) {
10374 Info.HasMapper = true;
10376 cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I]));
10377 }
10378 return MFunc;
10379 };
10380 cantFail(OMPBuilder.emitOffloadingArraysAndArgs(
10381 AllocaIP, CodeGenIP, Info, Info.RTArgs, CombinedInfo, CustomMapperCB,
10382 IsNonContiguous, ForEndCall, DeviceAddrCB));
10383}
10384
10385/// Check for inner distribute directive.
10386static const OMPExecutableDirective *
10388 const auto *CS = D.getInnermostCapturedStmt();
10389 const auto *Body =
10390 CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
10391 const Stmt *ChildStmt =
10393
10394 if (const auto *NestedDir =
10395 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
10396 OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind();
10397 switch (D.getDirectiveKind()) {
10398 case OMPD_target:
10399 // For now, treat 'target' with nested 'teams loop' as if it's
10400 // distributed (target teams distribute).
10401 if (isOpenMPDistributeDirective(DKind) || DKind == OMPD_teams_loop)
10402 return NestedDir;
10403 if (DKind == OMPD_teams) {
10404 Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(
10405 /*IgnoreCaptured=*/true);
10406 if (!Body)
10407 return nullptr;
10408 ChildStmt = CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);
10409 if (const auto *NND =
10410 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
10411 DKind = NND->getDirectiveKind();
10412 if (isOpenMPDistributeDirective(DKind))
10413 return NND;
10414 }
10415 }
10416 return nullptr;
10417 case OMPD_target_teams:
10418 if (isOpenMPDistributeDirective(DKind))
10419 return NestedDir;
10420 return nullptr;
10421 case OMPD_target_parallel:
10422 case OMPD_target_simd:
10423 case OMPD_target_parallel_for:
10424 case OMPD_target_parallel_for_simd:
10425 return nullptr;
10426 case OMPD_target_teams_distribute:
10427 case OMPD_target_teams_distribute_simd:
10428 case OMPD_target_teams_distribute_parallel_for:
10429 case OMPD_target_teams_distribute_parallel_for_simd:
10430 case OMPD_parallel:
10431 case OMPD_for:
10432 case OMPD_parallel_for:
10433 case OMPD_parallel_master:
10434 case OMPD_parallel_sections:
10435 case OMPD_for_simd:
10436 case OMPD_parallel_for_simd:
10437 case OMPD_cancel:
10438 case OMPD_cancellation_point:
10439 case OMPD_ordered:
10440 case OMPD_threadprivate:
10441 case OMPD_allocate:
10442 case OMPD_task:
10443 case OMPD_simd:
10444 case OMPD_tile:
10445 case OMPD_unroll:
10446 case OMPD_sections:
10447 case OMPD_section:
10448 case OMPD_single:
10449 case OMPD_master:
10450 case OMPD_critical:
10451 case OMPD_taskyield:
10452 case OMPD_barrier:
10453 case OMPD_taskwait:
10454 case OMPD_taskgroup:
10455 case OMPD_atomic:
10456 case OMPD_flush:
10457 case OMPD_depobj:
10458 case OMPD_scan:
10459 case OMPD_teams:
10460 case OMPD_target_data:
10461 case OMPD_target_exit_data:
10462 case OMPD_target_enter_data:
10463 case OMPD_distribute:
10464 case OMPD_distribute_simd:
10465 case OMPD_distribute_parallel_for:
10466 case OMPD_distribute_parallel_for_simd:
10467 case OMPD_teams_distribute:
10468 case OMPD_teams_distribute_simd:
10469 case OMPD_teams_distribute_parallel_for:
10470 case OMPD_teams_distribute_parallel_for_simd:
10471 case OMPD_target_update:
10472 case OMPD_declare_simd:
10473 case OMPD_declare_variant:
10474 case OMPD_begin_declare_variant:
10475 case OMPD_end_declare_variant:
10476 case OMPD_declare_target:
10477 case OMPD_end_declare_target:
10478 case OMPD_declare_reduction:
10479 case OMPD_declare_mapper:
10480 case OMPD_taskloop:
10481 case OMPD_taskloop_simd:
10482 case OMPD_master_taskloop:
10483 case OMPD_master_taskloop_simd:
10484 case OMPD_parallel_master_taskloop:
10485 case OMPD_parallel_master_taskloop_simd:
10486 case OMPD_requires:
10487 case OMPD_metadirective:
10488 case OMPD_unknown:
10489 default:
10490 llvm_unreachable("Unexpected directive.");
10491 }
10492 }
10493
10494 return nullptr;
10495}
10496
10497/// Emit the user-defined mapper function. The code generation follows the
10498/// pattern in the example below.
10499/// \code
10500/// void .omp_mapper.<type_name>.<mapper_id>.(void *rt_mapper_handle,
10501/// void *base, void *begin,
10502/// int64_t size, int64_t type,
10503/// void *name = nullptr) {
10504/// // Allocate space for an array section first.
10505/// if ((size > 1 || (base != begin)) && !maptype.IsDelete)
10506/// __tgt_push_mapper_component(rt_mapper_handle, base, begin,
10507/// size*sizeof(Ty), clearToFromMember(type));
10508/// // Map members.
10509/// for (unsigned i = 0; i < size; i++) {
10510/// // For each component specified by this mapper:
10511/// for (auto c : begin[i]->all_components) {
10512/// if (c.hasMapper())
10513/// (*c.Mapper())(rt_mapper_handle, c.arg_base, c.arg_begin, c.arg_size,
10514/// c.arg_type, c.arg_name);
10515/// else
10516/// __tgt_push_mapper_component(rt_mapper_handle, c.arg_base,
10517/// c.arg_begin, c.arg_size, c.arg_type,
10518/// c.arg_name);
10519/// }
10520/// }
10521/// // Delete the array section.
10522/// if (size > 1 && maptype.IsDelete)
10523/// __tgt_push_mapper_component(rt_mapper_handle, base, begin,
10524/// size*sizeof(Ty), clearToFromMember(type));
10525/// }
10526/// \endcode
10528 CodeGenFunction *CGF) {
10529 if (UDMMap.count(D) > 0)
10530 return;
10531 ASTContext &C = CGM.getContext();
10532 QualType Ty = D->getType();
10533 auto *MapperVarDecl =
10535 CharUnits ElementSize = C.getTypeSizeInChars(Ty);
10536 llvm::Type *ElemTy = CGM.getTypes().ConvertTypeForMem(Ty);
10537
10538 CodeGenFunction MapperCGF(CGM);
10539 MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
10540 auto PrivatizeAndGenMapInfoCB =
10541 [&](llvm::OpenMPIRBuilder::InsertPointTy CodeGenIP, llvm::Value *PtrPHI,
10542 llvm::Value *BeginArg) -> llvm::OpenMPIRBuilder::MapInfosTy & {
10543 MapperCGF.Builder.restoreIP(CodeGenIP);
10544
10545 // Privatize the declared variable of mapper to be the current array
10546 // element.
10547 Address PtrCurrent(
10548 PtrPHI, ElemTy,
10549 Address(BeginArg, MapperCGF.VoidPtrTy, CGM.getPointerAlign())
10550 .getAlignment()
10551 .alignmentOfArrayElement(ElementSize));
10553 Scope.addPrivate(MapperVarDecl, PtrCurrent);
10554 (void)Scope.Privatize();
10555
10556 // Get map clause information.
10557 MappableExprsHandler MEHandler(*D, MapperCGF);
10558 MEHandler.generateAllInfoForMapper(CombinedInfo, OMPBuilder);
10559
10560 auto FillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) {
10561 return emitMappingInformation(MapperCGF, OMPBuilder, MapExpr);
10562 };
10563 if (CGM.getCodeGenOpts().getDebugInfo() !=
10564 llvm::codegenoptions::NoDebugInfo) {
10565 CombinedInfo.Names.resize(CombinedInfo.Exprs.size());
10566 llvm::transform(CombinedInfo.Exprs, CombinedInfo.Names.begin(),
10567 FillInfoMap);
10568 }
10569
10570 return CombinedInfo;
10571 };
10572
10573 auto CustomMapperCB = [&](unsigned I) {
10574 llvm::Function *MapperFunc = nullptr;
10575 if (CombinedInfo.Mappers[I]) {
10576 // Call the corresponding mapper function.
10578 cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I]));
10579 assert(MapperFunc && "Expect a valid mapper function is available.");
10580 }
10581 return MapperFunc;
10582 };
10583
10584 SmallString<64> TyStr;
10585 llvm::raw_svector_ostream Out(TyStr);
10586 CGM.getCXXABI().getMangleContext().mangleCanonicalTypeName(Ty, Out);
10587 std::string Name = getName({"omp_mapper", TyStr, D->getName()});
10588
10589 llvm::Function *NewFn = cantFail(OMPBuilder.emitUserDefinedMapper(
10590 PrivatizeAndGenMapInfoCB, ElemTy, Name, CustomMapperCB));
10591 UDMMap.try_emplace(D, NewFn);
10592 if (CGF)
10593 FunctionUDMMap[CGF->CurFn].push_back(D);
10594}
10595
10597 const OMPDeclareMapperDecl *D) {
10598 auto I = UDMMap.find(D);
10599 if (I != UDMMap.end())
10600 return I->second;
10602 return UDMMap.lookup(D);
10603}
10604
10607 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
10608 const OMPLoopDirective &D)>
10609 SizeEmitter) {
10610 OpenMPDirectiveKind Kind = D.getDirectiveKind();
10611 const OMPExecutableDirective *TD = &D;
10612 // Get nested teams distribute kind directive, if any. For now, treat
10613 // 'target_teams_loop' as if it's really a target_teams_distribute.
10614 if ((!isOpenMPDistributeDirective(Kind) || !isOpenMPTeamsDirective(Kind)) &&
10615 Kind != OMPD_target_teams_loop)
10616 TD = getNestedDistributeDirective(CGM.getContext(), D);
10617 if (!TD)
10618 return llvm::ConstantInt::get(CGF.Int64Ty, 0);
10619
10620 const auto *LD = cast<OMPLoopDirective>(TD);
10621 if (llvm::Value *NumIterations = SizeEmitter(CGF, *LD))
10622 return NumIterations;
10623 return llvm::ConstantInt::get(CGF.Int64Ty, 0);
10624}
10625
10626static void
10627emitTargetCallFallback(CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn,
10628 const OMPExecutableDirective &D,
10630 bool RequiresOuterTask, const CapturedStmt &CS,
10631 bool OffloadingMandatory, CodeGenFunction &CGF) {
10632 if (OffloadingMandatory) {
10633 CGF.Builder.CreateUnreachable();
10634 } else {
10635 if (RequiresOuterTask) {
10636 CapturedVars.clear();
10637 CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
10638 }
10639 OMPRuntime->emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn,
10640 CapturedVars);
10641 }
10642}
10643
10644static llvm::Value *emitDeviceID(
10645 llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
10646 CodeGenFunction &CGF) {
10647 // Emit device ID if any.
10648 llvm::Value *DeviceID;
10649 if (Device.getPointer()) {
10650 assert((Device.getInt() == OMPC_DEVICE_unknown ||
10651 Device.getInt() == OMPC_DEVICE_device_num) &&
10652 "Expected device_num modifier.");
10653 llvm::Value *DevVal = CGF.EmitScalarExpr(Device.getPointer());
10654 DeviceID =
10655 CGF.Builder.CreateIntCast(DevVal, CGF.Int64Ty, /*isSigned=*/true);
10656 } else {
10657 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
10658 }
10659 return DeviceID;
10660}
10661
10662static std::pair<llvm::Value *, OMPDynGroupprivateFallbackType>
10664 llvm::Value *DynGP = CGF.Builder.getInt32(0);
10665 auto DynGPFallback = OMPDynGroupprivateFallbackType::Abort;
10666
10667 if (auto *DynGPClause = D.getSingleClause<OMPDynGroupprivateClause>()) {
10668 CodeGenFunction::RunCleanupsScope DynGPScope(CGF);
10669 llvm::Value *DynGPVal =
10670 CGF.EmitScalarExpr(DynGPClause->getSize(), /*IgnoreResultAssign=*/true);
10671 DynGP = CGF.Builder.CreateIntCast(DynGPVal, CGF.Int32Ty,
10672 /*isSigned=*/false);
10673 auto FallbackModifier = DynGPClause->getDynGroupprivateFallbackModifier();
10674 switch (FallbackModifier) {
10675 case OMPC_DYN_GROUPPRIVATE_FALLBACK_abort:
10676 DynGPFallback = OMPDynGroupprivateFallbackType::Abort;
10677 break;
10678 case OMPC_DYN_GROUPPRIVATE_FALLBACK_null:
10679 DynGPFallback = OMPDynGroupprivateFallbackType::Null;
10680 break;
10681 case OMPC_DYN_GROUPPRIVATE_FALLBACK_default_mem:
10683 // This is the default for dyn_groupprivate.
10684 DynGPFallback = OMPDynGroupprivateFallbackType::DefaultMem;
10685 break;
10686 default:
10687 llvm_unreachable("Unknown fallback modifier for OpenMP dyn_groupprivate");
10688 }
10689 } else if (auto *OMPXDynCGClause =
10690 D.getSingleClause<OMPXDynCGroupMemClause>()) {
10691 CodeGenFunction::RunCleanupsScope DynCGMemScope(CGF);
10692 llvm::Value *DynCGMemVal = CGF.EmitScalarExpr(OMPXDynCGClause->getSize(),
10693 /*IgnoreResultAssign=*/true);
10694 DynGP = CGF.Builder.CreateIntCast(DynCGMemVal, CGF.Int32Ty,
10695 /*isSigned=*/false);
10696 }
10697 return {DynGP, DynGPFallback};
10698}
10699
10701 MappableExprsHandler &MEHandler, CodeGenFunction &CGF,
10702 const CapturedStmt &CS, llvm::SmallVectorImpl<llvm::Value *> &CapturedVars,
10703 llvm::OpenMPIRBuilder &OMPBuilder,
10704 llvm::DenseSet<CanonicalDeclPtr<const Decl>> &MappedVarSet,
10705 MappableExprsHandler::MapCombinedInfoTy &CombinedInfo) {
10706
10707 llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers;
10708 auto RI = CS.getCapturedRecordDecl()->field_begin();
10709 auto *CV = CapturedVars.begin();
10711 CE = CS.capture_end();
10712 CI != CE; ++CI, ++RI, ++CV) {
10713 MappableExprsHandler::MapCombinedInfoTy CurInfo;
10714
10715 // VLA sizes are passed to the outlined region by copy and do not have map
10716 // information associated.
10717 if (CI->capturesVariableArrayType()) {
10718 CurInfo.Exprs.push_back(nullptr);
10719 CurInfo.BasePointers.push_back(*CV);
10720 CurInfo.DevicePtrDecls.push_back(nullptr);
10721 CurInfo.DevicePointers.push_back(
10722 MappableExprsHandler::DeviceInfoTy::None);
10723 CurInfo.Pointers.push_back(*CV);
10724 CurInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
10725 CGF.getTypeSize(RI->getType()), CGF.Int64Ty, /*isSigned=*/true));
10726 // Copy to the device as an argument. No need to retrieve it.
10727 CurInfo.Types.push_back(OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
10728 OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM |
10729 OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT);
10730 CurInfo.Mappers.push_back(nullptr);
10731 } else {
10732 const ValueDecl *CapturedVD =
10733 CI->capturesThis() ? nullptr
10735 bool HasEntryWithCVAsAttachPtr = false;
10736 if (CapturedVD)
10737 HasEntryWithCVAsAttachPtr =
10738 MEHandler.hasAttachEntryForCapturedVar(CapturedVD);
10739
10740 // Populate component lists for the captured variable from clauses.
10741 MappableExprsHandler::MapDataArrayTy DeclComponentLists;
10744 StorageForImplicitlyAddedComponentLists;
10745 MEHandler.populateComponentListsForNonLambdaCaptureFromClauses(
10746 CapturedVD, DeclComponentLists,
10747 StorageForImplicitlyAddedComponentLists);
10748
10749 // OpenMP 6.0, 15.8, target construct, restrictions:
10750 // * A list item in a map clause that is specified on a target construct
10751 // must have a base variable or base pointer.
10752 //
10753 // Map clauses on a target construct must either have a base pointer, or a
10754 // base-variable. So, if we don't have a base-pointer, that means that it
10755 // must have a base-variable, i.e. we have a map like `map(s)`, `map(s.x)`
10756 // etc. In such cases, we do not need to handle default map generation
10757 // for `s`.
10758 bool HasEntryWithoutAttachPtr =
10759 llvm::any_of(DeclComponentLists, [&](const auto &MapData) {
10761 Components = std::get<0>(MapData);
10762 return !MEHandler.getAttachPtrExpr(Components);
10763 });
10764
10765 // Generate default map info first if there's no direct map with CV as
10766 // the base-variable, or attach pointer.
10767 if (DeclComponentLists.empty() ||
10768 (!HasEntryWithCVAsAttachPtr && !HasEntryWithoutAttachPtr))
10769 MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurInfo);
10770
10771 // If we have any information in the map clause, we use it, otherwise we
10772 // just do a default mapping.
10773 MEHandler.generateInfoForCaptureFromClauseInfo(
10774 DeclComponentLists, CI, *CV, CurInfo, OMPBuilder,
10775 /*OffsetForMemberOfFlag=*/CombinedInfo.BasePointers.size());
10776
10777 if (!CI->capturesThis())
10778 MappedVarSet.insert(CI->getCapturedVar());
10779 else
10780 MappedVarSet.insert(nullptr);
10781
10782 // Generate correct mapping for variables captured by reference in
10783 // lambdas.
10784 if (CI->capturesVariable())
10785 MEHandler.generateInfoForLambdaCaptures(CI->getCapturedVar(), *CV,
10786 CurInfo, LambdaPointers);
10787 }
10788 // We expect to have at least an element of information for this capture.
10789 assert(!CurInfo.BasePointers.empty() &&
10790 "Non-existing map pointer for capture!");
10791 assert(CurInfo.BasePointers.size() == CurInfo.Pointers.size() &&
10792 CurInfo.BasePointers.size() == CurInfo.Sizes.size() &&
10793 CurInfo.BasePointers.size() == CurInfo.Types.size() &&
10794 CurInfo.BasePointers.size() == CurInfo.Mappers.size() &&
10795 "Inconsistent map information sizes!");
10796
10797 // We need to append the results of this capture to what we already have.
10798 CombinedInfo.append(CurInfo);
10799 }
10800 // Adjust MEMBER_OF flags for the lambdas captures.
10801 MEHandler.adjustMemberOfForLambdaCaptures(
10802 OMPBuilder, LambdaPointers, CombinedInfo.BasePointers,
10803 CombinedInfo.Pointers, CombinedInfo.Types);
10804}
10805static void
10806genMapInfo(MappableExprsHandler &MEHandler, CodeGenFunction &CGF,
10807 MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
10808 llvm::OpenMPIRBuilder &OMPBuilder,
10809 const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkippedVarSet =
10810 llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) {
10811
10812 CodeGenModule &CGM = CGF.CGM;
10813 // Map any list items in a map clause that were not captures because they
10814 // weren't referenced within the construct.
10815 MEHandler.generateAllInfo(CombinedInfo, OMPBuilder, SkippedVarSet);
10816
10817 auto FillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) {
10818 return emitMappingInformation(CGF, OMPBuilder, MapExpr);
10819 };
10820 if (CGM.getCodeGenOpts().getDebugInfo() !=
10821 llvm::codegenoptions::NoDebugInfo) {
10822 CombinedInfo.Names.resize(CombinedInfo.Exprs.size());
10823 llvm::transform(CombinedInfo.Exprs, CombinedInfo.Names.begin(),
10824 FillInfoMap);
10825 }
10826}
10827
10829 const CapturedStmt &CS,
10831 llvm::OpenMPIRBuilder &OMPBuilder,
10832 MappableExprsHandler::MapCombinedInfoTy &CombinedInfo) {
10833 // Get mappable expression information.
10834 MappableExprsHandler MEHandler(D, CGF);
10835 llvm::DenseSet<CanonicalDeclPtr<const Decl>> MappedVarSet;
10836
10837 genMapInfoForCaptures(MEHandler, CGF, CS, CapturedVars, OMPBuilder,
10838 MappedVarSet, CombinedInfo);
10839 genMapInfo(MEHandler, CGF, CombinedInfo, OMPBuilder, MappedVarSet);
10840}
10841
10842template <typename ClauseTy>
10843static void
10845 const OMPExecutableDirective &D,
10847 const auto *C = D.getSingleClause<ClauseTy>();
10848 assert(!C->varlist_empty() &&
10849 "ompx_bare requires explicit num_teams and thread_limit");
10851 for (auto *E : C->varlist()) {
10852 llvm::Value *V = CGF.EmitScalarExpr(E);
10853 Values.push_back(
10854 CGF.Builder.CreateIntCast(V, CGF.Int32Ty, /*isSigned=*/true));
10855 }
10856}
10857
10859 CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn,
10860 const OMPExecutableDirective &D,
10861 llvm::SmallVectorImpl<llvm::Value *> &CapturedVars, bool RequiresOuterTask,
10862 const CapturedStmt &CS, bool OffloadingMandatory,
10863 llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
10864 llvm::Value *OutlinedFnID, CodeGenFunction::OMPTargetDataInfo &InputInfo,
10865 llvm::Value *&MapTypesArray, llvm::Value *&MapNamesArray,
10866 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
10867 const OMPLoopDirective &D)>
10868 SizeEmitter,
10869 CodeGenFunction &CGF, CodeGenModule &CGM) {
10870 llvm::OpenMPIRBuilder &OMPBuilder = OMPRuntime->getOMPBuilder();
10871
10872 // Fill up the arrays with all the captured variables.
10873 MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
10875 genMapInfo(D, CGF, CS, CapturedVars, OMPBuilder, CombinedInfo);
10876
10877 emitOffloadingArraysAndArgs(CGF, CombinedInfo, Info, OMPBuilder,
10878 /*IsNonContiguous=*/true, /*ForEndCall=*/false);
10879
10880 InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
10881 InputInfo.BasePointersArray = Address(Info.RTArgs.BasePointersArray,
10882 CGF.VoidPtrTy, CGM.getPointerAlign());
10883 InputInfo.PointersArray =
10884 Address(Info.RTArgs.PointersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
10885 InputInfo.SizesArray =
10886 Address(Info.RTArgs.SizesArray, CGF.Int64Ty, CGM.getPointerAlign());
10887 InputInfo.MappersArray =
10888 Address(Info.RTArgs.MappersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
10889 MapTypesArray = Info.RTArgs.MapTypesArray;
10890 MapNamesArray = Info.RTArgs.MapNamesArray;
10891
10892 auto &&ThenGen = [&OMPRuntime, OutlinedFn, &D, &CapturedVars,
10893 RequiresOuterTask, &CS, OffloadingMandatory, Device,
10894 OutlinedFnID, &InputInfo, &MapTypesArray, &MapNamesArray,
10895 SizeEmitter](CodeGenFunction &CGF, PrePostActionTy &) {
10896 bool IsReverseOffloading = Device.getInt() == OMPC_DEVICE_ancestor;
10897
10898 if (IsReverseOffloading) {
10899 // Reverse offloading is not supported, so just execute on the host.
10900 // FIXME: This fallback solution is incorrect since it ignores the
10901 // OMP_TARGET_OFFLOAD environment variable. Instead it would be better to
10902 // assert here and ensure SEMA emits an error.
10903 emitTargetCallFallback(OMPRuntime, OutlinedFn, D, CapturedVars,
10904 RequiresOuterTask, CS, OffloadingMandatory, CGF);
10905 return;
10906 }
10907
10908 bool HasNoWait = D.hasClausesOfKind<OMPNowaitClause>();
10909 unsigned NumTargetItems = InputInfo.NumberOfTargetItems;
10910
10911 llvm::Value *BasePointersArray =
10912 InputInfo.BasePointersArray.emitRawPointer(CGF);
10913 llvm::Value *PointersArray = InputInfo.PointersArray.emitRawPointer(CGF);
10914 llvm::Value *SizesArray = InputInfo.SizesArray.emitRawPointer(CGF);
10915 llvm::Value *MappersArray = InputInfo.MappersArray.emitRawPointer(CGF);
10916
10917 auto &&EmitTargetCallFallbackCB =
10918 [&OMPRuntime, OutlinedFn, &D, &CapturedVars, RequiresOuterTask, &CS,
10919 OffloadingMandatory, &CGF](llvm::OpenMPIRBuilder::InsertPointTy IP)
10920 -> llvm::OpenMPIRBuilder::InsertPointTy {
10921 CGF.Builder.restoreIP(IP);
10922 emitTargetCallFallback(OMPRuntime, OutlinedFn, D, CapturedVars,
10923 RequiresOuterTask, CS, OffloadingMandatory, CGF);
10924 return CGF.Builder.saveIP();
10925 };
10926
10927 bool IsBare = D.hasClausesOfKind<OMPXBareClause>();
10930 if (IsBare) {
10933 NumThreads);
10934 } else {
10935 NumTeams.push_back(OMPRuntime->emitNumTeamsForTargetDirective(CGF, D));
10936 NumThreads.push_back(
10937 OMPRuntime->emitNumThreadsForTargetDirective(CGF, D));
10938 }
10939
10940 llvm::Value *DeviceID = emitDeviceID(Device, CGF);
10941 llvm::Value *RTLoc = OMPRuntime->emitUpdateLocation(CGF, D.getBeginLoc());
10942 llvm::Value *NumIterations =
10943 OMPRuntime->emitTargetNumIterationsCall(CGF, D, SizeEmitter);
10944 auto [DynCGroupMem, DynCGroupMemFallback] = emitDynCGroupMem(D, CGF);
10945 llvm::OpenMPIRBuilder::InsertPointTy AllocaIP(
10946 CGF.AllocaInsertPt->getParent(), CGF.AllocaInsertPt->getIterator());
10947
10948 llvm::OpenMPIRBuilder::TargetDataRTArgs RTArgs(
10949 BasePointersArray, PointersArray, SizesArray, MapTypesArray,
10950 nullptr /* MapTypesArrayEnd */, MappersArray, MapNamesArray);
10951
10952 llvm::OpenMPIRBuilder::TargetKernelArgs Args(
10953 NumTargetItems, RTArgs, NumIterations, NumTeams, NumThreads,
10954 DynCGroupMem, HasNoWait, DynCGroupMemFallback);
10955
10956 llvm::OpenMPIRBuilder::InsertPointTy AfterIP =
10957 cantFail(OMPRuntime->getOMPBuilder().emitKernelLaunch(
10958 CGF.Builder, OutlinedFnID, EmitTargetCallFallbackCB, Args, DeviceID,
10959 RTLoc, AllocaIP));
10960 CGF.Builder.restoreIP(AfterIP);
10961 };
10962
10963 if (RequiresOuterTask)
10964 CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
10965 else
10966 OMPRuntime->emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
10967}
10968
10969static void
10970emitTargetCallElse(CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn,
10971 const OMPExecutableDirective &D,
10973 bool RequiresOuterTask, const CapturedStmt &CS,
10974 bool OffloadingMandatory, CodeGenFunction &CGF) {
10975
10976 // Notify that the host version must be executed.
10977 auto &&ElseGen =
10978 [&OMPRuntime, OutlinedFn, &D, &CapturedVars, RequiresOuterTask, &CS,
10979 OffloadingMandatory](CodeGenFunction &CGF, PrePostActionTy &) {
10980 emitTargetCallFallback(OMPRuntime, OutlinedFn, D, CapturedVars,
10981 RequiresOuterTask, CS, OffloadingMandatory, CGF);
10982 };
10983
10984 if (RequiresOuterTask) {
10986 CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo);
10987 } else {
10988 OMPRuntime->emitInlinedDirective(CGF, D.getDirectiveKind(), ElseGen);
10989 }
10990}
10991
10994 llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
10995 llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
10996 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
10997 const OMPLoopDirective &D)>
10998 SizeEmitter) {
10999 if (!CGF.HaveInsertPoint())
11000 return;
11001
11002 const bool OffloadingMandatory = !CGM.getLangOpts().OpenMPIsTargetDevice &&
11003 CGM.getLangOpts().OpenMPOffloadMandatory;
11004
11005 assert((OffloadingMandatory || OutlinedFn) && "Invalid outlined function!");
11006
11007 const bool RequiresOuterTask =
11008 D.hasClausesOfKind<OMPDependClause>() ||
11009 D.hasClausesOfKind<OMPNowaitClause>() ||
11010 D.hasClausesOfKind<OMPInReductionClause>() ||
11011 (CGM.getLangOpts().OpenMP >= 51 &&
11012 needsTaskBasedThreadLimit(D.getDirectiveKind()) &&
11013 D.hasClausesOfKind<OMPThreadLimitClause>());
11015 const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
11016 auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF,
11017 PrePostActionTy &) {
11018 CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
11019 };
11020 emitInlinedDirective(CGF, OMPD_unknown, ArgsCodegen);
11021
11023 llvm::Value *MapTypesArray = nullptr;
11024 llvm::Value *MapNamesArray = nullptr;
11025
11026 auto &&TargetThenGen = [this, OutlinedFn, &D, &CapturedVars,
11027 RequiresOuterTask, &CS, OffloadingMandatory, Device,
11028 OutlinedFnID, &InputInfo, &MapTypesArray,
11029 &MapNamesArray, SizeEmitter](CodeGenFunction &CGF,
11030 PrePostActionTy &) {
11031 emitTargetCallKernelLaunch(this, OutlinedFn, D, CapturedVars,
11032 RequiresOuterTask, CS, OffloadingMandatory,
11033 Device, OutlinedFnID, InputInfo, MapTypesArray,
11034 MapNamesArray, SizeEmitter, CGF, CGM);
11035 };
11036
11037 auto &&TargetElseGen =
11038 [this, OutlinedFn, &D, &CapturedVars, RequiresOuterTask, &CS,
11039 OffloadingMandatory](CodeGenFunction &CGF, PrePostActionTy &) {
11040 emitTargetCallElse(this, OutlinedFn, D, CapturedVars, RequiresOuterTask,
11041 CS, OffloadingMandatory, CGF);
11042 };
11043
11044 // If we have a target function ID it means that we need to support
11045 // offloading, otherwise, just execute on the host. We need to execute on host
11046 // regardless of the conditional in the if clause if, e.g., the user do not
11047 // specify target triples.
11048 if (OutlinedFnID) {
11049 if (IfCond) {
11050 emitIfClause(CGF, IfCond, TargetThenGen, TargetElseGen);
11051 } else {
11052 RegionCodeGenTy ThenRCG(TargetThenGen);
11053 ThenRCG(CGF);
11054 }
11055 } else {
11056 RegionCodeGenTy ElseRCG(TargetElseGen);
11057 ElseRCG(CGF);
11058 }
11059}
11060
11062 StringRef ParentName) {
11063 if (!S)
11064 return;
11065
11066 // Register vtable from device for target data and target directives.
11067 // Add this block here since scanForTargetRegionsFunctions ignores
11068 // target data by checking if S is a executable directive (target).
11069 if (auto *E = dyn_cast<OMPExecutableDirective>(S);
11070 E && isOpenMPTargetDataManagementDirective(E->getDirectiveKind())) {
11071 // Don't need to check if it's device compile
11072 // since scanForTargetRegionsFunctions currently only called
11073 // in device compilation.
11074 registerVTable(*E);
11075 }
11076
11077 // Codegen OMP target directives that offload compute to the device.
11078 bool RequiresDeviceCodegen =
11081 cast<OMPExecutableDirective>(S)->getDirectiveKind());
11082
11083 if (RequiresDeviceCodegen) {
11084 const auto &E = *cast<OMPExecutableDirective>(S);
11085
11086 llvm::TargetRegionEntryInfo EntryInfo = getEntryInfoFromPresumedLoc(
11087 CGM, OMPBuilder, E.getBeginLoc(), ParentName);
11088
11089 // Is this a target region that should not be emitted as an entry point? If
11090 // so just signal we are done with this target region.
11091 if (!OMPBuilder.OffloadInfoManager.hasTargetRegionEntryInfo(EntryInfo))
11092 return;
11093
11094 switch (E.getDirectiveKind()) {
11095 case OMPD_target:
11098 break;
11099 case OMPD_target_parallel:
11101 CGM, ParentName, cast<OMPTargetParallelDirective>(E));
11102 break;
11103 case OMPD_target_teams:
11105 CGM, ParentName, cast<OMPTargetTeamsDirective>(E));
11106 break;
11107 case OMPD_target_teams_distribute:
11110 break;
11111 case OMPD_target_teams_distribute_simd:
11114 break;
11115 case OMPD_target_parallel_for:
11118 break;
11119 case OMPD_target_parallel_for_simd:
11122 break;
11123 case OMPD_target_simd:
11125 CGM, ParentName, cast<OMPTargetSimdDirective>(E));
11126 break;
11127 case OMPD_target_teams_distribute_parallel_for:
11129 CGM, ParentName,
11131 break;
11132 case OMPD_target_teams_distribute_parallel_for_simd:
11135 CGM, ParentName,
11137 break;
11138 case OMPD_target_teams_loop:
11141 break;
11142 case OMPD_target_parallel_loop:
11145 break;
11146 case OMPD_parallel:
11147 case OMPD_for:
11148 case OMPD_parallel_for:
11149 case OMPD_parallel_master:
11150 case OMPD_parallel_sections:
11151 case OMPD_for_simd:
11152 case OMPD_parallel_for_simd:
11153 case OMPD_cancel:
11154 case OMPD_cancellation_point:
11155 case OMPD_ordered:
11156 case OMPD_threadprivate:
11157 case OMPD_allocate:
11158 case OMPD_task:
11159 case OMPD_simd:
11160 case OMPD_tile:
11161 case OMPD_unroll:
11162 case OMPD_sections:
11163 case OMPD_section:
11164 case OMPD_single:
11165 case OMPD_master:
11166 case OMPD_critical:
11167 case OMPD_taskyield:
11168 case OMPD_barrier:
11169 case OMPD_taskwait:
11170 case OMPD_taskgroup:
11171 case OMPD_atomic:
11172 case OMPD_flush:
11173 case OMPD_depobj:
11174 case OMPD_scan:
11175 case OMPD_teams:
11176 case OMPD_target_data:
11177 case OMPD_target_exit_data:
11178 case OMPD_target_enter_data:
11179 case OMPD_distribute:
11180 case OMPD_distribute_simd:
11181 case OMPD_distribute_parallel_for:
11182 case OMPD_distribute_parallel_for_simd:
11183 case OMPD_teams_distribute:
11184 case OMPD_teams_distribute_simd:
11185 case OMPD_teams_distribute_parallel_for:
11186 case OMPD_teams_distribute_parallel_for_simd:
11187 case OMPD_target_update:
11188 case OMPD_declare_simd:
11189 case OMPD_declare_variant:
11190 case OMPD_begin_declare_variant:
11191 case OMPD_end_declare_variant:
11192 case OMPD_declare_target:
11193 case OMPD_end_declare_target:
11194 case OMPD_declare_reduction:
11195 case OMPD_declare_mapper:
11196 case OMPD_taskloop:
11197 case OMPD_taskloop_simd:
11198 case OMPD_master_taskloop:
11199 case OMPD_master_taskloop_simd:
11200 case OMPD_parallel_master_taskloop:
11201 case OMPD_parallel_master_taskloop_simd:
11202 case OMPD_requires:
11203 case OMPD_metadirective:
11204 case OMPD_unknown:
11205 default:
11206 llvm_unreachable("Unknown target directive for OpenMP device codegen.");
11207 }
11208 return;
11209 }
11210
11211 if (const auto *E = dyn_cast<OMPExecutableDirective>(S)) {
11212 if (!E->hasAssociatedStmt() || !E->getAssociatedStmt())
11213 return;
11214
11215 scanForTargetRegionsFunctions(E->getRawStmt(), ParentName);
11216 return;
11217 }
11218
11219 // If this is a lambda function, look into its body.
11220 if (const auto *L = dyn_cast<LambdaExpr>(S))
11221 S = L->getBody();
11222
11223 // Keep looking for target regions recursively.
11224 for (const Stmt *II : S->children())
11225 scanForTargetRegionsFunctions(II, ParentName);
11226}
11227
11228static bool isAssumedToBeNotEmitted(const ValueDecl *VD, bool IsDevice) {
11229 std::optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
11230 OMPDeclareTargetDeclAttr::getDeviceType(VD);
11231 if (!DevTy)
11232 return false;
11233 // Do not emit device_type(nohost) functions for the host.
11234 if (!IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_NoHost)
11235 return true;
11236 // Do not emit device_type(host) functions for the device.
11237 if (IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_Host)
11238 return true;
11239 return false;
11240}
11241
11243 // If emitting code for the host, we do not process FD here. Instead we do
11244 // the normal code generation.
11245 if (!CGM.getLangOpts().OpenMPIsTargetDevice) {
11246 if (const auto *FD = dyn_cast<FunctionDecl>(GD.getDecl()))
11248 CGM.getLangOpts().OpenMPIsTargetDevice))
11249 return true;
11250 return false;
11251 }
11252
11253 const ValueDecl *VD = cast<ValueDecl>(GD.getDecl());
11254 // Try to detect target regions in the function.
11255 if (const auto *FD = dyn_cast<FunctionDecl>(VD)) {
11256 StringRef Name = CGM.getMangledName(GD);
11259 CGM.getLangOpts().OpenMPIsTargetDevice))
11260 return true;
11261 }
11262
11263 // Do not to emit function if it is not marked as declare target.
11264 return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) &&
11265 AlreadyEmittedTargetDecls.count(VD) == 0;
11266}
11267
11270 CGM.getLangOpts().OpenMPIsTargetDevice))
11271 return true;
11272
11273 if (!CGM.getLangOpts().OpenMPIsTargetDevice)
11274 return false;
11275
11276 // Check if there are Ctors/Dtors in this declaration and look for target
11277 // regions in it. We use the complete variant to produce the kernel name
11278 // mangling.
11279 QualType RDTy = cast<VarDecl>(GD.getDecl())->getType();
11280 if (const auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) {
11281 for (const CXXConstructorDecl *Ctor : RD->ctors()) {
11282 StringRef ParentName =
11283 CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete));
11284 scanForTargetRegionsFunctions(Ctor->getBody(), ParentName);
11285 }
11286 if (const CXXDestructorDecl *Dtor = RD->getDestructor()) {
11287 StringRef ParentName =
11288 CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete));
11289 scanForTargetRegionsFunctions(Dtor->getBody(), ParentName);
11290 }
11291 }
11292
11293 // Do not to emit variable if it is not marked as declare target.
11294 std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
11295 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(
11296 cast<VarDecl>(GD.getDecl()));
11297 if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
11298 ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
11299 *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
11302 return true;
11303 }
11304 return false;
11305}
11306
11308 llvm::Constant *Addr) {
11309 if (CGM.getLangOpts().OMPTargetTriples.empty() &&
11310 !CGM.getLangOpts().OpenMPIsTargetDevice)
11311 return;
11312
11313 std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
11314 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
11315
11316 // If this is an 'extern' declaration we defer to the canonical definition and
11317 // do not emit an offloading entry.
11318 if (Res && *Res != OMPDeclareTargetDeclAttr::MT_Link &&
11319 VD->hasExternalStorage())
11320 return;
11321
11322 if (!Res) {
11323 if (CGM.getLangOpts().OpenMPIsTargetDevice) {
11324 // Register non-target variables being emitted in device code (debug info
11325 // may cause this).
11326 StringRef VarName = CGM.getMangledName(VD);
11327 EmittedNonTargetVariables.try_emplace(VarName, Addr);
11328 }
11329 return;
11330 }
11331
11332 auto AddrOfGlobal = [&VD, this]() { return CGM.GetAddrOfGlobal(VD); };
11333 auto LinkageForVariable = [&VD, this]() {
11334 return CGM.getLLVMLinkageVarDefinition(VD);
11335 };
11336
11337 std::vector<llvm::GlobalVariable *> GeneratedRefs;
11338 OMPBuilder.registerTargetGlobalVariable(
11340 VD->hasDefinition(CGM.getContext()) == VarDecl::DeclarationOnly,
11341 VD->isExternallyVisible(),
11343 VD->getCanonicalDecl()->getBeginLoc()),
11344 CGM.getMangledName(VD), GeneratedRefs, CGM.getLangOpts().OpenMPSimd,
11345 CGM.getLangOpts().OMPTargetTriples, AddrOfGlobal, LinkageForVariable,
11346 CGM.getTypes().ConvertTypeForMem(
11347 CGM.getContext().getPointerType(VD->getType())),
11348 Addr);
11349
11350 for (auto *ref : GeneratedRefs)
11351 CGM.addCompilerUsedGlobal(ref);
11352}
11353
11355 if (isa<FunctionDecl>(GD.getDecl()) ||
11357 return emitTargetFunctions(GD);
11358
11359 return emitTargetGlobalVariable(GD);
11360}
11361
11363 for (const VarDecl *VD : DeferredGlobalVariables) {
11364 std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
11365 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
11366 if (!Res)
11367 continue;
11368 if ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
11369 *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
11371 CGM.EmitGlobal(VD);
11372 } else {
11373 assert((*Res == OMPDeclareTargetDeclAttr::MT_Link ||
11374 ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
11375 *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
11377 "Expected link clause or to clause with unified memory.");
11378 (void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
11379 }
11380 }
11381}
11382
11384 CodeGenFunction &CGF, const OMPExecutableDirective &D) const {
11385 assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) &&
11386 " Expected target-based directive.");
11387}
11388
11390 for (const OMPClause *Clause : D->clauselists()) {
11391 if (Clause->getClauseKind() == OMPC_unified_shared_memory) {
11393 OMPBuilder.Config.setHasRequiresUnifiedSharedMemory(true);
11394 } else if (const auto *AC =
11395 dyn_cast<OMPAtomicDefaultMemOrderClause>(Clause)) {
11396 switch (AC->getAtomicDefaultMemOrderKind()) {
11397 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_acq_rel:
11398 RequiresAtomicOrdering = llvm::AtomicOrdering::AcquireRelease;
11399 break;
11400 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_seq_cst:
11401 RequiresAtomicOrdering = llvm::AtomicOrdering::SequentiallyConsistent;
11402 break;
11403 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_relaxed:
11404 RequiresAtomicOrdering = llvm::AtomicOrdering::Monotonic;
11405 break;
11407 break;
11408 }
11409 }
11410 }
11411}
11412
11413llvm::AtomicOrdering CGOpenMPRuntime::getDefaultMemoryOrdering() const {
11415}
11416
11418 LangAS &AS) {
11419 if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())
11420 return false;
11421 const auto *A = VD->getAttr<OMPAllocateDeclAttr>();
11422 switch(A->getAllocatorType()) {
11423 case OMPAllocateDeclAttr::OMPNullMemAlloc:
11424 case OMPAllocateDeclAttr::OMPDefaultMemAlloc:
11425 // Not supported, fallback to the default mem space.
11426 case OMPAllocateDeclAttr::OMPLargeCapMemAlloc:
11427 case OMPAllocateDeclAttr::OMPCGroupMemAlloc:
11428 case OMPAllocateDeclAttr::OMPHighBWMemAlloc:
11429 case OMPAllocateDeclAttr::OMPLowLatMemAlloc:
11430 case OMPAllocateDeclAttr::OMPThreadMemAlloc:
11431 case OMPAllocateDeclAttr::OMPConstMemAlloc:
11432 case OMPAllocateDeclAttr::OMPPTeamMemAlloc:
11433 AS = LangAS::Default;
11434 return true;
11435 case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc:
11436 llvm_unreachable("Expected predefined allocator for the variables with the "
11437 "static storage.");
11438 }
11439 return false;
11440}
11441
11445
11447 CodeGenModule &CGM)
11448 : CGM(CGM) {
11449 if (CGM.getLangOpts().OpenMPIsTargetDevice) {
11450 SavedShouldMarkAsGlobal = CGM.getOpenMPRuntime().ShouldMarkAsGlobal;
11451 CGM.getOpenMPRuntime().ShouldMarkAsGlobal = false;
11452 }
11453}
11454
11456 if (CGM.getLangOpts().OpenMPIsTargetDevice)
11457 CGM.getOpenMPRuntime().ShouldMarkAsGlobal = SavedShouldMarkAsGlobal;
11458}
11459
11461 if (!CGM.getLangOpts().OpenMPIsTargetDevice || !ShouldMarkAsGlobal)
11462 return true;
11463
11464 const auto *D = cast<FunctionDecl>(GD.getDecl());
11465 // Do not to emit function if it is marked as declare target as it was already
11466 // emitted.
11467 if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(D)) {
11468 if (D->hasBody() && AlreadyEmittedTargetDecls.count(D) == 0) {
11469 if (auto *F = dyn_cast_or_null<llvm::Function>(
11470 CGM.GetGlobalValue(CGM.getMangledName(GD))))
11471 return !F->isDeclaration();
11472 return false;
11473 }
11474 return true;
11475 }
11476
11477 return !AlreadyEmittedTargetDecls.insert(D).second;
11478}
11479
11481 const OMPExecutableDirective &D,
11482 SourceLocation Loc,
11483 llvm::Function *OutlinedFn,
11484 ArrayRef<llvm::Value *> CapturedVars) {
11485 if (!CGF.HaveInsertPoint())
11486 return;
11487
11488 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
11490
11491 // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn);
11492 llvm::Value *Args[] = {
11493 RTLoc,
11494 CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
11495 OutlinedFn};
11497 RealArgs.append(std::begin(Args), std::end(Args));
11498 RealArgs.append(CapturedVars.begin(), CapturedVars.end());
11499
11500 llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
11501 CGM.getModule(), OMPRTL___kmpc_fork_teams);
11502 CGF.EmitRuntimeCall(RTLFn, RealArgs);
11503}
11504
11506 const Expr *NumTeams,
11507 const Expr *ThreadLimit,
11508 SourceLocation Loc) {
11509 if (!CGF.HaveInsertPoint())
11510 return;
11511
11512 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
11513
11514 llvm::Value *NumTeamsVal =
11515 NumTeams
11516 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams),
11517 CGF.CGM.Int32Ty, /* isSigned = */ true)
11518 : CGF.Builder.getInt32(0);
11519
11520 llvm::Value *ThreadLimitVal =
11521 ThreadLimit
11522 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
11523 CGF.CGM.Int32Ty, /* isSigned = */ true)
11524 : CGF.Builder.getInt32(0);
11525
11526 // Build call __kmpc_push_num_teamss(&loc, global_tid, num_teams, thread_limit)
11527 llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal,
11528 ThreadLimitVal};
11529 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
11530 CGM.getModule(), OMPRTL___kmpc_push_num_teams),
11531 PushNumTeamsArgs);
11532}
11533
11535 const Expr *ThreadLimit,
11536 SourceLocation Loc) {
11537 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
11538 llvm::Value *ThreadLimitVal =
11539 ThreadLimit
11540 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
11541 CGF.CGM.Int32Ty, /* isSigned = */ true)
11542 : CGF.Builder.getInt32(0);
11543
11544 // Build call __kmpc_set_thread_limit(&loc, global_tid, thread_limit)
11545 llvm::Value *ThreadLimitArgs[] = {RTLoc, getThreadID(CGF, Loc),
11546 ThreadLimitVal};
11547 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
11548 CGM.getModule(), OMPRTL___kmpc_set_thread_limit),
11549 ThreadLimitArgs);
11550}
11551
11553 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
11554 const Expr *Device, const RegionCodeGenTy &CodeGen,
11556 if (!CGF.HaveInsertPoint())
11557 return;
11558
11559 // Action used to replace the default codegen action and turn privatization
11560 // off.
11561 PrePostActionTy NoPrivAction;
11562
11563 using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
11564
11565 llvm::Value *IfCondVal = nullptr;
11566 if (IfCond)
11567 IfCondVal = CGF.EvaluateExprAsBool(IfCond);
11568
11569 // Emit device ID if any.
11570 llvm::Value *DeviceID = nullptr;
11571 if (Device) {
11572 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
11573 CGF.Int64Ty, /*isSigned=*/true);
11574 } else {
11575 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
11576 }
11577
11578 // Fill up the arrays with all the mapped variables.
11579 MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
11580 auto GenMapInfoCB =
11581 [&](InsertPointTy CodeGenIP) -> llvm::OpenMPIRBuilder::MapInfosTy & {
11582 CGF.Builder.restoreIP(CodeGenIP);
11583 // Get map clause information.
11584 MappableExprsHandler MEHandler(D, CGF);
11585 MEHandler.generateAllInfo(CombinedInfo, OMPBuilder);
11586
11587 auto FillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) {
11588 return emitMappingInformation(CGF, OMPBuilder, MapExpr);
11589 };
11590 if (CGM.getCodeGenOpts().getDebugInfo() !=
11591 llvm::codegenoptions::NoDebugInfo) {
11592 CombinedInfo.Names.resize(CombinedInfo.Exprs.size());
11593 llvm::transform(CombinedInfo.Exprs, CombinedInfo.Names.begin(),
11594 FillInfoMap);
11595 }
11596
11597 return CombinedInfo;
11598 };
11599 using BodyGenTy = llvm::OpenMPIRBuilder::BodyGenTy;
11600 auto BodyCB = [&](InsertPointTy CodeGenIP, BodyGenTy BodyGenType) {
11601 CGF.Builder.restoreIP(CodeGenIP);
11602 switch (BodyGenType) {
11603 case BodyGenTy::Priv:
11604 if (!Info.CaptureDeviceAddrMap.empty())
11605 CodeGen(CGF);
11606 break;
11607 case BodyGenTy::DupNoPriv:
11608 if (!Info.CaptureDeviceAddrMap.empty()) {
11609 CodeGen.setAction(NoPrivAction);
11610 CodeGen(CGF);
11611 }
11612 break;
11613 case BodyGenTy::NoPriv:
11614 if (Info.CaptureDeviceAddrMap.empty()) {
11615 CodeGen.setAction(NoPrivAction);
11616 CodeGen(CGF);
11617 }
11618 break;
11619 }
11620 return InsertPointTy(CGF.Builder.GetInsertBlock(),
11621 CGF.Builder.GetInsertPoint());
11622 };
11623
11624 auto DeviceAddrCB = [&](unsigned int I, llvm::Value *NewDecl) {
11625 if (const ValueDecl *DevVD = CombinedInfo.DevicePtrDecls[I]) {
11626 Info.CaptureDeviceAddrMap.try_emplace(DevVD, NewDecl);
11627 }
11628 };
11629
11630 auto CustomMapperCB = [&](unsigned int I) {
11631 llvm::Function *MFunc = nullptr;
11632 if (CombinedInfo.Mappers[I]) {
11633 Info.HasMapper = true;
11635 cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I]));
11636 }
11637 return MFunc;
11638 };
11639
11640 // Source location for the ident struct
11641 llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
11642
11643 InsertPointTy AllocaIP(CGF.AllocaInsertPt->getParent(),
11644 CGF.AllocaInsertPt->getIterator());
11645 InsertPointTy CodeGenIP(CGF.Builder.GetInsertBlock(),
11646 CGF.Builder.GetInsertPoint());
11647 llvm::OpenMPIRBuilder::LocationDescription OmpLoc(CodeGenIP);
11648 llvm::OpenMPIRBuilder::InsertPointTy AfterIP =
11649 cantFail(OMPBuilder.createTargetData(
11650 OmpLoc, AllocaIP, CodeGenIP, DeviceID, IfCondVal, Info, GenMapInfoCB,
11651 CustomMapperCB,
11652 /*MapperFunc=*/nullptr, BodyCB, DeviceAddrCB, RTLoc));
11653 CGF.Builder.restoreIP(AfterIP);
11654}
11655
11657 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
11658 const Expr *Device) {
11659 if (!CGF.HaveInsertPoint())
11660 return;
11661
11665 "Expecting either target enter, exit data, or update directives.");
11666
11668 llvm::Value *MapTypesArray = nullptr;
11669 llvm::Value *MapNamesArray = nullptr;
11670 // Generate the code for the opening of the data environment.
11671 auto &&ThenGen = [this, &D, Device, &InputInfo, &MapTypesArray,
11672 &MapNamesArray](CodeGenFunction &CGF, PrePostActionTy &) {
11673 // Emit device ID if any.
11674 llvm::Value *DeviceID = nullptr;
11675 if (Device) {
11676 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
11677 CGF.Int64Ty, /*isSigned=*/true);
11678 } else {
11679 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
11680 }
11681
11682 // Emit the number of elements in the offloading arrays.
11683 llvm::Constant *PointerNum =
11684 CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);
11685
11686 // Source location for the ident struct
11687 llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
11688
11689 SmallVector<llvm::Value *, 13> OffloadingArgs(
11690 {RTLoc, DeviceID, PointerNum,
11691 InputInfo.BasePointersArray.emitRawPointer(CGF),
11692 InputInfo.PointersArray.emitRawPointer(CGF),
11693 InputInfo.SizesArray.emitRawPointer(CGF), MapTypesArray, MapNamesArray,
11694 InputInfo.MappersArray.emitRawPointer(CGF)});
11695
11696 // Select the right runtime function call for each standalone
11697 // directive.
11698 const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
11699 RuntimeFunction RTLFn;
11700 switch (D.getDirectiveKind()) {
11701 case OMPD_target_enter_data:
11702 RTLFn = HasNowait ? OMPRTL___tgt_target_data_begin_nowait_mapper
11703 : OMPRTL___tgt_target_data_begin_mapper;
11704 break;
11705 case OMPD_target_exit_data:
11706 RTLFn = HasNowait ? OMPRTL___tgt_target_data_end_nowait_mapper
11707 : OMPRTL___tgt_target_data_end_mapper;
11708 break;
11709 case OMPD_target_update:
11710 RTLFn = HasNowait ? OMPRTL___tgt_target_data_update_nowait_mapper
11711 : OMPRTL___tgt_target_data_update_mapper;
11712 break;
11713 case OMPD_parallel:
11714 case OMPD_for:
11715 case OMPD_parallel_for:
11716 case OMPD_parallel_master:
11717 case OMPD_parallel_sections:
11718 case OMPD_for_simd:
11719 case OMPD_parallel_for_simd:
11720 case OMPD_cancel:
11721 case OMPD_cancellation_point:
11722 case OMPD_ordered:
11723 case OMPD_threadprivate:
11724 case OMPD_allocate:
11725 case OMPD_task:
11726 case OMPD_simd:
11727 case OMPD_tile:
11728 case OMPD_unroll:
11729 case OMPD_sections:
11730 case OMPD_section:
11731 case OMPD_single:
11732 case OMPD_master:
11733 case OMPD_critical:
11734 case OMPD_taskyield:
11735 case OMPD_barrier:
11736 case OMPD_taskwait:
11737 case OMPD_taskgroup:
11738 case OMPD_atomic:
11739 case OMPD_flush:
11740 case OMPD_depobj:
11741 case OMPD_scan:
11742 case OMPD_teams:
11743 case OMPD_target_data:
11744 case OMPD_distribute:
11745 case OMPD_distribute_simd:
11746 case OMPD_distribute_parallel_for:
11747 case OMPD_distribute_parallel_for_simd:
11748 case OMPD_teams_distribute:
11749 case OMPD_teams_distribute_simd:
11750 case OMPD_teams_distribute_parallel_for:
11751 case OMPD_teams_distribute_parallel_for_simd:
11752 case OMPD_declare_simd:
11753 case OMPD_declare_variant:
11754 case OMPD_begin_declare_variant:
11755 case OMPD_end_declare_variant:
11756 case OMPD_declare_target:
11757 case OMPD_end_declare_target:
11758 case OMPD_declare_reduction:
11759 case OMPD_declare_mapper:
11760 case OMPD_taskloop:
11761 case OMPD_taskloop_simd:
11762 case OMPD_master_taskloop:
11763 case OMPD_master_taskloop_simd:
11764 case OMPD_parallel_master_taskloop:
11765 case OMPD_parallel_master_taskloop_simd:
11766 case OMPD_target:
11767 case OMPD_target_simd:
11768 case OMPD_target_teams_distribute:
11769 case OMPD_target_teams_distribute_simd:
11770 case OMPD_target_teams_distribute_parallel_for:
11771 case OMPD_target_teams_distribute_parallel_for_simd:
11772 case OMPD_target_teams:
11773 case OMPD_target_parallel:
11774 case OMPD_target_parallel_for:
11775 case OMPD_target_parallel_for_simd:
11776 case OMPD_requires:
11777 case OMPD_metadirective:
11778 case OMPD_unknown:
11779 default:
11780 llvm_unreachable("Unexpected standalone target data directive.");
11781 break;
11782 }
11783 if (HasNowait) {
11784 OffloadingArgs.push_back(llvm::Constant::getNullValue(CGF.Int32Ty));
11785 OffloadingArgs.push_back(llvm::Constant::getNullValue(CGF.VoidPtrTy));
11786 OffloadingArgs.push_back(llvm::Constant::getNullValue(CGF.Int32Ty));
11787 OffloadingArgs.push_back(llvm::Constant::getNullValue(CGF.VoidPtrTy));
11788 }
11789 CGF.EmitRuntimeCall(
11790 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), RTLFn),
11791 OffloadingArgs);
11792 };
11793
11794 auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
11795 &MapNamesArray](CodeGenFunction &CGF,
11796 PrePostActionTy &) {
11797 // Fill up the arrays with all the mapped variables.
11798 MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
11800 MappableExprsHandler MEHandler(D, CGF);
11801 genMapInfo(MEHandler, CGF, CombinedInfo, OMPBuilder);
11802 emitOffloadingArraysAndArgs(CGF, CombinedInfo, Info, OMPBuilder,
11803 /*IsNonContiguous=*/true, /*ForEndCall=*/false);
11804
11805 bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() ||
11806 D.hasClausesOfKind<OMPNowaitClause>();
11807
11808 InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
11809 InputInfo.BasePointersArray = Address(Info.RTArgs.BasePointersArray,
11810 CGF.VoidPtrTy, CGM.getPointerAlign());
11811 InputInfo.PointersArray = Address(Info.RTArgs.PointersArray, CGF.VoidPtrTy,
11812 CGM.getPointerAlign());
11813 InputInfo.SizesArray =
11814 Address(Info.RTArgs.SizesArray, CGF.Int64Ty, CGM.getPointerAlign());
11815 InputInfo.MappersArray =
11816 Address(Info.RTArgs.MappersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
11817 MapTypesArray = Info.RTArgs.MapTypesArray;
11818 MapNamesArray = Info.RTArgs.MapNamesArray;
11819 if (RequiresOuterTask)
11820 CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
11821 else
11822 emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
11823 };
11824
11825 if (IfCond) {
11826 emitIfClause(CGF, IfCond, TargetThenGen,
11827 [](CodeGenFunction &CGF, PrePostActionTy &) {});
11828 } else {
11829 RegionCodeGenTy ThenRCG(TargetThenGen);
11830 ThenRCG(CGF);
11831 }
11832}
11833
11834namespace {
11835 /// Kind of parameter in a function with 'declare simd' directive.
11836enum ParamKindTy {
11837 Linear,
11838 LinearRef,
11839 LinearUVal,
11840 LinearVal,
11841 Uniform,
11842 Vector,
11843};
11844/// Attribute set of the parameter.
11845struct ParamAttrTy {
11846 ParamKindTy Kind = Vector;
11847 llvm::APSInt StrideOrArg;
11848 llvm::APSInt Alignment;
11849 bool HasVarStride = false;
11850};
11851} // namespace
11852
11853static unsigned evaluateCDTSize(const FunctionDecl *FD,
11854 ArrayRef<ParamAttrTy> ParamAttrs) {
11855 // Every vector variant of a SIMD-enabled function has a vector length (VLEN).
11856 // If OpenMP clause "simdlen" is used, the VLEN is the value of the argument
11857 // of that clause. The VLEN value must be power of 2.
11858 // In other case the notion of the function`s "characteristic data type" (CDT)
11859 // is used to compute the vector length.
11860 // CDT is defined in the following order:
11861 // a) For non-void function, the CDT is the return type.
11862 // b) If the function has any non-uniform, non-linear parameters, then the
11863 // CDT is the type of the first such parameter.
11864 // c) If the CDT determined by a) or b) above is struct, union, or class
11865 // type which is pass-by-value (except for the type that maps to the
11866 // built-in complex data type), the characteristic data type is int.
11867 // d) If none of the above three cases is applicable, the CDT is int.
11868 // The VLEN is then determined based on the CDT and the size of vector
11869 // register of that ISA for which current vector version is generated. The
11870 // VLEN is computed using the formula below:
11871 // VLEN = sizeof(vector_register) / sizeof(CDT),
11872 // where vector register size specified in section 3.2.1 Registers and the
11873 // Stack Frame of original AMD64 ABI document.
11874 QualType RetType = FD->getReturnType();
11875 if (RetType.isNull())
11876 return 0;
11877 ASTContext &C = FD->getASTContext();
11878 QualType CDT;
11879 if (!RetType.isNull() && !RetType->isVoidType()) {
11880 CDT = RetType;
11881 } else {
11882 unsigned Offset = 0;
11883 if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) {
11884 if (ParamAttrs[Offset].Kind == Vector)
11885 CDT = C.getPointerType(C.getCanonicalTagType(MD->getParent()));
11886 ++Offset;
11887 }
11888 if (CDT.isNull()) {
11889 for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
11890 if (ParamAttrs[I + Offset].Kind == Vector) {
11891 CDT = FD->getParamDecl(I)->getType();
11892 break;
11893 }
11894 }
11895 }
11896 }
11897 if (CDT.isNull())
11898 CDT = C.IntTy;
11899 CDT = CDT->getCanonicalTypeUnqualified();
11900 if (CDT->isRecordType() || CDT->isUnionType())
11901 CDT = C.IntTy;
11902 return C.getTypeSize(CDT);
11903}
11904
11905/// Mangle the parameter part of the vector function name according to
11906/// their OpenMP classification. The mangling function is defined in
11907/// section 4.5 of the AAVFABI(2021Q1).
11908static std::string mangleVectorParameters(ArrayRef<ParamAttrTy> ParamAttrs) {
11909 SmallString<256> Buffer;
11910 llvm::raw_svector_ostream Out(Buffer);
11911 for (const auto &ParamAttr : ParamAttrs) {
11912 switch (ParamAttr.Kind) {
11913 case Linear:
11914 Out << 'l';
11915 break;
11916 case LinearRef:
11917 Out << 'R';
11918 break;
11919 case LinearUVal:
11920 Out << 'U';
11921 break;
11922 case LinearVal:
11923 Out << 'L';
11924 break;
11925 case Uniform:
11926 Out << 'u';
11927 break;
11928 case Vector:
11929 Out << 'v';
11930 break;
11931 }
11932 if (ParamAttr.HasVarStride)
11933 Out << "s" << ParamAttr.StrideOrArg;
11934 else if (ParamAttr.Kind == Linear || ParamAttr.Kind == LinearRef ||
11935 ParamAttr.Kind == LinearUVal || ParamAttr.Kind == LinearVal) {
11936 // Don't print the step value if it is not present or if it is
11937 // equal to 1.
11938 if (ParamAttr.StrideOrArg < 0)
11939 Out << 'n' << -ParamAttr.StrideOrArg;
11940 else if (ParamAttr.StrideOrArg != 1)
11941 Out << ParamAttr.StrideOrArg;
11942 }
11943
11944 if (!!ParamAttr.Alignment)
11945 Out << 'a' << ParamAttr.Alignment;
11946 }
11947
11948 return std::string(Out.str());
11949}
11950
11951static void
11952emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn,
11953 const llvm::APSInt &VLENVal,
11954 ArrayRef<ParamAttrTy> ParamAttrs,
11955 OMPDeclareSimdDeclAttr::BranchStateTy State) {
11956 struct ISADataTy {
11957 char ISA;
11958 unsigned VecRegSize;
11959 };
11960 ISADataTy ISAData[] = {
11961 {
11962 'b', 128
11963 }, // SSE
11964 {
11965 'c', 256
11966 }, // AVX
11967 {
11968 'd', 256
11969 }, // AVX2
11970 {
11971 'e', 512
11972 }, // AVX512
11973 };
11975 switch (State) {
11976 case OMPDeclareSimdDeclAttr::BS_Undefined:
11977 Masked.push_back('N');
11978 Masked.push_back('M');
11979 break;
11980 case OMPDeclareSimdDeclAttr::BS_Notinbranch:
11981 Masked.push_back('N');
11982 break;
11983 case OMPDeclareSimdDeclAttr::BS_Inbranch:
11984 Masked.push_back('M');
11985 break;
11986 }
11987 for (char Mask : Masked) {
11988 for (const ISADataTy &Data : ISAData) {
11989 SmallString<256> Buffer;
11990 llvm::raw_svector_ostream Out(Buffer);
11991 Out << "_ZGV" << Data.ISA << Mask;
11992 if (!VLENVal) {
11993 unsigned NumElts = evaluateCDTSize(FD, ParamAttrs);
11994 assert(NumElts && "Non-zero simdlen/cdtsize expected");
11995 Out << llvm::APSInt::getUnsigned(Data.VecRegSize / NumElts);
11996 } else {
11997 Out << VLENVal;
11998 }
11999 Out << mangleVectorParameters(ParamAttrs);
12000 Out << '_' << Fn->getName();
12001 Fn->addFnAttr(Out.str());
12002 }
12003 }
12004}
12005
12006// This are the Functions that are needed to mangle the name of the
12007// vector functions generated by the compiler, according to the rules
12008// defined in the "Vector Function ABI specifications for AArch64",
12009// available at
12010// https://developer.arm.com/products/software-development-tools/hpc/arm-compiler-for-hpc/vector-function-abi.
12011
12012/// Maps To Vector (MTV), as defined in 4.1.1 of the AAVFABI (2021Q1).
12013static bool getAArch64MTV(QualType QT, ParamKindTy Kind) {
12014 QT = QT.getCanonicalType();
12015
12016 if (QT->isVoidType())
12017 return false;
12018
12019 if (Kind == ParamKindTy::Uniform)
12020 return false;
12021
12022 if (Kind == ParamKindTy::LinearUVal || Kind == ParamKindTy::LinearRef)
12023 return false;
12024
12025 if ((Kind == ParamKindTy::Linear || Kind == ParamKindTy::LinearVal) &&
12026 !QT->isReferenceType())
12027 return false;
12028
12029 return true;
12030}
12031
12032/// Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI.
12034 QT = QT.getCanonicalType();
12035 unsigned Size = C.getTypeSize(QT);
12036
12037 // Only scalars and complex within 16 bytes wide set PVB to true.
12038 if (Size != 8 && Size != 16 && Size != 32 && Size != 64 && Size != 128)
12039 return false;
12040
12041 if (QT->isFloatingType())
12042 return true;
12043
12044 if (QT->isIntegerType())
12045 return true;
12046
12047 if (QT->isPointerType())
12048 return true;
12049
12050 // TODO: Add support for complex types (section 3.1.2, item 2).
12051
12052 return false;
12053}
12054
12055/// Computes the lane size (LS) of a return type or of an input parameter,
12056/// as defined by `LS(P)` in 3.2.1 of the AAVFABI.
12057/// TODO: Add support for references, section 3.2.1, item 1.
12058static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C) {
12059 if (!getAArch64MTV(QT, Kind) && QT.getCanonicalType()->isPointerType()) {
12061 if (getAArch64PBV(PTy, C))
12062 return C.getTypeSize(PTy);
12063 }
12064 if (getAArch64PBV(QT, C))
12065 return C.getTypeSize(QT);
12066
12067 return C.getTypeSize(C.getUIntPtrType());
12068}
12069
12070// Get Narrowest Data Size (NDS) and Widest Data Size (WDS) from the
12071// signature of the scalar function, as defined in 3.2.2 of the
12072// AAVFABI.
12073static std::tuple<unsigned, unsigned, bool>
12075 QualType RetType = FD->getReturnType().getCanonicalType();
12076
12077 ASTContext &C = FD->getASTContext();
12078
12079 bool OutputBecomesInput = false;
12080
12082 if (!RetType->isVoidType()) {
12083 Sizes.push_back(getAArch64LS(RetType, ParamKindTy::Vector, C));
12084 if (!getAArch64PBV(RetType, C) && getAArch64MTV(RetType, {}))
12085 OutputBecomesInput = true;
12086 }
12087 for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
12089 Sizes.push_back(getAArch64LS(QT, ParamAttrs[I].Kind, C));
12090 }
12091
12092 assert(!Sizes.empty() && "Unable to determine NDS and WDS.");
12093 // The LS of a function parameter / return value can only be a power
12094 // of 2, starting from 8 bits, up to 128.
12095 assert(llvm::all_of(Sizes,
12096 [](unsigned Size) {
12097 return Size == 8 || Size == 16 || Size == 32 ||
12098 Size == 64 || Size == 128;
12099 }) &&
12100 "Invalid size");
12101
12102 return std::make_tuple(*llvm::min_element(Sizes), *llvm::max_element(Sizes),
12103 OutputBecomesInput);
12104}
12105
12106// Function used to add the attribute. The parameter `VLEN` is
12107// templated to allow the use of "x" when targeting scalable functions
12108// for SVE.
12109template <typename T>
12110static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix,
12111 char ISA, StringRef ParSeq,
12112 StringRef MangledName, bool OutputBecomesInput,
12113 llvm::Function *Fn) {
12114 SmallString<256> Buffer;
12115 llvm::raw_svector_ostream Out(Buffer);
12116 Out << Prefix << ISA << LMask << VLEN;
12117 if (OutputBecomesInput)
12118 Out << "v";
12119 Out << ParSeq << "_" << MangledName;
12120 Fn->addFnAttr(Out.str());
12121}
12122
12123// Helper function to generate the Advanced SIMD names depending on
12124// the value of the NDS when simdlen is not present.
12125static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask,
12126 StringRef Prefix, char ISA,
12127 StringRef ParSeq, StringRef MangledName,
12128 bool OutputBecomesInput,
12129 llvm::Function *Fn) {
12130 switch (NDS) {
12131 case 8:
12132 addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
12133 OutputBecomesInput, Fn);
12134 addAArch64VectorName(16, Mask, Prefix, ISA, ParSeq, MangledName,
12135 OutputBecomesInput, Fn);
12136 break;
12137 case 16:
12138 addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
12139 OutputBecomesInput, Fn);
12140 addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
12141 OutputBecomesInput, Fn);
12142 break;
12143 case 32:
12144 addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
12145 OutputBecomesInput, Fn);
12146 addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
12147 OutputBecomesInput, Fn);
12148 break;
12149 case 64:
12150 case 128:
12151 addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
12152 OutputBecomesInput, Fn);
12153 break;
12154 default:
12155 llvm_unreachable("Scalar type is too wide.");
12156 }
12157}
12158
12159/// Emit vector function attributes for AArch64, as defined in the AAVFABI.
12161 CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN,
12162 ArrayRef<ParamAttrTy> ParamAttrs,
12163 OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName,
12164 char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc) {
12165
12166 // Get basic data for building the vector signature.
12167 const auto Data = getNDSWDS(FD, ParamAttrs);
12168 const unsigned NDS = std::get<0>(Data);
12169 const unsigned WDS = std::get<1>(Data);
12170 const bool OutputBecomesInput = std::get<2>(Data);
12171
12172 // Check the values provided via `simdlen` by the user.
12173 // 1. A `simdlen(1)` doesn't produce vector signatures,
12174 if (UserVLEN == 1) {
12175 CGM.getDiags().Report(SLoc, diag::warn_simdlen_1_no_effect);
12176 return;
12177 }
12178
12179 // 2. Section 3.3.1, item 1: user input must be a power of 2 for
12180 // Advanced SIMD output.
12181 if (ISA == 'n' && UserVLEN && !llvm::isPowerOf2_32(UserVLEN)) {
12182 CGM.getDiags().Report(SLoc, diag::warn_simdlen_requires_power_of_2);
12183 return;
12184 }
12185
12186 // 3. Section 3.4.1. SVE fixed lengh must obey the architectural
12187 // limits.
12188 if (ISA == 's' && UserVLEN != 0) {
12189 if ((UserVLEN * WDS > 2048) || (UserVLEN * WDS % 128 != 0)) {
12190 CGM.getDiags().Report(SLoc, diag::warn_simdlen_must_fit_lanes) << WDS;
12191 return;
12192 }
12193 }
12194
12195 // Sort out parameter sequence.
12196 const std::string ParSeq = mangleVectorParameters(ParamAttrs);
12197 StringRef Prefix = "_ZGV";
12198 // Generate simdlen from user input (if any).
12199 if (UserVLEN) {
12200 if (ISA == 's') {
12201 // SVE generates only a masked function.
12202 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
12203 OutputBecomesInput, Fn);
12204 } else {
12205 assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
12206 // Advanced SIMD generates one or two functions, depending on
12207 // the `[not]inbranch` clause.
12208 switch (State) {
12209 case OMPDeclareSimdDeclAttr::BS_Undefined:
12210 addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
12211 OutputBecomesInput, Fn);
12212 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
12213 OutputBecomesInput, Fn);
12214 break;
12215 case OMPDeclareSimdDeclAttr::BS_Notinbranch:
12216 addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
12217 OutputBecomesInput, Fn);
12218 break;
12219 case OMPDeclareSimdDeclAttr::BS_Inbranch:
12220 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
12221 OutputBecomesInput, Fn);
12222 break;
12223 }
12224 }
12225 } else {
12226 // If no user simdlen is provided, follow the AAVFABI rules for
12227 // generating the vector length.
12228 if (ISA == 's') {
12229 // SVE, section 3.4.1, item 1.
12230 addAArch64VectorName("x", "M", Prefix, ISA, ParSeq, MangledName,
12231 OutputBecomesInput, Fn);
12232 } else {
12233 assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
12234 // Advanced SIMD, Section 3.3.1 of the AAVFABI, generates one or
12235 // two vector names depending on the use of the clause
12236 // `[not]inbranch`.
12237 switch (State) {
12238 case OMPDeclareSimdDeclAttr::BS_Undefined:
12239 addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
12240 OutputBecomesInput, Fn);
12241 addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
12242 OutputBecomesInput, Fn);
12243 break;
12244 case OMPDeclareSimdDeclAttr::BS_Notinbranch:
12245 addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
12246 OutputBecomesInput, Fn);
12247 break;
12248 case OMPDeclareSimdDeclAttr::BS_Inbranch:
12249 addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
12250 OutputBecomesInput, Fn);
12251 break;
12252 }
12253 }
12254 }
12255}
12256
12258 llvm::Function *Fn) {
12259 ASTContext &C = CGM.getContext();
12260 FD = FD->getMostRecentDecl();
12261 while (FD) {
12262 // Map params to their positions in function decl.
12263 llvm::DenseMap<const Decl *, unsigned> ParamPositions;
12264 if (isa<CXXMethodDecl>(FD))
12265 ParamPositions.try_emplace(FD, 0);
12266 unsigned ParamPos = ParamPositions.size();
12267 for (const ParmVarDecl *P : FD->parameters()) {
12268 ParamPositions.try_emplace(P->getCanonicalDecl(), ParamPos);
12269 ++ParamPos;
12270 }
12271 for (const auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) {
12272 llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size());
12273 // Mark uniform parameters.
12274 for (const Expr *E : Attr->uniforms()) {
12275 E = E->IgnoreParenImpCasts();
12276 unsigned Pos;
12277 if (isa<CXXThisExpr>(E)) {
12278 Pos = ParamPositions[FD];
12279 } else {
12280 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
12281 ->getCanonicalDecl();
12282 auto It = ParamPositions.find(PVD);
12283 assert(It != ParamPositions.end() && "Function parameter not found");
12284 Pos = It->second;
12285 }
12286 ParamAttrs[Pos].Kind = Uniform;
12287 }
12288 // Get alignment info.
12289 auto *NI = Attr->alignments_begin();
12290 for (const Expr *E : Attr->aligneds()) {
12291 E = E->IgnoreParenImpCasts();
12292 unsigned Pos;
12293 QualType ParmTy;
12294 if (isa<CXXThisExpr>(E)) {
12295 Pos = ParamPositions[FD];
12296 ParmTy = E->getType();
12297 } else {
12298 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
12299 ->getCanonicalDecl();
12300 auto It = ParamPositions.find(PVD);
12301 assert(It != ParamPositions.end() && "Function parameter not found");
12302 Pos = It->second;
12303 ParmTy = PVD->getType();
12304 }
12305 ParamAttrs[Pos].Alignment =
12306 (*NI)
12307 ? (*NI)->EvaluateKnownConstInt(C)
12308 : llvm::APSInt::getUnsigned(
12309 C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy))
12310 .getQuantity());
12311 ++NI;
12312 }
12313 // Mark linear parameters.
12314 auto *SI = Attr->steps_begin();
12315 auto *MI = Attr->modifiers_begin();
12316 for (const Expr *E : Attr->linears()) {
12317 E = E->IgnoreParenImpCasts();
12318 unsigned Pos;
12319 bool IsReferenceType = false;
12320 // Rescaling factor needed to compute the linear parameter
12321 // value in the mangled name.
12322 unsigned PtrRescalingFactor = 1;
12323 if (isa<CXXThisExpr>(E)) {
12324 Pos = ParamPositions[FD];
12325 auto *P = cast<PointerType>(E->getType());
12326 PtrRescalingFactor = CGM.getContext()
12327 .getTypeSizeInChars(P->getPointeeType())
12328 .getQuantity();
12329 } else {
12330 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
12331 ->getCanonicalDecl();
12332 auto It = ParamPositions.find(PVD);
12333 assert(It != ParamPositions.end() && "Function parameter not found");
12334 Pos = It->second;
12335 if (auto *P = dyn_cast<PointerType>(PVD->getType()))
12336 PtrRescalingFactor = CGM.getContext()
12337 .getTypeSizeInChars(P->getPointeeType())
12338 .getQuantity();
12339 else if (PVD->getType()->isReferenceType()) {
12340 IsReferenceType = true;
12341 PtrRescalingFactor =
12342 CGM.getContext()
12343 .getTypeSizeInChars(PVD->getType().getNonReferenceType())
12344 .getQuantity();
12345 }
12346 }
12347 ParamAttrTy &ParamAttr = ParamAttrs[Pos];
12348 if (*MI == OMPC_LINEAR_ref)
12349 ParamAttr.Kind = LinearRef;
12350 else if (*MI == OMPC_LINEAR_uval)
12351 ParamAttr.Kind = LinearUVal;
12352 else if (IsReferenceType)
12353 ParamAttr.Kind = LinearVal;
12354 else
12355 ParamAttr.Kind = Linear;
12356 // Assuming a stride of 1, for `linear` without modifiers.
12357 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(1);
12358 if (*SI) {
12360 if (!(*SI)->EvaluateAsInt(Result, C, Expr::SE_AllowSideEffects)) {
12361 if (const auto *DRE =
12362 cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) {
12363 if (const auto *StridePVD =
12364 dyn_cast<ParmVarDecl>(DRE->getDecl())) {
12365 ParamAttr.HasVarStride = true;
12366 auto It = ParamPositions.find(StridePVD->getCanonicalDecl());
12367 assert(It != ParamPositions.end() &&
12368 "Function parameter not found");
12369 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(It->second);
12370 }
12371 }
12372 } else {
12373 ParamAttr.StrideOrArg = Result.Val.getInt();
12374 }
12375 }
12376 // If we are using a linear clause on a pointer, we need to
12377 // rescale the value of linear_step with the byte size of the
12378 // pointee type.
12379 if (!ParamAttr.HasVarStride &&
12380 (ParamAttr.Kind == Linear || ParamAttr.Kind == LinearRef))
12381 ParamAttr.StrideOrArg = ParamAttr.StrideOrArg * PtrRescalingFactor;
12382 ++SI;
12383 ++MI;
12384 }
12385 llvm::APSInt VLENVal;
12386 SourceLocation ExprLoc;
12387 const Expr *VLENExpr = Attr->getSimdlen();
12388 if (VLENExpr) {
12389 VLENVal = VLENExpr->EvaluateKnownConstInt(C);
12390 ExprLoc = VLENExpr->getExprLoc();
12391 }
12392 OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState();
12393 if (CGM.getTriple().isX86()) {
12394 emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State);
12395 } else if (CGM.getTriple().getArch() == llvm::Triple::aarch64) {
12396 unsigned VLEN = VLENVal.getExtValue();
12397 StringRef MangledName = Fn->getName();
12398 if (CGM.getTarget().hasFeature("sve"))
12399 emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
12400 MangledName, 's', 128, Fn, ExprLoc);
12401 else if (CGM.getTarget().hasFeature("neon"))
12402 emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
12403 MangledName, 'n', 128, Fn, ExprLoc);
12404 }
12405 }
12406 FD = FD->getPreviousDecl();
12407 }
12408}
12409
12410namespace {
12411/// Cleanup action for doacross support.
12412class DoacrossCleanupTy final : public EHScopeStack::Cleanup {
12413public:
12414 static const int DoacrossFinArgs = 2;
12415
12416private:
12417 llvm::FunctionCallee RTLFn;
12418 llvm::Value *Args[DoacrossFinArgs];
12419
12420public:
12421 DoacrossCleanupTy(llvm::FunctionCallee RTLFn,
12422 ArrayRef<llvm::Value *> CallArgs)
12423 : RTLFn(RTLFn) {
12424 assert(CallArgs.size() == DoacrossFinArgs);
12425 std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
12426 }
12427 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
12428 if (!CGF.HaveInsertPoint())
12429 return;
12430 CGF.EmitRuntimeCall(RTLFn, Args);
12431 }
12432};
12433} // namespace
12434
12436 const OMPLoopDirective &D,
12437 ArrayRef<Expr *> NumIterations) {
12438 if (!CGF.HaveInsertPoint())
12439 return;
12440
12441 ASTContext &C = CGM.getContext();
12442 QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
12443 RecordDecl *RD;
12444 if (KmpDimTy.isNull()) {
12445 // Build struct kmp_dim { // loop bounds info casted to kmp_int64
12446 // kmp_int64 lo; // lower
12447 // kmp_int64 up; // upper
12448 // kmp_int64 st; // stride
12449 // };
12450 RD = C.buildImplicitRecord("kmp_dim");
12451 RD->startDefinition();
12452 addFieldToRecordDecl(C, RD, Int64Ty);
12453 addFieldToRecordDecl(C, RD, Int64Ty);
12454 addFieldToRecordDecl(C, RD, Int64Ty);
12455 RD->completeDefinition();
12456 KmpDimTy = C.getCanonicalTagType(RD);
12457 } else {
12458 RD = KmpDimTy->castAsRecordDecl();
12459 }
12460 llvm::APInt Size(/*numBits=*/32, NumIterations.size());
12461 QualType ArrayTy = C.getConstantArrayType(KmpDimTy, Size, nullptr,
12463
12464 Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
12465 CGF.EmitNullInitialization(DimsAddr, ArrayTy);
12466 enum { LowerFD = 0, UpperFD, StrideFD };
12467 // Fill dims with data.
12468 for (unsigned I = 0, E = NumIterations.size(); I < E; ++I) {
12469 LValue DimsLVal = CGF.MakeAddrLValue(
12470 CGF.Builder.CreateConstArrayGEP(DimsAddr, I), KmpDimTy);
12471 // dims.upper = num_iterations;
12472 LValue UpperLVal = CGF.EmitLValueForField(
12473 DimsLVal, *std::next(RD->field_begin(), UpperFD));
12474 llvm::Value *NumIterVal = CGF.EmitScalarConversion(
12475 CGF.EmitScalarExpr(NumIterations[I]), NumIterations[I]->getType(),
12476 Int64Ty, NumIterations[I]->getExprLoc());
12477 CGF.EmitStoreOfScalar(NumIterVal, UpperLVal);
12478 // dims.stride = 1;
12479 LValue StrideLVal = CGF.EmitLValueForField(
12480 DimsLVal, *std::next(RD->field_begin(), StrideFD));
12481 CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1),
12482 StrideLVal);
12483 }
12484
12485 // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid,
12486 // kmp_int32 num_dims, struct kmp_dim * dims);
12487 llvm::Value *Args[] = {
12488 emitUpdateLocation(CGF, D.getBeginLoc()),
12489 getThreadID(CGF, D.getBeginLoc()),
12490 llvm::ConstantInt::getSigned(CGM.Int32Ty, NumIterations.size()),
12492 CGF.Builder.CreateConstArrayGEP(DimsAddr, 0).emitRawPointer(CGF),
12493 CGM.VoidPtrTy)};
12494
12495 llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
12496 CGM.getModule(), OMPRTL___kmpc_doacross_init);
12497 CGF.EmitRuntimeCall(RTLFn, Args);
12498 llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = {
12499 emitUpdateLocation(CGF, D.getEndLoc()), getThreadID(CGF, D.getEndLoc())};
12500 llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
12501 CGM.getModule(), OMPRTL___kmpc_doacross_fini);
12502 CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
12503 llvm::ArrayRef(FiniArgs));
12504}
12505
12506template <typename T>
12508 const T *C, llvm::Value *ULoc,
12509 llvm::Value *ThreadID) {
12510 QualType Int64Ty =
12511 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
12512 llvm::APInt Size(/*numBits=*/32, C->getNumLoops());
12514 Int64Ty, Size, nullptr, ArraySizeModifier::Normal, 0);
12515 Address CntAddr = CGF.CreateMemTemp(ArrayTy, ".cnt.addr");
12516 for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) {
12517 const Expr *CounterVal = C->getLoopData(I);
12518 assert(CounterVal);
12519 llvm::Value *CntVal = CGF.EmitScalarConversion(
12520 CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty,
12521 CounterVal->getExprLoc());
12522 CGF.EmitStoreOfScalar(CntVal, CGF.Builder.CreateConstArrayGEP(CntAddr, I),
12523 /*Volatile=*/false, Int64Ty);
12524 }
12525 llvm::Value *Args[] = {
12526 ULoc, ThreadID,
12527 CGF.Builder.CreateConstArrayGEP(CntAddr, 0).emitRawPointer(CGF)};
12528 llvm::FunctionCallee RTLFn;
12529 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
12530 OMPDoacrossKind<T> ODK;
12531 if (ODK.isSource(C)) {
12532 RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
12533 OMPRTL___kmpc_doacross_post);
12534 } else {
12535 assert(ODK.isSink(C) && "Expect sink modifier.");
12536 RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
12537 OMPRTL___kmpc_doacross_wait);
12538 }
12539 CGF.EmitRuntimeCall(RTLFn, Args);
12540}
12541
12543 const OMPDependClause *C) {
12545 CGF, CGM, C, emitUpdateLocation(CGF, C->getBeginLoc()),
12546 getThreadID(CGF, C->getBeginLoc()));
12547}
12548
12550 const OMPDoacrossClause *C) {
12552 CGF, CGM, C, emitUpdateLocation(CGF, C->getBeginLoc()),
12553 getThreadID(CGF, C->getBeginLoc()));
12554}
12555
12557 llvm::FunctionCallee Callee,
12558 ArrayRef<llvm::Value *> Args) const {
12559 assert(Loc.isValid() && "Outlined function call location must be valid.");
12561
12562 if (auto *Fn = dyn_cast<llvm::Function>(Callee.getCallee())) {
12563 if (Fn->doesNotThrow()) {
12564 CGF.EmitNounwindRuntimeCall(Fn, Args);
12565 return;
12566 }
12567 }
12568 CGF.EmitRuntimeCall(Callee, Args);
12569}
12570
12572 CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn,
12573 ArrayRef<llvm::Value *> Args) const {
12574 emitCall(CGF, Loc, OutlinedFn, Args);
12575}
12576
12578 if (const auto *FD = dyn_cast<FunctionDecl>(D))
12579 if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(FD))
12581}
12582
12584 const VarDecl *NativeParam,
12585 const VarDecl *TargetParam) const {
12586 return CGF.GetAddrOfLocalVar(NativeParam);
12587}
12588
12589/// Return allocator value from expression, or return a null allocator (default
12590/// when no allocator specified).
12591static llvm::Value *getAllocatorVal(CodeGenFunction &CGF,
12592 const Expr *Allocator) {
12593 llvm::Value *AllocVal;
12594 if (Allocator) {
12595 AllocVal = CGF.EmitScalarExpr(Allocator);
12596 // According to the standard, the original allocator type is a enum
12597 // (integer). Convert to pointer type, if required.
12598 AllocVal = CGF.EmitScalarConversion(AllocVal, Allocator->getType(),
12599 CGF.getContext().VoidPtrTy,
12600 Allocator->getExprLoc());
12601 } else {
12602 // If no allocator specified, it defaults to the null allocator.
12603 AllocVal = llvm::Constant::getNullValue(
12605 }
12606 return AllocVal;
12607}
12608
12609/// Return the alignment from an allocate directive if present.
12610static llvm::Value *getAlignmentValue(CodeGenModule &CGM, const VarDecl *VD) {
12611 std::optional<CharUnits> AllocateAlignment = CGM.getOMPAllocateAlignment(VD);
12612
12613 if (!AllocateAlignment)
12614 return nullptr;
12615
12616 return llvm::ConstantInt::get(CGM.SizeTy, AllocateAlignment->getQuantity());
12617}
12618
12620 const VarDecl *VD) {
12621 if (!VD)
12622 return Address::invalid();
12623 Address UntiedAddr = Address::invalid();
12624 Address UntiedRealAddr = Address::invalid();
12625 auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
12626 if (It != FunctionToUntiedTaskStackMap.end()) {
12627 const UntiedLocalVarsAddressesMap &UntiedData =
12628 UntiedLocalVarsStack[It->second];
12629 auto I = UntiedData.find(VD);
12630 if (I != UntiedData.end()) {
12631 UntiedAddr = I->second.first;
12632 UntiedRealAddr = I->second.second;
12633 }
12634 }
12635 const VarDecl *CVD = VD->getCanonicalDecl();
12636 if (CVD->hasAttr<OMPAllocateDeclAttr>()) {
12637 // Use the default allocation.
12638 if (!isAllocatableDecl(VD))
12639 return UntiedAddr;
12640 llvm::Value *Size;
12641 CharUnits Align = CGM.getContext().getDeclAlign(CVD);
12642 if (CVD->getType()->isVariablyModifiedType()) {
12643 Size = CGF.getTypeSize(CVD->getType());
12644 // Align the size: ((size + align - 1) / align) * align
12645 Size = CGF.Builder.CreateNUWAdd(
12646 Size, CGM.getSize(Align - CharUnits::fromQuantity(1)));
12647 Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align));
12648 Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align));
12649 } else {
12650 CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType());
12651 Size = CGM.getSize(Sz.alignTo(Align));
12652 }
12653 llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc());
12654 const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
12655 const Expr *Allocator = AA->getAllocator();
12656 llvm::Value *AllocVal = getAllocatorVal(CGF, Allocator);
12657 llvm::Value *Alignment = getAlignmentValue(CGM, CVD);
12659 Args.push_back(ThreadID);
12660 if (Alignment)
12661 Args.push_back(Alignment);
12662 Args.push_back(Size);
12663 Args.push_back(AllocVal);
12664 llvm::omp::RuntimeFunction FnID =
12665 Alignment ? OMPRTL___kmpc_aligned_alloc : OMPRTL___kmpc_alloc;
12666 llvm::Value *Addr = CGF.EmitRuntimeCall(
12667 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), FnID), Args,
12668 getName({CVD->getName(), ".void.addr"}));
12669 llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
12670 CGM.getModule(), OMPRTL___kmpc_free);
12671 QualType Ty = CGM.getContext().getPointerType(CVD->getType());
12673 Addr, CGF.ConvertTypeForMem(Ty), getName({CVD->getName(), ".addr"}));
12674 if (UntiedAddr.isValid())
12675 CGF.EmitStoreOfScalar(Addr, UntiedAddr, /*Volatile=*/false, Ty);
12676
12677 // Cleanup action for allocate support.
12678 class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup {
12679 llvm::FunctionCallee RTLFn;
12680 SourceLocation::UIntTy LocEncoding;
12681 Address Addr;
12682 const Expr *AllocExpr;
12683
12684 public:
12685 OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn,
12686 SourceLocation::UIntTy LocEncoding, Address Addr,
12687 const Expr *AllocExpr)
12688 : RTLFn(RTLFn), LocEncoding(LocEncoding), Addr(Addr),
12689 AllocExpr(AllocExpr) {}
12690 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
12691 if (!CGF.HaveInsertPoint())
12692 return;
12693 llvm::Value *Args[3];
12694 Args[0] = CGF.CGM.getOpenMPRuntime().getThreadID(
12695 CGF, SourceLocation::getFromRawEncoding(LocEncoding));
12697 Addr.emitRawPointer(CGF), CGF.VoidPtrTy);
12698 llvm::Value *AllocVal = getAllocatorVal(CGF, AllocExpr);
12699 Args[2] = AllocVal;
12700 CGF.EmitRuntimeCall(RTLFn, Args);
12701 }
12702 };
12703 Address VDAddr =
12704 UntiedRealAddr.isValid()
12705 ? UntiedRealAddr
12706 : Address(Addr, CGF.ConvertTypeForMem(CVD->getType()), Align);
12707 CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(
12708 NormalAndEHCleanup, FiniRTLFn, CVD->getLocation().getRawEncoding(),
12709 VDAddr, Allocator);
12710 if (UntiedRealAddr.isValid())
12711 if (auto *Region =
12712 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
12713 Region->emitUntiedSwitch(CGF);
12714 return VDAddr;
12715 }
12716 return UntiedAddr;
12717}
12718
12720 const VarDecl *VD) const {
12721 auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
12722 if (It == FunctionToUntiedTaskStackMap.end())
12723 return false;
12724 return UntiedLocalVarsStack[It->second].count(VD) > 0;
12725}
12726
12728 CodeGenModule &CGM, const OMPLoopDirective &S)
12729 : CGM(CGM), NeedToPush(S.hasClausesOfKind<OMPNontemporalClause>()) {
12730 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
12731 if (!NeedToPush)
12732 return;
12734 CGM.getOpenMPRuntime().NontemporalDeclsStack.emplace_back();
12735 for (const auto *C : S.getClausesOfKind<OMPNontemporalClause>()) {
12736 for (const Stmt *Ref : C->private_refs()) {
12737 const auto *SimpleRefExpr = cast<Expr>(Ref)->IgnoreParenImpCasts();
12738 const ValueDecl *VD;
12739 if (const auto *DRE = dyn_cast<DeclRefExpr>(SimpleRefExpr)) {
12740 VD = DRE->getDecl();
12741 } else {
12742 const auto *ME = cast<MemberExpr>(SimpleRefExpr);
12743 assert((ME->isImplicitCXXThis() ||
12744 isa<CXXThisExpr>(ME->getBase()->IgnoreParenImpCasts())) &&
12745 "Expected member of current class.");
12746 VD = ME->getMemberDecl();
12747 }
12748 DS.insert(VD);
12749 }
12750 }
12751}
12752
12754 if (!NeedToPush)
12755 return;
12756 CGM.getOpenMPRuntime().NontemporalDeclsStack.pop_back();
12757}
12758
12760 CodeGenFunction &CGF,
12761 const llvm::MapVector<CanonicalDeclPtr<const VarDecl>,
12762 std::pair<Address, Address>> &LocalVars)
12763 : CGM(CGF.CGM), NeedToPush(!LocalVars.empty()) {
12764 if (!NeedToPush)
12765 return;
12766 CGM.getOpenMPRuntime().FunctionToUntiedTaskStackMap.try_emplace(
12767 CGF.CurFn, CGM.getOpenMPRuntime().UntiedLocalVarsStack.size());
12768 CGM.getOpenMPRuntime().UntiedLocalVarsStack.push_back(LocalVars);
12769}
12770
12772 if (!NeedToPush)
12773 return;
12774 CGM.getOpenMPRuntime().UntiedLocalVarsStack.pop_back();
12775}
12776
12778 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
12779
12780 return llvm::any_of(
12781 CGM.getOpenMPRuntime().NontemporalDeclsStack,
12782 [VD](const NontemporalDeclsSet &Set) { return Set.contains(VD); });
12783}
12784
12785void CGOpenMPRuntime::LastprivateConditionalRAII::tryToDisableInnerAnalysis(
12786 const OMPExecutableDirective &S,
12787 llvm::DenseSet<CanonicalDeclPtr<const Decl>> &NeedToAddForLPCsAsDisabled)
12788 const {
12789 llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToCheckForLPCs;
12790 // Vars in target/task regions must be excluded completely.
12791 if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()) ||
12792 isOpenMPTaskingDirective(S.getDirectiveKind())) {
12794 getOpenMPCaptureRegions(CaptureRegions, S.getDirectiveKind());
12795 const CapturedStmt *CS = S.getCapturedStmt(CaptureRegions.front());
12796 for (const CapturedStmt::Capture &Cap : CS->captures()) {
12797 if (Cap.capturesVariable() || Cap.capturesVariableByCopy())
12798 NeedToCheckForLPCs.insert(Cap.getCapturedVar());
12799 }
12800 }
12801 // Exclude vars in private clauses.
12802 for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
12803 for (const Expr *Ref : C->varlist()) {
12804 if (!Ref->getType()->isScalarType())
12805 continue;
12806 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12807 if (!DRE)
12808 continue;
12809 NeedToCheckForLPCs.insert(DRE->getDecl());
12810 }
12811 }
12812 for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
12813 for (const Expr *Ref : C->varlist()) {
12814 if (!Ref->getType()->isScalarType())
12815 continue;
12816 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12817 if (!DRE)
12818 continue;
12819 NeedToCheckForLPCs.insert(DRE->getDecl());
12820 }
12821 }
12822 for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
12823 for (const Expr *Ref : C->varlist()) {
12824 if (!Ref->getType()->isScalarType())
12825 continue;
12826 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12827 if (!DRE)
12828 continue;
12829 NeedToCheckForLPCs.insert(DRE->getDecl());
12830 }
12831 }
12832 for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
12833 for (const Expr *Ref : C->varlist()) {
12834 if (!Ref->getType()->isScalarType())
12835 continue;
12836 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12837 if (!DRE)
12838 continue;
12839 NeedToCheckForLPCs.insert(DRE->getDecl());
12840 }
12841 }
12842 for (const auto *C : S.getClausesOfKind<OMPLinearClause>()) {
12843 for (const Expr *Ref : C->varlist()) {
12844 if (!Ref->getType()->isScalarType())
12845 continue;
12846 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12847 if (!DRE)
12848 continue;
12849 NeedToCheckForLPCs.insert(DRE->getDecl());
12850 }
12851 }
12852 for (const Decl *VD : NeedToCheckForLPCs) {
12853 for (const LastprivateConditionalData &Data :
12854 llvm::reverse(CGM.getOpenMPRuntime().LastprivateConditionalStack)) {
12855 if (Data.DeclToUniqueName.count(VD) > 0) {
12856 if (!Data.Disabled)
12857 NeedToAddForLPCsAsDisabled.insert(VD);
12858 break;
12859 }
12860 }
12861 }
12862}
12863
12864CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
12865 CodeGenFunction &CGF, const OMPExecutableDirective &S, LValue IVLVal)
12866 : CGM(CGF.CGM),
12867 Action((CGM.getLangOpts().OpenMP >= 50 &&
12868 llvm::any_of(S.getClausesOfKind<OMPLastprivateClause>(),
12869 [](const OMPLastprivateClause *C) {
12870 return C->getKind() ==
12871 OMPC_LASTPRIVATE_conditional;
12872 }))
12873 ? ActionToDo::PushAsLastprivateConditional
12874 : ActionToDo::DoNotPush) {
12875 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
12876 if (CGM.getLangOpts().OpenMP < 50 || Action == ActionToDo::DoNotPush)
12877 return;
12878 assert(Action == ActionToDo::PushAsLastprivateConditional &&
12879 "Expected a push action.");
12881 CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
12882 for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
12883 if (C->getKind() != OMPC_LASTPRIVATE_conditional)
12884 continue;
12885
12886 for (const Expr *Ref : C->varlist()) {
12887 Data.DeclToUniqueName.insert(std::make_pair(
12888 cast<DeclRefExpr>(Ref->IgnoreParenImpCasts())->getDecl(),
12889 SmallString<16>(generateUniqueName(CGM, "pl_cond", Ref))));
12890 }
12891 }
12892 Data.IVLVal = IVLVal;
12893 Data.Fn = CGF.CurFn;
12894}
12895
12896CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
12898 : CGM(CGF.CGM), Action(ActionToDo::DoNotPush) {
12899 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
12900 if (CGM.getLangOpts().OpenMP < 50)
12901 return;
12902 llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToAddForLPCsAsDisabled;
12903 tryToDisableInnerAnalysis(S, NeedToAddForLPCsAsDisabled);
12904 if (!NeedToAddForLPCsAsDisabled.empty()) {
12905 Action = ActionToDo::DisableLastprivateConditional;
12906 LastprivateConditionalData &Data =
12908 for (const Decl *VD : NeedToAddForLPCsAsDisabled)
12909 Data.DeclToUniqueName.try_emplace(VD);
12910 Data.Fn = CGF.CurFn;
12911 Data.Disabled = true;
12912 }
12913}
12914
12915CGOpenMPRuntime::LastprivateConditionalRAII
12917 CodeGenFunction &CGF, const OMPExecutableDirective &S) {
12918 return LastprivateConditionalRAII(CGF, S);
12919}
12920
12922 if (CGM.getLangOpts().OpenMP < 50)
12923 return;
12924 if (Action == ActionToDo::DisableLastprivateConditional) {
12925 assert(CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
12926 "Expected list of disabled private vars.");
12927 CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
12928 }
12929 if (Action == ActionToDo::PushAsLastprivateConditional) {
12930 assert(
12931 !CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
12932 "Expected list of lastprivate conditional vars.");
12933 CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
12934 }
12935}
12936
12938 const VarDecl *VD) {
12939 ASTContext &C = CGM.getContext();
12940 auto I = LastprivateConditionalToTypes.try_emplace(CGF.CurFn).first;
12941 QualType NewType;
12942 const FieldDecl *VDField;
12943 const FieldDecl *FiredField;
12944 LValue BaseLVal;
12945 auto VI = I->getSecond().find(VD);
12946 if (VI == I->getSecond().end()) {
12947 RecordDecl *RD = C.buildImplicitRecord("lasprivate.conditional");
12948 RD->startDefinition();
12949 VDField = addFieldToRecordDecl(C, RD, VD->getType().getNonReferenceType());
12950 FiredField = addFieldToRecordDecl(C, RD, C.CharTy);
12951 RD->completeDefinition();
12952 NewType = C.getCanonicalTagType(RD);
12953 Address Addr = CGF.CreateMemTemp(NewType, C.getDeclAlign(VD), VD->getName());
12954 BaseLVal = CGF.MakeAddrLValue(Addr, NewType, AlignmentSource::Decl);
12955 I->getSecond().try_emplace(VD, NewType, VDField, FiredField, BaseLVal);
12956 } else {
12957 NewType = std::get<0>(VI->getSecond());
12958 VDField = std::get<1>(VI->getSecond());
12959 FiredField = std::get<2>(VI->getSecond());
12960 BaseLVal = std::get<3>(VI->getSecond());
12961 }
12962 LValue FiredLVal =
12963 CGF.EmitLValueForField(BaseLVal, FiredField);
12965 llvm::ConstantInt::getNullValue(CGF.ConvertTypeForMem(C.CharTy)),
12966 FiredLVal);
12967 return CGF.EmitLValueForField(BaseLVal, VDField).getAddress();
12968}
12969
12970namespace {
12971/// Checks if the lastprivate conditional variable is referenced in LHS.
12972class LastprivateConditionalRefChecker final
12973 : public ConstStmtVisitor<LastprivateConditionalRefChecker, bool> {
12975 const Expr *FoundE = nullptr;
12976 const Decl *FoundD = nullptr;
12977 StringRef UniqueDeclName;
12978 LValue IVLVal;
12979 llvm::Function *FoundFn = nullptr;
12980 SourceLocation Loc;
12981
12982public:
12983 bool VisitDeclRefExpr(const DeclRefExpr *E) {
12985 llvm::reverse(LPM)) {
12986 auto It = D.DeclToUniqueName.find(E->getDecl());
12987 if (It == D.DeclToUniqueName.end())
12988 continue;
12989 if (D.Disabled)
12990 return false;
12991 FoundE = E;
12992 FoundD = E->getDecl()->getCanonicalDecl();
12993 UniqueDeclName = It->second;
12994 IVLVal = D.IVLVal;
12995 FoundFn = D.Fn;
12996 break;
12997 }
12998 return FoundE == E;
12999 }
13000 bool VisitMemberExpr(const MemberExpr *E) {
13002 return false;
13004 llvm::reverse(LPM)) {
13005 auto It = D.DeclToUniqueName.find(E->getMemberDecl());
13006 if (It == D.DeclToUniqueName.end())
13007 continue;
13008 if (D.Disabled)
13009 return false;
13010 FoundE = E;
13011 FoundD = E->getMemberDecl()->getCanonicalDecl();
13012 UniqueDeclName = It->second;
13013 IVLVal = D.IVLVal;
13014 FoundFn = D.Fn;
13015 break;
13016 }
13017 return FoundE == E;
13018 }
13019 bool VisitStmt(const Stmt *S) {
13020 for (const Stmt *Child : S->children()) {
13021 if (!Child)
13022 continue;
13023 if (const auto *E = dyn_cast<Expr>(Child))
13024 if (!E->isGLValue())
13025 continue;
13026 if (Visit(Child))
13027 return true;
13028 }
13029 return false;
13030 }
13031 explicit LastprivateConditionalRefChecker(
13032 ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM)
13033 : LPM(LPM) {}
13034 std::tuple<const Expr *, const Decl *, StringRef, LValue, llvm::Function *>
13035 getFoundData() const {
13036 return std::make_tuple(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn);
13037 }
13038};
13039} // namespace
13040
13042 LValue IVLVal,
13043 StringRef UniqueDeclName,
13044 LValue LVal,
13045 SourceLocation Loc) {
13046 // Last updated loop counter for the lastprivate conditional var.
13047 // int<xx> last_iv = 0;
13048 llvm::Type *LLIVTy = CGF.ConvertTypeForMem(IVLVal.getType());
13049 llvm::Constant *LastIV = OMPBuilder.getOrCreateInternalVariable(
13050 LLIVTy, getName({UniqueDeclName, "iv"}));
13051 cast<llvm::GlobalVariable>(LastIV)->setAlignment(
13052 IVLVal.getAlignment().getAsAlign());
13053 LValue LastIVLVal =
13054 CGF.MakeNaturalAlignRawAddrLValue(LastIV, IVLVal.getType());
13055
13056 // Last value of the lastprivate conditional.
13057 // decltype(priv_a) last_a;
13058 llvm::GlobalVariable *Last = OMPBuilder.getOrCreateInternalVariable(
13059 CGF.ConvertTypeForMem(LVal.getType()), UniqueDeclName);
13060 cast<llvm::GlobalVariable>(Last)->setAlignment(
13061 LVal.getAlignment().getAsAlign());
13062 LValue LastLVal =
13063 CGF.MakeRawAddrLValue(Last, LVal.getType(), LVal.getAlignment());
13064
13065 // Global loop counter. Required to handle inner parallel-for regions.
13066 // iv
13067 llvm::Value *IVVal = CGF.EmitLoadOfScalar(IVLVal, Loc);
13068
13069 // #pragma omp critical(a)
13070 // if (last_iv <= iv) {
13071 // last_iv = iv;
13072 // last_a = priv_a;
13073 // }
13074 auto &&CodeGen = [&LastIVLVal, &IVLVal, IVVal, &LVal, &LastLVal,
13075 Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
13076 Action.Enter(CGF);
13077 llvm::Value *LastIVVal = CGF.EmitLoadOfScalar(LastIVLVal, Loc);
13078 // (last_iv <= iv) ? Check if the variable is updated and store new
13079 // value in global var.
13080 llvm::Value *CmpRes;
13081 if (IVLVal.getType()->isSignedIntegerType()) {
13082 CmpRes = CGF.Builder.CreateICmpSLE(LastIVVal, IVVal);
13083 } else {
13084 assert(IVLVal.getType()->isUnsignedIntegerType() &&
13085 "Loop iteration variable must be integer.");
13086 CmpRes = CGF.Builder.CreateICmpULE(LastIVVal, IVVal);
13087 }
13088 llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lp_cond_then");
13089 llvm::BasicBlock *ExitBB = CGF.createBasicBlock("lp_cond_exit");
13090 CGF.Builder.CreateCondBr(CmpRes, ThenBB, ExitBB);
13091 // {
13092 CGF.EmitBlock(ThenBB);
13093
13094 // last_iv = iv;
13095 CGF.EmitStoreOfScalar(IVVal, LastIVLVal);
13096
13097 // last_a = priv_a;
13098 switch (CGF.getEvaluationKind(LVal.getType())) {
13099 case TEK_Scalar: {
13100 llvm::Value *PrivVal = CGF.EmitLoadOfScalar(LVal, Loc);
13101 CGF.EmitStoreOfScalar(PrivVal, LastLVal);
13102 break;
13103 }
13104 case TEK_Complex: {
13105 CodeGenFunction::ComplexPairTy PrivVal = CGF.EmitLoadOfComplex(LVal, Loc);
13106 CGF.EmitStoreOfComplex(PrivVal, LastLVal, /*isInit=*/false);
13107 break;
13108 }
13109 case TEK_Aggregate:
13110 llvm_unreachable(
13111 "Aggregates are not supported in lastprivate conditional.");
13112 }
13113 // }
13114 CGF.EmitBranch(ExitBB);
13115 // There is no need to emit line number for unconditional branch.
13117 CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
13118 };
13119
13120 if (CGM.getLangOpts().OpenMPSimd) {
13121 // Do not emit as a critical region as no parallel region could be emitted.
13122 RegionCodeGenTy ThenRCG(CodeGen);
13123 ThenRCG(CGF);
13124 } else {
13125 emitCriticalRegion(CGF, UniqueDeclName, CodeGen, Loc);
13126 }
13127}
13128
13130 const Expr *LHS) {
13131 if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
13132 return;
13133 LastprivateConditionalRefChecker Checker(LastprivateConditionalStack);
13134 if (!Checker.Visit(LHS))
13135 return;
13136 const Expr *FoundE;
13137 const Decl *FoundD;
13138 StringRef UniqueDeclName;
13139 LValue IVLVal;
13140 llvm::Function *FoundFn;
13141 std::tie(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn) =
13142 Checker.getFoundData();
13143 if (FoundFn != CGF.CurFn) {
13144 // Special codegen for inner parallel regions.
13145 // ((struct.lastprivate.conditional*)&priv_a)->Fired = 1;
13146 auto It = LastprivateConditionalToTypes[FoundFn].find(FoundD);
13147 assert(It != LastprivateConditionalToTypes[FoundFn].end() &&
13148 "Lastprivate conditional is not found in outer region.");
13149 QualType StructTy = std::get<0>(It->getSecond());
13150 const FieldDecl* FiredDecl = std::get<2>(It->getSecond());
13151 LValue PrivLVal = CGF.EmitLValue(FoundE);
13153 PrivLVal.getAddress(),
13154 CGF.ConvertTypeForMem(CGF.getContext().getPointerType(StructTy)),
13155 CGF.ConvertTypeForMem(StructTy));
13156 LValue BaseLVal =
13157 CGF.MakeAddrLValue(StructAddr, StructTy, AlignmentSource::Decl);
13158 LValue FiredLVal = CGF.EmitLValueForField(BaseLVal, FiredDecl);
13159 CGF.EmitAtomicStore(RValue::get(llvm::ConstantInt::get(
13160 CGF.ConvertTypeForMem(FiredDecl->getType()), 1)),
13161 FiredLVal, llvm::AtomicOrdering::Unordered,
13162 /*IsVolatile=*/true, /*isInit=*/false);
13163 return;
13164 }
13165
13166 // Private address of the lastprivate conditional in the current context.
13167 // priv_a
13168 LValue LVal = CGF.EmitLValue(FoundE);
13169 emitLastprivateConditionalUpdate(CGF, IVLVal, UniqueDeclName, LVal,
13170 FoundE->getExprLoc());
13171}
13172
13175 const llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> &IgnoredDecls) {
13176 if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
13177 return;
13178 auto Range = llvm::reverse(LastprivateConditionalStack);
13179 auto It = llvm::find_if(
13180 Range, [](const LastprivateConditionalData &D) { return !D.Disabled; });
13181 if (It == Range.end() || It->Fn != CGF.CurFn)
13182 return;
13183 auto LPCI = LastprivateConditionalToTypes.find(It->Fn);
13184 assert(LPCI != LastprivateConditionalToTypes.end() &&
13185 "Lastprivates must be registered already.");
13187 getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind());
13188 const CapturedStmt *CS = D.getCapturedStmt(CaptureRegions.back());
13189 for (const auto &Pair : It->DeclToUniqueName) {
13190 const auto *VD = cast<VarDecl>(Pair.first->getCanonicalDecl());
13191 if (!CS->capturesVariable(VD) || IgnoredDecls.contains(VD))
13192 continue;
13193 auto I = LPCI->getSecond().find(Pair.first);
13194 assert(I != LPCI->getSecond().end() &&
13195 "Lastprivate must be rehistered already.");
13196 // bool Cmp = priv_a.Fired != 0;
13197 LValue BaseLVal = std::get<3>(I->getSecond());
13198 LValue FiredLVal =
13199 CGF.EmitLValueForField(BaseLVal, std::get<2>(I->getSecond()));
13200 llvm::Value *Res = CGF.EmitLoadOfScalar(FiredLVal, D.getBeginLoc());
13201 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Res);
13202 llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lpc.then");
13203 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("lpc.done");
13204 // if (Cmp) {
13205 CGF.Builder.CreateCondBr(Cmp, ThenBB, DoneBB);
13206 CGF.EmitBlock(ThenBB);
13207 Address Addr = CGF.GetAddrOfLocalVar(VD);
13208 LValue LVal;
13209 if (VD->getType()->isReferenceType())
13210 LVal = CGF.EmitLoadOfReferenceLValue(Addr, VD->getType(),
13212 else
13213 LVal = CGF.MakeAddrLValue(Addr, VD->getType().getNonReferenceType(),
13215 emitLastprivateConditionalUpdate(CGF, It->IVLVal, Pair.second, LVal,
13216 D.getBeginLoc());
13218 CGF.EmitBlock(DoneBB, /*IsFinal=*/true);
13219 // }
13220 }
13221}
13222
13224 CodeGenFunction &CGF, LValue PrivLVal, const VarDecl *VD,
13225 SourceLocation Loc) {
13226 if (CGF.getLangOpts().OpenMP < 50)
13227 return;
13228 auto It = LastprivateConditionalStack.back().DeclToUniqueName.find(VD);
13229 assert(It != LastprivateConditionalStack.back().DeclToUniqueName.end() &&
13230 "Unknown lastprivate conditional variable.");
13231 StringRef UniqueName = It->second;
13232 llvm::GlobalVariable *GV = CGM.getModule().getNamedGlobal(UniqueName);
13233 // The variable was not updated in the region - exit.
13234 if (!GV)
13235 return;
13236 LValue LPLVal = CGF.MakeRawAddrLValue(
13237 GV, PrivLVal.getType().getNonReferenceType(), PrivLVal.getAlignment());
13238 llvm::Value *Res = CGF.EmitLoadOfScalar(LPLVal, Loc);
13239 CGF.EmitStoreOfScalar(Res, PrivLVal);
13240}
13241
13244 const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
13245 const RegionCodeGenTy &CodeGen) {
13246 llvm_unreachable("Not supported in SIMD-only mode");
13247}
13248
13251 const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
13252 const RegionCodeGenTy &CodeGen) {
13253 llvm_unreachable("Not supported in SIMD-only mode");
13254}
13255
13257 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
13258 const VarDecl *PartIDVar, const VarDecl *TaskTVar,
13259 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
13260 bool Tied, unsigned &NumberOfParts) {
13261 llvm_unreachable("Not supported in SIMD-only mode");
13262}
13263
13265 CodeGenFunction &CGF, SourceLocation Loc, llvm::Function *OutlinedFn,
13266 ArrayRef<llvm::Value *> CapturedVars, const Expr *IfCond,
13267 llvm::Value *NumThreads, OpenMPNumThreadsClauseModifier NumThreadsModifier,
13268 OpenMPSeverityClauseKind Severity, const Expr *Message) {
13269 llvm_unreachable("Not supported in SIMD-only mode");
13270}
13271
13273 CodeGenFunction &CGF, StringRef CriticalName,
13274 const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc,
13275 const Expr *Hint) {
13276 llvm_unreachable("Not supported in SIMD-only mode");
13277}
13278
13280 const RegionCodeGenTy &MasterOpGen,
13281 SourceLocation Loc) {
13282 llvm_unreachable("Not supported in SIMD-only mode");
13283}
13284
13286 const RegionCodeGenTy &MasterOpGen,
13287 SourceLocation Loc,
13288 const Expr *Filter) {
13289 llvm_unreachable("Not supported in SIMD-only mode");
13290}
13291
13293 SourceLocation Loc) {
13294 llvm_unreachable("Not supported in SIMD-only mode");
13295}
13296
13298 CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen,
13299 SourceLocation Loc) {
13300 llvm_unreachable("Not supported in SIMD-only mode");
13301}
13302
13304 CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen,
13305 SourceLocation Loc, ArrayRef<const Expr *> CopyprivateVars,
13307 ArrayRef<const Expr *> AssignmentOps) {
13308 llvm_unreachable("Not supported in SIMD-only mode");
13309}
13310
13312 const RegionCodeGenTy &OrderedOpGen,
13313 SourceLocation Loc,
13314 bool IsThreads) {
13315 llvm_unreachable("Not supported in SIMD-only mode");
13316}
13317
13319 SourceLocation Loc,
13321 bool EmitChecks,
13322 bool ForceSimpleCall) {
13323 llvm_unreachable("Not supported in SIMD-only mode");
13324}
13325
13328 const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
13329 bool Ordered, const DispatchRTInput &DispatchValues) {
13330 llvm_unreachable("Not supported in SIMD-only mode");
13331}
13332
13334 SourceLocation Loc) {
13335 llvm_unreachable("Not supported in SIMD-only mode");
13336}
13337
13340 const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) {
13341 llvm_unreachable("Not supported in SIMD-only mode");
13342}
13343
13346 OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) {
13347 llvm_unreachable("Not supported in SIMD-only mode");
13348}
13349
13351 SourceLocation Loc,
13352 unsigned IVSize,
13353 bool IVSigned) {
13354 llvm_unreachable("Not supported in SIMD-only mode");
13355}
13356
13358 SourceLocation Loc,
13359 OpenMPDirectiveKind DKind) {
13360 llvm_unreachable("Not supported in SIMD-only mode");
13361}
13362
13364 SourceLocation Loc,
13365 unsigned IVSize, bool IVSigned,
13366 Address IL, Address LB,
13367 Address UB, Address ST) {
13368 llvm_unreachable("Not supported in SIMD-only mode");
13369}
13370
13372 CodeGenFunction &CGF, llvm::Value *NumThreads, SourceLocation Loc,
13374 SourceLocation SeverityLoc, const Expr *Message,
13375 SourceLocation MessageLoc) {
13376 llvm_unreachable("Not supported in SIMD-only mode");
13377}
13378
13380 ProcBindKind ProcBind,
13381 SourceLocation Loc) {
13382 llvm_unreachable("Not supported in SIMD-only mode");
13383}
13384
13386 const VarDecl *VD,
13387 Address VDAddr,
13388 SourceLocation Loc) {
13389 llvm_unreachable("Not supported in SIMD-only mode");
13390}
13391
13393 const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit,
13394 CodeGenFunction *CGF) {
13395 llvm_unreachable("Not supported in SIMD-only mode");
13396}
13397
13399 CodeGenFunction &CGF, QualType VarType, StringRef Name) {
13400 llvm_unreachable("Not supported in SIMD-only mode");
13401}
13402
13405 SourceLocation Loc,
13406 llvm::AtomicOrdering AO) {
13407 llvm_unreachable("Not supported in SIMD-only mode");
13408}
13409
13411 const OMPExecutableDirective &D,
13412 llvm::Function *TaskFunction,
13413 QualType SharedsTy, Address Shareds,
13414 const Expr *IfCond,
13415 const OMPTaskDataTy &Data) {
13416 llvm_unreachable("Not supported in SIMD-only mode");
13417}
13418
13421 llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds,
13422 const Expr *IfCond, const OMPTaskDataTy &Data) {
13423 llvm_unreachable("Not supported in SIMD-only mode");
13424}
13425
13429 ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) {
13430 assert(Options.SimpleReduction && "Only simple reduction is expected.");
13431 CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs,
13432 ReductionOps, Options);
13433}
13434
13437 ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
13438 llvm_unreachable("Not supported in SIMD-only mode");
13439}
13440
13442 SourceLocation Loc,
13443 bool IsWorksharingReduction) {
13444 llvm_unreachable("Not supported in SIMD-only mode");
13445}
13446
13448 SourceLocation Loc,
13449 ReductionCodeGen &RCG,
13450 unsigned N) {
13451 llvm_unreachable("Not supported in SIMD-only mode");
13452}
13453
13455 SourceLocation Loc,
13456 llvm::Value *ReductionsPtr,
13457 LValue SharedLVal) {
13458 llvm_unreachable("Not supported in SIMD-only mode");
13459}
13460
13462 SourceLocation Loc,
13463 const OMPTaskDataTy &Data) {
13464 llvm_unreachable("Not supported in SIMD-only mode");
13465}
13466
13469 OpenMPDirectiveKind CancelRegion) {
13470 llvm_unreachable("Not supported in SIMD-only mode");
13471}
13472
13474 SourceLocation Loc, const Expr *IfCond,
13475 OpenMPDirectiveKind CancelRegion) {
13476 llvm_unreachable("Not supported in SIMD-only mode");
13477}
13478
13480 const OMPExecutableDirective &D, StringRef ParentName,
13481 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
13482 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
13483 llvm_unreachable("Not supported in SIMD-only mode");
13484}
13485
13488 llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
13489 llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
13490 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
13491 const OMPLoopDirective &D)>
13492 SizeEmitter) {
13493 llvm_unreachable("Not supported in SIMD-only mode");
13494}
13495
13497 llvm_unreachable("Not supported in SIMD-only mode");
13498}
13499
13501 llvm_unreachable("Not supported in SIMD-only mode");
13502}
13503
13505 return false;
13506}
13507
13509 const OMPExecutableDirective &D,
13510 SourceLocation Loc,
13511 llvm::Function *OutlinedFn,
13512 ArrayRef<llvm::Value *> CapturedVars) {
13513 llvm_unreachable("Not supported in SIMD-only mode");
13514}
13515
13517 const Expr *NumTeams,
13518 const Expr *ThreadLimit,
13519 SourceLocation Loc) {
13520 llvm_unreachable("Not supported in SIMD-only mode");
13521}
13522
13524 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
13525 const Expr *Device, const RegionCodeGenTy &CodeGen,
13527 llvm_unreachable("Not supported in SIMD-only mode");
13528}
13529
13531 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
13532 const Expr *Device) {
13533 llvm_unreachable("Not supported in SIMD-only mode");
13534}
13535
13537 const OMPLoopDirective &D,
13538 ArrayRef<Expr *> NumIterations) {
13539 llvm_unreachable("Not supported in SIMD-only mode");
13540}
13541
13543 const OMPDependClause *C) {
13544 llvm_unreachable("Not supported in SIMD-only mode");
13545}
13546
13548 const OMPDoacrossClause *C) {
13549 llvm_unreachable("Not supported in SIMD-only mode");
13550}
13551
13552const VarDecl *
13554 const VarDecl *NativeParam) const {
13555 llvm_unreachable("Not supported in SIMD-only mode");
13556}
13557
13558Address
13560 const VarDecl *NativeParam,
13561 const VarDecl *TargetParam) const {
13562 llvm_unreachable("Not supported in SIMD-only mode");
13563}
#define V(N, I)
static llvm::Value * emitCopyprivateCopyFunction(CodeGenModule &CGM, llvm::Type *ArgsElemType, ArrayRef< const Expr * > CopyprivateVars, ArrayRef< const Expr * > DestExprs, ArrayRef< const Expr * > SrcExprs, ArrayRef< const Expr * > AssignmentOps, SourceLocation Loc)
static StringRef getIdentStringFromSourceLocation(CodeGenFunction &CGF, SourceLocation Loc, SmallString< 128 > &Buffer)
static void emitOffloadingArraysAndArgs(CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo, CGOpenMPRuntime::TargetDataInfo &Info, llvm::OpenMPIRBuilder &OMPBuilder, bool IsNonContiguous=false, bool ForEndCall=false)
Emit the arrays used to pass the captures and map information to the offloading runtime library.
static RecordDecl * createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy, ArrayRef< PrivateDataTy > Privates)
static void emitInitWithReductionInitializer(CodeGenFunction &CGF, const OMPDeclareReductionDecl *DRD, const Expr *InitOp, Address Private, Address Original, QualType Ty)
static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy, Address OriginalBaseAddress, llvm::Value *Addr)
static void emitPrivatesInit(CodeGenFunction &CGF, const OMPExecutableDirective &D, Address KmpTaskSharedsPtr, LValue TDBase, const RecordDecl *KmpTaskTWithPrivatesQTyRD, QualType SharedsTy, QualType SharedsPtrTy, const OMPTaskDataTy &Data, ArrayRef< PrivateDataTy > Privates, bool ForDup)
Emit initialization for private variables in task-based directives.
static void emitClauseForBareTargetDirective(CodeGenFunction &CGF, const OMPExecutableDirective &D, llvm::SmallVectorImpl< llvm::Value * > &Values)
static llvm::Value * emitDestructorsFunction(CodeGenModule &CGM, SourceLocation Loc, QualType KmpInt32Ty, QualType KmpTaskTWithPrivatesPtrQTy, QualType KmpTaskTWithPrivatesQTy)
static unsigned evaluateCDTSize(const FunctionDecl *FD, ArrayRef< ParamAttrTy > ParamAttrs)
static void EmitOMPAggregateReduction(CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar, const VarDecl *RHSVar, const llvm::function_ref< void(CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *)> &RedOpGen, const Expr *XExpr=nullptr, const Expr *EExpr=nullptr, const Expr *UpExpr=nullptr)
Emit reduction operation for each element of array (required for array sections) LHS op = RHS.
static void emitTargetCallFallback(CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn, const OMPExecutableDirective &D, llvm::SmallVectorImpl< llvm::Value * > &CapturedVars, bool RequiresOuterTask, const CapturedStmt &CS, bool OffloadingMandatory, CodeGenFunction &CGF)
static llvm::Value * emitReduceInitFunction(CodeGenModule &CGM, SourceLocation Loc, ReductionCodeGen &RCG, unsigned N)
Emits reduction initializer function:
static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion)
static void emitDependData(CodeGenFunction &CGF, QualType &KmpDependInfoTy, llvm::PointerUnion< unsigned *, LValue * > Pos, const OMPTaskDataTy::DependData &Data, Address DependenciesArray)
static llvm::Value * emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc, const OMPTaskDataTy &Data, QualType PrivatesQTy, ArrayRef< PrivateDataTy > Privates)
Emit a privates mapping function for correct handling of private and firstprivate variables.
static llvm::Value * emitReduceCombFunction(CodeGenModule &CGM, SourceLocation Loc, ReductionCodeGen &RCG, unsigned N, const Expr *ReductionOp, const Expr *LHS, const Expr *RHS, const Expr *PrivateRef)
Emits reduction combiner function:
static RecordDecl * createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef< PrivateDataTy > Privates)
static llvm::Value * getAllocatorVal(CodeGenFunction &CGF, const Expr *Allocator)
Return allocator value from expression, or return a null allocator (default when no allocator specifi...
static llvm::Function * emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc, OpenMPDirectiveKind Kind, QualType KmpInt32Ty, QualType KmpTaskTWithPrivatesPtrQTy, QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy, QualType SharedsPtrTy, llvm::Function *TaskFunction, llvm::Value *TaskPrivatesMap)
Emit a proxy function which accepts kmp_task_t as the second argument.
static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix, char ISA, StringRef ParSeq, StringRef MangledName, bool OutputBecomesInput, llvm::Function *Fn)
static bool isAllocatableDecl(const VarDecl *VD)
static llvm::Value * getAlignmentValue(CodeGenModule &CGM, const VarDecl *VD)
Return the alignment from an allocate directive if present.
static void emitTargetCallKernelLaunch(CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn, const OMPExecutableDirective &D, llvm::SmallVectorImpl< llvm::Value * > &CapturedVars, bool RequiresOuterTask, const CapturedStmt &CS, bool OffloadingMandatory, llvm::PointerIntPair< const Expr *, 2, OpenMPDeviceClauseModifier > Device, llvm::Value *OutlinedFnID, CodeGenFunction::OMPTargetDataInfo &InputInfo, llvm::Value *&MapTypesArray, llvm::Value *&MapNamesArray, llvm::function_ref< llvm::Value *(CodeGenFunction &CGF, const OMPLoopDirective &D)> SizeEmitter, CodeGenFunction &CGF, CodeGenModule &CGM)
static llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind convertCaptureClause(const VarDecl *VD)
static std::tuple< unsigned, unsigned, bool > getNDSWDS(const FunctionDecl *FD, ArrayRef< ParamAttrTy > ParamAttrs)
static const OMPExecutableDirective * getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D)
Check for inner distribute directive.
static std::pair< llvm::Value *, llvm::Value * > getPointerAndSize(CodeGenFunction &CGF, const Expr *E)
static const VarDecl * getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE)
static bool isTrivial(ASTContext &Ctx, const Expr *E)
Checks if the expression is constant or does not have non-trivial function calls.
static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind, bool Chunked, bool Ordered)
Map the OpenMP loop schedule to the runtime enumeration.
static void getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS, const Expr **E, int32_t &UpperBound, bool UpperBoundOnly, llvm::Value **CondVal)
Check for a num threads constant value (stored in DefaultVal), or expression (stored in E).
static llvm::Value * emitDeviceID(llvm::PointerIntPair< const Expr *, 2, OpenMPDeviceClauseModifier > Device, CodeGenFunction &CGF)
static const OMPDeclareReductionDecl * getReductionInit(const Expr *ReductionOp)
Check if the combiner is a call to UDR combiner and if it is so return the UDR decl used for reductio...
static bool checkInitIsRequired(CodeGenFunction &CGF, ArrayRef< PrivateDataTy > Privates)
Check if duplication function is required for taskloops.
static bool checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD, ArrayRef< PrivateDataTy > Privates)
Checks if destructor function is required to be generated.
static llvm::TargetRegionEntryInfo getEntryInfoFromPresumedLoc(CodeGenModule &CGM, llvm::OpenMPIRBuilder &OMPBuilder, SourceLocation BeginLoc, llvm::StringRef ParentName="")
static void genMapInfo(MappableExprsHandler &MEHandler, CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo, llvm::OpenMPIRBuilder &OMPBuilder, const llvm::DenseSet< CanonicalDeclPtr< const Decl > > &SkippedVarSet=llvm::DenseSet< CanonicalDeclPtr< const Decl > >())
static void emitForStaticInitCall(CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId, llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule, OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2, const CGOpenMPRuntime::StaticRTInput &Values)
static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy, LValue BaseLV)
static void getKmpAffinityType(ASTContext &C, QualType &KmpTaskAffinityInfoTy)
Builds kmp_depend_info, if it is not built yet, and builds flags type.
static llvm::Constant * emitMappingInformation(CodeGenFunction &CGF, llvm::OpenMPIRBuilder &OMPBuilder, MappableExprsHandler::MappingExprInfo &MapExprs)
Emit a string constant containing the names of the values mapped to the offloading runtime library.
static void getDependTypes(ASTContext &C, QualType &KmpDependInfoTy, QualType &FlagsTy)
Builds kmp_depend_info, if it is not built yet, and builds flags type.
static llvm::Value * emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc, const OMPExecutableDirective &D, QualType KmpTaskTWithPrivatesPtrQTy, const RecordDecl *KmpTaskTWithPrivatesQTyRD, const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy, QualType SharedsPtrTy, const OMPTaskDataTy &Data, ArrayRef< PrivateDataTy > Privates, bool WithLastIter)
Emit task_dup function (for initialization of private/firstprivate/lastprivate vars and last_iter fla...
static std::pair< llvm::Value *, OMPDynGroupprivateFallbackType > emitDynCGroupMem(const OMPExecutableDirective &D, CodeGenFunction &CGF)
static llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseKind convertDeviceClause(const VarDecl *VD)
static llvm::Value * emitReduceFiniFunction(CodeGenModule &CGM, SourceLocation Loc, ReductionCodeGen &RCG, unsigned N)
Emits reduction finalizer function:
static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr, QualType Type, bool EmitDeclareReductionInit, const Expr *Init, const OMPDeclareReductionDecl *DRD, Address SrcAddr=Address::invalid())
Emit initialization of arrays of complex types.
static bool getAArch64PBV(QualType QT, ASTContext &C)
Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI.
static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C)
Computes the lane size (LS) of a return type or of an input parameter, as defined by LS(P) in 3....
static void EmitDoacrossOrdered(CodeGenFunction &CGF, CodeGenModule &CGM, const T *C, llvm::Value *ULoc, llvm::Value *ThreadID)
static RTLDependenceKindTy translateDependencyKind(OpenMPDependClauseKind K)
Translates internal dependency kind into the runtime kind.
static void emitTargetCallElse(CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn, const OMPExecutableDirective &D, llvm::SmallVectorImpl< llvm::Value * > &CapturedVars, bool RequiresOuterTask, const CapturedStmt &CS, bool OffloadingMandatory, CodeGenFunction &CGF)
static llvm::Function * emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty, const Expr *CombinerInitializer, const VarDecl *In, const VarDecl *Out, bool IsCombiner)
static void emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn, const llvm::APSInt &VLENVal, ArrayRef< ParamAttrTy > ParamAttrs, OMPDeclareSimdDeclAttr::BranchStateTy State)
static void emitReductionCombiner(CodeGenFunction &CGF, const Expr *ReductionOp)
Emit reduction combiner.
static std::string mangleVectorParameters(ArrayRef< ParamAttrTy > ParamAttrs)
Mangle the parameter part of the vector function name according to their OpenMP classification.
static std::string generateUniqueName(CodeGenModule &CGM, llvm::StringRef Prefix, const Expr *Ref)
static llvm::Function * emitParallelOrTeamsOutlinedFunction(CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS, const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen)
static void emitAArch64DeclareSimdFunction(CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN, ArrayRef< ParamAttrTy > ParamAttrs, OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName, char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc)
Emit vector function attributes for AArch64, as defined in the AAVFABI.
static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array, unsigned Index, const VarDecl *Var)
Given an array of pointers to variables, project the address of a given variable.
static bool isAssumedToBeNotEmitted(const ValueDecl *VD, bool IsDevice)
static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask, StringRef Prefix, char ISA, StringRef ParSeq, StringRef MangledName, bool OutputBecomesInput, llvm::Function *Fn)
static FieldDecl * addFieldToRecordDecl(ASTContext &C, DeclContext *DC, QualType FieldTy)
static ValueDecl * getDeclFromThisExpr(const Expr *E)
static void genMapInfoForCaptures(MappableExprsHandler &MEHandler, CodeGenFunction &CGF, const CapturedStmt &CS, llvm::SmallVectorImpl< llvm::Value * > &CapturedVars, llvm::OpenMPIRBuilder &OMPBuilder, llvm::DenseSet< CanonicalDeclPtr< const Decl > > &MappedVarSet, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo)
static RecordDecl * createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind, QualType KmpInt32Ty, QualType KmpRoutineEntryPointerQTy)
static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule, OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2)
static bool getAArch64MTV(QualType QT, ParamKindTy Kind)
Maps To Vector (MTV), as defined in 4.1.1 of the AAVFABI (2021Q1).
Expr::Classification Cl
TokenType getType() const
Returns the token's type, e.g.
FormatToken * Next
The next token in the unwrapped line.
#define X(type, name)
Definition Value.h:97
#define SM(sm)
This file defines OpenMP AST classes for clauses.
Defines some OpenMP-specific enums and functions.
Defines the SourceManager interface.
This file defines OpenMP AST classes for executable directives and clauses.
__DEVICE__ int max(int __a, int __b)
This represents clause 'affinity' in the 'pragma omp task'-based directives.
static std::pair< const Expr *, std::optional< size_t > > findAttachPtrExpr(MappableExprComponentListRef Components, OpenMPDirectiveKind CurDirKind)
Find the attach pointer expression from a list of mappable expression components.
static QualType getComponentExprElementType(const Expr *Exp)
Get the type of an element of a ComponentList Expr Exp.
ArrayRef< MappableComponent > MappableExprComponentListRef
This represents implicit clause 'depend' for the 'pragma omp task' directive.
This represents 'detach' clause in the 'pragma omp task' directive.
This represents 'device' clause in the 'pragma omp ...' directive.
This represents the 'doacross' clause for the 'pragma omp ordered' directive.
This represents 'dyn_groupprivate' clause in 'pragma omp target ...' and 'pragma omp teams ....
This represents clause 'map' in the 'pragma omp ...' directives.
This represents clause 'nontemporal' in the 'pragma omp ...' directives.
This represents 'num_teams' clause in the 'pragma omp ...' directive.
This represents 'thread_limit' clause in the 'pragma omp ...' directive.
This represents clause 'uses_allocators' in the 'pragma omp target'-based directives.
This represents 'ompx_attribute' clause in a directive that might generate an outlined function.
This represents 'ompx_bare' clause in the 'pragma omp target teams ...' directive.
This represents 'ompx_dyn_cgroup_mem' clause in the 'pragma omp target ...' directive.
Holds long-lived AST nodes (such as types and decls) that can be referred to throughout the semantic ...
Definition ASTContext.h:220
SourceManager & getSourceManager()
Definition ASTContext.h:851
const ConstantArrayType * getAsConstantArrayType(QualType T) const
CharUnits getTypeAlignInChars(QualType T) const
Return the ABI-specified alignment of a (complete) type T, in characters.
const ASTRecordLayout & getASTRecordLayout(const RecordDecl *D) const
Get or compute information about the layout of the specified record (struct/union/class) D,...
QualType getPointerType(QualType T) const
Return the uniqued reference to the type for a pointer to the specified type.
CanQualType VoidPtrTy
QualType getConstantArrayType(QualType EltTy, const llvm::APInt &ArySize, const Expr *SizeExpr, ArraySizeModifier ASM, unsigned IndexTypeQuals) const
Return the unique reference to the type for a constant array of the specified element type.
const LangOptions & getLangOpts() const
Definition ASTContext.h:944
CanQualType BoolTy
QualType getIntTypeForBitwidth(unsigned DestWidth, unsigned Signed) const
getIntTypeForBitwidth - sets integer QualTy according to specified details: bitwidth,...
CharUnits getDeclAlign(const Decl *D, bool ForAlignof=false) const
Return a conservative estimate of the alignment of the specified decl D.
int64_t toBits(CharUnits CharSize) const
Convert a size in characters to a size in bits.
const ArrayType * getAsArrayType(QualType T) const
Type Query functions.
uint64_t getTypeSize(QualType T) const
Return the size of the specified (complete) type T, in bits.
CharUnits getTypeSizeInChars(QualType T) const
Return the size of the specified (complete) type T, in characters.
static bool hasSameType(QualType T1, QualType T2)
Determine whether the given types T1 and T2 are equivalent.
const VariableArrayType * getAsVariableArrayType(QualType T) const
QualType getSizeType() const
Return the unique type for "size_t" (C99 7.17), defined in <stddef.h>.
unsigned getTypeAlign(QualType T) const
Return the ABI-specified alignment of a (complete) type T, in bits.
CharUnits getSize() const
getSize - Get the record size in characters.
uint64_t getFieldOffset(unsigned FieldNo) const
getFieldOffset - Get the offset of the given field index, in bits.
CharUnits getNonVirtualSize() const
getNonVirtualSize - Get the non-virtual size (in chars) of an object, which is the size of the object...
static QualType getBaseOriginalType(const Expr *Base)
Return original type of the base expression for array section.
Definition Expr.cpp:5272
Represents an array type, per C99 6.7.5.2 - Array Declarators.
Definition TypeBase.h:3723
Attr - This represents one attribute.
Definition Attr.h:46
Represents a base class of a C++ class.
Definition DeclCXX.h:146
Represents a C++ constructor within a class.
Definition DeclCXX.h:2604
Represents a C++ destructor within a class.
Definition DeclCXX.h:2869
const CXXRecordDecl * getParent() const
Return the parent of this method declaration, which is the class in which this method is defined.
Definition DeclCXX.h:2255
QualType getFunctionObjectParameterType() const
Definition DeclCXX.h:2279
Represents a C++ struct/union/class.
Definition DeclCXX.h:258
base_class_range bases()
Definition DeclCXX.h:608
bool isLambda() const
Determine whether this class describes a lambda function object.
Definition DeclCXX.h:1018
void getCaptureFields(llvm::DenseMap< const ValueDecl *, FieldDecl * > &Captures, FieldDecl *&ThisCapture) const
For a closure type, retrieve the mapping from captured variables and this to the non-static data memb...
Definition DeclCXX.cpp:1784
unsigned getNumBases() const
Retrieves the number of base classes of this class.
Definition DeclCXX.h:602
base_class_range vbases()
Definition DeclCXX.h:625
capture_const_range captures() const
Definition DeclCXX.h:1097
ctor_range ctors() const
Definition DeclCXX.h:670
CXXDestructorDecl * getDestructor() const
Returns the destructor decl for this class.
Definition DeclCXX.cpp:2121
CanProxy< U > castAs() const
A wrapper class around a pointer that always points to its canonical declaration.
Describes the capture of either a variable, or 'this', or variable-length array type.
Definition Stmt.h:3942
bool capturesVariableByCopy() const
Determine whether this capture handles a variable by copy.
Definition Stmt.h:3976
VarDecl * getCapturedVar() const
Retrieve the declaration of the variable being captured.
Definition Stmt.cpp:1386
bool capturesVariableArrayType() const
Determine whether this capture handles a variable-length array type.
Definition Stmt.h:3982
bool capturesThis() const
Determine whether this capture handles the C++ 'this' pointer.
Definition Stmt.h:3970
bool capturesVariable() const
Determine whether this capture handles a variable (by reference).
Definition Stmt.h:3973
This captures a statement into a function.
Definition Stmt.h:3929
const Capture * const_capture_iterator
Definition Stmt.h:4063
capture_iterator capture_end() const
Retrieve an iterator pointing past the end of the sequence of captures.
Definition Stmt.h:4080
const RecordDecl * getCapturedRecordDecl() const
Retrieve the record declaration for captured variables.
Definition Stmt.h:4050
Stmt * getCapturedStmt()
Retrieve the statement being captured.
Definition Stmt.h:4033
bool capturesVariable(const VarDecl *Var) const
True if this variable has been captured.
Definition Stmt.cpp:1512
capture_iterator capture_begin()
Retrieve an iterator pointing to the first capture.
Definition Stmt.h:4075
capture_range captures()
Definition Stmt.h:4067
CharUnits - This is an opaque type for sizes expressed in character units.
Definition CharUnits.h:38
bool isZero() const
isZero - Test whether the quantity equals zero.
Definition CharUnits.h:122
llvm::Align getAsAlign() const
getAsAlign - Returns Quantity as a valid llvm::Align, Beware llvm::Align assumes power of two 8-bit b...
Definition CharUnits.h:189
QuantityType getQuantity() const
getQuantity - Get the raw integer representation of this quantity.
Definition CharUnits.h:185
CharUnits alignmentOfArrayElement(CharUnits elementSize) const
Given that this is the alignment of the first element of an array, return the minimum alignment of an...
Definition CharUnits.h:214
static CharUnits fromQuantity(QuantityType Quantity)
fromQuantity - Construct a CharUnits quantity from a raw integer type.
Definition CharUnits.h:63
CharUnits alignTo(const CharUnits &Align) const
alignTo - Returns the next integer (mod 2**64) that is greater than or equal to this quantity and is ...
Definition CharUnits.h:201
std::string SampleProfileFile
Name of the profile file to use with -fprofile-sample-use.
Like RawAddress, an abstract representation of an aligned address, but the pointer contained in this ...
Definition Address.h:128
static Address invalid()
Definition Address.h:176
llvm::Value * emitRawPointer(CodeGenFunction &CGF) const
Return the pointer contained in this class after authenticating it and adding offset to it if necessa...
Definition Address.h:253
CharUnits getAlignment() const
Definition Address.h:194
llvm::Type * getElementType() const
Return the type of the values stored in this address.
Definition Address.h:209
Address withPointer(llvm::Value *NewPointer, KnownNonNull_t IsKnownNonNull) const
Return address with different pointer, but same element type and alignment.
Definition Address.h:261
Address withElementType(llvm::Type *ElemTy) const
Return address with different element type, but same pointer and alignment.
Definition Address.h:276
bool isValid() const
Definition Address.h:177
llvm::PointerType * getType() const
Return the type of the pointer value.
Definition Address.h:204
static ApplyDebugLocation CreateArtificial(CodeGenFunction &CGF)
Apply TemporaryLocation if it is valid.
static ApplyDebugLocation CreateDefaultArtificial(CodeGenFunction &CGF, SourceLocation TemporaryLocation)
Apply TemporaryLocation if it is valid.
static ApplyDebugLocation CreateEmpty(CodeGenFunction &CGF)
Set the IRBuilder to not attach debug locations.
llvm::StoreInst * CreateStore(llvm::Value *Val, Address Addr, bool IsVolatile=false)
Definition CGBuilder.h:140
Address CreateGEP(CodeGenFunction &CGF, Address Addr, llvm::Value *Index, const llvm::Twine &Name="")
Definition CGBuilder.h:296
Address CreatePointerBitCastOrAddrSpaceCast(Address Addr, llvm::Type *Ty, llvm::Type *ElementTy, const llvm::Twine &Name="")
Definition CGBuilder.h:207
Address CreateConstArrayGEP(Address Addr, uint64_t Index, const llvm::Twine &Name="")
Given addr = [n x T]* ... produce name = getelementptr inbounds addr, i64 0, i64 index where i64 is a...
Definition CGBuilder.h:245
llvm::LoadInst * CreateLoad(Address Addr, const llvm::Twine &Name="")
Definition CGBuilder.h:112
llvm::CallInst * CreateMemCpy(Address Dest, Address Src, llvm::Value *Size, bool IsVolatile=false)
Definition CGBuilder.h:369
Address CreateConstGEP(Address Addr, uint64_t Index, const llvm::Twine &Name="")
Given addr = T* ... produce name = getelementptr inbounds addr, i64 index where i64 is actually the t...
Definition CGBuilder.h:282
Address CreateAddrSpaceCast(Address Addr, llvm::Type *Ty, llvm::Type *ElementTy, const llvm::Twine &Name="")
Definition CGBuilder.h:193
CGFunctionInfo - Class to encapsulate the information about a function definition.
static LastprivateConditionalRAII disable(CodeGenFunction &CGF, const OMPExecutableDirective &S)
NontemporalDeclsRAII(CodeGenModule &CGM, const OMPLoopDirective &S)
Struct that keeps all the relevant information that should be kept throughout a 'target data' region.
llvm::DenseMap< const ValueDecl *, llvm::Value * > CaptureDeviceAddrMap
Map between the a declaration of a capture and the corresponding new llvm address where the runtime r...
UntiedTaskLocalDeclsRAII(CodeGenFunction &CGF, const llvm::MapVector< CanonicalDeclPtr< const VarDecl >, std::pair< Address, Address > > &LocalVars)
virtual Address emitThreadIDAddress(CodeGenFunction &CGF, SourceLocation Loc)
Emits address of the word in a memory where current thread id is stored.
llvm::StringSet ThreadPrivateWithDefinition
Set of threadprivate variables with the generated initializer.
virtual void emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, const OMPExecutableDirective &D, llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds, const Expr *IfCond, const OMPTaskDataTy &Data)
Emit task region for the task directive.
void createOffloadEntriesAndInfoMetadata()
Creates all the offload entries in the current compilation unit along with the associated metadata.
const Expr * getNumTeamsExprForTargetDirective(CodeGenFunction &CGF, const OMPExecutableDirective &D, int32_t &MinTeamsVal, int32_t &MaxTeamsVal)
Emit the number of teams for a target directive.
virtual Address getAddrOfThreadPrivate(CodeGenFunction &CGF, const VarDecl *VD, Address VDAddr, SourceLocation Loc)
Returns address of the threadprivate variable for the current thread.
void emitDeferredTargetDecls() const
Emit deferred declare target variables marked for deferred emission.
virtual llvm::Value * emitForNext(CodeGenFunction &CGF, SourceLocation Loc, unsigned IVSize, bool IVSigned, Address IL, Address LB, Address UB, Address ST)
Call __kmpc_dispatch_next( ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter, kmp_int[32|64] *p_lowe...
bool markAsGlobalTarget(GlobalDecl GD)
Marks the declaration as already emitted for the device code and returns true, if it was marked alrea...
virtual void emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc, llvm::Function *OutlinedFn, ArrayRef< llvm::Value * > CapturedVars, const Expr *IfCond, llvm::Value *NumThreads, OpenMPNumThreadsClauseModifier NumThreadsModifier=OMPC_NUMTHREADS_unknown, OpenMPSeverityClauseKind Severity=OMPC_SEVERITY_fatal, const Expr *Message=nullptr)
Emits code for parallel or serial call of the OutlinedFn with variables captured in a record which ad...
llvm::SmallDenseSet< CanonicalDeclPtr< const Decl > > NontemporalDeclsSet
virtual void emitTargetDataStandAloneCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, const Expr *Device)
Emit the data mapping/movement code associated with the directive D that should be of the form 'targe...
virtual void emitNumThreadsClause(CodeGenFunction &CGF, llvm::Value *NumThreads, SourceLocation Loc, OpenMPNumThreadsClauseModifier Modifier=OMPC_NUMTHREADS_unknown, OpenMPSeverityClauseKind Severity=OMPC_SEVERITY_fatal, SourceLocation SeverityLoc=SourceLocation(), const Expr *Message=nullptr, SourceLocation MessageLoc=SourceLocation())
Emits call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32global_tid, kmp_int32 num_threads) ...
QualType SavedKmpTaskloopTQTy
Saved kmp_task_t for taskloop-based directive.
virtual void emitSingleRegion(CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen, SourceLocation Loc, ArrayRef< const Expr * > CopyprivateVars, ArrayRef< const Expr * > DestExprs, ArrayRef< const Expr * > SrcExprs, ArrayRef< const Expr * > AssignmentOps)
Emits a single region.
virtual bool emitTargetGlobal(GlobalDecl GD)
Emit the global GD if it is meaningful for the target.
void setLocThreadIdInsertPt(CodeGenFunction &CGF, bool AtCurrentPoint=false)
std::string getOutlinedHelperName(StringRef Name) const
Get the function name of an outlined region.
bool HasEmittedDeclareTargetRegion
Flag for keeping track of weather a device routine has been emitted.
llvm::Constant * getOrCreateThreadPrivateCache(const VarDecl *VD)
If the specified mangled name is not in the module, create and return threadprivate cache object.
virtual Address getTaskReductionItem(CodeGenFunction &CGF, SourceLocation Loc, llvm::Value *ReductionsPtr, LValue SharedLVal)
Get the address of void * type of the privatue copy of the reduction item specified by the SharedLVal...
virtual void emitForDispatchDeinit(CodeGenFunction &CGF, SourceLocation Loc)
This is used for non static scheduled types and when the ordered clause is present on the loop constr...
void emitCall(CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee Callee, ArrayRef< llvm::Value * > Args={}) const
Emits Callee function call with arguments Args with location Loc.
virtual void getDefaultScheduleAndChunk(CodeGenFunction &CGF, const OMPLoopDirective &S, OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const
Choose default schedule type and chunk value for the schedule clause.
virtual std::pair< llvm::Function *, llvm::Function * > getUserDefinedReduction(const OMPDeclareReductionDecl *D)
Get combiner/initializer for the specified user-defined reduction, if any.
virtual bool isGPU() const
Returns true if the current target is a GPU.
static const Stmt * getSingleCompoundChild(ASTContext &Ctx, const Stmt *Body)
Checks if the Body is the CompoundStmt and returns its child statement iff there is only one that is ...
virtual void emitDeclareTargetFunction(const FunctionDecl *FD, llvm::GlobalValue *GV)
Emit code for handling declare target functions in the runtime.
bool HasRequiresUnifiedSharedMemory
Flag for keeping track of weather a requires unified_shared_memory directive is present.
llvm::Value * emitUpdateLocation(CodeGenFunction &CGF, SourceLocation Loc, unsigned Flags=0, bool EmitLoc=false)
Emits object of ident_t type with info for source location.
bool isLocalVarInUntiedTask(CodeGenFunction &CGF, const VarDecl *VD) const
Returns true if the variable is a local variable in untied task.
virtual void emitTeamsCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, SourceLocation Loc, llvm::Function *OutlinedFn, ArrayRef< llvm::Value * > CapturedVars)
Emits code for teams call of the OutlinedFn with variables captured in a record which address is stor...
virtual void emitCancellationPointCall(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind CancelRegion)
Emit code for 'cancellation point' construct.
virtual llvm::Function * emitThreadPrivateVarDefinition(const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit, CodeGenFunction *CGF=nullptr)
Emit a code for initialization of threadprivate variable.
virtual ConstantAddress getAddrOfDeclareTargetVar(const VarDecl *VD)
Returns the address of the variable marked as declare target with link clause OR as declare target wi...
llvm::Function * getOrCreateUserDefinedMapperFunc(const OMPDeclareMapperDecl *D)
Get the function for the specified user-defined mapper.
OpenMPLocThreadIDMapTy OpenMPLocThreadIDMap
virtual void functionFinished(CodeGenFunction &CGF)
Cleans up references to the objects in finished function.
virtual llvm::Function * emitTeamsOutlinedFunction(CodeGenFunction &CGF, const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen)
Emits outlined function for the specified OpenMP teams directive D.
QualType KmpTaskTQTy
Type typedef struct kmp_task { void * shareds; /‍**< pointer to block of pointers to shared vars ‍/ k...
llvm::OpenMPIRBuilder OMPBuilder
An OpenMP-IR-Builder instance.
virtual void emitDoacrossInit(CodeGenFunction &CGF, const OMPLoopDirective &D, ArrayRef< Expr * > NumIterations)
Emit initialization for doacross loop nesting support.
virtual void adjustTargetSpecificDataForLambdas(CodeGenFunction &CGF, const OMPExecutableDirective &D) const
Adjust some parameters for the target-based directives, like addresses of the variables captured by r...
virtual void emitTargetDataCalls(CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, const Expr *Device, const RegionCodeGenTy &CodeGen, CGOpenMPRuntime::TargetDataInfo &Info)
Emit the target data mapping code associated with D.
virtual unsigned getDefaultLocationReserved2Flags() const
Returns additional flags that can be stored in reserved_2 field of the default location.
virtual Address getParameterAddress(CodeGenFunction &CGF, const VarDecl *NativeParam, const VarDecl *TargetParam) const
Gets the address of the native argument basing on the address of the target-specific parameter.
void emitUsesAllocatorsFini(CodeGenFunction &CGF, const Expr *Allocator)
Destroys user defined allocators specified in the uses_allocators clause.
QualType KmpTaskAffinityInfoTy
Type typedef struct kmp_task_affinity_info { kmp_intptr_t base_addr; size_t len; struct { bool flag1 ...
void emitPrivateReduction(CodeGenFunction &CGF, SourceLocation Loc, const Expr *Privates, const Expr *LHSExprs, const Expr *RHSExprs, const Expr *ReductionOps)
Emits code for private variable reduction.
llvm::Value * emitNumTeamsForTargetDirective(CodeGenFunction &CGF, const OMPExecutableDirective &D)
virtual void emitTargetOutlinedFunctionHelper(const OMPExecutableDirective &D, StringRef ParentName, llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, bool IsOffloadEntry, const RegionCodeGenTy &CodeGen)
Helper to emit outlined function for 'target' directive.
void scanForTargetRegionsFunctions(const Stmt *S, StringRef ParentName)
Start scanning from statement S and emit all target regions found along the way.
SmallVector< llvm::Value *, 4 > emitDepobjElementsSizes(CodeGenFunction &CGF, QualType &KmpDependInfoTy, const OMPTaskDataTy::DependData &Data)
virtual llvm::Value * emitMessageClause(CodeGenFunction &CGF, const Expr *Message, SourceLocation Loc)
virtual void emitTaskgroupRegion(CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen, SourceLocation Loc)
Emit a taskgroup region.
llvm::DenseMap< llvm::Function *, llvm::DenseMap< CanonicalDeclPtr< const Decl >, std::tuple< QualType, const FieldDecl *, const FieldDecl *, LValue > > > LastprivateConditionalToTypes
Maps local variables marked as lastprivate conditional to their internal types.
virtual bool emitTargetGlobalVariable(GlobalDecl GD)
Emit the global variable if it is a valid device global variable.
virtual void emitNumTeamsClause(CodeGenFunction &CGF, const Expr *NumTeams, const Expr *ThreadLimit, SourceLocation Loc)
Emits call to void __kmpc_push_num_teams(ident_t *loc, kmp_int32global_tid, kmp_int32 num_teams,...
bool hasRequiresUnifiedSharedMemory() const
Return whether the unified_shared_memory has been specified.
virtual Address getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF, QualType VarType, StringRef Name)
Creates artificial threadprivate variable with name Name and type VarType.
void emitUserDefinedMapper(const OMPDeclareMapperDecl *D, CodeGenFunction *CGF=nullptr)
Emit the function for the user defined mapper construct.
bool HasEmittedTargetRegion
Flag for keeping track of weather a target region has been emitted.
void emitDepobjElements(CodeGenFunction &CGF, QualType &KmpDependInfoTy, LValue PosLVal, const OMPTaskDataTy::DependData &Data, Address DependenciesArray)
std::string getReductionFuncName(StringRef Name) const
Get the function name of a reduction function.
virtual void processRequiresDirective(const OMPRequiresDecl *D)
Perform check on requires decl to ensure that target architecture supports unified addressing.
llvm::DenseSet< CanonicalDeclPtr< const Decl > > AlreadyEmittedTargetDecls
List of the emitted declarations.
virtual llvm::Value * emitTaskReductionInit(CodeGenFunction &CGF, SourceLocation Loc, ArrayRef< const Expr * > LHSExprs, ArrayRef< const Expr * > RHSExprs, const OMPTaskDataTy &Data)
Emit a code for initialization of task reduction clause.
llvm::Value * getThreadID(CodeGenFunction &CGF, SourceLocation Loc)
Gets thread id value for the current thread.
void emitUpdateClause(CodeGenFunction &CGF, LValue DepobjLVal, OpenMPDependClauseKind NewDepKind, SourceLocation Loc)
Updates the dependency kind in the specified depobj object.
virtual void emitLastprivateConditionalFinalUpdate(CodeGenFunction &CGF, LValue PrivLVal, const VarDecl *VD, SourceLocation Loc)
Gets the address of the global copy used for lastprivate conditional update, if any.
llvm::MapVector< CanonicalDeclPtr< const VarDecl >, std::pair< Address, Address > > UntiedLocalVarsAddressesMap
virtual void emitErrorCall(CodeGenFunction &CGF, SourceLocation Loc, Expr *ME, bool IsFatal)
Emit __kmpc_error call for error directive extern void __kmpc_error(ident_t *loc, int severity,...
void clearLocThreadIdInsertPt(CodeGenFunction &CGF)
virtual void emitTaskyieldCall(CodeGenFunction &CGF, SourceLocation Loc)
Emits code for a taskyield directive.
std::string getName(ArrayRef< StringRef > Parts) const
Get the platform-specific name separator.
void computeMinAndMaxThreadsAndTeams(const OMPExecutableDirective &D, CodeGenFunction &CGF, llvm::OpenMPIRBuilder::TargetKernelDefaultAttrs &Attrs)
Helper to determine the min/max number of threads/teams for D.
virtual void emitFlush(CodeGenFunction &CGF, ArrayRef< const Expr * > Vars, SourceLocation Loc, llvm::AtomicOrdering AO)
Emit flush of the variables specified in 'omp flush' directive.
virtual void emitTaskwaitCall(CodeGenFunction &CGF, SourceLocation Loc, const OMPTaskDataTy &Data)
Emit code for 'taskwait' directive.
virtual void emitProcBindClause(CodeGenFunction &CGF, llvm::omp::ProcBindKind ProcBind, SourceLocation Loc)
Emit call to void __kmpc_push_proc_bind(ident_t *loc, kmp_int32global_tid, int proc_bind) to generate...
void emitLastprivateConditionalUpdate(CodeGenFunction &CGF, LValue IVLVal, StringRef UniqueDeclName, LValue LVal, SourceLocation Loc)
Emit update for lastprivate conditional data.
virtual void emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D, llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds, const Expr *IfCond, const OMPTaskDataTy &Data)
Emit task region for the taskloop directive.
virtual void emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind Kind, bool EmitChecks=true, bool ForceSimpleCall=false)
Emit an implicit/explicit barrier for OpenMP threads.
static unsigned getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind)
Returns default flags for the barriers depending on the directive, for which this barier is going to ...
virtual bool emitTargetFunctions(GlobalDecl GD)
Emit the target regions enclosed in GD function definition or the function itself in case it is a val...
TaskResultTy emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc, const OMPExecutableDirective &D, llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds, const OMPTaskDataTy &Data)
Emit task region for the task directive.
llvm::Value * emitTargetNumIterationsCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, llvm::function_ref< llvm::Value *(CodeGenFunction &CGF, const OMPLoopDirective &D)> SizeEmitter)
Return the trip count of loops associated with constructs / 'target teams distribute' and 'teams dist...
llvm::StringMap< llvm::AssertingVH< llvm::GlobalVariable >, llvm::BumpPtrAllocator > InternalVars
An ordered map of auto-generated variables to their unique names.
virtual void emitDistributeStaticInit(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values)
llvm::SmallVector< UntiedLocalVarsAddressesMap, 4 > UntiedLocalVarsStack
virtual void emitForStaticFinish(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind)
Call the appropriate runtime routine to notify that we finished all the work with current loop.
virtual void emitThreadLimitClause(CodeGenFunction &CGF, const Expr *ThreadLimit, SourceLocation Loc)
Emits call to void __kmpc_set_thread_limit(ident_t *loc, kmp_int32global_tid, kmp_int32 thread_limit)...
void emitIfClause(CodeGenFunction &CGF, const Expr *Cond, const RegionCodeGenTy &ThenGen, const RegionCodeGenTy &ElseGen)
Emits code for OpenMP 'if' clause using specified CodeGen function.
Address emitDepobjDependClause(CodeGenFunction &CGF, const OMPTaskDataTy::DependData &Dependencies, SourceLocation Loc)
Emits list of dependecies based on the provided data (array of dependence/expression pairs) for depob...
bool isNontemporalDecl(const ValueDecl *VD) const
Checks if the VD variable is marked as nontemporal declaration in current context.
virtual llvm::Function * emitParallelOutlinedFunction(CodeGenFunction &CGF, const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen)
Emits outlined function for the specified OpenMP parallel directive D.
const Expr * getNumThreadsExprForTargetDirective(CodeGenFunction &CGF, const OMPExecutableDirective &D, int32_t &UpperBound, bool UpperBoundOnly, llvm::Value **CondExpr=nullptr, const Expr **ThreadLimitExpr=nullptr)
Check for a number of threads upper bound constant value (stored in UpperBound), or expression (retur...
virtual void registerVTableOffloadEntry(llvm::GlobalVariable *VTable, const VarDecl *VD)
Register VTable to OpenMP offload entry.
virtual llvm::Value * emitSeverityClause(OpenMPSeverityClauseKind Severity, SourceLocation Loc)
llvm::SmallVector< LastprivateConditionalData, 4 > LastprivateConditionalStack
Stack for list of addresses of declarations in current context marked as lastprivate conditional.
virtual void emitForStaticInit(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind, const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values)
Call the appropriate runtime routine to initialize it before start of loop.
virtual void emitDeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn)
Marks function Fn with properly mangled versions of vector functions.
llvm::AtomicOrdering getDefaultMemoryOrdering() const
Gets default memory ordering as specified in requires directive.
virtual bool isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind, bool Chunked) const
Check if the specified ScheduleKind is static non-chunked.
virtual void emitAndRegisterVTable(CodeGenModule &CGM, CXXRecordDecl *CXXRecord, const VarDecl *VD)
Emit and register VTable for the C++ class in OpenMP offload entry.
llvm::Value * getCriticalRegionLock(StringRef CriticalName)
Returns corresponding lock object for the specified critical region name.
virtual void emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc, const Expr *IfCond, OpenMPDirectiveKind CancelRegion)
Emit code for 'cancel' construct.
QualType SavedKmpTaskTQTy
Saved kmp_task_t for task directive.
virtual void emitMasterRegion(CodeGenFunction &CGF, const RegionCodeGenTy &MasterOpGen, SourceLocation Loc)
Emits a master region.
virtual llvm::Function * emitTaskOutlinedFunction(const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, const VarDecl *PartIDVar, const VarDecl *TaskTVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen, bool Tied, unsigned &NumberOfParts)
Emits outlined function for the OpenMP task directive D.
llvm::DenseMap< llvm::Function *, unsigned > FunctionToUntiedTaskStackMap
Maps function to the position of the untied task locals stack.
void emitDestroyClause(CodeGenFunction &CGF, LValue DepobjLVal, SourceLocation Loc)
Emits the code to destroy the dependency object provided in depobj directive.
virtual void emitTaskReductionFixups(CodeGenFunction &CGF, SourceLocation Loc, ReductionCodeGen &RCG, unsigned N)
Required to resolve existing problems in the runtime.
llvm::ArrayType * KmpCriticalNameTy
Type kmp_critical_name, originally defined as typedef kmp_int32 kmp_critical_name[8];.
virtual void emitDoacrossOrdered(CodeGenFunction &CGF, const OMPDependClause *C)
Emit code for doacross ordered directive with 'depend' clause.
llvm::DenseMap< const OMPDeclareMapperDecl *, llvm::Function * > UDMMap
Map from the user-defined mapper declaration to its corresponding functions.
virtual void checkAndEmitLastprivateConditional(CodeGenFunction &CGF, const Expr *LHS)
Checks if the provided LVal is lastprivate conditional and emits the code to update the value of the ...
std::pair< llvm::Value *, LValue > getDepobjElements(CodeGenFunction &CGF, LValue DepobjLVal, SourceLocation Loc)
Returns the number of the elements and the address of the depobj dependency array.
llvm::SmallDenseSet< const VarDecl * > DeferredGlobalVariables
List of variables that can become declare target implicitly and, thus, must be emitted.
void emitUsesAllocatorsInit(CodeGenFunction &CGF, const Expr *Allocator, const Expr *AllocatorTraits)
Initializes user defined allocators specified in the uses_allocators clauses.
virtual void registerVTable(const OMPExecutableDirective &D)
Emit code for registering vtable by scanning through map clause in OpenMP target region.
llvm::Type * KmpRoutineEntryPtrTy
Type typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *);.
llvm::Type * getIdentTyPointerTy()
Returns pointer to ident_t type.
void emitSingleReductionCombiner(CodeGenFunction &CGF, const Expr *ReductionOp, const Expr *PrivateRef, const DeclRefExpr *LHS, const DeclRefExpr *RHS)
Emits single reduction combiner.
llvm::OpenMPIRBuilder & getOMPBuilder()
virtual void emitTargetOutlinedFunction(const OMPExecutableDirective &D, StringRef ParentName, llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, bool IsOffloadEntry, const RegionCodeGenTy &CodeGen)
Emit outilined function for 'target' directive.
virtual void emitCriticalRegion(CodeGenFunction &CGF, StringRef CriticalName, const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc, const Expr *Hint=nullptr)
Emits a critical region.
virtual void emitForOrderedIterationEnd(CodeGenFunction &CGF, SourceLocation Loc, unsigned IVSize, bool IVSigned)
Call the appropriate runtime routine to notify that we finished iteration of the ordered loop with th...
virtual void emitOutlinedFunctionCall(CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn, ArrayRef< llvm::Value * > Args={}) const
Emits call of the outlined function with the provided arguments, translating these arguments to corre...
llvm::Value * emitNumThreadsForTargetDirective(CodeGenFunction &CGF, const OMPExecutableDirective &D)
Emit an expression that denotes the number of threads a target region shall use.
void emitThreadPrivateVarInit(CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor, llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc)
Emits initialization code for the threadprivate variables.
virtual void emitUserDefinedReduction(CodeGenFunction *CGF, const OMPDeclareReductionDecl *D)
Emit code for the specified user defined reduction construct.
virtual void checkAndEmitSharedLastprivateConditional(CodeGenFunction &CGF, const OMPExecutableDirective &D, const llvm::DenseSet< CanonicalDeclPtr< const VarDecl > > &IgnoredDecls)
Checks if the lastprivate conditional was updated in inner region and writes the value.
QualType KmpDimTy
struct kmp_dim { // loop bounds info casted to kmp_int64 kmp_int64 lo; // lower kmp_int64 up; // uppe...
virtual void emitInlinedDirective(CodeGenFunction &CGF, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen, bool HasCancel=false)
Emit code for the directive that does not require outlining.
virtual void registerTargetGlobalVariable(const VarDecl *VD, llvm::Constant *Addr)
Checks if the provided global decl GD is a declare target variable and registers it when emitting cod...
virtual void emitFunctionProlog(CodeGenFunction &CGF, const Decl *D)
Emits OpenMP-specific function prolog.
void emitKmpRoutineEntryT(QualType KmpInt32Ty)
Build type kmp_routine_entry_t (if not built yet).
virtual bool isStaticChunked(OpenMPScheduleClauseKind ScheduleKind, bool Chunked) const
Check if the specified ScheduleKind is static chunked.
virtual void emitTargetCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond, llvm::PointerIntPair< const Expr *, 2, OpenMPDeviceClauseModifier > Device, llvm::function_ref< llvm::Value *(CodeGenFunction &CGF, const OMPLoopDirective &D)> SizeEmitter)
Emit the target offloading code associated with D.
virtual bool hasAllocateAttributeForGlobalVar(const VarDecl *VD, LangAS &AS)
Checks if the variable has associated OMPAllocateDeclAttr attribute with the predefined allocator and...
llvm::AtomicOrdering RequiresAtomicOrdering
Atomic ordering from the omp requires directive.
virtual void emitReduction(CodeGenFunction &CGF, SourceLocation Loc, ArrayRef< const Expr * > Privates, ArrayRef< const Expr * > LHSExprs, ArrayRef< const Expr * > RHSExprs, ArrayRef< const Expr * > ReductionOps, ReductionOptionsTy Options)
Emit a code for reduction clause.
std::pair< llvm::Value *, Address > emitDependClause(CodeGenFunction &CGF, ArrayRef< OMPTaskDataTy::DependData > Dependencies, SourceLocation Loc)
Emits list of dependecies based on the provided data (array of dependence/expression pairs).
llvm::StringMap< llvm::WeakTrackingVH > EmittedNonTargetVariables
List of the global variables with their addresses that should not be emitted for the target.
virtual bool isDynamic(OpenMPScheduleClauseKind ScheduleKind) const
Check if the specified ScheduleKind is dynamic.
Address emitLastprivateConditionalInit(CodeGenFunction &CGF, const VarDecl *VD)
Create specialized alloca to handle lastprivate conditionals.
virtual void emitOrderedRegion(CodeGenFunction &CGF, const RegionCodeGenTy &OrderedOpGen, SourceLocation Loc, bool IsThreads)
Emit an ordered region.
virtual Address getAddressOfLocalVariable(CodeGenFunction &CGF, const VarDecl *VD)
Gets the OpenMP-specific address of the local variable.
virtual void emitTaskReductionFini(CodeGenFunction &CGF, SourceLocation Loc, bool IsWorksharingReduction)
Emits the following code for reduction clause with task modifier:
virtual void emitMaskedRegion(CodeGenFunction &CGF, const RegionCodeGenTy &MaskedOpGen, SourceLocation Loc, const Expr *Filter=nullptr)
Emits a masked region.
QualType KmpDependInfoTy
Type typedef struct kmp_depend_info { kmp_intptr_t base_addr; size_t len; struct { bool in:1; bool ou...
llvm::Function * emitReductionFunction(StringRef ReducerName, SourceLocation Loc, llvm::Type *ArgsElemType, ArrayRef< const Expr * > Privates, ArrayRef< const Expr * > LHSExprs, ArrayRef< const Expr * > RHSExprs, ArrayRef< const Expr * > ReductionOps)
Emits reduction function.
virtual void emitForDispatchInit(CodeGenFunction &CGF, SourceLocation Loc, const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned, bool Ordered, const DispatchRTInput &DispatchValues)
Call the appropriate runtime routine to initialize it before start of loop.
Address getTaskReductionItem(CodeGenFunction &CGF, SourceLocation Loc, llvm::Value *ReductionsPtr, LValue SharedLVal) override
Get the address of void * type of the privatue copy of the reduction item specified by the SharedLVal...
void emitCriticalRegion(CodeGenFunction &CGF, StringRef CriticalName, const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc, const Expr *Hint=nullptr) override
Emits a critical region.
void emitDistributeStaticInit(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) override
void emitForStaticInit(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind, const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) override
Call the appropriate runtime routine to initialize it before start of loop.
bool emitTargetGlobalVariable(GlobalDecl GD) override
Emit the global variable if it is a valid device global variable.
llvm::Value * emitForNext(CodeGenFunction &CGF, SourceLocation Loc, unsigned IVSize, bool IVSigned, Address IL, Address LB, Address UB, Address ST) override
Call __kmpc_dispatch_next( ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter, kmp_int[32|64] *p_lowe...
llvm::Function * emitThreadPrivateVarDefinition(const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit, CodeGenFunction *CGF=nullptr) override
Emit a code for initialization of threadprivate variable.
void emitTargetDataStandAloneCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, const Expr *Device) override
Emit the data mapping/movement code associated with the directive D that should be of the form 'targe...
llvm::Function * emitTeamsOutlinedFunction(CodeGenFunction &CGF, const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) override
Emits outlined function for the specified OpenMP teams directive D.
void emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc, llvm::Function *OutlinedFn, ArrayRef< llvm::Value * > CapturedVars, const Expr *IfCond, llvm::Value *NumThreads, OpenMPNumThreadsClauseModifier NumThreadsModifier=OMPC_NUMTHREADS_unknown, OpenMPSeverityClauseKind Severity=OMPC_SEVERITY_fatal, const Expr *Message=nullptr) override
Emits code for parallel or serial call of the OutlinedFn with variables captured in a record which ad...
void emitReduction(CodeGenFunction &CGF, SourceLocation Loc, ArrayRef< const Expr * > Privates, ArrayRef< const Expr * > LHSExprs, ArrayRef< const Expr * > RHSExprs, ArrayRef< const Expr * > ReductionOps, ReductionOptionsTy Options) override
Emit a code for reduction clause.
void emitFlush(CodeGenFunction &CGF, ArrayRef< const Expr * > Vars, SourceLocation Loc, llvm::AtomicOrdering AO) override
Emit flush of the variables specified in 'omp flush' directive.
void emitDoacrossOrdered(CodeGenFunction &CGF, const OMPDependClause *C) override
Emit code for doacross ordered directive with 'depend' clause.
void emitTaskyieldCall(CodeGenFunction &CGF, SourceLocation Loc) override
Emits a masked region.
Address getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF, QualType VarType, StringRef Name) override
Creates artificial threadprivate variable with name Name and type VarType.
Address getAddrOfThreadPrivate(CodeGenFunction &CGF, const VarDecl *VD, Address VDAddr, SourceLocation Loc) override
Returns address of the threadprivate variable for the current thread.
void emitSingleRegion(CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen, SourceLocation Loc, ArrayRef< const Expr * > CopyprivateVars, ArrayRef< const Expr * > DestExprs, ArrayRef< const Expr * > SrcExprs, ArrayRef< const Expr * > AssignmentOps) override
Emits a single region.
void emitTaskReductionFixups(CodeGenFunction &CGF, SourceLocation Loc, ReductionCodeGen &RCG, unsigned N) override
Required to resolve existing problems in the runtime.
llvm::Function * emitParallelOutlinedFunction(CodeGenFunction &CGF, const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) override
Emits outlined function for the specified OpenMP parallel directive D.
void emitCancellationPointCall(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind CancelRegion) override
Emit code for 'cancellation point' construct.
void emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind Kind, bool EmitChecks=true, bool ForceSimpleCall=false) override
Emit an implicit/explicit barrier for OpenMP threads.
Address getParameterAddress(CodeGenFunction &CGF, const VarDecl *NativeParam, const VarDecl *TargetParam) const override
Gets the address of the native argument basing on the address of the target-specific parameter.
void emitTeamsCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, SourceLocation Loc, llvm::Function *OutlinedFn, ArrayRef< llvm::Value * > CapturedVars) override
Emits code for teams call of the OutlinedFn with variables captured in a record which address is stor...
void emitForOrderedIterationEnd(CodeGenFunction &CGF, SourceLocation Loc, unsigned IVSize, bool IVSigned) override
Call the appropriate runtime routine to notify that we finished iteration of the ordered loop with th...
bool emitTargetGlobal(GlobalDecl GD) override
Emit the global GD if it is meaningful for the target.
void emitTaskReductionFini(CodeGenFunction &CGF, SourceLocation Loc, bool IsWorksharingReduction) override
Emits the following code for reduction clause with task modifier:
void emitOrderedRegion(CodeGenFunction &CGF, const RegionCodeGenTy &OrderedOpGen, SourceLocation Loc, bool IsThreads) override
Emit an ordered region.
void emitForStaticFinish(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind) override
Call the appropriate runtime routine to notify that we finished all the work with current loop.
llvm::Value * emitTaskReductionInit(CodeGenFunction &CGF, SourceLocation Loc, ArrayRef< const Expr * > LHSExprs, ArrayRef< const Expr * > RHSExprs, const OMPTaskDataTy &Data) override
Emit a code for initialization of task reduction clause.
void emitProcBindClause(CodeGenFunction &CGF, llvm::omp::ProcBindKind ProcBind, SourceLocation Loc) override
Emit call to void __kmpc_push_proc_bind(ident_t *loc, kmp_int32global_tid, int proc_bind) to generate...
void emitTargetOutlinedFunction(const OMPExecutableDirective &D, StringRef ParentName, llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) override
Emit outilined function for 'target' directive.
void emitMasterRegion(CodeGenFunction &CGF, const RegionCodeGenTy &MasterOpGen, SourceLocation Loc) override
Emits a master region.
void emitNumTeamsClause(CodeGenFunction &CGF, const Expr *NumTeams, const Expr *ThreadLimit, SourceLocation Loc) override
Emits call to void __kmpc_push_num_teams(ident_t *loc, kmp_int32global_tid, kmp_int32 num_teams,...
void emitForDispatchDeinit(CodeGenFunction &CGF, SourceLocation Loc) override
This is used for non static scheduled types and when the ordered clause is present on the loop constr...
const VarDecl * translateParameter(const FieldDecl *FD, const VarDecl *NativeParam) const override
Translates the native parameter of outlined function if this is required for target.
void emitNumThreadsClause(CodeGenFunction &CGF, llvm::Value *NumThreads, SourceLocation Loc, OpenMPNumThreadsClauseModifier Modifier=OMPC_NUMTHREADS_unknown, OpenMPSeverityClauseKind Severity=OMPC_SEVERITY_fatal, SourceLocation SeverityLoc=SourceLocation(), const Expr *Message=nullptr, SourceLocation MessageLoc=SourceLocation()) override
Emits call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32global_tid, kmp_int32 num_threads) ...
void emitMaskedRegion(CodeGenFunction &CGF, const RegionCodeGenTy &MaskedOpGen, SourceLocation Loc, const Expr *Filter=nullptr) override
Emits a masked region.
void emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, const OMPExecutableDirective &D, llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds, const Expr *IfCond, const OMPTaskDataTy &Data) override
Emit task region for the task directive.
void emitTargetCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond, llvm::PointerIntPair< const Expr *, 2, OpenMPDeviceClauseModifier > Device, llvm::function_ref< llvm::Value *(CodeGenFunction &CGF, const OMPLoopDirective &D)> SizeEmitter) override
Emit the target offloading code associated with D.
bool emitTargetFunctions(GlobalDecl GD) override
Emit the target regions enclosed in GD function definition or the function itself in case it is a val...
void emitDoacrossInit(CodeGenFunction &CGF, const OMPLoopDirective &D, ArrayRef< Expr * > NumIterations) override
Emit initialization for doacross loop nesting support.
void emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc, const Expr *IfCond, OpenMPDirectiveKind CancelRegion) override
Emit code for 'cancel' construct.
void emitTaskwaitCall(CodeGenFunction &CGF, SourceLocation Loc, const OMPTaskDataTy &Data) override
Emit code for 'taskwait' directive.
void emitTaskgroupRegion(CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen, SourceLocation Loc) override
Emit a taskgroup region.
void emitTargetDataCalls(CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, const Expr *Device, const RegionCodeGenTy &CodeGen, CGOpenMPRuntime::TargetDataInfo &Info) override
Emit the target data mapping code associated with D.
void emitForDispatchInit(CodeGenFunction &CGF, SourceLocation Loc, const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned, bool Ordered, const DispatchRTInput &DispatchValues) override
This is used for non static scheduled types and when the ordered clause is present on the loop constr...
llvm::Function * emitTaskOutlinedFunction(const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, const VarDecl *PartIDVar, const VarDecl *TaskTVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen, bool Tied, unsigned &NumberOfParts) override
Emits outlined function for the OpenMP task directive D.
void emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D, llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds, const Expr *IfCond, const OMPTaskDataTy &Data) override
Emit task region for the taskloop directive.
unsigned getNonVirtualBaseLLVMFieldNo(const CXXRecordDecl *RD) const
llvm::StructType * getLLVMType() const
Return the "complete object" LLVM type associated with this record.
llvm::StructType * getBaseSubobjectLLVMType() const
Return the "base subobject" LLVM type associated with this record.
unsigned getLLVMFieldNo(const FieldDecl *FD) const
Return llvm::StructType element number that corresponds to the field FD.
unsigned getVirtualBaseIndex(const CXXRecordDecl *base) const
Return the LLVM field index corresponding to the given virtual base.
API for captured statement code generation.
virtual void EmitBody(CodeGenFunction &CGF, const Stmt *S)
Emit the captured statement body.
virtual const FieldDecl * lookup(const VarDecl *VD) const
Lookup the captured field decl for a variable.
RAII for correct setting/restoring of CapturedStmtInfo.
The scope used to remap some variables as private in the OpenMP loop body (or other captured region e...
bool Privatize()
Privatizes local variables previously registered as private.
bool addPrivate(const VarDecl *LocalVD, Address Addr)
Registers LocalVD variable as a private with Addr as the address of the corresponding private variabl...
An RAII object to set (and then clear) a mapping for an OpaqueValueExpr.
Enters a new scope for capturing cleanups, all of which will be executed once the scope is exited.
CodeGenFunction - This class organizes the per-function state that is used while generating LLVM code...
LValue EmitLoadOfReferenceLValue(LValue RefLVal)
Definition CGExpr.cpp:3285
void EmitBranchOnBoolExpr(const Expr *Cond, llvm::BasicBlock *TrueBlock, llvm::BasicBlock *FalseBlock, uint64_t TrueCount, Stmt::Likelihood LH=Stmt::LH_None, const Expr *ConditionalOp=nullptr, const VarDecl *ConditionalDecl=nullptr)
EmitBranchOnBoolExpr - Emit a branch on a boolean condition (e.g.
void emitDestroy(Address addr, QualType type, Destroyer *destroyer, bool useEHCleanupForArray)
emitDestroy - Immediately perform the destruction of the given object.
Definition CGDecl.cpp:2409
JumpDest getJumpDestInCurrentScope(llvm::BasicBlock *Target)
The given basic block lies in the current EH scope, but may be a target of a potentially scope-crossi...
static void EmitOMPTargetParallelDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetParallelDirective &S)
void EmitNullInitialization(Address DestPtr, QualType Ty)
EmitNullInitialization - Generate code to set a value of the given type to null, If the type contains...
CGCapturedStmtInfo * CapturedStmtInfo
ComplexPairTy EmitLoadOfComplex(LValue src, SourceLocation loc)
EmitLoadOfComplex - Load a complex number from the specified l-value.
static void EmitOMPTargetDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetDirective &S)
Emit device code for the target directive.
static void EmitOMPTargetTeamsDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetTeamsDirective &S)
Emit device code for the target teams directive.
static void EmitOMPTargetTeamsDistributeDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetTeamsDistributeDirective &S)
Emit device code for the target teams distribute directive.
llvm::BasicBlock * createBasicBlock(const Twine &name="", llvm::Function *parent=nullptr, llvm::BasicBlock *before=nullptr)
createBasicBlock - Create an LLVM basic block.
const LangOptions & getLangOpts() const
AutoVarEmission EmitAutoVarAlloca(const VarDecl &var)
EmitAutoVarAlloca - Emit the alloca and debug information for a local variable.
Definition CGDecl.cpp:1482
void pushDestroy(QualType::DestructionKind dtorKind, Address addr, QualType type)
pushDestroy - Push the standard destructor for the given type as at least a normal cleanup.
Definition CGDecl.cpp:2293
Address EmitLoadOfPointer(Address Ptr, const PointerType *PtrTy, LValueBaseInfo *BaseInfo=nullptr, TBAAAccessInfo *TBAAInfo=nullptr)
Load a pointer with type PtrTy stored at address Ptr.
Definition CGExpr.cpp:3294
void EmitBranchThroughCleanup(JumpDest Dest)
EmitBranchThroughCleanup - Emit a branch from the current insert block through the normal cleanup han...
const Decl * CurCodeDecl
CurCodeDecl - This is the inner-most code context, which includes blocks.
Destroyer * getDestroyer(QualType::DestructionKind destructionKind)
Definition CGDecl.cpp:2266
llvm::AssertingVH< llvm::Instruction > AllocaInsertPt
AllocaInsertPoint - This is an instruction in the entry block before which we prefer to insert alloca...
void EmitAggregateAssign(LValue Dest, LValue Src, QualType EltTy)
Emit an aggregate assignment.
JumpDest ReturnBlock
ReturnBlock - Unified return block.
void EmitAggregateCopy(LValue Dest, LValue Src, QualType EltTy, AggValueSlot::Overlap_t MayOverlap, bool isVolatile=false)
EmitAggregateCopy - Emit an aggregate copy.
LValue EmitLValueForField(LValue Base, const FieldDecl *Field, bool IsInBounds=true)
Definition CGExpr.cpp:5564
RawAddress CreateDefaultAlignTempAlloca(llvm::Type *Ty, const Twine &Name="tmp")
CreateDefaultAlignedTempAlloca - This creates an alloca with the default ABI alignment of the given L...
Definition CGExpr.cpp:177
void GenerateOpenMPCapturedVars(const CapturedStmt &S, SmallVectorImpl< llvm::Value * > &CapturedVars)
void EmitIgnoredExpr(const Expr *E)
EmitIgnoredExpr - Emit an expression in a context which ignores the result.
Definition CGExpr.cpp:245
RValue EmitLoadOfLValue(LValue V, SourceLocation Loc)
EmitLoadOfLValue - Given an expression that represents a value lvalue, this method emits the address ...
Definition CGExpr.cpp:2407
LValue EmitArraySectionExpr(const ArraySectionExpr *E, bool IsLowerBound=true)
Definition CGExpr.cpp:5109
LValue EmitOMPSharedLValue(const Expr *E)
Emits the lvalue for the expression with possibly captured variable.
void StartFunction(GlobalDecl GD, QualType RetTy, llvm::Function *Fn, const CGFunctionInfo &FnInfo, const FunctionArgList &Args, SourceLocation Loc=SourceLocation(), SourceLocation StartLoc=SourceLocation())
Emit code for the start of a function.
void EmitOMPCopy(QualType OriginalType, Address DestAddr, Address SrcAddr, const VarDecl *DestVD, const VarDecl *SrcVD, const Expr *Copy)
Emit proper copying of data from one variable to another.
llvm::Value * EvaluateExprAsBool(const Expr *E)
EvaluateExprAsBool - Perform the usual unary conversions on the specified expression and compare the ...
Definition CGExpr.cpp:226
JumpDest getOMPCancelDestination(OpenMPDirectiveKind Kind)
llvm::Value * emitArrayLength(const ArrayType *arrayType, QualType &baseType, Address &addr)
emitArrayLength - Compute the length of an array, even if it's a VLA, and drill down to the base elem...
void EmitOMPAggregateAssign(Address DestAddr, Address SrcAddr, QualType OriginalType, const llvm::function_ref< void(Address, Address)> CopyGen)
Perform element by element copying of arrays with type OriginalType from SrcAddr to DestAddr using co...
bool HaveInsertPoint() const
HaveInsertPoint - True if an insertion point is defined.
llvm::Value * getTypeSize(QualType Ty)
Returns calculated size of the specified type.
LValue MakeRawAddrLValue(llvm::Value *V, QualType T, CharUnits Alignment, AlignmentSource Source=AlignmentSource::Type)
Same as MakeAddrLValue above except that the pointer is known to be unsigned.
LValue EmitLValueForFieldInitialization(LValue Base, const FieldDecl *Field)
EmitLValueForFieldInitialization - Like EmitLValueForField, except that if the Field is a reference,...
Definition CGExpr.cpp:5738
VlaSizePair getVLASize(const VariableArrayType *vla)
Returns an LLVM value that corresponds to the size, in non-variably-sized elements,...
llvm::CallInst * EmitNounwindRuntimeCall(llvm::FunctionCallee callee, const Twine &name="")
llvm::Value * EmitLoadOfScalar(Address Addr, bool Volatile, QualType Ty, SourceLocation Loc, AlignmentSource Source=AlignmentSource::Type, bool isNontemporal=false)
EmitLoadOfScalar - Load a scalar value from an address, taking care to appropriately convert from the...
void EmitStoreOfComplex(ComplexPairTy V, LValue dest, bool isInit)
EmitStoreOfComplex - Store a complex number into the specified l-value.
const Decl * CurFuncDecl
CurFuncDecl - Holds the Decl for the current outermost non-closure context.
void EmitAutoVarCleanups(const AutoVarEmission &emission)
Definition CGDecl.cpp:2217
void EmitStoreThroughLValue(RValue Src, LValue Dst, bool isInit=false)
EmitStoreThroughLValue - Store the specified rvalue into the specified lvalue, where both are guarant...
Definition CGExpr.cpp:2643
LValue EmitLoadOfPointerLValue(Address Ptr, const PointerType *PtrTy)
Definition CGExpr.cpp:3304
void EmitAnyExprToMem(const Expr *E, Address Location, Qualifiers Quals, bool IsInitializer)
EmitAnyExprToMem - Emits the code necessary to evaluate an arbitrary expression into the given memory...
Definition CGExpr.cpp:296
bool needsEHCleanup(QualType::DestructionKind kind)
Determines whether an EH cleanup is required to destroy a type with the given destruction kind.
llvm::DenseMap< const ValueDecl *, FieldDecl * > LambdaCaptureFields
llvm::CallInst * EmitRuntimeCall(llvm::FunctionCallee callee, const Twine &name="")
llvm::Type * ConvertTypeForMem(QualType T)
static void EmitOMPTargetTeamsDistributeParallelForDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetTeamsDistributeParallelForDirective &S)
static void EmitOMPTargetParallelForSimdDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetParallelForSimdDirective &S)
Emit device code for the target parallel for simd directive.
CodeGenTypes & getTypes() const
static TypeEvaluationKind getEvaluationKind(QualType T)
getEvaluationKind - Return the TypeEvaluationKind of QualType T.
void EmitOMPTargetTaskBasedDirective(const OMPExecutableDirective &S, const RegionCodeGenTy &BodyGen, OMPTargetDataInfo &InputInfo)
Address EmitPointerWithAlignment(const Expr *Addr, LValueBaseInfo *BaseInfo=nullptr, TBAAAccessInfo *TBAAInfo=nullptr, KnownNonNull_t IsKnownNonNull=NotKnownNonNull)
EmitPointerWithAlignment - Given an expression with a pointer type, emit the value and compute our be...
Definition CGExpr.cpp:1576
static void EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetTeamsDistributeParallelForSimdDirective &S)
Emit device code for the target teams distribute parallel for simd directive.
void EmitBranch(llvm::BasicBlock *Block)
EmitBranch - Emit a branch to the specified basic block from the current insert block,...
Definition CGStmt.cpp:672
llvm::Function * GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S, const OMPExecutableDirective &D)
RawAddress CreateMemTemp(QualType T, const Twine &Name="tmp", RawAddress *Alloca=nullptr)
CreateMemTemp - Create a temporary memory object of the given type, with appropriate alignmen and cas...
Definition CGExpr.cpp:189
void EmitVarDecl(const VarDecl &D)
EmitVarDecl - Emit a local variable declaration.
Definition CGDecl.cpp:203
llvm::Value * EmitCheckedInBoundsGEP(llvm::Type *ElemTy, llvm::Value *Ptr, ArrayRef< llvm::Value * > IdxList, bool SignedIndices, bool IsSubtraction, SourceLocation Loc, const Twine &Name="")
Same as IRBuilder::CreateInBoundsGEP, but additionally emits a check to detect undefined behavior whe...
static void EmitOMPTargetParallelGenericLoopDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetParallelGenericLoopDirective &S)
Emit device code for the target parallel loop directive.
llvm::Value * EmitScalarExpr(const Expr *E, bool IgnoreResultAssign=false)
EmitScalarExpr - Emit the computation of the specified expression of LLVM scalar type,...
static bool IsWrappedCXXThis(const Expr *E)
Check if E is a C++ "this" pointer wrapped in value-preserving casts.
Definition CGExpr.cpp:1634
LValue MakeAddrLValue(Address Addr, QualType T, AlignmentSource Source=AlignmentSource::Type)
void FinishFunction(SourceLocation EndLoc=SourceLocation())
FinishFunction - Complete IR generation of the current function.
void EmitAtomicStore(RValue rvalue, LValue lvalue, bool isInit)
static void EmitOMPTargetSimdDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetSimdDirective &S)
Emit device code for the target simd directive.
static void EmitOMPTargetParallelForDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetParallelForDirective &S)
Emit device code for the target parallel for directive.
Address GetAddrOfLocalVar(const VarDecl *VD)
GetAddrOfLocalVar - Return the address of a local variable.
bool ConstantFoldsToSimpleInteger(const Expr *Cond, bool &Result, bool AllowLabels=false)
ConstantFoldsToSimpleInteger - If the specified expression does not fold to a constant,...
static void EmitOMPTargetTeamsGenericLoopDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetTeamsGenericLoopDirective &S)
Emit device code for the target teams loop directive.
LValue EmitMemberExpr(const MemberExpr *E)
Definition CGExpr.cpp:5374
std::pair< llvm::Value *, llvm::Value * > ComplexPairTy
Address ReturnValue
ReturnValue - The temporary alloca to hold the return value.
LValue EmitLValue(const Expr *E, KnownNonNull_t IsKnownNonNull=NotKnownNonNull)
EmitLValue - Emit code to compute a designator that specifies the location of the expression.
Definition CGExpr.cpp:1692
void incrementProfileCounter(const Stmt *S, llvm::Value *StepV=nullptr)
Increment the profiler's counter for the given statement by StepV.
static void EmitOMPTargetTeamsDistributeSimdDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetTeamsDistributeSimdDirective &S)
Emit device code for the target teams distribute simd directive.
llvm::Value * EmitScalarConversion(llvm::Value *Src, QualType SrcTy, QualType DstTy, SourceLocation Loc)
Emit a conversion from the specified type to the specified destination type, both of which are LLVM s...
void EmitVariablyModifiedType(QualType Ty)
EmitVLASize - Capture all the sizes for the VLA expressions in the given variably-modified type and s...
bool isTrivialInitializer(const Expr *Init)
Determine whether the given initializer is trivial in the sense that it requires no code to be genera...
Definition CGDecl.cpp:1822
void EmitStoreOfScalar(llvm::Value *Value, Address Addr, bool Volatile, QualType Ty, AlignmentSource Source=AlignmentSource::Type, bool isInit=false, bool isNontemporal=false)
EmitStoreOfScalar - Store a scalar value to an address, taking care to appropriately convert from the...
void EmitBlock(llvm::BasicBlock *BB, bool IsFinished=false)
EmitBlock - Emit the given block.
Definition CGStmt.cpp:652
void EmitExprAsInit(const Expr *init, const ValueDecl *D, LValue lvalue, bool capturedByInit)
EmitExprAsInit - Emits the code necessary to initialize a location in memory with the given initializ...
Definition CGDecl.cpp:2107
LValue MakeNaturalAlignRawAddrLValue(llvm::Value *V, QualType T)
This class organizes the cross-function state that is used while generating LLVM code.
void SetInternalFunctionAttributes(GlobalDecl GD, llvm::Function *F, const CGFunctionInfo &FI)
Set the attributes on the LLVM function for the given decl and function info.
llvm::Module & getModule() const
const IntrusiveRefCntPtr< llvm::vfs::FileSystem > & getFileSystem() const
DiagnosticsEngine & getDiags() const
const LangOptions & getLangOpts() const
CharUnits getNaturalTypeAlignment(QualType T, LValueBaseInfo *BaseInfo=nullptr, TBAAAccessInfo *TBAAInfo=nullptr, bool forPointeeType=false)
CGOpenMPRuntime & getOpenMPRuntime()
Return a reference to the configured OpenMP runtime.
TBAAAccessInfo getTBAAInfoForSubobject(LValue Base, QualType AccessType)
getTBAAInfoForSubobject - Get TBAA information for an access with a given base lvalue.
ASTContext & getContext() const
const CodeGenOptions & getCodeGenOpts() const
StringRef getMangledName(GlobalDecl GD)
std::optional< CharUnits > getOMPAllocateAlignment(const VarDecl *VD)
Return the alignment specified in an allocate directive, if present.
Definition CGDecl.cpp:2959
llvm::Constant * EmitNullConstant(QualType T)
Return the result of value-initializing the given type, i.e.
llvm::Type * ConvertType(QualType T)
ConvertType - Convert type T into a llvm::Type.
llvm::FunctionType * GetFunctionType(const CGFunctionInfo &Info)
GetFunctionType - Get the LLVM function type for.
Definition CGCall.cpp:1703
const CGFunctionInfo & arrangeBuiltinFunctionDeclaration(QualType resultType, const FunctionArgList &args)
A builtin function is a freestanding function using the default C conventions.
Definition CGCall.cpp:741
const CGRecordLayout & getCGRecordLayout(const RecordDecl *)
getCGRecordLayout - Return record layout info for the given record decl.
llvm::GlobalVariable * GetAddrOfVTable(const CXXRecordDecl *RD)
GetAddrOfVTable - Get the address of the VTable for the given record decl.
Definition CGVTables.cpp:41
A specialization of Address that requires the address to be an LLVM Constant.
Definition Address.h:296
static ConstantAddress invalid()
Definition Address.h:304
void pushTerminate()
Push a terminate handler on the stack.
void popTerminate()
Pops a terminate handler off the stack.
Definition CGCleanup.h:639
FunctionArgList - Type for representing both the decl and type of parameters to a function.
Definition CGCall.h:375
LValue - This represents an lvalue references.
Definition CGValue.h:183
CharUnits getAlignment() const
Definition CGValue.h:355
llvm::Value * getPointer(CodeGenFunction &CGF) const
const Qualifiers & getQuals() const
Definition CGValue.h:350
Address getAddress() const
Definition CGValue.h:373
LValueBaseInfo getBaseInfo() const
Definition CGValue.h:358
QualType getType() const
Definition CGValue.h:303
TBAAAccessInfo getTBAAInfo() const
Definition CGValue.h:347
A basic class for pre|post-action for advanced codegen sequence for OpenMP region.
virtual void Enter(CodeGenFunction &CGF)
RValue - This trivial value class is used to represent the result of an expression that is evaluated.
Definition CGValue.h:42
static RValue get(llvm::Value *V)
Definition CGValue.h:99
static RValue getComplex(llvm::Value *V1, llvm::Value *V2)
Definition CGValue.h:109
llvm::Value * getScalarVal() const
getScalarVal() - Return the Value* of this scalar value.
Definition CGValue.h:72
An abstract representation of an aligned address.
Definition Address.h:42
llvm::Type * getElementType() const
Return the type of the values stored in this address.
Definition Address.h:77
llvm::Value * getPointer() const
Definition Address.h:66
static RawAddress invalid()
Definition Address.h:61
Class intended to support codegen of all kind of the reduction clauses.
LValue getSharedLValue(unsigned N) const
Returns LValue for the reduction item.
const Expr * getRefExpr(unsigned N) const
Returns the base declaration of the reduction item.
LValue getOrigLValue(unsigned N) const
Returns LValue for the original reduction item.
bool needCleanups(unsigned N)
Returns true if the private copy requires cleanups.
void emitAggregateType(CodeGenFunction &CGF, unsigned N)
Emits the code for the variable-modified type, if required.
const VarDecl * getBaseDecl(unsigned N) const
Returns the base declaration of the reduction item.
QualType getPrivateType(unsigned N) const
Return the type of the private item.
bool usesReductionInitializer(unsigned N) const
Returns true if the initialization of the reduction item uses initializer from declare reduction cons...
void emitSharedOrigLValue(CodeGenFunction &CGF, unsigned N)
Emits lvalue for the shared and original reduction item.
void emitInitialization(CodeGenFunction &CGF, unsigned N, Address PrivateAddr, Address SharedAddr, llvm::function_ref< bool(CodeGenFunction &)> DefaultInit)
Performs initialization of the private copy for the reduction item.
std::pair< llvm::Value *, llvm::Value * > getSizes(unsigned N) const
Returns the size of the reduction item (in chars and total number of elements in the item),...
ReductionCodeGen(ArrayRef< const Expr * > Shareds, ArrayRef< const Expr * > Origs, ArrayRef< const Expr * > Privates, ArrayRef< const Expr * > ReductionOps)
void emitCleanups(CodeGenFunction &CGF, unsigned N, Address PrivateAddr)
Emits cleanup code for the reduction item.
Address adjustPrivateAddress(CodeGenFunction &CGF, unsigned N, Address PrivateAddr)
Adjusts PrivatedAddr for using instead of the original variable address in normal operations.
Class provides a way to call simple version of codegen for OpenMP region, or an advanced with possibl...
void operator()(CodeGenFunction &CGF) const
void setAction(PrePostActionTy &Action) const
ConstStmtVisitor - This class implements a simple visitor for Stmt subclasses.
DeclContext - This is used only as base class of specific decl types that can act as declaration cont...
Definition DeclBase.h:1449
void addDecl(Decl *D)
Add the declaration D into this context.
A reference to a declared variable, function, enum, etc.
Definition Expr.h:1270
ValueDecl * getDecl()
Definition Expr.h:1338
Decl - This represents one declaration (or definition), e.g.
Definition DeclBase.h:86
T * getAttr() const
Definition DeclBase.h:573
bool hasAttrs() const
Definition DeclBase.h:518
ASTContext & getASTContext() const LLVM_READONLY
Definition DeclBase.cpp:546
void addAttr(Attr *A)
virtual Stmt * getBody() const
getBody - If this Decl represents a declaration for a body of code, such as a function or method defi...
Definition DeclBase.h:1087
llvm::iterator_range< specific_attr_iterator< T > > specific_attrs() const
Definition DeclBase.h:559
SourceLocation getLocation() const
Definition DeclBase.h:439
DeclContext * getDeclContext()
Definition DeclBase.h:448
AttrVec & getAttrs()
Definition DeclBase.h:524
bool hasAttr() const
Definition DeclBase.h:577
virtual Decl * getCanonicalDecl()
Retrieves the "canonical" declaration of the given declaration.
Definition DeclBase.h:978
SourceLocation getBeginLoc() const LLVM_READONLY
Definition Decl.h:831
DiagnosticBuilder Report(SourceLocation Loc, unsigned DiagID)
Issue the message to the client.
This represents one expression.
Definition Expr.h:112
bool isIntegerConstantExpr(const ASTContext &Ctx) const
bool isGLValue() const
Definition Expr.h:287
Expr * IgnoreParenNoopCasts(const ASTContext &Ctx) LLVM_READONLY
Skip past any parentheses and casts which do not change the value (including ptr->int casts of the sa...
Definition Expr.cpp:3116
@ SE_AllowSideEffects
Allow any unmodeled side effect.
Definition Expr.h:674
@ SE_AllowUndefinedBehavior
Allow UB that we can give a value, but not arbitrary unmodeled side effects.
Definition Expr.h:672
Expr * IgnoreParenCasts() LLVM_READONLY
Skip past any parentheses and casts which might surround this expression until reaching a fixed point...
Definition Expr.cpp:3094
llvm::APSInt EvaluateKnownConstInt(const ASTContext &Ctx) const
EvaluateKnownConstInt - Call EvaluateAsRValue and return the folded integer.
Expr * IgnoreParenImpCasts() LLVM_READONLY
Skip past any parentheses and implicit casts which might surround this expression until reaching a fi...
Definition Expr.cpp:3089
bool isEvaluatable(const ASTContext &Ctx, SideEffectsKind AllowSideEffects=SE_NoSideEffects) const
isEvaluatable - Call EvaluateAsRValue to see if this expression can be constant folded without side-e...
std::optional< llvm::APSInt > getIntegerConstantExpr(const ASTContext &Ctx) const
isIntegerConstantExpr - Return the value if this expression is a valid integer constant expression.
bool HasSideEffects(const ASTContext &Ctx, bool IncludePossibleEffects=true) const
HasSideEffects - This routine returns true for all those expressions which have any effect other than...
Definition Expr.cpp:3669
bool EvaluateAsBooleanCondition(bool &Result, const ASTContext &Ctx, bool InConstantContext=false) const
EvaluateAsBooleanCondition - Return true if this is a constant which we can fold and convert to a boo...
SourceLocation getExprLoc() const LLVM_READONLY
getExprLoc - Return the preferred location for the arrow when diagnosing a problem with a generic exp...
Definition Expr.cpp:276
static bool isSameComparisonOperand(const Expr *E1, const Expr *E2)
Checks that the two Expr's will refer to the same value as a comparison operand.
Definition Expr.cpp:4299
QualType getType() const
Definition Expr.h:144
bool hasNonTrivialCall(const ASTContext &Ctx) const
Determine whether this expression involves a call to any function that is not trivial.
Definition Expr.cpp:4035
Represents a member of a struct/union/class.
Definition Decl.h:3160
unsigned getFieldIndex() const
Returns the index of this field within its record, as appropriate for passing to ASTRecordLayout::get...
Definition Decl.h:3245
const RecordDecl * getParent() const
Returns the parent of this field declaration, which is the struct in which this field is defined.
Definition Decl.h:3396
static FieldDecl * Create(const ASTContext &C, DeclContext *DC, SourceLocation StartLoc, SourceLocation IdLoc, const IdentifierInfo *Id, QualType T, TypeSourceInfo *TInfo, Expr *BW, bool Mutable, InClassInitStyle InitStyle)
Definition Decl.cpp:4700
Represents a function declaration or definition.
Definition Decl.h:2000
const ParmVarDecl * getParamDecl(unsigned i) const
Definition Decl.h:2797
QualType getReturnType() const
Definition Decl.h:2845
ArrayRef< ParmVarDecl * > parameters() const
Definition Decl.h:2774
FunctionDecl * getCanonicalDecl() override
Retrieves the "canonical" declaration of the given declaration.
Definition Decl.cpp:3747
FunctionDecl * getMostRecentDecl()
Returns the most recent (re)declaration of this declaration.
unsigned getNumParams() const
Return the number of parameters this function must have based on its FunctionType.
Definition Decl.cpp:3826
FunctionDecl * getPreviousDecl()
Return the previous declaration of this declaration or NULL if this is the first declaration.
GlobalDecl - represents a global declaration.
Definition GlobalDecl.h:57
const Decl * getDecl() const
Definition GlobalDecl.h:106
static ImplicitParamDecl * Create(ASTContext &C, DeclContext *DC, SourceLocation IdLoc, IdentifierInfo *Id, QualType T, ImplicitParamKind ParamKind)
Create implicit parameter.
Definition Decl.cpp:5597
static IntegerLiteral * Create(const ASTContext &C, const llvm::APInt &V, QualType type, SourceLocation l)
Returns a new integer literal with value 'V' and type 'type'.
Definition Expr.cpp:974
An lvalue reference type, per C++11 [dcl.ref].
Definition TypeBase.h:3618
MemberExpr - [C99 6.5.2.3] Structure and Union Members.
Definition Expr.h:3364
ValueDecl * getMemberDecl() const
Retrieve the member declaration to which this expression refers.
Definition Expr.h:3447
Expr * getBase() const
Definition Expr.h:3441
StringRef getName() const
Get the name of identifier for this declaration as a StringRef.
Definition Decl.h:301
bool isExternallyVisible() const
Definition Decl.h:433
const Stmt * getPreInitStmt() const
Get pre-initialization statement for the clause.
This is a basic class for representing single OpenMP clause.
This represents 'pragma omp declare mapper ...' directive.
Definition DeclOpenMP.h:349
Expr * getMapperVarRef()
Get the variable declared in the mapper.
Definition DeclOpenMP.h:411
This represents 'pragma omp declare reduction ...' directive.
Definition DeclOpenMP.h:239
Expr * getInitializer()
Get initializer expression (if specified) of the declare reduction construct.
Definition DeclOpenMP.h:300
Expr * getInitPriv()
Get Priv variable of the initializer.
Definition DeclOpenMP.h:311
Expr * getCombinerOut()
Get Out variable of the combiner.
Definition DeclOpenMP.h:288
Expr * getCombinerIn()
Get In variable of the combiner.
Definition DeclOpenMP.h:285
Expr * getCombiner()
Get combiner expression of the declare reduction construct.
Definition DeclOpenMP.h:282
Expr * getInitOrig()
Get Orig variable of the initializer.
Definition DeclOpenMP.h:308
OMPDeclareReductionInitKind getInitializerKind() const
Get initializer kind.
Definition DeclOpenMP.h:303
This represents 'if' clause in the 'pragma omp ...' directive.
Expr * getCondition() const
Returns condition.
OMPIteratorHelperData & getHelper(unsigned I)
Fetches helper data for the specified iteration space.
Definition Expr.cpp:5480
unsigned numOfIterators() const
Returns number of iterator definitions.
Definition ExprOpenMP.h:275
This represents 'num_threads' clause in the 'pragma omp ...' directive.
This represents 'pragma omp requires...' directive.
Definition DeclOpenMP.h:479
clauselist_range clauselists()
Definition DeclOpenMP.h:504
This represents 'threadset' clause in the 'pragma omp task ...' directive.
OpaqueValueExpr - An expression referring to an opaque object of a fixed type and value class.
Definition Expr.h:1178
Represents a parameter to a function.
Definition Decl.h:1790
PointerType - C99 6.7.5.1 - Pointer Declarators.
Definition TypeBase.h:3329
Represents an unpacked "presumed" location which can be presented to the user.
unsigned getColumn() const
Return the presumed column number of this location.
const char * getFilename() const
Return the presumed filename of this location.
unsigned getLine() const
Return the presumed line number of this location.
A (possibly-)qualified type.
Definition TypeBase.h:937
void addRestrict()
Add the restrict qualifier to this QualType.
Definition TypeBase.h:1172
QualType withRestrict() const
Definition TypeBase.h:1175
bool isNull() const
Return true if this QualType doesn't point to a type yet.
Definition TypeBase.h:1004
const Type * getTypePtr() const
Retrieves a pointer to the underlying (unqualified) type.
Definition TypeBase.h:8302
Qualifiers getQualifiers() const
Retrieve the set of qualifiers applied to this type.
Definition TypeBase.h:8342
QualType getNonReferenceType() const
If Type is a reference type (e.g., const int&), returns the type that the reference refers to ("const...
Definition TypeBase.h:8487
QualType getCanonicalType() const
Definition TypeBase.h:8354
DestructionKind isDestructedType() const
Returns a nonzero value if objects of this type require non-trivial work to clean up after.
Definition TypeBase.h:1545
Represents a struct/union/class.
Definition Decl.h:4324
field_iterator field_end() const
Definition Decl.h:4530
field_range fields() const
Definition Decl.h:4527
virtual void completeDefinition()
Note that the definition of this type is now complete.
Definition Decl.cpp:5286
bool field_empty() const
Definition Decl.h:4535
field_iterator field_begin() const
Definition Decl.cpp:5270
Scope - A scope is a transient data structure that is used while parsing the program.
Definition Scope.h:41
Encodes a location in the source.
static SourceLocation getFromRawEncoding(UIntTy Encoding)
Turn a raw encoding of a SourceLocation object into a real SourceLocation.
bool isValid() const
Return true if this is a valid SourceLocation object.
UIntTy getRawEncoding() const
When a SourceLocation itself cannot be used, this returns an (opaque) 32-bit integer encoding for it.
This class handles loading and caching of source files into memory.
PresumedLoc getPresumedLoc(SourceLocation Loc, bool UseLineDirectives=true) const
Returns the "presumed" location of a SourceLocation specifies.
Stmt - This represents one statement.
Definition Stmt.h:86
child_range children()
Definition Stmt.cpp:299
StmtClass getStmtClass() const
Definition Stmt.h:1485
SourceRange getSourceRange() const LLVM_READONLY
SourceLocation tokens are not useful in isolation - they are low level value objects created/interpre...
Definition Stmt.cpp:338
Stmt * IgnoreContainers(bool IgnoreCaptured=false)
Skip no-op (attributed, compound) container stmts and skip captured stmt at the top,...
Definition Stmt.cpp:205
SourceLocation getBeginLoc() const LLVM_READONLY
Definition Stmt.cpp:350
void startDefinition()
Starts the definition of this tag declaration.
Definition Decl.cpp:4906
bool isUnion() const
Definition Decl.h:3925
The base class of the type hierarchy.
Definition TypeBase.h:1833
bool isVoidType() const
Definition TypeBase.h:8901
const Type * getPointeeOrArrayElementType() const
If this is a pointer type, return the pointee type.
Definition TypeBase.h:9081
bool isSignedIntegerType() const
Return true if this is an integer type that is signed, according to C99 6.2.5p4 [char,...
Definition Type.cpp:2206
CXXRecordDecl * getAsCXXRecordDecl() const
Retrieves the CXXRecordDecl that this type refers to, either because the type is a RecordType or beca...
Definition Type.h:26
RecordDecl * getAsRecordDecl() const
Retrieves the RecordDecl this type refers to.
Definition Type.h:41
bool isArrayType() const
Definition TypeBase.h:8638
bool isPointerType() const
Definition TypeBase.h:8539
CanQualType getCanonicalTypeUnqualified() const
bool isIntegerType() const
isIntegerType() does not include complex integers (a GCC extension).
Definition TypeBase.h:8945
const T * castAs() const
Member-template castAs<specific type>.
Definition TypeBase.h:9188
bool isReferenceType() const
Definition TypeBase.h:8563
QualType getPointeeType() const
If this is a pointer, ObjC object pointer, or block pointer, this returns the respective pointee.
Definition Type.cpp:753
bool isLValueReferenceType() const
Definition TypeBase.h:8567
bool isAggregateType() const
Determines whether the type is a C++ aggregate type or C aggregate or union type.
Definition Type.cpp:2412
RecordDecl * castAsRecordDecl() const
Definition Type.h:48
QualType getCanonicalTypeInternal() const
Definition TypeBase.h:3120
const Type * getBaseElementTypeUnsafe() const
Get the base element type of this type, potentially discarding type qualifiers.
Definition TypeBase.h:9074
bool isVariablyModifiedType() const
Whether this type is a variably-modified type (C99 6.7.5).
Definition TypeBase.h:2801
const ArrayType * getAsArrayTypeUnsafe() const
A variant of getAs<> for array types which silently discards qualifiers from the outermost type.
Definition TypeBase.h:9174
bool isFloatingType() const
Definition Type.cpp:2305
bool isUnsignedIntegerType() const
Return true if this is an integer type that is unsigned, according to C99 6.2.5p6 [which returns true...
Definition Type.cpp:2254
bool isAnyPointerType() const
Definition TypeBase.h:8547
const T * getAs() const
Member-template getAs<specific type>'.
Definition TypeBase.h:9121
bool isRecordType() const
Definition TypeBase.h:8666
bool isUnionType() const
Definition Type.cpp:719
Represent the declaration of a variable (in which case it is an lvalue) a function (in which case it ...
Definition Decl.h:712
QualType getType() const
Definition Decl.h:723
Represents a variable declaration or definition.
Definition Decl.h:926
VarDecl * getCanonicalDecl() override
Retrieves the "canonical" declaration of the given declaration.
Definition Decl.cpp:2268
VarDecl * getDefinition(ASTContext &)
Get the real (not just tentative) definition for this declaration.
Definition Decl.cpp:2377
const Expr * getInit() const
Definition Decl.h:1368
bool hasExternalStorage() const
Returns true if a variable has extern or private_extern storage.
Definition Decl.h:1217
@ DeclarationOnly
This declaration is only a declaration.
Definition Decl.h:1295
DefinitionKind hasDefinition(ASTContext &) const
Check whether this variable is defined in this translation unit.
Definition Decl.cpp:2386
bool isLocalVarDeclOrParm() const
Similar to isLocalVarDecl but also includes parameters.
Definition Decl.h:1262
const Expr * getAnyInitializer() const
Get the initializer for this variable, no matter which declaration it is attached to.
Definition Decl.h:1358
Represents a C array with a specified size that is not an integer-constant-expression.
Definition TypeBase.h:3967
Expr * getSizeExpr() const
Definition TypeBase.h:3981
specific_attr_iterator - Iterates over a subrange of an AttrVec, only providing attributes that are o...
Definition SPIR.cpp:35
bool isEmptyRecordForLayout(const ASTContext &Context, QualType T)
isEmptyRecordForLayout - Return true iff a structure contains only empty base classes (per isEmptyRec...
@ Type
The l-value was considered opaque, so the alignment was determined from a type.
Definition CGValue.h:155
@ Decl
The l-value was an access to a declared entity or something equivalently strong, like the address of ...
Definition CGValue.h:146
bool isEmptyFieldForLayout(const ASTContext &Context, const FieldDecl *FD)
isEmptyFieldForLayout - Return true iff the field is "empty", that is, either a zero-width bit-field ...
ComparisonResult
Indicates the result of a tentative comparison.
The JSON file list parser is used to communicate input to InstallAPI.
bool isOpenMPWorksharingDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a worksharing directive.
CanQual< Type > CanQualType
Represents a canonical, potentially-qualified type.
bool needsTaskBasedThreadLimit(OpenMPDirectiveKind DKind)
Checks if the specified target directive, combined or not, needs task based thread_limit.
@ Match
This is not an overload because the signature exactly matches an existing declaration.
Definition Sema.h:818
@ Ctor_Complete
Complete object ctor.
Definition ABI.h:25
Privates[]
This class represents the 'transparent' clause in the 'pragma omp task' directive.
bool isa(CodeGen::Address addr)
Definition Address.h:330
if(T->getSizeExpr()) TRY_TO(TraverseStmt(const_cast< Expr * >(T -> getSizeExpr())))
bool isOpenMPTargetDataManagementDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a target data offload directive.
static bool classof(const OMPClause *T)
@ Conditional
A conditional (?:) operator.
Definition Sema.h:669
@ ICIS_NoInit
No in-class initializer.
Definition Specifiers.h:272
bool isOpenMPDistributeDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a distribute directive.
@ LCK_ByRef
Capturing by reference.
Definition Lambda.h:37
LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE()
@ Private
'private' clause, allowed on 'parallel', 'serial', 'loop', 'parallel loop', and 'serial loop' constru...
@ Vector
'vector' clause, allowed on 'loop', Combined, and 'routine' directives.
@ Reduction
'reduction' clause, allowed on Parallel, Serial, Loop, and the combined constructs.
@ Present
'present' clause, allowed on Compute and Combined constructs, plus 'data' and 'declare'.
OpenMPScheduleClauseModifier
OpenMP modifiers for 'schedule' clause.
Definition OpenMPKinds.h:39
@ OMPC_SCHEDULE_MODIFIER_last
Definition OpenMPKinds.h:44
@ OMPC_SCHEDULE_MODIFIER_unknown
Definition OpenMPKinds.h:40
@ AS_public
Definition Specifiers.h:124
nullptr
This class represents a compute construct, representing a 'Kind' of ‘parallel’, 'serial',...
@ CR_OpenMP
bool isOpenMPParallelDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a parallel-kind directive.
OpenMPDistScheduleClauseKind
OpenMP attributes for 'dist_schedule' clause.
Expr * Cond
};
bool isOpenMPTaskingDirective(OpenMPDirectiveKind Kind)
Checks if the specified directive kind is one of tasking directives - task, taskloop,...
bool isOpenMPTargetExecutionDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a target code offload directive.
@ OMPC_DYN_GROUPPRIVATE_FALLBACK_unknown
@ Result
The result type of a method or function.
Definition TypeBase.h:905
bool isOpenMPTeamsDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a teams-kind directive.
const FunctionProtoType * T
OpenMPDependClauseKind
OpenMP attributes for 'depend' clause.
Definition OpenMPKinds.h:55
@ OMPC_DEPEND_unknown
Definition OpenMPKinds.h:59
@ Dtor_Complete
Complete object dtor.
Definition ABI.h:36
@ Union
The "union" keyword.
Definition TypeBase.h:5901
bool isOpenMPTargetMapEnteringDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a map-entering target directive.
@ Type
The name was classified as a type.
Definition Sema.h:564
bool isOpenMPLoopDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a directive with an associated loop construct.
OpenMPSeverityClauseKind
OpenMP attributes for 'severity' clause.
LangAS
Defines the address space values used by the address space qualifier of QualType.
llvm::omp::Directive OpenMPDirectiveKind
OpenMP directives.
Definition OpenMPKinds.h:25
bool isOpenMPSimdDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a simd directive.
@ VK_PRValue
A pr-value expression (in the C++11 taxonomy) produces a temporary value.
Definition Specifiers.h:135
@ VK_LValue
An l-value expression is a reference to an object with independent storage.
Definition Specifiers.h:139
for(const auto &A :T->param_types())
void getOpenMPCaptureRegions(llvm::SmallVectorImpl< OpenMPDirectiveKind > &CaptureRegions, OpenMPDirectiveKind DKind)
Return the captured regions of an OpenMP directive.
OpenMPNumThreadsClauseModifier
@ OMPC_ATOMIC_DEFAULT_MEM_ORDER_unknown
U cast(CodeGen::Address addr)
Definition Address.h:327
@ OMPC_DEVICE_unknown
Definition OpenMPKinds.h:51
OpenMPMapModifierKind
OpenMP modifier kind for 'map' clause.
Definition OpenMPKinds.h:79
@ OMPC_MAP_MODIFIER_unknown
Definition OpenMPKinds.h:80
@ Other
Other implicit parameter.
Definition Decl.h:1746
OpenMPScheduleClauseKind
OpenMP attributes for 'schedule' clause.
Definition OpenMPKinds.h:31
@ OMPC_SCHEDULE_unknown
Definition OpenMPKinds.h:35
bool isOpenMPTaskLoopDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a taskloop directive.
OpenMPThreadsetKind
OpenMP modifiers for 'threadset' clause.
OpenMPMapClauseKind
OpenMP mapping kind for 'map' clause.
Definition OpenMPKinds.h:71
@ OMPC_MAP_unknown
Definition OpenMPKinds.h:75
unsigned long uint64_t
Diagnostic wrappers for TextAPI types for error reporting.
Definition Dominators.h:30
#define false
Definition stdbool.h:26
Data for list of allocators.
Expr * AllocatorTraits
Allocator traits.
struct with the values to be passed to the dispatch runtime function
llvm::Value * Chunk
Chunk size specified using 'schedule' clause (nullptr if chunk was not specified)
Maps the expression for the lastprivate variable to the global copy used to store new value because o...
Struct with the values to be passed to the static runtime function.
bool IVSigned
Sign of the iteration variable.
Address UB
Address of the output variable in which the upper iteration number is returned.
Address IL
Address of the output variable in which the flag of the last iteration is returned.
llvm::Value * Chunk
Value of the chunk for the static_chunked scheduled loop.
unsigned IVSize
Size of the iteration variable in bits.
Address ST
Address of the output variable in which the stride value is returned necessary to generated the stati...
bool Ordered
true if loop is ordered, false otherwise.
Address LB
Address of the output variable in which the lower iteration number is returned.
A jump destination is an abstract label, branching to which may require a jump out through normal cle...
llvm::IntegerType * Int8Ty
i8, i16, i32, and i64
llvm::CallingConv::ID getRuntimeCC() const
SmallVector< const Expr *, 4 > DepExprs
EvalResult is a struct with detailed info about an evaluated expression.
Definition Expr.h:645
Extra information about a function prototype.
Definition TypeBase.h:5354
Expr * CounterUpdate
Updater for the internal counter: ++CounterVD;.
Definition ExprOpenMP.h:121
Scheduling data for loop-based OpenMP directives.
OpenMPScheduleClauseModifier M2
OpenMPScheduleClauseModifier M1
OpenMPScheduleClauseKind Schedule
Describes how types, statements, expressions, and declarations should be printed.