clang 19.0.0git
CGOpenMPRuntime.cpp
Go to the documentation of this file.
1//===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This provides a class for OpenMP runtime code generation.
10//
11//===----------------------------------------------------------------------===//
12
13#include "CGOpenMPRuntime.h"
14#include "CGCXXABI.h"
15#include "CGCleanup.h"
16#include "CGRecordLayout.h"
17#include "CodeGenFunction.h"
18#include "TargetInfo.h"
19#include "clang/AST/APValue.h"
20#include "clang/AST/Attr.h"
21#include "clang/AST/Decl.h"
30#include "llvm/ADT/ArrayRef.h"
31#include "llvm/ADT/SetOperations.h"
32#include "llvm/ADT/SmallBitVector.h"
33#include "llvm/ADT/StringExtras.h"
34#include "llvm/Bitcode/BitcodeReader.h"
35#include "llvm/IR/Constants.h"
36#include "llvm/IR/DerivedTypes.h"
37#include "llvm/IR/GlobalValue.h"
38#include "llvm/IR/InstrTypes.h"
39#include "llvm/IR/Value.h"
40#include "llvm/Support/AtomicOrdering.h"
41#include "llvm/Support/Format.h"
42#include "llvm/Support/raw_ostream.h"
43#include <cassert>
44#include <cstdint>
45#include <numeric>
46#include <optional>
47
48using namespace clang;
49using namespace CodeGen;
50using namespace llvm::omp;
51
52namespace {
53/// Base class for handling code generation inside OpenMP regions.
54class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
55public:
56 /// Kinds of OpenMP regions used in codegen.
57 enum CGOpenMPRegionKind {
58 /// Region with outlined function for standalone 'parallel'
59 /// directive.
60 ParallelOutlinedRegion,
61 /// Region with outlined function for standalone 'task' directive.
62 TaskOutlinedRegion,
63 /// Region for constructs that do not require function outlining,
64 /// like 'for', 'sections', 'atomic' etc. directives.
65 InlinedRegion,
66 /// Region with outlined function for standalone 'target' directive.
67 TargetRegion,
68 };
69
70 CGOpenMPRegionInfo(const CapturedStmt &CS,
71 const CGOpenMPRegionKind RegionKind,
72 const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
73 bool HasCancel)
74 : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
75 CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}
76
77 CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
78 const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
79 bool HasCancel)
80 : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
81 Kind(Kind), HasCancel(HasCancel) {}
82
83 /// Get a variable or parameter for storing global thread id
84 /// inside OpenMP construct.
85 virtual const VarDecl *getThreadIDVariable() const = 0;
86
87 /// Emit the captured statement body.
88 void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;
89
90 /// Get an LValue for the current ThreadID variable.
91 /// \return LValue for thread id variable. This LValue always has type int32*.
92 virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);
93
94 virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}
95
96 CGOpenMPRegionKind getRegionKind() const { return RegionKind; }
97
98 OpenMPDirectiveKind getDirectiveKind() const { return Kind; }
99
100 bool hasCancel() const { return HasCancel; }
101
102 static bool classof(const CGCapturedStmtInfo *Info) {
103 return Info->getKind() == CR_OpenMP;
104 }
105
106 ~CGOpenMPRegionInfo() override = default;
107
108protected:
109 CGOpenMPRegionKind RegionKind;
110 RegionCodeGenTy CodeGen;
112 bool HasCancel;
113};
114
115/// API for captured statement code generation in OpenMP constructs.
116class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo {
117public:
118 CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
119 const RegionCodeGenTy &CodeGen,
120 OpenMPDirectiveKind Kind, bool HasCancel,
121 StringRef HelperName)
122 : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
123 HasCancel),
124 ThreadIDVar(ThreadIDVar), HelperName(HelperName) {
125 assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
126 }
127
128 /// Get a variable or parameter for storing global thread id
129 /// inside OpenMP construct.
130 const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }
131
132 /// Get the name of the capture helper.
133 StringRef getHelperName() const override { return HelperName; }
134
135 static bool classof(const CGCapturedStmtInfo *Info) {
136 return CGOpenMPRegionInfo::classof(Info) &&
137 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
138 ParallelOutlinedRegion;
139 }
140
141private:
142 /// A variable or parameter storing global thread id for OpenMP
143 /// constructs.
144 const VarDecl *ThreadIDVar;
145 StringRef HelperName;
146};
147
148/// API for captured statement code generation in OpenMP constructs.
149class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo {
150public:
151 class UntiedTaskActionTy final : public PrePostActionTy {
152 bool Untied;
153 const VarDecl *PartIDVar;
154 const RegionCodeGenTy UntiedCodeGen;
155 llvm::SwitchInst *UntiedSwitch = nullptr;
156
157 public:
158 UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar,
159 const RegionCodeGenTy &UntiedCodeGen)
160 : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {}
161 void Enter(CodeGenFunction &CGF) override {
162 if (Untied) {
163 // Emit task switching point.
164 LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
165 CGF.GetAddrOfLocalVar(PartIDVar),
166 PartIDVar->getType()->castAs<PointerType>());
167 llvm::Value *Res =
168 CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation());
169 llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done.");
170 UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB);
171 CGF.EmitBlock(DoneBB);
173 CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
174 UntiedSwitch->addCase(CGF.Builder.getInt32(0),
175 CGF.Builder.GetInsertBlock());
176 emitUntiedSwitch(CGF);
177 }
178 }
179 void emitUntiedSwitch(CodeGenFunction &CGF) const {
180 if (Untied) {
181 LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
182 CGF.GetAddrOfLocalVar(PartIDVar),
183 PartIDVar->getType()->castAs<PointerType>());
184 CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
185 PartIdLVal);
186 UntiedCodeGen(CGF);
187 CodeGenFunction::JumpDest CurPoint =
188 CGF.getJumpDestInCurrentScope(".untied.next.");
190 CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
191 UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
192 CGF.Builder.GetInsertBlock());
193 CGF.EmitBranchThroughCleanup(CurPoint);
194 CGF.EmitBlock(CurPoint.getBlock());
195 }
196 }
197 unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); }
198 };
199 CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
200 const VarDecl *ThreadIDVar,
201 const RegionCodeGenTy &CodeGen,
202 OpenMPDirectiveKind Kind, bool HasCancel,
203 const UntiedTaskActionTy &Action)
204 : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
205 ThreadIDVar(ThreadIDVar), Action(Action) {
206 assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
207 }
208
209 /// Get a variable or parameter for storing global thread id
210 /// inside OpenMP construct.
211 const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }
212
213 /// Get an LValue for the current ThreadID variable.
214 LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;
215
216 /// Get the name of the capture helper.
217 StringRef getHelperName() const override { return ".omp_outlined."; }
218
219 void emitUntiedSwitch(CodeGenFunction &CGF) override {
220 Action.emitUntiedSwitch(CGF);
221 }
222
223 static bool classof(const CGCapturedStmtInfo *Info) {
224 return CGOpenMPRegionInfo::classof(Info) &&
225 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
226 TaskOutlinedRegion;
227 }
228
229private:
230 /// A variable or parameter storing global thread id for OpenMP
231 /// constructs.
232 const VarDecl *ThreadIDVar;
233 /// Action for emitting code for untied tasks.
234 const UntiedTaskActionTy &Action;
235};
236
237/// API for inlined captured statement code generation in OpenMP
238/// constructs.
239class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
240public:
241 CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
242 const RegionCodeGenTy &CodeGen,
243 OpenMPDirectiveKind Kind, bool HasCancel)
244 : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
245 OldCSI(OldCSI),
246 OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}
247
248 // Retrieve the value of the context parameter.
249 llvm::Value *getContextValue() const override {
250 if (OuterRegionInfo)
251 return OuterRegionInfo->getContextValue();
252 llvm_unreachable("No context value for inlined OpenMP region");
253 }
254
255 void setContextValue(llvm::Value *V) override {
256 if (OuterRegionInfo) {
257 OuterRegionInfo->setContextValue(V);
258 return;
259 }
260 llvm_unreachable("No context value for inlined OpenMP region");
261 }
262
263 /// Lookup the captured field decl for a variable.
264 const FieldDecl *lookup(const VarDecl *VD) const override {
265 if (OuterRegionInfo)
266 return OuterRegionInfo->lookup(VD);
267 // If there is no outer outlined region,no need to lookup in a list of
268 // captured variables, we can use the original one.
269 return nullptr;
270 }
271
272 FieldDecl *getThisFieldDecl() const override {
273 if (OuterRegionInfo)
274 return OuterRegionInfo->getThisFieldDecl();
275 return nullptr;
276 }
277
278 /// Get a variable or parameter for storing global thread id
279 /// inside OpenMP construct.
280 const VarDecl *getThreadIDVariable() const override {
281 if (OuterRegionInfo)
282 return OuterRegionInfo->getThreadIDVariable();
283 return nullptr;
284 }
285
286 /// Get an LValue for the current ThreadID variable.
287 LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override {
288 if (OuterRegionInfo)
289 return OuterRegionInfo->getThreadIDVariableLValue(CGF);
290 llvm_unreachable("No LValue for inlined OpenMP construct");
291 }
292
293 /// Get the name of the capture helper.
294 StringRef getHelperName() const override {
295 if (auto *OuterRegionInfo = getOldCSI())
296 return OuterRegionInfo->getHelperName();
297 llvm_unreachable("No helper name for inlined OpenMP construct");
298 }
299
300 void emitUntiedSwitch(CodeGenFunction &CGF) override {
301 if (OuterRegionInfo)
302 OuterRegionInfo->emitUntiedSwitch(CGF);
303 }
304
305 CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }
306
307 static bool classof(const CGCapturedStmtInfo *Info) {
308 return CGOpenMPRegionInfo::classof(Info) &&
309 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
310 }
311
312 ~CGOpenMPInlinedRegionInfo() override = default;
313
314private:
315 /// CodeGen info about outer OpenMP region.
316 CodeGenFunction::CGCapturedStmtInfo *OldCSI;
317 CGOpenMPRegionInfo *OuterRegionInfo;
318};
319
320/// API for captured statement code generation in OpenMP target
321/// constructs. For this captures, implicit parameters are used instead of the
322/// captured fields. The name of the target region has to be unique in a given
323/// application so it is provided by the client, because only the client has
324/// the information to generate that.
325class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo {
326public:
327 CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
328 const RegionCodeGenTy &CodeGen, StringRef HelperName)
329 : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
330 /*HasCancel=*/false),
331 HelperName(HelperName) {}
332
333 /// This is unused for target regions because each starts executing
334 /// with a single thread.
335 const VarDecl *getThreadIDVariable() const override { return nullptr; }
336
337 /// Get the name of the capture helper.
338 StringRef getHelperName() const override { return HelperName; }
339
340 static bool classof(const CGCapturedStmtInfo *Info) {
341 return CGOpenMPRegionInfo::classof(Info) &&
342 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;
343 }
344
345private:
346 StringRef HelperName;
347};
348
349static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) {
350 llvm_unreachable("No codegen for expressions");
351}
352/// API for generation of expressions captured in a innermost OpenMP
353/// region.
354class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo {
355public:
356 CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS)
357 : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen,
358 OMPD_unknown,
359 /*HasCancel=*/false),
360 PrivScope(CGF) {
361 // Make sure the globals captured in the provided statement are local by
362 // using the privatization logic. We assume the same variable is not
363 // captured more than once.
364 for (const auto &C : CS.captures()) {
365 if (!C.capturesVariable() && !C.capturesVariableByCopy())
366 continue;
367
368 const VarDecl *VD = C.getCapturedVar();
369 if (VD->isLocalVarDeclOrParm())
370 continue;
371
372 DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
373 /*RefersToEnclosingVariableOrCapture=*/false,
375 C.getLocation());
376 PrivScope.addPrivate(VD, CGF.EmitLValue(&DRE).getAddress(CGF));
377 }
378 (void)PrivScope.Privatize();
379 }
380
381 /// Lookup the captured field decl for a variable.
382 const FieldDecl *lookup(const VarDecl *VD) const override {
383 if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD))
384 return FD;
385 return nullptr;
386 }
387
388 /// Emit the captured statement body.
389 void EmitBody(CodeGenFunction &CGF, const Stmt *S) override {
390 llvm_unreachable("No body for expressions");
391 }
392
393 /// Get a variable or parameter for storing global thread id
394 /// inside OpenMP construct.
395 const VarDecl *getThreadIDVariable() const override {
396 llvm_unreachable("No thread id for expressions");
397 }
398
399 /// Get the name of the capture helper.
400 StringRef getHelperName() const override {
401 llvm_unreachable("No helper name for expressions");
402 }
403
404 static bool classof(const CGCapturedStmtInfo *Info) { return false; }
405
406private:
407 /// Private scope to capture global variables.
408 CodeGenFunction::OMPPrivateScope PrivScope;
409};
410
411/// RAII for emitting code of OpenMP constructs.
412class InlinedOpenMPRegionRAII {
413 CodeGenFunction &CGF;
414 llvm::DenseMap<const ValueDecl *, FieldDecl *> LambdaCaptureFields;
415 FieldDecl *LambdaThisCaptureField = nullptr;
416 const CodeGen::CGBlockInfo *BlockInfo = nullptr;
417 bool NoInheritance = false;
418
419public:
420 /// Constructs region for combined constructs.
421 /// \param CodeGen Code generation sequence for combined directives. Includes
422 /// a list of functions used for code generation of implicitly inlined
423 /// regions.
424 InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
425 OpenMPDirectiveKind Kind, bool HasCancel,
426 bool NoInheritance = true)
427 : CGF(CGF), NoInheritance(NoInheritance) {
428 // Start emission for the construct.
429 CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
430 CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
431 if (NoInheritance) {
432 std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
433 LambdaThisCaptureField = CGF.LambdaThisCaptureField;
434 CGF.LambdaThisCaptureField = nullptr;
435 BlockInfo = CGF.BlockInfo;
436 CGF.BlockInfo = nullptr;
437 }
438 }
439
440 ~InlinedOpenMPRegionRAII() {
441 // Restore original CapturedStmtInfo only if we're done with code emission.
442 auto *OldCSI =
443 cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
444 delete CGF.CapturedStmtInfo;
445 CGF.CapturedStmtInfo = OldCSI;
446 if (NoInheritance) {
447 std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
448 CGF.LambdaThisCaptureField = LambdaThisCaptureField;
449 CGF.BlockInfo = BlockInfo;
450 }
451 }
452};
453
454/// Values for bit flags used in the ident_t to describe the fields.
455/// All enumeric elements are named and described in accordance with the code
456/// from https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
457enum OpenMPLocationFlags : unsigned {
458 /// Use trampoline for internal microtask.
459 OMP_IDENT_IMD = 0x01,
460 /// Use c-style ident structure.
461 OMP_IDENT_KMPC = 0x02,
462 /// Atomic reduction option for kmpc_reduce.
463 OMP_ATOMIC_REDUCE = 0x10,
464 /// Explicit 'barrier' directive.
465 OMP_IDENT_BARRIER_EXPL = 0x20,
466 /// Implicit barrier in code.
467 OMP_IDENT_BARRIER_IMPL = 0x40,
468 /// Implicit barrier in 'for' directive.
469 OMP_IDENT_BARRIER_IMPL_FOR = 0x40,
470 /// Implicit barrier in 'sections' directive.
471 OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0,
472 /// Implicit barrier in 'single' directive.
473 OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140,
474 /// Call of __kmp_for_static_init for static loop.
475 OMP_IDENT_WORK_LOOP = 0x200,
476 /// Call of __kmp_for_static_init for sections.
477 OMP_IDENT_WORK_SECTIONS = 0x400,
478 /// Call of __kmp_for_static_init for distribute.
479 OMP_IDENT_WORK_DISTRIBUTE = 0x800,
480 LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE)
481};
482
/// Field indices of the ident_t source-location structure. The layout and the
/// field descriptions follow the runtime definition in
/// https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h:
///
/// typedef struct ident {
///   kmp_int32 reserved_1;   // might be used in Fortran
///   kmp_int32 flags;        // KMP_IDENT_xxx flags; KMP_IDENT_KMPC
///                           // identifies this union member
///   kmp_int32 reserved_2;   // not really used in Fortran any more
///                           // (under USE_ITT_BUILD it stores
///                           // region-specific ITT contextual info)
///   kmp_int32 reserved_3;   // source[4] in Fortran, do not use for C++
///   char const *psource;    // semi-colon separated description of the
///                           // source file, function and the pair of line
///                           // numbers delimiting the construct
/// } ident_t;
enum IdentFieldIndex {
  /// might be used in Fortran
  IdentField_Reserved_1,
  /// OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
  IdentField_Flags,
  /// Not really used in Fortran any more
  IdentField_Reserved_2,
  /// Source[4] in Fortran, do not use for C++
  IdentField_Reserved_3,
  /// String describing the source location. The string is composed of
  /// semi-colon separated fields which describe the source file, the function
  /// and a pair of line numbers that delimit the construct.
  IdentField_PSource
};
523
/// Schedule types for 'omp for' loops. The enumerator values are taken from
/// the runtime's enum sched_type in kmp.h and must stay in sync with it.
enum OpenMPSchedType {
  /// Lower bound for default (unordered) versions.
  OMP_sch_lower = 32,
  OMP_sch_static_chunked = 33,
  OMP_sch_static = 34,
  OMP_sch_dynamic_chunked = 35,
  OMP_sch_guided_chunked = 36,
  OMP_sch_runtime = 37,
  OMP_sch_auto = 38,
  /// static with chunk adjustment (e.g., simd)
  OMP_sch_static_balanced_chunked = 45,
  /// Lower bound for 'ordered' versions.
  OMP_ord_lower = 64,
  OMP_ord_static_chunked = 65,
  OMP_ord_static = 66,
  OMP_ord_dynamic_chunked = 67,
  OMP_ord_guided_chunked = 68,
  OMP_ord_runtime = 69,
  OMP_ord_auto = 70,
  OMP_sch_default = OMP_sch_static,
  /// dist_schedule types
  OMP_dist_sch_static_chunked = 91,
  OMP_dist_sch_static = 92,
  /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
  /// Set if the monotonic schedule modifier was present.
  OMP_sch_modifier_monotonic = (1 << 29),
  /// Set if the nonmonotonic schedule modifier was present.
  OMP_sch_modifier_nonmonotonic = (1 << 30),
};
555
556/// A basic class for pre|post-action for advanced codegen sequence for OpenMP
557/// region.
558class CleanupTy final : public EHScopeStack::Cleanup {
559 PrePostActionTy *Action;
560
561public:
562 explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {}
563 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
564 if (!CGF.HaveInsertPoint())
565 return;
566 Action->Exit(CGF);
567 }
568};
569
570} // anonymous namespace
571
574 if (PrePostAction) {
575 CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction);
576 Callback(CodeGen, CGF, *PrePostAction);
577 } else {
578 PrePostActionTy Action;
579 Callback(CodeGen, CGF, Action);
580 }
581}
582
583/// Check if the combiner is a call to UDR combiner and if it is so return the
584/// UDR decl used for reduction.
585static const OMPDeclareReductionDecl *
586getReductionInit(const Expr *ReductionOp) {
587 if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
588 if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
589 if (const auto *DRE =
590 dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
591 if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl()))
592 return DRD;
593 return nullptr;
594}
595
597 const OMPDeclareReductionDecl *DRD,
598 const Expr *InitOp,
599 Address Private, Address Original,
600 QualType Ty) {
601 if (DRD->getInitializer()) {
602 std::pair<llvm::Function *, llvm::Function *> Reduction =
604 const auto *CE = cast<CallExpr>(InitOp);
605 const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee());
606 const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
607 const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
608 const auto *LHSDRE =
609 cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr());
610 const auto *RHSDRE =
611 cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr());
612 CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
613 PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()), Private);
614 PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()), Original);
615 (void)PrivateScope.Privatize();
617 CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
618 CGF.EmitIgnoredExpr(InitOp);
619 } else {
620 llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty);
621 std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"});
622 auto *GV = new llvm::GlobalVariable(
623 CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
624 llvm::GlobalValue::PrivateLinkage, Init, Name);
625 LValue LV = CGF.MakeNaturalAlignRawAddrLValue(GV, Ty);
626 RValue InitRVal;
627 switch (CGF.getEvaluationKind(Ty)) {
628 case TEK_Scalar:
629 InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation());
630 break;
631 case TEK_Complex:
632 InitRVal =
634 break;
635 case TEK_Aggregate: {
636 OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_LValue);
637 CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, LV);
638 CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
639 /*IsInitializer=*/false);
640 return;
641 }
642 }
643 OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_PRValue);
644 CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
645 CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
646 /*IsInitializer=*/false);
647 }
648}
649
650/// Emit initialization of arrays of complex types.
651/// \param DestAddr Address of the array.
652/// \param Type Type of array.
653/// \param Init Initial expression of array.
654/// \param SrcAddr Address of the original array.
656 QualType Type, bool EmitDeclareReductionInit,
657 const Expr *Init,
658 const OMPDeclareReductionDecl *DRD,
659 Address SrcAddr = Address::invalid()) {
660 // Perform element-by-element initialization.
661 QualType ElementTy;
662
663 // Drill down to the base element type on both arrays.
664 const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
665 llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
666 if (DRD)
667 SrcAddr = SrcAddr.withElementType(DestAddr.getElementType());
668
669 llvm::Value *SrcBegin = nullptr;
670 if (DRD)
671 SrcBegin = SrcAddr.emitRawPointer(CGF);
672 llvm::Value *DestBegin = DestAddr.emitRawPointer(CGF);
673 // Cast from pointer to array type to pointer to single element.
674 llvm::Value *DestEnd =
675 CGF.Builder.CreateGEP(DestAddr.getElementType(), DestBegin, NumElements);
676 // The basic structure here is a while-do loop.
677 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
678 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
679 llvm::Value *IsEmpty =
680 CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
681 CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
682
683 // Enter the loop body, making that address the current address.
684 llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
685 CGF.EmitBlock(BodyBB);
686
687 CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);
688
689 llvm::PHINode *SrcElementPHI = nullptr;
690 Address SrcElementCurrent = Address::invalid();
691 if (DRD) {
692 SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2,
693 "omp.arraycpy.srcElementPast");
694 SrcElementPHI->addIncoming(SrcBegin, EntryBB);
695 SrcElementCurrent =
696 Address(SrcElementPHI, SrcAddr.getElementType(),
697 SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
698 }
699 llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
700 DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
701 DestElementPHI->addIncoming(DestBegin, EntryBB);
702 Address DestElementCurrent =
703 Address(DestElementPHI, DestAddr.getElementType(),
704 DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));
705
706 // Emit copy.
707 {
708 CodeGenFunction::RunCleanupsScope InitScope(CGF);
709 if (EmitDeclareReductionInit) {
710 emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent,
711 SrcElementCurrent, ElementTy);
712 } else
713 CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
714 /*IsInitializer=*/false);
715 }
716
717 if (DRD) {
718 // Shift the address forward by one element.
719 llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32(
720 SrcAddr.getElementType(), SrcElementPHI, /*Idx0=*/1,
721 "omp.arraycpy.dest.element");
722 SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock());
723 }
724
725 // Shift the address forward by one element.
726 llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32(
727 DestAddr.getElementType(), DestElementPHI, /*Idx0=*/1,
728 "omp.arraycpy.dest.element");
729 // Check whether we've reached the end.
730 llvm::Value *Done =
731 CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
732 CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
733 DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());
734
735 // Done.
736 CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
737}
738
739LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) {
740 return CGF.EmitOMPSharedLValue(E);
741}
742
743LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF,
744 const Expr *E) {
745 if (const auto *OASE = dyn_cast<ArraySectionExpr>(E))
746 return CGF.EmitArraySectionExpr(OASE, /*IsLowerBound=*/false);
747 return LValue();
748}
749
750void ReductionCodeGen::emitAggregateInitialization(
751 CodeGenFunction &CGF, unsigned N, Address PrivateAddr, Address SharedAddr,
752 const OMPDeclareReductionDecl *DRD) {
753 // Emit VarDecl with copy init for arrays.
754 // Get the address of the original variable captured in current
755 // captured region.
756 const auto *PrivateVD =
757 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
758 bool EmitDeclareReductionInit =
759 DRD && (DRD->getInitializer() || !PrivateVD->hasInit());
760 EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(),
761 EmitDeclareReductionInit,
762 EmitDeclareReductionInit ? ClausesData[N].ReductionOp
763 : PrivateVD->getInit(),
764 DRD, SharedAddr);
765}
766
769 ArrayRef<const Expr *> Privates,
770 ArrayRef<const Expr *> ReductionOps) {
771 ClausesData.reserve(Shareds.size());
772 SharedAddresses.reserve(Shareds.size());
773 Sizes.reserve(Shareds.size());
774 BaseDecls.reserve(Shareds.size());
775 const auto *IOrig = Origs.begin();
776 const auto *IPriv = Privates.begin();
777 const auto *IRed = ReductionOps.begin();
778 for (const Expr *Ref : Shareds) {
779 ClausesData.emplace_back(Ref, *IOrig, *IPriv, *IRed);
780 std::advance(IOrig, 1);
781 std::advance(IPriv, 1);
782 std::advance(IRed, 1);
783 }
784}
785
787 assert(SharedAddresses.size() == N && OrigAddresses.size() == N &&
788 "Number of generated lvalues must be exactly N.");
789 LValue First = emitSharedLValue(CGF, ClausesData[N].Shared);
790 LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Shared);
791 SharedAddresses.emplace_back(First, Second);
792 if (ClausesData[N].Shared == ClausesData[N].Ref) {
793 OrigAddresses.emplace_back(First, Second);
794 } else {
795 LValue First = emitSharedLValue(CGF, ClausesData[N].Ref);
796 LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref);
797 OrigAddresses.emplace_back(First, Second);
798 }
799}
800
802 QualType PrivateType = getPrivateType(N);
803 bool AsArraySection = isa<ArraySectionExpr>(ClausesData[N].Ref);
804 if (!PrivateType->isVariablyModifiedType()) {
805 Sizes.emplace_back(
806 CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType()),
807 nullptr);
808 return;
809 }
810 llvm::Value *Size;
811 llvm::Value *SizeInChars;
812 auto *ElemType = OrigAddresses[N].first.getAddress(CGF).getElementType();
813 auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType);
814 if (AsArraySection) {
815 Size = CGF.Builder.CreatePtrDiff(ElemType,
816 OrigAddresses[N].second.getPointer(CGF),
817 OrigAddresses[N].first.getPointer(CGF));
818 Size = CGF.Builder.CreateNUWAdd(
819 Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
820 SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf);
821 } else {
822 SizeInChars =
823 CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType());
824 Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf);
825 }
826 Sizes.emplace_back(SizeInChars, Size);
828 CGF,
829 cast<OpaqueValueExpr>(
830 CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
831 RValue::get(Size));
832 CGF.EmitVariablyModifiedType(PrivateType);
833}
834
836 llvm::Value *Size) {
837 QualType PrivateType = getPrivateType(N);
838 if (!PrivateType->isVariablyModifiedType()) {
839 assert(!Size && !Sizes[N].second &&
840 "Size should be nullptr for non-variably modified reduction "
841 "items.");
842 return;
843 }
845 CGF,
846 cast<OpaqueValueExpr>(
847 CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
848 RValue::get(Size));
849 CGF.EmitVariablyModifiedType(PrivateType);
850}
851
// Emits the initializer for the private copy of reduction item N.
// Dispatch: array types use aggregate initialization; a user-defined
// reduction (UDR) with an initializer uses that initializer; otherwise the
// private variable's own (non-trivial) initializer is emitted directly.
// NOTE(review): the signature line (original line 852) is missing from this
// listing — presumably "void ReductionCodeGen::emitInitialization(".
 853 CodeGenFunction &CGF, unsigned N, Address PrivateAddr, Address SharedAddr,
 854 llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) {
 855 assert(SharedAddresses.size() > N && "No variable was generated");
 856 const auto *PrivateVD =
 857 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
 858 const OMPDeclareReductionDecl *DRD =
 859 getReductionInit(ClausesData[N].ReductionOp);
 860 if (CGF.getContext().getAsArrayType(PrivateVD->getType())) {
 // Arrays: run DefaultInit only when a UDR initializer exists, then
 // element-wise aggregate initialization.
 861 if (DRD && DRD->getInitializer())
 862 (void)DefaultInit(CGF);
 863 emitAggregateInitialization(CGF, N, PrivateAddr, SharedAddr, DRD);
 864 } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
 865 (void)DefaultInit(CGF);
 866 QualType SharedType = SharedAddresses[N].first.getType();
 867 emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp,
 868 PrivateAddr, SharedAddr, SharedType);
 // Fall back to the private VarDecl's own initializer when DefaultInit did
 // not already handle it and the initializer is non-trivial.
 869 } else if (!DefaultInit(CGF) && PrivateVD->hasInit() &&
 870 !CGF.isTrivialInitializer(PrivateVD->getInit())) {
 871 CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr,
 872 PrivateVD->getType().getQualifiers(),
 873 /*IsInitializer=*/false);
 874 }
 875}
876
// Returns true if the private copy of reduction item N requires destruction
// (i.e. its type has a non-trivial destruction kind).
// NOTE(review): signature line (original 877, "bool ReductionCodeGen::
// needCleanups(unsigned N) {" presumably) dropped by this listing.
 878 QualType PrivateType = getPrivateType(N);
 879 QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
 880 return DTorKind != QualType::DK_none;
 881}
882
884 Address PrivateAddr) {
885 QualType PrivateType = getPrivateType(N);
886 QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
887 if (needCleanups(N)) {
888 PrivateAddr =
889 PrivateAddr.withElementType(CGF.ConvertTypeForMem(PrivateType));
890 CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType);
891 }
892}
893
895 LValue BaseLV) {
896 BaseTy = BaseTy.getNonReferenceType();
897 while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
898 !CGF.getContext().hasSameType(BaseTy, ElTy)) {
899 if (const auto *PtrTy = BaseTy->getAs<PointerType>()) {
900 BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(CGF), PtrTy);
901 } else {
902 LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(CGF), BaseTy);
903 BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal);
904 }
905 BaseTy = BaseTy->getPointeeType();
906 }
907 return CGF.MakeAddrLValue(
908 BaseLV.getAddress(CGF).withElementType(CGF.ConvertTypeForMem(ElTy)),
909 BaseLV.getType(), BaseLV.getBaseInfo(),
910 CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType()));
911}
912
// Rebuilds a chain of temporaries mirroring the pointer/reference levels of
// BaseTy so that Addr can be stored at the innermost level; returns the
// outermost temporary, or the original base address re-pointed at Addr when
// no indirection levels exist.
// NOTE(review): several lines were dropped by this listing — the signature
// head (original 913/915, including the declaration of Tmp) and the heads of
// two pointer-cast calls (originals 931 and 937, presumably
// CreatePointerBitCastOrAddrSpaceCast). Confirm against upstream.
 914 Address OriginalBaseAddress, llvm::Value *Addr) {
 916 Address TopTmp = Address::invalid();
 917 Address MostTopTmp = Address::invalid();
 918 BaseTy = BaseTy.getNonReferenceType();
 919 while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
 920 !CGF.getContext().hasSameType(BaseTy, ElTy)) {
 // One temporary per indirection level; chain each into its parent.
 921 Tmp = CGF.CreateMemTemp(BaseTy);
 922 if (TopTmp.isValid())
 923 CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
 924 else
 925 MostTopTmp = Tmp;
 926 TopTmp = Tmp;
 927 BaseTy = BaseTy->getPointeeType();
 928 }
 929
 930 if (Tmp.isValid()) {
 932 Addr, Tmp.getElementType());
 933 CGF.Builder.CreateStore(Addr, Tmp);
 934 return MostTopTmp;
 935 }
 936
 938 Addr, OriginalBaseAddress.getType());
 939 return OriginalBaseAddress.withPointer(Addr, NotKnownNonNull);
 940}
941
942static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) {
943 const VarDecl *OrigVD = nullptr;
944 if (const auto *OASE = dyn_cast<ArraySectionExpr>(Ref)) {
945 const Expr *Base = OASE->getBase()->IgnoreParenImpCasts();
946 while (const auto *TempOASE = dyn_cast<ArraySectionExpr>(Base))
947 Base = TempOASE->getBase()->IgnoreParenImpCasts();
948 while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
949 Base = TempASE->getBase()->IgnoreParenImpCasts();
950 DE = cast<DeclRefExpr>(Base);
951 OrigVD = cast<VarDecl>(DE->getDecl());
952 } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) {
953 const Expr *Base = ASE->getBase()->IgnoreParenImpCasts();
954 while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
955 Base = TempASE->getBase()->IgnoreParenImpCasts();
956 DE = cast<DeclRefExpr>(Base);
957 OrigVD = cast<VarDecl>(DE->getDecl());
958 }
959 return OrigVD;
960}
961
// Adjusts the private address of reduction item N so that it points to the
// element corresponding to the clause's base variable: computes the offset
// of the shared lvalue within its base and applies the same offset to the
// private copy. Records the base VarDecl in BaseDecls either way.
// NOTE(review): signature head (original line 962) and the head of a
// pointer-cast call (original 976) were dropped by this listing.
 963 Address PrivateAddr) {
 964 const DeclRefExpr *DE;
 965 if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) {
 966 BaseDecls.emplace_back(OrigVD);
 967 LValue OriginalBaseLValue = CGF.EmitLValue(DE);
 968 LValue BaseLValue =
 969 loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(),
 970 OriginalBaseLValue);
 971 Address SharedAddr = SharedAddresses[N].first.getAddress(CGF);
 // Offset (in elements) of the shared item from its base variable.
 972 llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff(
 973 SharedAddr.getElementType(), BaseLValue.getPointer(CGF),
 974 SharedAddr.emitRawPointer(CGF));
 975 llvm::Value *PrivatePointer =
 977 PrivateAddr.emitRawPointer(CGF), SharedAddr.getType());
 // Apply the same element offset to the private copy.
 978 llvm::Value *Ptr = CGF.Builder.CreateGEP(
 979 SharedAddr.getElementType(), PrivatePointer, Adjustment);
 980 return castToBase(CGF, OrigVD->getType(),
 981 SharedAddresses[N].first.getType(),
 982 OriginalBaseLValue.getAddress(CGF), Ptr);
 983 }
 // No array section/subscript base: the reference itself names the decl.
 984 BaseDecls.emplace_back(
 985 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl()));
 986 return PrivateAddr;
 987}
988
// Returns true if reduction item N uses a user-defined reduction (UDR) with
// an explicit initializer clause.
// NOTE(review): signature line (original 989) dropped by this listing.
 990 const OMPDeclareReductionDecl *DRD =
 991 getReductionInit(ClausesData[N].ReductionOp);
 992 return DRD && DRD->getInitializer();
 993}
994
995LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
996 return CGF.EmitLoadOfPointerLValue(
997 CGF.GetAddrOfLocalVar(getThreadIDVariable()),
998 getThreadIDVariable()->getType()->castAs<PointerType>());
999}
1000
1001void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt *S) {
1002 if (!CGF.HaveInsertPoint())
1003 return;
1004 // 1.2.2 OpenMP Language Terminology
1005 // Structured block - An executable statement with a single entry at the
1006 // top and a single exit at the bottom.
1007 // The point of exit cannot be a branch out of the structured block.
1008 // longjmp() and throw() must not violate the entry/exit criteria.
 // A terminate scope enforces the no-escape rule: exceptions leaving the
 // structured block terminate rather than unwind past it.
1009 CGF.EHStack.pushTerminate();
1010 if (S)
 // NOTE(review): original line 1011 is missing from this listing —
 // presumably a statement emitted under the "if (S)" guard (e.g. a
 // profile-counter increment); confirm against upstream.
1012 CodeGen(CGF);
1013 CGF.EHStack.popTerminate();
1014}
1015
// Task-outlined regions receive the thread id by value (kmp_int32), so the
// lvalue is the local variable itself — no pointer load as in the parallel
// region variant above.
1016LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
1017 CodeGenFunction &CGF) {
1018 return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
1019 getThreadIDVariable()->getType(),
 // NOTE(review): original line 1020 (a further MakeAddrLValue argument,
 // presumably an AlignmentSource) was dropped by this listing.
1021}
1022
// Creates a public, non-mutable field of the given type (no bit-width, no
// in-class initializer), adds it to the DeclContext, and returns it.
// NOTE(review): signature head (original line 1023, presumably
// "static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,")
// was dropped by this listing.
1024 QualType FieldTy) {
1025 auto *Field = FieldDecl::Create(
1026 C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
1027 C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
1028 /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
1029 Field->setAccess(AS_public);
1030 DC->addDecl(Field);
1031 return Field;
1032}
1033
// Constructor: sets up the OpenMPIRBuilder with the module-level language
// options (target device, GPU, offload mandatory, USM requirement), loads
// any offload metadata, and honors -fopenmp-force-usm.
// NOTE(review): the constructor name line (original 1034) and two interior
// lines (originals 1044 — presumably the host-IR-file operand of the
// conditional — and 1051) were dropped by this listing.
1035 : CGM(CGM), OMPBuilder(CGM.getModule()) {
 // Critical-section names are arrays of 8 x i32 (kmp_critical_name).
1036 KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);
1037 llvm::OpenMPIRBuilderConfig Config(
1038 CGM.getLangOpts().OpenMPIsTargetDevice, isGPU(),
1039 CGM.getLangOpts().OpenMPOffloadMandatory,
1040 /*HasRequiresReverseOffload*/ false, /*HasRequiresUnifiedAddress*/ false,
1041 hasRequiresUnifiedSharedMemory(), /*HasRequiresDynamicAllocators*/ false);
1042 OMPBuilder.initialize();
1043 OMPBuilder.loadOffloadInfoMetadata(CGM.getLangOpts().OpenMPIsTargetDevice
1045 : StringRef{});
1046 OMPBuilder.setConfig(Config);
1047
1048 // The user forces the compiler to behave as if omp requires
1049 // unified_shared_memory was given.
1050 if (CGM.getLangOpts().OpenMPForceUSM) {
1052 OMPBuilder.Config.setHasRequiresUnifiedSharedMemory(true);
1053 }
1054}
1055
// Clears per-module runtime state and erases emitted non-target global
// variable declarations that ended up unused (kept only for debug info).
// NOTE(review): signature line (original 1056) dropped by this listing.
1057 InternalVars.clear();
1058 // Clean non-target variable declarations possibly used only in debug info.
1059 for (const auto &Data : EmittedNonTargetVariables) {
1060 if (!Data.getValue().pointsToAliveValue())
1061 continue;
1062 auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue());
1063 if (!GV)
1064 continue;
 // Only prune pure declarations with no remaining uses.
1065 if (!GV->isDeclaration() || GV->getNumUses() > 0)
1066 continue;
1067 GV->eraseFromParent();
1068 }
1069}
1070
// Joins the given name parts with the platform-specific separator via the
// OpenMPIRBuilder. NOTE(review): signature line (original 1071) dropped.
1072 return OMPBuilder.createPlatformSpecificName(Parts);
1073}
1074
// Emits the helper function for a user-defined reduction: either the
// combiner "void .omp_combiner.(Ty *in, Ty *out)" or the initializer
// "void .omp_initializer.(Ty *priv, Ty *orig)". The In/Out VarDecls are
// privatized to the dereferenced parameters before emitting the body.
1075static llvm::Function *
 // NOTE(review): the signature continuation (original line 1076, presumably
 // "emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,") was
 // dropped by this listing.
1077 const Expr *CombinerInitializer, const VarDecl *In,
1078 const VarDecl *Out, bool IsCombiner) {
1079 // void .omp_combiner.(Ty *in, Ty *out);
1080 ASTContext &C = CGM.getContext();
 // restrict-qualified pointers: in and out never alias.
1081 QualType PtrTy = C.getPointerType(Ty).withRestrict();
1082 FunctionArgList Args;
1083 ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(),
1084 /*Id=*/nullptr, PtrTy, ImplicitParamKind::Other);
1085 ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(),
1086 /*Id=*/nullptr, PtrTy, ImplicitParamKind::Other);
1087 Args.push_back(&OmpOutParm);
1088 Args.push_back(&OmpInParm);
1089 const CGFunctionInfo &FnInfo =
1090 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
1091 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
1092 std::string Name = CGM.getOpenMPRuntime().getName(
1093 {IsCombiner ? "omp_combiner" : "omp_initializer", ""});
1094 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
1095 Name, &CGM.getModule());
1096 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
 // Small internal helper: force inlining under optimization.
1097 if (CGM.getLangOpts().Optimize) {
1098 Fn->removeFnAttr(llvm::Attribute::NoInline);
1099 Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
1100 Fn->addFnAttr(llvm::Attribute::AlwaysInline);
1101 }
1102 CodeGenFunction CGF(CGM);
1103 // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
1104 // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
1105 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(),
1106 Out->getLocation());
1107 CodeGenFunction::OMPPrivateScope Scope(CGF);
1108 Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm);
1109 Scope.addPrivate(
1110 In, CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>())
1111 .getAddress(CGF));
1112 Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm);
1113 Scope.addPrivate(
1114 Out, CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>())
1115 .getAddress(CGF));
1116 (void)Scope.Privatize();
 // Initializer path without an explicit init expression: emit the priv
 // variable's own non-trivial initializer into the out parameter.
1117 if (!IsCombiner && Out->hasInit() &&
1118 !CGF.isTrivialInitializer(Out->getInit())) {
1119 CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out),
1120 Out->getType().getQualifiers(),
1121 /*IsInitializer=*/true);
1122 }
1123 if (CombinerInitializer)
1124 CGF.EmitIgnoredExpr(CombinerInitializer);
1125 Scope.ForceCleanup();
1126 CGF.FinishFunction();
1127 return Fn;
1128}
1129
// Emits (once per declaration) the combiner — and initializer, if present —
// for a #pragma omp declare reduction, caching the pair in UDRMap and, when
// emitted inside a function, recording the decl for per-function cleanup.
// NOTE(review): the signature lines (originals 1130-1131) and two interior
// lines (originals 1141/1143, the call head and a conditional operand of the
// initializer emission) were dropped by this listing.
1132 if (UDRMap.count(D) > 0)
1133 return;
1134 llvm::Function *Combiner = emitCombinerOrInitializer(
1135 CGM, D->getType(), D->getCombiner(),
1136 cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerIn())->getDecl()),
1137 cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerOut())->getDecl()),
1138 /*IsCombiner=*/true);
1139 llvm::Function *Initializer = nullptr;
1140 if (const Expr *Init = D->getInitializer()) {
1142 CGM, D->getType(),
1144 : nullptr,
1145 cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()),
1146 cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()),
1147 /*IsCombiner=*/false);
1148 }
1149 UDRMap.try_emplace(D, Combiner, Initializer);
 // Track which function emitted this UDR so functionFinished() can drop it.
1150 if (CGF) {
1151 auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn);
1152 Decls.second.push_back(D);
1153 }
1154}
1155
// Returns the cached {combiner, initializer} pair for a declare-reduction
// decl, emitting it on demand if not yet present.
1156std::pair<llvm::Function *, llvm::Function *>
 // NOTE(review): signature continuation (original line 1157) dropped here.
1158 auto I = UDRMap.find(D);
1159 if (I != UDRMap.end())
1160 return I->second;
1161 emitUserDefinedReduction(/*CGF=*/nullptr, D);
1162 return UDRMap.lookup(D);
1163}
1164
1165namespace {
1166// Temporary RAII solution to perform a push/pop stack event on the OpenMP IR
1167// Builder if one is present.
1168struct PushAndPopStackRAII {
1169 PushAndPopStackRAII(llvm::OpenMPIRBuilder *OMPBuilder, CodeGenFunction &CGF,
1170 bool HasCancel, llvm::omp::Directive Kind)
1171 : OMPBuilder(OMPBuilder) {
 // No-op when the OpenMPIRBuilder is not in use.
1172 if (!OMPBuilder)
1173 return;
1174
1175 // The following callback is the crucial part of clangs cleanup process.
1176 //
1177 // NOTE:
1178 // Once the OpenMPIRBuilder is used to create parallel regions (and
1179 // similar), the cancellation destination (Dest below) is determined via
1180 // IP. That means if we have variables to finalize we split the block at IP,
1181 // use the new block (=BB) as destination to build a JumpDest (via
1182 // getJumpDestInCurrentScope(BB)) which then is fed to
1183 // EmitBranchThroughCleanup. Furthermore, there will not be the need
1184 // to push & pop an FinalizationInfo object.
1185 // The FiniCB will still be needed but at the point where the
1186 // OpenMPIRBuilder is asked to construct a parallel (or similar) construct.
1187 auto FiniCB = [&CGF](llvm::OpenMPIRBuilder::InsertPointTy IP) {
1188 assert(IP.getBlock()->end() == IP.getPoint() &&
1189 "Clang CG should cause non-terminated block!");
1190 CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
1191 CGF.Builder.restoreIP(IP);
 // NOTE(review): original line 1192 is missing from this listing —
 // presumably the declaration receiving the cancel destination (e.g.
 // "CodeGenFunction::JumpDest Dest ="); confirm against upstream.
1193 CGF.getOMPCancelDestination(OMPD_parallel);
1194 CGF.EmitBranchThroughCleanup(Dest);
1195 };
1196
1197 // TODO: Remove this once we emit parallel regions through the
1198 // OpenMPIRBuilder as it can do this setup internally.
1199 llvm::OpenMPIRBuilder::FinalizationInfo FI({FiniCB, Kind, HasCancel});
1200 OMPBuilder->pushFinalizationCB(std::move(FI));
1201 }
 // Pop mirrors the push unconditionally (guarded by the same null check).
1202 ~PushAndPopStackRAII() {
1203 if (OMPBuilder)
1204 OMPBuilder->popFinalizationCB();
1205 }
1206 llvm::OpenMPIRBuilder *OMPBuilder;
1207};
1208} // namespace
1209
// Shared implementation for outlining 'parallel'/'teams' regions: scans the
// directive for cancel clauses, informs the OpenMPIRBuilder of the region
// (for cancellation barriers), and generates the captured-statement
// function under a CGOpenMPOutlinedRegionInfo.
// NOTE(review): the signature head (original line 1210) and the final
// return statement (original 1244, presumably returning
// CGF.GenerateOpenMPCapturedStmtFunction(...)) were dropped by this listing.
1211 CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS,
1212 const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
1213 const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) {
1214 assert(ThreadIDVar->getType()->isPointerType() &&
1215 "thread id variable must be of type kmp_int32 *");
1216 CodeGenFunction CGF(CGM, true);
 // Determine whether any enclosing parallel-family directive has 'cancel'.
1217 bool HasCancel = false;
1218 if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D))
1219 HasCancel = OPD->hasCancel();
1220 else if (const auto *OPD = dyn_cast<OMPTargetParallelDirective>(&D))
1221 HasCancel = OPD->hasCancel();
1222 else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D))
1223 HasCancel = OPSD->hasCancel();
1224 else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
1225 HasCancel = OPFD->hasCancel();
1226 else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D))
1227 HasCancel = OPFD->hasCancel();
1228 else if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D))
1229 HasCancel = OPFD->hasCancel();
1230 else if (const auto *OPFD =
1231 dyn_cast<OMPTeamsDistributeParallelForDirective>(&D))
1232 HasCancel = OPFD->hasCancel();
1233 else if (const auto *OPFD =
1234 dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D))
1235 HasCancel = OPFD->hasCancel();
1236
1237 // TODO: Temporarily inform the OpenMPIRBuilder, if any, about the new
1238 // parallel region to make cancellation barriers work properly.
1239 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
1240 PushAndPopStackRAII PSR(&OMPBuilder, CGF, HasCancel, InnermostKind);
1241 CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
1242 HasCancel, OutlinedHelperName);
1243 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
1245}
1246
1247std::string CGOpenMPRuntime::getOutlinedHelperName(StringRef Name) const {
1248 std::string Suffix = getName({"omp_outlined"});
1249 return (Name + Suffix).str();
1250}
1251
// Convenience overload: derive the helper name from the current function.
// NOTE(review): signature line (original 1252) dropped by this listing.
1253 return getOutlinedHelperName(CGF.CurFn->getName());
1254}
1255
1256std::string CGOpenMPRuntime::getReductionFuncName(StringRef Name) const {
1257 std::string Suffix = getName({"omp", "reduction", "reduction_func"});
1258 return (Name + Suffix).str();
1259}
1260
// Outlines the 'parallel' captured region of D via the shared helper.
// NOTE(review): signature head (originals 1261-1262) and the call head
// (original 1266, presumably "return emitParallelOrTeamsOutlinedFunction(")
// were dropped by this listing.
1263 const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
1264 const RegionCodeGenTy &CodeGen) {
1265 const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel);
1267 CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(CGF),
1268 CodeGen);
1269}
1270
// Outlines the 'teams' captured region of D via the shared helper.
// NOTE(review): signature head (originals 1271-1272) and the call head
// (original 1276) were dropped by this listing.
1273 const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
1274 const RegionCodeGenTy &CodeGen) {
1275 const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams);
1277 CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(CGF),
1278 CodeGen);
1279}
1280
// Outlines a 'task'/'taskloop' region. For untied tasks, installs an action
// that re-enqueues the task via __kmpc_omp_task at each part boundary and
// reports the resulting number of parts through NumberOfParts.
// NOTE(review): the signature head (original line 1281) was dropped by this
// listing.
1282 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1283 const VarDecl *PartIDVar, const VarDecl *TaskTVar,
1284 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
1285 bool Tied, unsigned &NumberOfParts) {
 // Untied tasks: emit a call that re-schedules the task descriptor.
1286 auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF,
1287 PrePostActionTy &) {
1288 llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc());
1289 llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getBeginLoc());
1290 llvm::Value *TaskArgs[] = {
1291 UpLoc, ThreadID,
1292 CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar),
1293 TaskTVar->getType()->castAs<PointerType>())
1294 .getPointer(CGF)};
1295 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
1296 CGM.getModule(), OMPRTL___kmpc_omp_task),
1297 TaskArgs);
1298 };
1299 CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar,
1300 UntiedCodeGen);
1301 CodeGen.setAction(Action);
1302 assert(!ThreadIDVar->getType()->isPointerType() &&
1303 "thread id variable must be of type kmp_int32 for tasks");
1304 const OpenMPDirectiveKind Region =
1305 isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop
1306 : OMPD_task;
1307 const CapturedStmt *CS = D.getCapturedStmt(Region);
 // Collect 'cancel' presence from the task-family directives.
1308 bool HasCancel = false;
1309 if (const auto *TD = dyn_cast<OMPTaskDirective>(&D))
1310 HasCancel = TD->hasCancel();
1311 else if (const auto *TD = dyn_cast<OMPTaskLoopDirective>(&D))
1312 HasCancel = TD->hasCancel();
1313 else if (const auto *TD = dyn_cast<OMPMasterTaskLoopDirective>(&D))
1314 HasCancel = TD->hasCancel();
1315 else if (const auto *TD = dyn_cast<OMPParallelMasterTaskLoopDirective>(&D))
1316 HasCancel = TD->hasCancel();
1317
1318 CodeGenFunction CGF(CGM, true);
1319 CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
1320 InnermostKind, HasCancel, Action);
1321 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
1322 llvm::Function *Res = CGF.GenerateCapturedStmtFunction(*CS);
 // Part count is only meaningful for untied tasks.
1323 if (!Tied)
1324 NumberOfParts = Action.getNumberOfParts();
1325 return Res;
1326}
1327
// Creates the per-function "service" insert point — a no-op bitcast used as
// an anchor for emitting thread-id/ident setup — either at the current
// insertion point or just after the alloca insert point.
// NOTE(review): signature head (original line 1328) dropped by this listing.
1329 bool AtCurrentPoint) {
1330 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1331 assert(!Elem.second.ServiceInsertPt && "Insert point is set already.");
1332
 // The bitcast of undef is a placeholder instruction, never a real value.
1333 llvm::Value *Undef = llvm::UndefValue::get(CGF.Int32Ty);
1334 if (AtCurrentPoint) {
1335 Elem.second.ServiceInsertPt = new llvm::BitCastInst(
1336 Undef, CGF.Int32Ty, "svcpt", CGF.Builder.GetInsertBlock());
1337 } else {
1338 Elem.second.ServiceInsertPt =
1339 new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt");
1340 Elem.second.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt);
1341 }
1342}
1343
// Removes the placeholder service insert point for the current function, if
// one was created. NOTE(review): signature line (original 1344) dropped.
1345 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1346 if (Elem.second.ServiceInsertPt) {
1347 llvm::Instruction *Ptr = Elem.second.ServiceInsertPt;
 // Null the map entry before erasing the instruction it points at.
1348 Elem.second.ServiceInsertPt = nullptr;
1349 Ptr->eraseFromParent();
1350 }
1351}
1352
// Builds the ";file;function;line;column;;" ident string used by the OpenMP
// runtime for source-location identification.
// NOTE(review): the signature head (original line 1353) and the line
// computing PLoc from Loc (original 1358) were dropped by this listing.
1354 SourceLocation Loc,
1355 SmallString<128> &Buffer) {
1356 llvm::raw_svector_ostream OS(Buffer);
1357 // Build debug location
1359 OS << ";" << PLoc.getFilename() << ";";
1360 if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
1361 OS << FD->getQualifiedNameAsString();
1362 OS << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
1363 return OS.str();
1364}
1365
// Emits (or reuses) the ident_t* describing Loc for runtime calls. Falls
// back to the default source-location string when debug info is off (and
// EmitLoc is not forced) or the location is invalid.
// NOTE(review): the signature head (original line 1366) and the line
// computing PLoc (original 1379) were dropped by this listing.
1367 SourceLocation Loc,
1368 unsigned Flags, bool EmitLoc) {
1369 uint32_t SrcLocStrSize;
1370 llvm::Constant *SrcLocStr;
1371 if ((!EmitLoc && CGM.getCodeGenOpts().getDebugInfo() ==
1372 llvm::codegenoptions::NoDebugInfo) ||
1373 Loc.isInvalid()) {
1374 SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize);
1375 } else {
1376 std::string FunctionName;
1377 if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
1378 FunctionName = FD->getQualifiedNameAsString();
1380 const char *FileName = PLoc.getFilename();
1381 unsigned Line = PLoc.getLine();
1382 unsigned Column = PLoc.getColumn();
1383 SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(FunctionName, FileName, Line,
1384 Column, SrcLocStrSize);
1385 }
1386 unsigned Reserved2Flags = getDefaultLocationReserved2Flags();
1387 return OMPBuilder.getOrCreateIdent(
1388 SrcLocStr, SrcLocStrSize, llvm::omp::IdentFlag(Flags), Reserved2Flags);
1389}
1390
// Returns the OpenMP thread id for the current function, caching it per
// function. Resolution order: OpenMPIRBuilder (if enabled) -> cached value
// -> the region's thread-id parameter (when safe w.r.t. EH) -> a
// __kmpc_global_thread_num call emitted at the service insert point.
// NOTE(review): the signature head (original line 1391) and two interior
// lines (originals 1448 — presumably the setLocThreadIdInsertPt call —
// and 1451) were dropped by this listing.
1392 SourceLocation Loc) {
1393 assert(CGF.CurFn && "No function in current CodeGenFunction.");
1394 // If the OpenMPIRBuilder is used we need to use it for all thread id calls as
1395 // the clang invariants used below might be broken.
1396 if (CGM.getLangOpts().OpenMPIRBuilder) {
1397 SmallString<128> Buffer;
1398 OMPBuilder.updateToLocation(CGF.Builder.saveIP());
1399 uint32_t SrcLocStrSize;
1400 auto *SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(
1401 getIdentStringFromSourceLocation(CGF, Loc, Buffer), SrcLocStrSize);
1402 return OMPBuilder.getOrCreateThreadID(
1403 OMPBuilder.getOrCreateIdent(SrcLocStr, SrcLocStrSize));
1404 }
1405
1406 llvm::Value *ThreadID = nullptr;
1407 // Check whether we've already cached a load of the thread id in this
1408 // function.
1409 auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
1410 if (I != OpenMPLocThreadIDMap.end()) {
1411 ThreadID = I->second.ThreadID;
1412 if (ThreadID != nullptr)
1413 return ThreadID;
1414 }
1415 // If exceptions are enabled, do not use parameter to avoid possible crash.
1416 if (auto *OMPRegionInfo =
1417 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
1418 if (OMPRegionInfo->getThreadIDVariable()) {
1419 // Check if this an outlined function with thread id passed as argument.
1420 LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
1421 llvm::BasicBlock *TopBlock = CGF.AllocaInsertPt->getParent();
 // Only reuse the parameter when EH cannot leave the load unreachable
 // from the current block.
1422 if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions ||
1423 !CGF.getLangOpts().CXXExceptions ||
1424 CGF.Builder.GetInsertBlock() == TopBlock ||
1425 !isa<llvm::Instruction>(LVal.getPointer(CGF)) ||
1426 cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
1427 TopBlock ||
1428 cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
1429 CGF.Builder.GetInsertBlock()) {
1430 ThreadID = CGF.EmitLoadOfScalar(LVal, Loc);
1431 // If value loaded in entry block, cache it and use it everywhere in
1432 // function.
1433 if (CGF.Builder.GetInsertBlock() == TopBlock) {
1434 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1435 Elem.second.ThreadID = ThreadID;
1436 }
1437 return ThreadID;
1438 }
1439 }
1440 }
1441
1442 // This is not an outlined function region - need to call __kmpc_int32
1443 // kmpc_global_thread_num(ident_t *loc).
1444 // Generate thread id value and cache this value for use across the
1445 // function.
1446 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1447 if (!Elem.second.ServiceInsertPt)
1449 CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
1450 CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
1452 llvm::CallInst *Call = CGF.Builder.CreateCall(
1453 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
1454 OMPRTL___kmpc_global_thread_num),
1455 emitUpdateLocation(CGF, Loc));
1456 Call->setCallingConv(CGF.getRuntimeCC());
1457 Elem.second.ThreadID = Call;
1458 return Call;
1459}
1460
// Per-function teardown: drops the cached thread id / service insert point
// and the user-defined reduction/mapper entries registered while emitting
// the current function.
// NOTE(review): the signature line (original 1461) and interior lines
// (originals 1464, 1478-1479) were dropped by this listing.
1462 assert(CGF.CurFn && "No function in current CodeGenFunction.");
1463 if (OpenMPLocThreadIDMap.count(CGF.CurFn)) {
1465 OpenMPLocThreadIDMap.erase(CGF.CurFn);
1466 }
1467 if (FunctionUDRMap.count(CGF.CurFn) > 0) {
1468 for(const auto *D : FunctionUDRMap[CGF.CurFn])
1469 UDRMap.erase(D);
1470 FunctionUDRMap.erase(CGF.CurFn);
1471 }
1472 auto I = FunctionUDMMap.find(CGF.CurFn);
1473 if (I != FunctionUDMMap.end()) {
1474 for(const auto *D : I->second)
1475 UDMMap.erase(D);
1476 FunctionUDMMap.erase(I);
1477 }
1480}
1481
// Returns the ident_t* LLVM type from the OpenMPIRBuilder.
// NOTE(review): signature line (original 1482) dropped by this listing.
1483 return OMPBuilder.IdentPtr;
1484}
1485
// Lazily builds and returns a pointer to the kmpc_micro function type:
// void (kmp_int32 *global_tid, kmp_int32 *bound_tid, ...).
// NOTE(review): signature line (original 1486) dropped by this listing.
1487 if (!Kmpc_MicroTy) {
1488 // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...)
1489 llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty),
1490 llvm::PointerType::getUnqual(CGM.Int32Ty)};
1491 Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true);
1492 }
1493 return llvm::PointerType::getUnqual(Kmpc_MicroTy);
1494}
1495
// Maps a declaration's 'device_type' attribute (host/nohost/any) to the
// corresponding OffloadEntriesInfoManager clause kind; None when absent.
1496llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseKind
 // NOTE(review): signature continuation (original line 1497) dropped here.
1498 std::optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
1499 OMPDeclareTargetDeclAttr::getDeviceType(VD);
1500 if (!DevTy)
1501 return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseNone;
1502
1503 switch ((int)*DevTy) { // Avoid -Wcovered-switch-default
1504 case OMPDeclareTargetDeclAttr::DT_Host:
1505 return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseHost;
1506 break;
1507 case OMPDeclareTargetDeclAttr::DT_NoHost:
1508 return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseNoHost;
1509 break;
1510 case OMPDeclareTargetDeclAttr::DT_Any:
1511 return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseAny;
1512 break;
1513 default:
1514 return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseNone;
1515 break;
1516 }
1517}
1518
// Maps a declare-target map type (to/enter/link) to the corresponding
// OffloadEntriesInfoManager global-variable entry kind; None when absent.
1519llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind
 // NOTE(review): signature continuation (original line 1520) dropped here.
1521 std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> MapType =
1522 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
1523 if (!MapType)
1524 return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryNone;
1525 switch ((int)*MapType) { // Avoid -Wcovered-switch-default
1526 case OMPDeclareTargetDeclAttr::MapTypeTy::MT_To:
1527 return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryTo;
1528 break;
1529 case OMPDeclareTargetDeclAttr::MapTypeTy::MT_Enter:
1530 return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryEnter;
1531 break;
1532 case OMPDeclareTargetDeclAttr::MapTypeTy::MT_Link:
1533 return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryLink;
1534 break;
1535 default:
1536 return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryNone;
1537 break;
1538 }
1539}
1540
1541static llvm::TargetRegionEntryInfo getEntryInfoFromPresumedLoc(
1542 CodeGenModule &CGM, llvm::OpenMPIRBuilder &OMPBuilder,
1543 SourceLocation BeginLoc, llvm::StringRef ParentName = "") {
1544
 // Supplies {filename, line} of BeginLoc's presumed location; falls back to
 // the spelling location when the #line-directive filename is not a real
 // file on disk (getUniqueID fails).
1545 auto FileInfoCallBack = [&]() {
 // NOTE(review): original line 1546 is missing from this listing —
 // presumably the SourceManager declaration used as SM below.
1547 PresumedLoc PLoc = SM.getPresumedLoc(BeginLoc);
1548
1549 llvm::sys::fs::UniqueID ID;
1550 if (llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID)) {
1551 PLoc = SM.getPresumedLoc(BeginLoc, /*UseLineDirectives=*/false);
1552 }
1553
1554 return std::pair<std::string, uint64_t>(PLoc.getFilename(), PLoc.getLine());
1555 };
1556
1557 return OMPBuilder.getTargetEntryUniqueInfo(FileInfoCallBack, ParentName);
1558}
1559
// Returns the address of a declare-target variable as materialized by the
// OpenMPIRBuilder (e.g. the "$ref" indirection for 'link'/'to' entries), or
// an invalid address when no special handling applies.
// NOTE(review): several lines were dropped by this listing: the signature
// (original 1560), the linkage-lambda body (1564), the type operand of
// ConvertTypeForMem (1570), and arguments of getAddrOfDeclareTargetVar
// (1572-1573, 1575). Confirm details against upstream.
1561 auto AddrOfGlobal = [&VD, this]() { return CGM.GetAddrOfGlobal(VD); };
1562
1563 auto LinkageForVariable = [&VD, this]() {
1565 };
1566
1567 std::vector<llvm::GlobalVariable *> GeneratedRefs;
1568
1569 llvm::Type *LlvmPtrTy = CGM.getTypes().ConvertTypeForMem(
1571 llvm::Constant *addr = OMPBuilder.getAddrOfDeclareTargetVar(
1574 VD->isExternallyVisible(),
1576 VD->getCanonicalDecl()->getBeginLoc()),
1577 CGM.getMangledName(VD), GeneratedRefs, CGM.getLangOpts().OpenMPSimd,
1578 CGM.getLangOpts().OMPTargetTriples, LlvmPtrTy, AddrOfGlobal,
1579 LinkageForVariable);
1580
1581 if (!addr)
1582 return ConstantAddress::invalid();
1583 return ConstantAddress(addr, LlvmPtrTy, CGM.getContext().getDeclAlign(VD));
1584}
1585
// Returns (creating on first use) the internal "<mangled-name>.cache."
// global used by __kmpc_threadprivate_cached for this variable.
1586llvm::Constant *
 // NOTE(review): signature continuation (original 1587) and the rest of the
 // assert condition (original 1589) were dropped by this listing.
1588 assert(!CGM.getLangOpts().OpenMPUseTLS ||
1590 // Lookup the entry, lazily creating it if necessary.
1591 std::string Suffix = getName({"cache", ""});
1592 return OMPBuilder.getOrCreateInternalVariable(
1593 CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix).str());
1595
// Returns the address of the thread-local copy of a threadprivate variable:
// the variable itself when native TLS is used, otherwise the result of a
// __kmpc_threadprivate_cached runtime call.
// NOTE(review): the signature head (original 1596), the TLS-support operand
// of the condition (1601), and two call arguments (1608-1609, presumably the
// size and cache pointer) were dropped by this listing.
1597 const VarDecl *VD,
1598 Address VDAddr,
1599 SourceLocation Loc) {
1600 if (CGM.getLangOpts().OpenMPUseTLS &&
1602 return VDAddr;
1603
1604 llvm::Type *VarTy = VDAddr.getElementType();
1605 llvm::Value *Args[] = {
1606 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
1607 CGF.Builder.CreatePointerCast(VDAddr.emitRawPointer(CGF), CGM.Int8PtrTy),
1610 return Address(
1611 CGF.EmitRuntimeCall(
1612 OMPBuilder.getOrCreateRuntimeFunction(
1613 CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
1614 Args),
1615 CGF.Int8Ty, VDAddr.getAlignment());
1616}
1617
// Registers ctor/cctor/dtor for a threadprivate variable with the runtime:
// first forces runtime initialization via __kmpc_global_thread_num, then
// calls __kmpc_threadprivate_register.
// NOTE(review): the signature head (original line 1618) was dropped.
1619 CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor,
1620 llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
1621 // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime
1622 // library.
1623 llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc);
1624 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
1625 CGM.getModule(), OMPRTL___kmpc_global_thread_num),
1626 OMPLoc);
1627 // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
1628 // to register constructor/destructor for variable.
1629 llvm::Value *Args[] = {
1630 OMPLoc,
1631 CGF.Builder.CreatePointerCast(VDAddr.emitRawPointer(CGF), CGM.VoidPtrTy),
1632 Ctor, CopyCtor, Dtor};
1633 CGF.EmitRuntimeCall(
1634 OMPBuilder.getOrCreateRuntimeFunction(
1635 CGM.getModule(), OMPRTL___kmpc_threadprivate_register),
1636 Args);
1637}
1638
// Emits ctor/dtor helper functions for a threadprivate variable definition
// and registers them with the runtime. When called outside a function
// (CGF == nullptr), wraps the registration in a standalone
// "__omp_threadprivate_init_" function and returns it; otherwise emits the
// registration inline and returns nullptr. Skipped entirely under native
// TLS.
// NOTE(review): multiple lines were dropped by this listing: the signature
// (original 1639), a condition operand (1643), parameter-kind arguments
// (1659, 1691), arrangeBuiltinFunctionDeclaration heads (1662, 1694),
// EmitLoadOfScalar location arguments (1672, 1679), and the dtor guard
// condition (1684). Confirm against upstream.
1640 const VarDecl *VD, Address VDAddr, SourceLocation Loc,
1641 bool PerformInit, CodeGenFunction *CGF) {
1642 if (CGM.getLangOpts().OpenMPUseTLS &&
1644 return nullptr;
1645
1646 VD = VD->getDefinition(CGM.getContext());
 // Emit helpers only once per variable definition.
1647 if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) {
1648 QualType ASTTy = VD->getType();
1649
1650 llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
1651 const Expr *Init = VD->getAnyInitializer();
1652 if (CGM.getLangOpts().CPlusPlus && PerformInit) {
1653 // Generate function that re-emits the declaration's initializer into the
1654 // threadprivate copy of the variable VD
1655 CodeGenFunction CtorCGF(CGM);
1656 FunctionArgList Args;
1657 ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
1658 /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
1660 Args.push_back(&Dst);
1661
1663 CGM.getContext().VoidPtrTy, Args);
1664 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
1665 std::string Name = getName({"__kmpc_global_ctor_", ""});
1666 llvm::Function *Fn =
1667 CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
1668 CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
1669 Args, Loc, Loc);
1670 llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar(
1671 CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
1673 Address Arg(ArgVal, CtorCGF.ConvertTypeForMem(ASTTy),
1674 VDAddr.getAlignment());
1675 CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
1676 /*IsInitializer=*/true);
 // The ctor returns the destination pointer it was given.
1677 ArgVal = CtorCGF.EmitLoadOfScalar(
1678 CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
1680 CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
1681 CtorCGF.FinishFunction();
1682 Ctor = Fn;
1683 }
1685 // Generate function that emits destructor call for the threadprivate copy
1686 // of the variable VD
1687 CodeGenFunction DtorCGF(CGM);
1688 FunctionArgList Args;
1689 ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
1690 /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
1692 Args.push_back(&Dst);
1693
1695 CGM.getContext().VoidTy, Args);
1696 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
1697 std::string Name = getName({"__kmpc_global_dtor_", ""});
1698 llvm::Function *Fn =
1699 CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
1700 auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
1701 DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
1702 Loc, Loc);
1703 // Create a scope with an artificial location for the body of this function.
1704 auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
1705 llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar(
1706 DtorCGF.GetAddrOfLocalVar(&Dst),
1707 /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation());
1708 DtorCGF.emitDestroy(
1709 Address(ArgVal, DtorCGF.Int8Ty, VDAddr.getAlignment()), ASTTy,
1710 DtorCGF.getDestroyer(ASTTy.isDestructedType()),
1711 DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
1712 DtorCGF.FinishFunction();
1713 Dtor = Fn;
1714 }
1715 // Do not emit init function if it is not required.
1716 if (!Ctor && !Dtor)
1717 return nullptr;
1718
1719 llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
1720 auto *CopyCtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs,
1721 /*isVarArg=*/false)
1722 ->getPointerTo();
1723 // Copying constructor for the threadprivate variable.
1724 // Must be NULL - reserved by runtime, but currently it requires that this
1725 // parameter is always NULL. Otherwise it fires assertion.
1726 CopyCtor = llvm::Constant::getNullValue(CopyCtorTy);
1727 if (Ctor == nullptr) {
1728 auto *CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
1729 /*isVarArg=*/false)
1730 ->getPointerTo();
1731 Ctor = llvm::Constant::getNullValue(CtorTy);
1732 }
1733 if (Dtor == nullptr) {
1734 auto *DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy,
1735 /*isVarArg=*/false)
1736 ->getPointerTo();
1737 Dtor = llvm::Constant::getNullValue(DtorTy);
1738 }
 // No CodeGenFunction: wrap the registration in its own init function.
1739 if (!CGF) {
1740 auto *InitFunctionTy =
1741 llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
1742 std::string Name = getName({"__omp_threadprivate_init_", ""});
1743 llvm::Function *InitFunction = CGM.CreateGlobalInitOrCleanUpFunction(
1744 InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction());
1745 CodeGenFunction InitCGF(CGM);
1746 FunctionArgList ArgList;
1747 InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
1748 CGM.getTypes().arrangeNullaryFunction(), ArgList,
1749 Loc, Loc);
1750 emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
1751 InitCGF.FinishFunction();
1752 return InitFunction;
1753 }
1754 emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
1755 }
1756 return nullptr;
1757}
1758
// NOTE(review): signature line (orig. 1759) was lost in extraction; this is
// presumably CGOpenMPRuntime::emitDeclareTargetFunction(const FunctionDecl
// *FD, llvm::GlobalValue *GV) — confirm against upstream. It registers an
// indirect 'declare target' function by emitting a device-side global that
// holds the function's address, so the runtime can resolve indirect calls
// without changing the function's own linkage/visibility.
 1760 llvm::GlobalValue *GV) {
 1761 std::optional<OMPDeclareTargetDeclAttr *> ActiveAttr =
 1762 OMPDeclareTargetDeclAttr::getActiveAttr(FD);
 1763
 1764 // We only need to handle active 'indirect' declare target functions.
 1765 if (!ActiveAttr || !(*ActiveAttr)->getIndirect())
 1766 return;
 1767
 1768 // Get a mangled name to store the new device global in.
 1769 llvm::TargetRegionEntryInfo EntryInfo = getEntryInfoFromPresumedLoc(
// NOTE(review): orig. line 1770 (arguments to getEntryInfoFromPresumedLoc)
// missing from this dump — restore from upstream.
 1771 SmallString<128> Name;
 1772 OMPBuilder.OffloadInfoManager.getTargetRegionEntryFnName(Name, EntryInfo);
 1773
 1774 // We need to generate a new global to hold the address of the indirectly
 1775 // called device function. Doing this allows us to keep the visibility and
 1776 // linkage of the associated function unchanged while allowing the runtime to
 1777 // access its value.
 1778 llvm::GlobalValue *Addr = GV;
 1779 if (CGM.getLangOpts().OpenMPIsTargetDevice) {
 1780 Addr = new llvm::GlobalVariable(
// NOTE(review): orig. line 1781 (module + value type args) missing here.
 1782 /*isConstant=*/true, llvm::GlobalValue::ExternalLinkage, GV, Name,
 1783 nullptr, llvm::GlobalValue::NotThreadLocal,
 1784 CGM.getModule().getDataLayout().getDefaultGlobalsAddressSpace());
 1785 Addr->setVisibility(llvm::GlobalValue::ProtectedVisibility);
 1786 }
 1787
 1788 OMPBuilder.OffloadInfoManager.registerDeviceGlobalVarEntryInfo(
// NOTE(review): orig. line 1789 (name/addr/size args) missing here.
 1790 llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryIndirect,
 1791 llvm::GlobalValue::WeakODRLinkage);
 1792}
1793
// NOTE(review): signature line (orig. 1794) missing — presumably
// CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF,
// QualType VarType, StringRef Name); confirm against upstream. Returns the
// address of a compiler-introduced threadprivate variable: a real TLS global
// when the target supports OpenMP TLS, otherwise a per-thread cache obtained
// via __kmpc_threadprivate_cached.
 1795 QualType VarType,
 1796 StringRef Name) {
 1797 std::string Suffix = getName({"artificial", ""});
 1798 llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType);
 1799 llvm::GlobalVariable *GAddr = OMPBuilder.getOrCreateInternalVariable(
 1800 VarLVType, Twine(Name).concat(Suffix).str());
 1801 if (CGM.getLangOpts().OpenMP && CGM.getLangOpts().OpenMPUseTLS &&
// NOTE(review): orig. line 1802 (remaining TLS-support condition) missing.
 1803 GAddr->setThreadLocal(/*Val=*/true);
 1804 return Address(GAddr, GAddr->getValueType(),
// NOTE(review): orig. line 1805 (alignment argument) missing.
 1806 }
 1807 std::string CacheSuffix = getName({"cache", ""});
 1808 llvm::Value *Args[] = {
// NOTE(review): orig. lines 1809-1811 (location/gtid/value args) missing.
 1812 CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy,
 1813 /*isSigned=*/false),
 1814 OMPBuilder.getOrCreateInternalVariable(
// NOTE(review): orig. line 1815 (cache variable type arg) missing.
 1816 Twine(Name).concat(Suffix).concat(CacheSuffix).str())};
 1817 return Address(
// NOTE(review): orig. line 1818 (pointer cast wrapper) missing.
 1819 CGF.EmitRuntimeCall(
 1820 OMPBuilder.getOrCreateRuntimeFunction(
 1821 CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
 1822 Args),
 1823 VarLVType->getPointerTo(/*AddrSpace=*/0)),
 1824 VarLVType, CGM.getContext().getTypeAlignInChars(VarType));
 1825}
1826
// NOTE(review): signature line (orig. 1827) missing — this is the tail of
// CGOpenMPRuntime::emitIfClause(CGF, Cond, ThenGen, ElseGen). Emits
// "if (Cond) ThenGen(); else ElseGen();", eliding the dead arm entirely when
// the condition constant-folds.
 1828 const RegionCodeGenTy &ThenGen,
 1829 const RegionCodeGenTy &ElseGen) {
 1830 CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());
 1831
 1832 // If the condition constant folds and can be elided, try to avoid emitting
 1833 // the condition and the dead arm of the if/else.
 1834 bool CondConstant;
 1835 if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
 1836 if (CondConstant)
 1837 ThenGen(CGF);
 1838 else
 1839 ElseGen(CGF);
 1840 return;
 1841 }
 1842
 1843 // Otherwise, the condition did not fold, or we couldn't elide it. Just
 1844 // emit the conditional branch.
 1845 llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then");
 1846 llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else");
 1847 llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end");
 1848 CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0);
 1849
 1850 // Emit the 'then' code.
 1851 CGF.EmitBlock(ThenBlock);
 1852 ThenGen(CGF);
 1853 CGF.EmitBranch(ContBlock);
 1854 // Emit the 'else' code if present.
 1855 // There is no need to emit line number for unconditional branch.
// NOTE(review): orig. line 1856 (debug-location suppression scope) missing.
 1857 CGF.EmitBlock(ElseBlock);
 1858 ElseGen(CGF);
 1859 // There is no need to emit line number for unconditional branch.
// NOTE(review): orig. line 1860 (debug-location suppression scope) missing.
 1861 CGF.EmitBranch(ContBlock);
 1862 // Emit the continuation block for code after the if.
 1863 CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
 1864}
1865
// NOTE(review): signature line (orig. 1866) missing — this is the tail of
// CGOpenMPRuntime::emitParallelCall. Emits a parallel region call: normally
// __kmpc_fork_call(loc, n, microtask, captured...); when an if-clause is
// present and false, a serialized fallback that brackets a direct call to
// the outlined function with __kmpc_[end_]serialized_parallel.
 1867 llvm::Function *OutlinedFn,
 1868 ArrayRef<llvm::Value *> CapturedVars,
 1869 const Expr *IfCond,
 1870 llvm::Value *NumThreads) {
 1871 if (!CGF.HaveInsertPoint())
 1872 return;
 1873 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
 1874 auto &M = CGM.getModule();
 1875 auto &&ThenGen = [&M, OutlinedFn, CapturedVars, RTLoc,
 1876 this](CodeGenFunction &CGF, PrePostActionTy &) {
 1877 // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
// NOTE(review): orig. line 1878 (CGOpenMPRuntime &RT = ...) missing.
 1879 llvm::Value *Args[] = {
 1880 RTLoc,
 1881 CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
 1882 CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())};
// NOTE(review): orig. line 1883 (RealArgs declaration) missing.
 1884 RealArgs.append(std::begin(Args), std::end(Args));
 1885 RealArgs.append(CapturedVars.begin(), CapturedVars.end());
 1886
 1887 llvm::FunctionCallee RTLFn =
 1888 OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_fork_call);
 1889 CGF.EmitRuntimeCall(RTLFn, RealArgs);
 1890 };
 1891 auto &&ElseGen = [&M, OutlinedFn, CapturedVars, RTLoc, Loc,
 1892 this](CodeGenFunction &CGF, PrePostActionTy &) {
// NOTE(review): orig. line 1893 (CGOpenMPRuntime &RT = ...) missing.
 1894 llvm::Value *ThreadID = RT.getThreadID(CGF, Loc);
 1895 // Build calls:
 1896 // __kmpc_serialized_parallel(&Loc, GTid);
 1897 llvm::Value *Args[] = {RTLoc, ThreadID};
 1898 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
 1899 M, OMPRTL___kmpc_serialized_parallel),
 1900 Args);
 1901
 1902 // OutlinedFn(&GTid, &zero_bound, CapturedStruct);
 1903 Address ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc);
 1904 RawAddress ZeroAddrBound =
// NOTE(review): orig. line 1905 (CreateDefaultAlignTempAlloca call) missing.
 1906 /*Name=*/".bound.zero.addr");
 1907 CGF.Builder.CreateStore(CGF.Builder.getInt32(/*C*/ 0), ZeroAddrBound);
// NOTE(review): orig. line 1908 (OutlinedFnArgs declaration) missing.
 1909 // ThreadId for serialized parallels is 0.
 1910 OutlinedFnArgs.push_back(ThreadIDAddr.emitRawPointer(CGF));
 1911 OutlinedFnArgs.push_back(ZeroAddrBound.getPointer());
 1912 OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
 1913
 1914 // Ensure we do not inline the function. This is trivially true for the ones
 1915 // passed to __kmpc_fork_call but the ones called in serialized regions
 1916 // could be inlined. This is not a perfect but it is closer to the invariant
 1917 // we want, namely, every data environment starts with a new function.
 1918 // TODO: We should pass the if condition to the runtime function and do the
 1919 // handling there. Much cleaner code.
 1920 OutlinedFn->removeFnAttr(llvm::Attribute::AlwaysInline);
 1921 OutlinedFn->addFnAttr(llvm::Attribute::NoInline);
 1922 RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);
 1923
 1924 // __kmpc_end_serialized_parallel(&Loc, GTid);
 1925 llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID};
 1926 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
 1927 M, OMPRTL___kmpc_end_serialized_parallel),
 1928 EndArgs);
 1929 };
 1930 if (IfCond) {
 1931 emitIfClause(CGF, IfCond, ThenGen, ElseGen);
 1932 } else {
 1933 RegionCodeGenTy ThenRCG(ThenGen);
 1934 ThenRCG(CGF);
 1935 }
 1936}
1937
1938// If we're inside an (outlined) parallel region, use the region info's
1939// thread-ID variable (it is passed in a first argument of the outlined function
1940// as "kmp_int32 *gtid"). Otherwise, if we're not inside parallel region, but in
1941// regular serial code region, get thread ID by calling kmp_int32
1942// kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary and
1943// return the address of that temp.
// NOTE(review): signature line (orig. 1944) missing — presumably
// Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF, ...).
// Per the comment above: reuse the outlined region's gtid argument when
// available, otherwise materialize the thread ID into a temporary.
 1945 SourceLocation Loc) {
 1946 if (auto *OMPRegionInfo =
 1947 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
 1948 if (OMPRegionInfo->getThreadIDVariable())
 1949 return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress(CGF);
 1950
 1951 llvm::Value *ThreadID = getThreadID(CGF, Loc);
 1952 QualType Int32Ty =
 1953 CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
 1954 Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
 1955 CGF.EmitStoreOfScalar(ThreadID,
 1956 CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty));
 1957
 1958 return ThreadIDTemp;
 1959}
1960
1961llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
1962 std::string Prefix = Twine("gomp_critical_user_", CriticalName).str();
1963 std::string Name = getName({Prefix, "var"});
1964 return OMPBuilder.getOrCreateInternalVariable(KmpCriticalNameTy, Name);
1965}
1966
1967namespace {
1968/// Common pre(post)-action for different OpenMP constructs.
1969class CommonActionTy final : public PrePostActionTy {
1970 llvm::FunctionCallee EnterCallee;
1971 ArrayRef<llvm::Value *> EnterArgs;
1972 llvm::FunctionCallee ExitCallee;
1973 ArrayRef<llvm::Value *> ExitArgs;
1974 bool Conditional;
1975 llvm::BasicBlock *ContBlock = nullptr;
1976
1977public:
1978 CommonActionTy(llvm::FunctionCallee EnterCallee,
1979 ArrayRef<llvm::Value *> EnterArgs,
1980 llvm::FunctionCallee ExitCallee,
1981 ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false)
1982 : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),
1983 ExitArgs(ExitArgs), Conditional(Conditional) {}
1984 void Enter(CodeGenFunction &CGF) override {
1985 llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs);
1986 if (Conditional) {
1987 llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes);
1988 auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
1989 ContBlock = CGF.createBasicBlock("omp_if.end");
1990 // Generate the branch (If-stmt)
1991 CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
1992 CGF.EmitBlock(ThenBlock);
1993 }
1994 }
1995 void Done(CodeGenFunction &CGF) {
1996 // Emit the rest of blocks/branches
1997 CGF.EmitBranch(ContBlock);
1998 CGF.EmitBlock(ContBlock, true);
1999 }
2000 void Exit(CodeGenFunction &CGF) override {
2001 CGF.EmitRuntimeCall(ExitCallee, ExitArgs);
2002 }
2003};
2004} // anonymous namespace
2005
// NOTE(review): signature line (orig. 2006) missing — tail of
// CGOpenMPRuntime::emitCriticalRegion. Wraps the region body in
// __kmpc_critical[_with_hint] / __kmpc_end_critical using the per-name lock.
 2007 StringRef CriticalName,
 2008 const RegionCodeGenTy &CriticalOpGen,
 2009 SourceLocation Loc, const Expr *Hint) {
 2010 // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]);
 2011 // CriticalOpGen();
 2012 // __kmpc_end_critical(ident_t *, gtid, Lock);
 2013 // Prepare arguments and build a call to __kmpc_critical
 2014 if (!CGF.HaveInsertPoint())
 2015 return;
 2016 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
 2017 getCriticalRegionLock(CriticalName)};
 2018 llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args),
 2019 std::end(Args));
 2020 if (Hint) {
 2021 EnterArgs.push_back(CGF.Builder.CreateIntCast(
 2022 CGF.EmitScalarExpr(Hint), CGM.Int32Ty, /*isSigned=*/false));
 2023 }
 2024 CommonActionTy Action(
 2025 OMPBuilder.getOrCreateRuntimeFunction(
 2026 CGM.getModule(),
 2027 Hint ? OMPRTL___kmpc_critical_with_hint : OMPRTL___kmpc_critical),
 2028 EnterArgs,
 2029 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
 2030 OMPRTL___kmpc_end_critical),
 2031 Args);
 2032 CriticalOpGen.setAction(Action);
 2033 emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
 2034}
2035
// NOTE(review): signature line (orig. 2036) missing — tail of
// CGOpenMPRuntime::emitMasterRegion. Guards the region body with a
// conditional __kmpc_master / __kmpc_end_master pair (body runs only on the
// master thread); Action.Done() closes the guard.
 2037 const RegionCodeGenTy &MasterOpGen,
 2038 SourceLocation Loc) {
 2039 if (!CGF.HaveInsertPoint())
 2040 return;
 2041 // if(__kmpc_master(ident_t *, gtid)) {
 2042 // MasterOpGen();
 2043 // __kmpc_end_master(ident_t *, gtid);
 2044 // }
 2045 // Prepare arguments and build a call to __kmpc_master
 2046 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
 2047 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
 2048 CGM.getModule(), OMPRTL___kmpc_master),
 2049 Args,
 2050 OMPBuilder.getOrCreateRuntimeFunction(
 2051 CGM.getModule(), OMPRTL___kmpc_end_master),
 2052 Args,
 2053 /*Conditional=*/true);
 2054 MasterOpGen.setAction(Action);
 2055 emitInlinedDirective(CGF, OMPD_master, MasterOpGen);
 2056 Action.Done(CGF);
 2057}
2058
// NOTE(review): signature line (orig. 2059) missing — tail of
// CGOpenMPRuntime::emitMaskedRegion. Like the master region but with a
// filter thread id (defaults to 0 when no filter clause is present).
 2060 const RegionCodeGenTy &MaskedOpGen,
 2061 SourceLocation Loc, const Expr *Filter) {
 2062 if (!CGF.HaveInsertPoint())
 2063 return;
 2064 // if(__kmpc_masked(ident_t *, gtid, filter)) {
 2065 // MaskedOpGen();
 2066 // __kmpc_end_masked(iden_t *, gtid);
 2067 // }
 2068 // Prepare arguments and build a call to __kmpc_masked
 2069 llvm::Value *FilterVal = Filter
 2070 ? CGF.EmitScalarExpr(Filter, CGF.Int32Ty)
 2071 : llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/0);
 2072 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
 2073 FilterVal};
 2074 llvm::Value *ArgsEnd[] = {emitUpdateLocation(CGF, Loc),
 2075 getThreadID(CGF, Loc)};
 2076 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
 2077 CGM.getModule(), OMPRTL___kmpc_masked),
 2078 Args,
 2079 OMPBuilder.getOrCreateRuntimeFunction(
 2080 CGM.getModule(), OMPRTL___kmpc_end_masked),
 2081 ArgsEnd,
 2082 /*Conditional=*/true);
 2083 MaskedOpGen.setAction(Action);
 2084 emitInlinedDirective(CGF, OMPD_masked, MaskedOpGen);
 2085 Action.Done(CGF);
 2086}
2087
// NOTE(review): signature line (orig. 2088) missing — tail of
// CGOpenMPRuntime::emitTaskyieldCall. Emits 'taskyield' via the
// OpenMPIRBuilder when enabled, otherwise calls __kmpc_omp_taskyield
// directly; then lets an enclosing untied-task region emit its switch.
 2089 SourceLocation Loc) {
 2090 if (!CGF.HaveInsertPoint())
 2091 return;
 2092 if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
 2093 OMPBuilder.createTaskyield(CGF.Builder);
 2094 } else {
 2095 // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
 2096 llvm::Value *Args[] = {
 2097 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
 2098 llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
 2099 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
 2100 CGM.getModule(), OMPRTL___kmpc_omp_taskyield),
 2101 Args);
 2102 }
 2103
 2104 if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
 2105 Region->emitUntiedSwitch(CGF);
 2106}
2107
// NOTE(review): signature line (orig. 2108) missing — tail of
// CGOpenMPRuntime::emitTaskgroupRegion. Brackets the region body with an
// unconditional __kmpc_taskgroup / __kmpc_end_taskgroup pair.
 2109 const RegionCodeGenTy &TaskgroupOpGen,
 2110 SourceLocation Loc) {
 2111 if (!CGF.HaveInsertPoint())
 2112 return;
 2113 // __kmpc_taskgroup(ident_t *, gtid);
 2114 // TaskgroupOpGen();
 2115 // __kmpc_end_taskgroup(ident_t *, gtid);
 2116 // Prepare arguments and build a call to __kmpc_taskgroup
 2117 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
 2118 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
 2119 CGM.getModule(), OMPRTL___kmpc_taskgroup),
 2120 Args,
 2121 OMPBuilder.getOrCreateRuntimeFunction(
 2122 CGM.getModule(), OMPRTL___kmpc_end_taskgroup),
 2123 Args);
 2124 TaskgroupOpGen.setAction(Action);
 2125 emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen);
 2126}
2127
 2128/// Given an array of pointers to variables, project the address of a
 2129/// given variable.
// NOTE(review): signature line (orig. 2130) missing — presumably
// static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array,
// unsigned Index, const VarDecl *Var); confirm against upstream.
 2131 unsigned Index, const VarDecl *Var) {
 2132 // Pull out the pointer to the variable.
 2133 Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index);
 2134 llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr);
 2135
 2136 llvm::Type *ElemTy = CGF.ConvertTypeForMem(Var->getType());
 2137 return Address(
 2138 CGF.Builder.CreateBitCast(
 2139 Ptr, ElemTy->getPointerTo(Ptr->getType()->getPointerAddressSpace())),
 2140 ElemTy, CGF.getContext().getDeclAlign(Var));
 2141}
2142
// NOTE(review): signature line (orig. 2143) missing — presumably
// static llvm::Value *emitCopyprivateCopyFunction(...). Builds the internal
// "copy_func" helper passed to __kmpc_copyprivate: given two void* arrays of
// variable addresses it performs element-wise copy-assignment using the
// clause's assignment expressions.
 2144 CodeGenModule &CGM, llvm::Type *ArgsElemType,
 2145 ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
 2146 ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps,
 2147 SourceLocation Loc) {
 2148 ASTContext &C = CGM.getContext();
 2149 // void copy_func(void *LHSArg, void *RHSArg);
 2150 FunctionArgList Args;
 2151 ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
// NOTE(review): orig. lines 2152/2154 (ImplicitParamKind args) missing.
 2153 ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
 2155 Args.push_back(&LHSArg);
 2156 Args.push_back(&RHSArg);
 2157 const auto &CGFI =
 2158 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
 2159 std::string Name =
 2160 CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"});
 2161 auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
 2162 llvm::GlobalValue::InternalLinkage, Name,
 2163 &CGM.getModule());
// NOTE(review): orig. line 2164 (SetInternalFunctionAttributes) missing.
 2165 Fn->setDoesNotRecurse();
 2166 CodeGenFunction CGF(CGM);
 2167 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
 2168 // Dest = (void*[n])(LHSArg);
 2169 // Src = (void*[n])(RHSArg);
// NOTE(review): orig. lines 2170/2174 (Address LHS/RHS declarations with
// pointer bitcasts) missing.
 2171 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
 2172 ArgsElemType->getPointerTo()),
 2173 ArgsElemType, CGF.getPointerAlign());
 2175 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
 2176 ArgsElemType->getPointerTo()),
 2177 ArgsElemType, CGF.getPointerAlign());
 2178 // *(Type0*)Dst[0] = *(Type0*)Src[0];
 2179 // *(Type1*)Dst[1] = *(Type1*)Src[1];
 2180 // ...
 2181 // *(Typen*)Dst[n] = *(Typen*)Src[n];
 2182 for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
 2183 const auto *DestVar =
 2184 cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
 2185 Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);
 2186
 2187 const auto *SrcVar =
 2188 cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
 2189 Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);
 2190
 2191 const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
 2192 QualType Type = VD->getType();
 2193 CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);
 2194 }
 2195 CGF.FinishFunction();
 2196 return Fn;
 2197}
2198
// NOTE(review): signature line (orig. 2199) missing — tail of
// CGOpenMPRuntime::emitSingleRegion. Guards the body with a conditional
// __kmpc_single / __kmpc_end_single pair; when copyprivate clauses are
// present, a did_it flag plus __kmpc_copyprivate broadcast the single
// thread's values to the rest of the team.
 2200 const RegionCodeGenTy &SingleOpGen,
 2201 SourceLocation Loc,
 2202 ArrayRef<const Expr *> CopyprivateVars,
 2203 ArrayRef<const Expr *> SrcExprs,
 2204 ArrayRef<const Expr *> DstExprs,
 2205 ArrayRef<const Expr *> AssignmentOps) {
 2206 if (!CGF.HaveInsertPoint())
 2207 return;
 2208 assert(CopyprivateVars.size() == SrcExprs.size() &&
 2209 CopyprivateVars.size() == DstExprs.size() &&
 2210 CopyprivateVars.size() == AssignmentOps.size());
// NOTE(review): orig. line 2211 (ASTContext &C = ...) missing.
 2212 // int32 did_it = 0;
 2213 // if(__kmpc_single(ident_t *, gtid)) {
 2214 // SingleOpGen();
 2215 // __kmpc_end_single(ident_t *, gtid);
 2216 // did_it = 1;
 2217 // }
 2218 // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
 2219 // <copy_func>, did_it);
 2220
 2221 Address DidIt = Address::invalid();
 2222 if (!CopyprivateVars.empty()) {
 2223 // int32 did_it = 0;
 2224 QualType KmpInt32Ty =
 2225 C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
 2226 DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
 2227 CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
 2228 }
 2229 // Prepare arguments and build a call to __kmpc_single
 2230 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
 2231 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
 2232 CGM.getModule(), OMPRTL___kmpc_single),
 2233 Args,
 2234 OMPBuilder.getOrCreateRuntimeFunction(
 2235 CGM.getModule(), OMPRTL___kmpc_end_single),
 2236 Args,
 2237 /*Conditional=*/true);
 2238 SingleOpGen.setAction(Action);
 2239 emitInlinedDirective(CGF, OMPD_single, SingleOpGen);
 2240 if (DidIt.isValid()) {
 2241 // did_it = 1;
 2242 CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
 2243 }
 2244 Action.Done(CGF);
 2245 // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
 2246 // <copy_func>, did_it);
 2247 if (DidIt.isValid()) {
 2248 llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
 2249 QualType CopyprivateArrayTy = C.getConstantArrayType(
 2250 C.VoidPtrTy, ArraySize, nullptr, ArraySizeModifier::Normal,
 2251 /*IndexTypeQuals=*/0);
 2252 // Create a list of all private variables for copyprivate.
 2253 Address CopyprivateList =
 2254 CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
 2255 for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
 2256 Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I);
 2257 CGF.Builder.CreateStore(
// NOTE(review): orig. line 2258 (CreatePointerBitCastOrAddrSpaceCast)
// missing.
 2259 CGF.EmitLValue(CopyprivateVars[I]).getPointer(CGF),
 2260 CGF.VoidPtrTy),
 2261 Elem);
 2262 }
 2263 // Build function that copies private values from single region to all other
 2264 // threads in the corresponding parallel region.
 2265 llvm::Value *CpyFn = emitCopyprivateCopyFunction(
 2266 CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy), CopyprivateVars,
 2267 SrcExprs, DstExprs, AssignmentOps, Loc);
 2268 llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy);
// NOTE(review): orig. line 2269 (Address CL = ...ElementBitCast) missing.
 2270 CopyprivateList, CGF.VoidPtrTy, CGF.Int8Ty);
 2271 llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt);
 2272 llvm::Value *Args[] = {
 2273 emitUpdateLocation(CGF, Loc), // ident_t *<loc>
 2274 getThreadID(CGF, Loc), // i32 <gtid>
 2275 BufSize, // size_t <buf_size>
 2276 CL.emitRawPointer(CGF), // void *<copyprivate list>
 2277 CpyFn, // void (*) (void *, void *) <copy_func>
 2278 DidItVal // i32 did_it
 2279 };
 2280 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
 2281 CGM.getModule(), OMPRTL___kmpc_copyprivate),
 2282 Args);
 2283 }
 2284}
2285
// NOTE(review): signature line (orig. 2286) missing — tail of
// CGOpenMPRuntime::emitOrderedRegion. When IsThreads, brackets the body
// with __kmpc_ordered / __kmpc_end_ordered; otherwise emits the body inline
// with no runtime calls.
 2287 const RegionCodeGenTy &OrderedOpGen,
 2288 SourceLocation Loc, bool IsThreads) {
 2289 if (!CGF.HaveInsertPoint())
 2290 return;
 2291 // __kmpc_ordered(ident_t *, gtid);
 2292 // OrderedOpGen();
 2293 // __kmpc_end_ordered(ident_t *, gtid);
 2294 // Prepare arguments and build a call to __kmpc_ordered
 2295 if (IsThreads) {
 2296 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
 2297 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
 2298 CGM.getModule(), OMPRTL___kmpc_ordered),
 2299 Args,
 2300 OMPBuilder.getOrCreateRuntimeFunction(
 2301 CGM.getModule(), OMPRTL___kmpc_end_ordered),
 2302 Args);
 2303 OrderedOpGen.setAction(Action);
 2304 emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
 2305 return;
 2306 }
 2307 emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
 2308}
2309
// NOTE(review): signature line (orig. 2310) missing — presumably
// static unsigned getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind).
// Maps a directive kind to the ident_t barrier flag used when emitting an
// implicit/explicit barrier.
 2311 unsigned Flags;
 2312 if (Kind == OMPD_for)
 2313 Flags = OMP_IDENT_BARRIER_IMPL_FOR;
 2314 else if (Kind == OMPD_sections)
 2315 Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS;
 2316 else if (Kind == OMPD_single)
 2317 Flags = OMP_IDENT_BARRIER_IMPL_SINGLE;
 2318 else if (Kind == OMPD_barrier)
 2319 Flags = OMP_IDENT_BARRIER_EXPL;
 2320 else
 2321 Flags = OMP_IDENT_BARRIER_IMPL;
 2322 return Flags;
 2323}
2324
// NOTE(review): signature line (orig. 2325) missing — tail of
// CGOpenMPRuntime::getDefaultScheduleAndChunk. For doacross loops (an
// 'ordered' clause with loop count), force schedule(static, 1).
 2326 CodeGenFunction &CGF, const OMPLoopDirective &S,
 2327 OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const {
 2328 // Check if the loop directive is actually a doacross loop directive. In this
 2329 // case choose static, 1 schedule.
 2330 if (llvm::any_of(
 2331 S.getClausesOfKind<OMPOrderedClause>(),
 2332 [](const OMPOrderedClause *C) { return C->getNumForLoops(); })) {
 2333 ScheduleKind = OMPC_SCHEDULE_static;
 2334 // Chunk size is 1 in this case.
 2335 llvm::APInt ChunkSize(32, 1);
 2336 ChunkExpr = IntegerLiteral::Create(
 2337 CGF.getContext(), ChunkSize,
 2338 CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
 2339 SourceLocation());
 2340 }
 2341}
2342
// NOTE(review): signature line (orig. 2343) missing — tail of
// CGOpenMPRuntime::emitBarrierCall. Emits a barrier either through the
// OpenMPIRBuilder, or as __kmpc_cancel_barrier (with optional cancellation
// check and branch-through-cleanup exit) inside cancellable regions, or as a
// plain __kmpc_barrier.
 2344 OpenMPDirectiveKind Kind, bool EmitChecks,
 2345 bool ForceSimpleCall) {
 2346 // Check if we should use the OMPBuilder
 2347 auto *OMPRegionInfo =
 2348 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo);
 2349 if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
 2350 CGF.Builder.restoreIP(OMPBuilder.createBarrier(
 2351 CGF.Builder, Kind, ForceSimpleCall, EmitChecks));
 2352 return;
 2353 }
 2354
 2355 if (!CGF.HaveInsertPoint())
 2356 return;
 2357 // Build call __kmpc_cancel_barrier(loc, thread_id);
 2358 // Build call __kmpc_barrier(loc, thread_id);
 2359 unsigned Flags = getDefaultFlagsForBarriers(Kind);
 2360 // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc,
 2361 // thread_id);
 2362 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
 2363 getThreadID(CGF, Loc)};
 2364 if (OMPRegionInfo) {
 2365 if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) {
 2366 llvm::Value *Result = CGF.EmitRuntimeCall(
 2367 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
 2368 OMPRTL___kmpc_cancel_barrier),
 2369 Args);
 2370 if (EmitChecks) {
 2371 // if (__kmpc_cancel_barrier()) {
 2372 // exit from construct;
 2373 // }
 2374 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
 2375 llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
 2376 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
 2377 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
 2378 CGF.EmitBlock(ExitBB);
 2379 // exit from construct;
 2380 CodeGenFunction::JumpDest CancelDestination =
 2381 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
 2382 CGF.EmitBranchThroughCleanup(CancelDestination);
 2383 CGF.EmitBlock(ContBB, /*IsFinished=*/true);
 2384 }
 2385 return;
 2386 }
 2387 }
 2388 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
 2389 CGM.getModule(), OMPRTL___kmpc_barrier),
 2390 Args);
 2391}
2392
// NOTE(review): signature line (orig. 2393) missing — tail of
// CGOpenMPRuntime::emitErrorCall. Lowers the 'error' directive to
// __kmpc_error(loc, severity, message); severity 2 = fatal, 1 = warning;
// message pointer is null when no message clause is given.
 2394 Expr *ME, bool IsFatal) {
 2395 llvm::Value *MVL =
 2396 ME ? CGF.EmitStringLiteralLValue(cast<StringLiteral>(ME)).getPointer(CGF)
 2397 : llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
 2398 // Build call void __kmpc_error(ident_t *loc, int severity, const char
 2399 // *message)
 2400 llvm::Value *Args[] = {
 2401 emitUpdateLocation(CGF, Loc, /*Flags=*/0, /*GenLoc=*/true),
 2402 llvm::ConstantInt::get(CGM.Int32Ty, IsFatal ? 2 : 1),
 2403 CGF.Builder.CreatePointerCast(MVL, CGM.Int8PtrTy)};
 2404 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
 2405 CGM.getModule(), OMPRTL___kmpc_error),
 2406 Args);
 2407}
2408
 2409/// Map the OpenMP loop schedule to the runtime enumeration.
// Chooses the ordered vs. unordered and chunked vs. unchunked variant of the
// kmp schedule constant for a given schedule clause kind.
 2410static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
 2411 bool Chunked, bool Ordered) {
 2412 switch (ScheduleKind) {
 2413 case OMPC_SCHEDULE_static:
 2414 return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked)
 2415 : (Ordered ? OMP_ord_static : OMP_sch_static);
 2416 case OMPC_SCHEDULE_dynamic:
 2417 return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked;
 2418 case OMPC_SCHEDULE_guided:
 2419 return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked;
 2420 case OMPC_SCHEDULE_runtime:
 2421 return Ordered ? OMP_ord_runtime : OMP_sch_runtime;
 2422 case OMPC_SCHEDULE_auto:
 2423 return Ordered ? OMP_ord_auto : OMP_sch_auto;
// NOTE(review): orig. line 2424 (the OMPC_SCHEDULE_unknown case label)
// missing from this dump — restore from upstream.
 2425 assert(!Chunked && "chunk was specified but schedule kind not known");
 2426 return Ordered ? OMP_ord_static : OMP_sch_static;
 2427 }
 2428 llvm_unreachable("Unexpected runtime schedule");
 2429}
2430
 2431/// Map the OpenMP distribute schedule to the runtime enumeration.
 2432static OpenMPSchedType
// NOTE(review): orig. line 2433 (parameter list: dist_schedule kind and
// Chunked flag) missing from this dump.
 2434 // only static is allowed for dist_schedule
 2435 return Chunked ? OMP_dist_sch_static_chunked : OMP_dist_sch_static;
 2436}
2437
// NOTE(review): signature line (orig. 2438) missing — tail of
// CGOpenMPRuntime::isStaticNonchunked (loop-schedule overload): true iff
// the schedule lowers to plain unordered static.
 2439 bool Chunked) const {
 2440 OpenMPSchedType Schedule =
 2441 getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
 2442 return Schedule == OMP_sch_static;
 2443}
2444
// NOTE(review): signature line (orig. 2445) missing — dist_schedule overload
// of isStaticNonchunked: true iff the schedule is unchunked distribute-static.
 2446 OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
 2447 OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
 2448 return Schedule == OMP_dist_sch_static;
 2449}
2450
// NOTE(review): signature line (orig. 2451) missing — tail of
// CGOpenMPRuntime::isStaticChunked (loop-schedule overload): true iff the
// schedule lowers to unordered static-chunked.
 2452 bool Chunked) const {
 2453 OpenMPSchedType Schedule =
 2454 getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
 2455 return Schedule == OMP_sch_static_chunked;
 2456}
2457
// NOTE(review): signature line (orig. 2458) missing — dist_schedule overload
// of isStaticChunked: true iff the schedule is chunked distribute-static.
 2459 OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
 2460 OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
 2461 return Schedule == OMP_dist_sch_static_chunked;
 2462}
2463
// NOTE(review): signature line (orig. 2464) missing — presumably
// CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind): anything other than
// plain static counts as dynamic here.
 2465 OpenMPSchedType Schedule =
 2466 getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
 2467 assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
 2468 return Schedule != OMP_sch_static;
 2469}
2470
// Combines the base schedule constant with monotonic/nonmonotonic modifier
// bits from the schedule clause modifiers M1/M2, applying the OpenMP 5.0
// default-modifier rule; 'simd' upgrades static_chunked to the balanced
// variant. Returns Schedule | Modifier.
 2471static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule,
// NOTE(review): orig. lines 2472-2473 (the two OpenMPScheduleClauseModifier
// parameters M1/M2) missing from this dump.
 2474 int Modifier = 0;
 2475 switch (M1) {
 2476 case OMPC_SCHEDULE_MODIFIER_monotonic:
 2477 Modifier = OMP_sch_modifier_monotonic;
 2478 break;
 2479 case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
 2480 Modifier = OMP_sch_modifier_nonmonotonic;
 2481 break;
 2482 case OMPC_SCHEDULE_MODIFIER_simd:
 2483 if (Schedule == OMP_sch_static_chunked)
 2484 Schedule = OMP_sch_static_balanced_chunked;
 2485 break;
// NOTE(review): orig. lines 2486-2487 (last/unknown modifier case labels)
// missing from this dump.
 2488 break;
 2489 }
 2490 switch (M2) {
 2491 case OMPC_SCHEDULE_MODIFIER_monotonic:
 2492 Modifier = OMP_sch_modifier_monotonic;
 2493 break;
 2494 case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
 2495 Modifier = OMP_sch_modifier_nonmonotonic;
 2496 break;
 2497 case OMPC_SCHEDULE_MODIFIER_simd:
 2498 if (Schedule == OMP_sch_static_chunked)
 2499 Schedule = OMP_sch_static_balanced_chunked;
 2500 break;
// NOTE(review): orig. lines 2501-2502 (last/unknown modifier case labels)
// missing from this dump.
 2503 break;
 2504 }
 2505 // OpenMP 5.0, 2.9.2 Worksharing-Loop Construct, Desription.
 2506 // If the static schedule kind is specified or if the ordered clause is
 2507 // specified, and if the nonmonotonic modifier is not specified, the effect is
 2508 // as if the monotonic modifier is specified. Otherwise, unless the monotonic
 2509 // modifier is specified, the effect is as if the nonmonotonic modifier is
 2510 // specified.
 2511 if (CGM.getLangOpts().OpenMP >= 50 && Modifier == 0) {
 2512 if (!(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static ||
 2513 Schedule == OMP_sch_static_balanced_chunked ||
 2514 Schedule == OMP_ord_static_chunked || Schedule == OMP_ord_static ||
 2515 Schedule == OMP_dist_sch_static_chunked ||
 2516 Schedule == OMP_dist_sch_static))
 2517 Modifier = OMP_sch_modifier_nonmonotonic;
 2518 }
 2519 return Schedule | Modifier;
 2520}
2521
// NOTE(review): signature lines (orig. 2522-2523) missing — tail of
// CGOpenMPRuntime::emitForDispatchInit. Emits the
// __kmpc_dispatch_init_{4,4u,8,8u} call that begins a dynamically scheduled
// worksharing loop; chunk defaults to 1 when absent.
 2524 const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
 2525 bool Ordered, const DispatchRTInput &DispatchValues) {
 2526 if (!CGF.HaveInsertPoint())
 2527 return;
 2528 OpenMPSchedType Schedule = getRuntimeSchedule(
 2529 ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered);
 2530 assert(Ordered ||
 2531 (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
 2532 Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked &&
 2533 Schedule != OMP_sch_static_balanced_chunked));
 2534 // Call __kmpc_dispatch_init(
 2535 // ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
 2536 // kmp_int[32|64] lower, kmp_int[32|64] upper,
 2537 // kmp_int[32|64] stride, kmp_int[32|64] chunk);
 2538
 2539 // If the Chunk was not specified in the clause - use default value 1.
 2540 llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk
 2541 : CGF.Builder.getIntN(IVSize, 1);
 2542 llvm::Value *Args[] = {
 2543 emitUpdateLocation(CGF, Loc),
 2544 getThreadID(CGF, Loc),
 2545 CGF.Builder.getInt32(addMonoNonMonoModifier(
 2546 CGM, Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type
 2547 DispatchValues.LB, // Lower
 2548 DispatchValues.UB, // Upper
 2549 CGF.Builder.getIntN(IVSize, 1), // Stride
 2550 Chunk // Chunk
 2551 };
 2552 CGF.EmitRuntimeCall(OMPBuilder.createDispatchInitFunction(IVSize, IVSigned),
 2553 Args);
 2554}
2555
// NOTE(review): first signature line (orig. 2556, 'static void
// emitForStaticInitCall(') missing — shared helper that emits the
// __kmpc_for_static_init_{4,4u,8,8u} call for statically scheduled loops,
// defaulting chunk to 1 for non-chunked static schedules.
 2557 CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId,
 2558 llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule,
// NOTE(review): orig. line 2559 (modifier parameters M1/M2) missing.
 2560 const CGOpenMPRuntime::StaticRTInput &Values) {
 2561 if (!CGF.HaveInsertPoint())
 2562 return;
 2563
 2564 assert(!Values.Ordered);
 2565 assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
 2566 Schedule == OMP_sch_static_balanced_chunked ||
 2567 Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked ||
 2568 Schedule == OMP_dist_sch_static ||
 2569 Schedule == OMP_dist_sch_static_chunked);
 2570
 2571 // Call __kmpc_for_static_init(
 2572 // ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
 2573 // kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
 2574 // kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
 2575 // kmp_int[32|64] incr, kmp_int[32|64] chunk);
 2576 llvm::Value *Chunk = Values.Chunk;
 2577 if (Chunk == nullptr) {
 2578 assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static ||
 2579 Schedule == OMP_dist_sch_static) &&
 2580 "expected static non-chunked schedule");
 2581 // If the Chunk was not specified in the clause - use default value 1.
 2582 Chunk = CGF.Builder.getIntN(Values.IVSize, 1);
 2583 } else {
 2584 assert((Schedule == OMP_sch_static_chunked ||
 2585 Schedule == OMP_sch_static_balanced_chunked ||
 2586 Schedule == OMP_ord_static_chunked ||
 2587 Schedule == OMP_dist_sch_static_chunked) &&
 2588 "expected static chunked schedule");
 2589 }
 2590 llvm::Value *Args[] = {
 2591 UpdateLocation,
 2592 ThreadId,
 2593 CGF.Builder.getInt32(addMonoNonMonoModifier(CGF.CGM, Schedule, M1,
 2594 M2)), // Schedule type
 2595 Values.IL.emitRawPointer(CGF), // &isLastIter
 2596 Values.LB.emitRawPointer(CGF), // &LB
 2597 Values.UB.emitRawPointer(CGF), // &UB
 2598 Values.ST.emitRawPointer(CGF), // &Stride
 2599 CGF.Builder.getIntN(Values.IVSize, 1), // Incr
 2600 Chunk // Chunk
 2601 };
 2602 CGF.EmitRuntimeCall(ForStaticInitFunction, Args);
 2603}
2604
// Emits the static-schedule init call for a worksharing loop or sections
// directive: picks the runtime schedule from the clause, builds the ident_t
// location (WORK_LOOP vs WORK_SECTIONS flag) and thread id, then delegates to
// emitForStaticInitCall above.
// NOTE(review): the extraction dropped line 2605 (the function header —
// presumably CGOpenMPRuntime::emitForStaticInit) and line 2615 (the condition
// selecting between the two OMP_IDENT_WORK_* flags); kept verbatim.
 2606 SourceLocation Loc,
 2607 OpenMPDirectiveKind DKind,
 2608 const OpenMPScheduleTy &ScheduleKind,
 2609 const StaticRTInput &Values) {
 2610 OpenMPSchedType ScheduleNum = getRuntimeSchedule(
 2611 ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered);
 2612 assert((isOpenMPWorksharingDirective(DKind) || (DKind == OMPD_loop)) &&
 2613 "Expected loop-based or sections-based directive.");
 2614 llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc,
 2616 ? OMP_IDENT_WORK_LOOP
 2617 : OMP_IDENT_WORK_SECTIONS);
 2618 llvm::Value *ThreadId = getThreadID(CGF, Loc);
 2619 llvm::FunctionCallee StaticInitFunction =
 2620 OMPBuilder.createForStaticInitFunction(Values.IVSize, Values.IVSigned,
 2621 false);
 2623 emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
 2624 ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values);
 2625}
2626
// Emits the static init call for a 'distribute' construct: location is tagged
// OMP_IDENT_WORK_DISTRIBUTE, and on AMDGCN/NVPTX device compilations the
// GPU-specific static-init runtime entry is selected.
// NOTE(review): lines 2627-2629 (function header and first parameters,
// including the SchedKind parameter used at line 2632) were dropped by the
// extraction; kept verbatim.
 2630 const CGOpenMPRuntime::StaticRTInput &Values) {
 2631 OpenMPSchedType ScheduleNum =
 2632 getRuntimeSchedule(SchedKind, Values.Chunk != nullptr);
 2633 llvm::Value *UpdatedLocation =
 2634 emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE);
 2635 llvm::Value *ThreadId = getThreadID(CGF, Loc);
 2636 llvm::FunctionCallee StaticInitFunction;
 // GPU offload devices get the dedicated distribute static-init entry point.
 2637 bool isGPUDistribute =
 2638 CGM.getLangOpts().OpenMPIsTargetDevice &&
 2639 (CGM.getTriple().isAMDGCN() || CGM.getTriple().isNVPTX());
 2640 StaticInitFunction = OMPBuilder.createForStaticInitFunction(
 2641 Values.IVSize, Values.IVSigned, isGPUDistribute);
 2642
 2643 emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
 2644 ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown,
 2646}
2647
// Emits the matching "static fini" call that closes a static worksharing or
// distribute region: __kmpc_distribute_static_fini on AMDGCN/NVPTX device
// distribute directives, __kmpc_for_static_fini otherwise.
// NOTE(review): lines 2648 (function header), 2659 and 2666 were dropped by
// the extraction; the ternary at 2660-2664 selects the ident_t work flag.
 2649 SourceLocation Loc,
 2650 OpenMPDirectiveKind DKind) {
 2651 assert((DKind == OMPD_distribute || DKind == OMPD_for ||
 2652 DKind == OMPD_sections) &&
 2653 "Expected distribute, for, or sections directive kind");
 2654 if (!CGF.HaveInsertPoint())
 2655 return;
 2656 // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
 2657 llvm::Value *Args[] = {
 2658 emitUpdateLocation(CGF, Loc,
 2660 (DKind == OMPD_target_teams_loop)
 2661 ? OMP_IDENT_WORK_DISTRIBUTE
 2662 : isOpenMPLoopDirective(DKind)
 2663 ? OMP_IDENT_WORK_LOOP
 2664 : OMP_IDENT_WORK_SECTIONS),
 2665 getThreadID(CGF, Loc)};
 // Device-side distribute uses the dedicated runtime entry.
 2667 if (isOpenMPDistributeDirective(DKind) &&
 2668 CGM.getLangOpts().OpenMPIsTargetDevice &&
 2669 (CGM.getTriple().isAMDGCN() || CGM.getTriple().isNVPTX()))
 2670 CGF.EmitRuntimeCall(
 2671 OMPBuilder.getOrCreateRuntimeFunction(
 2672 CGM.getModule(), OMPRTL___kmpc_distribute_static_fini),
 2673 Args);
 2674 else
 2675 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
 2676 CGM.getModule(), OMPRTL___kmpc_for_static_fini),
 2677 Args);
 2678}
2679
// Emits the dispatch-fini runtime call (__kmpc_dispatch_fini_4/4u/8/8u per
// IVSize/IVSigned) that ends an iteration of a dynamically scheduled loop.
// NOTE(review): line 2680 (the function header) was dropped by the
// extraction; name inferred from the runtime call — TODO confirm.
 2681 SourceLocation Loc,
 2682 unsigned IVSize,
 2683 bool IVSigned) {
 2684 if (!CGF.HaveInsertPoint())
 2685 return;
 2686 // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
 2687 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
 2688 CGF.EmitRuntimeCall(OMPBuilder.createDispatchFiniFunction(IVSize, IVSigned),
 2689 Args);
 2690}
2691
// Emits __kmpc_dispatch_next for a dynamically scheduled loop and converts
// its kmp_int32 result to a bool (true while chunks remain). IL/LB/UB/ST are
// out-parameters filled by the runtime.
// NOTE(review): line 2692 (the function header) was dropped by the
// extraction; kept verbatim.
 2693 SourceLocation Loc, unsigned IVSize,
 2694 bool IVSigned, Address IL,
 2695 Address LB, Address UB,
 2696 Address ST) {
 2697 // Call __kmpc_dispatch_next(
 2698 // ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
 2699 // kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
 2700 // kmp_int[32|64] *p_stride);
 2701 llvm::Value *Args[] = {
 2702 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
 2703 IL.emitRawPointer(CGF), // &isLastIter
 2704 LB.emitRawPointer(CGF), // &Lower
 2705 UB.emitRawPointer(CGF), // &Upper
 2706 ST.emitRawPointer(CGF) // &Stride
 2707 };
 2708 llvm::Value *Call = CGF.EmitRuntimeCall(
 2709 OMPBuilder.createDispatchNextFunction(IVSize, IVSigned), Args);
 // The runtime returns kmp_int32; expose it to codegen as a boolean.
 2710 return CGF.EmitScalarConversion(
 2711 Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1),
 2712 CGF.getContext().BoolTy, Loc);
 2713}
2714
// Emits __kmpc_push_num_threads to record the num_threads clause value for
// the next parallel region; the value is cast to kmp_int32.
// NOTE(review): line 2715 (the function header) was dropped by the
// extraction; kept verbatim.
 2716 llvm::Value *NumThreads,
 2717 SourceLocation Loc) {
 2718 if (!CGF.HaveInsertPoint())
 2719 return;
 2720 // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
 2721 llvm::Value *Args[] = {
 2722 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
 2723 CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)};
 2724 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
 2725 CGM.getModule(), OMPRTL___kmpc_push_num_threads),
 2726 Args);
 2727}
2728
// Emits __kmpc_push_proc_bind to record the proc_bind clause policy for the
// next parallel region. Rejects OMP_PROC_BIND_unknown via assert.
// NOTE(review): line 2729 (the function header) was dropped by the
// extraction; kept verbatim.
 2730 ProcBindKind ProcBind,
 2731 SourceLocation Loc) {
 2732 if (!CGF.HaveInsertPoint())
 2733 return;
 2734 assert(ProcBind != OMP_PROC_BIND_unknown && "Unsupported proc_bind value.");
 2735 // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind)
 2736 llvm::Value *Args[] = {
 2737 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
 2738 llvm::ConstantInt::get(CGM.IntTy, unsigned(ProcBind), /*isSigned=*/true)};
 2739 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
 2740 CGM.getModule(), OMPRTL___kmpc_push_proc_bind),
 2741 Args);
 2742}
2743
// Emits an OpenMP 'flush': via OpenMPIRBuilder::createFlush when the IR
// builder path is enabled, otherwise as a direct __kmpc_flush runtime call.
// NOTE(review): line 2744 (the function header, including the unused-looking
// AO parameter's siblings) was dropped by the extraction; kept verbatim.
 2745 SourceLocation Loc, llvm::AtomicOrdering AO) {
 2746 if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
 2747 OMPBuilder.createFlush(CGF.Builder);
 2748 } else {
 2749 if (!CGF.HaveInsertPoint())
 2750 return;
 2751 // Build call void __kmpc_flush(ident_t *loc)
 2752 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
 2753 CGM.getModule(), OMPRTL___kmpc_flush),
 2754 emitUpdateLocation(CGF, Loc));
 2755 }
 2756}
2757
 2758namespace {
 2759/// Indexes of fields for type kmp_task_t.
 2760/// NOTE(review): the enumerator order must stay in sync with the field
 2761/// order added in the kmp_task_t record builder below (shareds, routine,
 2762/// part_id, data1, data2, then the taskloop-only fields) — the task codegen
 2763/// helpers index fields with std::next(field_begin(), <enumerator>).
 2760enum KmpTaskTFields {
 2761 /// List of shared variables.
 2762 KmpTaskTShareds,
 2763 /// Task routine.
 2764 KmpTaskTRoutine,
 2765 /// Partition id for the untied tasks.
 2766 KmpTaskTPartId,
 2767 /// Function with call of destructors for private variables.
 2768 Data1,
 2769 /// Task priority.
 2770 Data2,
 2771 /// (Taskloops only) Lower bound.
 2772 KmpTaskTLowerBound,
 2773 /// (Taskloops only) Upper bound.
 2774 KmpTaskTUpperBound,
 2775 /// (Taskloops only) Stride.
 2776 KmpTaskTStride,
 2777 /// (Taskloops only) Is last iteration flag.
 2778 KmpTaskTLastIter,
 2779 /// (Taskloops only) Reduction data.
 2780 KmpTaskTReductions,
 2781};
 2782} // anonymous namespace
2783
// Delegates offload-entry/metadata emission to the OpenMPIRBuilder, wiring
// up a diagnostic callback that maps the builder's error kinds onto clang
// diagnostics. For non-link errors the callback scans the source manager's
// file table for the file matching the entry's Device/File unique ID to
// recover a SourceLocation for the diagnostic.
// NOTE(review): lines 2784 (function header), 2796, 2800 and 2823 were
// dropped by the extraction (2800 presumably assigns Loc from the matched
// file; TODO confirm); kept verbatim.
 2785 // If we are in simd mode or there are no entries, we don't need to do
 2786 // anything.
 2787 if (CGM.getLangOpts().OpenMPSimd || OMPBuilder.OffloadInfoManager.empty())
 2788 return;
 2789
 2790 llvm::OpenMPIRBuilder::EmitMetadataErrorReportFunctionTy &&ErrorReportFn =
 2791 [this](llvm::OpenMPIRBuilder::EmitMetadataErrorKind Kind,
 2792 const llvm::TargetRegionEntryInfo &EntryInfo) -> void {
 2793 SourceLocation Loc;
 2794 if (Kind != llvm::OpenMPIRBuilder::EMIT_MD_GLOBAL_VAR_LINK_ERROR) {
 2795 for (auto I = CGM.getContext().getSourceManager().fileinfo_begin(),
 2797 I != E; ++I) {
 2798 if (I->getFirst().getUniqueID().getDevice() == EntryInfo.DeviceID &&
 2799 I->getFirst().getUniqueID().getFile() == EntryInfo.FileID) {
 2801 I->getFirst(), EntryInfo.Line, 1);
 2802 break;
 2803 }
 2804 }
 2805 }
 2806 switch (Kind) {
 2807 case llvm::OpenMPIRBuilder::EMIT_MD_TARGET_REGION_ERROR: {
 2808 unsigned DiagID = CGM.getDiags().getCustomDiagID(
 2809 DiagnosticsEngine::Error, "Offloading entry for target region in "
 2810 "%0 is incorrect: either the "
 2811 "address or the ID is invalid.");
 2812 CGM.getDiags().Report(Loc, DiagID) << EntryInfo.ParentName;
 2813 } break;
 2814 case llvm::OpenMPIRBuilder::EMIT_MD_DECLARE_TARGET_ERROR: {
 2815 unsigned DiagID = CGM.getDiags().getCustomDiagID(
 2816 DiagnosticsEngine::Error, "Offloading entry for declare target "
 2817 "variable %0 is incorrect: the "
 2818 "address is invalid.");
 2819 CGM.getDiags().Report(Loc, DiagID) << EntryInfo.ParentName;
 2820 } break;
 2821 case llvm::OpenMPIRBuilder::EMIT_MD_GLOBAL_VAR_LINK_ERROR: {
 2822 unsigned DiagID = CGM.getDiags().getCustomDiagID(
 2824 "Offloading entry for declare target variable is incorrect: the "
 2825 "address is invalid.");
 2826 CGM.getDiags().Report(DiagID);
 2827 } break;
 2828 }
 2829 };
 2830
 2831 OMPBuilder.createOffloadEntriesAndInfoMetadata(ErrorReportFn);
 2832}
2833
// Lazily builds the kmp_routine_entry_t function-pointer type
// (kmp_int32 (*)(kmp_int32, void*)) and caches it; subsequent calls are
// no-ops thanks to the KmpRoutineEntryPtrTy guard.
// NOTE(review): lines 2834 (header), 2837, 2839 and 2842 were dropped by the
// extraction — the ASTContext local 'C', the EPI declaration, and the store
// to the cached KmpRoutineEntryPtrTy are among the missing text; verbatim.
 2835 if (!KmpRoutineEntryPtrTy) {
 2836 // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.
 2838 QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};
 2840 KmpRoutineEntryPtrQTy = C.getPointerType(
 2841 C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI));
 2843 }
 2844}
2845
 2846namespace {
 // Bundles the pieces of one private/firstprivate/lastprivate variable used
 // by the task codegen below: the referencing expression, the original
 // VarDecl, the generated private copy, and the per-element init variable.
 // The single-argument constructor leaves the other three members null,
 // which isLocalPrivate() uses to mark task-local privates.
 2847 struct PrivateHelpersTy {
 2848 PrivateHelpersTy(const Expr *OriginalRef, const VarDecl *Original,
 2849 const VarDecl *PrivateCopy, const VarDecl *PrivateElemInit)
 2850 : OriginalRef(OriginalRef), Original(Original), PrivateCopy(PrivateCopy),
 2851 PrivateElemInit(PrivateElemInit) {}
 2852 PrivateHelpersTy(const VarDecl *Original) : Original(Original) {}
 2853 const Expr *OriginalRef = nullptr;
 2854 const VarDecl *Original = nullptr;
 2855 const VarDecl *PrivateCopy = nullptr;
 2856 const VarDecl *PrivateElemInit = nullptr;
 // True for task-local privates (constructed via the VarDecl-only ctor).
 2857 bool isLocalPrivate() const {
 2858 return !OriginalRef && !PrivateCopy && !PrivateElemInit;
 2859 }
 2860};
 // Alignment paired with the private-variable descriptor.
 2861 typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy;
 2862 } // anonymous namespace
2863
2864static bool isAllocatableDecl(const VarDecl *VD) {
2865 const VarDecl *CVD = VD->getCanonicalDecl();
2866 if (!CVD->hasAttr<OMPAllocateDeclAttr>())
2867 return false;
2868 const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
2869 // Use the default allocation.
2870 return !(AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc &&
2871 !AA->getAllocator());
2872}
2873
// Builds the implicit ".kmp_privates.t" record holding one field per private
// variable of a task; returns nullptr when there are no privates. Local
// privates of reference type (and allocatable decls) are stored as pointers,
// and AlignedAttr attributes are propagated to the generated fields.
// NOTE(review): lines 2875 (rest of the header), 2885 (presumably the
// 'QualType Type = ...' init used below) and 2894 (presumably the
// addFieldToRecordDecl call producing 'FD') were dropped; kept verbatim.
 2874 static RecordDecl *
 2876 if (!Privates.empty()) {
 2877 ASTContext &C = CGM.getContext();
 2878 // Build struct .kmp_privates_t. {
 2879 // /* private vars */
 2880 // };
 2881 RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t");
 2882 RD->startDefinition();
 2883 for (const auto &Pair : Privates) {
 2884 const VarDecl *VD = Pair.second.Original;
 2886 // If the private variable is a local variable with lvalue ref type,
 2887 // allocate the pointer instead of the pointee type.
 2888 if (Pair.second.isLocalPrivate()) {
 2889 if (VD->getType()->isLValueReferenceType())
 2890 Type = C.getPointerType(Type);
 2891 if (isAllocatableDecl(VD))
 2892 Type = C.getPointerType(Type);
 2893 }
 // Copy over any alignment attributes so the field keeps the alignment.
 2895 if (VD->hasAttrs()) {
 2896 for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()),
 2897 E(VD->getAttrs().end());
 2898 I != E; ++I)
 2899 FD->addAttr(*I);
 2900 }
 2901 }
 2902 RD->completeDefinition();
 2903 return RD;
 2904 }
 2905 return nullptr;
 2906}
2907
// Builds the implicit kmp_task_t record (and the kmp_cmplrdata_t union of
// kmp_int32 / routine pointer used for its data1/data2 members). Taskloop
// directives get five extra fields: lb, ub, st, liter, reductions — in the
// same order as the KmpTaskTFields enumerators above.
// NOTE(review): line 2909 (rest of the header, including the directive-kind
// parameter tested at line 2939) was dropped by the extraction; verbatim.
 2908 static RecordDecl *
 2910 QualType KmpInt32Ty,
 2911 QualType KmpRoutineEntryPointerQTy) {
 2912 ASTContext &C = CGM.getContext();
 2913 // Build struct kmp_task_t {
 2914 // void * shareds;
 2915 // kmp_routine_entry_t routine;
 2916 // kmp_int32 part_id;
 2917 // kmp_cmplrdata_t data1;
 2918 // kmp_cmplrdata_t data2;
 2919 // For taskloops additional fields:
 2920 // kmp_uint64 lb;
 2921 // kmp_uint64 ub;
 2922 // kmp_int64 st;
 2923 // kmp_int32 liter;
 2924 // void * reductions;
 2925 // };
 2926 RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TagTypeKind::Union);
 2927 UD->startDefinition();
 2928 addFieldToRecordDecl(C, UD, KmpInt32Ty);
 2929 addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy);
 2930 UD->completeDefinition();
 2931 QualType KmpCmplrdataTy = C.getRecordType(UD);
 2932 RecordDecl *RD = C.buildImplicitRecord("kmp_task_t");
 2933 RD->startDefinition();
 2934 addFieldToRecordDecl(C, RD, C.VoidPtrTy);
 2935 addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
 2936 addFieldToRecordDecl(C, RD, KmpInt32Ty);
 2937 addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
 2938 addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
 2939 if (isOpenMPTaskLoopDirective(Kind)) {
 2940 QualType KmpUInt64Ty =
 2941 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
 2942 QualType KmpInt64Ty =
 2943 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
 2944 addFieldToRecordDecl(C, RD, KmpUInt64Ty);
 2945 addFieldToRecordDecl(C, RD, KmpUInt64Ty);
 2946 addFieldToRecordDecl(C, RD, KmpInt64Ty);
 2947 addFieldToRecordDecl(C, RD, KmpInt32Ty);
 2948 addFieldToRecordDecl(C, RD, C.VoidPtrTy);
 2949 }
 2950 RD->completeDefinition();
 2951 return RD;
 2952}
2953
// Builds the implicit kmp_task_t_with_privates record: the kmp_task_t data
// first, followed by the ".kmp_privates.t" record when any privates exist
// (createPrivatesRecordDecl returns nullptr otherwise and no second field
// is added).
// NOTE(review): line 2955 (rest of the header, including the KmpTaskTQTy
// parameter used at line 2964) was dropped by the extraction; verbatim.
 2954 static RecordDecl *
 2956 ArrayRef<PrivateDataTy> Privates) {
 2957 ASTContext &C = CGM.getContext();
 2958 // Build struct kmp_task_t_with_privates {
 2959 // kmp_task_t task_data;
 2960 // .kmp_privates_t. privates;
 2961 // };
 2962 RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates");
 2963 RD->startDefinition();
 2964 addFieldToRecordDecl(C, RD, KmpTaskTQTy);
 2965 if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates))
 2966 addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD));
 2967 RD->completeDefinition();
 2968 return RD;
 2969}
2970
 2971/// Emit a proxy function which accepts kmp_task_t as the second
 2972/// argument.
 2973/// \code
 2974/// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
 2975/// TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt,
 2976/// For taskloops:
 2977/// tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
 2978/// tt->reductions, tt->shareds);
 2979/// return 0;
 2980/// }
 2981/// \endcode
// NOTE(review): lines 2983 (rest of the header), 2992/2995 (presumably the
// ImplicitParamKind arguments of the two param decls) and 3049 (presumably
// the CreatePointerBitCastOrAddrSpaceCast call feeding lines 3050-3051)
// were dropped by the extraction; kept verbatim.
 2982 static llvm::Function *
 2984 OpenMPDirectiveKind Kind, QualType KmpInt32Ty,
 2985 QualType KmpTaskTWithPrivatesPtrQTy,
 2986 QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
 2987 QualType SharedsPtrTy, llvm::Function *TaskFunction,
 2988 llvm::Value *TaskPrivatesMap) {
 2989 ASTContext &C = CGM.getContext();
 // Build the .omp_task_entry. signature: (kmp_int32 gtid, kmp_task_t *tt).
 2990 FunctionArgList Args;
 2991 ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
 2993 ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
 2994 KmpTaskTWithPrivatesPtrQTy.withRestrict(),
 2996 Args.push_back(&GtidArg);
 2997 Args.push_back(&TaskTypeArg);
 2998 const auto &TaskEntryFnInfo =
 2999 CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
 3000 llvm::FunctionType *TaskEntryTy =
 3001 CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
 3002 std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""});
 3003 auto *TaskEntry = llvm::Function::Create(
 3004 TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
 3005 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo);
 3006 TaskEntry->setDoesNotRecurse();
 3007 CodeGenFunction CGF(CGM);
 3008 CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args,
 3009 Loc, Loc);
 3010
 3011 // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map,
 3012 // tt,
 3013 // For taskloops:
 3014 // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
 3015 // tt->task_data.shareds);
 3016 llvm::Value *GtidParam = CGF.EmitLoadOfScalar(
 3017 CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc);
 3018 LValue TDBase = CGF.EmitLoadOfPointerLValue(
 3019 CGF.GetAddrOfLocalVar(&TaskTypeArg),
 3020 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
 3021 const auto *KmpTaskTWithPrivatesQTyRD =
 3022 cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
 // 'Base' is the embedded kmp_task_t (first field of the with-privates
 // record); task fields are addressed via the KmpTaskTFields enumerators.
 3023 LValue Base =
 3024 CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
 3025 const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
 3026 auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
 3027 LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
 3028 llvm::Value *PartidParam = PartIdLVal.getPointer(CGF);
 3029
 3030 auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
 3031 LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
 3032 llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
 3033 CGF.EmitLoadOfScalar(SharedsLVal, Loc),
 3034 CGF.ConvertTypeForMem(SharedsPtrTy));
 3035
 // The privates field is optional: pass a null void* when absent.
 3036 auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
 3037 llvm::Value *PrivatesParam;
 3038 if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
 3039 LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
 3040 PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
 3041 PrivatesLVal.getPointer(CGF), CGF.VoidPtrTy);
 3042 } else {
 3043 PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
 3044 }
 3045
 3046 llvm::Value *CommonArgs[] = {
 3047 GtidParam, PartidParam, PrivatesParam, TaskPrivatesMap,
 3048 CGF.Builder
 3050 CGF.VoidPtrTy, CGF.Int8Ty)
 3051 .emitRawPointer(CGF)};
 3052 SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs),
 3053 std::end(CommonArgs));
 // Taskloops additionally pass lb/ub/st/liter/reductions loaded from the
 // taskloop-only fields of kmp_task_t.
 3054 if (isOpenMPTaskLoopDirective(Kind)) {
 3055 auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound);
 3056 LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI);
 3057 llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc);
 3058 auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound);
 3059 LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI);
 3060 llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc);
 3061 auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride);
 3062 LValue StLVal = CGF.EmitLValueForField(Base, *StFI);
 3063 llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc);
 3064 auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
 3065 LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
 3066 llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc);
 3067 auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions);
 3068 LValue RLVal = CGF.EmitLValueForField(Base, *RFI);
 3069 llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc);
 3070 CallArgs.push_back(LBParam);
 3071 CallArgs.push_back(UBParam);
 3072 CallArgs.push_back(StParam);
 3073 CallArgs.push_back(LIParam);
 3074 CallArgs.push_back(RParam);
 3075 }
 3076 CallArgs.push_back(SharedsParam);
 3077
 3078 CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction,
 3079 CallArgs);
 // The proxy always returns 0.
 3080 CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)),
 3081 CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty));
 3082 CGF.FinishFunction();
 3083 return TaskEntry;
 3084}
3085
// Emits the internal ".omp_task_destructor." function: it walks the privates
// record (second field of kmp_task_t_with_privates) and pushes a destroy
// cleanup for every field whose type has a non-trivial destruction kind.
// NOTE(review): lines 3086 (function header), 3094/3097 (presumably the
// ImplicitParamKind arguments) and 3116 (presumably the 'LValue Base =
// CGF.EmitLoadOfPointerLValue(' line feeding 3117-3118) were dropped by the
// extraction; kept verbatim.
 3087 SourceLocation Loc,
 3088 QualType KmpInt32Ty,
 3089 QualType KmpTaskTWithPrivatesPtrQTy,
 3090 QualType KmpTaskTWithPrivatesQTy) {
 3091 ASTContext &C = CGM.getContext();
 3092 FunctionArgList Args;
 3093 ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
 3095 ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
 3096 KmpTaskTWithPrivatesPtrQTy.withRestrict(),
 3098 Args.push_back(&GtidArg);
 3099 Args.push_back(&TaskTypeArg);
 3100 const auto &DestructorFnInfo =
 3101 CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
 3102 llvm::FunctionType *DestructorFnTy =
 3103 CGM.getTypes().GetFunctionType(DestructorFnInfo);
 3104 std::string Name =
 3105 CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""});
 3106 auto *DestructorFn =
 3107 llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage,
 3108 Name, &CGM.getModule());
 3109 CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn,
 3110 DestructorFnInfo);
 3111 DestructorFn->setDoesNotRecurse();
 3112 CodeGenFunction CGF(CGM);
 3113 CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo,
 3114 Args, Loc, Loc);
 3115
 3117 CGF.GetAddrOfLocalVar(&TaskTypeArg),
 3118 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
 3119 const auto *KmpTaskTWithPrivatesQTyRD =
 3120 cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
 // Skip the kmp_task_t field; the privates record is the second field.
 3121 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
 3122 Base = CGF.EmitLValueForField(Base, *FI);
 3123 for (const auto *Field :
 3124 cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) {
 3125 if (QualType::DestructionKind DtorKind =
 3126 Field->getType().isDestructedType()) {
 3127 LValue FieldLValue = CGF.EmitLValueForField(Base, Field);
 3128 CGF.pushDestroy(DtorKind, FieldLValue.getAddress(CGF), Field->getType());
 3129 }
 3130 }
 3131 CGF.FinishFunction();
 3132 return DestructorFn;
 3133}
3134
 3135/// Emit a privates mapping function for correct handling of private and
 3136/// firstprivate variables.
 3137/// \code
 3138/// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1>
 3139/// **noalias priv1,..., <tyn> **noalias privn) {
 3140/// *priv1 = &.privates.priv1;
 3141/// ...;
 3142/// *privn = &.privates.privn;
 3143/// }
 3144/// \endcode
// NOTE(review): lines 3146 (rest of the header), 3164/3175/3186/3192/3200
// (presumably the ImplicitParamKind arguments closing each Create call) and
// 3225 (presumably the 'LValue Base = CGF.EmitLoadOfPointerLValue(' line)
// were dropped by the extraction; kept verbatim.
 3145 static llvm::Value *
 3147 const OMPTaskDataTy &Data, QualType PrivatesQTy,
 3148 ArrayRef<PrivateDataTy> Privates) {
 3149 ASTContext &C = CGM.getContext();
 // First parameter: const .privates.* restrict. Then one <ty>** restrict
 // out-parameter per private/firstprivate/lastprivate/local variable, with
 // PrivateVarsPos recording each variable's argument index.
 3150 FunctionArgList Args;
 3151 ImplicitParamDecl TaskPrivatesArg(
 3152 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
 3153 C.getPointerType(PrivatesQTy).withConst().withRestrict(),
 3155 Args.push_back(&TaskPrivatesArg);
 3156 llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, unsigned> PrivateVarsPos;
 3157 unsigned Counter = 1;
 3158 for (const Expr *E : Data.PrivateVars) {
 3159 Args.push_back(ImplicitParamDecl::Create(
 3160 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
 3161 C.getPointerType(C.getPointerType(E->getType()))
 3162 .withConst()
 3163 .withRestrict(),
 3165 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
 3166 PrivateVarsPos[VD] = Counter;
 3167 ++Counter;
 3168 }
 3169 for (const Expr *E : Data.FirstprivateVars) {
 3170 Args.push_back(ImplicitParamDecl::Create(
 3171 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
 3172 C.getPointerType(C.getPointerType(E->getType()))
 3173 .withConst()
 3174 .withRestrict(),
 3176 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
 3177 PrivateVarsPos[VD] = Counter;
 3178 ++Counter;
 3179 }
 3180 for (const Expr *E : Data.LastprivateVars) {
 3181 Args.push_back(ImplicitParamDecl::Create(
 3182 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
 3183 C.getPointerType(C.getPointerType(E->getType()))
 3184 .withConst()
 3185 .withRestrict(),
 3187 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
 3188 PrivateVarsPos[VD] = Counter;
 3189 ++Counter;
 3190 }
 // Local privates mirror the pointer-wrapping done when the privates
 // record was built (reference type and allocatable decls become pointers).
 3191 for (const VarDecl *VD : Data.PrivateLocals) {
 3193 if (VD->getType()->isLValueReferenceType())
 3194 Ty = C.getPointerType(Ty);
 3195 if (isAllocatableDecl(VD))
 3196 Ty = C.getPointerType(Ty);
 3197 Args.push_back(ImplicitParamDecl::Create(
 3198 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
 3199 C.getPointerType(C.getPointerType(Ty)).withConst().withRestrict(),
 3201 PrivateVarsPos[VD] = Counter;
 3202 ++Counter;
 3203 }
 3204 const auto &TaskPrivatesMapFnInfo =
 3205 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
 3206 llvm::FunctionType *TaskPrivatesMapTy =
 3207 CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo);
 3208 std::string Name =
 3209 CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""});
 3210 auto *TaskPrivatesMap = llvm::Function::Create(
 3211 TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name,
 3212 &CGM.getModule());
 3213 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap,
 3214 TaskPrivatesMapFnInfo);
 // With optimization, force-inline the trivial mapping function.
 3215 if (CGM.getLangOpts().Optimize) {
 3216 TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline);
 3217 TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone);
 3218 TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline);
 3219 }
 3220 CodeGenFunction CGF(CGM);
 3221 CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap,
 3222 TaskPrivatesMapFnInfo, Args, Loc, Loc);
 3223
 3224 // *privi = &.privates.privi;
 3226 CGF.GetAddrOfLocalVar(&TaskPrivatesArg),
 3227 TaskPrivatesArg.getType()->castAs<PointerType>());
 3228 const auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl());
 3229 Counter = 0;
 3230 for (const FieldDecl *Field : PrivatesQTyRD->fields()) {
 3231 LValue FieldLVal = CGF.EmitLValueForField(Base, Field);
 3232 const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]];
 3233 LValue RefLVal =
 3234 CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType());
 3235 LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue(
 3236 RefLVal.getAddress(CGF), RefLVal.getType()->castAs<PointerType>());
 3237 CGF.EmitStoreOfScalar(FieldLVal.getPointer(CGF), RefLoadLVal);
 3238 ++Counter;
 3239 }
 3240 CGF.FinishFunction();
 3241 return TaskPrivatesMap;
 3242}
3243
 3244/// Emit initialization for private variables in task-based directives.
// Walks the privates record field-by-field in lockstep with 'Privates' and
// emits each copy's initializer; firstprivates read their source value from
// the shareds block (or, for target tasks' artificial variables, from a
// local var). Called both from the task-entry path (ForDup=false) and the
// task-dup function (ForDup=true, see emitTaskDupFunction below).
// NOTE(review): lines 3245 (function header), 3255 (condition selecting
// OMPD_taskloop vs OMPD_task), 3262-3263 (init of IsTargetTask), 3270 and
// 3331 (call-expression heads) were dropped by the extraction; verbatim.
 3246 const OMPExecutableDirective &D,
 3247 Address KmpTaskSharedsPtr, LValue TDBase,
 3248 const RecordDecl *KmpTaskTWithPrivatesQTyRD,
 3249 QualType SharedsTy, QualType SharedsPtrTy,
 3250 const OMPTaskDataTy &Data,
 3251 ArrayRef<PrivateDataTy> Privates, bool ForDup) {
 3252 ASTContext &C = CGF.getContext();
 3253 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
 3254 LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI);
 3256 ? OMPD_taskloop
 3257 : OMPD_task;
 3258 const CapturedStmt &CS = *D.getCapturedStmt(Kind);
 3259 CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS);
 3260 LValue SrcBase;
 3261 bool IsTargetTask =
 3264 // For target-based directives skip 4 firstprivate arrays BasePointersArray,
 3265 // PointersArray, SizesArray, and MappersArray. The original variables for
 3266 // these arrays are not captured and we get their addresses explicitly.
 3267 if ((!IsTargetTask && !Data.FirstprivateVars.empty() && ForDup) ||
 3268 (IsTargetTask && KmpTaskSharedsPtr.isValid())) {
 3269 SrcBase = CGF.MakeAddrLValue(
 3271 KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy),
 3272 CGF.ConvertTypeForMem(SharedsTy)),
 3273 SharedsTy);
 3274 }
 3275 FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin();
 3276 for (const PrivateDataTy &Pair : Privates) {
 3277 // Do not initialize private locals.
 3278 if (Pair.second.isLocalPrivate()) {
 3279 ++FI;
 3280 continue;
 3281 }
 3282 const VarDecl *VD = Pair.second.PrivateCopy;
 3283 const Expr *Init = VD->getAnyInitializer();
 // In the dup path only non-trivial constructor inits are re-emitted;
 // everything else was handled when the task was first created.
 3284 if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) &&
 3285 !CGF.isTrivialInitializer(Init)))) {
 3286 LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
 3287 if (const VarDecl *Elem = Pair.second.PrivateElemInit) {
 3288 const VarDecl *OriginalVD = Pair.second.Original;
 3289 // Check if the variable is the target-based BasePointersArray,
 3290 // PointersArray, SizesArray, or MappersArray.
 3291 LValue SharedRefLValue;
 3292 QualType Type = PrivateLValue.getType();
 3293 const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD);
 3294 if (IsTargetTask && !SharedField) {
 3295 assert(isa<ImplicitParamDecl>(OriginalVD) &&
 3296 isa<CapturedDecl>(OriginalVD->getDeclContext()) &&
 3297 cast<CapturedDecl>(OriginalVD->getDeclContext())
 3298 ->getNumParams() == 0 &&
 3299 isa<TranslationUnitDecl>(
 3300 cast<CapturedDecl>(OriginalVD->getDeclContext())
 3301 ->getDeclContext()) &&
 3302 "Expected artificial target data variable.");
 3303 SharedRefLValue =
 3304 CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type);
 3305 } else if (ForDup) {
 3306 SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField);
 3307 SharedRefLValue = CGF.MakeAddrLValue(
 3308 SharedRefLValue.getAddress(CGF).withAlignment(
 3309 C.getDeclAlign(OriginalVD)),
 3310 SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl),
 3311 SharedRefLValue.getTBAAInfo());
 3312 } else if (CGF.LambdaCaptureFields.count(
 3313 Pair.second.Original->getCanonicalDecl()) > 0 ||
 3314 isa_and_nonnull<BlockDecl>(CGF.CurCodeDecl)) {
 3315 SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
 3316 } else {
 3317 // Processing for implicitly captured variables.
 3318 InlinedOpenMPRegionRAII Region(
 3319 CGF, [](CodeGenFunction &, PrePostActionTy &) {}, OMPD_unknown,
 3320 /*HasCancel=*/false, /*NoInheritance=*/true);
 3321 SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
 3322 }
 3323 if (Type->isArrayType()) {
 3324 // Initialize firstprivate array.
 3325 if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) {
 3326 // Perform simple memcpy.
 3327 CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type);
 3328 } else {
 3329 // Initialize firstprivate array using element-by-element
 3330 // initialization.
 3332 PrivateLValue.getAddress(CGF), SharedRefLValue.getAddress(CGF),
 3333 Type,
 3334 [&CGF, Elem, Init, &CapturesInfo](Address DestElement,
 3335 Address SrcElement) {
 3336 // Clean up any temporaries needed by the initialization.
 3337 CodeGenFunction::OMPPrivateScope InitScope(CGF);
 3338 InitScope.addPrivate(Elem, SrcElement);
 3339 (void)InitScope.Privatize();
 3340 // Emit initialization for single element.
 3341 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(
 3342 CGF, &CapturesInfo);
 3343 CGF.EmitAnyExprToMem(Init, DestElement,
 3344 Init->getType().getQualifiers(),
 3345 /*IsInitializer=*/false);
 3346 });
 3347 }
 3348 } else {
 3349 CodeGenFunction::OMPPrivateScope InitScope(CGF);
 3350 InitScope.addPrivate(Elem, SharedRefLValue.getAddress(CGF));
 3351 (void)InitScope.Privatize();
 3352 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
 3353 CGF.EmitExprAsInit(Init, VD, PrivateLValue,
 3354 /*capturedByInit=*/false);
 3355 }
 3356 } else {
 3357 CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
 3358 }
 3359 }
 3360 ++FI;
 3361 }
 3362}
3363
 3364/// Check if duplication function is required for taskloops.
// Returns true when any non-local private copy has a non-trivial
// CXXConstructExpr initializer, i.e. re-running the initializer in the
// task-dup function is actually needed.
// NOTE(review): lines 3365 (function header) and 3374 (second half of the
// condition assigned to InitRequired — presumably the triviality check on
// 'Init') were dropped by the extraction; kept verbatim.
 3366 ArrayRef<PrivateDataTy> Privates) {
 3367 bool InitRequired = false;
 3368 for (const PrivateDataTy &Pair : Privates) {
 3369 if (Pair.second.isLocalPrivate())
 3370 continue;
 3371 const VarDecl *VD = Pair.second.PrivateCopy;
 3372 const Expr *Init = VD->getAnyInitializer();
 3373 InitRequired = InitRequired || (isa_and_nonnull<CXXConstructExpr>(Init) &&
 3375 if (InitRequired)
 3376 break;
 3377 }
 3378 return InitRequired;
 3379}
3380
3381
3382/// Emit task_dup function (for initialization of
3383/// private/firstprivate/lastprivate vars and last_iter flag)
3384/// \code
3385/// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int
3386/// lastpriv) {
3387/// // setup lastprivate flag
3388/// task_dst->last = lastpriv;
3389/// // could be constructor calls here...
3390/// }
3391/// \endcode
3392static llvm::Value *
3394 const OMPExecutableDirective &D,
3395 QualType KmpTaskTWithPrivatesPtrQTy,
3396 const RecordDecl *KmpTaskTWithPrivatesQTyRD,
3397 const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy,
3398 QualType SharedsPtrTy, const OMPTaskDataTy &Data,
3399 ArrayRef<PrivateDataTy> Privates, bool WithLastIter) {
3400 ASTContext &C = CGM.getContext();
3401 FunctionArgList Args;
3402 ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3403 KmpTaskTWithPrivatesPtrQTy,
3405 ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3406 KmpTaskTWithPrivatesPtrQTy,
3408 ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy,
3410 Args.push_back(&DstArg);
3411 Args.push_back(&SrcArg);
3412 Args.push_back(&LastprivArg);
3413 const auto &TaskDupFnInfo =
3414 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
3415 llvm::FunctionType *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo);
3416 std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_dup", ""});
3417 auto *TaskDup = llvm::Function::Create(
3418 TaskDupTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
3419 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo);
3420 TaskDup->setDoesNotRecurse();
3421 CodeGenFunction CGF(CGM);
3422 CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc,
3423 Loc);
3424
3425 LValue TDBase = CGF.EmitLoadOfPointerLValue(
3426 CGF.GetAddrOfLocalVar(&DstArg),
3427 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3428 // task_dst->liter = lastpriv;
3429 if (WithLastIter) {
3430 auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
3432 TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
3433 LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
3434 llvm::Value *Lastpriv = CGF.EmitLoadOfScalar(
3435 CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc);
3436 CGF.EmitStoreOfScalar(Lastpriv, LILVal);
3437 }
3438
3439 // Emit initial values for private copies (if any).
3440 assert(!Privates.empty());
3441 Address KmpTaskSharedsPtr = Address::invalid();
3442 if (!Data.FirstprivateVars.empty()) {
3443 LValue TDBase = CGF.EmitLoadOfPointerLValue(
3444 CGF.GetAddrOfLocalVar(&SrcArg),
3445 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3447 TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
3448 KmpTaskSharedsPtr = Address(
3450 Base, *std::next(KmpTaskTQTyRD->field_begin(),
3451 KmpTaskTShareds)),
3452 Loc),
3453 CGF.Int8Ty, CGM.getNaturalTypeAlignment(SharedsTy));
3454 }
3455 emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD,
3456 SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true);
3457 CGF.FinishFunction();
3458 return TaskDup;
3459}
3460
3461/// Checks if destructor function is required to be generated.
3462/// \return true if cleanups are required, false otherwise.
3463static bool
3464checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD,
3465 ArrayRef<PrivateDataTy> Privates) {
3466 for (const PrivateDataTy &P : Privates) {
3467 if (P.second.isLocalPrivate())
3468 continue;
3469 QualType Ty = P.second.Original->getType().getNonReferenceType();
3470 if (Ty.isDestructedType())
3471 return true;
3472 }
3473 return false;
3474}
3475
namespace {
/// Loop generator for OpenMP iterator expression.
// NOTE(review): the member declarations for the containers used below as
// Uppers, ContDests and ExitDests (and a few statement lines) are elided in
// this rendering.
class OMPIteratorGeneratorScope final
    : public CodeGenFunction::OMPPrivateScope {
  CodeGenFunction &CGF;
  const OMPIteratorExpr *E = nullptr;
  OMPIteratorGeneratorScope() = delete;
  OMPIteratorGeneratorScope(OMPIteratorGeneratorScope &) = delete;

public:
  /// Privatizes the iterator variables and counters of \p E and opens one
  /// counter-driven loop nest per iterator; the matching loop back-edges and
  /// exits are emitted by the destructor.
  OMPIteratorGeneratorScope(CodeGenFunction &CGF, const OMPIteratorExpr *E)
      : CodeGenFunction::OMPPrivateScope(CGF), CGF(CGF), E(E) {
    // A null iterator expression means there is nothing to generate.
    if (!E)
      return;
    // Evaluate upper bounds and create private storage for each iterator
    // variable and its helper counter.
    for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
      Uppers.push_back(CGF.EmitScalarExpr(E->getHelper(I).Upper));
      const auto *VD = cast<VarDecl>(E->getIteratorDecl(I));
      addPrivate(VD, CGF.CreateMemTemp(VD->getType(), VD->getName()));
      const OMPIteratorHelperData &HelperData = E->getHelper(I);
      addPrivate(
          HelperData.CounterVD,
          CGF.CreateMemTemp(HelperData.CounterVD->getType(), "counter.addr"));
    }
    Privatize();

    // Emit the loop headers: zero-init each counter and branch into the body
    // while Counter < N.
    for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
      const OMPIteratorHelperData &HelperData = E->getHelper(I);
      LValue CLVal =
          CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(HelperData.CounterVD),
                             HelperData.CounterVD->getType());
      // Counter = 0;
          llvm::ConstantInt::get(CLVal.getAddress(CGF).getElementType(), 0),
          CLVal);
      CodeGenFunction::JumpDest &ContDest =
          ContDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.cont"));
      CodeGenFunction::JumpDest &ExitDest =
          ExitDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.exit"));
      // N = <number-of_iterations>;
      llvm::Value *N = Uppers[I];
      // cont:
      // if (Counter < N) goto body; else goto exit;
      CGF.EmitBlock(ContDest.getBlock());
      auto *CVal =
          CGF.EmitLoadOfScalar(CLVal, HelperData.CounterVD->getLocation());
      // Signed vs. unsigned compare (the selecting condition is elided here).
      llvm::Value *Cmp =
              ? CGF.Builder.CreateICmpSLT(CVal, N)
              : CGF.Builder.CreateICmpULT(CVal, N);
      llvm::BasicBlock *BodyBB = CGF.createBasicBlock("iter.body");
      CGF.Builder.CreateCondBr(Cmp, BodyBB, ExitDest.getBlock());
      // body:
      CGF.EmitBlock(BodyBB);
      // Iteri = Begini + Counter * Stepi;
      CGF.EmitIgnoredExpr(HelperData.Update);
    }
  }
  /// Closes the loop nest opened by the constructor, innermost first:
  /// increments each counter, branches back to its continuation block, and
  /// emits the corresponding exit block.
  ~OMPIteratorGeneratorScope() {
    if (!E)
      return;
    for (unsigned I = E->numOfIterators(); I > 0; --I) {
      // Counter = Counter + 1;
      const OMPIteratorHelperData &HelperData = E->getHelper(I - 1);
      CGF.EmitIgnoredExpr(HelperData.CounterUpdate);
      // goto cont;
      CGF.EmitBranchThroughCleanup(ContDests[I - 1]);
      // exit:
      CGF.EmitBlock(ExitDests[I - 1].getBlock(), /*IsFinished=*/I == 1);
    }
  }
};
} // namespace
3551
// Returns the address of \p E together with its size in bytes; the signature
// line (presumably naming this as getPointerAndSize(CodeGenFunction &CGF,
// const Expr *E)) is elided in this rendering.
static std::pair<llvm::Value *, llvm::Value *>
  const auto *OASE = dyn_cast<OMPArrayShapingExpr>(E);
  llvm::Value *Addr;
  if (OASE) {
    // For array-shaping expressions the base pointer is the address.
    const Expr *Base = OASE->getBase();
    Addr = CGF.EmitScalarExpr(Base);
  } else {
    Addr = CGF.EmitLValue(E).getPointer(CGF);
  }
  llvm::Value *SizeVal;
  QualType Ty = E->getType();
  if (OASE) {
    // Size = sizeof(element) * dim0 * dim1 * ...
    SizeVal = CGF.getTypeSize(OASE->getBase()->getType()->getPointeeType());
    for (const Expr *SE : OASE->getDimensions()) {
      llvm::Value *Sz = CGF.EmitScalarExpr(SE);
      Sz = CGF.EmitScalarConversion(
          Sz, SE->getType(), CGF.getContext().getSizeType(), SE->getExprLoc());
      SizeVal = CGF.Builder.CreateNUWMul(SizeVal, Sz);
    }
  } else if (const auto *ASE =
                 dyn_cast<ArraySectionExpr>(E->IgnoreParenImpCasts())) {
    // For array sections: size = (&section_end + 1) - &section_begin, in
    // bytes, computed via pointer-to-integer arithmetic.
    LValue UpAddrLVal = CGF.EmitArraySectionExpr(ASE, /*IsLowerBound=*/false);
    Address UpAddrAddress = UpAddrLVal.getAddress(CGF);
    llvm::Value *UpAddr = CGF.Builder.CreateConstGEP1_32(
        UpAddrAddress.getElementType(), UpAddrAddress.emitRawPointer(CGF),
        /*Idx0=*/1);
    llvm::Value *LowIntPtr = CGF.Builder.CreatePtrToInt(Addr, CGF.SizeTy);
    llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGF.SizeTy);
    SizeVal = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr);
  } else {
    // Plain expression: size of its static type.
    SizeVal = CGF.getTypeSize(Ty);
  }
  return std::make_pair(Addr, SizeVal);
}
3587
3588/// Builds kmp_depend_info, if it is not built yet, and builds flags type.
3589static void getKmpAffinityType(ASTContext &C, QualType &KmpTaskAffinityInfoTy) {
3590 QualType FlagsTy = C.getIntTypeForBitwidth(32, /*Signed=*/false);
3591 if (KmpTaskAffinityInfoTy.isNull()) {
3592 RecordDecl *KmpAffinityInfoRD =
3593 C.buildImplicitRecord("kmp_task_affinity_info_t");
3594 KmpAffinityInfoRD->startDefinition();
3595 addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getIntPtrType());
3596 addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getSizeType());
3597 addFieldToRecordDecl(C, KmpAffinityInfoRD, FlagsTy);
3598 KmpAffinityInfoRD->completeDefinition();
3599 KmpTaskAffinityInfoTy = C.getRecordType(KmpAffinityInfoRD);
3600 }
3601}
3602
// NOTE(review): many lines of this function are elided in this rendering,
// including the header carrying its qualified name and leading parameters,
// the kmp_task_t type-selection branches, the AllocArgs declaration, and
// several statement fragments. The visible code is reproduced as-is.
    const OMPExecutableDirective &D,
    llvm::Function *TaskFunction, QualType SharedsTy,
    Address Shareds, const OMPTaskDataTy &Data) {
  // Aggregate privates and sort them by the alignment.
  // Each entry pairs the declared alignment with the helper bundle
  // (original var, private copy, optional firstprivate initializer).
  const auto *I = Data.PrivateCopies.begin();
  for (const Expr *E : Data.PrivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  I = Data.FirstprivateCopies.begin();
  const auto *IElemInitRef = Data.FirstprivateInits.begin();
  for (const Expr *E : Data.FirstprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(
            E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
            cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl())));
    ++I;
    ++IElemInitRef;
  }
  I = Data.LastprivateCopies.begin();
  for (const Expr *E : Data.LastprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  for (const VarDecl *VD : Data.PrivateLocals) {
    if (isAllocatableDecl(VD))
      Privates.emplace_back(CGM.getPointerAlign(), PrivateHelpersTy(VD));
    else
      Privates.emplace_back(C.getDeclAlign(VD), PrivateHelpersTy(VD));
  }
  // Stable sort by decreasing alignment so equal-alignment privates keep
  // their source order.
  llvm::stable_sort(Privates,
                    [](const PrivateDataTy &L, const PrivateDataTy &R) {
                      return L.first > R.first;
                    });
  QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
  // Build type kmp_routine_entry_t (if not built yet).
  emitKmpRoutineEntryT(KmpInt32Ty);
  // Build type kmp_task_t (if not built yet).
      CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
  }
  } else {
    assert((D.getDirectiveKind() == OMPD_task ||
           "Expected taskloop, task or target directive");
    if (SavedKmpTaskTQTy.isNull()) {
        CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
  }
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  // Build particular struct kmp_task_t for the given task.
  const RecordDecl *KmpTaskTWithPrivatesQTyRD =
  QualType KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD);
  QualType KmpTaskTWithPrivatesPtrQTy =
      C.getPointerType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesPtrTy =
      KmpTaskTWithPrivatesTy->getPointerTo();
  llvm::Value *KmpTaskTWithPrivatesTySize =
      CGF.getTypeSize(KmpTaskTWithPrivatesQTy);
  QualType SharedsPtrTy = C.getPointerType(SharedsTy);

  // Emit initial values for private copies (if any).
  llvm::Value *TaskPrivatesMap = nullptr;
  // The privates-map is passed as the 4th parameter of the task function.
  llvm::Type *TaskPrivatesMapTy =
      std::next(TaskFunction->arg_begin(), 3)->getType();
  if (!Privates.empty()) {
    auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
    TaskPrivatesMap =
        emitTaskPrivateMappingFunction(CGM, Loc, Data, FI->getType(), Privates);
    TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        TaskPrivatesMap, TaskPrivatesMapTy);
  } else {
    // No privates: pass a null map.
    TaskPrivatesMap = llvm::ConstantPointerNull::get(
        cast<llvm::PointerType>(TaskPrivatesMapTy));
  }
  // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
  // kmp_task_t *tt);
  llvm::Function *TaskEntry = emitProxyTaskFunction(
      CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
      KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction,
      TaskPrivatesMap);

  // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
  // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
  // kmp_routine_entry_t *task_entry);
  // Task flags. Format is taken from
  // https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h,
  // description of kmp_tasking_flags struct.
  enum {
    TiedFlag = 0x1,
    FinalFlag = 0x2,
    DestructorsFlag = 0x8,
    PriorityFlag = 0x20,
    DetachableFlag = 0x40,
  };
  unsigned Flags = Data.Tied ? TiedFlag : 0;
  bool NeedsCleanup = false;
  if (!Privates.empty()) {
    // Destructors flag is only set when some private copy actually needs
    // destruction.
    NeedsCleanup =
        checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD, Privates);
    if (NeedsCleanup)
      Flags = Flags | DestructorsFlag;
  }
  if (Data.Priority.getInt())
    Flags = Flags | PriorityFlag;
    Flags = Flags | DetachableFlag;
  // 'final' may be a runtime value; fold it in with a select when so.
  llvm::Value *TaskFlags =
      Data.Final.getPointer()
          ? CGF.Builder.CreateSelect(Data.Final.getPointer(),
                                     CGF.Builder.getInt32(FinalFlag),
                                     CGF.Builder.getInt32(/*C=*/0))
          : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0);
  TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
  llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy));
      getThreadID(CGF, Loc), TaskFlags, KmpTaskTWithPrivatesTySize,
      TaskEntry, KmpRoutineEntryPtrTy)};
  llvm::Value *NewTask;
    // Check if we have any device clause associated with the directive.
    const Expr *Device = nullptr;
    if (auto *C = D.getSingleClause<OMPDeviceClause>())
      Device = C->getDevice();
    // Emit device ID if any otherwise use default value.
    llvm::Value *DeviceID;
    if (Device)
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    else
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    AllocArgs.push_back(DeviceID);
    NewTask = CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_omp_target_task_alloc),
        AllocArgs);
  } else {
    NewTask =
        CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                CGM.getModule(), OMPRTL___kmpc_omp_task_alloc),
                            AllocArgs);
  }
  // Emit detach clause initialization.
  // evt = (typeof(evt))__kmpc_task_allow_completion_event(loc, tid,
  // task_descriptor);
  if (const auto *DC = D.getSingleClause<OMPDetachClause>()) {
    const Expr *Evt = DC->getEventHandler()->IgnoreParenImpCasts();
    LValue EvtLVal = CGF.EmitLValue(Evt);

    // Build kmp_event_t *__kmpc_task_allow_completion_event(ident_t *loc_ref,
    // int gtid, kmp_task_t *task);
    llvm::Value *Loc = emitUpdateLocation(CGF, DC->getBeginLoc());
    llvm::Value *Tid = getThreadID(CGF, DC->getBeginLoc());
    Tid = CGF.Builder.CreateIntCast(Tid, CGF.IntTy, /*isSigned=*/false);
    llvm::Value *EvtVal = CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_task_allow_completion_event),
        {Loc, Tid, NewTask});
    EvtVal = CGF.EmitScalarConversion(EvtVal, C.VoidPtrTy, Evt->getType(),
                                      Evt->getExprLoc());
    CGF.EmitStoreOfScalar(EvtVal, EvtLVal);
  }
  // Process affinity clauses.
    // Process list of affinity data.
    Address AffinitiesArray = Address::invalid();
    // Calculate number of elements to form the array of affinity data.
    // Iterator-modified clauses contribute a runtime count (product of the
    // iterator upper bounds); plain clauses contribute a compile-time count.
    llvm::Value *NumOfElements = nullptr;
    unsigned NumAffinities = 0;
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      if (const Expr *Modifier = C->getModifier()) {
        const auto *IE = cast<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts());
        for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
          llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
          Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
          NumOfElements =
              NumOfElements ? CGF.Builder.CreateNUWMul(NumOfElements, Sz) : Sz;
        }
      } else {
        NumAffinities += C->varlist_size();
      }
    }
    // Fields ids in kmp_task_affinity_info record.
    enum RTLAffinityInfoFieldsTy { BaseAddr, Len, Flags };

    QualType KmpTaskAffinityInfoArrayTy;
    if (NumOfElements) {
      // Runtime-sized case: emit a VLA of affinity records.
      NumOfElements = CGF.Builder.CreateNUWAdd(
          llvm::ConstantInt::get(CGF.SizeTy, NumAffinities), NumOfElements);
      auto *OVE = new (C) OpaqueValueExpr(
          Loc,
          C.getIntTypeForBitwidth(C.getTypeSize(C.getSizeType()), /*Signed=*/0),
          VK_PRValue);
      CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE,
                                                    RValue::get(NumOfElements));
      KmpTaskAffinityInfoArrayTy = C.getVariableArrayType(
          /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
      // Properly emit variable-sized array.
      auto *PD = ImplicitParamDecl::Create(C, KmpTaskAffinityInfoArrayTy,
      CGF.EmitVarDecl(*PD);
      AffinitiesArray = CGF.GetAddrOfLocalVar(PD);
      NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
                                                /*isSigned=*/false);
    } else {
      // Fully static case: a constant-sized local array suffices.
      KmpTaskAffinityInfoArrayTy = C.getConstantArrayType(
          llvm::APInt(C.getTypeSize(C.getSizeType()), NumAffinities), nullptr,
          ArraySizeModifier::Normal, /*IndexTypeQuals=*/0);
      AffinitiesArray =
          CGF.CreateMemTemp(KmpTaskAffinityInfoArrayTy, ".affs.arr.addr");
      AffinitiesArray = CGF.Builder.CreateConstArrayGEP(AffinitiesArray, 0);
      NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumAffinities,
                                             /*isSigned=*/false);
    }

    const auto *KmpAffinityInfoRD = KmpTaskAffinityInfoTy->getAsRecordDecl();
    // Fill array by elements without iterators.
    unsigned Pos = 0;
    bool HasIterator = false;
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      if (C->getModifier()) {
        HasIterator = true;
        continue;
      }
      for (const Expr *E : C->varlists()) {
        llvm::Value *Addr;
        llvm::Value *Size;
        std::tie(Addr, Size) = getPointerAndSize(CGF, E);
        LValue Base =
            CGF.MakeAddrLValue(CGF.Builder.CreateConstGEP(AffinitiesArray, Pos),
        // affs[i].base_addr = &<Affinities[i].second>;
        LValue BaseAddrLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
        CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                              BaseAddrLVal);
        // affs[i].len = sizeof(<Affinities[i].second>);
        LValue LenLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
        CGF.EmitStoreOfScalar(Size, LenLVal);
        ++Pos;
      }
    }
    LValue PosLVal;
    if (HasIterator) {
      // Iterator-filled entries need a runtime position counter, seeded past
      // the statically-filled prefix.
      PosLVal = CGF.MakeAddrLValue(
          CGF.CreateMemTemp(C.getSizeType(), "affs.counter.addr"),
          C.getSizeType());
      CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
    }
    // Process elements with iterators.
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      const Expr *Modifier = C->getModifier();
      if (!Modifier)
        continue;
      OMPIteratorGeneratorScope IteratorScope(
          CGF, cast_or_null<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts()));
      for (const Expr *E : C->varlists()) {
        llvm::Value *Addr;
        llvm::Value *Size;
        std::tie(Addr, Size) = getPointerAndSize(CGF, E);
        llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
        LValue Base =
            CGF.MakeAddrLValue(CGF.Builder.CreateGEP(CGF, AffinitiesArray, Idx),
        // affs[i].base_addr = &<Affinities[i].second>;
        LValue BaseAddrLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
        CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                              BaseAddrLVal);
        // affs[i].len = sizeof(<Affinities[i].second>);
        LValue LenLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
        CGF.EmitStoreOfScalar(Size, LenLVal);
        Idx = CGF.Builder.CreateNUWAdd(
            Idx, llvm::ConstantInt::get(Idx->getType(), 1));
        CGF.EmitStoreOfScalar(Idx, PosLVal);
      }
    }
    // Call to kmp_int32 __kmpc_omp_reg_task_with_affinity(ident_t *loc_ref,
    // kmp_int32 gtid, kmp_task_t *new_task, kmp_int32
    // naffins, kmp_task_affinity_info_t *affin_list);
    llvm::Value *LocRef = emitUpdateLocation(CGF, Loc);
    llvm::Value *GTid = getThreadID(CGF, Loc);
    llvm::Value *AffinListPtr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        AffinitiesArray.emitRawPointer(CGF), CGM.VoidPtrTy);
    // FIXME: Emit the function and ignore its result for now unless the
    // runtime function is properly implemented.
    (void)CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_omp_reg_task_with_affinity),
        {LocRef, GTid, NewTask, NumOfElements, AffinListPtr});
  }
  llvm::Value *NewTaskNewTaskTTy =
      NewTask, KmpTaskTWithPrivatesPtrTy);
  LValue Base = CGF.MakeNaturalAlignRawAddrLValue(NewTaskNewTaskTTy,
                                                  KmpTaskTWithPrivatesQTy);
  LValue TDBase =
      CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin());
  // Fill the data in the resulting kmp_task_t record.
  // Copy shareds if there are any.
  Address KmpTaskSharedsPtr = Address::invalid();
  if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) {
    KmpTaskSharedsPtr = Address(
        CGF.EmitLoadOfScalar(
            TDBase,
            *std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds)),
            Loc),
        CGF.Int8Ty, CGM.getNaturalTypeAlignment(SharedsTy));
    LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy);
    LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy);
    CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap);
  }
  // Emit initial values for private copies (if any).
  if (!Privates.empty()) {
    emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD,
                     SharedsTy, SharedsPtrTy, Data, Privates,
                     /*ForDup=*/false);
        (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) {
      // Taskloops need a dup function to initialize per-iteration task copies.
      Result.TaskDupFn = emitTaskDupFunction(
          CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD,
          KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates,
          /*WithLastIter=*/!Data.LastprivateVars.empty());
    }
  }
  // Fields of union "kmp_cmplrdata_t" for destructors and priority.
  enum { Priority = 0, Destructors = 1 };
  // Provide pointer to function with destructors for privates.
  auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1);
  const RecordDecl *KmpCmplrdataUD =
      (*FI)->getType()->getAsUnionType()->getDecl();
  if (NeedsCleanup) {
    llvm::Value *DestructorFn = emitDestructorsFunction(
        CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
        KmpTaskTWithPrivatesQTy);
    LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI);
    LValue DestructorsLV = CGF.EmitLValueForField(
        Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors));
        DestructorFn, KmpRoutineEntryPtrTy),
        DestructorsLV);
  }
  // Set priority.
  if (Data.Priority.getInt()) {
    LValue Data2LV = CGF.EmitLValueForField(
        TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2));
    LValue PriorityLV = CGF.EmitLValueForField(
        Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority));
    CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV);
  }
  Result.NewTask = NewTask;
  Result.TaskEntry = TaskEntry;
  Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy;
  Result.TDBase = TDBase;
  Result.KmpTaskTQTyRD = KmpTaskTQTyRD;
  return Result;
}
3991
/// Translates internal dependency kind into the runtime kind.
// NOTE(review): the signature line (and the line preceding llvm_unreachable,
// presumably the remaining clause-kind case label) is elided in this
// rendering.
  RTLDependenceKindTy DepKind;
  switch (K) {
  case OMPC_DEPEND_in:
    DepKind = RTLDependenceKindTy::DepIn;
    break;
  // Out and InOut dependencies must use the same code.
  case OMPC_DEPEND_out:
  case OMPC_DEPEND_inout:
    DepKind = RTLDependenceKindTy::DepInOut;
    break;
  case OMPC_DEPEND_mutexinoutset:
    DepKind = RTLDependenceKindTy::DepMutexInOutSet;
    break;
  case OMPC_DEPEND_inoutset:
    DepKind = RTLDependenceKindTy::DepInOutSet;
    break;
  case OMPC_DEPEND_outallmemory:
    DepKind = RTLDependenceKindTy::DepOmpAllMem;
    break;
  // These kinds have no runtime equivalent here and must have been lowered
  // or rejected earlier.
  case OMPC_DEPEND_source:
  case OMPC_DEPEND_sink:
  case OMPC_DEPEND_depobj:
  case OMPC_DEPEND_inoutallmemory:
    llvm_unreachable("Unknown task dependence type");
  }
  return DepKind;
}
4022
4023/// Builds kmp_depend_info, if it is not built yet, and builds flags type.
4024static void getDependTypes(ASTContext &C, QualType &KmpDependInfoTy,
4025 QualType &FlagsTy) {
4026 FlagsTy = C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false);
4027 if (KmpDependInfoTy.isNull()) {
4028 RecordDecl *KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info");
4029 KmpDependInfoRD->startDefinition();
4030 addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType());
4031 addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType());
4032 addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy);
4033 KmpDependInfoRD->completeDefinition();
4034 KmpDependInfoTy = C.getRecordType(KmpDependInfoRD);
4035 }
4036}
4037
// Returns {number-of-dependencies, lvalue-of-first-element} for a depobj.
// NOTE(review): the line carrying the qualified name and leading parameters
// (and the EmitLoadOfPointerLValue call head before DepobjLVal.getAddress) is
// elided in this rendering.
std::pair<llvm::Value *, LValue>
                                   SourceLocation Loc) {
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
      DepobjLVal.getAddress(CGF).withElementType(
          CGF.ConvertTypeForMem(KmpDependInfoPtrTy)),
      KmpDependInfoPtrTy->castAs<PointerType>());
  // The element count is stashed in the base_addr field of the record that
  // sits one slot *before* the first dependency element.
  Address DepObjAddr = CGF.Builder.CreateGEP(
      CGF, Base.getAddress(CGF),
      llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
  LValue NumDepsBase = CGF.MakeAddrLValue(
      DepObjAddr, KmpDependInfoTy, Base.getBaseInfo(), Base.getTBAAInfo());
  // NumDeps = deps[i].base_addr;
  LValue BaseAddrLVal = CGF.EmitLValueForField(
      NumDepsBase,
      *std::next(KmpDependInfoRD->field_begin(),
                 static_cast<unsigned int>(RTLDependInfoFields::BaseAddr)));
  llvm::Value *NumDeps = CGF.EmitLoadOfScalar(BaseAddrLVal, Loc);
  return std::make_pair(NumDeps, Base);
}
4064
// Fills DependenciesArray with one kmp_depend_info record
// {base_addr, len, flags} per dependency expression. Pos is either a
// compile-time slot counter (advanced in place) or an lvalue holding a
// runtime index (for iterator-expanded dependencies).
// NOTE(review): the parameter line between Pos and DependenciesArray
// (presumably the OMPTaskDataTy dependency-data reference) and the store-call
// head before the flags constant are elided in this rendering.
static void emitDependData(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
                           llvm::PointerUnion<unsigned *, LValue *> Pos,
                           Address DependenciesArray) {
  CodeGenModule &CGM = CGF.CGM;
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);

  // If the clause has an iterator modifier, generate the surrounding loops.
  OMPIteratorGeneratorScope IteratorScope(
      CGF, cast_or_null<OMPIteratorExpr>(
               Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                 : nullptr));
  for (const Expr *E : Data.DepExprs) {
    llvm::Value *Addr;
    llvm::Value *Size;

    // The expression will be a nullptr in the 'omp_all_memory' case.
    if (E) {
      std::tie(Addr, Size) = getPointerAndSize(CGF, E);
      Addr = CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy);
    } else {
      Addr = llvm::ConstantInt::get(CGF.IntPtrTy, 0);
      Size = llvm::ConstantInt::get(CGF.SizeTy, 0);
    }
    LValue Base;
    if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
      // Static slot: constant GEP.
      Base = CGF.MakeAddrLValue(
          CGF.Builder.CreateConstGEP(DependenciesArray, *P), KmpDependInfoTy);
    } else {
      // Runtime slot: load the current index from PosLVal.
      assert(E && "Expected a non-null expression");
      LValue &PosLVal = *Pos.get<LValue *>();
      llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Base = CGF.MakeAddrLValue(
          CGF.Builder.CreateGEP(CGF, DependenciesArray, Idx), KmpDependInfoTy);
    }
    // deps[i].base_addr = &<Dependencies[i].second>;
    LValue BaseAddrLVal = CGF.EmitLValueForField(
        Base,
        *std::next(KmpDependInfoRD->field_begin(),
                   static_cast<unsigned int>(RTLDependInfoFields::BaseAddr)));
    CGF.EmitStoreOfScalar(Addr, BaseAddrLVal);
    // deps[i].len = sizeof(<Dependencies[i].second>);
    LValue LenLVal = CGF.EmitLValueForField(
        Base, *std::next(KmpDependInfoRD->field_begin(),
                         static_cast<unsigned int>(RTLDependInfoFields::Len)));
    CGF.EmitStoreOfScalar(Size, LenLVal);
    // deps[i].flags = <Dependencies[i].first>;
    RTLDependenceKindTy DepKind = translateDependencyKind(Data.DepKind);
    LValue FlagsLVal = CGF.EmitLValueForField(
        Base,
        *std::next(KmpDependInfoRD->field_begin(),
                   static_cast<unsigned int>(RTLDependInfoFields::Flags)));
        llvm::ConstantInt::get(LLVMFlagsTy, static_cast<unsigned int>(DepKind)),
        FlagsLVal);
    // Advance the slot position (static counter or runtime index).
    if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
      ++(*P);
    } else {
      LValue &PosLVal = *Pos.get<LValue *>();
      llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Idx = CGF.Builder.CreateNUWAdd(Idx,
                                     llvm::ConstantInt::get(Idx->getType(), 1));
      CGF.EmitStoreOfScalar(Idx, PosLVal);
    }
  }
}
4135
// Computes, per depobj dependency expression, the runtime number of elements
// it carries; returns one size value per expression in Sizes.
// NOTE(review): the signature head (function name / return type), the Data
// parameter line, and the declaration of the returned Sizes vector are elided
// in this rendering.
    CodeGenFunction &CGF, QualType &KmpDependInfoTy,
  assert(Data.DepKind == OMPC_DEPEND_depobj &&
         "Expected depobj dependency kind.");
  SmallVector<LValue, 4> SizeLVals;
  ASTContext &C = CGF.getContext();
  {
    // Iterator scope so iterator-modified depobj lists are walked per
    // iteration.
    OMPIteratorGeneratorScope IteratorScope(
        CGF, cast_or_null<OMPIteratorExpr>(
                 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                   : nullptr));
    for (const Expr *E : Data.DepExprs) {
      llvm::Value *NumDeps;
      LValue Base;
      LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
      std::tie(NumDeps, Base) =
          getDepobjElements(CGF, DepobjLVal, E->getExprLoc());
      // Accumulate the element count into a fresh zero-initialized temp.
      LValue NumLVal = CGF.MakeAddrLValue(
          CGF.CreateMemTemp(C.getUIntPtrType(), "depobj.size.addr"),
          C.getUIntPtrType());
      CGF.Builder.CreateStore(llvm::ConstantInt::get(CGF.IntPtrTy, 0),
                              NumLVal.getAddress(CGF));
      llvm::Value *PrevVal = CGF.EmitLoadOfScalar(NumLVal, E->getExprLoc());
      llvm::Value *Add = CGF.Builder.CreateNUWAdd(PrevVal, NumDeps);
      CGF.EmitStoreOfScalar(Add, NumLVal);
      SizeLVals.push_back(NumLVal);
    }
  }
  // Load the accumulated sizes outside the iterator scope.
  for (unsigned I = 0, E = SizeLVals.size(); I < E; ++I) {
    llvm::Value *Size =
        CGF.EmitLoadOfScalar(SizeLVals[I], Data.DepExprs[I]->getExprLoc());
    Sizes.push_back(Size);
  }
  return Sizes;
}
4173
// Copies the kmp_depend_info records of each depobj dependency into
// DependenciesArray at the running position PosLVal, advancing PosLVal by the
// number of copied elements.
// NOTE(review): the signature head (function name / return type / CGF
// parameter) and the Data parameter line are elided in this rendering.
                               QualType &KmpDependInfoTy,
                               LValue PosLVal,
                               Address DependenciesArray) {
  assert(Data.DepKind == OMPC_DEPEND_depobj &&
         "Expected depobj dependency kind.");
  llvm::Value *ElSize = CGF.getTypeSize(KmpDependInfoTy);
  {
    // Iterator scope so iterator-modified depobj lists are walked per
    // iteration.
    OMPIteratorGeneratorScope IteratorScope(
        CGF, cast_or_null<OMPIteratorExpr>(
                 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                   : nullptr));
    for (unsigned I = 0, End = Data.DepExprs.size(); I < End; ++I) {
      const Expr *E = Data.DepExprs[I];
      llvm::Value *NumDeps;
      LValue Base;
      LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
      std::tie(NumDeps, Base) =
          getDepobjElements(CGF, DepobjLVal, E->getExprLoc());

      // memcopy dependency data.
      llvm::Value *Size = CGF.Builder.CreateNUWMul(
          ElSize,
          CGF.Builder.CreateIntCast(NumDeps, CGF.SizeTy, /*isSigned=*/false));
      llvm::Value *Pos = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Address DepAddr = CGF.Builder.CreateGEP(CGF, DependenciesArray, Pos);
      CGF.Builder.CreateMemCpy(DepAddr, Base.getAddress(CGF), Size);

      // Increase pos.
      // pos += size;
      llvm::Value *Add = CGF.Builder.CreateNUWAdd(Pos, NumDeps);
      CGF.EmitStoreOfScalar(Add, PosLVal);
    }
  }
}
4210
// Emits the task 'depend' clause bookkeeping: materializes the
// kmp_depend_info array for all dependencies and returns
// {number-of-elements, address-of-first-element}. Returns
// {nullptr, invalid} when every DependData has no expressions.
// NOTE(review): several source lines are elided in this extraction
// (4212, 4219, 4240-4241, 4288); code below is kept byte-identical.
4211std::pair<llvm::Value *, Address> CGOpenMPRuntime::emitDependClause(
4213    SourceLocation Loc) {
4214  if (llvm::all_of(Dependencies, [](const OMPTaskDataTy::DependData &D) {
4215        return D.DepExprs.empty();
4216      }))
4217    return std::make_pair(nullptr, Address::invalid());
4218  // Process list of dependencies.
4220  Address DependenciesArray = Address::invalid();
4221  llvm::Value *NumOfElements = nullptr;
  // Compile-time count: only "regular" deps (neither depobj nor
  // iterator-based) contribute a statically known element count.
4222  unsigned NumDependencies = std::accumulate(
4223      Dependencies.begin(), Dependencies.end(), 0,
4224      [](unsigned V, const OMPTaskDataTy::DependData &D) {
4225        return D.DepKind == OMPC_DEPEND_depobj
4226                   ? V
4227                   : (V + (D.IteratorExpr ? 0 : D.DepExprs.size()));
4228      });
4229  QualType FlagsTy;
4230  getDependTypes(C, KmpDependInfoTy, FlagsTy);
4231  bool HasDepobjDeps = false;
4232  bool HasRegularWithIterators = false;
4233  llvm::Value *NumOfDepobjElements = llvm::ConstantInt::get(CGF.IntPtrTy, 0);
4234  llvm::Value *NumOfRegularWithIterators =
4235      llvm::ConstantInt::get(CGF.IntPtrTy, 0);
4236  // Calculate number of depobj dependencies and regular deps with the
4237  // iterators.
4238  for (const OMPTaskDataTy::DependData &D : Dependencies) {
4239    if (D.DepKind == OMPC_DEPEND_depobj) {
      // NOTE(review): lines 4240-4241 (the 'Sizes' initialization) are
      // elided in this extraction; 'Sizes' presumably holds the runtime
      // element counts of each depobj — confirm against upstream.
4242      for (llvm::Value *Size : Sizes) {
4243        NumOfDepobjElements =
4244            CGF.Builder.CreateNUWAdd(NumOfDepobjElements, Size);
4245      }
4246      HasDepobjDeps = true;
4247      continue;
4248    }
4249    // Include number of iterations, if any.
4250
4251    if (const auto *IE = cast_or_null<OMPIteratorExpr>(D.IteratorExpr)) {
      // Total deps for this clause = product of iterator trip counts
      // times the number of dependency expressions.
4252      for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
4253        llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
4254        Sz = CGF.Builder.CreateIntCast(Sz, CGF.IntPtrTy, /*isSigned=*/false);
4255        llvm::Value *NumClauseDeps = CGF.Builder.CreateNUWMul(
4256            Sz, llvm::ConstantInt::get(CGF.IntPtrTy, D.DepExprs.size()));
4257        NumOfRegularWithIterators =
4258            CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumClauseDeps);
4259      }
4260      HasRegularWithIterators = true;
4261      continue;
4262    }
4263  }
4264
4265  QualType KmpDependInfoArrayTy;
  // If any element count is only known at runtime, emit a VLA-backed
  // local; otherwise a fixed-size constant array temp suffices.
4266  if (HasDepobjDeps || HasRegularWithIterators) {
4267    NumOfElements = llvm::ConstantInt::get(CGM.IntPtrTy, NumDependencies,
4268                                           /*isSigned=*/false);
4269    if (HasDepobjDeps) {
4270      NumOfElements =
4271          CGF.Builder.CreateNUWAdd(NumOfDepobjElements, NumOfElements);
4272    }
4273    if (HasRegularWithIterators) {
4274      NumOfElements =
4275          CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumOfElements);
4276    }
    // Wrap the runtime count in an OpaqueValueExpr so the VLA type can
    // reference it as its size expression.
4277    auto *OVE = new (C) OpaqueValueExpr(
4278        Loc, C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0),
4279        VK_PRValue);
4280    CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE,
4281                                                  RValue::get(NumOfElements));
4282    KmpDependInfoArrayTy =
4283        C.getVariableArrayType(KmpDependInfoTy, OVE, ArraySizeModifier::Normal,
4284                               /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
4285    // CGF.EmitVariablyModifiedType(KmpDependInfoArrayTy);
4286    // Properly emit variable-sized array.
    // NOTE(review): line 4288 (trailing arguments of Create) is elided
    // in this extraction.
4287    auto *PD = ImplicitParamDecl::Create(C, KmpDependInfoArrayTy,
4289    CGF.EmitVarDecl(*PD);
4290    DependenciesArray = CGF.GetAddrOfLocalVar(PD);
4291    NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
4292                                              /*isSigned=*/false);
4293  } else {
4294    KmpDependInfoArrayTy = C.getConstantArrayType(
4295        KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies), nullptr,
4296        ArraySizeModifier::Normal, /*IndexTypeQuals=*/0);
4297    DependenciesArray =
4298        CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr");
4299    DependenciesArray = CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0);
4300    NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumDependencies,
4301                                           /*isSigned=*/false);
4302  }
  // Fill order: (1) plain deps with a compile-time position counter,
  // (2) iterator-based deps with a runtime counter, (3) depobj arrays.
4303  unsigned Pos = 0;
4304  for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
4305    if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
4306        Dependencies[I].IteratorExpr)
4307      continue;
4308    emitDependData(CGF, KmpDependInfoTy, &Pos, Dependencies[I],
4309                   DependenciesArray);
4310  }
4311  // Copy regular dependencies with iterators.
4312  LValue PosLVal = CGF.MakeAddrLValue(
4313      CGF.CreateMemTemp(C.getSizeType(), "dep.counter.addr"), C.getSizeType());
4314  CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
4315  for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
4316    if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
4317        !Dependencies[I].IteratorExpr)
4318      continue;
4319    emitDependData(CGF, KmpDependInfoTy, &PosLVal, Dependencies[I],
4320                   DependenciesArray);
4321  }
4322  // Copy final depobj arrays without iterators.
4323  if (HasDepobjDeps) {
4324    for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
4325      if (Dependencies[I].DepKind != OMPC_DEPEND_depobj)
4326        continue;
4327      emitDepobjElements(CGF, KmpDependInfoTy, PosLVal, Dependencies[I],
4328                         DependenciesArray);
4329    }
4330  }
4331  DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4332      DependenciesArray, CGF.VoidPtrTy, CGF.Int8Ty);
4333  return std::make_pair(NumOfElements, DependenciesArray);
4334}
4335
// NOTE(review): the declaration line (4336) is elided in this extraction.
// From the body — heap allocation via __kmpc_alloc, one reserved leading
// element holding the element count, return of the address PAST that
// element — this is the depobj depend-clause emitter; confirm the exact
// signature against upstream.
4337    CodeGenFunction &CGF, const OMPTaskDataTy::DependData &Dependencies,
4338    SourceLocation Loc) {
4339  if (Dependencies.DepExprs.empty())
4340    return Address::invalid();
4341  // Process list of dependencies.
4343  Address DependenciesArray = Address::invalid();
4344  unsigned NumDependencies = Dependencies.DepExprs.size();
4345  QualType FlagsTy;
4346  getDependTypes(C, KmpDependInfoTy, FlagsTy);
4347  RecordDecl *KmpDependInfoRD =
4348      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4349
4350  llvm::Value *Size;
4351  // Define type kmp_depend_info[<Dependencies.size()>];
4352  // For depobj reserve one extra element to store the number of elements.
4353  // It is required to handle depobj(x) update(in) construct.
4354  // kmp_depend_info[<Dependencies.size()>] deps;
4355  llvm::Value *NumDepsVal;
4356  CharUnits Align = C.getTypeAlignInChars(KmpDependInfoTy);
  // With an iterator, the element count is the runtime product of the
  // iterator upper bounds; otherwise it is a compile-time constant.
4357  if (const auto *IE =
4358          cast_or_null<OMPIteratorExpr>(Dependencies.IteratorExpr)) {
4359    NumDepsVal = llvm::ConstantInt::get(CGF.SizeTy, 1);
4360    for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
4361      llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
4362      Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
4363      NumDepsVal = CGF.Builder.CreateNUWMul(NumDepsVal, Sz);
4364    }
    // +1 element for the reserved count slot, then scale by the
    // (alignment-padded) record size to get the byte count.
4365    Size = CGF.Builder.CreateNUWAdd(llvm::ConstantInt::get(CGF.SizeTy, 1),
4366                                    NumDepsVal);
4367    CharUnits SizeInBytes =
4368        C.getTypeSizeInChars(KmpDependInfoTy).alignTo(Align);
4369    llvm::Value *RecSize = CGM.getSize(SizeInBytes);
4370    Size = CGF.Builder.CreateNUWMul(Size, RecSize);
4371    NumDepsVal =
4372        CGF.Builder.CreateIntCast(NumDepsVal, CGF.IntPtrTy, /*isSigned=*/false);
4373  } else {
4374    QualType KmpDependInfoArrayTy = C.getConstantArrayType(
4375        KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies + 1),
4376        nullptr, ArraySizeModifier::Normal, /*IndexTypeQuals=*/0);
4377    CharUnits Sz = C.getTypeSizeInChars(KmpDependInfoArrayTy);
4378    Size = CGM.getSize(Sz.alignTo(Align));
4379    NumDepsVal = llvm::ConstantInt::get(CGF.IntPtrTy, NumDependencies);
4380  }
4381  // Need to allocate on the dynamic memory.
4382  llvm::Value *ThreadID = getThreadID(CGF, Loc);
4383  // Use default allocator.
4384  llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
4385  llvm::Value *Args[] = {ThreadID, Size, Allocator};
4386
4387  llvm::Value *Addr =
4388      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
4389                              CGM.getModule(), OMPRTL___kmpc_alloc),
4390                          Args, ".dep.arr.addr");
4391  llvm::Type *KmpDependInfoLlvmTy = CGF.ConvertTypeForMem(KmpDependInfoTy);
  // NOTE(review): line 4392 (the cast call wrapping this argument list)
  // is elided in this extraction.
4393      Addr, KmpDependInfoLlvmTy->getPointerTo());
4394  DependenciesArray = Address(Addr, KmpDependInfoLlvmTy, Align);
4395  // Write number of elements in the first element of array for depobj.
4396  LValue Base = CGF.MakeAddrLValue(DependenciesArray, KmpDependInfoTy);
4397  // deps[i].base_addr = NumDependencies;
4398  LValue BaseAddrLVal = CGF.EmitLValueForField(
4399      Base,
4400      *std::next(KmpDependInfoRD->field_begin(),
4401                 static_cast<unsigned int>(RTLDependInfoFields::BaseAddr)));
4402  CGF.EmitStoreOfScalar(NumDepsVal, BaseAddrLVal);
  // Position starts at 1: slot 0 is the reserved count element. With an
  // iterator the position must live in memory (runtime counter).
4403  llvm::PointerUnion<unsigned *, LValue *> Pos;
4404  unsigned Idx = 1;
4405  LValue PosLVal;
4406  if (Dependencies.IteratorExpr) {
4407    PosLVal = CGF.MakeAddrLValue(
4408        CGF.CreateMemTemp(C.getSizeType(), "iterator.counter.addr"),
4409        C.getSizeType());
4410    CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Idx), PosLVal,
4411                          /*IsInit=*/true);
4412    Pos = &PosLVal;
4413  } else {
4414    Pos = &Idx;
4415  }
4416  emitDependData(CGF, KmpDependInfoTy, Pos, Dependencies, DependenciesArray);
  // Return the address of element 1, i.e. just past the count slot.
4417  DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4418      CGF.Builder.CreateConstGEP(DependenciesArray, 1), CGF.VoidPtrTy,
4419      CGF.Int8Ty);
4420  return DependenciesArray;
4421}
4422
// NOTE(review): the declaration line (4423) is elided in this extraction.
// The body frees a depobj allocation via __kmpc_free, stepping back one
// element from the user-visible pointer to reach the true allocation
// start (the reserved count slot) — this matches a depobj 'destroy'
// clause emitter; confirm the signature against upstream.
4424                                           SourceLocation Loc) {
4426  QualType FlagsTy;
4427  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  // NOTE(review): lines 4425, 4428, 4431 and 4433 are elided here; they
  // presumably fetch the ASTContext and build 'Base'/'Addr' lvalues.
4429      DepobjLVal.getAddress(CGF), C.VoidPtrTy.castAs<PointerType>());
4430  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
4432      Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy),
  // GEP by -1: the stored pointer points past the reserved count element,
  // so the allocation to free starts one kmp_depend_info earlier.
4434  llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
4435      Addr.getElementType(), Addr.emitRawPointer(CGF),
4436      llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
4437  DepObjAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(DepObjAddr,
4438                                                               CGF.VoidPtrTy);
4439  llvm::Value *ThreadID = getThreadID(CGF, Loc);
4440  // Use default allocator.
4441  llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
4442  llvm::Value *Args[] = {ThreadID, DepObjAddr, Allocator};
4443
4444  // _kmpc_free(gtid, addr, nullptr);
4445  (void)CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
4446                                CGM.getModule(), OMPRTL___kmpc_free),
4447                            Args);
4448}
4449
// NOTE(review): the declaration line (4450) is elided in this extraction.
// The body rewrites the 'flags' field of every element of a depobj's
// kmp_depend_info array to a new dependence kind — i.e. the codegen for
// 'depobj(x) update(<kind>)'; confirm the signature against upstream.
4451                                       OpenMPDependClauseKind NewDepKind,
4452                                       SourceLocation Loc) {
4454  QualType FlagsTy;
4455  getDependTypes(C, KmpDependInfoTy, FlagsTy);
4456  RecordDecl *KmpDependInfoRD =
4457      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4458  llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
4459  llvm::Value *NumDeps;
4460  LValue Base;
4461  std::tie(NumDeps, Base) = getDepobjElements(CGF, DepobjLVal, Loc);
4462
4463  Address Begin = Base.getAddress(CGF);
4464  // Cast from pointer to array type to pointer to single element.
4465  llvm::Value *End = CGF.Builder.CreateGEP(Begin.getElementType(),
4466                                           Begin.emitRawPointer(CGF), NumDeps);
4467  // The basic structure here is a while-do loop.
4468  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.body");
4469  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.done");
4470  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
4471  CGF.EmitBlock(BodyBB);
  // Current element is tracked by a PHI over {entry pointer, advanced
  // pointer from the previous iteration}.
4472  llvm::PHINode *ElementPHI =
4473      CGF.Builder.CreatePHI(Begin.getType(), 2, "omp.elementPast");
4474  ElementPHI->addIncoming(Begin.emitRawPointer(CGF), EntryBB);
4475  Begin = Begin.withPointer(ElementPHI, KnownNonNull);
4476  Base = CGF.MakeAddrLValue(Begin, KmpDependInfoTy, Base.getBaseInfo(),
4477                            Base.getTBAAInfo());
4478  // deps[i].flags = NewDepKind;
4479  RTLDependenceKindTy DepKind = translateDependencyKind(NewDepKind);
4480  LValue FlagsLVal = CGF.EmitLValueForField(
4481      Base, *std::next(KmpDependInfoRD->field_begin(),
4482                       static_cast<unsigned int>(RTLDependInfoFields::Flags)));
  // NOTE(review): line 4483 (the EmitStoreOfScalar call opening) is
  // elided in this extraction.
4484      llvm::ConstantInt::get(LLVMFlagsTy, static_cast<unsigned int>(DepKind)),
4485      FlagsLVal);
4486
4487  // Shift the address forward by one element.
4488  llvm::Value *ElementNext =
4489      CGF.Builder.CreateConstGEP(Begin, /*Index=*/1, "omp.elementNext")
4490          .emitRawPointer(CGF);
4491  ElementPHI->addIncoming(ElementNext, CGF.Builder.GetInsertBlock());
4492  llvm::Value *IsEmpty =
4493      CGF.Builder.CreateICmpEQ(ElementNext, End, "omp.isempty");
4494  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
4495  // Done.
4496  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
4497}
4498
// NOTE(review): the declaration line (4499) is elided in this extraction.
// Emits a task construct: initializes the task object, materializes any
// depend clause, then either enqueues the task (__kmpc_omp_task[_with_deps])
// or — under a false 'if' clause — waits on dependences and runs the task
// body serially between __kmpc_omp_task_begin_if0/_complete_if0.
4500                                   const OMPExecutableDirective &D,
4501                                   llvm::Function *TaskFunction,
4502                                   QualType SharedsTy, Address Shareds,
4503                                   const Expr *IfCond,
4504                                   const OMPTaskDataTy &Data) {
4505  if (!CGF.HaveInsertPoint())
4506    return;
4507
  // NOTE(review): line 4508 ('TaskResultTy Result =' — presumably) is
  // elided in this extraction.
4509      emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
4510  llvm::Value *NewTask = Result.NewTask;
4511  llvm::Function *TaskEntry = Result.TaskEntry;
4512  llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy;
4513  LValue TDBase = Result.TDBase;
4514  const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD;
4515  // Process list of dependences.
4516  Address DependenciesArray = Address::invalid();
4517  llvm::Value *NumOfElements;
4518  std::tie(NumOfElements, DependenciesArray) =
4519      emitDependClause(CGF, Data.Dependences, Loc);
4520
4521  // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
4522  // libcall.
4523  // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
4524  // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
4525  // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence
4526  // list is not empty
4527  llvm::Value *ThreadID = getThreadID(CGF, Loc);
4528  llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
4529  llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask };
4530  llvm::Value *DepTaskArgs[7];
4531  if (!Data.Dependences.empty()) {
4532    DepTaskArgs[0] = UpLoc;
4533    DepTaskArgs[1] = ThreadID;
4534    DepTaskArgs[2] = NewTask;
4535    DepTaskArgs[3] = NumOfElements;
4536    DepTaskArgs[4] = DependenciesArray.emitRawPointer(CGF);
4537    DepTaskArgs[5] = CGF.Builder.getInt32(0);
4538    DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
4539  }
  // 'then' branch (if-clause true or absent): enqueue via the runtime.
4540  auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, &TaskArgs,
4541                        &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) {
    // Untied tasks start at part id 0; reset the part_id field explicitly.
4542    if (!Data.Tied) {
4543      auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
4544      LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI);
4545      CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal);
4546    }
4547    if (!Data.Dependences.empty()) {
4548      CGF.EmitRuntimeCall(
4549          OMPBuilder.getOrCreateRuntimeFunction(
4550              CGM.getModule(), OMPRTL___kmpc_omp_task_with_deps),
4551          DepTaskArgs);
4552    } else {
4553      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
4554                              CGM.getModule(), OMPRTL___kmpc_omp_task),
4555                          TaskArgs);
4556    }
4557    // Check if parent region is untied and build return for untied task;
4558    if (auto *Region =
4559            dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
4560      Region->emitUntiedSwitch(CGF);
4561  };
4562
4563  llvm::Value *DepWaitTaskArgs[7];
4564  if (!Data.Dependences.empty()) {
4565    DepWaitTaskArgs[0] = UpLoc;
4566    DepWaitTaskArgs[1] = ThreadID;
4567    DepWaitTaskArgs[2] = NumOfElements;
4568    DepWaitTaskArgs[3] = DependenciesArray.emitRawPointer(CGF);
4569    DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
4570    DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
4571    DepWaitTaskArgs[6] =
4572        llvm::ConstantInt::get(CGF.Int32Ty, Data.HasNowaitClause);
4573  }
4574  auto &M = CGM.getModule();
  // 'else' branch (if-clause false): execute the task body inline,
  // honoring dependences first via __kmpc_omp_taskwait_deps_51.
4575  auto &&ElseCodeGen = [this, &M, &TaskArgs, ThreadID, NewTaskNewTaskTTy,
4576                        TaskEntry, &Data, &DepWaitTaskArgs,
4577                        Loc](CodeGenFunction &CGF, PrePostActionTy &) {
4578    CodeGenFunction::RunCleanupsScope LocalScope(CGF);
4579    // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
4580    // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
4581    // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
4582    // is specified.
4583    if (!Data.Dependences.empty())
4584      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
4585                              M, OMPRTL___kmpc_omp_taskwait_deps_51),
4586                          DepWaitTaskArgs);
4587    // Call proxy_task_entry(gtid, new_task);
4588    auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy,
4589                      Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
4590      Action.Enter(CGF);
4591      llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
4592      CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry,
4593                                                          OutlinedFnArgs);
4594    };
4595
4596    // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
4597    // kmp_task_t *new_task);
4598    // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
4599    // kmp_task_t *new_task);
4600    RegionCodeGenTy RCG(CodeGen);
4601    CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
4602                              M, OMPRTL___kmpc_omp_task_begin_if0),
4603                          TaskArgs,
4604                          OMPBuilder.getOrCreateRuntimeFunction(
4605                              M, OMPRTL___kmpc_omp_task_complete_if0),
4606                          TaskArgs);
4607    RCG.setAction(Action);
4608    RCG(CGF);
4609  };
4610
4611  if (IfCond) {
4612    emitIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen);
4613  } else {
4614    RegionCodeGenTy ThenRCG(ThenCodeGen);
4615    ThenRCG(CGF);
4616  }
4617}
4618
// NOTE(review): the declaration line (4619) is elided in this extraction.
// Emits a taskloop construct: initializes the task object, fills in the
// lower/upper bound, stride and reductions fields of kmp_task_t, and
// calls __kmpc_taskloop with the schedule (grainsize/num_tasks) encoding.
4620                                       const OMPLoopDirective &D,
4621                                       llvm::Function *TaskFunction,
4622                                       QualType SharedsTy, Address Shareds,
4623                                       const Expr *IfCond,
4624                                       const OMPTaskDataTy &Data) {
4625  if (!CGF.HaveInsertPoint())
4626    return;
  // NOTE(review): line 4627 ('TaskResultTy Result =' — presumably) is
  // elided in this extraction.
4628      emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
4629  // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
4630  // libcall.
4631  // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
4632  // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
4633  // sched, kmp_uint64 grainsize, void *task_dup);
4634  llvm::Value *ThreadID = getThreadID(CGF, Loc);
4635  llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
  // Unlike emitTaskCall, the if-clause becomes a runtime integer argument
  // rather than branching codegen.
4636  llvm::Value *IfVal;
4637  if (IfCond) {
4638    IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy,
4639                                      /*isSigned=*/true);
4640  } else {
4641    IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1);
4642  }
4643
  // Initialize kmp_task_t's lb/ub/st fields from the loop directive's
  // helper variables.
4644  LValue LBLVal = CGF.EmitLValueForField(
4645      Result.TDBase,
4646      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound));
4647  const auto *LBVar =
4648      cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl());
4649  CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(CGF),
4650                       LBLVal.getQuals(),
4651                       /*IsInitializer=*/true);
4652  LValue UBLVal = CGF.EmitLValueForField(
4653      Result.TDBase,
4654      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound));
4655  const auto *UBVar =
4656      cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl());
4657  CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(CGF),
4658                       UBLVal.getQuals(),
4659                       /*IsInitializer=*/true);
4660  LValue StLVal = CGF.EmitLValueForField(
4661      Result.TDBase,
4662      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride));
4663  const auto *StVar =
4664      cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl());
4665  CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(CGF),
4666                       StLVal.getQuals(),
4667                       /*IsInitializer=*/true);
4668  // Store reductions address.
4669  LValue RedLVal = CGF.EmitLValueForField(
4670      Result.TDBase,
4671      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions));
4672  if (Data.Reductions) {
4673    CGF.EmitStoreOfScalar(Data.Reductions, RedLVal);
4674  } else {
4675    CGF.EmitNullInitialization(RedLVal.getAddress(CGF),
4676                               CGF.getContext().VoidPtrTy);
4677  }
  // Schedule encoding for __kmpc_taskloop's 'sched' parameter; the
  // PointerIntPair's int selects num_tasks vs grainsize.
4678  enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 };
4679  llvm::Value *TaskArgs[] = {
4680      UpLoc,
4681      ThreadID,
4682      Result.NewTask,
4683      IfVal,
4684      LBLVal.getPointer(CGF),
4685      UBLVal.getPointer(CGF),
4686      CGF.EmitLoadOfScalar(StLVal, Loc),
4687      llvm::ConstantInt::getSigned(
4688          CGF.IntTy, 1), // Always 1 because taskgroup emitted by the compiler
4689      llvm::ConstantInt::getSigned(
4690          CGF.IntTy, Data.Schedule.getPointer()
4691                         ? Data.Schedule.getInt() ? NumTasks : Grainsize
4692                         : NoSchedule),
4693      Data.Schedule.getPointer()
4694          ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty,
4695                                      /*isSigned=*/false)
4696          : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0),
      // NOTE(review): line 4697 (the 'Result.TaskDupFn ?' condition and
      // cast opening) is elided in this extraction.
4698                     Result.TaskDupFn, CGF.VoidPtrTy)
4699               : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)};
4700  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
4701                          CGM.getModule(), OMPRTL___kmpc_taskloop),
4702                      TaskArgs);
4703}
4704
4705/// Emit reduction operation for each element of array (required for
4706/// array sections) LHS op = RHS.
4707/// \param Type Type of array.
4708/// \param LHSVar Variable on the left side of the reduction operation
4709/// (references element of array in original variable).
4710/// \param RHSVar Variable on the right side of the reduction operation
4711/// (references element of array in original variable).
4712/// \param RedOpGen Generator of reduction operation with use of LHSVar and
4713/// RHSVar.
// NOTE(review): the declaration line (4714) is elided in this extraction;
// per the doc comment above, this is the element-wise aggregate reduction
// emitter (LHS op= RHS over every array element).
4715    CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar,
4716    const VarDecl *RHSVar,
4717    const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *,
4718                                  const Expr *, const Expr *)> &RedOpGen,
4719    const Expr *XExpr = nullptr, const Expr *EExpr = nullptr,
4720    const Expr *UpExpr = nullptr) {
4721  // Perform element-by-element initialization.
4722  QualType ElementTy;
4723  Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar);
4724  Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar);
4725
4726  // Drill down to the base element type on both arrays.
4727  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
4728  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr);
4729
4730  llvm::Value *RHSBegin = RHSAddr.emitRawPointer(CGF);
4731  llvm::Value *LHSBegin = LHSAddr.emitRawPointer(CGF);
4732  // Cast from pointer to array type to pointer to single element.
4733  llvm::Value *LHSEnd =
4734      CGF.Builder.CreateGEP(LHSAddr.getElementType(), LHSBegin, NumElements);
4735  // The basic structure here is a while-do loop.
4736  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arraycpy.body");
4737  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arraycpy.done");
  // Guard against the zero-length case before entering the loop.
4738  llvm::Value *IsEmpty =
4739      CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty");
4740  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
4741
4742  // Enter the loop body, making that address the current address.
4743  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
4744  CGF.EmitBlock(BodyBB);
4745
4746  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);
4747
  // Two PHIs walk the source and destination arrays in lockstep.
4748  llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI(
4749      RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast");
4750  RHSElementPHI->addIncoming(RHSBegin, EntryBB);
4751  Address RHSElementCurrent(
4752      RHSElementPHI, RHSAddr.getElementType(),
4753      RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));
4754
4755  llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI(
4756      LHSBegin->getType(), 2, "omp.arraycpy.destElementPast");
4757  LHSElementPHI->addIncoming(LHSBegin, EntryBB);
4758  Address LHSElementCurrent(
4759      LHSElementPHI, LHSAddr.getElementType(),
4760      LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));
4761
4762  // Emit copy.
  // Temporarily remap LHSVar/RHSVar to the current elements so RedOpGen's
  // expressions operate on them.
4763  CodeGenFunction::OMPPrivateScope Scope(CGF);
4764  Scope.addPrivate(LHSVar, LHSElementCurrent);
4765  Scope.addPrivate(RHSVar, RHSElementCurrent);
4766  Scope.Privatize();
4767  RedOpGen(CGF, XExpr, EExpr, UpExpr);
4768  Scope.ForceCleanup();
4769
4770  // Shift the address forward by one element.
4771  llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32(
4772      LHSAddr.getElementType(), LHSElementPHI, /*Idx0=*/1,
4773      "omp.arraycpy.dest.element");
4774  llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32(
4775      RHSAddr.getElementType(), RHSElementPHI, /*Idx0=*/1,
4776      "omp.arraycpy.src.element");
4777  // Check whether we've reached the end.
4778  llvm::Value *Done =
4779      CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done");
4780  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
4781  LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock());
4782  RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock());
4783
4784  // Done.
4785  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
4786}
4787
4788/// Emit reduction combiner. If the combiner is a simple expression emit it as
4789/// is, otherwise consider it as combiner of UDR decl and emit it as a call of
4790/// UDR combiner function.
// NOTE(review): the declaration line (4791) is elided in this extraction.
4792                                  const Expr *ReductionOp) {
  // Detect the UDR pattern: a call whose callee is an OpaqueValueExpr
  // referencing an OMPDeclareReductionDecl; map the opaque callee to the
  // actual combiner function before emitting.
4793  if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
4794    if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
4795      if (const auto *DRE =
4796              dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
4797        if (const auto *DRD =
4798                dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) {
4799          std::pair<llvm::Function *, llvm::Function *> Reduction =
          // NOTE(review): lines 4800-4801 (initializer of 'Reduction' and
          // the 'Func' RValue) are elided in this extraction.
4802          CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
4803          CGF.EmitIgnoredExpr(ReductionOp);
4804          return;
4805        }
  // Not a UDR call: emit the plain combiner expression.
4806  CGF.EmitIgnoredExpr(ReductionOp);
4807}
4808
// NOTE(review): the declaration line (4809) is elided in this extraction.
// Builds the internal-linkage 'void reduction_func(void *lhs[n], void
// *rhs[n])' helper passed to __kmpc_reduce{_nowait}: it unpacks the two
// void* arrays, remaps each LHS/RHS variable to the corresponding slot
// (re-emitting VLA sizes from extra slots), then emits every combiner.
4810    StringRef ReducerName, SourceLocation Loc, llvm::Type *ArgsElemType,
4812    ArrayRef<const Expr *> RHSExprs, ArrayRef<const Expr *> ReductionOps) {
4814
4815  // void reduction_func(void *LHSArg, void *RHSArg);
4816  FunctionArgList Args;
  // NOTE(review): lines 4818/4820 (trailing ctor args of the two
  // ImplicitParamDecls) are elided in this extraction.
4817  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
4819  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
4821  Args.push_back(&LHSArg);
4822  Args.push_back(&RHSArg);
4823  const auto &CGFI =
4825  std::string Name = getReductionFuncName(ReducerName);
4826  auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
4827                                    llvm::GlobalValue::InternalLinkage, Name,
4828                                    &CGM.getModule());
4830  Fn->setDoesNotRecurse();
4831  CodeGenFunction CGF(CGM);
4832  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
4833
4834  // Dst = (void*[n])(LHSArg);
4835  // Src = (void*[n])(RHSArg);
  // NOTE(review): lines 4836/4840 (the 'Address LHS/RHS(' openings) are
  // elided in this extraction.
4837      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
4838      ArgsElemType->getPointerTo()),
4839      ArgsElemType, CGF.getPointerAlign());
4841      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
4842      ArgsElemType->getPointerTo()),
4843      ArgsElemType, CGF.getPointerAlign());
4844
4845  // ...
4846  // *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
4847  // ...
4849  const auto *IPriv = Privates.begin();
4850  unsigned Idx = 0;
4851  for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) {
4852    const auto *RHSVar =
4853        cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl());
4854    Scope.addPrivate(RHSVar, emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar));
4855    const auto *LHSVar =
4856        cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl());
4857    Scope.addPrivate(LHSVar, emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar));
4858    QualType PrivTy = (*IPriv)->getType();
4859    if (PrivTy->isVariablyModifiedType()) {
4860      // Get array size and emit VLA type.
      // VLA privates occupy an extra array slot holding the element count
      // (stored as a pointer-sized integer); rebind the VLA size
      // expression to it before re-emitting the type.
4861      ++Idx;
4862      Address Elem = CGF.Builder.CreateConstArrayGEP(LHS, Idx);
4863      llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem);
4864      const VariableArrayType *VLA =
4865          CGF.getContext().getAsVariableArrayType(PrivTy);
4866      const auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr());
4868          CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy)));
4869      CGF.EmitVariablyModifiedType(PrivTy);
4870    }
4871  }
4872  Scope.Privatize();
4873  IPriv = Privates.begin();
4874  const auto *ILHS = LHSExprs.begin();
4875  const auto *IRHS = RHSExprs.begin();
4876  for (const Expr *E : ReductionOps) {
4877    if ((*IPriv)->getType()->isArrayType()) {
4878      // Emit reduction for array section.
4879      const auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
4880      const auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
4882          CGF, (*IPriv)->getType(), LHSVar, RHSVar,
4883          [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
4884            emitReductionCombiner(CGF, E);
4885          });
4886    } else {
4887      // Emit reduction for array subscript or single variable.
4888      emitReductionCombiner(CGF, E);
4889    }
4890    ++IPriv;
4891    ++ILHS;
4892    ++IRHS;
4893  }
4894  Scope.ForceCleanup();
4895  CGF.FinishFunction();
4896  return Fn;
4897}
4898
// NOTE(review): the declaration line (4899) is elided in this extraction.
// Emits one reduction combiner, dispatching on the private's type:
// arrays go element-by-element through the aggregate-reduction loop,
// everything else emits the combiner expression directly.
4900                                                  const Expr *ReductionOp,
4901                                                  const Expr *PrivateRef,
4902                                                  const DeclRefExpr *LHS,
4903                                                  const DeclRefExpr *RHS) {
4904  if (PrivateRef->getType()->isArrayType()) {
4905    // Emit reduction for array section.
4906    const auto *LHSVar = cast<VarDecl>(LHS->getDecl());
4907    const auto *RHSVar = cast<VarDecl>(RHS->getDecl());
    // NOTE(review): line 4908 (the EmitOMPAggregateReduction call opening)
    // is elided in this extraction.
4909        CGF, PrivateRef->getType(), LHSVar, RHSVar,
4910        [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
4911          emitReductionCombiner(CGF, ReductionOp);
4912        });
4913  } else {
4914    // Emit reduction for array subscript or single variable.
4915    emitReductionCombiner(CGF, ReductionOp);
4916  }
4917}
4918
4920 ArrayRef<const Expr *> Privates,
4921 ArrayRef<const Expr *> LHSExprs,
4922 ArrayRef<const Expr *> RHSExprs,
4923 ArrayRef<const Expr *> ReductionOps,
4924 ReductionOptionsTy Options) {
4925 if (!CGF.HaveInsertPoint())
4926 return;
4927
4928 bool WithNowait = Options.WithNowait;
4929 bool SimpleReduction = Options.SimpleReduction;
4930
4931 // Next code should be emitted for reduction:
4932 //
4933 // static kmp_critical_name lock = { 0 };
4934 //
4935 // void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
4936 // *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
4937 // ...
4938 // *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1],
4939 // *(Type<n>-1*)rhs[<n>-1]);
4940 // }
4941 //
4942 // ...
4943 // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
4944 // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
4945 // RedList, reduce_func, &<lock>)) {
4946 // case 1:
4947 // ...
4948 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
4949 // ...
4950 // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
4951 // break;
4952 // case 2:
4953 // ...
4954 // Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
4955 // ...
4956 // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);]
4957 // break;
4958 // default:;
4959 // }
4960 //
4961 // if SimpleReduction is true, only the next code is generated:
4962 // ...
4963 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
4964 // ...
4965
4967
4968 if (SimpleReduction) {
4970 const auto *IPriv = Privates.begin();
4971 const auto *ILHS = LHSExprs.begin();
4972 const auto *IRHS = RHSExprs.begin();
4973 for (const Expr *E : ReductionOps) {
4974 emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
4975 cast<DeclRefExpr>(*IRHS));
4976 ++IPriv;
4977 ++ILHS;
4978 ++IRHS;
4979 }
4980 return;
4981 }
4982
4983 // 1. Build a list of reduction variables.
4984 // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
4985 auto Size = RHSExprs.size();
4986 for (const Expr *E : Privates) {
4987 if (E->getType()->isVariablyModifiedType())
4988 // Reserve place for array size.
4989 ++Size;
4990 }
4991 llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size);
4992 QualType ReductionArrayTy = C.getConstantArrayType(
4993 C.VoidPtrTy, ArraySize, nullptr, ArraySizeModifier::Normal,
4994 /*IndexTypeQuals=*/0);
4995 RawAddress ReductionList =
4996 CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
4997 const auto *IPriv = Privates.begin();
4998 unsigned Idx = 0;
4999 for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) {
5000 Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
5001 CGF.Builder.CreateStore(
5003 CGF.EmitLValue(RHSExprs[I]).getPointer(CGF), CGF.VoidPtrTy),
5004 Elem);
5005 if ((*IPriv)->getType()->isVariablyModifiedType()) {
5006 // Store array size.
5007 ++Idx;
5008 Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
5009 llvm::Value *Size = CGF.Builder.CreateIntCast(
5010 CGF.getVLASize(
5011 CGF.getContext().getAsVariableArrayType((*IPriv)->getType()))
5012 .NumElts,
5013 CGF.SizeTy, /*isSigned=*/false);
5014 CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy),
5015 Elem);
5016 }
5017 }
5018
5019 // 2. Emit reduce_func().
5020 llvm::Function *ReductionFn = emitReductionFunction(
5021 CGF.CurFn->getName(), Loc, CGF.ConvertTypeForMem(ReductionArrayTy),
5022 Privates, LHSExprs, RHSExprs, ReductionOps);
5023
5024 // 3. Create static kmp_critical_name lock = { 0 };
5025 std::string Name = getName({"reduction"});
5026 llvm::Value *Lock = getCriticalRegionLock(Name);
5027
5028 // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
5029 // RedList, reduce_func, &<lock>);
5030 llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
5031 llvm::Value *ThreadId = getThreadID(CGF, Loc);
5032 llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy);
5033 llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5034 ReductionList.getPointer(), CGF.VoidPtrTy);
5035 llvm::Value *Args[] = {
5036 IdentTLoc, // ident_t *<loc>
5037 ThreadId, // i32 <gtid>
5038 CGF.Builder.getInt32(RHSExprs.size()), // i32 <n>
5039 ReductionArrayTySize, // size_type sizeof(RedList)
5040 RL, // void *RedList
5041 ReductionFn, // void (*) (void *, void *) <reduce_func>
5042 Lock // kmp_critical_name *&<lock>
5043 };
5044 llvm::Value *Res = CGF.EmitRuntimeCall(
5045 OMPBuilder.getOrCreateRuntimeFunction(
5046 CGM.getModule(),
5047 WithNowait ? OMPRTL___kmpc_reduce_nowait : OMPRTL___kmpc_reduce),
5048 Args);
5049
5050 // 5. Build switch(res)
5051 llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(".omp.reduction.default");
5052 llvm::SwitchInst *SwInst =
5053 CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2);
5054
5055 // 6. Build case 1:
5056 // ...
5057 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
5058 // ...
5059 // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
5060 // break;
5061 llvm::BasicBlock *Case1BB = CGF.createBasicBlock(".omp.reduction.case1");
5062 SwInst->addCase(CGF.Builder.getInt32(1), Case1BB);
5063 CGF.EmitBlock(Case1BB);
5064
5065 // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
5066 llvm::Value *EndArgs[] = {
5067 IdentTLoc, // ident_t *<loc>
5068 ThreadId, // i32 <gtid>
5069 Lock // kmp_critical_name *&<lock>
5070 };
5071 auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps](
5072 CodeGenFunction &CGF, PrePostActionTy &Action) {
5074 const auto *IPriv = Privates.begin();
5075 const auto *ILHS = LHSExprs.begin();
5076 const auto *IRHS = RHSExprs.begin();
5077 for (const Expr *E : ReductionOps) {
5078 RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
5079 cast<DeclRefExpr>(*IRHS));
5080 ++IPriv;
5081 ++ILHS;
5082 ++IRHS;
5083 }
5084 };
5085 RegionCodeGenTy RCG(CodeGen);
5086 CommonActionTy Action(
5087 nullptr, std::nullopt,
5088 OMPBuilder.getOrCreateRuntimeFunction(
5089 CGM.getModule(), WithNowait ? OMPRTL___kmpc_end_reduce_nowait
5090 : OMPRTL___kmpc_end_reduce),
5091 EndArgs);
5092 RCG.setAction(Action);
5093 RCG(CGF);
5094
5095 CGF.EmitBranch(DefaultBB);
5096
5097 // 7. Build case 2:
5098 // ...
5099 // Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
5100 // ...
5101 // break;
5102 llvm::BasicBlock *Case2BB = CGF.createBasicBlock(".omp.reduction.case2");
5103 SwInst->addCase(CGF.Builder.getInt32(2), Case2BB);
5104 CGF.EmitBlock(Case2BB);
5105
5106 auto &&AtomicCodeGen = [Loc, Privates, LHSExprs, RHSExprs, ReductionOps](
5107 CodeGenFunction &CGF, PrePostActionTy &Action) {
5108 const auto *ILHS = LHSExprs.begin();
5109 const auto *IRHS = RHSExprs.begin();
5110 const auto *IPriv = Privates.begin();
5111 for (const Expr *E : ReductionOps) {
5112 const Expr *XExpr = nullptr;
5113 const Expr *EExpr = nullptr;
5114 const Expr *UpExpr = nullptr;
5115 BinaryOperatorKind BO = BO_Comma;
5116 if (const auto *BO = dyn_cast<BinaryOperator>(E)) {
5117 if (BO->getOpcode() == BO_Assign) {
5118 XExpr = BO->getLHS();
5119 UpExpr = BO->getRHS();
5120 }
5121 }
5122 // Try to emit update expression as a simple atomic.
5123 const Expr *RHSExpr = UpExpr;
5124 if (RHSExpr) {
5125 // Analyze RHS part of the whole expression.
5126 if (const auto *ACO = dyn_cast<AbstractConditionalOperator>(
5127 RHSExpr->IgnoreParenImpCasts())) {
5128 // If this is a conditional operator, analyze its condition for
5129 // min/max reduction operator.
5130 RHSExpr = ACO->getCond();
5131 }
5132 if (const auto *BORHS =
5133 dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) {
5134 EExpr = BORHS->getRHS();
5135 BO = BORHS->getOpcode();
5136 }
5137 }
5138 if (XExpr) {
5139 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
5140 auto &&AtomicRedGen = [BO, VD,
5141 Loc](CodeGenFunction &CGF, const Expr *XExpr,
5142 const Expr *EExpr, const Expr *UpExpr) {
5143 LValue X = CGF.EmitLValue(XExpr);
5144 RValue E;
5145 if (EExpr)
5146 E = CGF.EmitAnyExpr(EExpr);
5147 CGF.EmitOMPAtomicSimpleUpdateExpr(
5148 X, E, BO, /*IsXLHSInRHSPart=*/true,
5149 llvm::AtomicOrdering::Monotonic, Loc,
5150 [&CGF, UpExpr, VD, Loc](RValue XRValue) {
5151 CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
5152 Address LHSTemp = CGF.CreateMemTemp(VD->getType());
5153 CGF.emitOMPSimpleStore(
5154 CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue,
5155 VD->getType().getNonReferenceType(), Loc);
5156 PrivateScope.addPrivate(VD, LHSTemp);
5157 (void)PrivateScope.Privatize();
5158 return CGF.EmitAnyExpr(UpExpr);
5159 });
5160 };
5161 if ((*IPriv)->getType()->isArrayType()) {
5162 // Emit atomic reduction for array section.
5163 const auto *RHSVar =
5164 cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
5165 EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar,
5166 AtomicRedGen, XExpr, EExpr, UpExpr);
5167 } else {
5168 // Emit atomic reduction for array subscript or single variable.
5169 AtomicRedGen(CGF, XExpr, EExpr, UpExpr);
5170 }
5171 } else {
5172 // Emit as a critical region.
5173 auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *,
5174 const Expr *, const Expr *) {
5176 std::string Name = RT.getName({"atomic_reduction"});
5178 CGF, Name,
5179 [=](CodeGenFunction &CGF, PrePostActionTy &Action) {
5180 Action.Enter(CGF);
5181 emitReductionCombiner(CGF, E);
5182 },
5183 Loc);
5184 };
5185 if ((*IPriv)->getType()->isArrayType()) {
5186 const auto *LHSVar =
5187 cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
5188 const auto *RHSVar =
5189 cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
5190 EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar,
5191 CritRedGen);
5192 } else {
5193 CritRedGen(CGF, nullptr, nullptr, nullptr);
5194 }
5195 }
5196 ++ILHS;
5197 ++IRHS;
5198 ++IPriv;
5199 }
5200 };
5201 RegionCodeGenTy AtomicRCG(AtomicCodeGen);
5202 if (!WithNowait) {
5203 // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>);
5204 llvm::Value *EndArgs[] = {
5205 IdentTLoc, // ident_t *<loc>
5206 ThreadId, // i32 <gtid>
5207 Lock // kmp_critical_name *&<lock>
5208 };
5209 CommonActionTy Action(nullptr, std::nullopt,
5210 OMPBuilder.getOrCreateRuntimeFunction(
5211 CGM.getModule(), OMPRTL___kmpc_end_reduce),
5212 EndArgs);
5213 AtomicRCG.setAction(Action);
5214 AtomicRCG(CGF);
5215 } else {
5216 AtomicRCG(CGF);
5217 }
5218
5219 CGF.EmitBranch(DefaultBB);
5220 CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
5221}
5222
5223/// Generates unique name for artificial threadprivate variables.
5224/// Format is: <Prefix> "." <Decl_mangled_name> "_" "<Decl_start_loc_raw_enc>"
5225static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix,
5226 const Expr *Ref) {
5227 SmallString<256> Buffer;
5228 llvm::raw_svector_ostream Out(Buffer);
 // Prefer the base declaration of an array-section/subscript expression;
 // fall back to the plain DeclRefExpr's decl when there is no base.
5229 const clang::DeclRefExpr *DE;
5230 const VarDecl *D = ::getBaseDecl(Ref, DE);
5231 if (!D)
5232 D = cast<VarDecl>(cast<DeclRefExpr>(Ref)->getDecl());
5233 D = D->getCanonicalDecl();
 // Locals/params keep their plain name; globals use the mangled name so the
 // generated symbol stays unique across translation units.
5234 std::string Name = CGM.getOpenMPRuntime().getName(
5235 {D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(D)});
5236 Out << Prefix << Name << "_"
 // NOTE(review): the operand streamed after "_" (rendered line 5237 — per the
 // format comment above, the decl's start-loc raw encoding) was lost in
 // extraction; restore from upstream before compiling.
5238 return std::string(Out.str());
5239}
5240
5241/// Emits reduction initializer function:
5242/// \code
5243/// void @.red_init(void* %arg, void* %orig) {
5244/// %0 = bitcast void* %arg to <type>*
5245/// store <type> <init>, <type>* %0
5246/// ret void
5247/// }
5248/// \endcode
// NOTE(review): several continuation lines (rendered 5257, 5259, 5274, 5281 —
// ImplicitParamKind arguments, the withElementType() chain and the
// getAddrOfArtificialThreadPrivate call head) were lost in extraction.
5249static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM,
5250 SourceLocation Loc,
5251 ReductionCodeGen &RCG, unsigned N) {
5252 ASTContext &C = CGM.getContext();
 // Both parameters are void* restrict — the runtime guarantees no aliasing.
5253 QualType VoidPtrTy = C.VoidPtrTy;
5254 VoidPtrTy.addRestrict();
5255 FunctionArgList Args;
5256 ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
5258 ImplicitParamDecl ParamOrig(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
5260 Args.emplace_back(&Param);
5261 Args.emplace_back(&ParamOrig);
5262 const auto &FnInfo =
5263 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5264 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
 // Internal-linkage helper; the runtime calls it through a void* slot.
5265 std::string Name = CGM.getOpenMPRuntime().getName({"red_init", ""});
5266 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
5267 Name, &CGM.getModule());
5268 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
5269 Fn->setDoesNotRecurse();
5270 CodeGenFunction CGF(CGM);
5271 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
5272 QualType PrivateType = RCG.getPrivateType(N);
5273 Address PrivateAddr = CGF.EmitLoadOfPointer(
5275 CGF.ConvertTypeForMem(PrivateType)->getPointerTo()),
5276 C.getPointerType(PrivateType)->castAs<PointerType>());
5277 llvm::Value *Size = nullptr;
5278 // If the size of the reduction item is non-constant, load it from global
5279 // threadprivate variable.
5280 if (RCG.getSizes(N).second) {
5282 CGF, CGM.getContext().getSizeType(),
5283 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
5284 Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
5285 CGM.getContext().getSizeType(), Loc);
5286 }
5287 RCG.emitAggregateType(CGF, N, Size);
5288 Address OrigAddr = Address::invalid();
5289 // If initializer uses initializer from declare reduction construct, emit a
5290 // pointer to the address of the original reduction item (required by reduction
5291 // initializer)
5292 if (RCG.usesReductionInitializer(N)) {
5293 Address SharedAddr = CGF.GetAddrOfLocalVar(&ParamOrig);
5294 OrigAddr = CGF.EmitLoadOfPointer(
5295 SharedAddr,
5296 CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr());
5297 }
5298 // Emit the initializer:
5299 // %0 = bitcast void* %arg to <type>*
5300 // store <type> <init>, <type>* %0
5301 RCG.emitInitialization(CGF, N, PrivateAddr, OrigAddr,
5302 [](CodeGenFunction &) { return false; });
5303 CGF.FinishFunction();
5304 return Fn;
5305}
5306
5307/// Emits reduction combiner function:
5308/// \code
5309/// void @.red_comb(void* %arg0, void* %arg1) {
5310/// %lhs = bitcast void* %arg0 to <type>*
5311/// %rhs = bitcast void* %arg1 to <type>*
5312/// %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs)
5313/// store <type> %2, <type>* %lhs
5314/// ret void
5315/// }
5316/// \endcode
// NOTE(review): continuation lines (rendered 5330, 5347, 5361, 5363, 5369,
// 5377 — ImplicitParamKind argument, getAddrOfArtificialThreadPrivate and
// EmitLoadOfPointer/withElementType call heads, and the
// emitSingleReductionCombiner call head) were lost in extraction.
5317static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM,
5318 SourceLocation Loc,
5319 ReductionCodeGen &RCG, unsigned N,
5320 const Expr *ReductionOp,
5321 const Expr *LHS, const Expr *RHS,
5322 const Expr *PrivateRef) {
5323 ASTContext &C = CGM.getContext();
 // LHS/RHS are the DeclRefExprs used inside ReductionOp; their decls get
 // remapped to the function's void* arguments below.
5324 const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl());
5325 const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl());
5326 FunctionArgList Args;
5327 ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
5328 C.VoidPtrTy, ImplicitParamKind::Other);
5329 ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
5331 Args.emplace_back(&ParamInOut);
5332 Args.emplace_back(&ParamIn);
5333 const auto &FnInfo =
5334 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5335 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
5336 std::string Name = CGM.getOpenMPRuntime().getName({"red_comb", ""});
5337 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
5338 Name, &CGM.getModule());
5339 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
5340 Fn->setDoesNotRecurse();
5341 CodeGenFunction CGF(CGM);
5342 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
5343 llvm::Value *Size = nullptr;
5344 // If the size of the reduction item is non-constant, load it from global
5345 // threadprivate variable.
5346 if (RCG.getSizes(N).second) {
5348 CGF, CGM.getContext().getSizeType(),
5349 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
5350 Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
5351 CGM.getContext().getSizeType(), Loc);
5352 }
5353 RCG.emitAggregateType(CGF, N, Size);
5354 // Remap lhs and rhs variables to the addresses of the function arguments.
5355 // %lhs = bitcast void* %arg0 to <type>*
5356 // %rhs = bitcast void* %arg1 to <type>*
5357 CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
5358 PrivateScope.addPrivate(
5359 LHSVD,
5360 // Pull out the pointer to the variable.
5362 CGF.GetAddrOfLocalVar(&ParamInOut)
5364 CGF.ConvertTypeForMem(LHSVD->getType())->getPointerTo()),
5365 C.getPointerType(LHSVD->getType())->castAs<PointerType>()));
5366 PrivateScope.addPrivate(
5367 RHSVD,
5368 // Pull out the pointer to the variable.
5370 CGF.GetAddrOfLocalVar(&ParamIn).withElementType(
5371 CGF.ConvertTypeForMem(RHSVD->getType())->getPointerTo()),
5372 C.getPointerType(RHSVD->getType())->castAs<PointerType>()));
5373 PrivateScope.Privatize();
5374 // Emit the combiner body:
5375 // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs)
5376 // store <type> %2, <type>* %lhs
5378 CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS),
5379 cast<DeclRefExpr>(RHS));
5380 CGF.FinishFunction();
5381 return Fn;
5382}
5383
5384/// Emits reduction finalizer function:
5385/// \code
5386/// void @.red_fini(void* %arg) {
5387/// %0 = bitcast void* %arg to <type>*
5388/// <destroy>(<type>* %0)
5389/// ret void
5390/// }
5391/// \endcode
// NOTE(review): two continuation lines (rendered 5400 — ImplicitParamKind
// argument — and 5418 — getAddrOfArtificialThreadPrivate call head) were lost
// in extraction.
5392static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM,
5393 SourceLocation Loc,
5394 ReductionCodeGen &RCG, unsigned N) {
 // No finalizer is needed (and none is emitted) for items without cleanups;
 // the caller stores a null pointer in the descriptor instead.
5395 if (!RCG.needCleanups(N))
5396 return nullptr;
5397 ASTContext &C = CGM.getContext();
5398 FunctionArgList Args;
5399 ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
5401 Args.emplace_back(&Param);
5402 const auto &FnInfo =
5403 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5404 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
5405 std::string Name = CGM.getOpenMPRuntime().getName({"red_fini", ""});
5406 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
5407 Name, &CGM.getModule());
5408 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
5409 Fn->setDoesNotRecurse();
5410 CodeGenFunction CGF(CGM);
5411 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
5412 Address PrivateAddr = CGF.EmitLoadOfPointer(
5413 CGF.GetAddrOfLocalVar(&Param), C.VoidPtrTy.castAs<PointerType>());
5414 llvm::Value *Size = nullptr;
5415 // If the size of the reduction item is non-constant, load it from global
5416 // threadprivate variable.
5417 if (RCG.getSizes(N).second) {
5419 CGF, CGM.getContext().getSizeType(),
5420 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
5421 Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
5422 CGM.getContext().getSizeType(), Loc);
5423 }
5424 RCG.emitAggregateType(CGF, N, Size);
5425 // Emit the finalizer body:
5426 // <destroy>(<type>* %0)
5427 RCG.emitCleanups(CGF, N, PrivateAddr);
5428 CGF.FinishFunction(Loc);
5429 return Fn;
5430}
5431
/// Emits initialization code for OpenMP task reductions: builds an array of
/// kmp_taskred_input_t descriptors (one per reduction item, each carrying the
/// shared/orig addresses, size, and init/fini/comb helper functions) and calls
/// __kmpc_taskred_modifier_init or __kmpc_taskred_init; returns the opaque
/// taskgroup data pointer produced by the runtime.
// NOTE(review): the declarator lines (rendered 5432-5433, the
// 'CGOpenMPRuntime::emitTaskReductionInit(CodeGenFunction &CGF, ...)' head)
// and a few continuation lines (rendered 5448, 5521, 5539, 5551) were lost in
// extraction; restore from upstream before compiling.
5434 ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
5435 if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty())
5436 return nullptr;
5437
5438 // Build typedef struct:
5439 // kmp_taskred_input {
5440 // void *reduce_shar; // shared reduction item
5441 // void *reduce_orig; // original reduction item used for initialization
5442 // size_t reduce_size; // size of data item
5443 // void *reduce_init; // data initialization routine
5444 // void *reduce_fini; // data finalization routine
5445 // void *reduce_comb; // data combiner routine
5446 // kmp_task_red_flags_t flags; // flags for additional info from compiler
5447 // } kmp_taskred_input_t;
5449 RecordDecl *RD = C.buildImplicitRecord("kmp_taskred_input_t");
5450 RD->startDefinition();
5451 const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
5452 const FieldDecl *OrigFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
5453 const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType());
5454 const FieldDecl *InitFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
5455 const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
5456 const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
5457 const FieldDecl *FlagsFD = addFieldToRecordDecl(
5458 C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false))
5459 RD->completeDefinition();
5460 QualType RDType = C.getRecordType(RD);
5461 unsigned Size = Data.ReductionVars.size();
5462 llvm::APInt ArraySize(/*numBits=*/64, Size);
5463 QualType ArrayRDType =
5464 C.getConstantArrayType(RDType, ArraySize, nullptr,
5465 ArraySizeModifier::Normal, /*IndexTypeQuals=*/0);
5466 // kmp_task_red_input_t .rd_input.[Size];
5467 RawAddress TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input.");
5468 ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionOrigs,
5469 Data.ReductionCopies, Data.ReductionOps);
 // Fill one descriptor per reduction item.
5470 for (unsigned Cnt = 0; Cnt < Size; ++Cnt) {
5471 // kmp_task_red_input_t &ElemLVal = .rd_input.[Cnt];
5472 llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0),
5473 llvm::ConstantInt::get(CGM.SizeTy, Cnt)};
5474 llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP(
5475 TaskRedInput.getElementType(), TaskRedInput.getPointer(), Idxs,
5476 /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc,
5477 ".rd_input.gep.");
5478 LValue ElemLVal = CGF.MakeNaturalAlignRawAddrLValue(GEP, RDType);
5479 // ElemLVal.reduce_shar = &Shareds[Cnt];
5480 LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD);
5481 RCG.emitSharedOrigLValue(CGF, Cnt);
5482 llvm::Value *Shared = RCG.getSharedLValue(Cnt).getPointer(CGF);
5483 CGF.EmitStoreOfScalar(Shared, SharedLVal);
5484 // ElemLVal.reduce_orig = &Origs[Cnt];
5485 LValue OrigLVal = CGF.EmitLValueForField(ElemLVal, OrigFD);
5486 llvm::Value *Orig = RCG.getOrigLValue(Cnt).getPointer(CGF);
5487 CGF.EmitStoreOfScalar(Orig, OrigLVal);
5488 RCG.emitAggregateType(CGF, Cnt);
5489 llvm::Value *SizeValInChars;
5490 llvm::Value *SizeVal;
5491 std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt);
5492 // We use delayed creation/initialization for VLAs and array sections. It is
5493 // required because runtime does not provide the way to pass the sizes of
5494 // VLAs/array sections to initializer/combiner/finalizer functions. Instead
5495 // threadprivate global variables are used to store these values and use
5496 // them in the functions.
5497 bool DelayedCreation = !!SizeVal;
5498 SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy,
5499 /*isSigned=*/false);
5500 LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD);
5501 CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal);
5502 // ElemLVal.reduce_init = init;
5503 LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD);
5504 llvm::Value *InitAddr = emitReduceInitFunction(CGM, Loc, RCG, Cnt);
5505 CGF.EmitStoreOfScalar(InitAddr, InitLVal);
5506 // ElemLVal.reduce_fini = fini;
5507 LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD);
5508 llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt);
 // Null fini pointer tells the runtime no per-item cleanup is needed.
5509 llvm::Value *FiniAddr =
5510 Fini ? Fini : llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
5511 CGF.EmitStoreOfScalar(FiniAddr, FiniLVal);
5512 // ElemLVal.reduce_comb = comb;
5513 LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD);
5514 llvm::Value *CombAddr = emitReduceCombFunction(
5515 CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt],
5516 RHSExprs[Cnt], Data.ReductionCopies[Cnt]);
5517 CGF.EmitStoreOfScalar(CombAddr, CombLVal);
5518 // ElemLVal.flags = 0;
5519 LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD);
 // Flag value 1 = lazy (delayed) item creation for VLAs/array sections.
5520 if (DelayedCreation) {
5522 llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*isSigned=*/true),
5523 FlagsLVal);
5524 } else
5525 CGF.EmitNullInitialization(FlagsLVal.getAddress(CGF),
5526 FlagsLVal.getType());
5527 }
5528 if (Data.IsReductionWithTaskMod) {
5529 // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int
5530 // is_ws, int num, void *data);
5531 llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
5532 llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
5533 CGM.IntTy, /*isSigned=*/true);
5534 llvm::Value *Args[] = {
5535 IdentTLoc, GTid,
5536 llvm::ConstantInt::get(CGM.IntTy, Data.IsWorksharingReduction ? 1 : 0,
5537 /*isSigned=*/true),
5538 llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
5540 TaskRedInput.getPointer(), CGM.VoidPtrTy)};
5541 return CGF.EmitRuntimeCall(
5542 OMPBuilder.getOrCreateRuntimeFunction(
5543 CGM.getModule(), OMPRTL___kmpc_taskred_modifier_init),
5544 Args);
5545 }
5546 // Build call void *__kmpc_taskred_init(int gtid, int num_data, void *data);
5547 llvm::Value *Args[] = {
5548 CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
5549 /*isSigned=*/true),
5550 llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
5552 CGM.VoidPtrTy)};
5553 return CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
5554 CGM.getModule(), OMPRTL___kmpc_taskred_init),
5555 Args);
5556}
5557
5559 SourceLocation Loc,
5560 bool IsWorksharingReduction) {
5561 // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int
5562 // is_ws, int num, void *data);
5563 llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
5564 llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
5565 CGM.IntTy, /*isSigned=*/true);
5566 llvm::Value *Args[] = {IdentTLoc, GTid,
5567 llvm::ConstantInt::get(CGM.IntTy,
5568 IsWorksharingReduction ? 1 : 0,
5569 /*isSigned=*/true)};
5570 (void)CGF.EmitRuntimeCall(
5571 OMPBuilder.getOrCreateRuntimeFunction(
5572 CGM.getModule(), OMPRTL___kmpc_task_reduction_modifier_fini),
5573 Args);
5574}
5575
5577 SourceLocation Loc,
5578 ReductionCodeGen &RCG,
5579 unsigned N) {
5580 auto Sizes = RCG.getSizes(N);
5581 // Emit threadprivate global variable if the type is non-constant
5582 // (Sizes.second = nullptr).
5583 if (Sizes.second) {
5584 llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy,
5585 /*isSigned=*/false);
5587 CGF, CGM.getContext().getSizeType(),
5588 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
5589 CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false);
5590 }
5591}
5592
5594 SourceLocation Loc,
5595 llvm::Value *ReductionsPtr,
5596 LValue SharedLVal) {
5597 // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
5598 // *d);
5599 llvm::Value *Args[] = {CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
5600 CGM.IntTy,
5601 /*isSigned=*/true),
5602 ReductionsPtr,
5604 SharedLVal.getPointer(CGF), CGM.VoidPtrTy)};
5605 return Address(
5606 CGF.EmitRuntimeCall(
5607 OMPBuilder.getOrCreateRuntimeFunction(
5608 CGM.getModule(), OMPRTL___kmpc_task_reduction_get_th_data),
5609 Args),
5610 CGF.Int8Ty, SharedLVal.getAlignment());
5611}
5612
/// Emits code for a 'taskwait' directive: either via the OpenMPIRBuilder (when
/// enabled and there are no dependences) or via __kmpc_omp_taskwait_deps_51 /
/// __kmpc_omp_taskwait runtime calls.
// NOTE(review): the declarator line (rendered 5613, the
// 'CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF, SourceLocation
// Loc,' head) was lost in extraction; restore from upstream before compiling.
5614 const OMPTaskDataTy &Data) {
5615 if (!CGF.HaveInsertPoint())
5616 return;
5617
5618 if (CGF.CGM.getLangOpts().OpenMPIRBuilder && Data.Dependences.empty()) {
5619 // TODO: Need to support taskwait with dependences in the OpenMPIRBuilder.
5620 OMPBuilder.createTaskwait(CGF.Builder);
5621 } else {
5622 llvm::Value *ThreadID = getThreadID(CGF, Loc);
5623 llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
5624 auto &M = CGM.getModule();
5625 Address DependenciesArray = Address::invalid();
5626 llvm::Value *NumOfElements;
5627 std::tie(NumOfElements, DependenciesArray) =
5628 emitDependClause(CGF, Data.Dependences, Loc);
5629 if (!Data.Dependences.empty()) {
5630 llvm::Value *DepWaitTaskArgs[7];
5631 DepWaitTaskArgs[0] = UpLoc;
5632 DepWaitTaskArgs[1] = ThreadID;
5633 DepWaitTaskArgs[2] = NumOfElements;
5634 DepWaitTaskArgs[3] = DependenciesArray.emitRawPointer(CGF);
 // No noalias dependence list is emitted: ndeps_noalias = 0, list = null.
5635 DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
5636 DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
5637 DepWaitTaskArgs[6] =
5638 llvm::ConstantInt::get(CGF.Int32Ty, Data.HasNowaitClause);
5639
5640 CodeGenFunction::RunCleanupsScope LocalScope(CGF);
5641
5642 // Build void __kmpc_omp_taskwait_deps_51(ident_t *, kmp_int32 gtid,
5643 // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
5644 // ndeps_noalias, kmp_depend_info_t *noalias_dep_list,
5645 // kmp_int32 has_no_wait); if dependence info is specified.
5646 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
5647 M, OMPRTL___kmpc_omp_taskwait_deps_51),
5648 DepWaitTaskArgs);
5649
5650 } else {
5651
5652 // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
5653 // global_tid);
5654 llvm::Value *Args[] = {UpLoc, ThreadID};
5655 // Ignore return result until untied tasks are supported.
5656 CGF.EmitRuntimeCall(
5657 OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_taskwait),
5658 Args);
5659 }
5660 }
5661
 // Untied tasks re-enter their body through a switch; emit the resume point.
5662 if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
5663 Region->emitUntiedSwitch(CGF);
5664}
5665
5667 OpenMPDirectiveKind InnerKind,
5668 const RegionCodeGenTy &CodeGen,
5669 bool HasCancel) {
5670 if (!CGF.HaveInsertPoint())
5671 return;
5672 InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel,
5673 InnerKind != OMPD_critical &&
5674 InnerKind != OMPD_master &&
5675 InnerKind != OMPD_masked);
5676 CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);
5677}
5678
5679namespace {
// Cancellation-kind constants passed verbatim as the kmp_int32 cncl_kind
// argument of the __kmpc_cancel / __kmpc_cancellationpoint runtime calls, so
// the explicit values must be preserved.
5680enum RTCancelKind {
5681 CancelNoreq = 0,
5682 CancelParallel = 1,
5683 CancelLoop = 2,
5684 CancelSections = 3,
5685 CancelTaskgroup = 4
5686};
5687} // anonymous namespace
5688
5689static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) {
5690 RTCancelKind CancelKind = CancelNoreq;
5691 if (CancelRegion == OMPD_parallel)
5692 CancelKind = CancelParallel;
5693 else if (CancelRegion == OMPD_for)
5694 CancelKind = CancelLoop;
5695 else if (CancelRegion == OMPD_sections)
5696 CancelKind = CancelSections;
5697 else {
5698 assert(CancelRegion == OMPD_taskgroup);
5699 CancelKind = CancelTaskgroup;
5700 }
5701 return CancelKind;
5702}
5703
/// Emits code for a 'cancellation point' directive: calls
/// __kmpc_cancellationpoint and, when it returns non-zero, branches out of the
/// cancelled construct (after a cancel barrier for 'parallel').
// NOTE(review): the declarator lines (rendered 5704-5705, the
// 'CGOpenMPRuntime::emitCancellationPointCall(CodeGenFunction &CGF,
// SourceLocation Loc,' head) were lost in extraction; restore from upstream.
5706 OpenMPDirectiveKind CancelRegion) {
5707 if (!CGF.HaveInsertPoint())
5708 return;
5709 // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
5710 // global_tid, kmp_int32 cncl_kind);
5711 if (auto *OMPRegionInfo =
5712 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
5713 // For 'cancellation point taskgroup', the task region info may not have a
5714 // cancel. This may instead happen in another adjacent task.
5715 if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) {
5716 llvm::Value *Args[] = {
5717 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
5718 CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
5719 // Ignore return result until untied tasks are supported.
5720 llvm::Value *Result = CGF.EmitRuntimeCall(
5721 OMPBuilder.getOrCreateRuntimeFunction(
5722 CGM.getModule(), OMPRTL___kmpc_cancellationpoint),
5723 Args);
5724 // if (__kmpc_cancellationpoint()) {
5725 // call i32 @__kmpc_cancel_barrier( // for parallel cancellation only
5726 // exit from construct;
5727 // }
5728 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
5729 llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
5730 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
5731 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
5732 CGF.EmitBlock(ExitBB);
5733 if (CancelRegion == OMPD_parallel)
5734 emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
5735 // exit from construct;
5736 CodeGenFunction::JumpDest CancelDest =
5737 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
5738 CGF.EmitBranchThroughCleanup(CancelDest);
5739 CGF.EmitBlock(ContBB, /*IsFinished=*/true);
5740 }
5741 }
5742}
5743
/// Emits code for a 'cancel' directive, optionally guarded by an 'if' clause:
/// calls __kmpc_cancel and, when it returns non-zero, branches out of the
/// cancelled construct (after a cancel barrier for 'parallel').
// NOTE(review): the declarator line (rendered 5744, the
// 'CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc,'
// head) and the 'CGOpenMPRuntime &RT = ...' line inside the lambda (rendered
// 5756) were lost in extraction; restore from upstream before compiling.
5745 const Expr *IfCond,
5746 OpenMPDirectiveKind CancelRegion) {
5747 if (!CGF.HaveInsertPoint())
5748 return;
5749 // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
5750 // kmp_int32 cncl_kind);
5751 auto &M = CGM.getModule();
5752 if (auto *OMPRegionInfo =
5753 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
 // Emitted lazily so the 'if' clause can skip the whole cancel sequence.
5754 auto &&ThenGen = [this, &M, Loc, CancelRegion,
5755 OMPRegionInfo](CodeGenFunction &CGF, PrePostActionTy &) {
5757 llvm::Value *Args[] = {
5758 RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc),
5759 CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
5760 // Ignore return result until untied tasks are supported.
5761 llvm::Value *Result = CGF.EmitRuntimeCall(
5762 OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_cancel), Args);
5763 // if (__kmpc_cancel()) {
5764 // call i32 @__kmpc_cancel_barrier( // for parallel cancellation only
5765 // exit from construct;
5766 // }
5767 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
5768 llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
5769 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
5770 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
5771 CGF.EmitBlock(ExitBB);
5772 if (CancelRegion == OMPD_parallel)
5773 RT.emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
5774 // exit from construct;
5775 CodeGenFunction::JumpDest CancelDest =
5776 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
5777 CGF.EmitBranchThroughCleanup(CancelDest);
5778 CGF.EmitBlock(ContBB, /*IsFinished=*/true);
5779 };
5780 if (IfCond) {
 // 'if' clause present: only cancel when the condition evaluates to true.
5781 emitIfClause(CGF, IfCond, ThenGen,
5782 [](CodeGenFunction &, PrePostActionTy &) {});
5783 } else {
5784 RegionCodeGenTy ThenRCG(ThenGen);
5785 ThenRCG(CGF);
5786 }
5787 }
5788}
5789
5790namespace {
5791/// Cleanup action for uses_allocators support.
// RAII-style pre/post action: initializes every (allocator, traits) pair on
// region entry and destroys them on region exit.
// NOTE(review): the member declaration (rendered 5793) and the
// emitUsesAllocatorsInit/emitUsesAllocatorsFini call heads (rendered 5803 and
// 5811) were lost in extraction; restore from upstream before compiling.
5792class OMPUsesAllocatorsActionTy final : public PrePostActionTy {
5794
5795public:
5796 OMPUsesAllocatorsActionTy(
5797 ArrayRef<std::pair<const Expr *, const Expr *>> Allocators)
5798 : Allocators(Allocators) {}
5799 void Enter(CodeGenFunction &CGF) override {
5800 if (!CGF.HaveInsertPoint())
5801 return;
5802 for (const auto &AllocatorData : Allocators) {
5804 CGF, AllocatorData.first, AllocatorData.second);
5805 }
5806 }
5807 void Exit(CodeGenFunction &CGF) override {
5808 if (!CGF.HaveInsertPoint())
5809 return;
5810 for (const auto &AllocatorData : Allocators) {
5812 AllocatorData.first);
5813 }
5814 }
5815};
5816} // namespace
5817
/// Emits the outlined function for a target region, wrapping the user code
/// generation with allocator init/fini actions for any uses_allocators
/// clauses with traits.
// NOTE(review): the declarator line (rendered 5818) and two statement lines
// (rendered 5823-5824, between the assert and the clause loop — presumably
// the Allocators vector declaration; confirm against upstream) were lost in
// extraction; restore before compiling.
5819 const OMPExecutableDirective &D, StringRef ParentName,
5820 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
5821 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
5822 assert(!ParentName.empty() && "Invalid target entry parent name!");
 // Collect only allocators that carry traits; plain predefined allocators
 // need no init/fini calls.
5825 for (const auto *C : D.getClausesOfKind<OMPUsesAllocatorsClause>()) {
5826 for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
5827 const OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
5828 if (!D.AllocatorTraits)
5829 continue;
5830 Allocators.emplace_back(D.Allocator, D.AllocatorTraits);
5831 }
5832 }
5833 OMPUsesAllocatorsActionTy UsesAllocatorAction(Allocators);
5834 CodeGen.setAction(UsesAllocatorAction);
5835 emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
5836 IsOffloadEntry, CodeGen);
5837}
5838
/// Creates a user-defined allocator for a uses_allocators clause by calling
/// __kmpc_init_allocator with the traits array and stores the returned handle
/// into the allocator variable.
// NOTE(review): the declarator line (rendered 5839, the
// 'CGOpenMPRuntime::emitUsesAllocatorsInit(CodeGenFunction &CGF,' head) and
// the address-cast call head (rendered 5852) were lost in extraction;
// restore from upstream before compiling.
5840 const Expr *Allocator,
5841 const Expr *AllocatorTraits) {
5842 llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
5843 ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
5844 // Use default memspace handle.
5845 llvm::Value *MemSpaceHandle = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
 // The number of traits is the constant array bound of the traits expression.
5846 llvm::Value *NumTraits = llvm::ConstantInt::get(
5847 CGF.IntTy, cast<ConstantArrayType>(
5848 AllocatorTraits->getType()->getAsArrayTypeUnsafe())
5849 ->getSize()
5850 .getLimitedValue());
5851 LValue AllocatorTraitsLVal = CGF.EmitLValue(AllocatorTraits);
5853 AllocatorTraitsLVal.getAddress(CGF), CGF.VoidPtrPtrTy, CGF.VoidPtrTy);
5854 AllocatorTraitsLVal = CGF.MakeAddrLValue(Addr, CGF.getContext().VoidPtrTy,
5855 AllocatorTraitsLVal.getBaseInfo(),
5856 AllocatorTraitsLVal.getTBAAInfo());
5857 llvm::Value *Traits = Addr.emitRawPointer(CGF);
5858
5859 llvm::Value *AllocatorVal =
5860 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
5861 CGM.getModule(), OMPRTL___kmpc_init_allocator),
5862 {ThreadId, MemSpaceHandle, NumTraits, Traits});
5863 // Store to allocator.
 // The allocator variable is declared here (it is private to the region) and
 // then assigned the runtime handle, converted to the variable's type.
5864 CGF.EmitAutoVarAlloca(*cast<VarDecl>(
5865 cast<DeclRefExpr>(Allocator->IgnoreParenImpCasts())->getDecl()));
5866 LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
5867 AllocatorVal =
5868 CGF.EmitScalarConversion(AllocatorVal, CGF.getContext().VoidPtrTy,
5869 Allocator->getType(), Allocator->getExprLoc());
5870 CGF.EmitStoreOfScalar(AllocatorVal, AllocatorLVal);
5871}
5872
5874 const Expr *Allocator) {
5875 llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
5876 ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
5877 LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
5878 llvm::Value *AllocatorVal =
5879 CGF.EmitLoadOfScalar(AllocatorLVal, Allocator->getExprLoc());
5880 AllocatorVal = CGF.EmitScalarConversion(AllocatorVal, Allocator->getType(),
5881 CGF.getContext().VoidPtrTy,
5882 Allocator->getExprLoc());
5883 (void)CGF.EmitRuntimeCall(
5884 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
5885 OMPRTL___kmpc_destroy_allocator),
5886 {ThreadId, AllocatorVal});
5887}
5888
/// Computes the min/max thread and team counts for a target directive by
/// combining the directive's num_teams/thread_limit expressions with any
/// CUDA launch-bounds / AMDGPU flat-work-group-size attributes attached via
/// ompx_attribute clauses.
// NOTE(review): the declarator lines (rendered 5889-5890) and the
// handleAMDGPUFlatWorkGroupSizeAttr call head (rendered 5906) were lost in
// extraction; restore from upstream before compiling.
5891 int32_t &MinThreadsVal, int32_t &MaxThreadsVal, int32_t &MinTeamsVal,
5892 int32_t &MaxTeamsVal) {
5893
5894 getNumTeamsExprForTargetDirective(CGF, D, MinTeamsVal, MaxTeamsVal);
5895 getNumThreadsExprForTargetDirective(CGF, D, MaxThreadsVal,
5896 /*UpperBoundOnly=*/true);
5897
5898 for (auto *C : D.getClausesOfKind<OMPXAttributeClause>()) {
5899 for (auto *A : C->getAttrs()) {
 // -1 means "no upper bound requested by this attribute".
5900 int32_t AttrMinThreadsVal = 1, AttrMaxThreadsVal = -1;
5901 int32_t AttrMinBlocksVal = 1, AttrMaxBlocksVal = -1;
5902 if (auto *Attr = dyn_cast<CUDALaunchBoundsAttr>(A))
5903 CGM.handleCUDALaunchBoundsAttr(nullptr, Attr, &AttrMaxThreadsVal,
5904 &AttrMinBlocksVal, &AttrMaxBlocksVal);
5905 else if (auto *Attr = dyn_cast<AMDGPUFlatWorkGroupSizeAttr>(A))
5907 nullptr, Attr, /*ReqdWGS=*/nullptr, &AttrMinThreadsVal,
5908 &AttrMaxThreadsVal);
5909 else
5910 continue;
5911
 // Tighten the bounds: raise minimums, lower (positive) maximums.
5912 MinThreadsVal = std::max(MinThreadsVal, AttrMinThreadsVal);
5913 if (AttrMaxThreadsVal > 0)
5914 MaxThreadsVal = MaxThreadsVal > 0
5915 ? std::min(MaxThreadsVal, AttrMaxThreadsVal)
5916 : AttrMaxThreadsVal;
5917 MinTeamsVal = std::max(MinTeamsVal, AttrMinBlocksVal);
5918 if (AttrMaxBlocksVal > 0)
5919 MaxTeamsVal = MaxTeamsVal > 0 ? std::min(MaxTeamsVal, AttrMaxBlocksVal)
5920 : AttrMaxBlocksVal;
5921 }
5922 }
5923}
5924
// Emits the outlined function for a target region via the OpenMPIRBuilder,
// then applies target attributes and any AMDGPU waves-per-EU hints.
// NOTE(review): original line 5925 (the member name, presumably
// CGOpenMPRuntime::emitTargetOutlinedFunctionHelper) and lines 5931/5940
// (the EntryInfo initializer and the lambda's CGF.GenerateOpenMPCapturedStmt
// call/return) were dropped by the extraction.
5926 const OMPExecutableDirective &D, StringRef ParentName,
5927 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
5928 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
5929
5930 llvm::TargetRegionEntryInfo EntryInfo =
5932
// Fresh CodeGenFunction dedicated to emitting the outlined body.
5933 CodeGenFunction CGF(CGM, true);
5934 llvm::OpenMPIRBuilder::FunctionGenCallback &&GenerateOutlinedFunction =
5935 [&CGF, &D, &CodeGen](StringRef EntryFnName) {
5936 const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
5937
5938 CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName);
5939 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
5941 };
5942
5943 OMPBuilder.emitTargetRegionFunction(EntryInfo, GenerateOutlinedFunction,
5944 IsOffloadEntry, OutlinedFn, OutlinedFnID);
5945
// The builder may decide no function is needed (e.g. already emitted).
5946 if (!OutlinedFn)
5947 return;
5948
5949 CGM.getTargetCodeGenInfo().setTargetAttributes(nullptr, OutlinedFn, CGM);
5950
// Propagate ompx_attribute AMDGPU waves-per-EU hints onto the kernel.
5951 for (auto *C : D.getClausesOfKind<OMPXAttributeClause>()) {
5952 for (auto *A : C->getAttrs()) {
5953 if (auto *Attr = dyn_cast<AMDGPUWavesPerEUAttr>(A))
5954 CGM.handleAMDGPUWavesPerEUAttr(OutlinedFn, Attr);
5955 }
5956 }
5957}
5958
5959/// Checks if the expression is constant or does not have non-trivial function
5960/// calls.
///
/// Used by getSingleCompoundChild() below to decide which statements can be
/// ignored when searching for a single meaningful child statement.
// NOTE(review): original line 5964 (the start of the return expression,
// presumably "return (E->isEvaluatable(Ctx...) ||") is missing from this
// extraction; only its continuation lines remain.
5961static bool isTrivial(ASTContext &Ctx, const Expr * E) {
5962 // We can skip constant expressions.
5963 // We can skip expressions with trivial calls or simple expressions.
5965 !E->hasNonTrivialCall(Ctx)) &&
5966 !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true);
5967}
5968
// Drills through compound statements to find the single "interesting" child
// of \p Body, skipping trivial expressions, ignorable directives, and
// declarations that don't affect codegen. Returns nullptr if more than one
// meaningful child exists.
// NOTE(review): original line 5969 (the function name, presumably
// CGOpenMPRuntime::getSingleCompoundChild(ASTContext &Ctx,) was dropped by
// the extraction.
5970 const Stmt *Body) {
5971 const Stmt *Child = Body->IgnoreContainers();
// Keep descending while the current candidate is itself a compound stmt.
5972 while (const auto *C = dyn_cast_or_null<CompoundStmt>(Child)) {
5973 Child = nullptr;
5974 for (const Stmt *S : C->body()) {
5975 if (const auto *E = dyn_cast<Expr>(S)) {
5976 if (isTrivial(Ctx, E))
5977 continue;
5978 }
5979 // Some of the statements can be ignored.
5980 if (isa<AsmStmt>(S) || isa<NullStmt>(S) || isa<OMPFlushDirective>(S) ||
5981 isa<OMPBarrierDirective>(S) || isa<OMPTaskyieldDirective>(S))
5982 continue;
5983 // Analyze declarations.
5984 if (const auto *DS = dyn_cast<DeclStmt>(S)) {
// A DeclStmt is ignorable only if every declaration in it is ignorable.
5985 if (llvm::all_of(DS->decls(), [](const Decl *D) {
5986 if (isa<EmptyDecl>(D) || isa<DeclContext>(D) ||
5987 isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) ||
5988 isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) ||
5989 isa<UsingDirectiveDecl>(D) ||
5990 isa<OMPDeclareReductionDecl>(D) ||
5991 isa<OMPThreadPrivateDecl>(D) || isa<OMPAllocateDecl>(D))
5992 return true;
5993 const auto *VD = dyn_cast<VarDecl>(D);
5994 if (!VD)
5995 return false;
// Unused or global variables don't count as a meaningful child.
5996 return VD->hasGlobalStorage() || !VD->isUsed();
5997 }))
5998 continue;
5999 }
6000 // Found multiple children - cannot get the one child only.
6001 if (Child)
6002 return nullptr;
6003 Child = S;
6004 }
6005 if (Child)
6006 Child = Child->IgnoreContainers();
6007 }
6008 return Child;
6009}
6010
// Determines the num_teams expression (if any) for a target directive and
// fills in constant min/max team counts when they can be computed statically:
//   >0  known constant count; 0  runtime-chosen; -1  no teams region needed.
// NOTE(review): original line 6011 (the member name, presumably
// const Expr *CGOpenMPRuntime::getNumTeamsExprForTargetDirective() and lines
// 6024/6058 (the getSingleCompoundChild call and the hasClausesOfKind<
// OMPNumTeamsClause> guard) were dropped by the extraction.
6012 CodeGenFunction &CGF, const OMPExecutableDirective &D, int32_t &MinTeamsVal,
6013 int32_t &MaxTeamsVal) {
6014
6015 OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
6016 assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
6017 "Expected target-based executable directive.");
6018 switch (DirectiveKind) {
6019 case OMPD_target: {
// A bare 'target' may wrap a nested teams/parallel/simd directive; inspect
// the single meaningful child to decide the team counts.
6020 const auto *CS = D.getInnermostCapturedStmt();
6021 const auto *Body =
6022 CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
6023 const Stmt *ChildStmt =
6025 if (const auto *NestedDir =
6026 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
6027 if (isOpenMPTeamsDirective(NestedDir->getDirectiveKind())) {
6028 if (NestedDir->hasClausesOfKind<OMPNumTeamsClause>()) {
6029 const Expr *NumTeams =
6030 NestedDir->getSingleClause<OMPNumTeamsClause>()->getNumTeams();
// Fold a constant num_teams into both min and max.
6031 if (NumTeams->isIntegerConstantExpr(CGF.getContext()))
6032 if (auto Constant =
6033 NumTeams->getIntegerConstantExpr(CGF.getContext()))
6034 MinTeamsVal = MaxTeamsVal = Constant->getExtValue();
6035 return NumTeams;
6036 }
// Teams directive without num_teams: runtime decides (0).
6037 MinTeamsVal = MaxTeamsVal = 0;
6038 return nullptr;
6039 }
6040 if (isOpenMPParallelDirective(NestedDir->getDirectiveKind()) ||
6041 isOpenMPSimdDirective(NestedDir->getDirectiveKind())) {
6042 MinTeamsVal = MaxTeamsVal = 1;
6043 return nullptr;
6044 }
6045 MinTeamsVal = MaxTeamsVal = 1;
6046 return nullptr;
6047 }
6048 // A value of -1 is used to check if we need to emit no teams region
6049 MinTeamsVal = MaxTeamsVal = -1;
6050 return nullptr;
6051 }
6052 case OMPD_target_teams_loop:
6053 case OMPD_target_teams:
6054 case OMPD_target_teams_distribute:
6055 case OMPD_target_teams_distribute_simd:
6056 case OMPD_target_teams_distribute_parallel_for:
6057 case OMPD_target_teams_distribute_parallel_for_simd: {
// Combined target+teams directives carry the num_teams clause directly.
6059 const Expr *NumTeams =
6060 D.getSingleClause<OMPNumTeamsClause>()->getNumTeams();
6061 if (NumTeams->isIntegerConstantExpr(CGF.getContext()))
6062 if (auto Constant = NumTeams->getIntegerConstantExpr(CGF.getContext()))
6063 MinTeamsVal = MaxTeamsVal = Constant->getExtValue();
6064 return NumTeams;
6065 }
6066 MinTeamsVal = MaxTeamsVal = 0;
6067 return nullptr;
6068 }
6069 case OMPD_target_parallel:
6070 case OMPD_target_parallel_for:
6071 case OMPD_target_parallel_for_simd:
6072 case OMPD_target_parallel_loop:
6073 case OMPD_target_simd:
// Parallel/simd-only target regions always run with exactly one team.
6074 MinTeamsVal = MaxTeamsVal = 1;
6075 return nullptr;
// All remaining directive kinds are not target execution directives and
// are rejected by the assert above; listed explicitly to keep the switch
// exhaustive.
6076 case OMPD_parallel:
6077 case OMPD_for:
6078 case OMPD_parallel_for:
6079 case OMPD_parallel_loop:
6080 case OMPD_parallel_master:
6081 case OMPD_parallel_sections:
6082 case OMPD_for_simd:
6083 case OMPD_parallel_for_simd:
6084 case OMPD_cancel:
6085 case OMPD_cancellation_point:
6086 case OMPD_ordered:
6087 case OMPD_threadprivate:
6088 case OMPD_allocate:
6089 case OMPD_task:
6090 case OMPD_simd:
6091 case OMPD_tile:
6092 case OMPD_unroll:
6093 case OMPD_sections:
6094 case OMPD_section:
6095 case OMPD_single:
6096 case OMPD_master:
6097 case OMPD_critical:
6098 case OMPD_taskyield:
6099 case OMPD_barrier:
6100 case OMPD_taskwait:
6101 case OMPD_taskgroup:
6102 case OMPD_atomic:
6103 case OMPD_flush:
6104 case OMPD_depobj:
6105 case OMPD_scan:
6106 case OMPD_teams:
6107 case OMPD_target_data:
6108 case OMPD_target_exit_data:
6109 case OMPD_target_enter_data:
6110 case OMPD_distribute:
6111 case OMPD_distribute_simd:
6112 case OMPD_distribute_parallel_for:
6113 case OMPD_distribute_parallel_for_simd:
6114 case OMPD_teams_distribute:
6115 case OMPD_teams_distribute_simd:
6116 case OMPD_teams_distribute_parallel_for:
6117 case OMPD_teams_distribute_parallel_for_simd:
6118 case OMPD_target_update:
6119 case OMPD_declare_simd:
6120 case OMPD_declare_variant:
6121 case OMPD_begin_declare_variant:
6122 case OMPD_end_declare_variant:
6123 case OMPD_declare_target:
6124 case OMPD_end_declare_target:
6125 case OMPD_declare_reduction:
6126 case OMPD_declare_mapper:
6127 case OMPD_taskloop:
6128 case OMPD_taskloop_simd:
6129 case OMPD_master_taskloop:
6130 case OMPD_master_taskloop_simd:
6131 case OMPD_parallel_master_taskloop:
6132 case OMPD_parallel_master_taskloop_simd:
6133 case OMPD_requires:
6134 case OMPD_metadirective:
6135 case OMPD_unknown:
6136 break;
6137 default:
6138 break;
6139 }
6140 llvm_unreachable("Unexpected directive kind.");
6141}
6142
// Emits the runtime value for the number of teams of a target directive:
// evaluates the num_teams expression when one exists, otherwise materializes
// the constant computed by getNumTeamsExprForTargetDirective.
// NOTE(review): original line 6143 (the member name, presumably
// llvm::Value *CGOpenMPRuntime::emitNumTeamsForTargetDirective) was dropped
// by the extraction.
6144 CodeGenFunction &CGF, const OMPExecutableDirective &D) {
6145 assert(!CGF.getLangOpts().OpenMPIsTargetDevice &&
6146 "Clauses associated with the teams directive expected to be emitted "
6147 "only for the host!");
6148 CGBuilderTy &Bld = CGF.Builder;
6149 int32_t MinNT = -1, MaxNT = -1;
6150 const Expr *NumTeams =
6151 getNumTeamsExprForTargetDirective(CGF, D, MinNT, MaxNT);
6152 if (NumTeams != nullptr) {
6153 OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
6154
6155 switch (DirectiveKind) {
6156 case OMPD_target: {
// num_teams came from a nested teams directive: evaluate it inside the
// captured-statement context so captured variables resolve correctly.
6157 const auto *CS = D.getInnermostCapturedStmt();
6158 CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6159 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6160 llvm::Value *NumTeamsVal = CGF.EmitScalarExpr(NumTeams,
6161 /*IgnoreResultAssign*/ true);
6162 return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
6163 /*isSigned=*/true);
6164 }
6165 case OMPD_target_teams:
6166 case OMPD_target_teams_distribute:
6167 case OMPD_target_teams_distribute_simd:
6168 case OMPD_target_teams_distribute_parallel_for:
6169 case OMPD_target_teams_distribute_parallel_for_simd: {
6170 CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF);
6171 llvm::Value *NumTeamsVal = CGF.EmitScalarExpr(NumTeams,
6172 /*IgnoreResultAssign*/ true);
6173 return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
6174 /*isSigned=*/true);
6175 }
6176 default:
6177 break;
6178 }
6179 }
6180
// No expression: the min/max constants must agree so one value suffices.
// NOTE(review): the assert message says "Num threads" but the values being
// compared are team counts — looks like a copy/paste in the message; confirm
// against upstream before relying on it.
6181 assert(MinNT == MaxNT && "Num threads ranges require handling here.");
6182 return llvm::ConstantInt::get(CGF.Int32Ty, MinNT);
6183}
6184
6185/// Check for a num threads constant value (stored in \p DefaultVal), or
6186/// expression (stored in \p E). If the value is conditional (via an if-clause),
6187/// store the condition in \p CondVal. If \p E, and \p CondVal respectively, are
6188/// nullptr, no expression evaluation is perfomed.
///
/// Inspects the single meaningful child of \p CS: a nested parallel directive
/// contributes its if/num_threads clauses; a nested simd directive forces a
/// single thread. \p UpperBound is updated in place (-1 = unknown, 0 = clause
/// seen but no constant bound, >0 = constant bound).
// NOTE(review): original line 6192 (the initializer of Child, presumably the
// CGOpenMPRuntime::getSingleCompoundChild(...) call) was dropped by the
// extraction.
6189static void getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS,
6190 const Expr **E, int32_t &UpperBound,
6191 bool UpperBoundOnly, llvm::Value **CondVal) {
6193 CGF.getContext(), CS->getCapturedStmt());
6194 const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child);
6195 if (!Dir)
6196 return;
6197
6198 if (isOpenMPParallelDirective(Dir->getDirectiveKind())) {
6199 // Handle if clause. If if clause present, the number of threads is
6200 // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
6201 if (CondVal && Dir->hasClausesOfKind<OMPIfClause>()) {
6202 CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6203 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
// Pick the if clause that applies to 'parallel' (unmodified or explicit).
6204 const OMPIfClause *IfClause = nullptr;
6205 for (const auto *C : Dir->getClausesOfKind<OMPIfClause>()) {
6206 if (C->getNameModifier() == OMPD_unknown ||
6207 C->getNameModifier() == OMPD_parallel) {
6208 IfClause = C;
6209 break;
6210 }
6211 }
6212 if (IfClause) {
6213 const Expr *CondExpr = IfClause->getCondition();
6214 bool Result;
6215 if (CondExpr->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
// Statically false condition: the region runs sequentially.
6216 if (!Result) {
6217 UpperBound = 1;
6218 return;
6219 }
6220 } else {
6221 CodeGenFunction::LexicalScope Scope(CGF, CondExpr->getSourceRange());
// Emit any pre-init declarations the clause needs before evaluating.
6222 if (const auto *PreInit =
6223 cast_or_null<DeclStmt>(IfClause->getPreInitStmt())) {
6224 for (const auto *I : PreInit->decls()) {
6225 if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
6226 CGF.EmitVarDecl(cast<VarDecl>(*I));
6227 } else {
6228 CodeGenFunction::AutoVarEmission Emission =
6229 CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
6230 CGF.EmitAutoVarCleanups(Emission);
6231 }
6232 }
6233 *CondVal = CGF.EvaluateExprAsBool(CondExpr);
6234 }
6235 }
6236 }
6237 }
6238 // Check the value of num_threads clause iff if clause was not specified
6239 // or is not evaluated to false.
6240 if (Dir->hasClausesOfKind<OMPNumThreadsClause>()) {
6241 CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6242 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6243 const auto *NumThreadsClause =
6244 Dir->getSingleClause<OMPNumThreadsClause>();
6245 const Expr *NTExpr = NumThreadsClause->getNumThreads();
// NOTE(review): this ternary takes the std::min branch only when UpperBound
// is 0 (it starts at -1, which is truthy) — verify the intended condition
// against upstream; it reads as if the branches may be swapped.
6246 if (NTExpr->isIntegerConstantExpr(CGF.getContext()))
6247 if (auto Constant = NTExpr->getIntegerConstantExpr(CGF.getContext()))
6248 UpperBound =
6249 UpperBound
6250 ? Constant->getZExtValue()
6251 : std::min(UpperBound,
6252 static_cast<int32_t>(Constant->getZExtValue()));
6253 // If we haven't found a upper bound, remember we saw a thread limiting
6254 // clause.
6255 if (UpperBound == -1)
6256 UpperBound = 0;
6257 if (!E)
6258 return;
6259 CodeGenFunction::LexicalScope Scope(CGF, NTExpr->getSourceRange());
6260 if (const auto *PreInit =
6261 cast_or_null<DeclStmt>(NumThreadsClause->getPreInitStmt())) {
6262 for (const auto *I : PreInit->decls()) {
6263 if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
6264 CGF.EmitVarDecl(cast<VarDecl>(*I));
6265 } else {
6266 CodeGenFunction::AutoVarEmission Emission =
6267 CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
6268 CGF.EmitAutoVarCleanups(Emission);
6269 }
6270 }
6271 }
6272 *E = NTExpr;
6273 }
6274 return;
6275 }
// A simd-only child runs with exactly one thread.
6276 if (isOpenMPSimdDirective(Dir->getDirectiveKind()))
6277 UpperBound = 1;
6278}
6279
// Determines the num_threads expression for a target directive, updating
// \p UpperBound (-1 unknown, 0 runtime, >0 constant), optionally capturing
// an if-clause condition in \p CondVal and a thread_limit expression in
// \p ThreadLimitExpr. Returns the num_threads expression or nullptr.
// NOTE(review): several lines were dropped by the extraction: original 6280
// (the member name), 6317/6351/6371 (getSingleCompoundChild calls), 6328
// (a LexicalScope declaration), 6336 (AutoVarEmission declaration), 6364/
// 6382/6418/6423 (hasClausesOfKind guards and a RunCleanupsScope).
6281 CodeGenFunction &CGF, const OMPExecutableDirective &D, int32_t &UpperBound,
6282 bool UpperBoundOnly, llvm::Value **CondVal, const Expr **ThreadLimitExpr) {
6283 assert((!CGF.getLangOpts().OpenMPIsTargetDevice || UpperBoundOnly) &&
6284 "Clauses associated with the teams directive expected to be emitted "
6285 "only for the host!");
6286 OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
6287 assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
6288 "Expected target-based executable directive.");
6289
6290 const Expr *NT = nullptr;
// When only the bound is wanted, skip collecting the expression itself.
6291 const Expr **NTPtr = UpperBoundOnly ? nullptr : &NT;
6292
// Folds a constant clause expression into UpperBound and records it.
6293 auto CheckForConstExpr = [&](const Expr *E, const Expr **EPtr) {
6294 if (E->isIntegerConstantExpr(CGF.getContext())) {
6295 if (auto Constant = E->getIntegerConstantExpr(CGF.getContext()))
6296 UpperBound = UpperBound ? Constant->getZExtValue()
6297 : std::min(UpperBound,
6298 int32_t(Constant->getZExtValue()));
6299 }
6300 // If we haven't found a upper bound, remember we saw a thread limiting
6301 // clause.
6302 if (UpperBound == -1)
6303 UpperBound = 0;
6304 if (EPtr)
6305 *EPtr = E;
6306 };
6307
// Helper for regions known to execute with a single thread.
6308 auto ReturnSequential = [&]() {
6309 UpperBound = 1;
6310 return NT;
6311 };
6312
6313 switch (DirectiveKind) {
6314 case OMPD_target: {
6315 const CapturedStmt *CS = D.getInnermostCapturedStmt();
6316 getNumThreads(CGF, CS, NTPtr, UpperBound, UpperBoundOnly, CondVal);
6318 CGF.getContext(), CS->getCapturedStmt());
6319 // TODO: The standard is not clear how to resolve two thread limit clauses,
6320 // let's pick the teams one if it's present, otherwise the target one.
6321 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6322 if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
6323 if (const auto *TLC = Dir->getSingleClause<OMPThreadLimitClause>()) {
6324 ThreadLimitClause = TLC;
6325 if (ThreadLimitExpr) {
6326 CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6327 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6329 CGF, ThreadLimitClause->getThreadLimit()->getSourceRange());
6330 if (const auto *PreInit =
6331 cast_or_null<DeclStmt>(ThreadLimitClause->getPreInitStmt())) {
6332 for (const auto *I : PreInit->decls()) {
6333 if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
6334 CGF.EmitVarDecl(cast<VarDecl>(*I));
6335 } else {
6337 CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
6338 CGF.EmitAutoVarCleanups(Emission);
6339 }
6340 }
6341 }
6342 }
6343 }
6344 }
6345 if (ThreadLimitClause)
6346 CheckForConstExpr(ThreadLimitClause->getThreadLimit(), ThreadLimitExpr);
// Descend through a nested teams (non-distribute) directive to find the
// parallel or simd region that actually fixes the thread count.
6347 if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
6348 if (isOpenMPTeamsDirective(Dir->getDirectiveKind()) &&
6349 !isOpenMPDistributeDirective(Dir->getDirectiveKind())) {
6350 CS = Dir->getInnermostCapturedStmt();
6352 CGF.getContext(), CS->getCapturedStmt());
6353 Dir = dyn_cast_or_null<OMPExecutableDirective>(Child);
6354 }
6355 if (Dir && isOpenMPParallelDirective(Dir->getDirectiveKind())) {
6356 CS = Dir->getInnermostCapturedStmt();
6357 getNumThreads(CGF, CS, NTPtr, UpperBound, UpperBoundOnly, CondVal);
6358 } else if (Dir && isOpenMPSimdDirective(Dir->getDirectiveKind()))
6359 return ReturnSequential();
6360 }
6361 return NT;
6362 }
6363 case OMPD_target_teams: {
6365 CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
6366 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6367 CheckForConstExpr(ThreadLimitClause->getThreadLimit(), ThreadLimitExpr);
6368 }
6369 const CapturedStmt *CS = D.getInnermostCapturedStmt();
6370 getNumThreads(CGF, CS, NTPtr, UpperBound, UpperBoundOnly, CondVal);
6372 CGF.getContext(), CS->getCapturedStmt());
6373 if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
6374 if (Dir->getDirectiveKind() == OMPD_distribute) {
6375 CS = Dir->getInnermostCapturedStmt();
6376 getNumThreads(CGF, CS, NTPtr, UpperBound, UpperBoundOnly, CondVal);
6377 }
6378 }
6379 return NT;
6380 }
6381 case OMPD_target_teams_distribute:
6383 CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
6384 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6385 CheckForConstExpr(ThreadLimitClause->getThreadLimit(), ThreadLimitExpr);
6386 }
6387 getNumThreads(CGF, D.getInnermostCapturedStmt(), NTPtr, UpperBound,
6388 UpperBoundOnly, CondVal);
6389 return NT;
6390 case OMPD_target_teams_loop:
6391 case OMPD_target_parallel_loop:
6392 case OMPD_target_parallel:
6393 case OMPD_target_parallel_for:
6394 case OMPD_target_parallel_for_simd:
6395 case OMPD_target_teams_distribute_parallel_for:
6396 case OMPD_target_teams_distribute_parallel_for_simd: {
// Combined parallel directives: honor the parallel-flavored if clause.
6397 if (CondVal && D.hasClausesOfKind<OMPIfClause>()) {
6398 const OMPIfClause *IfClause = nullptr;
6399 for (const auto *C : D.getClausesOfKind<OMPIfClause>()) {
6400 if (C->getNameModifier() == OMPD_unknown ||
6401 C->getNameModifier() == OMPD_parallel) {
6402 IfClause = C;
6403 break;
6404 }
6405 }
6406 if (IfClause) {
6407 const Expr *Cond = IfClause->getCondition();
6408 bool Result;
6409 if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
6410 if (!Result)
6411 return ReturnSequential();
6412 } else {
6414 *CondVal = CGF.EvaluateExprAsBool(Cond);
6415 }
6416 }
6417 }
6419 CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
6420 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6421 CheckForConstExpr(ThreadLimitClause->getThreadLimit(), ThreadLimitExpr);
6422 }
6424 CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
6425 const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>();
6426 CheckForConstExpr(NumThreadsClause->getNumThreads(), nullptr);
6427 return NumThreadsClause->getNumThreads();
6428 }
6429 return NT;
6430 }
6431 case OMPD_target_teams_distribute_simd:
6432 case OMPD_target_simd:
6433 return ReturnSequential();
6434 default:
6435 break;
6436 }
6437 llvm_unreachable("Unsupported directive kind.");
6438}
6439
// Emits the runtime num_threads value for a target directive, combining the
// num_threads expression, an optional thread_limit, and an optional if-clause
// condition into a single i32 value.
// NOTE(review): original line 6440 (the member name, presumably
// llvm::Value *CGOpenMPRuntime::emitNumThreadsForTargetDirective) and line
// 6448 (the "const Expr *NT = getNumThreadsExprForTargetDirective(" call
// start) were dropped by the extraction; NT below comes from that call.
6441 CodeGenFunction &CGF, const OMPExecutableDirective &D) {
6442 llvm::Value *NumThreadsVal = nullptr;
6443 llvm::Value *CondVal = nullptr;
6444 llvm::Value *ThreadLimitVal = nullptr;
6445 const Expr *ThreadLimitExpr = nullptr;
6446 int32_t UpperBound = -1;
6447
6449 CGF, D, UpperBound, /* UpperBoundOnly */ false, &CondVal,
6450 &ThreadLimitExpr);
6451
6452 // Thread limit expressions are used below, emit them.
6453 if (ThreadLimitExpr) {
6454 ThreadLimitVal =
6455 CGF.EmitScalarExpr(ThreadLimitExpr, /*IgnoreResultAssign=*/true);
6456 ThreadLimitVal = CGF.Builder.CreateIntCast(ThreadLimitVal, CGF.Int32Ty,
6457 /*isSigned=*/false);
6458 }
6459
6460 // Generate the num teams expression.
6461 if (UpperBound == 1) {
6462 NumThreadsVal = CGF.Builder.getInt32(UpperBound);
6463 } else if (NT) {
6464 NumThreadsVal = CGF.EmitScalarExpr(NT, /*IgnoreResultAssign=*/true);
6465 NumThreadsVal = CGF.Builder.CreateIntCast(NumThreadsVal, CGF.Int32Ty,
6466 /*isSigned=*/false);
6467 } else if (ThreadLimitVal) {
6468 // If we do not have a num threads value but a thread limit, replace the
6469 // former with the latter. We know handled the thread limit expression.
6470 NumThreadsVal = ThreadLimitVal;
6471 ThreadLimitVal = nullptr;
6472 } else {
6473 // Default to "0" which means runtime choice.
6474 assert(!ThreadLimitVal && "Default not applicable with thread limit value");
6475 NumThreadsVal = CGF.Builder.getInt32(0);
6476 }
6477
6478 // Handle if clause. If if clause present, the number of threads is
6479 // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
6480 if (CondVal) {
// NOTE(review): original line 6481 (a RunCleanupsScope or similar) is
// missing from this extraction.
6482 NumThreadsVal = CGF.Builder.CreateSelect(CondVal, NumThreadsVal,
6483 CGF.Builder.getInt32(1));
6484 }
6485
6486 // If the thread limit and num teams expression were present, take the
6487 // minimum.
6488 if (ThreadLimitVal) {
6489 NumThreadsVal = CGF.Builder.CreateSelect(
6490 CGF.Builder.CreateICmpULT(ThreadLimitVal, NumThreadsVal),
6491 ThreadLimitVal, NumThreadsVal);
6492 }
6493
6494 return NumThreadsVal;
6495}
6496
6497namespace {
6499
6500// Utility to handle information from clauses associated with a given
6501// construct that use mappable expressions (e.g. 'map' clause, 'to' clause).
6502// It provides a convenient interface to obtain the information and generate
6503// code for that information.
6504class MappableExprsHandler {
6505public:
6506 /// Get the offset of the OMP_MAP_MEMBER_OF field.
6507 static unsigned getFlagMemberOffset() {
6508 unsigned Offset = 0;
6509 for (uint64_t Remain =
6510 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
6511 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF);
6512 !(Remain & 1); Remain = Remain >> 1)
6513 Offset++;
6514 return Offset;
6515 }
6516
6517 /// Class that holds debugging information for a data mapping to be passed to
6518 /// the runtime library.
 ///
 /// Plain value type: pairs the declaration being mapped with the original
 /// map-clause expression (null when the mapping is implicit).
6519 class MappingExprInfo {
6520 /// The variable declaration used for the data mapping.
6521 const ValueDecl *MapDecl = nullptr;
6522 /// The original expression used in the map clause, or null if there is
6523 /// none.
6524 const Expr *MapExpr = nullptr;
6525
6526 public:
6527 MappingExprInfo(const ValueDecl *MapDecl, const Expr *MapExpr = nullptr)
6528 : MapDecl(MapDecl), MapExpr(MapExpr) {}
6529
6530 const ValueDecl *getMapDecl() const { return MapDecl; }
6531 const Expr *getMapExpr() const { return MapExpr; }
6532 };
6533
 // Aliases for the OpenMPIRBuilder array types used when lowering map
 // clauses, plus local SmallVector types for per-entry debug info/mappers.
6534 using DeviceInfoTy = llvm::OpenMPIRBuilder::DeviceInfoTy;
6535 using MapBaseValuesArrayTy = llvm::OpenMPIRBuilder::MapValuesArrayTy;
6536 using MapValuesArrayTy = llvm::OpenMPIRBuilder::MapValuesArrayTy;
6537 using MapFlagsArrayTy = llvm::OpenMPIRBuilder::MapFlagsArrayTy;
6538 using MapDimArrayTy = llvm::OpenMPIRBuilder::MapDimArrayTy;
6539 using MapNonContiguousArrayTy =
6540 llvm::OpenMPIRBuilder::MapNonContiguousArrayTy;
6541 using MapExprsArrayTy = SmallVector<MappingExprInfo, 4>;
6542 using MapValueDeclsArrayTy = SmallVector<const ValueDecl *, 4>;
6543
6544 /// This structure contains combined information generated for mappable
6545 /// clauses, including base pointers, pointers, sizes, map types, user-defined
6546 /// mappers, and non-contiguous information.
 ///
 /// Extends the IRBuilder's MapInfosTy with the clang-side parallel arrays
 /// (map expressions, mapper decls, device-pointer decls); all arrays are
 /// kept index-aligned.
6547 struct MapCombinedInfoTy : llvm::OpenMPIRBuilder::MapInfosTy {
6548 MapExprsArrayTy Exprs;
6549 MapValueDeclsArrayTy Mappers;
6550 MapValueDeclsArrayTy DevicePtrDecls;
6551
6552 /// Append arrays in \a CurInfo.
6553 void append(MapCombinedInfoTy &CurInfo) {
6554 Exprs.append(CurInfo.Exprs.begin(), CurInfo.Exprs.end());
6555 DevicePtrDecls.append(CurInfo.DevicePtrDecls.begin(),
6556 CurInfo.DevicePtrDecls.end());
6557 Mappers.append(CurInfo.Mappers.begin(), CurInfo.Mappers.end());
// Delegate the base-class arrays (pointers, sizes, types, ...) last.
6558 llvm::OpenMPIRBuilder::MapInfosTy::append(CurInfo);
6559 }
6560 };
6561
6562 /// Map between a struct and the its lowest & highest elements which have been
6563 /// mapped.
6564 /// [ValueDecl *] --> {LE(FieldIndex, Pointer),
6565 /// HE(FieldIndex, Pointer)}
 // NOTE(review): original lines 6572-6573 (additional members between
 // HighestElem and IsArraySection, presumably the struct's Base address and
 // length) were dropped by this extraction.
6566 struct StructRangeInfoTy {
6567 MapCombinedInfoTy PreliminaryMapData;
6568 std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> LowestElem = {
6569 0, Address::invalid()};
6570 std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> HighestElem = {
6571 0, Address::invalid()};
6574 bool IsArraySection = false;
6575 bool HasCompleteRecord = false;
6576 };
6577
6578private:
6579 /// Kind that defines how a device pointer has to be returned.
 ///
 /// Bundles one map-clause entry: the mappable-expression components, the
 /// map type/modifiers, and flags controlling device-pointer handling.
 // NOTE(review): original lines 6581-6583 (the Components/MapType/
 // MapModifiers member declarations), 6593 and 6595 (the corresponding
 // constructor parameter lines) were dropped by this extraction.
6580 struct MapInfo {
6584 ArrayRef<OpenMPMotionModifierKind> MotionModifiers;
6585 bool ReturnDevicePointer = false;
6586 bool IsImplicit = false;
6587 const ValueDecl *Mapper = nullptr;
6588 const Expr *VarRef = nullptr;
6589 bool ForDeviceAddr = false;
6590
6591 MapInfo() = default;
6592 MapInfo(
6594 OpenMPMapClauseKind MapType,
6596 ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
6597 bool ReturnDevicePointer, bool IsImplicit,
6598 const ValueDecl *Mapper = nullptr, const Expr *VarRef = nullptr,
6599 bool ForDeviceAddr = false)
6600 : Components(Components), MapType(MapType), MapModifiers(MapModifiers),
6601 MotionModifiers(MotionModifiers),
6602 ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit),
6603 Mapper(Mapper), VarRef(VarRef), ForDeviceAddr(ForDeviceAddr) {}
6604 };
6605
6606 /// If use_device_ptr or use_device_addr is used on a decl which is a struct
6607 /// member and there is no map information about it, then emission of that
6608 /// entry is deferred until the whole struct has been processed.
 ///
 /// Simple record: the use_device_* expression, its declaration, and whether
 /// it came from use_device_addr (vs use_device_ptr).
6609 struct DeferredDevicePtrEntryTy {
6610 const Expr *IE = nullptr;
6611 const ValueDecl *VD = nullptr;
6612 bool ForDeviceAddr = false;
6613
6614 DeferredDevicePtrEntryTy(const Expr *IE, const ValueDecl *VD,
6615 bool ForDeviceAddr)
6616 : IE(IE), VD(VD), ForDeviceAddr(ForDeviceAddr) {}
6617 };
6618
6619 /// The target directive from where the mappable clauses were extracted. It
6620 /// is either a executable directive or a user-defined mapper directive.
6621 llvm::PointerUnion<const OMPExecutableDirective *,
6622 const OMPDeclareMapperDecl *>
6623 CurDir;
6624
6625 /// Function the directive is being generated for.
6626 CodeGenFunction &CGF;
6627
6628 /// Set of all first private variables in the current directive.
6629 /// bool data is set to true if the variable is implicitly marked as
6630 /// firstprivate, false otherwise.
6631 llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, bool> FirstPrivateDecls;
6632
6633 /// Map between device pointer declarations and their expression components.
6634 /// The key value for declarations in 'this' is null.
 // NOTE(review): the mapped-to type line (original 6637, presumably a
 // SmallVector of MappableExprComponentListRef) was dropped by this
 // extraction; same for line 6644 in HasDevAddrsMap below.
6635 llvm::DenseMap<
6636 const ValueDecl *,
6638 DevPointersMap;
6639
6640 /// Map between device addr declarations and their expression components.
6641 /// The key value for declarations in 'this' is null.
6642 llvm::DenseMap<
6643 const ValueDecl *,
6645 HasDevAddrsMap;
6646
6647 /// Map between lambda declarations and their map type.
6648 llvm::DenseMap<const ValueDecl *, const OMPMapClause *> LambdasMap;
6649
 /// Computes the byte size to map for expression \p E: handles array-shaping
 /// expressions (product of dimensions * element size), array sections
 /// (length * element size, or size-minus-lower-bound for [lb:] forms), and
 /// falls back to the static type size otherwise.
 // NOTE(review): original lines 6675/6677 (the BaseTy initializer via
 // ArraySectionExpr::getBaseOriginalType(...).getCanonicalType()) were
 // dropped by this extraction.
6650 llvm::Value *getExprTypeSize(const Expr *E) const {
6651 QualType ExprTy = E->getType().getCanonicalType();
6652
6653 // Calculate the size for array shaping expression.
6654 if (const auto *OAE = dyn_cast<OMPArrayShapingExpr>(E)) {
6655 llvm::Value *Size =
6656 CGF.getTypeSize(OAE->getBase()->getType()->getPointeeType());
6657 for (const Expr *SE : OAE->getDimensions()) {
6658 llvm::Value *Sz = CGF.EmitScalarExpr(SE);
// Normalize each dimension to size_t before multiplying.
6659 Sz = CGF.EmitScalarConversion(Sz, SE->getType(),
6660 CGF.getContext().getSizeType(),
6661 SE->getExprLoc());
6662 Size = CGF.Builder.CreateNUWMul(Size, Sz);
6663 }
6664 return Size;
6665 }
6666
6667 // Reference types are ignored for mapping purposes.
6668 if (const auto *RefTy = ExprTy->getAs<ReferenceType>())
6669 ExprTy = RefTy->getPointeeType().getCanonicalType();
6670
6671 // Given that an array section is considered a built-in type, we need to
6672 // do the calculation based on the length of the section instead of relying
6673 // on CGF.getTypeSize(E->getType()).
6674 if (const auto *OAE = dyn_cast<ArraySectionExpr>(E)) {
6676 OAE->getBase()->IgnoreParenImpCasts())
6678
6679 // If there is no length associated with the expression and lower bound is
6680 // not specified too, that means we are using the whole length of the
6681 // base.
6682 if (!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
6683 !OAE->getLowerBound())
6684 return CGF.getTypeSize(BaseTy);
6685
6686 llvm::Value *ElemSize;
6687 if (const auto *PTy = BaseTy->getAs<PointerType>()) {
6688 ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType());
6689 } else {
6690 const auto *ATy = cast<ArrayType>(BaseTy.getTypePtr());
6691 assert(ATy && "Expecting array type if not a pointer type.");
6692 ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType());
6693 }
6694
6695 // If we don't have a length at this point, that is because we have an
6696 // array section with a single element.
6697 if (!OAE->getLength() && OAE->getColonLocFirst().isInvalid())
6698 return ElemSize;
6699
6700 if (const Expr *LenExpr = OAE->getLength()) {
6701 llvm::Value *LengthVal = CGF.EmitScalarExpr(LenExpr);
6702 LengthVal = CGF.EmitScalarConversion(LengthVal, LenExpr->getType(),
6703 CGF.getContext().getSizeType(),
6704 LenExpr->getExprLoc());
6705 return CGF.Builder.CreateNUWMul(LengthVal, ElemSize);
6706 }
6707 assert(!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
6708 OAE->getLowerBound() && "expected array_section[lb:].");
6709 // Size = sizetype - lb * elemtype;
6710 llvm::Value *LengthVal = CGF.getTypeSize(BaseTy);
6711 llvm::Value *LBVal = CGF.EmitScalarExpr(OAE->getLowerBound());
6712 LBVal = CGF.EmitScalarConversion(LBVal, OAE->getLowerBound()->getType(),
6713 CGF.getContext().getSizeType(),
6714 OAE->getLowerBound()->getExprLoc());
6715 LBVal = CGF.Builder.CreateNUWMul(LBVal, ElemSize);
// Clamp to zero if the lower-bound offset exceeds the base size.
6716 llvm::Value *Cmp = CGF.Builder.CreateICmpUGT(LengthVal, LBVal);
6717 llvm::Value *TrueVal = CGF.Builder.CreateNUWSub(LengthVal, LBVal);
6718 LengthVal = CGF.Builder.CreateSelect(
6719 Cmp, TrueVal, llvm::ConstantInt::get(CGF.SizeTy, 0));
6720 return LengthVal;
6721 }
6722 return CGF.getTypeSize(ExprTy);
6723 }
6724
6725 /// Return the corresponding bits for a given map clause modifier. Add
6726 /// a flag marking the map as a pointer if requested. Add a flag marking the
6727 /// map as the first one of a series of maps that relate to the same map
6728 /// expression.
 // NOTE(review): original line 6730 (the first parameter line, presumably
 // "OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind>
 // MapModifiers,") was dropped by this extraction.
6729 OpenMPOffloadMappingFlags getMapTypeBits(
6731 ArrayRef<OpenMPMotionModifierKind> MotionModifiers, bool IsImplicit,
6732 bool AddPtrFlag, bool AddIsTargetParamFlag, bool IsNonContiguous) const {
6733 OpenMPOffloadMappingFlags Bits =
6734 IsImplicit ? OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT
6735 : OpenMPOffloadMappingFlags::OMP_MAP_NONE;
6736 switch (MapType) {
6737 case OMPC_MAP_alloc:
6738 case OMPC_MAP_release:
6739 // alloc and release is the default behavior in the runtime library, i.e.
6740 // if we don't pass any bits alloc/release that is what the runtime is
6741 // going to do. Therefore, we don't need to signal anything for these two
6742 // type modifiers.
6743 break;
6744 case OMPC_MAP_to:
6745 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_TO;
6746 break;
6747 case OMPC_MAP_from:
6748 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_FROM;
6749 break;
6750 case OMPC_MAP_tofrom:
6751 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_TO |
6752 OpenMPOffloadMappingFlags::OMP_MAP_FROM;
6753 break;
6754 case OMPC_MAP_delete:
6755 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_DELETE;
6756 break;
6757 case OMPC_MAP_unknown:
6758 llvm_unreachable("Unexpected map type!");
6759 }
// Fold in the orthogonal flags requested by the caller and the modifiers.
6760 if (AddPtrFlag)
6761 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ;
6762 if (AddIsTargetParamFlag)
6763 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;
6764 if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_always))
6765 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_ALWAYS;
6766 if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_close))
6767 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_CLOSE;
// 'present' can come from either map modifiers or motion modifiers.
6768 if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_present) ||
6769 llvm::is_contained(MotionModifiers, OMPC_MOTION_MODIFIER_present))
6770 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_PRESENT;
6771 if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_ompx_hold))
6772 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD;
6773 if (IsNonContiguous)
6774 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_NON_CONTIG;
6775 return Bits;
6776 }
6777
6778 /// Return true if the provided expression is a final array section. A
6779 /// final array section, is one whose length can't be proved to be one.
6780 bool isFinalArraySectionExpression(const Expr *E) const {
6781 const auto *OASE = dyn_cast<ArraySectionExpr>(E);
6782
6783 // It is not an array section and therefore not a unity-size one.
6784 if (!OASE)
6785 return false;
6786
6787 // An array section with no colon always refer to a single element.
6788 if (OASE->getColonLocFirst().isInvalid())
6789 return false;
6790
6791 const Expr *Length = OASE->getLength();
6792
6793 // If we don't have a length we have to check if the array has size 1
6794 // for this dimension. Also, we should always expect a length if the
6795 // base type is pointer.
6796 if (!Length) {
6798 OASE->getBase()->IgnoreParenImpCasts())
6800 if (const auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr()))
6801 return ATy->getSExtSize() != 1;
6802 // If we don't have a constant dimension length, we have to consider
6803 // the current section as having any size, so it is not necessarily
6804 // unitary. If it happen to be unity size, that's user fault.
6805 return true;
6806 }
6807
6808 // Check if the length evaluates to 1.
6809 Expr::EvalResult Result;
6810 if (!Length->EvaluateAsInt(Result, CGF.getContext()))
6811 return true; // Can have more that size 1.
6812
6813 llvm::APSInt ConstLength = Result.Val.getInt();
6814 return ConstLength.getSExtValue() != 1;
6815 }
6816
6817 /// Generate the base pointers, section pointers, sizes, map type bits, and
6818 /// user-defined mappers (all included in \a CombinedInfo) for the provided
6819 /// map type, map or motion modifiers, and expression components.
6820 /// \a IsFirstComponent should be set to true if the provided set of
6821 /// components is the first associated with a capture.
6822 void generateInfoForComponentList(
6824 ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
6826 MapCombinedInfoTy &CombinedInfo,
6827 MapCombinedInfoTy &StructBaseCombinedInfo,
6828 StructRangeInfoTy &PartialStruct, bool IsFirstComponentList,
6829 bool IsImplicit, bool GenerateAllInfoForClauses,
6830 const ValueDecl *Mapper = nullptr, bool ForDeviceAddr = false,
6831 const ValueDecl *BaseDecl = nullptr, const Expr *MapExpr = nullptr,
6833 OverlappedElements = std::nullopt) const {
6834 // The following summarizes what has to be generated for each map and the
6835 // types below. The generated information is expressed in this order:
6836 // base pointer, section pointer, size, flags
6837 // (to add to the ones that come from the map type and modifier).
6838 //
6839 // double d;
6840 // int i[100];
6841 // float *p;
6842 // int **a = &i;
6843 //
6844 // struct S1 {
6845 // int i;
6846 // float f[50];
6847 // }
6848 // struct S2 {
6849 // int i;
6850 // float f[50];
6851 // S1 s;
6852 // double *p;
6853 // struct S2 *ps;
6854 // int &ref;
6855 // }
6856 // S2 s;
6857 // S2 *ps;
6858 //
6859 // map(d)
6860 // &d, &d, sizeof(double), TARGET_PARAM | TO | FROM
6861 //
6862 // map(i)
6863 // &i, &i, 100*sizeof(int), TARGET_PARAM | TO | FROM
6864 //
6865 // map(i[1:23])
6866 // &i(=&i[0]), &i[1], 23*sizeof(int), TARGET_PARAM | TO | FROM
6867 //
6868 // map(p)
6869 // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM
6870 //
6871 // map(p[1:24])
6872 // &p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM | PTR_AND_OBJ
6873 // in unified shared memory mode or for local pointers
6874 // p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM
6875 //
6876 // map((*a)[0:3])
6877 // &(*a), &(*a), sizeof(pointer), TARGET_PARAM | TO | FROM
6878 // &(*a), &(*a)[0], 3*sizeof(int), PTR_AND_OBJ | TO | FROM
6879 //
6880 // map(**a)
6881 // &(*a), &(*a), sizeof(pointer), TARGET_PARAM | TO | FROM
6882 // &(*a), &(**a), sizeof(int), PTR_AND_OBJ | TO | FROM
6883 //
6884 // map(s)
6885 // &s, &s, sizeof(S2), TARGET_PARAM | TO | FROM
6886 //
6887 // map(s.i)
6888 // &s, &(s.i), sizeof(int), TARGET_PARAM | TO | FROM
6889 //
6890 // map(s.s.f)
6891 // &s, &(s.s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
6892 //
6893 // map(s.p)
6894 // &s, &(s.p), sizeof(double*), TARGET_PARAM | TO | FROM
6895 //
6896 // map(to: s.p[:22])
6897 // &s, &(s.p), sizeof(double*), TARGET_PARAM (*)
6898 // &s, &(s.p), sizeof(double*), MEMBER_OF(1) (**)
6899 // &(s.p), &(s.p[0]), 22*sizeof(double),
6900 // MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
6901 // (*) alloc space for struct members, only this is a target parameter
6902 // (**) map the pointer (nothing to be mapped in this example) (the compiler
6903 // optimizes this entry out, same in the examples below)
6904 // (***) map the pointee (map: to)
6905 //
6906 // map(to: s.ref)
6907 // &s, &(s.ref), sizeof(int*), TARGET_PARAM (*)
6908 // &s, &(s.ref), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
6909 // (*) alloc space for struct members, only this is a target parameter
6910 // (**) map the pointer (nothing to be mapped in this example) (the compiler
6911 // optimizes this entry out, same in the examples below)
6912 // (***) map the pointee (map: to)
6913 //
6914 // map(s.ps)
6915 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM
6916 //
6917 // map(from: s.ps->s.i)
6918 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
6919 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
6920 // &(s.ps), &(s.ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
6921 //
6922 // map(to: s.ps->ps)
6923 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
6924 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
6925 // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | TO
6926 //
6927 // map(s.ps->ps->ps)
6928 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
6929 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
6930 // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
6931 // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
6932 //
6933 // map(to: s.ps->ps->s.f[:22])
6934 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
6935 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
6936 // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
6937 // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
6938 //
6939 // map(ps)
6940 // &ps, &ps, sizeof(S2*), TARGET_PARAM | TO | FROM
6941 //
6942 // map(ps->i)
6943 // ps, &(ps->i), sizeof(int), TARGET_PARAM | TO | FROM
6944 //
6945 // map(ps->s.f)
6946 // ps, &(ps->s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
6947 //
6948 // map(from: ps->p)
6949 // ps, &(ps->p), sizeof(double*), TARGET_PARAM | FROM
6950 //
6951 // map(to: ps->p[:22])
6952 // ps, &(ps->p), sizeof(double*), TARGET_PARAM
6953 // ps, &(ps->p), sizeof(double*), MEMBER_OF(1)
6954 // &(ps->p), &(ps->p[0]), 22*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | TO
6955 //
6956 // map(ps->ps)
6957 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM | TO | FROM
6958 //
6959 // map(from: ps->ps->s.i)
6960 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
6961 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
6962 // &(ps->ps), &(ps->ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
6963 //
6964 // map(from: ps->ps->ps)
6965 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
6966 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
6967 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | FROM
6968 //
6969 // map(ps->ps->ps->ps)
6970 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
6971 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
6972 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
6973 // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
6974 //
6975 // map(to: ps->ps->ps->s.f[:22])
6976 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
6977 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
6978 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
6979 // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
6980 //
6981 // map(to: s.f[:22]) map(from: s.p[:33])
6982 // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1) +
6983 // sizeof(double*) (**), TARGET_PARAM
6984 // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | TO
6985 // &s, &(s.p), sizeof(double*), MEMBER_OF(1)
6986 // &(s.p), &(s.p[0]), 33*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | FROM
6987 // (*) allocate contiguous space needed to fit all mapped members even if
6988 // we allocate space for members not mapped (in this example,
6989 // s.f[22..49] and s.s are not mapped, yet we must allocate space for
6990 // them as well because they fall between &s.f[0] and &s.p)
6991 //
6992 // map(from: s.f[:22]) map(to: ps->p[:33])
6993 // &s, &(s.f[0]), 22*sizeof(float), TARGET_PARAM | FROM
6994 // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
6995 // ps, &(ps->p), sizeof(double*), MEMBER_OF(2) (*)
6996 // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(2) | PTR_AND_OBJ | TO
6997 // (*) the struct this entry pertains to is the 2nd element in the list of
6998 // arguments, hence MEMBER_OF(2)
6999 //
7000 // map(from: s.f[:22], s.s) map(to: ps->p[:33])
7001 // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1), TARGET_PARAM
7002 // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | FROM
7003 // &s, &(s.s), sizeof(struct S1), MEMBER_OF(1) | FROM
7004 // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
7005 // ps, &(ps->p), sizeof(double*), MEMBER_OF(4) (*)
7006 // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(4) | PTR_AND_OBJ | TO
7007 // (*) the struct this entry pertains to is the 4th element in the list
7008 // of arguments, hence MEMBER_OF(4)
7009
7010 // Track if the map information being generated is the first for a capture.
7011 bool IsCaptureFirstInfo = IsFirstComponentList;
7012 // When the variable is on a declare target link or in a to clause with
7013 // unified memory, a reference is needed to hold the host/device address
7014 // of the variable.
7015 bool RequiresReference = false;
7016
7017 // Scan the components from the base to the complete expression.
7018 auto CI = Components.rbegin();
7019 auto CE = Components.rend();
7020 auto I = CI;
7021
7022 // Track if the map information being generated is the first for a list of
7023 // components.
7024 bool IsExpressionFirstInfo = true;
7025 bool FirstPointerInComplexData = false;
7027 const Expr *AssocExpr = I->getAssociatedExpression();
7028 const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr);
7029 const auto *OASE = dyn_cast<ArraySectionExpr>(AssocExpr);
7030 const auto *OAShE = dyn_cast<OMPArrayShapingExpr>(AssocExpr);
7031
7032 if (isa<MemberExpr>(AssocExpr)) {
7033 // The base is the 'this' pointer. The content of the pointer is going
7034 // to be the base of the field being mapped.
7035 BP = CGF.LoadCXXThisAddress();
7036 } else if ((AE && isa<CXXThisExpr>(AE->getBase()->IgnoreParenImpCasts())) ||
7037 (OASE &&
7038 isa<CXXThisExpr>(OASE->getBase()->IgnoreParenImpCasts()))) {
7039 BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
7040 } else if (OAShE &&
7041 isa<CXXThisExpr>(OAShE->getBase()->IgnoreParenCasts())) {
7042 BP = Address(
7043 CGF.EmitScalarExpr(OAShE->getBase()),
7044 CGF.ConvertTypeForMem(OAShE->getBase()->getType()->getPointeeType()),
7045 CGF.getContext().getTypeAlignInChars(OAShE->getBase()->getType()));
7046 } else {
7047 // The base is the reference to the variable.
7048 // BP = &Var.
7049 BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
7050 if (const auto *VD =
7051 dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) {
7052 if (std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
7053 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) {
7054 if ((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
7055 ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
7056 *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
7058 RequiresReference = true;
7060 }
7061 }
7062 }
7063
7064 // If the variable is a pointer and is being dereferenced (i.e. is not
7065 // the last component), the base has to be the pointer itself, not its
7066 // reference. References are ignored for mapping purposes.
7067 QualType Ty =
7068 I->getAssociatedDeclaration()->getType().getNonReferenceType();
7069 if (Ty->isAnyPointerType() && std::next(I) != CE) {
7070 // No need to generate individual map information for the pointer, it
7071 // can be associated with the combined storage if shared memory mode is
7072 // active or the base declaration is not global variable.
7073 const auto *VD = dyn_cast<VarDecl>(I->getAssociatedDeclaration());
7075 !VD || VD->hasLocalStorage())
7076 BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
7077 else
7078 FirstPointerInComplexData = true;
7079 ++I;
7080 }
7081 }
7082
7083 // Track whether a component of the list should be marked as MEMBER_OF some
7084 // combined entry (for partial structs). Only the first PTR_AND_OBJ entry
7085 // in a component list should be marked as MEMBER_OF, all subsequent entries
7086 // do not belong to the base struct. E.g.
7087 // struct S2 s;
7088 // s.ps->ps->ps->f[:]
7089 // (1) (2) (3) (4)
7090 // ps(1) is a member pointer, ps(2) is a pointee of ps(1), so it is a
7091 // PTR_AND_OBJ entry; the PTR is ps(1), so MEMBER_OF the base struct. ps(3)
7092 // is the pointee of ps(2) which is not member of struct s, so it should not
7093 // be marked as such (it is still PTR_AND_OBJ).
7094 // The variable is initialized to false so that PTR_AND_OBJ entries which
7095 // are not struct members are not considered (e.g. array of pointers to
7096 // data).
7097 bool ShouldBeMemberOf = false;
7098
7099 // Variable keeping track of whether or not we have encountered a component
7100 // in the component list which is a member expression. Useful when we have a
7101 // pointer or a final array section, in which case it is the previous
7102 // component in the list which tells us whether we have a member expression.
7103 // E.g. X.f[:]
7104 // While processing the final array section "[:]" it is "f" which tells us
7105 // whether we are dealing with a member of a declared struct.
7106 const MemberExpr *EncounteredME = nullptr;
7107
7108 // Track for the total number of dimension. Start from one for the dummy
7109 // dimension.
7110 uint64_t DimSize = 1;
7111
7112 bool IsNonContiguous = CombinedInfo.NonContigInfo.IsNonContiguous;
7113 bool IsPrevMemberReference = false;
7114
7115 // We need to check if we will be encountering any MEs. If we do not
7116 // encounter any ME expression it means we will be mapping the whole struct.
7117 // In that case we need to skip adding an entry for the struct to the
7118 // CombinedInfo list and instead add an entry to the StructBaseCombinedInfo
7119 // list only when generating all info for clauses.
7120 bool IsMappingWholeStruct = true;
7121 if (!GenerateAllInfoForClauses) {
7122 IsMappingWholeStruct = false;
7123 } else {
7124 for (auto TempI = I; TempI != CE; ++TempI) {
7125 const MemberExpr *PossibleME =
7126 dyn_cast<MemberExpr>(TempI->getAssociatedExpression());
7127 if (PossibleME) {
7128 IsMappingWholeStruct = false;
7129 break;
7130 }
7131 }
7132 }
7133
7134 for (; I != CE; ++I) {
7135 // If the current component is member of a struct (parent struct) mark it.
7136 if (!EncounteredME) {
7137 EncounteredME = dyn_cast<MemberExpr>(I->getAssociatedExpression());
7138 // If we encounter a PTR_AND_OBJ entry from now on it should be marked
7139 // as MEMBER_OF the parent struct.
7140 if (EncounteredME) {
7141 ShouldBeMemberOf = true;
7142 // Do not emit as complex pointer if this is actually not array-like
7143 // expression.
7144 if (FirstPointerInComplexData) {
7145 QualType Ty = std::prev(I)
7146 ->getAssociatedDeclaration()
7147 ->getType()
7148 .getNonReferenceType();
7149 BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
7150 FirstPointerInComplexData = false;
7151 }
7152 }
7153 }
7154
7155 auto Next = std::next(I);
7156
7157 // We need to generate the addresses and sizes if this is the last
7158 // component, if the component is a pointer or if it is an array section
7159 // whose length can't be proved to be one. If this is a pointer, it
7160 // becomes the base address for the following components.
7161
7162 // A final array section, is one whose length can't be proved to be one.
7163 // If the map item is non-contiguous then we don't treat any array section
7164 // as final array section.
7165 bool IsFinalArraySection =
7166 !IsNonContiguous &&
7167 isFinalArraySectionExpression(I->getAssociatedExpression());
7168
7169 // If we have a declaration for the mapping use that, otherwise use
7170 // the base declaration of the map clause.
7171 const ValueDecl *MapDecl = (I->getAssociatedDeclaration())
7172 ? I->getAssociatedDeclaration()
7173 : BaseDecl;
7174 MapExpr = (I->getAssociatedExpression()) ? I->getAssociatedExpression()
7175 : MapExpr;
7176
7177 // Get information on whether the element is a pointer. Have to do a
7178 // special treatment for array sections given that they are built-in
7179 // types.
7180 const auto *OASE =
7181 dyn_cast<ArraySectionExpr>(I->getAssociatedExpression());
7182 const auto *OAShE =
7183 dyn_cast<OMPArrayShapingExpr>(I->getAssociatedExpression());
7184 const auto *UO = dyn_cast<UnaryOperator>(I->getAssociatedExpression());
7185 const auto *BO = dyn_cast<BinaryOperator>(I->getAssociatedExpression());
7186 bool IsPointer =
7187 OAShE ||
7190 ->isAnyPointerType()) ||
7191 I->getAssociatedExpression()->getType()->isAnyPointerType();
7192 bool IsMemberReference = isa<MemberExpr>(I->getAssociatedExpression()) &&
7193 MapDecl &&
7194 MapDecl->getType()->isLValueReferenceType();
7195 bool IsNonDerefPointer = IsPointer &&
7196 !(UO && UO->getOpcode() != UO_Deref) && !BO &&
7197 !IsNonContiguous;
7198
7199 if (OASE)
7200 ++DimSize;
7201
7202 if (Next == CE || IsMemberReference || IsNonDerefPointer ||
7203 IsFinalArraySection) {
7204 // If this is not the last component, we expect the pointer to be
7205 // associated with an array expression or member expression.
7206 assert((Next == CE ||
7207 isa<MemberExpr>(Next->getAssociatedExpression()) ||
7208 isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) ||
7209 isa<ArraySectionExpr>(Next->getAssociatedExpression()) ||
7210 isa<OMPArrayShapingExpr>(Next->getAssociatedExpression()) ||
7211 isa<UnaryOperator>(Next->getAssociatedExpression()) ||
7212 isa<BinaryOperator>(Next->getAssociatedExpression())) &&
7213 "Unexpected expression");
7214
7216 Address LowestElem = Address::invalid();
7217 auto &&EmitMemberExprBase = [](CodeGenFunction &CGF,
7218 const MemberExpr *E) {
7219 const Expr *BaseExpr = E->getBase();
7220 // If this is s.x, emit s as an lvalue. If it is s->x, emit s as a
7221 // scalar.
7222 LValue BaseLV;
7223 if (E->isArrow()) {
7224 LValueBaseInfo BaseInfo;
7225 TBAAAccessInfo TBAAInfo;
7226 Address Addr =
7227 CGF.EmitPointerWithAlignment(BaseExpr, &BaseInfo, &TBAAInfo);
7228 QualType PtrTy = BaseExpr->getType()->getPointeeType();
7229 BaseLV = CGF.MakeAddrLValue(Addr, PtrTy, BaseInfo, TBAAInfo);
7230 } else {
7231 BaseLV = CGF.EmitOMPSharedLValue(BaseExpr);
7232 }
7233 return BaseLV;
7234 };
7235 if (OAShE) {
7236 LowestElem = LB =
7237 Address(CGF.EmitScalarExpr(OAShE->getBase()),
7239 OAShE->getBase()->getType()->getPointeeType()),
7241 OAShE->getBase()->getType()));
7242 } else if (IsMemberReference) {
7243 const auto *ME = cast<MemberExpr>(I->getAssociatedExpression());
7244 LValue BaseLVal = EmitMemberExprBase(CGF, ME);
7245 LowestElem = CGF.EmitLValueForFieldInitialization(
7246 BaseLVal, cast<FieldDecl>(MapDecl))
7247 .getAddress(CGF);
7248 LB = CGF.EmitLoadOfReferenceLValue(LowestElem, MapDecl->getType())
7249 .getAddress(CGF);
7250 } else {
7251 LowestElem = LB =
7252 CGF.EmitOMPSharedLValue(I->getAssociatedExpression())
7253 .getAddress(CGF);
7254 }
7255
7256 // If this component is a pointer inside the base struct then we don't
7257 // need to create any entry for it - it will be combined with the object
7258 // it is pointing to into a single PTR_AND_OBJ entry.
7259 bool IsMemberPointerOrAddr =
7260 EncounteredME &&
7261 (((IsPointer || ForDeviceAddr) &&
7262 I->getAssociatedExpression() == EncounteredME) ||
7263 (IsPrevMemberReference && !IsPointer) ||
7264 (IsMemberReference && Next != CE &&
7265 !Next->getAssociatedExpression()->getType()->isPointerType()));
7266 if (!OverlappedElements.empty() && Next == CE) {
7267 // Handle base element with the info for overlapped elements.
7268 assert(!PartialStruct.Base.isValid() && "The base element is set.");
7269 assert(!IsPointer &&
7270 "Unexpected base element with the pointer type.");
7271 // Mark the whole struct as the struct that requires allocation on the
7272 // device.
7273 PartialStruct.LowestElem = {0, LowestElem};
7274 CharUnits TypeSize = CGF.getContext().getTypeSizeInChars(
7275 I->getAssociatedExpression()->getType());
7278 LowestElem, CGF.VoidPtrTy, CGF.Int8Ty),
7279 TypeSize.getQuantity() - 1);
7280 PartialStruct.HighestElem = {
7281 std::numeric_limits<decltype(
7282 PartialStruct.HighestElem.first)>::max(),
7283 HB};
7284 PartialStruct.Base = BP;
7285 PartialStruct.LB = LB;
7286 assert(
7287 PartialStruct.PreliminaryMapData.BasePointers.empty() &&
7288 "Overlapped elements must be used only once for the variable.");
7289 std::swap(PartialStruct.PreliminaryMapData, CombinedInfo);
7290 // Emit data for non-overlapped data.
7291 OpenMPOffloadMappingFlags Flags =
7292 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF |
7293 getMapTypeBits(MapType, MapModifiers, MotionModifiers, IsImplicit,
7294 /*AddPtrFlag=*/false,
7295 /*AddIsTargetParamFlag=*/false, IsNonContiguous);
7296 llvm::Value *Size = nullptr;
7297 // Do bitcopy of all non-overlapped structure elements.
7299 Component : OverlappedElements) {
7300 Address ComponentLB = Address::invalid();
7302 Component) {
7303 if (const ValueDecl *VD = MC.getAssociatedDeclaration()) {
7304 const auto *FD = dyn_cast<FieldDecl>(VD);
7305 if (FD && FD->getType()->isLValueReferenceType()) {
7306 const auto *ME =
7307 cast<MemberExpr>(MC.getAssociatedExpression());
7308 LValue BaseLVal = EmitMemberExprBase(CGF, ME);
7309 ComponentLB =
7310 CGF.EmitLValueForFieldInitialization(BaseLVal, FD)
7311 .getAddress(CGF);
7312 } else {
7313 ComponentLB =
7314 CGF.EmitOMPSharedLValue(MC.getAssociatedExpression())
7315 .getAddress(CGF);
7316 }
7317 llvm::Value *ComponentLBPtr = ComponentLB.emitRawPointer(CGF);
7318 llvm::Value *LBPtr = LB.emitRawPointer(CGF);
7319 Size = CGF.Builder.CreatePtrDiff(CGF.Int8Ty, ComponentLBPtr,
7320 LBPtr);
7321 break;
7322 }
7323 }
7324 assert(Size && "Failed to determine structure size");
7325 CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
7326 CombinedInfo.BasePointers.push_back(BP.emitRawPointer(CGF));
7327 CombinedInfo.DevicePtrDecls.push_back(nullptr);
7328 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
7329 CombinedInfo.Pointers.push_back(LB.emitRawPointer(CGF));
7330 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
7331 Size, CGF.Int64Ty, /*isSigned=*/true));
7332 CombinedInfo.Types.push_back(Flags);
7333 CombinedInfo.Mappers.push_back(nullptr);
7334 CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
7335 : 1);
7336 LB = CGF.Builder.CreateConstGEP(ComponentLB, 1);
7337 }
7338 CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
7339 CombinedInfo.BasePointers.push_back(BP.emitRawPointer(CGF));
7340 CombinedInfo.DevicePtrDecls.push_back(nullptr);
7341 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
7342 CombinedInfo.Pointers.push_back(LB.emitRawPointer(CGF));
7343 llvm::Value *LBPtr = LB.emitRawPointer(CGF);
7344 Size = CGF.Builder.CreatePtrDiff(
7345 CGF.Int8Ty, CGF.Builder.CreateConstGEP(HB, 1).emitRawPointer(CGF),
7346 LBPtr);
7347 CombinedInfo.Sizes.push_back(
7348 CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
7349 CombinedInfo.Types.push_back(Flags);
7350 CombinedInfo.Mappers.push_back(nullptr);
7351 CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
7352 : 1);
7353 break;
7354 }
7355 llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression());
7356 // Skip adding an entry in the CurInfo of this combined entry if the
7357 // whole struct is currently being mapped. The struct needs to be added
7358 // in the first position before any data internal to the struct is being
7359 // mapped.
7360 if (!IsMemberPointerOrAddr ||
7361 (Next == CE && MapType != OMPC_MAP_unknown)) {
7362 if (!IsMappingWholeStruct) {
7363 CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
7364 CombinedInfo.BasePointers.push_back(BP.emitRawPointer(CGF));
7365 CombinedInfo.DevicePtrDecls.push_back(nullptr);
7366 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
7367 CombinedInfo.Pointers.push_back(LB.emitRawPointer(CGF));
7368 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
7369 Size, CGF.Int64Ty, /*isSigned=*/true));
7370 CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
7371 : 1);
7372 } else {
7373 StructBaseCombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
7374 StructBaseCombinedInfo.BasePointers.push_back(
7375 BP.emitRawPointer(CGF));
7376 StructBaseCombinedInfo.DevicePtrDecls.push_back(nullptr);
7377 StructBaseCombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
7378 StructBaseCombinedInfo.Pointers.push_back(LB.emitRawPointer(CGF));
7379 StructBaseCombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
7380 Size, CGF.Int64Ty, /*isSigned=*/true));
7381 StructBaseCombinedInfo.NonContigInfo.Dims.push_back(
7382 IsNonContiguous ? DimSize : 1);
7383 }
7384
7385 // If Mapper is valid, the last component inherits the mapper.
7386 bool HasMapper = Mapper && Next == CE;
7387 if (!IsMappingWholeStruct)
7388 CombinedInfo.Mappers.push_back(HasMapper ? Mapper : nullptr);
7389 else
7390 StructBaseCombinedInfo.Mappers.push_back(HasMapper ? Mapper
7391 : nullptr);
7392
7393 // We need to add a pointer flag for each map that comes from the
7394 // same expression except for the first one. We also need to signal
7395 // this map is the first one that relates with the current capture
7396 // (there is a set of entries for each capture).
7397 OpenMPOffloadMappingFlags Flags = getMapTypeBits(
7398 MapType, MapModifiers, MotionModifiers, IsImplicit,
7399 !IsExpressionFirstInfo || RequiresReference ||
7400 FirstPointerInComplexData || IsMemberReference,
7401 IsCaptureFirstInfo && !RequiresReference, IsNonContiguous);
7402
7403 if (!IsExpressionFirstInfo || IsMemberReference) {
7404 // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as well,
7405 // then we reset the TO/FROM/ALWAYS/DELETE/CLOSE flags.
7406 if (IsPointer || (IsMemberReference && Next != CE))
7407 Flags &= ~(OpenMPOffloadMappingFlags::OMP_MAP_TO |
7408 OpenMPOffloadMappingFlags::OMP_MAP_FROM |
7409 OpenMPOffloadMappingFlags::OMP_MAP_ALWAYS |
7410 OpenMPOffloadMappingFlags::OMP_MAP_DELETE |
7411 OpenMPOffloadMappingFlags::OMP_MAP_CLOSE);
7412
7413 if (ShouldBeMemberOf) {
7414 // Set placeholder value MEMBER_OF=FFFF to indicate that the flag
7415 // should be later updated with the correct value of MEMBER_OF.
7416 Flags |= OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF;
7417 // From now on, all subsequent PTR_AND_OBJ entries should not be
7418 // marked as MEMBER_OF.
7419 ShouldBeMemberOf = false;
7420 }
7421 }
7422
7423 if (!IsMappingWholeStruct)
7424 CombinedInfo.Types.push_back(Flags);
7425 else
7426 StructBaseCombinedInfo.Types.push_back(Flags);
7427 }
7428
7429 // If we have encountered a member expression so far, keep track of the
7430 // mapped member. If the parent is "*this", then the value declaration
7431 // is nullptr.
7432 if (EncounteredME) {
7433 const auto *FD = cast<FieldDecl>(EncounteredME->getMemberDecl());
7434 unsigned FieldIndex = FD->getFieldIndex();
7435
7436 // Update info about the lowest and highest elements for this struct
7437 if (!PartialStruct.Base.isValid()) {
7438 PartialStruct.LowestElem = {FieldIndex, LowestElem};
7439 if (IsFinalArraySection) {
7440 Address HB =
7441 CGF.EmitArraySectionExpr(OASE, /*IsLowerBound=*/false)
7442 .getAddress(CGF);
7443 PartialStruct.HighestElem = {FieldIndex, HB};
7444 } else {
7445 PartialStruct.HighestElem = {FieldIndex, LowestElem};
7446 }
7447 PartialStruct.Base = BP;
7448 PartialStruct.LB = BP;
7449 } else if (FieldIndex < PartialStruct.LowestElem.first) {
7450 PartialStruct.LowestElem = {FieldIndex, LowestElem};
7451 } else if (FieldIndex > PartialStruct.HighestElem.first) {
7452 if (IsFinalArraySection) {
7453 Address HB =
7454 CGF.EmitArraySectionExpr(OASE, /*IsLowerBound=*/false)
7455 .getAddress(CGF);
7456 PartialStruct.HighestElem = {FieldIndex, HB};
7457 } else {
7458 PartialStruct.HighestElem = {FieldIndex, LowestElem};
7459 }
7460 }
7461 }
7462
7463 // Need to emit combined struct for array sections.
7464 if (IsFinalArraySection || IsNonContiguous)
7465 PartialStruct.IsArraySection = true;
7466
7467 // If we have a final array section, we are done with this expression.
7468 if (IsFinalArraySection)
7469 break;
7470
7471 // The pointer becomes the base for the next element.
7472 if (Next != CE)
7473 BP = IsMemberReference ? LowestElem : LB;
7474
7475 IsExpressionFirstInfo = false;
7476 IsCaptureFirstInfo = false;
7477 FirstPointerInComplexData = false;
7478 IsPrevMemberReference = IsMemberReference;
7479 } else if (FirstPointerInComplexData) {
7480 QualType Ty = Components.rbegin()
7481 ->getAssociatedDeclaration()
7482 ->getType()
7483 .getNonReferenceType();
7484 BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
7485 FirstPointerInComplexData = false;
7486 }
7487 }
7488 // If ran into the whole component - allocate the space for the whole
7489 // record.
7490 if (!EncounteredME)
7491 PartialStruct.HasCompleteRecord = true;
7492
7493 if (!IsNonContiguous)
7494 return;
7495
7496 const ASTContext &Context = CGF.getContext();
7497
7498 // For supporting stride in array section, we need to initialize the first
7499 // dimension size as 1, first offset as 0, and first count as 1
7500 MapValuesArrayTy CurOffsets = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 0)};
7501 MapValuesArrayTy CurCounts = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)};
7502 MapValuesArrayTy CurStrides;
7503 MapValuesArrayTy DimSizes{llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)};
7504 uint64_t ElementTypeSize;
7505
7506 // Collect Size information for each dimension and get the element size as
7507 // the first Stride. For example, for `int arr[10][10]`, the DimSizes
7508 // should be [10, 10] and the first stride is 4 btyes.
7510 Components) {
7511 const Expr *AssocExpr = Component.getAssociatedExpression();
7512 const auto *OASE = dyn_cast<ArraySectionExpr>(AssocExpr);
7513
7514 if (!OASE)
7515 continue;
7516
7517 QualType Ty = ArraySectionExpr::getBaseOriginalType(OASE->getBase());
7518 auto *CAT = Context.getAsConstantArrayType(Ty);
7519 auto *VAT = Context.getAsVariableArrayType(Ty);
7520
7521 // We need all the dimension size except for the last dimension.
7522 assert((VAT || CAT || &Component == &*Components.begin()) &&
7523 "Should be either ConstantArray or VariableArray if not the "
7524 "first Component");
7525
7526 // Get element size if CurStrides is empty.
7527 if (CurStrides.empty()) {
7528 const Type *ElementType = nullptr;
7529 if (CAT)
7530 ElementType = CAT->getElementType().getTypePtr();
7531 else if (VAT)
7532 ElementType = VAT->getElementType().getTypePtr();
7533 else
7534 assert(&Component == &*Components.begin() &&
7535 "Only expect pointer (non CAT or VAT) when this is the "
7536 "first Component");
7537 // If ElementType is null, then it means the base is a pointer
7538 // (neither CAT nor VAT) and we'll attempt to get ElementType again
7539 // for next iteration.
7540 if (ElementType) {
7541 // For the case that having pointer as base, we need to remove one
7542 // level of indirection.
7543 if (&Component != &*Components.begin())
7544 ElementType = ElementType->getPointeeOrArrayElementType();
7545 ElementTypeSize =
7546 Context.getTypeSizeInChars(ElementType).getQuantity();
7547 CurStrides.push_back(
7548 llvm::ConstantInt::get(CGF.Int64Ty, ElementTypeSize));
7549 }
7550 }
7551 // Get dimension value except for the last dimension since we don't need
7552 // it.
7553 if (DimSizes.size() < Components.size() - 1) {
7554 if (CAT)
7555 DimSizes.push_back(
7556 llvm::ConstantInt::get(CGF.Int64Ty, CAT->getZExtSize()));
7557 else if (VAT)
7558 DimSizes.push_back(CGF.Builder.CreateIntCast(
7559 CGF.EmitScalarExpr(VAT->getSizeExpr()), CGF.Int64Ty,
7560 /*IsSigned=*/false));
7561 }
7562 }
7563
7564 // Skip the dummy dimension since we have already have its information.
7565 auto *DI = DimSizes.begin() + 1;
7566 // Product of dimension.
7567 llvm::Value *DimProd =
7568 llvm::ConstantInt::get(CGF.CGM.Int64Ty, ElementTypeSize);
7569
7570 // Collect info for non-contiguous. Notice that offset, count, and stride
7571 // are only meaningful for array-section, so we insert a null for anything
7572 // other than array-section.
7573 // Also, the size of offset, count, and stride are not the same as
7574 // pointers, base_pointers, sizes, or dims. Instead, the size of offset,
7575 // count, and stride are the same as the number of non-contiguous
7576 // declaration in target update to/from clause.
7578 Components) {
7579 const Expr *AssocExpr = Component.getAssociatedExpression();
7580
7581 if (const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr)) {
7582 llvm::Value *Offset = CGF.Builder.CreateIntCast(
7583 CGF.EmitScalarExpr(AE->getIdx()), CGF.Int64Ty,
7584 /*isSigned=*/false);
7585 CurOffsets.push_back(Offset);
7586 CurCounts.push_back(llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/1));
7587 CurStrides.push_back(CurStrides.back());
7588 continue;
7589 }
7590
7591 const auto *OASE = dyn_cast<ArraySectionExpr>(AssocExpr);
7592
7593 if (!OASE)
7594 continue;
7595
7596 // Offset
7597 const Expr *OffsetExpr = OASE->getLowerBound();
7598 llvm::Value *Offset = nullptr;
7599 if (!OffsetExpr) {
7600 // If offset is absent, then we just set it to zero.
7601 Offset = llvm::ConstantInt::get(CGF.Int64Ty, 0);
7602 } else {
7603 Offset = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(OffsetExpr),
7604 CGF.Int64Ty,
7605 /*isSigned=*/false);
7606 }
7607 CurOffsets.push_back(Offset);
7608
7609 // Count
7610 const Expr *CountExpr = OASE->getLength();
7611 llvm::Value *Count = nullptr;
7612 if (!CountExpr) {
7613 // In Clang, once a high dimension is an array section, we construct all
7614 // the lower dimension as array section, however, for case like
7615 // arr[0:2][2], Clang construct the inner dimension as an array section
7616 // but it actually is not in an array section form according to spec.
7617 if (!OASE->getColonLocFirst().isValid() &&
7618 !OASE->getColonLocSecond().isValid()) {
7619 Count = llvm::ConstantInt::get(CGF.Int64Ty, 1);
7620 } else {
7621 // OpenMP 5.0, 2.1.5 Array Sections, Description.
7622 // When the length is absent it defaults to ⌈(size −
7623 // lower-bound)/stride⌉, where size is the size of the array
7624 // dimension.
7625 const Expr *StrideExpr = OASE->getStride();
7626 llvm::Value *Stride =
7627 StrideExpr
7628 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr),
7629 CGF.Int64Ty, /*isSigned=*/false)
7630 : nullptr;
7631 if (Stride)
7632 Count = CGF.Builder.CreateUDiv(
7633 CGF.Builder.CreateNUWSub(*DI, Offset), Stride);
7634 else
7635 Count = CGF.Builder.CreateNUWSub(*DI, Offset);
7636 }
7637 } else {
7638 Count = CGF.EmitScalarExpr(CountExpr);
7639 }
7640 Count = CGF.Builder.CreateIntCast(Count, CGF.Int64Ty, /*isSigned=*/false);
7641 CurCounts.push_back(Count);
7642
7643 // Stride_n' = Stride_n * (D_0 * D_1 ... * D_n-1) * Unit size
7644 // Take `int arr[5][5][5]` and `arr[0:2:2][1:2:1][0:2:2]` as an example:
7645 // Offset Count Stride
7646 // D0 0 1 4 (int) <- dummy dimension
7647 // D1 0 2 8 (2 * (1) * 4)
7648 // D2 1 2 20 (1 * (1 * 5) * 4)
7649 // D3 0 2 200 (2 * (1 * 5 * 4) * 4)
7650 const Expr *StrideExpr = OASE->getStride();
7651 llvm::Value *Stride =
7652 StrideExpr
7653 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr),
7654 CGF.Int64Ty, /*isSigned=*/false)
7655 : nullptr;
7656 DimProd = CGF.Builder.CreateNUWMul(DimProd, *(DI - 1));
7657 if (Stride)
7658 CurStrides.push_back(CGF.Builder.CreateNUWMul(DimProd, Stride));
7659 else
7660 CurStrides.push_back(DimProd);
7661 if (DI != DimSizes.end())
7662 ++DI;
7663 }
7664
7665 CombinedInfo.NonContigInfo.Offsets.push_back(CurOffsets);
7666 CombinedInfo.NonContigInfo.Counts.push_back(CurCounts);
7667 CombinedInfo.NonContigInfo.Strides.push_back(CurStrides);
7668 }
7669
7670 /// Return the adjusted map modifiers if the declaration a capture refers to
7671 /// appears in a first-private clause. This is expected to be used only with
7672 /// directives that start with 'target'.
7673 OpenMPOffloadMappingFlags
7674 getMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap) const {
7675 assert(Cap.capturesVariable() && "Expected capture by reference only!");
7676
7677 // A first private variable captured by reference will use only the
7678 // 'private ptr' and 'map to' flag. Return the right flags if the captured
7679 // declaration is known as first-private in this handler.
7680 if (FirstPrivateDecls.count(Cap.getCapturedVar())) {
7681 if (Cap.getCapturedVar()->getType()->isAnyPointerType())
7682 return OpenMPOffloadMappingFlags::OMP_MAP_TO |
7683 OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ;
7684 return OpenMPOffloadMappingFlags::OMP_MAP_PRIVATE |
7685 OpenMPOffloadMappingFlags::OMP_MAP_TO;
7686 }
7687 auto I = LambdasMap.find(Cap.getCapturedVar()->getCanonicalDecl());
7688 if (I != LambdasMap.end())
7689 // for map(to: lambda): using user specified map type.
7690 return getMapTypeBits(
7691 I->getSecond()->getMapType(), I->getSecond()->getMapTypeModifiers(),
7692 /*MotionModifiers=*/std::nullopt, I->getSecond()->isImplicit(),
7693 /*AddPtrFlag=*/false,
7694 /*AddIsTargetParamFlag=*/false,
7695 /*isNonContiguous=*/false);
7696 return OpenMPOffloadMappingFlags::OMP_MAP_TO |
7697 OpenMPOffloadMappingFlags::OMP_MAP_FROM;
7698 }
7699
  /// Flatten, in LLVM field order, the non-empty, non-bitfield fields of
  /// \p RD into \p Layout, recursing through its non-virtual and virtual
  /// bases. \p AsBase selects the base-subobject LLVM type (which excludes
  /// tail padding) when \p RD is being laid out as a base class.
  void getPlainLayout(const CXXRecordDecl *RD,
                      llvm::SmallVectorImpl<const FieldDecl *> &Layout,
                      bool AsBase) const {
    const CGRecordLayout &RL = CGF.getTypes().getCGRecordLayout(RD);

    llvm::StructType *St =
        AsBase ? RL.getBaseSubobjectLLVMType() : RL.getLLVMType();

    // One slot per LLVM struct element; each slot holds either a base class
    // (to recurse into) or a field, keyed by its LLVM field index.
    unsigned NumElements = St->getNumElements();
    llvm::SmallVector<
        llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>, 4>
        RecordLayout(NumElements);

    // Fill bases.
    for (const auto &I : RD->bases()) {
      if (I.isVirtual())
        continue;
      const auto *Base = I.getType()->getAsCXXRecordDecl();
      // Ignore empty bases.
      if (Base->isEmpty() || CGF.getContext()
                                 .getASTRecordLayout(RD)
                                 .getBaseClassOffset(Base)
                                 .isZero())
        continue;

      unsigned FieldIndex = RL.getNonVirtualBaseLLVMFieldNo(Base);
      RecordLayout[FieldIndex] = Base;
    }
    // Fill in virtual bases. A slot already claimed by a non-virtual base is
    // left as-is.
    for (const auto &I : RD->vbases()) {
      const auto *Base = I.getType()->getAsCXXRecordDecl();
      // Ignore empty bases.
      if (Base->isEmpty())
        continue;
      unsigned FieldIndex = RL.getVirtualBaseIndex(Base);
      if (RecordLayout[FieldIndex])
        continue;
      RecordLayout[FieldIndex] = Base;
    }
    // Fill in all the fields.
    assert(!RD->isUnion() && "Unexpected union.");
    for (const auto *Field : RD->fields()) {
      // Fill in non-bitfields. (Bitfields always use a zero pattern, which we
      // will fill in later.)
      if (!Field->isBitField() && !Field->isZeroSize(CGF.getContext())) {
        unsigned FieldIndex = RL.getLLVMFieldNo(Field);
        RecordLayout[FieldIndex] = Field;
      }
    }
    // Emit the collected slots in LLVM field order, recursing into base
    // classes so that the resulting list is fully flattened.
    for (const llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>
             &Data : RecordLayout) {
      if (Data.isNull())
        continue;
      if (const auto *Base = Data.dyn_cast<const CXXRecordDecl *>())
        getPlainLayout(Base, Layout, /*AsBase=*/true);
      else
        Layout.push_back(Data.get<const FieldDecl *>());
    }
  }
7759
  /// Generate all the base pointers, section pointers, sizes, map types, and
  /// mappers for the extracted mappable expressions (all included in \a
  /// CombinedInfo). Also, for each item that relates with a device pointer, a
  /// pair of the relevant declaration and index where it occurs is appended to
  /// the device pointers info array. Declarations in \p SkipVarSet are
  /// ignored entirely.
  void generateAllInfoForClauses(
      ArrayRef<const OMPClause *> Clauses, MapCombinedInfoTy &CombinedInfo,
      llvm::OpenMPIRBuilder &OMPBuilder,
      const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
          llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
    // We have to process the component lists that relate with the same
    // declaration in a single chunk so that we can generate the map flags
    // correctly. Therefore, we organize all lists in a map.
    // The MapKind buckets also fix the processing order per declaration:
    // 'present' entries, then 'alloc' entries, then everything else.
    enum MapKind { Present, Allocs, Other, Total };
    llvm::MapVector<CanonicalDeclPtr<const Decl>,
                    SmallVector<SmallVector<MapInfo, 8>, 4>>
        Info;

    // Helper function to fill the information map for the different supported
    // clauses.
    auto &&InfoGen =
        [&Info, &SkipVarSet](
            const ValueDecl *D, MapKind Kind,
            OMPClauseMappableExprCommon::MappableExprComponentListRef L,
            OpenMPMapClauseKind MapType,
            ArrayRef<OpenMPMapModifierKind> MapModifiers,
            ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
            bool ReturnDevicePointer, bool IsImplicit, const ValueDecl *Mapper,
            const Expr *VarRef = nullptr, bool ForDeviceAddr = false) {
          if (SkipVarSet.contains(D))
            return;
          auto It = Info.find(D);
          if (It == Info.end())
            It = Info
                     .insert(std::make_pair(
                         D, SmallVector<SmallVector<MapInfo, 8>, 4>(Total)))
                     .first;
          It->second[Kind].emplace_back(
              L, MapType, MapModifiers, MotionModifiers, ReturnDevicePointer,
              IsImplicit, Mapper, VarRef, ForDeviceAddr);
        };

    // Bucket the component lists of all 'map' clauses.
    for (const auto *Cl : Clauses) {
      const auto *C = dyn_cast<OMPMapClause>(Cl);
      if (!C)
        continue;
      MapKind Kind = Other;
      if (llvm::is_contained(C->getMapTypeModifiers(),
                             OMPC_MAP_MODIFIER_present))
        Kind = Present;
      else if (C->getMapType() == OMPC_MAP_alloc)
        Kind = Allocs;
      const auto *EI = C->getVarRefs().begin();
      for (const auto L : C->component_lists()) {
        const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
        InfoGen(std::get<0>(L), Kind, std::get<1>(L), C->getMapType(),
                C->getMapTypeModifiers(), std::nullopt,
                /*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(L),
                E);
        ++EI;
      }
    }
    // Bucket the component lists of all 'to' clauses as map-to.
    for (const auto *Cl : Clauses) {
      const auto *C = dyn_cast<OMPToClause>(Cl);
      if (!C)
        continue;
      MapKind Kind = Other;
      if (llvm::is_contained(C->getMotionModifiers(),
                             OMPC_MOTION_MODIFIER_present))
        Kind = Present;
      const auto *EI = C->getVarRefs().begin();
      for (const auto L : C->component_lists()) {
        InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_to, std::nullopt,
                C->getMotionModifiers(), /*ReturnDevicePointer=*/false,
                C->isImplicit(), std::get<2>(L), *EI);
        ++EI;
      }
    }
    // Bucket the component lists of all 'from' clauses as map-from.
    for (const auto *Cl : Clauses) {
      const auto *C = dyn_cast<OMPFromClause>(Cl);
      if (!C)
        continue;
      MapKind Kind = Other;
      if (llvm::is_contained(C->getMotionModifiers(),
                             OMPC_MOTION_MODIFIER_present))
        Kind = Present;
      const auto *EI = C->getVarRefs().begin();
      for (const auto L : C->component_lists()) {
        InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_from,
                std::nullopt, C->getMotionModifiers(),
                /*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(L),
                *EI);
        ++EI;
      }
    }

    // Look at the use_device_ptr and use_device_addr clauses information and
    // mark the existing map entries as such. If there is no map information for
    // an entry in the use_device_ptr and use_device_addr list, we create one
    // with map type 'alloc' and zero size section. It is the user fault if that
    // was not mapped before. If there is no map information and the pointer is
    // a struct member, then we defer the emission of that entry until the whole
    // struct has been processed.
    llvm::MapVector<CanonicalDeclPtr<const Decl>,
                    SmallVector<DeferredDevicePtrEntryTy, 4>>
        DeferredInfo;
    MapCombinedInfoTy UseDeviceDataCombinedInfo;

    // Appends a zero-size RETURN_PARAM entry for a use_device_ptr /
    // use_device_addr list item whose pointer value is already available.
    auto &&UseDeviceDataCombinedInfoGen =
        [&UseDeviceDataCombinedInfo](const ValueDecl *VD, llvm::Value *Ptr,
                                     CodeGenFunction &CGF, bool IsDevAddr) {
          UseDeviceDataCombinedInfo.Exprs.push_back(VD);
          UseDeviceDataCombinedInfo.BasePointers.emplace_back(Ptr);
          UseDeviceDataCombinedInfo.DevicePtrDecls.emplace_back(VD);
          UseDeviceDataCombinedInfo.DevicePointers.emplace_back(
              IsDevAddr ? DeviceInfoTy::Address : DeviceInfoTy::Pointer);
          UseDeviceDataCombinedInfo.Pointers.push_back(Ptr);
          UseDeviceDataCombinedInfo.Sizes.push_back(
              llvm::Constant::getNullValue(CGF.Int64Ty));
          UseDeviceDataCombinedInfo.Types.push_back(
              OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM);
          UseDeviceDataCombinedInfo.Mappers.push_back(nullptr);
        };

    auto &&MapInfoGen =
        [&DeferredInfo, &UseDeviceDataCombinedInfoGen,
         &InfoGen](CodeGenFunction &CGF, const Expr *IE, const ValueDecl *VD,
                   OMPClauseMappableExprCommon::MappableExprComponentListRef
                       Components,
                   bool IsImplicit, bool IsDevAddr) {
          // We didn't find any match in our map information - generate a zero
          // size array section - if the pointer is a struct member we defer
          // this action until the whole struct has been processed.
          if (isa<MemberExpr>(IE)) {
            // Insert the pointer into Info to be processed by
            // generateInfoForComponentList. Because it is a member pointer
            // without a pointee, no entry will be generated for it, therefore
            // we need to generate one after the whole struct has been
            // processed. Nonetheless, generateInfoForComponentList must be
            // called to take the pointer into account for the calculation of
            // the range of the partial struct.
            InfoGen(nullptr, Other, Components, OMPC_MAP_unknown, std::nullopt,
                    std::nullopt, /*ReturnDevicePointer=*/false, IsImplicit,
                    nullptr, nullptr, IsDevAddr);
            DeferredInfo[nullptr].emplace_back(IE, VD, IsDevAddr);
          } else {
            llvm::Value *Ptr;
            if (IsDevAddr) {
              if (IE->isGLValue())
                Ptr = CGF.EmitLValue(IE).getPointer(CGF);
              else
                Ptr = CGF.EmitScalarExpr(IE);
            } else {
              Ptr = CGF.EmitLoadOfScalar(CGF.EmitLValue(IE), IE->getExprLoc());
            }
            UseDeviceDataCombinedInfoGen(VD, Ptr, CGF, IsDevAddr);
          }
        };

    auto &&IsMapInfoExist = [&Info](CodeGenFunction &CGF, const ValueDecl *VD,
                                    const Expr *IE, bool IsDevAddr) -> bool {
      // We potentially have map information for this declaration already.
      // Look for the first set of components that refer to it. If found,
      // return true.
      // If the first component is a member expression, we have to look into
      // 'this', which maps to null in the map of map information. Otherwise
      // look directly for the information.
      auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);
      if (It != Info.end()) {
        bool Found = false;
        for (auto &Data : It->second) {
          auto *CI = llvm::find_if(Data, [VD](const MapInfo &MI) {
            return MI.Components.back().getAssociatedDeclaration() == VD;
          });
          // If we found a map entry, signal that the pointer has to be
          // returned and move on to the next declaration. Exclude cases where
          // the base pointer is mapped as array subscript, array section or
          // array shaping. The base address is passed as a pointer to base in
          // this case and cannot be used as a base for use_device_ptr list
          // item.
          if (CI != Data.end()) {
            if (IsDevAddr) {
              CI->ForDeviceAddr = IsDevAddr;
              CI->ReturnDevicePointer = true;
              Found = true;
              break;
            } else {
              auto PrevCI = std::next(CI->Components.rbegin());
              const auto *VarD = dyn_cast<VarDecl>(VD);
              if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
                  isa<MemberExpr>(IE) ||
                  !VD->getType().getNonReferenceType()->isPointerType() ||
                  PrevCI == CI->Components.rend() ||
                  isa<MemberExpr>(PrevCI->getAssociatedExpression()) || !VarD ||
                  VarD->hasLocalStorage()) {
                CI->ForDeviceAddr = IsDevAddr;
                CI->ReturnDevicePointer = true;
                Found = true;
                break;
              }
            }
          }
        }
        return Found;
      }
      return false;
    };

    // Look at the use_device_ptr clause information and mark the existing map
    // entries as such. If there is no map information for an entry in the
    // use_device_ptr list, we create one with map type 'alloc' and zero size
    // section. It is the user fault if that was not mapped before. If there is
    // no map information and the pointer is a struct member, then we defer the
    // emission of that entry until the whole struct has been processed.
    for (const auto *Cl : Clauses) {
      const auto *C = dyn_cast<OMPUseDevicePtrClause>(Cl);
      if (!C)
        continue;
      for (const auto L : C->component_lists()) {
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components =
            std::get<1>(L);
        assert(!Components.empty() &&
               "Not expecting empty list of components!");
        const ValueDecl *VD = Components.back().getAssociatedDeclaration();
        VD = cast<ValueDecl>(VD->getCanonicalDecl());
        const Expr *IE = Components.back().getAssociatedExpression();
        if (IsMapInfoExist(CGF, VD, IE, /*IsDevAddr=*/false))
          continue;
        MapInfoGen(CGF, IE, VD, Components, C->isImplicit(),
                   /*IsDevAddr=*/false);
      }
    }

    // Same for use_device_addr; each declaration is handled at most once.
    llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>, 4> Processed;
    for (const auto *Cl : Clauses) {
      const auto *C = dyn_cast<OMPUseDeviceAddrClause>(Cl);
      if (!C)
        continue;
      for (const auto L : C->component_lists()) {
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components =
            std::get<1>(L);
        assert(!std::get<1>(L).empty() &&
               "Not expecting empty list of components!");
        const ValueDecl *VD = std::get<1>(L).back().getAssociatedDeclaration();
        if (!Processed.insert(VD).second)
          continue;
        VD = cast<ValueDecl>(VD->getCanonicalDecl());
        const Expr *IE = std::get<1>(L).back().getAssociatedExpression();
        if (IsMapInfoExist(CGF, VD, IE, /*IsDevAddr=*/true))
          continue;
        MapInfoGen(CGF, IE, VD, Components, C->isImplicit(),
                   /*IsDevAddr=*/true);
      }
    }

    // Process the collected buckets declaration by declaration.
    for (const auto &Data : Info) {
      StructRangeInfoTy PartialStruct;
      // Current struct information:
      MapCombinedInfoTy CurInfo;
      // Current struct base information:
      MapCombinedInfoTy StructBaseCurInfo;
      const Decl *D = Data.first;
      const ValueDecl *VD = cast_or_null<ValueDecl>(D);
      for (const auto &M : Data.second) {
        for (const MapInfo &L : M) {
          assert(!L.Components.empty() &&
                 "Not expecting declaration with no component lists.");

          // Remember the current base pointer index.
          unsigned CurrentBasePointersIdx = CurInfo.BasePointers.size();
          unsigned StructBasePointersIdx =
              StructBaseCurInfo.BasePointers.size();
          CurInfo.NonContigInfo.IsNonContiguous =
              L.Components.back().isNonContiguous();
          generateInfoForComponentList(
              L.MapType, L.MapModifiers, L.MotionModifiers, L.Components,
              CurInfo, StructBaseCurInfo, PartialStruct,
              /*IsFirstComponentList=*/false, L.IsImplicit,
              /*GenerateAllInfoForClauses*/ true, L.Mapper, L.ForDeviceAddr, VD,
              L.VarRef);

          // If this entry relates to a device pointer, set the relevant
          // declaration and add the 'return pointer' flag.
          if (L.ReturnDevicePointer) {
            // Check whether a value was added to either CurInfo or
            // StructBaseCurInfo and error if no value was added to either of
            // them:
            assert((CurrentBasePointersIdx < CurInfo.BasePointers.size() ||
                    StructBasePointersIdx <
                        StructBaseCurInfo.BasePointers.size()) &&
                   "Unexpected number of mapped base pointers.");

            // Choose a base pointer index which is always valid:
            const ValueDecl *RelevantVD =
                L.Components.back().getAssociatedDeclaration();
            assert(RelevantVD &&
                   "No relevant declaration related with device pointer??");

            // If StructBaseCurInfo has been updated this iteration then work on
            // the first new entry added to it i.e. make sure that when multiple
            // values are added to any of the lists, the first value added is
            // being modified by the assignments below (not the last value
            // added).
            if (StructBasePointersIdx < StructBaseCurInfo.BasePointers.size()) {
              StructBaseCurInfo.DevicePtrDecls[StructBasePointersIdx] =
                  RelevantVD;
              StructBaseCurInfo.DevicePointers[StructBasePointersIdx] =
                  L.ForDeviceAddr ? DeviceInfoTy::Address
                                  : DeviceInfoTy::Pointer;
              StructBaseCurInfo.Types[StructBasePointersIdx] |=
                  OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM;
            } else {
              CurInfo.DevicePtrDecls[CurrentBasePointersIdx] = RelevantVD;
              CurInfo.DevicePointers[CurrentBasePointersIdx] =
                  L.ForDeviceAddr ? DeviceInfoTy::Address
                                  : DeviceInfoTy::Pointer;
              CurInfo.Types[CurrentBasePointersIdx] |=
                  OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM;
            }
          }
        }
      }

      // Append any pending zero-length pointers which are struct members and
      // used with use_device_ptr or use_device_addr.
      auto CI = DeferredInfo.find(Data.first);
      if (CI != DeferredInfo.end()) {
        for (const DeferredDevicePtrEntryTy &L : CI->second) {
          llvm::Value *BasePtr;
          llvm::Value *Ptr;
          if (L.ForDeviceAddr) {
            if (L.IE->isGLValue())
              Ptr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
            else
              Ptr = this->CGF.EmitScalarExpr(L.IE);
            BasePtr = Ptr;
            // Entry is RETURN_PARAM. Also, set the placeholder value
            // MEMBER_OF=FFFF so that the entry is later updated with the
            // correct value of MEMBER_OF.
            CurInfo.Types.push_back(
                OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM |
                OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF);
          } else {
            BasePtr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
            Ptr = this->CGF.EmitLoadOfScalar(this->CGF.EmitLValue(L.IE),
                                             L.IE->getExprLoc());
            // Entry is PTR_AND_OBJ and RETURN_PARAM. Also, set the
            // placeholder value MEMBER_OF=FFFF so that the entry is later
            // updated with the correct value of MEMBER_OF.
            CurInfo.Types.push_back(
                OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ |
                OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM |
                OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF);
          }
          CurInfo.Exprs.push_back(L.VD);
          CurInfo.BasePointers.emplace_back(BasePtr);
          CurInfo.DevicePtrDecls.emplace_back(L.VD);
          CurInfo.DevicePointers.emplace_back(
              L.ForDeviceAddr ? DeviceInfoTy::Address : DeviceInfoTy::Pointer);
          CurInfo.Pointers.push_back(Ptr);
          CurInfo.Sizes.push_back(
              llvm::Constant::getNullValue(this->CGF.Int64Ty));
          CurInfo.Mappers.push_back(nullptr);
        }
      }

      // Unify entries in one list making sure the struct mapping precedes the
      // individual fields:
      MapCombinedInfoTy UnionCurInfo;
      UnionCurInfo.append(StructBaseCurInfo);
      UnionCurInfo.append(CurInfo);

      // If there is an entry in PartialStruct it means we have a struct with
      // individual members mapped. Emit an extra combined entry.
      if (PartialStruct.Base.isValid()) {
        UnionCurInfo.NonContigInfo.Dims.push_back(0);
        // Emit a combined entry:
        emitCombinedEntry(CombinedInfo, UnionCurInfo.Types, PartialStruct,
                          /*IsMapThis*/ !VD, OMPBuilder, VD);
      }

      // We need to append the results of this capture to what we already have.
      CombinedInfo.append(UnionCurInfo);
    }
    // Append data for use_device_ptr clauses.
    CombinedInfo.append(UseDeviceDataCombinedInfo);
  }
8147
8148public:
  /// Constructor for executable directives. Pre-extracts the clause
  /// information (firstprivates, device pointers/addresses, mapped lambdas)
  /// that the map-generation methods consult later.
  MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF)
      : CurDir(&Dir), CGF(CGF) {
    // Extract firstprivate clause information.
    for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>())
      for (const auto *D : C->varlists())
        FirstPrivateDecls.try_emplace(
            cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl()), C->isImplicit());
    // Extract implicit firstprivates from uses_allocators clauses.
    for (const auto *C : Dir.getClausesOfKind<OMPUsesAllocatorsClause>()) {
      for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
        OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
        // Prefer the allocator-traits variable; otherwise fall back to the
        // allocator expression itself if it names a variable.
        if (const auto *DRE = dyn_cast_or_null<DeclRefExpr>(D.AllocatorTraits))
          FirstPrivateDecls.try_emplace(cast<VarDecl>(DRE->getDecl()),
                                        /*Implicit=*/true);
        else if (const auto *VD = dyn_cast<VarDecl>(
                     cast<DeclRefExpr>(D.Allocator->IgnoreParenImpCasts())
                         ->getDecl()))
          FirstPrivateDecls.try_emplace(VD, /*Implicit=*/true);
      }
    }
    // Extract device pointer clause information.
    for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>())
      for (auto L : C->component_lists())
        DevPointersMap[std::get<0>(L)].push_back(std::get<1>(L));
    // Extract device addr clause information.
    for (const auto *C : Dir.getClausesOfKind<OMPHasDeviceAddrClause>())
      for (auto L : C->component_lists())
        HasDevAddrsMap[std::get<0>(L)].push_back(std::get<1>(L));
    // Extract map information: remember lambdas that appear in map(to:)
    // clauses so their captures can be mapped with the user-specified type.
    for (const auto *C : Dir.getClausesOfKind<OMPMapClause>()) {
      if (C->getMapType() != OMPC_MAP_to)
        continue;
      for (auto L : C->component_lists()) {
        const ValueDecl *VD = std::get<0>(L);
        const auto *RD = VD ? VD->getType()
                                  .getCanonicalType()
                                  .getNonReferenceType()
                                  ->getAsCXXRecordDecl()
                            : nullptr;
        if (RD && RD->isLambda())
          LambdasMap.try_emplace(std::get<0>(L), C);
      }
    }
  }
8193
  /// Constructor for the declare mapper directive. No clause pre-extraction
  /// is performed here; generateAllInfoForMapper reads the clauses straight
  /// from the stored directive.
  MappableExprsHandler(const OMPDeclareMapperDecl &Dir, CodeGenFunction &CGF)
      : CurDir(&Dir), CGF(CGF) {}
8197
  /// Generate code for the combined entry if we have a partially mapped struct
  /// and take care of the mapping flags of the arguments corresponding to
  /// individual struct members. The combined entry spans from the lowest to
  /// the highest mapped member; the member entries in \p CurTypes are then
  /// tagged as MEMBER_OF it.
  void emitCombinedEntry(MapCombinedInfoTy &CombinedInfo,
                         MapFlagsArrayTy &CurTypes,
                         const StructRangeInfoTy &PartialStruct, bool IsMapThis,
                         llvm::OpenMPIRBuilder &OMPBuilder,
                         const ValueDecl *VD = nullptr,
                         bool NotTargetParams = true) const {
    // A single non-MEMBER_OF entry that is not an array section needs no
    // combined parent entry.
    if (CurTypes.size() == 1 &&
        ((CurTypes.back() & OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF) !=
         OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF) &&
        !PartialStruct.IsArraySection)
      return;
    Address LBAddr = PartialStruct.LowestElem.second;
    Address HBAddr = PartialStruct.HighestElem.second;
    // When the whole record is mapped, both bounds collapse to its start.
    if (PartialStruct.HasCompleteRecord) {
      LBAddr = PartialStruct.LB;
      HBAddr = PartialStruct.LB;
    }
    CombinedInfo.Exprs.push_back(VD);
    // Base is the base of the struct
    CombinedInfo.BasePointers.push_back(PartialStruct.Base.emitRawPointer(CGF));
    CombinedInfo.DevicePtrDecls.push_back(nullptr);
    CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
    // Pointer is the address of the lowest element
    llvm::Value *LB = LBAddr.emitRawPointer(CGF);
    const CXXMethodDecl *MD =
        CGF.CurFuncDecl ? dyn_cast<CXXMethodDecl>(CGF.CurFuncDecl) : nullptr;
    const CXXRecordDecl *RD = MD ? MD->getParent() : nullptr;
    bool HasBaseClass = RD && IsMapThis ? RD->getNumBases() > 0 : false;
    // There should not be a mapper for a combined entry.
    if (HasBaseClass) {
      // OpenMP 5.2 148:21:
      // If the target construct is within a class non-static member function,
      // and a variable is an accessible data member of the object for which the
      // non-static data member function is invoked, the variable is treated as
      // if the this[:1] expression had appeared in a map clause with a map-type
      // of tofrom.
      // Emit this[:1]
      CombinedInfo.Pointers.push_back(PartialStruct.Base.emitRawPointer(CGF));
      QualType Ty = MD->getFunctionObjectParameterType();
      llvm::Value *Size =
          CGF.Builder.CreateIntCast(CGF.getTypeSize(Ty), CGF.Int64Ty,
                                    /*isSigned=*/true);
      CombinedInfo.Sizes.push_back(Size);
    } else {
      CombinedInfo.Pointers.push_back(LB);
      // Size is (addr of {highest+1} element) - (addr of lowest element)
      llvm::Value *HB = HBAddr.emitRawPointer(CGF);
      llvm::Value *HAddr = CGF.Builder.CreateConstGEP1_32(
          HBAddr.getElementType(), HB, /*Idx0=*/1);
      llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(LB, CGF.VoidPtrTy);
      llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy);
      llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CGF.Int8Ty, CHAddr, CLAddr);
      llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.Int64Ty,
                                                    /*isSigned=*/false);
      CombinedInfo.Sizes.push_back(Size);
    }
    CombinedInfo.Mappers.push_back(nullptr);
    // Map type is always TARGET_PARAM, if generate info for captures.
    CombinedInfo.Types.push_back(
        NotTargetParams ? OpenMPOffloadMappingFlags::OMP_MAP_NONE
                        : OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM);
    // If any element has the present modifier, then make sure the runtime
    // doesn't attempt to allocate the struct.
    if (CurTypes.end() !=
        llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) {
          return static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
              Type & OpenMPOffloadMappingFlags::OMP_MAP_PRESENT);
        }))
      CombinedInfo.Types.back() |= OpenMPOffloadMappingFlags::OMP_MAP_PRESENT;
    // Remove TARGET_PARAM flag from the first element
    (*CurTypes.begin()) &= ~OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;
    // If any element has the ompx_hold modifier, then make sure the runtime
    // uses the hold reference count for the struct as a whole so that it won't
    // be unmapped by an extra dynamic reference count decrement. Add it to all
    // elements as well so the runtime knows which reference count to check
    // when determining whether it's time for device-to-host transfers of
    // individual elements.
    if (CurTypes.end() !=
        llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) {
          return static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
              Type & OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD);
        })) {
      CombinedInfo.Types.back() |= OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD;
      for (auto &M : CurTypes)
        M |= OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD;
    }

    // All other current entries will be MEMBER_OF the combined entry
    // (except for PTR_AND_OBJ entries which do not have a placeholder value
    // 0xFFFF in the MEMBER_OF field).
    OpenMPOffloadMappingFlags MemberOfFlag =
        OMPBuilder.getMemberOfFlag(CombinedInfo.BasePointers.size() - 1);
    for (auto &M : CurTypes)
      OMPBuilder.setCorrectMemberOfFlag(M, MemberOfFlag);
  }
8296
  /// Generate all the base pointers, section pointers, sizes, map types, and
  /// mappers for the extracted mappable expressions (all included in \a
  /// CombinedInfo). Also, for each item that relates with a device pointer, a
  /// pair of the relevant declaration and index where it occurs is appended to
  /// the device pointers info array. Only valid when this handler was built
  /// from an executable directive (asserts otherwise).
  void generateAllInfo(
      MapCombinedInfoTy &CombinedInfo, llvm::OpenMPIRBuilder &OMPBuilder,
      const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
          llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
    assert(CurDir.is<const OMPExecutableDirective *>() &&
           "Expect a executable directive");
    const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
    // Delegate to the clause-driven implementation with this directive's
    // clause list.
    generateAllInfoForClauses(CurExecDir->clauses(), CombinedInfo, OMPBuilder,
                              SkipVarSet);
  }
8312
8313 /// Generate all the base pointers, section pointers, sizes, map types, and
8314 /// mappers for the extracted map clauses of user-defined mapper (all included
8315 /// in \a CombinedInfo).
8316 void generateAllInfoForMapper(MapCombinedInfoTy &CombinedInfo,
8317 llvm::OpenMPIRBuilder &OMPBuilder) const {
8318 assert(CurDir.is<const OMPDeclareMapperDecl *>() &&
8319 "Expect a declare mapper directive");
8320 const auto *CurMapperDir = CurDir.get<const OMPDeclareMapperDecl *>();
8321 generateAllInfoForClauses(CurMapperDir->clauses(), CombinedInfo,
8322 OMPBuilder);
8323 }
8324
8325 /// Emit capture info for lambdas for variables captured by reference.
8326 void generateInfoForLambdaCaptures(
8327 const ValueDecl *VD, llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
8328 llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers) const {
8330 const auto *RD = VDType->getAsCXXRecordDecl();
8331 if (!RD || !RD->isLambda())
8332 return;
8333 Address VDAddr(Arg, CGF.ConvertTypeForMem(VDType),
8334 CGF.getContext().getDeclAlign(VD));
8335 LValue VDLVal = CGF.MakeAddrLValue(VDAddr, VDType);
8336 llvm::DenseMap<const ValueDecl *, FieldDecl *> Captures;
8337 FieldDecl *ThisCapture = nullptr;
8338 RD->getCaptureFields(Captures, ThisCapture);
8339 if (ThisCapture) {
8340 LValue ThisLVal =
8341 CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture);
8342 LValue ThisLValVal = CGF.EmitLValueForField(VDLVal, ThisCapture);
8343 LambdaPointers.try_emplace(ThisLVal.getPointer(CGF),
8344 VDLVal.getPointer(CGF));
8345 CombinedInfo.Exprs.push_back(VD);
8346 CombinedInfo.BasePointers.push_back(ThisLVal.getPointer(CGF));
8347 CombinedInfo.DevicePtrDecls.push_back(nullptr);
8348 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
8349 CombinedInfo.Pointers.push_back(ThisLValVal.getPointer(CGF));
8350 CombinedInfo.Sizes.push_back(
8351 CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy),
8352 CGF.Int64Ty, /*isSigned=*/true));
8353 CombinedInfo.Types.push_back(
8354 OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ |
8355 OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
8356 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF |
8357 OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT);
8358 CombinedInfo.Mappers.push_back(nullptr);
8359 }
8360 for (const LambdaCapture &LC : RD->captures()) {
8361 if (!LC.capturesVariable())
8362 continue;
8363 const VarDecl *VD = cast<VarDecl>(LC.getCapturedVar());
8364 if (LC.getCaptureKind() != LCK_ByRef && !VD->getType()->isPointerType())
8365 continue;
8366 auto It = Captures.find(VD);
8367 assert(It != Captures.end() && "Found lambda capture without field.");
8368 LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second);
8369 if (LC.getCaptureKind() == LCK_ByRef) {
8370 LValue VarLValVal = CGF.EmitLValueForField(VDLVal, It->second);
8371 LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
8372 VDLVal.getPointer(CGF));
8373 CombinedInfo.Exprs.push_back(VD);
8374 CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
8375 CombinedInfo.DevicePtrDecls.push_back(nullptr);
8376 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
8377 CombinedInfo.Pointers.push_back(VarLValVal.getPointer(CGF));
8378 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
8379 CGF.getTypeSize(
8381 CGF.Int64Ty, /*isSigned=*/true));
8382 } else {
8383 RValue VarRVal = CGF.EmitLoadOfLValue(VarLVal, RD->getLocation());
8384 LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
8385 VDLVal.getPointer(CGF));
8386 CombinedInfo.Exprs.push_back(VD);
8387 CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
8388 CombinedInfo.DevicePtrDecls.push_back(nullptr);
8389 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
8390 CombinedInfo.Pointers.push_back(VarRVal.getScalarVal());
8391 CombinedInfo.Sizes.push_back(llvm::ConstantInt::get(CGF.Int64Ty, 0));
8392 }
8393 CombinedInfo.Types.push_back(
8394 OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ |
8395 OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
8396 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF |
8397 OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT);
8398 CombinedInfo.Mappers.push_back(nullptr);
8399 }
8400 }
8401
8402 /// Set correct indices for lambdas captures.
8403 void adjustMemberOfForLambdaCaptures(
8404 llvm::OpenMPIRBuilder &OMPBuilder,
8405 const llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers,
8406 MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
8407 MapFlagsArrayTy &Types) const {
8408 for (unsigned I = 0, E = Types.size(); I < E; ++I) {
8409 // Set correct member_of idx for all implicit lambda captures.
8410 if (Types[I] != (OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ |
8411 OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
8412 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF |
8413 OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT))
8414 continue;
8415 llvm::Value *BasePtr = LambdaPointers.lookup(BasePointers[I]);
8416 assert(BasePtr && "Unable to find base lambda address.");
8417 int TgtIdx = -1;
8418 for (unsigned J = I; J > 0; --J) {
8419 unsigned Idx = J - 1;
8420 if (Pointers[Idx] != BasePtr)
8421 continue;
8422 TgtIdx = Idx;
8423 break;
8424 }
8425 assert(TgtIdx != -1 && "Unable to find parent lambda.");
8426 // All other current entries will be MEMBER_OF the combined entry
8427 // (except for PTR_AND_OBJ entries which do not have a placeholder value
8428 // 0xFFFF in the MEMBER_OF field).
8429 OpenMPOffloadMappingFlags MemberOfFlag =
8430 OMPBuilder.getMemberOfFlag(TgtIdx);
8431 OMPBuilder.setCorrectMemberOfFlag(Types[I], MemberOfFlag);
8432 }
8433 }
8434
8435 /// Generate the base pointers, section pointers, sizes, map types, and
8436 /// mappers associated to a given capture (all included in \a CombinedInfo).
  // NOTE(review): several declaration lines in this routine appear to have
  // been lost in extraction (the element types of the MapData tuple, the
  // component-list-typed locals such as Components/Components1/SubData, the
  // MapModifiers declarations, the Unknown modifier constant, and the Layout
  // vector). Verify against the upstream clang file before building.
  void generateInfoForCapture(const CapturedStmt::Capture *Cap,
                              llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
                              StructRangeInfoTy &PartialStruct) const {
    assert(!Cap->capturesVariableArrayType() &&
           "Not expecting to generate map info for a variable array type!");

    // We need to know when we generating information for the first component
    const ValueDecl *VD = Cap->capturesThis()
                              ? nullptr
                              : Cap->getCapturedVar()->getCanonicalDecl();

    // for map(to: lambda): skip here, processing it in
    // generateDefaultMapInfo
    if (LambdasMap.count(VD))
      return;

    // If this declaration appears in a is_device_ptr clause we just have to
    // pass the pointer by value. If it is a reference to a declaration, we just
    // pass its value.
    if (VD && (DevPointersMap.count(VD) || HasDevAddrsMap.count(VD))) {
      CombinedInfo.Exprs.push_back(VD);
      CombinedInfo.BasePointers.emplace_back(Arg);
      CombinedInfo.DevicePtrDecls.emplace_back(VD);
      CombinedInfo.DevicePointers.emplace_back(DeviceInfoTy::Pointer);
      CombinedInfo.Pointers.push_back(Arg);
      CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
          CGF.getTypeSize(CGF.getContext().VoidPtrTy), CGF.Int64Ty,
          /*isSigned=*/true));
      CombinedInfo.Types.push_back(
          OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
          OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM);
      CombinedInfo.Mappers.push_back(nullptr);
      return;
    }

    // Each entry pairs a component list with its map type, modifiers,
    // implicitness, mapper and originating expression.
    using MapData =
        const ValueDecl *, const Expr *>;
    SmallVector<MapData, 4> DeclComponentLists;
    // For member fields list in is_device_ptr, store it in
    // DeclComponentLists for generating components info.
    auto It = DevPointersMap.find(VD);
    if (It != DevPointersMap.end())
      for (const auto &MCL : It->second)
        DeclComponentLists.emplace_back(MCL, OMPC_MAP_to, Unknown,
                                        /*IsImpicit = */ true, nullptr,
                                        nullptr);
    auto I = HasDevAddrsMap.find(VD);
    if (I != HasDevAddrsMap.end())
      for (const auto &MCL : I->second)
        DeclComponentLists.emplace_back(MCL, OMPC_MAP_tofrom, Unknown,
                                        /*IsImpicit = */ true, nullptr,
                                        nullptr);
    assert(CurDir.is<const OMPExecutableDirective *>() &&
           "Expect a executable directive");
    const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
    // Collect every component list for VD from the directive's map clauses.
    for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
      const auto *EI = C->getVarRefs().begin();
      for (const auto L : C->decl_component_lists(VD)) {
        const ValueDecl *VDecl, *Mapper;
        // The Expression is not correct if the mapping is implicit
        const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
        std::tie(VDecl, Components, Mapper) = L;
        assert(VDecl == VD && "We got information for the wrong declaration??");
        assert(!Components.empty() &&
               "Not expecting declaration with no component lists.");
        DeclComponentLists.emplace_back(Components, C->getMapType(),
                                        C->getMapTypeModifiers(),
                                        C->isImplicit(), Mapper, E);
        ++EI;
      }
    }
    // Order entries so that 'present' and 'alloc' mappings come first; the
    // comparator reads modifiers from LHS and map type from RHS, then swaps.
    llvm::stable_sort(DeclComponentLists, [](const MapData &LHS,
                                             const MapData &RHS) {
      ArrayRef<OpenMPMapModifierKind> MapModifiers = std::get<2>(LHS);
      OpenMPMapClauseKind MapType = std::get<1>(RHS);
      bool HasPresent =
          llvm::is_contained(MapModifiers, clang::OMPC_MAP_MODIFIER_present);
      bool HasAllocs = MapType == OMPC_MAP_alloc;
      MapModifiers = std::get<2>(RHS);
      MapType = std::get<1>(LHS);
      bool HasPresentR =
          llvm::is_contained(MapModifiers, clang::OMPC_MAP_MODIFIER_present);
      bool HasAllocsR = MapType == OMPC_MAP_alloc;
      return (HasPresent && !HasPresentR) || (HasAllocs && !HasAllocsR);
    });

    // Find overlapping elements (including the offset from the base element).
    llvm::SmallDenseMap<
        const MapData *,
        4>
        OverlappedData;
    size_t Count = 0;
    // Pairwise-compare component lists (walking from the base outwards) to
    // detect when one mapping is a prefix of another.
    for (const MapData &L : DeclComponentLists) {
      OpenMPMapClauseKind MapType;
      bool IsImplicit;
      const ValueDecl *Mapper;
      const Expr *VarRef;
      std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
          L;
      ++Count;
      for (const MapData &L1 : ArrayRef(DeclComponentLists).slice(Count)) {
        std::tie(Components1, MapType, MapModifiers, IsImplicit, Mapper,
                 VarRef) = L1;
        auto CI = Components.rbegin();
        auto CE = Components.rend();
        auto SI = Components1.rbegin();
        auto SE = Components1.rend();
        for (; CI != CE && SI != SE; ++CI, ++SI) {
          if (CI->getAssociatedExpression()->getStmtClass() !=
              SI->getAssociatedExpression()->getStmtClass())
            break;
          // Are we dealing with different variables/fields?
          if (CI->getAssociatedDeclaration() != SI->getAssociatedDeclaration())
            break;
        }
        // Found overlapping if, at least for one component, reached the head
        // of the components list.
        if (CI == CE || SI == SE) {
          // Ignore it if it is the same component.
          if (CI == CE && SI == SE)
            continue;
          const auto It = (SI == SE) ? CI : SI;
          // If one component is a pointer and another one is a kind of
          // dereference of this pointer (array subscript, section, dereference,
          // etc.), it is not an overlapping.
          // Same, if one component is a base and another component is a
          // dereferenced pointer memberexpr with the same base.
          if (!isa<MemberExpr>(It->getAssociatedExpression()) ||
              (std::prev(It)->getAssociatedDeclaration() &&
               std::prev(It)
                   ->getAssociatedDeclaration()
                   ->getType()
                   ->isPointerType()) ||
              (It->getAssociatedDeclaration() &&
               It->getAssociatedDeclaration()->getType()->isPointerType() &&
               std::next(It) != CE && std::next(It) != SE))
            continue;
          const MapData &BaseData = CI == CE ? L : L1;
              SI == SE ? Components : Components1;
          auto &OverlappedElements = OverlappedData.FindAndConstruct(&BaseData);
          OverlappedElements.getSecond().push_back(SubData);
        }
      }
    }
    // Sort the overlapped elements for each item.
    if (!OverlappedData.empty()) {
      // Strip pointer/array layers to reach the underlying record type, then
      // flatten its field layout so overlaps can be ordered by field position.
      const Type *BaseType = VD->getType().getCanonicalType().getTypePtr();
      const Type *OrigType = BaseType->getPointeeOrArrayElementType();
      while (BaseType != OrigType) {
        BaseType = OrigType->getCanonicalTypeInternal().getTypePtr();
        OrigType = BaseType->getPointeeOrArrayElementType();
      }

      if (const auto *CRD = BaseType->getAsCXXRecordDecl())
        getPlainLayout(CRD, Layout, /*AsBase=*/false);
      else {
        const auto *RD = BaseType->getAsRecordDecl();
        Layout.append(RD->field_begin(), RD->field_end());
      }
    }
    for (auto &Pair : OverlappedData) {
      llvm::stable_sort(
          Pair.getSecond(),
          [&Layout](
                  Second) {
            auto CI = First.rbegin();
            auto CE = First.rend();
            auto SI = Second.rbegin();
            auto SE = Second.rend();
            for (; CI != CE && SI != SE; ++CI, ++SI) {
              if (CI->getAssociatedExpression()->getStmtClass() !=
                  SI->getAssociatedExpression()->getStmtClass())
                break;
              // Are we dealing with different variables/fields?
              if (CI->getAssociatedDeclaration() !=
                  SI->getAssociatedDeclaration())
                break;
            }

            // Lists contain the same elements.
            if (CI == CE && SI == SE)
              return false;

            // List with less elements is less than list with more elements.
            if (CI == CE || SI == SE)
              return CI == CE;

            const auto *FD1 = cast<FieldDecl>(CI->getAssociatedDeclaration());
            const auto *FD2 = cast<FieldDecl>(SI->getAssociatedDeclaration());
            if (FD1->getParent() == FD2->getParent())
              return FD1->getFieldIndex() < FD2->getFieldIndex();
            const auto *It =
                llvm::find_if(Layout, [FD1, FD2](const FieldDecl *FD) {
                  return FD == FD1 || FD == FD2;
                });
            return *It == FD1;
          });
    }

    // Associated with a capture, because the mapping flags depend on it.
    // Go through all of the elements with the overlapped elements.
    bool IsFirstComponentList = true;
    MapCombinedInfoTy StructBaseCombinedInfo;
    for (const auto &Pair : OverlappedData) {
      const MapData &L = *Pair.getFirst();
      OpenMPMapClauseKind MapType;
      bool IsImplicit;
      const ValueDecl *Mapper;
      const Expr *VarRef;
      std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
          L;
          OverlappedComponents = Pair.getSecond();
      generateInfoForComponentList(
          MapType, MapModifiers, std::nullopt, Components, CombinedInfo,
          StructBaseCombinedInfo, PartialStruct, IsFirstComponentList,
          IsImplicit, /*GenerateAllInfoForClauses*/ false, Mapper,
          /*ForDeviceAddr=*/false, VD, VarRef, OverlappedComponents);
      IsFirstComponentList = false;
    }
    // Go through other elements without overlapped elements.
    for (const MapData &L : DeclComponentLists) {
      OpenMPMapClauseKind MapType;
      bool IsImplicit;
      const ValueDecl *Mapper;
      const Expr *VarRef;
      std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
          L;
      auto It = OverlappedData.find(&L);
      if (It == OverlappedData.end())
        generateInfoForComponentList(
            MapType, MapModifiers, std::nullopt, Components, CombinedInfo,
            StructBaseCombinedInfo, PartialStruct, IsFirstComponentList,
            IsImplicit, /*GenerateAllInfoForClauses*/ false, Mapper,
            /*ForDeviceAddr=*/false, VD, VarRef);
      IsFirstComponentList = false;
    }
  }
8692
8693 /// Generate the default map information for a given capture \a CI,
8694 /// record field declaration \a RI and captured value \a CV.
8695 void generateDefaultMapInfo(const CapturedStmt::Capture &CI,
8696 const FieldDecl &RI, llvm::Value *CV,
8697 MapCombinedInfoTy &CombinedInfo) const {
8698 bool IsImplicit = true;
8699 // Do the default mapping.
8700 if (CI.capturesThis()) {
8701 CombinedInfo.Exprs.push_back(nullptr);
8702 CombinedInfo.BasePointers.push_back(CV);
8703 CombinedInfo.DevicePtrDecls.push_back(nullptr);
8704 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
8705 CombinedInfo.Pointers.push_back(CV);
8706 const auto *PtrTy = cast<PointerType>(RI.getType().getTypePtr());
8707 CombinedInfo.Sizes.push_back(
8708 CGF.Builder.CreateIntCast(CGF.getTypeSize(PtrTy->getPointeeType()),
8709 CGF.Int64Ty, /*isSigned=*/true));
8710 // Default map type.
8711 CombinedInfo.Types.push_back(OpenMPOffloadMappingFlags::OMP_MAP_TO |
8712 OpenMPOffloadMappingFlags::OMP_MAP_FROM);
8713 } else if (CI.capturesVariableByCopy()) {
8714 const VarDecl *VD = CI.getCapturedVar();
8715 CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
8716 CombinedInfo.BasePointers.push_back(CV);
8717 CombinedInfo.DevicePtrDecls.push_back(nullptr);
8718 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
8719 CombinedInfo.Pointers.push_back(CV);
8720 if (!RI.getType()->isAnyPointerType()) {
8721 // We have to signal to the runtime captures passed by value that are
8722 // not pointers.
8723 CombinedInfo.Types.push_back(
8724 OpenMPOffloadMappingFlags::OMP_MAP_LITERAL);
8725 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
8726 CGF.getTypeSize(RI.getType()), CGF.Int64Ty, /*isSigned=*/true));
8727 } else {
8728 // Pointers are implicitly mapped with a zero size and no flags
8729 // (other than first map that is added for all implicit maps).
8730 CombinedInfo.Types.push_back(OpenMPOffloadMappingFlags::OMP_MAP_NONE);
8731 CombinedInfo.Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
8732 }
8733 auto I = FirstPrivateDecls.find(VD);
8734 if (I != FirstPrivateDecls.end())
8735 IsImplicit = I->getSecond();
8736 } else {
8737 assert(CI.capturesVariable() && "Expected captured reference.");
8738 const auto *PtrTy = cast<ReferenceType>(RI.getType().getTypePtr());
8739 QualType ElementType = PtrTy->getPointeeType();
8740 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
8741 CGF.getTypeSize(ElementType), CGF.Int64Ty, /*isSigned=*/true));
8742 // The default map type for a scalar/complex type is 'to' because by
8743 // default the value doesn't have to be retrieved. For an aggregate
8744 // type, the default is 'tofrom'.
8745 CombinedInfo.Types.push_back(getMapModifiersForPrivateClauses(CI));
8746 const VarDecl *VD = CI.getCapturedVar();
8747 auto I = FirstPrivateDecls.find(VD);
8748 CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
8749 CombinedInfo.BasePointers.push_back(CV);
8750 CombinedInfo.DevicePtrDecls.push_back(nullptr);
8751 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
8752 if (I != FirstPrivateDecls.end() && ElementType->isAnyPointerType()) {
8753 Address PtrAddr = CGF.EmitLoadOfReference(CGF.MakeAddrLValue(
8754 CV, ElementType, CGF.getContext().getDeclAlign(VD),
8756 CombinedInfo.Pointers.push_back(PtrAddr.emitRawPointer(CGF));
8757 } else {
8758 CombinedInfo.Pointers.push_back(CV);
8759 }
8760 if (I != FirstPrivateDecls.end())
8761 IsImplicit = I->getSecond();
8762 }
8763 // Every default map produces a single argument which is a target parameter.
8764 CombinedInfo.Types.back() |=
8765 OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;
8766
8767 // Add flag stating this is an implicit map.
8768 if (IsImplicit)
8769 CombinedInfo.Types.back() |= OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT;
8770
8771 // No user-defined mapper for default mapping.
8772 CombinedInfo.Mappers.push_back(nullptr);
8773 }
8774};
8775} // anonymous namespace
8776
8777// Try to extract the base declaration from a `this->x` expression if possible.
8779 if (!E)
8780 return nullptr;
8781
8782 if (const auto *OASE = dyn_cast<ArraySectionExpr>(E->IgnoreParenCasts()))
8783 if (const MemberExpr *ME =
8784 dyn_cast<MemberExpr>(OASE->getBase()->IgnoreParenImpCasts()))
8785 return ME->getMemberDecl();
8786 return nullptr;
8787}
8788
8789/// Emit a string constant containing the names of the values mapped to the
8790/// offloading runtime library.
8791llvm::Constant *
8792emitMappingInformation(CodeGenFunction &CGF, llvm::OpenMPIRBuilder &OMPBuilder,
8793 MappableExprsHandler::MappingExprInfo &MapExprs) {
8794
8795 uint32_t SrcLocStrSize;
8796 if (!MapExprs.getMapDecl() && !MapExprs.getMapExpr())
8797 return OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize);
8798
8799 SourceLocation Loc;
8800 if (!MapExprs.getMapDecl() && MapExprs.getMapExpr()) {
8801 if (const ValueDecl *VD = getDeclFromThisExpr(MapExprs.getMapExpr()))
8802 Loc = VD->getLocation();
8803 else
8804 Loc = MapExprs.getMapExpr()->getExprLoc();
8805 } else {
8806 Loc = MapExprs.getMapDecl()->getLocation();
8807 }
8808
8809 std::string ExprName;
8810 if (MapExprs.getMapExpr()) {
8812 llvm::raw_string_ostream OS(ExprName);
8813 MapExprs.getMapExpr()->printPretty(OS, nullptr, P);
8814 OS.flush();
8815 } else {
8816 ExprName = MapExprs.getMapDecl()->getNameAsString();
8817 }
8818
8820 return OMPBuilder.getOrCreateSrcLocStr(PLoc.getFilename(), ExprName,
8821 PLoc.getLine(), PLoc.getColumn(),
8822 SrcLocStrSize);
8823}
8824
8825/// Emit the arrays used to pass the captures and map information to the
8826/// offloading runtime library. If there is no map or capture information,
8827/// return nullptr by reference.
8829 CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
8830 CGOpenMPRuntime::TargetDataInfo &Info, llvm::OpenMPIRBuilder &OMPBuilder,
8831 bool IsNonContiguous = false) {
8832 CodeGenModule &CGM = CGF.CGM;
8833
8834 // Reset the array information.
8835 Info.clearArrayInfo();
8836 Info.NumberOfPtrs = CombinedInfo.BasePointers.size();
8837
8838 using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
8839 InsertPointTy AllocaIP(CGF.AllocaInsertPt->getParent(),
8840 CGF.AllocaInsertPt->getIterator());
8841 InsertPointTy CodeGenIP(CGF.Builder.GetInsertBlock(),
8842 CGF.Builder.GetInsertPoint());
8843
8844 auto FillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) {
8845 return emitMappingInformation(CGF, OMPBuilder, MapExpr);
8846 };
8847 if (CGM.getCodeGenOpts().getDebugInfo() !=
8848 llvm::codegenoptions::NoDebugInfo) {
8849 CombinedInfo.Names.resize(CombinedInfo.Exprs.size());
8850 llvm::transform(CombinedInfo.Exprs, CombinedInfo.Names.begin(),
8851 FillInfoMap);
8852 }
8853
8854 auto DeviceAddrCB = [&](unsigned int I, llvm::Value *NewDecl) {
8855 if (const ValueDecl *DevVD = CombinedInfo.DevicePtrDecls[I]) {
8856 Info.CaptureDeviceAddrMap.try_emplace(DevVD, NewDecl);
8857 }
8858 };
8859
8860 auto CustomMapperCB = [&](unsigned int I) {
8861 llvm::Value *MFunc = nullptr;
8862 if (CombinedInfo.Mappers[I]) {
8863 Info.HasMapper = true;
8865 cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I]));
8866 }
8867 return MFunc;
8868 };
8869 OMPBuilder.emitOffloadingArrays(AllocaIP, CodeGenIP, CombinedInfo, Info,
8870 /*IsNonContiguous=*/true, DeviceAddrCB,
8871 CustomMapperCB);
8872}
8873
8874/// Check for inner distribute directive.
8875static const OMPExecutableDirective *
8877 const auto *CS = D.getInnermostCapturedStmt();
8878 const auto *Body =
8879 CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
8880 const Stmt *ChildStmt =
8882
8883 if (const auto *NestedDir =
8884 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
8885 OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind();
8886 switch (D.getDirectiveKind()) {
8887 case OMPD_target:
8888 // For now, treat 'target' with nested 'teams loop' as if it's
8889 // distributed (target teams distribute).
8890 if (isOpenMPDistributeDirective(DKind) || DKind == OMPD_teams_loop)
8891 return NestedDir;
8892 if (DKind == OMPD_teams) {
8893 Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(
8894 /*IgnoreCaptured=*/true);
8895 if (!Body)
8896 return nullptr;
8897 ChildStmt = CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);
8898 if (const auto *NND =
8899 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
8900 DKind = NND->getDirectiveKind();
8901 if (isOpenMPDistributeDirective(DKind))
8902 return NND;
8903 }
8904 }
8905 return nullptr;
8906 case OMPD_target_teams:
8907 if (isOpenMPDistributeDirective(DKind))
8908 return NestedDir;
8909 return nullptr;
8910 case OMPD_target_parallel:
8911 case OMPD_target_simd:
8912 case OMPD_target_parallel_for:
8913 case OMPD_target_parallel_for_simd:
8914 return nullptr;
8915 case OMPD_target_teams_distribute:
8916 case OMPD_target_teams_distribute_simd:
8917 case OMPD_target_teams_distribute_parallel_for:
8918 case OMPD_target_teams_distribute_parallel_for_simd:
8919 case OMPD_parallel:
8920 case OMPD_for:
8921 case OMPD_parallel_for:
8922 case OMPD_parallel_master:
8923 case OMPD_parallel_sections:
8924 case OMPD_for_simd:
8925 case OMPD_parallel_for_simd:
8926 case OMPD_cancel:
8927 case OMPD_cancellation_point:
8928 case OMPD_ordered:
8929 case OMPD_threadprivate:
8930 case OMPD_allocate:
8931 case OMPD_task:
8932 case OMPD_simd:
8933 case OMPD_tile:
8934 case OMPD_unroll:
8935 case OMPD_sections:
8936 case OMPD_section:
8937 case OMPD_single:
8938 case OMPD_master:
8939 case OMPD_critical:
8940 case OMPD_taskyield:
8941 case OMPD_barrier:
8942 case OMPD_taskwait:
8943 case OMPD_taskgroup:
8944 case OMPD_atomic:
8945 case OMPD_flush:
8946 case OMPD_depobj:
8947 case OMPD_scan:
8948 case OMPD_teams:
8949 case OMPD_target_data:
8950 case OMPD_target_exit_data:
8951 case OMPD_target_enter_data:
8952 case OMPD_distribute:
8953 case OMPD_distribute_simd:
8954 case OMPD_distribute_parallel_for:
8955 case OMPD_distribute_parallel_for_simd:
8956 case OMPD_teams_distribute:
8957 case OMPD_teams_distribute_simd:
8958 case OMPD_teams_distribute_parallel_for:
8959 case OMPD_teams_distribute_parallel_for_simd:
8960 case OMPD_target_update:
8961 case OMPD_declare_simd:
8962 case OMPD_declare_variant:
8963 case OMPD_begin_declare_variant:
8964 case OMPD_end_declare_variant:
8965 case OMPD_declare_target:
8966 case OMPD_end_declare_target:
8967 case OMPD_declare_reduction:
8968 case OMPD_declare_mapper:
8969 case OMPD_taskloop:
8970 case OMPD_taskloop_simd:
8971 case OMPD_master_taskloop:
8972 case OMPD_master_taskloop_simd:
8973 case OMPD_parallel_master_taskloop:
8974 case OMPD_parallel_master_taskloop_simd:
8975 case OMPD_requires:
8976 case OMPD_metadirective:
8977 case OMPD_unknown:
8978 default:
8979 llvm_unreachable("Unexpected directive.");
8980 }
8981 }
8982
8983 return nullptr;
8984}
8985
8986/// Emit the user-defined mapper function. The code generation follows the
8987/// pattern in the example below.
8988/// \code
8989/// void .omp_mapper.<type_name>.<mapper_id>.(void *rt_mapper_handle,
8990/// void *base, void *begin,
8991/// int64_t size, int64_t type,
8992/// void *name = nullptr) {
8993/// // Allocate space for an array section first or add a base/begin for
8994/// // pointer dereference.
8995/// if ((size > 1 || (base != begin && maptype.IsPtrAndObj)) &&
8996/// !maptype.IsDelete)
8997/// __tgt_push_mapper_component(rt_mapper_handle, base, begin,
8998/// size*sizeof(Ty), clearToFromMember(type));
8999/// // Map members.
9000/// for (unsigned i = 0; i < size; i++) {
9001/// // For each component specified by this mapper:
9002/// for (auto c : begin[i]->all_components) {
9003/// if (c.hasMapper())
9004/// (*c.Mapper())(rt_mapper_handle, c.arg_base, c.arg_begin, c.arg_size,
9005/// c.arg_type, c.arg_name);
9006/// else
9007/// __tgt_push_mapper_component(rt_mapper_handle, c.arg_base,
9008/// c.arg_begin, c.arg_size, c.arg_type,
9009/// c.arg_name);
9010/// }
9011/// }
9012/// // Delete the array section.
9013/// if (size > 1 && maptype.IsDelete)
9014/// __tgt_push_mapper_component(rt_mapper_handle, base, begin,
9015/// size*sizeof(Ty), clearToFromMember(type));
9016/// }
9017/// \endcode
9019 CodeGenFunction *CGF) {
9020 if (UDMMap.count(D) > 0)
9021 return;
9023 QualType Ty = D->getType();
9024 QualType PtrTy = C.getPointerType(Ty).withRestrict();
9025 QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
9026 auto *MapperVarDecl =
9027 cast<VarDecl>(cast<DeclRefExpr>(D->getMapperVarRef())->getDecl());
9028 SourceLocation Loc = D->getLocation();
9029 CharUnits ElementSize = C.getTypeSizeInChars(Ty);
9030 llvm::Type *ElemTy = CGM.getTypes().ConvertTypeForMem(Ty);
9031
9032 // Prepare mapper function arguments and attributes.
9033 ImplicitParamDecl HandleArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
9034 C.VoidPtrTy, ImplicitParamKind::Other);
9035 ImplicitParamDecl BaseArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
9037 ImplicitParamDecl BeginArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
9038 C.VoidPtrTy, ImplicitParamKind::Other);
9039 ImplicitParamDecl SizeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
9041 ImplicitParamDecl TypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
9043 ImplicitParamDecl NameArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
9045 FunctionArgList Args;
9046 Args.push_back(&HandleArg);
9047 Args.push_back(&BaseArg);
9048 Args.push_back(&BeginArg);
9049 Args.push_back(&SizeArg);
9050 Args.push_back(&TypeArg);
9051 Args.push_back(&NameArg);
9052 const CGFunctionInfo &FnInfo =
9054 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
9055 SmallString<64> TyStr;
9056 llvm::raw_svector_ostream Out(TyStr);
9058 std::string Name = getName({"omp_mapper", TyStr, D->getName()});
9059 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
9060 Name, &CGM.getModule());
9062 Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
9063 // Start the mapper function code generation.
9064 CodeGenFunction MapperCGF(CGM);
9065 MapperCGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
9066 // Compute the starting and end addresses of array elements.
9067 llvm::Value *Size = MapperCGF.EmitLoadOfScalar(
9068 MapperCGF.GetAddrOfLocalVar(&SizeArg), /*Volatile=*/false,
9069 C.getPointerType(Int64Ty), Loc);
9070 // Prepare common arguments for array initiation and deletion.
9071 llvm::Value *Handle = MapperCGF.EmitLoadOfScalar(
9072 MapperCGF.GetAddrOfLocalVar(&HandleArg),
9073 /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
9074 llvm::Value *BaseIn = MapperCGF.EmitLoadOfScalar(
9075 MapperCGF.GetAddrOfLocalVar(&BaseArg),
9076 /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
9077 llvm::Value *BeginIn = MapperCGF.EmitLoadOfScalar(
9078 MapperCGF.GetAddrOfLocalVar(&BeginArg),
9079 /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
9080 // Convert the size in bytes into the number of array elements.
9081 Size = MapperCGF.Builder.CreateExactUDiv(
9082 Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
9083 llvm::Value *PtrBegin = MapperCGF.Builder.CreateBitCast(
9084 BeginIn, CGM.getTypes().ConvertTypeForMem(PtrTy));
9085 llvm::Value *PtrEnd = MapperCGF.Builder.CreateGEP(ElemTy, PtrBegin, Size);
9086 llvm::Value *MapType = MapperCGF.EmitLoadOfScalar(
9087 MapperCGF.GetAddrOfLocalVar(&TypeArg), /*Volatile=*/false,
9088 C.getPointerType(Int64Ty), Loc);
9089 llvm::Value *MapName = MapperCGF.EmitLoadOfScalar(
9090 MapperCGF.GetAddrOfLocalVar(&NameArg),
9091 /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
9092
9093 // Emit array initiation if this is an array section and \p MapType indicates
9094 // that memory allocation is required.
9095 llvm::BasicBlock *HeadBB = MapperCGF.createBasicBlock("omp.arraymap.head");
9096 emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
9097 MapName, ElementSize, HeadBB, /*IsInit=*/true);
9098
9099 // Emit a for loop to iterate through SizeArg of elements and map all of them.
9100
9101 // Emit the loop header block.
9102 MapperCGF.EmitBlock(HeadBB);
9103 llvm::BasicBlock *BodyBB = MapperCGF.createBasicBlock("omp.arraymap.body");
9104 llvm::BasicBlock *DoneBB = MapperCGF.createBasicBlock("omp.done");
9105 // Evaluate whether the initial condition is satisfied.
9106 llvm::Value *IsEmpty =
9107 MapperCGF.Builder.CreateICmpEQ(PtrBegin, PtrEnd, "omp.arraymap.isempty");
9108 MapperCGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
9109 llvm::BasicBlock *EntryBB = MapperCGF.Builder.GetInsertBlock();
9110
9111 // Emit the loop body block.
9112 MapperCGF.EmitBlock(BodyBB);
9113 llvm::BasicBlock *LastBB = BodyBB;
9114 llvm::PHINode *PtrPHI = MapperCGF.Builder.CreatePHI(
9115 PtrBegin->getType(), 2, "omp.arraymap.ptrcurrent");
9116 PtrPHI->addIncoming(PtrBegin, EntryBB);
9117 Address PtrCurrent(PtrPHI, ElemTy,
9118 MapperCGF.GetAddrOfLocalVar(&BeginArg)
9119 .getAlignment()
9120 .alignmentOfArrayElement(ElementSize));
9121 // Privatize the declared variable of mapper to be the current array element.
9123 Scope.addPrivate(MapperVarDecl, PtrCurrent);
9124 (void)Scope.Privatize();
9125
9126 // Get map clause information. Fill up the arrays with all mapped variables.
9127 MappableExprsHandler::MapCombinedInfoTy Info;
9128 MappableExprsHandler MEHandler(*D, MapperCGF);
9129 MEHandler.generateAllInfoForMapper(Info, OMPBuilder);
9130
9131 // Call the runtime API __tgt_mapper_num_components to get the number of
9132 // pre-existing components.
9133 llvm::Value *OffloadingArgs[] = {Handle};
9134 llvm::Value *PreviousSize = MapperCGF.EmitRuntimeCall(
9135 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
9136 OMPRTL___tgt_mapper_num_components),
9137 OffloadingArgs);
9138 llvm::Value *ShiftedPreviousSize = MapperCGF.Builder.CreateShl(
9139 PreviousSize,
9140 MapperCGF.Builder.getInt64(MappableExprsHandler::getFlagMemberOffset()));
9141
9142 // Fill up the runtime mapper handle for all components.
9143 for (unsigned I = 0; I < Info.BasePointers.size(); ++I) {
9144 llvm::Value *CurBaseArg = MapperCGF.Builder.CreateBitCast(
9145 Info.BasePointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
9146 llvm::Value *CurBeginArg = MapperCGF.Builder.CreateBitCast(
9147 Info.Pointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
9148 llvm::Value *CurSizeArg = Info.Sizes[I];
9149 llvm::Value *CurNameArg =
9150 (CGM.getCodeGenOpts().getDebugInfo() ==
9151 llvm::codegenoptions::NoDebugInfo)
9152 ? llvm::ConstantPointerNull::get(CGM.VoidPtrTy)
9153 : emitMappingInformation(MapperCGF, OMPBuilder, Info.Exprs[I]);
9154
9155 // Extract the MEMBER_OF field from the map type.
9156 llvm::Value *OriMapType = MapperCGF.Builder.getInt64(
9157 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
9158 Info.Types[I]));
9159 llvm::Value *MemberMapType =
9160 MapperCGF.Builder.CreateNUWAdd(OriMapType, ShiftedPreviousSize);
9161
9162 // Combine the map type inherited from user-defined mapper with that
9163 // specified in the program. According to the OMP_MAP_TO and OMP_MAP_FROM
9164 // bits of the \a MapType, which is the input argument of the mapper
9165 // function, the following code will set the OMP_MAP_TO and OMP_MAP_FROM
9166 // bits of MemberMapType.
9167 // [OpenMP 5.0], 1.2.6. map-type decay.
9168 // | alloc | to | from | tofrom | release | delete
9169 // ----------------------------------------------------------
9170 // alloc | alloc | alloc | alloc | alloc | release | delete
9171 // to | alloc | to | alloc | to | release | delete
9172 // from | alloc | alloc | from | from | release | delete
9173 // tofrom | alloc | to | from | tofrom | release | delete
9174 llvm::Value *LeftToFrom = MapperCGF.Builder.CreateAnd(
9175 MapType,
9176 MapperCGF.Builder.getInt64(
9177 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
9178 OpenMPOffloadMappingFlags::OMP_MAP_TO |
9179 OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
9180 llvm::BasicBlock *AllocBB = MapperCGF.createBasicBlock("omp.type.alloc");
9181 llvm::BasicBlock *AllocElseBB =
9182 MapperCGF.createBasicBlock("omp.type.alloc.else");
9183 llvm::BasicBlock *ToBB = MapperCGF.createBasicBlock("omp.type.to");
9184 llvm::BasicBlock *ToElseBB = MapperCGF.createBasicBlock("omp.type.to.else");
9185 llvm::BasicBlock *FromBB = MapperCGF.createBasicBlock("omp.type.from");
9186 llvm::BasicBlock *EndBB = MapperCGF.createBasicBlock("omp.type.end");
9187 llvm::Value *IsAlloc = MapperCGF.Builder.CreateIsNull(LeftToFrom);
9188 MapperCGF.Builder.CreateCondBr(IsAlloc, AllocBB, AllocElseBB);
9189 // In case of alloc, clear OMP_MAP_TO and OMP_MAP_FROM.
9190 MapperCGF.EmitBlock(AllocBB);
9191 llvm::Value *AllocMapType = MapperCGF.Builder.CreateAnd(
9192 MemberMapType,
9193 MapperCGF.Builder.getInt64(
9194 ~static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
9195 OpenMPOffloadMappingFlags::OMP_MAP_TO |
9196 OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
9197 MapperCGF.Builder.CreateBr(EndBB);
9198 MapperCGF.EmitBlock(AllocElseBB);
9199 llvm::Value *IsTo = MapperCGF.Builder.CreateICmpEQ(
9200 LeftToFrom,
9201 MapperCGF.Builder.getInt64(
9202 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
9203 OpenMPOffloadMappingFlags::OMP_MAP_TO)));
9204 MapperCGF.Builder.CreateCondBr(IsTo, ToBB, ToElseBB);
9205 // In case of to, clear OMP_MAP_FROM.
9206 MapperCGF.EmitBlock(ToBB);
9207 llvm::Value *ToMapType = MapperCGF.Builder.CreateAnd(
9208 MemberMapType,
9209 MapperCGF.Builder.getInt64(
9210 ~static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
9211 OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
9212 MapperCGF.Builder.CreateBr(EndBB);
9213 MapperCGF.EmitBlock(ToElseBB);
9214 llvm::Value *IsFrom = MapperCGF.Builder.CreateICmpEQ(
9215 LeftToFrom,
9216 MapperCGF.Builder.getInt64(
9217 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
9218 OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
9219 MapperCGF.Builder.CreateCondBr(IsFrom, FromBB, EndBB);
9220 // In case of from, clear OMP_MAP_TO.
9221 MapperCGF.EmitBlock(FromBB);
9222 llvm::Value *FromMapType = MapperCGF.Builder.CreateAnd(
9223 MemberMapType,
9224 MapperCGF.Builder.getInt64(
9225 ~static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
9226 OpenMPOffloadMappingFlags::OMP_MAP_TO)));
9227 // In case of tofrom, do nothing.
9228 MapperCGF.EmitBlock(EndBB);
9229 LastBB = EndBB;
9230 llvm::PHINode *CurMapType =
9231 MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.maptype");
9232 CurMapType->addIncoming(AllocMapType, AllocBB);
9233 CurMapType->addIncoming(ToMapType, ToBB);
9234 CurMapType->addIncoming(FromMapType, FromBB);
9235 CurMapType->addIncoming(MemberMapType, ToElseBB);
9236
9237 llvm::Value *OffloadingArgs[] = {Handle, CurBaseArg, CurBeginArg,
9238 CurSizeArg, CurMapType, CurNameArg};
9239 if (Info.Mappers[I]) {
9240 // Call the corresponding mapper function.
9241 llvm::Function *MapperFunc = getOrCreateUserDefinedMapperFunc(
9242 cast<OMPDeclareMapperDecl>(Info.Mappers[I]));
9243 assert(MapperFunc && "Expect a valid mapper function is available.");
9244 MapperCGF.EmitNounwindRuntimeCall(MapperFunc, OffloadingArgs);
9245 } else {
9246 // Call the runtime API __tgt_push_mapper_component to fill up the runtime
9247 // data structure.
9248 MapperCGF.EmitRuntimeCall(
9249 OMPBuilder.getOrCreateRuntimeFunction(
9250 CGM.getModule(), OMPRTL___tgt_push_mapper_component),
9251 OffloadingArgs);
9252 }
9253 }
9254
9255 // Update the pointer to point to the next element that needs to be mapped,
9256 // and check whether we have mapped all elements.
9257 llvm::Value *PtrNext = MapperCGF.Builder.CreateConstGEP1_32(
9258 ElemTy, PtrPHI, /*Idx0=*/1, "omp.arraymap.next");
9259 PtrPHI->addIncoming(PtrNext, LastBB);
9260 llvm::Value *IsDone =
9261 MapperCGF.Builder.CreateICmpEQ(PtrNext, PtrEnd, "omp.arraymap.isdone");
9262 llvm::BasicBlock *ExitBB = MapperCGF.createBasicBlock("omp.arraymap.exit");
9263 MapperCGF.Builder.CreateCondBr(IsDone, ExitBB, BodyBB);
9264
9265 MapperCGF.EmitBlock(ExitBB);
9266 // Emit array deletion if this is an array section and \p MapType indicates
9267 // that deletion is required.
9268 emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
9269 MapName, ElementSize, DoneBB, /*IsInit=*/false);
9270
9271 // Emit the function exit block.
9272 MapperCGF.EmitBlock(DoneBB, /*IsFinished=*/true);
9273 MapperCGF.FinishFunction();
9274 UDMMap.try_emplace(D, Fn);
9275 if (CGF) {
9276 auto &Decls = FunctionUDMMap.FindAndConstruct(CGF->CurFn);
9277 Decls.second.push_back(D);
9278 }
9279}
9280
9281 /// Emit the array initialization or deletion portion for user-defined mapper
9282 /// code generation. First, it evaluates whether an array section is mapped and
9283 /// whether the \a MapType instructs to delete this section. If \a IsInit is
9284 /// true, and \a MapType indicates to not delete this array, array
9285 /// initialization code is generated. If \a IsInit is false, and \a MapType
9286 /// indicates to delete this array, array deletion code is generated.
// NOTE(review): the defining declaration line
// (CGOpenMPRuntime::emitUDMapperArrayInitOrDel) is not visible in this
// rendering; the parameter list below belongs to it — verify against upstream.
9288 CodeGenFunction &MapperCGF, llvm::Value *Handle, llvm::Value *Base,
9289 llvm::Value *Begin, llvm::Value *Size, llvm::Value *MapType,
9290 llvm::Value *MapName, CharUnits ElementSize, llvm::BasicBlock *ExitBB,
9291 bool IsInit) {
// Basic-block name suffix distinguishing the init vs. delete variant.
9292 StringRef Prefix = IsInit ? ".init" : ".del";
9293
9294 // Evaluate if this is an array section.
9295 llvm::BasicBlock *BodyBB =
9296 MapperCGF.createBasicBlock(getName({"omp.array", Prefix}));
// An array section is assumed when more than one element is mapped.
9297 llvm::Value *IsArray = MapperCGF.Builder.CreateICmpSGT(
9298 Size, MapperCGF.Builder.getInt64(1), "omp.arrayinit.isarray");
// Extract the OMP_MAP_DELETE bit from the incoming map type.
9299 llvm::Value *DeleteBit = MapperCGF.Builder.CreateAnd(
9300 MapType,
9301 MapperCGF.Builder.getInt64(
9302 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
9303 OpenMPOffloadMappingFlags::OMP_MAP_DELETE)));
9304 llvm::Value *DeleteCond;
9305 llvm::Value *Cond;
9306 if (IsInit) {
9307 // base != begin?
9308 llvm::Value *BaseIsBegin = MapperCGF.Builder.CreateICmpNE(Base, Begin);
9309 // IsPtrAndObj?
9310 llvm::Value *PtrAndObjBit = MapperCGF.Builder.CreateAnd(
9311 MapType,
9312 MapperCGF.Builder.getInt64(
9313 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
9314 OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ)));
9315 PtrAndObjBit = MapperCGF.Builder.CreateIsNotNull(PtrAndObjBit);
9316 BaseIsBegin = MapperCGF.Builder.CreateAnd(BaseIsBegin, PtrAndObjBit);
// Init runs for array sections, or for PTR_AND_OBJ entries whose base
// differs from begin, and only when the delete bit is NOT set.
9317 Cond = MapperCGF.Builder.CreateOr(IsArray, BaseIsBegin);
9318 DeleteCond = MapperCGF.Builder.CreateIsNull(
9319 DeleteBit, getName({"omp.array", Prefix, ".delete"}));
9320 } else {
// Deletion runs for array sections only, and only when the delete bit IS set.
9321 Cond = IsArray;
9322 DeleteCond = MapperCGF.Builder.CreateIsNotNull(
9323 DeleteBit, getName({"omp.array", Prefix, ".delete"}));
9324 }
9325 Cond = MapperCGF.Builder.CreateAnd(Cond, DeleteCond);
9326 MapperCGF.Builder.CreateCondBr(Cond, BodyBB, ExitBB);
9327
9328 MapperCGF.EmitBlock(BodyBB);
9329 // Get the array size by multiplying element size and element number (i.e., \p
9330 // Size).
9331 llvm::Value *ArraySize = MapperCGF.Builder.CreateNUWMul(
9332 Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
9333 // Remove OMP_MAP_TO and OMP_MAP_FROM from the map type, so that it achieves
9334 // memory allocation/deletion purpose only.
9335 llvm::Value *MapTypeArg = MapperCGF.Builder.CreateAnd(
9336 MapType,
9337 MapperCGF.Builder.getInt64(
9338 ~static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
9339 OpenMPOffloadMappingFlags::OMP_MAP_TO |
9340 OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
// Mark this entry implicit so the runtime treats it accordingly.
9341 MapTypeArg = MapperCGF.Builder.CreateOr(
9342 MapTypeArg,
9343 MapperCGF.Builder.getInt64(
9344 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
9345 OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT)));
9346
9347 // Call the runtime API __tgt_push_mapper_component to fill up the runtime
9348 // data structure.
9349 llvm::Value *OffloadingArgs[] = {Handle, Base, Begin,
9350 ArraySize, MapTypeArg, MapName};
9351 MapperCGF.EmitRuntimeCall(
9352 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
9353 OMPRTL___tgt_push_mapper_component),
9354 OffloadingArgs);
9355 }
9356
// Returns the (cached or newly emitted) mapper function for \p D.
// NOTE(review): the signature line
// (CGOpenMPRuntime::getOrCreateUserDefinedMapperFunc) is not visible in this
// rendering.
9358 const OMPDeclareMapperDecl *D) {
// Fast path: reuse a previously emitted mapper function.
9359 auto I = UDMMap.find(D);
9360 if (I != UDMMap.end())
9361 return I->second;
// NOTE(review): original line 9362 is not visible here; presumably it emits
// the mapper (populating UDMMap) before the lookup below — verify upstream.
9363 return UDMMap.lookup(D);
9364 }
9365
// Computes the loop trip count for a target directive that contains a nested
// teams-distribute style loop (used as a hint to the offloading runtime);
// returns an i64 constant 0 when no trip count can be computed.
// NOTE(review): the signature lines (CGOpenMPRuntime::emitTargetNumIterationsCall
// taking CGF and D) are not visible in this rendering, nor is line 9371 which
// presumably defines 'Kind' from D — verify against upstream.
9368 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
9369 const OMPLoopDirective &D)>
9370 SizeEmitter) {
9372 const OMPExecutableDirective *TD = &D;
9373 // Get nested teams distribute kind directive, if any. For now, treat
9374 // 'target_teams_loop' as if it's really a target_teams_distribute.
9375 if ((!isOpenMPDistributeDirective(Kind) || !isOpenMPTeamsDirective(Kind)) &&
9376 Kind != OMPD_target_teams_loop)
// NOTE(review): line 9377 (the statement controlled by the 'if' above, which
// reassigns TD to the nested directive, possibly null) is not visible here.
9378 if (!TD)
9379 return llvm::ConstantInt::get(CGF.Int64Ty, 0);
9380
9381 const auto *LD = cast<OMPLoopDirective>(TD);
9382 if (llvm::Value *NumIterations = SizeEmitter(CGF, *LD))
9383 return NumIterations;
// Fall back to zero when the size emitter cannot produce a trip count.
9384 return llvm::ConstantInt::get(CGF.Int64Ty, 0);
9385 }
9386
// Host-fallback path for a target region. When offloading is mandatory there
// is no host version, so 'unreachable' is emitted; otherwise the captured
// variables are (re-)generated if an outer task wraps the region, and the host
// outlined function is called.
9387 static void
9388 emitTargetCallFallback(CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn,
9389 const OMPExecutableDirective &D,
// NOTE(review): the CapturedVars parameter line (original line 9390) is not
// visible in this rendering.
9391 bool RequiresOuterTask, const CapturedStmt &CS,
9392 bool OffloadingMandatory, CodeGenFunction &CGF) {
9393 if (OffloadingMandatory) {
// The host path must never execute when offloading is mandatory.
9394 CGF.Builder.CreateUnreachable();
9395 } else {
9396 if (RequiresOuterTask) {
// Re-generate the captures in the current (task) context.
9397 CapturedVars.clear();
9398 CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
9399 }
9400 OMPRuntime->emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn,
9401 CapturedVars);
9402 }
9403 }
9404
9405static llvm::Value *emitDeviceID(
9406 llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
9407 CodeGenFunction &CGF) {
9408 // Emit device ID if any.
9409 llvm::Value *DeviceID;
9410 if (Device.getPointer()) {
9411 assert((Device.getInt() == OMPC_DEVICE_unknown ||
9412 Device.getInt() == OMPC_DEVICE_device_num) &&
9413 "Expected device_num modifier.");
9414 llvm::Value *DevVal = CGF.EmitScalarExpr(Device.getPointer());
9415 DeviceID =
9416 CGF.Builder.CreateIntCast(DevVal, CGF.Int64Ty, /*isSigned=*/true);
9417 } else {
9418 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
9419 }
9420 return DeviceID;
9421}
9422
// Computes the dynamic group-memory size requested via the OMPX
// 'dyn_cgroup_mem' clause as an unsigned i32; defaults to 0 when the clause is
// absent.
// NOTE(review): the first signature line (static llvm::Value *emitDynCGGroupMem
// taking the directive D) is not visible in this rendering.
9424 CodeGenFunction &CGF) {
9425 llvm::Value *DynCGroupMem = CGF.Builder.getInt32(0);
9426
9427 if (auto *DynMemClause = D.getSingleClause<OMPXDynCGroupMemClause>()) {
// Evaluate the clause expression inside its own cleanup scope.
9428 CodeGenFunction::RunCleanupsScope DynCGroupMemScope(CGF);
9429 llvm::Value *DynCGroupMemVal = CGF.EmitScalarExpr(
9430 DynMemClause->getSize(), /*IgnoreResultAssign=*/true);
9431 DynCGroupMem = CGF.Builder.CreateIntCast(DynCGroupMemVal, CGF.Int32Ty,
9432 /*isSigned=*/false);
9433 }
9434 return DynCGroupMem;
9435 }
9436
// Emits the offloading kernel launch for a target region: collects map
// information for every capture, materializes the offloading argument arrays,
// and emits the OpenMPIRBuilder kernel-launch sequence (with host fallback).
// NOTE(review): the signature line (static void emitTargetCallKernelLaunch,
// original line 9437) and several body lines (9457 MappedVarSet declaration,
// 9461 the capture-iterator loop header, 9529 the TargetDataInfo declaration)
// are not visible in this rendering — verify against upstream.
9438 CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn,
9439 const OMPExecutableDirective &D,
9440 llvm::SmallVectorImpl<llvm::Value *> &CapturedVars, bool RequiresOuterTask,
9441 const CapturedStmt &CS, bool OffloadingMandatory,
9442 llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
9443 llvm::Value *OutlinedFnID, CodeGenFunction::OMPTargetDataInfo &InputInfo,
9444 llvm::Value *&MapTypesArray, llvm::Value *&MapNamesArray,
9445 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
9446 const OMPLoopDirective &D)>
9447 SizeEmitter,
9448 CodeGenFunction &CGF, CodeGenModule &CGM) {
9449 llvm::OpenMPIRBuilder &OMPBuilder = OMPRuntime->getOMPBuilder();
9450
9451 // Fill up the arrays with all the captured variables.
9452 MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
9453
9454 // Get mappable expression information.
9455 MappableExprsHandler MEHandler(D, CGF);
9456 llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers;
9458
// Walk record fields, capture infos and captured values in lockstep.
9459 auto RI = CS.getCapturedRecordDecl()->field_begin();
9460 auto *CV = CapturedVars.begin();
9462 CE = CS.capture_end();
9463 CI != CE; ++CI, ++RI, ++CV) {
9464 MappableExprsHandler::MapCombinedInfoTy CurInfo;
9465 MappableExprsHandler::StructRangeInfoTy PartialStruct;
9466
9467 // VLA sizes are passed to the outlined region by copy and do not have map
9468 // information associated.
9469 if (CI->capturesVariableArrayType()) {
9470 CurInfo.Exprs.push_back(nullptr);
9471 CurInfo.BasePointers.push_back(*CV);
9472 CurInfo.DevicePtrDecls.push_back(nullptr);
9473 CurInfo.DevicePointers.push_back(
9474 MappableExprsHandler::DeviceInfoTy::None);
9475 CurInfo.Pointers.push_back(*CV);
9476 CurInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
9477 CGF.getTypeSize(RI->getType()), CGF.Int64Ty, /*isSigned=*/true));
9478 // Copy to the device as an argument. No need to retrieve it.
9479 CurInfo.Types.push_back(OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
9480 OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM |
9481 OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT);
9482 CurInfo.Mappers.push_back(nullptr);
9483 } else {
9484 // If we have any information in the map clause, we use it, otherwise we
9485 // just do a default mapping.
9486 MEHandler.generateInfoForCapture(CI, *CV, CurInfo, PartialStruct);
9487 if (!CI->capturesThis())
9488 MappedVarSet.insert(CI->getCapturedVar());
9489 else
9490 MappedVarSet.insert(nullptr);
9491 if (CurInfo.BasePointers.empty() && !PartialStruct.Base.isValid())
9492 MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurInfo);
9493 // Generate correct mapping for variables captured by reference in
9494 // lambdas.
9495 if (CI->capturesVariable())
9496 MEHandler.generateInfoForLambdaCaptures(CI->getCapturedVar(), *CV,
9497 CurInfo, LambdaPointers);
9498 }
9499 // We expect to have at least an element of information for this capture.
9500 assert((!CurInfo.BasePointers.empty() || PartialStruct.Base.isValid()) &&
9501 "Non-existing map pointer for capture!");
9502 assert(CurInfo.BasePointers.size() == CurInfo.Pointers.size() &&
9503 CurInfo.BasePointers.size() == CurInfo.Sizes.size() &&
9504 CurInfo.BasePointers.size() == CurInfo.Types.size() &&
9505 CurInfo.BasePointers.size() == CurInfo.Mappers.size() &&
9506 "Inconsistent map information sizes!");
9507
9508 // If there is an entry in PartialStruct it means we have a struct with
9509 // individual members mapped. Emit an extra combined entry.
9510 if (PartialStruct.Base.isValid()) {
9511 CombinedInfo.append(PartialStruct.PreliminaryMapData);
9512 MEHandler.emitCombinedEntry(
9513 CombinedInfo, CurInfo.Types, PartialStruct, CI->capturesThis(),
9514 OMPBuilder, nullptr,
9515 !PartialStruct.PreliminaryMapData.BasePointers.empty());
9516 }
9517
9518 // We need to append the results of this capture to what we already have.
9519 CombinedInfo.append(CurInfo);
9520 }
9521 // Adjust MEMBER_OF flags for the lambdas captures.
9522 MEHandler.adjustMemberOfForLambdaCaptures(
9523 OMPBuilder, LambdaPointers, CombinedInfo.BasePointers,
9524 CombinedInfo.Pointers, CombinedInfo.Types);
9525 // Map any list items in a map clause that were not captures because they
9526 // weren't referenced within the construct.
9527 MEHandler.generateAllInfo(CombinedInfo, OMPBuilder, MappedVarSet);
9528
9530 // Fill up the arrays and create the arguments.
9531 emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder);
9532 bool EmitDebug = CGF.CGM.getCodeGenOpts().getDebugInfo() !=
9533 llvm::codegenoptions::NoDebugInfo;
9534 OMPBuilder.emitOffloadingArraysArgument(CGF.Builder, Info.RTArgs, Info,
9535 EmitDebug,
9536 /*ForEndCall=*/false);
9537
// Publish the emitted arrays so the (possibly task-wrapped) launch below and
// the caller can reference them.
9538 InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
9539 InputInfo.BasePointersArray = Address(Info.RTArgs.BasePointersArray,
9540 CGF.VoidPtrTy, CGM.getPointerAlign());
9541 InputInfo.PointersArray =
9542 Address(Info.RTArgs.PointersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
9543 InputInfo.SizesArray =
9544 Address(Info.RTArgs.SizesArray, CGF.Int64Ty, CGM.getPointerAlign());
9545 InputInfo.MappersArray =
9546 Address(Info.RTArgs.MappersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
9547 MapTypesArray = Info.RTArgs.MapTypesArray;
9548 MapNamesArray = Info.RTArgs.MapNamesArray;
9549
9550 auto &&ThenGen = [&OMPRuntime, OutlinedFn, &D, &CapturedVars,
9551 RequiresOuterTask, &CS, OffloadingMandatory, Device,
9552 OutlinedFnID, &InputInfo, &MapTypesArray, &MapNamesArray,
9553 SizeEmitter](CodeGenFunction &CGF, PrePostActionTy &) {
9554 bool IsReverseOffloading = Device.getInt() == OMPC_DEVICE_ancestor;
9555
9556 if (IsReverseOffloading) {
9557 // Reverse offloading is not supported, so just execute on the host.
9558 // FIXME: This fallback solution is incorrect since it ignores the
9559 // OMP_TARGET_OFFLOAD environment variable. Instead it would be better to
9560 // assert here and ensure SEMA emits an error.
9561 emitTargetCallFallback(OMPRuntime, OutlinedFn, D, CapturedVars,
9562 RequiresOuterTask, CS, OffloadingMandatory, CGF);
9563 return;
9564 }
9565
9566 bool HasNoWait = D.hasClausesOfKind<OMPNowaitClause>();
9567 unsigned NumTargetItems = InputInfo.NumberOfTargetItems;
9568
9569 llvm::Value *BasePointersArray =
9570 InputInfo.BasePointersArray.emitRawPointer(CGF);
9571 llvm::Value *PointersArray = InputInfo.PointersArray.emitRawPointer(CGF);
9572 llvm::Value *SizesArray = InputInfo.SizesArray.emitRawPointer(CGF);
9573 llvm::Value *MappersArray = InputInfo.MappersArray.emitRawPointer(CGF);
9574
// Callback used by emitKernelLaunch when the device launch fails or is
// unavailable: emit the host fallback at the given insertion point.
9575 auto &&EmitTargetCallFallbackCB =
9576 [&OMPRuntime, OutlinedFn, &D, &CapturedVars, RequiresOuterTask, &CS,
9577 OffloadingMandatory, &CGF](llvm::OpenMPIRBuilder::InsertPointTy IP)
9578 -> llvm::OpenMPIRBuilder::InsertPointTy {
9579 CGF.Builder.restoreIP(IP);
9580 emitTargetCallFallback(OMPRuntime, OutlinedFn, D, CapturedVars,
9581 RequiresOuterTask, CS, OffloadingMandatory, CGF);
9582 return CGF.Builder.saveIP();
9583 };
9584
9585 llvm::Value *DeviceID = emitDeviceID(Device, CGF);
9586 llvm::Value *NumTeams = OMPRuntime->emitNumTeamsForTargetDirective(CGF, D);
9587 llvm::Value *NumThreads =
9588 OMPRuntime->emitNumThreadsForTargetDirective(CGF, D);
9589 llvm::Value *RTLoc = OMPRuntime->emitUpdateLocation(CGF, D.getBeginLoc());
9590 llvm::Value *NumIterations =
9591 OMPRuntime->emitTargetNumIterationsCall(CGF, D, SizeEmitter);
9592 llvm::Value *DynCGGroupMem = emitDynCGGroupMem(D, CGF);
9593 llvm::OpenMPIRBuilder::InsertPointTy AllocaIP(
9594 CGF.AllocaInsertPt->getParent(), CGF.AllocaInsertPt->getIterator());
9595
9596 llvm::OpenMPIRBuilder::TargetDataRTArgs RTArgs(
9597 BasePointersArray, PointersArray, SizesArray, MapTypesArray,
9598 nullptr /* MapTypesArrayEnd */, MappersArray, MapNamesArray);
9599
9600 llvm::OpenMPIRBuilder::TargetKernelArgs Args(
9601 NumTargetItems, RTArgs, NumIterations, NumTeams, NumThreads,
9602 DynCGGroupMem, HasNoWait);
9603
9604 CGF.Builder.restoreIP(OMPRuntime->getOMPBuilder().emitKernelLaunch(
9605 CGF.Builder, OutlinedFn, OutlinedFnID, EmitTargetCallFallbackCB, Args,
9606 DeviceID, RTLoc, AllocaIP));
9607 };
9608
// With nowait/depend semantics the launch is wrapped in an outer task.
9609 if (RequiresOuterTask)
9610 CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
9611 else
9612 OMPRuntime->emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
9613 }
9614
// Emits the host-only ("else") path of a target call: runs the host fallback,
// wrapped in an outer task when the directive requires one.
9615 static void
9616 emitTargetCallElse(CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn,
9617 const OMPExecutableDirective &D,
// NOTE(review): the CapturedVars parameter line (original line 9618) is not
// visible in this rendering.
9619 bool RequiresOuterTask, const CapturedStmt &CS,
9620 bool OffloadingMandatory, CodeGenFunction &CGF) {
9621
9622 // Notify that the host version must be executed.
9623 auto &&ElseGen =
9624 [&OMPRuntime, OutlinedFn, &D, &CapturedVars, RequiresOuterTask, &CS,
9625 OffloadingMandatory](CodeGenFunction &CGF, PrePostActionTy &) {
9626 emitTargetCallFallback(OMPRuntime, OutlinedFn, D, CapturedVars,
9627 RequiresOuterTask, CS, OffloadingMandatory, CGF);
9628 };
9629
9630 if (RequiresOuterTask) {
// No offloading arrays are needed on the host-only path.
9631 CodeGenFunction::OMPTargetDataInfo InputInfo;
9632 CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo);
9633 } else {
9634 OMPRuntime->emitInlinedDirective(CGF, D.getDirectiveKind(), ElseGen);
9635 }
9636 }
9637
// Entry point for emitting a target call: captures the region's variables,
// then emits either the offloading kernel-launch path, the host-only path, or
// an if-clause selection between the two.
// NOTE(review): the signature lines (void CGOpenMPRuntime::emitTargetCall
// taking CGF and D, original lines 9638-9639) are not visible in this
// rendering, nor are lines 9654-9660 (the full RequiresOuterTask computation
// and the CapturedVars declaration) and 9668 (the InputInfo declaration) —
// verify against upstream.
9640 llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
9641 llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
9642 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
9643 const OMPLoopDirective &D)>
9644 SizeEmitter) {
9645 if (!CGF.HaveInsertPoint())
9646 return;
9647
// Mandatory offloading only applies to host compilation.
9648 const bool OffloadingMandatory = !CGM.getLangOpts().OpenMPIsTargetDevice &&
9649 CGM.getLangOpts().OpenMPOffloadMandatory;
9650
9651 assert((OffloadingMandatory || OutlinedFn) && "Invalid outlined function!");
9652
9653 const bool RequiresOuterTask =
9657 (CGM.getLangOpts().OpenMP >= 51 &&
9661 const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
9662 auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF,
9663 PrePostActionTy &) {
9664 CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
9665 };
9666 emitInlinedDirective(CGF, OMPD_unknown, ArgsCodegen);
9667
9669 llvm::Value *MapTypesArray = nullptr;
9670 llvm::Value *MapNamesArray = nullptr;
9671
9672 auto &&TargetThenGen = [this, OutlinedFn, &D, &CapturedVars,
9673 RequiresOuterTask, &CS, OffloadingMandatory, Device,
9674 OutlinedFnID, &InputInfo, &MapTypesArray,
9675 &MapNamesArray, SizeEmitter](CodeGenFunction &CGF,
9676 PrePostActionTy &) {
9677 emitTargetCallKernelLaunch(this, OutlinedFn, D, CapturedVars,
9678 RequiresOuterTask, CS, OffloadingMandatory,
9679 Device, OutlinedFnID, InputInfo, MapTypesArray,
9680 MapNamesArray, SizeEmitter, CGF, CGM);
9681 };
9682
9683 auto &&TargetElseGen =
9684 [this, OutlinedFn, &D, &CapturedVars, RequiresOuterTask, &CS,
9685 OffloadingMandatory](CodeGenFunction &CGF, PrePostActionTy &) {
9686 emitTargetCallElse(this, OutlinedFn, D, CapturedVars, RequiresOuterTask,
9687 CS, OffloadingMandatory, CGF);
9688 };
9689
9690 // If we have a target function ID it means that we need to support
9691 // offloading, otherwise, just execute on the host. We need to execute on host
9692 // regardless of the conditional in the if clause if, e.g., the user do not
9693 // specify target triples.
9694 if (OutlinedFnID) {
9695 if (IfCond) {
9696 emitIfClause(CGF, IfCond, TargetThenGen, TargetElseGen);
9697 } else {
9698 RegionCodeGenTy ThenRCG(TargetThenGen);
9699 ThenRCG(CGF);
9700 }
9701 } else {
9702 RegionCodeGenTy ElseRCG(TargetElseGen);
9703 ElseRCG(CGF);
9704 }
9705 }
9706
// Recursively scans a statement tree for target regions that must be emitted
// as device entry points, dispatching each target-kind directive to the
// corresponding device-function emitter.
// NOTE(review): the signature line (CGOpenMPRuntime::scanForTargetRegionsFunctions
// taking the Stmt *S, original line 9707) is not visible in this rendering;
// likewise the isOpenMPTargetExecutionDirective call on line 9715 and the
// CodeGenFunction::EmitOMPTarget*DeviceFunction( call lines inside each switch
// case below — verify against upstream.
9708 StringRef ParentName) {
9709 if (!S)
9710 return;
9711
9712 // Codegen OMP target directives that offload compute to the device.
9713 bool RequiresDeviceCodegen =
9714 isa<OMPExecutableDirective>(S) &&
9716 cast<OMPExecutableDirective>(S)->getDirectiveKind());
9717
9718 if (RequiresDeviceCodegen) {
9719 const auto &E = *cast<OMPExecutableDirective>(S);
9720
9721 llvm::TargetRegionEntryInfo EntryInfo = getEntryInfoFromPresumedLoc(
9722 CGM, OMPBuilder, E.getBeginLoc(), ParentName);
9723
9724 // Is this a target region that should not be emitted as an entry point? If
9725 // so just signal we are done with this target region.
9726 if (!OMPBuilder.OffloadInfoManager.hasTargetRegionEntryInfo(EntryInfo))
9727 return;
9728
9729 switch (E.getDirectiveKind()) {
9730 case OMPD_target:
9732 cast<OMPTargetDirective>(E));
9733 break;
9734 case OMPD_target_parallel:
9736 CGM, ParentName, cast<OMPTargetParallelDirective>(E));
9737 break;
9738 case OMPD_target_teams:
9740 CGM, ParentName, cast<OMPTargetTeamsDirective>(E));
9741 break;
9742 case OMPD_target_teams_distribute:
9744 CGM, ParentName, cast<OMPTargetTeamsDistributeDirective>(E));
9745 break;
9746 case OMPD_target_teams_distribute_simd:
9748 CGM, ParentName, cast<OMPTargetTeamsDistributeSimdDirective>(E));
9749 break;
9750 case OMPD_target_parallel_for:
9752 CGM, ParentName, cast<OMPTargetParallelForDirective>(E));
9753 break;
9754 case OMPD_target_parallel_for_simd:
9756 CGM, ParentName, cast<OMPTargetParallelForSimdDirective>(E));
9757 break;
9758 case OMPD_target_simd:
9760 CGM, ParentName, cast<OMPTargetSimdDirective>(E));
9761 break;
9762 case OMPD_target_teams_distribute_parallel_for:
9764 CGM, ParentName,
9765 cast<OMPTargetTeamsDistributeParallelForDirective>(E));
9766 break;
9767 case OMPD_target_teams_distribute_parallel_for_simd:
9770 CGM, ParentName,
9771 cast<OMPTargetTeamsDistributeParallelForSimdDirective>(E));
9772 break;
9773 case OMPD_target_teams_loop:
9775 CGM, ParentName, cast<OMPTargetTeamsGenericLoopDirective>(E));
9776 break;
9777 case OMPD_target_parallel_loop:
9779 CGM, ParentName, cast<OMPTargetParallelGenericLoopDirective>(E));
9780 break;
// Every non-target directive kind is unreachable here because
// RequiresDeviceCodegen already filtered for target execution directives.
9781 case OMPD_parallel:
9782 case OMPD_for:
9783 case OMPD_parallel_for:
9784 case OMPD_parallel_master:
9785 case OMPD_parallel_sections:
9786 case OMPD_for_simd:
9787 case OMPD_parallel_for_simd:
9788 case OMPD_cancel:
9789 case OMPD_cancellation_point:
9790 case OMPD_ordered:
9791 case OMPD_threadprivate:
9792 case OMPD_allocate:
9793 case OMPD_task:
9794 case OMPD_simd:
9795 case OMPD_tile:
9796 case OMPD_unroll:
9797 case OMPD_sections:
9798 case OMPD_section:
9799 case OMPD_single:
9800 case OMPD_master:
9801 case OMPD_critical:
9802 case OMPD_taskyield:
9803 case OMPD_barrier:
9804 case OMPD_taskwait:
9805 case OMPD_taskgroup:
9806 case OMPD_atomic:
9807 case OMPD_flush:
9808 case OMPD_depobj:
9809 case OMPD_scan:
9810 case OMPD_teams:
9811 case OMPD_target_data:
9812 case OMPD_target_exit_data:
9813 case OMPD_target_enter_data:
9814 case OMPD_distribute:
9815 case OMPD_distribute_simd:
9816 case OMPD_distribute_parallel_for:
9817 case OMPD_distribute_parallel_for_simd:
9818 case OMPD_teams_distribute:
9819 case OMPD_teams_distribute_simd:
9820 case OMPD_teams_distribute_parallel_for:
9821 case OMPD_teams_distribute_parallel_for_simd:
9822 case OMPD_target_update:
9823 case OMPD_declare_simd:
9824 case OMPD_declare_variant:
9825 case OMPD_begin_declare_variant:
9826 case OMPD_end_declare_variant:
9827 case OMPD_declare_target:
9828 case OMPD_end_declare_target:
9829 case OMPD_declare_reduction:
9830 case OMPD_declare_mapper:
9831 case OMPD_taskloop:
9832 case OMPD_taskloop_simd:
9833 case OMPD_master_taskloop:
9834 case OMPD_master_taskloop_simd:
9835 case OMPD_parallel_master_taskloop:
9836 case OMPD_parallel_master_taskloop_simd:
9837 case OMPD_requires:
9838 case OMPD_metadirective:
9839 case OMPD_unknown:
9840 default:
9841 llvm_unreachable("Unknown target directive for OpenMP device codegen.");
9842 }
9843 return;
9844 }
9845
// A non-target executable directive: recurse into its raw statement only.
9846 if (const auto *E = dyn_cast<OMPExecutableDirective>(S)) {
9847 if (!E->hasAssociatedStmt() || !E->getAssociatedStmt())
9848 return;
9849
9850 scanForTargetRegionsFunctions(E->getRawStmt(), ParentName);
9851 return;
9852 }
9853
9854 // If this is a lambda function, look into its body.
9855 if (const auto *L = dyn_cast<LambdaExpr>(S))
9856 S = L->getBody();
9857
9858 // Keep looking for target regions recursively.
9859 for (const Stmt *II : S->children())
9860 scanForTargetRegionsFunctions(II, ParentName);
9861 }
9862
9863static bool isAssumedToBeNotEmitted(const ValueDecl *VD, bool IsDevice) {
9864 std::optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
9865 OMPDeclareTargetDeclAttr::getDeviceType(VD);
9866 if (!DevTy)
9867 return false;
9868 // Do not emit device_type(nohost) functions for the host.
9869 if (!IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_NoHost)
9870 return true;
9871 // Do not emit device_type(host) functions for the device.
9872 if (IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_Host)
9873 return true;
9874 return false;
9875}
9876
// Decides whether codegen for a function should be skipped here (returns
// true) or handled by normal code generation (returns false); on the device
// side it also scans the function body for target regions.
// NOTE(review): the signature line (bool CGOpenMPRuntime::emitTargetFunctions
// (GlobalDecl GD) {, original line 9877) is not visible in this rendering.
9878 // If emitting code for the host, we do not process FD here. Instead we do
9879 // the normal code generation.
9880 if (!CGM.getLangOpts().OpenMPIsTargetDevice) {
9881 if (const auto *FD = dyn_cast<FunctionDecl>(GD.getDecl()))
9882 if (isAssumedToBeNotEmitted(cast<ValueDecl>(FD),
9883 CGM.getLangOpts().OpenMPIsTargetDevice))
9884 return true;
9885 return false;
9886 }
9887
9888 const ValueDecl *VD = cast<ValueDecl>(GD.getDecl());
9889 // Try to detect target regions in the function.
9890 if (const auto *FD = dyn_cast<FunctionDecl>(VD)) {
9891 StringRef Name = CGM.getMangledName(GD);
9892 scanForTargetRegionsFunctions(FD->getBody(), Name);
9893 if (isAssumedToBeNotEmitted(cast<ValueDecl>(FD),
9894 CGM.getLangOpts().OpenMPIsTargetDevice))
9895 return true;
9896 }
9897
9898 // Do not to emit function if it is not marked as declare target.
9899 return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) &&
9900 AlreadyEmittedTargetDecls.count(VD) == 0;
9901 }
9902
// Decides whether codegen for a global variable should be skipped here
// (returns true) or handled by normal code generation (returns false); on the
// device side it scans its ctors/dtors for target regions and may defer the
// variable until requires-clause information is complete.
// NOTE(review): the signature line (bool
// CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) {, original line
// 9903) is not visible in this rendering, nor are lines 9918/9923 (the
// getMangledName calls producing ParentName) and 9935 (the final condition
// term of the deferral test) — verify against upstream.
9904 if (isAssumedToBeNotEmitted(cast<ValueDecl>(GD.getDecl()),
9905 CGM.getLangOpts().OpenMPIsTargetDevice))
9906 return true;
9907
9908 if (!CGM.getLangOpts().OpenMPIsTargetDevice)
9909 return false;
9910
9911 // Check if there are Ctors/Dtors in this declaration and look for target
9912 // regions in it. We use the complete variant to produce the kernel name
9913 // mangling.
9914 QualType RDTy = cast<VarDecl>(GD.getDecl())->getType();
9915 if (const auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) {
9916 for (const CXXConstructorDecl *Ctor : RD->ctors()) {
9917 StringRef ParentName =
9919 scanForTargetRegionsFunctions(Ctor->getBody(), ParentName);
9920 }
9921 if (const CXXDestructorDecl *Dtor = RD->getDestructor()) {
9922 StringRef ParentName =
9924 scanForTargetRegionsFunctions(Dtor->getBody(), ParentName);
9925 }
9926 }
9927
9928 // Do not to emit variable if it is not marked as declare target.
9929 std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
9930 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(
9931 cast<VarDecl>(GD.getDecl()));
9932 if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
9933 ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
9934 *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
// Defer the variable: it is emitted later once all information is available.
9936 DeferredGlobalVariables.insert(cast<VarDecl>(GD.getDecl()));
9937 return true;
9938 }
9939 return false;
9940 }
9941
9943 llvm::Constant *Addr) {
9944 if (CGM.getLangOpts().OMPTargetTriples.empty() &&
9945 !CGM.getLangOpts().OpenMPIsTargetDevice)
9946 return;
9947
9948 std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
9949 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
9950
9951 // If this is an 'extern' declaration we defer to the canonical definition and
9952 // do not emit an offloading entry.
9953 if (Res && *Res != OMPDeclareTargetDeclAttr::MT_Link &&
9954 VD->hasExternalStorage())
9955 return;
9956
9957 if (!Res) {
9958 if (CGM.getLangOpts().OpenMPIsTargetDevice) {
9959 // Register non-target variables being emitted in device code (debug info
9960 // may cause this).
9961 StringRef VarName = CGM.getMangledName(VD);
9962 EmittedNonTargetVariables.try_emplace(VarName, Addr);
9963 }
9964 return;
9965 }
9966
9967 auto AddrOfGlobal = [&VD, this]() { return CGM.GetAddrOfGlobal(VD); };
9968 auto LinkageForVariable = [&VD, this]() {
9970 };
9971
9972 std::vector<llvm::GlobalVariable *> GeneratedRefs;
9973 OMPBuilder.registerTargetGlobalVariable(
9976 VD->isExternallyVisible(),
9978 VD->getCanonicalDecl()->getBeginLoc()),
9979 CGM.getMangledName(VD), GeneratedRefs, CGM.getLangOpts().OpenMPSimd,
9980 CGM.getLangOpts().OMPTargetTriples, AddrOfGlobal, LinkageForVariable,
9983 Addr);
9984
9985 for (auto *ref : GeneratedRefs)
9987}
9988
  // Functions (and declare-reduction helpers) take the function path; any
  // other global declaration is handled as a variable.
  if (isa<FunctionDecl>(GD.getDecl()) ||
      isa<OMPDeclareReductionDecl>(GD.getDecl()))
    return emitTargetFunctions(GD);

  return emitTargetGlobalVariable(GD);
}
9996
9998 for (const VarDecl *VD : DeferredGlobalVariables) {
9999 std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
10000 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
10001 if (!Res)
10002 continue;
10003 if ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
10004 *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
10006 CGM.EmitGlobal(VD);
10007 } else {
10008 assert((*Res == OMPDeclareTargetDeclAttr::MT_Link ||
10009 ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
10010 *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
10012 "Expected link clause or to clause with unified memory.");
10014 }
10015 }
10016}
10017
10019 CodeGenFunction &CGF, const OMPExecutableDirective &D) const {
10021 " Expected target-based directive.");
10022}
10023
10025 for (const OMPClause *Clause : D->clauselists()) {
10026 if (Clause->getClauseKind() == OMPC_unified_shared_memory) {
10028 OMPBuilder.Config.setHasRequiresUnifiedSharedMemory(true);
10029 } else if (const auto *AC =
10030 dyn_cast<OMPAtomicDefaultMemOrderClause>(Clause)) {
10031 switch (AC->getAtomicDefaultMemOrderKind()) {
10032 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_acq_rel:
10033 RequiresAtomicOrdering = llvm::AtomicOrdering::AcquireRelease;
10034 break;
10035 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_seq_cst:
10036 RequiresAtomicOrdering = llvm::AtomicOrdering::SequentiallyConsistent;
10037 break;
10038 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_relaxed:
10039 RequiresAtomicOrdering = llvm::AtomicOrdering::Monotonic;
10040 break;
10042 break;
10043 }
10044 }
10045 }
10046}
10047
10048llvm::AtomicOrdering CGOpenMPRuntime::getDefaultMemoryOrdering() const {
10050}
10051
                                                       LangAS &AS) {
  // Only variables annotated with 'omp allocate' are of interest here.
  if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())
    return false;
  const auto *A = VD->getAttr<OMPAllocateDeclAttr>();
  switch(A->getAllocatorType()) {
  case OMPAllocateDeclAttr::OMPNullMemAlloc:
  case OMPAllocateDeclAttr::OMPDefaultMemAlloc:
    // Not supported, fallback to the default mem space.
  case OMPAllocateDeclAttr::OMPLargeCapMemAlloc:
  case OMPAllocateDeclAttr::OMPCGroupMemAlloc:
  case OMPAllocateDeclAttr::OMPHighBWMemAlloc:
  case OMPAllocateDeclAttr::OMPLowLatMemAlloc:
  case OMPAllocateDeclAttr::OMPThreadMemAlloc:
  case OMPAllocateDeclAttr::OMPConstMemAlloc:
  case OMPAllocateDeclAttr::OMPPTeamMemAlloc:
    // The generic host runtime maps every predefined allocator to the
    // default address space; target-specific runtimes may override this.
    AS = LangAS::Default;
    return true;
  case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc:
    llvm_unreachable("Expected predefined allocator for the variables with the "
                     "static storage.");
  }
  return false;
}
10076
10079}
10080
10082 CodeGenModule &CGM)
10083 : CGM(CGM) {
10084 if (CGM.getLangOpts().OpenMPIsTargetDevice) {
10085 SavedShouldMarkAsGlobal = CGM.getOpenMPRuntime().ShouldMarkAsGlobal;
10087 }
10088}
10089
  // Restore the flag saved by the constructor (device compilation only).
  if (CGM.getLangOpts().OpenMPIsTargetDevice)
    CGM.getOpenMPRuntime().ShouldMarkAsGlobal = SavedShouldMarkAsGlobal;
}
10094
10096 if (!CGM.getLangOpts().OpenMPIsTargetDevice || !ShouldMarkAsGlobal)
10097 return true;
10098
10099 const auto *D = cast<FunctionDecl>(GD.getDecl());
10100 // Do not to emit function if it is marked as declare target as it was already
10101 // emitted.
10102 if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(D)) {
10103 if (D->hasBody() && AlreadyEmittedTargetDecls.count(D) == 0) {
10104 if (auto *F = dyn_cast_or_null<llvm::Function>(
10106 return !F->isDeclaration();
10107 return false;
10108 }
10109 return true;
10110 }
10111
10112 return !AlreadyEmittedTargetDecls.insert(D).second;
10113}
10114
10116 const OMPExecutableDirective &D,
10117 SourceLocation Loc,
10118 llvm::Function *OutlinedFn,
10119 ArrayRef<llvm::Value *> CapturedVars) {
10120 if (!CGF.HaveInsertPoint())
10121 return;
10122
10123 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
10125
10126 // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn);
10127 llvm::Value *Args[] = {
10128 RTLoc,
10129 CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
10130 CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())};
10132 RealArgs.append(std::begin(Args), std::end(Args));
10133 RealArgs.append(CapturedVars.begin(), CapturedVars.end());
10134
10135 llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
10136 CGM.getModule(), OMPRTL___kmpc_fork_teams);
10137 CGF.EmitRuntimeCall(RTLFn, RealArgs);
10138}
10139
                                         const Expr *NumTeams,
                                         const Expr *ThreadLimit,
                                         SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;

  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);

  // Evaluate the clause expressions (when present) as signed 32-bit values.
  // Zero tells the runtime that no value was specified for that clause.
  llvm::Value *NumTeamsVal =
      NumTeams
          ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams),
                                      CGF.CGM.Int32Ty, /* isSigned = */ true)
          : CGF.Builder.getInt32(0);

  llvm::Value *ThreadLimitVal =
      ThreadLimit
          ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
                                      CGF.CGM.Int32Ty, /* isSigned = */ true)
          : CGF.Builder.getInt32(0);

  // Build call __kmpc_push_num_teams(&loc, global_tid, num_teams, thread_limit)
  llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal,
                                     ThreadLimitVal};
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_push_num_teams),
                      PushNumTeamsArgs);
}
10168
                                            const Expr *ThreadLimit,
                                            SourceLocation Loc) {
  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
  // Zero means "no thread_limit value"; otherwise cast to signed i32.
  llvm::Value *ThreadLimitVal =
      ThreadLimit
          ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
                                      CGF.CGM.Int32Ty, /* isSigned = */ true)
          : CGF.Builder.getInt32(0);

  // Build call __kmpc_set_thread_limit(&loc, global_tid, thread_limit)
  llvm::Value *ThreadLimitArgs[] = {RTLoc, getThreadID(CGF, Loc),
                                    ThreadLimitVal};
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_set_thread_limit),
                      ThreadLimitArgs);
}
10186
10188 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
10189 const Expr *Device, const RegionCodeGenTy &CodeGen,
10191 if (!CGF.HaveInsertPoint())
10192 return;
10193
10194 // Action used to replace the default codegen action and turn privatization
10195 // off.
10196 PrePostActionTy NoPrivAction;
10197
10198 using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
10199
10200 llvm::Value *IfCondVal = nullptr;
10201 if (IfCond)
10202 IfCondVal = CGF.EvaluateExprAsBool(IfCond);
10203
10204 // Emit device ID if any.
10205 llvm::Value *DeviceID = nullptr;
10206 if (Device) {
10207 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
10208 CGF.Int64Ty, /*isSigned=*/true);
10209 } else {
10210 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
10211 }
10212
10213 // Fill up the arrays with all the mapped variables.
10214 MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
10215 auto GenMapInfoCB =
10216 [&](InsertPointTy CodeGenIP) -> llvm::OpenMPIRBuilder::MapInfosTy & {
10217 CGF.Builder.restoreIP(CodeGenIP);
10218 // Get map clause information.
10219 MappableExprsHandler MEHandler(D, CGF);
10220 MEHandler.generateAllInfo(CombinedInfo, OMPBuilder);
10221
10222 auto FillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) {
10223 return emitMappingInformation(CGF, OMPBuilder, MapExpr);
10224 };
10225 if (CGM.getCodeGenOpts().getDebugInfo() !=
10226 llvm::codegenoptions::NoDebugInfo) {
10227 CombinedInfo.Names.resize(CombinedInfo.Exprs.size());
10228 llvm::transform(CombinedInfo.Exprs, CombinedInfo.Names.begin(),
10229 FillInfoMap);
10230 }
10231
10232 return CombinedInfo;
10233 };
10234 using BodyGenTy = llvm::OpenMPIRBuilder::BodyGenTy;
10235 auto BodyCB = [&](InsertPointTy CodeGenIP, BodyGenTy BodyGenType) {
10236 CGF.Builder.restoreIP(CodeGenIP);
10237 switch (BodyGenType) {
10238 case BodyGenTy::Priv:
10239 if (!Info.CaptureDeviceAddrMap.empty())
10240 CodeGen(CGF);
10241 break;
10242 case BodyGenTy::DupNoPriv:
10243 if (!Info.CaptureDeviceAddrMap.empty()) {
10244 CodeGen.setAction(NoPrivAction);
10245 CodeGen(CGF);
10246 }
10247 break;
10248 case BodyGenTy::NoPriv:
10249 if (Info.CaptureDeviceAddrMap.empty()) {
10250 CodeGen.setAction(NoPrivAction);
10251 CodeGen(CGF);
10252 }
10253 break;
10254 }
10255 return InsertPointTy(CGF.Builder.GetInsertBlock(),
10256 CGF.Builder.GetInsertPoint());
10257 };
10258
10259 auto DeviceAddrCB = [&](unsigned int I, llvm::Value *NewDecl) {
10260 if (const ValueDecl *DevVD = CombinedInfo.DevicePtrDecls[I]) {
10261 Info.CaptureDeviceAddrMap.try_emplace(DevVD, NewDecl);
10262 }
10263 };
10264
10265 auto CustomMapperCB = [&](unsigned int I) {
10266 llvm::Value *MFunc = nullptr;
10267 if (CombinedInfo.Mappers[I]) {
10268 Info.HasMapper = true;
10270 cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I]));
10271 }
10272 return MFunc;
10273 };
10274
10275 // Source location for the ident struct
10276 llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
10277
10278 InsertPointTy AllocaIP(CGF.AllocaInsertPt->getParent(),
10279 CGF.AllocaInsertPt->getIterator());
10280 InsertPointTy CodeGenIP(CGF.Builder.GetInsertBlock(),
10281 CGF.Builder.GetInsertPoint());
10282 llvm::OpenMPIRBuilder::LocationDescription OmpLoc(CodeGenIP);
10283 CGF.Builder.restoreIP(OMPBuilder.createTargetData(
10284 OmpLoc, AllocaIP, CodeGenIP, DeviceID, IfCondVal, Info, GenMapInfoCB,
10285 /*MapperFunc=*/nullptr, BodyCB, DeviceAddrCB, CustomMapperCB, RTLoc));
10286}
10287
10289 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
10290 const Expr *Device) {
10291 if (!CGF.HaveInsertPoint())
10292 return;
10293
10294 assert((isa<OMPTargetEnterDataDirective>(D) ||
10295 isa<OMPTargetExitDataDirective>(D) ||
10296 isa<OMPTargetUpdateDirective>(D)) &&
10297 "Expecting either target enter, exit data, or update directives.");
10298
10300 llvm::Value *MapTypesArray = nullptr;
10301 llvm::Value *MapNamesArray = nullptr;
10302 // Generate the code for the opening of the data environment.
10303 auto &&ThenGen = [this, &D, Device, &InputInfo, &MapTypesArray,
10304 &MapNamesArray](CodeGenFunction &CGF, PrePostActionTy &) {
10305 // Emit device ID if any.
10306 llvm::Value *DeviceID = nullptr;
10307 if (Device) {
10308 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
10309 CGF.Int64Ty, /*isSigned=*/true);
10310 } else {
10311 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
10312 }
10313
10314 // Emit the number of elements in the offloading arrays.
10315 llvm::Constant *PointerNum =
10316 CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);
10317
10318 // Source location for the ident struct
10319 llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
10320
10321 llvm::Value *OffloadingArgs[] = {
10322 RTLoc,
10323 DeviceID,
10324 PointerNum,
10325 InputInfo.BasePointersArray.emitRawPointer(CGF),
10326 InputInfo.PointersArray.emitRawPointer(CGF),
10327 InputInfo.SizesArray.emitRawPointer(CGF),
10328 MapTypesArray,
10329 MapNamesArray,
10330 InputInfo.MappersArray.emitRawPointer(CGF)};
10331
10332 // Select the right runtime function call for each standalone
10333 // directive.
10334 const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
10335 RuntimeFunction RTLFn;
10336 switch (D.getDirectiveKind()) {
10337 case OMPD_target_enter_data:
10338 RTLFn = HasNowait ? OMPRTL___tgt_target_data_begin_nowait_mapper
10339 : OMPRTL___tgt_target_data_begin_mapper;
10340 break;
10341 case OMPD_target_exit_data:
10342 RTLFn = HasNowait ? OMPRTL___tgt_target_data_end_nowait_mapper
10343 : OMPRTL___tgt_target_data_end_mapper;
10344 break;
10345 case OMPD_target_update:
10346 RTLFn = HasNowait ? OMPRTL___tgt_target_data_update_nowait_mapper
10347 : OMPRTL___tgt_target_data_update_mapper;
10348 break;
10349 case OMPD_parallel:
10350 case OMPD_for:
10351 case OMPD_parallel_for:
10352 case OMPD_parallel_master:
10353 case OMPD_parallel_sections:
10354 case OMPD_for_simd:
10355 case OMPD_parallel_for_simd:
10356 case OMPD_cancel:
10357 case OMPD_cancellation_point:
10358 case OMPD_ordered:
10359 case OMPD_threadprivate:
10360 case OMPD_allocate:
10361 case OMPD_task:
10362 case OMPD_simd:
10363 case OMPD_tile:
10364 case OMPD_unroll:
10365 case OMPD_sections:
10366 case OMPD_section:
10367 case OMPD_single:
10368 case OMPD_master:
10369 case OMPD_critical:
10370 case OMPD_taskyield:
10371 case OMPD_barrier:
10372 case OMPD_taskwait:
10373 case OMPD_taskgroup:
10374 case OMPD_atomic:
10375 case OMPD_flush:
10376 case OMPD_depobj:
10377 case OMPD_scan:
10378 case OMPD_teams:
10379 case OMPD_target_data:
10380 case OMPD_distribute:
10381 case OMPD_distribute_simd:
10382 case OMPD_distribute_parallel_for:
10383 case OMPD_distribute_parallel_for_simd:
10384 case OMPD_teams_distribute:
10385 case OMPD_teams_distribute_simd:
10386 case OMPD_teams_distribute_parallel_for:
10387 case OMPD_teams_distribute_parallel_for_simd:
10388 case OMPD_declare_simd:
10389 case OMPD_declare_variant:
10390 case OMPD_begin_declare_variant:
10391 case OMPD_end_declare_variant:
10392 case OMPD_declare_target:
10393 case OMPD_end_declare_target:
10394 case OMPD_declare_reduction:
10395 case OMPD_declare_mapper:
10396 case OMPD_taskloop:
10397 case OMPD_taskloop_simd:
10398 case OMPD_master_taskloop:
10399 case OMPD_master_taskloop_simd:
10400 case OMPD_parallel_master_taskloop:
10401 case OMPD_parallel_master_taskloop_simd:
10402 case OMPD_target:
10403 case OMPD_target_simd:
10404 case OMPD_target_teams_distribute:
10405 case OMPD_target_teams_distribute_simd:
10406 case OMPD_target_teams_distribute_parallel_for:
10407 case OMPD_target_teams_distribute_parallel_for_simd:
10408 case OMPD_target_teams:
10409 case OMPD_target_parallel:
10410 case OMPD_target_parallel_for:
10411 case OMPD_target_parallel_for_simd:
10412 case OMPD_requires:
10413 case OMPD_metadirective:
10414 case OMPD_unknown:
10415 default:
10416 llvm_unreachable("Unexpected standalone target data directive.");
10417 break;
10418 }
10419 CGF.EmitRuntimeCall(
10420 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), RTLFn),
10421 OffloadingArgs);
10422 };
10423
10424 auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
10425 &MapNamesArray](CodeGenFunction &CGF,
10426 PrePostActionTy &) {
10427 // Fill up the arrays with all the mapped variables.
10428 MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
10429
10430 // Get map clause information.
10431 MappableExprsHandler MEHandler(D, CGF);
10432 MEHandler.generateAllInfo(CombinedInfo, OMPBuilder);
10433
10435 // Fill up the arrays and create the arguments.
10436 emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder,
10437 /*IsNonContiguous=*/true);
10438 bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() ||
10440 bool EmitDebug = CGF.CGM.getCodeGenOpts().getDebugInfo() !=
10441 llvm::codegenoptions::NoDebugInfo;
10442 OMPBuilder.emitOffloadingArraysArgument(CGF.Builder, Info.RTArgs, Info,
10443 EmitDebug,
10444 /*ForEndCall=*/false);
10445 InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
10446 InputInfo.BasePointersArray = Address(Info.RTArgs.BasePointersArray,
10448 InputInfo.PointersArray = Address(Info.RTArgs.PointersArray, CGF.VoidPtrTy,
10450 InputInfo.SizesArray =
10451 Address(Info.RTArgs.SizesArray, CGF.Int64Ty, CGM.getPointerAlign());
10452 InputInfo.MappersArray =
10453 Address(Info.RTArgs.MappersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
10454 MapTypesArray = Info.RTArgs.MapTypesArray;
10455 MapNamesArray = Info.RTArgs.MapNamesArray;
10456 if (RequiresOuterTask)
10457 CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
10458 else
10459 emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
10460 };
10461
10462 if (IfCond) {
10463 emitIfClause(CGF, IfCond, TargetThenGen,
10464 [](CodeGenFunction &CGF, PrePostActionTy &) {});
10465 } else {
10466 RegionCodeGenTy ThenRCG(TargetThenGen);
10467 ThenRCG(CGF);
10468 }
10469}
10470
namespace {
/// Kind of parameter in a function with 'declare simd' directive.
enum ParamKindTy {
  Linear,
  LinearRef,
  LinearUVal,
  LinearVal,
  Uniform,
  Vector,
};
/// Attribute set of the parameter.
struct ParamAttrTy {
  // Defaults to Vector: a parameter without any clause "maps to vector".
  ParamKindTy Kind = Vector;
  // Linear step value; when HasVarStride is set, presumably the position of
  // the uniform stride parameter (AAVFABI 'ls<pos>') -- confirm against spec.
  llvm::APSInt StrideOrArg;
  // Alignment value; zero when no 'aligned' clause applies.
  llvm::APSInt Alignment;
  // True when the linear step is given by a variable rather than a constant.
  bool HasVarStride = false;
};
} // namespace
10489
10490static unsigned evaluateCDTSize(const FunctionDecl *FD,
10491 ArrayRef<ParamAttrTy> ParamAttrs) {
10492 // Every vector variant of a SIMD-enabled function has a vector length (VLEN).
10493 // If OpenMP clause "simdlen" is used, the VLEN is the value of the argument
10494 // of that clause. The VLEN value must be power of 2.
10495 // In other case the notion of the function`s "characteristic data type" (CDT)
10496 // is used to compute the vector length.
10497 // CDT is defined in the following order:
10498 // a) For non-void function, the CDT is the return type.
10499 // b) If the function has any non-uniform, non-linear parameters, then the
10500 // CDT is the type of the first such parameter.
10501 // c) If the CDT determined by a) or b) above is struct, union, or class
10502 // type which is pass-by-value (except for the type that maps to the
10503 // built-in complex data type), the characteristic data type is int.
10504 // d) If none of the above three cases is applicable, the CDT is int.
10505 // The VLEN is then determined based on the CDT and the size of vector
10506 // register of that ISA for which current vector version is generated. The
10507 // VLEN is computed using the formula below:
10508 // VLEN = sizeof(vector_register) / sizeof(CDT),
10509 // where vector register size specified in section 3.2.1 Registers and the
10510 // Stack Frame of original AMD64 ABI document.
10511 QualType RetType = FD->getReturnType();
10512 if (RetType.isNull())
10513 return 0;
10514 ASTContext &C = FD->getASTContext();
10515 QualType CDT;
10516 if (!RetType.isNull() && !RetType->isVoidType()) {
10517 CDT = RetType;
10518 } else {
10519 unsigned Offset = 0;
10520 if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) {
10521 if (ParamAttrs[Offset].Kind == Vector)
10522 CDT = C.getPointerType(C.getRecordType(MD->getParent()));
10523 ++Offset;
10524 }
10525 if (CDT.isNull()) {
10526 for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
10527 if (ParamAttrs[I + Offset].Kind == Vector) {
10528 CDT = FD->getParamDecl(I)->getType();
10529 break;
10530 }
10531 }
10532 }
10533 }
10534 if (CDT.isNull())
10535 CDT = C.IntTy;
10536 CDT = CDT->getCanonicalTypeUnqualified();
10537 if (CDT->isRecordType() || CDT->isUnionType())
10538 CDT = C.IntTy;
10539 return C.getTypeSize(CDT);
10540}
10541
10542/// Mangle the parameter part of the vector function name according to
10543/// their OpenMP classification. The mangling function is defined in
10544/// section 4.5 of the AAVFABI(2021Q1).
10545static std::string mangleVectorParameters(ArrayRef<ParamAttrTy> ParamAttrs) {
10546 SmallString<256> Buffer;
10547 llvm::raw_svector_ostream Out(Buffer);
10548 for (const auto &ParamAttr : ParamAttrs) {
10549 switch (ParamAttr.Kind) {
10550 case Linear:
10551 Out << 'l';
10552 break;
10553 case LinearRef:
10554 Out << 'R';
10555 break;
10556 case LinearUVal:
10557 Out << 'U';
10558 break;
10559 case LinearVal:
10560 Out << 'L';
10561 break;
10562 case Uniform:
10563 Out << 'u';
10564 break;
10565 case Vector:
10566 Out << 'v';
10567 break;
10568 }
10569 if (ParamAttr.HasVarStride)
10570 Out << "s" << ParamAttr.StrideOrArg;
10571 else if (ParamAttr.Kind == Linear || ParamAttr.Kind == LinearRef ||
10572 ParamAttr.Kind == LinearUVal || ParamAttr.Kind == LinearVal) {
10573 // Don't print the step value if it is not present or if it is
10574 // equal to 1.
10575 if (ParamAttr.StrideOrArg < 0)
10576 Out << 'n' << -ParamAttr.StrideOrArg;
10577 else if (ParamAttr.StrideOrArg != 1)
10578 Out << ParamAttr.StrideOrArg;
10579 }
10580
10581 if (!!ParamAttr.Alignment)
10582 Out << 'a' << ParamAttr.Alignment;
10583 }
10584
10585 return std::string(Out.str());
10586}
10587
10588static void
10589emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn,
10590 const llvm::APSInt &VLENVal,
10591 ArrayRef<ParamAttrTy> ParamAttrs,
10592 OMPDeclareSimdDeclAttr::BranchStateTy State) {
10593 struct ISADataTy {
10594 char ISA;
10595 unsigned VecRegSize;
10596 };
10597 ISADataTy ISAData[] = {
10598 {
10599 'b', 128
10600 }, // SSE
10601 {
10602 'c', 256
10603 }, // AVX
10604 {
10605 'd', 256
10606 }, // AVX2
10607 {
10608 'e', 512
10609 }, // AVX512
10610 };
10612 switch (State) {
10613 case OMPDeclareSimdDeclAttr::BS_Undefined:
10614 Masked.push_back('N');
10615 Masked.push_back('M');
10616 break;
10617 case OMPDeclareSimdDeclAttr::BS_Notinbranch:
10618 Masked.push_back('N');
10619 break;
10620 case OMPDeclareSimdDeclAttr::BS_Inbranch:
10621 Masked.push_back('M');
10622 break;
10623 }
10624 for (char Mask : Masked) {
10625 for (const ISADataTy &Data : ISAData) {
10626 SmallString<256> Buffer;
10627 llvm::raw_svector_ostream Out(Buffer);
10628 Out << "_ZGV" << Data.ISA << Mask;
10629 if (!VLENVal) {
10630 unsigned NumElts = evaluateCDTSize(FD, ParamAttrs);
10631 assert(NumElts && "Non-zero simdlen/cdtsize expected");
10632 Out << llvm::APSInt::getUnsigned(Data.VecRegSize / NumElts);
10633 } else {
10634 Out << VLENVal;
10635 }
10636 Out << mangleVectorParameters(ParamAttrs);
10637 Out << '_' << Fn->getName();
10638 Fn->addFnAttr(Out.str());
10639 }
10640 }
10641}
10642
// These are the functions that are needed to mangle the names of the
// vector functions generated by the compiler, according to the rules
// defined in the "Vector Function ABI specifications for AArch64",
// available at
// https://developer.arm.com/products/software-development-tools/hpc/arm-compiler-for-hpc/vector-function-abi.
10648
10649/// Maps To Vector (MTV), as defined in 4.1.1 of the AAVFABI (2021Q1).
10650static bool getAArch64MTV(QualType QT, ParamKindTy Kind) {
10651 QT = QT.getCanonicalType();
10652
10653 if (QT->isVoidType())
10654 return false;
10655
10656 if (Kind == ParamKindTy::Uniform)
10657 return false;
10658
10659 if (Kind == ParamKindTy::LinearUVal || Kind == ParamKindTy::LinearRef)
10660 return false;
10661
10662 if ((Kind == ParamKindTy::Linear || Kind == ParamKindTy::LinearVal) &&
10663 !QT->isReferenceType())
10664 return false;
10665
10666 return true;
10667}
10668
10669/// Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI.
  QT = QT.getCanonicalType();
  unsigned Size = C.getTypeSize(QT);

  // Only scalars and complex within 16 bytes wide set PBV to true.
  // (Sizes here are in bits: 8..128 bits == 1..16 bytes.)
  if (Size != 8 && Size != 16 && Size != 32 && Size != 64 && Size != 128)
    return false;

  if (QT->isFloatingType())
    return true;

  if (QT->isIntegerType())
    return true;

  if (QT->isPointerType())
    return true;

  // TODO: Add support for complex types (section 3.1.2, item 2).

  return false;
}
10691
10692/// Computes the lane size (LS) of a return type or of an input parameter,
10693/// as defined by `LS(P)` in 3.2.1 of the AAVFABI.
10694/// TODO: Add support for references, section 3.2.1, item 1.
10695static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C) {
10696 if (!getAArch64MTV(QT, Kind) && QT.getCanonicalType()->isPointerType()) {
10698 if (getAArch64PBV(PTy, C))
10699 return C.getTypeSize(PTy);
10700 }
10701 if (getAArch64PBV(QT, C))
10702 return C.getTypeSize(QT);
10703
10704 return C.getTypeSize(C.getUIntPtrType());
10705}
10706
10707// Get Narrowest Data Size (NDS) and Widest Data Size (WDS) from the
10708// signature of the scalar function, as defined in 3.2.2 of the
10709// AAVFABI.
10710static std::tuple<unsigned, unsigned, bool>
10712 QualType RetType = FD->getReturnType().getCanonicalType();
10713
10714 ASTContext &C = FD->getASTContext();
10715
10716 bool OutputBecomesInput = false;
10717
10719 if (!RetType->isVoidType()) {
10720 Sizes.push_back(getAArch64LS(RetType, ParamKindTy::Vector, C));
10721 if (!getAArch64PBV(RetType, C) && getAArch64MTV(RetType, {}))
10722 OutputBecomesInput = true;
10723 }
10724 for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
10726 Sizes.push_back(getAArch64LS(QT, ParamAttrs[I].Kind, C));
10727 }
10728
10729 assert(!Sizes.empty() && "Unable to determine NDS and WDS.");
10730 // The LS of a function parameter / return value can only be a power
10731 // of 2, starting from 8 bits, up to 128.
10732 assert(llvm::all_of(Sizes,
10733 [](unsigned Size) {
10734 return Size == 8 || Size == 16 || Size == 32 ||
10735 Size == 64 || Size == 128;
10736 }) &&
10737 "Invalid size");
10738
10739 return std::make_tuple(*std::min_element(std::begin(Sizes), std::end(Sizes)),
10740 *std::max_element(std::begin(Sizes), std::end(Sizes)),
10741 OutputBecomesInput);
10742}
10743
10744// Function used to add the attribute. The parameter `VLEN` is
10745// templated to allow the use of "x" when targeting scalable functions
10746// for SVE.
10747template <typename T>
10748static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix,
10749 char ISA, StringRef ParSeq,
10750 StringRef MangledName, bool OutputBecomesInput,
10751 llvm::Function *Fn) {
10752 SmallString<256> Buffer;
10753 llvm::raw_svector_ostream Out(Buffer);
10754 Out << Prefix << ISA << LMask << VLEN;
10755 if (OutputBecomesInput)
10756 Out << "v";
10757 Out << ParSeq << "_" << MangledName;
10758 Fn->addFnAttr(Out.str());
10759}
10760
10761// Helper function to generate the Advanced SIMD names depending on
10762// the value of the NDS when simdlen is not present.
10763static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask,
10764 StringRef Prefix, char ISA,
10765 StringRef ParSeq, StringRef MangledName,
10766 bool OutputBecomesInput,
10767 llvm::Function *Fn) {
10768 switch (NDS) {
10769 case 8:
10770 addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
10771 OutputBecomesInput, Fn);
10772 addAArch64VectorName(16, Mask, Prefix, ISA, ParSeq, MangledName,
10773 OutputBecomesInput, Fn);
10774 break;
10775 case 16:
10776 addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
10777 OutputBecomesInput, Fn);
10778 addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
10779 OutputBecomesInput, Fn);
10780 break;
10781 case 32:
10782 addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
10783 OutputBecomesInput, Fn);
10784 addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
10785 OutputBecomesInput, Fn);
10786 break;
10787 case 64:
10788 case 128:
10789 addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
10790 OutputBecomesInput, Fn);
10791 break;
10792 default:
10793 llvm_unreachable("Scalar type is too wide.");
10794 }
10795}
10796
10797/// Emit vector function attributes for AArch64, as defined in the AAVFABI.
10799 CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN,
10800 ArrayRef<ParamAttrTy> ParamAttrs,
10801 OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName,
10802 char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc) {
10803
10804 // Get basic data for building the vector signature.
10805 const auto Data = getNDSWDS(FD, ParamAttrs);
10806 const unsigned NDS = std::get<0>(Data);
10807 const unsigned WDS = std::get<1>(Data);
10808 const bool OutputBecomesInput = std::get<2>(Data);
10809
10810 // Check the values provided via `simdlen` by the user.
10811 // 1. A `simdlen(1)` doesn't produce vector signatures,
10812 if (UserVLEN == 1) {
10813 unsigned DiagID = CGM.getDiags().getCustomDiagID(
10815 "The clause simdlen(1) has no effect when targeting aarch64.");
10816 CGM.getDiags().Report(SLoc, DiagID);
10817 return;
10818 }
10819
10820 // 2. Section 3.3.1, item 1: user input must be a power of 2 for
10821 // Advanced SIMD output.
10822 if (ISA == 'n' && UserVLEN && !llvm::isPowerOf2_32(UserVLEN)) {
10823 unsigned DiagID = CGM.getDiags().getCustomDiagID(
10824 DiagnosticsEngine::Warning, "The value specified in simdlen must be a "
10825 "power of 2 when targeting Advanced SIMD.");
10826 CGM.getDiags().Report(SLoc, DiagID);
10827 return;
10828 }
10829
10830 // 3. Section 3.4.1. SVE fixed lengh must obey the architectural
10831 // limits.
10832 if (ISA == 's' && UserVLEN != 0) {
10833 if ((UserVLEN * WDS > 2048) || (UserVLEN * WDS % 128 != 0)) {
10834 unsigned DiagID = CGM.getDiags().getCustomDiagID(
10835 DiagnosticsEngine::Warning, "The clause simdlen must fit the %0-bit "
10836 "lanes in the architectural constraints "
10837 "for SVE (min is 128-bit, max is "
10838 "2048-bit, by steps of 128-bit)");
10839 CGM.getDiags().Report(SLoc, DiagID) << WDS;
10840 return;
10841 }
10842 }
10843
10844 // Sort out parameter sequence.
10845 const std::string ParSeq = mangleVectorParameters(ParamAttrs);
10846 StringRef Prefix = "_ZGV";
10847 // Generate simdlen from user input (if any).
10848 if (UserVLEN) {
10849 if (ISA == 's') {
10850 // SVE generates only a masked function.
10851 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
10852 OutputBecomesInput, Fn);
10853 } else {
10854 assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
10855 // Advanced SIMD generates one or two functions, depending on
10856 // the `[not]inbranch` clause.
10857 switch (State) {
10858 case OMPDeclareSimdDeclAttr::BS_Undefined:
10859 addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
10860 OutputBecomesInput, Fn);
10861 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
10862 OutputBecomesInput, Fn);
10863 break;
10864 case OMPDeclareSimdDeclAttr::BS_Notinbranch:
10865 addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
10866 OutputBecomesInput, Fn);
10867 break;
10868 case OMPDeclareSimdDeclAttr::BS_Inbranch:
10869 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
10870 OutputBecomesInput, Fn);
10871 break;
10872 }
10873 }
10874 } else {
10875 // If no user simdlen is provided, follow the AAVFABI rules for
10876 // generating the vector length.
10877 if (ISA == 's') {
10878 // SVE, section 3.4.1, item 1.
10879 addAArch64VectorName("x", "M", Prefix, ISA, ParSeq, MangledName,
10880 OutputBecomesInput, Fn);
10881 } else {
10882 assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
10883 // Advanced SIMD, Section 3.3.1 of the AAVFABI, generates one or
10884 // two vector names depending on the use of the clause
10885 // `[not]inbranch`.
10886 switch (State) {
10887 case OMPDeclareSimdDeclAttr::BS_Undefined:
10888 addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
10889 OutputBecomesInput, Fn);
10890 addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
10891 OutputBecomesInput, Fn);
10892 break;
10893 case OMPDeclareSimdDeclAttr::BS_Notinbranch:
10894 addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
10895 OutputBecomesInput, Fn);
10896 break;
10897 case OMPDeclareSimdDeclAttr::BS_Inbranch:
10898 addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
10899 OutputBecomesInput, Fn);
10900 break;
10901 }
10902 }
10903 }
10904}
10905
// CGOpenMPRuntime::emitDeclareSimdFunction (body): for every redeclaration of
// the function, builds a Decl -> parameter-position map, then for each
// `#pragma omp declare simd` attribute collects per-parameter vector-ABI
// classifications (uniform / aligned / linear with step) and finally emits
// the mangled vector-variant names for x86 or AArch64 (SVE vs Advanced SIMD).
// NOTE(review): this chunk is a doxygen export — the opening signature line
// and a few cross-referenced lines were dropped by the extraction, so some
// statements below appear truncated; original line numbers are embedded.
10907 llvm::Function *Fn) {
10909 FD = FD->getMostRecentDecl();
10910 while (FD) {
10911 // Map params to their positions in function decl.
10912 llvm::DenseMap<const Decl *, unsigned> ParamPositions;
// For methods the implicit `this` takes position 0, shifting all explicit
// parameters by one.
10913 if (isa<CXXMethodDecl>(FD))
10914 ParamPositions.try_emplace(FD, 0);
10915 unsigned ParamPos = ParamPositions.size();
10916 for (const ParmVarDecl *P : FD->parameters()) {
10917 ParamPositions.try_emplace(P->getCanonicalDecl(), ParamPos);
10918 ++ParamPos;
10919 }
10920 for (const auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) {
10921 llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size());
10922 // Mark uniform parameters.
10923 for (const Expr *E : Attr->uniforms()) {
10924 E = E->IgnoreParenImpCasts();
10925 unsigned Pos;
10926 if (isa<CXXThisExpr>(E)) {
10927 Pos = ParamPositions[FD];
10928 } else {
10929 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
10930 ->getCanonicalDecl();
10931 auto It = ParamPositions.find(PVD);
10932 assert(It != ParamPositions.end() && "Function parameter not found");
10933 Pos = It->second;
10934 }
10935 ParamAttrs[Pos].Kind = Uniform;
10936 }
10937 // Get alignment info.
// NI walks alignments() in lockstep with aligneds(); a null alignment expr
// means "use the target's default simd alignment for the parameter type".
10938 auto *NI = Attr->alignments_begin();
10939 for (const Expr *E : Attr->aligneds()) {
10940 E = E->IgnoreParenImpCasts();
10941 unsigned Pos;
10942 QualType ParmTy;
10943 if (isa<CXXThisExpr>(E)) {
10944 Pos = ParamPositions[FD];
10945 ParmTy = E->getType();
10946 } else {
10947 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
10948 ->getCanonicalDecl();
10949 auto It = ParamPositions.find(PVD);
10950 assert(It != ParamPositions.end() && "Function parameter not found");
10951 Pos = It->second;
10952 ParmTy = PVD->getType();
10953 }
10954 ParamAttrs[Pos].Alignment =
10955 (*NI)
10956 ? (*NI)->EvaluateKnownConstInt(C)
10957 : llvm::APSInt::getUnsigned(
10958 C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy))
10959 .getQuantity());
10960 ++NI;
10961 }
10962 // Mark linear parameters.
// SI/MI walk steps()/modifiers() in lockstep with linears().
10963 auto *SI = Attr->steps_begin();
10964 auto *MI = Attr->modifiers_begin();
10965 for (const Expr *E : Attr->linears()) {
10966 E = E->IgnoreParenImpCasts();
10967 unsigned Pos;
10968 bool IsReferenceType = false;
10969 // Rescaling factor needed to compute the linear parameter
10970 // value in the mangled name.
10971 unsigned PtrRescalingFactor = 1;
10972 if (isa<CXXThisExpr>(E)) {
10973 Pos = ParamPositions[FD];
10974 auto *P = cast<PointerType>(E->getType());
10975 PtrRescalingFactor = CGM.getContext()
10976 .getTypeSizeInChars(P->getPointeeType())
10977 .getQuantity();
10978 } else {
10979 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
10980 ->getCanonicalDecl();
10981 auto It = ParamPositions.find(PVD);
10982 assert(It != ParamPositions.end() && "Function parameter not found");
10983 Pos = It->second;
10984 if (auto *P = dyn_cast<PointerType>(PVD->getType()))
10985 PtrRescalingFactor = CGM.getContext()
10986 .getTypeSizeInChars(P->getPointeeType())
10987 .getQuantity();
10988 else if (PVD->getType()->isReferenceType()) {
10989 IsReferenceType = true;
10990 PtrRescalingFactor =
10991 CGM.getContext()
10992 .getTypeSizeInChars(PVD->getType().getNonReferenceType())
10993 .getQuantity();
10994 }
10995 }
10996 ParamAttrTy &ParamAttr = ParamAttrs[Pos];
10997 if (*MI == OMPC_LINEAR_ref)
10998 ParamAttr.Kind = LinearRef;
10999 else if (*MI == OMPC_LINEAR_uval)
11000 ParamAttr.Kind = LinearUVal;
11001 else if (IsReferenceType)
11002 ParamAttr.Kind = LinearVal;
11003 else
11004 ParamAttr.Kind = Linear;
11005 // Assuming a stride of 1, for `linear` without modifiers.
11006 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(1);
11007 if (*SI) {
// If the step is not a compile-time constant it must name another
// parameter; encode that parameter's position as a variable stride.
11009 if (!(*SI)->EvaluateAsInt(Result, C, Expr::SE_AllowSideEffects)) {
11010 if (const auto *DRE =
11011 cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) {
11012 if (const auto *StridePVD =
11013 dyn_cast<ParmVarDecl>(DRE->getDecl())) {
11014 ParamAttr.HasVarStride = true;
11015 auto It = ParamPositions.find(StridePVD->getCanonicalDecl());
11016 assert(It != ParamPositions.end() &&
11017 "Function parameter not found");
11018 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(It->second);
11019 }
11020 }
11021 } else {
11022 ParamAttr.StrideOrArg = Result.Val.getInt();
11023 }
11024 }
11025 // If we are using a linear clause on a pointer, we need to
11026 // rescale the value of linear_step with the byte size of the
11027 // pointee type.
11028 if (!ParamAttr.HasVarStride &&
11029 (ParamAttr.Kind == Linear || ParamAttr.Kind == LinearRef))
11030 ParamAttr.StrideOrArg = ParamAttr.StrideOrArg * PtrRescalingFactor;
11031 ++SI;
11032 ++MI;
11033 }
11034 llvm::APSInt VLENVal;
11035 SourceLocation ExprLoc;
11036 const Expr *VLENExpr = Attr->getSimdlen();
11037 if (VLENExpr) {
11038 VLENVal = VLENExpr->EvaluateKnownConstInt(C);
11039 ExprLoc = VLENExpr->getExprLoc();
11040 }
// Dispatch the actual name emission per target: x86 handles everything in
// one helper; AArch64 picks SVE ('s') when available, else NEON ('n'),
// both with a 128-bit narrowest data size.
11041 OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState();
11042 if (CGM.getTriple().isX86()) {
11043 emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State);
11044 } else if (CGM.getTriple().getArch() == llvm::Triple::aarch64) {
11045 unsigned VLEN = VLENVal.getExtValue();
11046 StringRef MangledName = Fn->getName();
11047 if (CGM.getTarget().hasFeature("sve"))
11048 emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
11049 MangledName, 's', 128, Fn, ExprLoc);
11050 else if (CGM.getTarget().hasFeature("neon"))
11051 emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
11052 MangledName, 'n', 128, Fn, ExprLoc);
11053 }
11054 }
11055 FD = FD->getPreviousDecl();
11056 }
11057}
11058
11059namespace {
11060/// Cleanup action for doacross support.
11061class DoacrossCleanupTy final : public EHScopeStack::Cleanup {
11062public:
11063 static const int DoacrossFinArgs = 2;
11064
11065private:
11066 llvm::FunctionCallee RTLFn;
11067 llvm::Value *Args[DoacrossFinArgs];
11068
11069public:
11070 DoacrossCleanupTy(llvm::FunctionCallee RTLFn,
11071 ArrayRef<llvm::Value *> CallArgs)
11072 : RTLFn(RTLFn) {
11073 assert(CallArgs.size() == DoacrossFinArgs);
11074 std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
11075 }
11076 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
11077 if (!CGF.HaveInsertPoint())
11078 return;
11079 CGF.EmitRuntimeCall(RTLFn, Args);
11080 }
11081};
11082} // namespace
11083
// CGOpenMPRuntime::emitDoacrossInit: lazily builds the kmp_dim record type
// ({lo, up, st}, all kmp_int64), materializes a local array of one kmp_dim
// per associated loop with upper = loop trip count and stride = 1, calls
// __kmpc_doacross_init, and pushes a cleanup emitting __kmpc_doacross_fini.
// NOTE(review): doxygen export — the signature line and a few locals (e.g.
// the ASTContext reference and pointer-cast wrappers) were dropped.
11085 const OMPLoopDirective &D,
11086 ArrayRef<Expr *> NumIterations) {
11087 if (!CGF.HaveInsertPoint())
11088 return;
11089
11091 QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
11092 RecordDecl *RD;
// KmpDimTy is cached on the runtime object; build it only on first use.
11093 if (KmpDimTy.isNull()) {
11094 // Build struct kmp_dim { // loop bounds info casted to kmp_int64
11095 // kmp_int64 lo; // lower
11096 // kmp_int64 up; // upper
11097 // kmp_int64 st; // stride
11098 // };
11099 RD = C.buildImplicitRecord("kmp_dim");
11100 RD->startDefinition();
11101 addFieldToRecordDecl(C, RD, Int64Ty);
11102 addFieldToRecordDecl(C, RD, Int64Ty);
11103 addFieldToRecordDecl(C, RD, Int64Ty);
11104 RD->completeDefinition();
11105 KmpDimTy = C.getRecordType(RD);
11106 } else {
11107 RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl());
11108 }
11109 llvm::APInt Size(/*numBits=*/32, NumIterations.size());
11110 QualType ArrayTy = C.getConstantArrayType(KmpDimTy, Size, nullptr,
11112
// Zero-init the dims array so `lo` stays 0 for every dimension; only
// `up` and `st` are stored explicitly below.
11113 Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
11114 CGF.EmitNullInitialization(DimsAddr, ArrayTy);
11115 enum { LowerFD = 0, UpperFD, StrideFD };
11116 // Fill dims with data.
11117 for (unsigned I = 0, E = NumIterations.size(); I < E; ++I) {
11118 LValue DimsLVal = CGF.MakeAddrLValue(
11119 CGF.Builder.CreateConstArrayGEP(DimsAddr, I), KmpDimTy);
11120 // dims.upper = num_iterations;
11121 LValue UpperLVal = CGF.EmitLValueForField(
11122 DimsLVal, *std::next(RD->field_begin(), UpperFD));
11123 llvm::Value *NumIterVal = CGF.EmitScalarConversion(
11124 CGF.EmitScalarExpr(NumIterations[I]), NumIterations[I]->getType(),
11125 Int64Ty, NumIterations[I]->getExprLoc());
11126 CGF.EmitStoreOfScalar(NumIterVal, UpperLVal);
11127 // dims.stride = 1;
11128 LValue StrideLVal = CGF.EmitLValueForField(
11129 DimsLVal, *std::next(RD->field_begin(), StrideFD));
11130 CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1),
11131 StrideLVal);
11132 }
11133
11134 // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid,
11135 // kmp_int32 num_dims, struct kmp_dim * dims);
11136 llvm::Value *Args[] = {
11138 getThreadID(CGF, D.getBeginLoc()),
11139 llvm::ConstantInt::getSigned(CGM.Int32Ty, NumIterations.size()),
11141 CGF.Builder.CreateConstArrayGEP(DimsAddr, 0).emitRawPointer(CGF),
11142 CGM.VoidPtrTy)};
11143
11144 llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
11145 CGM.getModule(), OMPRTL___kmpc_doacross_init);
11146 CGF.EmitRuntimeCall(RTLFn, Args);
// Finalization runs on scope exit (normal and EH paths) via the cleanup
// stack; it only needs the end-location ident and thread id.
11147 llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = {
11148 emitUpdateLocation(CGF, D.getEndLoc()), getThreadID(CGF, D.getEndLoc())};
11149 llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
11150 CGM.getModule(), OMPRTL___kmpc_doacross_fini);
11151 CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
11152 llvm::ArrayRef(FiniArgs));
11153}
11154
// Shared implementation for `ordered depend(source/sink)` and
// `ordered doacross(...)`: copies the per-loop iteration values into a local
// kmp_int64 array and calls __kmpc_doacross_post (source) or
// __kmpc_doacross_wait (sink). T is the clause type (OMPDependClause or
// OMPDoacrossClause); OMPDoacrossKind<T> abstracts the source/sink query.
// NOTE(review): doxygen export — the function's signature line and one
// declaration line were dropped by the extraction.
11155template <typename T>
11157 const T *C, llvm::Value *ULoc,
11158 llvm::Value *ThreadID) {
11159 QualType Int64Ty =
11160 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
11161 llvm::APInt Size(/*numBits=*/32, C->getNumLoops());
11163 Int64Ty, Size, nullptr, ArraySizeModifier::Normal, 0);
11164 Address CntAddr = CGF.CreateMemTemp(ArrayTy, ".cnt.addr");
// Store each loop's dependence vector element, widened to kmp_int64.
11165 for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) {
11166 const Expr *CounterVal = C->getLoopData(I);
11167 assert(CounterVal);
11168 llvm::Value *CntVal = CGF.EmitScalarConversion(
11169 CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty,
11170 CounterVal->getExprLoc());
11171 CGF.EmitStoreOfScalar(CntVal, CGF.Builder.CreateConstArrayGEP(CntAddr, I),
11172 /*Volatile=*/false, Int64Ty);
11173 }
11174 llvm::Value *Args[] = {
11175 ULoc, ThreadID,
11176 CGF.Builder.CreateConstArrayGEP(CntAddr, 0).emitRawPointer(CGF)};
11177 llvm::FunctionCallee RTLFn;
11178 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
11179 OMPDoacrossKind<T> ODK;
11180 if (ODK.isSource(C)) {
11181 RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
11182 OMPRTL___kmpc_doacross_post);
11183 } else {
11184 assert(ODK.isSink(C) && "Expect sink modifier.");
11185 RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
11186 OMPRTL___kmpc_doacross_wait);
11187 }
11188 CGF.EmitRuntimeCall(RTLFn, Args);
11189}
11190
// CGOpenMPRuntime::emitDoacrossOrdered, depend-clause overload: forwards to
// the shared template with the clause's begin location as ident and thread id.
// NOTE(review): the signature line was dropped by the doxygen extraction.
11192 const OMPDependClause *C) {
11193 return EmitDoacrossOrdered<OMPDependClause>(
11194 CGF, CGM, C, emitUpdateLocation(CGF, C->getBeginLoc()),
11195 getThreadID(CGF, C->getBeginLoc()));
11196}
11197
// CGOpenMPRuntime::emitDoacrossOrdered, doacross-clause overload: same
// forwarding shape as the depend-clause overload above it in the file.
// NOTE(review): the signature line was dropped by the doxygen extraction.
11199 const OMPDoacrossClause *C) {
11200 return EmitDoacrossOrdered<OMPDoacrossClause>(
11201 CGF, CGM, C, emitUpdateLocation(CGF, C->getBeginLoc()),
11202 getThreadID(CGF, C->getBeginLoc()));
11203}
11204
// CGOpenMPRuntime::emitCall: emits a runtime call to Callee, using the
// cheaper nounwind form when the callee is a known non-throwing function.
// Requires a valid source location (asserted).
// NOTE(review): the signature line and a debug-location helper line were
// dropped by the doxygen extraction.
11206 llvm::FunctionCallee Callee,
11207 ArrayRef<llvm::Value *> Args) const {
11208 assert(Loc.isValid() && "Outlined function call location must be valid.");
11210
// If the callee is a concrete llvm::Function marked nounwind, emit a call
// that does not need an invoke/landingpad.
11211 if (auto *Fn = dyn_cast<llvm::Function>(Callee.getCallee())) {
11212 if (Fn->doesNotThrow()) {
11213 CGF.EmitNounwindRuntimeCall(Fn, Args);
11214 return;
11215 }
11216 }
11217 CGF.EmitRuntimeCall(Callee, Args);
11218}
11219
// CGOpenMPRuntime::emitOutlinedFunctionCall: default implementation simply
// delegates to emitCall; device runtimes override this to adjust arguments.
// NOTE(review): presumably virtual-dispatch hook — confirm against header;
// the first signature line was dropped by the doxygen extraction.
11221 CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn,
11222 ArrayRef<llvm::Value *> Args) const {
11223 emitCall(CGF, Loc, OutlinedFn, Args);
11224}
11225
// CGOpenMPRuntime::emitFunctionProlog (body): for `declare target` function
// declarations, performs additional prolog handling. NOTE(review): both the
// signature line and the statement executed when the condition holds were
// dropped by the doxygen extraction, so the action itself is not visible here.
11227 if (const auto *FD = dyn_cast<FunctionDecl>(D))
11228 if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(FD))
11230}
11231
// CGOpenMPRuntime::getParameterAddress: host-side default — the native
// parameter's local address is used directly (device runtimes map between
// Native/Target params; TargetParam is intentionally unused here).
// NOTE(review): the first signature line was dropped by the extraction.
11233 const VarDecl *NativeParam,
11234 const VarDecl *TargetParam) const {
11235 return CGF.GetAddrOfLocalVar(NativeParam);
11236}
11237
/// Return allocator value from expression, or return a null allocator (default
/// when no allocator specified).
///
/// \param Allocator allocator expression from an `allocate` clause/directive,
///        or null when the clause carried no allocator.
/// \returns an i8*-typed allocator handle suitable for __kmpc_alloc/free.
11240static llvm::Value *getAllocatorVal(CodeGenFunction &CGF,
11241 const Expr *Allocator) {
11242 llvm::Value *AllocVal;
11243 if (Allocator) {
11244 AllocVal = CGF.EmitScalarExpr(Allocator);
11245 // According to the standard, the original allocator type is a enum
11246 // (integer). Convert to pointer type, if required.
11247 AllocVal = CGF.EmitScalarConversion(AllocVal, Allocator->getType(),
11248 CGF.getContext().VoidPtrTy,
11249 Allocator->getExprLoc());
11250 } else {
11251 // If no allocator specified, it defaults to the null allocator.
// NOTE(review): the type argument of getNullValue was dropped by the
// doxygen extraction (presumably the void-pointer LLVM type).
11252 AllocVal = llvm::Constant::getNullValue(
11254 }
11255 return AllocVal;
11256}
11257
11258/// Return the alignment from an allocate directive if present.
11259static llvm::Value *getAlignmentValue(CodeGenModule &CGM, const VarDecl *VD) {
11260 std::optional<CharUnits> AllocateAlignment = CGM.getOMPAllocateAlignment(VD);
11261
11262 if (!AllocateAlignment)
11263 return nullptr;
11264
11265 return llvm::ConstantInt::get(CGM.SizeTy, AllocateAlignment->getQuantity());
11266}
11267
// CGOpenMPRuntime::getAddressOfLocalVariable: returns the address to use for
// a local variable, honoring (a) untied-task stack-saved copies and (b) an
// OMPAllocateDeclAttr (`#pragma omp allocate`), in which case storage is
// obtained from __kmpc_alloc/__kmpc_aligned_alloc and a matching
// __kmpc_free cleanup is pushed. Returns Address::invalid() when the
// default CodeGen path should be used.
// NOTE(review): doxygen export — the signature line and several locals
// (e.g. the args vector and pointer-cast wrappers) were dropped.
11269 const VarDecl *VD) {
11270 if (!VD)
11271 return Address::invalid();
11272 Address UntiedAddr = Address::invalid();
11273 Address UntiedRealAddr = Address::invalid();
// If the current function is an untied task body, look up any saved
// addresses for this variable.
11274 auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
11275 if (It != FunctionToUntiedTaskStackMap.end()) {
11276 const UntiedLocalVarsAddressesMap &UntiedData =
11277 UntiedLocalVarsStack[It->second];
11278 auto I = UntiedData.find(VD);
11279 if (I != UntiedData.end()) {
11280 UntiedAddr = I->second.first;
11281 UntiedRealAddr = I->second.second;
11282 }
11283 }
11284 const VarDecl *CVD = VD->getCanonicalDecl();
11285 if (CVD->hasAttr<OMPAllocateDeclAttr>()) {
11286 // Use the default allocation.
11287 if (!isAllocatableDecl(VD))
11288 return UntiedAddr;
11289 llvm::Value *Size;
11290 CharUnits Align = CGM.getContext().getDeclAlign(CVD);
11291 if (CVD->getType()->isVariablyModifiedType()) {
11292 Size = CGF.getTypeSize(CVD->getType());
11293 // Align the size: ((size + align - 1) / align) * align
11294 Size = CGF.Builder.CreateNUWAdd(
11295 Size, CGM.getSize(Align - CharUnits::fromQuantity(1)));
11296 Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align));
11297 Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align));
11298 } else {
11300 Size = CGM.getSize(Sz.alignTo(Align));
11301 }
11302 llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc());
11303 const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
11304 const Expr *Allocator = AA->getAllocator();
11305 llvm::Value *AllocVal = getAllocatorVal(CGF, Allocator);
11306 llvm::Value *Alignment = getAlignmentValue(CGM, CVD);
11308 Args.push_back(ThreadID);
// An explicit alignment selects the aligned allocation entry point; the
// argument order differs between the two runtime calls.
11309 if (Alignment)
11310 Args.push_back(Alignment);
11311 Args.push_back(Size);
11312 Args.push_back(AllocVal);
11313 llvm::omp::RuntimeFunction FnID =
11314 Alignment ? OMPRTL___kmpc_aligned_alloc : OMPRTL___kmpc_alloc;
11315 llvm::Value *Addr = CGF.EmitRuntimeCall(
11316 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), FnID), Args,
11317 getName({CVD->getName(), ".void.addr"}));
11318 llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
11319 CGM.getModule(), OMPRTL___kmpc_free);
11322 Addr, CGF.ConvertTypeForMem(Ty), getName({CVD->getName(), ".addr"}));
11323 if (UntiedAddr.isValid())
11324 CGF.EmitStoreOfScalar(Addr, UntiedAddr, /*Volatile=*/false, Ty);
11325
11326 // Cleanup action for allocate support.
// Frees the __kmpc_alloc'ed storage with the same allocator; the source
// location is carried as a raw encoding since cleanups outlive the scope.
11327 class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup {
11328 llvm::FunctionCallee RTLFn;
11329 SourceLocation::UIntTy LocEncoding;
11330 Address Addr;
11331 const Expr *AllocExpr;
11332
11333 public:
11334 OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn,
11335 SourceLocation::UIntTy LocEncoding, Address Addr,
11336 const Expr *AllocExpr)
11337 : RTLFn(RTLFn), LocEncoding(LocEncoding), Addr(Addr),
11338 AllocExpr(AllocExpr) {}
11339 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
11340 if (!CGF.HaveInsertPoint())
11341 return;
11342 llvm::Value *Args[3];
11343 Args[0] = CGF.CGM.getOpenMPRuntime().getThreadID(
11344 CGF, SourceLocation::getFromRawEncoding(LocEncoding));
11346 Addr.emitRawPointer(CGF), CGF.VoidPtrTy);
11347 llvm::Value *AllocVal = getAllocatorVal(CGF, AllocExpr);
11348 Args[2] = AllocVal;
11349 CGF.EmitRuntimeCall(RTLFn, Args);
11350 }
11351 };
11352 Address VDAddr =
11353 UntiedRealAddr.isValid()
11354 ? UntiedRealAddr
11355 : Address(Addr, CGF.ConvertTypeForMem(CVD->getType()), Align);
11356 CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(
11357 NormalAndEHCleanup, FiniRTLFn, CVD->getLocation().getRawEncoding(),
11358 VDAddr, Allocator);
11359 if (UntiedRealAddr.isValid())
11360 if (auto *Region =
11361 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
11362 Region->emitUntiedSwitch(CGF);
11363 return VDAddr;
11364 }
11365 return UntiedAddr;
11366}
11367
// CGOpenMPRuntime::isLocalVarInUntiedTask: true iff the current function is
// an untied task body whose saved-locals map contains \p VD.
// NOTE(review): the first signature line was dropped by the extraction.
11369 const VarDecl *VD) const {
11370 auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
11371 if (It == FunctionToUntiedTaskStackMap.end())
11372 return false;
11373 return UntiedLocalVarsStack[It->second].count(VD) > 0;
11374}
11375
// CGOpenMPRuntime::NontemporalDeclsRAII constructor: if the directive has
// any `nontemporal` clauses, pushes a fresh decl-set on the runtime's
// NontemporalDeclsStack and records every privately referenced decl from
// those clauses (plain DeclRefExprs, or members of the current class).
// NOTE(review): the signature lines and the local binding `DS` for the
// pushed set were dropped by the doxygen extraction.
11378 : CGM(CGM), NeedToPush(S.hasClausesOfKind<OMPNontemporalClause>()) {
11379 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
11380 if (!NeedToPush)
11381 return;
11383 CGM.getOpenMPRuntime().NontemporalDeclsStack.emplace_back();
11384 for (const auto *C : S.getClausesOfKind<OMPNontemporalClause>()) {
11385 for (const Stmt *Ref : C->private_refs()) {
11386 const auto *SimpleRefExpr = cast<Expr>(Ref)->IgnoreParenImpCasts();
11387 const ValueDecl *VD;
11388 if (const auto *DRE = dyn_cast<DeclRefExpr>(SimpleRefExpr)) {
11389 VD = DRE->getDecl();
11390 } else {
11391 const auto *ME = cast<MemberExpr>(SimpleRefExpr);
11392 assert((ME->isImplicitCXXThis() ||
11393 isa<CXXThisExpr>(ME->getBase()->IgnoreParenImpCasts())) &&
11394 "Expected member of current class.");
11395 VD = ME->getMemberDecl();
11396 }
11397 DS.insert(VD);
11398 }
11399 }
11400}
11401
// NontemporalDeclsRAII destructor: undoes the constructor's push when one
// was performed. NOTE(review): the signature line and the pop statement were
// dropped by the doxygen extraction — presumably a pop_back on
// NontemporalDeclsStack; confirm against the full source.
11403 if (!NeedToPush)
11404 return;
11406}
11407
// CGOpenMPRuntime::UntiedTaskLocalDeclsRAII constructor: when LocalVars is
// non-empty, records the current function -> stack-index mapping and pushes
// the map of saved (address, real address) pairs for untied-task locals.
// NOTE(review): the signature line and the map-insert statement's opening
// were dropped by the doxygen extraction.
11409 CodeGenFunction &CGF,
11410 const llvm::MapVector<CanonicalDeclPtr<const VarDecl>,
11411 std::pair<Address, Address>> &LocalVars)
11412 : CGM(CGF.CGM), NeedToPush(!LocalVars.empty()) {
11413 if (!NeedToPush)
11414 return;
11416 CGF.CurFn, CGM.getOpenMPRuntime().UntiedLocalVarsStack.size());
11417 CGM.getOpenMPRuntime().UntiedLocalVarsStack.push_back(LocalVars);
11418}
11419
// UntiedTaskLocalDeclsRAII destructor: undoes the constructor's push when
// one was performed. NOTE(review): the signature line and the pop statement
// were dropped by the doxygen extraction — presumably a pop_back on
// UntiedLocalVarsStack; confirm against the full source.
11421 if (!NeedToPush)
11422 return;
11424}
11425
// CGOpenMPRuntime::isNontemporalDecl: true iff \p VD appears in any set on
// the nontemporal-decls stack (i.e. any enclosing directive declared it
// nontemporal). NOTE(review): the signature line and the container argument
// of any_of were dropped by the doxygen extraction.
11427 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
11428
11429 return llvm::any_of(
11431 [VD](const NontemporalDeclsSet &Set) { return Set.contains(VD); });
11432}
11433
// Collects, into NeedToAddForLPCsAsDisabled, the decls for which the
// lastprivate(conditional) analysis must be disabled inside this directive:
// variables captured into target/task regions and scalar variables named in
// private/firstprivate/lastprivate/reduction/linear clauses, when an
// enclosing (enabled) lastprivate-conditional entry tracks them.
// NOTE(review): the local set `NeedToCheckForLPCs` is declared on a line
// dropped by the doxygen extraction; the five clause loops are structurally
// identical and differ only in clause kind.
11434void CGOpenMPRuntime::LastprivateConditionalRAII::tryToDisableInnerAnalysis(
11435 const OMPExecutableDirective &S,
11436 llvm::DenseSet<CanonicalDeclPtr<const Decl>> &NeedToAddForLPCsAsDisabled)
11437 const {
11439 // Vars in target/task regions must be excluded completely.
11440 if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()) ||
11441 isOpenMPTaskingDirective(S.getDirectiveKind())) {
11443 getOpenMPCaptureRegions(CaptureRegions, S.getDirectiveKind());
11444 const CapturedStmt *CS = S.getCapturedStmt(CaptureRegions.front());
11445 for (const CapturedStmt::Capture &Cap : CS->captures()) {
11446 if (Cap.capturesVariable() || Cap.capturesVariableByCopy())
11447 NeedToCheckForLPCs.insert(Cap.getCapturedVar());
11448 }
11449 }
11450 // Exclude vars in private clauses.
11451 for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
11452 for (const Expr *Ref : C->varlists()) {
11453 if (!Ref->getType()->isScalarType())
11454 continue;
11455 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11456 if (!DRE)
11457 continue;
11458 NeedToCheckForLPCs.insert(DRE->getDecl());
11459 }
11460 }
11461 for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
11462 for (const Expr *Ref : C->varlists()) {
11463 if (!Ref->getType()->isScalarType())
11464 continue;
11465 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11466 if (!DRE)
11467 continue;
11468 NeedToCheckForLPCs.insert(DRE->getDecl());
11469 }
11470 }
11471 for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
11472 for (const Expr *Ref : C->varlists()) {
11473 if (!Ref->getType()->isScalarType())
11474 continue;
11475 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11476 if (!DRE)
11477 continue;
11478 NeedToCheckForLPCs.insert(DRE->getDecl());
11479 }
11480 }
11481 for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
11482 for (const Expr *Ref : C->varlists()) {
11483 if (!Ref->getType()->isScalarType())
11484 continue;
11485 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11486 if (!DRE)
11487 continue;
11488 NeedToCheckForLPCs.insert(DRE->getDecl());
11489 }
11490 }
11491 for (const auto *C : S.getClausesOfKind<OMPLinearClause>()) {
11492 for (const Expr *Ref : C->varlists()) {
11493 if (!Ref->getType()->isScalarType())
11494 continue;
11495 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11496 if (!DRE)
11497 continue;
11498 NeedToCheckForLPCs.insert(DRE->getDecl());
11499 }
11500 }
// Only decls already tracked by an enabled lastprivate-conditional entry on
// the stack need to be disabled; stop at the innermost entry that knows VD.
11501 for (const Decl *VD : NeedToCheckForLPCs) {
11502 for (const LastprivateConditionalData &Data :
11504 if (Data.DeclToUniqueName.count(VD) > 0) {
11505 if (!Data.Disabled)
11506 NeedToAddForLPCsAsDisabled.insert(VD);
11507 break;
11508 }
11509 }
11510 }
11511}
11512
// LastprivateConditionalRAII main constructor: when OpenMP >= 5.0 and the
// directive has a lastprivate(conditional) clause, pushes a
// LastprivateConditionalData entry mapping each conditional decl to a
// unique global name, and records the iteration-variable lvalue and current
// function. NOTE(review): the lines that push/bind the `Data` entry were
// dropped by the doxygen extraction.
11513CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
11514 CodeGenFunction &CGF, const OMPExecutableDirective &S, LValue IVLVal)
11515 : CGM(CGF.CGM),
11516 Action((CGM.getLangOpts().OpenMP >= 50 &&
11517 llvm::any_of(S.getClausesOfKind<OMPLastprivateClause>(),
11518 [](const OMPLastprivateClause *C) {
11519 return C->getKind() ==
11520 OMPC_LASTPRIVATE_conditional;
11521 }))
11522 ? ActionToDo::PushAsLastprivateConditional
11523 : ActionToDo::DoNotPush) {
11524 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
11525 if (CGM.getLangOpts().OpenMP < 50 || Action == ActionToDo::DoNotPush)
11526 return;
11527 assert(Action == ActionToDo::PushAsLastprivateConditional &&
11528 "Expected a push action.");
11531 for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
11532 if (C->getKind() != OMPC_LASTPRIVATE_conditional)
11533 continue;
11534
// Each conditional lastprivate decl gets a stable unique name used for the
// internal "last value" globals (see emitLastprivateConditionalUpdate).
11535 for (const Expr *Ref : C->varlists()) {
11536 Data.DeclToUniqueName.insert(std::make_pair(
11537 cast<DeclRefExpr>(Ref->IgnoreParenImpCasts())->getDecl(),
11538 SmallString<16>(generateUniqueName(CGM, "pl_cond", Ref))));
11539 }
11540 }
11541 Data.IVLVal = IVLVal;
11542 Data.Fn = CGF.CurFn;
11543}
11544
// LastprivateConditionalRAII "disable" constructor (used via disable()):
// pushes a Disabled entry listing decls whose inner lastprivate-conditional
// analysis must be suppressed for this directive, as computed by
// tryToDisableInnerAnalysis. NOTE(review): the second signature line was
// dropped by the doxygen extraction.
11545CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
11547 : CGM(CGF.CGM), Action(ActionToDo::DoNotPush) {
11548 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
11549 if (CGM.getLangOpts().OpenMP < 50)
11550 return;
11551 llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToAddForLPCsAsDisabled;
11552 tryToDisableInnerAnalysis(S, NeedToAddForLPCsAsDisabled);
11553 if (!NeedToAddForLPCsAsDisabled.empty()) {
11554 Action = ActionToDo::DisableLastprivateConditional;
11555 LastprivateConditionalData &Data =
11556 CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
// Disabled entries carry empty unique names — only the decl keys matter
// for the lookup in LastprivateConditionalRefChecker.
11557 for (const Decl *VD : NeedToAddForLPCsAsDisabled)
11558 Data.DeclToUniqueName.insert(std::make_pair(VD, SmallString<16>()));
11559 Data.Fn = CGF.CurFn;
11560 Data.Disabled = true;
11561 }
11562}
11563
// Factory for the "disable" form of LastprivateConditionalRAII (see the
// constructor above). NOTE(review): the return-type/name signature lines
// were dropped by the doxygen extraction.
11566 CodeGenFunction &CGF, const OMPExecutableDirective &S) {
11567 return LastprivateConditionalRAII(CGF, S);
11568}
11569
// LastprivateConditionalRAII destructor: pops the stack entry the matching
// constructor pushed (either the Disabled entry or the conditional-vars
// entry). NOTE(review): the signature line and both pop statements were
// dropped by the doxygen extraction — presumably pop_back on
// LastprivateConditionalStack; confirm against the full source.
11571 if (CGM.getLangOpts().OpenMP < 50)
11572 return;
11573 if (Action == ActionToDo::DisableLastprivateConditional) {
11574 assert(CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
11575 "Expected list of disabled private vars.");
11577 }
11578 if (Action == ActionToDo::PushAsLastprivateConditional) {
11579 assert(
11581 "Expected list of lastprivate conditional vars.");
11583 }
11584}
11585
// CGOpenMPRuntime::emitLastprivateConditionalInit: builds (or reuses, per
// function and decl) an implicit record { value; char Fired; } for a
// lastprivate(conditional) variable, zeroes the Fired flag, and returns the
// address of the value field to be used as the private copy.
// NOTE(review): the signature line, the ASTContext local, and the store
// call's opening line were dropped by the doxygen extraction. The record
// name "lasprivate.conditional" (sic) is an upstream literal — do not
// "fix" its spelling without checking for matching lookups.
11587 const VarDecl *VD) {
11589 auto I = LastprivateConditionalToTypes.find(CGF.CurFn);
11590 if (I == LastprivateConditionalToTypes.end())
11591 I = LastprivateConditionalToTypes.try_emplace(CGF.CurFn).first;
11592 QualType NewType;
11593 const FieldDecl *VDField;
11594 const FieldDecl *FiredField;
11595 LValue BaseLVal;
11596 auto VI = I->getSecond().find(VD);
11597 if (VI == I->getSecond().end()) {
11598 RecordDecl *RD = C.buildImplicitRecord("lasprivate.conditional");
11599 RD->startDefinition();
11600 VDField = addFieldToRecordDecl(C, RD, VD->getType().getNonReferenceType());
11601 FiredField = addFieldToRecordDecl(C, RD, C.CharTy);
11602 RD->completeDefinition();
11603 NewType = C.getRecordType(RD);
11604 Address Addr = CGF.CreateMemTemp(NewType, C.getDeclAlign(VD), VD->getName());
11605 BaseLVal = CGF.MakeAddrLValue(Addr, NewType, AlignmentSource::Decl);
11606 I->getSecond().try_emplace(VD, NewType, VDField, FiredField, BaseLVal);
11607 } else {
// Cache hit: unpack the previously created record layout for this decl.
11608 NewType = std::get<0>(VI->getSecond());
11609 VDField = std::get<1>(VI->getSecond());
11610 FiredField = std::get<2>(VI->getSecond());
11611 BaseLVal = std::get<3>(VI->getSecond());
11612 }
// Fired starts at 0; inner regions set it to 1 on assignment (see
// checkAndEmitLastprivateConditional).
11613 LValue FiredLVal =
11614 CGF.EmitLValueForField(BaseLVal, FiredField);
11616 llvm::ConstantInt::getNullValue(CGF.ConvertTypeForMem(C.CharTy)),
11617 FiredLVal);
11618 return CGF.EmitLValueForField(BaseLVal, VDField).getAddress(CGF);
11619}
11620
namespace {
/// Checks if the lastprivate conditional variable is referenced in LHS.
// Walks the lastprivate-conditional stack innermost-first; a Disabled entry
// that knows the decl vetoes the match. Records the matched expression,
// canonical decl, unique name, iteration-variable lvalue and owning function
// for retrieval via getFoundData().
// NOTE(review): the lines declaring the LPM member/parameter and the
// range-for headers over the stack were dropped by the doxygen extraction.
11622/// Checks if the lastprivate conditional variable is referenced in LHS.
11623class LastprivateConditionalRefChecker final
11624 : public ConstStmtVisitor<LastprivateConditionalRefChecker, bool> {
11626 const Expr *FoundE = nullptr;
11627 const Decl *FoundD = nullptr;
11628 StringRef UniqueDeclName;
11629 LValue IVLVal;
11630 llvm::Function *FoundFn = nullptr;
11631 SourceLocation Loc;
11632
11633public:
11634 bool VisitDeclRefExpr(const DeclRefExpr *E) {
11636 llvm::reverse(LPM)) {
11637 auto It = D.DeclToUniqueName.find(E->getDecl());
11638 if (It == D.DeclToUniqueName.end())
11639 continue;
11640 if (D.Disabled)
11641 return false;
11642 FoundE = E;
11643 FoundD = E->getDecl()->getCanonicalDecl();
11644 UniqueDeclName = It->second;
11645 IVLVal = D.IVLVal;
11646 FoundFn = D.Fn;
11647 break;
11648 }
11649 return FoundE == E;
11650 }
// Member form: only members accessed through (wrapped) `this` qualify.
11651 bool VisitMemberExpr(const MemberExpr *E) {
11652 if (!CodeGenFunction::IsWrappedCXXThis(E->getBase()))
11653 return false;
11655 llvm::reverse(LPM)) {
11656 auto It = D.DeclToUniqueName.find(E->getMemberDecl());
11657 if (It == D.DeclToUniqueName.end())
11658 continue;
11659 if (D.Disabled)
11660 return false;
11661 FoundE = E;
11662 FoundD = E->getMemberDecl()->getCanonicalDecl();
11663 UniqueDeclName = It->second;
11664 IVLVal = D.IVLVal;
11665 FoundFn = D.Fn;
11666 break;
11667 }
11668 return FoundE == E;
11669 }
// Generic traversal: only glvalue children can name the variable being
// written, so rvalue subexpressions are skipped.
11670 bool VisitStmt(const Stmt *S) {
11671 for (const Stmt *Child : S->children()) {
11672 if (!Child)
11673 continue;
11674 if (const auto *E = dyn_cast<Expr>(Child))
11675 if (!E->isGLValue())
11676 continue;
11677 if (Visit(Child))
11678 return true;
11679 }
11680 return false;
11681 }
11682 explicit LastprivateConditionalRefChecker(
11684 : LPM(LPM) {}
11685 std::tuple<const Expr *, const Decl *, StringRef, LValue, llvm::Function *>
11686 getFoundData() const {
11687 return std::make_tuple(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn);
11688 }
11689};
11690} // namespace
11691
// CGOpenMPRuntime::emitLastprivateConditionalUpdate: emits the guarded
// "last value" update for a lastprivate(conditional) variable —
//   #pragma omp critical(<unique name>)
//   if (last_iv <= iv) { last_iv = iv; last_a = priv_a; }
// using two internal globals keyed by UniqueDeclName (the saved iteration
// number and the saved value). Under -fopenmp-simd the critical region is
// skipped since no parallelism can exist.
// NOTE(review): the signature line and one applyDebugLocation-style line
// were dropped by the doxygen extraction.
11693 LValue IVLVal,
11694 StringRef UniqueDeclName,
11695 LValue LVal,
11696 SourceLocation Loc) {
11697 // Last updated loop counter for the lastprivate conditional var.
11698 // int<xx> last_iv = 0;
11699 llvm::Type *LLIVTy = CGF.ConvertTypeForMem(IVLVal.getType());
11700 llvm::Constant *LastIV = OMPBuilder.getOrCreateInternalVariable(
11701 LLIVTy, getName({UniqueDeclName, "iv"}));
11702 cast<llvm::GlobalVariable>(LastIV)->setAlignment(
11703 IVLVal.getAlignment().getAsAlign());
11704 LValue LastIVLVal =
11705 CGF.MakeNaturalAlignRawAddrLValue(LastIV, IVLVal.getType());
11706
11707 // Last value of the lastprivate conditional.
11708 // decltype(priv_a) last_a;
11709 llvm::GlobalVariable *Last = OMPBuilder.getOrCreateInternalVariable(
11710 CGF.ConvertTypeForMem(LVal.getType()), UniqueDeclName);
11711 cast<llvm::GlobalVariable>(Last)->setAlignment(
11712 LVal.getAlignment().getAsAlign());
11713 LValue LastLVal =
11714 CGF.MakeRawAddrLValue(Last, LVal.getType(), LVal.getAlignment());
11715
11716 // Global loop counter. Required to handle inner parallel-for regions.
11717 // iv
11718 llvm::Value *IVVal = CGF.EmitLoadOfScalar(IVLVal, Loc);
11719
11720 // #pragma omp critical(a)
11721 // if (last_iv <= iv) {
11722 // last_iv = iv;
11723 // last_a = priv_a;
11724 // }
11725 auto &&CodeGen = [&LastIVLVal, &IVLVal, IVVal, &LVal, &LastLVal,
11726 Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
11727 Action.Enter(CGF);
11728 llvm::Value *LastIVVal = CGF.EmitLoadOfScalar(LastIVLVal, Loc);
11729 // (last_iv <= iv) ? Check if the variable is updated and store new
11730 // value in global var.
// Signedness of the comparison follows the iteration variable's type.
11731 llvm::Value *CmpRes;
11732 if (IVLVal.getType()->isSignedIntegerType()) {
11733 CmpRes = CGF.Builder.CreateICmpSLE(LastIVVal, IVVal);
11734 } else {
11735 assert(IVLVal.getType()->isUnsignedIntegerType() &&
11736 "Loop iteration variable must be integer.");
11737 CmpRes = CGF.Builder.CreateICmpULE(LastIVVal, IVVal);
11738 }
11739 llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lp_cond_then");
11740 llvm::BasicBlock *ExitBB = CGF.createBasicBlock("lp_cond_exit");
11741 CGF.Builder.CreateCondBr(CmpRes, ThenBB, ExitBB);
11742 // {
11743 CGF.EmitBlock(ThenBB);
11744
11745 // last_iv = iv;
11746 CGF.EmitStoreOfScalar(IVVal, LastIVLVal);
11747
11748 // last_a = priv_a;
11749 switch (CGF.getEvaluationKind(LVal.getType())) {
11750 case TEK_Scalar: {
11751 llvm::Value *PrivVal = CGF.EmitLoadOfScalar(LVal, Loc);
11752 CGF.EmitStoreOfScalar(PrivVal, LastLVal);
11753 break;
11754 }
11755 case TEK_Complex: {
11756 CodeGenFunction::ComplexPairTy PrivVal = CGF.EmitLoadOfComplex(LVal, Loc);
11757 CGF.EmitStoreOfComplex(PrivVal, LastLVal, /*isInit=*/false);
11758 break;
11759 }
11760 case TEK_Aggregate:
11761 llvm_unreachable(
11762 "Aggregates are not supported in lastprivate conditional.");
11763 }
11764 // }
11765 CGF.EmitBranch(ExitBB);
11766 // There is no need to emit line number for unconditional branch.
11768 CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
11769 };
11770
11771 if (CGM.getLangOpts().OpenMPSimd) {
11772 // Do not emit as a critical region as no parallel region could be emitted.
11773 RegionCodeGenTy ThenRCG(CodeGen);
11774 ThenRCG(CGF);
11775 } else {
// The unique decl name doubles as the critical-section name, so updates of
// different conditional variables do not serialize against each other.
11776 emitCriticalRegion(CGF, UniqueDeclName, CodeGen, Loc);
11777 }
11778}
11779
// CGOpenMPRuntime::checkAndEmitLastprivateConditional: called when an LHS is
// written inside a region with lastprivate(conditional). If the LHS names a
// tracked variable: in the owning function, emit the guarded global update;
// in an inner (outlined) region, instead atomically set the variable's
// Fired flag in the wrapping struct created by
// emitLastprivateConditionalInit. NOTE(review): the signature line and a
// pointer-cast wrapper line were dropped by the doxygen extraction.
11781 const Expr *LHS) {
11782 if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
11783 return;
11784 LastprivateConditionalRefChecker Checker(LastprivateConditionalStack);
11785 if (!Checker.Visit(LHS))
11786 return;
11787 const Expr *FoundE;
11788 const Decl *FoundD;
11789 StringRef UniqueDeclName;
11790 LValue IVLVal;
11791 llvm::Function *FoundFn;
11792 std::tie(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn) =
11793 Checker.getFoundData();
11794 if (FoundFn != CGF.CurFn) {
11795 // Special codegen for inner parallel regions.
11796 // ((struct.lastprivate.conditional*)&priv_a)->Fired = 1;
11797 auto It = LastprivateConditionalToTypes[FoundFn].find(FoundD);
11798 assert(It != LastprivateConditionalToTypes[FoundFn].end() &&
11799 "Lastprivate conditional is not found in outer region.");
11800 QualType StructTy = std::get<0>(It->getSecond());
11801 const FieldDecl* FiredDecl = std::get<2>(It->getSecond());
11802 LValue PrivLVal = CGF.EmitLValue(FoundE);
11804 PrivLVal.getAddress(CGF),
11805 CGF.ConvertTypeForMem(CGF.getContext().getPointerType(StructTy)),
11806 CGF.ConvertTypeForMem(StructTy));
11807 LValue BaseLVal =
11808 CGF.MakeAddrLValue(StructAddr, StructTy, AlignmentSource::Decl);
11809 LValue FiredLVal = CGF.EmitLValueForField(BaseLVal, FiredDecl);
// Unordered atomic volatile store: other threads may race on the flag,
// but any write of 1 suffices for the final check.
11810 CGF.EmitAtomicStore(RValue::get(llvm::ConstantInt::get(
11811 CGF.ConvertTypeForMem(FiredDecl->getType()), 1)),
11812 FiredLVal, llvm::AtomicOrdering::Unordered,
11813 /*IsVolatile=*/true, /*isInit=*/false);
11814 return;
11815 }
11816
11817 // Private address of the lastprivate conditional in the current context.
11818 // priv_a
11819 LValue LVal = CGF.EmitLValue(FoundE);
11820 emitLastprivateConditionalUpdate(CGF, IVLVal, UniqueDeclName, LVal,
11821 FoundE->getExprLoc());
11822}
11823
11826 const llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> &IgnoredDecls) {
11827 if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
11828 return;
11829 auto Range = llvm::reverse(LastprivateConditionalStack);
11830 auto It = llvm::find_if(
11831 Range, [](const LastprivateConditionalData &D) { return !D.Disabled; });
11832 if (It == Range.end() || It->Fn != CGF.CurFn)
11833 return;
11834 auto LPCI = LastprivateConditionalToTypes.find(It->Fn);
11835 assert(LPCI != LastprivateConditionalToTypes.end() &&
11836 "Lastprivates must be registered already.");
11838 getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind());
11839 const CapturedStmt *CS = D.getCapturedStmt(CaptureRegions.back());
11840 for (const auto &Pair : It->DeclToUniqueName) {
11841 const auto *VD = cast<VarDecl>(Pair.first->getCanonicalDecl());
11842 if (!CS->capturesVariable(VD) || IgnoredDecls.contains(VD))
11843 continue;
11844 auto I = LPCI->getSecond().find(Pair.first);
11845 assert(I != LPCI->getSecond().end() &&
11846 "Lastprivate must be rehistered already.");
11847 // bool Cmp = priv_a.Fired != 0;
11848 LValue BaseLVal = std::get<3>(I->getSecond());
11849 LValue FiredLVal =
11850 CGF.EmitLValueForField(BaseLVal, std::get<2>(I->getSecond()));
11851 llvm::Value *Res = CGF.EmitLoadOfScalar(FiredLVal, D.getBeginLoc());
11852 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Res);
11853 llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lpc.then");
11854 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("lpc.done");
11855 // if (Cmp) {
11856 CGF.Builder.CreateCondBr(Cmp, ThenBB, DoneBB);
11857 CGF.EmitBlock(ThenBB);
11858 Address Addr = CGF.GetAddrOfLocalVar(VD);
11859 LValue LVal;
11860 if (VD->getType()->isReferenceType())
11861 LVal = CGF.EmitLoadOfReferenceLValue(Addr, VD->getType(),
11863 else
11864 LVal = CGF.MakeAddrLValue(Addr, VD->getType().getNonReferenceType(),
11866 emitLastprivateConditionalUpdate(CGF, It->IVLVal, Pair.second, LVal,
11867 D.getBeginLoc());
11869 CGF.EmitBlock(DoneBB, /*IsFinal=*/true);
11870 // }
11871 }
11872}
11873
11875 CodeGenFunction &CGF, LValue PrivLVal, const VarDecl *VD,
11876 SourceLocation Loc) {
11877 if (CGF.getLangOpts().OpenMP < 50)
11878 return;
11879 auto It = LastprivateConditionalStack.back().DeclToUniqueName.find(VD);
11880 assert(It != LastprivateConditionalStack.back().DeclToUniqueName.end() &&
11881 "Unknown lastprivate conditional variable.");
11882 StringRef UniqueName = It->second;
11883 llvm::GlobalVariable *GV = CGM.getModule().getNamedGlobal(UniqueName);
11884 // The variable was not updated in the region - exit.
11885 if (!GV)
11886 return;
11887 LValue LPLVal = CGF.MakeRawAddrLValue(
11888 GV, PrivLVal.getType().getNonReferenceType(), PrivLVal.getAlignment());
11889 llvm::Value *Res = CGF.EmitLoadOfScalar(LPLVal, Loc);
11890 CGF.EmitStoreOfScalar(Res, PrivLVal);
11891}
11892
11895 const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
11896 const RegionCodeGenTy &CodeGen) {
11897 llvm_unreachable("Not supported in SIMD-only mode");
11898}
11899
11902 const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
11903 const RegionCodeGenTy &CodeGen) {
11904 llvm_unreachable("Not supported in SIMD-only mode");
11905}
11906
11908 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
11909 const VarDecl *PartIDVar, const VarDecl *TaskTVar,
11910 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
11911 bool Tied, unsigned &NumberOfParts) {
11912 llvm_unreachable("Not supported in SIMD-only mode");
11913}
11914
11916 SourceLocation Loc,
11917 llvm::Function *OutlinedFn,
11918 ArrayRef<llvm::Value *> CapturedVars,
11919 const Expr *IfCond,
11920 llvm::Value *NumThreads) {
11921 llvm_unreachable("Not supported in SIMD-only mode");
11922}
11923
11925 CodeGenFunction &CGF, StringRef CriticalName,
11926 const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc,
11927 const Expr *Hint) {
11928 llvm_unreachable("Not supported in SIMD-only mode");
11929}
11930
11932 const RegionCodeGenTy &MasterOpGen,
11933 SourceLocation Loc) {
11934 llvm_unreachable("Not supported in SIMD-only mode");
11935}
11936
11938 const RegionCodeGenTy &MasterOpGen,
11939 SourceLocation Loc,
11940 const Expr *Filter) {
11941 llvm_unreachable("Not supported in SIMD-only mode");
11942}
11943
11945 SourceLocation Loc) {
11946 llvm_unreachable("Not supported in SIMD-only mode");
11947}
11948
11950 CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen,
11951 SourceLocation Loc) {
11952 llvm_unreachable("Not supported in SIMD-only mode");
11953}
11954
11956 CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen,
11957 SourceLocation Loc, ArrayRef<const Expr *> CopyprivateVars,
11959 ArrayRef<const Expr *> AssignmentOps) {
11960 llvm_unreachable("Not supported in SIMD-only mode");
11961}
11962
11964 const RegionCodeGenTy &OrderedOpGen,
11965 SourceLocation Loc,
11966 bool IsThreads) {
11967 llvm_unreachable("Not supported in SIMD-only mode");
11968}
11969
11971 SourceLocation Loc,
11973 bool EmitChecks,
11974 bool ForceSimpleCall) {
11975 llvm_unreachable("Not supported in SIMD-only mode");
11976}
11977
11980 const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
11981 bool Ordered, const DispatchRTInput &DispatchValues) {
11982 llvm_unreachable("Not supported in SIMD-only mode");
11983}
11984
11987 const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) {
11988 llvm_unreachable("Not supported in SIMD-only mode");
11989}
11990
11993 OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) {
11994 llvm_unreachable("Not supported in SIMD-only mode");
11995}
11996
11998 SourceLocation Loc,
11999 unsigned IVSize,
12000 bool IVSigned) {
12001 llvm_unreachable("Not supported in SIMD-only mode");
12002}
12003
12005 SourceLocation Loc,
12006 OpenMPDirectiveKind DKind) {
12007 llvm_unreachable("Not supported in SIMD-only mode");
12008}
12009
12011 SourceLocation Loc,
12012 unsigned IVSize, bool IVSigned,
12013 Address IL, Address LB,
12014 Address UB, Address ST) {
12015 llvm_unreachable("Not supported in SIMD-only mode");
12016}
12017
12019 llvm::Value *NumThreads,
12020 SourceLocation Loc) {
12021 llvm_unreachable("Not supported in SIMD-only mode");
12022}
12023
12025 ProcBindKind ProcBind,
12026 SourceLocation Loc) {
12027 llvm_unreachable("Not supported in SIMD-only mode");
12028}
12029
12031 const VarDecl *VD,
12032 Address VDAddr,
12033 SourceLocation Loc) {
12034 llvm_unreachable("Not supported in SIMD-only mode");
12035}
12036
12038 const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit,
12039 CodeGenFunction *CGF) {
12040 llvm_unreachable("Not supported in SIMD-only mode");
12041}
12042
12044 CodeGenFunction &CGF, QualType VarType, StringRef Name) {
12045 llvm_unreachable("Not supported in SIMD-only mode");
12046}
12047
12050 SourceLocation Loc,
12051 llvm::AtomicOrdering AO) {
12052 llvm_unreachable("Not supported in SIMD-only mode");
12053}
12054
12056 const OMPExecutableDirective &D,
12057 llvm::Function *TaskFunction,
12058 QualType SharedsTy, Address Shareds,
12059 const Expr *IfCond,
12060 const OMPTaskDataTy &Data) {
12061 llvm_unreachable("Not supported in SIMD-only mode");
12062}
12063
12066 llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds,
12067 const Expr *IfCond, const OMPTaskDataTy &Data) {
12068 llvm_unreachable("Not supported in SIMD-only mode");
12069}
12070
12074 ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) {
12075 assert(Options.SimpleReduction && "Only simple reduction is expected.");
12076 CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs,
12077 ReductionOps, Options);
12078}
12079
12082 ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
12083 llvm_unreachable("Not supported in SIMD-only mode");
12084}
12085
12087 SourceLocation Loc,
12088 bool IsWorksharingReduction) {
12089 llvm_unreachable("Not supported in SIMD-only mode");
12090}
12091
12093 SourceLocation Loc,
12094 ReductionCodeGen &RCG,
12095 unsigned N) {
12096 llvm_unreachable("Not supported in SIMD-only mode");
12097}
12098
12100 SourceLocation Loc,
12101 llvm::Value *ReductionsPtr,
12102 LValue SharedLVal) {
12103 llvm_unreachable("Not supported in SIMD-only mode");
12104}
12105
12107 SourceLocation Loc,
12108 const OMPTaskDataTy &Data) {
12109 llvm_unreachable("Not supported in SIMD-only mode");
12110}
12111
12114 OpenMPDirectiveKind CancelRegion) {
12115 llvm_unreachable("Not supported in SIMD-only mode");
12116}
12117
12119 SourceLocation Loc, const Expr *IfCond,
12120 OpenMPDirectiveKind CancelRegion) {
12121 llvm_unreachable("Not supported in SIMD-only mode");
12122}
12123
12125 const OMPExecutableDirective &D, StringRef ParentName,
12126 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
12127 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
12128 llvm_unreachable("Not supported in SIMD-only mode");
12129}
12130
12133 llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
12134 llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
12135 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
12136 const OMPLoopDirective &D)>
12137 SizeEmitter) {
12138 llvm_unreachable("Not supported in SIMD-only mode");
12139}
12140
12142 llvm_unreachable("Not supported in SIMD-only mode");
12143}
12144
12146 llvm_unreachable("Not supported in SIMD-only mode");
12147}
12148
12150 return false;
12151}
12152
12154 const OMPExecutableDirective &D,
12155 SourceLocation Loc,
12156 llvm::Function *OutlinedFn,
12157 ArrayRef<llvm::Value *> CapturedVars) {
12158 llvm_unreachable("Not supported in SIMD-only mode");
12159}
12160
12162 const Expr *NumTeams,
12163 const Expr *ThreadLimit,
12164 SourceLocation Loc) {
12165 llvm_unreachable("Not supported in SIMD-only mode");
12166}
12167
12169 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
12170 const Expr *Device, const RegionCodeGenTy &CodeGen,
12172 llvm_unreachable("Not supported in SIMD-only mode");
12173}
12174
12176 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
12177 const Expr *Device) {
12178 llvm_unreachable("Not supported in SIMD-only mode");
12179}
12180
12182 const OMPLoopDirective &D,
12183 ArrayRef<Expr *> NumIterations) {
12184 llvm_unreachable("Not supported in SIMD-only mode");
12185}
12186
12188 const OMPDependClause *C) {
12189 llvm_unreachable("Not supported in SIMD-only mode");
12190}
12191
12193 const OMPDoacrossClause *C) {
12194 llvm_unreachable("Not supported in SIMD-only mode");
12195}
12196
12197const VarDecl *
12199 const VarDecl *NativeParam) const {
12200 llvm_unreachable("Not supported in SIMD-only mode");
12201}
12202
12203Address
12205 const VarDecl *NativeParam,
12206 const VarDecl *TargetParam) const {
12207 llvm_unreachable("Not supported in SIMD-only mode");
12208}
#define V(N, I)
Definition: ASTContext.h:3284
StringRef P
#define SM(sm)
Definition: Cuda.cpp:82
Provides LLVM's BitmaskEnum facility to enumeration types declared in namespace clang.
static llvm::Value * emitCopyprivateCopyFunction(CodeGenModule &CGM, llvm::Type *ArgsElemType, ArrayRef< const Expr * > CopyprivateVars, ArrayRef< const Expr * > DestExprs, ArrayRef< const Expr * > SrcExprs, ArrayRef< const Expr * > AssignmentOps, SourceLocation Loc)
static StringRef getIdentStringFromSourceLocation(CodeGenFunction &CGF, SourceLocation Loc, SmallString< 128 > &Buffer)
llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind convertCaptureClause(const VarDecl *VD)
static void emitOffloadingArrays(CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo, CGOpenMPRuntime::TargetDataInfo &Info, llvm::OpenMPIRBuilder &OMPBuilder, bool IsNonContiguous=false)
Emit the arrays used to pass the captures and map information to the offloading runtime library.
static RecordDecl * createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy, ArrayRef< PrivateDataTy > Privates)
llvm::Value * emitDynCGGroupMem(const OMPExecutableDirective &D, CodeGenFunction &CGF)
static void emitInitWithReductionInitializer(CodeGenFunction &CGF, const OMPDeclareReductionDecl *DRD, const Expr *InitOp, Address Private, Address Original, QualType Ty)
static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy, Address OriginalBaseAddress, llvm::Value *Addr)
static void emitPrivatesInit(CodeGenFunction &CGF, const OMPExecutableDirective &D, Address KmpTaskSharedsPtr, LValue TDBase, const RecordDecl *KmpTaskTWithPrivatesQTyRD, QualType SharedsTy, QualType SharedsPtrTy, const OMPTaskDataTy &Data, ArrayRef< PrivateDataTy > Privates, bool ForDup)
Emit initialization for private variables in task-based directives.
static llvm::Value * emitDestructorsFunction(CodeGenModule &CGM, SourceLocation Loc, QualType KmpInt32Ty, QualType KmpTaskTWithPrivatesPtrQTy, QualType KmpTaskTWithPrivatesQTy)
static unsigned evaluateCDTSize(const FunctionDecl *FD, ArrayRef< ParamAttrTy > ParamAttrs)
static void EmitOMPAggregateReduction(CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar, const VarDecl *RHSVar, const llvm::function_ref< void(CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *)> &RedOpGen, const Expr *XExpr=nullptr, const Expr *EExpr=nullptr, const Expr *UpExpr=nullptr)
Emit reduction operation for each element of array (required for array sections) LHS op = RHS.
static void emitTargetCallFallback(CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn, const OMPExecutableDirective &D, llvm::SmallVectorImpl< llvm::Value * > &CapturedVars, bool RequiresOuterTask, const CapturedStmt &CS, bool OffloadingMandatory, CodeGenFunction &CGF)
static llvm::Value * emitReduceInitFunction(CodeGenModule &CGM, SourceLocation Loc, ReductionCodeGen &RCG, unsigned N)
Emits reduction initializer function:
static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion)
static void emitDependData(CodeGenFunction &CGF, QualType &KmpDependInfoTy, llvm::PointerUnion< unsigned *, LValue * > Pos, const OMPTaskDataTy::DependData &Data, Address DependenciesArray)
static llvm::Value * emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc, const OMPTaskDataTy &Data, QualType PrivatesQTy, ArrayRef< PrivateDataTy > Privates)
Emit a privates mapping function for correct handling of private and firstprivate variables.
static llvm::Value * emitReduceCombFunction(CodeGenModule &CGM, SourceLocation Loc, ReductionCodeGen &RCG, unsigned N, const Expr *ReductionOp, const Expr *LHS, const Expr *RHS, const Expr *PrivateRef)
Emits reduction combiner function:
static RecordDecl * createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef< PrivateDataTy > Privates)
static llvm::Value * getAllocatorVal(CodeGenFunction &CGF, const Expr *Allocator)
Return allocator value from expression, or return a null allocator (default when no allocator specifi...
static llvm::Function * emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc, OpenMPDirectiveKind Kind, QualType KmpInt32Ty, QualType KmpTaskTWithPrivatesPtrQTy, QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy, QualType SharedsPtrTy, llvm::Function *TaskFunction, llvm::Value *TaskPrivatesMap)
Emit a proxy function which accepts kmp_task_t as the second argument.
static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix, char ISA, StringRef ParSeq, StringRef MangledName, bool OutputBecomesInput, llvm::Function *Fn)
static bool isAllocatableDecl(const VarDecl *VD)
static llvm::Value * getAlignmentValue(CodeGenModule &CGM, const VarDecl *VD)
Return the alignment from an allocate directive if present.
static void emitTargetCallKernelLaunch(CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn, const OMPExecutableDirective &D, llvm::SmallVectorImpl< llvm::Value * > &CapturedVars, bool RequiresOuterTask, const CapturedStmt &CS, bool OffloadingMandatory, llvm::PointerIntPair< const Expr *, 2, OpenMPDeviceClauseModifier > Device, llvm::Value *OutlinedFnID, CodeGenFunction::OMPTargetDataInfo &InputInfo, llvm::Value *&MapTypesArray, llvm::Value *&MapNamesArray, llvm::function_ref< llvm::Value *(CodeGenFunction &CGF, const OMPLoopDirective &D)> SizeEmitter, CodeGenFunction &CGF, CodeGenModule &CGM)
static std::tuple< unsigned, unsigned, bool > getNDSWDS(const FunctionDecl *FD, ArrayRef< ParamAttrTy > ParamAttrs)
static const OMPExecutableDirective * getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D)
Check for inner distribute directive.
static std::pair< llvm::Value *, llvm::Value * > getPointerAndSize(CodeGenFunction &CGF, const Expr *E)
static const VarDecl * getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE)
static bool isTrivial(ASTContext &Ctx, const Expr *E)
Checks if the expression is constant or does not have non-trivial function calls.
static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind, bool Chunked, bool Ordered)
Map the OpenMP loop schedule to the runtime enumeration.
static void getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS, const Expr **E, int32_t &UpperBound, bool UpperBoundOnly, llvm::Value **CondVal)
Check for a num threads constant value (stored in DefaultVal), or expression (stored in E).
static llvm::Value * emitDeviceID(llvm::PointerIntPair< const Expr *, 2, OpenMPDeviceClauseModifier > Device, CodeGenFunction &CGF)
static const OMPDeclareReductionDecl * getReductionInit(const Expr *ReductionOp)
Check if the combiner is a call to UDR combiner and if it is so return the UDR decl used for reductio...
static bool checkInitIsRequired(CodeGenFunction &CGF, ArrayRef< PrivateDataTy > Privates)
Check if duplication function is required for taskloops.
static bool checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD, ArrayRef< PrivateDataTy > Privates)
Checks if destructor function is required to be generated.
static llvm::TargetRegionEntryInfo getEntryInfoFromPresumedLoc(CodeGenModule &CGM, llvm::OpenMPIRBuilder &OMPBuilder, SourceLocation BeginLoc, llvm::StringRef ParentName="")
static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix, const Expr *Ref)
Generates unique name for artificial threadprivate variables.
static void emitForStaticInitCall(CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId, llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule, OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2, const CGOpenMPRuntime::StaticRTInput &Values)
static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy, LValue BaseLV)
static void getKmpAffinityType(ASTContext &C, QualType &KmpTaskAffinityInfoTy)
Builds kmp_task_affinity_info, if it is not built yet, and builds flags type.
static void getDependTypes(ASTContext &C, QualType &KmpDependInfoTy, QualType &FlagsTy)
Builds kmp_depend_info, if it is not built yet, and builds flags type.
static llvm::Value * emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc, const OMPExecutableDirective &D, QualType KmpTaskTWithPrivatesPtrQTy, const RecordDecl *KmpTaskTWithPrivatesQTyRD, const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy, QualType SharedsPtrTy, const OMPTaskDataTy &Data, ArrayRef< PrivateDataTy > Privates, bool WithLastIter)
Emit task_dup function (for initialization of private/firstprivate/lastprivate vars and last_iter fla...
static llvm::Value * emitReduceFiniFunction(CodeGenModule &CGM, SourceLocation Loc, ReductionCodeGen &RCG, unsigned N)
Emits reduction finalizer function:
static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr, QualType Type, bool EmitDeclareReductionInit, const Expr *Init, const OMPDeclareReductionDecl *DRD, Address SrcAddr=Address::invalid())
Emit initialization of arrays of complex types.
static bool getAArch64PBV(QualType QT, ASTContext &C)
Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI.
static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C)
Computes the lane size (LS) of a return type or of an input parameter, as defined by LS(P) in 3....
static void EmitDoacrossOrdered(CodeGenFunction &CGF, CodeGenModule &CGM, const T *C, llvm::Value *ULoc, llvm::Value *ThreadID)
static RTLDependenceKindTy translateDependencyKind(OpenMPDependClauseKind K)
Translates internal dependency kind into the runtime kind.
static void emitTargetCallElse(CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn, const OMPExecutableDirective &D, llvm::SmallVectorImpl< llvm::Value * > &CapturedVars, bool RequiresOuterTask, const CapturedStmt &CS, bool OffloadingMandatory, CodeGenFunction &CGF)
static llvm::Function * emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty, const Expr *CombinerInitializer, const VarDecl *In, const VarDecl *Out, bool IsCombiner)
static void emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn, const llvm::APSInt &VLENVal, ArrayRef< ParamAttrTy > ParamAttrs, OMPDeclareSimdDeclAttr::BranchStateTy State)
static void emitReductionCombiner(CodeGenFunction &CGF, const Expr *ReductionOp)
Emit reduction combiner.
static std::string mangleVectorParameters(ArrayRef< ParamAttrTy > ParamAttrs)
Mangle the parameter part of the vector function name according to their OpenMP classification.
static llvm::Function * emitParallelOrTeamsOutlinedFunction(CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS, const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen)
static void emitAArch64DeclareSimdFunction(CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN, ArrayRef< ParamAttrTy > ParamAttrs, OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName, char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc)
Emit vector function attributes for AArch64, as defined in the AAVFABI.
static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array, unsigned Index, const VarDecl *Var)
Given an array of pointers to variables, project the address of a given variable.
llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseKind convertDeviceClause(const VarDecl *VD)
static bool isAssumedToBeNotEmitted(const ValueDecl *VD, bool IsDevice)
static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask, StringRef Prefix, char ISA, StringRef ParSeq, StringRef MangledName, bool OutputBecomesInput, llvm::Function *Fn)
static FieldDecl * addFieldToRecordDecl(ASTContext &C, DeclContext *DC, QualType FieldTy)
static ValueDecl * getDeclFromThisExpr(const Expr *E)
llvm::Constant * emitMappingInformation(CodeGenFunction &CGF, llvm::OpenMPIRBuilder &OMPBuilder, MappableExprsHandler::MappingExprInfo &MapExprs)
Emit a string constant containing the names of the values mapped to the offloading runtime library.
static RecordDecl * createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind, QualType KmpInt32Ty, QualType KmpRoutineEntryPointerQTy)
static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule, OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2)
static bool getAArch64MTV(QualType QT, ParamKindTy Kind)
Maps To Vector (MTV), as defined in 4.1.1 of the AAVFABI (2021Q1).
Defines the clang::FileManager interface and associated types.
int Priority
Definition: Format.cpp:2975
#define X(type, name)
Definition: Value.h:143
This file defines OpenMP AST classes for clauses.
Defines some OpenMP-specific enums and functions.
Defines the SourceManager interface.
const char * Data
This file defines OpenMP AST classes for executable directives and clauses.
SourceLocation Begin
__DEVICE__ int max(int __a, int __b)
Holds long-lived AST nodes (such as types and decls) that can be referred to throughout the semantic ...
Definition: ASTContext.h:182
SourceManager & getSourceManager()
Definition: ASTContext.h:705
const ConstantArrayType * getAsConstantArrayType(QualType T) const
Definition: ASTContext.h:2767
CharUnits getTypeAlignInChars(QualType T) const
Return the ABI-specified alignment of a (complete) type T, in characters.
const ASTRecordLayout & getASTRecordLayout(const RecordDecl *D) const
Get or compute information about the layout of the specified record (struct/union/class) D,...
bool hasSameType(QualType T1, QualType T2) const
Determine whether the given types T1 and T2 are equivalent.
Definition: ASTContext.h:2590
QualType getPointerType(QualType T) const
Return the uniqued reference to the type for a pointer to the specified type.
CanQualType VoidPtrTy
Definition: ASTContext.h:1118
QualType getConstantArrayType(QualType EltTy, const llvm::APInt &ArySize, const Expr *SizeExpr, ArraySizeModifier ASM, unsigned IndexTypeQuals) const
Return the unique reference to the type for a constant array of the specified element type.
const LangOptions & getLangOpts() const
Definition: ASTContext.h:775
CanQualType BoolTy
Definition: ASTContext.h:1092
QualType getIntTypeForBitwidth(unsigned DestWidth, unsigned Signed) const
getIntTypeForBitwidth - sets integer QualTy according to specified details: bitwidth,...
CanQualType getSizeType() const
Return the unique type for "size_t" (C99 7.17), defined in <stddef.h>.
CharUnits getDeclAlign(const Decl *D, bool ForAlignof=false) const
Return a conservative estimate of the alignment of the specified decl D.
const ArrayType * getAsArrayType(QualType T) const
Type Query functions.
CharUnits getTypeSizeInChars(QualType T) const
Return the size of the specified (complete) type T, in characters.
CanQualType VoidTy
Definition: ASTContext.h:1091
const VariableArrayType * getAsVariableArrayType(QualType T) const
Definition: ASTContext.h:2770
const TargetInfo & getTargetInfo() const
Definition: ASTContext.h:757
CharUnits getNonVirtualSize() const
getNonVirtualSize - Get the non-virtual size (in chars) of an object, which is the size of the object...
Definition: RecordLayout.h:210
static QualType getBaseOriginalType(const Expr *Base)
Return original type of the base expression for array section.
Definition: Expr.cpp:5063
Represents an array type, per C99 6.7.5.2 - Array Declarators.
Definition: Type.h:3514
Attr - This represents one attribute.
Definition: Attr.h:42
Represents a C++ constructor within a class.
Definition: DeclCXX.h:2535
Represents a C++ destructor within a class.
Definition: DeclCXX.h:2799
Represents a static or instance method of a struct/union/class.
Definition: DeclCXX.h:2060
const CXXRecordDecl * getParent() const
Return the parent of this method declaration, which is the class in which this method is defined.
Definition: DeclCXX.h:2186
QualType getFunctionObjectParameterType() const
Definition: DeclCXX.h:2210
Represents a C++ struct/union/class.
Definition: DeclCXX.h:258
base_class_range bases()
Definition: DeclCXX.h:619
bool isLambda() const
Determine whether this class describes a lambda function object.
Definition: DeclCXX.h:1022
void getCaptureFields(llvm::DenseMap< const ValueDecl *, FieldDecl * > &Captures, FieldDecl *&ThisCapture) const
For a closure type, retrieve the mapping from captured variables and this to the non-static data memb...
Definition: DeclCXX.cpp:1641
unsigned getNumBases() const
Retrieves the number of base classes of this class.
Definition: DeclCXX.h:613
base_class_range vbases()
Definition: DeclCXX.h:636
capture_const_range captures() const
Definition: DeclCXX.h:1101
ctor_range ctors() const
Definition: DeclCXX.h:681
CXXDestructorDecl * getDestructor() const
Returns the destructor decl for this class.
Definition: DeclCXX.cpp:1975
CanProxy< U > castAs() const
A wrapper class around a pointer that always points to its canonical declaration.
Definition: Redeclarable.h:349
Describes the capture of either a variable, or 'this', or variable-length array type.
Definition: Stmt.h:3770
bool capturesVariableByCopy() const
Determine whether this capture handles a variable by copy.
Definition: Stmt.h:3804
VarDecl * getCapturedVar() const
Retrieve the declaration of the variable being captured.
Definition: Stmt.cpp:1305
bool capturesVariableArrayType() const
Determine whether this capture handles a variable-length array type.
Definition: Stmt.h:3810
bool capturesThis() const
Determine whether this capture handles the C++ 'this' pointer.
Definition: Stmt.h:3798
bool capturesVariable() const
Determine whether this capture handles a variable (by reference).
Definition: Stmt.h:3801
This captures a statement into a function.
Definition: Stmt.h:3757
capture_iterator capture_end() const
Retrieve an iterator pointing past the end of the sequence of captures.
Definition: Stmt.h:3908
const RecordDecl * getCapturedRecordDecl() const
Retrieve the record declaration for captured variables.
Definition: Stmt.h:3878
Stmt * getCapturedStmt()
Retrieve the statement being captured.
Definition: Stmt.h:3861
bool capturesVariable(const VarDecl *Var) const
True if this variable has been captured.
Definition: Stmt.cpp:1431
capture_iterator capture_begin()
Retrieve an iterator pointing to the first capture.
Definition: Stmt.h:3903
capture_range captures()
Definition: Stmt.h:3895
CharUnits - This is an opaque type for sizes expressed in character units.
Definition: CharUnits.h:38
bool isZero() const
isZero - Test whether the quantity equals zero.
Definition: CharUnits.h:122
llvm::Align getAsAlign() const
getAsAlign - Returns Quantity as a valid llvm::Align, Beware llvm::Align assumes power of two 8-bit b...
Definition: CharUnits.h:189
QuantityType getQuantity() const
getQuantity - Get the raw integer representation of this quantity.
Definition: CharUnits.h:185
CharUnits alignmentOfArrayElement(CharUnits elementSize) const
Given that this is the alignment of the first element of an array, return the minimum alignment of an...
Definition: CharUnits.h:214
static CharUnits fromQuantity(QuantityType Quantity)
fromQuantity - Construct a CharUnits quantity from a raw integer type.
Definition: CharUnits.h:63
CharUnits alignTo(const CharUnits &Align) const
alignTo - Returns the next integer (mod 2**64) that is greater than or equal to this quantity and is ...
Definition: CharUnits.h:201
Like RawAddress, an abstract representation of an aligned address, but the pointer contained in this ...
Definition: Address.h:111
static Address invalid()
Definition: Address.h:153
llvm::Value * emitRawPointer(CodeGenFunction &CGF) const
Return the pointer contained in this class after authenticating it and adding offset to it if necessa...
Definition: Address.h:220
CharUnits getAlignment() const
Definition: Address.h:166
llvm::Type * getElementType() const
Return the type of the values stored in this address.
Definition: Address.h:184
Address withPointer(llvm::Value *NewPointer, KnownNonNull_t IsKnownNonNull) const
Return address with different pointer, but same element type and alignment.
Definition: Address.h:226
Address withElementType(llvm::Type *ElemTy) const
Return address with different element type, but same pointer and alignment.
Definition: Address.h:241
Address withAlignment(CharUnits NewAlignment) const
Return address with different alignment, but same pointer and element type.
Definition: Address.h:234
bool isValid() const
Definition: Address.h:154
llvm::PointerType * getType() const
Return the type of the pointer value.
Definition: Address.h:176
static ApplyDebugLocation CreateArtificial(CodeGenFunction &CGF)
Apply TemporaryLocation if it is valid.
Definition: CGDebugInfo.h:864
static ApplyDebugLocation CreateDefaultArtificial(CodeGenFunction &CGF, SourceLocation TemporaryLocation)
Apply TemporaryLocation if it is valid.
Definition: CGDebugInfo.h:871
static ApplyDebugLocation CreateEmpty(CodeGenFunction &CGF)
Set the IRBuilder to not attach debug locations.
Definition: CGDebugInfo.h:881
CGBlockInfo - Information to generate a block literal.
Definition: CGBlocks.h:156
llvm::StoreInst * CreateStore(llvm::Value *Val, Address Addr, bool IsVolatile=false)
Definition: CGBuilder.h:136
llvm::Value * CreateIsNull(Address Addr, const Twine &Name="")
Definition: CGBuilder.h:355
Address CreateGEP(CodeGenFunction &CGF, Address Addr, llvm::Value *Index, const llvm::Twine &Name="")
Definition: CGBuilder.h:292
Address CreatePointerBitCastOrAddrSpaceCast(Address Addr, llvm::Type *Ty, llvm::Type *ElementTy, const llvm::Twine &Name="")
Definition: CGBuilder.h:203
Address CreateConstArrayGEP(Address Addr, uint64_t Index, const llvm::Twine &Name="")
Given addr = [n x T]* ... produce name = getelementptr inbounds addr, i64 0, i64 index where i64 is a...
Definition: CGBuilder.h:241
llvm::LoadInst * CreateLoad(Address Addr, const llvm::Twine &Name="")
Definition: CGBuilder.h:108
llvm::CallInst * CreateMemCpy(Address Dest, Address Src, llvm::Value *Size, bool IsVolatile=false)
Definition: CGBuilder.h:364
Address CreateConstGEP(Address Addr, uint64_t Index, const llvm::Twine &Name="")
Given addr = T* ... produce name = getelementptr inbounds addr, i64 index where i64 is actually the t...
Definition: CGBuilder.h:278
MangleContext & getMangleContext()
Gets the mangle context.
Definition: CGCXXABI.h:113
CGFunctionInfo - Class to encapsulate the information about a function definition.
Manages list of lastprivate conditional decls for the specified directive.
static LastprivateConditionalRAII disable(CodeGenFunction &CGF, const OMPExecutableDirective &S)
NontemporalDeclsRAII(CodeGenModule &CGM, const OMPLoopDirective &S)
Struct that keeps all the relevant information that should be kept throughout a 'target data' region.
llvm::DenseMap< const ValueDecl *, llvm::Value * > CaptureDeviceAddrMap
Map between a declaration of a capture and the corresponding new llvm address where the runtime r...
UntiedTaskLocalDeclsRAII(CodeGenFunction &CGF, const llvm::MapVector< CanonicalDeclPtr< const VarDecl >, std::pair< Address, Address > > &LocalVars)
virtual Address emitThreadIDAddress(CodeGenFunction &CGF, SourceLocation Loc)
Emits address of the word in a memory where current thread id is stored.
llvm::FunctionType * Kmpc_MicroTy
The type for a microtask which gets passed to __kmpc_fork_call().
llvm::StringSet ThreadPrivateWithDefinition
Set of threadprivate variables with the generated initializer.
virtual void emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, const OMPExecutableDirective &D, llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds, const Expr *IfCond, const OMPTaskDataTy &Data)
Emit task region for the task directive.
void createOffloadEntriesAndInfoMetadata()
Creates all the offload entries in the current compilation unit along with the associated metadata.
const Expr * getNumTeamsExprForTargetDirective(CodeGenFunction &CGF, const OMPExecutableDirective &D, int32_t &MinTeamsVal, int32_t &MaxTeamsVal)
Emit the number of teams for a target directive.
virtual Address getAddrOfThreadPrivate(CodeGenFunction &CGF, const VarDecl *VD, Address VDAddr, SourceLocation Loc)
Returns address of the threadprivate variable for the current thread.
void emitDeferredTargetDecls() const
Emit deferred declare target variables marked for deferred emission.
virtual llvm::Value * emitForNext(CodeGenFunction &CGF, SourceLocation Loc, unsigned IVSize, bool IVSigned, Address IL, Address LB, Address UB, Address ST)
Call __kmpc_dispatch_next( ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter, kmp_int[32|64] *p_lowe...
bool markAsGlobalTarget(GlobalDecl GD)
Marks the declaration as already emitted for the device code and returns true, if it was marked alrea...
virtual void emitTargetDataStandAloneCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, const Expr *Device)
Emit the data mapping/movement code associated with the directive D that should be of the form 'targe...
virtual void emitNumThreadsClause(CodeGenFunction &CGF, llvm::Value *NumThreads, SourceLocation Loc)
Emits call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_threads)...
QualType SavedKmpTaskloopTQTy
Saved kmp_task_t for taskloop-based directive.
virtual void emitSingleRegion(CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen, SourceLocation Loc, ArrayRef< const Expr * > CopyprivateVars, ArrayRef< const Expr * > DestExprs, ArrayRef< const Expr * > SrcExprs, ArrayRef< const Expr * > AssignmentOps)
Emits a single region.
virtual bool emitTargetGlobal(GlobalDecl GD)
Emit the global GD if it is meaningful for the target.
void setLocThreadIdInsertPt(CodeGenFunction &CGF, bool AtCurrentPoint=false)
std::string getOutlinedHelperName(StringRef Name) const
Get the function name of an outlined region.
bool HasEmittedDeclareTargetRegion
Flag for keeping track of whether a device routine has been emitted.
llvm::Constant * getOrCreateThreadPrivateCache(const VarDecl *VD)
If the specified mangled name is not in the module, create and return threadprivate cache object.
virtual Address getTaskReductionItem(CodeGenFunction &CGF, SourceLocation Loc, llvm::Value *ReductionsPtr, LValue SharedLVal)
Get the address of void * type of the private copy of the reduction item specified by the SharedLVal...
virtual void getDefaultScheduleAndChunk(CodeGenFunction &CGF, const OMPLoopDirective &S, OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const
Choose default schedule type and chunk value for the schedule clause.
virtual std::pair< llvm::Function *, llvm::Function * > getUserDefinedReduction(const OMPDeclareReductionDecl *D)
Get combiner/initializer for the specified user-defined reduction, if any.
virtual bool isGPU() const
Returns true if the current target is a GPU.
static const Stmt * getSingleCompoundChild(ASTContext &Ctx, const Stmt *Body)
Checks if the Body is the CompoundStmt and returns its child statement iff there is only one that is ...
virtual void emitDeclareTargetFunction(const FunctionDecl *FD, llvm::GlobalValue *GV)
Emit code for handling declare target functions in the runtime.
llvm::Type * getKmpc_MicroPointerTy()
Returns pointer to kmpc_micro type.
bool HasRequiresUnifiedSharedMemory
Flag for keeping track of whether a requires unified_shared_memory directive is present.
llvm::Value * emitUpdateLocation(CodeGenFunction &CGF, SourceLocation Loc, unsigned Flags=0, bool EmitLoc=false)
Emits object of ident_t type with info for source location.
bool isLocalVarInUntiedTask(CodeGenFunction &CGF, const VarDecl *VD) const
Returns true if the variable is a local variable in untied task.
virtual void emitTeamsCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, SourceLocation Loc, llvm::Function *OutlinedFn, ArrayRef< llvm::Value * > CapturedVars)
Emits code for teams call of the OutlinedFn with variables captured in a record which address is stor...
virtual void emitCancellationPointCall(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind CancelRegion)
Emit code for 'cancellation point' construct.
void emitUDMapperArrayInitOrDel(CodeGenFunction &MapperCGF, llvm::Value *Handle, llvm::Value *BasePtr, llvm::Value *Ptr, llvm::Value *Size, llvm::Value *MapType, llvm::Value *MapName, CharUnits ElementSize, llvm::BasicBlock *ExitBB, bool IsInit)
Emit the array initialization or deletion portion for user-defined mapper code generation.
virtual llvm::Function * emitThreadPrivateVarDefinition(const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit, CodeGenFunction *CGF=nullptr)
Emit a code for initialization of threadprivate variable.
virtual ConstantAddress getAddrOfDeclareTargetVar(const VarDecl *VD)
Returns the address of the variable marked as declare target with link clause OR as declare target wi...
llvm::MapVector< CanonicalDeclPtr< const VarDecl >, std::pair< Address, Address > > UntiedLocalVarsAddressesMap
llvm::Function * getOrCreateUserDefinedMapperFunc(const OMPDeclareMapperDecl *D)
Get the function for the specified user-defined mapper.
OpenMPLocThreadIDMapTy OpenMPLocThreadIDMap
virtual void functionFinished(CodeGenFunction &CGF)
Cleans up references to the objects in finished function.
void emitCall(CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee Callee, ArrayRef< llvm::Value * > Args=std::nullopt) const
Emits Callee function call with arguments Args with location Loc.
virtual llvm::Function * emitTeamsOutlinedFunction(CodeGenFunction &CGF, const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen)
Emits outlined function for the specified OpenMP teams directive D.
QualType KmpTaskTQTy
Type typedef struct kmp_task { void * shareds; /‍**< pointer to block of pointers to shared vars ‍/ k...
llvm::OpenMPIRBuilder OMPBuilder
An OpenMP-IR-Builder instance.
virtual void emitDoacrossInit(CodeGenFunction &CGF, const OMPLoopDirective &D, ArrayRef< Expr * > NumIterations)
Emit initialization for doacross loop nesting support.
virtual void adjustTargetSpecificDataForLambdas(CodeGenFunction &CGF, const OMPExecutableDirective &D) const
Adjust some parameters for the target-based directives, like addresses of the variables captured by r...
virtual void emitTargetDataCalls(CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, const Expr *Device, const RegionCodeGenTy &CodeGen, CGOpenMPRuntime::TargetDataInfo &Info)
Emit the target data mapping code associated with D.
virtual unsigned getDefaultLocationReserved2Flags() const
Returns additional flags that can be stored in reserved_2 field of the default location.
void computeMinAndMaxThreadsAndTeams(const OMPExecutableDirective &D, CodeGenFunction &CGF, int32_t &MinThreadsVal, int32_t &MaxThreadsVal, int32_t &MinTeamsVal, int32_t &MaxTeamsVal)
Helper to determine the min/max number of threads/teams for D.
virtual Address getParameterAddress(CodeGenFunction &CGF, const VarDecl *NativeParam, const VarDecl *TargetParam) const
Gets the address of the native argument basing on the address of the target-specific parameter.
void emitUsesAllocatorsFini(CodeGenFunction &CGF, const Expr *Allocator)
Destroys user defined allocators specified in the uses_allocators clause.
QualType KmpTaskAffinityInfoTy
Type typedef struct kmp_task_affinity_info { kmp_intptr_t base_addr; size_t len; struct { bool flag1 ...
llvm::SmallVector< NontemporalDeclsSet, 4 > NontemporalDeclsStack
Stack for list of declarations in current context marked as nontemporal.
llvm::Value * emitNumTeamsForTargetDirective(CodeGenFunction &CGF, const OMPExecutableDirective &D)
virtual void emitTargetOutlinedFunctionHelper(const OMPExecutableDirective &D, StringRef ParentName, llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, bool IsOffloadEntry, const RegionCodeGenTy &CodeGen)
Helper to emit outlined function for 'target' directive.
void scanForTargetRegionsFunctions(const Stmt *S, StringRef ParentName)
Start scanning from statement S and emit all target regions found along the way.
SmallVector< llvm::Value *, 4 > emitDepobjElementsSizes(CodeGenFunction &CGF, QualType &KmpDependInfoTy, const OMPTaskDataTy::DependData &Data)
virtual void emitTaskgroupRegion(CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen, SourceLocation Loc)
Emit a taskgroup region.
llvm::DenseMap< llvm::Function *, llvm::DenseMap< CanonicalDeclPtr< const Decl >, std::tuple< QualType, const FieldDecl *, const FieldDecl *, LValue > > > LastprivateConditionalToTypes
Maps local variables marked as lastprivate conditional to their internal types.
virtual bool emitTargetGlobalVariable(GlobalDecl GD)
Emit the global variable if it is a valid device global variable.
virtual void emitNumTeamsClause(CodeGenFunction &CGF, const Expr *NumTeams, const Expr *ThreadLimit, SourceLocation Loc)
Emits call to void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_teams,...
bool hasRequiresUnifiedSharedMemory() const
Return whether the unified_shared_memory has been specified.
virtual Address getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF, QualType VarType, StringRef Name)
Creates artificial threadprivate variable with name Name and type VarType.
void emitUserDefinedMapper(const OMPDeclareMapperDecl *D, CodeGenFunction *CGF=nullptr)
Emit the function for the user defined mapper construct.
bool HasEmittedTargetRegion
Flag for keeping track of whether a target region has been emitted.
void emitDepobjElements(CodeGenFunction &CGF, QualType &KmpDependInfoTy, LValue PosLVal, const OMPTaskDataTy::DependData &Data, Address DependenciesArray)
std::string getReductionFuncName(StringRef Name) const
Get the function name of a reduction function.
virtual void processRequiresDirective(const OMPRequiresDecl *D)
Perform check on requires decl to ensure that target architecture supports unified addressing.
llvm::DenseSet< CanonicalDeclPtr< const Decl > > AlreadyEmittedTargetDecls
List of the emitted declarations.
virtual llvm::Value * emitTaskReductionInit(CodeGenFunction &CGF, SourceLocation Loc, ArrayRef< const Expr * > LHSExprs, ArrayRef< const Expr * > RHSExprs, const OMPTaskDataTy &Data)
Emit a code for initialization of task reduction clause.
llvm::Value * getThreadID(CodeGenFunction &CGF, SourceLocation Loc)
Gets thread id value for the current thread.
void emitUpdateClause(CodeGenFunction &CGF, LValue DepobjLVal, OpenMPDependClauseKind NewDepKind, SourceLocation Loc)
Updates the dependency kind in the specified depobj object.
virtual void emitLastprivateConditionalFinalUpdate(CodeGenFunction &CGF, LValue PrivLVal, const VarDecl *VD, SourceLocation Loc)
Gets the address of the global copy used for lastprivate conditional update, if any.
virtual void emitErrorCall(CodeGenFunction &CGF, SourceLocation Loc, Expr *ME, bool IsFatal)
Emit __kmpc_error call for error directive extern void __kmpc_error(ident_t *loc, int severity,...
void clearLocThreadIdInsertPt(CodeGenFunction &CGF)
virtual void emitTaskyieldCall(CodeGenFunction &CGF, SourceLocation Loc)
Emits code for a taskyield directive.
std::string getName(ArrayRef< StringRef > Parts) const
Get the platform-specific name separator.
virtual void emitFlush(CodeGenFunction &CGF, ArrayRef< const Expr * > Vars, SourceLocation Loc, llvm::AtomicOrdering AO)
Emit flush of the variables specified in 'omp flush' directive.
virtual void emitTaskwaitCall(CodeGenFunction &CGF, SourceLocation Loc, const OMPTaskDataTy &Data)
Emit code for 'taskwait' directive.
virtual void emitProcBindClause(CodeGenFunction &CGF, llvm::omp::ProcBindKind ProcBind, SourceLocation Loc)
Emit call to void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid, int proc_bind) to generat...
void emitLastprivateConditionalUpdate(CodeGenFunction &CGF, LValue IVLVal, StringRef UniqueDeclName, LValue LVal, SourceLocation Loc)
Emit update for lastprivate conditional data.
virtual void emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D, llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds, const Expr *IfCond, const OMPTaskDataTy &Data)
Emit task region for the taskloop directive.
virtual void emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind Kind, bool EmitChecks=true, bool ForceSimpleCall=false)
Emit an implicit/explicit barrier for OpenMP threads.
static unsigned getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind)
Returns default flags for the barriers depending on the directive, for which this barrier is going to ...
virtual bool emitTargetFunctions(GlobalDecl GD)
Emit the target regions enclosed in GD function definition or the function itself in case it is a val...
TaskResultTy emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc, const OMPExecutableDirective &D, llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds, const OMPTaskDataTy &Data)
Emit task region for the task directive.
llvm::Value * emitTargetNumIterationsCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, llvm::function_ref< llvm::Value *(CodeGenFunction &CGF, const OMPLoopDirective &D)> SizeEmitter)
Return the trip count of loops associated with constructs / 'target teams distribute' and 'teams dist...
llvm::StringMap< llvm::AssertingVH< llvm::GlobalVariable >, llvm::BumpPtrAllocator > InternalVars
An ordered map of auto-generated variables to their unique names.
virtual void emitDistributeStaticInit(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values)
llvm::SmallVector< UntiedLocalVarsAddressesMap, 4 > UntiedLocalVarsStack
virtual void emitForStaticFinish(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind)
Call the appropriate runtime routine to notify that we finished all the work with current loop.
virtual void emitThreadLimitClause(CodeGenFunction &CGF, const Expr *ThreadLimit, SourceLocation Loc)
Emits call to void __kmpc_set_thread_limit(ident_t *loc, kmp_int32 global_tid, kmp_int32 thread_limit...
void emitIfClause(CodeGenFunction &CGF, const Expr *Cond, const RegionCodeGenTy &ThenGen, const RegionCodeGenTy &ElseGen)
Emits code for OpenMP 'if' clause using specified CodeGen function.
Address emitDepobjDependClause(CodeGenFunction &CGF, const OMPTaskDataTy::DependData &Dependencies, SourceLocation Loc)
Emits list of dependencies based on the provided data (array of dependence/expression pairs) for depob...
bool isNontemporalDecl(const ValueDecl *VD) const
Checks if the VD variable is marked as nontemporal declaration in current context.
virtual llvm::Function * emitParallelOutlinedFunction(CodeGenFunction &CGF, const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen)
Emits outlined function for the specified OpenMP parallel directive D.
const Expr * getNumThreadsExprForTargetDirective(CodeGenFunction &CGF, const OMPExecutableDirective &D, int32_t &UpperBound, bool UpperBoundOnly, llvm::Value **CondExpr=nullptr, const Expr **ThreadLimitExpr=nullptr)
Check for a number of threads upper bound constant value (stored in UpperBound), or expression (retur...
llvm::SmallVector< LastprivateConditionalData, 4 > LastprivateConditionalStack
Stack for list of addresses of declarations in current context marked as lastprivate conditional.
virtual void emitForStaticInit(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind, const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values)
Call the appropriate runtime routine to initialize it before start of loop.
virtual void emitDeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn)
Marks function Fn with properly mangled versions of vector functions.
llvm::AtomicOrdering getDefaultMemoryOrdering() const
Gets default memory ordering as specified in requires directive.
llvm::SmallDenseSet< CanonicalDeclPtr< const Decl > > NontemporalDeclsSet
virtual bool isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind, bool Chunked) const
Check if the specified ScheduleKind is static non-chunked.
llvm::Value * getCriticalRegionLock(StringRef CriticalName)
Returns corresponding lock object for the specified critical region name.
virtual void emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc, const Expr *IfCond, OpenMPDirectiveKind CancelRegion)
Emit code for 'cancel' construct.
QualType SavedKmpTaskTQTy
Saved kmp_task_t for task directive.
virtual void emitMasterRegion(CodeGenFunction &CGF, const RegionCodeGenTy &MasterOpGen, SourceLocation Loc)
Emits a master region.
virtual llvm::Function * emitTaskOutlinedFunction(const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, const VarDecl *PartIDVar, const VarDecl *TaskTVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen, bool Tied, unsigned &NumberOfParts)
Emits outlined function for the OpenMP task directive D.
llvm::DenseMap< llvm::Function *, unsigned > FunctionToUntiedTaskStackMap
Maps function to the position of the untied task locals stack.
void emitDestroyClause(CodeGenFunction &CGF, LValue DepobjLVal, SourceLocation Loc)
Emits the code to destroy the dependency object provided in depobj directive.
virtual void emitTaskReductionFixups(CodeGenFunction &CGF, SourceLocation Loc, ReductionCodeGen &RCG, unsigned N)
Required to resolve existing problems in the runtime.
llvm::ArrayType * KmpCriticalNameTy
Type kmp_critical_name, originally defined as typedef kmp_int32 kmp_critical_name[8];.
virtual void emitDoacrossOrdered(CodeGenFunction &CGF, const OMPDependClause *C)
Emit code for doacross ordered directive with 'depend' clause.
llvm::DenseMap< const OMPDeclareMapperDecl *, llvm::Function * > UDMMap
Map from the user-defined mapper declaration to its corresponding functions.
virtual void checkAndEmitLastprivateConditional(CodeGenFunction &CGF, const Expr *LHS)
Checks if the provided LVal is lastprivate conditional and emits the code to update the value of the ...
std::pair< llvm::Value *, LValue > getDepobjElements(CodeGenFunction &CGF, LValue DepobjLVal, SourceLocation Loc)
Returns the number of the elements and the address of the depobj dependency array.
llvm::SmallDenseSet< const VarDecl * > DeferredGlobalVariables
List of variables that can become declare target implicitly and, thus, must be emitted.
void emitUsesAllocatorsInit(CodeGenFunction &CGF, const Expr *Allocator, const Expr *AllocatorTraits)
Initializes user defined allocators specified in the uses_allocators clauses.
llvm::Type * KmpRoutineEntryPtrTy
Type typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *);.
llvm::Type * getIdentTyPointerTy()
Returns pointer to ident_t type.
void emitSingleReductionCombiner(CodeGenFunction &CGF, const Expr *ReductionOp, const Expr *PrivateRef, const DeclRefExpr *LHS, const DeclRefExpr *RHS)
Emits single reduction combiner.
llvm::OpenMPIRBuilder & getOMPBuilder()
virtual void emitTargetOutlinedFunction(const OMPExecutableDirective &D, StringRef ParentName, llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, bool IsOffloadEntry, const RegionCodeGenTy &CodeGen)
Emit outlined function for 'target' directive.
virtual void emitCriticalRegion(CodeGenFunction &CGF, StringRef CriticalName, const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc, const Expr *Hint=nullptr)
Emits a critical region.
virtual void emitForOrderedIterationEnd(CodeGenFunction &CGF, SourceLocation Loc, unsigned IVSize, bool IVSigned)
Call the appropriate runtime routine to notify that we finished iteration of the ordered loop with th...
llvm::Value * emitNumThreadsForTargetDirective(CodeGenFunction &CGF, const OMPExecutableDirective &D)
Emit an expression that denotes the number of threads a target region shall use.
void emitThreadPrivateVarInit(CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor, llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc)
Emits initialization code for the threadprivate variables.
virtual void emitUserDefinedReduction(CodeGenFunction *CGF, const OMPDeclareReductionDecl *D)
Emit code for the specified user defined reduction construct.
virtual void checkAndEmitSharedLastprivateConditional(CodeGenFunction &CGF, const OMPExecutableDirective &D, const llvm::DenseSet< CanonicalDeclPtr< const VarDecl > > &IgnoredDecls)
Checks if the lastprivate conditional was updated in inner region and writes the value.
QualType KmpDimTy
struct kmp_dim { // loop bounds info casted to kmp_int64 kmp_int64 lo; // lower kmp_int64 up; // uppe...
virtual void emitInlinedDirective(CodeGenFunction &CGF, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen, bool HasCancel=false)
Emit code for the directive that does not require outlining.
virtual void registerTargetGlobalVariable(const VarDecl *VD, llvm::Constant *Addr)
Checks if the provided global decl GD is a declare target variable and registers it when emitting cod...
virtual void emitFunctionProlog(CodeGenFunction &CGF, const Decl *D)
Emits OpenMP-specific function prolog.
virtual void emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc, llvm::Function *OutlinedFn, ArrayRef< llvm::Value * > CapturedVars, const Expr *IfCond, llvm::Value *NumThreads)
Emits code for parallel or serial call of the OutlinedFn with variables captured in a record which ad...
void emitKmpRoutineEntryT(QualType KmpInt32Ty)
Build type kmp_routine_entry_t (if not built yet).
virtual bool isStaticChunked(OpenMPScheduleClauseKind ScheduleKind, bool Chunked) const
Check if the specified ScheduleKind is static chunked.
virtual void emitTargetCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond, llvm::PointerIntPair< const Expr *, 2, OpenMPDeviceClauseModifier > Device, llvm::function_ref< llvm::Value *(CodeGenFunction &CGF, const OMPLoopDirective &D)> SizeEmitter)
Emit the target offloading code associated with D.
virtual bool hasAllocateAttributeForGlobalVar(const VarDecl *VD, LangAS &AS)
Checks if the variable has associated OMPAllocateDeclAttr attribute with the predefined allocator and...
llvm::AtomicOrdering RequiresAtomicOrdering
Atomic ordering from the omp requires directive.
virtual void emitReduction(CodeGenFunction &CGF, SourceLocation Loc, ArrayRef< const Expr * > Privates, ArrayRef< const Expr * > LHSExprs, ArrayRef< const Expr * > RHSExprs, ArrayRef< const Expr * > ReductionOps, ReductionOptionsTy Options)
Emit a code for reduction clause.
std::pair< llvm::Value *, Address > emitDependClause(CodeGenFunction &CGF, ArrayRef< OMPTaskDataTy::DependData > Dependencies, SourceLocation Loc)
Emits list of dependencies based on the provided data (array of dependence/expression pairs).
llvm::StringMap< llvm::WeakTrackingVH > EmittedNonTargetVariables
List of the global variables with their addresses that should not be emitted for the target.
virtual bool isDynamic(OpenMPScheduleClauseKind ScheduleKind) const
Check if the specified ScheduleKind is dynamic.
Address emitLastprivateConditionalInit(CodeGenFunction &CGF, const VarDecl *VD)
Create specialized alloca to handle lastprivate conditionals.
virtual void emitOrderedRegion(CodeGenFunction &CGF, const RegionCodeGenTy &OrderedOpGen, SourceLocation Loc, bool IsThreads)
Emit an ordered region.
virtual void emitOutlinedFunctionCall(CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn, ArrayRef< llvm::Value * > Args=std::nullopt) const
Emits call of the outlined function with the provided arguments, translating these arguments to corre...
virtual Address getAddressOfLocalVariable(CodeGenFunction &CGF, const VarDecl *VD)
Gets the OpenMP-specific address of the local variable.
virtual void emitTaskReductionFini(CodeGenFunction &CGF, SourceLocation Loc, bool IsWorksharingReduction)
Emits the following code for reduction clause with task modifier:
virtual void emitMaskedRegion(CodeGenFunction &CGF, const RegionCodeGenTy &MaskedOpGen, SourceLocation Loc, const Expr *Filter=nullptr)
Emits a masked region.
QualType KmpDependInfoTy
Type typedef struct kmp_depend_info { kmp_intptr_t base_addr; size_t len; struct { bool in:1; bool ou...
llvm::Function * emitReductionFunction(StringRef ReducerName, SourceLocation Loc, llvm::Type *ArgsElemType, ArrayRef< const Expr * > Privates, ArrayRef< const Expr * > LHSExprs, ArrayRef< const Expr * > RHSExprs, ArrayRef< const Expr * > ReductionOps)
Emits reduction function.
virtual void emitForDispatchInit(CodeGenFunction &CGF, SourceLocation Loc, const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned, bool Ordered, const DispatchRTInput &DispatchValues)
Call the appropriate runtime routine to initialize it before start of loop.
Address getTaskReductionItem(CodeGenFunction &CGF, SourceLocation Loc, llvm::Value *ReductionsPtr, LValue SharedLVal) override
Get the address of void * type of the private copy of the reduction item specified by the SharedLVal...
void emitCriticalRegion(CodeGenFunction &CGF, StringRef CriticalName, const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc, const Expr *Hint=nullptr) override
Emits a critical region.
void emitDistributeStaticInit(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) override
void emitForStaticInit(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind, const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) override
Call the appropriate runtime routine to initialize it before start of loop.
bool emitTargetGlobalVariable(GlobalDecl GD) override
Emit the global variable if it is a valid device global variable.
llvm::Value * emitForNext(CodeGenFunction &CGF, SourceLocation Loc, unsigned IVSize, bool IVSigned, Address IL, Address LB, Address UB, Address ST) override
Call __kmpc_dispatch_next( ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter, kmp_int[32|64] *p_lowe...
llvm::Function * emitThreadPrivateVarDefinition(const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit, CodeGenFunction *CGF=nullptr) override
Emit a code for initialization of threadprivate variable.
void emitTargetDataStandAloneCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, const Expr *Device) override
Emit the data mapping/movement code associated with the directive D that should be of the form 'targe...
llvm::Function * emitTeamsOutlinedFunction(CodeGenFunction &CGF, const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) override
Emits outlined function for the specified OpenMP teams directive D.
void emitReduction(CodeGenFunction &CGF, SourceLocation Loc, ArrayRef< const Expr * > Privates, ArrayRef< const Expr * > LHSExprs, ArrayRef< const Expr * > RHSExprs, ArrayRef< const Expr * > ReductionOps, ReductionOptionsTy Options) override
Emit a code for reduction clause.
void emitFlush(CodeGenFunction &CGF, ArrayRef< const Expr * > Vars, SourceLocation Loc, llvm::AtomicOrdering AO) override
Emit flush of the variables specified in 'omp flush' directive.
void emitDoacrossOrdered(CodeGenFunction &CGF, const OMPDependClause *C) override
Emit code for doacross ordered directive with 'depend' clause.
void emitTaskyieldCall(CodeGenFunction &CGF, SourceLocation Loc) override
Emits code for a taskyield directive.
Address getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF, QualType VarType, StringRef Name) override
Creates artificial threadprivate variable with name Name and type VarType.
Address getAddrOfThreadPrivate(CodeGenFunction &CGF, const VarDecl *VD, Address VDAddr, SourceLocation Loc) override
Returns address of the threadprivate variable for the current thread.
void emitSingleRegion(CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen, SourceLocation Loc, ArrayRef< const Expr * > CopyprivateVars, ArrayRef< const Expr * > DestExprs, ArrayRef< const Expr * > SrcExprs, ArrayRef< const Expr * > AssignmentOps) override
Emits a single region.
void emitTaskReductionFixups(CodeGenFunction &CGF, SourceLocation Loc, ReductionCodeGen &RCG, unsigned N) override
Required to resolve existing problems in the runtime.
llvm::Function * emitParallelOutlinedFunction(CodeGenFunction &CGF, const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) override
Emits outlined function for the specified OpenMP parallel directive D.
void emitCancellationPointCall(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind CancelRegion) override
Emit code for 'cancellation point' construct.
void emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind Kind, bool EmitChecks=true, bool ForceSimpleCall=false) override
Emit an implicit/explicit barrier for OpenMP threads.
Address getParameterAddress(CodeGenFunction &CGF, const VarDecl *NativeParam, const VarDecl *TargetParam) const override
Gets the address of the native argument basing on the address of the target-specific parameter.
void emitTeamsCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, SourceLocation Loc, llvm::Function *OutlinedFn, ArrayRef< llvm::Value * > CapturedVars) override
Emits code for teams call of the OutlinedFn with variables captured in a record which address is stor...
void emitForOrderedIterationEnd(CodeGenFunction &CGF, SourceLocation Loc, unsigned IVSize, bool IVSigned) override
Call the appropriate runtime routine to notify that we finished iteration of the ordered loop with th...
bool emitTargetGlobal(GlobalDecl GD) override
Emit the global GD if it is meaningful for the target.
void emitTaskReductionFini(CodeGenFunction &CGF, SourceLocation Loc, bool IsWorksharingReduction) override
Emits the following code for reduction clause with task modifier:
void emitOrderedRegion(CodeGenFunction &CGF, const RegionCodeGenTy &OrderedOpGen, SourceLocation Loc, bool IsThreads) override
Emit an ordered region.
void emitForStaticFinish(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind) override
Call the appropriate runtime routine to notify that we finished all the work with current loop.
llvm::Value * emitTaskReductionInit(CodeGenFunction &CGF, SourceLocation Loc, ArrayRef< const Expr * > LHSExprs, ArrayRef< const Expr * > RHSExprs, const OMPTaskDataTy &Data) override
Emit a code for initialization of task reduction clause.
void emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc, llvm::Function *OutlinedFn, ArrayRef< llvm::Value * > CapturedVars, const Expr *IfCond, llvm::Value *NumThreads) override
Emits code for parallel or serial call of the OutlinedFn with variables captured in a record which ad...
void emitProcBindClause(CodeGenFunction &CGF, llvm::omp::ProcBindKind ProcBind, SourceLocation Loc) override
Emit call to void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid, int proc_bind) to generat...
void emitTargetOutlinedFunction(const OMPExecutableDirective &D, StringRef ParentName, llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) override
Emit outlined function for 'target' directive.
void emitMasterRegion(CodeGenFunction &CGF, const RegionCodeGenTy &MasterOpGen, SourceLocation Loc) override
Emits a master region.
void emitNumTeamsClause(CodeGenFunction &CGF, const Expr *NumTeams, const Expr *ThreadLimit, SourceLocation Loc) override
Emits call to void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_teams,...
const VarDecl * translateParameter(const FieldDecl *FD, const VarDecl *NativeParam) const override
Translates the native parameter of outlined function if this is required for target.
void emitMaskedRegion(CodeGenFunction &CGF, const RegionCodeGenTy &MaskedOpGen, SourceLocation Loc, const Expr *Filter=nullptr) override
Emits a masked region.
void emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, const OMPExecutableDirective &D, llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds, const Expr *IfCond, const OMPTaskDataTy &Data) override
Emit task region for the task directive.
void emitTargetCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond, llvm::PointerIntPair< const Expr *, 2, OpenMPDeviceClauseModifier > Device, llvm::function_ref< llvm::Value *(CodeGenFunction &CGF, const OMPLoopDirective &D)> SizeEmitter) override
Emit the target offloading code associated with D.
bool emitTargetFunctions(GlobalDecl GD) override
Emit the target regions enclosed in GD function definition or the function itself in case it is a val...
void emitNumThreadsClause(CodeGenFunction &CGF, llvm::Value *NumThreads, SourceLocation Loc) override
Emits call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_threads)...
void emitDoacrossInit(CodeGenFunction &CGF, const OMPLoopDirective &D, ArrayRef< Expr * > NumIterations) override
Emit initialization for doacross loop nesting support.
void emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc, const Expr *IfCond, OpenMPDirectiveKind CancelRegion) override
Emit code for 'cancel' construct.
void emitTaskwaitCall(CodeGenFunction &CGF, SourceLocation Loc, const OMPTaskDataTy &Data) override
Emit code for 'taskwait' directive.
void emitTaskgroupRegion(CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen, SourceLocation Loc) override
Emit a taskgroup region.
void emitTargetDataCalls(CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, const Expr *Device, const RegionCodeGenTy &CodeGen, CGOpenMPRuntime::TargetDataInfo &Info) override
Emit the target data mapping code associated with D.
void emitForDispatchInit(CodeGenFunction &CGF, SourceLocation Loc, const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned, bool Ordered, const DispatchRTInput &DispatchValues) override
This is used for non static scheduled types and when the ordered clause is present on the loop constr...
llvm::Function * emitTaskOutlinedFunction(const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, const VarDecl *PartIDVar, const VarDecl *TaskTVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen, bool Tied, unsigned &NumberOfParts) override
Emits outlined function for the OpenMP task directive D.
void emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D, llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds, const Expr *IfCond, const OMPTaskDataTy &Data) override
Emit task region for the taskloop directive.
CGRecordLayout - This class handles struct and union layout info while lowering AST types to LLVM typ...
unsigned getNonVirtualBaseLLVMFieldNo(const CXXRecordDecl *RD) const
llvm::StructType * getLLVMType() const
Return the "complete object" LLVM type associated with this record.
llvm::StructType * getBaseSubobjectLLVMType() const
Return the "base subobject" LLVM type associated with this record.
unsigned getLLVMFieldNo(const FieldDecl *FD) const
Return llvm::StructType element number that corresponds to the field FD.
unsigned getVirtualBaseIndex(const CXXRecordDecl *base) const
Return the LLVM field index corresponding to the given virtual base.
virtual void EmitBody(CodeGenFunction &CGF, const Stmt *S)
Emit the captured statement body.
RAII for correct setting/restoring of CapturedStmtInfo.
The scope used to remap some variables as private in the OpenMP loop body (or other captured region e...
bool Privatize()
Privatizes local variables previously registered as private.
bool addPrivate(const VarDecl *LocalVD, Address Addr)
Registers LocalVD variable as a private with Addr as the address of the corresponding private variabl...
An RAII object to set (and then clear) a mapping for an OpaqueValueExpr.
Enters a new scope for capturing cleanups, all of which will be executed once the scope is exited.
CodeGenFunction - This class organizes the per-function state that is used while generating LLVM code...
void FinishFunction(SourceLocation EndLoc=SourceLocation())
FinishFunction - Complete IR generation of the current function.
void EmitNullInitialization(Address DestPtr, QualType Ty)
EmitNullInitialization - Generate code to set a value of the given type to null, if the type contains...
void EmitOMPAggregateAssign(Address DestAddr, Address SrcAddr, QualType OriginalType, const llvm::function_ref< void(Address, Address)> CopyGen)
Perform element by element copying of arrays with type OriginalType from SrcAddr to DestAddr using co...
static TypeEvaluationKind getEvaluationKind(QualType T)
getEvaluationKind - Return the TypeEvaluationKind of QualType T.
void EmitBranchOnBoolExpr(const Expr *Cond, llvm::BasicBlock *TrueBlock, llvm::BasicBlock *FalseBlock, uint64_t TrueCount, Stmt::Likelihood LH=Stmt::LH_None, const Expr *ConditionalOp=nullptr)
EmitBranchOnBoolExpr - Emit a branch on a boolean condition (e.g.
JumpDest getJumpDestInCurrentScope(llvm::BasicBlock *Target)
The given basic block lies in the current EH scope, but may be a target of a potentially scope-crossi...
void EmitOMPCopy(QualType OriginalType, Address DestAddr, Address SrcAddr, const VarDecl *DestVD, const VarDecl *SrcVD, const Expr *Copy)
Emit proper copying of data from one variable to another.
void EmitStoreThroughLValue(RValue Src, LValue Dst, bool isInit=false)
EmitStoreThroughLValue - Store the specified rvalue into the specified lvalue, where both are guarant...
static void EmitOMPTargetTeamsDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetTeamsDirective &S)
Emit device code for the target teams directive.
CGCapturedStmtInfo * CapturedStmtInfo
static void EmitOMPTargetTeamsDistributeDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetTeamsDistributeDirective &S)
Emit device code for the target teams distribute directive.
Address EmitLoadOfPointer(Address Ptr, const PointerType *PtrTy, LValueBaseInfo *BaseInfo=nullptr, TBAAAccessInfo *TBAAInfo=nullptr)
Load a pointer with type PtrTy stored at address Ptr.
RawAddress CreateDefaultAlignTempAlloca(llvm::Type *Ty, const Twine &Name="tmp")
CreateDefaultAlignedTempAlloca - This creates an alloca with the default ABI alignment of the given L...
static void EmitOMPTargetParallelForSimdDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetParallelForSimdDirective &S)
Emit device code for the target parallel for simd directive.
llvm::Value * emitArrayLength(const ArrayType *arrayType, QualType &baseType, Address &addr)
emitArrayLength - Compute the length of an array, even if it's a VLA, and drill down to the base elem...
VlaSizePair getVLASize(const VariableArrayType *vla)
Returns an LLVM value that corresponds to the size, in non-variably-sized elements,...
JumpDest getOMPCancelDestination(OpenMPDirectiveKind Kind)
LValue EmitLValue(const Expr *E, KnownNonNull_t IsKnownNonNull=NotKnownNonNull)
EmitLValue - Emit code to compute a designator that specifies the location of the expression.
static void EmitOMPTargetDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetDirective &S)
Emit device code for the target directive.
void EmitVariablyModifiedType(QualType Ty)
EmitVLASize - Capture all the sizes for the VLA expressions in the given variably-modified type and s...
llvm::BasicBlock * createBasicBlock(const Twine &name="", llvm::Function *parent=nullptr, llvm::BasicBlock *before=nullptr)
createBasicBlock - Create an LLVM basic block.
const LangOptions & getLangOpts() const
static void EmitOMPTargetSimdDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetSimdDirective &S)
Emit device code for the target simd directive.
LValue EmitLValueForFieldInitialization(LValue Base, const FieldDecl *Field)
EmitLValueForFieldInitialization - Like EmitLValueForField, except that if the Field is a reference,...
void EmitBlock(llvm::BasicBlock *BB, bool IsFinished=false)
EmitBlock - Emit the given block.
const CodeGen::CGBlockInfo * BlockInfo
Address EmitLoadOfReference(LValue RefLVal, LValueBaseInfo *PointeeBaseInfo=nullptr, TBAAAccessInfo *PointeeTBAAInfo=nullptr)
void EmitExprAsInit(const Expr *init, const ValueDecl *D, LValue lvalue, bool capturedByInit)
EmitExprAsInit - Emits the code necessary to initialize a location in memory with the given initializ...
RValue EmitLoadOfLValue(LValue V, SourceLocation Loc)
EmitLoadOfLValue - Given an expression that represents a value lvalue, this method emits the address ...
void EmitAnyExprToMem(const Expr *E, Address Location, Qualifiers Quals, bool IsInitializer)
EmitAnyExprToMem - Emits the code necessary to evaluate an arbitrary expression into the given memory...
void EmitIgnoredExpr(const Expr *E)
EmitIgnoredExpr - Emit an expression in a context which ignores the result.
llvm::Type * ConvertTypeForMem(QualType T)
const Decl * CurCodeDecl
CurCodeDecl - This is the inner-most code context, which includes blocks.
llvm::AssertingVH< llvm::Instruction > AllocaInsertPt
AllocaInsertPoint - This is an instruction in the entry block before which we prefer to insert alloca...
void EmitAggregateAssign(LValue Dest, LValue Src, QualType EltTy)
Emit an aggregate assignment.
void GenerateOpenMPCapturedVars(const CapturedStmt &S, SmallVectorImpl< llvm::Value * > &CapturedVars)
JumpDest ReturnBlock
ReturnBlock - Unified return block.
static void EmitOMPTargetTeamsGenericLoopDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetTeamsGenericLoopDirective &S)
Emit device code for the target teams loop directive.
LValue EmitLValueForField(LValue Base, const FieldDecl *Field)
RawAddress CreateMemTemp(QualType T, const Twine &Name="tmp", RawAddress *Alloca=nullptr)
CreateMemTemp - Create a temporary memory object of the given type, with appropriate alignment and cas...
Destroyer * getDestroyer(QualType::DestructionKind destructionKind)
static void EmitOMPTargetTeamsDistributeParallelForDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetTeamsDistributeParallelForDirective &S)
llvm::Value * getTypeSize(QualType Ty)
Returns calculated size of the specified type.
void emitDestroy(Address addr, QualType type, Destroyer *destroyer, bool useEHCleanupForArray)
Address EmitPointerWithAlignment(const Expr *Addr, LValueBaseInfo *BaseInfo=nullptr, TBAAAccessInfo *TBAAInfo=nullptr, KnownNonNull_t IsKnownNonNull=NotKnownNonNull)
EmitPointerWithAlignment - Given an expression with a pointer type, emit the value and compute our be...
void StartFunction(GlobalDecl GD, QualType RetTy, llvm::Function *Fn, const CGFunctionInfo &FnInfo, const FunctionArgList &Args, SourceLocation Loc=SourceLocation(), SourceLocation StartLoc=SourceLocation())
Emit code for the start of a function.
ComplexPairTy EmitLoadOfComplex(LValue src, SourceLocation loc)
EmitLoadOfComplex - Load a complex number from the specified l-value.
bool HaveInsertPoint() const
HaveInsertPoint - True if an insertion point is defined.
bool isTrivialInitializer(const Expr *Init)
Determine whether the given initializer is trivial in the sense that it requires no code to be genera...
void EmitBranch(llvm::BasicBlock *Block)
EmitBranch - Emit a branch to the specified basic block from the current insert block,...
LValue MakeRawAddrLValue(llvm::Value *V, QualType T, CharUnits Alignment, AlignmentSource Source=AlignmentSource::Type)
Same as MakeAddrLValue above except that the pointer is known to be unsigned.
void EmitAggregateCopy(LValue Dest, LValue Src, QualType EltTy, AggValueSlot::Overlap_t MayOverlap, bool isVolatile=false)
EmitAggregateCopy - Emit an aggregate copy.
LValue MakeNaturalAlignRawAddrLValue(llvm::Value *V, QualType T)
void EmitOMPTargetTaskBasedDirective(const OMPExecutableDirective &S, const RegionCodeGenTy &BodyGen, OMPTargetDataInfo &InputInfo)
static void EmitOMPTargetParallelForDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetParallelForDirective &S)
Emit device code for the target parallel for directive.
void EmitVarDecl(const VarDecl &D)
EmitVarDecl - Emit a local variable declaration.
llvm::CallInst * EmitNounwindRuntimeCall(llvm::FunctionCallee callee, const Twine &name="")
llvm::Value * EmitLoadOfScalar(Address Addr, bool Volatile, QualType Ty, SourceLocation Loc, AlignmentSource Source=AlignmentSource::Type, bool isNontemporal=false)
EmitLoadOfScalar - Load a scalar value from an address, taking care to appropriately convert from the...
const Decl * CurFuncDecl
CurFuncDecl - Holds the Decl for the current outermost non-closure context.
LValue EmitLoadOfPointerLValue(Address Ptr, const PointerType *PtrTy)
static void EmitOMPTargetTeamsDistributeSimdDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetTeamsDistributeSimdDirective &S)
Emit device code for the target teams distribute simd directive.
void EmitBranchThroughCleanup(JumpDest Dest)
EmitBranchThroughCleanup - Emit a branch from the current insert block through the normal cleanup han...
AutoVarEmission EmitAutoVarAlloca(const VarDecl &var)
void pushDestroy(QualType::DestructionKind dtorKind, Address addr, QualType type)
bool ConstantFoldsToSimpleInteger(const Expr *Cond, bool &Result, bool AllowLabels=false)
ConstantFoldsToSimpleInteger - If the specified expression does not fold to a constant,...
void EmitAutoVarCleanups(const AutoVarEmission &emission)
bool needsEHCleanup(QualType::DestructionKind kind)
Determines whether an EH cleanup is required to destroy a type with the given destruction kind.
llvm::DenseMap< const ValueDecl *, FieldDecl * > LambdaCaptureFields
llvm::CallInst * EmitRuntimeCall(llvm::FunctionCallee callee, const Twine &name="")
llvm::Type * ConvertType(QualType T)
CodeGenTypes & getTypes() const
LValue EmitArraySectionExpr(const ArraySectionExpr *E, bool IsLowerBound=true)
static void EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetTeamsDistributeParallelForSimdDirective &S)
Emit device code for the target teams distribute parallel for simd directive.
LValue EmitStringLiteralLValue(const StringLiteral *E)
llvm::Value * EvaluateExprAsBool(const Expr *E)
EvaluateExprAsBool - Perform the usual unary conversions on the specified expression and compare the ...
LValue EmitOMPSharedLValue(const Expr *E)
Emits the lvalue for the expression with possibly captured variable.
llvm::Value * EmitCheckedInBoundsGEP(llvm::Type *ElemTy, llvm::Value *Ptr, ArrayRef< llvm::Value * > IdxList, bool SignedIndices, bool IsSubtraction, SourceLocation Loc, const Twine &Name="")
Same as IRBuilder::CreateInBoundsGEP, but additionally emits a check to detect undefined behavior whe...
llvm::Function * GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S, SourceLocation Loc)
LValue MakeAddrLValue(Address Addr, QualType T, AlignmentSource Source=AlignmentSource::Type)
void EmitStoreOfComplex(ComplexPairTy V, LValue dest, bool isInit)
EmitStoreOfComplex - Store a complex number into the specified l-value.
static void EmitOMPTargetParallelGenericLoopDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetParallelGenericLoopDirective &S)
Emit device code for the target parallel loop directive.
LValue EmitLoadOfReferenceLValue(LValue RefLVal)
Address GetAddrOfLocalVar(const VarDecl *VD)
GetAddrOfLocalVar - Return the address of a local variable.
void EmitAtomicStore(RValue rvalue, LValue lvalue, bool isInit)
llvm::Value * EmitScalarConversion(llvm::Value *Src, QualType SrcTy, QualType DstTy, SourceLocation Loc)
Emit a conversion from the specified type to the specified destination type, both of which are LLVM s...
std::pair< llvm::Value *, llvm::Value * > ComplexPairTy
Address ReturnValue
ReturnValue - The temporary alloca to hold the return value.
static void EmitOMPTargetParallelDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetParallelDirective &S)
llvm::Value * EmitScalarExpr(const Expr *E, bool IgnoreResultAssign=false)
EmitScalarExpr - Emit the computation of the specified expression of LLVM scalar type,...
void incrementProfileCounter(const Stmt *S, llvm::Value *StepV=nullptr)
Increment the profiler's counter for the given statement by StepV.
void EmitStoreOfScalar(llvm::Value *Value, Address Addr, bool Volatile, QualType Ty, AlignmentSource Source=AlignmentSource::Type, bool isInit=false, bool isNontemporal=false)
EmitStoreOfScalar - Store a scalar value to an address, taking care to appropriately convert from the...
This class organizes the cross-function state that is used while generating LLVM code.
void handleCUDALaunchBoundsAttr(llvm::Function *F, const CUDALaunchBoundsAttr *A, int32_t *MaxThreadsVal=nullptr, int32_t *MinBlocksVal=nullptr, int32_t *MaxClusterRankVal=nullptr)
Emit the IR encoding to attach the CUDA launch bounds attribute to F.
Definition: NVPTX.cpp:308
void SetInternalFunctionAttributes(GlobalDecl GD, llvm::Function *F, const CGFunctionInfo &FI)
Set the attributes on the LLVM function for the given decl and function info.
llvm::Module & getModule() const
void addCompilerUsedGlobal(llvm::GlobalValue *GV)
Add a global to a list to be added to the llvm.compiler.used metadata.
CharUnits GetTargetTypeStoreSize(llvm::Type *Ty) const
Return the store size, in character units, of the given LLVM type.
void handleAMDGPUWavesPerEUAttr(llvm::Function *F, const AMDGPUWavesPerEUAttr *A)
Emit the IR encoding to attach the AMD GPU waves-per-eu attribute to F.
Definition: AMDGPU.cpp:654
DiagnosticsEngine & getDiags() const
const LangOptions & getLangOpts() const
CharUnits getNaturalTypeAlignment(QualType T, LValueBaseInfo *BaseInfo=nullptr, TBAAAccessInfo *TBAAInfo=nullptr, bool forPointeeType=false)
const TargetInfo & getTarget() const
void EmitGlobal(GlobalDecl D)
Emit code for a single global function or var decl.
void handleAMDGPUFlatWorkGroupSizeAttr(llvm::Function *F, const AMDGPUFlatWorkGroupSizeAttr *A, const ReqdWorkGroupSizeAttr *ReqdWGS=nullptr, int32_t *MinThreadsVal=nullptr, int32_t *MaxThreadsVal=nullptr)
Emit the IR encoding to attach the AMD GPU flat-work-group-size attribute to F.
Definition: AMDGPU.cpp:627
llvm::GlobalValue::LinkageTypes getLLVMLinkageVarDefinition(const VarDecl *VD)
Returns LLVM linkage for a declarator.
CGCXXABI & getCXXABI() const
CGOpenMPRuntime & getOpenMPRuntime()
Return a reference to the configured OpenMP runtime.
const llvm::Triple & getTriple() const
TBAAAccessInfo getTBAAInfoForSubobject(LValue Base, QualType AccessType)
getTBAAInfoForSubobject - Get TBAA information for an access with a given base lvalue.
llvm::Constant * GetAddrOfGlobal(GlobalDecl GD, ForDefinition_t IsForDefinition=NotForDefinition)
ASTContext & getContext() const
const TargetCodeGenInfo & getTargetCodeGenInfo()
const CodeGenOptions & getCodeGenOpts() const
StringRef getMangledName(GlobalDecl GD)
std::optional< CharUnits > getOMPAllocateAlignment(const VarDecl *VD)
Return the alignment specified in an allocate directive, if present.
Definition: CGDecl.cpp:2792
llvm::GlobalValue * GetGlobalValue(StringRef Ref)
llvm::Constant * EmitNullConstant(QualType T)
Return the result of value-initializing the given type, i.e.
llvm::Function * CreateGlobalInitOrCleanUpFunction(llvm::FunctionType *ty, const Twine &name, const CGFunctionInfo &FI, SourceLocation Loc=SourceLocation(), bool TLS=false, llvm::GlobalVariable::LinkageTypes Linkage=llvm::GlobalVariable::InternalLinkage)
Definition: CGDeclCXX.cpp:436
llvm::ConstantInt * getSize(CharUnits numChars)
Emit the given number of characters as a value of type size_t.
llvm::Type * ConvertType(QualType T)
ConvertType - Convert type T into a llvm::Type.
llvm::FunctionType * GetFunctionType(const CGFunctionInfo &Info)
GetFunctionType - Get the LLVM function type for.
Definition: CGCall.cpp:1632
const CGFunctionInfo & arrangeBuiltinFunctionDeclaration(QualType resultType, const FunctionArgList &args)
A builtin function is a freestanding function using the default C conventions.
Definition: CGCall.cpp:680
const CGRecordLayout & getCGRecordLayout(const RecordDecl *)
getCGRecordLayout - Return record layout info for the given record decl.
llvm::Type * ConvertTypeForMem(QualType T, bool ForBitField=false)
ConvertTypeForMem - Convert type T into a llvm::Type.
const CGFunctionInfo & arrangeNullaryFunction()
A nullary function is a freestanding function of type 'void ()'.
Definition: CGCall.cpp:722
A specialization of Address that requires the address to be an LLVM Constant.
Definition: Address.h:260
static ConstantAddress invalid()
Definition: Address.h:268
Information for lazily generating a cleanup.
Definition: EHScopeStack.h:141
void popTerminate()
Pops a terminate handler off the stack.
Definition: CGCleanup.h:576
void pushTerminate()
Push a terminate handler on the stack.
Definition: CGCleanup.cpp:243
FunctionArgList - Type for representing both the decl and type of parameters to a function.
Definition: CGCall.h:352
LValue - This represents an lvalue reference.
Definition: CGValue.h:181
CharUnits getAlignment() const
Definition: CGValue.h:346
Address getAddress(CodeGenFunction &CGF) const
Definition: CGValue.h:370
const Qualifiers & getQuals() const
Definition: CGValue.h:341
LValueBaseInfo getBaseInfo() const
Definition: CGValue.h:349
llvm::Value * getPointer(CodeGenFunction &CGF) const
Definition: CGValue.h:361
QualType getType() const
Definition: CGValue.h:294
TBAAAccessInfo getTBAAInfo() const
Definition: CGValue.h:338
A basic class for pre|post-action for advanced codegen sequence for OpenMP region.
virtual void Enter(CodeGenFunction &CGF)
RValue - This trivial value class is used to represent the result of an expression that is evaluated.
Definition: CGValue.h:41
static RValue get(llvm::Value *V)
Definition: CGValue.h:97
static RValue getComplex(llvm::Value *V1, llvm::Value *V2)
Definition: CGValue.h:107
llvm::Value * getScalarVal() const
getScalarVal() - Return the Value* of this scalar value.
Definition: CGValue.h:70
An abstract representation of an aligned address.
Definition: Address.h:41
llvm::Type * getElementType() const
Return the type of the values stored in this address.
Definition: Address.h:76
llvm::Value * getPointer() const
Definition: Address.h:65
static RawAddress invalid()
Definition: Address.h:60
bool isValid() const
Definition: Address.h:61
Class intended to support codegen of all kind of the reduction clauses.
LValue getSharedLValue(unsigned N) const
Returns LValue for the reduction item.
const Expr * getRefExpr(unsigned N) const
Returns the base declaration of the reduction item.
LValue getOrigLValue(unsigned N) const
Returns LValue for the original reduction item.
bool needCleanups(unsigned N)
Returns true if the private copy requires cleanups.
void emitAggregateType(CodeGenFunction &CGF, unsigned N)
Emits the code for the variable-modified type, if required.
const VarDecl * getBaseDecl(unsigned N) const
Returns the base declaration of the reduction item.
QualType getPrivateType(unsigned N) const
Return the type of the private item.
bool usesReductionInitializer(unsigned N) const
Returns true if the initialization of the reduction item uses initializer from declare reduction cons...
void emitSharedOrigLValue(CodeGenFunction &CGF, unsigned N)
Emits lvalue for the shared and original reduction item.
void emitInitialization(CodeGenFunction &CGF, unsigned N, Address PrivateAddr, Address SharedAddr, llvm::function_ref< bool(CodeGenFunction &)> DefaultInit)
Performs initialization of the private copy for the reduction item.
std::pair< llvm::Value *, llvm::Value * > getSizes(unsigned N) const
Returns the size of the reduction item (in chars and total number of elements in the item),...
ReductionCodeGen(ArrayRef< const Expr * > Shareds, ArrayRef< const Expr * > Origs, ArrayRef< const Expr * > Privates, ArrayRef< const Expr * > ReductionOps)
void emitCleanups(CodeGenFunction &CGF, unsigned N, Address PrivateAddr)
Emits cleanup code for the reduction item.
Address adjustPrivateAddress(CodeGenFunction &CGF, unsigned N, Address PrivateAddr)
Adjusts PrivateAddr for using instead of the original variable address in normal operations.
Class provides a way to call simple version of codegen for OpenMP region, or an advanced with possibl...
void operator()(CodeGenFunction &CGF) const
void setAction(PrePostActionTy &Action) const
virtual void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M) const
setTargetAttributes - Provides a convenient hook to handle extra target-specific attributes for the g...
Definition: TargetInfo.h:75
ConstStmtVisitor - This class implements a simple visitor for Stmt subclasses.
Definition: StmtVisitor.h:195
DeclContext - This is used only as base class of specific decl types that can act as declaration cont...
Definition: DeclBase.h:1436
void addDecl(Decl *D)
Add the declaration D into this context.
Definition: DeclBase.cpp:1698
A reference to a declared variable, function, enum, etc.
Definition: Expr.h:1260
ValueDecl * getDecl()
Definition: Expr.h:1328
Decl - This represents one declaration (or definition), e.g.
Definition: DeclBase.h:86
T * getAttr() const
Definition: DeclBase.h:579
bool hasAttrs() const
Definition: DeclBase.h:524
ASTContext & getASTContext() const LLVM_READONLY
Definition: DeclBase.cpp:501
void addAttr(Attr *A)
Definition: DeclBase.cpp:975
llvm::iterator_range< specific_attr_iterator< T > > specific_attrs() const
Definition: DeclBase.h:565
SourceLocation getLocation() const
Definition: DeclBase.h:445
DeclContext * getDeclContext()
Definition: DeclBase.h:454
AttrVec & getAttrs()
Definition: DeclBase.h:530
bool hasAttr() const
Definition: DeclBase.h:583
virtual Decl * getCanonicalDecl()
Retrieves the "canonical" declaration of the given declaration.
Definition: DeclBase.h:968
SourceLocation getBeginLoc() const LLVM_READONLY
Definition: Decl.h:822
DiagnosticBuilder Report(SourceLocation Loc, unsigned DiagID)
Issue the message to the client.
Definition: Diagnostic.h:1547
unsigned getCustomDiagID(Level L, const char(&FormatString)[N])
Return an ID for a diagnostic with the specified format string and level.
Definition: Diagnostic.h:873
The return type of classify().
Definition: Expr.h:330
This represents one expression.
Definition: Expr.h:110
bool isGLValue() const
Definition: Expr.h:280
@ SE_AllowSideEffects
Allow any unmodeled side effect.
Definition: Expr.h:671
@ SE_AllowUndefinedBehavior
Allow UB that we can give a value, but not arbitrary unmodeled side effects.
Definition: Expr.h:669
Expr * IgnoreParenCasts() LLVM_READONLY
Skip past any parentheses and casts which might surround this expression until reaching a fixed point...
Definition: Expr.cpp:3064
llvm::APSInt EvaluateKnownConstInt(const ASTContext &Ctx, SmallVectorImpl< PartialDiagnosticAt > *Diag=nullptr) const
EvaluateKnownConstInt - Call EvaluateAsRValue and return the folded integer.
Expr * IgnoreParenImpCasts() LLVM_READONLY
Skip past any parentheses and implicit casts which might surround this expression until reaching a fi...
Definition: Expr.cpp:3059
bool isEvaluatable(const ASTContext &Ctx, SideEffectsKind AllowSideEffects=SE_NoSideEffects) const
isEvaluatable - Call EvaluateAsRValue to see if this expression can be constant folded without side-e...
bool HasSideEffects(const ASTContext &Ctx, bool IncludePossibleEffects=true) const
HasSideEffects - This routine returns true for all those expressions which have any effect other than...
Definition: Expr.cpp:3556
bool isIntegerConstantExpr(const ASTContext &Ctx, SourceLocation *Loc=nullptr) const
bool EvaluateAsBooleanCondition(bool &Result, const ASTContext &Ctx, bool InConstantContext=false) const
EvaluateAsBooleanCondition - Return true if this is a constant which we can fold and convert to a boo...
SourceLocation getExprLoc() const LLVM_READONLY
getExprLoc - Return the preferred location for the arrow when diagnosing a problem with a generic exp...
Definition: Expr.cpp:277
std::optional< llvm::APSInt > getIntegerConstantExpr(const ASTContext &Ctx, SourceLocation *Loc=nullptr) const
isIntegerConstantExpr - Return the value if this expression is a valid integer constant expression.
QualType getType() const
Definition: Expr.h:142
bool hasNonTrivialCall(const ASTContext &Ctx) const
Determine whether this expression involves a call to any function that is not trivial.
Definition: Expr.cpp:3906
Represents a member of a struct/union/class.
Definition: Decl.h:3058
static FieldDecl * Create(const ASTContext &C, DeclContext *DC, SourceLocation StartLoc, SourceLocation IdLoc, const IdentifierInfo *Id, QualType T, TypeSourceInfo *TInfo, Expr *BW, bool Mutable, InClassInitStyle InitStyle)
Definition: Decl.cpp:4547
Represents a function declaration or definition.
Definition: Decl.h:1971
const ParmVarDecl * getParamDecl(unsigned i) const
Definition: Decl.h:2707
QualType getReturnType() const
Definition: Decl.h:2755
ArrayRef< ParmVarDecl * > parameters() const
Definition: Decl.h:2684
FunctionDecl * getCanonicalDecl() override
Retrieves the "canonical" declaration of the given declaration.
Definition: Decl.cpp:3617
unsigned getNumParams() const
Return the number of parameters this function must have based on its FunctionType.
Definition: Decl.cpp:3692
GlobalDecl - represents a global declaration.
Definition: GlobalDecl.h:56
const Decl * getDecl() const
Definition: GlobalDecl.h:103
static ImplicitParamDecl * Create(ASTContext &C, DeclContext *DC, SourceLocation IdLoc, IdentifierInfo *Id, QualType T, ImplicitParamKind ParamKind)
Create implicit parameter.
Definition: Decl.cpp:5381
static IntegerLiteral * Create(const ASTContext &C, const llvm::APInt &V, QualType type, SourceLocation l)
Returns a new integer literal with value 'V' and type 'type'.
Definition: Expr.cpp:977
Describes the capture of a variable or of this, or of a C++1y init-capture.
Definition: LambdaCapture.h:25
std::string OMPHostIRFile
Name of the IR file that contains the result of the OpenMP target host code generation.
Definition: LangOptions.h:539
std::vector< llvm::Triple > OMPTargetTriples
Triples of the OpenMP targets that the host code codegen should take into account in order to generat...
Definition: LangOptions.h:535
virtual void mangleCanonicalTypeName(QualType T, raw_ostream &, bool NormalizeIntegers=false)=0
Generates a unique string for an externally visible type for use with TBAA or type uniquing.
MemberExpr - [C99 6.5.2.3] Structure and Union Members.
Definition: Expr.h:3172
ValueDecl * getMemberDecl() const
Retrieve the member declaration to which this expression refers.
Definition: Expr.h:3255
Expr * getBase() const
Definition: Expr.h:3249
StringRef getName() const
Get the name of identifier for this declaration as a StringRef.
Definition: Decl.h:276
bool isExternallyVisible() const
Definition: Decl.h:408
This represents clause 'affinity' in the '#pragma omp task'-based directives.
Class that represents a component of a mappable expression.
ArrayRef< MappableComponent > MappableExprComponentListRef
const Stmt * getPreInitStmt() const
Get pre-initialization statement for the clause.
Definition: OpenMPClause.h:219
This is a basic class for representing single OpenMP clause.
Definition: OpenMPClause.h:55
This represents '#pragma omp declare mapper ...' directive.
Definition: DeclOpenMP.h:287
Expr * getMapperVarRef()
Get the variable declared in the mapper.
Definition: DeclOpenMP.h:349
This represents '#pragma omp declare reduction ...' directive.
Definition: DeclOpenMP.h:177
Expr * getInitializer()
Get initializer expression (if specified) of the declare reduction construct.
Definition: DeclOpenMP.h:238
Expr * getInitPriv()
Get Priv variable of the initializer.
Definition: DeclOpenMP.h:249
Expr * getCombinerOut()
Get Out variable of the combiner.
Definition: DeclOpenMP.h:226
Expr * getCombinerIn()
Get In variable of the combiner.
Definition: DeclOpenMP.h:223
Expr * getCombiner()
Get combiner expression of the declare reduction construct.
Definition: DeclOpenMP.h:220
Expr * getInitOrig()
Get Orig variable of the initializer.
Definition: DeclOpenMP.h:246
OMPDeclareReductionInitKind getInitializerKind() const
Get initializer kind.
Definition: DeclOpenMP.h:241
This represents implicit clause 'depend' for the '#pragma omp task' directive.
This represents 'detach' clause in the '#pragma omp task' directive.
This represents 'device' clause in the '#pragma omp ...' directive.
This represents the 'doacross' clause for the '#pragma omp ordered' directive.
This is a basic class for representing single OpenMP executable directive.
Definition: StmtOpenMP.h:266
CapturedStmt * getInnermostCapturedStmt()
Get innermost captured statement for the construct.
Definition: StmtOpenMP.h:556
const CapturedStmt * getCapturedStmt(OpenMPDirectiveKind RegionKind) const
Returns the captured statement associated with the component region within the (combined) directive.
Definition: StmtOpenMP.h:547
OpenMPDirectiveKind getDirectiveKind() const
Definition: StmtOpenMP.h:569
SourceLocation getBeginLoc() const
Returns starting location of directive kind.
Definition: StmtOpenMP.h:502
bool hasClausesOfKind() const
Returns true if the current directive has one or more clauses of a specific kind.
Definition: StmtOpenMP.h:496
SourceLocation getEndLoc() const
Returns ending location of directive.
Definition: StmtOpenMP.h:504
static const SpecificClause * getSingleClause(ArrayRef< OMPClause * > Clauses)
Gets a single clause of the specified kind associated with the current directive iff there is only on...
Definition: StmtOpenMP.h:477
static llvm::iterator_range< specific_clause_iterator< SpecificClause > > getClausesOfKind(ArrayRef< OMPClause * > Clauses)
Definition: StmtOpenMP.h:459
This represents clause 'firstprivate' in the '#pragma omp ...' directives.
This represents clause 'has_device_ptr' in the '#pragma omp ...' directives.
This represents 'if' clause in the '#pragma omp ...' directive.
Definition: OpenMPClause.h:527
Expr * getCondition() const
Returns condition.
Definition: OpenMPClause.h:596
This represents clause 'in_reduction' in the '#pragma omp task' directives.
This represents clause 'is_device_ptr' in the '#pragma omp ...' directives.
OpenMP 5.0 [2.1.6 Iterators] Iterators are identifiers that expand to multiple values in the clause o...
Definition: ExprOpenMP.h:151
OMPIteratorHelperData & getHelper(unsigned I)
Fetches helper data for the specified iteration space.
Definition: Expr.cpp:5246
unsigned numOfIterators() const
Returns number of iterator definitions.
Definition: ExprOpenMP.h:275
Decl * getIteratorDecl(unsigned I)
Gets the iterator declaration for the given iterator.
Definition: Expr.cpp:5203
This represents clause 'lastprivate' in the '#pragma omp ...' directives.
This represents clause 'linear' in the '#pragma omp ...' directives.
This is a common base class for loop directives ('omp simd', 'omp for', 'omp for simd' etc....
Definition: StmtOpenMP.h:1018
Expr * getStrideVariable() const
Definition: StmtOpenMP.h:1376
Expr * getUpperBoundVariable() const
Definition: StmtOpenMP.h:1368
Expr * getLowerBoundVariable() const
Definition: StmtOpenMP.h:1360
This represents clause 'map' in the '#pragma omp ...' directives.
This represents clause 'nontemporal' in the '#pragma omp ...' directives.
This represents 'nowait' clause in the '#pragma omp ...' directive.
This represents 'num_teams' clause in the '#pragma omp ...' directive.
This represents 'num_threads' clause in the '#pragma omp ...' directive.
Definition: OpenMPClause.h:676
This represents 'ordered' clause in the '#pragma omp ...' directive.
This represents clause 'private' in the '#pragma omp ...' directives.
This represents clause 'reduction' in the '#pragma omp ...' directives.
This represents '#pragma omp requires...' directive.
Definition: DeclOpenMP.h:417
clauselist_range clauselists()
Definition: DeclOpenMP.h:442
This represents 'thread_limit' clause in the '#pragma omp ...' directive.
This represents clause 'uses_allocators' in the '#pragma omp target'-based directives.
This represents 'ompx_attribute' clause in a directive that might generate an outlined function.
This represents 'ompx_dyn_cgroup_mem' clause in the '#pragma omp target ...' directive.
OpaqueValueExpr - An expression referring to an opaque object of a fixed type and value class.
Definition: Expr.h:1168
Represents a parameter to a function.
Definition: Decl.h:1761
PointerType - C99 6.7.5.1 - Pointer Declarators.
Definition: Type.h:3135
Represents an unpacked "presumed" location which can be presented to the user.
unsigned getColumn() const
Return the presumed column number of this location.
const char * getFilename() const
Return the presumed filename of this location.
unsigned getLine() const
Return the presumed line number of this location.
A (possibly-)qualified type.
Definition: Type.h:940
void addRestrict()
Add the restrict qualifier to this QualType.
Definition: Type.h:1167
QualType withRestrict() const
Definition: Type.h:1170
bool isNull() const
Return true if this QualType doesn't point to a type yet.
Definition: Type.h:1007
const Type * getTypePtr() const
Retrieves a pointer to the underlying (unqualified) type.
Definition: Type.h:7355
Qualifiers getQualifiers() const
Retrieve the set of qualifiers applied to this type.
Definition: Type.h:7395
QualType getNonReferenceType() const
If Type is a reference type (e.g., const int&), returns the type that the reference refers to ("const...
Definition: Type.h:7556
QualType getCanonicalType() const
Definition: Type.h:7407
DestructionKind isDestructedType() const
Returns a nonzero value if objects of this type require non-trivial work to clean up after.
Definition: Type.h:1530
Represents a struct/union/class.
Definition: Decl.h:4169
field_iterator field_end() const
Definition: Decl.h:4378
field_range fields() const
Definition: Decl.h:4375
virtual void completeDefinition()
Note that the definition of this type is now complete.
Definition: Decl.cpp:5083
bool field_empty() const
Definition: Decl.h:4383
field_iterator field_begin() const
Definition: Decl.cpp:5071
RecordDecl * getDecl() const
Definition: Type.h:5555
decl_type * getPreviousDecl()
Return the previous declaration of this declaration or NULL if this is the first declaration.
Definition: Redeclarable.h:204
decl_type * getMostRecentDecl()
Returns the most recent (re)declaration of this declaration.
Definition: Redeclarable.h:226
Base for LValueReferenceType and RValueReferenceType.
Definition: Type.h:3376
Scope - A scope is a transient data structure that is used while parsing the program.
Definition: Scope.h:41
Encodes a location in the source.
static SourceLocation getFromRawEncoding(UIntTy Encoding)
Turn a raw encoding of a SourceLocation object into a real SourceLocation.
bool isValid() const
Return true if this is a valid SourceLocation object.
UIntTy getRawEncoding() const
When a SourceLocation itself cannot be used, this returns an (opaque) 32-bit integer encoding for it.
This class handles loading and caching of source files into memory.
PresumedLoc getPresumedLoc(SourceLocation Loc, bool UseLineDirectives=true) const
Returns the "presumed" location of a SourceLocation specifies.
fileinfo_iterator fileinfo_end() const
SourceLocation translateFileLineCol(const FileEntry *SourceFile, unsigned Line, unsigned Col) const
Get the source location for the given file:line:col triplet.
fileinfo_iterator fileinfo_begin() const
A trivial tuple used to represent a source range.
Stmt - This represents one statement.
Definition: Stmt.h:84
SourceRange getSourceRange() const LLVM_READONLY
SourceLocation tokens are not useful in isolation - they are low level value objects created/interpre...
Definition: Stmt.cpp:326
Stmt * IgnoreContainers(bool IgnoreCaptured=false)
Skip no-op (attributed, compound) container stmts and skip captured stmt at the top,...
Definition: Stmt.cpp:197
SourceLocation getBeginLoc() const LLVM_READONLY
Definition: Stmt.cpp:338
void startDefinition()
Starts the definition of this tag declaration.
Definition: Decl.cpp:4739
bool isUnion() const
Definition: Decl.h:3791
bool isTLSSupported() const
Whether the target supports thread-local storage.
Definition: TargetInfo.h:1540
virtual bool hasFeature(StringRef Feature) const
Determine whether the given target has the given feature.
Definition: TargetInfo.h:1451
The base class of the type hierarchy.
Definition: Type.h:1813
CXXRecordDecl * getAsCXXRecordDecl() const
Retrieves the CXXRecordDecl that this type refers to, either because the type is a RecordType or beca...
Definition: Type.cpp:1870
bool isVoidType() const
Definition: Type.h:7901
bool isSignedIntegerOrEnumerationType() const
Determines whether this is an integer type that is signed or an enumeration types whose underlying ty...
Definition: Type.cpp:2155
const Type * getPointeeOrArrayElementType() const
If this is a pointer type, return the pointee type.
Definition: Type.h:8076
bool isSignedIntegerType() const
Return true if this is an integer type that is signed, according to C99 6.2.5p4 [char,...
Definition: Type.cpp:2134
bool isArrayType() const
Definition: Type.h:7674
bool isPointerType() const
Definition: Type.h:7608
CanQualType getCanonicalTypeUnqualified() const
bool isIntegerType() const
isIntegerType() does not include complex integers (a GCC extension).
Definition: Type.h:7941
const T * castAs() const
Member-template castAs<specific type>.
Definition: Type.h:8186
bool isReferenceType() const
Definition: Type.h:7620
QualType getPointeeType() const
If this is a pointer, ObjC object pointer, or block pointer, this returns the respective pointee.
Definition: Type.cpp:694
bool isLValueReferenceType() const
Definition: Type.h:7624
QualType getCanonicalTypeInternal() const
Definition: Type.h:2932
const RecordType * getAsStructureType() const
Definition: Type.cpp:710
const Type * getBaseElementTypeUnsafe() const
Get the base element type of this type, potentially discarding type qualifiers.
Definition: Type.h:8069
bool isVariablyModifiedType() const
Whether this type is a variably-modified type (C99 6.7.5).
Definition: Type.h:2667
const ArrayType * getAsArrayTypeUnsafe() const
A variant of getAs<> for array types which silently discards qualifiers from the outermost type.
Definition: Type.h:8172
bool isFloatingType() const
Definition: Type.cpp:2237
bool isUnsignedIntegerType() const
Return true if this is an integer type that is unsigned, according to C99 6.2.5p6 [which returns true...
Definition: Type.cpp:2184
bool isAnyPointerType() const
Definition: Type.h:7612
const T * getAs() const
Member-template getAs<specific type>'.
Definition: Type.h:8119
bool isRecordType() const
Definition: Type.h:7702
bool isUnionType() const
Definition: Type.cpp:660
TagDecl * getAsTagDecl() const
Retrieves the TagDecl that this type refers to, either because the type is a TagType or because it is...
Definition: Type.cpp:1878
RecordDecl * getAsRecordDecl() const
Retrieves the RecordDecl this type refers to.
Definition: Type.cpp:1874
Represent the declaration of a variable (in which case it is an lvalue) a function (in which case it ...
Definition: Decl.h:706
QualType getType() const
Definition: Decl.h:717
Represents a variable declaration or definition.
Definition: Decl.h:918
VarDecl * getCanonicalDecl() override
Retrieves the "canonical" declaration of the given declaration.
Definition: Decl.cpp:2254
VarDecl * getDefinition(ASTContext &)
Get the real (not just tentative) definition for this declaration.
Definition: Decl.cpp:2363
bool hasExternalStorage() const
Returns true if a variable has extern or private_extern storage.
Definition: Decl.h:1204
bool hasLocalStorage() const
Returns true if a variable with function scope is a non-static local variable.
Definition: Decl.h:1171
@ DeclarationOnly
This declaration is only a declaration.
Definition: Decl.h:1282
DefinitionKind hasDefinition(ASTContext &) const
Check whether this variable is defined in this translation unit.
Definition: Decl.cpp:2372
bool isLocalVarDeclOrParm() const
Similar to isLocalVarDecl but also includes parameters.
Definition: Decl.h:1249
const Expr * getAnyInitializer() const
Get the initializer for this variable, no matter which declaration it is attached to.
Definition: Decl.h:1345
Represents a C array with a specified size that is not an integer-constant-expression.
Definition: Type.h:3743
Expr * getSizeExpr() const
Definition: Type.h:3762
specific_attr_iterator - Iterates over a subrange of an AttrVec, only providing attributes that are o...
Definition: AttrIterator.h:33
@ Decl
The l-value was an access to a declared entity or something equivalently strong, like the address of ...
@ NotKnownNonNull
Definition: Address.h:32
The JSON file list parser is used to communicate input to InstallAPI.
bool isOpenMPWorksharingDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a worksharing directive.
@ Private
'private' clause, allowed on 'parallel', 'serial', 'loop', 'parallel loop', and 'serial loop' constru...
@ Vector
'vector' clause, allowed on 'loop', Combined, and 'routine' directives.
@ Reduction
'reduction' clause, allowed on Parallel, Serial, Loop, and the combined constructs.
@ Present
'present' clause, allowed on Compute and Combined constructs, plus 'data' and 'declare'.
bool needsTaskBasedThreadLimit(OpenMPDirectiveKind DKind)
Checks if the specified target directive, combined or not, needs task based thread_limit.
@ Ctor_Complete
Complete object ctor.
Definition: ABI.h:25
if(T->getSizeExpr()) TRY_TO(TraverseStmt(const_cast< Expr * >(T -> getSizeExpr())))
bool isOpenMPTargetDataManagementDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a target data offload directive.
llvm::omp::Directive OpenMPDirectiveKind
OpenMP directives.
Definition: OpenMPKinds.h:24
@ ICIS_NoInit
No in-class initializer.
Definition: Specifiers.h:269
bool isOpenMPDistributeDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a distribute directive.
@ LCK_ByRef
Capturing by reference.
Definition: Lambda.h:37
BinaryOperatorKind
LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE()
OpenMPScheduleClauseModifier
OpenMP modifiers for 'schedule' clause.
Definition: OpenMPKinds.h:38
@ OMPC_SCHEDULE_MODIFIER_last
Definition: OpenMPKinds.h:43
@ OMPC_SCHEDULE_MODIFIER_unknown
Definition: OpenMPKinds.h:39
@ CR_OpenMP
Definition: CapturedStmt.h:19
bool isOpenMPParallelDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a parallel-kind directive.
OpenMPDistScheduleClauseKind
OpenMP attributes for 'dist_schedule' clause.
Definition: OpenMPKinds.h:103
bool isOpenMPTaskingDirective(OpenMPDirectiveKind Kind)
Checks if the specified directive kind is one of tasking directives - task, taskloop,...
bool isOpenMPTargetExecutionDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a target code offload directive.
@ Result
The result type of a method or function.
bool isOpenMPTeamsDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a teams-kind directive.
OpenMPDependClauseKind
OpenMP attributes for 'depend' clause.
Definition: OpenMPKinds.h:54
@ OMPC_DEPEND_unknown
Definition: OpenMPKinds.h:58
@ Dtor_Complete
Complete object dtor.
Definition: ABI.h:35
@ Union
The "union" keyword.
bool isOpenMPLoopDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a directive with an associated loop construct.
LangAS
Defines the address space values used by the address space qualifier of QualType.
Definition: AddressSpaces.h:25
bool isOpenMPSimdDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a simd directive.
@ VK_PRValue
A pr-value expression (in the C++11 taxonomy) produces a temporary value.
Definition: Specifiers.h:132
@ VK_LValue
An l-value expression is a reference to an object with independent storage.
Definition: Specifiers.h:136
const FunctionProtoType * T
void getOpenMPCaptureRegions(llvm::SmallVectorImpl< OpenMPDirectiveKind > &CaptureRegions, OpenMPDirectiveKind DKind)
Return the captured regions of an OpenMP directive.
@ OMPC_ATOMIC_DEFAULT_MEM_ORDER_unknown
Definition: OpenMPKinds.h:131
@ OMPC_DEVICE_unknown
Definition: OpenMPKinds.h:50
OpenMPMapModifierKind
OpenMP modifier kind for 'map' clause.
Definition: OpenMPKinds.h:78
@ OMPC_MAP_MODIFIER_unknown
Definition: OpenMPKinds.h:79
@ Other
Other implicit parameter.
OpenMPScheduleClauseKind
OpenMP attributes for 'schedule' clause.
Definition: OpenMPKinds.h:30
@ OMPC_SCHEDULE_unknown
Definition: OpenMPKinds.h:34
@ AS_public
Definition: Specifiers.h:121
bool isOpenMPTaskLoopDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a taskloop directive.
OpenMPMapClauseKind
OpenMP mapping kind for 'map' clause.
Definition: OpenMPKinds.h:70
@ OMPC_MAP_unknown
Definition: OpenMPKinds.h:74
unsigned long uint64_t
Diagnostic wrappers for TextAPI types for error reporting.
Definition: Dominators.h:30
#define false
Definition: stdbool.h:22
#define bool
Definition: stdbool.h:20
struct with the values to be passed to the dispatch runtime function
llvm::Value * Chunk
Chunk size specified using 'schedule' clause (nullptr if chunk was not specified)
Maps the expression for the lastprivate variable to the global copy used to store new value because o...
Struct with the values to be passed to the static runtime function.
bool IVSigned
Sign of the iteration variable.
Address UB
Address of the output variable in which the upper iteration number is returned.
Address IL
Address of the output variable in which the flag of the last iteration is returned.
llvm::Value * Chunk
Value of the chunk for the static_chunked scheduled loop.
unsigned IVSize
Size of the iteration variable in bits.
Address ST
Address of the output variable in which the stride value is returned necessary to generated the stati...
bool Ordered
true if loop is ordered, false otherwise.
Address LB
Address of the output variable in which the lower iteration number is returned.
A jump destination is an abstract label, branching to which may require a jump out through normal cle...
llvm::IntegerType * Int8Ty
i8, i16, i32, and i64
llvm::CallingConv::ID getRuntimeCC() const
llvm::IntegerType * IntTy
int
SmallVector< const Expr *, 4 > DepExprs
EvalResult is a struct with detailed info about an evaluated expression.
Definition: Expr.h:642
Extra information about a function prototype.
Definition: Type.h:4731
Helper expressions and declaration for OMPIteratorExpr class for each iteration space.
Definition: ExprOpenMP.h:111
Expr * CounterUpdate
Updater for the internal counter: ++CounterVD;.
Definition: ExprOpenMP.h:121
Expr * Upper
Normalized upper bound.
Definition: ExprOpenMP.h:116
Expr * Update
Update expression for the originally specified iteration variable, calculated as VD = Begin + Counter...
Definition: ExprOpenMP.h:119
VarDecl * CounterVD
Internal normalized counter.
Definition: ExprOpenMP.h:113
Data for list of allocators.
Expr * AllocatorTraits
Allocator traits.
Scheduling data for loop-based OpenMP directives.
Definition: OpenMPKinds.h:179
OpenMPScheduleClauseModifier M2
Definition: OpenMPKinds.h:182
OpenMPScheduleClauseModifier M1
Definition: OpenMPKinds.h:181
OpenMPScheduleClauseKind Schedule
Definition: OpenMPKinds.h:180
Describes how types, statements, expressions, and declarations should be printed.
Definition: PrettyPrinter.h:57