//===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This provides a class for OpenMP runtime code generation.
//
//===----------------------------------------------------------------------===//

#include "CGOpenMPRuntime.h"
#include "CGCXXABI.h"
#include "CGCleanup.h"
#include "CGRecordLayout.h"
#include "CodeGenFunction.h"
#include "TargetInfo.h"
#include "clang/AST/APValue.h"
#include "clang/AST/Attr.h"
#include "clang/AST/Decl.h"
#include "clang/AST/OpenMPClause.h"
#include "clang/AST/StmtOpenMP.h"
#include "clang/AST/StmtVisitor.h"
#include "clang/Basic/BitmaskEnum.h"
#include "clang/Basic/FileManager.h"
#include "clang/Basic/OpenMPKinds.h"
#include "clang/Basic/SourceManager.h"
#include "clang/CodeGen/ConstantInitBuilder.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SetOperations.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Bitcode/BitcodeReader.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
#include <cstdint>
#include <numeric>
#include <optional>

using namespace clang;
using namespace CodeGen;
using namespace llvm::omp;

namespace {
/// Base class for handling code generation inside OpenMP regions.
class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
public:
  /// Kinds of OpenMP regions used in codegen.
  enum CGOpenMPRegionKind {
    /// Region with outlined function for standalone 'parallel'
    /// directive.
    ParallelOutlinedRegion,
    /// Region with outlined function for standalone 'task' directive.
    TaskOutlinedRegion,
    /// Region for constructs that do not require function outlining,
    /// like 'for', 'sections', 'atomic' etc. directives.
    InlinedRegion,
    /// Region with outlined function for standalone 'target' directive.
    TargetRegion,
  };

  CGOpenMPRegionInfo(const CapturedStmt &CS,
                     const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
        CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}

  CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
        Kind(Kind), HasCancel(HasCancel) {}

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  virtual const VarDecl *getThreadIDVariable() const = 0;

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;

  /// Get an LValue for the current ThreadID variable.
  /// \return LValue for thread id variable. This LValue always has type int32*.
  virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);

  virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}

  CGOpenMPRegionKind getRegionKind() const { return RegionKind; }

  OpenMPDirectiveKind getDirectiveKind() const { return Kind; }

  bool hasCancel() const { return HasCancel; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return Info->getKind() == CR_OpenMP;
  }

  ~CGOpenMPRegionInfo() override = default;

protected:
  CGOpenMPRegionKind RegionKind;
  RegionCodeGenTy CodeGen;
  OpenMPDirectiveKind Kind;
  bool HasCancel;
};

/// API for captured statement code generation in OpenMP constructs.
class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
                             const RegionCodeGenTy &CodeGen,
                             OpenMPDirectiveKind Kind, bool HasCancel,
                             StringRef HelperName)
      : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
                           HasCancel),
        ThreadIDVar(ThreadIDVar), HelperName(HelperName) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               ParallelOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  StringRef HelperName;
};

/// API for captured statement code generation in OpenMP constructs.
class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  class UntiedTaskActionTy final : public PrePostActionTy {
    bool Untied;
    const VarDecl *PartIDVar;
    const RegionCodeGenTy UntiedCodeGen;
    llvm::SwitchInst *UntiedSwitch = nullptr;

  public:
    UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar,
                       const RegionCodeGenTy &UntiedCodeGen)
        : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {}
    void Enter(CodeGenFunction &CGF) override {
      if (Untied) {
        // Emit task switching point.
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        llvm::Value *Res =
            CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation());
        llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done.");
        UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB);
        CGF.EmitBlock(DoneBB);
        CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(0),
                              CGF.Builder.GetInsertBlock());
        emitUntiedSwitch(CGF);
      }
    }
    void emitUntiedSwitch(CodeGenFunction &CGF) const {
      if (Untied) {
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              PartIdLVal);
        UntiedCodeGen(CGF);
        CodeGenFunction::JumpDest CurPoint =
            CGF.getJumpDestInCurrentScope(".untied.next.");
        CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              CGF.Builder.GetInsertBlock());
        CGF.EmitBranchThroughCleanup(CurPoint);
        CGF.EmitBlock(CurPoint.getBlock());
      }
    }
    unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); }
  };
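  // Illustrative sketch (not part of the original source): for an untied task
  // with one task scheduling point, UntiedTaskActionTy shapes the outlined
  // task body roughly as
  //
  //   switch (*part_id) {          // emitted by Enter()
  //   case 0: goto .untied.jmp.0;
  //   case 1: goto .untied.jmp.1;
  //   default: goto .untied.done.;
  //   }
  //   .untied.jmp.0:
  //     ... code up to the scheduling point ...
  //     *part_id = 1;              // emitted by emitUntiedSwitch()
  //     __kmpc_omp_task(...);      // re-enqueue the task, then return
  //   .untied.jmp.1:
  //     ... code after the scheduling point ...
  //
  // so each re-invocation of the task resumes at the part recorded in part_id.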
  CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
                                 const VarDecl *ThreadIDVar,
                                 const RegionCodeGenTy &CodeGen,
                                 OpenMPDirectiveKind Kind, bool HasCancel,
                                 const UntiedTaskActionTy &Action)
      : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
        ThreadIDVar(ThreadIDVar), Action(Action) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return ".omp_outlined."; }

  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    Action.emitUntiedSwitch(CGF);
  }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               TaskOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  /// Action for emitting code for untied tasks.
  const UntiedTaskActionTy &Action;
};

/// API for inlined captured statement code generation in OpenMP
/// constructs.
class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
public:
  CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
                            const RegionCodeGenTy &CodeGen,
                            OpenMPDirectiveKind Kind, bool HasCancel)
      : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
        OldCSI(OldCSI),
        OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}

  // Retrieve the value of the context parameter.
  llvm::Value *getContextValue() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getContextValue();
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  void setContextValue(llvm::Value *V) override {
    if (OuterRegionInfo) {
      OuterRegionInfo->setContextValue(V);
      return;
    }
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  /// Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->lookup(VD);
    // If there is no outer outlined region, there is no need to look up the
    // variable in a list of captured variables; we can use the original one.
    return nullptr;
  }

  FieldDecl *getThisFieldDecl() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThisFieldDecl();
    return nullptr;
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariable();
    return nullptr;
  }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariableLValue(CGF);
    llvm_unreachable("No LValue for inlined OpenMP construct");
  }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override {
    if (auto *OuterRegionInfo = getOldCSI())
      return OuterRegionInfo->getHelperName();
    llvm_unreachable("No helper name for inlined OpenMP construct");
  }

  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      OuterRegionInfo->emitUntiedSwitch(CGF);
  }

  CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
  }

  ~CGOpenMPInlinedRegionInfo() override = default;

private:
  /// CodeGen info about outer OpenMP region.
  CodeGenFunction::CGCapturedStmtInfo *OldCSI;
  CGOpenMPRegionInfo *OuterRegionInfo;
};

/// API for captured statement code generation in OpenMP target
/// constructs. For these captures, implicit parameters are used instead of the
/// captured fields. The name of the target region has to be unique in a given
/// application so it is provided by the client, because only the client has
/// the information to generate that.
class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo {
public:
  CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
                           const RegionCodeGenTy &CodeGen, StringRef HelperName)
      : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
                           /*HasCancel=*/false),
        HelperName(HelperName) {}

  /// This is unused for target regions because each starts executing
  /// with a single thread.
  const VarDecl *getThreadIDVariable() const override { return nullptr; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;
  }

private:
  StringRef HelperName;
};

static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) {
  llvm_unreachable("No codegen for expressions");
}
/// API for generation of expressions captured in an innermost OpenMP
/// region.
class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo {
public:
  CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS)
      : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen,
                                  OMPD_unknown,
                                  /*HasCancel=*/false),
        PrivScope(CGF) {
    // Make sure the globals captured in the provided statement are local by
    // using the privatization logic. We assume the same variable is not
    // captured more than once.
    for (const auto &C : CS.captures()) {
      if (!C.capturesVariable() && !C.capturesVariableByCopy())
        continue;

      const VarDecl *VD = C.getCapturedVar();
      if (VD->isLocalVarDeclOrParm())
        continue;

      DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
                      /*RefersToEnclosingVariableOrCapture=*/false,
                      VD->getType().getNonReferenceType(), VK_LValue,
                      C.getLocation());
      PrivScope.addPrivate(VD, CGF.EmitLValue(&DRE).getAddress(CGF));
    }
    (void)PrivScope.Privatize();
  }

  /// Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD))
      return FD;
    return nullptr;
  }

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override {
    llvm_unreachable("No body for expressions");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    llvm_unreachable("No thread id for expressions");
  }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override {
    llvm_unreachable("No helper name for expressions");
  }

  static bool classof(const CGCapturedStmtInfo *Info) { return false; }

private:
  /// Private scope to capture global variables.
  CodeGenFunction::OMPPrivateScope PrivScope;
};

/// RAII for emitting code of OpenMP constructs.
class InlinedOpenMPRegionRAII {
  CodeGenFunction &CGF;
  llvm::DenseMap<const ValueDecl *, FieldDecl *> LambdaCaptureFields;
  FieldDecl *LambdaThisCaptureField = nullptr;
  const CodeGen::CGBlockInfo *BlockInfo = nullptr;
  bool NoInheritance = false;

public:
  /// Constructs region for combined constructs.
  /// \param CodeGen Code generation sequence for combined directives. Includes
  /// a list of functions used for code generation of implicitly inlined
  /// regions.
  InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
                          OpenMPDirectiveKind Kind, bool HasCancel,
                          bool NoInheritance = true)
      : CGF(CGF), NoInheritance(NoInheritance) {
    // Start emission for the construct.
    CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
        CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
    if (NoInheritance) {
      std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
      LambdaThisCaptureField = CGF.LambdaThisCaptureField;
      CGF.LambdaThisCaptureField = nullptr;
      BlockInfo = CGF.BlockInfo;
      CGF.BlockInfo = nullptr;
    }
  }

  ~InlinedOpenMPRegionRAII() {
    // Restore original CapturedStmtInfo only if we're done with code emission.
    auto *OldCSI =
        cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
    delete CGF.CapturedStmtInfo;
    CGF.CapturedStmtInfo = OldCSI;
    if (NoInheritance) {
      std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
      CGF.LambdaThisCaptureField = LambdaThisCaptureField;
      CGF.BlockInfo = BlockInfo;
    }
  }
};
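
// Illustrative usage (not part of the original source, and simplified): code
// that emits an inlined construct might wrap the emission in this RAII, e.g.
//
//   {
//     InlinedOpenMPRegionRAII Region(CGF, CodeGen, OMPD_for, HasCancel);
//     CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);
//   }
//
// The constructor installs a CGOpenMPInlinedRegionInfo; the destructor
// restores the previous CapturedStmtInfo when the scope ends.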

/// Values for bit flags used in the ident_t to describe the fields.
/// All enumerated elements are named and described in accordance with the code
/// from https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
enum OpenMPLocationFlags : unsigned {
  /// Use trampoline for internal microtask.
  OMP_IDENT_IMD = 0x01,
  /// Use c-style ident structure.
  OMP_IDENT_KMPC = 0x02,
  /// Atomic reduction option for kmpc_reduce.
  OMP_ATOMIC_REDUCE = 0x10,
  /// Explicit 'barrier' directive.
  OMP_IDENT_BARRIER_EXPL = 0x20,
  /// Implicit barrier in code.
  OMP_IDENT_BARRIER_IMPL = 0x40,
  /// Implicit barrier in 'for' directive.
  OMP_IDENT_BARRIER_IMPL_FOR = 0x40,
  /// Implicit barrier in 'sections' directive.
  OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0,
  /// Implicit barrier in 'single' directive.
  OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140,
  /// Call of __kmp_for_static_init for static loop.
  OMP_IDENT_WORK_LOOP = 0x200,
  /// Call of __kmp_for_static_init for sections.
  OMP_IDENT_WORK_SECTIONS = 0x400,
  /// Call of __kmp_for_static_init for distribute.
  OMP_IDENT_WORK_DISTRIBUTE = 0x800,
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE)
};
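
// Illustrative note (not part of the original source): these are bitmask
// values that get OR-ed into the 'flags' field of an ident_t. For example, an
// explicit '#pragma omp barrier' is tagged with OMP_IDENT_BARRIER_EXPL, while
// the implicit barrier ending a worksharing loop uses
// OMP_IDENT_BARRIER_IMPL_FOR; note that 0xC0 and 0x140 deliberately share the
// 0x40 bit with OMP_IDENT_BARRIER_IMPL.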

/// Describes ident structure that describes a source location.
/// All descriptions are taken from
/// https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
/// Original structure:
/// typedef struct ident {
///    kmp_int32 reserved_1;   /**< might be used in Fortran;
///                                 see above */
///    kmp_int32 flags;        /**< also f.flags; KMP_IDENT_xxx flags;
///                                 KMP_IDENT_KMPC identifies this union
///                                 member */
///    kmp_int32 reserved_2;   /**< not really used in Fortran any more;
///                                 see above */
///#if USE_ITT_BUILD
///                            /*  but currently used for storing
///                                region-specific ITT */
///                            /*  contextual information. */
///#endif /* USE_ITT_BUILD */
///    kmp_int32 reserved_3;   /**< source[4] in Fortran, do not use for
///                                 C++ */
///    char const *psource;    /**< String describing the source location.
///                            The string is composed of semi-colon separated
///                            fields which describe the source file,
///                            the function and a pair of line numbers that
///                            delimit the construct.
///                            */
/// } ident_t;
enum IdentFieldIndex {
  /// might be used in Fortran
  IdentField_Reserved_1,
  /// OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
  IdentField_Flags,
  /// Not really used in Fortran any more
  IdentField_Reserved_2,
  /// Source[4] in Fortran, do not use for C++
  IdentField_Reserved_3,
  /// String describing the source location. The string is composed of
  /// semi-colon separated fields which describe the source file, the function
  /// and a pair of line numbers that delimit the construct.
  IdentField_PSource
};

/// Schedule types for 'omp for' loops (these enumerators are taken from
/// the enum sched_type in kmp.h).
enum OpenMPSchedType {
  /// Lower bound for default (unordered) versions.
  OMP_sch_lower = 32,
  OMP_sch_static_chunked = 33,
  OMP_sch_static = 34,
  OMP_sch_dynamic_chunked = 35,
  OMP_sch_guided_chunked = 36,
  OMP_sch_runtime = 37,
  OMP_sch_auto = 38,
  /// static with chunk adjustment (e.g., simd)
  OMP_sch_static_balanced_chunked = 45,
  /// Lower bound for 'ordered' versions.
  OMP_ord_lower = 64,
  OMP_ord_static_chunked = 65,
  OMP_ord_static = 66,
  OMP_ord_dynamic_chunked = 67,
  OMP_ord_guided_chunked = 68,
  OMP_ord_runtime = 69,
  OMP_ord_auto = 70,
  OMP_sch_default = OMP_sch_static,
  /// dist_schedule types
  OMP_dist_sch_static_chunked = 91,
  OMP_dist_sch_static = 92,
  /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
  /// Set if the monotonic schedule modifier was present.
  OMP_sch_modifier_monotonic = (1 << 29),
  /// Set if the nonmonotonic schedule modifier was present.
  OMP_sch_modifier_nonmonotonic = (1 << 30),
};
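
// Illustrative mapping (not part of the original source): a clause such as
// 'schedule(dynamic, 4)' selects OMP_sch_dynamic_chunked with a chunk of 4,
// 'schedule(monotonic: static)' selects
// OMP_sch_static | OMP_sch_modifier_monotonic, and an 'ordered' clause moves
// the schedule into the corresponding OMP_ord_* range.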

/// A basic class for pre|post-action for advanced codegen sequence for OpenMP
/// region.
class CleanupTy final : public EHScopeStack::Cleanup {
  PrePostActionTy *Action;

public:
  explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {}
  void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
    if (!CGF.HaveInsertPoint())
      return;
    Action->Exit(CGF);
  }
};

} // anonymous namespace

void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const {
  CodeGenFunction::RunCleanupsScope Scope(CGF);
  if (PrePostAction) {
    CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction);
    Callback(CodeGen, CGF, *PrePostAction);
  } else {
    PrePostActionTy Action;
    Callback(CodeGen, CGF, Action);
  }
}

/// Check if the combiner is a call to UDR combiner and if it is so return the
/// UDR decl used for reduction.
static const OMPDeclareReductionDecl *
getReductionInit(const Expr *ReductionOp) {
  if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
    if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
      if (const auto *DRE =
              dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
        if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl()))
          return DRD;
  return nullptr;
}

static void emitInitWithReductionInitializer(CodeGenFunction &CGF,
                                             const OMPDeclareReductionDecl *DRD,
                                             const Expr *InitOp,
                                             Address Private, Address Original,
                                             QualType Ty) {
  if (DRD->getInitializer()) {
    std::pair<llvm::Function *, llvm::Function *> Reduction =
        CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
    const auto *CE = cast<CallExpr>(InitOp);
    const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee());
    const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
    const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
    const auto *LHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr());
    const auto *RHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr());
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()), Private);
    PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()), Original);
    (void)PrivateScope.Privatize();
    RValue Func = RValue::get(Reduction.second);
    CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
    CGF.EmitIgnoredExpr(InitOp);
  } else {
    llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty);
    std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"});
    auto *GV = new llvm::GlobalVariable(
        CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
        llvm::GlobalValue::PrivateLinkage, Init, Name);
    LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty);
    RValue InitRVal;
    switch (CGF.getEvaluationKind(Ty)) {
    case TEK_Scalar:
      InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation());
      break;
    case TEK_Complex:
      InitRVal =
          RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation()));
      break;
    case TEK_Aggregate: {
      OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_LValue);
      CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, LV);
      CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
                           /*IsInitializer=*/false);
      return;
    }
    }
    OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_PRValue);
    CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
    CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
                         /*IsInitializer=*/false);
  }
}

/// Emit initialization of arrays of complex types.
/// \param DestAddr Address of the array.
/// \param Type Type of array.
/// \param Init Initial expression of array.
/// \param SrcAddr Address of the original array.
static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
                                 QualType Type, bool EmitDeclareReductionInit,
                                 const Expr *Init,
                                 const OMPDeclareReductionDecl *DRD,
                                 Address SrcAddr = Address::invalid()) {
  // Perform element-by-element initialization.
  QualType ElementTy;

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
  if (DRD)
    SrcAddr = SrcAddr.withElementType(DestAddr.getElementType());

  llvm::Value *SrcBegin = nullptr;
  if (DRD)
    SrcBegin = SrcAddr.getPointer();
  llvm::Value *DestBegin = DestAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *DestEnd =
      CGF.Builder.CreateGEP(DestAddr.getElementType(), DestBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  llvm::PHINode *SrcElementPHI = nullptr;
  Address SrcElementCurrent = Address::invalid();
  if (DRD) {
    SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2,
                                          "omp.arraycpy.srcElementPast");
    SrcElementPHI->addIncoming(SrcBegin, EntryBB);
    SrcElementCurrent =
        Address(SrcElementPHI, SrcAddr.getElementType(),
                SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
  }
  llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
      DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(DestBegin, EntryBB);
  Address DestElementCurrent =
      Address(DestElementPHI, DestAddr.getElementType(),
              DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  {
    CodeGenFunction::RunCleanupsScope InitScope(CGF);
    if (EmitDeclareReductionInit) {
      emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent,
                                       SrcElementCurrent, ElementTy);
    } else
      CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
                           /*IsInitializer=*/false);
  }

  if (DRD) {
    // Shift the address forward by one element.
    llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32(
        SrcAddr.getElementType(), SrcElementPHI, /*Idx0=*/1,
        "omp.arraycpy.dest.element");
    SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock());
  }

  // Shift the address forward by one element.
  llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32(
      DestAddr.getElementType(), DestElementPHI, /*Idx0=*/1,
      "omp.arraycpy.dest.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
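
// Illustrative sketch (not part of the original source) of the control flow
// emitted by EmitOMPAggregateInit, in pseudo-IR:
//
//   entry:
//     %isempty = icmp eq %dest.begin, %dest.end
//     br %isempty, %omp.arrayinit.done, %omp.arrayinit.body
//   omp.arrayinit.body:
//     %dest.cur = phi [ %dest.begin, %entry ], [ %dest.next, %body ]
//     ...initialize the element at %dest.cur...
//     %dest.next = getelementptr %elemty, %dest.cur, 1
//     %done = icmp eq %dest.next, %dest.end
//     br %done, %omp.arrayinit.done, %omp.arrayinit.body
//   omp.arrayinit.done: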

LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) {
  return CGF.EmitOMPSharedLValue(E);
}

LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF,
                                            const Expr *E) {
  if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E))
    return CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false);
  return LValue();
}

void ReductionCodeGen::emitAggregateInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, Address SharedAddr,
    const OMPDeclareReductionDecl *DRD) {
  // Emit VarDecl with copy init for arrays.
  // Get the address of the original variable captured in current
  // captured region.
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  bool EmitDeclareReductionInit =
      DRD && (DRD->getInitializer() || !PrivateVD->hasInit());
  EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(),
                       EmitDeclareReductionInit,
                       EmitDeclareReductionInit ? ClausesData[N].ReductionOp
                                                : PrivateVD->getInit(),
                       DRD, SharedAddr);
}

ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds,
                                   ArrayRef<const Expr *> Origs,
                                   ArrayRef<const Expr *> Privates,
                                   ArrayRef<const Expr *> ReductionOps) {
  ClausesData.reserve(Shareds.size());
  SharedAddresses.reserve(Shareds.size());
  Sizes.reserve(Shareds.size());
  BaseDecls.reserve(Shareds.size());
  const auto *IOrig = Origs.begin();
  const auto *IPriv = Privates.begin();
  const auto *IRed = ReductionOps.begin();
  for (const Expr *Ref : Shareds) {
    ClausesData.emplace_back(Ref, *IOrig, *IPriv, *IRed);
    std::advance(IOrig, 1);
    std::advance(IPriv, 1);
    std::advance(IRed, 1);
  }
}

void ReductionCodeGen::emitSharedOrigLValue(CodeGenFunction &CGF, unsigned N) {
  assert(SharedAddresses.size() == N && OrigAddresses.size() == N &&
         "Number of generated lvalues must be exactly N.");
  LValue First = emitSharedLValue(CGF, ClausesData[N].Shared);
  LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Shared);
  SharedAddresses.emplace_back(First, Second);
  if (ClausesData[N].Shared == ClausesData[N].Ref) {
    OrigAddresses.emplace_back(First, Second);
  } else {
    LValue First = emitSharedLValue(CGF, ClausesData[N].Ref);
    LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref);
    OrigAddresses.emplace_back(First, Second);
  }
}

void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) {
  QualType PrivateType = getPrivateType(N);
  bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref);
  if (!PrivateType->isVariablyModifiedType()) {
    Sizes.emplace_back(
        CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType()),
        nullptr);
    return;
  }
  llvm::Value *Size;
  llvm::Value *SizeInChars;
  auto *ElemType = OrigAddresses[N].first.getAddress(CGF).getElementType();
  auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType);
  if (AsArraySection) {
    Size = CGF.Builder.CreatePtrDiff(ElemType,
                                     OrigAddresses[N].second.getPointer(CGF),
                                     OrigAddresses[N].first.getPointer(CGF));
    Size = CGF.Builder.CreateNUWAdd(
        Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
    SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf);
  } else {
    SizeInChars =
        CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType());
    Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf);
  }
  Sizes.emplace_back(SizeInChars, Size);
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}

void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N,
                                         llvm::Value *Size) {
  QualType PrivateType = getPrivateType(N);
  if (!PrivateType->isVariablyModifiedType()) {
    assert(!Size && !Sizes[N].second &&
           "Size should be nullptr for non-variably modified reduction "
           "items.");
    return;
  }
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}

void ReductionCodeGen::emitInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, Address SharedAddr,
    llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) {
  assert(SharedAddresses.size() > N && "No variable was generated");
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  if (CGF.getContext().getAsArrayType(PrivateVD->getType())) {
    if (DRD && DRD->getInitializer())
      (void)DefaultInit(CGF);
    emitAggregateInitialization(CGF, N, PrivateAddr, SharedAddr, DRD);
  } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
    (void)DefaultInit(CGF);
    QualType SharedType = SharedAddresses[N].first.getType();
    emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp,
                                     PrivateAddr, SharedAddr, SharedType);
  } else if (!DefaultInit(CGF) && PrivateVD->hasInit() &&
             !CGF.isTrivialInitializer(PrivateVD->getInit())) {
    CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr,
                         PrivateVD->getType().getQualifiers(),
                         /*IsInitializer=*/false);
  }
}

bool ReductionCodeGen::needCleanups(unsigned N) {
  QualType PrivateType = getPrivateType(N);
  QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
  return DTorKind != QualType::DK_none;
}

void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N,
                                    Address PrivateAddr) {
  QualType PrivateType = getPrivateType(N);
  QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
  if (needCleanups(N)) {
    PrivateAddr =
        PrivateAddr.withElementType(CGF.ConvertTypeForMem(PrivateType));
    CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType);
  }
}

static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          LValue BaseLV) {
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    if (const auto *PtrTy = BaseTy->getAs<PointerType>()) {
      BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(CGF), PtrTy);
    } else {
      LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(CGF), BaseTy);
      BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal);
    }
    BaseTy = BaseTy->getPointeeType();
  }
  return CGF.MakeAddrLValue(
      BaseLV.getAddress(CGF).withElementType(CGF.ConvertTypeForMem(ElTy)),
      BaseLV.getType(), BaseLV.getBaseInfo(),
      CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType()));
}

static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          Address OriginalBaseAddress, llvm::Value *Addr) {
  Address Tmp = Address::invalid();
  Address TopTmp = Address::invalid();
  Address MostTopTmp = Address::invalid();
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    Tmp = CGF.CreateMemTemp(BaseTy);
    if (TopTmp.isValid())
      CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
    else
      MostTopTmp = Tmp;
    TopTmp = Tmp;
    BaseTy = BaseTy->getPointeeType();
  }

  if (Tmp.isValid()) {
    Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        Addr, Tmp.getElementType());
    CGF.Builder.CreateStore(Addr, Tmp);
    return MostTopTmp;
  }

  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Addr, OriginalBaseAddress.getType());
  return OriginalBaseAddress.withPointer(Addr, NotKnownNonNull);
}

static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) {
  const VarDecl *OrigVD = nullptr;
  if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(Ref)) {
    const Expr *Base = OASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base))
      Base = TempOASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
      Base = TempASE->getBase()->IgnoreParenImpCasts();
    DE = cast<DeclRefExpr>(Base);
    OrigVD = cast<VarDecl>(DE->getDecl());
  } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) {
    const Expr *Base = ASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
      Base = TempASE->getBase()->IgnoreParenImpCasts();
    DE = cast<DeclRefExpr>(Base);
    OrigVD = cast<VarDecl>(DE->getDecl());
  }
  return OrigVD;
}

Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N,
                                               Address PrivateAddr) {
  const DeclRefExpr *DE;
  if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) {
    BaseDecls.emplace_back(OrigVD);
    LValue OriginalBaseLValue = CGF.EmitLValue(DE);
    LValue BaseLValue =
        loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(),
                    OriginalBaseLValue);
    Address SharedAddr = SharedAddresses[N].first.getAddress(CGF);
    llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff(
        SharedAddr.getElementType(), BaseLValue.getPointer(CGF),
        SharedAddr.getPointer());
    llvm::Value *PrivatePointer =
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            PrivateAddr.getPointer(), SharedAddr.getType());
    llvm::Value *Ptr = CGF.Builder.CreateGEP(
        SharedAddr.getElementType(), PrivatePointer, Adjustment);
    return castToBase(CGF, OrigVD->getType(),
                      SharedAddresses[N].first.getType(),
                      OriginalBaseLValue.getAddress(CGF), Ptr);
  }
  BaseDecls.emplace_back(
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl()));
  return PrivateAddr;
}

bool ReductionCodeGen::usesReductionInitializer(unsigned N) const {
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  return DRD && DRD->getInitializer();
}

LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
  return CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(getThreadIDVariable()),
      getThreadIDVariable()->getType()->castAs<PointerType>());
}

void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt *S) {
  if (!CGF.HaveInsertPoint())
    return;
  // 1.2.2 OpenMP Language Terminology
  // Structured block - An executable statement with a single entry at the
  // top and a single exit at the bottom.
  // The point of exit cannot be a branch out of the structured block.
  // longjmp() and throw() must not violate the entry/exit criteria.
  CGF.EHStack.pushTerminate();
  if (S)
    CGF.EmitStmt(S);
  CodeGen(CGF);
  CGF.EHStack.popTerminate();
}

LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
    CodeGenFunction &CGF) {
  return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
                            getThreadIDVariable()->getType(),
                            AlignmentSource::Decl);
}

static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
                                       QualType FieldTy) {
  auto *Field = FieldDecl::Create(
      C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
      C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
      /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
  Field->setAccess(AS_public);
  DC->addDecl(Field);
  return Field;
}

CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM)
    : CGM(CGM), OMPBuilder(CGM.getModule()) {
  KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);
  llvm::OpenMPIRBuilderConfig Config(
      CGM.getLangOpts().OpenMPIsTargetDevice, isGPU(),
      CGM.getLangOpts().OpenMPOffloadMandatory,
      /*HasRequiresReverseOffload*/ false, /*HasRequiresUnifiedAddress*/ false,
      hasRequiresUnifiedSharedMemory(), /*HasRequiresDynamicAllocators*/ false);
  OMPBuilder.initialize();
  OMPBuilder.loadOffloadInfoMetadata(CGM.getLangOpts().OpenMPIsTargetDevice
                                         ? CGM.getLangOpts().OMPHostIRFile
                                         : StringRef{});
  OMPBuilder.setConfig(Config);

  // The user forces the compiler to behave as if omp requires
  // unified_shared_memory was given.
  if (CGM.getLangOpts().OpenMPForceUSM) {
    HasRequiresUnifiedSharedMemory = true;
    OMPBuilder.Config.setHasRequiresUnifiedSharedMemory(true);
  }
}

void CGOpenMPRuntime::clear() {
  InternalVars.clear();
  // Clean non-target variable declarations possibly used only in debug info.
  for (const auto &Data : EmittedNonTargetVariables) {
    if (!Data.getValue().pointsToAliveValue())
      continue;
    auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue());
    if (!GV)
      continue;
    if (!GV->isDeclaration() || GV->getNumUses() > 0)
      continue;
    GV->eraseFromParent();
  }
}

std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const {
  return OMPBuilder.createPlatformSpecificName(Parts);
}

static llvm::Function *
emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,
                          const Expr *CombinerInitializer, const VarDecl *In,
                          const VarDecl *Out, bool IsCombiner) {
  // void .omp_combiner.(Ty *in, Ty *out);
  ASTContext &C = CGM.getContext();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  FunctionArgList Args;
  ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(),
                               /*Id=*/nullptr, PtrTy, ImplicitParamKind::Other);
  ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(),
                              /*Id=*/nullptr, PtrTy, ImplicitParamKind::Other);
  Args.push_back(&OmpOutParm);
  Args.push_back(&OmpInParm);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName(
      {IsCombiner ? "omp_combiner" : "omp_initializer", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  if (CGM.getLangOpts().Optimize) {
    Fn->removeFnAttr(llvm::Attribute::NoInline);
    Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
    Fn->addFnAttr(llvm::Attribute::AlwaysInline);
  }
  CodeGenFunction CGF(CGM);
  // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
  // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(),
                    Out->getLocation());
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm);
  Scope.addPrivate(
      In, CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>())
              .getAddress(CGF));
  Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm);
  Scope.addPrivate(
      Out, CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>())
               .getAddress(CGF));
  (void)Scope.Privatize();
  if (!IsCombiner && Out->hasInit() &&
      !CGF.isTrivialInitializer(Out->getInit())) {
    CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out),
                         Out->getType().getQualifiers(),
                         /*IsInitializer=*/true);
  }
  if (CombinerInitializer)
    CGF.EmitIgnoredExpr(CombinerInitializer);
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}

void CGOpenMPRuntime::emitUserDefinedReduction(
    CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) {
  if (UDRMap.count(D) > 0)
    return;
  llvm::Function *Combiner = emitCombinerOrInitializer(
      CGM, D->getType(), D->getCombiner(),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerIn())->getDecl()),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerOut())->getDecl()),
      /*IsCombiner=*/true);
  llvm::Function *Initializer = nullptr;
  if (const Expr *Init = D->getInitializer()) {
    Initializer = emitCombinerOrInitializer(
        CGM, D->getType(),
        D->getInitializerKind() == OMPDeclareReductionInitKind::Call ? Init
                                                                     : nullptr,
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()),
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()),
        /*IsCombiner=*/false);
  }
  UDRMap.try_emplace(D, Combiner, Initializer);
  if (CGF) {
    auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn);
    Decls.second.push_back(D);
  }
}

std::pair<llvm::Function *, llvm::Function *>
CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) {
  auto I = UDRMap.find(D);
  if (I != UDRMap.end())
    return I->second;
  emitUserDefinedReduction(/*CGF=*/nullptr, D);
  return UDRMap.lookup(D);
}

namespace {
// Temporary RAII solution to perform a push/pop stack event on the OpenMP IR
// Builder if one is present.
struct PushAndPopStackRAII {
  PushAndPopStackRAII(llvm::OpenMPIRBuilder *OMPBuilder, CodeGenFunction &CGF,
                      bool HasCancel, llvm::omp::Directive Kind)
      : OMPBuilder(OMPBuilder) {
    if (!OMPBuilder)
      return;

    // The following callback is the crucial part of clang's cleanup process.
    //
    // NOTE:
    // Once the OpenMPIRBuilder is used to create parallel regions (and
    // similar), the cancellation destination (Dest below) is determined via
    // IP. That means if we have variables to finalize we split the block at IP,
    // use the new block (=BB) as destination to build a JumpDest (via
    // getJumpDestInCurrentScope(BB)) which then is fed to
    // EmitBranchThroughCleanup. Furthermore, there will not be the need
    // to push & pop a FinalizationInfo object.
    // The FiniCB will still be needed but at the point where the
    // OpenMPIRBuilder is asked to construct a parallel (or similar) construct.
    auto FiniCB = [&CGF](llvm::OpenMPIRBuilder::InsertPointTy IP) {
      assert(IP.getBlock()->end() == IP.getPoint() &&
             "Clang CG should cause non-terminated block!");
      CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
      CGF.Builder.restoreIP(IP);
      CodeGenFunction::JumpDest Dest =
          CGF.getOMPCancelDestination(OMPD_parallel);
      CGF.EmitBranchThroughCleanup(Dest);
    };

    // TODO: Remove this once we emit parallel regions through the
    // OpenMPIRBuilder as it can do this setup internally.
    llvm::OpenMPIRBuilder::FinalizationInfo FI({FiniCB, Kind, HasCancel});
    OMPBuilder->pushFinalizationCB(std::move(FI));
  }
  ~PushAndPopStackRAII() {
    if (OMPBuilder)
      OMPBuilder->popFinalizationCB();
  }
  llvm::OpenMPIRBuilder *OMPBuilder;
};
} // namespace

static llvm::Function *emitParallelOrTeamsOutlinedFunction(
    CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) {
  assert(ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 *");
  CodeGenFunction CGF(CGM, true);
  bool HasCancel = false;
  if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D))
    HasCancel = OPD->hasCancel();
  else if (const auto *OPD = dyn_cast<OMPTargetParallelDirective>(&D))
    HasCancel = OPD->hasCancel();
  else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D))
    HasCancel = OPSD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD =
               dyn_cast<OMPTeamsDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD =
               dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();

  // TODO: Temporarily inform the OpenMPIRBuilder, if any, about the new
  // parallel region to make cancellation barriers work properly.
  llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
  PushAndPopStackRAII PSR(&OMPBuilder, CGF, HasCancel, InnermostKind);
  CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
                                    HasCancel, OutlinedHelperName);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  return CGF.GenerateOpenMPCapturedStmtFunction(*CS, D.getBeginLoc());
}

std::string CGOpenMPRuntime::getOutlinedHelperName(StringRef Name) const {
  std::string Suffix = getName({"omp_outlined"});
  return (Name + Suffix).str();
}

std::string CGOpenMPRuntime::getOutlinedHelperName(CodeGenFunction &CGF) const {
  return getOutlinedHelperName(CGF.CurFn->getName());
}

std::string CGOpenMPRuntime::getReductionFuncName(StringRef Name) const {
  std::string Suffix = getName({"omp", "reduction", "reduction_func"});
  return (Name + Suffix).str();
}

llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const RegionCodeGenTy &CodeGen) {
  const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel);
  return emitParallelOrTeamsOutlinedFunction(
      CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(CGF),
      CodeGen);
}

llvm::Function *CGOpenMPRuntime::emitTeamsOutlinedFunction(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const RegionCodeGenTy &CodeGen) {
  const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams);
  return emitParallelOrTeamsOutlinedFunction(
      CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(CGF),
      CodeGen);
}

llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF,
                                              PrePostActionTy &) {
    llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc());
    llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getBeginLoc());
    llvm::Value *TaskArgs[] = {
        UpLoc, ThreadID,
        CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar),
                                    TaskTVar->getType()->castAs<PointerType>())
            .getPointer(CGF)};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_omp_task),
                        TaskArgs);
  };
  CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar,
                                                            UntiedCodeGen);
  CodeGen.setAction(Action);
  assert(!ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 for tasks");
  const OpenMPDirectiveKind Region =
      isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop
                                                      : OMPD_task;
  const CapturedStmt *CS = D.getCapturedStmt(Region);
  bool HasCancel = false;
  if (const auto *TD = dyn_cast<OMPTaskDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPParallelMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();

  CodeGenFunction CGF(CGM, true);
  CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
                                        InnermostKind, HasCancel, Action);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  llvm::Function *Res = CGF.GenerateCapturedStmtFunction(*CS);
  if (!Tied)
    NumberOfParts = Action.getNumberOfParts();
  return Res;
}

void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF,
                                             bool AtCurrentPoint) {
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  assert(!Elem.second.ServiceInsertPt && "Insert point is set already.");

  llvm::Value *Undef = llvm::UndefValue::get(CGF.Int32Ty);
  if (AtCurrentPoint) {
    Elem.second.ServiceInsertPt = new llvm::BitCastInst(
        Undef, CGF.Int32Ty, "svcpt", CGF.Builder.GetInsertBlock());
  } else {
    Elem.second.ServiceInsertPt =
        new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt");
    Elem.second.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt);
  }
}

void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) {
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  if (Elem.second.ServiceInsertPt) {
    llvm::Instruction *Ptr = Elem.second.ServiceInsertPt;
    Elem.second.ServiceInsertPt = nullptr;
    Ptr->eraseFromParent();
  }
}

static StringRef getIdentStringFromSourceLocation(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  SmallString<128> &Buffer) {
  llvm::raw_svector_ostream OS(Buffer);
  // Build debug location
  PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
  OS << ";" << PLoc.getFilename() << ";";
  if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
    OS << FD->getQualifiedNameAsString();
  OS << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
  return OS.str();
}
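
// Illustrative example (not part of the original source): for a location at
// line 42, column 9 of foo.cpp inside function ns::bar, the routine above
// produces ";foo.cpp;ns::bar;42;9;;", i.e. the semicolon-separated psource
// format documented for ident_t.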

llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
                                                 SourceLocation Loc,
                                                 unsigned Flags, bool EmitLoc) {
  uint32_t SrcLocStrSize;
  llvm::Constant *SrcLocStr;
  if ((!EmitLoc && CGM.getCodeGenOpts().getDebugInfo() ==
                       llvm::codegenoptions::NoDebugInfo) ||
      Loc.isInvalid()) {
    SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize);
  } else {
    std::string FunctionName;
    if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
      FunctionName = FD->getQualifiedNameAsString();
    PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
    const char *FileName = PLoc.getFilename();
    unsigned Line = PLoc.getLine();
    unsigned Column = PLoc.getColumn();
    SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(FunctionName, FileName, Line,
                                                Column, SrcLocStrSize);
  }
  unsigned Reserved2Flags = getDefaultLocationReserved2Flags();
  return OMPBuilder.getOrCreateIdent(
      SrcLocStr, SrcLocStrSize, llvm::omp::IdentFlag(Flags), Reserved2Flags);
}

llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
                                          SourceLocation Loc) {
  assert(CGF.CurFn && "No function in current CodeGenFunction.");
  // If the OpenMPIRBuilder is used we need to use it for all thread id calls as
  // the clang invariants used below might be broken.
  if (CGM.getLangOpts().OpenMPIRBuilder) {
    SmallString<128> Buffer;
    OMPBuilder.updateToLocation(CGF.Builder.saveIP());
    uint32_t SrcLocStrSize;
    auto *SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(
        getIdentStringFromSourceLocation(CGF, Loc, Buffer), SrcLocStrSize);
    return OMPBuilder.getOrCreateThreadID(
        OMPBuilder.getOrCreateIdent(SrcLocStr, SrcLocStrSize));
  }

  llvm::Value *ThreadID = nullptr;
  // Check whether we've already cached a load of the thread id in this
  // function.
  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
  if (I != OpenMPLocThreadIDMap.end()) {
    ThreadID = I->second.ThreadID;
    if (ThreadID != nullptr)
      return ThreadID;
  }
  // If exceptions are enabled, do not use parameter to avoid possible crash.
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    if (OMPRegionInfo->getThreadIDVariable()) {
      // Check if this an outlined function with thread id passed as argument.
      LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
      llvm::BasicBlock *TopBlock = CGF.AllocaInsertPt->getParent();
      if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions ||
          !CGF.getLangOpts().CXXExceptions ||
          CGF.Builder.GetInsertBlock() == TopBlock ||
          !isa<llvm::Instruction>(LVal.getPointer(CGF)) ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              TopBlock ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              CGF.Builder.GetInsertBlock()) {
        ThreadID = CGF.EmitLoadOfScalar(LVal, Loc);
        // If value loaded in entry block, cache it and use it everywhere in
        // function.
        if (CGF.Builder.GetInsertBlock() == TopBlock) {
          auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
          Elem.second.ThreadID = ThreadID;
        }
        return ThreadID;
      }
    }
  }

  // This is not an outlined function region - need to call __kmpc_int32
  // kmpc_global_thread_num(ident_t *loc).
  // Generate thread id value and cache this value for use across the
  // function.
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  if (!Elem.second.ServiceInsertPt)
    setLocThreadIdInsertPt(CGF);
  CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
  CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
  auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
  llvm::CallInst *Call = CGF.Builder.CreateCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___kmpc_global_thread_num),
      emitUpdateLocation(CGF, Loc));
  Call->setCallingConv(CGF.getRuntimeCC());
  Elem.second.ThreadID = Call;
  return Call;
}

void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) {
  assert(CGF.CurFn && "No function in current CodeGenFunction.");
  if (OpenMPLocThreadIDMap.count(CGF.CurFn)) {
    clearLocThreadIdInsertPt(CGF);
    OpenMPLocThreadIDMap.erase(CGF.CurFn);
  }
  if (FunctionUDRMap.count(CGF.CurFn) > 0) {
    for (const auto *D : FunctionUDRMap[CGF.CurFn])
      UDRMap.erase(D);
    FunctionUDRMap.erase(CGF.CurFn);
  }
  auto I = FunctionUDMMap.find(CGF.CurFn);
  if (I != FunctionUDMMap.end()) {
    for (const auto *D : I->second)
      UDMMap.erase(D);
    FunctionUDMMap.erase(I);
  }
  LastprivateConditionalToTypes.erase(CGF.CurFn);
  FunctionToUntiedTaskStackMap.erase(CGF.CurFn);
}

llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() {
  return OMPBuilder.IdentPtr;
}

llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() {
  if (!Kmpc_MicroTy) {
    // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...)
    llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty),
                                 llvm::PointerType::getUnqual(CGM.Int32Ty)};
    Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true);
  }
  return llvm::PointerType::getUnqual(Kmpc_MicroTy);
}
1495
1496llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseKind
1498 std::optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
1499 OMPDeclareTargetDeclAttr::getDeviceType(VD);
1500 if (!DevTy)
1501 return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseNone;
1502
1503 switch ((int)*DevTy) { // Avoid -Wcovered-switch-default
1504 case OMPDeclareTargetDeclAttr::DT_Host:
1505 return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseHost;
1506 break;
1507 case OMPDeclareTargetDeclAttr::DT_NoHost:
1508 return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseNoHost;
1509 break;
1510 case OMPDeclareTargetDeclAttr::DT_Any:
1511 return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseAny;
1512 break;
1513 default:
1514 return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseNone;
1515 break;
1516 }
1517}
1518
1519llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind
1521 std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> MapType =
1522 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
1523 if (!MapType)
1524 return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryNone;
1525 switch ((int)*MapType) { // Avoid -Wcovered-switch-default
1526 case OMPDeclareTargetDeclAttr::MapTypeTy::MT_To:
1527 return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryTo;
1528 break;
1529 case OMPDeclareTargetDeclAttr::MapTypeTy::MT_Enter:
1530 return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryEnter;
1531 break;
1532 case OMPDeclareTargetDeclAttr::MapTypeTy::MT_Link:
1533 return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryLink;
1534 break;
1535 default:
1536 return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryNone;
1537 break;
1538 }
1539}
1540
1541static llvm::TargetRegionEntryInfo getEntryInfoFromPresumedLoc(
1542 CodeGenModule &CGM, llvm::OpenMPIRBuilder &OMPBuilder,
1543 SourceLocation BeginLoc, llvm::StringRef ParentName = "") {
1544
1545 auto FileInfoCallBack = [&]() {
1546 SourceManager &SM = CGM.getContext().getSourceManager();
1547 PresumedLoc PLoc = SM.getPresumedLoc(BeginLoc);
1548
1549 llvm::sys::fs::UniqueID ID;
1550 if (llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID)) {
1551 PLoc = SM.getPresumedLoc(BeginLoc, /*UseLineDirectives=*/false);
1552 }
1553
1554 return std::pair<std::string, uint64_t>(PLoc.getFilename(), PLoc.getLine());
1555 };
1556
1557 return OMPBuilder.getTargetEntryUniqueInfo(FileInfoCallBack, ParentName);
1558}
1559
1560 Address CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) {
1561 auto AddrOfGlobal = [&VD, this]() { return CGM.GetAddrOfGlobal(VD); };
1562
1563 auto LinkageForVariable = [&VD, this]() {
1564 return CGM.getLLVMLinkageVarDefinition(VD);
1565 };
1566
1567 std::vector<llvm::GlobalVariable *> GeneratedRefs;
1568
1569 llvm::Type *LlvmPtrTy = CGM.getTypes().ConvertTypeForMem(
1570 CGM.getContext().getPointerType(VD->getType()));
1571 llvm::Constant *addr = OMPBuilder.getAddrOfDeclareTargetVar(
1574 VD->isExternallyVisible(),
1575 getEntryInfoFromPresumedLoc(CGM, OMPBuilder,
1576 VD->getCanonicalDecl()->getBeginLoc()),
1577 CGM.getMangledName(VD), GeneratedRefs, CGM.getLangOpts().OpenMPSimd,
1578 CGM.getLangOpts().OMPTargetTriples, LlvmPtrTy, AddrOfGlobal,
1579 LinkageForVariable);
1580
1581 if (!addr)
1582 return Address::invalid();
1583 return Address(addr, LlvmPtrTy, CGM.getContext().getDeclAlign(VD));
1584}
1585
1586llvm::Constant *
1587 CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
1588 assert(!CGM.getLangOpts().OpenMPUseTLS ||
1589 !CGM.getContext().getTargetInfo().isTLSSupported());
1590 // Lookup the entry, lazily creating it if necessary.
1591 std::string Suffix = getName({"cache", ""});
1592 return OMPBuilder.getOrCreateInternalVariable(
1593 CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix).str());
1594}
1595
1596 Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
1597 const VarDecl *VD,
1598 Address VDAddr,
1599 SourceLocation Loc) {
1600 if (CGM.getLangOpts().OpenMPUseTLS &&
1601 CGM.getContext().getTargetInfo().isTLSSupported())
1602 return VDAddr;
1603
1604 llvm::Type *VarTy = VDAddr.getElementType();
1605 llvm::Value *Args[] = {
1606 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
1607 CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.Int8PtrTy),
1608 CGM.getSize(CGM.getContext().getTypeSizeInChars(VD->getType())),
1609 getOrCreateThreadPrivateCache(VD)};
1610 return Address(
1611 CGF.EmitRuntimeCall(
1612 OMPBuilder.getOrCreateRuntimeFunction(
1613 CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
1614 Args),
1615 CGF.Int8Ty, VDAddr.getAlignment());
1616}
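// Editorial sketch (not part of the source, names hypothetical): when TLS is
// unavailable, an access to a threadprivate variable
// \code
// int x;
// #pragma omp threadprivate(x)
// \endcode
// is routed through the runtime's per-thread cache, conceptually:
// \code
// int *px = (int *)__kmpc_threadprivate_cached(&loc, gtid, (void *)&x,
//                                              sizeof(x), &x.cache.);
// use(*px); // each thread sees its own copy of 'x'
// \endcode
// The cache variable follows the getName({"cache", ""}) naming used by
// getOrCreateThreadPrivateCache above.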
1617
1618 void CGOpenMPRuntime::emitThreadPrivateVarInit(
1619 CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor,
1620 llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
1621 // Call kmp_int32 __kmpc_global_thread_num(&loc) to initialize the OpenMP
1622 // runtime library.
1623 llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc);
1624 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
1625 CGM.getModule(), OMPRTL___kmpc_global_thread_num),
1626 OMPLoc);
1627 // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
1628 // to register constructor/destructor for variable.
1629 llvm::Value *Args[] = {
1630 OMPLoc, CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.VoidPtrTy),
1631 Ctor, CopyCtor, Dtor};
1632 CGF.EmitRuntimeCall(
1633 OMPBuilder.getOrCreateRuntimeFunction(
1634 CGM.getModule(), OMPRTL___kmpc_threadprivate_register),
1635 Args);
1636}
1637
1638 llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
1639 const VarDecl *VD, Address VDAddr, SourceLocation Loc,
1640 bool PerformInit, CodeGenFunction *CGF) {
1641 if (CGM.getLangOpts().OpenMPUseTLS &&
1642 CGM.getContext().getTargetInfo().isTLSSupported())
1643 return nullptr;
1644
1645 VD = VD->getDefinition(CGM.getContext());
1646 if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) {
1647 QualType ASTTy = VD->getType();
1648
1649 llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
1650 const Expr *Init = VD->getAnyInitializer();
1651 if (CGM.getLangOpts().CPlusPlus && PerformInit) {
1652 // Generate function that re-emits the declaration's initializer into the
1653 // threadprivate copy of the variable VD
1654 CodeGenFunction CtorCGF(CGM);
1655 FunctionArgList Args;
1656 ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
1657 /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
1658 ImplicitParamKind::Other);
1659 Args.push_back(&Dst);
1660
1661 const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
1662 CGM.getContext().VoidPtrTy, Args);
1663 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
1664 std::string Name = getName({"__kmpc_global_ctor_", ""});
1665 llvm::Function *Fn =
1666 CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
1667 CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
1668 Args, Loc, Loc);
1669 llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar(
1670 CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
1671 CGM.getContext().VoidPtrTy, Dst.getLocation());
1672 Address Arg(ArgVal, CtorCGF.ConvertTypeForMem(ASTTy),
1673 VDAddr.getAlignment());
1674 CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
1675 /*IsInitializer=*/true);
1676 ArgVal = CtorCGF.EmitLoadOfScalar(
1677 CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
1678 CGM.getContext().VoidPtrTy, Dst.getLocation());
1679 CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
1680 CtorCGF.FinishFunction();
1681 Ctor = Fn;
1682 }
1683 if (VD->getType().isDestructedType() != QualType::DK_none) {
1684 // Generate function that emits destructor call for the threadprivate copy
1685 // of the variable VD
1686 CodeGenFunction DtorCGF(CGM);
1687 FunctionArgList Args;
1688 ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
1689 /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
1690 ImplicitParamKind::Other);
1691 Args.push_back(&Dst);
1692
1693 const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
1694 CGM.getContext().VoidTy, Args);
1695 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
1696 std::string Name = getName({"__kmpc_global_dtor_", ""});
1697 llvm::Function *Fn =
1698 CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
1699 auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
1700 DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
1701 Loc, Loc);
1702 // Create a scope with an artificial location for the body of this function.
1703 auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
1704 llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar(
1705 DtorCGF.GetAddrOfLocalVar(&Dst),
1706 /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation());
1707 DtorCGF.emitDestroy(
1708 Address(ArgVal, DtorCGF.Int8Ty, VDAddr.getAlignment()), ASTTy,
1709 DtorCGF.getDestroyer(ASTTy.isDestructedType()),
1710 DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
1711 DtorCGF.FinishFunction();
1712 Dtor = Fn;
1713 }
1714 // Do not emit init function if it is not required.
1715 if (!Ctor && !Dtor)
1716 return nullptr;
1717
1718 llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
1719 auto *CopyCtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs,
1720 /*isVarArg=*/false)
1721 ->getPointerTo();
1722 // Copying constructor for the threadprivate variable.
1723 // Must be NULL: the parameter is reserved by the runtime, which currently
1724 // requires it to always be NULL and asserts otherwise.
1725 CopyCtor = llvm::Constant::getNullValue(CopyCtorTy);
1726 if (Ctor == nullptr) {
1727 auto *CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
1728 /*isVarArg=*/false)
1729 ->getPointerTo();
1730 Ctor = llvm::Constant::getNullValue(CtorTy);
1731 }
1732 if (Dtor == nullptr) {
1733 auto *DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy,
1734 /*isVarArg=*/false)
1735 ->getPointerTo();
1736 Dtor = llvm::Constant::getNullValue(DtorTy);
1737 }
1738 if (!CGF) {
1739 auto *InitFunctionTy =
1740 llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
1741 std::string Name = getName({"__omp_threadprivate_init_", ""});
1742 llvm::Function *InitFunction = CGM.CreateGlobalInitOrCleanUpFunction(
1743 InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction());
1744 CodeGenFunction InitCGF(CGM);
1745 FunctionArgList ArgList;
1746 InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
1747 CGM.getTypes().arrangeNullaryFunction(), ArgList,
1748 Loc, Loc);
1749 emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
1750 InitCGF.FinishFunction();
1751 return InitFunction;
1752 }
1753 emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
1754 }
1755 return nullptr;
1756}
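// Editorial sketch: for a hypothetical C++ threadprivate with non-trivial
// construction and destruction,
// \code
// struct S { S(); ~S(); };
// S s;
// #pragma omp threadprivate(s)
// \endcode
// the initializer emitted above (when TLS is not used) conceptually runs:
// \code
// __kmpc_global_thread_num(&loc); // force runtime initialization
// __kmpc_threadprivate_register(&loc, &s, __kmpc_global_ctor_.,
//                               /*cctor=*/NULL, __kmpc_global_dtor_.);
// \endcode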
1757
1758 void CGOpenMPRuntime::emitDeclareTargetFunction(const FunctionDecl *FD,
1759 llvm::GlobalValue *GV) {
1760 std::optional<OMPDeclareTargetDeclAttr *> ActiveAttr =
1761 OMPDeclareTargetDeclAttr::getActiveAttr(FD);
1762
1763 // We only need to handle active 'indirect' declare target functions.
1764 if (!ActiveAttr || !(*ActiveAttr)->getIndirect())
1765 return;
1766
1767 // Get a mangled name to store the new device global in.
1768 llvm::TargetRegionEntryInfo EntryInfo = getEntryInfoFromPresumedLoc(
1769 CGM, OMPBuilder, FD->getCanonicalDecl()->getBeginLoc(), FD->getName());
1770 SmallString<128> Name;
1771 OMPBuilder.OffloadInfoManager.getTargetRegionEntryFnName(Name, EntryInfo);
1772
1773 // We need to generate a new global to hold the address of the indirectly
1774 // called device function. Doing this allows us to keep the visibility and
1775 // linkage of the associated function unchanged while allowing the runtime to
1776 // access its value.
1777 llvm::GlobalValue *Addr = GV;
1778 if (CGM.getLangOpts().OpenMPIsTargetDevice) {
1779 Addr = new llvm::GlobalVariable(
1780 CGM.getModule(), CGM.VoidPtrTy,
1781 /*isConstant=*/true, llvm::GlobalValue::ExternalLinkage, GV, Name,
1782 nullptr, llvm::GlobalValue::NotThreadLocal,
1783 CGM.getModule().getDataLayout().getDefaultGlobalsAddressSpace());
1784 Addr->setVisibility(llvm::GlobalValue::ProtectedVisibility);
1785 }
1786
1787 OMPBuilder.OffloadInfoManager.registerDeviceGlobalVarEntryInfo(
1788 Name, Addr, CGM.GetTargetTypeStoreSize(CGM.VoidPtrTy).getQuantity(),
1789 llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryIndirect,
1790 llvm::GlobalValue::WeakODRLinkage);
1791}
1792
1793 Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF,
1794 QualType VarType,
1795 StringRef Name) {
1796 std::string Suffix = getName({"artificial", ""});
1797 llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType);
1798 llvm::GlobalVariable *GAddr = OMPBuilder.getOrCreateInternalVariable(
1799 VarLVType, Twine(Name).concat(Suffix).str());
1800 if (CGM.getLangOpts().OpenMP && CGM.getLangOpts().OpenMPUseTLS &&
1801 CGM.getContext().getTargetInfo().isTLSSupported()) {
1802 GAddr->setThreadLocal(/*Val=*/true);
1803 return Address(GAddr, GAddr->getValueType(),
1804 CGM.getContext().getTypeAlignInChars(VarType));
1805 }
1806 std::string CacheSuffix = getName({"cache", ""});
1807 llvm::Value *Args[] = {
1808 emitUpdateLocation(CGF, SourceLocation()),
1809 getThreadID(CGF, SourceLocation()),
1810 CGF.Builder.CreatePointerCast(GAddr, CGM.VoidPtrTy),
1811 CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy,
1812 /*isSigned=*/false),
1813 OMPBuilder.getOrCreateInternalVariable(
1814 CGM.VoidPtrPtrTy,
1815 Twine(Name).concat(Suffix).concat(CacheSuffix).str())};
1816 return Address(
1817 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
1818 CGF.EmitRuntimeCall(
1819 OMPBuilder.getOrCreateRuntimeFunction(
1820 CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
1821 Args),
1822 VarLVType->getPointerTo(/*AddrSpace=*/0)),
1823 VarLVType, CGM.getContext().getTypeAlignInChars(VarType));
1824}
1825
1826 void CGOpenMPRuntime::emitIfClause(CodeGenFunction &CGF, const Expr *Cond,
1827 const RegionCodeGenTy &ThenGen,
1828 const RegionCodeGenTy &ElseGen) {
1829 CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());
1830
1831 // If the condition constant folds and can be elided, try to avoid emitting
1832 // the condition and the dead arm of the if/else.
1833 bool CondConstant;
1834 if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
1835 if (CondConstant)
1836 ThenGen(CGF);
1837 else
1838 ElseGen(CGF);
1839 return;
1840 }
1841
1842 // Otherwise, the condition did not fold, or we couldn't elide it. Just
1843 // emit the conditional branch.
1844 llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then");
1845 llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else");
1846 llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end");
1847 CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0);
1848
1849 // Emit the 'then' code.
1850 CGF.EmitBlock(ThenBlock);
1851 ThenGen(CGF);
1852 CGF.EmitBranch(ContBlock);
1853 // Emit the 'else' code if present.
1854 // There is no need to emit line number for unconditional branch.
1855 (void)ApplyDebugLocation::CreateEmpty(CGF);
1856 CGF.EmitBlock(ElseBlock);
1857 ElseGen(CGF);
1858 // There is no need to emit line number for unconditional branch.
1859 (void)ApplyDebugLocation::CreateEmpty(CGF);
1860 CGF.EmitBranch(ContBlock);
1861 // Emit the continuation block for code after the if.
1862 CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
1863}
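// Editorial example: for '#pragma omp parallel if(0)' the condition constant
// folds, so only ElseGen (the serialized path) is emitted and no branch is
// generated; a non-constant condition produces the usual diamond, sketched in
// C-like pseudo-code:
// \code
// if (cond) goto omp_if_then; else goto omp_if_else;
// omp_if_then: /* ThenGen */ goto omp_if_end;
// omp_if_else: /* ElseGen */ goto omp_if_end;
// omp_if_end: ;
// \endcode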
1864
1865 void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
1866 llvm::Function *OutlinedFn,
1867 ArrayRef<llvm::Value *> CapturedVars,
1868 const Expr *IfCond,
1869 llvm::Value *NumThreads) {
1870 if (!CGF.HaveInsertPoint())
1871 return;
1872 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
1873 auto &M = CGM.getModule();
1874 auto &&ThenGen = [&M, OutlinedFn, CapturedVars, RTLoc,
1875 this](CodeGenFunction &CGF, PrePostActionTy &) {
1876 // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
1877 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
1878 llvm::Value *Args[] = {
1879 RTLoc,
1880 CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
1881 CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())};
1882 llvm::SmallVector<llvm::Value *, 16> RealArgs;
1883 RealArgs.append(std::begin(Args), std::end(Args));
1884 RealArgs.append(CapturedVars.begin(), CapturedVars.end());
1885
1886 llvm::FunctionCallee RTLFn =
1887 OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_fork_call);
1888 CGF.EmitRuntimeCall(RTLFn, RealArgs);
1889 };
1890 auto &&ElseGen = [&M, OutlinedFn, CapturedVars, RTLoc, Loc,
1891 this](CodeGenFunction &CGF, PrePostActionTy &) {
1892 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
1893 llvm::Value *ThreadID = RT.getThreadID(CGF, Loc);
1894 // Build calls:
1895 // __kmpc_serialized_parallel(&Loc, GTid);
1896 llvm::Value *Args[] = {RTLoc, ThreadID};
1897 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
1898 M, OMPRTL___kmpc_serialized_parallel),
1899 Args);
1900
1901 // OutlinedFn(&GTid, &zero_bound, CapturedStruct);
1902 Address ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc);
1903 Address ZeroAddrBound =
1904 CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
1905 /*Name=*/".bound.zero.addr");
1906 CGF.Builder.CreateStore(CGF.Builder.getInt32(/*C*/ 0), ZeroAddrBound);
1907 llvm::SmallVector<llvm::Value *, 8> OutlinedFnArgs;
1908 // ThreadId for serialized parallels is 0.
1909 OutlinedFnArgs.push_back(ThreadIDAddr.getPointer());
1910 OutlinedFnArgs.push_back(ZeroAddrBound.getPointer());
1911 OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
1912
1913 // Ensure we do not inline the function. This is trivially true for the ones
1914 // passed to __kmpc_fork_call but the ones called in serialized regions
1915 // could be inlined. This is not perfect, but it is closer to the invariant
1916 // we want, namely, every data environment starts with a new function.
1917 // TODO: We should pass the if condition to the runtime function and do the
1918 // handling there. Much cleaner code.
1919 OutlinedFn->removeFnAttr(llvm::Attribute::AlwaysInline);
1920 OutlinedFn->addFnAttr(llvm::Attribute::NoInline);
1921 RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);
1922
1923 // __kmpc_end_serialized_parallel(&Loc, GTid);
1924 llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID};
1925 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
1926 M, OMPRTL___kmpc_end_serialized_parallel),
1927 EndArgs);
1928 };
1929 if (IfCond) {
1930 emitIfClause(CGF, IfCond, ThenGen, ElseGen);
1931 } else {
1932 RegionCodeGenTy ThenRCG(ThenGen);
1933 ThenRCG(CGF);
1934 }
1935}
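// Editorial sketch of the two paths built above for a hypothetical
// '#pragma omp parallel if(cond)' that captures two variables:
// \code
// if (cond) {
//   __kmpc_fork_call(&loc, /*argc=*/2, (kmpc_micro)outlined, &cap0, &cap1);
// } else {
//   kmp_int32 gtid = __kmpc_global_thread_num(&loc);
//   __kmpc_serialized_parallel(&loc, gtid);
//   kmp_int32 zero_bound = 0; // bound thread id of the serialized team
//   outlined(&gtid, &zero_bound, &cap0, &cap1);
//   __kmpc_end_serialized_parallel(&loc, gtid);
// }
// \endcode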
1936
1937 // If we're inside an (outlined) parallel region, use the region info's
1938 // thread-ID variable (it is passed as the first argument of the outlined
1939 // function, "kmp_int32 *gtid"). Otherwise, if we're not inside a parallel
1940 // region but in a regular serial code region, get the thread ID by calling
1941 // kmp_int32 __kmpc_global_thread_num(ident_t *loc), stash it in a temporary
1942 // and return the address of that temporary.
1943 Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
1944 SourceLocation Loc) {
1945 if (auto *OMPRegionInfo =
1946 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
1947 if (OMPRegionInfo->getThreadIDVariable())
1948 return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress(CGF);
1949
1950 llvm::Value *ThreadID = getThreadID(CGF, Loc);
1951 QualType Int32Ty =
1952 CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
1953 Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
1954 CGF.EmitStoreOfScalar(ThreadID,
1955 CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty));
1956
1957 return ThreadIDTemp;
1958}
1959
1960llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
1961 std::string Prefix = Twine("gomp_critical_user_", CriticalName).str();
1962 std::string Name = getName({Prefix, "var"});
1963 return OMPBuilder.getOrCreateInternalVariable(KmpCriticalNameTy, Name);
1964}
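// Editorial example: for a hypothetical '#pragma omp critical (foo)' the lock
// created above is an internal global with the GNU-compatible name
// \code
// .gomp_critical_user_foo.var
// \endcode
// which emitCriticalRegion below passes to __kmpc_critical and
// __kmpc_end_critical.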
1965
1966namespace {
1967/// Common pre(post)-action for different OpenMP constructs.
1968class CommonActionTy final : public PrePostActionTy {
1969 llvm::FunctionCallee EnterCallee;
1970 ArrayRef<llvm::Value *> EnterArgs;
1971 llvm::FunctionCallee ExitCallee;
1972 ArrayRef<llvm::Value *> ExitArgs;
1973 bool Conditional;
1974 llvm::BasicBlock *ContBlock = nullptr;
1975
1976public:
1977 CommonActionTy(llvm::FunctionCallee EnterCallee,
1978 ArrayRef<llvm::Value *> EnterArgs,
1979 llvm::FunctionCallee ExitCallee,
1980 ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false)
1981 : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),
1982 ExitArgs(ExitArgs), Conditional(Conditional) {}
1983 void Enter(CodeGenFunction &CGF) override {
1984 llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs);
1985 if (Conditional) {
1986 llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes);
1987 auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
1988 ContBlock = CGF.createBasicBlock("omp_if.end");
1989 // Generate the branch (If-stmt)
1990 CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
1991 CGF.EmitBlock(ThenBlock);
1992 }
1993 }
1994 void Done(CodeGenFunction &CGF) {
1995 // Emit the rest of blocks/branches
1996 CGF.EmitBranch(ContBlock);
1997 CGF.EmitBlock(ContBlock, true);
1998 }
1999 void Exit(CodeGenFunction &CGF) override {
2000 CGF.EmitRuntimeCall(ExitCallee, ExitArgs);
2001 }
2002};
2003} // anonymous namespace
2004
2005 void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF,
2006 StringRef CriticalName,
2007 const RegionCodeGenTy &CriticalOpGen,
2008 SourceLocation Loc, const Expr *Hint) {
2009 // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]);
2010 // CriticalOpGen();
2011 // __kmpc_end_critical(ident_t *, gtid, Lock);
2012 // Prepare arguments and build a call to __kmpc_critical
2013 if (!CGF.HaveInsertPoint())
2014 return;
2015 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2016 getCriticalRegionLock(CriticalName)};
2017 llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args),
2018 std::end(Args));
2019 if (Hint) {
2020 EnterArgs.push_back(CGF.Builder.CreateIntCast(
2021 CGF.EmitScalarExpr(Hint), CGM.Int32Ty, /*isSigned=*/false));
2022 }
2023 CommonActionTy Action(
2024 OMPBuilder.getOrCreateRuntimeFunction(
2025 CGM.getModule(),
2026 Hint ? OMPRTL___kmpc_critical_with_hint : OMPRTL___kmpc_critical),
2027 EnterArgs,
2028 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
2029 OMPRTL___kmpc_end_critical),
2030 Args);
2031 CriticalOpGen.setAction(Action);
2032 emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
2033}
2034
2035 void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
2036 const RegionCodeGenTy &MasterOpGen,
2037 SourceLocation Loc) {
2038 if (!CGF.HaveInsertPoint())
2039 return;
2040 // if(__kmpc_master(ident_t *, gtid)) {
2041 // MasterOpGen();
2042 // __kmpc_end_master(ident_t *, gtid);
2043 // }
2044 // Prepare arguments and build a call to __kmpc_master
2045 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2046 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2047 CGM.getModule(), OMPRTL___kmpc_master),
2048 Args,
2049 OMPBuilder.getOrCreateRuntimeFunction(
2050 CGM.getModule(), OMPRTL___kmpc_end_master),
2051 Args,
2052 /*Conditional=*/true);
2053 MasterOpGen.setAction(Action);
2054 emitInlinedDirective(CGF, OMPD_master, MasterOpGen);
2055 Action.Done(CGF);
2056}
2057
2058 void CGOpenMPRuntime::emitMaskedRegion(CodeGenFunction &CGF,
2059 const RegionCodeGenTy &MaskedOpGen,
2060 SourceLocation Loc, const Expr *Filter) {
2061 if (!CGF.HaveInsertPoint())
2062 return;
2063 // if(__kmpc_masked(ident_t *, gtid, filter)) {
2064 // MaskedOpGen();
2065 // __kmpc_end_masked(ident_t *, gtid);
2066 // }
2067 // Prepare arguments and build a call to __kmpc_masked
2068 llvm::Value *FilterVal = Filter
2069 ? CGF.EmitScalarExpr(Filter, CGF.Int32Ty)
2070 : llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/0);
2071 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2072 FilterVal};
2073 llvm::Value *ArgsEnd[] = {emitUpdateLocation(CGF, Loc),
2074 getThreadID(CGF, Loc)};
2075 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2076 CGM.getModule(), OMPRTL___kmpc_masked),
2077 Args,
2078 OMPBuilder.getOrCreateRuntimeFunction(
2079 CGM.getModule(), OMPRTL___kmpc_end_masked),
2080 ArgsEnd,
2081 /*Conditional=*/true);
2082 MaskedOpGen.setAction(Action);
2083 emitInlinedDirective(CGF, OMPD_masked, MaskedOpGen);
2084 Action.Done(CGF);
2085}
2086
2087 void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
2088 SourceLocation Loc) {
2089 if (!CGF.HaveInsertPoint())
2090 return;
2091 if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2092 OMPBuilder.createTaskyield(CGF.Builder);
2093 } else {
2094 // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
2095 llvm::Value *Args[] = {
2096 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2097 llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
2098 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2099 CGM.getModule(), OMPRTL___kmpc_omp_taskyield),
2100 Args);
2101 }
2102
2103 if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
2104 Region->emitUntiedSwitch(CGF);
2105}
2106
2107 void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
2108 const RegionCodeGenTy &TaskgroupOpGen,
2109 SourceLocation Loc) {
2110 if (!CGF.HaveInsertPoint())
2111 return;
2112 // __kmpc_taskgroup(ident_t *, gtid);
2113 // TaskgroupOpGen();
2114 // __kmpc_end_taskgroup(ident_t *, gtid);
2115 // Prepare arguments and build a call to __kmpc_taskgroup
2116 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2117 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2118 CGM.getModule(), OMPRTL___kmpc_taskgroup),
2119 Args,
2120 OMPBuilder.getOrCreateRuntimeFunction(
2121 CGM.getModule(), OMPRTL___kmpc_end_taskgroup),
2122 Args);
2123 TaskgroupOpGen.setAction(Action);
2124 emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen);
2125}
2126
2127/// Given an array of pointers to variables, project the address of a
2128/// given variable.
2129 static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array,
2130 unsigned Index, const VarDecl *Var) {
2131 // Pull out the pointer to the variable.
2132 Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index);
2133 llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr);
2134
2135 llvm::Type *ElemTy = CGF.ConvertTypeForMem(Var->getType());
2136 return Address(
2137 CGF.Builder.CreateBitCast(
2138 Ptr, ElemTy->getPointerTo(Ptr->getType()->getPointerAddressSpace())),
2139 ElemTy, CGF.getContext().getDeclAlign(Var));
2140}
2141
2142 static llvm::Value *emitCopyprivateCopyFunction(
2143 CodeGenModule &CGM, llvm::Type *ArgsElemType,
2144 ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
2145 ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps,
2146 SourceLocation Loc) {
2147 ASTContext &C = CGM.getContext();
2148 // void copy_func(void *LHSArg, void *RHSArg);
2149 FunctionArgList Args;
2150 ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
2151 ImplicitParamKind::Other);
2152 ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
2153 ImplicitParamKind::Other);
2154 Args.push_back(&LHSArg);
2155 Args.push_back(&RHSArg);
2156 const auto &CGFI =
2157 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
2158 std::string Name =
2159 CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"});
2160 auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
2161 llvm::GlobalValue::InternalLinkage, Name,
2162 &CGM.getModule());
2163 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
2164 Fn->setDoesNotRecurse();
2165 CodeGenFunction CGF(CGM);
2166 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
2167 // Dest = (void*[n])(LHSArg);
2168 // Src = (void*[n])(RHSArg);
2169 Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2170 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
2171 ArgsElemType->getPointerTo()),
2172 ArgsElemType, CGF.getPointerAlign());
2173 Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2174 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
2175 ArgsElemType->getPointerTo()),
2176 ArgsElemType, CGF.getPointerAlign());
2177 // *(Type0*)Dst[0] = *(Type0*)Src[0];
2178 // *(Type1*)Dst[1] = *(Type1*)Src[1];
2179 // ...
2180 // *(Typen*)Dst[n] = *(Typen*)Src[n];
2181 for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
2182 const auto *DestVar =
2183 cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
2184 Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);
2185
2186 const auto *SrcVar =
2187 cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
2188 Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);
2189
2190 const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
2191 QualType Type = VD->getType();
2192 CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);
2193 }
2194 CGF.FinishFunction();
2195 return Fn;
2196}
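// Editorial note: the generated helper matches the <copy_func> shape that
// __kmpc_copyprivate expects; both parameters are arrays of pointers to the
// copyprivate variables of the destination and source threads, conceptually:
// \code
// void .omp.copyprivate.copy_func(void *lhs, void *rhs) {
//   *(T0 *)((void **)lhs)[0] = *(T0 *)((void **)rhs)[0];
//   /* ... one assignment (or user-defined copy) per copyprivate variable */
// }
// \endcode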
2197
2198 void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
2199 const RegionCodeGenTy &SingleOpGen,
2200 SourceLocation Loc,
2201 ArrayRef<const Expr *> CopyprivateVars,
2202 ArrayRef<const Expr *> SrcExprs,
2203 ArrayRef<const Expr *> DstExprs,
2204 ArrayRef<const Expr *> AssignmentOps) {
2205 if (!CGF.HaveInsertPoint())
2206 return;
2207 assert(CopyprivateVars.size() == SrcExprs.size() &&
2208 CopyprivateVars.size() == DstExprs.size() &&
2209 CopyprivateVars.size() == AssignmentOps.size());
2210 ASTContext &C = CGM.getContext();
2211 // int32 did_it = 0;
2212 // if(__kmpc_single(ident_t *, gtid)) {
2213 // SingleOpGen();
2214 // __kmpc_end_single(ident_t *, gtid);
2215 // did_it = 1;
2216 // }
2217 // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
2218 // <copy_func>, did_it);
2219
2220 Address DidIt = Address::invalid();
2221 if (!CopyprivateVars.empty()) {
2222 // int32 did_it = 0;
2223 QualType KmpInt32Ty =
2224 C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
2225 DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
2226 CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
2227 }
2228 // Prepare arguments and build a call to __kmpc_single
2229 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2230 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2231 CGM.getModule(), OMPRTL___kmpc_single),
2232 Args,
2233 OMPBuilder.getOrCreateRuntimeFunction(
2234 CGM.getModule(), OMPRTL___kmpc_end_single),
2235 Args,
2236 /*Conditional=*/true);
2237 SingleOpGen.setAction(Action);
2238 emitInlinedDirective(CGF, OMPD_single, SingleOpGen);
2239 if (DidIt.isValid()) {
2240 // did_it = 1;
2241 CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
2242 }
2243 Action.Done(CGF);
2244 // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
2245 // <copy_func>, did_it);
2246 if (DidIt.isValid()) {
2247 llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
2248 QualType CopyprivateArrayTy = C.getConstantArrayType(
2249 C.VoidPtrTy, ArraySize, nullptr, ArraySizeModifier::Normal,
2250 /*IndexTypeQuals=*/0);
2251 // Create a list of all private variables for copyprivate.
2252 Address CopyprivateList =
2253 CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
2254 for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
2255 Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I);
2256 CGF.Builder.CreateStore(
2257 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2258 CGF.EmitLValue(CopyprivateVars[I]).getPointer(CGF),
2259 CGF.VoidPtrTy),
2260 Elem);
2261 }
2262 // Build the function that copies private values from the single region to
2263 // all other threads in the corresponding parallel region.
2264 llvm::Value *CpyFn = emitCopyprivateCopyFunction(
2265 CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy), CopyprivateVars,
2266 SrcExprs, DstExprs, AssignmentOps, Loc);
2267 llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy);
2268 Address CL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2269 CopyprivateList, CGF.VoidPtrTy, CGF.Int8Ty);
2270 llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt);
2271 llvm::Value *Args[] = {
2272 emitUpdateLocation(CGF, Loc), // ident_t *<loc>
2273 getThreadID(CGF, Loc), // i32 <gtid>
2274 BufSize, // size_t <buf_size>
2275 CL.getPointer(), // void *<copyprivate list>
2276 CpyFn, // void (*) (void *, void *) <copy_func>
2277 DidItVal // i32 did_it
2278 };
2279 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2280 CGM.getModule(), OMPRTL___kmpc_copyprivate),
2281 Args);
2282 }
2283}
2284
2285 void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF,
2286 const RegionCodeGenTy &OrderedOpGen,
2287 SourceLocation Loc, bool IsThreads) {
2288 if (!CGF.HaveInsertPoint())
2289 return;
2290 // __kmpc_ordered(ident_t *, gtid);
2291 // OrderedOpGen();
2292 // __kmpc_end_ordered(ident_t *, gtid);
2293 // Prepare arguments and build a call to __kmpc_ordered
2294 if (IsThreads) {
2295 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2296 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2297 CGM.getModule(), OMPRTL___kmpc_ordered),
2298 Args,
2299 OMPBuilder.getOrCreateRuntimeFunction(
2300 CGM.getModule(), OMPRTL___kmpc_end_ordered),
2301 Args);
2302 OrderedOpGen.setAction(Action);
2303 emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
2304 return;
2305 }
2306 emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
2307}
2308
2309 static unsigned getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) {
2310 unsigned Flags;
2311 if (Kind == OMPD_for)
2312 Flags = OMP_IDENT_BARRIER_IMPL_FOR;
2313 else if (Kind == OMPD_sections)
2314 Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS;
2315 else if (Kind == OMPD_single)
2316 Flags = OMP_IDENT_BARRIER_IMPL_SINGLE;
2317 else if (Kind == OMPD_barrier)
2318 Flags = OMP_IDENT_BARRIER_EXPL;
2319 else
2320 Flags = OMP_IDENT_BARRIER_IMPL;
2321 return Flags;
2322}
2323
2324 void CGOpenMPRuntime::getDefaultScheduleAndChunk(
2325 CodeGenFunction &CGF, const OMPLoopDirective &S,
2326 OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const {
2327 // Check if the loop directive is actually a doacross loop directive. In
2328 // that case choose schedule(static, 1).
2329 if (llvm::any_of(
2330 S.getClausesOfKind<OMPOrderedClause>(),
2331 [](const OMPOrderedClause *C) { return C->getNumForLoops(); })) {
2332 ScheduleKind = OMPC_SCHEDULE_static;
2333 // Chunk size is 1 in this case.
2334 llvm::APInt ChunkSize(32, 1);
2335 ChunkExpr = IntegerLiteral::Create(
2336 CGF.getContext(), ChunkSize,
2337 CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
2338 SourceLocation());
2339 }
2340}
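// Editorial example of a doacross loop nest that triggers the
// schedule(static, 1) override above:
// \code
// #pragma omp for ordered(2)
// for (int i = 1; i < n; ++i)
//   for (int j = 1; j < m; ++j) {
// #pragma omp ordered depend(sink : i - 1, j)
//     a[i][j] = f(a[i - 1][j]);
// #pragma omp ordered depend(source)
//   }
// \endcode
// The fixed iteration-to-thread mapping keeps the runtime's sink/source
// dependence tracking simple.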
2341
2342 void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
2343 OpenMPDirectiveKind Kind, bool EmitChecks,
2344 bool ForceSimpleCall) {
2345 // Check if we should use the OMPBuilder
2346 auto *OMPRegionInfo =
2347 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo);
2348 if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2349 CGF.Builder.restoreIP(OMPBuilder.createBarrier(
2350 CGF.Builder, Kind, ForceSimpleCall, EmitChecks));
2351 return;
2352 }
2353
2354 if (!CGF.HaveInsertPoint())
2355 return;
2358 unsigned Flags = getDefaultFlagsForBarriers(Kind);
2359 // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc,
2360 // thread_id);
2361 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
2362 getThreadID(CGF, Loc)};
2363 if (OMPRegionInfo) {
2364 if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) {
2365 llvm::Value *Result = CGF.EmitRuntimeCall(
2366 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
2367 OMPRTL___kmpc_cancel_barrier),
2368 Args);
2369 if (EmitChecks) {
2370 // if (__kmpc_cancel_barrier()) {
2371 // exit from construct;
2372 // }
2373 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
2374 llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
2375 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
2376 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
2377 CGF.EmitBlock(ExitBB);
2378 // exit from construct;
2379 CodeGenFunction::JumpDest CancelDestination =
2380 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
2381 CGF.EmitBranchThroughCleanup(CancelDestination);
2382 CGF.EmitBlock(ContBB, /*IsFinished=*/true);
2383 }
2384 return;
2385 }
2386 }
2387 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2388 CGM.getModule(), OMPRTL___kmpc_barrier),
2389 Args);
2390}
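// Editorial sketch of the cancellation-aware barrier emitted above when the
// enclosing region has a cancel and EmitChecks is set:
// \code
// if (__kmpc_cancel_barrier(&loc, gtid) != 0)
//   goto cancel_exit; // branch through cleanups out of the construct
// /* .cancel.continue: */
// \endcode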
2391
2392 void CGOpenMPRuntime::emitErrorCall(CodeGenFunction &CGF, SourceLocation Loc,
2393 Expr *ME, bool IsFatal) {
2394 llvm::Value *MVL =
2395 ME ? CGF.EmitStringLiteralLValue(cast<StringLiteral>(ME)).getPointer(CGF)
2396 : llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
2397 // Build call void __kmpc_error(ident_t *loc, int severity, const char
2398 // *message)
2399 llvm::Value *Args[] = {
2400 emitUpdateLocation(CGF, Loc, /*Flags=*/0, /*GenLoc=*/true),
2401 llvm::ConstantInt::get(CGM.Int32Ty, IsFatal ? 2 : 1),
2402 CGF.Builder.CreatePointerCast(MVL, CGM.Int8PtrTy)};
2403 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2404 CGM.getModule(), OMPRTL___kmpc_error),
2405 Args);
2406}
2407
2408/// Map the OpenMP loop schedule to the runtime enumeration.
2409static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
2410 bool Chunked, bool Ordered) {
2411 switch (ScheduleKind) {
2412 case OMPC_SCHEDULE_static:
2413 return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked)
2414 : (Ordered ? OMP_ord_static : OMP_sch_static);
2415 case OMPC_SCHEDULE_dynamic:
2416 return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked;
2417 case OMPC_SCHEDULE_guided:
2418 return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked;
2419 case OMPC_SCHEDULE_runtime:
2420 return Ordered ? OMP_ord_runtime : OMP_sch_runtime;
2421 case OMPC_SCHEDULE_auto:
2422 return Ordered ? OMP_ord_auto : OMP_sch_auto;
2423 case OMPC_SCHEDULE_unknown:
2424 assert(!Chunked && "chunk was specified but schedule kind not known");
2425 return Ordered ? OMP_ord_static : OMP_sch_static;
2426 }
2427 llvm_unreachable("Unexpected runtime schedule");
2428}
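// Editorial examples of the mapping above for a few hypothetical clauses:
// \code
// schedule(static)           -> OMP_sch_static
// schedule(static, 4)        -> OMP_sch_static_chunked
// schedule(dynamic[, 4])     -> OMP_sch_dynamic_chunked
// schedule(guided) + ordered -> OMP_ord_guided_chunked
// <no schedule clause>       -> OMP_sch_static (OMP_ord_static if ordered)
// \endcode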
2429
2430/// Map the OpenMP distribute schedule to the runtime enumeration.
2431static OpenMPSchedType
2432 getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) {
2433 // only static is allowed for dist_schedule
2434 return Chunked ? OMP_dist_sch_static_chunked : OMP_dist_sch_static;
2435}
2436
2437 bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
2438 bool Chunked) const {
2439 OpenMPSchedType Schedule =
2440 getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2441 return Schedule == OMP_sch_static;
2442}
2443
2444 bool CGOpenMPRuntime::isStaticNonchunked(
2445 OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
2446 OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2447 return Schedule == OMP_dist_sch_static;
2448}
2449
2450 bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind,
2451 bool Chunked) const {
2452 OpenMPSchedType Schedule =
2453 getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2454 return Schedule == OMP_sch_static_chunked;
2455}
2456
2457 bool CGOpenMPRuntime::isStaticChunked(
2458 OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
2459 OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2460 return Schedule == OMP_dist_sch_static_chunked;
2461}
2462
2463 bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
2464 OpenMPSchedType Schedule =
2465 getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
2466 assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
2467 return Schedule != OMP_sch_static;
2468}
2469
2470static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule,
2471 OpenMPScheduleClauseModifier M1,
2472 OpenMPScheduleClauseModifier M2) {
2473 int Modifier = 0;
2474 switch (M1) {
2475 case OMPC_SCHEDULE_MODIFIER_monotonic:
2476 Modifier = OMP_sch_modifier_monotonic;
2477 break;
2478 case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2479 Modifier = OMP_sch_modifier_nonmonotonic;
2480 break;
2481 case OMPC_SCHEDULE_MODIFIER_simd:
2482 if (Schedule == OMP_sch_static_chunked)
2483 Schedule = OMP_sch_static_balanced_chunked;
2484 break;
2485 case OMPC_SCHEDULE_MODIFIER_last:
2486 case OMPC_SCHEDULE_MODIFIER_unknown:
2487 break;
2488 }
2489 switch (M2) {
2490 case OMPC_SCHEDULE_MODIFIER_monotonic:
2491 Modifier = OMP_sch_modifier_monotonic;
2492 break;
2493 case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2494 Modifier = OMP_sch_modifier_nonmonotonic;
2495 break;
2496 case OMPC_SCHEDULE_MODIFIER_simd:
2497 if (Schedule == OMP_sch_static_chunked)
2498 Schedule = OMP_sch_static_balanced_chunked;
2499 break;
2500 case OMPC_SCHEDULE_MODIFIER_last:
2501 case OMPC_SCHEDULE_MODIFIER_unknown:
2502 break;
2503 }
2504 // OpenMP 5.0, 2.9.2 Worksharing-Loop Construct, Description.
2505 // If the static schedule kind is specified or if the ordered clause is
2506 // specified, and if the nonmonotonic modifier is not specified, the effect is
2507 // as if the monotonic modifier is specified. Otherwise, unless the monotonic
2508 // modifier is specified, the effect is as if the nonmonotonic modifier is
2509 // specified.
2510 if (CGM.getLangOpts().OpenMP >= 50 && Modifier == 0) {
2511 if (!(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static ||
2512 Schedule == OMP_sch_static_balanced_chunked ||
2513 Schedule == OMP_ord_static_chunked || Schedule == OMP_ord_static ||
2514 Schedule == OMP_dist_sch_static_chunked ||
2515 Schedule == OMP_dist_sch_static))
2516 Modifier = OMP_sch_modifier_nonmonotonic;
2517 }
2518 return Schedule | Modifier;
2519}
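// Editorial example: with OpenMP >= 5.0 and no explicit modifier, a
// hypothetical 'schedule(dynamic, 4)' maps to
// \code
// OMP_sch_dynamic_chunked | OMP_sch_modifier_nonmonotonic
// \endcode
// while 'schedule(monotonic : dynamic, 4)' keeps
// OMP_sch_dynamic_chunked | OMP_sch_modifier_monotonic; the modifier bits
// occupy the high bits of the schedule word passed to the runtime.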
2520
2521 void CGOpenMPRuntime::emitForDispatchInit(
2522 CodeGenFunction &CGF, SourceLocation Loc,
2523 const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
2524 bool Ordered, const DispatchRTInput &DispatchValues) {
2525 if (!CGF.HaveInsertPoint())
2526 return;
2527 OpenMPSchedType Schedule = getRuntimeSchedule(
2528 ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered);
2529 assert(Ordered ||
2530 (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
2531 Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked &&
2532 Schedule != OMP_sch_static_balanced_chunked));
2533 // Call __kmpc_dispatch_init(
2534 // ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
2535 // kmp_int[32|64] lower, kmp_int[32|64] upper,
2536 // kmp_int[32|64] stride, kmp_int[32|64] chunk);
2537
2538 // If the Chunk was not specified in the clause - use default value 1.
2539 llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk
2540 : CGF.Builder.getIntN(IVSize, 1);
2541 llvm::Value *Args[] = {
2542 emitUpdateLocation(CGF, Loc),
2543 getThreadID(CGF, Loc),
2544 CGF.Builder.getInt32(addMonoNonMonoModifier(
2545 CGM, Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type
2546 DispatchValues.LB, // Lower
2547 DispatchValues.UB, // Upper
2548 CGF.Builder.getIntN(IVSize, 1), // Stride
2549 Chunk // Chunk
2550 };
2551 CGF.EmitRuntimeCall(OMPBuilder.createDispatchInitFunction(IVSize, IVSigned),
2552 Args);
2553}
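// Editorial sketch of how this dispatch-init pairs with emitForNext
// (__kmpc_dispatch_next_*) below for a dynamically scheduled loop:
// \code
// __kmpc_dispatch_init_4(&loc, gtid, sched, lb, ub, /*stride=*/1, chunk);
// while (__kmpc_dispatch_next_4(&loc, gtid, &last, &lo, &hi, &st)) {
//   for (kmp_int32 i = lo; i <= hi; i += st)
//     /* loop body */;
// }
// \endcode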
2554
2555 static void emitForStaticInitCall(
2556 CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId,
2557 llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule,
2558 OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2,
2559 const CGOpenMPRuntime::StaticRTInput &Values) {
2560 if (!CGF.HaveInsertPoint())
2561 return;
2562
2563 assert(!Values.Ordered);
2564 assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
2565 Schedule == OMP_sch_static_balanced_chunked ||
2566 Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked ||
2567 Schedule == OMP_dist_sch_static ||
2568 Schedule == OMP_dist_sch_static_chunked);
2569
2570 // Call __kmpc_for_static_init(
2571 // ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
2572 // kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
2573 // kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
2574 // kmp_int[32|64] incr, kmp_int[32|64] chunk);
2575 llvm::Value *Chunk = Values.Chunk;
2576 if (Chunk == nullptr) {
2577 assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static ||
2578 Schedule == OMP_dist_sch_static) &&
2579 "expected static non-chunked schedule");
2580 // If the Chunk was not specified in the clause - use default value 1.
2581 Chunk = CGF.Builder.getIntN(Values.IVSize, 1);
2582 } else {
2583 assert((Schedule == OMP_sch_static_chunked ||
2584 Schedule == OMP_sch_static_balanced_chunked ||
2585 Schedule == OMP_ord_static_chunked ||
2586 Schedule == OMP_dist_sch_static_chunked) &&
2587 "expected static chunked schedule");
2588 }
2589 llvm::Value *Args[] = {
2590 UpdateLocation,
2591 ThreadId,
2592 CGF.Builder.getInt32(addMonoNonMonoModifier(CGF.CGM, Schedule, M1,
2593 M2)), // Schedule type
2594 Values.IL.getPointer(), // &isLastIter
2595 Values.LB.getPointer(), // &LB
2596 Values.UB.getPointer(), // &UB
2597 Values.ST.getPointer(), // &Stride
2598 CGF.Builder.getIntN(Values.IVSize, 1), // Incr
2599 Chunk // Chunk
2600 };
2601 CGF.EmitRuntimeCall(ForStaticInitFunction, Args);
2602}
2603
2604 void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF,
2605 SourceLocation Loc,
2606 OpenMPDirectiveKind DKind,
2607 const OpenMPScheduleTy &ScheduleKind,
2608 const StaticRTInput &Values) {
2609 OpenMPSchedType ScheduleNum = getRuntimeSchedule(
2610 ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered);
2611 assert((isOpenMPWorksharingDirective(DKind) || (DKind == OMPD_loop)) &&
2612 "Expected loop-based or sections-based directive.");
2613 llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc,
2614 isOpenMPLoopDirective(DKind)
2615 ? OMP_IDENT_WORK_LOOP
2616 : OMP_IDENT_WORK_SECTIONS);
2617 llvm::Value *ThreadId = getThreadID(CGF, Loc);
2618 llvm::FunctionCallee StaticInitFunction =
2619 OMPBuilder.createForStaticInitFunction(Values.IVSize, Values.IVSigned,
2620 false);
2621 auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
2622 emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
2623 ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values);
2624}
2625
2626 void CGOpenMPRuntime::emitDistributeStaticInit(
2627 CodeGenFunction &CGF, SourceLocation Loc,
2628 OpenMPDistScheduleClauseKind SchedKind,
2629 const CGOpenMPRuntime::StaticRTInput &Values) {
2630 OpenMPSchedType ScheduleNum =
2631 getRuntimeSchedule(SchedKind, Values.Chunk != nullptr);
2632 llvm::Value *UpdatedLocation =
2633 emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE);
2634 llvm::Value *ThreadId = getThreadID(CGF, Loc);
2635 llvm::FunctionCallee StaticInitFunction;
2636 bool isGPUDistribute =
2637 CGM.getLangOpts().OpenMPIsTargetDevice &&
2638 (CGM.getTriple().isAMDGCN() || CGM.getTriple().isNVPTX());
2639 StaticInitFunction = OMPBuilder.createForStaticInitFunction(
2640 Values.IVSize, Values.IVSigned, isGPUDistribute);
2641
2642 emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
2643 ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown,
2644 OMPC_SCHEDULE_MODIFIER_unknown, Values);
2645}
2646
2647 void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF,
2648 SourceLocation Loc,
2649 OpenMPDirectiveKind DKind) {
2650 assert((DKind == OMPD_distribute || DKind == OMPD_for ||
2651 DKind == OMPD_sections) &&
2652 "Expected distribute, for, or sections directive kind");
2653 if (!CGF.HaveInsertPoint())
2654 return;
2655 // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
2656 llvm::Value *Args[] = {
2657 emitUpdateLocation(CGF, Loc,
2658 isOpenMPDistributeDirective(DKind)
2659 ? OMP_IDENT_WORK_DISTRIBUTE
2660 : isOpenMPLoopDirective(DKind)
2661 ? OMP_IDENT_WORK_LOOP
2662 : OMP_IDENT_WORK_SECTIONS),
2663 getThreadID(CGF, Loc)};
2664 auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
2665 if (isOpenMPDistributeDirective(DKind) &&
2666 CGM.getLangOpts().OpenMPIsTargetDevice &&
2667 (CGM.getTriple().isAMDGCN() || CGM.getTriple().isNVPTX()))
2668 CGF.EmitRuntimeCall(
2669 OMPBuilder.getOrCreateRuntimeFunction(
2670 CGM.getModule(), OMPRTL___kmpc_distribute_static_fini),
2671 Args);
2672 else
2673 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2674 CGM.getModule(), OMPRTL___kmpc_for_static_fini),
2675 Args);
2676}
2677
2678 void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
2679 SourceLocation Loc,
2680 unsigned IVSize,
2681 bool IVSigned) {
2682 if (!CGF.HaveInsertPoint())
2683 return;
2684 // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
2685 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2686 CGF.EmitRuntimeCall(OMPBuilder.createDispatchFiniFunction(IVSize, IVSigned),
2687 Args);
2688}
2689
2690 llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,
2691 SourceLocation Loc, unsigned IVSize,
2692 bool IVSigned, Address IL,
2693 Address LB, Address UB,
2694 Address ST) {
2695 // Call __kmpc_dispatch_next(
2696 // ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
2697 // kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
2698 // kmp_int[32|64] *p_stride);
2699 llvm::Value *Args[] = {
2700 emitUpdateLocation(CGF, Loc),
2701 getThreadID(CGF, Loc),
2702 IL.getPointer(), // &isLastIter
2703 LB.getPointer(), // &Lower
2704 UB.getPointer(), // &Upper
2705 ST.getPointer() // &Stride
2706 };
2707 llvm::Value *Call = CGF.EmitRuntimeCall(
2708 OMPBuilder.createDispatchNextFunction(IVSize, IVSigned), Args);
2709 return CGF.EmitScalarConversion(
2710 Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1),
2711 CGF.getContext().BoolTy, Loc);
2712}
2713
2714 void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
2715 llvm::Value *NumThreads,
2716 SourceLocation Loc) {
2717 if (!CGF.HaveInsertPoint())
2718 return;
2719 // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
2720 llvm::Value *Args[] = {
2721 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2722 CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)};
2723 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2724 CGM.getModule(), OMPRTL___kmpc_push_num_threads),
2725 Args);
2726}
2727
2728 void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF,
2729 ProcBindKind ProcBind,
2730 SourceLocation Loc) {
2731 if (!CGF.HaveInsertPoint())
2732 return;
2733 assert(ProcBind != OMP_PROC_BIND_unknown && "Unsupported proc_bind value.");
2734 // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind)
2735 llvm::Value *Args[] = {
2736 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2737 llvm::ConstantInt::get(CGM.IntTy, unsigned(ProcBind), /*isSigned=*/true)};
2738 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2739 CGM.getModule(), OMPRTL___kmpc_push_proc_bind),
2740 Args);
2741}
2742
2743 void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
2744 SourceLocation Loc, llvm::AtomicOrdering AO) {
2745 if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2746 OMPBuilder.createFlush(CGF.Builder);
2747 } else {
2748 if (!CGF.HaveInsertPoint())
2749 return;
2750 // Build call void __kmpc_flush(ident_t *loc)
2751 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2752 CGM.getModule(), OMPRTL___kmpc_flush),
2753 emitUpdateLocation(CGF, Loc));
2754 }
2755}
2756
2757namespace {
2758/// Indexes of fields for type kmp_task_t.
2759enum KmpTaskTFields {
2760 /// List of shared variables.
2761 KmpTaskTShareds,
2762 /// Task routine.
2763 KmpTaskTRoutine,
2764 /// Partition id for the untied tasks.
2765 KmpTaskTPartId,
2766 /// Function with call of destructors for private variables.
2767 Data1,
2768 /// Task priority.
2769 Data2,
2770 /// (Taskloops only) Lower bound.
2771 KmpTaskTLowerBound,
2772 /// (Taskloops only) Upper bound.
2773 KmpTaskTUpperBound,
2774 /// (Taskloops only) Stride.
2775 KmpTaskTStride,
2776 /// (Taskloops only) Is last iteration flag.
2777 KmpTaskTLastIter,
2778 /// (Taskloops only) Reduction data.
2779 KmpTaskTReductions,
2780};
2781} // anonymous namespace
2782
2783 void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
2784 // If we are in simd mode or there are no entries, we don't need to do
2785 // anything.
2786 if (CGM.getLangOpts().OpenMPSimd || OMPBuilder.OffloadInfoManager.empty())
2787 return;
2788
2789 llvm::OpenMPIRBuilder::EmitMetadataErrorReportFunctionTy &&ErrorReportFn =
2790 [this](llvm::OpenMPIRBuilder::EmitMetadataErrorKind Kind,
2791 const llvm::TargetRegionEntryInfo &EntryInfo) -> void {
2792 SourceLocation Loc;
2793 if (Kind != llvm::OpenMPIRBuilder::EMIT_MD_GLOBAL_VAR_LINK_ERROR) {
2794 for (auto I = CGM.getContext().getSourceManager().fileinfo_begin(),
2795 E = CGM.getContext().getSourceManager().fileinfo_end();
2796 I != E; ++I) {
2797 if (I->getFirst().getUniqueID().getDevice() == EntryInfo.DeviceID &&
2798 I->getFirst().getUniqueID().getFile() == EntryInfo.FileID) {
2799 Loc = CGM.getContext().getSourceManager().translateFileLineCol(
2800 I->getFirst(), EntryInfo.Line, 1);
2801 break;
2802 }
2803 }
2804 }
2805 switch (Kind) {
2806 case llvm::OpenMPIRBuilder::EMIT_MD_TARGET_REGION_ERROR: {
2807 unsigned DiagID = CGM.getDiags().getCustomDiagID(
2808 DiagnosticsEngine::Error, "Offloading entry for target region in "
2809 "%0 is incorrect: either the "
2810 "address or the ID is invalid.");
2811 CGM.getDiags().Report(Loc, DiagID) << EntryInfo.ParentName;
2812 } break;
2813 case llvm::OpenMPIRBuilder::EMIT_MD_DECLARE_TARGET_ERROR: {
2814 unsigned DiagID = CGM.getDiags().getCustomDiagID(
2815 DiagnosticsEngine::Error, "Offloading entry for declare target "
2816 "variable %0 is incorrect: the "
2817 "address is invalid.");
2818 CGM.getDiags().Report(Loc, DiagID) << EntryInfo.ParentName;
2819 } break;
2820 case llvm::OpenMPIRBuilder::EMIT_MD_GLOBAL_VAR_LINK_ERROR: {
2821 unsigned DiagID = CGM.getDiags().getCustomDiagID(
2822 DiagnosticsEngine::Error,
2823 "Offloading entry for declare target variable is incorrect: the "
2824 "address is invalid.");
2825 CGM.getDiags().Report(DiagID);
2826 } break;
2827 }
2828 };
2829
2830 OMPBuilder.createOffloadEntriesAndInfoMetadata(ErrorReportFn);
2831}
2832
2833 void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) {
2834 if (!KmpRoutineEntryPtrTy) {
2835 // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.
2836 ASTContext &C = CGM.getContext();
2837 QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};
2838 FunctionProtoType::ExtProtoInfo EPI;
2839 KmpRoutineEntryPtrQTy = C.getPointerType(
2840 C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI));
2841 KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy);
2842 }
2843}
2844
2845namespace {
2846struct PrivateHelpersTy {
2847 PrivateHelpersTy(const Expr *OriginalRef, const VarDecl *Original,
2848 const VarDecl *PrivateCopy, const VarDecl *PrivateElemInit)
2849 : OriginalRef(OriginalRef), Original(Original), PrivateCopy(PrivateCopy),
2850 PrivateElemInit(PrivateElemInit) {}
2851 PrivateHelpersTy(const VarDecl *Original) : Original(Original) {}
2852 const Expr *OriginalRef = nullptr;
2853 const VarDecl *Original = nullptr;
2854 const VarDecl *PrivateCopy = nullptr;
2855 const VarDecl *PrivateElemInit = nullptr;
2856 bool isLocalPrivate() const {
2857 return !OriginalRef && !PrivateCopy && !PrivateElemInit;
2858 }
2859};
2860typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy;
2861} // anonymous namespace
2862
2863static bool isAllocatableDecl(const VarDecl *VD) {
2864 const VarDecl *CVD = VD->getCanonicalDecl();
2865 if (!CVD->hasAttr<OMPAllocateDeclAttr>())
2866 return false;
2867 const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
2868 // Use the default allocation.
2869 return !(AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc &&
2870 !AA->getAllocator());
2871}
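// Editorial example: the predicate above is true for a variable given a
// non-default allocator, e.g.
// \code
// int buf[64];
// #pragma omp allocate(buf) allocator(omp_high_bw_mem_alloc)
// \endcode
// while a plain '#pragma omp allocate(buf)' with the default allocator and no
// allocator expression keeps the default allocation path.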
2872
2873static RecordDecl *
2874 createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) {
2875 if (!Privates.empty()) {
2876 ASTContext &C = CGM.getContext();
2877 // Build struct .kmp_privates_t. {
2878 // /* private vars */
2879 // };
2880 RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t");
2881 RD->startDefinition();
2882 for (const auto &Pair : Privates) {
2883 const VarDecl *VD = Pair.second.Original;
2884 QualType Type = VD->getType().getNonReferenceType();
2885 // If the private variable is a local variable with lvalue ref type,
2886 // allocate the pointer instead of the pointee type.
2887 if (Pair.second.isLocalPrivate()) {
2888 if (VD->getType()->isLValueReferenceType())
2889 Type = C.getPointerType(Type);
2890 if (isAllocatableDecl(VD))
2891 Type = C.getPointerType(Type);
2892 }
2893 FieldDecl *FD = addFieldToRecordDecl(C, RD, Type);
2894 if (VD->hasAttrs()) {
2895 for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()),
2896 E(VD->getAttrs().end());
2897 I != E; ++I)
2898 FD->addAttr(*I);
2899 }
2900 }
2901 RD->completeDefinition();
2902 return RD;
2903 }
2904 return nullptr;
2905}
2906
2907static RecordDecl *
2908 createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind,
2909 QualType KmpInt32Ty,
2910 QualType KmpRoutineEntryPointerQTy) {
2911 ASTContext &C = CGM.getContext();
2912 // Build struct kmp_task_t {
2913 // void * shareds;
2914 // kmp_routine_entry_t routine;
2915 // kmp_int32 part_id;
2916 // kmp_cmplrdata_t data1;
2917 // kmp_cmplrdata_t data2;
2918 // For taskloops additional fields:
2919 // kmp_uint64 lb;
2920 // kmp_uint64 ub;
2921 // kmp_int64 st;
2922 // kmp_int32 liter;
2923 // void * reductions;
2924 // };
2925 RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TagTypeKind::Union);
2926 UD->startDefinition();
2927 addFieldToRecordDecl(C, UD, KmpInt32Ty);
2928 addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy);
2929 UD->completeDefinition();
2930 QualType KmpCmplrdataTy = C.getRecordType(UD);
2931 RecordDecl *RD = C.buildImplicitRecord("kmp_task_t");
2932 RD->startDefinition();
2933 addFieldToRecordDecl(C, RD, C.VoidPtrTy);
2934 addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
2935 addFieldToRecordDecl(C, RD, KmpInt32Ty);
2936 addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
2937 addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
2938 if (isOpenMPTaskLoopDirective(Kind)) {
2939 QualType KmpUInt64Ty =
2940 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
2941 QualType KmpInt64Ty =
2942 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
2943 addFieldToRecordDecl(C, RD, KmpUInt64Ty);
2944 addFieldToRecordDecl(C, RD, KmpUInt64Ty);
2945 addFieldToRecordDecl(C, RD, KmpInt64Ty);
2946 addFieldToRecordDecl(C, RD, KmpInt32Ty);
2947 addFieldToRecordDecl(C, RD, C.VoidPtrTy);
2948 }
2949 RD->completeDefinition();
2950 return RD;
2951}
2952
2953static RecordDecl *
2954 createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy,
2955 ArrayRef<PrivateDataTy> Privates) {
2956 ASTContext &C = CGM.getContext();
2957 // Build struct kmp_task_t_with_privates {
2958 // kmp_task_t task_data;
2959 // .kmp_privates_t. privates;
2960 // };
2961 RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates");
2962 RD->startDefinition();
2963 addFieldToRecordDecl(C, RD, KmpTaskTQTy);
2964 if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates))
2965 addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD));
2966 RD->completeDefinition();
2967 return RD;
2968}
2969
2970/// Emit a proxy function which accepts kmp_task_t as the second
2971/// argument.
2972/// \code
2973/// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
2974/// TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt,
2975/// For taskloops:
2976/// tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
2977/// tt->reductions, tt->shareds);
2978/// return 0;
2979/// }
2980/// \endcode
2981static llvm::Function *
2982 emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
2983 OpenMPDirectiveKind Kind, QualType KmpInt32Ty,
2984 QualType KmpTaskTWithPrivatesPtrQTy,
2985 QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
2986 QualType SharedsPtrTy, llvm::Function *TaskFunction,
2987 llvm::Value *TaskPrivatesMap) {
2988 ASTContext &C = CGM.getContext();
2989 FunctionArgList Args;
2990 ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
2991 ImplicitParamKind::Other);
2992 ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
2993 KmpTaskTWithPrivatesPtrQTy.withRestrict(),
2994 ImplicitParamKind::Other);
2995 Args.push_back(&GtidArg);
2996 Args.push_back(&TaskTypeArg);
2997 const auto &TaskEntryFnInfo =
2998 CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
2999 llvm::FunctionType *TaskEntryTy =
3000 CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
3001 std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""});
3002 auto *TaskEntry = llvm::Function::Create(
3003 TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
3004 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo);
3005 TaskEntry->setDoesNotRecurse();
3006 CodeGenFunction CGF(CGM);
3007 CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args,
3008 Loc, Loc);
3009
3010 // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map,
3011 // tt,
3012 // For taskloops:
3013 // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
3014 // tt->task_data.shareds);
3015 llvm::Value *GtidParam = CGF.EmitLoadOfScalar(
3016 CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc);
3017 LValue TDBase = CGF.EmitLoadOfPointerLValue(
3018 CGF.GetAddrOfLocalVar(&TaskTypeArg),
3019 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3020 const auto *KmpTaskTWithPrivatesQTyRD =
3021 cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
3022 LValue Base =
3023 CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
3024 const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
3025 auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
3026 LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
3027 llvm::Value *PartidParam = PartIdLVal.getPointer(CGF);
3028
3029 auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
3030 LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
3031 llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3032 CGF.EmitLoadOfScalar(SharedsLVal, Loc),
3033 CGF.ConvertTypeForMem(SharedsPtrTy));
3034
3035 auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
3036 llvm::Value *PrivatesParam;
3037 if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
3038 LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
3039 PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3040 PrivatesLVal.getPointer(CGF), CGF.VoidPtrTy);
3041 } else {
3042 PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
3043 }
3044
3045 llvm::Value *CommonArgs[] = {
3046 GtidParam, PartidParam, PrivatesParam, TaskPrivatesMap,
3047 CGF.Builder
3048 .CreatePointerBitCastOrAddrSpaceCast(TDBase.getAddress(CGF),
3049 CGF.VoidPtrTy, CGF.Int8Ty)
3050 .getPointer()};
3051 SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs),
3052 std::end(CommonArgs));
3053 if (isOpenMPTaskLoopDirective(Kind)) {
3054 auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound);
3055 LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI);
3056 llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc);
3057 auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound);
3058 LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI);
3059 llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc);
3060 auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride);
3061 LValue StLVal = CGF.EmitLValueForField(Base, *StFI);
3062 llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc);
3063 auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
3064 LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
3065 llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc);
3066 auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions);
3067 LValue RLVal = CGF.EmitLValueForField(Base, *RFI);
3068 llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc);
3069 CallArgs.push_back(LBParam);
3070 CallArgs.push_back(UBParam);
3071 CallArgs.push_back(StParam);
3072 CallArgs.push_back(LIParam);
3073 CallArgs.push_back(RParam);
3074 }
3075 CallArgs.push_back(SharedsParam);
3076
3077 CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction,
3078 CallArgs);
3079 CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)),
3080 CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty));
3081 CGF.FinishFunction();
3082 return TaskEntry;
3083}
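// Roughly, for a plain 'task' directive the proxy emitted above behaves like
// this pseudo-C (the taskloop variant appends the lb/ub/st/liter/reductions
// values loaded from the task descriptor, as the code above shows):
// \code
// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t_with_privates *tt) {
//   TaskFunction(gtid, &tt->task_data.part_id, &tt->privates,
//                .omp_task_privates_map., tt, tt->task_data.shareds);
//   return 0;
// }
// \endcode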
3084
3085static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM,
3086 SourceLocation Loc,
3087 QualType KmpInt32Ty,
3088 QualType KmpTaskTWithPrivatesPtrQTy,
3089 QualType KmpTaskTWithPrivatesQTy) {
3090 ASTContext &C = CGM.getContext();
3091 FunctionArgList Args;
3092 ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
3093 ImplicitParamKind::Other);
3094 ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3095 KmpTaskTWithPrivatesPtrQTy.withRestrict(),
3096 ImplicitParamKind::Other);
3097 Args.push_back(&GtidArg);
3098 Args.push_back(&TaskTypeArg);
3099 const auto &DestructorFnInfo =
3100 CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
3101 llvm::FunctionType *DestructorFnTy =
3102 CGM.getTypes().GetFunctionType(DestructorFnInfo);
3103 std::string Name =
3104 CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""});
3105 auto *DestructorFn =
3106 llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage,
3107 Name, &CGM.getModule());
3108 CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn,
3109 DestructorFnInfo);
3110 DestructorFn->setDoesNotRecurse();
3111 CodeGenFunction CGF(CGM);
3112 CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo,
3113 Args, Loc, Loc);
3114
3115 LValue Base = CGF.EmitLoadOfPointerLValue(
3116 CGF.GetAddrOfLocalVar(&TaskTypeArg),
3117 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3118 const auto *KmpTaskTWithPrivatesQTyRD =
3119 cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
3120 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
3121 Base = CGF.EmitLValueForField(Base, *FI);
3122 for (const auto *Field :
3123 cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) {
3124 if (QualType::DestructionKind DtorKind =
3125 Field->getType().isDestructedType()) {
3126 LValue FieldLValue = CGF.EmitLValueForField(Base, Field);
3127 CGF.pushDestroy(DtorKind, FieldLValue.getAddress(CGF), Field->getType());
3128 }
3129 }
3130 CGF.FinishFunction();
3131 return DestructorFn;
3132}
3133
3134/// Emit a privates mapping function for correct handling of private and
3135/// firstprivate variables.
3136/// \code
3137/// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1>
3138/// **noalias priv1,..., <tyn> **noalias privn) {
3139/// *priv1 = &.privates.priv1;
3140/// ...;
3141/// *privn = &.privates.privn;
3142/// }
3143/// \endcode
3144static llvm::Value *
3145emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
3146 const OMPTaskDataTy &Data, QualType PrivatesQTy,
3147 ArrayRef<PrivateDataTy> Privates) {
3148 ASTContext &C = CGM.getContext();
3149 FunctionArgList Args;
3150 ImplicitParamDecl TaskPrivatesArg(
3151 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3152 C.getPointerType(PrivatesQTy).withConst().withRestrict(),
3153 ImplicitParamKind::Other);
3154 Args.push_back(&TaskPrivatesArg);
3155 llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, unsigned> PrivateVarsPos;
3156 unsigned Counter = 1;
3157 for (const Expr *E : Data.PrivateVars) {
3158 Args.push_back(ImplicitParamDecl::Create(
3159 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3160 C.getPointerType(C.getPointerType(E->getType()))
3161 .withConst()
3162 .withRestrict(),
3163 ImplicitParamKind::Other));
3164 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3165 PrivateVarsPos[VD] = Counter;
3166 ++Counter;
3167 }
3168 for (const Expr *E : Data.FirstprivateVars) {
3169 Args.push_back(ImplicitParamDecl::Create(
3170 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3171 C.getPointerType(C.getPointerType(E->getType()))
3172 .withConst()
3173 .withRestrict(),
3174 ImplicitParamKind::Other));
3175 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3176 PrivateVarsPos[VD] = Counter;
3177 ++Counter;
3178 }
3179 for (const Expr *E : Data.LastprivateVars) {
3180 Args.push_back(ImplicitParamDecl::Create(
3181 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3182 C.getPointerType(C.getPointerType(E->getType()))
3183 .withConst()
3184 .withRestrict(),
3185 ImplicitParamKind::Other));
3186 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3187 PrivateVarsPos[VD] = Counter;
3188 ++Counter;
3189 }
3190 for (const VarDecl *VD : Data.PrivateLocals) {
3191 QualType Ty = VD->getType().getNonReferenceType();
3192 if (VD->getType()->isLValueReferenceType())
3193 Ty = C.getPointerType(Ty);
3194 if (isAllocatableDecl(VD))
3195 Ty = C.getPointerType(Ty);
3196 Args.push_back(ImplicitParamDecl::Create(
3197 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3198 C.getPointerType(C.getPointerType(Ty)).withConst().withRestrict(),
3199 ImplicitParamKind::Other));
3200 PrivateVarsPos[VD] = Counter;
3201 ++Counter;
3202 }
3203 const auto &TaskPrivatesMapFnInfo =
3204 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
3205 llvm::FunctionType *TaskPrivatesMapTy =
3206 CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo);
3207 std::string Name =
3208 CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""});
3209 auto *TaskPrivatesMap = llvm::Function::Create(
3210 TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name,
3211 &CGM.getModule());
3212 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap,
3213 TaskPrivatesMapFnInfo);
3214 if (CGM.getLangOpts().Optimize) {
3215 TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline);
3216 TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone);
3217 TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline);
3218 }
3219 CodeGenFunction CGF(CGM);
3220 CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap,
3221 TaskPrivatesMapFnInfo, Args, Loc, Loc);
3222
3223 // *privi = &.privates.privi;
3224 LValue Base = CGF.EmitLoadOfPointerLValue(
3225 CGF.GetAddrOfLocalVar(&TaskPrivatesArg),
3226 TaskPrivatesArg.getType()->castAs<PointerType>());
3227 const auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl());
3228 Counter = 0;
3229 for (const FieldDecl *Field : PrivatesQTyRD->fields()) {
3230 LValue FieldLVal = CGF.EmitLValueForField(Base, Field);
3231 const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]];
3232 LValue RefLVal =
3233 CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType());
3234 LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue(
3235 RefLVal.getAddress(CGF), RefLVal.getType()->castAs<PointerType>());
3236 CGF.EmitStoreOfScalar(FieldLVal.getPointer(CGF), RefLoadLVal);
3237 ++Counter;
3238 }
3239 CGF.FinishFunction();
3240 return TaskPrivatesMap;
3241}
3242
3243/// Emit initialization for private variables in task-based directives.
3244static void emitPrivatesInit(CodeGenFunction &CGF,
3245 const OMPExecutableDirective &D,
3246 Address KmpTaskSharedsPtr, LValue TDBase,
3247 const RecordDecl *KmpTaskTWithPrivatesQTyRD,
3248 QualType SharedsTy, QualType SharedsPtrTy,
3249 const OMPTaskDataTy &Data,
3250 ArrayRef<PrivateDataTy> Privates, bool ForDup) {
3251 ASTContext &C = CGF.getContext();
3252 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
3253 LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI);
3254 OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind())
3255 ? OMPD_taskloop
3256 : OMPD_task;
3257 const CapturedStmt &CS = *D.getCapturedStmt(Kind);
3258 CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS);
3259 LValue SrcBase;
3260 bool IsTargetTask =
3261 isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) ||
3262 isOpenMPTargetExecutionDirective(D.getDirectiveKind());
3263 // For target-based directives skip 4 firstprivate arrays BasePointersArray,
3264 // PointersArray, SizesArray, and MappersArray. The original variables for
3265 // these arrays are not captured and we get their addresses explicitly.
3266 if ((!IsTargetTask && !Data.FirstprivateVars.empty() && ForDup) ||
3267 (IsTargetTask && KmpTaskSharedsPtr.isValid())) {
3268 SrcBase = CGF.MakeAddrLValue(
3269 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3270 KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy),
3271 CGF.ConvertTypeForMem(SharedsTy)),
3272 SharedsTy);
3273 }
3274 FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin();
3275 for (const PrivateDataTy &Pair : Privates) {
3276 // Do not initialize private locals.
3277 if (Pair.second.isLocalPrivate()) {
3278 ++FI;
3279 continue;
3280 }
3281 const VarDecl *VD = Pair.second.PrivateCopy;
3282 const Expr *Init = VD->getAnyInitializer();
3283 if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) &&
3284 !CGF.isTrivialInitializer(Init)))) {
3285 LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
3286 if (const VarDecl *Elem = Pair.second.PrivateElemInit) {
3287 const VarDecl *OriginalVD = Pair.second.Original;
3288 // Check if the variable is the target-based BasePointersArray,
3289 // PointersArray, SizesArray, or MappersArray.
3290 LValue SharedRefLValue;
3291 QualType Type = PrivateLValue.getType();
3292 const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD);
3293 if (IsTargetTask && !SharedField) {
3294 assert(isa<ImplicitParamDecl>(OriginalVD) &&
3295 isa<CapturedDecl>(OriginalVD->getDeclContext()) &&
3296 cast<CapturedDecl>(OriginalVD->getDeclContext())
3297 ->getNumParams() == 0 &&
3298 isa<TranslationUnitDecl>(
3299 cast<CapturedDecl>(OriginalVD->getDeclContext())
3300 ->getDeclContext()) &&
3301 "Expected artificial target data variable.");
3302 SharedRefLValue =
3303 CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type);
3304 } else if (ForDup) {
3305 SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField);
3306 SharedRefLValue = CGF.MakeAddrLValue(
3307 SharedRefLValue.getAddress(CGF).withAlignment(
3308 C.getDeclAlign(OriginalVD)),
3309 SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl),
3310 SharedRefLValue.getTBAAInfo());
3311 } else if (CGF.LambdaCaptureFields.count(
3312 Pair.second.Original->getCanonicalDecl()) > 0 ||
3313 isa_and_nonnull<BlockDecl>(CGF.CurCodeDecl)) {
3314 SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
3315 } else {
3316 // Processing for implicitly captured variables.
3317 InlinedOpenMPRegionRAII Region(
3318 CGF, [](CodeGenFunction &, PrePostActionTy &) {}, OMPD_unknown,
3319 /*HasCancel=*/false, /*NoInheritance=*/true);
3320 SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
3321 }
3322 if (Type->isArrayType()) {
3323 // Initialize firstprivate array.
3324 if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) {
3325 // Perform simple memcpy.
3326 CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type);
3327 } else {
3328 // Initialize firstprivate array using element-by-element
3329 // initialization.
3330 CGF.EmitOMPAggregateAssign(
3331 PrivateLValue.getAddress(CGF), SharedRefLValue.getAddress(CGF),
3332 Type,
3333 [&CGF, Elem, Init, &CapturesInfo](Address DestElement,
3334 Address SrcElement) {
3335 // Clean up any temporaries needed by the initialization.
3336 CodeGenFunction::OMPPrivateScope InitScope(CGF);
3337 InitScope.addPrivate(Elem, SrcElement);
3338 (void)InitScope.Privatize();
3339 // Emit initialization for single element.
3340 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(
3341 CGF, &CapturesInfo);
3342 CGF.EmitAnyExprToMem(Init, DestElement,
3343 Init->getType().getQualifiers(),
3344 /*IsInitializer=*/false);
3345 });
3346 }
3347 } else {
3348 CodeGenFunction::OMPPrivateScope InitScope(CGF);
3349 InitScope.addPrivate(Elem, SharedRefLValue.getAddress(CGF));
3350 (void)InitScope.Privatize();
3351 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
3352 CGF.EmitExprAsInit(Init, VD, PrivateLValue,
3353 /*capturedByInit=*/false);
3354 }
3355 } else {
3356 CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
3357 }
3358 }
3359 ++FI;
3360 }
3361}
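// For example, with a non-trivially-copyable element type
// \code
// struct S { S(); S(const S &); ~S(); };
// S Arr[4];
// #pragma omp task firstprivate(Arr)
// \endcode
// the CXXConstructExpr initializer takes the element-by-element branch above,
// while a POD array is copied with the single EmitAggregateAssign (memcpy).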
3362
3363/// Check if duplication function is required for taskloops.
3364static bool checkInitIsRequired(CodeGenFunction &CGF,
3365 ArrayRef<PrivateDataTy> Privates) {
3366 bool InitRequired = false;
3367 for (const PrivateDataTy &Pair : Privates) {
3368 if (Pair.second.isLocalPrivate())
3369 continue;
3370 const VarDecl *VD = Pair.second.PrivateCopy;
3371 const Expr *Init = VD->getAnyInitializer();
3372 InitRequired = InitRequired || (isa_and_nonnull<CXXConstructExpr>(Init) &&
3373 !CGF.isTrivialInitializer(Init));
3374 if (InitRequired)
3375 break;
3376 }
3377 return InitRequired;
3378}
3379
3380
3381/// Emit task_dup function (for initialization of
3382/// private/firstprivate/lastprivate vars and last_iter flag)
3383/// \code
3384/// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int
3385/// lastpriv) {
3386/// // setup lastprivate flag
3387/// task_dst->last = lastpriv;
3388/// // could be constructor calls here...
3389/// }
3390/// \endcode
3391static llvm::Value *
3392emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc,
3393 const OMPExecutableDirective &D,
3394 QualType KmpTaskTWithPrivatesPtrQTy,
3395 const RecordDecl *KmpTaskTWithPrivatesQTyRD,
3396 const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy,
3397 QualType SharedsPtrTy, const OMPTaskDataTy &Data,
3398 ArrayRef<PrivateDataTy> Privates, bool WithLastIter) {
3399 ASTContext &C = CGM.getContext();
3400 FunctionArgList Args;
3401 ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3402 KmpTaskTWithPrivatesPtrQTy,
3403 ImplicitParamKind::Other);
3404 ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3405 KmpTaskTWithPrivatesPtrQTy,
3406 ImplicitParamKind::Other);
3407 ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy,
3408 ImplicitParamKind::Other);
3409 Args.push_back(&DstArg);
3410 Args.push_back(&SrcArg);
3411 Args.push_back(&LastprivArg);
3412 const auto &TaskDupFnInfo =
3413 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
3414 llvm::FunctionType *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo);
3415 std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_dup", ""});
3416 auto *TaskDup = llvm::Function::Create(
3417 TaskDupTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
3418 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo);
3419 TaskDup->setDoesNotRecurse();
3420 CodeGenFunction CGF(CGM);
3421 CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc,
3422 Loc);
3423
3424 LValue TDBase = CGF.EmitLoadOfPointerLValue(
3425 CGF.GetAddrOfLocalVar(&DstArg),
3426 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3427 // task_dst->liter = lastpriv;
3428 if (WithLastIter) {
3429 auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
3430 LValue Base = CGF.EmitLValueForField(
3431 TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
3432 LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
3433 llvm::Value *Lastpriv = CGF.EmitLoadOfScalar(
3434 CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc);
3435 CGF.EmitStoreOfScalar(Lastpriv, LILVal);
3436 }
3437
3438 // Emit initial values for private copies (if any).
3439 assert(!Privates.empty());
3440 Address KmpTaskSharedsPtr = Address::invalid();
3441 if (!Data.FirstprivateVars.empty()) {
3442 LValue TDBase = CGF.EmitLoadOfPointerLValue(
3443 CGF.GetAddrOfLocalVar(&SrcArg),
3444 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3445 LValue Base = CGF.EmitLValueForField(
3446 TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
3447 KmpTaskSharedsPtr = Address(
3448 CGF.EmitLoadOfScalar(CGF.EmitLValueForField(
3449 Base, *std::next(KmpTaskTQTyRD->field_begin(),
3450 KmpTaskTShareds)),
3451 Loc),
3452 CGF.Int8Ty, CGM.getNaturalTypeAlignment(SharedsTy));
3453 }
3454 emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD,
3455 SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true);
3456 CGF.FinishFunction();
3457 return TaskDup;
3458}
3459
3460/// Checks if destructor function is required to be generated.
3461/// \return true if cleanups are required, false otherwise.
3462static bool
3463checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD,
3464 ArrayRef<PrivateDataTy> Privates) {
3465 for (const PrivateDataTy &P : Privates) {
3466 if (P.second.isLocalPrivate())
3467 continue;
3468 QualType Ty = P.second.Original->getType().getNonReferenceType();
3469 if (Ty.isDestructedType())
3470 return true;
3471 }
3472 return false;
3473}
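// E.g. for
// \code
// struct S { ~S(); };
// S Obj;
// #pragma omp task private(Obj)
// \endcode
// the private copy's type is destructed, so this returns true and the task
// is allocated with the DestructorsFlag set (see emitTaskInit below).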
3474
3475namespace {
3476/// Loop generator for OpenMP iterator expression.
3477class OMPIteratorGeneratorScope final
3478 : public CodeGenFunction::OMPPrivateScope {
3479 CodeGenFunction &CGF;
3480 const OMPIteratorExpr *E = nullptr;
3481 SmallVector<CodeGenFunction::JumpDest, 4> ContDests;
3482 SmallVector<CodeGenFunction::JumpDest, 4> ExitDests;
3483 OMPIteratorGeneratorScope() = delete;
3484 OMPIteratorGeneratorScope(OMPIteratorGeneratorScope &) = delete;
3485
3486public:
3487 OMPIteratorGeneratorScope(CodeGenFunction &CGF, const OMPIteratorExpr *E)
3488 : CodeGenFunction::OMPPrivateScope(CGF), CGF(CGF), E(E) {
3489 if (!E)
3490 return;
3491 SmallVector<llvm::Value *, 4> Uppers;
3492 for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
3493 Uppers.push_back(CGF.EmitScalarExpr(E->getHelper(I).Upper));
3494 const auto *VD = cast<VarDecl>(E->getIteratorDecl(I));
3495 addPrivate(VD, CGF.CreateMemTemp(VD->getType(), VD->getName()));
3496 const OMPIteratorHelperData &HelperData = E->getHelper(I);
3497 addPrivate(
3498 HelperData.CounterVD,
3499 CGF.CreateMemTemp(HelperData.CounterVD->getType(), "counter.addr"));
3500 }
3501 Privatize();
3502
3503 for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
3504 const OMPIteratorHelperData &HelperData = E->getHelper(I);
3505 LValue CLVal =
3506 CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(HelperData.CounterVD),
3507 HelperData.CounterVD->getType());
3508 // Counter = 0;
3509 CGF.EmitStoreOfScalar(
3510 llvm::ConstantInt::get(CLVal.getAddress(CGF).getElementType(), 0),
3511 CLVal);
3512 CodeGenFunction::JumpDest &ContDest =
3513 ContDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.cont"));
3514 CodeGenFunction::JumpDest &ExitDest =
3515 ExitDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.exit"));
3516 // N = <number-of_iterations>;
3517 llvm::Value *N = Uppers[I];
3518 // cont:
3519 // if (Counter < N) goto body; else goto exit;
3520 CGF.EmitBlock(ContDest.getBlock());
3521 auto *CVal =
3522 CGF.EmitLoadOfScalar(CLVal, HelperData.CounterVD->getLocation());
3523 llvm::Value *Cmp =
3524 HelperData.CounterVD->getType()->isSignedIntegerOrEnumerationType()
3525 ? CGF.Builder.CreateICmpSLT(CVal, N)
3526 : CGF.Builder.CreateICmpULT(CVal, N);
3527 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("iter.body");
3528 CGF.Builder.CreateCondBr(Cmp, BodyBB, ExitDest.getBlock());
3529 // body:
3530 CGF.EmitBlock(BodyBB);
3531 // Iteri = Begini + Counter * Stepi;
3532 CGF.EmitIgnoredExpr(HelperData.Update);
3533 }
3534 }
3535 ~OMPIteratorGeneratorScope() {
3536 if (!E)
3537 return;
3538 for (unsigned I = E->numOfIterators(); I > 0; --I) {
3539 // Counter = Counter + 1;
3540 const OMPIteratorHelperData &HelperData = E->getHelper(I - 1);
3541 CGF.EmitIgnoredExpr(HelperData.CounterUpdate);
3542 // goto cont;
3543 CGF.EmitBranchThroughCleanup(ContDests[I - 1]);
3544 // exit:
3545 CGF.EmitBlock(ExitDests[I - 1].getBlock(), /*IsFinished=*/I == 1);
3546 }
3547 }
3548};
3549} // namespace
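// For example, a clause such as
// \code
// depend(iterator(i = 0 : n), in : a[i])
// \endcode
// makes this scope wrap its users' per-element code in the equivalent of
// "for (counter = 0; counter < n; ++counter) { i = begin + counter * step; }",
// which is how the affinity and dependency arrays below are populated.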
3550
3551static std::pair<llvm::Value *, llvm::Value *>
3552getPointerAndSize(CodeGenFunction &CGF, const Expr *E) {
3553 const auto *OASE = dyn_cast<OMPArrayShapingExpr>(E);
3554 llvm::Value *Addr;
3555 if (OASE) {
3556 const Expr *Base = OASE->getBase();
3557 Addr = CGF.EmitScalarExpr(Base);
3558 } else {
3559 Addr = CGF.EmitLValue(E).getPointer(CGF);
3560 }
3561 llvm::Value *SizeVal;
3562 QualType Ty = E->getType();
3563 if (OASE) {
3564 SizeVal = CGF.getTypeSize(OASE->getBase()->getType()->getPointeeType());
3565 for (const Expr *SE : OASE->getDimensions()) {
3566 llvm::Value *Sz = CGF.EmitScalarExpr(SE);
3567 Sz = CGF.EmitScalarConversion(
3568 Sz, SE->getType(), CGF.getContext().getSizeType(), SE->getExprLoc());
3569 SizeVal = CGF.Builder.CreateNUWMul(SizeVal, Sz);
3570 }
3571 } else if (const auto *ASE =
3572 dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) {
3573 LValue UpAddrLVal =
3574 CGF.EmitOMPArraySectionExpr(ASE, /*IsLowerBound=*/false);
3575 Address UpAddrAddress = UpAddrLVal.getAddress(CGF);
3576 llvm::Value *UpAddr = CGF.Builder.CreateConstGEP1_32(
3577 UpAddrAddress.getElementType(), UpAddrAddress.getPointer(), /*Idx0=*/1);
3578 llvm::Value *LowIntPtr = CGF.Builder.CreatePtrToInt(Addr, CGF.SizeTy);
3579 llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGF.SizeTy);
3580 SizeVal = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr);
3581 } else {
3582 SizeVal = CGF.getTypeSize(Ty);
3583 }
3584 return std::make_pair(Addr, SizeVal);
3585}
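// E.g. for an array-shaping expression ([n][m])p the size evaluates to
// sizeof(*p) * n * m; for an array section a[l:len] it is the byte distance
// from &a[l] to one past the section's last element; otherwise it is simply
// the size of the expression's type.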
3586
3587/// Builds kmp_task_affinity_info_t, if it is not built yet, and builds flags type.
3588static void getKmpAffinityType(ASTContext &C, QualType &KmpTaskAffinityInfoTy) {
3589 QualType FlagsTy = C.getIntTypeForBitwidth(32, /*Signed=*/false);
3590 if (KmpTaskAffinityInfoTy.isNull()) {
3591 RecordDecl *KmpAffinityInfoRD =
3592 C.buildImplicitRecord("kmp_task_affinity_info_t");
3593 KmpAffinityInfoRD->startDefinition();
3594 addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getIntPtrType());
3595 addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getSizeType());
3596 addFieldToRecordDecl(C, KmpAffinityInfoRD, FlagsTy);
3597 KmpAffinityInfoRD->completeDefinition();
3598 KmpTaskAffinityInfoTy = C.getRecordType(KmpAffinityInfoRD);
3599 }
3600}
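// The record built here mirrors, roughly, the runtime's
// \code
// struct kmp_task_affinity_info_t {
//   intptr_t base_addr;
//   size_t len;
//   kmp_uint32 flags;
// };
// \endcode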
3601
3602CGOpenMPRuntime::TaskResultTy
3603CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
3604 const OMPExecutableDirective &D,
3605 llvm::Function *TaskFunction, QualType SharedsTy,
3606 Address Shareds, const OMPTaskDataTy &Data) {
3607 ASTContext &C = CGM.getContext();
3608 SmallVector<PrivateDataTy, 4> Privates;
3609 // Aggregate privates and sort them by the alignment.
3610 const auto *I = Data.PrivateCopies.begin();
3611 for (const Expr *E : Data.PrivateVars) {
3612 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3613 Privates.emplace_back(
3614 C.getDeclAlign(VD),
3615 PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
3616 /*PrivateElemInit=*/nullptr));
3617 ++I;
3618 }
3619 I = Data.FirstprivateCopies.begin();
3620 const auto *IElemInitRef = Data.FirstprivateInits.begin();
3621 for (const Expr *E : Data.FirstprivateVars) {
3622 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3623 Privates.emplace_back(
3624 C.getDeclAlign(VD),
3625 PrivateHelpersTy(
3626 E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
3627 cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl())));
3628 ++I;
3629 ++IElemInitRef;
3630 }
3631 I = Data.LastprivateCopies.begin();
3632 for (const Expr *E : Data.LastprivateVars) {
3633 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3634 Privates.emplace_back(
3635 C.getDeclAlign(VD),
3636 PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
3637 /*PrivateElemInit=*/nullptr));
3638 ++I;
3639 }
3640 for (const VarDecl *VD : Data.PrivateLocals) {
3641 if (isAllocatableDecl(VD))
3642 Privates.emplace_back(CGM.getPointerAlign(), PrivateHelpersTy(VD));
3643 else
3644 Privates.emplace_back(C.getDeclAlign(VD), PrivateHelpersTy(VD));
3645 }
3646 llvm::stable_sort(Privates,
3647 [](const PrivateDataTy &L, const PrivateDataTy &R) {
3648 return L.first > R.first;
3649 });
3650 QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
3651 // Build type kmp_routine_entry_t (if not built yet).
3652 emitKmpRoutineEntryT(KmpInt32Ty);
3653 // Build type kmp_task_t (if not built yet).
3654 if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) {
3655 if (SavedKmpTaskloopTQTy.isNull()) {
3656 SavedKmpTaskloopTQTy = C.getRecordType(createKmpTaskTRecordDecl(
3657 CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
3658 }
3659 KmpTaskTQTy = SavedKmpTaskloopTQTy;
3660 } else {
3661 assert((D.getDirectiveKind() == OMPD_task ||
3662 isOpenMPTargetExecutionDirective(D.getDirectiveKind()) ||
3663 isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) &&
3664 "Expected taskloop, task or target directive");
3665 if (SavedKmpTaskTQTy.isNull()) {
3666 SavedKmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl(
3667 CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
3668 }
3669 KmpTaskTQTy = SavedKmpTaskTQTy;
3670 }
3671 const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
3672 // Build particular struct kmp_task_t for the given task.
3673 const RecordDecl *KmpTaskTWithPrivatesQTyRD =
3674 createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates);
3675 QualType KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD);
3676 QualType KmpTaskTWithPrivatesPtrQTy =
3677 C.getPointerType(KmpTaskTWithPrivatesQTy);
3678 llvm::Type *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy);
3679 llvm::Type *KmpTaskTWithPrivatesPtrTy =
3680 KmpTaskTWithPrivatesTy->getPointerTo();
3681 llvm::Value *KmpTaskTWithPrivatesTySize =
3682 CGF.getTypeSize(KmpTaskTWithPrivatesQTy);
3683 QualType SharedsPtrTy = C.getPointerType(SharedsTy);
3684
3685 // Emit initial values for private copies (if any).
3686 llvm::Value *TaskPrivatesMap = nullptr;
3687 llvm::Type *TaskPrivatesMapTy =
3688 std::next(TaskFunction->arg_begin(), 3)->getType();
3689 if (!Privates.empty()) {
3690 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
3691 TaskPrivatesMap =
3692 emitTaskPrivateMappingFunction(CGM, Loc, Data, FI->getType(), Privates);
3693 TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3694 TaskPrivatesMap, TaskPrivatesMapTy);
3695 } else {
3696 TaskPrivatesMap = llvm::ConstantPointerNull::get(
3697 cast<llvm::PointerType>(TaskPrivatesMapTy));
3698 }
3699 // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
3700 // kmp_task_t *tt);
3701 llvm::Function *TaskEntry = emitProxyTaskFunction(
3702 CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
3703 KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction,
3704 TaskPrivatesMap);
3705
3706 // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
3707 // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
3708 // kmp_routine_entry_t *task_entry);
3709 // Task flags. Format is taken from
3710 // https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h,
3711 // description of kmp_tasking_flags struct.
3712 enum {
3713 TiedFlag = 0x1,
3714 FinalFlag = 0x2,
3715 DestructorsFlag = 0x8,
3716 PriorityFlag = 0x20,
3717 DetachableFlag = 0x40,
3718 };
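// E.g. a tied task whose privates need destructors is allocated with
// TiedFlag | DestructorsFlag == 0x9; the FinalFlag bit is OR'ed in through a
// select below because the 'final' clause argument may only be known at run
// time.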
3719 unsigned Flags = Data.Tied ? TiedFlag : 0;
3720 bool NeedsCleanup = false;
3721 if (!Privates.empty()) {
3722 NeedsCleanup =
3723 checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD, Privates);
3724 if (NeedsCleanup)
3725 Flags = Flags | DestructorsFlag;
3726 }
3727 if (Data.Priority.getInt())
3728 Flags = Flags | PriorityFlag;
3729 if (D.hasClausesOfKind<OMPDetachClause>())
3730 Flags = Flags | DetachableFlag;
3731 llvm::Value *TaskFlags =
3732 Data.Final.getPointer()
3733 ? CGF.Builder.CreateSelect(Data.Final.getPointer(),
3734 CGF.Builder.getInt32(FinalFlag),
3735 CGF.Builder.getInt32(/*C=*/0))
3736 : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0);
3737 TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
3738 llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy));
3739 SmallVector<llvm::Value *, 8> AllocArgs = {emitUpdateLocation(CGF, Loc),
3740 getThreadID(CGF, Loc), TaskFlags, KmpTaskTWithPrivatesTySize,
3741 SharedsSize, CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3742 TaskEntry, KmpRoutineEntryPtrTy)};
3743 llvm::Value *NewTask;
3744 if (D.hasClausesOfKind<OMPNowaitClause>()) {
3745 // Check if we have any device clause associated with the directive.
3746 const Expr *Device = nullptr;
3747 if (auto *C = D.getSingleClause<OMPDeviceClause>())
3748 Device = C->getDevice();
3749 // Emit device ID if any otherwise use default value.
3750 llvm::Value *DeviceID;
3751 if (Device)
3752 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
3753 CGF.Int64Ty, /*isSigned=*/true);
3754 else
3755 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
3756 AllocArgs.push_back(DeviceID);
3757 NewTask = CGF.EmitRuntimeCall(
3758 OMPBuilder.getOrCreateRuntimeFunction(
3759 CGM.getModule(), OMPRTL___kmpc_omp_target_task_alloc),
3760 AllocArgs);
3761 } else {
3762 NewTask =
3763 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
3764 CGM.getModule(), OMPRTL___kmpc_omp_task_alloc),
3765 AllocArgs);
3766 }
3767 // Emit detach clause initialization.
3768 // evt = (typeof(evt))__kmpc_task_allow_completion_event(loc, tid,
3769 // task_descriptor);
3770 if (const auto *DC = D.getSingleClause<OMPDetachClause>()) {
3771 const Expr *Evt = DC->getEventHandler()->IgnoreParenImpCasts();
3772 LValue EvtLVal = CGF.EmitLValue(Evt);
3773
3774 // Build kmp_event_t *__kmpc_task_allow_completion_event(ident_t *loc_ref,
3775 // int gtid, kmp_task_t *task);
3776 llvm::Value *Loc = emitUpdateLocation(CGF, DC->getBeginLoc());
3777 llvm::Value *Tid = getThreadID(CGF, DC->getBeginLoc());
3778 Tid = CGF.Builder.CreateIntCast(Tid, CGF.IntTy, /*isSigned=*/false);
3779 llvm::Value *EvtVal = CGF.EmitRuntimeCall(
3780 OMPBuilder.getOrCreateRuntimeFunction(
3781 CGM.getModule(), OMPRTL___kmpc_task_allow_completion_event),
3782 {Loc, Tid, NewTask});
3783 EvtVal = CGF.EmitScalarConversion(EvtVal, C.VoidPtrTy, Evt->getType(),
3784 Evt->getExprLoc());
3785 CGF.EmitStoreOfScalar(EvtVal, EvtLVal);
3786 }
3787 // Process affinity clauses.
3788 if (D.hasClausesOfKind<OMPAffinityClause>()) {
3789 // Process list of affinity data.
3790 ASTContext &C = CGM.getContext();
3791 Address AffinitiesArray = Address::invalid();
3792 // Calculate number of elements to form the array of affinity data.
3793 llvm::Value *NumOfElements = nullptr;
3794 unsigned NumAffinities = 0;
3795 for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
3796 if (const Expr *Modifier = C->getModifier()) {
3797 const auto *IE = cast<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts());
3798 for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
3799 llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
3800 Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
3801 NumOfElements =
3802 NumOfElements ? CGF.Builder.CreateNUWMul(NumOfElements, Sz) : Sz;
3803 }
3804 } else {
3805 NumAffinities += C->varlist_size();
3806 }
3807 }
3808 getKmpAffinityType(CGM.getContext(), KmpTaskAffinityInfoTy);
3809 // Fields ids in kmp_task_affinity_info record.
3810 enum RTLAffinityInfoFieldsTy { BaseAddr, Len, Flags };
3811
3812 QualType KmpTaskAffinityInfoArrayTy;
3813 if (NumOfElements) {
3814 NumOfElements = CGF.Builder.CreateNUWAdd(
3815 llvm::ConstantInt::get(CGF.SizeTy, NumAffinities), NumOfElements);
3816 auto *OVE = new (C) OpaqueValueExpr(
3817 Loc,
3818 C.getIntTypeForBitwidth(C.getTypeSize(C.getSizeType()), /*Signed=*/0),
3819 VK_PRValue);
3820 CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE,
3821 RValue::get(NumOfElements));
3822 KmpTaskAffinityInfoArrayTy = C.getVariableArrayType(
3823 KmpTaskAffinityInfoTy, OVE, ArraySizeModifier::Normal,
3824 /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
3825 // Properly emit variable-sized array.
3826 auto *PD = ImplicitParamDecl::Create(C, KmpTaskAffinityInfoArrayTy,
3827 ImplicitParamKind::Other);
3828 CGF.EmitVarDecl(*PD);
3829 AffinitiesArray = CGF.GetAddrOfLocalVar(PD);
3830 NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
3831 /*isSigned=*/false);
3832 } else {
3833 KmpTaskAffinityInfoArrayTy = C.getConstantArrayType(
3834 KmpTaskAffinityInfoTy,
3835 llvm::APInt(C.getTypeSize(C.getSizeType()), NumAffinities), nullptr,
3836 ArraySizeModifier::Normal, /*IndexTypeQuals=*/0);
3837 AffinitiesArray =
3838 CGF.CreateMemTemp(KmpTaskAffinityInfoArrayTy, ".affs.arr.addr");
3839 AffinitiesArray = CGF.Builder.CreateConstArrayGEP(AffinitiesArray, 0);
3840 NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumAffinities,
3841 /*isSigned=*/false);
3842 }
3843
3844 const auto *KmpAffinityInfoRD = KmpTaskAffinityInfoTy->getAsRecordDecl();
3845 // Fill array by elements without iterators.
3846 unsigned Pos = 0;
3847 bool HasIterator = false;
3848 for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
3849 if (C->getModifier()) {
3850 HasIterator = true;
3851 continue;
3852 }
3853 for (const Expr *E : C->varlists()) {
3854 llvm::Value *Addr;
3855 llvm::Value *Size;
3856 std::tie(Addr, Size) = getPointerAndSize(CGF, E);
3857 LValue Base =
3858 CGF.MakeAddrLValue(CGF.Builder.CreateConstGEP(AffinitiesArray, Pos),
3859 KmpTaskAffinityInfoTy);
3860 // affs[i].base_addr = &<Affinities[i].second>;
3861 LValue BaseAddrLVal = CGF.EmitLValueForField(
3862 Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
3863 CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
3864 BaseAddrLVal);
3865 // affs[i].len = sizeof(<Affinities[i].second>);
3866 LValue LenLVal = CGF.EmitLValueForField(
3867 Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
3868 CGF.EmitStoreOfScalar(Size, LenLVal);
3869 ++Pos;
3870 }
3871 }
3872 LValue PosLVal;
3873 if (HasIterator) {
3874 PosLVal = CGF.MakeAddrLValue(
3875 CGF.CreateMemTemp(C.getSizeType(), "affs.counter.addr"),
3876 C.getSizeType());
3877 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
3878 }
3879 // Process elements with iterators.
3880 for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
3881 const Expr *Modifier = C->getModifier();
3882 if (!Modifier)
3883 continue;
3884 OMPIteratorGeneratorScope IteratorScope(
3885 CGF, cast_or_null<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts()));
3886 for (const Expr *E : C->varlists()) {
3887 llvm::Value *Addr;
3888 llvm::Value *Size;
3889 std::tie(Addr, Size) = getPointerAndSize(CGF, E);
3890 llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
3891 LValue Base = CGF.MakeAddrLValue(
3892 CGF.Builder.CreateGEP(AffinitiesArray, Idx), KmpTaskAffinityInfoTy);
3893 // affs[i].base_addr = &<Affinities[i].second>;
3894 LValue BaseAddrLVal = CGF.EmitLValueForField(
3895 Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
3896 CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
3897 BaseAddrLVal);
3898 // affs[i].len = sizeof(<Affinities[i].second>);
3899 LValue LenLVal = CGF.EmitLValueForField(
3900 Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
3901 CGF.EmitStoreOfScalar(Size, LenLVal);
3902 Idx = CGF.Builder.CreateNUWAdd(
3903 Idx, llvm::ConstantInt::get(Idx->getType(), 1));
3904 CGF.EmitStoreOfScalar(Idx, PosLVal);
3905 }
3906 }
3907 // Call to kmp_int32 __kmpc_omp_reg_task_with_affinity(ident_t *loc_ref,
3908 // kmp_int32 gtid, kmp_task_t *new_task, kmp_int32
3909 // naffins, kmp_task_affinity_info_t *affin_list);
3910 llvm::Value *LocRef = emitUpdateLocation(CGF, Loc);
3911 llvm::Value *GTid = getThreadID(CGF, Loc);
3912 llvm::Value *AffinListPtr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3913 AffinitiesArray.getPointer(), CGM.VoidPtrTy);
3914 // FIXME: Emit the function and ignore its result for now unless the
3915 // runtime function is properly implemented.
3916 (void)CGF.EmitRuntimeCall(
3917 OMPBuilder.getOrCreateRuntimeFunction(
3918 CGM.getModule(), OMPRTL___kmpc_omp_reg_task_with_affinity),
3919 {LocRef, GTid, NewTask, NumOfElements, AffinListPtr});
3920 }
3921 llvm::Value *NewTaskNewTaskTTy =
3922 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3923 NewTask, KmpTaskTWithPrivatesPtrTy);
3924 LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy,
3925 KmpTaskTWithPrivatesQTy);
3926 LValue TDBase =
3927 CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin());
3928 // Fill the data in the resulting kmp_task_t record.
3929 // Copy shareds if there are any.
3930 Address KmpTaskSharedsPtr = Address::invalid();
3931 if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) {
3932 KmpTaskSharedsPtr = Address(
3933 CGF.EmitLoadOfScalar(
3934 CGF.EmitLValueForField(
3935 TDBase,
3936 *std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds)),
3937 Loc),
3938 CGF.Int8Ty, CGM.getNaturalTypeAlignment(SharedsTy));
3939 LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy);
3940 LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy);
3941 CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap);
3942 }
3943 // Emit initial values for private copies (if any).
3944 TaskResultTy Result;
3945 if (!Privates.empty()) {
3946 emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD,
3947 SharedsTy, SharedsPtrTy, Data, Privates,
3948 /*ForDup=*/false);
3949 if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
3950 (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) {
3951 Result.TaskDupFn = emitTaskDupFunction(
3952 CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD,
3953 KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates,
3954 /*WithLastIter=*/!Data.LastprivateVars.empty());
3955 }
3956 }
3957 // Fields of union "kmp_cmplrdata_t" for destructors and priority.
3958 enum { Priority = 0, Destructors = 1 };
3959 // Provide pointer to function with destructors for privates.
3960 auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1);
3961 const RecordDecl *KmpCmplrdataUD =
3962 (*FI)->getType()->getAsUnionType()->getDecl();
3963 if (NeedsCleanup) {
3964 llvm::Value *DestructorFn = emitDestructorsFunction(
3965 CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
3966 KmpTaskTWithPrivatesQTy);
3967 LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI);
3968 LValue DestructorsLV = CGF.EmitLValueForField(
3969 Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors));
3970 CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3971 DestructorFn, KmpRoutineEntryPtrTy),
3972 DestructorsLV);
3973 }
3974 // Set priority.
3975 if (Data.Priority.getInt()) {
3976 LValue Data2LV = CGF.EmitLValueForField(
3977 TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2));
3978 LValue PriorityLV = CGF.EmitLValueForField(
3979 Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority));
3980 CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV);
3981 }
3982 Result.NewTask = NewTask;
3983 Result.TaskEntry = TaskEntry;
3984 Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy;
3985 Result.TDBase = TDBase;
3986 Result.KmpTaskTQTyRD = KmpTaskTQTyRD;
3987 return Result;
3988}
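// For a simple
// \code
// #pragma omp task shared(a) firstprivate(b)
// \endcode
// all of the above boils down to one __kmpc_omp_task_alloc call sized for
// kmp_task_t_with_privates plus the shareds block, an aggregate copy of the
// captured shareds, and the firstprivate initialization of the privates area.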
3989
3990/// Translates internal dependency kind into the runtime kind.
3991static RTLDependenceKindTy translateDependencyKind(OpenMPDependClauseKind K) {
3992 RTLDependenceKindTy DepKind;
3993 switch (K) {
3994 case OMPC_DEPEND_in:
3995 DepKind = RTLDependenceKindTy::DepIn;
3996 break;
3997 // Out and InOut dependencies must use the same code.
3998 case OMPC_DEPEND_out:
3999 case OMPC_DEPEND_inout:
4000 DepKind = RTLDependenceKindTy::DepInOut;
4001 break;
4002 case OMPC_DEPEND_mutexinoutset:
4003 DepKind = RTLDependenceKindTy::DepMutexInOutSet;
4004 break;
4005 case OMPC_DEPEND_inoutset:
4006 DepKind = RTLDependenceKindTy::DepInOutSet;
4007 break;
4008 case OMPC_DEPEND_outallmemory:
4009 DepKind = RTLDependenceKindTy::DepOmpAllMem;
4010 break;
4011 case OMPC_DEPEND_source:
4012 case OMPC_DEPEND_sink:
4013 case OMPC_DEPEND_depobj:
4014 case OMPC_DEPEND_inoutallmemory:
4015 case OMPC_DEPEND_unknown:
4016 llvm_unreachable("Unknown task dependence type");
4017 }
4018 return DepKind;
4019}
4020
4021/// Builds kmp_depend_info, if it is not built yet, and builds flags type.
4022static void getDependTypes(ASTContext &C, QualType &KmpDependInfoTy,
4023 QualType &FlagsTy) {
4024 FlagsTy = C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false);
4025 if (KmpDependInfoTy.isNull()) {
4026 RecordDecl *KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info");
4027 KmpDependInfoRD->startDefinition();
4028 addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType());
4029 addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType());
4030 addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy);
4031 KmpDependInfoRD->completeDefinition();
4032 KmpDependInfoTy = C.getRecordType(KmpDependInfoRD);
4033 }
4034}
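// This mirrors, roughly, the runtime's
// \code
// struct kmp_depend_info {
//   kmp_intptr_t base_addr;
//   size_t len;
//   kmp_uint8 flags; // holds one of the RTLDependenceKindTy values
// };
// \endcode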
4035
4036std::pair<llvm::Value *, LValue>
4037CGOpenMPRuntime::getDepobjElements(CodeGenFunction &CGF, LValue DepobjLVal,
4038 SourceLocation Loc) {
4039 ASTContext &C = CGM.getContext();
4040 QualType FlagsTy;
4041 getDependTypes(C, KmpDependInfoTy, FlagsTy);
4042 RecordDecl *KmpDependInfoRD =
4043 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4044 QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
4045 LValue Base = CGF.EmitLoadOfPointerLValue(
4046 DepobjLVal.getAddress(CGF).withElementType(
4047 CGF.ConvertTypeForMem(KmpDependInfoPtrTy)),
4048 KmpDependInfoPtrTy->castAs<PointerType>());
4049 Address DepObjAddr = CGF.Builder.CreateGEP(
4050 Base.getAddress(CGF),
4051 llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
4052 LValue NumDepsBase = CGF.MakeAddrLValue(
4053 DepObjAddr, KmpDependInfoTy, Base.getBaseInfo(), Base.getTBAAInfo());
4054 // NumDeps = deps[i].base_addr;
4055 LValue BaseAddrLVal = CGF.EmitLValueForField(
4056 NumDepsBase,
4057 *std::next(KmpDependInfoRD->field_begin(),
4058 static_cast<unsigned int>(RTLDependInfoFields::BaseAddr)));
4059 llvm::Value *NumDeps = CGF.EmitLoadOfScalar(BaseAddrLVal, Loc);
4060 return std::make_pair(NumDeps, Base);
4061}
4062
4063static void emitDependData(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
4064 llvm::PointerUnion<unsigned *, LValue *> Pos,
4065 const OMPTaskDataTy::DependData &Data,
4066 Address DependenciesArray) {
4067 CodeGenModule &CGM = CGF.CGM;
4068 ASTContext &C = CGM.getContext();
4069 QualType FlagsTy;
4070 getDependTypes(C, KmpDependInfoTy, FlagsTy);
4071 RecordDecl *KmpDependInfoRD =
4072 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4073 llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
4074
4075 OMPIteratorGeneratorScope IteratorScope(
4076 CGF, cast_or_null<OMPIteratorExpr>(
4077 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
4078 : nullptr));
4079 for (const Expr *E : Data.DepExprs) {
4080 llvm::Value *Addr;
4081 llvm::Value *Size;
4082
4083 // The expression will be a nullptr in the 'omp_all_memory' case.
4084 if (E) {
4085 std::tie(Addr, Size) = getPointerAndSize(CGF, E);
4086 Addr = CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy);
4087 } else {
4088 Addr = llvm::ConstantInt::get(CGF.IntPtrTy, 0);
4089 Size = llvm::ConstantInt::get(CGF.SizeTy, 0);
4090 }
4091 LValue Base;
4092 if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
4093 Base = CGF.MakeAddrLValue(
4094 CGF.Builder.CreateConstGEP(DependenciesArray, *P), KmpDependInfoTy);
4095 } else {
4096 assert(E && "Expected a non-null expression");
4097 LValue &PosLVal = *Pos.get<LValue *>();
4098 llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4099 Base = CGF.MakeAddrLValue(
4100 CGF.Builder.CreateGEP(DependenciesArray, Idx), KmpDependInfoTy);
4101 }
4102 // deps[i].base_addr = &<Dependencies[i].second>;
4103 LValue BaseAddrLVal = CGF.EmitLValueForField(
4104 Base,
4105 *std::next(KmpDependInfoRD->field_begin(),
4106 static_cast<unsigned int>(RTLDependInfoFields::BaseAddr)));
4107 CGF.EmitStoreOfScalar(Addr, BaseAddrLVal);
4108 // deps[i].len = sizeof(<Dependencies[i].second>);
4109 LValue LenLVal = CGF.EmitLValueForField(
4110 Base, *std::next(KmpDependInfoRD->field_begin(),
4111 static_cast<unsigned int>(RTLDependInfoFields::Len)));
4112 CGF.EmitStoreOfScalar(Size, LenLVal);
4113 // deps[i].flags = <Dependencies[i].first>;
4114 RTLDependenceKindTy DepKind = translateDependencyKind(Data.DepKind);
4115 LValue FlagsLVal = CGF.EmitLValueForField(
4116 Base,
4117 *std::next(KmpDependInfoRD->field_begin(),
4118 static_cast<unsigned int>(RTLDependInfoFields::Flags)));
4119 CGF.EmitStoreOfScalar(
4120 llvm::ConstantInt::get(LLVMFlagsTy, static_cast<unsigned int>(DepKind)),
4121 FlagsLVal);
4122 if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
4123 ++(*P);
4124 } else {
4125 LValue &PosLVal = *Pos.get<LValue *>();
4126 llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4127 Idx = CGF.Builder.CreateNUWAdd(Idx,
4128 llvm::ConstantInt::get(Idx->getType(), 1));
4129 CGF.EmitStoreOfScalar(Idx, PosLVal);
4130 }
4131 }
4132}
4133
4134static SmallVector<llvm::Value *, 4> emitDepobjElementsSizes(
4135 CodeGenFunction &CGF, QualType &KmpDependInfoTy,
4136 const OMPTaskDataTy::DependData &Data) {
4137 assert(Data.DepKind == OMPC_DEPEND_depobj &&
4138 "Expected depobj dependency kind.");
4139 SmallVector<llvm::Value *, 4> Sizes;
4140 SmallVector<LValue, 4> SizeLVals;
4141 ASTContext &C = CGF.getContext();
4142 {
4143 OMPIteratorGeneratorScope IteratorScope(
4144 CGF, cast_or_null<OMPIteratorExpr>(
4145 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
4146 : nullptr));
4147 for (const Expr *E : Data.DepExprs) {
4148 llvm::Value *NumDeps;
4149 LValue Base;
4150 LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
4151 std::tie(NumDeps, Base) =
4152 getDepobjElements(CGF, DepobjLVal, E->getExprLoc());
4153 LValue NumLVal = CGF.MakeAddrLValue(
4154 CGF.CreateMemTemp(C.getUIntPtrType(), "depobj.size.addr"),
4155 C.getUIntPtrType());
4156 CGF.Builder.CreateStore(llvm::ConstantInt::get(CGF.IntPtrTy, 0),
4157 NumLVal.getAddress(CGF));
4158 llvm::Value *PrevVal = CGF.EmitLoadOfScalar(NumLVal, E->getExprLoc());
4159 llvm::Value *Add = CGF.Builder.CreateNUWAdd(PrevVal, NumDeps);
4160 CGF.EmitStoreOfScalar(Add, NumLVal);
4161 SizeLVals.push_back(NumLVal);
4162 }
4163 }
4164 for (unsigned I = 0, E = SizeLVals.size(); I < E; ++I) {
4165 llvm::Value *Size =
4166 CGF.EmitLoadOfScalar(SizeLVals[I], Data.DepExprs[I]->getExprLoc());
4167 Sizes.push_back(Size);
4168 }
4169 return Sizes;
4170}
4171
4172static void emitDepobjElements(CodeGenFunction &CGF,
4173 QualType &KmpDependInfoTy,
4174 LValue PosLVal,
4175 const OMPTaskDataTy::DependData &Data,
4176 Address DependenciesArray) {
4177 assert(Data.DepKind == OMPC_DEPEND_depobj &&
4178 "Expected depobj dependency kind.");
4179 llvm::Value *ElSize = CGF.getTypeSize(KmpDependInfoTy);
4180 {
4181 OMPIteratorGeneratorScope IteratorScope(
4182 CGF, cast_or_null<OMPIteratorExpr>(
4183 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
4184 : nullptr));
4185 for (unsigned I = 0, End = Data.DepExprs.size(); I < End; ++I) {
4186 const Expr *E = Data.DepExprs[I];
4187 llvm::Value *NumDeps;
4188 LValue Base;
4189 LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
4190 std::tie(NumDeps, Base) =
4191 getDepobjElements(CGF, DepobjLVal, E->getExprLoc());
4192
4193 // memcopy dependency data.
4194 llvm::Value *Size = CGF.Builder.CreateNUWMul(
4195 ElSize,
4196 CGF.Builder.CreateIntCast(NumDeps, CGF.SizeTy, /*isSigned=*/false));
4197 llvm::Value *Pos = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4198 Address DepAddr = CGF.Builder.CreateGEP(DependenciesArray, Pos);
4199 CGF.Builder.CreateMemCpy(DepAddr, Base.getAddress(CGF), Size);
4200
4201 // Increase pos.
4202 // pos += size;
4203 llvm::Value *Add = CGF.Builder.CreateNUWAdd(Pos, NumDeps);
4204 CGF.EmitStoreOfScalar(Add, PosLVal);
4205 }
4206 }
4207}
4208
4209std::pair<llvm::Value *, Address> CGOpenMPRuntime::emitDependClause(
4210 CodeGenFunction &CGF, ArrayRef<OMPTaskDataTy::DependData> Dependencies,
4211 SourceLocation Loc) {
4212 if (llvm::all_of(Dependencies, [](const OMPTaskDataTy::DependData &D) {
4213 return D.DepExprs.empty();
4214 }))
4215 return std::make_pair(nullptr, Address::invalid());
4216 // Process list of dependencies.
4217 ASTContext &C = CGM.getContext();
4218 Address DependenciesArray = Address::invalid();
4219 llvm::Value *NumOfElements = nullptr;
4220 unsigned NumDependencies = std::accumulate(
4221 Dependencies.begin(), Dependencies.end(), 0,
4222 [](unsigned V, const OMPTaskDataTy::DependData &D) {
4223 return D.DepKind == OMPC_DEPEND_depobj
4224 ? V
4225 : (V + (D.IteratorExpr ? 0 : D.DepExprs.size()));
4226 });
4227 QualType FlagsTy;
4228 getDependTypes(C, KmpDependInfoTy, FlagsTy);
4229 bool HasDepobjDeps = false;
4230 bool HasRegularWithIterators = false;
4231 llvm::Value *NumOfDepobjElements = llvm::ConstantInt::get(CGF.IntPtrTy, 0);
4232 llvm::Value *NumOfRegularWithIterators =
4233 llvm::ConstantInt::get(CGF.IntPtrTy, 0);
4234 // Calculate number of depobj dependencies and regular deps with the
4235 // iterators.
4236 for (const OMPTaskDataTy::DependData &D : Dependencies) {
4237 if (D.DepKind == OMPC_DEPEND_depobj) {
4238 SmallVector<llvm::Value *, 4> Sizes =
4239 emitDepobjElementsSizes(CGF, KmpDependInfoTy, D);
4240 for (llvm::Value *Size : Sizes) {
4241 NumOfDepobjElements =
4242 CGF.Builder.CreateNUWAdd(NumOfDepobjElements, Size);
4243 }
4244 HasDepobjDeps = true;
4245 continue;
4246 }
4247 // Include number of iterations, if any.
4248
4249 if (const auto *IE = cast_or_null<OMPIteratorExpr>(D.IteratorExpr)) {
4250 for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
4251 llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
4252 Sz = CGF.Builder.CreateIntCast(Sz, CGF.IntPtrTy, /*isSigned=*/false);
4253 llvm::Value *NumClauseDeps = CGF.Builder.CreateNUWMul(
4254 Sz, llvm::ConstantInt::get(CGF.IntPtrTy, D.DepExprs.size()));
4255 NumOfRegularWithIterators =
4256 CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumClauseDeps);
4257 }
4258 HasRegularWithIterators = true;
4259 continue;
4260 }
4261 }
4262
4263 QualType KmpDependInfoArrayTy;
4264 if (HasDepobjDeps || HasRegularWithIterators) {
4265 NumOfElements = llvm::ConstantInt::get(CGM.IntPtrTy, NumDependencies,
4266 /*isSigned=*/false);
4267 if (HasDepobjDeps) {
4268 NumOfElements =
4269 CGF.Builder.CreateNUWAdd(NumOfDepobjElements, NumOfElements);
4270 }
4271 if (HasRegularWithIterators) {
4272 NumOfElements =
4273 CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumOfElements);
4274 }
4275 auto *OVE = new (C) OpaqueValueExpr(
4276 Loc, C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0),
4277 VK_PRValue);
4278 CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE,
4279 RValue::get(NumOfElements));
4280 KmpDependInfoArrayTy =
4281 C.getVariableArrayType(KmpDependInfoTy, OVE, ArraySizeModifier::Normal,
4282 /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
4283 // CGF.EmitVariablyModifiedType(KmpDependInfoArrayTy);
4284 // Properly emit variable-sized array.
4285 auto *PD = ImplicitParamDecl::Create(C, KmpDependInfoArrayTy,
4286 ImplicitParamKind::Other);
4287 CGF.EmitVarDecl(*PD);
4288 DependenciesArray = CGF.GetAddrOfLocalVar(PD);
4289 NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
4290 /*isSigned=*/false);
4291 } else {
4292 KmpDependInfoArrayTy = C.getConstantArrayType(
4293 KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies), nullptr,
4294 ArraySizeModifier::Normal, /*IndexTypeQuals=*/0);
4295 DependenciesArray =
4296 CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr");
4297 DependenciesArray = CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0);
4298 NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumDependencies,
4299 /*isSigned=*/false);
4300 }
4301 unsigned Pos = 0;
4302 for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
4303 if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
4304 Dependencies[I].IteratorExpr)
4305 continue;
4306 emitDependData(CGF, KmpDependInfoTy, &Pos, Dependencies[I],
4307 DependenciesArray);
4308 }
4309 // Copy regular dependencies with iterators.
4310 LValue PosLVal = CGF.MakeAddrLValue(
4311 CGF.CreateMemTemp(C.getSizeType(), "dep.counter.addr"), C.getSizeType());
4312 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
4313 for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
4314 if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
4315 !Dependencies[I].IteratorExpr)
4316 continue;
4317 emitDependData(CGF, KmpDependInfoTy, &PosLVal, Dependencies[I],
4318 DependenciesArray);
4319 }
4320 // Copy final depobj arrays without iterators.
4321 if (HasDepobjDeps) {
4322 for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
4323 if (Dependencies[I].DepKind != OMPC_DEPEND_depobj)
4324 continue;
4325 emitDepobjElements(CGF, KmpDependInfoTy, PosLVal, Dependencies[I],
4326 DependenciesArray);
4327 }
4328 }
4329 DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4330 DependenciesArray, CGF.VoidPtrTy, CGF.Int8Ty);
4331 return std::make_pair(NumOfElements, DependenciesArray);
4332}
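// E.g. for
// \code
// #pragma omp task depend(in : a) depend(out : b)
// \endcode
// this materializes a two-element kmp_depend_info array on the stack with
// deps[0] = {(intptr_t)&a, sizeof(a), DepIn} and
// deps[1] = {(intptr_t)&b, sizeof(b), DepInOut}, and returns its element
// count and address for __kmpc_omp_task_with_deps.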
4333
4334Address CGOpenMPRuntime::emitDepobjDependClause(
4335 CodeGenFunction &CGF, const OMPTaskDataTy::DependData &Dependencies,
4336 SourceLocation Loc) {
4337 if (Dependencies.DepExprs.empty())
4338 return Address::invalid();
4339 // Process list of dependencies.
4340 ASTContext &C = CGM.getContext();
4341 Address DependenciesArray = Address::invalid();
4342 unsigned NumDependencies = Dependencies.DepExprs.size();
4343 QualType FlagsTy;
4344 getDependTypes(C, KmpDependInfoTy, FlagsTy);
4345 RecordDecl *KmpDependInfoRD =
4346 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4347
4348 llvm::Value *Size;
4349 // Define type kmp_depend_info[<Dependencies.size()>];
4350 // For depobj reserve one extra element to store the number of elements.
4351 // It is required to handle depobj(x) update(in) construct.
4352 // kmp_depend_info[<Dependencies.size()>] deps;
4353 llvm::Value *NumDepsVal;
4354 CharUnits Align = C.getTypeAlignInChars(KmpDependInfoTy);
4355 if (const auto *IE =
4356 cast_or_null<OMPIteratorExpr>(Dependencies.IteratorExpr)) {
4357 NumDepsVal = llvm::ConstantInt::get(CGF.SizeTy, 1);
4358 for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
4359 llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
4360 Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
4361 NumDepsVal = CGF.Builder.CreateNUWMul(NumDepsVal, Sz);
4362 }
4363 Size = CGF.Builder.CreateNUWAdd(llvm::ConstantInt::get(CGF.SizeTy, 1),
4364 NumDepsVal);
4365 CharUnits SizeInBytes =
4366 C.getTypeSizeInChars(KmpDependInfoTy).alignTo(Align);
4367 llvm::Value *RecSize = CGM.getSize(SizeInBytes);
4368 Size = CGF.Builder.CreateNUWMul(Size, RecSize);
4369 NumDepsVal =
4370 CGF.Builder.CreateIntCast(NumDepsVal, CGF.IntPtrTy, /*isSigned=*/false);
4371 } else {
4372 QualType KmpDependInfoArrayTy = C.getConstantArrayType(
4373 KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies + 1),
4374 nullptr, ArraySizeModifier::Normal, /*IndexTypeQuals=*/0);
4375 CharUnits Sz = C.getTypeSizeInChars(KmpDependInfoArrayTy);
4376 Size = CGM.getSize(Sz.alignTo(Align));
4377 NumDepsVal = llvm::ConstantInt::get(CGF.IntPtrTy, NumDependencies);
4378 }
4379 // Need to allocate on the dynamic memory.
4380 llvm::Value *ThreadID = getThreadID(CGF, Loc);
4381 // Use default allocator.
4382 llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
4383 llvm::Value *Args[] = {ThreadID, Size, Allocator};
4384
4385 llvm::Value *Addr =
4386 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
4387 CGM.getModule(), OMPRTL___kmpc_alloc),
4388 Args, ".dep.arr.addr");
4389 llvm::Type *KmpDependInfoLlvmTy = CGF.ConvertTypeForMem(KmpDependInfoTy);
4390 Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4391 Addr, KmpDependInfoLlvmTy->getPointerTo());
4392 DependenciesArray = Address(Addr, KmpDependInfoLlvmTy, Align);
4393 // Write number of elements in the first element of array for depobj.
4394 LValue Base = CGF.MakeAddrLValue(DependenciesArray, KmpDependInfoTy);
4395 // deps[i].base_addr = NumDependencies;
4396 LValue BaseAddrLVal = CGF.EmitLValueForField(
4397 Base,
4398 *std::next(KmpDependInfoRD->field_begin(),
4399 static_cast<unsigned int>(RTLDependInfoFields::BaseAddr)));
4400 CGF.EmitStoreOfScalar(NumDepsVal, BaseAddrLVal);
4401 llvm::PointerUnion<unsigned *, LValue *> Pos;
4402 unsigned Idx = 1;
4403 LValue PosLVal;
4404 if (Dependencies.IteratorExpr) {
4405 PosLVal = CGF.MakeAddrLValue(
4406 CGF.CreateMemTemp(C.getSizeType(), "iterator.counter.addr"),
4407 C.getSizeType());
4408 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Idx), PosLVal,
4409 /*IsInit=*/true);
4410 Pos = &PosLVal;
4411 } else {
4412 Pos = &Idx;
4413 }
4414 emitDependData(CGF, KmpDependInfoTy, Pos, Dependencies, DependenciesArray);
4415 DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4416 CGF.Builder.CreateConstGEP(DependenciesArray, 1), CGF.VoidPtrTy,
4417 CGF.Int8Ty);
4418 return DependenciesArray;
4419}
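// Note that the returned address skips the extra leading element: for
// \code
// omp_depend_t d;
// #pragma omp depobj(d) depend(inout : x)
// \endcode
// the allocation holds {numdeps, {(intptr_t)&x, sizeof(x), DepInOut}} and d
// ends up pointing at the second element, so the element count can be read
// back at index -1 (see getDepobjElements above).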
4420
4421void CGOpenMPRuntime::emitDestroyClause(CodeGenFunction &CGF, LValue DepobjLVal,
4422 SourceLocation Loc) {
4423 ASTContext &C = CGM.getContext();
4424 QualType FlagsTy;
4425 getDependTypes(C, KmpDependInfoTy, FlagsTy);
4426 LValue Base = CGF.EmitLoadOfPointerLValue(
4427 DepobjLVal.getAddress(CGF), C.VoidPtrTy.castAs<PointerType>());
4428 QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
4429 Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4430 Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy),
4431 CGF.ConvertTypeForMem(KmpDependInfoTy));
4432 llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
4433 Addr.getElementType(), Addr.getPointer(),
4434 llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
4435 DepObjAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(DepObjAddr,
4436 CGF.VoidPtrTy);
4437 llvm::Value *ThreadID = getThreadID(CGF, Loc);
4438 // Use default allocator.
4439 llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
4440 llvm::Value *Args[] = {ThreadID, DepObjAddr, Allocator};
4441
4442 // _kmpc_free(gtid, addr, nullptr);
4443 (void)CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
4444 CGM.getModule(), OMPRTL___kmpc_free),
4445 Args);
4446}
4447
4448void CGOpenMPRuntime::emitUpdateClause(CodeGenFunction &CGF, LValue DepobjLVal,
4449 OpenMPDependClauseKind NewDepKind,
4450 SourceLocation Loc) {
4451 ASTContext &C = CGM.getContext();
4452 QualType FlagsTy;
4453 getDependTypes(C, KmpDependInfoTy, FlagsTy);
4454 RecordDecl *KmpDependInfoRD =
4455 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4456 llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
4457 llvm::Value *NumDeps;
4458 LValue Base;
4459 std::tie(NumDeps, Base) = getDepobjElements(CGF, DepobjLVal, Loc);
4460
4461 Address Begin = Base.getAddress(CGF);
4462 // Cast from pointer to array type to pointer to single element.
4463 llvm::Value *End = CGF.Builder.CreateGEP(
4464 Begin.getElementType(), Begin.getPointer(), NumDeps);
4465 // The basic structure here is a while-do loop.
4466 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.body");
4467 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.done");
4468 llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
4469 CGF.EmitBlock(BodyBB);
4470 llvm::PHINode *ElementPHI =
4471 CGF.Builder.CreatePHI(Begin.getType(), 2, "omp.elementPast");
4472 ElementPHI->addIncoming(Begin.getPointer(), EntryBB);
4473 Begin = Begin.withPointer(ElementPHI, KnownNonNull);
4474 Base = CGF.MakeAddrLValue(Begin, KmpDependInfoTy, Base.getBaseInfo(),
4475 Base.getTBAAInfo());
4476 // deps[i].flags = NewDepKind;
4477 RTLDependenceKindTy DepKind = translateDependencyKind(NewDepKind);
4478 LValue FlagsLVal = CGF.EmitLValueForField(
4479 Base, *std::next(KmpDependInfoRD->field_begin(),
4480 static_cast<unsigned int>(RTLDependInfoFields::Flags)));
4481 CGF.EmitStoreOfScalar(
4482 llvm::ConstantInt::get(LLVMFlagsTy, static_cast<unsigned int>(DepKind)),
4483 FlagsLVal);
4484
4485 // Shift the address forward by one element.
4486 Address ElementNext =
4487 CGF.Builder.CreateConstGEP(Begin, /*Index=*/1, "omp.elementNext");
4488 ElementPHI->addIncoming(ElementNext.getPointer(),
4489 CGF.Builder.GetInsertBlock());
4490 llvm::Value *IsEmpty =
4491 CGF.Builder.CreateICmpEQ(ElementNext.getPointer(), End, "omp.isempty");
4492 CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
4493 // Done.
4494 CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
4495}
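// E.g.
// \code
// #pragma omp depobj(d) update(in)
// \endcode
// walks every kmp_depend_info element reachable from d with the loop above
// and rewrites only its flags field, leaving base_addr and len untouched.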
4496
4497void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
4498 const OMPExecutableDirective &D,
4499 llvm::Function *TaskFunction,
4500 QualType SharedsTy, Address Shareds,
4501 const Expr *IfCond,
4502 const OMPTaskDataTy &Data) {
4503 if (!CGF.HaveInsertPoint())
4504 return;
4505
4506 TaskResultTy Result =
4507 emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
4508 llvm::Value *NewTask = Result.NewTask;
4509 llvm::Function *TaskEntry = Result.TaskEntry;
4510 llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy;
4511 LValue TDBase = Result.TDBase;
4512 const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD;
4513 // Process list of dependences.
4514 Address DependenciesArray = Address::invalid();
4515 llvm::Value *NumOfElements;
4516 std::tie(NumOfElements, DependenciesArray) =
4517 emitDependClause(CGF, Data.Dependences, Loc);
4518
4519 // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
4520 // libcall.
4521 // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
4522 // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
4523 // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if the
4524 // dependence list is not empty.
4525 llvm::Value *ThreadID = getThreadID(CGF, Loc);
4526 llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
4527 llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask };
4528 llvm::Value *DepTaskArgs[7];
4529 if (!Data.Dependences.empty()) {
4530 DepTaskArgs[0] = UpLoc;
4531 DepTaskArgs[1] = ThreadID;
4532 DepTaskArgs[2] = NewTask;
4533 DepTaskArgs[3] = NumOfElements;
4534 DepTaskArgs[4] = DependenciesArray.getPointer();
4535 DepTaskArgs[5] = CGF.Builder.getInt32(0);
4536 DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
4537 }
4538 auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, &TaskArgs,
4539 &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) {
4540 if (!Data.Tied) {
4541 auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
4542 LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI);
4543 CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal);
4544 }
4545 if (!Data.Dependences.empty()) {
4546 CGF.EmitRuntimeCall(
4547 OMPBuilder.getOrCreateRuntimeFunction(
4548 CGM.getModule(), OMPRTL___kmpc_omp_task_with_deps),
4549 DepTaskArgs);
4550 } else {
4551 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
4552 CGM.getModule(), OMPRTL___kmpc_omp_task),
4553 TaskArgs);
4554 }
4555 // Check if the parent region is untied and build a return for the untied task.
4556 if (auto *Region =
4557 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
4558 Region->emitUntiedSwitch(CGF);
4559 };
4560
4561 llvm::Value *DepWaitTaskArgs[7];
4562 if (!Data.Dependences.empty()) {
4563 DepWaitTaskArgs[0] = UpLoc;
4564 DepWaitTaskArgs[1] = ThreadID;
4565 DepWaitTaskArgs[2] = NumOfElements;
4566 DepWaitTaskArgs[3] = DependenciesArray.getPointer();
4567 DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
4568 DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
4569 DepWaitTaskArgs[6] =
4570 llvm::ConstantInt::get(CGF.Int32Ty, Data.HasNowaitClause);
4571 }
4572 auto &M = CGM.getModule();
4573 auto &&ElseCodeGen = [this, &M, &TaskArgs, ThreadID, NewTaskNewTaskTTy,
4574 TaskEntry, &Data, &DepWaitTaskArgs,
4575 Loc](CodeGenFunction &CGF, PrePostActionTy &) {
4576 CodeGenFunction::RunCleanupsScope LocalScope(CGF);
4577 // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
4578 // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
4579 // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
4580 // is specified.
4581 if (!Data.Dependences.empty())
4582 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
4583 M, OMPRTL___kmpc_omp_taskwait_deps_51),
4584 DepWaitTaskArgs);
4585 // Call proxy_task_entry(gtid, new_task);
4586 auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy,
4587 Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
4588 Action.Enter(CGF);
4589 llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
4590 CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry,
4591 OutlinedFnArgs);
4592 };
4593
4594 // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
4595 // kmp_task_t *new_task);
4596 // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
4597 // kmp_task_t *new_task);
4598 RegionCodeGenTy RCG(CodeGen);
4599 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
4600 M, OMPRTL___kmpc_omp_task_begin_if0),
4601 TaskArgs,
4602 OMPBuilder.getOrCreateRuntimeFunction(
4603 M, OMPRTL___kmpc_omp_task_complete_if0),
4604 TaskArgs);
4605 RCG.setAction(Action);
4606 RCG(CGF);
4607 };
4608
4609 if (IfCond) {
4610 emitIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen);
4611 } else {
4612 RegionCodeGenTy ThenRCG(ThenCodeGen);
4613 ThenRCG(CGF);
4614 }
4615}
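// Illustrative sketch (not part of the original source) of how the two paths
// above are selected, e.g. for
//
//   #pragma omp task depend(in : X) if(Cond)   // hypothetical directive
//
// when Cond is true the task is enqueued via __kmpc_omp_task_with_deps; when
// Cond is false the dependences are awaited via __kmpc_omp_taskwait_deps_51
// and the task body runs immediately between __kmpc_omp_task_begin_if0 and
// __kmpc_omp_task_complete_if0.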
4616
4617 void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
4618 const OMPLoopDirective &D,
4619 llvm::Function *TaskFunction,
4620 QualType SharedsTy, Address Shareds,
4621 const Expr *IfCond,
4622 const OMPTaskDataTy &Data) {
4623 if (!CGF.HaveInsertPoint())
4624 return;
4625 TaskResultTy Result =
4626 emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
4627 // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
4628 // libcall.
4629 // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
4630 // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
4631 // sched, kmp_uint64 grainsize, void *task_dup);
4632 llvm::Value *ThreadID = getThreadID(CGF, Loc);
4633 llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
4634 llvm::Value *IfVal;
4635 if (IfCond) {
4636 IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy,
4637 /*isSigned=*/true);
4638 } else {
4639 IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1);
4640 }
4641
4642 LValue LBLVal = CGF.EmitLValueForField(
4643 Result.TDBase,
4644 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound));
4645 const auto *LBVar =
4646 cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl());
4647 CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(CGF),
4648 LBLVal.getQuals(),
4649 /*IsInitializer=*/true);
4650 LValue UBLVal = CGF.EmitLValueForField(
4651 Result.TDBase,
4652 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound));
4653 const auto *UBVar =
4654 cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl());
4655 CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(CGF),
4656 UBLVal.getQuals(),
4657 /*IsInitializer=*/true);
4658 LValue StLVal = CGF.EmitLValueForField(
4659 Result.TDBase,
4660 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride));
4661 const auto *StVar =
4662 cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl());
4663 CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(CGF),
4664 StLVal.getQuals(),
4665 /*IsInitializer=*/true);
4666 // Store reductions address.
4667 LValue RedLVal = CGF.EmitLValueForField(
4668 Result.TDBase,
4669 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions));
4670 if (Data.Reductions) {
4671 CGF.EmitStoreOfScalar(Data.Reductions, RedLVal);
4672 } else {
4673 CGF.EmitNullInitialization(RedLVal.getAddress(CGF),
4674 CGF.getContext().VoidPtrTy);
4675 }
4676 enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 };
4677 llvm::Value *TaskArgs[] = {
4678 UpLoc,
4679 ThreadID,
4680 Result.NewTask,
4681 IfVal,
4682 LBLVal.getPointer(CGF),
4683 UBLVal.getPointer(CGF),
4684 CGF.EmitLoadOfScalar(StLVal, Loc),
4685 llvm::ConstantInt::getSigned(
4686 CGF.IntTy, 1), // Always 1 because the taskgroup is emitted by the compiler
4687 llvm::ConstantInt::getSigned(
4688 CGF.IntTy, Data.Schedule.getPointer()
4689 ? Data.Schedule.getInt() ? NumTasks : Grainsize
4690 : NoSchedule),
4691 Data.Schedule.getPointer()
4692 ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty,
4693 /*isSigned=*/false)
4694 : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0),
4695 Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4696 Result.TaskDupFn, CGF.VoidPtrTy)
4697 : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)};
4698 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
4699 CGM.getModule(), OMPRTL___kmpc_taskloop),
4700 TaskArgs);
4701}
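// Illustrative sketch (not part of the original source) of the sched/grainsize
// operands built above: for a hypothetical
//
//   #pragma omp taskloop grainsize(4)
//
// __kmpc_taskloop receives sched=Grainsize(1) and grainsize=4; num_tasks(8)
// would yield sched=NumTasks(2) and grainsize=8; with neither clause it
// receives sched=NoSchedule(0) and the runtime chooses the chunking.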
4702
4703/// Emit reduction operation for each element of array (required for
4704/// array sections) LHS op = RHS.
4705/// \param Type Type of array.
4706/// \param LHSVar Variable on the left side of the reduction operation
4707/// (references element of array in original variable).
4708/// \param RHSVar Variable on the right side of the reduction operation
4709/// (references element of array in original variable).
4710/// \param RedOpGen Generator of reduction operation with use of LHSVar and
4711/// RHSVar.
4712 static void EmitOMPAggregateReduction(
4713 CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar,
4714 const VarDecl *RHSVar,
4715 const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *,
4716 const Expr *, const Expr *)> &RedOpGen,
4717 const Expr *XExpr = nullptr, const Expr *EExpr = nullptr,
4718 const Expr *UpExpr = nullptr) {
4719 // Perform element-by-element initialization.
4720 QualType ElementTy;
4721 Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar);
4722 Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar);
4723
4724 // Drill down to the base element type on both arrays.
4725 const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
4726 llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr);
4727
4728 llvm::Value *RHSBegin = RHSAddr.getPointer();
4729 llvm::Value *LHSBegin = LHSAddr.getPointer();
4730 // Cast from pointer to array type to pointer to single element.
4731 llvm::Value *LHSEnd =
4732 CGF.Builder.CreateGEP(LHSAddr.getElementType(), LHSBegin, NumElements);
4733 // The basic structure here is a while-do loop.
4734 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arraycpy.body");
4735 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arraycpy.done");
4736 llvm::Value *IsEmpty =
4737 CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty");
4738 CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
4739
4740 // Enter the loop body, making that address the current address.
4741 llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
4742 CGF.EmitBlock(BodyBB);
4743
4744 CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);
4745
4746 llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI(
4747 RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast");
4748 RHSElementPHI->addIncoming(RHSBegin, EntryBB);
4749 Address RHSElementCurrent(
4750 RHSElementPHI, RHSAddr.getElementType(),
4751 RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));
4752
4753 llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI(
4754 LHSBegin->getType(), 2, "omp.arraycpy.destElementPast");
4755 LHSElementPHI->addIncoming(LHSBegin, EntryBB);
4756 Address LHSElementCurrent(
4757 LHSElementPHI, LHSAddr.getElementType(),
4758 LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));
4759
4760 // Emit copy.
4761 CodeGenFunction::OMPPrivateScope Scope(CGF);
4762 Scope.addPrivate(LHSVar, LHSElementCurrent);
4763 Scope.addPrivate(RHSVar, RHSElementCurrent);
4764 Scope.Privatize();
4765 RedOpGen(CGF, XExpr, EExpr, UpExpr);
4766 Scope.ForceCleanup();
4767
4768 // Shift the address forward by one element.
4769 llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32(
4770 LHSAddr.getElementType(), LHSElementPHI, /*Idx0=*/1,
4771 "omp.arraycpy.dest.element");
4772 llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32(
4773 RHSAddr.getElementType(), RHSElementPHI, /*Idx0=*/1,
4774 "omp.arraycpy.src.element");
4775 // Check whether we've reached the end.
4776 llvm::Value *Done =
4777 CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done");
4778 CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
4779 LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock());
4780 RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock());
4781
4782 // Done.
4783 CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
4784}
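// Illustrative sketch (not part of the original source): the element loop
// above is what makes array-section reductions work, e.g. a hypothetical
//
//   #pragma omp parallel for reduction(+ : A[0:N])
//
// is combined element by element as A[i] = A[i] + A_priv[i] for i in [0, N).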
4785
4786 /// Emit reduction combiner. If the combiner is a simple expression, emit it as
4787 /// is; otherwise treat it as the combiner of a UDR declaration and emit it as a
4788 /// call to the UDR combiner function.
4789 static void emitReductionCombiner(CodeGenFunction &CGF,
4790 const Expr *ReductionOp) {
4791 if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
4792 if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
4793 if (const auto *DRE =
4794 dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
4795 if (const auto *DRD =
4796 dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) {
4797 std::pair<llvm::Function *, llvm::Function *> Reduction =
4798 CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
4799 RValue Func = RValue::get(Reduction.first);
4800 CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
4801 CGF.EmitIgnoredExpr(ReductionOp);
4802 return;
4803 }
4804 CGF.EmitIgnoredExpr(ReductionOp);
4805}
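// Illustrative sketch (not part of the original source): the UDR path above
// fires for user-defined reductions such as
//
//   #pragma omp declare reduction(merge : T : omp_out = f(omp_out, omp_in))
//
// where the combiner is emitted as a call to the mapped combiner function
// rather than as a plain expression ('merge', 'T' and 'f' are hypothetical).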
4806
4807 llvm::Function *CGOpenMPRuntime::emitReductionFunction(
4808 StringRef ReducerName, SourceLocation Loc, llvm::Type *ArgsElemType,
4809 ArrayRef<const Expr *> Privates, ArrayRef<const Expr *> LHSExprs,
4810 ArrayRef<const Expr *> RHSExprs, ArrayRef<const Expr *> ReductionOps) {
4811 ASTContext &C = CGM.getContext();
4812
4813 // void reduction_func(void *LHSArg, void *RHSArg);
4814 FunctionArgList Args;
4815 ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
4816 ImplicitParamKind::Other);
4817 ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
4818 ImplicitParamKind::Other);
4819 Args.push_back(&LHSArg);
4820 Args.push_back(&RHSArg);
4821 const auto &CGFI =
4822 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
4823 std::string Name = getReductionFuncName(ReducerName);
4824 auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
4825 llvm::GlobalValue::InternalLinkage, Name,
4826 &CGM.getModule());
4827 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
4828 Fn->setDoesNotRecurse();
4829 CodeGenFunction CGF(CGM);
4830 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
4831
4832 // Dst = (void*[n])(LHSArg);
4833 // Src = (void*[n])(RHSArg);
4834 Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4835 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
4836 ArgsElemType->getPointerTo()),
4837 ArgsElemType, CGF.getPointerAlign());
4838 Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4839 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
4840 ArgsElemType->getPointerTo()),
4841 ArgsElemType, CGF.getPointerAlign());
4842
4843 // ...
4844 // *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
4845 // ...
4846 CodeGenFunction::OMPPrivateScope Scope(CGF);
4847 const auto *IPriv = Privates.begin();
4848 unsigned Idx = 0;
4849 for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) {
4850 const auto *RHSVar =
4851 cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl());
4852 Scope.addPrivate(RHSVar, emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar));
4853 const auto *LHSVar =
4854 cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl());
4855 Scope.addPrivate(LHSVar, emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar));
4856 QualType PrivTy = (*IPriv)->getType();
4857 if (PrivTy->isVariablyModifiedType()) {
4858 // Get array size and emit VLA type.
4859 ++Idx;
4860 Address Elem = CGF.Builder.CreateConstArrayGEP(LHS, Idx);
4861 llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem);
4862 const VariableArrayType *VLA =
4863 CGF.getContext().getAsVariableArrayType(PrivTy);
4864 const auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr());
4865 CodeGenFunction::OpaqueValueMapping OpaqueMap(
4866 CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy)));
4867 CGF.EmitVariablyModifiedType(PrivTy);
4868 }
4869 }
4870 Scope.Privatize();
4871 IPriv = Privates.begin();
4872 const auto *ILHS = LHSExprs.begin();
4873 const auto *IRHS = RHSExprs.begin();
4874 for (const Expr *E : ReductionOps) {
4875 if ((*IPriv)->getType()->isArrayType()) {
4876 // Emit reduction for array section.
4877 const auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
4878 const auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
4879 EmitOMPAggregateReduction(
4880 CGF, (*IPriv)->getType(), LHSVar, RHSVar,
4881 [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
4882 emitReductionCombiner(CGF, E);
4883 });
4884 } else {
4885 // Emit reduction for array subscript or single variable.
4886 emitReductionCombiner(CGF, E);
4887 }
4888 ++IPriv;
4889 ++ILHS;
4890 ++IRHS;
4891 }
4892 Scope.ForceCleanup();
4893 CGF.FinishFunction();
4894 return Fn;
4895}
4896
4897 void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF,
4898 const Expr *ReductionOp,
4899 const Expr *PrivateRef,
4900 const DeclRefExpr *LHS,
4901 const DeclRefExpr *RHS) {
4902 if (PrivateRef->getType()->isArrayType()) {
4903 // Emit reduction for array section.
4904 const auto *LHSVar = cast<VarDecl>(LHS->getDecl());
4905 const auto *RHSVar = cast<VarDecl>(RHS->getDecl());
4906 EmitOMPAggregateReduction(
4907 CGF, PrivateRef->getType(), LHSVar, RHSVar,
4908 [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
4909 emitReductionCombiner(CGF, ReductionOp);
4910 });
4911 } else {
4912 // Emit reduction for array subscript or single variable.
4913 emitReductionCombiner(CGF, ReductionOp);
4914 }
4915}
4916
4917 void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
4918 ArrayRef<const Expr *> Privates,
4919 ArrayRef<const Expr *> LHSExprs,
4920 ArrayRef<const Expr *> RHSExprs,
4921 ArrayRef<const Expr *> ReductionOps,
4922 ReductionOptionsTy Options) {
4923 if (!CGF.HaveInsertPoint())
4924 return;
4925
4926 bool WithNowait = Options.WithNowait;
4927 bool SimpleReduction = Options.SimpleReduction;
4928
4929 // The following code is emitted for a reduction:
4930 //
4931 // static kmp_critical_name lock = { 0 };
4932 //
4933 // void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
4934 // *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
4935 // ...
4936 // *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1],
4937 // *(Type<n>-1*)rhs[<n>-1]);
4938 // }
4939 //
4940 // ...
4941 // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
4942 // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
4943 // RedList, reduce_func, &<lock>)) {
4944 // case 1:
4945 // ...
4946 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
4947 // ...
4948 // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
4949 // break;
4950 // case 2:
4951 // ...
4952 // Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
4953 // ...
4954 // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);]
4955 // break;
4956 // default:;
4957 // }
4958 //
4959 // If SimpleReduction is true, only the following code is generated:
4960 // ...
4961 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
4962 // ...
4963
4964 ASTContext &C = CGM.getContext();
4965
4966 if (SimpleReduction) {
4967 CodeGenFunction::RunCleanupsScope Scope(CGF);
4968 const auto *IPriv = Privates.begin();
4969 const auto *ILHS = LHSExprs.begin();
4970 const auto *IRHS = RHSExprs.begin();
4971 for (const Expr *E : ReductionOps) {
4972 emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
4973 cast<DeclRefExpr>(*IRHS));
4974 ++IPriv;
4975 ++ILHS;
4976 ++IRHS;
4977 }
4978 return;
4979 }
4980
4981 // 1. Build a list of reduction variables.
4982 // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
4983 auto Size = RHSExprs.size();
4984 for (const Expr *E : Privates) {
4985 if (E->getType()->isVariablyModifiedType())
4986 // Reserve place for array size.
4987 ++Size;
4988 }
4989 llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size);
4990 QualType ReductionArrayTy = C.getConstantArrayType(
4991 C.VoidPtrTy, ArraySize, nullptr, ArraySizeModifier::Normal,
4992 /*IndexTypeQuals=*/0);
4993 Address ReductionList =
4994 CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
4995 const auto *IPriv = Privates.begin();
4996 unsigned Idx = 0;
4997 for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) {
4998 Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
4999 CGF.Builder.CreateStore(
5000 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5001 CGF.EmitLValue(RHSExprs[I]).getPointer(CGF), CGF.VoidPtrTy),
5002 Elem);
5003 if ((*IPriv)->getType()->isVariablyModifiedType()) {
5004 // Store array size.
5005 ++Idx;
5006 Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
5007 llvm::Value *Size = CGF.Builder.CreateIntCast(
5008 CGF.getVLASize(
5009 CGF.getContext().getAsVariableArrayType((*IPriv)->getType()))
5010 .NumElts,
5011 CGF.SizeTy, /*isSigned=*/false);
5012 CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy),
5013 Elem);
5014 }
5015 }
5016
5017 // 2. Emit reduce_func().
5018 llvm::Function *ReductionFn = emitReductionFunction(
5019 CGF.CurFn->getName(), Loc, CGF.ConvertTypeForMem(ReductionArrayTy),
5020 Privates, LHSExprs, RHSExprs, ReductionOps);
5021
5022 // 3. Create static kmp_critical_name lock = { 0 };
5023 std::string Name = getName({"reduction"});
5024 llvm::Value *Lock = getCriticalRegionLock(Name);
5025
5026 // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
5027 // RedList, reduce_func, &<lock>);
5028 llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
5029 llvm::Value *ThreadId = getThreadID(CGF, Loc);
5030 llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy);
5031 llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5032 ReductionList.getPointer(), CGF.VoidPtrTy);
5033 llvm::Value *Args[] = {
5034 IdentTLoc, // ident_t *<loc>
5035 ThreadId, // i32 <gtid>
5036 CGF.Builder.getInt32(RHSExprs.size()), // i32 <n>
5037 ReductionArrayTySize, // size_type sizeof(RedList)
5038 RL, // void *RedList
5039 ReductionFn, // void (*) (void *, void *) <reduce_func>
5040 Lock // kmp_critical_name *&<lock>
5041 };
5042 llvm::Value *Res = CGF.EmitRuntimeCall(
5043 OMPBuilder.getOrCreateRuntimeFunction(
5044 CGM.getModule(),
5045 WithNowait ? OMPRTL___kmpc_reduce_nowait : OMPRTL___kmpc_reduce),
5046 Args);
5047
5048 // 5. Build switch(res)
5049 llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(".omp.reduction.default");
5050 llvm::SwitchInst *SwInst =
5051 CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2);
5052
5053 // 6. Build case 1:
5054 // ...
5055 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
5056 // ...
5057 // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
5058 // break;
5059 llvm::BasicBlock *Case1BB = CGF.createBasicBlock(".omp.reduction.case1");
5060 SwInst->addCase(CGF.Builder.getInt32(1), Case1BB);
5061 CGF.EmitBlock(Case1BB);
5062
5063 // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
5064 llvm::Value *EndArgs[] = {
5065 IdentTLoc, // ident_t *<loc>
5066 ThreadId, // i32 <gtid>
5067 Lock // kmp_critical_name *&<lock>
5068 };
5069 auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps](
5070 CodeGenFunction &CGF, PrePostActionTy &Action) {
5071 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
5072 const auto *IPriv = Privates.begin();
5073 const auto *ILHS = LHSExprs.begin();
5074 const auto *IRHS = RHSExprs.begin();
5075 for (const Expr *E : ReductionOps) {
5076 RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
5077 cast<DeclRefExpr>(*IRHS));
5078 ++IPriv;
5079 ++ILHS;
5080 ++IRHS;
5081 }
5082 };
5083 RegionCodeGenTy RCG(CodeGen);
5084 CommonActionTy Action(
5085 nullptr, std::nullopt,
5086 OMPBuilder.getOrCreateRuntimeFunction(
5087 CGM.getModule(), WithNowait ? OMPRTL___kmpc_end_reduce_nowait
5088 : OMPRTL___kmpc_end_reduce),
5089 EndArgs);
5090 RCG.setAction(Action);
5091 RCG(CGF);
5092
5093 CGF.EmitBranch(DefaultBB);
5094
5095 // 7. Build case 2:
5096 // ...
5097 // Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
5098 // ...
5099 // break;
5100 llvm::BasicBlock *Case2BB = CGF.createBasicBlock(".omp.reduction.case2");
5101 SwInst->addCase(CGF.Builder.getInt32(2), Case2BB);
5102 CGF.EmitBlock(Case2BB);
5103
5104 auto &&AtomicCodeGen = [Loc, Privates, LHSExprs, RHSExprs, ReductionOps](
5105 CodeGenFunction &CGF, PrePostActionTy &Action) {
5106 const auto *ILHS = LHSExprs.begin();
5107 const auto *IRHS = RHSExprs.begin();
5108 const auto *IPriv = Privates.begin();
5109 for (const Expr *E : ReductionOps) {
5110 const Expr *XExpr = nullptr;
5111 const Expr *EExpr = nullptr;
5112 const Expr *UpExpr = nullptr;
5113 BinaryOperatorKind BO = BO_Comma;
5114 if (const auto *BO = dyn_cast<BinaryOperator>(E)) {
5115 if (BO->getOpcode() == BO_Assign) {
5116 XExpr = BO->getLHS();
5117 UpExpr = BO->getRHS();
5118 }
5119 }
5120 // Try to emit update expression as a simple atomic.
5121 const Expr *RHSExpr = UpExpr;
5122 if (RHSExpr) {
5123 // Analyze RHS part of the whole expression.
5124 if (const auto *ACO = dyn_cast<AbstractConditionalOperator>(
5125 RHSExpr->IgnoreParenImpCasts())) {
5126 // If this is a conditional operator, analyze its condition for
5127 // min/max reduction operator.
5128 RHSExpr = ACO->getCond();
5129 }
5130 if (const auto *BORHS =
5131 dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) {
5132 EExpr = BORHS->getRHS();
5133 BO = BORHS->getOpcode();
5134 }
5135 }
5136 if (XExpr) {
5137 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
5138 auto &&AtomicRedGen = [BO, VD,
5139 Loc](CodeGenFunction &CGF, const Expr *XExpr,
5140 const Expr *EExpr, const Expr *UpExpr) {
5141 LValue X = CGF.EmitLValue(XExpr);
5142 RValue E;
5143 if (EExpr)
5144 E = CGF.EmitAnyExpr(EExpr);
5145 CGF.EmitOMPAtomicSimpleUpdateExpr(
5146 X, E, BO, /*IsXLHSInRHSPart=*/true,
5147 llvm::AtomicOrdering::Monotonic, Loc,
5148 [&CGF, UpExpr, VD, Loc](RValue XRValue) {
5149 CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
5150 Address LHSTemp = CGF.CreateMemTemp(VD->getType());
5151 CGF.emitOMPSimpleStore(
5152 CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue,
5153 VD->getType().getNonReferenceType(), Loc);
5154 PrivateScope.addPrivate(VD, LHSTemp);
5155 (void)PrivateScope.Privatize();
5156 return CGF.EmitAnyExpr(UpExpr);
5157 });
5158 };
5159 if ((*IPriv)->getType()->isArrayType()) {
5160 // Emit atomic reduction for array section.
5161 const auto *RHSVar =
5162 cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
5163 EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar,
5164 AtomicRedGen, XExpr, EExpr, UpExpr);
5165 } else {
5166 // Emit atomic reduction for array subscript or single variable.
5167 AtomicRedGen(CGF, XExpr, EExpr, UpExpr);
5168 }
5169 } else {
5170 // Emit as a critical region.
5171 auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *,
5172 const Expr *, const Expr *) {
5173 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
5174 std::string Name = RT.getName({"atomic_reduction"});
5175 RT.emitCriticalRegion(
5176 CGF, Name,
5177 [=](CodeGenFunction &CGF, PrePostActionTy &Action) {
5178 Action.Enter(CGF);
5179 emitReductionCombiner(CGF, E);
5180 },
5181 Loc);
5182 };
5183 if ((*IPriv)->getType()->isArrayType()) {
5184 const auto *LHSVar =
5185 cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
5186 const auto *RHSVar =
5187 cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
5188 EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar,
5189 CritRedGen);
5190 } else {
5191 CritRedGen(CGF, nullptr, nullptr, nullptr);
5192 }
5193 }
5194 ++ILHS;
5195 ++IRHS;
5196 ++IPriv;
5197 }
5198 };
5199 RegionCodeGenTy AtomicRCG(AtomicCodeGen);
5200 if (!WithNowait) {
5201 // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>);
5202 llvm::Value *EndArgs[] = {
5203 IdentTLoc, // ident_t *<loc>
5204 ThreadId, // i32 <gtid>
5205 Lock // kmp_critical_name *&<lock>
5206 };
5207 CommonActionTy Action(nullptr, std::nullopt,
5208 OMPBuilder.getOrCreateRuntimeFunction(
5209 CGM.getModule(), OMPRTL___kmpc_end_reduce),
5210 EndArgs);
5211 AtomicRCG.setAction(Action);
5212 AtomicRCG(CGF);
5213 } else {
5214 AtomicRCG(CGF);
5215 }
5216
5217 CGF.EmitBranch(DefaultBB);
5218 CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
5219}
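// Illustrative sketch (not part of the original source): for a hypothetical
//
//   #pragma omp parallel for reduction(+ : Sum)
//
// the switch above dispatches on the __kmpc_reduce{_nowait} result: case 1
// combines the private copies under the runtime lock, case 2 combines them
// with atomic updates (falling back to a named critical region when no simple
// atomic form exists), and the default case contributes nothing on this thread.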
5220
5221 /// Generates a unique name for artificial threadprivate variables.
5222/// Format is: <Prefix> "." <Decl_mangled_name> "_" "<Decl_start_loc_raw_enc>"
5223static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix,
5224 const Expr *Ref) {
5225 SmallString<256> Buffer;
5226 llvm::raw_svector_ostream Out(Buffer);
5227 const clang::DeclRefExpr *DE;
5228 const VarDecl *D = ::getBaseDecl(Ref, DE);
5229 if (!D)
5230 D = cast<VarDecl>(cast<DeclRefExpr>(Ref)->getDecl());
5231 D = D->getCanonicalDecl();
5232 std::string Name = CGM.getOpenMPRuntime().getName(
5233 {D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(D)});
5234 Out << Prefix << Name << "_"
5235 << D->getCanonicalDecl()->getBeginLoc().getRawEncoding();
5236 return std::string(Out.str());
5237}
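// Illustrative sketch (not part of the original source): for a variable 'x'
// this produces names of the form "reduction_size.x_<rawloc>", where <rawloc>
// is the raw encoding of the declaration's begin location (the exact
// separators depend on the runtime name mangling configured in getName).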
5238
5239/// Emits reduction initializer function:
5240/// \code
5241/// void @.red_init(void* %arg, void* %orig) {
5242/// %0 = bitcast void* %arg to <type>*
5243/// store <type> <init>, <type>* %0
5244/// ret void
5245/// }
5246/// \endcode
5247static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM,
5248 SourceLocation Loc,
5249 ReductionCodeGen &RCG, unsigned N) {
5250 ASTContext &C = CGM.getContext();
5251 QualType VoidPtrTy = C.VoidPtrTy;
5252 VoidPtrTy.addRestrict();
5253 FunctionArgList Args;
5254 ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
5255 ImplicitParamKind::Other);
5256 ImplicitParamDecl ParamOrig(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
5257 ImplicitParamKind::Other);
5258 Args.emplace_back(&Param);
5259 Args.emplace_back(&ParamOrig);
5260 const auto &FnInfo =
5261 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5262 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
5263 std::string Name = CGM.getOpenMPRuntime().getName({"red_init", ""});
5264 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
5265 Name, &CGM.getModule());
5266 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
5267 Fn->setDoesNotRecurse();
5268 CodeGenFunction CGF(CGM);
5269 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
5270 QualType PrivateType = RCG.getPrivateType(N);
5271 Address PrivateAddr = CGF.EmitLoadOfPointer(
5272 CGF.GetAddrOfLocalVar(&Param).withElementType(
5273 CGF.ConvertTypeForMem(PrivateType)->getPointerTo()),
5274 C.getPointerType(PrivateType)->castAs<PointerType>());
5275 llvm::Value *Size = nullptr;
5276 // If the size of the reduction item is non-constant, load it from global
5277 // threadprivate variable.
5278 if (RCG.getSizes(N).second) {
5279 Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
5280 CGF, CGM.getContext().getSizeType(),
5281 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
5282 Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
5283 CGM.getContext().getSizeType(), Loc);
5284 }
5285 RCG.emitAggregateType(CGF, N, Size);
5286 Address OrigAddr = Address::invalid();
5287 // If the initializer uses the initializer from the declare reduction
5288 // construct, emit a pointer to the address of the original reduction item
5289 // (required by the reduction initializer).
5290 if (RCG.usesReductionInitializer(N)) {
5291 Address SharedAddr = CGF.GetAddrOfLocalVar(&ParamOrig);
5292 OrigAddr = CGF.EmitLoadOfPointer(
5293 SharedAddr,
5294 CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr());
5295 }
5296 // Emit the initializer:
5297 // %0 = bitcast void* %arg to <type>*
5298 // store <type> <init>, <type>* %0
5299 RCG.emitInitialization(CGF, N, PrivateAddr, OrigAddr,
5300 [](CodeGenFunction &) { return false; });
5301 CGF.FinishFunction();
5302 return Fn;
5303}
5304
5305/// Emits reduction combiner function:
5306/// \code
5307/// void @.red_comb(void* %arg0, void* %arg1) {
5308/// %lhs = bitcast void* %arg0 to <type>*
5309/// %rhs = bitcast void* %arg1 to <type>*
5310/// %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs)
5311/// store <type> %2, <type>* %lhs
5312/// ret void
5313/// }
5314/// \endcode
5315static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM,
5316 SourceLocation Loc,
5317 ReductionCodeGen &RCG, unsigned N,
5318 const Expr *ReductionOp,
5319 const Expr *LHS, const Expr *RHS,
5320 const Expr *PrivateRef) {
5321 ASTContext &C = CGM.getContext();
5322 const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl());
5323 const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl());
5324 FunctionArgList Args;
5325 ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
5326 C.VoidPtrTy, ImplicitParamKind::Other);
5327 ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
5328 ImplicitParamKind::Other);
5329 Args.emplace_back(&ParamInOut);
5330 Args.emplace_back(&ParamIn);
5331 const auto &FnInfo =
5332 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5333 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
5334 std::string Name = CGM.getOpenMPRuntime().getName({"red_comb", ""});
5335 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
5336 Name, &CGM.getModule());
5337 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
5338 Fn->setDoesNotRecurse();
5339 CodeGenFunction CGF(CGM);
5340 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
5341 llvm::Value *Size = nullptr;
5342 // If the size of the reduction item is non-constant, load it from global
5343 // threadprivate variable.
5344 if (RCG.getSizes(N).second) {
5345 Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
5346 CGF, CGM.getContext().getSizeType(),
5347 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
5348 Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
5349 CGM.getContext().getSizeType(), Loc);
5350 }
5351 RCG.emitAggregateType(CGF, N, Size);
5352 // Remap lhs and rhs variables to the addresses of the function arguments.
5353 // %lhs = bitcast void* %arg0 to <type>*
5354 // %rhs = bitcast void* %arg1 to <type>*
5355 CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
5356 PrivateScope.addPrivate(
5357 LHSVD,
5358 // Pull out the pointer to the variable.
5359 CGF.EmitLoadOfPointer(
5360 CGF.GetAddrOfLocalVar(&ParamInOut)
5361 .withElementType(
5362 CGF.ConvertTypeForMem(LHSVD->getType())->getPointerTo()),
5363 C.getPointerType(LHSVD->getType())->castAs<PointerType>()));
5364 PrivateScope.addPrivate(
5365 RHSVD,
5366 // Pull out the pointer to the variable.
5367 CGF.EmitLoadOfPointer(
5368 CGF.GetAddrOfLocalVar(&ParamIn).withElementType(
5369 CGF.ConvertTypeForMem(RHSVD->getType())->getPointerTo()),
5370 C.getPointerType(RHSVD->getType())->castAs<PointerType>()));
5371 PrivateScope.Privatize();
5372 // Emit the combiner body:
5373 // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs)
5374 // store <type> %2, <type>* %lhs
5375 CGM.getOpenMPRuntime().emitSingleReductionCombiner(
5376 CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS),
5377 cast<DeclRefExpr>(RHS));
5378 CGF.FinishFunction();
5379 return Fn;
5380}
5381
5382/// Emits reduction finalizer function:
5383/// \code
5384/// void @.red_fini(void* %arg) {
5385/// %0 = bitcast void* %arg to <type>*
5386/// <destroy>(<type>* %0)
5387/// ret void
5388/// }
5389/// \endcode
5390static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM,
5391 SourceLocation Loc,
5392 ReductionCodeGen &RCG, unsigned N) {
5393 if (!RCG.needCleanups(N))
5394 return nullptr;
5395 ASTContext &C = CGM.getContext();
5396 FunctionArgList Args;
5397 ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
5398 ImplicitParamKind::Other);
5399 Args.emplace_back(&Param);
5400 const auto &FnInfo =
5401 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5402 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
5403 std::string Name = CGM.getOpenMPRuntime().getName({"red_fini", ""});
5404 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
5405 Name, &CGM.getModule());
5406 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
5407 Fn->setDoesNotRecurse();
5408 CodeGenFunction CGF(CGM);
5409 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
5410 Address PrivateAddr = CGF.EmitLoadOfPointer(
5411 CGF.GetAddrOfLocalVar(&Param), C.VoidPtrTy.castAs<PointerType>());
5412 llvm::Value *Size = nullptr;
5413 // If the size of the reduction item is non-constant, load it from global
5414 // threadprivate variable.
5415 if (RCG.getSizes(N).second) {
5416 Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
5417 CGF, CGM.getContext().getSizeType(),
5418 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
5419 Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
5420 CGM.getContext().getSizeType(), Loc);
5421 }
5422 RCG.emitAggregateType(CGF, N, Size);
5423 // Emit the finalizer body:
5424 // <destroy>(<type>* %0)
5425 RCG.emitCleanups(CGF, N, PrivateAddr);
5426 CGF.FinishFunction(Loc);
5427 return Fn;
5428}
5429
5430 llvm::Value *CGOpenMPRuntime::emitTaskReductionInit(
5431 CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
5432 ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
5433 if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty())
5434 return nullptr;
5435
5436 // Build typedef struct:
5437 // kmp_taskred_input {
5438 // void *reduce_shar; // shared reduction item
5439 // void *reduce_orig; // original reduction item used for initialization
5440 // size_t reduce_size; // size of data item
5441 // void *reduce_init; // data initialization routine
5442 // void *reduce_fini; // data finalization routine
5443 // void *reduce_comb; // data combiner routine
5444 // kmp_task_red_flags_t flags; // flags for additional info from compiler
5445 // } kmp_taskred_input_t;
5446 ASTContext &C = CGM.getContext();
5447 RecordDecl *RD = C.buildImplicitRecord("kmp_taskred_input_t");
5448 RD->startDefinition();
5449 const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
5450 const FieldDecl *OrigFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
5451 const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType());
5452 const FieldDecl *InitFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
5453 const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
5454 const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
5455 const FieldDecl *FlagsFD = addFieldToRecordDecl(
5456 C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false));
5457 RD->completeDefinition();
5458 QualType RDType = C.getRecordType(RD);
5459 unsigned Size = Data.ReductionVars.size();
5460 llvm::APInt ArraySize(/*numBits=*/64, Size);
5461 QualType ArrayRDType =
5462 C.getConstantArrayType(RDType, ArraySize, nullptr,
5463 ArraySizeModifier::Normal, /*IndexTypeQuals=*/0);
5464 // kmp_task_red_input_t .rd_input.[Size];
5465 Address TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input.");
5466 ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionOrigs,
5467 Data.ReductionCopies, Data.ReductionOps);
5468 for (unsigned Cnt = 0; Cnt < Size; ++Cnt) {
5469 // kmp_task_red_input_t &ElemLVal = .rd_input.[Cnt];
5470 llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0),
5471 llvm::ConstantInt::get(CGM.SizeTy, Cnt)};
5472 llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP(
5473 TaskRedInput.getElementType(), TaskRedInput.getPointer(), Idxs,
5474 /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc,
5475 ".rd_input.gep.");
5476 LValue ElemLVal = CGF.MakeNaturalAlignAddrLValue(GEP, RDType);
5477 // ElemLVal.reduce_shar = &Shareds[Cnt];
5478 LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD);
5479 RCG.emitSharedOrigLValue(CGF, Cnt);
5480 llvm::Value *Shared = RCG.getSharedLValue(Cnt).getPointer(CGF);
5481 CGF.EmitStoreOfScalar(Shared, SharedLVal);
5482 // ElemLVal.reduce_orig = &Origs[Cnt];
5483 LValue OrigLVal = CGF.EmitLValueForField(ElemLVal, OrigFD);
5484 llvm::Value *Orig = RCG.getOrigLValue(Cnt).getPointer(CGF);
5485 CGF.EmitStoreOfScalar(Orig, OrigLVal);
5486 RCG.emitAggregateType(CGF, Cnt);
5487 llvm::Value *SizeValInChars;
5488 llvm::Value *SizeVal;
5489 std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt);
5490 // We use delayed creation/initialization for VLAs and array sections. It is
5491 // required because the runtime does not provide a way to pass the sizes of
5492 // VLAs/array sections to the initializer/combiner/finalizer functions.
5493 // Instead, threadprivate global variables are used to store these values,
5494 // and the functions read them from there.
5495 bool DelayedCreation = !!SizeVal;
5496 SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy,
5497 /*isSigned=*/false);
5498 LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD);
5499 CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal);
5500 // ElemLVal.reduce_init = init;
5501 LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD);
5502 llvm::Value *InitAddr = emitReduceInitFunction(CGM, Loc, RCG, Cnt);
5503 CGF.EmitStoreOfScalar(InitAddr, InitLVal);
5504 // ElemLVal.reduce_fini = fini;
5505 LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD);
5506 llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt);
5507 llvm::Value *FiniAddr =
5508 Fini ? Fini : llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
5509 CGF.EmitStoreOfScalar(FiniAddr, FiniLVal);
5510 // ElemLVal.reduce_comb = comb;
5511 LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD);
5512 llvm::Value *CombAddr = emitReduceCombFunction(
5513 CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt],
5514 RHSExprs[Cnt], Data.ReductionCopies[Cnt]);
5515 CGF.EmitStoreOfScalar(CombAddr, CombLVal);
5516 // ElemLVal.flags = 0;
5517 LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD);
5518 if (DelayedCreation) {
5519 CGF.EmitStoreOfScalar(
5520 llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*isSigned=*/true),
5521 FlagsLVal);
5522 } else
5523 CGF.EmitNullInitialization(FlagsLVal.getAddress(CGF),
5524 FlagsLVal.getType());
5525 }
5526 if (Data.IsReductionWithTaskMod) {
5527 // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int
5528 // is_ws, int num, void *data);
5529 llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
5530 llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
5531 CGM.IntTy, /*isSigned=*/true);
5532 llvm::Value *Args[] = {
5533 IdentTLoc, GTid,
5534 llvm::ConstantInt::get(CGM.IntTy, Data.IsWorksharingReduction ? 1 : 0,
5535 /*isSigned=*/true),
5536 llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
5537 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5538 TaskRedInput.getPointer(), CGM.VoidPtrTy)};
5539 return CGF.EmitRuntimeCall(
5540 OMPBuilder.getOrCreateRuntimeFunction(
5541 CGM.getModule(), OMPRTL___kmpc_taskred_modifier_init),
5542 Args);
5543 }
5544 // Build call void *__kmpc_taskred_init(int gtid, int num_data, void *data);
5545 llvm::Value *Args[] = {
5546 CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
5547 /*isSigned=*/true),
5548 llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
5549 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskRedInput.getPointer(),
5550 CGM.VoidPtrTy)};
5551 return CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
5552 CGM.getModule(), OMPRTL___kmpc_taskred_init),
5553 Args);
5554}
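// Illustrative sketch (not part of the original source): the array built above
// backs task reductions, e.g. a hypothetical
//
//   #pragma omp taskgroup task_reduction(+ : X)
//
// fills one kmp_taskred_input_t slot for X and hands the array to
// __kmpc_taskred_init, while the 'task' reduction modifier on a worksharing
// construct takes the __kmpc_taskred_modifier_init path instead.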
5555
5556 void CGOpenMPRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
5557 SourceLocation Loc,
5558 bool IsWorksharingReduction) {
5559 // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int
5560 // is_ws, int num, void *data);
5561 llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
5562 llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
5563 CGM.IntTy, /*isSigned=*/true);
5564 llvm::Value *Args[] = {IdentTLoc, GTid,
5565 llvm::ConstantInt::get(CGM.IntTy,
5566 IsWorksharingReduction ? 1 : 0,
5567 /*isSigned=*/true)};
5568 (void)CGF.EmitRuntimeCall(
5569 OMPBuilder.getOrCreateRuntimeFunction(
5570 CGM.getModule(), OMPRTL___kmpc_task_reduction_modifier_fini),
5571 Args);
5572}
5573
5574 void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
5575 SourceLocation Loc,
5576 ReductionCodeGen &RCG,
5577 unsigned N) {
5578 auto Sizes = RCG.getSizes(N);
5579 // Emit the threadprivate global variable if the size is non-constant
5580 // (Sizes.second != nullptr).
5581 if (Sizes.second) {
5582 llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy,
5583 /*isSigned=*/false);
5584 Address SizeAddr = getAddrOfArtificialThreadPrivate(
5585 CGF, CGM.getContext().getSizeType(),
5586 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
5587 CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false);
5588 }
5589}
5590
5591 Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF,
5592 SourceLocation Loc,
5593 llvm::Value *ReductionsPtr,
5594 LValue SharedLVal) {
5595 // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
5596 // *d);
5597 llvm::Value *Args[] = {CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
5598 CGM.IntTy,
5599 /*isSigned=*/true),
5600 ReductionsPtr,
5601 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5602 SharedLVal.getPointer(CGF), CGM.VoidPtrTy)};
5603 return Address(
5604 CGF.EmitRuntimeCall(
5605 OMPBuilder.getOrCreateRuntimeFunction(
5606 CGM.getModule(), OMPRTL___kmpc_task_reduction_get_th_data),
5607 Args),
5608 CGF.Int8Ty, SharedLVal.getAlignment());
5609}
5610
5611 void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF, SourceLocation Loc,
5612 const OMPTaskDataTy &Data) {
5613 if (!CGF.HaveInsertPoint())
5614 return;
5615
5616 if (CGF.CGM.getLangOpts().OpenMPIRBuilder && Data.Dependences.empty()) {
5617 // TODO: Need to support taskwait with dependences in the OpenMPIRBuilder.
5618 OMPBuilder.createTaskwait(CGF.Builder);
5619 } else {
5620 llvm::Value *ThreadID = getThreadID(CGF, Loc);
5621 llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
5622 auto &M = CGM.getModule();
5623 Address DependenciesArray = Address::invalid();
5624 llvm::Value *NumOfElements;
5625 std::tie(NumOfElements, DependenciesArray) =
5626 emitDependClause(CGF, Data.Dependences, Loc);
5627 if (!Data.Dependences.empty()) {
5628 llvm::Value *DepWaitTaskArgs[7];
5629 DepWaitTaskArgs[0] = UpLoc;
5630 DepWaitTaskArgs[1] = ThreadID;
5631 DepWaitTaskArgs[2] = NumOfElements;
5632 DepWaitTaskArgs[3] = DependenciesArray.getPointer();
5633 DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
5634 DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
5635 DepWaitTaskArgs[6] =
5636 llvm::ConstantInt::get(CGF.Int32Ty, Data.HasNowaitClause);
5637
5638 CodeGenFunction::RunCleanupsScope LocalScope(CGF);
5639
5640 // Build void __kmpc_omp_taskwait_deps_51(ident_t *, kmp_int32 gtid,
5641 // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
5642 // ndeps_noalias, kmp_depend_info_t *noalias_dep_list,
5643 // kmp_int32 has_no_wait); if dependence info is specified.
5644 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
5645 M, OMPRTL___kmpc_omp_taskwait_deps_51),
5646 DepWaitTaskArgs);
5647
5648 } else {
5649
5650 // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
5651 // global_tid);
5652 llvm::Value *Args[] = {UpLoc, ThreadID};
5653 // Ignore return result until untied tasks are supported.
5654 CGF.EmitRuntimeCall(
5655 OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_taskwait),
5656 Args);
5657 }
5658 }
5659
5660 if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
5661 Region->emitUntiedSwitch(CGF);
5662}
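// Illustrative sketch (not part of the original source): a plain
// '#pragma omp taskwait' lowers to __kmpc_omp_taskwait (or the
// OpenMPIRBuilder's createTaskwait), while a hypothetical
// '#pragma omp taskwait depend(in : X)' takes the __kmpc_omp_taskwait_deps_51
// path above with a dependence array describing X.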
5663
5664 void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF,
5665 OpenMPDirectiveKind InnerKind,
5666 const RegionCodeGenTy &CodeGen,
5667 bool HasCancel) {
5668 if (!CGF.HaveInsertPoint())
5669 return;
5670 InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel,
5671 InnerKind != OMPD_critical &&
5672 InnerKind != OMPD_master &&
5673 InnerKind != OMPD_masked);
5674 CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);
5675}
5676
5677namespace {
5678enum RTCancelKind {
5679 CancelNoreq = 0,
5680 CancelParallel = 1,
5681 CancelLoop = 2,
5682 CancelSections = 3,
5683 CancelTaskgroup = 4
5684};
5685} // anonymous namespace
5686
5687static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) {
5688 RTCancelKind CancelKind = CancelNoreq;
5689 if (CancelRegion == OMPD_parallel)
5690 CancelKind = CancelParallel;
5691 else if (CancelRegion == OMPD_for)
5692 CancelKind = CancelLoop;
5693 else if (CancelRegion == OMPD_sections)
5694 CancelKind = CancelSections;
5695 else {
5696 assert(CancelRegion == OMPD_taskgroup);
5697 CancelKind = CancelTaskgroup;
5698 }
5699 return CancelKind;
5700}
5701
5702 void CGOpenMPRuntime::emitCancellationPointCall(
5703 CodeGenFunction &CGF, SourceLocation Loc,
5704 OpenMPDirectiveKind CancelRegion) {
5705 if (!CGF.HaveInsertPoint())
5706 return;
5707 // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
5708 // global_tid, kmp_int32 cncl_kind);
5709 if (auto *OMPRegionInfo =
5710 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
5711 // For 'cancellation point taskgroup', the task region info may not have a
5712 // cancel. This may instead happen in another adjacent task.
5713 if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) {
5714 llvm::Value *Args[] = {
5715 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
5716 CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
5717 // Ignore return result until untied tasks are supported.
5718 llvm::Value *Result = CGF.EmitRuntimeCall(
5719 OMPBuilder.getOrCreateRuntimeFunction(
5720 CGM.getModule(), OMPRTL___kmpc_cancellationpoint),
5721 Args);
5722 // if (__kmpc_cancellationpoint()) {
5723 // call i32 @__kmpc_cancel_barrier( // for parallel cancellation only
5724 // exit from construct;
5725 // }
5726 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
5727 llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
5728 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
5729 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
5730 CGF.EmitBlock(ExitBB);
5731 if (CancelRegion == OMPD_parallel)
5732 emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
5733 // exit from construct;
5734 CodeGenFunction::JumpDest CancelDest =
5735 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
5736 CGF.EmitBranchThroughCleanup(CancelDest);
5737 CGF.EmitBlock(ContBB, /*IsFinished=*/true);
5738 }
5739 }
5740}
5741
5742 void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc,
5743 const Expr *IfCond,
5744 OpenMPDirectiveKind CancelRegion) {
5745 if (!CGF.HaveInsertPoint())
5746 return;
5747 // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
5748 // kmp_int32 cncl_kind);
5749 auto &M = CGM.getModule();
5750 if (auto *OMPRegionInfo =
5751 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
5752 auto &&ThenGen = [this, &M, Loc, CancelRegion,
5753 OMPRegionInfo](CodeGenFunction &CGF, PrePostActionTy &) {
5754 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
5755 llvm::Value *Args[] = {
5756 RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc),
5757 CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
5758 // Ignore return result until untied tasks are supported.
5759 llvm::Value *Result = CGF.EmitRuntimeCall(
5760 OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_cancel), Args);
5761 // if (__kmpc_cancel()) {
5762 // call i32 @__kmpc_cancel_barrier( // for parallel cancellation only
5763 // exit from construct;
5764 // }
5765 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
5766 llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
5767 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
5768 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
5769 CGF.EmitBlock(ExitBB);
5770 if (CancelRegion == OMPD_parallel)
5771 RT.emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
5772 // exit from construct;
5773 CodeGenFunction::JumpDest CancelDest =
5774 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
5775 CGF.EmitBranchThroughCleanup(CancelDest);
5776 CGF.EmitBlock(ContBB, /*IsFinished=*/true);
5777 };
5778 if (IfCond) {
5779 emitIfClause(CGF, IfCond, ThenGen,
5780 [](CodeGenFunction &, PrePostActionTy &) {});
5781 } else {
5782 RegionCodeGenTy ThenRCG(ThenGen);
5783 ThenRCG(CGF);
5784 }
5785 }
5786}
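// Illustrative sketch (not part of the original source): for a hypothetical
//
//   #pragma omp cancel for if(Cond)
//
// the then-branch above calls __kmpc_cancel with cncl_kind=CancelLoop(2) and,
// if the runtime reports an active cancellation, branches through the cleanups
// to the exit of the innermost worksharing region.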
5787
5788namespace {
5789/// Cleanup action for uses_allocators support.
5790class OMPUsesAllocatorsActionTy final : public PrePostActionTy {
5791 ArrayRef<std::pair<const Expr *, const Expr *>> Allocators;
5792
5793public:
5794 OMPUsesAllocatorsActionTy(
5795 ArrayRef<std::pair<const Expr *, const Expr *>> Allocators)
5796 : Allocators(Allocators) {}
5797 void Enter(CodeGenFunction &CGF) override {
5798 if (!CGF.HaveInsertPoint())
5799 return;
5800 for (const auto &AllocatorData : Allocators) {
5801 CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsInit(
5802 CGF, AllocatorData.first, AllocatorData.second);
5803 }
5804 }
5805 void Exit(CodeGenFunction &CGF) override {
5806 if (!CGF.HaveInsertPoint())
5807 return;
5808 for (const auto &AllocatorData : Allocators) {
5809 CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsFini(CGF,
5810 AllocatorData.first);
5811 }
5812 }
5813};
5814} // namespace
5815
5816 void CGOpenMPRuntime::emitTargetOutlinedFunction(
5817 const OMPExecutableDirective &D, StringRef ParentName,
5818 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
5819 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
5820 assert(!ParentName.empty() && "Invalid target entry parent name!");
5821 HasEmittedTargetRegion = true;
5822 SmallVector<std::pair<const Expr *, const Expr *>, 4> Allocators;
5823 for (const auto *C : D.getClausesOfKind<OMPUsesAllocatorsClause>()) {
5824 for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
5825 const OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
5826 if (!D.AllocatorTraits)
5827 continue;
5828 Allocators.emplace_back(D.Allocator, D.AllocatorTraits);
5829 }
5830 }
5831 OMPUsesAllocatorsActionTy UsesAllocatorAction(Allocators);
5832 CodeGen.setAction(UsesAllocatorAction);
5833 emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
5834 IsOffloadEntry, CodeGen);
5835}
5836
5837 void CGOpenMPRuntime::emitUsesAllocatorsInit(CodeGenFunction &CGF,
5838 const Expr *Allocator,
5839 const Expr *AllocatorTraits) {
5840 llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
5841 ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
5842 // Use default memspace handle.
5843 llvm::Value *MemSpaceHandle = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
5844 llvm::Value *NumTraits = llvm::ConstantInt::get(
5845 CGF.IntTy, cast<ConstantArrayType>(
5846 AllocatorTraits->getType()->getAsArrayTypeUnsafe())
5847 ->getSize()
5848 .getLimitedValue());
5849 LValue AllocatorTraitsLVal = CGF.EmitLValue(AllocatorTraits);
5850 Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5851 AllocatorTraitsLVal.getAddress(CGF), CGF.VoidPtrPtrTy, CGF.VoidPtrTy);
5852 AllocatorTraitsLVal = CGF.MakeAddrLValue(Addr, CGF.getContext().VoidPtrTy,
5853 AllocatorTraitsLVal.getBaseInfo(),
5854 AllocatorTraitsLVal.getTBAAInfo());
5855 llvm::Value *Traits = Addr.getPointer();
5856
5857 llvm::Value *AllocatorVal =
5858 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
5859 CGM.getModule(), OMPRTL___kmpc_init_allocator),
5860 {ThreadId, MemSpaceHandle, NumTraits, Traits});
5861 // Store to allocator.
5862 CGF.EmitAutoVarAlloca(*cast<VarDecl>(
5863 cast<DeclRefExpr>(Allocator->IgnoreParenImpCasts())->getDecl()));
5864 LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
5865 AllocatorVal =
5866 CGF.EmitScalarConversion(AllocatorVal, CGF.getContext().VoidPtrTy,
5867 Allocator->getType(), Allocator->getExprLoc());
5868 CGF.EmitStoreOfScalar(AllocatorVal, AllocatorLVal);
5869}
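// Illustrative sketch (not part of the original source): the init above
// handles clauses such as a hypothetical
//
//   #pragma omp target uses_allocators(MyAlloc(MyTraits))
//
// where MyAlloc is initialized from the MyTraits array via
// __kmpc_init_allocator before the region runs and destroyed afterwards.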
5870
5871 void CGOpenMPRuntime::emitUsesAllocatorsFini(CodeGenFunction &CGF,
5872 const Expr *Allocator) {
5873 llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
5874 ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
5875 LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
5876 llvm::Value *AllocatorVal =
5877 CGF.EmitLoadOfScalar(AllocatorLVal, Allocator->getExprLoc());
5878 AllocatorVal = CGF.EmitScalarConversion(AllocatorVal, Allocator->getType(),
5879 CGF.getContext().VoidPtrTy,
5880 Allocator->getExprLoc());
5881 (void)CGF.EmitRuntimeCall(
5882 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
5883 OMPRTL___kmpc_destroy_allocator),
5884 {ThreadId, AllocatorVal});
5885}
5886
5887 void CGOpenMPRuntime::computeMinAndMaxThreadsAndTeams(
5888 const OMPExecutableDirective &D, CodeGenFunction &CGF,
5889 int32_t &MinThreadsVal, int32_t &MaxThreadsVal, int32_t &MinTeamsVal,
5890 int32_t &MaxTeamsVal) {
5891
5892 getNumTeamsExprForTargetDirective(CGF, D, MinTeamsVal, MaxTeamsVal);
5893 getNumThreadsExprForTargetDirective(CGF, D, MaxThreadsVal,
5894 /*UpperBoundOnly=*/true);
5895
5896 for (auto *C : D.getClausesOfKind<OMPXAttributeClause>()) {
5897 for (auto *A : C->getAttrs()) {
5898 int32_t AttrMinThreadsVal = 1, AttrMaxThreadsVal = -1;
5899 int32_t AttrMinBlocksVal = 1, AttrMaxBlocksVal = -1;
5900 if (auto *Attr = dyn_cast<CUDALaunchBoundsAttr>(A))
5901 CGM.handleCUDALaunchBoundsAttr(nullptr, Attr, &AttrMaxThreadsVal,
5902 &AttrMinBlocksVal, &AttrMaxBlocksVal);
5903 else if (auto *Attr = dyn_cast<AMDGPUFlatWorkGroupSizeAttr>(A))
5904 CGM.handleAMDGPUFlatWorkGroupSizeAttr(
5905 nullptr, Attr, /*ReqdWGS=*/nullptr, &AttrMinThreadsVal,
5906 &AttrMaxThreadsVal);
5907 else
5908 continue;
5909
5910 MinThreadsVal = std::max(MinThreadsVal, AttrMinThreadsVal);
5911 if (AttrMaxThreadsVal > 0)
5912 MaxThreadsVal = MaxThreadsVal > 0
5913 ? std::min(MaxThreadsVal, AttrMaxThreadsVal)
5914 : AttrMaxThreadsVal;
5915 MinTeamsVal = std::max(MinTeamsVal, AttrMinBlocksVal);
5916 if (AttrMaxBlocksVal > 0)
5917 MaxTeamsVal = MaxTeamsVal > 0 ? std::min(MaxTeamsVal, AttrMaxBlocksVal)
5918 : AttrMaxBlocksVal;
5919 }
5920 }
5921}
5922
5923 void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper(
5924 const OMPExecutableDirective &D, StringRef ParentName,
5925 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
5926 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
5927
5928 llvm::TargetRegionEntryInfo EntryInfo =
5929 getEntryInfoFromPresumedLoc(CGM, OMPBuilder, D.getBeginLoc(), ParentName);
5930
5931 CodeGenFunction CGF(CGM, true);
5932 llvm::OpenMPIRBuilder::FunctionGenCallback &&GenerateOutlinedFunction =
5933 [&CGF, &D, &CodeGen](StringRef EntryFnName) {
5934 const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
5935
5936 CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName);
5937 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
5938 return CGF.GenerateOpenMPCapturedStmtFunction(CS, D.getBeginLoc());
5939 };
5940
5941 OMPBuilder.emitTargetRegionFunction(EntryInfo, GenerateOutlinedFunction,
5942 IsOffloadEntry, OutlinedFn, OutlinedFnID);
5943
5944 if (!OutlinedFn)
5945 return;
5946
5947 CGM.getTargetCodeGenInfo().setTargetAttributes(nullptr, OutlinedFn, CGM);
5948
5949 for (auto *C : D.getClausesOfKind<OMPXAttributeClause>()) {
5950 for (auto *A : C->getAttrs()) {
5951 if (auto *Attr = dyn_cast<AMDGPUWavesPerEUAttr>(A))
5952 CGM.handleAMDGPUWavesPerEUAttr(OutlinedFn, Attr);
5953 }
5954 }
5955}
5956
5957/// Checks if the expression is constant or does not have non-trivial function
5958/// calls.
5959static bool isTrivial(ASTContext &Ctx, const Expr * E) {
5960 // We can skip constant expressions.
5961 // We can skip expressions with trivial calls or simple expressions.
5962 return (E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) ||
5963 !E->hasNonTrivialCall(Ctx)) &&
5964 !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true);
5965}
5966
5967 const Stmt *CGOpenMPRuntime::getSingleCompoundChild(ASTContext &Ctx,
5968 const Stmt *Body) {
5969 const Stmt *Child = Body->IgnoreContainers();
5970 while (const auto *C = dyn_cast_or_null<CompoundStmt>(Child)) {
5971 Child = nullptr;
5972 for (const Stmt *S : C->body()) {
5973 if (const auto *E = dyn_cast<Expr>(S)) {
5974 if (isTrivial(Ctx, E))
5975 continue;
5976 }
5977 // Some of the statements can be ignored.
5978 if (isa<AsmStmt>(S) || isa<NullStmt>(S) || isa<OMPFlushDirective>(S) ||
5979 isa<OMPBarrierDirective>(S) || isa<OMPTaskyieldDirective>(S))
5980 continue;
5981 // Analyze declarations.
5982 if (const auto *DS = dyn_cast<DeclStmt>(S)) {
5983 if (llvm::all_of(DS->decls(), [](const Decl *D) {
5984 if (isa<EmptyDecl>(D) || isa<DeclContext>(D) ||
5985 isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) ||
5986 isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) ||
5987 isa<UsingDirectiveDecl>(D) ||
5988 isa<OMPDeclareReductionDecl>(D) ||
5989 isa<OMPThreadPrivateDecl>(D) || isa<OMPAllocateDecl>(D))
5990 return true;
5991 const auto *VD = dyn_cast<VarDecl>(D);
5992 if (!VD)
5993 return false;
5994 return VD->hasGlobalStorage() || !VD->isUsed();
5995 }))
5996 continue;
5997 }
5998 // Found multiple significant children - cannot return a single child.
5999 if (Child)
6000 return nullptr;
6001 Child = S;
6002 }
6003 if (Child)
6004 Child = Child->IgnoreContainers();
6005 }
6006 return Child;
6007}
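// For illustration (an assumed body): given
//   { int Unused; ; #pragma omp teams ... }
// the unused local and the null statement are skipped, so the teams
// directive is returned as the single compound child; two non-ignorable
// statements would make this return nullptr instead.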
6008
6009const Expr *CGOpenMPRuntime::getNumTeamsExprForTargetDirective(
6010 CodeGenFunction &CGF, const OMPExecutableDirective &D, int32_t &MinTeamsVal,
6011 int32_t &MaxTeamsVal) {
6012
6013 OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
6014 assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
6015 "Expected target-based executable directive.");
6016 switch (DirectiveKind) {
6017 case OMPD_target: {
6018 const auto *CS = D.getInnermostCapturedStmt();
6019 const auto *Body =
6020 CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
6021 const Stmt *ChildStmt =
6022 CGOpenMPRuntime::getSingleCompoundChild(CGF.getContext(), Body);
6023 if (const auto *NestedDir =
6024 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
6025 if (isOpenMPTeamsDirective(NestedDir->getDirectiveKind())) {
6026 if (NestedDir->hasClausesOfKind<OMPNumTeamsClause>()) {
6027 const Expr *NumTeams =
6028 NestedDir->getSingleClause<OMPNumTeamsClause>()->getNumTeams();
6029 if (NumTeams->isIntegerConstantExpr(CGF.getContext()))
6030 if (auto Constant =
6031 NumTeams->getIntegerConstantExpr(CGF.getContext()))
6032 MinTeamsVal = MaxTeamsVal = Constant->getExtValue();
6033 return NumTeams;
6034 }
6035 MinTeamsVal = MaxTeamsVal = 0;
6036 return nullptr;
6037 }
6038 if (isOpenMPParallelDirective(NestedDir->getDirectiveKind()) ||
6039 isOpenMPSimdDirective(NestedDir->getDirectiveKind())) {
6040 MinTeamsVal = MaxTeamsVal = 1;
6041 return nullptr;
6042 }
6043 MinTeamsVal = MaxTeamsVal = 1;
6044 return nullptr;
6045 }
6046 // A value of -1 is used to signal that no teams region needs to be emitted.
6047 MinTeamsVal = MaxTeamsVal = -1;
6048 return nullptr;
6049 }
6050 case OMPD_target_teams_loop:
6051 case OMPD_target_teams:
6052 case OMPD_target_teams_distribute:
6053 case OMPD_target_teams_distribute_simd:
6054 case OMPD_target_teams_distribute_parallel_for:
6055 case OMPD_target_teams_distribute_parallel_for_simd: {
6056 if (D.hasClausesOfKind<OMPNumTeamsClause>()) {
6057 const Expr *NumTeams =
6058 D.getSingleClause<OMPNumTeamsClause>()->getNumTeams();
6059 if (NumTeams->isIntegerConstantExpr(CGF.getContext()))
6060 if (auto Constant = NumTeams->getIntegerConstantExpr(CGF.getContext()))
6061 MinTeamsVal = MaxTeamsVal = Constant->getExtValue();
6062 return NumTeams;
6063 }
6064 MinTeamsVal = MaxTeamsVal = 0;
6065 return nullptr;
6066 }
6067 case OMPD_target_parallel:
6068 case OMPD_target_parallel_for:
6069 case OMPD_target_parallel_for_simd:
6070 case OMPD_target_parallel_loop:
6071 case OMPD_target_simd:
6072 MinTeamsVal = MaxTeamsVal = 1;
6073 return nullptr;
6074 case OMPD_parallel:
6075 case OMPD_for:
6076 case OMPD_parallel_for:
6077 case OMPD_parallel_loop:
6078 case OMPD_parallel_master:
6079 case OMPD_parallel_sections:
6080 case OMPD_for_simd:
6081 case OMPD_parallel_for_simd:
6082 case OMPD_cancel:
6083 case OMPD_cancellation_point:
6084 case OMPD_ordered:
6085 case OMPD_threadprivate:
6086 case OMPD_allocate:
6087 case OMPD_task:
6088 case OMPD_simd:
6089 case OMPD_tile:
6090 case OMPD_unroll:
6091 case OMPD_sections:
6092 case OMPD_section:
6093 case OMPD_single:
6094 case OMPD_master:
6095 case OMPD_critical:
6096 case OMPD_taskyield:
6097 case OMPD_barrier:
6098 case OMPD_taskwait:
6099 case OMPD_taskgroup:
6100 case OMPD_atomic:
6101 case OMPD_flush:
6102 case OMPD_depobj:
6103 case OMPD_scan:
6104 case OMPD_teams:
6105 case OMPD_target_data:
6106 case OMPD_target_exit_data:
6107 case OMPD_target_enter_data:
6108 case OMPD_distribute:
6109 case OMPD_distribute_simd:
6110 case OMPD_distribute_parallel_for:
6111 case OMPD_distribute_parallel_for_simd:
6112 case OMPD_teams_distribute:
6113 case OMPD_teams_distribute_simd:
6114 case OMPD_teams_distribute_parallel_for:
6115 case OMPD_teams_distribute_parallel_for_simd:
6116 case OMPD_target_update:
6117 case OMPD_declare_simd:
6118 case OMPD_declare_variant:
6119 case OMPD_begin_declare_variant:
6120 case OMPD_end_declare_variant:
6121 case OMPD_declare_target:
6122 case OMPD_end_declare_target:
6123 case OMPD_declare_reduction:
6124 case OMPD_declare_mapper:
6125 case OMPD_taskloop:
6126 case OMPD_taskloop_simd:
6127 case OMPD_master_taskloop:
6128 case OMPD_master_taskloop_simd:
6129 case OMPD_parallel_master_taskloop:
6130 case OMPD_parallel_master_taskloop_simd:
6131 case OMPD_requires:
6132 case OMPD_metadirective:
6133 case OMPD_unknown:
6134 break;
6135 default:
6136 break;
6137 }
6138 llvm_unreachable("Unexpected directive kind.");
6139}
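// A rough sketch of the cases above (assumed directives):
//   #pragma omp target teams num_teams(4)  -> Min/MaxTeamsVal = 4, clause expr
//   #pragma omp target parallel            -> Min/MaxTeamsVal = 1, nullptr
//   #pragma omp target with a plain body   -> Min/MaxTeamsVal = -1, nullptr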
6140
6141llvm::Value *CGOpenMPRuntime::emitNumTeamsForTargetDirective(
6142 CodeGenFunction &CGF, const OMPExecutableDirective &D) {
6143 assert(!CGF.getLangOpts().OpenMPIsTargetDevice &&
6144 "Clauses associated with the teams directive expected to be emitted "
6145 "only for the host!");
6146 CGBuilderTy &Bld = CGF.Builder;
6147 int32_t MinNT = -1, MaxNT = -1;
6148 const Expr *NumTeams =
6149 getNumTeamsExprForTargetDirective(CGF, D, MinNT, MaxNT);
6150 if (NumTeams != nullptr) {
6151 OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
6152
6153 switch (DirectiveKind) {
6154 case OMPD_target: {
6155 const auto *CS = D.getInnermostCapturedStmt();
6156 CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6157 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6158 llvm::Value *NumTeamsVal = CGF.EmitScalarExpr(NumTeams,
6159 /*IgnoreResultAssign*/ true);
6160 return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
6161 /*isSigned=*/true);
6162 }
6163 case OMPD_target_teams:
6164 case OMPD_target_teams_distribute:
6165 case OMPD_target_teams_distribute_simd:
6166 case OMPD_target_teams_distribute_parallel_for:
6167 case OMPD_target_teams_distribute_parallel_for_simd: {
6168 CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF);
6169 llvm::Value *NumTeamsVal = CGF.EmitScalarExpr(NumTeams,
6170 /*IgnoreResultAssign*/ true);
6171 return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
6172 /*isSigned=*/true);
6173 }
6174 default:
6175 break;
6176 }
6177 }
6178
6179 assert(MinNT == MaxNT && "Num teams ranges require handling here.");
6180 return llvm::ConstantInt::get(CGF.Int32Ty, MinNT);
6181}
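// E.g. for '#pragma omp target teams num_teams(N)' this emits the scalar
// evaluation of N cast to i32; otherwise the constant MinNT is returned,
// which the assert above requires to equal MaxNT.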
6182
6183/// Check for a num threads constant value (stored in \p DefaultVal), or
6184/// expression (stored in \p E). If the value is conditional (via an if-clause),
6185/// store the condition in \p CondVal. If \p E, and \p CondVal respectively, are
6186/// nullptr, no expression evaluation is performed.
6187static void getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS,
6188 const Expr **E, int32_t &UpperBound,
6189 bool UpperBoundOnly, llvm::Value **CondVal) {
6190 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6191 CGF.getContext(), CS->getCapturedStmt());
6192 const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child);
6193 if (!Dir)
6194 return;
6195
6196 if (isOpenMPParallelDirective(Dir->getDirectiveKind())) {
6197 // Handle the if clause. If an if clause is present, the number of threads
6198 // is calculated as <cond> ? (<numthreads> ? <numthreads> : 0) : 1.
6199 if (CondVal && Dir->hasClausesOfKind<OMPIfClause>()) {
6200 CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6201 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6202 const OMPIfClause *IfClause = nullptr;
6203 for (const auto *C : Dir->getClausesOfKind<OMPIfClause>()) {
6204 if (C->getNameModifier() == OMPD_unknown ||
6205 C->getNameModifier() == OMPD_parallel) {
6206 IfClause = C;
6207 break;
6208 }
6209 }
6210 if (IfClause) {
6211 const Expr *CondExpr = IfClause->getCondition();
6212 bool Result;
6213 if (CondExpr->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
6214 if (!Result) {
6215 UpperBound = 1;
6216 return;
6217 }
6218 } else {
6219 CodeGenFunction::LexicalScope Scope(CGF, CondExpr->getSourceRange());
6220 if (const auto *PreInit =
6221 cast_or_null<DeclStmt>(IfClause->getPreInitStmt())) {
6222 for (const auto *I : PreInit->decls()) {
6223 if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
6224 CGF.EmitVarDecl(cast<VarDecl>(*I));
6225 } else {
6226 CodeGenFunction::AutoVarEmission Emission =
6227 CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
6228 CGF.EmitAutoVarCleanups(Emission);
6229 }
6230 }
6231 *CondVal = CGF.EvaluateExprAsBool(CondExpr);
6232 }
6233 }
6234 }
6235 }
6236 // Check the value of the num_threads clause only if the if clause was not
6237 // specified or did not evaluate to false.
6238 if (Dir->hasClausesOfKind<OMPNumThreadsClause>()) {
6239 CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6240 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6241 const auto *NumThreadsClause =
6242 Dir->getSingleClause<OMPNumThreadsClause>();
6243 const Expr *NTExpr = NumThreadsClause->getNumThreads();
6244 if (NTExpr->isIntegerConstantExpr(CGF.getContext()))
6245 if (auto Constant = NTExpr->getIntegerConstantExpr(CGF.getContext()))
6246 UpperBound =
6247 UpperBound
6248 ? Constant->getZExtValue()
6249 : std::min(UpperBound,
6250 static_cast<int32_t>(Constant->getZExtValue()));
6251 // If we haven't found an upper bound, remember we saw a thread limiting
6252 // clause.
6253 if (UpperBound == -1)
6254 UpperBound = 0;
6255 if (!E)
6256 return;
6257 CodeGenFunction::LexicalScope Scope(CGF, NTExpr->getSourceRange());
6258 if (const auto *PreInit =
6259 cast_or_null<DeclStmt>(NumThreadsClause->getPreInitStmt())) {
6260 for (const auto *I : PreInit->decls()) {
6261 if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
6262 CGF.EmitVarDecl(cast<VarDecl>(*I));
6263 } else {
6264 CodeGenFunction::AutoVarEmission Emission =
6265 CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
6266 CGF.EmitAutoVarCleanups(Emission);
6267 }
6268 }
6269 }
6270 *E = NTExpr;
6271 }
6272 return;
6273 }
6274 if (isOpenMPSimdDirective(Dir->getDirectiveKind()))
6275 UpperBound = 1;
6276}
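// For illustration, assuming the captured body is
//   #pragma omp parallel if(Cond) num_threads(8)
// a Cond that folds to false yields UpperBound = 1; otherwise UpperBound
// becomes 8, *CondVal receives the emitted condition, and *E the
// num_threads expression. A nested simd directive forces UpperBound = 1.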
6277
6278const Expr *CGOpenMPRuntime::getNumThreadsExprForTargetDirective(
6279 CodeGenFunction &CGF, const OMPExecutableDirective &D, int32_t &UpperBound,
6280 bool UpperBoundOnly, llvm::Value **CondVal, const Expr **ThreadLimitExpr) {
6281 assert((!CGF.getLangOpts().OpenMPIsTargetDevice || UpperBoundOnly) &&
6282 "Clauses associated with the teams directive expected to be emitted "
6283 "only for the host!");
6284 OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
6285 assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
6286 "Expected target-based executable directive.");
6287
6288 const Expr *NT = nullptr;
6289 const Expr **NTPtr = UpperBoundOnly ? nullptr : &NT;
6290
6291 auto CheckForConstExpr = [&](const Expr *E, const Expr **EPtr) {
6292 if (E->isIntegerConstantExpr(CGF.getContext())) {
6293 if (auto Constant = E->getIntegerConstantExpr(CGF.getContext()))
6294 UpperBound = UpperBound ? Constant->getZExtValue()
6295 : std::min(UpperBound,
6296 int32_t(Constant->getZExtValue()));
6297 }
6298 // If we haven't found an upper bound, remember we saw a thread limiting
6299 // clause.
6300 if (UpperBound == -1)
6301 UpperBound = 0;
6302 if (EPtr)
6303 *EPtr = E;
6304 };
6305
6306 auto ReturnSequential = [&]() {
6307 UpperBound = 1;
6308 return NT;
6309 };
6310
6311 switch (DirectiveKind) {
6312 case OMPD_target: {
6313 const CapturedStmt *CS = D.getInnermostCapturedStmt();
6314 getNumThreads(CGF, CS, NTPtr, UpperBound, UpperBoundOnly, CondVal);
6315 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6316 CGF.getContext(), CS->getCapturedStmt());
6317 // TODO: The standard is not clear how to resolve two thread limit clauses,
6318 // let's pick the teams one if it's present, otherwise the target one.
6319 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6320 if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
6321 if (const auto *TLC = Dir->getSingleClause<OMPThreadLimitClause>()) {
6322 ThreadLimitClause = TLC;
6323 if (ThreadLimitExpr) {
6324 CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6325 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6326 CodeGenFunction::LexicalScope Scope(
6327 CGF, ThreadLimitClause->getThreadLimit()->getSourceRange());
6328 if (const auto *PreInit =
6329 cast_or_null<DeclStmt>(ThreadLimitClause->getPreInitStmt())) {
6330 for (const auto *I : PreInit->decls()) {
6331 if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
6332 CGF.EmitVarDecl(cast<VarDecl>(*I));
6333 } else {
6334 CodeGenFunction::AutoVarEmission Emission =
6335 CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
6336 CGF.EmitAutoVarCleanups(Emission);
6337 }
6338 }
6339 }
6340 }
6341 }
6342 }
6343 if (ThreadLimitClause)
6344 CheckForConstExpr(ThreadLimitClause->getThreadLimit(), ThreadLimitExpr);
6345 if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
6346 if (isOpenMPTeamsDirective(Dir->getDirectiveKind()) &&
6347 !isOpenMPDistributeDirective(Dir->getDirectiveKind())) {
6348 CS = Dir->getInnermostCapturedStmt();
6349 Child = CGOpenMPRuntime::getSingleCompoundChild(
6350 CGF.getContext(), CS->getCapturedStmt());
6351 Dir = dyn_cast_or_null<OMPExecutableDirective>(Child);
6352 }
6353 if (Dir && isOpenMPParallelDirective(Dir->getDirectiveKind())) {
6354 CS = Dir->getInnermostCapturedStmt();
6355 getNumThreads(CGF, CS, NTPtr, UpperBound, UpperBoundOnly, CondVal);
6356 } else if (Dir && isOpenMPSimdDirective(Dir->getDirectiveKind()))
6357 return ReturnSequential();
6358 }
6359 return NT;
6360 }
6361 case OMPD_target_teams: {
6362 if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
6363 CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
6364 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6365 CheckForConstExpr(ThreadLimitClause->getThreadLimit(), ThreadLimitExpr);
6366 }
6367 const CapturedStmt *CS = D.getInnermostCapturedStmt();
6368 getNumThreads(CGF, CS, NTPtr, UpperBound, UpperBoundOnly, CondVal);
6369 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6370 CGF.getContext(), CS->getCapturedStmt());
6371 if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
6372 if (Dir->getDirectiveKind() == OMPD_distribute) {
6373 CS = Dir->getInnermostCapturedStmt();
6374 getNumThreads(CGF, CS, NTPtr, UpperBound, UpperBoundOnly, CondVal);
6375 }
6376 }
6377 return NT;
6378 }
6379 case OMPD_target_teams_distribute:
6380 if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
6381 CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
6382 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6383 CheckForConstExpr(ThreadLimitClause->getThreadLimit(), ThreadLimitExpr);
6384 }
6385 getNumThreads(CGF, D.getInnermostCapturedStmt(), NTPtr, UpperBound,
6386 UpperBoundOnly, CondVal);
6387 return NT;
6388 case OMPD_target_teams_loop:
6389 case OMPD_target_parallel_loop:
6390 case OMPD_target_parallel:
6391 case OMPD_target_parallel_for:
6392 case OMPD_target_parallel_for_simd:
6393 case OMPD_target_teams_distribute_parallel_for:
6394 case OMPD_target_teams_distribute_parallel_for_simd: {
6395 if (CondVal && D.hasClausesOfKind<OMPIfClause>()) {
6396 const OMPIfClause *IfClause = nullptr;
6397 for (const auto *C : D.getClausesOfKind<OMPIfClause>()) {
6398 if (C->getNameModifier() == OMPD_unknown ||
6399 C->getNameModifier() == OMPD_parallel) {
6400 IfClause = C;
6401 break;
6402 }
6403 }
6404 if (IfClause) {
6405 const Expr *Cond = IfClause->getCondition();
6406 bool Result;
6407 if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
6408 if (!Result)
6409 return ReturnSequential();
6410 } else {
6411 CodeGenFunction::LexicalScope Scope(CGF, Cond->getSourceRange());
6412 *CondVal = CGF.EvaluateExprAsBool(Cond);
6413 }
6414 }
6415 }
6416 if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
6417 CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
6418 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6419 CheckForConstExpr(ThreadLimitClause->getThreadLimit(), ThreadLimitExpr);
6420 }
6421 if (D.hasClausesOfKind<OMPNumThreadsClause>()) {
6422 CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
6423 const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>();
6424 CheckForConstExpr(NumThreadsClause->getNumThreads(), nullptr);
6425 return NumThreadsClause->getNumThreads();
6426 }
6427 return NT;
6428 }
6429 case OMPD_target_teams_distribute_simd:
6430 case OMPD_target_simd:
6431 return ReturnSequential();
6432 default:
6433 break;
6434 }
6435 llvm_unreachable("Unsupported directive kind.");
6436}
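// E.g. (assumed nesting) '#pragma omp target thread_limit(64)' wrapping
// '#pragma omp teams thread_limit(32)': per the TODO above, the teams
// clause wins, so 32 becomes the recorded upper bound.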
6437
6438llvm::Value *CGOpenMPRuntime::emitNumThreadsForTargetDirective(
6439 CodeGenFunction &CGF, const OMPExecutableDirective &D) {
6440 llvm::Value *NumThreadsVal = nullptr;
6441 llvm::Value *CondVal = nullptr;
6442 llvm::Value *ThreadLimitVal = nullptr;
6443 const Expr *ThreadLimitExpr = nullptr;
6444 int32_t UpperBound = -1;
6445
6446 const Expr *NT = getNumThreadsExprForTargetDirective(
6447 CGF, D, UpperBound, /* UpperBoundOnly */ false, &CondVal,
6448 &ThreadLimitExpr);
6449
6450 // Thread limit expressions are used below, emit them.
6451 if (ThreadLimitExpr) {
6452 ThreadLimitVal =
6453 CGF.EmitScalarExpr(ThreadLimitExpr, /*IgnoreResultAssign=*/true);
6454 ThreadLimitVal = CGF.Builder.CreateIntCast(ThreadLimitVal, CGF.Int32Ty,
6455 /*isSigned=*/false);
6456 }
6457
6458 // Generate the num threads expression.
6459 if (UpperBound == 1) {
6460 NumThreadsVal = CGF.Builder.getInt32(UpperBound);
6461 } else if (NT) {
6462 NumThreadsVal = CGF.EmitScalarExpr(NT, /*IgnoreResultAssign=*/true);
6463 NumThreadsVal = CGF.Builder.CreateIntCast(NumThreadsVal, CGF.Int32Ty,
6464 /*isSigned=*/false);
6465 } else if (ThreadLimitVal) {
6466 // If we do not have a num threads value but a thread limit, replace the
6467 // former with the latter. We already handled the thread limit expression.
6468 NumThreadsVal = ThreadLimitVal;
6469 ThreadLimitVal = nullptr;
6470 } else {
6471 // Default to "0" which means runtime choice.
6472 assert(!ThreadLimitVal && "Default not applicable with thread limit value");
6473 NumThreadsVal = CGF.Builder.getInt32(0);
6474 }
6475
6476 // Handle the if clause. If an if clause is present, the number of threads
6477 // is calculated as <cond> ? (<numthreads> ? <numthreads> : 0) : 1.
6478 if (CondVal) {
6479 CodeGenFunction::RunCleanupsScope IfScope(CGF);
6480 NumThreadsVal = CGF.Builder.CreateSelect(CondVal, NumThreadsVal,
6481 CGF.Builder.getInt32(1));
6482 }
6483
6484 // If the thread limit and num threads expressions were present, take the
6485 // minimum.
6486 if (ThreadLimitVal) {
6487 NumThreadsVal = CGF.Builder.CreateSelect(
6488 CGF.Builder.CreateICmpULT(ThreadLimitVal, NumThreadsVal),
6489 ThreadLimitVal, NumThreadsVal);
6490 }
6491
6492 return NumThreadsVal;
6493}
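// The value emitted above thus follows
//   threads = min(thread_limit, cond ? (num_threads ? num_threads : 0) : 1)
// where the min is only materialized (as icmp ult + select) when both a
// thread limit and a num threads expression were seen.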
6494
6495namespace {
6497
6498// Utility to handle information from clauses associated with a given
6499// construct that use mappable expressions (e.g. 'map' clause, 'to' clause).
6500// It provides a convenient interface to obtain the information and generate
6501// code for that information.
6502class MappableExprsHandler {
6503public:
6504 /// Get the offset of the OMP_MAP_MEMBER_OF field.
6505 static unsigned getFlagMemberOffset() {
6506 unsigned Offset = 0;
6507 for (uint64_t Remain =
6508 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
6509 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF);
6510 !(Remain & 1); Remain = Remain >> 1)
6511 Offset++;
6512 return Offset;
6513 }
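// With the current encoding, where OMP_MAP_MEMBER_OF occupies the high 16
// bits (0xffff000000000000), the loop above counts 48 trailing zero bits,
// so member-of indices are shifted into bits 48..63 of the flags.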
6514
6515 /// Class that holds debugging information for a data mapping to be passed to
6516 /// the runtime library.
6517 class MappingExprInfo {
6518 /// The variable declaration used for the data mapping.
6519 const ValueDecl *MapDecl = nullptr;
6520 /// The original expression used in the map clause, or null if there is
6521 /// none.
6522 const Expr *MapExpr = nullptr;
6523
6524 public:
6525 MappingExprInfo(const ValueDecl *MapDecl, const Expr *MapExpr = nullptr)
6526 : MapDecl(MapDecl), MapExpr(MapExpr) {}
6527
6528 const ValueDecl *getMapDecl() const { return MapDecl; }
6529 const Expr *getMapExpr() const { return MapExpr; }
6530 };
6531
6532 using DeviceInfoTy = llvm::OpenMPIRBuilder::DeviceInfoTy;
6533 using MapBaseValuesArrayTy = llvm::OpenMPIRBuilder::MapValuesArrayTy;
6534 using MapValuesArrayTy = llvm::OpenMPIRBuilder::MapValuesArrayTy;
6535 using MapFlagsArrayTy = llvm::OpenMPIRBuilder::MapFlagsArrayTy;
6536 using MapDimArrayTy = llvm::OpenMPIRBuilder::MapDimArrayTy;
6537 using MapNonContiguousArrayTy =
6538 llvm::OpenMPIRBuilder::MapNonContiguousArrayTy;
6539 using MapExprsArrayTy = SmallVector<MappingExprInfo, 4>;
6540 using MapValueDeclsArrayTy = SmallVector<const ValueDecl *, 4>;
6541
6542 /// This structure contains combined information generated for mappable
6543 /// clauses, including base pointers, pointers, sizes, map types, user-defined
6544 /// mappers, and non-contiguous information.
6545 struct MapCombinedInfoTy : llvm::OpenMPIRBuilder::MapInfosTy {
6546 MapExprsArrayTy Exprs;
6547 MapValueDeclsArrayTy Mappers;
6548 MapValueDeclsArrayTy DevicePtrDecls;
6549
6550 /// Append arrays in \a CurInfo.
6551 void append(MapCombinedInfoTy &CurInfo) {
6552 Exprs.append(CurInfo.Exprs.begin(), CurInfo.Exprs.end());
6553 DevicePtrDecls.append(CurInfo.DevicePtrDecls.begin(),
6554 CurInfo.DevicePtrDecls.end());
6555 Mappers.append(CurInfo.Mappers.begin(), CurInfo.Mappers.end());
6556 llvm::OpenMPIRBuilder::MapInfosTy::append(CurInfo);
6557 }
6558 };
6559
6560 /// Map between a struct and its lowest & highest elements which have been
6561 /// mapped.
6562 /// [ValueDecl *] --> {LE(FieldIndex, Pointer),
6563 /// HE(FieldIndex, Pointer)}
6564 struct StructRangeInfoTy {
6565 MapCombinedInfoTy PreliminaryMapData;
6566 std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> LowestElem = {
6567 0, Address::invalid()};
6568 std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> HighestElem = {
6569 0, Address::invalid()};
6570 Address Base = Address::invalid();
6571 Address LB = Address::invalid();
6572 bool IsArraySection = false;
6573 bool HasCompleteRecord = false;
6574 };
6575
6576private:
6577 /// Information gathered for a single component list of a map-like clause.
6578 struct MapInfo {
6579 OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
6580 OpenMPMapClauseKind MapType = OMPC_MAP_unknown;
6581 ArrayRef<OpenMPMapModifierKind> MapModifiers;
6582 ArrayRef<OpenMPMotionModifierKind> MotionModifiers;
6583 bool ReturnDevicePointer = false;
6584 bool IsImplicit = false;
6585 const ValueDecl *Mapper = nullptr;
6586 const Expr *VarRef = nullptr;
6587 bool ForDeviceAddr = false;
6588
6589 MapInfo() = default;
6590 MapInfo(
6591 OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
6592 OpenMPMapClauseKind MapType,
6593 ArrayRef<OpenMPMapModifierKind> MapModifiers,
6594 ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
6595 bool ReturnDevicePointer, bool IsImplicit,
6596 const ValueDecl *Mapper = nullptr, const Expr *VarRef = nullptr,
6597 bool ForDeviceAddr = false)
6598 : Components(Components), MapType(MapType), MapModifiers(MapModifiers),
6599 MotionModifiers(MotionModifiers),
6600 ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit),
6601 Mapper(Mapper), VarRef(VarRef), ForDeviceAddr(ForDeviceAddr) {}
6602 };
6603
6604 /// If use_device_ptr or use_device_addr is used on a decl which is a struct
6605 /// member and there is no map information about it, then emission of that
6606 /// entry is deferred until the whole struct has been processed.
6607 struct DeferredDevicePtrEntryTy {
6608 const Expr *IE = nullptr;
6609 const ValueDecl *VD = nullptr;
6610 bool ForDeviceAddr = false;
6611
6612 DeferredDevicePtrEntryTy(const Expr *IE, const ValueDecl *VD,
6613 bool ForDeviceAddr)
6614 : IE(IE), VD(VD), ForDeviceAddr(ForDeviceAddr) {}
6615 };
6616
6617 /// The target directive from where the mappable clauses were extracted. It
6618 /// is either an executable directive or a user-defined mapper directive.
6619 llvm::PointerUnion<const OMPExecutableDirective *,
6620 const OMPDeclareMapperDecl *>
6621 CurDir;
6622
6623 /// Function the directive is being generated for.
6624 CodeGenFunction &CGF;
6625
6626 /// Set of all first private variables in the current directive.
6627 /// bool data is set to true if the variable is implicitly marked as
6628 /// firstprivate, false otherwise.
6629 llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, bool> FirstPrivateDecls;
6630
6631 /// Map between device pointer declarations and their expression components.
6632 /// The key value for declarations in 'this' is null.
6633 llvm::DenseMap<
6634 const ValueDecl *,
6635 SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
6636 DevPointersMap;
6637
6638 /// Map between device addr declarations and their expression components.
6639 /// The key value for declarations in 'this' is null.
6640 llvm::DenseMap<
6641 const ValueDecl *,
6642 SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
6643 HasDevAddrsMap;
6644
6645 /// Map between lambda declarations and their map type.
6646 llvm::DenseMap<const ValueDecl *, const OMPMapClause *> LambdasMap;
6647
6648 llvm::Value *getExprTypeSize(const Expr *E) const {
6649 QualType ExprTy = E->getType().getCanonicalType();
6650
6651 // Calculate the size for array shaping expression.
6652 if (const auto *OAE = dyn_cast<OMPArrayShapingExpr>(E)) {
6653 llvm::Value *Size =
6654 CGF.getTypeSize(OAE->getBase()->getType()->getPointeeType());
6655 for (const Expr *SE : OAE->getDimensions()) {
6656 llvm::Value *Sz = CGF.EmitScalarExpr(SE);
6657 Sz = CGF.EmitScalarConversion(Sz, SE->getType(),
6658 CGF.getContext().getSizeType(),
6659 SE->getExprLoc());
6660 Size = CGF.Builder.CreateNUWMul(Size, Sz);
6661 }
6662 return Size;
6663 }
6664
6665 // Reference types are ignored for mapping purposes.
6666 if (const auto *RefTy = ExprTy->getAs<ReferenceType>())
6667 ExprTy = RefTy->getPointeeType().getCanonicalType();
6668
6669 // Given that an array section is considered a built-in type, we need to
6670 // do the calculation based on the length of the section instead of relying
6671 // on CGF.getTypeSize(E->getType()).
6672 if (const auto *OAE = dyn_cast<OMPArraySectionExpr>(E)) {
6673 QualType BaseTy = OMPArraySectionExpr::getBaseOriginalType(
6674 OAE->getBase()->IgnoreParenImpCasts())
6675 .getCanonicalType();
6676
6677 // If there is no length associated with the expression and the lower
6678 // bound is not specified either, that means we are using the whole
6679 // length of the base.
6680 if (!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
6681 !OAE->getLowerBound())
6682 return CGF.getTypeSize(BaseTy);
6683
6684 llvm::Value *ElemSize;
6685 if (const auto *PTy = BaseTy->getAs<PointerType>()) {
6686 ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType());
6687 } else {
6688 const auto *ATy = cast<ArrayType>(BaseTy.getTypePtr());
6689 assert(ATy && "Expecting array type if not a pointer type.");
6690 ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType());
6691 }
6692
6693 // If we don't have a length at this point, that is because we have an
6694 // array section with a single element.
6695 if (!OAE->getLength() && OAE->getColonLocFirst().isInvalid())
6696 return ElemSize;
6697
6698 if (const Expr *LenExpr = OAE->getLength()) {
6699 llvm::Value *LengthVal = CGF.EmitScalarExpr(LenExpr);
6700 LengthVal = CGF.EmitScalarConversion(LengthVal, LenExpr->getType(),
6701 CGF.getContext().getSizeType(),
6702 LenExpr->getExprLoc());
6703 return CGF.Builder.CreateNUWMul(LengthVal, ElemSize);
6704 }
6705 assert(!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
6706 OAE->getLowerBound() && "expected array_section[lb:].");
6707 // Size = sizetype - lb * elemtype;
6708 llvm::Value *LengthVal = CGF.getTypeSize(BaseTy);
6709 llvm::Value *LBVal = CGF.EmitScalarExpr(OAE->getLowerBound());
6710 LBVal = CGF.EmitScalarConversion(LBVal, OAE->getLowerBound()->getType(),
6711 CGF.getContext().getSizeType(),
6712 OAE->getLowerBound()->getExprLoc());
6713 LBVal = CGF.Builder.CreateNUWMul(LBVal, ElemSize);
6714 llvm::Value *Cmp = CGF.Builder.CreateICmpUGT(LengthVal, LBVal);
6715 llvm::Value *TrueVal = CGF.Builder.CreateNUWSub(LengthVal, LBVal);
6716 LengthVal = CGF.Builder.CreateSelect(
6717 Cmp, TrueVal, llvm::ConstantInt::get(CGF.SizeTy, 0));
6718 return LengthVal;
6719 }
6720 return CGF.getTypeSize(ExprTy);
6721 }
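// For example, with 'int arr[100]' (an assumed declaration):
//   arr[1:23] -> 23 * sizeof(int)
//   arr[3:]   -> sizeof(arr) - 3 * sizeof(int), clamped at zero
//   arr       -> sizeof(arr)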
6722
6723 /// Return the corresponding bits for a given map clause modifier. Add
6724 /// a flag marking the map as a pointer if requested. Add a flag marking the
6725 /// map as the first one of a series of maps that relate to the same map
6726 /// expression.
6727 OpenMPOffloadMappingFlags getMapTypeBits(
6728 OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
6729 ArrayRef<OpenMPMotionModifierKind> MotionModifiers, bool IsImplicit,
6730 bool AddPtrFlag, bool AddIsTargetParamFlag, bool IsNonContiguous) const {
6731 OpenMPOffloadMappingFlags Bits =
6732 IsImplicit ? OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT
6733 : OpenMPOffloadMappingFlags::OMP_MAP_NONE;
6734 switch (MapType) {
6735 case OMPC_MAP_alloc:
6736 case OMPC_MAP_release:
6737 // alloc and release are the default behavior in the runtime library, i.e.
6738 // if we don't pass any bits for alloc/release, that is what the runtime is
6739 // going to do. Therefore, we don't need to signal anything for these two
6740 // type modifiers.
6741 break;
6742 case OMPC_MAP_to:
6743 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_TO;
6744 break;
6745 case OMPC_MAP_from:
6746 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_FROM;
6747 break;
6748 case OMPC_MAP_tofrom:
6749 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_TO |
6750 OpenMPOffloadMappingFlags::OMP_MAP_FROM;
6751 break;
6752 case OMPC_MAP_delete:
6753 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_DELETE;
6754 break;
6755 case OMPC_MAP_unknown:
6756 llvm_unreachable("Unexpected map type!");
6757 }
6758 if (AddPtrFlag)
6759 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ;
6760 if (AddIsTargetParamFlag)
6761 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;
6762 if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_always))
6763 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_ALWAYS;
6764 if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_close))
6765 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_CLOSE;
6766 if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_present) ||
6767 llvm::is_contained(MotionModifiers, OMPC_MOTION_MODIFIER_present))
6768 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_PRESENT;
6769 if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_ompx_hold))
6770 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD;
6771 if (IsNonContiguous)
6772 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_NON_CONTIG;
6773 return Bits;
6774 }
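// E.g. an explicit 'map(always, tofrom: x)' yields
// OMP_MAP_TO | OMP_MAP_FROM | OMP_MAP_ALWAYS here, with OMP_MAP_PTR_AND_OBJ
// and/or OMP_MAP_TARGET_PARAM added when the caller requests them.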
6775
6776 /// Return true if the provided expression is a final array section. A
6777 /// final array section is one whose length can't be proved to be one.
6778 bool isFinalArraySectionExpression(const Expr *E) const {
6779 const auto *OASE = dyn_cast<OMPArraySectionExpr>(E);
6780
6781 // It is not an array section and therefore not a unity-size one.
6782 if (!OASE)
6783 return false;
6784
6785 // An array section with no colon always refers to a single element.
6786 if (OASE->getColonLocFirst().isInvalid())
6787 return false;
6788
6789 const Expr *Length = OASE->getLength();
6790
6791 // If we don't have a length we have to check if the array has size 1
6792 // for this dimension. Also, we should always expect a length if the
6793 // base type is a pointer.
6794 if (!Length) {
6795 QualType BaseQTy = OMPArraySectionExpr::getBaseOriginalType(
6796 OASE->getBase()->IgnoreParenImpCasts())
6797 .getCanonicalType();
6798 if (const auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr()))
6799 return ATy->getSize().getSExtValue() != 1;
6800 // If we don't have a constant dimension length, we have to consider
6801 // the current section as having any size, so it is not necessarily
6802 // unitary. If it happens to be unity size, that's the user's fault.
6803 return true;
6804 }
6805
6806 // Check if the length evaluates to 1.
6807 Expr::EvalResult Result;
6808 if (!Length->EvaluateAsInt(Result, CGF.getContext()))
6809 return true; // Can have a size greater than 1.
6810
6811 llvm::APSInt ConstLength = Result.Val.getInt();
6812 return ConstLength.getSExtValue() != 1;
6813 }
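// E.g. for 'int a[10]' (an assumed declaration): 'a[3]' is not an array
// section (false), 'a[0:n]' may cover more than one element (true), and
// 'a[i:1]' has a provably unit length (false).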
6814
6815 /// Generate the base pointers, section pointers, sizes, map type bits, and
6816 /// user-defined mappers (all included in \a CombinedInfo) for the provided
6817 /// map type, map or motion modifiers, and expression components.
6818 /// \a IsFirstComponent should be set to true if the provided set of
6819 /// components is the first associated with a capture.
6820 void generateInfoForComponentList(
6821 OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
6822 ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
6823 OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
6824 MapCombinedInfoTy &CombinedInfo,
6825 MapCombinedInfoTy &StructBaseCombinedInfo,
6826 StructRangeInfoTy &PartialStruct, bool IsFirstComponentList,
6827 bool IsImplicit, bool GenerateAllInfoForClauses,
6828 const ValueDecl *Mapper = nullptr, bool ForDeviceAddr = false,
6829 const ValueDecl *BaseDecl = nullptr, const Expr *MapExpr = nullptr,
6830 ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
6831 OverlappedElements = std::nullopt) const {
6832 // The following summarizes what has to be generated for each map and the
6833 // types below. The generated information is expressed in this order:
6834 // base pointer, section pointer, size, flags
6835 // (to add to the ones that come from the map type and modifier).
6836 //
6837 // double d;
6838 // int i[100];
6839 // float *p;
6840 // int **a = &i;
6841 //
6842 // struct S1 {
6843 // int i;
6844 // float f[50];
6845 // }
6846 // struct S2 {
6847 // int i;
6848 // float f[50];
6849 // S1 s;
6850 // double *p;
6851 // struct S2 *ps;
6852 // int &ref;
6853 // }
6854 // S2 s;
6855 // S2 *ps;
6856 //
6857 // map(d)
6858 // &d, &d, sizeof(double), TARGET_PARAM | TO | FROM
6859 //
6860 // map(i)
6861 // &i, &i, 100*sizeof(int), TARGET_PARAM | TO | FROM
6862 //
6863 // map(i[1:23])
6864 // &i(=&i[0]), &i[1], 23*sizeof(int), TARGET_PARAM | TO | FROM
6865 //
6866 // map(p)
6867 // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM
6868 //
6869 // map(p[1:24])
6870 // &p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM | PTR_AND_OBJ
6871 // in unified shared memory mode or for local pointers
6872 // p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM
6873 //
6874 // map((*a)[0:3])
6875 // &(*a), &(*a), sizeof(pointer), TARGET_PARAM | TO | FROM
6876 // &(*a), &(*a)[0], 3*sizeof(int), PTR_AND_OBJ | TO | FROM
6877 //
6878 // map(**a)
6879 // &(*a), &(*a), sizeof(pointer), TARGET_PARAM | TO | FROM
6880 // &(*a), &(**a), sizeof(int), PTR_AND_OBJ | TO | FROM
6881 //
6882 // map(s)
6883 // &s, &s, sizeof(S2), TARGET_PARAM | TO | FROM
6884 //
6885 // map(s.i)
6886 // &s, &(s.i), sizeof(int), TARGET_PARAM | TO | FROM
6887 //
6888 // map(s.s.f)
6889 // &s, &(s.s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
6890 //
6891 // map(s.p)
6892 // &s, &(s.p), sizeof(double*), TARGET_PARAM | TO | FROM
6893 //
6894 // map(to: s.p[:22])
6895 // &s, &(s.p), sizeof(double*), TARGET_PARAM (*)
6896 // &s, &(s.p), sizeof(double*), MEMBER_OF(1) (**)
6897 // &(s.p), &(s.p[0]), 22*sizeof(double),
6898 // MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
6899 // (*) alloc space for struct members, only this is a target parameter
6900 // (**) map the pointer (nothing to be mapped in this example) (the compiler
6901 // optimizes this entry out, same in the examples below)
6902 // (***) map the pointee (map: to)
6903 //
6904 // map(to: s.ref)
6905 // &s, &(s.ref), sizeof(int*), TARGET_PARAM (*)
6906 // &s, &(s.ref), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
6907 // (*) alloc space for struct members, only this is a target parameter
6908 // (**) map the pointer (nothing to be mapped in this example) (the compiler
6909 // optimizes this entry out, same in the examples below)
6910 // (***) map the pointee (map: to)
6911 //
6912 // map(s.ps)
6913 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM
6914 //
6915 // map(from: s.ps->s.i)
6916 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
6917 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
6918 // &(s.ps), &(s.ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
6919 //
6920 // map(to: s.ps->ps)
6921 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
6922 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
6923 // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | TO
6924 //
6925 // map(s.ps->ps->ps)
6926 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
6927 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
6928 // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
6929 // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
6930 //
6931 // map(to: s.ps->ps->s.f[:22])
6932 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
6933 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
6934 // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
6935 // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
6936 //
6937 // map(ps)
6938 // &ps, &ps, sizeof(S2*), TARGET_PARAM | TO | FROM
6939 //
6940 // map(ps->i)
6941 // ps, &(ps->i), sizeof(int), TARGET_PARAM | TO | FROM
6942 //
6943 // map(ps->s.f)
6944 // ps, &(ps->s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
6945 //
6946 // map(from: ps->p)
6947 // ps, &(ps->p), sizeof(double*), TARGET_PARAM | FROM
6948 //
6949 // map(to: ps->p[:22])
6950 // ps, &(ps->p), sizeof(double*), TARGET_PARAM
6951 // ps, &(ps->p), sizeof(double*), MEMBER_OF(1)
6952 // &(ps->p), &(ps->p[0]), 22*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | TO
6953 //
6954 // map(ps->ps)
6955 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM | TO | FROM
6956 //
6957 // map(from: ps->ps->s.i)
6958 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
6959 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
6960 // &(ps->ps), &(ps->ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
6961 //
6962 // map(from: ps->ps->ps)
6963 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
6964 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
6965 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | FROM
6966 //
6967 // map(ps->ps->ps->ps)
6968 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
6969 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
6970 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
6971 // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
6972 //
6973 // map(to: ps->ps->ps->s.f[:22])
6974 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
6975 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
6976 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
6977 // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
6978 //
6979 // map(to: s.f[:22]) map(from: s.p[:33])
6980 // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1) +
6981 // sizeof(double*) (**), TARGET_PARAM
6982 // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | TO
6983 // &s, &(s.p), sizeof(double*), MEMBER_OF(1)
6984 // &(s.p), &(s.p[0]), 33*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | FROM
6985 // (*) allocate contiguous space needed to fit all mapped members even if
6986 // we allocate space for members not mapped (in this example,
6987 // s.f[22..49] and s.s are not mapped, yet we must allocate space for
6988 // them as well because they fall between &s.f[0] and &s.p)
6989 //
6990 // map(from: s.f[:22]) map(to: ps->p[:33])
6991 // &s, &(s.f[0]), 22*sizeof(float), TARGET_PARAM | FROM
6992 // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
6993 // ps, &(ps->p), sizeof(double*), MEMBER_OF(2) (*)
6994 // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(2) | PTR_AND_OBJ | TO
6995 // (*) the struct this entry pertains to is the 2nd element in the list of
6996 // arguments, hence MEMBER_OF(2)
6997 //
6998 // map(from: s.f[:22], s.s) map(to: ps->p[:33])
6999 // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1), TARGET_PARAM
7000 // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | FROM
7001 // &s, &(s.s), sizeof(struct S1), MEMBER_OF(1) | FROM
7002 // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
7003 // ps, &(ps->p), sizeof(double*), MEMBER_OF(4) (*)
7004 // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(4) | PTR_AND_OBJ | TO
7005 // (*) the struct this entry pertains to is the 4th element in the list
7006 // of arguments, hence MEMBER_OF(4)
7007
7008 // Track if the map information being generated is the first for a capture.
7009 bool IsCaptureFirstInfo = IsFirstComponentList;
7010 // When the variable is on a declare target link or in a to clause with
7011 // unified memory, a reference is needed to hold the host/device address
7012 // of the variable.
7013 bool RequiresReference = false;
7014
7015 // Scan the components from the base to the complete expression.
7016 auto CI = Components.rbegin();
7017 auto CE = Components.rend();
7018 auto I = CI;
7019
7020 // Track if the map information being generated is the first for a list of
7021 // components.
7022 bool IsExpressionFirstInfo = true;
7023 bool FirstPointerInComplexData = false;
7024 Address BP = Address::invalid();
7025 const Expr *AssocExpr = I->getAssociatedExpression();
7026 const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr);
7027 const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
7028 const auto *OAShE = dyn_cast<OMPArrayShapingExpr>(AssocExpr);
7029
7030 if (isa<MemberExpr>(AssocExpr)) {
7031 // The base is the 'this' pointer. The content of the pointer is going
7032 // to be the base of the field being mapped.
7033 BP = CGF.LoadCXXThisAddress();
7034 } else if ((AE && isa<CXXThisExpr>(AE->getBase()->IgnoreParenImpCasts())) ||
7035 (OASE &&
7036 isa<CXXThisExpr>(OASE->getBase()->IgnoreParenImpCasts()))) {
7037 BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
7038 } else if (OAShE &&
7039 isa<CXXThisExpr>(OAShE->getBase()->IgnoreParenCasts())) {
7040 BP = Address(
7041 CGF.EmitScalarExpr(OAShE->getBase()),
7042 CGF.ConvertTypeForMem(OAShE->getBase()->getType()->getPointeeType()),
7043 CGF.getContext().getTypeAlignInChars(OAShE->getBase()->getType()));
7044 } else {
7045 // The base is the reference to the variable.
7046 // BP = &Var.
7047 BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
7048 if (const auto *VD =
7049 dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) {
7050 if (std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
7051 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) {
7052 if ((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
7053 ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
7054 *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
7055 CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())) {
7056 RequiresReference = true;
7057 BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
7058 }
7059 }
7060 }
7061
7062 // If the variable is a pointer and is being dereferenced (i.e. is not
7063 // the last component), the base has to be the pointer itself, not its
7064 // reference. References are ignored for mapping purposes.
7065 QualType Ty =
7066 I->getAssociatedDeclaration()->getType().getNonReferenceType();
7067 if (Ty->isAnyPointerType() && std::next(I) != CE) {
7068 // No need to generate individual map information for the pointer, it
7069 // can be associated with the combined storage if shared memory mode is
7070 // active or the base declaration is not a global variable.
7071 const auto *VD = dyn_cast<VarDecl>(I->getAssociatedDeclaration());
7072 if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
7073 !VD || VD->hasLocalStorage())
7074 BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
7075 else
7076 FirstPointerInComplexData = true;
7077 ++I;
7078 }
7079 }
7080
7081 // Track whether a component of the list should be marked as MEMBER_OF some
7082 // combined entry (for partial structs). Only the first PTR_AND_OBJ entry
7083 // in a component list should be marked as MEMBER_OF, all subsequent entries
7084 // do not belong to the base struct. E.g.
7085 // struct S2 s;
7086 // s.ps->ps->ps->f[:]
7087 // (1) (2) (3) (4)
7088 // ps(1) is a member pointer, ps(2) is a pointee of ps(1), so it is a
7089 // PTR_AND_OBJ entry; the PTR is ps(1), so MEMBER_OF the base struct. ps(3)
7090 // is the pointee of ps(2) which is not a member of struct s, so it should not
7091 // be marked as such (it is still PTR_AND_OBJ).
7092 // The variable is initialized to false so that PTR_AND_OBJ entries which
7093 // are not struct members are not considered (e.g. array of pointers to
7094 // data).
7095 bool ShouldBeMemberOf = false;
7096
7097 // Variable keeping track of whether or not we have encountered a component
7098 // in the component list which is a member expression. Useful when we have a
7099 // pointer or a final array section, in which case it is the previous
7100 // component in the list which tells us whether we have a member expression.
7101 // E.g. X.f[:]
7102 // While processing the final array section "[:]" it is "f" which tells us
7103 // whether we are dealing with a member of a declared struct.
7104 const MemberExpr *EncounteredME = nullptr;
7105
7106 // Track the total number of dimensions. Start from one for the dummy
7107 // dimension.
7108 uint64_t DimSize = 1;
7109
7110 bool IsNonContiguous = CombinedInfo.NonContigInfo.IsNonContiguous;
7111 bool IsPrevMemberReference = false;
7112
7113 // We need to check if we will be encountering any MEs. If we do not
7114 // encounter any ME expression it means we will be mapping the whole struct.
7115 // In that case we need to skip adding an entry for the struct to the
7116 // CombinedInfo list and instead add an entry to the StructBaseCombinedInfo
7117 // list only when generating all info for clauses.
7118 bool IsMappingWholeStruct = true;
7119 if (!GenerateAllInfoForClauses) {
7120 IsMappingWholeStruct = false;
7121 } else {
7122 for (auto TempI = I; TempI != CE; ++TempI) {
7123 const MemberExpr *PossibleME =
7124 dyn_cast<MemberExpr>(TempI->getAssociatedExpression());
7125 if (PossibleME) {
7126 IsMappingWholeStruct = false;
7127 break;
7128 }
7129 }
7130 }
7131
7132 for (; I != CE; ++I) {
7133 // If the current component is member of a struct (parent struct) mark it.
7134 if (!EncounteredME) {
7135 EncounteredME = dyn_cast<MemberExpr>(I->getAssociatedExpression());
7136 // If we encounter a PTR_AND_OBJ entry from now on it should be marked
7137 // as MEMBER_OF the parent struct.
7138 if (EncounteredME) {
7139 ShouldBeMemberOf = true;
7140 // Do not emit as complex pointer if this is actually not array-like
7141 // expression.
7142 if (FirstPointerInComplexData) {
7143 QualType Ty = std::prev(I)
7144 ->getAssociatedDeclaration()
7145 ->getType()
7146 .getNonReferenceType();
7147 BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
7148 FirstPointerInComplexData = false;
7149 }
7150 }
7151 }
7152
7153 auto Next = std::next(I);
7154
7155 // We need to generate the addresses and sizes if this is the last
7156 // component, if the component is a pointer or if it is an array section
7157 // whose length can't be proved to be one. If this is a pointer, it
7158 // becomes the base address for the following components.
7159
7160 // A final array section is one whose length can't be proved to be one.
7161 // If the map item is non-contiguous then we don't treat any array section
7162 // as final array section.
7163 bool IsFinalArraySection =
7164 !IsNonContiguous &&
7165 isFinalArraySectionExpression(I->getAssociatedExpression());
7166
7167 // If we have a declaration for the mapping use that, otherwise use
7168 // the base declaration of the map clause.
7169 const ValueDecl *MapDecl = (I->getAssociatedDeclaration())
7170 ? I->getAssociatedDeclaration()
7171 : BaseDecl;
7172 MapExpr = (I->getAssociatedExpression()) ? I->getAssociatedExpression()
7173 : MapExpr;
7174
7175 // Get information on whether the element is a pointer. Have to do a
7176 // special treatment for array sections given that they are built-in
7177 // types.
7178 const auto *OASE =
7179 dyn_cast<OMPArraySectionExpr>(I->getAssociatedExpression());
7180 const auto *OAShE =
7181 dyn_cast<OMPArrayShapingExpr>(I->getAssociatedExpression());
7182 const auto *UO = dyn_cast<UnaryOperator>(I->getAssociatedExpression());
7183 const auto *BO = dyn_cast<BinaryOperator>(I->getAssociatedExpression());
7184 bool IsPointer =
7185 OAShE ||
7186 (OASE && OMPArraySectionExpr::getBaseOriginalType(OASE->getBase())
7187 .getCanonicalType()
7188 ->isAnyPointerType()) ||
7189 I->getAssociatedExpression()->getType()->isAnyPointerType();
7190 bool IsMemberReference = isa<MemberExpr>(I->getAssociatedExpression()) &&
7191 MapDecl &&
7192 MapDecl->getType()->isLValueReferenceType();
7193 bool IsNonDerefPointer = IsPointer &&
7194 !(UO && UO->getOpcode() != UO_Deref) && !BO &&
7195 !IsNonContiguous;
7196
7197 if (OASE)
7198 ++DimSize;
7199
7200 if (Next == CE || IsMemberReference || IsNonDerefPointer ||
7201 IsFinalArraySection) {
7202 // If this is not the last component, we expect the pointer to be
7203 // associated with an array expression or member expression.
7204 assert((Next == CE ||
7205 isa<MemberExpr>(Next->getAssociatedExpression()) ||
7206 isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) ||
7207 isa<OMPArraySectionExpr>(Next->getAssociatedExpression()) ||
7208 isa<OMPArrayShapingExpr>(Next->getAssociatedExpression()) ||
7209 isa<UnaryOperator>(Next->getAssociatedExpression()) ||
7210 isa<BinaryOperator>(Next->getAssociatedExpression())) &&
7211 "Unexpected expression");
7212
7213 Address LB = Address::invalid();
7214 Address LowestElem = Address::invalid();
7215 auto &&EmitMemberExprBase = [](CodeGenFunction &CGF,
7216 const MemberExpr *E) {
7217 const Expr *BaseExpr = E->getBase();
7218 // If this is s.x, emit s as an lvalue. If it is s->x, emit s as a
7219 // scalar.
7220 LValue BaseLV;
7221 if (E->isArrow()) {
7222 LValueBaseInfo BaseInfo;
7223 TBAAAccessInfo TBAAInfo;
7224 Address Addr =
7225 CGF.EmitPointerWithAlignment(BaseExpr, &BaseInfo, &TBAAInfo);
7226 QualType PtrTy = BaseExpr->getType()->getPointeeType();
7227 BaseLV = CGF.MakeAddrLValue(Addr, PtrTy, BaseInfo, TBAAInfo);
7228 } else {
7229 BaseLV = CGF.EmitOMPSharedLValue(BaseExpr);
7230 }
7231 return BaseLV;
7232 };
7233 if (OAShE) {
7234 LowestElem = LB =
7235 Address(CGF.EmitScalarExpr(OAShE->getBase()),
7236 CGF.ConvertTypeForMem(
7237 OAShE->getBase()->getType()->getPointeeType()),
7238 CGF.getContext().getTypeAlignInChars(
7239 OAShE->getBase()->getType()));
7240 } else if (IsMemberReference) {
7241 const auto *ME = cast<MemberExpr>(I->getAssociatedExpression());
7242 LValue BaseLVal = EmitMemberExprBase(CGF, ME);
7243 LowestElem = CGF.EmitLValueForFieldInitialization(
7244 BaseLVal, cast<FieldDecl>(MapDecl))
7245 .getAddress(CGF);
7246 LB = CGF.EmitLoadOfReferenceLValue(LowestElem, MapDecl->getType())
7247 .getAddress(CGF);
7248 } else {
7249 LowestElem = LB =
7250 CGF.EmitOMPSharedLValue(I->getAssociatedExpression())
7251 .getAddress(CGF);
7252 }
7253
7254 // If this component is a pointer inside the base struct then we don't
7255 // need to create any entry for it - it will be combined with the object
7256 // it is pointing to into a single PTR_AND_OBJ entry.
7257 bool IsMemberPointerOrAddr =
7258 EncounteredME &&
7259 (((IsPointer || ForDeviceAddr) &&
7260 I->getAssociatedExpression() == EncounteredME) ||
7261 (IsPrevMemberReference && !IsPointer) ||
7262 (IsMemberReference && Next != CE &&
7263 !Next->getAssociatedExpression()->getType()->isPointerType()));
7264 if (!OverlappedElements.empty() && Next == CE) {
7265 // Handle base element with the info for overlapped elements.
7266 assert(!PartialStruct.Base.isValid() && "The base element is set.");
7267 assert(!IsPointer &&
7268 "Unexpected base element with the pointer type.");
7269 // Mark the whole struct as the struct that requires allocation on the
7270 // device.
7271 PartialStruct.LowestElem = {0, LowestElem};
7272 CharUnits TypeSize = CGF.getContext().getTypeSizeInChars(
7273 I->getAssociatedExpression()->getType());
7274 Address HB = CGF.Builder.CreateConstGEP(
7275 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
7276 LowestElem, CGF.VoidPtrTy, CGF.Int8Ty),
7277 TypeSize.getQuantity() - 1);
7278 PartialStruct.HighestElem = {
7279 std::numeric_limits<decltype(
7280 PartialStruct.HighestElem.first)>::max(),
7281 HB};
7282 PartialStruct.Base = BP;
7283 PartialStruct.LB = LB;
7284 assert(
7285 PartialStruct.PreliminaryMapData.BasePointers.empty() &&
7286 "Overlapped elements must be used only once for the variable.");
7287 std::swap(PartialStruct.PreliminaryMapData, CombinedInfo);
7288 // Emit data for non-overlapped data.
7289 OpenMPOffloadMappingFlags Flags =
7290 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF |
7291 getMapTypeBits(MapType, MapModifiers, MotionModifiers, IsImplicit,
7292 /*AddPtrFlag=*/false,
7293 /*AddIsTargetParamFlag=*/false, IsNonContiguous);
7294 llvm::Value *Size = nullptr;
7295 // Do bitcopy of all non-overlapped structure elements.
7296 for (OMPClauseMappableExprCommon::MappableExprComponentListRef
7297 Component : OverlappedElements) {
7298 Address ComponentLB = Address::invalid();
7299 for (const OMPClauseMappableExprCommon::MappableComponent &MC :
7300 Component) {
7301 if (const ValueDecl *VD = MC.getAssociatedDeclaration()) {
7302 const auto *FD = dyn_cast<FieldDecl>(VD);
7303 if (FD && FD->getType()->isLValueReferenceType()) {
7304 const auto *ME =
7305 cast<MemberExpr>(MC.getAssociatedExpression());
7306 LValue BaseLVal = EmitMemberExprBase(CGF, ME);
7307 ComponentLB =
7308 CGF.EmitLValueForFieldInitialization(BaseLVal, FD)
7309 .getAddress(CGF);
7310 } else {
7311 ComponentLB =
7312 CGF.EmitOMPSharedLValue(MC.getAssociatedExpression())
7313 .getAddress(CGF);
7314 }
7315 Size = CGF.Builder.CreatePtrDiff(
7316 CGF.Int8Ty, ComponentLB.getPointer(), LB.getPointer());
7317 break;
7318 }
7319 }
7320 assert(Size && "Failed to determine structure size");
7321 CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
7322 CombinedInfo.BasePointers.push_back(BP.getPointer());
7323 CombinedInfo.DevicePtrDecls.push_back(nullptr);
7324 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
7325 CombinedInfo.Pointers.push_back(LB.getPointer());
7326 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
7327 Size, CGF.Int64Ty, /*isSigned=*/true));
7328 CombinedInfo.Types.push_back(Flags);
7329 CombinedInfo.Mappers.push_back(nullptr);
7330 CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
7331 : 1);
7332 LB = CGF.Builder.CreateConstGEP(ComponentLB, 1);
7333 }
7334 CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
7335 CombinedInfo.BasePointers.push_back(BP.getPointer());
7336 CombinedInfo.DevicePtrDecls.push_back(nullptr);
7337 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
7338 CombinedInfo.Pointers.push_back(LB.getPointer());
7339 Size = CGF.Builder.CreatePtrDiff(
7340 CGF.Int8Ty, CGF.Builder.CreateConstGEP(HB, 1).getPointer(),
7341 LB.getPointer());
7342 CombinedInfo.Sizes.push_back(
7343 CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
7344 CombinedInfo.Types.push_back(Flags);
7345 CombinedInfo.Mappers.push_back(nullptr);
7346 CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
7347 : 1);
7348 break;
7349 }
7350 llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression());
7351 // Skip adding an entry in the CurInfo of this combined entry if the
7352 // whole struct is currently being mapped. The struct needs to be added
7353 // in the first position before any data internal to the struct is being
7354 // mapped.
7355 if (!IsMemberPointerOrAddr ||
7356 (Next == CE && MapType != OMPC_MAP_unknown)) {
7357 if (!IsMappingWholeStruct) {
7358 CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
7359 CombinedInfo.BasePointers.push_back(BP.getPointer());
7360 CombinedInfo.DevicePtrDecls.push_back(nullptr);
7361 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
7362 CombinedInfo.Pointers.push_back(LB.getPointer());
7363 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
7364 Size, CGF.Int64Ty, /*isSigned=*/true));
7365 CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
7366 : 1);
7367 } else {
7368 StructBaseCombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
7369 StructBaseCombinedInfo.BasePointers.push_back(BP.getPointer());
7370 StructBaseCombinedInfo.DevicePtrDecls.push_back(nullptr);
7371 StructBaseCombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
7372 StructBaseCombinedInfo.Pointers.push_back(LB.getPointer());
7373 StructBaseCombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
7374 Size, CGF.Int64Ty, /*isSigned=*/true));
7375 StructBaseCombinedInfo.NonContigInfo.Dims.push_back(
7376 IsNonContiguous ? DimSize : 1);
7377 }
7378
7379 // If Mapper is valid, the last component inherits the mapper.
7380 bool HasMapper = Mapper && Next == CE;
7381 if (!IsMappingWholeStruct)
7382 CombinedInfo.Mappers.push_back(HasMapper ? Mapper : nullptr);
7383 else
7384 StructBaseCombinedInfo.Mappers.push_back(HasMapper ? Mapper
7385 : nullptr);
7386
7387 // We need to add a pointer flag for each map that comes from the
7388 // same expression except for the first one. We also need to signal
7389 // this map is the first one that relates with the current capture
7390 // (there is a set of entries for each capture).
7391 OpenMPOffloadMappingFlags Flags = getMapTypeBits(
7392 MapType, MapModifiers, MotionModifiers, IsImplicit,
7393 !IsExpressionFirstInfo || RequiresReference ||
7394 FirstPointerInComplexData || IsMemberReference,
7395 IsCaptureFirstInfo && !RequiresReference, IsNonContiguous);
7396
7397 if (!IsExpressionFirstInfo || IsMemberReference) {
7398 // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as well,
7399 // then we reset the TO/FROM/ALWAYS/DELETE/CLOSE flags.
7400 if (IsPointer || (IsMemberReference && Next != CE))
7401 Flags &= ~(OpenMPOffloadMappingFlags::OMP_MAP_TO |
7402 OpenMPOffloadMappingFlags::OMP_MAP_FROM |
7403 OpenMPOffloadMappingFlags::OMP_MAP_ALWAYS |
7404 OpenMPOffloadMappingFlags::OMP_MAP_DELETE |
7405 OpenMPOffloadMappingFlags::OMP_MAP_CLOSE);
7406
7407 if (ShouldBeMemberOf) {
7408 // Set placeholder value MEMBER_OF=FFFF to indicate that the flag
7409 // should be later updated with the correct value of MEMBER_OF.
7410 Flags |= OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF;
7411 // From now on, all subsequent PTR_AND_OBJ entries should not be
7412 // marked as MEMBER_OF.
7413 ShouldBeMemberOf = false;
7414 }
7415 }
7416
7417 if (!IsMappingWholeStruct)
7418 CombinedInfo.Types.push_back(Flags);
7419 else
7420 StructBaseCombinedInfo.Types.push_back(Flags);
7421 }
7422
7423 // If we have encountered a member expression so far, keep track of the
7424 // mapped member. If the parent is "*this", then the value declaration
7425 // is nullptr.
7426 if (EncounteredME) {
7427 const auto *FD = cast<FieldDecl>(EncounteredME->getMemberDecl());
7428 unsigned FieldIndex = FD->getFieldIndex();
7429
7430 // Update info about the lowest and highest elements for this struct
7431 if (!PartialStruct.Base.isValid()) {
7432 PartialStruct.LowestElem = {FieldIndex, LowestElem};
7433 if (IsFinalArraySection) {
7434 Address HB =
7435 CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false)
7436 .getAddress(CGF);
7437 PartialStruct.HighestElem = {FieldIndex, HB};
7438 } else {
7439 PartialStruct.HighestElem = {FieldIndex, LowestElem};
7440 }
7441 PartialStruct.Base = BP;
7442 PartialStruct.LB = BP;
7443 } else if (FieldIndex < PartialStruct.LowestElem.first) {
7444 PartialStruct.LowestElem = {FieldIndex, LowestElem};
7445 } else if (FieldIndex > PartialStruct.HighestElem.first) {
7446 if (IsFinalArraySection) {
7447 Address HB =
7448 CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false)
7449 .getAddress(CGF);
7450 PartialStruct.HighestElem = {FieldIndex, HB};
7451 } else {
7452 PartialStruct.HighestElem = {FieldIndex, LowestElem};
7453 }
7454 }
7455 }
7456
7457 // Need to emit combined struct for array sections.
7458 if (IsFinalArraySection || IsNonContiguous)
7459 PartialStruct.IsArraySection = true;
7460
7461 // If we have a final array section, we are done with this expression.
7462 if (IsFinalArraySection)
7463 break;
7464
7465 // The pointer becomes the base for the next element.
7466 if (Next != CE)
7467 BP = IsMemberReference ? LowestElem : LB;
7468
7469 IsExpressionFirstInfo = false;
7470 IsCaptureFirstInfo = false;
7471 FirstPointerInComplexData = false;
7472 IsPrevMemberReference = IsMemberReference;
7473 } else if (FirstPointerInComplexData) {
7474 QualType Ty = Components.rbegin()
7475 ->getAssociatedDeclaration()
7476 ->getType()
7477 .getNonReferenceType();
7478 BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
7479 FirstPointerInComplexData = false;
7480 }
7481 }
7482 // If we ran into the whole component, allocate the space for the whole
7483 // record.
7484 if (!EncounteredME)
7485 PartialStruct.HasCompleteRecord = true;
7486
7487 if (!IsNonContiguous)
7488 return;
7489
7490 const ASTContext &Context = CGF.getContext();
7491
7492 // For supporting stride in array section, we need to initialize the first
7493 // dimension size as 1, first offset as 0, and first count as 1
7494 MapValuesArrayTy CurOffsets = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 0)};
7495 MapValuesArrayTy CurCounts = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)};
7496 MapValuesArrayTy CurStrides;
7497 MapValuesArrayTy DimSizes{llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)};
7498 uint64_t ElementTypeSize;
7499
7500 // Collect Size information for each dimension and get the element size as
7501 // the first Stride. For example, for `int arr[10][10]`, the DimSizes
7502 // should be [10, 10] and the first stride is 4 bytes.
7503 for (const OMPClauseMappableExprCommon::MappableComponent &Component :
7504 Components) {
7505 const Expr *AssocExpr = Component.getAssociatedExpression();
7506 const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
7507
7508 if (!OASE)
7509 continue;
7510
7511 QualType Ty = OMPArraySectionExpr::getBaseOriginalType(OASE->getBase());
7512 auto *CAT = Context.getAsConstantArrayType(Ty);
7513 auto *VAT = Context.getAsVariableArrayType(Ty);
7514
7515 // We need all the dimension sizes except for the last dimension.
7516 assert((VAT || CAT || &Component == &*Components.begin()) &&
7517 "Should be either ConstantArray or VariableArray if not the "
7518 "first Component");
7519
7520 // Get element size if CurStrides is empty.
7521 if (CurStrides.empty()) {
7522 const Type *ElementType = nullptr;
7523 if (CAT)
7524 ElementType = CAT->getElementType().getTypePtr();
7525 else if (VAT)
7526 ElementType = VAT->getElementType().getTypePtr();
7527 else
7528 assert(&Component == &*Components.begin() &&
7529 "Only expect pointer (non CAT or VAT) when this is the "
7530 "first Component");
7531 // If ElementType is null, then it means the base is a pointer
7532 // (neither CAT nor VAT) and we'll attempt to get ElementType again
7533 // for the next iteration.
7534 if (ElementType) {
7535 // For the case of having a pointer as the base, we need to remove one
7536 // level of indirection.
7537 if (&Component != &*Components.begin())
7538 ElementType = ElementType->getPointeeOrArrayElementType();
7539 ElementTypeSize =
7540 Context.getTypeSizeInChars(ElementType).getQuantity();
7541 CurStrides.push_back(
7542 llvm::ConstantInt::get(CGF.Int64Ty, ElementTypeSize));
7543 }
7544 }
7545 // Get dimension value except for the last dimension since we don't need
7546 // it.
7547 if (DimSizes.size() < Components.size() - 1) {
7548 if (CAT)
7549 DimSizes.push_back(llvm::ConstantInt::get(
7550 CGF.Int64Ty, CAT->getSize().getZExtValue()));
7551 else if (VAT)
7552 DimSizes.push_back(CGF.Builder.CreateIntCast(
7553 CGF.EmitScalarExpr(VAT->getSizeExpr()), CGF.Int64Ty,
7554 /*IsSigned=*/false));
7555 }
7556 }
7557
7558 // Skip the dummy dimension since we already have its information.
7559 auto *DI = DimSizes.begin() + 1;
7560 // Product of dimensions.
7561 llvm::Value *DimProd =
7562 llvm::ConstantInt::get(CGF.CGM.Int64Ty, ElementTypeSize);
7563
7564 // Collect info for non-contiguous. Notice that offset, count, and stride
7565 // are only meaningful for array-section, so we insert a null for anything
7566 // other than array-section.
7567 // Also, the sizes of the offset, count, and stride arrays are not the
7568 // same as those of pointers, base_pointers, sizes, or dims. Instead, the
7569 // sizes of offset, count, and stride equal the number of non-contiguous
7570 // declarations in the target update to/from clause.
7571 for (const OMPClauseMappableExprCommon::MappableComponent &Component :
7572 Components) {
7573 const Expr *AssocExpr = Component.getAssociatedExpression();
7574
7575 if (const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr)) {
7576 llvm::Value *Offset = CGF.Builder.CreateIntCast(
7577 CGF.EmitScalarExpr(AE->getIdx()), CGF.Int64Ty,
7578 /*isSigned=*/false);
7579 CurOffsets.push_back(Offset);
7580 CurCounts.push_back(llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/1));
7581 CurStrides.push_back(CurStrides.back());
7582 continue;
7583 }
7584
7585 const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
7586
7587 if (!OASE)
7588 continue;
7589
7590 // Offset
7591 const Expr *OffsetExpr = OASE->getLowerBound();
7592 llvm::Value *Offset = nullptr;
7593 if (!OffsetExpr) {
7594 // If offset is absent, then we just set it to zero.
7595 Offset = llvm::ConstantInt::get(CGF.Int64Ty, 0);
7596 } else {
7597 Offset = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(OffsetExpr),
7598 CGF.Int64Ty,
7599 /*isSigned=*/false);
7600 }
7601 CurOffsets.push_back(Offset);
7602
7603 // Count
7604 const Expr *CountExpr = OASE->getLength();
7605 llvm::Value *Count = nullptr;
7606 if (!CountExpr) {
7607 // In Clang, once a higher dimension is an array section, all the lower
7608 // dimensions are constructed as array sections too. However, for a case
7609 // like arr[0:2][2], Clang constructs the inner dimension as an array
7610 // section even though, per the spec, it is not in array-section form.
7611 if (!OASE->getColonLocFirst().isValid() &&
7612 !OASE->getColonLocSecond().isValid()) {
7613 Count = llvm::ConstantInt::get(CGF.Int64Ty, 1);
7614 } else {
7615 // OpenMP 5.0, 2.1.5 Array Sections, Description.
7616 // When the length is absent it defaults to ⌈(size −
7617 // lower-bound)/stride⌉, where size is the size of the array
7618 // dimension.
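// As a worked example (values assumed for illustration, not from this
// file): for a dimension of size 10 with lower-bound 2 and stride 3, the
// default length is ceil((10 - 2) / 3) = ceil(8 / 3) = 3 elements.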
7619 const Expr *StrideExpr = OASE->getStride();
7620 llvm::Value *Stride =
7621 StrideExpr
7622 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr),
7623 CGF.Int64Ty, /*isSigned=*/false)
7624 : nullptr;
7625 if (Stride)
7626 Count = CGF.Builder.CreateUDiv(
7627 CGF.Builder.CreateNUWSub(*DI, Offset), Stride);
7628 else
7629 Count = CGF.Builder.CreateNUWSub(*DI, Offset);
7630 }
7631 } else {
7632 Count = CGF.EmitScalarExpr(CountExpr);
7633 }
7634 Count = CGF.Builder.CreateIntCast(Count, CGF.Int64Ty, /*isSigned=*/false);
7635 CurCounts.push_back(Count);
7636
7637 // Stride_n' = Stride_n * (D_0 * D_1 ... * D_n-1) * Unit size
7638 // Take `int arr[5][5][5]` and `arr[0:2:2][1:2:1][0:2:2]` as an example:
7639 // Offset Count Stride
7640 // D0 0 1 4 (int) <- dummy dimension
7641 // D1 0 2 8 (2 * (1) * 4)
7642 // D2 1 2 20 (1 * (1 * 5) * 4)
7643 // D3 0 2 200 (2 * (1 * 5 * 4) * 4)
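// An illustrative directive that would exercise this computation (the
// pragma below is an assumed usage example, not taken from this file):
//   int arr[5][5][5];
//   #pragma omp target update to(arr[0:2:2][1:2:1][0:2:2])
// Each dimension then contributes one offset/count/stride triple, as in
// the table above.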
7644 const Expr *StrideExpr = OASE->getStride();
7645 llvm::Value *Stride =
7646 StrideExpr
7647 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr),
7648 CGF.Int64Ty, /*isSigned=*/false)
7649 : nullptr;
7650 DimProd = CGF.Builder.CreateNUWMul(DimProd, *(DI - 1));
7651 if (Stride)
7652 CurStrides.push_back(CGF.Builder.CreateNUWMul(DimProd, Stride));
7653 else
7654 CurStrides.push_back(DimProd);
7655 if (DI != DimSizes.end())
7656 ++DI;
7657 }
7658
7659 CombinedInfo.NonContigInfo.Offsets.push_back(CurOffsets);
7660 CombinedInfo.NonContigInfo.Counts.push_back(CurCounts);
7661 CombinedInfo.NonContigInfo.Strides.push_back(CurStrides);
7662 }
7663
7664 /// Return the adjusted map modifiers if the declaration a capture refers to
7665 /// appears in a first-private clause. This is expected to be used only with
7666 /// directives that start with 'target'.
7667 OpenMPOffloadMappingFlags
7668 getMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap) const {
7669 assert(Cap.capturesVariable() && "Expected capture by reference only!");
7670
7671 // A first private variable captured by reference will use only the
7672 // 'private ptr' and 'map to' flag. Return the right flags if the captured
7673 // declaration is known as first-private in this handler.
7674 if (FirstPrivateDecls.count(Cap.getCapturedVar())) {
7675 if (Cap.getCapturedVar()->getType()->isAnyPointerType())
7676 return OpenMPOffloadMappingFlags::OMP_MAP_TO |
7677 OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ;
7678 return OpenMPOffloadMappingFlags::OMP_MAP_PRIVATE |
7679 OpenMPOffloadMappingFlags::OMP_MAP_TO;
7680 }
7681 auto I = LambdasMap.find(Cap.getCapturedVar()->getCanonicalDecl());
7682 if (I != LambdasMap.end())
7683 // for map(to: lambda): using user specified map type.
7684 return getMapTypeBits(
7685 I->getSecond()->getMapType(), I->getSecond()->getMapTypeModifiers(),
7686 /*MotionModifiers=*/std::nullopt, I->getSecond()->isImplicit(),
7687 /*AddPtrFlag=*/false,
7688 /*AddIsTargetParamFlag=*/false,
7689 /*isNonContiguous=*/false);
7690 return OpenMPOffloadMappingFlags::OMP_MAP_TO |
7691 OpenMPOffloadMappingFlags::OMP_MAP_FROM;
7692 }
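// Hedged illustration (source construct assumed, not from this file):
// under '#pragma omp target firstprivate(p)' with 'int *p', the pointer
// branch above yields TO | PTR_AND_OBJ, while a non-pointer firstprivate
// capture yields PRIVATE | TO.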
7693
7694 void getPlainLayout(const CXXRecordDecl *RD,
7695 SmallVectorImpl<const FieldDecl *> &Layout,
7696 bool AsBase) const {
7697 const CGRecordLayout &RL = CGF.getTypes().getCGRecordLayout(RD);
7698
7699 llvm::StructType *St =
7700 AsBase ? RL.getBaseSubobjectLLVMType() : RL.getLLVMType();
7701
7702 unsigned NumElements = St->getNumElements();
7703 SmallVector<
7704 llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>, 4>
7705 RecordLayout(NumElements);
7706
7707 // Fill bases.
7708 for (const auto &I : RD->bases()) {
7709 if (I.isVirtual())
7710 continue;
7711 const auto *Base = I.getType()->getAsCXXRecordDecl();
7712 // Ignore empty bases.
7713 if (Base->isEmpty() || CGF.getContext()
7714 .getASTRecordLayout(RD)
7715 .getBaseClassOffset(Base)
7716 .isZero())
7717 continue;
7718
7719 unsigned FieldIndex = RL.getNonVirtualBaseLLVMFieldNo(Base);
7720 RecordLayout[FieldIndex] = Base;
7721 }
7722 // Fill in virtual bases.
7723 for (const auto &I : RD->vbases()) {
7724 const auto *Base = I.getType()->getAsCXXRecordDecl();
7725 // Ignore empty bases.
7726 if (Base->isEmpty())
7727 continue;
7728 unsigned FieldIndex = RL.getVirtualBaseIndex(Base);
7729 if (RecordLayout[FieldIndex])
7730 continue;
7731 RecordLayout[FieldIndex] = Base;
7732 }
7733 // Fill in all the fields.
7734 assert(!RD->isUnion() && "Unexpected union.");
7735 for (const auto *Field : RD->fields()) {
7736 // Fill in non-bitfields. (Bitfields always use a zero pattern, which we
7737 // will fill in later.)
7738 if (!Field->isBitField() && !Field->isZeroSize(CGF.getContext())) {
7739 unsigned FieldIndex = RL.getLLVMFieldNo(Field);
7740 RecordLayout[FieldIndex] = Field;
7741 }
7742 }
7743 for (const llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>
7744 &Data : RecordLayout) {
7745 if (Data.isNull())
7746 continue;
7747 if (const auto *Base = Data.dyn_cast<const CXXRecordDecl *>())
7748 getPlainLayout(Base, Layout, /*AsBase=*/true);
7749 else
7750 Layout.push_back(Data.get<const FieldDecl *>());
7751 }
7752 }
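// Minimal sketch of the flattening (types assumed for illustration):
//   struct B { int a; };
//   struct D : B { int b; };
// getPlainLayout(D, Layout, /*AsBase=*/false) recurses into the non-empty
// base B first, so Layout becomes [B::a, D::b]; bit-fields and zero-size
// fields never enter RecordLayout and are therefore skipped.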
7753
7754 /// Generate all the base pointers, section pointers, sizes, map types, and
7755 /// mappers for the extracted mappable expressions (all included in \a
7756 /// CombinedInfo). Also, for each item that relates with a device pointer, a
7757 /// pair of the relevant declaration and index where it occurs is appended to
7758 /// the device pointers info array.
7759 void generateAllInfoForClauses(
7760 ArrayRef<const OMPClause *> Clauses, MapCombinedInfoTy &CombinedInfo,
7761 llvm::OpenMPIRBuilder &OMPBuilder,
7762 const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
7763 llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
7764 // We have to process the component lists that relate with the same
7765 // declaration in a single chunk so that we can generate the map flags
7766 // correctly. Therefore, we organize all lists in a map.
7767 enum MapKind { Present, Allocs, Other, Total };
7768 llvm::MapVector<CanonicalDeclPtr<const Decl>,
7769 SmallVector<SmallVector<MapInfo, 8>, 4>>
7770 Info;
7771
7772 // Helper function to fill the information map for the different supported
7773 // clauses.
7774 auto &&InfoGen =
7775 [&Info, &SkipVarSet](
7776 const ValueDecl *D, MapKind Kind,
7777 OMPClauseMappableExprCommon::MappableExprComponentListRef L,
7778 OpenMPMapClauseKind MapType,
7779 ArrayRef<OpenMPMapModifierKind> MapModifiers,
7780 ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
7781 bool ReturnDevicePointer, bool IsImplicit, const ValueDecl *Mapper,
7782 const Expr *VarRef = nullptr, bool ForDeviceAddr = false) {
7783 if (SkipVarSet.contains(D))
7784 return;
7785 auto It = Info.find(D);
7786 if (It == Info.end())
7787 It = Info
7788 .insert(std::make_pair(
7789 D, SmallVector<SmallVector<MapInfo, 8>, 4>(Total)))
7790 .first;
7791 It->second[Kind].emplace_back(
7792 L, MapType, MapModifiers, MotionModifiers, ReturnDevicePointer,
7793 IsImplicit, Mapper, VarRef, ForDeviceAddr);
7794 };
7795
7796 for (const auto *Cl : Clauses) {
7797 const auto *C = dyn_cast<OMPMapClause>(Cl);
7798 if (!C)
7799 continue;
7800 MapKind Kind = Other;
7801 if (llvm::is_contained(C->getMapTypeModifiers(),
7802 OMPC_MAP_MODIFIER_present))
7803 Kind = Present;
7804 else if (C->getMapType() == OMPC_MAP_alloc)
7805 Kind = Allocs;
7806 const auto *EI = C->getVarRefs().begin();
7807 for (const auto L : C->component_lists()) {
7808 const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
7809 InfoGen(std::get<0>(L), Kind, std::get<1>(L), C->getMapType(),
7810 C->getMapTypeModifiers(), std::nullopt,
7811 /*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(L),
7812 E);
7813 ++EI;
7814 }
7815 }
7816 for (const auto *Cl : Clauses) {
7817 const auto *C = dyn_cast<OMPToClause>(Cl);
7818 if (!C)
7819 continue;
7820 MapKind Kind = Other;
7821 if (llvm::is_contained(C->getMotionModifiers(),
7822 OMPC_MOTION_MODIFIER_present))
7823 Kind = Present;
7824 const auto *EI = C->getVarRefs().begin();
7825 for (const auto L : C->component_lists()) {
7826 InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_to, std::nullopt,
7827 C->getMotionModifiers(), /*ReturnDevicePointer=*/false,
7828 C->isImplicit(), std::get<2>(L), *EI);
7829 ++EI;
7830 }
7831 }
7832 for (const auto *Cl : Clauses) {
7833 const auto *C = dyn_cast<OMPFromClause>(Cl);
7834 if (!C)
7835 continue;
7836 MapKind Kind = Other;
7837 if (llvm::is_contained(C->getMotionModifiers(),
7838 OMPC_MOTION_MODIFIER_present))
7839 Kind = Present;
7840 const auto *EI = C->getVarRefs().begin();
7841 for (const auto L : C->component_lists()) {
7842 InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_from,
7843 std::nullopt, C->getMotionModifiers(),
7844 /*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(L),
7845 *EI);
7846 ++EI;
7847 }
7848 }
7849
7850 // Look at the use_device_ptr and use_device_addr clauses information and
7851 // mark the existing map entries as such. If there is no map information for
7852 // an entry in the use_device_ptr and use_device_addr list, we create one
7853 // with map type 'alloc' and zero size section. It is the user's fault if that
7854 // was not mapped before. If there is no map information and the pointer is
7855 // a struct member, then we defer the emission of that entry until the whole
7856 // struct has been processed.
7857 llvm::MapVector<CanonicalDeclPtr<const Decl>,
7858 SmallVector<DeferredDevicePtrEntryTy, 4>>
7859 DeferredInfo;
7860 MapCombinedInfoTy UseDeviceDataCombinedInfo;
7861
7862 auto &&UseDeviceDataCombinedInfoGen =
7863 [&UseDeviceDataCombinedInfo](const ValueDecl *VD, llvm::Value *Ptr,
7864 CodeGenFunction &CGF, bool IsDevAddr) {
7865 UseDeviceDataCombinedInfo.Exprs.push_back(VD);
7866 UseDeviceDataCombinedInfo.BasePointers.emplace_back(Ptr);
7867 UseDeviceDataCombinedInfo.DevicePtrDecls.emplace_back(VD);
7868 UseDeviceDataCombinedInfo.DevicePointers.emplace_back(
7869 IsDevAddr ? DeviceInfoTy::Address : DeviceInfoTy::Pointer);
7870 UseDeviceDataCombinedInfo.Pointers.push_back(Ptr);
7871 UseDeviceDataCombinedInfo.Sizes.push_back(
7872 llvm::Constant::getNullValue(CGF.Int64Ty));
7873 UseDeviceDataCombinedInfo.Types.push_back(
7874 OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM);
7875 UseDeviceDataCombinedInfo.Mappers.push_back(nullptr);
7876 };
7877
7878 auto &&MapInfoGen =
7879 [&DeferredInfo, &UseDeviceDataCombinedInfoGen,
7880 &InfoGen](CodeGenFunction &CGF, const Expr *IE, const ValueDecl *VD,
7881 OMPClauseMappableExprCommon::MappableExprComponentListRef
7882 Components,
7883 bool IsImplicit, bool IsDevAddr) {
7884 // We didn't find any match in our map information - generate a zero
7885 // size array section - if the pointer is a struct member we defer
7886 // this action until the whole struct has been processed.
7887 if (isa<MemberExpr>(IE)) {
7888 // Insert the pointer into Info to be processed by
7889 // generateInfoForComponentList. Because it is a member pointer
7890 // without a pointee, no entry will be generated for it, therefore
7891 // we need to generate one after the whole struct has been
7892 // processed. Nonetheless, generateInfoForComponentList must be
7893 // called to take the pointer into account for the calculation of
7894 // the range of the partial struct.
7895 InfoGen(nullptr, Other, Components, OMPC_MAP_unknown, std::nullopt,
7896 std::nullopt, /*ReturnDevicePointer=*/false, IsImplicit,
7897 nullptr, nullptr, IsDevAddr);
7898 DeferredInfo[nullptr].emplace_back(IE, VD, IsDevAddr);
7899 } else {
7900 llvm::Value *Ptr;
7901 if (IsDevAddr) {
7902 if (IE->isGLValue())
7903 Ptr = CGF.EmitLValue(IE).getPointer(CGF);
7904 else
7905 Ptr = CGF.EmitScalarExpr(IE);
7906 } else {
7907 Ptr = CGF.EmitLoadOfScalar(CGF.EmitLValue(IE), IE->getExprLoc());
7908 }
7909 UseDeviceDataCombinedInfoGen(VD, Ptr, CGF, IsDevAddr);
7910 }
7911 };
7912
7913 auto &&IsMapInfoExist = [&Info](CodeGenFunction &CGF, const ValueDecl *VD,
7914 const Expr *IE, bool IsDevAddr) -> bool {
7915 // We potentially have map information for this declaration already.
7916 // Look for the first set of components that refer to it. If found,
7917 // return true.
7918 // If the first component is a member expression, we have to look into
7919 // 'this', which maps to null in the map of map information. Otherwise
7920 // look directly for the information.
7921 auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);
7922 if (It != Info.end()) {
7923 bool Found = false;
7924 for (auto &Data : It->second) {
7925 auto *CI = llvm::find_if(Data, [VD](const MapInfo &MI) {
7926 return MI.Components.back().getAssociatedDeclaration() == VD;
7927 });
7928 // If we found a map entry, signal that the pointer has to be
7929 // returned and move on to the next declaration. Exclude cases where
7930 // the base pointer is mapped as array subscript, array section or
7931 // array shaping. The base address is passed as a pointer to base in
7932 // this case and cannot be used as a base for use_device_ptr list
7933 // item.
7934 if (CI != Data.end()) {
7935 if (IsDevAddr) {
7936 CI->ForDeviceAddr = IsDevAddr;
7937 CI->ReturnDevicePointer = true;
7938 Found = true;
7939 break;
7940 } else {
7941 auto PrevCI = std::next(CI->Components.rbegin());
7942 const auto *VarD = dyn_cast<VarDecl>(VD);
7943 if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
7944 isa<MemberExpr>(IE) ||
7945 !VD->getType().getNonReferenceType()->isPointerType() ||
7946 PrevCI == CI->Components.rend() ||
7947 isa<MemberExpr>(PrevCI->getAssociatedExpression()) || !VarD ||
7948 VarD->hasLocalStorage()) {
7949 CI->ForDeviceAddr = IsDevAddr;
7950 CI->ReturnDevicePointer = true;
7951 Found = true;
7952 break;
7953 }
7954 }
7955 }
7956 }
7957 return Found;
7958 }
7959 return false;
7960 };
7961
7962 // Look at the use_device_ptr clause information and mark the existing map
7963 // entries as such. If there is no map information for an entry in the
7964 // use_device_ptr list, we create one with map type 'alloc' and zero size
7965 // section. It is the user's fault if that was not mapped before. If there is
7966 // no map information and the pointer is a struct member, then we defer the
7967 // emission of that entry until the whole struct has been processed.
7968 for (const auto *Cl : Clauses) {
7969 const auto *C = dyn_cast<OMPUseDevicePtrClause>(Cl);
7970 if (!C)
7971 continue;
7972 for (const auto L : C->component_lists()) {
7973 OMPClauseMappableExprCommon::MappableExprComponentListRef Components =
7974 std::get<1>(L);
7975 assert(!Components.empty() &&
7976 "Not expecting empty list of components!");
7977 const ValueDecl *VD = Components.back().getAssociatedDeclaration();
7978 VD = cast<ValueDecl>(VD->getCanonicalDecl());
7979 const Expr *IE = Components.back().getAssociatedExpression();
7980 if (IsMapInfoExist(CGF, VD, IE, /*IsDevAddr=*/false))
7981 continue;
7982 MapInfoGen(CGF, IE, VD, Components, C->isImplicit(),
7983 /*IsDevAddr=*/false);
7984 }
7985 }
7986
7987 llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>, 4> Processed;
7988 for (const auto *Cl : Clauses) {
7989 const auto *C = dyn_cast<OMPUseDeviceAddrClause>(Cl);
7990 if (!C)
7991 continue;
7992 for (const auto L : C->component_lists()) {
7993 OMPClauseMappableExprCommon::MappableExprComponentListRef Components =
7994 std::get<1>(L);
7995 assert(!std::get<1>(L).empty() &&
7996 "Not expecting empty list of components!");
7997 const ValueDecl *VD = std::get<1>(L).back().getAssociatedDeclaration();
7998 if (!Processed.insert(VD).second)
7999 continue;
8000 VD = cast<ValueDecl>(VD->getCanonicalDecl());
8001 const Expr *IE = std::get<1>(L).back().getAssociatedExpression();
8002 if (IsMapInfoExist(CGF, VD, IE, /*IsDevAddr=*/true))
8003 continue;
8004 MapInfoGen(CGF, IE, VD, Components, C->isImplicit(),
8005 /*IsDevAddr=*/true);
8006 }
8007 }
8008
8009 for (const auto &Data : Info) {
8010 StructRangeInfoTy PartialStruct;
8011 // Current struct information:
8012 MapCombinedInfoTy CurInfo;
8013 // Current struct base information:
8014 MapCombinedInfoTy StructBaseCurInfo;
8015 const Decl *D = Data.first;
8016 const ValueDecl *VD = cast_or_null<ValueDecl>(D);
8017 for (const auto &M : Data.second) {
8018 for (const MapInfo &L : M) {
8019 assert(!L.Components.empty() &&
8020 "Not expecting declaration with no component lists.");
8021
8022 // Remember the current base pointer index.
8023 unsigned CurrentBasePointersIdx = CurInfo.BasePointers.size();
8024 unsigned StructBasePointersIdx =
8025 StructBaseCurInfo.BasePointers.size();
8026 CurInfo.NonContigInfo.IsNonContiguous =
8027 L.Components.back().isNonContiguous();
8028 generateInfoForComponentList(
8029 L.MapType, L.MapModifiers, L.MotionModifiers, L.Components,
8030 CurInfo, StructBaseCurInfo, PartialStruct,
8031 /*IsFirstComponentList=*/false, L.IsImplicit,
8032 /*GenerateAllInfoForClauses*/ true, L.Mapper, L.ForDeviceAddr, VD,
8033 L.VarRef);
8034
8035 // If this entry relates to a device pointer, set the relevant
8036 // declaration and add the 'return pointer' flag.
8037 if (L.ReturnDevicePointer) {
8038 // Check whether a value was added to either CurInfo or
8039 // StructBaseCurInfo and error if no value was added to either of
8040 // them:
8041 assert((CurrentBasePointersIdx < CurInfo.BasePointers.size() ||
8042 StructBasePointersIdx <
8043 StructBaseCurInfo.BasePointers.size()) &&
8044 "Unexpected number of mapped base pointers.");
8045
8046 // Choose a base pointer index which is always valid:
8047 const ValueDecl *RelevantVD =
8048 L.Components.back().getAssociatedDeclaration();
8049 assert(RelevantVD &&
8050 "No relevant declaration related with device pointer??");
8051
8052 // If StructBaseCurInfo has been updated this iteration then work on
8053 // the first new entry added to it i.e. make sure that when multiple
8054 // values are added to any of the lists, the first value added is
8055 // being modified by the assignments below (not the last value
8056 // added).
8057 if (StructBasePointersIdx < StructBaseCurInfo.BasePointers.size()) {
8058 StructBaseCurInfo.DevicePtrDecls[StructBasePointersIdx] =
8059 RelevantVD;
8060 StructBaseCurInfo.DevicePointers[StructBasePointersIdx] =
8061 L.ForDeviceAddr ? DeviceInfoTy::Address
8062 : DeviceInfoTy::Pointer;
8063 StructBaseCurInfo.Types[StructBasePointersIdx] |=
8064 OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM;
8065 } else {
8066 CurInfo.DevicePtrDecls[CurrentBasePointersIdx] = RelevantVD;
8067 CurInfo.DevicePointers[CurrentBasePointersIdx] =
8068 L.ForDeviceAddr ? DeviceInfoTy::Address
8069 : DeviceInfoTy::Pointer;
8070 CurInfo.Types[CurrentBasePointersIdx] |=
8071 OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM;
8072 }
8073 }
8074 }
8075 }
8076
8077 // Append any pending zero-length pointers which are struct members and
8078 // used with use_device_ptr or use_device_addr.
8079 auto CI = DeferredInfo.find(Data.first);
8080 if (CI != DeferredInfo.end()) {
8081 for (const DeferredDevicePtrEntryTy &L : CI->second) {
8082 llvm::Value *BasePtr;
8083 llvm::Value *Ptr;
8084 if (L.ForDeviceAddr) {
8085 if (L.IE->isGLValue())
8086 Ptr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
8087 else
8088 Ptr = this->CGF.EmitScalarExpr(L.IE);
8089 BasePtr = Ptr;
8090 // Entry is RETURN_PARAM. Also, set the placeholder value
8091 // MEMBER_OF=FFFF so that the entry is later updated with the
8092 // correct value of MEMBER_OF.
8093 CurInfo.Types.push_back(
8094 OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM |
8095 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF);
8096 } else {
8097 BasePtr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
8098 Ptr = this->CGF.EmitLoadOfScalar(this->CGF.EmitLValue(L.IE),
8099 L.IE->getExprLoc());
8100 // Entry is PTR_AND_OBJ and RETURN_PARAM. Also, set the
8101 // placeholder value MEMBER_OF=FFFF so that the entry is later
8102 // updated with the correct value of MEMBER_OF.
8103 CurInfo.Types.push_back(
8104 OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ |
8105 OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM |
8106 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF);
8107 }
8108 CurInfo.Exprs.push_back(L.VD);
8109 CurInfo.BasePointers.emplace_back(BasePtr);
8110 CurInfo.DevicePtrDecls.emplace_back(L.VD);
8111 CurInfo.DevicePointers.emplace_back(
8112 L.ForDeviceAddr ? DeviceInfoTy::Address : DeviceInfoTy::Pointer);
8113 CurInfo.Pointers.push_back(Ptr);
8114 CurInfo.Sizes.push_back(
8115 llvm::Constant::getNullValue(this->CGF.Int64Ty));
8116 CurInfo.Mappers.push_back(nullptr);
8117 }
8118 }
8119
8120 // Unify entries in one list making sure the struct mapping precedes the
8121 // individual fields:
8122 MapCombinedInfoTy UnionCurInfo;
8123 UnionCurInfo.append(StructBaseCurInfo);
8124 UnionCurInfo.append(CurInfo);
8125
8126 // If there is an entry in PartialStruct it means we have a struct with
8127 // individual members mapped. Emit an extra combined entry.
8128 if (PartialStruct.Base.isValid()) {
8129 UnionCurInfo.NonContigInfo.Dims.push_back(0);
8130 // Emit a combined entry:
8131 emitCombinedEntry(CombinedInfo, UnionCurInfo.Types, PartialStruct,
8132 /*IsMapThis*/ !VD, OMPBuilder, VD);
8133 }
8134
8135 // We need to append the results of this capture to what we already have.
8136 CombinedInfo.append(UnionCurInfo);
8137 }
8138 // Append data for use_device_ptr clauses.
8139 CombinedInfo.append(UseDeviceDataCombinedInfo);
8140 }
8141
8142public:
8143 MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF)
8144 : CurDir(&Dir), CGF(CGF) {
8145 // Extract firstprivate clause information.
8146 for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>())
8147 for (const auto *D : C->varlists())
8148 FirstPrivateDecls.try_emplace(
8149 cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl()), C->isImplicit());
8150 // Extract implicit firstprivates from uses_allocators clauses.
8151 for (const auto *C : Dir.getClausesOfKind<OMPUsesAllocatorsClause>()) {
8152 for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
8153 OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
8154 if (const auto *DRE = dyn_cast_or_null<DeclRefExpr>(D.AllocatorTraits))
8155 FirstPrivateDecls.try_emplace(cast<VarDecl>(DRE->getDecl()),
8156 /*Implicit=*/true);
8157 else if (const auto *VD = dyn_cast<VarDecl>(
8158 cast<DeclRefExpr>(D.Allocator->IgnoreParenImpCasts())
8159 ->getDecl()))
8160 FirstPrivateDecls.try_emplace(VD, /*Implicit=*/true);
8161 }
8162 }
8163 // Extract device pointer clause information.
8164 for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>())
8165 for (auto L : C->component_lists())
8166 DevPointersMap[std::get<0>(L)].push_back(std::get<1>(L));
8167 // Extract device addr clause information.
8168 for (const auto *C : Dir.getClausesOfKind<OMPHasDeviceAddrClause>())
8169 for (auto L : C->component_lists())
8170 HasDevAddrsMap[std::get<0>(L)].push_back(std::get<1>(L));
8171 // Extract map information.
8172 for (const auto *C : Dir.getClausesOfKind<OMPMapClause>()) {
8173 if (C->getMapType() != OMPC_MAP_to)
8174 continue;
8175 for (auto L : C->component_lists()) {
8176 const ValueDecl *VD = std::get<0>(L);
8177 const auto *RD = VD ? VD->getType()
8178 .getCanonicalType()
8179 .getNonReferenceType()
8180 ->getAsCXXRecordDecl()
8181 : nullptr;
8182 if (RD && RD->isLambda())
8183 LambdasMap.try_emplace(std::get<0>(L), C);
8184 }
8185 }
8186 }
8187
8188 /// Constructor for the declare mapper directive.
8189 MappableExprsHandler(const OMPDeclareMapperDecl &Dir, CodeGenFunction &CGF)
8190 : CurDir(&Dir), CGF(CGF) {}
8191
8192 /// Generate code for the combined entry if we have a partially mapped struct
8193 /// and take care of the mapping flags of the arguments corresponding to
8194 /// individual struct members.
8195 void emitCombinedEntry(MapCombinedInfoTy &CombinedInfo,
8196 MapFlagsArrayTy &CurTypes,
8197 const StructRangeInfoTy &PartialStruct, bool IsMapThis,
8198 llvm::OpenMPIRBuilder &OMPBuilder,
8199 const ValueDecl *VD = nullptr,
8200 bool NotTargetParams = true) const {
8201 if (CurTypes.size() == 1 &&
8202 ((CurTypes.back() & OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF) !=
8203 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF) &&
8204 !PartialStruct.IsArraySection)
8205 return;
8206 Address LBAddr = PartialStruct.LowestElem.second;
8207 Address HBAddr = PartialStruct.HighestElem.second;
8208 if (PartialStruct.HasCompleteRecord) {
8209 LBAddr = PartialStruct.LB;
8210 HBAddr = PartialStruct.LB;
8211 }
8212 CombinedInfo.Exprs.push_back(VD);
8213 // Base is the base of the struct
8214 CombinedInfo.BasePointers.push_back(PartialStruct.Base.getPointer());
8215 CombinedInfo.DevicePtrDecls.push_back(nullptr);
8216 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
8217 // Pointer is the address of the lowest element
8218 llvm::Value *LB = LBAddr.getPointer();
8219 const CXXMethodDecl *MD =
8220 CGF.CurFuncDecl ? dyn_cast<CXXMethodDecl>(CGF.CurFuncDecl) : nullptr;
8221 const CXXRecordDecl *RD = MD ? MD->getParent() : nullptr;
8222 bool HasBaseClass = RD && IsMapThis ? RD->getNumBases() > 0 : false;
8223 // There should not be a mapper for a combined entry.
8224 if (HasBaseClass) {
8225 // OpenMP 5.2 148:21:
8226 // If the target construct is within a class non-static member function,
8227 // and a variable is an accessible data member of the object for which the
8228 // non-static data member function is invoked, the variable is treated as
8229 // if the this[:1] expression had appeared in a map clause with a map-type
8230 // of tofrom.
8231 // Emit this[:1]
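// Illustrative source that reaches this path (assumed, not from this
// file):
//   struct S : B { int x; void f() {
//   #pragma omp target map(x)
//     { x = 1; }
//   } };
// Because S has a base class, mapping member 'x' maps the enclosing
// object as if 'this[:1]' had been written, per the 5.2 wording above.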
8232 CombinedInfo.Pointers.push_back(PartialStruct.Base.getPointer());
8233 QualType Ty = MD->getFunctionObjectParameterType();
8234 llvm::Value *Size =
8235 CGF.Builder.CreateIntCast(CGF.getTypeSize(Ty), CGF.Int64Ty,
8236 /*isSigned=*/true);
8237 CombinedInfo.Sizes.push_back(Size);
8238 } else {
8239 CombinedInfo.Pointers.push_back(LB);
8240 // Size is (addr of {highest+1} element) - (addr of lowest element)
8241 llvm::Value *HB = HBAddr.getPointer();
8242 llvm::Value *HAddr = CGF.Builder.CreateConstGEP1_32(
8243 HBAddr.getElementType(), HB, /*Idx0=*/1);
8244 llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(LB, CGF.VoidPtrTy);
8245 llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy);
8246 llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CGF.Int8Ty, CHAddr, CLAddr);
8247 llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.Int64Ty,
8248 /*isSigned=*/false);
8249 CombinedInfo.Sizes.push_back(Size);
8250 }
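// Worked example (offsets assumed for illustration): if the lowest mapped
// member starts at byte 8 and the highest mapped member is a 4-byte int
// at byte 24, then HAddr points at byte 28 and the combined entry covers
// 28 - 8 = 20 bytes.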
8251 CombinedInfo.Mappers.push_back(nullptr);
8252 // Map type is always TARGET_PARAM, if we generate info for captures.
8253 CombinedInfo.Types.push_back(
8254 NotTargetParams ? OpenMPOffloadMappingFlags::OMP_MAP_NONE
8255 : OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM);
8256 // If any element has the present modifier, then make sure the runtime
8257 // doesn't attempt to allocate the struct.
8258 if (CurTypes.end() !=
8259 llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) {
8260 return static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
8261 Type & OpenMPOffloadMappingFlags::OMP_MAP_PRESENT);
8262 }))
8263 CombinedInfo.Types.back() |= OpenMPOffloadMappingFlags::OMP_MAP_PRESENT;
8264 // Remove TARGET_PARAM flag from the first element
8265 (*CurTypes.begin()) &= ~OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;
8266 // If any element has the ompx_hold modifier, then make sure the runtime
8267 // uses the hold reference count for the struct as a whole so that it won't
8268 // be unmapped by an extra dynamic reference count decrement. Add it to all
8269 // elements as well so the runtime knows which reference count to check
8270 // when determining whether it's time for device-to-host transfers of
8271 // individual elements.
8272 if (CurTypes.end() !=
8273 llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) {
8274 return static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
8275 Type & OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD);
8276 })) {
8277 CombinedInfo.Types.back() |= OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD;
8278 for (auto &M : CurTypes)
8279 M |= OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD;
8280 }
8281
8282 // All other current entries will be MEMBER_OF the combined entry
8283 // (except for PTR_AND_OBJ entries which do not have a placeholder value
8284 // 0xFFFF in the MEMBER_OF field).
8285 OpenMPOffloadMappingFlags MemberOfFlag =
8286 OMPBuilder.getMemberOfFlag(CombinedInfo.BasePointers.size() - 1);
8287 for (auto &M : CurTypes)
8288 OMPBuilder.setCorrectMemberOfFlag(M, MemberOfFlag);
8289 }
8290
8291 /// Generate all the base pointers, section pointers, sizes, map types, and
8292 /// mappers for the extracted mappable expressions (all included in \a
8293 /// CombinedInfo). Also, for each item that relates with a device pointer, a
8294 /// pair of the relevant declaration and index where it occurs is appended to
8295 /// the device pointers info array.
8296 void generateAllInfo(
8297 MapCombinedInfoTy &CombinedInfo, llvm::OpenMPIRBuilder &OMPBuilder,
8298 const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
8299 llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
8300 assert(CurDir.is<const OMPExecutableDirective *>() &&
8301 "Expect an executable directive");
8302 const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
8303 generateAllInfoForClauses(CurExecDir->clauses(), CombinedInfo, OMPBuilder,
8304 SkipVarSet);
8305 }
8306
8307 /// Generate all the base pointers, section pointers, sizes, map types, and
8308 /// mappers for the extracted map clauses of user-defined mapper (all included
8309 /// in \a CombinedInfo).
8310 void generateAllInfoForMapper(MapCombinedInfoTy &CombinedInfo,
8311 llvm::OpenMPIRBuilder &OMPBuilder) const {
8312 assert(CurDir.is<const OMPDeclareMapperDecl *>() &&
8313 "Expect a declare mapper directive");
8314 const auto *CurMapperDir = CurDir.get<const OMPDeclareMapperDecl *>();
8315 generateAllInfoForClauses(CurMapperDir->clauses(), CombinedInfo,
8316 OMPBuilder);
8317 }
8318
8319 /// Emit capture info for lambdas for variables captured by reference.
8320 void generateInfoForLambdaCaptures(
8321 const ValueDecl *VD, llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
8322 llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers) const {
8323 QualType VDType = VD->getType().getCanonicalType().getNonReferenceType();
8324 const auto *RD = VDType->getAsCXXRecordDecl();
8325 if (!RD || !RD->isLambda())
8326 return;
8327 Address VDAddr(Arg, CGF.ConvertTypeForMem(VDType),
8328 CGF.getContext().getDeclAlign(VD));
8329 LValue VDLVal = CGF.MakeAddrLValue(VDAddr, VDType);
8330 llvm::DenseMap<const ValueDecl *, FieldDecl *> Captures;
8331 FieldDecl *ThisCapture = nullptr;
8332 RD->getCaptureFields(Captures, ThisCapture);
8333 if (ThisCapture) {
8334 LValue ThisLVal =
8335 CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture);
8336 LValue ThisLValVal = CGF.EmitLValueForField(VDLVal, ThisCapture);
8337 LambdaPointers.try_emplace(ThisLVal.getPointer(CGF),
8338 VDLVal.getPointer(CGF));
8339 CombinedInfo.Exprs.push_back(VD);
8340 CombinedInfo.BasePointers.push_back(ThisLVal.getPointer(CGF));
8341 CombinedInfo.DevicePtrDecls.push_back(nullptr);
8342 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
8343 CombinedInfo.Pointers.push_back(ThisLValVal.getPointer(CGF));
8344 CombinedInfo.Sizes.push_back(
8345 CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy),
8346 CGF.Int64Ty, /*isSigned=*/true));
8347 CombinedInfo.Types.push_back(
8348 OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ |
8349 OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
8350 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF |
8351 OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT);
8352 CombinedInfo.Mappers.push_back(nullptr);
8353 }
8354 for (const LambdaCapture &LC : RD->captures()) {
8355 if (!LC.capturesVariable())
8356 continue;
8357 const VarDecl *VD = cast<VarDecl>(LC.getCapturedVar());
8358 if (LC.getCaptureKind() != LCK_ByRef && !VD->getType()->isPointerType())
8359 continue;
8360 auto It = Captures.find(VD);
8361 assert(It != Captures.end() && "Found lambda capture without field.");
8362 LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second);
8363 if (LC.getCaptureKind() == LCK_ByRef) {
8364 LValue VarLValVal = CGF.EmitLValueForField(VDLVal, It->second);
8365 LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
8366 VDLVal.getPointer(CGF));
8367 CombinedInfo.Exprs.push_back(VD);
8368 CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
8369 CombinedInfo.DevicePtrDecls.push_back(nullptr);
8370 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
8371 CombinedInfo.Pointers.push_back(VarLValVal.getPointer(CGF));
8372 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
8373 CGF.getTypeSize(
8374 VD->getType().getCanonicalType().getNonReferenceType()),
8375 CGF.Int64Ty, /*isSigned=*/true));
8376 } else {
8377 RValue VarRVal = CGF.EmitLoadOfLValue(VarLVal, RD->getLocation());
8378 LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
8379 VDLVal.getPointer(CGF));
8380 CombinedInfo.Exprs.push_back(VD);
8381 CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
8382 CombinedInfo.DevicePtrDecls.push_back(nullptr);
8383 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
8384 CombinedInfo.Pointers.push_back(VarRVal.getScalarVal());
8385 CombinedInfo.Sizes.push_back(llvm::ConstantInt::get(CGF.Int64Ty, 0));
8386 }
8387 CombinedInfo.Types.push_back(
8388 OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ |
8389 OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
8390 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF |
8391 OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT);
8392 CombinedInfo.Mappers.push_back(nullptr);
8393 }
8394 }
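// Hedged usage sketch (names assumed, not from this file):
//   int x; auto L = [&x]() { x = 1; };
//   #pragma omp target map(to: L)
// The lambda object is mapped, and each by-reference capture field gets a
// PTR_AND_OBJ | LITERAL | MEMBER_OF | IMPLICIT entry as built above.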
8395
8396 /// Set correct indices for lambdas captures.
8397 void adjustMemberOfForLambdaCaptures(
8398 llvm::OpenMPIRBuilder &OMPBuilder,
8399 const llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers,
8400 MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
8401 MapFlagsArrayTy &Types) const {
8402 for (unsigned I = 0, E = Types.size(); I < E; ++I) {
8403 // Set correct member_of idx for all implicit lambda captures.
8404 if (Types[I] != (OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ |
8405 OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
8406 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF |
8407 OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT))
8408 continue;
8409 llvm::Value *BasePtr = LambdaPointers.lookup(BasePointers[I]);
8410 assert(BasePtr && "Unable to find base lambda address.");
8411 int TgtIdx = -1;
8412 for (unsigned J = I; J > 0; --J) {
8413 unsigned Idx = J - 1;
8414 if (Pointers[Idx] != BasePtr)
8415 continue;
8416 TgtIdx = Idx;
8417 break;
8418 }
8419 assert(TgtIdx != -1 && "Unable to find parent lambda.");
8420 // All other current entries will be MEMBER_OF the combined entry
8421 // (except for PTR_AND_OBJ entries which do not have a placeholder value
8422 // 0xFFFF in the MEMBER_OF field).
8423 OpenMPOffloadMappingFlags MemberOfFlag =
8424 OMPBuilder.getMemberOfFlag(TgtIdx);
8425 OMPBuilder.setCorrectMemberOfFlag(Types[I], MemberOfFlag);
8426 }
8427 }
8428
8429 /// Generate the base pointers, section pointers, sizes, map types, and
8430 /// mappers associated to a given capture (all included in \a CombinedInfo).
8431 void generateInfoForCapture(const CapturedStmt::Capture *Cap,
8432 llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
8433 StructRangeInfoTy &PartialStruct) const {
8434 assert(!Cap->capturesVariableArrayType() &&
8435 "Not expecting to generate map info for a variable array type!");
8436
8437 // We need to know when we are generating information for the first component.
8438 const ValueDecl *VD = Cap->capturesThis()
8439 ? nullptr
8440 : Cap->getCapturedVar()->getCanonicalDecl();
8441
8442 // for map(to: lambda): skip here, processing it in
8443 // generateDefaultMapInfo
8444 if (LambdasMap.count(VD))
8445 return;
8446
8447 // If this declaration appears in a is_device_ptr clause we just have to
8448 // pass the pointer by value. If it is a reference to a declaration, we just
8449 // pass its value.
8450 if (VD && (DevPointersMap.count(VD) || HasDevAddrsMap.count(VD))) {
8451 CombinedInfo.Exprs.push_back(VD);
8452 CombinedInfo.BasePointers.emplace_back(Arg);
8453 CombinedInfo.DevicePtrDecls.emplace_back(VD);
8454 CombinedInfo.DevicePointers.emplace_back(DeviceInfoTy::Pointer);
8455 CombinedInfo.Pointers.push_back(Arg);
8456 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
8457 CGF.getTypeSize(CGF.getContext().VoidPtrTy), CGF.Int64Ty,
8458 /*isSigned=*/true));
8459 CombinedInfo.Types.push_back(
8460 OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
8461 OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM);
8462 CombinedInfo.Mappers.push_back(nullptr);
8463 return;
8464 }
8465
8466 using MapData =
8467 std::tuple<OMPClauseMappableExprCommon::MappableExprComponentListRef,
8468 OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>, bool,
8469 const ValueDecl *, const Expr *>;
8470 SmallVector<MapData, 4> DeclComponentLists;
8471 // For member fields listed in is_device_ptr, store them in
8472 // DeclComponentLists for generating components info.
8473 static const OpenMPMapModifierKind Unknown = OMPC_MAP_MODIFIER_unknown;
8474 auto It = DevPointersMap.find(VD);
8475 if (It != DevPointersMap.end())
8476 for (const auto &MCL : It->second)
8477 DeclComponentLists.emplace_back(MCL, OMPC_MAP_to, Unknown,
8478 /*IsImplicit=*/true, nullptr,
8479 nullptr);
8480 auto I = HasDevAddrsMap.find(VD);
8481 if (I != HasDevAddrsMap.end())
8482 for (const auto &MCL : I->second)
8483 DeclComponentLists.emplace_back(MCL, OMPC_MAP_tofrom, Unknown,
8484 /*IsImplicit=*/true, nullptr,
8485 nullptr);
8486 assert(CurDir.is<const OMPExecutableDirective *>() &&
8487 "Expect a executable directive");
8488 const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
8489 for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
8490 const auto *EI = C->getVarRefs().begin();
8491 for (const auto L : C->decl_component_lists(VD)) {
8492 const ValueDecl *VDecl, *Mapper;
8493 // The Expression is not correct if the mapping is implicit
8494 const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
8495 OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
8496 std::tie(VDecl, Components, Mapper) = L;
8497 assert(VDecl == VD && "We got information for the wrong declaration??");
8498 assert(!Components.empty() &&
8499 "Not expecting declaration with no component lists.");
8500 DeclComponentLists.emplace_back(Components, C->getMapType(),
8501 C->getMapTypeModifiers(),
8502 C->isImplicit(), Mapper, E);
8503 ++EI;
8504 }
8505 }
8506 llvm::stable_sort(DeclComponentLists, [](const MapData &LHS,
8507 const MapData &RHS) {
8508 ArrayRef<OpenMPMapModifierKind> MapModifiers = std::get<2>(LHS);
8509 OpenMPMapClauseKind MapType = std::get<1>(RHS);
8510 bool HasPresent =
8511 llvm::is_contained(MapModifiers, clang::OMPC_MAP_MODIFIER_present);
8512 bool HasAllocs = MapType == OMPC_MAP_alloc;
8513 MapModifiers = std::get<2>(RHS);
8514 MapType = std::get<1>(LHS);
8515 bool HasPresentR =
8516 llvm::is_contained(MapModifiers, clang::OMPC_MAP_MODIFIER_present);
8517 bool HasAllocsR = MapType == OMPC_MAP_alloc;
8518 return (HasPresent && !HasPresentR) || (HasAllocs && !HasAllocsR);
8519 });
8520
8521 // Find overlapping elements (including the offset from the base element).
8522 llvm::SmallDenseMap<
8523 const MapData *,
8524 llvm::SmallVector<
8525 OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>,
8526 4>
8527 OverlappedData;
8528 size_t Count = 0;
8529 for (const MapData &L : DeclComponentLists) {
8530 OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
8531 OpenMPMapClauseKind MapType;
8532 ArrayRef<OpenMPMapModifierKind> MapModifiers;
8533 bool IsImplicit;
8534 const ValueDecl *Mapper;
8535 const Expr *VarRef;
8536 std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
8537 L;
8538 ++Count;
8539 for (const MapData &L1 : ArrayRef(DeclComponentLists).slice(Count)) {
8540 OMPClauseMappableExprCommon::MappableExprComponentListRef Components1;
8541 std::tie(Components1, MapType, MapModifiers, IsImplicit, Mapper,
8542 VarRef) = L1;
8543 auto CI = Components.rbegin();
8544 auto CE = Components.rend();
8545 auto SI = Components1.rbegin();
8546 auto SE = Components1.rend();
8547 for (; CI != CE && SI != SE; ++CI, ++SI) {
8548 if (CI->getAssociatedExpression()->getStmtClass() !=
8549 SI->getAssociatedExpression()->getStmtClass())
8550 break;
8551 // Are we dealing with different variables/fields?
8552 if (CI->getAssociatedDeclaration() != SI->getAssociatedDeclaration())
8553 break;
8554 }
8555 // We found an overlap if, for at least one of the lists, we reached
8556 // the head of the components list.
8557 if (CI == CE || SI == SE) {
8558 // Ignore it if it is the same component.
8559 if (CI == CE && SI == SE)
8560 continue;
8561 const auto It = (SI == SE) ? CI : SI;
8562 // If one component is a pointer and another one is a kind of
8563 // dereference of this pointer (array subscript, section, dereference,
8564 // etc.), they do not overlap.
8565 // The same holds if one component is a base and another component is a
8566 // dereferenced pointer memberexpr with the same base.
8567 if (!isa<MemberExpr>(It->getAssociatedExpression()) ||
8568 (std::prev(It)->getAssociatedDeclaration() &&
8569 std::prev(It)
8570 ->getAssociatedDeclaration()
8571 ->getType()
8572 ->isPointerType()) ||
8573 (It->getAssociatedDeclaration() &&
8574 It->getAssociatedDeclaration()->getType()->isPointerType() &&
8575 std::next(It) != CE && std::next(It) != SE))
8576 continue;
8577 const MapData &BaseData = CI == CE ? L : L1;
8578 OMPClauseMappableExprCommon::MappableExprComponentListRef SubData =
8579 SI == SE ? Components : Components1;
8580 auto &OverlappedElements = OverlappedData.FindAndConstruct(&BaseData);
8581 OverlappedElements.getSecond().push_back(SubData);
8582 }
8583 }
8584 }
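// Hedged example of what the loops above record (clauses assumed): for
// 'map(tofrom: s) map(to: s.x)', both component lists share the head 's',
// so the component list for 's.x' is stored in OverlappedData as
// overlapping the list for 's'.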
8585 // Sort the overlapped elements for each item.
8586 SmallVector<const FieldDecl *, 4> Layout;
8587 if (!OverlappedData.empty()) {
8588 const Type *BaseType = VD->getType().getCanonicalType().getTypePtr();
8589 const Type *OrigType = BaseType->getPointeeOrArrayElementType();
8590 while (BaseType != OrigType) {
8591 BaseType = OrigType->getCanonicalTypeInternal().getTypePtr();
8592 OrigType = BaseType->getPointeeOrArrayElementType();
8593 }
8594
8595 if (const auto *CRD = BaseType->getAsCXXRecordDecl())
8596 getPlainLayout(CRD, Layout, /*AsBase=*/false);
8597 else {
8598 const auto *RD = BaseType->getAsRecordDecl();
8599 Layout.append(RD->field_begin(), RD->field_end());
8600 }
8601 }
8602 for (auto &Pair : OverlappedData) {
8603 llvm::stable_sort(
8604 Pair.getSecond(),
8605 [&Layout](
8606 OMPClauseMappableExprCommon::MappableExprComponentListRef First,
8607 OMPClauseMappableExprCommon::MappableExprComponentListRef
8608 Second) {
8609 auto CI = First.rbegin();
8610 auto CE = First.rend();
8611 auto SI = Second.rbegin();
8612 auto SE = Second.rend();
8613 for (; CI != CE && SI != SE; ++CI, ++SI) {
8614 if (CI->getAssociatedExpression()->getStmtClass() !=
8615 SI->getAssociatedExpression()->getStmtClass())
8616 break;
8617 // Are we dealing with different variables/fields?
8618 if (CI->getAssociatedDeclaration() !=
8619 SI->getAssociatedDeclaration())
8620 break;
8621 }
8622
8623 // Lists contain the same elements.
8624 if (CI == CE && SI == SE)
8625 return false;
8626
8627 // A list with fewer elements is less than a list with more elements.
8628 if (CI == CE || SI == SE)
8629 return CI == CE;
8630
8631 const auto *FD1 = cast<FieldDecl>(CI->getAssociatedDeclaration());
8632 const auto *FD2 = cast<FieldDecl>(SI->getAssociatedDeclaration());
8633 if (FD1->getParent() == FD2->getParent())
8634 return FD1->getFieldIndex() < FD2->getFieldIndex();
8635 const auto *It =
8636 llvm::find_if(Layout, [FD1, FD2](const FieldDecl *FD) {
8637 return FD == FD1 || FD == FD2;
8638 });
8639 return *It == FD1;
8640 });
8641 }
8642
8643 // Associated with a capture, because the mapping flags depend on it.
8644 // First, go through all of the elements that have overlapped elements.
8645 bool IsFirstComponentList = true;
8646 MapCombinedInfoTy StructBaseCombinedInfo;
8647 for (const auto &Pair : OverlappedData) {
8648 const MapData &L = *Pair.getFirst();
8649 OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
8650 OpenMPMapClauseKind MapType;
8651 ArrayRef<OpenMPMapModifierKind> MapModifiers;
8652 bool IsImplicit;
8653 const ValueDecl *Mapper;
8654 const Expr *VarRef;
8655 std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
8656 L;
8657 ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
8658 OverlappedComponents = Pair.getSecond();
8659 generateInfoForComponentList(
8660 MapType, MapModifiers, std::nullopt, Components, CombinedInfo,
8661 StructBaseCombinedInfo, PartialStruct, IsFirstComponentList,
8662 IsImplicit, /*GenerateAllInfoForClauses*/ false, Mapper,
8663 /*ForDeviceAddr=*/false, VD, VarRef, OverlappedComponents);
8664 IsFirstComponentList = false;
8665 }
8666 // Go through other elements without overlapped elements.
8667 for (const MapData &L : DeclComponentLists) {
8668 OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
8669 OpenMPMapClauseKind MapType;
8670 ArrayRef<OpenMPMapModifierKind> MapModifiers;
8671 bool IsImplicit;
8672 const ValueDecl *Mapper;
8673 const Expr *VarRef;
8674 std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
8675 L;
8676 auto It = OverlappedData.find(&L);
8677 if (It == OverlappedData.end())
8678 generateInfoForComponentList(
8679 MapType, MapModifiers, std::nullopt, Components, CombinedInfo,
8680 StructBaseCombinedInfo, PartialStruct, IsFirstComponentList,
8681 IsImplicit, /*GenerateAllInfoForClauses*/ false, Mapper,
8682 /*ForDeviceAddr=*/false, VD, VarRef);
8683 IsFirstComponentList = false;
8684 }
8685 }
8686
8687 /// Generate the default map information for a given capture \a CI,
8688 /// record field declaration \a RI and captured value \a CV.
8689 void generateDefaultMapInfo(const CapturedStmt::Capture &CI,
8690 const FieldDecl &RI, llvm::Value *CV,
8691 MapCombinedInfoTy &CombinedInfo) const {
8692 bool IsImplicit = true;
8693 // Do the default mapping.
8694 if (CI.capturesThis()) {
8695 CombinedInfo.Exprs.push_back(nullptr);
8696 CombinedInfo.BasePointers.push_back(CV);
8697 CombinedInfo.DevicePtrDecls.push_back(nullptr);
8698 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
8699 CombinedInfo.Pointers.push_back(CV);
8700 const auto *PtrTy = cast<PointerType>(RI.getType().getTypePtr());
8701 CombinedInfo.Sizes.push_back(
8702 CGF.Builder.CreateIntCast(CGF.getTypeSize(PtrTy->getPointeeType()),
8703 CGF.Int64Ty, /*isSigned=*/true));
8704 // Default map type.
8705 CombinedInfo.Types.push_back(OpenMPOffloadMappingFlags::OMP_MAP_TO |
8706 OpenMPOffloadMappingFlags::OMP_MAP_FROM);
8707 } else if (CI.capturesVariableByCopy()) {
8708 const VarDecl *VD = CI.getCapturedVar();
8709 CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
8710 CombinedInfo.BasePointers.push_back(CV);
8711 CombinedInfo.DevicePtrDecls.push_back(nullptr);
8712 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
8713 CombinedInfo.Pointers.push_back(CV);
8714 if (!RI.getType()->isAnyPointerType()) {
8715 // We have to signal to the runtime captures passed by value that are
8716 // not pointers.
8717 CombinedInfo.Types.push_back(
8718 OpenMPOffloadMappingFlags::OMP_MAP_LITERAL);
8719 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
8720 CGF.getTypeSize(RI.getType()), CGF.Int64Ty, /*isSigned=*/true));
8721 } else {
8722 // Pointers are implicitly mapped with a zero size and no flags
8723 // (other than first map that is added for all implicit maps).
8724 CombinedInfo.Types.push_back(OpenMPOffloadMappingFlags::OMP_MAP_NONE);
8725 CombinedInfo.Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
8726 }
8727 auto I = FirstPrivateDecls.find(VD);
8728 if (I != FirstPrivateDecls.end())
8729 IsImplicit = I->getSecond();
8730 } else {
8731 assert(CI.capturesVariable() && "Expected captured reference.");
8732 const auto *PtrTy = cast<ReferenceType>(RI.getType().getTypePtr());
8733 QualType ElementType = PtrTy->getPointeeType();
8734 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
8735 CGF.getTypeSize(ElementType), CGF.Int64Ty, /*isSigned=*/true));
8736 // The default map type for a scalar/complex type is 'to' because by
8737 // default the value doesn't have to be retrieved. For an aggregate
8738 // type, the default is 'tofrom'.
8739 CombinedInfo.Types.push_back(getMapModifiersForPrivateClauses(CI));
8740 const VarDecl *VD = CI.getCapturedVar();
8741 auto I = FirstPrivateDecls.find(VD);
8742 CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
8743 CombinedInfo.BasePointers.push_back(CV);
8744 CombinedInfo.DevicePtrDecls.push_back(nullptr);
8745 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
8746 if (I != FirstPrivateDecls.end() && ElementType->isAnyPointerType()) {
8747 Address PtrAddr = CGF.EmitLoadOfReference(CGF.MakeAddrLValue(
8748 CV, ElementType, CGF.getContext().getDeclAlign(VD),
8749 AlignmentSource::Decl));
8750 CombinedInfo.Pointers.push_back(PtrAddr.getPointer());
8751 } else {
8752 CombinedInfo.Pointers.push_back(CV);
8753 }
8754 if (I != FirstPrivateDecls.end())
8755 IsImplicit = I->getSecond();
8756 }
8757 // Every default map produces a single argument which is a target parameter.
8758 CombinedInfo.Types.back() |=
8759 OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;
8760
8761 // Add flag stating this is an implicit map.
8762 if (IsImplicit)
8763 CombinedInfo.Types.back() |= OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT;
8764
8765 // No user-defined mapper for default mapping.
8766 CombinedInfo.Mappers.push_back(nullptr);
8767 }
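// Hedged sketch (capture assumed): for an 'int *p' captured by copy into
// a target region, the default entry above is the pointer value with zero
// size and OMP_MAP_NONE, then TARGET_PARAM is added, plus IMPLICIT when
// the mapping is implicit.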
8768};
8769} // anonymous namespace
8770
8771// Try to extract the base declaration from a `this->x` expression if possible.
8772 static const ValueDecl *getDeclFromThisExpr(const Expr *E) {
8773 if (!E)
8774 return nullptr;
8775
8776 if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E->IgnoreParenCasts()))
8777 if (const MemberExpr *ME =
8778 dyn_cast<MemberExpr>(OASE->getBase()->IgnoreParenImpCasts()))
8779 return ME->getMemberDecl();
8780 return nullptr;
8781}
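// Hedged illustration (clause assumed): for 'map(tofrom: this->x[0:n])',
// the section's base is the MemberExpr 'this->x', so the field 'x' is
// returned; any other expression shape yields nullptr.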
8782
8783/// Emit a string constant containing the names of the values mapped to the
8784/// offloading runtime library.
8785llvm::Constant *
8786emitMappingInformation(CodeGenFunction &CGF, llvm::OpenMPIRBuilder &OMPBuilder,
8787 MappableExprsHandler::MappingExprInfo &MapExprs) {
8788
8789 uint32_t SrcLocStrSize;
8790 if (!MapExprs.getMapDecl() && !MapExprs.getMapExpr())
8791 return OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize);
8792
8793 SourceLocation Loc;
8794 if (!MapExprs.getMapDecl() && MapExprs.getMapExpr()) {
8795 if (const ValueDecl *VD = getDeclFromThisExpr(MapExprs.getMapExpr()))
8796 Loc = VD->getLocation();
8797 else
8798 Loc = MapExprs.getMapExpr()->getExprLoc();
8799 } else {
8800 Loc = MapExprs.getMapDecl()->getLocation();
8801 }
8802
8803 std::string ExprName;
8804 if (MapExprs.getMapExpr()) {
8805 PrintingPolicy P(CGF.getContext().getLangOpts());
8806 llvm::raw_string_ostream OS(ExprName);
8807 MapExprs.getMapExpr()->printPretty(OS, nullptr, P);
8808 OS.flush();
8809 } else {
8810 ExprName = MapExprs.getMapDecl()->getNameAsString();
8811 }
8812
8813 PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
8814 return OMPBuilder.getOrCreateSrcLocStr(PLoc.getFilename(), ExprName,
8815 PLoc.getLine(), PLoc.getColumn(),
8816 SrcLocStrSize);
8817}
8818
8819/// Emit the arrays used to pass the captures and map information to the
8820/// offloading runtime library. If there is no map or capture information,
8821/// return nullptr by reference.
8822 static void emitOffloadingArrays(
8823 CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
8824 CGOpenMPRuntime::TargetDataInfo &Info, llvm::OpenMPIRBuilder &OMPBuilder,
8825 bool IsNonContiguous = false) {
8826 CodeGenModule &CGM = CGF.CGM;
8827
8828 // Reset the array information.
8829 Info.clearArrayInfo();
8830 Info.NumberOfPtrs = CombinedInfo.BasePointers.size();
8831
8832 using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
8833 InsertPointTy AllocaIP(CGF.AllocaInsertPt->getParent(),
8834 CGF.AllocaInsertPt->getIterator());
8835 InsertPointTy CodeGenIP(CGF.Builder.GetInsertBlock(),
8836 CGF.Builder.GetInsertPoint());
8837
8838 auto FillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) {
8839 return emitMappingInformation(CGF, OMPBuilder, MapExpr);
8840 };
8841 if (CGM.getCodeGenOpts().getDebugInfo() !=
8842 llvm::codegenoptions::NoDebugInfo) {
8843 CombinedInfo.Names.resize(CombinedInfo.Exprs.size());
8844 llvm::transform(CombinedInfo.Exprs, CombinedInfo.Names.begin(),
8845 FillInfoMap);
8846 }
8847
8848 auto DeviceAddrCB = [&](unsigned int I, llvm::Value *NewDecl) {
8849 if (const ValueDecl *DevVD = CombinedInfo.DevicePtrDecls[I]) {
8850 Info.CaptureDeviceAddrMap.try_emplace(DevVD, NewDecl);
8851 }
8852 };
8853
8854 auto CustomMapperCB = [&](unsigned int I) {
8855 llvm::Value *MFunc = nullptr;
8856 if (CombinedInfo.Mappers[I]) {
8857 Info.HasMapper = true;
8858 MFunc = CGF.CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc(
8859 cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I]));
8860 }
8861 return MFunc;
8862 };
8863 OMPBuilder.emitOffloadingArrays(AllocaIP, CodeGenIP, CombinedInfo, Info,
8864 IsNonContiguous, DeviceAddrCB,
8865 CustomMapperCB);
8866}
8867
8868/// Check for inner distribute directive.
8869static const OMPExecutableDirective *
8870getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) {
8871 const auto *CS = D.getInnermostCapturedStmt();
8872 const auto *Body =
8873 CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
8874 const Stmt *ChildStmt =
8875 CGOpenMPRuntime::getSingleCompoundChild(Ctx, Body);
8876
8877 if (const auto *NestedDir =
8878 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
8879 OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind();
8880 switch (D.getDirectiveKind()) {
8881 case OMPD_target:
8882 // For now, just treat 'target teams loop' as if it's distributed.
8883 if (isOpenMPDistributeDirective(DKind) || DKind == OMPD_teams_loop)
8884 return NestedDir;
8885 if (DKind == OMPD_teams) {
8886 Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(
8887 /*IgnoreCaptured=*/true);
8888 if (!Body)
8889 return nullptr;
8890 ChildStmt = CGOpenMPRuntime::getSingleCompoundChild(Ctx, Body);
8891 if (const auto *NND =
8892 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
8893 DKind = NND->getDirectiveKind();
8894 if (isOpenMPDistributeDirective(DKind))
8895 return NND;
8896 }
8897 }
8898 return nullptr;
8899 case OMPD_target_teams:
8900 if (isOpenMPDistributeDirective(DKind))
8901 return NestedDir;
8902 return nullptr;
8903 case OMPD_target_parallel:
8904 case OMPD_target_simd:
8905 case OMPD_target_parallel_for:
8906 case OMPD_target_parallel_for_simd:
8907 return nullptr;
8908 case OMPD_target_teams_distribute:
8909 case OMPD_target_teams_distribute_simd:
8910 case OMPD_target_teams_distribute_parallel_for:
8911 case OMPD_target_teams_distribute_parallel_for_simd:
8912 case OMPD_parallel:
8913 case OMPD_for:
8914 case OMPD_parallel_for:
8915 case OMPD_parallel_master:
8916 case OMPD_parallel_sections:
8917 case OMPD_for_simd:
8918 case OMPD_parallel_for_simd:
8919 case OMPD_cancel:
8920 case OMPD_cancellation_point:
8921 case OMPD_ordered:
8922 case OMPD_threadprivate:
8923 case OMPD_allocate:
8924 case OMPD_task:
8925 case OMPD_simd:
8926 case OMPD_tile:
8927 case OMPD_unroll:
8928 case OMPD_sections:
8929 case OMPD_section:
8930 case OMPD_single:
8931 case OMPD_master:
8932 case OMPD_critical:
8933 case OMPD_taskyield:
8934 case OMPD_barrier:
8935 case OMPD_taskwait:
8936 case OMPD_taskgroup:
8937 case OMPD_atomic:
8938 case OMPD_flush:
8939 case OMPD_depobj:
8940 case OMPD_scan:
8941 case OMPD_teams:
8942 case OMPD_target_data:
8943 case OMPD_target_exit_data:
8944 case OMPD_target_enter_data:
8945 case OMPD_distribute:
8946 case OMPD_distribute_simd:
8947 case OMPD_distribute_parallel_for:
8948 case OMPD_distribute_parallel_for_simd:
8949 case OMPD_teams_distribute:
8950 case OMPD_teams_distribute_simd:
8951 case OMPD_teams_distribute_parallel_for:
8952 case OMPD_teams_distribute_parallel_for_simd:
8953 case OMPD_target_update:
8954 case OMPD_declare_simd:
8955 case OMPD_declare_variant:
8956 case OMPD_begin_declare_variant:
8957 case OMPD_end_declare_variant:
8958 case OMPD_declare_target:
8959 case OMPD_end_declare_target:
8960 case OMPD_declare_reduction:
8961 case OMPD_declare_mapper:
8962 case OMPD_taskloop:
8963 case OMPD_taskloop_simd:
8964 case OMPD_master_taskloop:
8965 case OMPD_master_taskloop_simd:
8966 case OMPD_parallel_master_taskloop:
8967 case OMPD_parallel_master_taskloop_simd:
8968 case OMPD_requires:
8969 case OMPD_metadirective:
8970 case OMPD_unknown:
8971 default:
8972 llvm_unreachable("Unexpected directive.");
8973 }
8974 }
8975
8976 return nullptr;
8977}
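// For illustration (not part of the original source): for a region such as
//   #pragma omp target
//   #pragma omp teams
//   #pragma omp distribute parallel for
//   for (...) {...}
// the helper above drills through the 'teams' construct and returns the
// nested 'distribute parallel for' directive; if no distribute-style
// directive is nested inside the target region, it returns nullptr.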
8978
8979/// Emit the user-defined mapper function. The code generation follows the
8980/// pattern in the example below.
8981/// \code
8982/// void .omp_mapper.<type_name>.<mapper_id>.(void *rt_mapper_handle,
8983/// void *base, void *begin,
8984/// int64_t size, int64_t type,
8985/// void *name = nullptr) {
8986/// // Allocate space for an array section first or add a base/begin for
8987/// // pointer dereference.
8988/// if ((size > 1 || (base != begin && maptype.IsPtrAndObj)) &&
8989/// !maptype.IsDelete)
8990/// __tgt_push_mapper_component(rt_mapper_handle, base, begin,
8991/// size*sizeof(Ty), clearToFromMember(type));
8992/// // Map members.
8993/// for (unsigned i = 0; i < size; i++) {
8994/// // For each component specified by this mapper:
8995/// for (auto c : begin[i]->all_components) {
8996/// if (c.hasMapper())
8997/// (*c.Mapper())(rt_mapper_handle, c.arg_base, c.arg_begin, c.arg_size,
8998/// c.arg_type, c.arg_name);
8999/// else
9000/// __tgt_push_mapper_component(rt_mapper_handle, c.arg_base,
9001/// c.arg_begin, c.arg_size, c.arg_type,
9002/// c.arg_name);
9003/// }
9004/// }
9005/// // Delete the array section.
9006/// if (size > 1 && maptype.IsDelete)
9007/// __tgt_push_mapper_component(rt_mapper_handle, base, begin,
9008/// size*sizeof(Ty), clearToFromMember(type));
9009/// }
9010/// \endcode
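/// For reference, a hypothetical user-defined mapper that would be lowered by
/// this routine (illustrative only, not taken from the original source):
/// \code
/// struct S { int len; double *data; };
/// #pragma omp declare mapper(id : struct S s) map(s.len, s.data[0:s.len])
/// \endcode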
9011void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D,
9012 CodeGenFunction *CGF) {
9013 if (UDMMap.count(D) > 0)
9014 return;
9015 ASTContext &C = CGM.getContext();
9016 QualType Ty = D->getType();
9017 QualType PtrTy = C.getPointerType(Ty).withRestrict();
9018 QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
9019 auto *MapperVarDecl =
9020 cast<VarDecl>(cast<DeclRefExpr>(D->getMapperVarRef())->getDecl());
9021 SourceLocation Loc = D->getLocation();
9022 CharUnits ElementSize = C.getTypeSizeInChars(Ty);
9023 llvm::Type *ElemTy = CGM.getTypes().ConvertTypeForMem(Ty);
9024
9025 // Prepare mapper function arguments and attributes.
9026 ImplicitParamDecl HandleArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
9027 C.VoidPtrTy, ImplicitParamKind::Other);
9028 ImplicitParamDecl BaseArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
9029 ImplicitParamKind::Other);
9030 ImplicitParamDecl BeginArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
9031 C.VoidPtrTy, ImplicitParamKind::Other);
9032 ImplicitParamDecl SizeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
9033 ImplicitParamKind::Other);
9034 ImplicitParamDecl TypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
9035 ImplicitParamKind::Other);
9036 ImplicitParamDecl NameArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
9037 ImplicitParamKind::Other);
9038 FunctionArgList Args;
9039 Args.push_back(&HandleArg);
9040 Args.push_back(&BaseArg);
9041 Args.push_back(&BeginArg);
9042 Args.push_back(&SizeArg);
9043 Args.push_back(&TypeArg);
9044 Args.push_back(&NameArg);
9045 const CGFunctionInfo &FnInfo =
9046 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
9047 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
9048 SmallString<64> TyStr;
9049 llvm::raw_svector_ostream Out(TyStr);
9051 std::string Name = getName({"omp_mapper", TyStr, D->getName()});
9052 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
9053 Name, &CGM.getModule());
9054 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
9055 Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
9056 // Start the mapper function code generation.
9057 CodeGenFunction MapperCGF(CGM);
9058 MapperCGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
9059 // Compute the starting and end addresses of array elements.
9060 llvm::Value *Size = MapperCGF.EmitLoadOfScalar(
9061 MapperCGF.GetAddrOfLocalVar(&SizeArg), /*Volatile=*/false,
9062 C.getPointerType(Int64Ty), Loc);
9063 // Prepare common arguments for array initialization and deletion.
9064 llvm::Value *Handle = MapperCGF.EmitLoadOfScalar(
9065 MapperCGF.GetAddrOfLocalVar(&HandleArg),
9066 /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
9067 llvm::Value *BaseIn = MapperCGF.EmitLoadOfScalar(
9068 MapperCGF.GetAddrOfLocalVar(&BaseArg),
9069 /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
9070 llvm::Value *BeginIn = MapperCGF.EmitLoadOfScalar(
9071 MapperCGF.GetAddrOfLocalVar(&BeginArg),
9072 /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
9073 // Convert the size in bytes into the number of array elements.
9074 Size = MapperCGF.Builder.CreateExactUDiv(
9075 Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
9076 llvm::Value *PtrBegin = MapperCGF.Builder.CreateBitCast(
9077 BeginIn, CGM.getTypes().ConvertTypeForMem(PtrTy));
9078 llvm::Value *PtrEnd = MapperCGF.Builder.CreateGEP(ElemTy, PtrBegin, Size);
9079 llvm::Value *MapType = MapperCGF.EmitLoadOfScalar(
9080 MapperCGF.GetAddrOfLocalVar(&TypeArg), /*Volatile=*/false,
9081 C.getPointerType(Int64Ty), Loc);
9082 llvm::Value *MapName = MapperCGF.EmitLoadOfScalar(
9083 MapperCGF.GetAddrOfLocalVar(&NameArg),
9084 /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
9085
9086 // Emit array initialization if this is an array section and \p MapType indicates
9087 // that memory allocation is required.
9088 llvm::BasicBlock *HeadBB = MapperCGF.createBasicBlock("omp.arraymap.head");
9089 emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
9090 MapName, ElementSize, HeadBB, /*IsInit=*/true);
9091
9092 // Emit a for loop to iterate through SizeArg elements and map all of them.
9093
9094 // Emit the loop header block.
9095 MapperCGF.EmitBlock(HeadBB);
9096 llvm::BasicBlock *BodyBB = MapperCGF.createBasicBlock("omp.arraymap.body");
9097 llvm::BasicBlock *DoneBB = MapperCGF.createBasicBlock("omp.done");
9098 // Evaluate whether the initial condition is satisfied.
9099 llvm::Value *IsEmpty =
9100 MapperCGF.Builder.CreateICmpEQ(PtrBegin, PtrEnd, "omp.arraymap.isempty");
9101 MapperCGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
9102 llvm::BasicBlock *EntryBB = MapperCGF.Builder.GetInsertBlock();
9103
9104 // Emit the loop body block.
9105 MapperCGF.EmitBlock(BodyBB);
9106 llvm::BasicBlock *LastBB = BodyBB;
9107 llvm::PHINode *PtrPHI = MapperCGF.Builder.CreatePHI(
9108 PtrBegin->getType(), 2, "omp.arraymap.ptrcurrent");
9109 PtrPHI->addIncoming(PtrBegin, EntryBB);
9110 Address PtrCurrent(PtrPHI, ElemTy,
9111 MapperCGF.GetAddrOfLocalVar(&BeginArg)
9112 .getAlignment()
9113 .alignmentOfArrayElement(ElementSize));
9114 // Privatize the declared variable of mapper to be the current array element.
9115 CodeGenFunction::OMPPrivateScope Scope(MapperCGF);
9116 Scope.addPrivate(MapperVarDecl, PtrCurrent);
9117 (void)Scope.Privatize();
9118
9119 // Get map clause information. Fill up the arrays with all mapped variables.
9120 MappableExprsHandler::MapCombinedInfoTy Info;
9121 MappableExprsHandler MEHandler(*D, MapperCGF);
9122 MEHandler.generateAllInfoForMapper(Info, OMPBuilder);
9123
9124 // Call the runtime API __tgt_mapper_num_components to get the number of
9125 // pre-existing components.
9126 llvm::Value *OffloadingArgs[] = {Handle};
9127 llvm::Value *PreviousSize = MapperCGF.EmitRuntimeCall(
9128 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
9129 OMPRTL___tgt_mapper_num_components),
9130 OffloadingArgs);
9131 llvm::Value *ShiftedPreviousSize = MapperCGF.Builder.CreateShl(
9132 PreviousSize,
9133 MapperCGF.Builder.getInt64(MappableExprsHandler::getFlagMemberOffset()));
9134
9135 // Fill up the runtime mapper handle for all components.
9136 for (unsigned I = 0; I < Info.BasePointers.size(); ++I) {
9137 llvm::Value *CurBaseArg = MapperCGF.Builder.CreateBitCast(
9138 Info.BasePointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
9139 llvm::Value *CurBeginArg = MapperCGF.Builder.CreateBitCast(
9140 Info.Pointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
9141 llvm::Value *CurSizeArg = Info.Sizes[I];
9142 llvm::Value *CurNameArg =
9143 (CGM.getCodeGenOpts().getDebugInfo() ==
9144 llvm::codegenoptions::NoDebugInfo)
9145 ? llvm::ConstantPointerNull::get(CGM.VoidPtrTy)
9146 : emitMappingInformation(MapperCGF, OMPBuilder, Info.Exprs[I]);
9147
9148 // Extract the MEMBER_OF field from the map type.
9149 llvm::Value *OriMapType = MapperCGF.Builder.getInt64(
9150 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
9151 Info.Types[I]));
9152 llvm::Value *MemberMapType =
9153 MapperCGF.Builder.CreateNUWAdd(OriMapType, ShiftedPreviousSize);
9154
9155 // Combine the map type inherited from user-defined mapper with that
9156 // specified in the program. According to the OMP_MAP_TO and OMP_MAP_FROM
9157 // bits of the \a MapType, which is the input argument of the mapper
9158 // function, the following code will set the OMP_MAP_TO and OMP_MAP_FROM
9159 // bits of MemberMapType.
9160 // [OpenMP 5.0], 1.2.6. map-type decay.
9161 // | alloc | to | from | tofrom | release | delete
9162 // ----------------------------------------------------------
9163 // alloc | alloc | alloc | alloc | alloc | release | delete
9164 // to | alloc | to | alloc | to | release | delete
9165 // from | alloc | alloc | from | from | release | delete
9166 // tofrom | alloc | to | from | tofrom | release | delete
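// Worked example (illustrative): if the mapper was declared with 'map(to: ...)'
// for a member (inherited type 'to') and the enclosing construct maps the
// object 'tofrom', the 'to' row and 'tofrom' column of the table above yield
// 'to', i.e. the branches emitted below clear OMP_MAP_FROM from MemberMapType.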
9167 llvm::Value *LeftToFrom = MapperCGF.Builder.CreateAnd(
9168 MapType,
9169 MapperCGF.Builder.getInt64(
9170 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
9171 OpenMPOffloadMappingFlags::OMP_MAP_TO |
9172 OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
9173 llvm::BasicBlock *AllocBB = MapperCGF.createBasicBlock("omp.type.alloc");
9174 llvm::BasicBlock *AllocElseBB =
9175 MapperCGF.createBasicBlock("omp.type.alloc.else");
9176 llvm::BasicBlock *ToBB = MapperCGF.createBasicBlock("omp.type.to");
9177 llvm::BasicBlock *ToElseBB = MapperCGF.createBasicBlock("omp.type.to.else");
9178 llvm::BasicBlock *FromBB = MapperCGF.createBasicBlock("omp.type.from");
9179 llvm::BasicBlock *EndBB = MapperCGF.createBasicBlock("omp.type.end");
9180 llvm::Value *IsAlloc = MapperCGF.Builder.CreateIsNull(LeftToFrom);
9181 MapperCGF.Builder.CreateCondBr(IsAlloc, AllocBB, AllocElseBB);
9182 // In case of alloc, clear OMP_MAP_TO and OMP_MAP_FROM.
9183 MapperCGF.EmitBlock(AllocBB);
9184 llvm::Value *AllocMapType = MapperCGF.Builder.CreateAnd(
9185 MemberMapType,
9186 MapperCGF.Builder.getInt64(
9187 ~static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
9188 OpenMPOffloadMappingFlags::OMP_MAP_TO |
9189 OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
9190 MapperCGF.Builder.CreateBr(EndBB);
9191 MapperCGF.EmitBlock(AllocElseBB);
9192 llvm::Value *IsTo = MapperCGF.Builder.CreateICmpEQ(
9193 LeftToFrom,
9194 MapperCGF.Builder.getInt64(
9195 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
9196 OpenMPOffloadMappingFlags::OMP_MAP_TO)));
9197 MapperCGF.Builder.CreateCondBr(IsTo, ToBB, ToElseBB);
9198 // In case of to, clear OMP_MAP_FROM.
9199 MapperCGF.EmitBlock(ToBB);
9200 llvm::Value *ToMapType = MapperCGF.Builder.CreateAnd(
9201 MemberMapType,
9202 MapperCGF.Builder.getInt64(
9203 ~static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
9204 OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
9205 MapperCGF.Builder.CreateBr(EndBB);
9206 MapperCGF.EmitBlock(ToElseBB);
9207 llvm::Value *IsFrom = MapperCGF.Builder.CreateICmpEQ(
9208 LeftToFrom,
9209 MapperCGF.Builder.getInt64(
9210 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
9211 OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
9212 MapperCGF.Builder.CreateCondBr(IsFrom, FromBB, EndBB);
9213 // In case of from, clear OMP_MAP_TO.
9214 MapperCGF.EmitBlock(FromBB);
9215 llvm::Value *FromMapType = MapperCGF.Builder.CreateAnd(
9216 MemberMapType,
9217 MapperCGF.Builder.getInt64(
9218 ~static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
9219 OpenMPOffloadMappingFlags::OMP_MAP_TO)));
9220 // In case of tofrom, do nothing.
9221 MapperCGF.EmitBlock(EndBB);
9222 LastBB = EndBB;
9223 llvm::PHINode *CurMapType =
9224 MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.maptype");
9225 CurMapType->addIncoming(AllocMapType, AllocBB);
9226 CurMapType->addIncoming(ToMapType, ToBB);
9227 CurMapType->addIncoming(FromMapType, FromBB);
9228 CurMapType->addIncoming(MemberMapType, ToElseBB);
9229
9230 llvm::Value *OffloadingArgs[] = {Handle, CurBaseArg, CurBeginArg,
9231 CurSizeArg, CurMapType, CurNameArg};
9232 if (Info.Mappers[I]) {
9233 // Call the corresponding mapper function.
9234 llvm::Function *MapperFunc = getOrCreateUserDefinedMapperFunc(
9235 cast<OMPDeclareMapperDecl>(Info.Mappers[I]));
9236 assert(MapperFunc && "Expect a valid mapper function is available.");
9237 MapperCGF.EmitNounwindRuntimeCall(MapperFunc, OffloadingArgs);
9238 } else {
9239 // Call the runtime API __tgt_push_mapper_component to fill up the runtime
9240 // data structure.
9241 MapperCGF.EmitRuntimeCall(
9242 OMPBuilder.getOrCreateRuntimeFunction(
9243 CGM.getModule(), OMPRTL___tgt_push_mapper_component),
9244 OffloadingArgs);
9245 }
9246 }
9247
9248 // Update the pointer to point to the next element that needs to be mapped,
9249 // and check whether we have mapped all elements.
9250 llvm::Value *PtrNext = MapperCGF.Builder.CreateConstGEP1_32(
9251 ElemTy, PtrPHI, /*Idx0=*/1, "omp.arraymap.next");
9252 PtrPHI->addIncoming(PtrNext, LastBB);
9253 llvm::Value *IsDone =
9254 MapperCGF.Builder.CreateICmpEQ(PtrNext, PtrEnd, "omp.arraymap.isdone");
9255 llvm::BasicBlock *ExitBB = MapperCGF.createBasicBlock("omp.arraymap.exit");
9256 MapperCGF.Builder.CreateCondBr(IsDone, ExitBB, BodyBB);
9257
9258 MapperCGF.EmitBlock(ExitBB);
9259 // Emit array deletion if this is an array section and \p MapType indicates
9260 // that deletion is required.
9261 emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
9262 MapName, ElementSize, DoneBB, /*IsInit=*/false);
9263
9264 // Emit the function exit block.
9265 MapperCGF.EmitBlock(DoneBB, /*IsFinished=*/true);
9266 MapperCGF.FinishFunction();
9267 UDMMap.try_emplace(D, Fn);
9268 if (CGF) {
9269 auto &Decls = FunctionUDMMap.FindAndConstruct(CGF->CurFn);
9270 Decls.second.push_back(D);
9271 }
9272}
9273
9274/// Emit the array initialization or deletion portion for user-defined mapper
9275/// code generation. First, it evaluates whether an array section is mapped and
9276/// whether the \a MapType instructs to delete this section. If \a IsInit is
9277/// true, and \a MapType indicates to not delete this array, array
9278/// initialization code is generated. If \a IsInit is false, and \a MapType
9279/// indicates to delete this array, array deletion code is generated.
9280void CGOpenMPRuntime::emitUDMapperArrayInitOrDel(
9281 CodeGenFunction &MapperCGF, llvm::Value *Handle, llvm::Value *Base,
9282 llvm::Value *Begin, llvm::Value *Size, llvm::Value *MapType,
9283 llvm::Value *MapName, CharUnits ElementSize, llvm::BasicBlock *ExitBB,
9284 bool IsInit) {
9285 StringRef Prefix = IsInit ? ".init" : ".del";
9286
9287 // Evaluate if this is an array section.
9288 llvm::BasicBlock *BodyBB =
9289 MapperCGF.createBasicBlock(getName({"omp.array", Prefix}));
9290 llvm::Value *IsArray = MapperCGF.Builder.CreateICmpSGT(
9291 Size, MapperCGF.Builder.getInt64(1), "omp.arrayinit.isarray");
9292 llvm::Value *DeleteBit = MapperCGF.Builder.CreateAnd(
9293 MapType,
9294 MapperCGF.Builder.getInt64(
9295 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
9296 OpenMPOffloadMappingFlags::OMP_MAP_DELETE)));
9297 llvm::Value *DeleteCond;
9298 llvm::Value *Cond;
9299 if (IsInit) {
9300 // base != begin?
9301 llvm::Value *BaseIsBegin = MapperCGF.Builder.CreateICmpNE(Base, Begin);
9302 // IsPtrAndObj?
9303 llvm::Value *PtrAndObjBit = MapperCGF.Builder.CreateAnd(
9304 MapType,
9305 MapperCGF.Builder.getInt64(
9306 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
9307 OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ)));
9308 PtrAndObjBit = MapperCGF.Builder.CreateIsNotNull(PtrAndObjBit);
9309 BaseIsBegin = MapperCGF.Builder.CreateAnd(BaseIsBegin, PtrAndObjBit);
9310 Cond = MapperCGF.Builder.CreateOr(IsArray, BaseIsBegin);
9311 DeleteCond = MapperCGF.Builder.CreateIsNull(
9312 DeleteBit, getName({"omp.array", Prefix, ".delete"}));
9313 } else {
9314 Cond = IsArray;
9315 DeleteCond = MapperCGF.Builder.CreateIsNotNull(
9316 DeleteBit, getName({"omp.array", Prefix, ".delete"}));
9317 }
9318 Cond = MapperCGF.Builder.CreateAnd(Cond, DeleteCond);
9319 MapperCGF.Builder.CreateCondBr(Cond, BodyBB, ExitBB);
9320
9321 MapperCGF.EmitBlock(BodyBB);
9322 // Get the array size by multiplying element size and element number (i.e., \p
9323 // Size).
9324 llvm::Value *ArraySize = MapperCGF.Builder.CreateNUWMul(
9325 Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
9326 // Remove OMP_MAP_TO and OMP_MAP_FROM from the map type, so that it achieves
9327 // memory allocation/deletion purpose only.
9328 llvm::Value *MapTypeArg = MapperCGF.Builder.CreateAnd(
9329 MapType,
9330 MapperCGF.Builder.getInt64(
9331 ~static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
9332 OpenMPOffloadMappingFlags::OMP_MAP_TO |
9333 OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
9334 MapTypeArg = MapperCGF.Builder.CreateOr(
9335 MapTypeArg,
9336 MapperCGF.Builder.getInt64(
9337 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
9338 OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT)));
9339
9340 // Call the runtime API __tgt_push_mapper_component to fill up the runtime
9341 // data structure.
9342 llvm::Value *OffloadingArgs[] = {Handle, Base, Begin,
9343 ArraySize, MapTypeArg, MapName};
9344 MapperCGF.EmitRuntimeCall(
9345 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
9346 OMPRTL___tgt_push_mapper_component),
9347 OffloadingArgs);
9348}
9349
9350llvm::Function *CGOpenMPRuntime::getOrCreateUserDefinedMapperFunc(
9351 const OMPDeclareMapperDecl *D) {
9352 auto I = UDMMap.find(D);
9353 if (I != UDMMap.end())
9354 return I->second;
9355 emitUserDefinedMapper(D);
9356 return UDMMap.lookup(D);
9357}
9358
9359llvm::Value *CGOpenMPRuntime::emitTargetNumIterationsCall(
9360 CodeGenFunction &CGF, const OMPExecutableDirective &D,
9361 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
9362 const OMPLoopDirective &D)>
9363 SizeEmitter) {
9364 OpenMPDirectiveKind Kind = D.getDirectiveKind();
9365 const OMPExecutableDirective *TD = &D;
9366 // Get nested teams distribute kind directive, if any.
9367 if ((!isOpenMPDistributeDirective(Kind) || !isOpenMPTeamsDirective(Kind)) &&
9368 Kind != OMPD_target_teams_loop)
9369 TD = getNestedDistributeDirective(CGM.getContext(), D);
9370 if (!TD)
9371 return llvm::ConstantInt::get(CGF.Int64Ty, 0);
9372
9373 const auto *LD = cast<OMPLoopDirective>(TD);
9374 if (llvm::Value *NumIterations = SizeEmitter(CGF, *LD))
9375 return NumIterations;
9376 return llvm::ConstantInt::get(CGF.Int64Ty, 0);
9377}
9378
9379static void
9380emitTargetCallFallback(CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn,
9381 const OMPExecutableDirective &D,
9382 llvm::SmallVectorImpl<llvm::Value *> &CapturedVars,
9383 bool RequiresOuterTask, const CapturedStmt &CS,
9384 bool OffloadingMandatory, CodeGenFunction &CGF) {
9385 if (OffloadingMandatory) {
9386 CGF.Builder.CreateUnreachable();
9387 } else {
9388 if (RequiresOuterTask) {
9389 CapturedVars.clear();
9390 CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
9391 }
9392 OMPRuntime->emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn,
9393 CapturedVars);
9394 }
9395}
9396
9397static llvm::Value *emitDeviceID(
9398 llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
9399 CodeGenFunction &CGF) {
9400 // Emit device ID if any.
9401 llvm::Value *DeviceID;
9402 if (Device.getPointer()) {
9403 assert((Device.getInt() == OMPC_DEVICE_unknown ||
9404 Device.getInt() == OMPC_DEVICE_device_num) &&
9405 "Expected device_num modifier.");
9406 llvm::Value *DevVal = CGF.EmitScalarExpr(Device.getPointer());
9407 DeviceID =
9408 CGF.Builder.CreateIntCast(DevVal, CGF.Int64Ty, /*isSigned=*/true);
9409 } else {
9410 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
9411 }
9412 return DeviceID;
9413}
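// For illustration (not part of the original source): 'device(2)' on the
// directive yields an i64 constant 2 here, while a directive without a device
// clause falls back to OMP_DEVICEID_UNDEF so the runtime picks the default
// device.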
9414
9415static llvm::Value *emitDynCGGroupMem(const OMPExecutableDirective &D,
9416 CodeGenFunction &CGF) {
9417 llvm::Value *DynCGroupMem = CGF.Builder.getInt32(0);
9418
9419 if (auto *DynMemClause = D.getSingleClause<OMPXDynCGroupMemClause>()) {
9420 CodeGenFunction::RunCleanupsScope DynCGroupMemScope(CGF);
9421 llvm::Value *DynCGroupMemVal = CGF.EmitScalarExpr(
9422 DynMemClause->getSize(), /*IgnoreResultAssign=*/true);
9423 DynCGroupMem = CGF.Builder.CreateIntCast(DynCGroupMemVal, CGF.Int32Ty,
9424 /*isSigned=*/false);
9425 }
9426 return DynCGroupMem;
9427}
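// For illustration (not part of the original source): the LLVM extension
// clause 'ompx_dyn_cgroup_mem(1024)' would make this helper return an i32
// value of 1024, later passed to the kernel launch as the dynamic per-team
// memory size; without the clause it returns 0.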
9428
9429static void emitTargetCallKernelLaunch(
9430 CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn,
9431 const OMPExecutableDirective &D,
9432 llvm::SmallVectorImpl<llvm::Value *> &CapturedVars, bool RequiresOuterTask,
9433 const CapturedStmt &CS, bool OffloadingMandatory,
9434 llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
9435 llvm::Value *OutlinedFnID, CodeGenFunction::OMPTargetDataInfo &InputInfo,
9436 llvm::Value *&MapTypesArray, llvm::Value *&MapNamesArray,
9437 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
9438 const OMPLoopDirective &D)>
9439 SizeEmitter,
9440 CodeGenFunction &CGF, CodeGenModule &CGM) {
9441 llvm::OpenMPIRBuilder &OMPBuilder = OMPRuntime->getOMPBuilder();
9442
9443 // Fill up the arrays with all the captured variables.
9444 MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
9445
9446 // Get mappable expression information.
9447 MappableExprsHandler MEHandler(D, CGF);
9448 llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers;
9449 llvm::DenseSet<CanonicalDeclPtr<const Decl>> MappedVarSet;
9450
9451 auto RI = CS.getCapturedRecordDecl()->field_begin();
9452 auto *CV = CapturedVars.begin();
9453 for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),
9454 CE = CS.capture_end();
9455 CI != CE; ++CI, ++RI, ++CV) {
9456 MappableExprsHandler::MapCombinedInfoTy CurInfo;
9457 MappableExprsHandler::StructRangeInfoTy PartialStruct;
9458
9459 // VLA sizes are passed to the outlined region by copy and do not have map
9460 // information associated.
9461 if (CI->capturesVariableArrayType()) {
9462 CurInfo.Exprs.push_back(nullptr);
9463 CurInfo.BasePointers.push_back(*CV);
9464 CurInfo.DevicePtrDecls.push_back(nullptr);
9465 CurInfo.DevicePointers.push_back(
9466 MappableExprsHandler::DeviceInfoTy::None);
9467 CurInfo.Pointers.push_back(*CV);
9468 CurInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
9469 CGF.getTypeSize(RI->getType()), CGF.Int64Ty, /*isSigned=*/true));
9470 // Copy to the device as an argument. No need to retrieve it.
9471 CurInfo.Types.push_back(OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
9472 OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM |
9473 OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT);
9474 CurInfo.Mappers.push_back(nullptr);
9475 } else {
9476 // If we have any information in the map clause, we use it, otherwise we
9477 // just do a default mapping.
9478 MEHandler.generateInfoForCapture(CI, *CV, CurInfo, PartialStruct);
9479 if (!CI->capturesThis())
9480 MappedVarSet.insert(CI->getCapturedVar());
9481 else
9482 MappedVarSet.insert(nullptr);
9483 if (CurInfo.BasePointers.empty() && !PartialStruct.Base.isValid())
9484 MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurInfo);
9485 // Generate correct mapping for variables captured by reference in
9486 // lambdas.
9487 if (CI->capturesVariable())
9488 MEHandler.generateInfoForLambdaCaptures(CI->getCapturedVar(), *CV,
9489 CurInfo, LambdaPointers);
9490 }
9491 // We expect to have at least an element of information for this capture.
9492 assert((!CurInfo.BasePointers.empty() || PartialStruct.Base.isValid()) &&
9493 "Non-existing map pointer for capture!");
9494 assert(CurInfo.BasePointers.size() == CurInfo.Pointers.size() &&
9495 CurInfo.BasePointers.size() == CurInfo.Sizes.size() &&
9496 CurInfo.BasePointers.size() == CurInfo.Types.size() &&
9497 CurInfo.BasePointers.size() == CurInfo.Mappers.size() &&
9498 "Inconsistent map information sizes!");
9499
9500 // If there is an entry in PartialStruct it means we have a struct with
9501 // individual members mapped. Emit an extra combined entry.
9502 if (PartialStruct.Base.isValid()) {
9503 CombinedInfo.append(PartialStruct.PreliminaryMapData);
9504 MEHandler.emitCombinedEntry(
9505 CombinedInfo, CurInfo.Types, PartialStruct, CI->capturesThis(),
9506 OMPBuilder, nullptr,
9507 !PartialStruct.PreliminaryMapData.BasePointers.empty());
9508 }
9509
9510 // We need to append the results of this capture to what we already have.
9511 CombinedInfo.append(CurInfo);
9512 }
9513 // Adjust MEMBER_OF flags for the lambdas captures.
9514 MEHandler.adjustMemberOfForLambdaCaptures(
9515 OMPBuilder, LambdaPointers, CombinedInfo.BasePointers,
9516 CombinedInfo.Pointers, CombinedInfo.Types);
9517 // Map any list items in a map clause that were not captures because they
9518 // weren't referenced within the construct.
9519 MEHandler.generateAllInfo(CombinedInfo, OMPBuilder, MappedVarSet);
9520
9521 CGOpenMPRuntime::TargetDataInfo Info;
9522 // Fill up the arrays and create the arguments.
9523 emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder);
9524 bool EmitDebug = CGF.CGM.getCodeGenOpts().getDebugInfo() !=
9525 llvm::codegenoptions::NoDebugInfo;
9526 OMPBuilder.emitOffloadingArraysArgument(CGF.Builder, Info.RTArgs, Info,
9527 EmitDebug,
9528 /*ForEndCall=*/false);
9529
9530 InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
9531 InputInfo.BasePointersArray = Address(Info.RTArgs.BasePointersArray,
9532 CGF.VoidPtrTy, CGM.getPointerAlign());
9533 InputInfo.PointersArray =
9534 Address(Info.RTArgs.PointersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
9535 InputInfo.SizesArray =
9536 Address(Info.RTArgs.SizesArray, CGF.Int64Ty, CGM.getPointerAlign());
9537 InputInfo.MappersArray =
9538 Address(Info.RTArgs.MappersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
9539 MapTypesArray = Info.RTArgs.MapTypesArray;
9540 MapNamesArray = Info.RTArgs.MapNamesArray;
9541
9542 auto &&ThenGen = [&OMPRuntime, OutlinedFn, &D, &CapturedVars,
9543 RequiresOuterTask, &CS, OffloadingMandatory, Device,
9544 OutlinedFnID, &InputInfo, &MapTypesArray, &MapNamesArray,
9545 SizeEmitter](CodeGenFunction &CGF, PrePostActionTy &) {
9546 bool IsReverseOffloading = Device.getInt() == OMPC_DEVICE_ancestor;
9547
9548 if (IsReverseOffloading) {
9549 // Reverse offloading is not supported, so just execute on the host.
9550 // FIXME: This fallback solution is incorrect since it ignores the
9551 // OMP_TARGET_OFFLOAD environment variable. Instead it would be better to
9552 // assert here and ensure SEMA emits an error.
9553 emitTargetCallFallback(OMPRuntime, OutlinedFn, D, CapturedVars,
9554 RequiresOuterTask, CS, OffloadingMandatory, CGF);
9555 return;
9556 }
9557
9558 bool HasNoWait = D.hasClausesOfKind<OMPNowaitClause>();
9559 unsigned NumTargetItems = InputInfo.NumberOfTargetItems;
9560
9561 llvm::Value *BasePointersArray = InputInfo.BasePointersArray.getPointer();
9562 llvm::Value *PointersArray = InputInfo.PointersArray.getPointer();
9563 llvm::Value *SizesArray = InputInfo.SizesArray.getPointer();
9564 llvm::Value *MappersArray = InputInfo.MappersArray.getPointer();
9565
9566 auto &&EmitTargetCallFallbackCB =
9567 [&OMPRuntime, OutlinedFn, &D, &CapturedVars, RequiresOuterTask, &CS,
9568 OffloadingMandatory, &CGF](llvm::OpenMPIRBuilder::InsertPointTy IP)
9569 -> llvm::OpenMPIRBuilder::InsertPointTy {
9570 CGF.Builder.restoreIP(IP);
9571 emitTargetCallFallback(OMPRuntime, OutlinedFn, D, CapturedVars,
9572 RequiresOuterTask, CS, OffloadingMandatory, CGF);
9573 return CGF.Builder.saveIP();
9574 };
9575
9576 llvm::Value *DeviceID = emitDeviceID(Device, CGF);
9577 llvm::Value *NumTeams = OMPRuntime->emitNumTeamsForTargetDirective(CGF, D);
9578 llvm::Value *NumThreads =
9579 OMPRuntime->emitNumThreadsForTargetDirective(CGF, D);
9580 llvm::Value *RTLoc = OMPRuntime->emitUpdateLocation(CGF, D.getBeginLoc());
9581 llvm::Value *NumIterations =
9582 OMPRuntime->emitTargetNumIterationsCall(CGF, D, SizeEmitter);
9583 llvm::Value *DynCGGroupMem = emitDynCGGroupMem(D, CGF);
9584 llvm::OpenMPIRBuilder::InsertPointTy AllocaIP(
9585 CGF.AllocaInsertPt->getParent(), CGF.AllocaInsertPt->getIterator());
9586
9587 llvm::OpenMPIRBuilder::TargetDataRTArgs RTArgs(
9588 BasePointersArray, PointersArray, SizesArray, MapTypesArray,
9589 nullptr /* MapTypesArrayEnd */, MappersArray, MapNamesArray);
9590
9591 llvm::OpenMPIRBuilder::TargetKernelArgs Args(
9592 NumTargetItems, RTArgs, NumIterations, NumTeams, NumThreads,
9593 DynCGGroupMem, HasNoWait);
9594
9595 CGF.Builder.restoreIP(OMPRuntime->getOMPBuilder().emitKernelLaunch(
9596 CGF.Builder, OutlinedFn, OutlinedFnID, EmitTargetCallFallbackCB, Args,
9597 DeviceID, RTLoc, AllocaIP));
9598 };
9599
9600 if (RequiresOuterTask)
9601 CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
9602 else
9603 OMPRuntime->emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
9604}
9605
9606static void
9607emitTargetCallElse(CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn,
9608 const OMPExecutableDirective &D,
9609 llvm::SmallVectorImpl<llvm::Value *> &CapturedVars,
9610 bool RequiresOuterTask, const CapturedStmt &CS,
9611 bool OffloadingMandatory, CodeGenFunction &CGF) {
9612
9613 // Notify that the host version must be executed.
9614 auto &&ElseGen =
9615 [&OMPRuntime, OutlinedFn, &D, &CapturedVars, RequiresOuterTask, &CS,
9616 OffloadingMandatory](CodeGenFunction &CGF, PrePostActionTy &) {
9617 emitTargetCallFallback(OMPRuntime, OutlinedFn, D, CapturedVars,
9618 RequiresOuterTask, CS, OffloadingMandatory, CGF);
9619 };
9620
9621 if (RequiresOuterTask) {
9622 CodeGenFunction::OMPTargetDataInfo InputInfo;
9623 CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo);
9624 } else {
9625 OMPRuntime->emitInlinedDirective(CGF, D.getDirectiveKind(), ElseGen);
9626 }
9627}
9628
9629void CGOpenMPRuntime::emitTargetCall(
9630 CodeGenFunction &CGF, const OMPExecutableDirective &D,
9631 llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
9632 llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
9633 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
9634 const OMPLoopDirective &D)>
9635 SizeEmitter) {
9636 if (!CGF.HaveInsertPoint())
9637 return;
9638
9639 const bool OffloadingMandatory = !CGM.getLangOpts().OpenMPIsTargetDevice &&
9640 CGM.getLangOpts().OpenMPOffloadMandatory;
9641
9642 assert((OffloadingMandatory || OutlinedFn) && "Invalid outlined function!");
9643
9644 const bool RequiresOuterTask =
9645 D.hasClausesOfKind<OMPDependClause>() ||
9646 D.hasClausesOfKind<OMPNowaitClause>() ||
9647 D.hasClausesOfKind<OMPInReductionClause>() ||
9648 (CGM.getLangOpts().OpenMP >= 51 &&
9649 needsTaskBasedThreadLimit(D.getDirectiveKind()) &&
9650 D.hasClausesOfKind<OMPThreadLimitClause>());
9651 llvm::SmallVector<llvm::Value *, 16> CapturedVars;
9652 const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
9653 auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF,
9654 PrePostActionTy &) {
9655 CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
9656 };
9657 emitInlinedDirective(CGF, OMPD_unknown, ArgsCodegen);
9658
9659 CodeGenFunction::OMPTargetDataInfo InputInfo;
9660 llvm::Value *MapTypesArray = nullptr;
9661 llvm::Value *MapNamesArray = nullptr;
9662
9663 auto &&TargetThenGen = [this, OutlinedFn, &D, &CapturedVars,
9664 RequiresOuterTask, &CS, OffloadingMandatory, Device,
9665 OutlinedFnID, &InputInfo, &MapTypesArray,
9666 &MapNamesArray, SizeEmitter](CodeGenFunction &CGF,
9667 PrePostActionTy &) {
9668 emitTargetCallKernelLaunch(this, OutlinedFn, D, CapturedVars,
9669 RequiresOuterTask, CS, OffloadingMandatory,
9670 Device, OutlinedFnID, InputInfo, MapTypesArray,
9671 MapNamesArray, SizeEmitter, CGF, CGM);
9672 };
9673
9674 auto &&TargetElseGen =
9675 [this, OutlinedFn, &D, &CapturedVars, RequiresOuterTask, &CS,
9676 OffloadingMandatory](CodeGenFunction &CGF, PrePostActionTy &) {
9677 emitTargetCallElse(this, OutlinedFn, D, CapturedVars, RequiresOuterTask,
9678 CS, OffloadingMandatory, CGF);
9679 };
9680
9681 // If we have a target function ID it means that we need to support
9682 // offloading; otherwise, just execute on the host. We need to execute on the
9683 // host regardless of the conditional in the if clause if, e.g., the user
9684 // does not specify target triples.
9685 if (OutlinedFnID) {
9686 if (IfCond) {
9687 emitIfClause(CGF, IfCond, TargetThenGen, TargetElseGen);
9688 } else {
9689 RegionCodeGenTy ThenRCG(TargetThenGen);
9690 ThenRCG(CGF);
9691 }
9692 } else {
9693 RegionCodeGenTy ElseRCG(TargetElseGen);
9694 ElseRCG(CGF);
9695 }
9696}
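// For illustration (not part of the original source): for
//   #pragma omp target if(cond) device(dev) map(tofrom: a[0:n])
// with a registered offload entry, the code above emits a branch on 'cond'
// between TargetThenGen (kernel launch with host fallback) and TargetElseGen
// (host execution only); without an offload entry only the else path is used.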
9697
9698void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
9699 StringRef ParentName) {
9700 if (!S)
9701 return;
9702
9703 // Codegen OMP target directives that offload compute to the device.
9704 bool RequiresDeviceCodegen =
9705 isa<OMPExecutableDirective>(S) &&
9706 isOpenMPTargetExecutionDirective(
9707 cast<OMPExecutableDirective>(S)->getDirectiveKind());
9708
9709 if (RequiresDeviceCodegen) {
9710 const auto &E = *cast<OMPExecutableDirective>(S);
9711
9712 llvm::TargetRegionEntryInfo EntryInfo = getEntryInfoFromPresumedLoc(
9713 CGM, OMPBuilder, E.getBeginLoc(), ParentName);
9714
9715 // Is this a target region that should not be emitted as an entry point? If
9716 // so just signal we are done with this target region.
9717 if (!OMPBuilder.OffloadInfoManager.hasTargetRegionEntryInfo(EntryInfo))
9718 return;
9719
9720 switch (E.getDirectiveKind()) {
9721 case OMPD_target:
9722 CodeGenFunction::EmitOMPTargetDeviceFunction(CGM, ParentName,
9723 cast<OMPTargetDirective>(E));
9724 break;
9725 case OMPD_target_parallel:
9726 CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
9727 CGM, ParentName, cast<OMPTargetParallelDirective>(E));
9728 break;
9729 case OMPD_target_teams:
9730 CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
9731 CGM, ParentName, cast<OMPTargetTeamsDirective>(E));
9732 break;
9733 case OMPD_target_teams_distribute:
9734 CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction(
9735 CGM, ParentName, cast<OMPTargetTeamsDistributeDirective>(E));
9736 break;
9737 case OMPD_target_teams_distribute_simd:
9738 CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction(
9739 CGM, ParentName, cast<OMPTargetTeamsDistributeSimdDirective>(E));
9740 break;
9741 case OMPD_target_parallel_for:
9742 CodeGenFunction::EmitOMPTargetParallelForDeviceFunction(
9743 CGM, ParentName, cast<OMPTargetParallelForDirective>(E));
9744 break;
9745 case OMPD_target_parallel_for_simd:
9746 CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction(
9747 CGM, ParentName, cast<OMPTargetParallelForSimdDirective>(E));
9748 break;
9749 case OMPD_target_simd:
9750 CodeGenFunction::EmitOMPTargetSimdDeviceFunction(
9751 CGM, ParentName, cast<OMPTargetSimdDirective>(E));
9752 break;
9753 case OMPD_target_teams_distribute_parallel_for:
9754 CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction(
9755 CGM, ParentName,
9756 cast<OMPTargetTeamsDistributeParallelForDirective>(E));
9757 break;
9758 case OMPD_target_teams_distribute_parallel_for_simd:
9759 CodeGenFunction::
9760 EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(
9761 CGM, ParentName,
9762 cast<OMPTargetTeamsDistributeParallelForSimdDirective>(E));
9763 break;
9764 case OMPD_target_teams_loop:
9765 CodeGenFunction::EmitOMPTargetTeamsGenericLoopDeviceFunction(
9766 CGM, ParentName, cast<OMPTargetTeamsGenericLoopDirective>(E));
9767 break;
9768 case OMPD_target_parallel_loop:
9769 CodeGenFunction::EmitOMPTargetParallelGenericLoopDeviceFunction(
9770 CGM, ParentName, cast<OMPTargetParallelGenericLoopDirective>(E));
9771 break;
9772 case OMPD_parallel:
9773 case OMPD_for:
9774 case OMPD_parallel_for:
9775 case OMPD_parallel_master:
9776 case OMPD_parallel_sections:
9777 case OMPD_for_simd:
9778 case OMPD_parallel_for_simd:
9779 case OMPD_cancel:
9780 case OMPD_cancellation_point:
9781 case OMPD_ordered:
9782 case OMPD_threadprivate:
9783 case OMPD_allocate:
9784 case OMPD_task:
9785 case OMPD_simd:
9786 case OMPD_tile:
9787 case OMPD_unroll:
9788 case OMPD_sections:
9789 case OMPD_section:
9790 case OMPD_single:
9791 case OMPD_master:
9792 case OMPD_critical:
9793 case OMPD_taskyield:
9794 case OMPD_barrier:
9795 case OMPD_taskwait:
9796 case OMPD_taskgroup:
9797 case OMPD_atomic:
9798 case OMPD_flush:
9799 case OMPD_depobj:
9800 case OMPD_scan:
9801 case OMPD_teams:
9802 case OMPD_target_data:
9803 case OMPD_target_exit_data:
9804 case OMPD_target_enter_data:
9805 case OMPD_distribute:
9806 case OMPD_distribute_simd:
9807 case OMPD_distribute_parallel_for:
9808 case OMPD_distribute_parallel_for_simd:
9809 case OMPD_teams_distribute:
9810 case OMPD_teams_distribute_simd:
9811 case OMPD_teams_distribute_parallel_for:
9812 case OMPD_teams_distribute_parallel_for_simd:
9813 case OMPD_target_update:
9814 case OMPD_declare_simd:
9815 case OMPD_declare_variant:
9816 case OMPD_begin_declare_variant:
9817 case OMPD_end_declare_variant:
9818 case OMPD_declare_target:
9819 case OMPD_end_declare_target:
9820 case OMPD_declare_reduction:
9821 case OMPD_declare_mapper:
9822 case OMPD_taskloop:
9823 case OMPD_taskloop_simd:
9824 case OMPD_master_taskloop:
9825 case OMPD_master_taskloop_simd:
9826 case OMPD_parallel_master_taskloop:
9827 case OMPD_parallel_master_taskloop_simd:
9828 case OMPD_requires:
9829 case OMPD_metadirective:
9830 case OMPD_unknown:
9831 default:
9832 llvm_unreachable("Unknown target directive for OpenMP device codegen.");
9833 }
9834 return;
9835 }
9836
9837 if (const auto *E = dyn_cast<OMPExecutableDirective>(S)) {
9838 if (!E->hasAssociatedStmt() || !E->getAssociatedStmt())
9839 return;
9840
9841 scanForTargetRegionsFunctions(E->getRawStmt(), ParentName);
9842 return;
9843 }
9844
9845 // If this is a lambda function, look into its body.
9846 if (const auto *L = dyn_cast<LambdaExpr>(S))
9847 S = L->getBody();
9848
9849 // Keep looking for target regions recursively.
9850 for (const Stmt *II : S->children())
9851 scanForTargetRegionsFunctions(II, ParentName);
9852}
9853
9854static bool isAssumedToBeNotEmitted(const ValueDecl *VD, bool IsDevice) {
9855 std::optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
9856 OMPDeclareTargetDeclAttr::getDeviceType(VD);
9857 if (!DevTy)
9858 return false;
9859 // Do not emit device_type(nohost) functions for the host.
9860 if (!IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_NoHost)
9861 return true;
9862 // Do not emit device_type(host) functions for the device.
9863 if (IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_Host)
9864 return true;
9865 return false;
9866}
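// For illustration (not part of the original source): a function declared
// inside '#pragma omp declare target device_type(nohost)' is skipped when
// emitting host code, and one declared with 'device_type(host)' is skipped
// when emitting device code.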
9867
9868bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) {
9869 // If emitting code for the host, we do not process FD here. Instead we do
9870 // the normal code generation.
9871 if (!CGM.getLangOpts().OpenMPIsTargetDevice) {
9872 if (const auto *FD = dyn_cast<FunctionDecl>(GD.getDecl()))
9873 if (isAssumedToBeNotEmitted(cast<ValueDecl>(FD),
9874 CGM.getLangOpts().OpenMPIsTargetDevice))
9875 return true;
9876 return false;
9877 }
9878
9879 const ValueDecl *VD = cast<ValueDecl>(GD.getDecl());
9880 // Try to detect target regions in the function.
9881 if (const auto *FD = dyn_cast<FunctionDecl>(VD)) {
9882 StringRef Name = CGM.getMangledName(GD);
9883 scanForTargetRegionsFunctions(FD->getBody(), Name);
9884 if (isAssumedToBeNotEmitted(cast<ValueDecl>(FD),
9885 CGM.getLangOpts().OpenMPIsTargetDevice))
9886 return true;
9887 }
9888
9889 // Do not emit the function if it is not marked as declare target.
9890 return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) &&
9891 AlreadyEmittedTargetDecls.count(VD) == 0;
9892}
9893
9894bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
9895 if (isAssumedToBeNotEmitted(cast<ValueDecl>(GD.getDecl()),
9896 CGM.getLangOpts().OpenMPIsTargetDevice))
9897 return true;
9898
9899 if (!CGM.getLangOpts().OpenMPIsTargetDevice)
9900 return false;
9901
9902 // Check if there are Ctors/Dtors in this declaration and look for target
9903 // regions in it. We use the complete variant to produce the kernel name
9904 // mangling.
9905 QualType RDTy = cast<VarDecl>(GD.getDecl())->getType();
9906 if (const auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) {
9907 for (const CXXConstructorDecl *Ctor : RD->ctors()) {
9908 StringRef ParentName =
9909 CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete));
9910 scanForTargetRegionsFunctions(Ctor->getBody(), ParentName);
9911 }
9912 if (const CXXDestructorDecl *Dtor = RD->getDestructor()) {
9913 StringRef ParentName =
9914 CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete));
9915 scanForTargetRegionsFunctions(Dtor->getBody(), ParentName);
9916 }
9917 }
9918
9919 // Do not emit the variable if it is not marked as declare target.
9920 std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
9921 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(
9922 cast<VarDecl>(GD.getDecl()));
9923 if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
9924 ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
9925 *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
9926 hasRequiresUnifiedSharedMemory())) {
9927 DeferredGlobalVariables.insert(cast<VarDecl>(GD.getDecl()));
9928 return true;
9929 }
9930 return false;
9931}
9932
9933void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD,
9934 llvm::Constant *Addr) {
9935 if (CGM.getLangOpts().OMPTargetTriples.empty() &&
9936 !CGM.getLangOpts().OpenMPIsTargetDevice)
9937 return;
9938
9939 std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
9940 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
9941
9942 // If this is an 'extern' declaration we defer to the canonical definition and
9943 // do not emit an offloading entry.
9944 if (Res && *Res != OMPDeclareTargetDeclAttr::MT_Link &&
9945 VD->hasExternalStorage())
9946 return;
9947
9948 if (!Res) {
9949 if (CGM.getLangOpts().OpenMPIsTargetDevice) {
9950 // Register non-target variables being emitted in device code (debug info
9951 // may cause this).
9952 StringRef VarName = CGM.getMangledName(VD);
9953 EmittedNonTargetVariables.try_emplace(VarName, Addr);
9954 }
9955 return;
9956 }
9957
9958 auto AddrOfGlobal = [&VD, this]() { return CGM.GetAddrOfGlobal(VD); };
9959 auto LinkageForVariable = [&VD, this]() {
9960 return CGM.getLLVMLinkageVarDefinition(VD);
9961 };
9962
9963 std::vector<llvm::GlobalVariable *> GeneratedRefs;
9964 OMPBuilder.registerTargetGlobalVariable(
9965 convertCaptureClause(VD), convertDeviceClause(VD),
9966 VD->hasDefinition(CGM.getContext()) == VarDecl::DeclarationOnly,
9967 VD->isExternallyVisible(),
9968 getEntryInfoFromPresumedLoc(CGM, OMPBuilder,
9969 VD->getCanonicalDecl()->getBeginLoc()),
9970 CGM.getMangledName(VD), GeneratedRefs, CGM.getLangOpts().OpenMPSimd,
9971 CGM.getLangOpts().OMPTargetTriples, AddrOfGlobal, LinkageForVariable,
9972 CGM.getTypes().ConvertTypeForMem(
9973 CGM.getContext().getPointerType(VD->getType())),
9974 Addr);
9975
9976 for (auto *ref : GeneratedRefs)
9977 CGM.addCompilerUsedGlobal(ref);
9978}
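// For illustration (not part of the original source): a variable declared as
//   #pragma omp declare target link(gbl)
// is registered here so that the device image gets an indirection (a
// '*_decl_tgt_ref_ptr' reference) instead of the definition itself, while
// 'declare target to'/'enter' variables get a regular offloading entry.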
9979
9980bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) {
9981 if (isa<FunctionDecl>(GD.getDecl()) ||
9982 isa<OMPDeclareReductionDecl>(GD.getDecl()))
9983 return emitTargetFunctions(GD);
9984
9985 return emitTargetGlobalVariable(GD);
9986}
9987
9988void CGOpenMPRuntime::emitDeferredTargetDecls() const {
9989 for (const VarDecl *VD : DeferredGlobalVariables) {
9990 std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
9991 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
9992 if (!Res)
9993 continue;
9994 if ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
9995 *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
9996 !hasRequiresUnifiedSharedMemory()) {
9997 CGM.EmitGlobal(VD);
9998 } else {
9999 assert((*Res == OMPDeclareTargetDeclAttr::MT_Link ||
10000 ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
10001 *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
10003 "Expected link clause or to clause with unified memory.");
10005 }
10006 }
10007}
10008
10009void CGOpenMPRuntime::adjustTargetSpecificDataForLambdas(
10010 CodeGenFunction &CGF, const OMPExecutableDirective &D) const {
10011 assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) &&
10012 " Expected target-based directive.");
10013}
10014
10015void CGOpenMPRuntime::processRequiresDirective(const OMPRequiresDecl *D) {
10016 for (const OMPClause *Clause : D->clauselists()) {
10017 if (Clause->getClauseKind() == OMPC_unified_shared_memory) {
10018 HasRequiresUnifiedSharedMemory = true;
10019 OMPBuilder.Config.setHasRequiresUnifiedSharedMemory(true);
10020 } else if (const auto *AC =
10021 dyn_cast<OMPAtomicDefaultMemOrderClause>(Clause)) {
10022 switch (AC->getAtomicDefaultMemOrderKind()) {
10023 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_acq_rel:
10024 RequiresAtomicOrdering = llvm::AtomicOrdering::AcquireRelease;
10025 break;
10026 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_seq_cst:
10027 RequiresAtomicOrdering = llvm::AtomicOrdering::SequentiallyConsistent;
10028 break;
10029 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_relaxed:
10030 RequiresAtomicOrdering = llvm::AtomicOrdering::Monotonic;
10031 break;
10032 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_unknown:
10033 break;
10034 }
10035 }
10036 }
10037}
10038
10039llvm::AtomicOrdering CGOpenMPRuntime::getDefaultMemoryOrdering() const {
10040 return RequiresAtomicOrdering;
10041}
10042
10043bool CGOpenMPRuntime::hasAllocateAttributeForGlobalVar(const VarDecl *VD,
10044 LangAS &AS) {
10045 if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())
10046 return false;
10047 const auto *A = VD->getAttr<OMPAllocateDeclAttr>();
10048 switch(A->getAllocatorType()) {
10049 case OMPAllocateDeclAttr::OMPNullMemAlloc:
10050 case OMPAllocateDeclAttr::OMPDefaultMemAlloc:
10051 // Not supported, fall back to the default mem space.
10052 case OMPAllocateDeclAttr::OMPLargeCapMemAlloc:
10053 case OMPAllocateDeclAttr::OMPCGroupMemAlloc:
10054 case OMPAllocateDeclAttr::OMPHighBWMemAlloc:
10055 case OMPAllocateDeclAttr::OMPLowLatMemAlloc:
10056 case OMPAllocateDeclAttr::OMPThreadMemAlloc:
10057 case OMPAllocateDeclAttr::OMPConstMemAlloc:
10058 case OMPAllocateDeclAttr::OMPPTeamMemAlloc:
10059 AS = LangAS::Default;
10060 return true;
10061 case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc:
10062 llvm_unreachable("Expected predefined allocator for the variables with the "
10063 "static storage.");
10064 }
10065 return false;
10066}
10067
10068bool CGOpenMPRuntime::hasRequiresUnifiedSharedMemory() const {
10069 return OMPBuilder.Config.hasRequiresUnifiedSharedMemory();
10070}
10071
10072CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII(
10073 CodeGenModule &CGM)
10074 : CGM(CGM) {
10075 if (CGM.getLangOpts().OpenMPIsTargetDevice) {
10076 SavedShouldMarkAsGlobal = CGM.getOpenMPRuntime().ShouldMarkAsGlobal;
10077 CGM.getOpenMPRuntime().ShouldMarkAsGlobal = false;
10078 }
10079}
10080
10081CGOpenMPRuntime::DisableAutoDeclareTargetRAII::~DisableAutoDeclareTargetRAII() {
10082 if (CGM.getLangOpts().OpenMPIsTargetDevice)
10083 CGM.getOpenMPRuntime().ShouldMarkAsGlobal = SavedShouldMarkAsGlobal;
10084}
10085
10086bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) {
10087 if (!CGM.getLangOpts().OpenMPIsTargetDevice || !ShouldMarkAsGlobal)
10088 return true;
10089
10090 const auto *D = cast<FunctionDecl>(GD.getDecl());
10091 // Do not emit the function if it is marked as declare target, as it was
10092 // already emitted.
10093 if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(D)) {
10094 if (D->hasBody() && AlreadyEmittedTargetDecls.count(D) == 0) {
10095 if (auto *F = dyn_cast_or_null<llvm::Function>(
10096 CGM.GetGlobalValue(CGM.getMangledName(GD))))
10097 return !F->isDeclaration();
10098 return false;
10099 }
10100 return true;
10101 }
10102
10103 return !AlreadyEmittedTargetDecls.insert(D).second;
10104}
10105
10106void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF,
10107 const OMPExecutableDirective &D,
10108 SourceLocation Loc,
10109 llvm::Function *OutlinedFn,
10110 ArrayRef<llvm::Value *> CapturedVars) {
10111 if (!CGF.HaveInsertPoint())
10112 return;
10113
10114 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
10115 CodeGenFunction::RunCleanupsScope Scope(CGF);
10116
10117 // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn);
10118 llvm::Value *Args[] = {
10119 RTLoc,
10120 CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
10121 CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())};
10122 llvm::SmallVector<llvm::Value *, 16> RealArgs;
10123 RealArgs.append(std::begin(Args), std::end(Args));
10124 RealArgs.append(CapturedVars.begin(), CapturedVars.end());
10125
10126 llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
10127 CGM.getModule(), OMPRTL___kmpc_fork_teams);
10128 CGF.EmitRuntimeCall(RTLFn, RealArgs);
10129}
10130
10131void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
10132 const Expr *NumTeams,
10133 const Expr *ThreadLimit,
10134 SourceLocation Loc) {
10135 if (!CGF.HaveInsertPoint())
10136 return;
10137
10138 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
10139
10140 llvm::Value *NumTeamsVal =
10141 NumTeams
10142 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams),
10143 CGF.CGM.Int32Ty, /* isSigned = */ true)
10144 : CGF.Builder.getInt32(0);
10145
10146 llvm::Value *ThreadLimitVal =
10147 ThreadLimit
10148 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
10149 CGF.CGM.Int32Ty, /* isSigned = */ true)
10150 : CGF.Builder.getInt32(0);
10151
10152 // Build call __kmpc_push_num_teams(&loc, global_tid, num_teams, thread_limit)
10153 llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal,
10154 ThreadLimitVal};
10155 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
10156 CGM.getModule(), OMPRTL___kmpc_push_num_teams),
10157 PushNumTeamsArgs);
10158}
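// For illustration (not part of the original source): '#pragma omp teams
// num_teams(4) thread_limit(32)' results in a call
// __kmpc_push_num_teams(&loc, gtid, 4, 32) ahead of the __kmpc_fork_teams
// call emitted by emitTeamsCall above; an absent clause is encoded as 0.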
10159
10160void CGOpenMPRuntime::emitThreadLimitClause(CodeGenFunction &CGF,
10161 const Expr *ThreadLimit,
10162 SourceLocation Loc) {
10163 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
10164 llvm::Value *ThreadLimitVal =
10165 ThreadLimit
10166 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
10167 CGF.CGM.Int32Ty, /* isSigned = */ true)
10168 : CGF.Builder.getInt32(0);
10169
10170 // Build call __kmpc_set_thread_limit(&loc, global_tid, thread_limit)
10171 llvm::Value *ThreadLimitArgs[] = {RTLoc, getThreadID(CGF, Loc),
10172 ThreadLimitVal};
10173 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
10174 CGM.getModule(), OMPRTL___kmpc_set_thread_limit),
10175 ThreadLimitArgs);
10176}
10177
10178void CGOpenMPRuntime::emitTargetDataCalls(
10179 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
10180 const Expr *Device, const RegionCodeGenTy &CodeGen,
10181 CGOpenMPRuntime::TargetDataInfo &Info) {
10182 if (!CGF.HaveInsertPoint())
10183 return;
10184
10185 // Action used to replace the default codegen action and turn privatization
10186 // off.
10187 PrePostActionTy NoPrivAction;
10188
10189 using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
10190
10191 llvm::Value *IfCondVal = nullptr;
10192 if (IfCond)
10193 IfCondVal = CGF.EvaluateExprAsBool(IfCond);
10194
10195 // Emit device ID if any.
10196 llvm::Value *DeviceID = nullptr;
10197 if (Device) {
10198 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
10199 CGF.Int64Ty, /*isSigned=*/true);
10200 } else {
10201 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
10202 }
10203
10204 // Fill up the arrays with all the mapped variables.
10205 MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
10206 auto GenMapInfoCB =
10207 [&](InsertPointTy CodeGenIP) -> llvm::OpenMPIRBuilder::MapInfosTy & {
10208 CGF.Builder.restoreIP(CodeGenIP);
10209 // Get map clause information.
10210 MappableExprsHandler MEHandler(D, CGF);
10211 MEHandler.generateAllInfo(CombinedInfo, OMPBuilder);
10212
10213 auto FillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) {
10214 return emitMappingInformation(CGF, OMPBuilder, MapExpr);
10215 };
10216 if (CGM.getCodeGenOpts().getDebugInfo() !=
10217 llvm::codegenoptions::NoDebugInfo) {
10218 CombinedInfo.Names.resize(CombinedInfo.Exprs.size());
10219 llvm::transform(CombinedInfo.Exprs, CombinedInfo.Names.begin(),
10220 FillInfoMap);
10221 }
10222
10223 return CombinedInfo;
10224 };
10225 using BodyGenTy = llvm::OpenMPIRBuilder::BodyGenTy;
10226 auto BodyCB = [&](InsertPointTy CodeGenIP, BodyGenTy BodyGenType) {
10227 CGF.Builder.restoreIP(CodeGenIP);
10228 switch (BodyGenType) {
10229 case BodyGenTy::Priv:
10230 if (!Info.CaptureDeviceAddrMap.empty())
10231 CodeGen(CGF);
10232 break;
10233 case BodyGenTy::DupNoPriv:
10234 if (!Info.CaptureDeviceAddrMap.empty()) {
10235 CodeGen.setAction(NoPrivAction);
10236 CodeGen(CGF);
10237 }
10238 break;
10239 case BodyGenTy::NoPriv:
10240 if (Info.CaptureDeviceAddrMap.empty()) {
10241 CodeGen.setAction(NoPrivAction);
10242 CodeGen(CGF);
10243 }
10244 break;
10245 }
10246 return InsertPointTy(CGF.Builder.GetInsertBlock(),
10247 CGF.Builder.GetInsertPoint());
10248 };
10249
10250 auto DeviceAddrCB = [&](unsigned int I, llvm::Value *NewDecl) {
10251 if (const ValueDecl *DevVD = CombinedInfo.DevicePtrDecls[I]) {
10252 Info.CaptureDeviceAddrMap.try_emplace(DevVD, NewDecl);
10253 }
10254 };
10255
10256 auto CustomMapperCB = [&](unsigned int I) {
10257 llvm::Value *MFunc = nullptr;
10258 if (CombinedInfo.Mappers[I]) {
10259 Info.HasMapper = true;
10260 MFunc = CGF.CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc(
10261 cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I]));
10262 }
10263 return MFunc;
10264 };
10265
10266 // Source location for the ident struct
10267 llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
10268
10269 InsertPointTy AllocaIP(CGF.AllocaInsertPt->getParent(),
10270 CGF.AllocaInsertPt->getIterator());
10271 InsertPointTy CodeGenIP(CGF.Builder.GetInsertBlock(),
10272 CGF.Builder.GetInsertPoint());
10273 llvm::OpenMPIRBuilder::LocationDescription OmpLoc(CodeGenIP);
10274 CGF.Builder.restoreIP(OMPBuilder.createTargetData(
10275 OmpLoc, AllocaIP, CodeGenIP, DeviceID, IfCondVal, Info, GenMapInfoCB,
10276 /*MapperFunc=*/nullptr, BodyCB, DeviceAddrCB, CustomMapperCB, RTLoc));
10277}
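// For illustration (not part of the original source): for
//   #pragma omp target data map(tofrom: a[0:n]) use_device_ptr(p)
// createTargetData emits __tgt_target_data_begin/end mapper calls around the
// region, and DeviceAddrCB records the translated device address of 'p' in
// Info.CaptureDeviceAddrMap so the region body can use it.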
10278
10279void CGOpenMPRuntime::emitTargetDataStandAloneCall(
10280 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
10281 const Expr *Device) {
10282 if (!CGF.HaveInsertPoint())
10283 return;
10284
10285 assert((isa<OMPTargetEnterDataDirective>(D) ||
10286 isa<OMPTargetExitDataDirective>(D) ||
10287 isa<OMPTargetUpdateDirective>(D)) &&
10288 "Expecting either target enter, exit data, or update directives.");
10289
10290 CodeGenFunction::OMPTargetDataInfo InputInfo;
10291 llvm::Value *MapTypesArray = nullptr;
10292 llvm::Value *MapNamesArray = nullptr;
10293 // Generate the code for the opening of the data environment.
10294 auto &&ThenGen = [this, &D, Device, &InputInfo, &MapTypesArray,
10295 &MapNamesArray](CodeGenFunction &CGF, PrePostActionTy &) {
10296 // Emit device ID if any.
10297 llvm::Value *DeviceID = nullptr;
10298 if (Device) {
10299 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
10300 CGF.Int64Ty, /*isSigned=*/true);
10301 } else {
10302 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
10303 }
10304
10305 // Emit the number of elements in the offloading arrays.
10306 llvm::Constant *PointerNum =
10307 CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);
10308
10309 // Source location for the ident struct
10310 llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
10311
10312 llvm::Value *OffloadingArgs[] = {RTLoc,
10313 DeviceID,
10314 PointerNum,
10315 InputInfo.BasePointersArray.getPointer(),
10316 InputInfo.PointersArray.getPointer(),
10317 InputInfo.SizesArray.getPointer(),
10318 MapTypesArray,
10319 MapNamesArray,
10320 InputInfo.MappersArray.getPointer()};
10321
10322 // Select the right runtime function call for each standalone
10323 // directive.
10324 const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
10325 RuntimeFunction RTLFn;
10326 switch (D.getDirectiveKind()) {
10327 case OMPD_target_enter_data:
10328 RTLFn = HasNowait ? OMPRTL___tgt_target_data_begin_nowait_mapper
10329 : OMPRTL___tgt_target_data_begin_mapper;
10330 break;
10331 case OMPD_target_exit_data:
10332 RTLFn = HasNowait ? OMPRTL___tgt_target_data_end_nowait_mapper
10333 : OMPRTL___tgt_target_data_end_mapper;
10334 break;
10335 case OMPD_target_update:
10336 RTLFn = HasNowait ? OMPRTL___tgt_target_data_update_nowait_mapper
10337 : OMPRTL___tgt_target_data_update_mapper;
10338 break;
10339 case OMPD_parallel:
10340 case OMPD_for:
10341 case OMPD_parallel_for:
10342 case OMPD_parallel_master:
10343 case OMPD_parallel_sections:
10344 case OMPD_for_simd:
10345 case OMPD_parallel_for_simd:
10346 case OMPD_cancel:
10347 case OMPD_cancellation_point:
10348 case OMPD_ordered:
10349 case OMPD_threadprivate:
10350 case OMPD_allocate:
10351 case OMPD_task:
10352 case OMPD_simd:
10353 case OMPD_tile:
10354 case OMPD_unroll:
10355 case OMPD_sections:
10356 case OMPD_section:
10357 case OMPD_single:
10358 case OMPD_master:
10359 case OMPD_critical:
10360 case OMPD_taskyield:
10361 case OMPD_barrier:
10362 case OMPD_taskwait:
10363 case OMPD_taskgroup:
10364 case OMPD_atomic:
10365 case OMPD_flush:
10366 case OMPD_depobj:
10367 case OMPD_scan:
10368 case OMPD_teams:
10369 case OMPD_target_data:
10370 case OMPD_distribute:
10371 case OMPD_distribute_simd:
10372 case OMPD_distribute_parallel_for:
10373 case OMPD_distribute_parallel_for_simd:
10374 case OMPD_teams_distribute:
10375 case OMPD_teams_distribute_simd:
10376 case OMPD_teams_distribute_parallel_for:
10377 case OMPD_teams_distribute_parallel_for_simd:
10378 case OMPD_declare_simd:
10379 case OMPD_declare_variant:
10380 case OMPD_begin_declare_variant:
10381 case OMPD_end_declare_variant:
10382 case OMPD_declare_target:
10383 case OMPD_end_declare_target:
10384 case OMPD_declare_reduction:
10385 case OMPD_declare_mapper:
10386 case OMPD_taskloop:
10387 case OMPD_taskloop_simd:
10388 case OMPD_master_taskloop:
10389 case OMPD_master_taskloop_simd:
10390 case OMPD_parallel_master_taskloop:
10391 case OMPD_parallel_master_taskloop_simd:
10392 case OMPD_target:
10393 case OMPD_target_simd:
10394 case OMPD_target_teams_distribute:
10395 case OMPD_target_teams_distribute_simd:
10396 case OMPD_target_teams_distribute_parallel_for:
10397 case OMPD_target_teams_distribute_parallel_for_simd:
10398 case OMPD_target_teams:
10399 case OMPD_target_parallel:
10400 case OMPD_target_parallel_for:
10401 case OMPD_target_parallel_for_simd:
10402 case OMPD_requires:
10403 case OMPD_metadirective:
10404 case OMPD_unknown:
10405 default:
10406 llvm_unreachable("Unexpected standalone target data directive.");
10407 break;
10408 }
10409 CGF.EmitRuntimeCall(
10410 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), RTLFn),
10411 OffloadingArgs);
10412 };
10413
10414 auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
10415 &MapNamesArray](CodeGenFunction &CGF,
10416 PrePostActionTy &) {
10417 // Fill up the arrays with all the mapped variables.
10418 MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
10419
10420 // Get map clause information.
10421 MappableExprsHandler MEHandler(D, CGF);
10422 MEHandler.generateAllInfo(CombinedInfo, OMPBuilder);
10423
10424 CGOpenMPRuntime::TargetDataInfo Info;
10425 // Fill up the arrays and create the arguments.
10426 emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder,
10427 /*IsNonContiguous=*/true);
10428 bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() ||
10429 D.hasClausesOfKind<OMPNowaitClause>();
10430 bool EmitDebug = CGF.CGM.getCodeGenOpts().getDebugInfo() !=
10431 llvm::codegenoptions::NoDebugInfo;
10432 OMPBuilder.emitOffloadingArraysArgument(CGF.Builder, Info.RTArgs, Info,
10433 EmitDebug,
10434 /*ForEndCall=*/false);
10435 InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
10436 InputInfo.BasePointersArray = Address(Info.RTArgs.BasePointersArray,
10437 CGF.VoidPtrTy, CGM.getPointerAlign());
10438 InputInfo.PointersArray = Address(Info.RTArgs.PointersArray, CGF.VoidPtrTy,
10439 CGM.getPointerAlign());
10440 InputInfo.SizesArray =
10441 Address(Info.RTArgs.SizesArray, CGF.Int64Ty, CGM.getPointerAlign());
10442 InputInfo.MappersArray =
10443 Address(Info.RTArgs.MappersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
10444 MapTypesArray = Info.RTArgs.MapTypesArray;
10445 MapNamesArray = Info.RTArgs.MapNamesArray;
10446 if (RequiresOuterTask)
10447 CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
10448 else
10449 emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
10450 };
10451
10452 if (IfCond) {
10453 emitIfClause(CGF, IfCond, TargetThenGen,
10454 [](CodeGenFunction &CGF, PrePostActionTy &) {});
10455 } else {
10456 RegionCodeGenTy ThenRCG(TargetThenGen);
10457 ThenRCG(CGF);
10458 }
10459}
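// A sketch of the dispatch above, for illustration (directive and device id
// are hypothetical): '#pragma omp target update to(x) nowait device(2)'
// selects OMPRTL___tgt_target_data_update_nowait_mapper and passes
// DeviceID = 2; without a 'device' clause, DeviceID is OMP_DEVICEID_UNDEF.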
10460
10461namespace {
10462 /// Kind of parameter in a function with 'declare simd' directive.
10463enum ParamKindTy {
10464 Linear,
10465 LinearRef,
10466 LinearUVal,
10467 LinearVal,
10468 Uniform,
10469 Vector,
10470};
10471/// Attribute set of the parameter.
10472struct ParamAttrTy {
10473 ParamKindTy Kind = Vector;
10474 llvm::APSInt StrideOrArg;
10475 llvm::APSInt Alignment;
10476 bool HasVarStride = false;
10477};
10478} // namespace
10479
10480static unsigned evaluateCDTSize(const FunctionDecl *FD,
10481 ArrayRef<ParamAttrTy> ParamAttrs) {
10482 // Every vector variant of a SIMD-enabled function has a vector length (VLEN).
10483 // If the OpenMP clause "simdlen" is used, the VLEN is the value of the
10484 // argument of that clause. The VLEN value must be a power of 2.
10485 // Otherwise the notion of the function's "characteristic data type" (CDT)
10486 // is used to compute the vector length.
10487 // CDT is defined in the following order:
10488 // a) For a non-void function, the CDT is the return type.
10489 // b) If the function has any non-uniform, non-linear parameters, then the
10490 // CDT is the type of the first such parameter.
10491 // c) If the CDT determined by a) or b) above is a struct, union, or class
10492 // type which is pass-by-value (except for the type that maps to the
10493 // built-in complex data type), the characteristic data type is int.
10494 // d) If none of the above three cases is applicable, the CDT is int.
10495 // The VLEN is then determined based on the CDT and the size of the vector
10496 // register of the ISA for which the current vector version is generated.
10497 // The VLEN is computed using the formula below:
10498 // VLEN = sizeof(vector_register) / sizeof(CDT),
10499 // where the vector register size is specified in section 3.2.1 "Registers
10500 // and the Stack Frame" of the original AMD64 ABI document.
10501 QualType RetType = FD->getReturnType();
10502 if (RetType.isNull())
10503 return 0;
10504 ASTContext &C = FD->getASTContext();
10505 QualType CDT;
10506 if (!RetType.isNull() && !RetType->isVoidType()) {
10507 CDT = RetType;
10508 } else {
10509 unsigned Offset = 0;
10510 if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) {
10511 if (ParamAttrs[Offset].Kind == Vector)
10512 CDT = C.getPointerType(C.getRecordType(MD->getParent()));
10513 ++Offset;
10514 }
10515 if (CDT.isNull()) {
10516 for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
10517 if (ParamAttrs[I + Offset].Kind == Vector) {
10518 CDT = FD->getParamDecl(I)->getType();
10519 break;
10520 }
10521 }
10522 }
10523 }
10524 if (CDT.isNull())
10525 CDT = C.IntTy;
10526 CDT = CDT->getCanonicalTypeUnqualified();
10527 if (CDT->isRecordType() || CDT->isUnionType())
10528 CDT = C.IntTy;
10529 return C.getTypeSize(CDT);
10530}
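// Worked example of the rules above (the function is hypothetical, not from
// this file):
//   #pragma omp declare simd
//   double add(double x, double y);   // CDT = double by rule a)
// For a 256-bit vector register (e.g. AVX2):
//   VLEN = sizeof(vector_register) / sizeof(CDT) = 256 / 64 = 4.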
10531
10532/// Mangle the parameters of the vector function name according to their
10533/// OpenMP classification. The mangling function is defined in
10534/// section 4.5 of the AAVFABI (2021Q1).
10535static std::string mangleVectorParameters(ArrayRef<ParamAttrTy> ParamAttrs) {
10536 SmallString<256> Buffer;
10537 llvm::raw_svector_ostream Out(Buffer);
10538 for (const auto &ParamAttr : ParamAttrs) {
10539 switch (ParamAttr.Kind) {
10540 case Linear:
10541 Out << 'l';
10542 break;
10543 case LinearRef:
10544 Out << 'R';
10545 break;
10546 case LinearUVal:
10547 Out << 'U';
10548 break;
10549 case LinearVal:
10550 Out << 'L';
10551 break;
10552 case Uniform:
10553 Out << 'u';
10554 break;
10555 case Vector:
10556 Out << 'v';
10557 break;
10558 }
10559 if (ParamAttr.HasVarStride)
10560 Out << "s" << ParamAttr.StrideOrArg;
10561 else if (ParamAttr.Kind == Linear || ParamAttr.Kind == LinearRef ||
10562 ParamAttr.Kind == LinearUVal || ParamAttr.Kind == LinearVal) {
10563 // Don't print the step value if it is not present or if it is
10564 // equal to 1.
10565 if (ParamAttr.StrideOrArg < 0)
10566 Out << 'n' << -ParamAttr.StrideOrArg;
10567 else if (ParamAttr.StrideOrArg != 1)
10568 Out << ParamAttr.StrideOrArg;
10569 }
10570
10571 if (!!ParamAttr.Alignment)
10572 Out << 'a' << ParamAttr.Alignment;
10573 }
10574
10575 return std::string(Out.str());
10576}
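// For illustration, assuming parameters classified as Vector, Uniform,
// Linear with stride 2, and Vector with 16-byte alignment, the loop above
// produces the parameter sequence "vul2va16".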
10577
10578static void
10579emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn,
10580 const llvm::APSInt &VLENVal,
10581 ArrayRef<ParamAttrTy> ParamAttrs,
10582 OMPDeclareSimdDeclAttr::BranchStateTy State) {
10583 struct ISADataTy {
10584 char ISA;
10585 unsigned VecRegSize;
10586 };
10587 ISADataTy ISAData[] = {
10588 {
10589 'b', 128
10590 }, // SSE
10591 {
10592 'c', 256
10593 }, // AVX
10594 {
10595 'd', 256
10596 }, // AVX2
10597 {
10598 'e', 512
10599 }, // AVX512
10600 };
10601 llvm::SmallVector<char, 2> Masked;
10602 switch (State) {
10603 case OMPDeclareSimdDeclAttr::BS_Undefined:
10604 Masked.push_back('N');
10605 Masked.push_back('M');
10606 break;
10607 case OMPDeclareSimdDeclAttr::BS_Notinbranch:
10608 Masked.push_back('N');
10609 break;
10610 case OMPDeclareSimdDeclAttr::BS_Inbranch:
10611 Masked.push_back('M');
10612 break;
10613 }
10614 for (char Mask : Masked) {
10615 for (const ISADataTy &Data : ISAData) {
10616 SmallString<256> Buffer;
10617 llvm::raw_svector_ostream Out(Buffer);
10618 Out << "_ZGV" << Data.ISA << Mask;
10619 if (!VLENVal) {
10620 unsigned NumElts = evaluateCDTSize(FD, ParamAttrs);
10621 assert(NumElts && "Non-zero simdlen/cdtsize expected");
10622 Out << llvm::APSInt::getUnsigned(Data.VecRegSize / NumElts);
10623 } else {
10624 Out << VLENVal;
10625 }
10626 Out << mangleVectorParameters(ParamAttrs);
10627 Out << '_' << Fn->getName();
10628 Fn->addFnAttr(Out.str());
10629 }
10630 }
10631}
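// For illustration: with simdlen(4), 'notinbranch', and two Vector
// parameters on a hypothetical scalar function named "add", the nested
// loops above attach one attribute per x86 ISA:
//   "_ZGVbN4vv_add", "_ZGVcN4vv_add", "_ZGVdN4vv_add", "_ZGVeN4vv_add".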
10632
10633// These are the functions needed to mangle the names of the
10634// vector functions generated by the compiler, according to the rules
10635// defined in the "Vector Function ABI specifications for AArch64",
10636// available at
10637// https://developer.arm.com/products/software-development-tools/hpc/arm-compiler-for-hpc/vector-function-abi.
10638
10639/// Maps To Vector (MTV), as defined in 4.1.1 of the AAVFABI (2021Q1).
10640static bool getAArch64MTV(QualType QT, ParamKindTy Kind) {
10641 QT = QT.getCanonicalType();
10642
10643 if (QT->isVoidType())
10644 return false;
10645
10646 if (Kind == ParamKindTy::Uniform)
10647 return false;
10648
10649 if (Kind == ParamKindTy::LinearUVal || Kind == ParamKindTy::LinearRef)
10650 return false;
10651
10652 if ((Kind == ParamKindTy::Linear || Kind == ParamKindTy::LinearVal) &&
10653 !QT->isReferenceType())
10654 return false;
10655
10656 return true;
10657}
10658
10659/// Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI.
10660static bool getAArch64PBV(QualType QT, ASTContext &C) {
10661 QT = QT.getCanonicalType();
10662 unsigned Size = C.getTypeSize(QT);
10663
10664 // Only scalars and complex types at most 16 bytes wide set PBV to true.
10665 if (Size != 8 && Size != 16 && Size != 32 && Size != 64 && Size != 128)
10666 return false;
10667
10668 if (QT->isFloatingType())
10669 return true;
10670
10671 if (QT->isIntegerType())
10672 return true;
10673
10674 if (QT->isPointerType())
10675 return true;
10676
10677 // TODO: Add support for complex types (section 3.1.2, item 2).
10678
10679 return false;
10680}
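// For illustration: a 64-bit 'double', an 'int', or any pointer sets PBV to
// true; a 16-byte struct passed by value does not, since only the scalar
// categories listed above qualify.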
10681
10682/// Computes the lane size (LS) of a return type or of an input parameter,
10683/// as defined by `LS(P)` in 3.2.1 of the AAVFABI.
10684/// TODO: Add support for references, section 3.2.1, item 1.
10685static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C) {
10686 if (!getAArch64MTV(QT, Kind) && QT.getCanonicalType()->isPointerType()) {
10687 QualType PTy = QT.getCanonicalType()->getPointeeType();
10688 if (getAArch64PBV(PTy, C))
10689 return C.getTypeSize(PTy);
10690 }
10691 if (getAArch64PBV(QT, C))
10692 return C.getTypeSize(QT);
10693
10694 return C.getTypeSize(C.getUIntPtrType());
10695}
10696
10697// Get Narrowest Data Size (NDS) and Widest Data Size (WDS) from the
10698// signature of the scalar function, as defined in 3.2.2 of the
10699// AAVFABI.
10700static std::tuple<unsigned, unsigned, bool>
10701getNDSWDS(const FunctionDecl *FD, ArrayRef<ParamAttrTy> ParamAttrs) {
10702 QualType RetType = FD->getReturnType().getCanonicalType();
10703
10704 ASTContext &C = FD->getASTContext();
10705
10706 bool OutputBecomesInput = false;
10707
10708 llvm::SmallVector<unsigned, 8> Sizes;
10709 if (!RetType->isVoidType()) {
10710 Sizes.push_back(getAArch64LS(RetType, ParamKindTy::Vector, C));
10711 if (!getAArch64PBV(RetType, C) && getAArch64MTV(RetType, {}))
10712 OutputBecomesInput = true;
10713 }
10714 for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
10715 QualType QT = FD->getParamDecl(I)->getType().getCanonicalType();
10716 Sizes.push_back(getAArch64LS(QT, ParamAttrs[I].Kind, C));
10717 }
10718
10719 assert(!Sizes.empty() && "Unable to determine NDS and WDS.");
10720 // The LS of a function parameter / return value can only be a power
10721 // of 2, starting from 8 bits, up to 128.
10722 assert(llvm::all_of(Sizes,
10723 [](unsigned Size) {
10724 return Size == 8 || Size == 16 || Size == 32 ||
10725 Size == 64 || Size == 128;
10726 }) &&
10727 "Invalid size");
10728
10729 return std::make_tuple(*std::min_element(std::begin(Sizes), std::end(Sizes)),
10730 *std::max_element(std::begin(Sizes), std::end(Sizes)),
10731 OutputBecomesInput);
10732}
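// Worked example for the computation above (the signature is hypothetical):
// for 'double foo(float x, double y)' the lane sizes are LS(return) = 64,
// LS(x) = 32 and LS(y) = 64, so NDS = 32 and WDS = 64.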
10733
10734// Function used to add the attribute. The parameter `VLEN` is
10735// templated to allow the use of "x" when targeting scalable functions
10736// for SVE.
10737template <typename T>
10738static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix,
10739 char ISA, StringRef ParSeq,
10740 StringRef MangledName, bool OutputBecomesInput,
10741 llvm::Function *Fn) {
10742 SmallString<256> Buffer;
10743 llvm::raw_svector_ostream Out(Buffer);
10744 Out << Prefix << ISA << LMask << VLEN;
10745 if (OutputBecomesInput)
10746 Out << "v";
10747 Out << ParSeq << "_" << MangledName;
10748 Fn->addFnAttr(Out.str());
10749}
10750
10751// Helper function to generate the Advanced SIMD names depending on
10752// the value of the NDS when simdlen is not present.
10753static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask,
10754 StringRef Prefix, char ISA,
10755 StringRef ParSeq, StringRef MangledName,
10756 bool OutputBecomesInput,
10757 llvm::Function *Fn) {
10758 switch (NDS) {
10759 case 8:
10760 addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
10761 OutputBecomesInput, Fn);
10762 addAArch64VectorName(16, Mask, Prefix, ISA, ParSeq, MangledName,
10763 OutputBecomesInput, Fn);
10764 break;
10765 case 16:
10766 addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
10767 OutputBecomesInput, Fn);
10768 addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
10769 OutputBecomesInput, Fn);
10770 break;
10771 case 32:
10772 addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
10773 OutputBecomesInput, Fn);
10774 addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
10775 OutputBecomesInput, Fn);
10776 break;
10777 case 64:
10778 case 128:
10779 addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
10780 OutputBecomesInput, Fn);
10781 break;
10782 default:
10783 llvm_unreachable("Scalar type is too wide.");
10784 }
10785}
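// For illustration: with NDS = 32 and mask "N", the switch above emits two
// Advanced SIMD variants, one with VLEN 2 and one with VLEN 4, i.e.
// attribute prefixes "_ZGVnN2" and "_ZGVnN4".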
10786
10787/// Emit vector function attributes for AArch64, as defined in the AAVFABI.
10788static void emitAArch64DeclareSimdFunction(
10789 CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN,
10790 ArrayRef<ParamAttrTy> ParamAttrs,
10791 OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName,
10792 char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc) {
10793
10794 // Get basic data for building the vector signature.
10795 const auto Data = getNDSWDS(FD, ParamAttrs);
10796 const unsigned NDS = std::get<0>(Data);
10797 const unsigned WDS = std::get<1>(Data);
10798 const bool OutputBecomesInput = std::get<2>(Data);
10799
10800 // Check the values provided via `simdlen` by the user.
10801 // 1. A `simdlen(1)` doesn't produce vector signatures,
10802 if (UserVLEN == 1) {
10803 unsigned DiagID = CGM.getDiags().getCustomDiagID(
10805 "The clause simdlen(1) has no effect when targeting aarch64.");
10806 CGM.getDiags().Report(SLoc, DiagID);
10807 return;
10808 }
10809
10810 // 2. Section 3.3.1, item 1: user input must be a power of 2 for
10811 // Advanced SIMD output.
10812 if (ISA == 'n' && UserVLEN && !llvm::isPowerOf2_32(UserVLEN)) {
10813 unsigned DiagID = CGM.getDiags().getCustomDiagID(
10814 DiagnosticsEngine::Warning, "The value specified in simdlen must be a "
10815 "power of 2 when targeting Advanced SIMD.");
10816 CGM.getDiags().Report(SLoc, DiagID);
10817 return;
10818 }
10819
10820 // 3. Section 3.4.1. SVE fixed length must obey the architectural
10821 // limits.
10822 if (ISA == 's' && UserVLEN != 0) {
10823 if ((UserVLEN * WDS > 2048) || (UserVLEN * WDS % 128 != 0)) {
10824 unsigned DiagID = CGM.getDiags().getCustomDiagID(
10825 DiagnosticsEngine::Warning, "The clause simdlen must fit the %0-bit "
10826 "lanes in the architectural constraints "
10827 "for SVE (min is 128-bit, max is "
10828 "2048-bit, by steps of 128-bit)");
10829 CGM.getDiags().Report(SLoc, DiagID) << WDS;
10830 return;
10831 }
10832 }
10833
10834 // Sort out parameter sequence.
10835 const std::string ParSeq = mangleVectorParameters(ParamAttrs);
10836 StringRef Prefix = "_ZGV";
10837 // Generate simdlen from user input (if any).
10838 if (UserVLEN) {
10839 if (ISA == 's') {
10840 // SVE generates only a masked function.
10841 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
10842 OutputBecomesInput, Fn);
10843 } else {
10844 assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
10845 // Advanced SIMD generates one or two functions, depending on
10846 // the `[not]inbranch` clause.
10847 switch (State) {
10848 case OMPDeclareSimdDeclAttr::BS_Undefined:
10849 addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
10850 OutputBecomesInput, Fn);
10851 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
10852 OutputBecomesInput, Fn);
10853 break;
10854 case OMPDeclareSimdDeclAttr::BS_Notinbranch:
10855 addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
10856 OutputBecomesInput, Fn);
10857 break;
10858 case OMPDeclareSimdDeclAttr::BS_Inbranch:
10859 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
10860 OutputBecomesInput, Fn);
10861 break;
10862 }
10863 }
10864 } else {
10865 // If no user simdlen is provided, follow the AAVFABI rules for
10866 // generating the vector length.
10867 if (ISA == 's') {
10868 // SVE, section 3.4.1, item 1.
10869 addAArch64VectorName("x", "M", Prefix, ISA, ParSeq, MangledName,
10870 OutputBecomesInput, Fn);
10871 } else {
10872 assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
10873 // Advanced SIMD, Section 3.3.1 of the AAVFABI, generates one or
10874 // two vector names depending on the use of the clause
10875 // `[not]inbranch`.
10876 switch (State) {
10877 case OMPDeclareSimdDeclAttr::BS_Undefined:
10878 addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
10879 OutputBecomesInput, Fn);
10880 addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
10881 OutputBecomesInput, Fn);
10882 break;
10883 case OMPDeclareSimdDeclAttr::BS_Notinbranch:
10884 addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
10885 OutputBecomesInput, Fn);
10886 break;
10887 case OMPDeclareSimdDeclAttr::BS_Inbranch:
10888 addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
10889 OutputBecomesInput, Fn);
10890 break;
10891 }
10892 }
10893 }
10894}
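// For illustration (function name hypothetical): '#pragma omp declare simd'
// on 'double foo(double);' with +sve and no simdlen yields the scalable
// masked variant "_ZGVsMxv_foo"; with +neon (NDS = 64) and no
// '[not]inbranch' it yields both "_ZGVnN2v_foo" and "_ZGVnM2v_foo".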
10895
10896void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD,
10897 llvm::Function *Fn) {
10898 ASTContext &C = CGM.getContext();
10899 FD = FD->getMostRecentDecl();
10900 while (FD) {
10901 // Map params to their positions in function decl.
10902 llvm::DenseMap<const Decl *, unsigned> ParamPositions;
10903 if (isa<CXXMethodDecl>(FD))
10904 ParamPositions.try_emplace(FD, 0);
10905 unsigned ParamPos = ParamPositions.size();
10906 for (const ParmVarDecl *P : FD->parameters()) {
10907 ParamPositions.try_emplace(P->getCanonicalDecl(), ParamPos);
10908 ++ParamPos;
10909 }
10910 for (const auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) {
10911 llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size());
10912 // Mark uniform parameters.
10913 for (const Expr *E : Attr->uniforms()) {
10914 E = E->IgnoreParenImpCasts();
10915 unsigned Pos;
10916 if (isa<CXXThisExpr>(E)) {
10917 Pos = ParamPositions[FD];
10918 } else {
10919 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
10920 ->getCanonicalDecl();
10921 auto It = ParamPositions.find(PVD);
10922 assert(It != ParamPositions.end() && "Function parameter not found");
10923 Pos = It->second;
10924 }
10925 ParamAttrs[Pos].Kind = Uniform;
10926 }
10927 // Get alignment info.
10928 auto *NI = Attr->alignments_begin();
10929 for (const Expr *E : Attr->aligneds()) {
10930 E = E->IgnoreParenImpCasts();
10931 unsigned Pos;
10932 QualType ParmTy;
10933 if (isa<CXXThisExpr>(E)) {
10934 Pos = ParamPositions[FD];
10935 ParmTy = E->getType();
10936 } else {
10937 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
10938 ->getCanonicalDecl();
10939 auto It = ParamPositions.find(PVD);
10940 assert(It != ParamPositions.end() && "Function parameter not found");
10941 Pos = It->second;
10942 ParmTy = PVD->getType();
10943 }
10944 ParamAttrs[Pos].Alignment =
10945 (*NI)
10946 ? (*NI)->EvaluateKnownConstInt(C)
10947 : llvm::APSInt::getUnsigned(
10948 C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy))
10949 .getQuantity());
10950 ++NI;
10951 }
10952 // Mark linear parameters.
10953 auto *SI = Attr->steps_begin();
10954 auto *MI = Attr->modifiers_begin();
10955 for (const Expr *E : Attr->linears()) {
10956 E = E->IgnoreParenImpCasts();
10957 unsigned Pos;
10958 bool IsReferenceType = false;
10959 // Rescaling factor needed to compute the linear parameter
10960 // value in the mangled name.
10961 unsigned PtrRescalingFactor = 1;
10962 if (isa<CXXThisExpr>(E)) {
10963 Pos = ParamPositions[FD];
10964 auto *P = cast<PointerType>(E->getType());
10965 PtrRescalingFactor = CGM.getContext()
10966 .getTypeSizeInChars(P->getPointeeType())
10967 .getQuantity();
10968 } else {
10969 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
10970 ->getCanonicalDecl();
10971 auto It = ParamPositions.find(PVD);
10972 assert(It != ParamPositions.end() && "Function parameter not found");
10973 Pos = It->second;
10974 if (auto *P = dyn_cast<PointerType>(PVD->getType()))
10975 PtrRescalingFactor = CGM.getContext()
10976 .getTypeSizeInChars(P->getPointeeType())
10977 .getQuantity();
10978 else if (PVD->getType()->isReferenceType()) {
10979 IsReferenceType = true;
10980 PtrRescalingFactor =
10981 CGM.getContext()
10982 .getTypeSizeInChars(PVD->getType().getNonReferenceType())
10983 .getQuantity();
10984 }
10985 }
10986 ParamAttrTy &ParamAttr = ParamAttrs[Pos];
10987 if (*MI == OMPC_LINEAR_ref)
10988 ParamAttr.Kind = LinearRef;
10989 else if (*MI == OMPC_LINEAR_uval)
10990 ParamAttr.Kind = LinearUVal;
10991 else if (IsReferenceType)
10992 ParamAttr.Kind = LinearVal;
10993 else
10994 ParamAttr.Kind = Linear;
10995 // Assuming a stride of 1, for `linear` without modifiers.
10996 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(1);
10997 if (*SI) {
10998 Expr::EvalResult Result;
10999 if (!(*SI)->EvaluateAsInt(Result, C, Expr::SE_AllowSideEffects)) {
11000 if (const auto *DRE =
11001 cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) {
11002 if (const auto *StridePVD =
11003 dyn_cast<ParmVarDecl>(DRE->getDecl())) {
11004 ParamAttr.HasVarStride = true;
11005 auto It = ParamPositions.find(StridePVD->getCanonicalDecl());
11006 assert(It != ParamPositions.end() &&
11007 "Function parameter not found");
11008 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(It->second);
11009 }
11010 }
11011 } else {
11012 ParamAttr.StrideOrArg = Result.Val.getInt();
11013 }
11014 }
11015 // If we are using a linear clause on a pointer, we need to
11016 // rescale the value of linear_step with the byte size of the
11017 // pointee type.
11018 if (!ParamAttr.HasVarStride &&
11019 (ParamAttr.Kind == Linear || ParamAttr.Kind == LinearRef))
11020 ParamAttr.StrideOrArg = ParamAttr.StrideOrArg * PtrRescalingFactor;
11021 ++SI;
11022 ++MI;
11023 }
11024 llvm::APSInt VLENVal;
11025 SourceLocation ExprLoc;
11026 const Expr *VLENExpr = Attr->getSimdlen();
11027 if (VLENExpr) {
11028 VLENVal = VLENExpr->EvaluateKnownConstInt(C);
11029 ExprLoc = VLENExpr->getExprLoc();
11030 }
11031 OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState();
11032 if (CGM.getTriple().isX86()) {
11033 emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State);
11034 } else if (CGM.getTriple().getArch() == llvm::Triple::aarch64) {
11035 unsigned VLEN = VLENVal.getExtValue();
11036 StringRef MangledName = Fn->getName();
11037 if (CGM.getTarget().hasFeature("sve"))
11038 emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
11039 MangledName, 's', 128, Fn, ExprLoc);
11040 else if (CGM.getTarget().hasFeature("neon"))
11041 emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
11042 MangledName, 'n', 128, Fn, ExprLoc);
11043 }
11044 }
11045 FD = FD->getPreviousDecl();
11046 }
11047}
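// For illustration of the linear-step rescaling above (declaration is
// hypothetical): for
//   #pragma omp declare simd linear(p: 2)
//   void foo(int *p);
// the step 2 is multiplied by sizeof(int) == 4, so 'p' mangles as "l8".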
11048
11049namespace {
11050/// Cleanup action for doacross support.
11051class DoacrossCleanupTy final : public EHScopeStack::Cleanup {
11052public:
11053 static const int DoacrossFinArgs = 2;
11054
11055private:
11056 llvm::FunctionCallee RTLFn;
11057 llvm::Value *Args[DoacrossFinArgs];
11058
11059public:
11060 DoacrossCleanupTy(llvm::FunctionCallee RTLFn,
11061 ArrayRef<llvm::Value *> CallArgs)
11062 : RTLFn(RTLFn) {
11063 assert(CallArgs.size() == DoacrossFinArgs);
11064 std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
11065 }
11066 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
11067 if (!CGF.HaveInsertPoint())
11068 return;
11069 CGF.EmitRuntimeCall(RTLFn, Args);
11070 }
11071};
11072} // namespace
11073
11074void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF,
11075 const OMPLoopDirective &D,
11076 ArrayRef<Expr *> NumIterations) {
11077 if (!CGF.HaveInsertPoint())
11078 return;
11079
11080 ASTContext &C = CGM.getContext();
11081 QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
11082 RecordDecl *RD;
11083 if (KmpDimTy.isNull()) {
11084 // Build struct kmp_dim { // loop bounds info casted to kmp_int64
11085 // kmp_int64 lo; // lower
11086 // kmp_int64 up; // upper
11087 // kmp_int64 st; // stride
11088 // };
11089 RD = C.buildImplicitRecord("kmp_dim");
11090 RD->startDefinition();
11091 addFieldToRecordDecl(C, RD, Int64Ty);
11092 addFieldToRecordDecl(C, RD, Int64Ty);
11093 addFieldToRecordDecl(C, RD, Int64Ty);
11094 RD->completeDefinition();
11095 KmpDimTy = C.getRecordType(RD);
11096 } else {
11097 RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl());
11098 }
11099 llvm::APInt Size(/*numBits=*/32, NumIterations.size());
11100 QualType ArrayTy = C.getConstantArrayType(KmpDimTy, Size, nullptr,
11101 ArraySizeModifier::Normal, /*IndexTypeQuals=*/0);
11102
11103 Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
11104 CGF.EmitNullInitialization(DimsAddr, ArrayTy);
11105 enum { LowerFD = 0, UpperFD, StrideFD };
11106 // Fill dims with data.
11107 for (unsigned I = 0, E = NumIterations.size(); I < E; ++I) {
11108 LValue DimsLVal = CGF.MakeAddrLValue(
11109 CGF.Builder.CreateConstArrayGEP(DimsAddr, I), KmpDimTy);
11110 // dims.upper = num_iterations;
11111 LValue UpperLVal = CGF.EmitLValueForField(
11112 DimsLVal, *std::next(RD->field_begin(), UpperFD));
11113 llvm::Value *NumIterVal = CGF.EmitScalarConversion(
11114 CGF.EmitScalarExpr(NumIterations[I]), NumIterations[I]->getType(),
11115 Int64Ty, NumIterations[I]->getExprLoc());
11116 CGF.EmitStoreOfScalar(NumIterVal, UpperLVal);
11117 // dims.stride = 1;
11118 LValue StrideLVal = CGF.EmitLValueForField(
11119 DimsLVal, *std::next(RD->field_begin(), StrideFD));
11120 CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1),
11121 StrideLVal);
11122 }
11123
11124 // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid,
11125 // kmp_int32 num_dims, struct kmp_dim * dims);
11126 llvm::Value *Args[] = {
11127 emitUpdateLocation(CGF, D.getBeginLoc()),
11128 getThreadID(CGF, D.getBeginLoc()),
11129 llvm::ConstantInt::getSigned(CGM.Int32Ty, NumIterations.size()),
11130 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
11131 CGF.Builder.CreateConstArrayGEP(DimsAddr, 0).getPointer(),
11132 CGM.VoidPtrTy)};
11133
11134 llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
11135 CGM.getModule(), OMPRTL___kmpc_doacross_init);
11136 CGF.EmitRuntimeCall(RTLFn, Args);
11137 llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = {
11138 emitUpdateLocation(CGF, D.getEndLoc()), getThreadID(CGF, D.getEndLoc())};
11139 llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
11140 CGM.getModule(), OMPRTL___kmpc_doacross_fini);
11141 CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
11142 llvm::ArrayRef(FiniArgs));
11143}
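// For illustration: for a doacross loop nest such as
// '#pragma omp for ordered(1)', the code above leaves 'lo' zero-initialized,
// stores the emitted iteration count into 'up' and 1 into 'st', then calls
// __kmpc_doacross_init(loc, gtid, /*num_dims=*/1, dims) and pushes a cleanup
// that calls __kmpc_doacross_fini at region exit.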
11144
11145template <typename T>
11146static void EmitDoacrossOrdered(CodeGenFunction &CGF, CodeGenModule &CGM,
11147 const T *C, llvm::Value *ULoc,
11148 llvm::Value *ThreadID) {
11149 QualType Int64Ty =
11150 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
11151 llvm::APInt Size(/*numBits=*/32, C->getNumLoops());
11152 QualType ArrayTy = CGM.getContext().getConstantArrayType(
11153 Int64Ty, Size, nullptr, ArraySizeModifier::Normal, 0);
11154 Address CntAddr = CGF.CreateMemTemp(ArrayTy, ".cnt.addr");
11155 for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) {
11156 const Expr *CounterVal = C->getLoopData(I);
11157 assert(CounterVal);
11158 llvm::Value *CntVal = CGF.EmitScalarConversion(
11159 CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty,
11160 CounterVal->getExprLoc());
11161 CGF.EmitStoreOfScalar(CntVal, CGF.Builder.CreateConstArrayGEP(CntAddr, I),
11162 /*Volatile=*/false, Int64Ty);
11163 }
11164 llvm::Value *Args[] = {
11165 ULoc, ThreadID, CGF.Builder.CreateConstArrayGEP(CntAddr, 0).getPointer()};
11166 llvm::FunctionCallee RTLFn;
11167 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
11168 OMPDoacrossKind<T> ODK;
11169 if (ODK.isSource(C)) {
11170 RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
11171 OMPRTL___kmpc_doacross_post);
11172 } else {
11173 assert(ODK.isSink(C) && "Expect sink modifier.");
11174 RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
11175 OMPRTL___kmpc_doacross_wait);
11176 }
11177 CGF.EmitRuntimeCall(RTLFn, Args);
11178}
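// For illustration: '#pragma omp ordered depend(source)' reaches the helper
// above with a source modifier and lowers to __kmpc_doacross_post, while
// 'depend(sink: i-1)' lowers to __kmpc_doacross_wait on the sink vector.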
11179
11180void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
11181 const OMPDependClause *C) {
11182 return EmitDoacrossOrdered<OMPDependClause>(
11183 CGF, CGM, C, emitUpdateLocation(CGF, C->getBeginLoc()),
11184 getThreadID(CGF, C->getBeginLoc()));
11185}
11186
11187void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
11188 const OMPDoacrossClause *C) {
11189 return EmitDoacrossOrdered<OMPDoacrossClause>(
11190 CGF, CGM, C, emitUpdateLocation(CGF, C->getBeginLoc()),
11191 getThreadID(CGF, C->getBeginLoc()));
11192}
11193
11194void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, SourceLocation Loc,
11195 llvm::FunctionCallee Callee,
11196 ArrayRef<llvm::Value *> Args) const {
11197 assert(Loc.isValid() && "Outlined function call location must be valid.");
11198 auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
11199
11200 if (auto *Fn = dyn_cast<llvm::Function>(Callee.getCallee())) {
11201 if (Fn->doesNotThrow()) {
11202 CGF.EmitNounwindRuntimeCall(Fn, Args);
11203 return;
11204 }
11205 }
11206 CGF.EmitRuntimeCall(Callee, Args);
11207}
11208
11209void CGOpenMPRuntime::emitOutlinedFunctionCall(
11210 CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn,
11211 ArrayRef<llvm::Value *> Args) const {
11212 emitCall(CGF, Loc, OutlinedFn, Args);
11213}
11214
11215void CGOpenMPRuntime::emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) {
11216 if (const auto *FD = dyn_cast<FunctionDecl>(D))
11217 if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(FD))
11218 HasEmittedDeclareTargetRegion = true;
11219}
11220
11221Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF,
11222 const VarDecl *NativeParam,
11223 const VarDecl *TargetParam) const {
11224 return CGF.GetAddrOfLocalVar(NativeParam);
11225}
11226
11227/// Return allocator value from expression, or return a null allocator (default
11228/// when no allocator specified).
11229static llvm::Value *getAllocatorVal(CodeGenFunction &CGF,
11230 const Expr *Allocator) {
11231 llvm::Value *AllocVal;
11232 if (Allocator) {
11233 AllocVal = CGF.EmitScalarExpr(Allocator);
11234 // According to the standard, the original allocator type is a enum
11235 // (integer). Convert to pointer type, if required.
11236 AllocVal = CGF.EmitScalarConversion(AllocVal, Allocator->getType(),
11237 CGF.getContext().VoidPtrTy,
11238 Allocator->getExprLoc());
11239 } else {
11240 // If no allocator specified, it defaults to the null allocator.
11241 AllocVal = llvm::Constant::getNullValue(
11242 CGF.CGM.getTypes().ConvertTypeForMem(CGF.getContext().VoidPtrTy));
11243 }
11244 return AllocVal;
11245}
11246
11247/// Return the alignment from an allocate directive if present.
11248static llvm::Value *getAlignmentValue(CodeGenModule &CGM, const VarDecl *VD) {
11249 std::optional<CharUnits> AllocateAlignment = CGM.getOMPAllocateAlignment(VD);
11250
11251 if (!AllocateAlignment)
11252 return nullptr;
11253
11254 return llvm::ConstantInt::get(CGM.SizeTy, AllocateAlignment->getQuantity());
11255}
11256
11257Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF,
11258 const VarDecl *VD) {
11259 if (!VD)
11260 return Address::invalid();
11261 Address UntiedAddr = Address::invalid();
11262 Address UntiedRealAddr = Address::invalid();
11263 auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
11264 if (It != FunctionToUntiedTaskStackMap.end()) {
11265 const UntiedLocalVarsAddressesMap &UntiedData =
11266 UntiedLocalVarsStack[It->second];
11267 auto I = UntiedData.find(VD);
11268 if (I != UntiedData.end()) {
11269 UntiedAddr = I->second.first;
11270 UntiedRealAddr = I->second.second;
11271 }
11272 }
11273 const VarDecl *CVD = VD->getCanonicalDecl();
11274 if (CVD->hasAttr<OMPAllocateDeclAttr>()) {
11275 // Use the default allocation.
11276 if (!isAllocatableDecl(VD))
11277 return UntiedAddr;
11278 llvm::Value *Size;
11279 CharUnits Align = CGM.getContext().getDeclAlign(CVD);
11280 if (CVD->getType()->isVariablyModifiedType()) {
11281 Size = CGF.getTypeSize(CVD->getType());
11282 // Align the size: ((size + align - 1) / align) * align
11283 Size = CGF.Builder.CreateNUWAdd(
11284 Size, CGM.getSize(Align - CharUnits::fromQuantity(1)));
11285 Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align));
11286 Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align));
11287 } else {
11288 CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType());
11289 Size = CGM.getSize(Sz.alignTo(Align));
11290 }
11291 llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc());
11292 const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
11293 const Expr *Allocator = AA->getAllocator();
11294 llvm::Value *AllocVal = getAllocatorVal(CGF, Allocator);
11295 llvm::Value *Alignment = getAlignmentValue(CGM, CVD);
11296 SmallVector<llvm::Value *, 4> Args;
11297 Args.push_back(ThreadID);
11298 if (Alignment)
11299 Args.push_back(Alignment);
11300 Args.push_back(Size);
11301 Args.push_back(AllocVal);
11302 llvm::omp::RuntimeFunction FnID =
11303 Alignment ? OMPRTL___kmpc_aligned_alloc : OMPRTL___kmpc_alloc;
11304 llvm::Value *Addr = CGF.EmitRuntimeCall(
11305 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), FnID), Args,
11306 getName({CVD->getName(), ".void.addr"}));
11307 llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
11308 CGM.getModule(), OMPRTL___kmpc_free);
11309 QualType Ty = CVD->getType();
11310 Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
11311 Addr, CGF.ConvertTypeForMem(Ty), getName({CVD->getName(), ".addr"}));
11312 if (UntiedAddr.isValid())
11313 CGF.EmitStoreOfScalar(Addr, UntiedAddr, /*Volatile=*/false, Ty);
11314
11315 // Cleanup action for allocate support.
11316 class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup {
11317 llvm::FunctionCallee RTLFn;
11318 SourceLocation::UIntTy LocEncoding;
11319 Address Addr;
11320 const Expr *AllocExpr;
11321
11322 public:
11323 OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn,
11324 SourceLocation::UIntTy LocEncoding, Address Addr,
11325 const Expr *AllocExpr)
11326 : RTLFn(RTLFn), LocEncoding(LocEncoding), Addr(Addr),
11327 AllocExpr(AllocExpr) {}
11328 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
11329 if (!CGF.HaveInsertPoint())
11330 return;
11331 llvm::Value *Args[3];
11332 Args[0] = CGF.CGM.getOpenMPRuntime().getThreadID(
11333 CGF, SourceLocation::getFromRawEncoding(LocEncoding));
11334 Args[1] = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
11335 Addr.getPointer(), CGF.VoidPtrTy);
11336 llvm::Value *AllocVal = getAllocatorVal(CGF, AllocExpr);
11337 Args[2] = AllocVal;
11338 CGF.EmitRuntimeCall(RTLFn, Args);
11339 }
11340 };
11341 Address VDAddr =
11342 UntiedRealAddr.isValid()
11343 ? UntiedRealAddr
11344 : Address(Addr, CGF.ConvertTypeForMem(CVD->getType()), Align);
11345 CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(
11346 NormalAndEHCleanup, FiniRTLFn, CVD->getLocation().getRawEncoding(),
11347 VDAddr, Allocator);
11348 if (UntiedRealAddr.isValid())
11349 if (auto *Region =
11350 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
11351 Region->emitUntiedSwitch(CGF);
11352 return VDAddr;
11353 }
11354 return UntiedAddr;
11355}
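// For illustration (variable and clauses hypothetical): for
//   int x;
//   #pragma omp allocate(x) allocator(omp_high_bw_mem_alloc) align(64)
// the path above emits __kmpc_aligned_alloc(gtid, 64, size, allocator),
// casts the result to the variable's type, and pushes a cleanup that calls
// __kmpc_free at scope exit.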
11356
11357bool CGOpenMPRuntime::isLocalVarInUntiedTask(CodeGenFunction &CGF,
11358 const VarDecl *VD) const {
11359 auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
11360 if (It == FunctionToUntiedTaskStackMap.end())
11361 return false;
11362 return UntiedLocalVarsStack[It->second].count(VD) > 0;
11363}
11364
11365CGOpenMPRuntime::NontemporalDeclsRAII::NontemporalDeclsRAII(
11366 CodeGenModule &CGM, const OMPLoopDirective &S)
11367 : CGM(CGM), NeedToPush(S.hasClausesOfKind<OMPNontemporalClause>()) {
11368 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
11369 if (!NeedToPush)
11370 return;
11371 NontemporalDeclsSet &DS =
11372 CGM.getOpenMPRuntime().NontemporalDeclsStack.emplace_back();
11373 for (const auto *C : S.getClausesOfKind<OMPNontemporalClause>()) {
11374 for (const Stmt *Ref : C->private_refs()) {
11375 const auto *SimpleRefExpr = cast<Expr>(Ref)->IgnoreParenImpCasts();
11376 const ValueDecl *VD;
11377 if (const auto *DRE = dyn_cast<DeclRefExpr>(SimpleRefExpr)) {
11378 VD = DRE->getDecl();
11379 } else {
11380 const auto *ME = cast<MemberExpr>(SimpleRefExpr);
11381 assert((ME->isImplicitCXXThis() ||
11382 isa<CXXThisExpr>(ME->getBase()->IgnoreParenImpCasts())) &&
11383 "Expected member of current class.");
11384 VD = ME->getMemberDecl();
11385 }
11386 DS.insert(VD);
11387 }
11388 }
11389}
11390
11391CGOpenMPRuntime::NontemporalDeclsRAII::~NontemporalDeclsRAII() {
11392 if (!NeedToPush)
11393 return;
11394 CGM.getOpenMPRuntime().NontemporalDeclsStack.pop_back();
11395}
11396
11397CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::UntiedTaskLocalDeclsRAII(
11398 CodeGenFunction &CGF,
11399 const llvm::MapVector<CanonicalDeclPtr<const VarDecl>,
11400 std::pair<Address, Address>> &LocalVars)
11401 : CGM(CGF.CGM), NeedToPush(!LocalVars.empty()) {
11402 if (!NeedToPush)
11403 return;
11404 CGM.getOpenMPRuntime().FunctionToUntiedTaskStackMap.try_emplace(
11405 CGF.CurFn, CGM.getOpenMPRuntime().UntiedLocalVarsStack.size());
11406 CGM.getOpenMPRuntime().UntiedLocalVarsStack.push_back(LocalVars);
11407}
11408
11409CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::~UntiedTaskLocalDeclsRAII() {
11410 if (!NeedToPush)
11411 return;
11412 CGM.getOpenMPRuntime().UntiedLocalVarsStack.pop_back();
11413}
11414
11415bool CGOpenMPRuntime::isNontemporalDecl(const ValueDecl *VD) const {
11416 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
11417
11418 return llvm::any_of(
11419 CGM.getOpenMPRuntime().NontemporalDeclsStack,
11420 [VD](const NontemporalDeclsSet &Set) { return Set.contains(VD); });
11421}
11422
11423void CGOpenMPRuntime::LastprivateConditionalRAII::tryToDisableInnerAnalysis(
11424 const OMPExecutableDirective &S,
11425 llvm::DenseSet<CanonicalDeclPtr<const Decl>> &NeedToAddForLPCsAsDisabled)
11426 const {
11427 llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToCheckForLPCs;
11428 // Vars in target/task regions must be excluded completely.
11429 if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()) ||
11430 isOpenMPTaskingDirective(S.getDirectiveKind())) {
11431 SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
11432 getOpenMPCaptureRegions(CaptureRegions, S.getDirectiveKind());
11433 const CapturedStmt *CS = S.getCapturedStmt(CaptureRegions.front());
11434 for (const CapturedStmt::Capture &Cap : CS->captures()) {
11435 if (Cap.capturesVariable() || Cap.capturesVariableByCopy())
11436 NeedToCheckForLPCs.insert(Cap.getCapturedVar());
11437 }
11438 }
11439 // Exclude vars in private clauses.
11440 for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
11441 for (const Expr *Ref : C->varlists()) {
11442 if (!Ref->getType()->isScalarType())
11443 continue;
11444 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11445 if (!DRE)
11446 continue;
11447 NeedToCheckForLPCs.insert(DRE->getDecl());
11448 }
11449 }
11450 for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
11451 for (const Expr *Ref : C->varlists()) {
11452 if (!Ref->getType()->isScalarType())
11453 continue;
11454 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11455 if (!DRE)
11456 continue;
11457 NeedToCheckForLPCs.insert(DRE->getDecl());
11458 }
11459 }
11460 for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
11461 for (const Expr *Ref : C->varlists()) {
11462 if (!Ref->getType()->isScalarType())
11463 continue;
11464 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11465 if (!DRE)
11466 continue;
11467 NeedToCheckForLPCs.insert(DRE->getDecl());
11468 }
11469 }
11470 for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
11471 for (const Expr *Ref : C->varlists()) {
11472 if (!Ref->getType()->isScalarType())
11473 continue;
11474 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11475 if (!DRE)
11476 continue;
11477 NeedToCheckForLPCs.insert(DRE->getDecl());
11478 }
11479 }
11480 for (const auto *C : S.getClausesOfKind<OMPLinearClause>()) {
11481 for (const Expr *Ref : C->varlists()) {
11482 if (!Ref->getType()->isScalarType())
11483 continue;
11484 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11485 if (!DRE)
11486 continue;
11487 NeedToCheckForLPCs.insert(DRE->getDecl());
11488 }
11489 }
11490 for (const Decl *VD : NeedToCheckForLPCs) {
11491 for (const LastprivateConditionalData &Data :
11492 llvm::reverse(CGM.getOpenMPRuntime().LastprivateConditionalStack)) {
11493 if (Data.DeclToUniqueName.count(VD) > 0) {
11494 if (!Data.Disabled)
11495 NeedToAddForLPCsAsDisabled.insert(VD);
11496 break;
11497 }
11498 }
11499 }
11500}
11501
11502CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
11503 CodeGenFunction &CGF, const OMPExecutableDirective &S, LValue IVLVal)
11504 : CGM(CGF.CGM),
11505 Action((CGM.getLangOpts().OpenMP >= 50 &&
11506 llvm::any_of(S.getClausesOfKind<OMPLastprivateClause>(),
11507 [](const OMPLastprivateClause *C) {
11508 return C->getKind() ==
11509 OMPC_LASTPRIVATE_conditional;
11510 }))
11511 ? ActionToDo::PushAsLastprivateConditional
11512 : ActionToDo::DoNotPush) {
11513 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
11514 if (CGM.getLangOpts().OpenMP < 50 || Action == ActionToDo::DoNotPush)
11515 return;
11516 assert(Action == ActionToDo::PushAsLastprivateConditional &&
11517 "Expected a push action.");
11518 LastprivateConditionalData &Data =
11519 CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
11520 for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
11521 if (C->getKind() != OMPC_LASTPRIVATE_conditional)
11522 continue;
11523
11524 for (const Expr *Ref : C->varlists()) {
11525 Data.DeclToUniqueName.insert(std::make_pair(
11526 cast<DeclRefExpr>(Ref->IgnoreParenImpCasts())->getDecl(),
11527 SmallString<16>(generateUniqueName(CGM, "pl_cond", Ref))));
11528 }
11529 }
11530 Data.IVLVal = IVLVal;
11531 Data.Fn = CGF.CurFn;
11532}
11533
11534CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
11535 CodeGenFunction &CGF, const OMPExecutableDirective &S)
11536 : CGM(CGF.CGM), Action(ActionToDo::DoNotPush) {
11537 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
11538 if (CGM.getLangOpts().OpenMP < 50)
11539 return;
11540 llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToAddForLPCsAsDisabled;
11541 tryToDisableInnerAnalysis(S, NeedToAddForLPCsAsDisabled);
11542 if (!NeedToAddForLPCsAsDisabled.empty()) {
11543 Action = ActionToDo::DisableLastprivateConditional;
11544 LastprivateConditionalData &Data =
11545 CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
11546 for (const Decl *VD : NeedToAddForLPCsAsDisabled)
11547 Data.DeclToUniqueName.insert(std::make_pair(VD, SmallString<16>()));
11548 Data.Fn = CGF.CurFn;
11549 Data.Disabled = true;
11550 }
11551}
11552
11553CGOpenMPRuntime::LastprivateConditionalRAII
11554CGOpenMPRuntime::LastprivateConditionalRAII::disable(
11555 CodeGenFunction &CGF, const OMPExecutableDirective &S) {
11556 return LastprivateConditionalRAII(CGF, S);
11557}
11558
11559CGOpenMPRuntime::LastprivateConditionalRAII::~LastprivateConditionalRAII() {
11560 if (CGM.getLangOpts().OpenMP < 50)
11561 return;
11562 if (Action == ActionToDo::DisableLastprivateConditional) {
11563 assert(CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
11564 "Expected list of disabled private vars.");
11565 CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
11566 }
11567 if (Action == ActionToDo::PushAsLastprivateConditional) {
11568 assert(
11570 "Expected list of lastprivate conditional vars.");
11572 }
11573}
11574
11575Address CGOpenMPRuntime::emitLastprivateConditionalInit(CodeGenFunction &CGF,
11576 const VarDecl *VD) {
11577 ASTContext &C = CGM.getContext();
11578 auto I = LastprivateConditionalToTypes.find(CGF.CurFn);
11579 if (I == LastprivateConditionalToTypes.end())
11580 I = LastprivateConditionalToTypes.try_emplace(CGF.CurFn).first;
11581 QualType NewType;
11582 const FieldDecl *VDField;
11583 const FieldDecl *FiredField;
11584 LValue BaseLVal;
11585 auto VI = I->getSecond().find(VD);
11586 if (VI == I->getSecond().end()) {
11587 RecordDecl *RD = C.buildImplicitRecord("lastprivate.conditional");
11588 RD->startDefinition();
11589 VDField = addFieldToRecordDecl(C, RD, VD->getType().getNonReferenceType());
11590 FiredField = addFieldToRecordDecl(C, RD, C.CharTy);
11591 RD->completeDefinition();
11592 NewType = C.getRecordType(RD);
11593 Address Addr = CGF.CreateMemTemp(NewType, C.getDeclAlign(VD), VD->getName());
11594 BaseLVal = CGF.MakeAddrLValue(Addr, NewType, AlignmentSource::Decl);
11595 I->getSecond().try_emplace(VD, NewType, VDField, FiredField, BaseLVal);
11596 } else {
11597 NewType = std::get<0>(VI->getSecond());
11598 VDField = std::get<1>(VI->getSecond());
11599 FiredField = std::get<2>(VI->getSecond());
11600 BaseLVal = std::get<3>(VI->getSecond());
11601 }
11602 LValue FiredLVal =
11603 CGF.EmitLValueForField(BaseLVal, FiredField);
11604 CGF.EmitStoreOfScalar(
11605 llvm::ConstantInt::getNullValue(CGF.ConvertTypeForMem(C.CharTy)),
11606 FiredLVal);
11607 return CGF.EmitLValueForField(BaseLVal, VDField).getAddress(CGF);
11608}
11609
11610namespace {
11611/// Checks if the lastprivate conditional variable is referenced in LHS.
11612class LastprivateConditionalRefChecker final
11613 : public ConstStmtVisitor<LastprivateConditionalRefChecker, bool> {
11614 ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM;
11615 const Expr *FoundE = nullptr;
11616 const Decl *FoundD = nullptr;
11617 StringRef UniqueDeclName;
11618 LValue IVLVal;
11619 llvm::Function *FoundFn = nullptr;
11620 SourceLocation Loc;
11621
11622public:
11623 bool VisitDeclRefExpr(const DeclRefExpr *E) {
11624 for (const CGOpenMPRuntime::LastprivateConditionalData &D :
11625 llvm::reverse(LPM)) {
11626 auto It = D.DeclToUniqueName.find(E->getDecl());
11627 if (It == D.DeclToUniqueName.end())
11628 continue;
11629 if (D.Disabled)
11630 return false;
11631 FoundE = E;
11632 FoundD = E->getDecl()->getCanonicalDecl();
11633 UniqueDeclName = It->second;
11634 IVLVal = D.IVLVal;
11635 FoundFn = D.Fn;
11636 break;
11637 }
11638 return FoundE == E;
11639 }
11640 bool VisitMemberExpr(const MemberExpr *E) {
11641 if (!CodeGenFunction::IsWrappedCXXThis(E->getBase()))
11642 return false;
11643 for (const CGOpenMPRuntime::LastprivateConditionalData &D :
11644 llvm::reverse(LPM)) {
11645 auto It = D.DeclToUniqueName.find(E->getMemberDecl());
11646 if (It == D.DeclToUniqueName.end())
11647 continue;
11648 if (D.Disabled)
11649 return false;
11650 FoundE = E;
11651 FoundD = E->getMemberDecl()->getCanonicalDecl();
11652 UniqueDeclName = It->second;
11653 IVLVal = D.IVLVal;
11654 FoundFn = D.Fn;
11655 break;
11656 }
11657 return FoundE == E;
11658 }
11659 bool VisitStmt(const Stmt *S) {
11660 for (const Stmt *Child : S->children()) {
11661 if (!Child)
11662 continue;
11663 if (const auto *E = dyn_cast<Expr>(Child))
11664 if (!E->isGLValue())
11665 continue;
11666 if (Visit(Child))
11667 return true;
11668 }
11669 return false;
11670 }
11671 explicit LastprivateConditionalRefChecker(
11672 ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM)
11673 : LPM(LPM) {}
11674 std::tuple<const Expr *, const Decl *, StringRef, LValue, llvm::Function *>
11675 getFoundData() const {
11676 return std::make_tuple(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn);
11677 }
11678};
11679} // namespace
11680
11681void CGOpenMPRuntime::emitLastprivateConditionalUpdate(CodeGenFunction &CGF,
11682 LValue IVLVal,
11683 StringRef UniqueDeclName,
11684 LValue LVal,
11685 SourceLocation Loc) {
11686 // Last updated loop counter for the lastprivate conditional var.
11687 // int<xx> last_iv = 0;
11688 llvm::Type *LLIVTy = CGF.ConvertTypeForMem(IVLVal.getType());
11689 llvm::Constant *LastIV = OMPBuilder.getOrCreateInternalVariable(
11690 LLIVTy, getName({UniqueDeclName, "iv"}));
11691 cast<llvm::GlobalVariable>(LastIV)->setAlignment(
11692 IVLVal.getAlignment().getAsAlign());
11693 LValue LastIVLVal = CGF.MakeNaturalAlignAddrLValue(LastIV, IVLVal.getType());
11694
11695 // Last value of the lastprivate conditional.
11696 // decltype(priv_a) last_a;
11697 llvm::GlobalVariable *Last = OMPBuilder.getOrCreateInternalVariable(
11698 CGF.ConvertTypeForMem(LVal.getType()), UniqueDeclName);
11699 Last->setAlignment(LVal.getAlignment().getAsAlign());
11700 LValue LastLVal = CGF.MakeAddrLValue(
11701 Address(Last, Last->getValueType(), LVal.getAlignment()), LVal.getType());
11702
11703 // Global loop counter. Required to handle inner parallel-for regions.
11704 // iv
11705 llvm::Value *IVVal = CGF.EmitLoadOfScalar(IVLVal, Loc);
11706
11707 // #pragma omp critical(a)
11708 // if (last_iv <= iv) {
11709 // last_iv = iv;
11710 // last_a = priv_a;
11711 // }
11712 auto &&CodeGen = [&LastIVLVal, &IVLVal, IVVal, &LVal, &LastLVal,
11713 Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
11714 Action.Enter(CGF);
11715 llvm::Value *LastIVVal = CGF.EmitLoadOfScalar(LastIVLVal, Loc);
11716 // (last_iv <= iv) ? Check if the variable is updated and store new
11717 // value in global var.
11718 llvm::Value *CmpRes;
11719 if (IVLVal.getType()->isSignedIntegerType()) {
11720 CmpRes = CGF.Builder.CreateICmpSLE(LastIVVal, IVVal);
11721 } else {
11722 assert(IVLVal.getType()->isUnsignedIntegerType() &&
11723 "Loop iteration variable must be integer.");
11724 CmpRes = CGF.Builder.CreateICmpULE(LastIVVal, IVVal);
11725 }
11726 llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lp_cond_then");
11727 llvm::BasicBlock *ExitBB = CGF.createBasicBlock("lp_cond_exit");
11728 CGF.Builder.CreateCondBr(CmpRes, ThenBB, ExitBB);
11729 // {
11730 CGF.EmitBlock(ThenBB);
11731
11732 // last_iv = iv;
11733 CGF.EmitStoreOfScalar(IVVal, LastIVLVal);
11734
11735 // last_a = priv_a;
11736 switch (CGF.getEvaluationKind(LVal.getType())) {
11737 case TEK_Scalar: {
11738 llvm::Value *PrivVal = CGF.EmitLoadOfScalar(LVal, Loc);
11739 CGF.EmitStoreOfScalar(PrivVal, LastLVal);
11740 break;
11741 }
11742 case TEK_Complex: {
11743 CodeGenFunction::ComplexPairTy PrivVal = CGF.EmitLoadOfComplex(LVal, Loc);
11744 CGF.EmitStoreOfComplex(PrivVal, LastLVal, /*isInit=*/false);
11745 break;
11746 }
11747 case TEK_Aggregate:
11748 llvm_unreachable(
11749 "Aggregates are not supported in lastprivate conditional.");
11750 }
11751 // }
11752 CGF.EmitBranch(ExitBB);
11753 // There is no need to emit line number for unconditional branch.
11754 (void)ApplyDebugLocation::CreateEmpty(CGF);
11755 CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
11756 };
11757
11758 if (CGM.getLangOpts().OpenMPSimd) {
11759 // Do not emit as a critical region as no parallel region could be emitted.
11760 RegionCodeGenTy ThenRCG(CodeGen);
11761 ThenRCG(CGF);
11762 } else {
11763 emitCriticalRegion(CGF, UniqueDeclName, CodeGen, Loc);
11764 }
11765}
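// For illustration: for 'lastprivate(conditional: a)' the helper above keeps
// two internal globals, one holding the last value of 'a' and one (suffixed
// ".iv") holding the loop iteration at which it was written; the
// compare-and-store sketched in the comment above runs inside a critical
// region unless compiling in simd-only mode.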
11766
11767void CGOpenMPRuntime::checkAndEmitLastprivateConditional(CodeGenFunction &CGF,
11768 const Expr *LHS) {
11769 if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
11770 return;
11771 LastprivateConditionalRefChecker Checker(LastprivateConditionalStack);
11772 if (!Checker.Visit(LHS))
11773 return;
11774 const Expr *FoundE;
11775 const Decl *FoundD;
11776 StringRef UniqueDeclName;
11777 LValue IVLVal;
11778 llvm::Function *FoundFn;
11779 std::tie(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn) =
11780 Checker.getFoundData();
11781 if (FoundFn != CGF.CurFn) {
11782 // Special codegen for inner parallel regions.
11783 // ((struct.lastprivate.conditional*)&priv_a)->Fired = 1;
11784 auto It = LastprivateConditionalToTypes[FoundFn].find(FoundD);
11785 assert(It != LastprivateConditionalToTypes[FoundFn].end() &&
11786 "Lastprivate conditional is not found in outer region.");
11787 QualType StructTy = std::get<0>(It->getSecond());
11788 const FieldDecl* FiredDecl = std::get<2>(It->getSecond());
11789 LValue PrivLVal = CGF.EmitLValue(FoundE);
11790 Address StructAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
11791 PrivLVal.getAddress(CGF),
11792 CGF.ConvertTypeForMem(CGF.getContext().getPointerType(StructTy)),
11793 CGF.ConvertTypeForMem(StructTy));
11794 LValue BaseLVal =
11795 CGF.MakeAddrLValue(StructAddr, StructTy, AlignmentSource::Decl);
11796 LValue FiredLVal = CGF.EmitLValueForField(BaseLVal, FiredDecl);
11797 CGF.EmitAtomicStore(RValue::get(llvm::ConstantInt::get(
11798 CGF.ConvertTypeForMem(FiredDecl->getType()), 1)),
11799 FiredLVal, llvm::AtomicOrdering::Unordered,
11800 /*IsVolatile=*/true, /*isInit=*/false);
11801 return;
11802 }
11803
11804 // Private address of the lastprivate conditional in the current context.
11805 // priv_a
11806 LValue LVal = CGF.EmitLValue(FoundE);
11807 emitLastprivateConditionalUpdate(CGF, IVLVal, UniqueDeclName, LVal,
11808 FoundE->getExprLoc());
11809}
11810
11811void CGOpenMPRuntime::checkAndEmitSharedLastprivateConditional(
11812 CodeGenFunction &CGF, const OMPExecutableDirective &D,
11813 const llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> &IgnoredDecls) {
11814 if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
11815 return;
11816 auto Range = llvm::reverse(LastprivateConditionalStack);
11817 auto It = llvm::find_if(
11818 Range, [](const LastprivateConditionalData &D) { return !D.Disabled; });
11819 if (It == Range.end() || It->Fn != CGF.CurFn)
11820 return;
11821 auto LPCI = LastprivateConditionalToTypes.find(It->Fn);
11822 assert(LPCI != LastprivateConditionalToTypes.end() &&
11823 "Lastprivates must be registered already.");
11824 SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
11825 getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind());
11826 const CapturedStmt *CS = D.getCapturedStmt(CaptureRegions.back());
11827 for (const auto &Pair : It->DeclToUniqueName) {
11828 const auto *VD = cast<VarDecl>(Pair.first->getCanonicalDecl());
11829 if (!CS->capturesVariable(VD) || IgnoredDecls.contains(VD))
11830 continue;
11831 auto I = LPCI->getSecond().find(Pair.first);
11832 assert(I != LPCI->getSecond().end() &&
11833 "Lastprivate must be rehistered already.");
11834 // bool Cmp = priv_a.Fired != 0;
11835 LValue BaseLVal = std::get<3>(I->getSecond());
11836 LValue FiredLVal =
11837 CGF.EmitLValueForField(BaseLVal, std::get<2>(I->getSecond()));
11838 llvm::Value *Res = CGF.EmitLoadOfScalar(FiredLVal, D.getBeginLoc());
11839 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Res);
11840 llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lpc.then");
11841 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("lpc.done");
11842 // if (Cmp) {
11843 CGF.Builder.CreateCondBr(Cmp, ThenBB, DoneBB);
11844 CGF.EmitBlock(ThenBB);
11845 Address Addr = CGF.GetAddrOfLocalVar(VD);
11846 LValue LVal;
11847 if (VD->getType()->isReferenceType())
11848 LVal = CGF.EmitLoadOfReferenceLValue(Addr, VD->getType(),
11849 AlignmentSource::Decl);
11850 else
11851 LVal = CGF.MakeAddrLValue(Addr, VD->getType().getNonReferenceType(),
11852 AlignmentSource::Decl);
11853 emitLastprivateConditionalUpdate(CGF, It->IVLVal, Pair.second, LVal,
11854 D.getBeginLoc());
11855 auto AL = ApplyDebugLocation::CreateArtificial(CGF);
11856 CGF.EmitBlock(DoneBB, /*IsFinal=*/true);
11857 // }
11858 }
11859}
11860
11861void CGOpenMPRuntime::emitLastprivateConditionalFinalUpdate(
11862 CodeGenFunction &CGF, LValue PrivLVal, const VarDecl *VD,
11863 SourceLocation Loc) {
11864 if (CGF.getLangOpts().OpenMP < 50)
11865 return;
11866 auto It = LastprivateConditionalStack.back().DeclToUniqueName.find(VD);
11867 assert(It != LastprivateConditionalStack.back().DeclToUniqueName.end() &&
11868 "Unknown lastprivate conditional variable.");
11869 StringRef UniqueName = It->second;
11870 llvm::GlobalVariable *GV = CGM.getModule().getNamedGlobal(UniqueName);
11871 // The variable was not updated in the region - exit.
11872 if (!GV)
11873 return;
11874 LValue LPLVal = CGF.MakeAddrLValue(
11875 Address(GV, GV->getValueType(), PrivLVal.getAlignment()),
11876 PrivLVal.getType().getNonReferenceType());
11877 llvm::Value *Res = CGF.EmitLoadOfScalar(LPLVal, Loc);
11878 CGF.EmitStoreOfScalar(Res, PrivLVal);
11879}
11880
11881llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction(
11882 CodeGenFunction &CGF, const OMPExecutableDirective &D,
11883 const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
11884 const RegionCodeGenTy &CodeGen) {
11885 llvm_unreachable("Not supported in SIMD-only mode");
11886}
11887
11888llvm::Function *CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction(
11889 CodeGenFunction &CGF, const OMPExecutableDirective &D,
11890 const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
11891 const RegionCodeGenTy &CodeGen) {
11892 llvm_unreachable("Not supported in SIMD-only mode");
11893}
11894
11895llvm::Function *CGOpenMPSIMDRuntime::emitTaskOutlinedFunction(
11896 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
11897 const VarDecl *PartIDVar, const VarDecl *TaskTVar,
11898 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
11899 bool Tied, unsigned &NumberOfParts) {
11900 llvm_unreachable("Not supported in SIMD-only mode");
11901}
11902
11903void CGOpenMPSIMDRuntime::emitParallelCall(CodeGenFunction &CGF,
11904 SourceLocation Loc,
11905 llvm::Function *OutlinedFn,
11906 ArrayRef<llvm::Value *> CapturedVars,
11907 const Expr *IfCond,
11908 llvm::Value *NumThreads) {
11909 llvm_unreachable("Not supported in SIMD-only mode");
11910}
11911
11912void CGOpenMPSIMDRuntime::emitCriticalRegion(
11913 CodeGenFunction &CGF, StringRef CriticalName,
11914 const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc,
11915 const Expr *Hint) {
11916 llvm_unreachable("Not supported in SIMD-only mode");
11917}
11918
11919void CGOpenMPSIMDRuntime::emitMasterRegion(CodeGenFunction &CGF,
11920 const RegionCodeGenTy &MasterOpGen,
11921 SourceLocation Loc) {
11922 llvm_unreachable("Not supported in SIMD-only mode");
11923}
11924
11925void CGOpenMPSIMDRuntime::emitMaskedRegion(CodeGenFunction &CGF,
11926 const RegionCodeGenTy &MasterOpGen,
11927 SourceLocation Loc,
11928 const Expr *Filter) {
11929 llvm_unreachable("Not supported in SIMD-only mode");
11930}
11931
11932void CGOpenMPSIMDRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
11933 SourceLocation Loc) {
11934 llvm_unreachable("Not supported in SIMD-only mode");
11935}
11936
11937void CGOpenMPSIMDRuntime::emitTaskgroupRegion(
11938 CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen,
11939 SourceLocation Loc) {
11940 llvm_unreachable("Not supported in SIMD-only mode");
11941}
11942
11943void CGOpenMPSIMDRuntime::emitSingleRegion(
11944 CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen,
11945 SourceLocation Loc, ArrayRef<const Expr *> CopyprivateVars,
11946 ArrayRef<const Expr *> DestExprs, ArrayRef<const Expr *> SrcExprs,
11947 ArrayRef<const Expr *> AssignmentOps) {
11948 llvm_unreachable("Not supported in SIMD-only mode");
11949}
11950
11951void CGOpenMPSIMDRuntime::emitOrderedRegion(CodeGenFunction &CGF,
11952 const RegionCodeGenTy &OrderedOpGen,
11953 SourceLocation Loc,
11954 bool IsThreads) {
11955 llvm_unreachable("Not supported in SIMD-only mode");
11956}
11957
11958void CGOpenMPSIMDRuntime::emitBarrierCall(CodeGenFunction &CGF,
11959 SourceLocation Loc,
11960 OpenMPDirectiveKind Kind,
11961 bool EmitChecks,
11962 bool ForceSimpleCall) {
11963 llvm_unreachable("Not supported in SIMD-only mode");
11964}
11965
11966void CGOpenMPSIMDRuntime::emitForDispatchInit(
11967 CodeGenFunction &CGF, SourceLocation Loc,
11968 const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
11969 bool Ordered, const DispatchRTInput &DispatchValues) {
11970 llvm_unreachable("Not supported in SIMD-only mode");
11971}
11972
11973void CGOpenMPSIMDRuntime::emitForStaticInit(
11974 CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind,
11975 const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) {
11976 llvm_unreachable("Not supported in SIMD-only mode");
11977}
11978
11979void CGOpenMPSIMDRuntime::emitDistributeStaticInit(
11980 CodeGenFunction &CGF, SourceLocation Loc,
11981 OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) {
11982 llvm_unreachable("Not supported in SIMD-only mode");
11983}
11984
11985void CGOpenMPSIMDRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
11986 SourceLocation Loc,
11987 unsigned IVSize,
11988 bool IVSigned) {
11989 llvm_unreachable("Not supported in SIMD-only mode");
11990}
11991
11992void CGOpenMPSIMDRuntime::emitForStaticFinish(CodeGenFunction &CGF,
11993 SourceLocation Loc,
11994 OpenMPDirectiveKind DKind) {
11995 llvm_unreachable("Not supported in SIMD-only mode");
11996}
11997
11998llvm::Value *CGOpenMPSIMDRuntime::emitForNext(CodeGenFunction &CGF,
11999 SourceLocation Loc,
12000 unsigned IVSize, bool IVSigned,
12001 Address IL, Address LB,
12002 Address UB, Address ST) {
12003 llvm_unreachable("Not supported in SIMD-only mode");
12004}
12005
12006void CGOpenMPSIMDRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
12007 llvm::Value *NumThreads,
12008 SourceLocation Loc) {
12009 llvm_unreachable("Not supported in SIMD-only mode");
12010}
12011
12012void CGOpenMPSIMDRuntime::emitProcBindClause(CodeGenFunction &CGF,
12013 ProcBindKind ProcBind,
12014 SourceLocation Loc) {
12015 llvm_unreachable("Not supported in SIMD-only mode");
12016}
12017
12018Address CGOpenMPSIMDRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
12019 const VarDecl *VD,
12020 Address VDAddr,
12021 SourceLocation Loc) {
12022 llvm_unreachable("Not supported in SIMD-only mode");
12023}
12024
12025llvm::Function *CGOpenMPSIMDRuntime::emitThreadPrivateVarDefinition(
12026 const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit,
12027 CodeGenFunction *CGF) {
12028 llvm_unreachable("Not supported in SIMD-only mode");
12029}
12030
12031Address CGOpenMPSIMDRuntime::getAddrOfArtificialThreadPrivate(
12032 CodeGenFunction &CGF, QualType VarType, StringRef Name) {
12033 llvm_unreachable("Not supported in SIMD-only mode");
12034}
12035
12036void CGOpenMPSIMDRuntime::emitFlush(CodeGenFunction &CGF,
12037 ArrayRef<const Expr *> Vars,
12038 SourceLocation Loc,
12039 llvm::AtomicOrdering AO) {
12040 llvm_unreachable("Not supported in SIMD-only mode");
12041}
12042
12043void CGOpenMPSIMDRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
12044 const OMPExecutableDirective &D,
12045 llvm::Function *TaskFunction,
12046 QualType SharedsTy, Address Shareds,
12047 const Expr *IfCond,
12048 const OMPTaskDataTy &Data) {
12049 llvm_unreachable("Not supported in SIMD-only mode");
12050}
12051
12052void CGOpenMPSIMDRuntime::emitTaskLoopCall(
12053 CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D,
12054 llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds,
12055 const Expr *IfCond, const OMPTaskDataTy &Data) {
12056 llvm_unreachable("Not supported in SIMD-only mode");
12057}
12058
12059void CGOpenMPSIMDRuntime::emitReduction(
12060 CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates,
12061 ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
12062 ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) {
12063 assert(Options.SimpleReduction && "Only simple reduction is expected.");
12064 CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs,
12065 ReductionOps, Options);
12066}
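// Note (annotation): unlike the other overrides below, reductions are
// reachable in SIMD-only mode, since a 'simd' construct may carry a
// reduction clause. The assert above documents that they are always
// "simple" reductions -- no runtime locking or inter-thread combining --
// so delegating to the base class's serial path is sufficient.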
12067
12068llvm::Value *CGOpenMPSIMDRuntime::emitTaskReductionInit(
12069 CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
12070 ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
12071 llvm_unreachable("Not supported in SIMD-only mode");
12072}
12073
12074void CGOpenMPSIMDRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
12075 SourceLocation Loc,
12076 bool IsWorksharingReduction) {
12077 llvm_unreachable("Not supported in SIMD-only mode");
12078}
12079
12080void CGOpenMPSIMDRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
12081 SourceLocation Loc,
12082 ReductionCodeGen &RCG,
12083 unsigned N) {
12084 llvm_unreachable("Not supported in SIMD-only mode");
12085}
12086
12087Address CGOpenMPSIMDRuntime::getTaskReductionItem(CodeGenFunction &CGF,
12088 SourceLocation Loc,
12089 llvm::Value *ReductionsPtr,
12090 LValue SharedLVal) {
12091 llvm_unreachable("Not supported in SIMD-only mode");
12092}
12093
12094void CGOpenMPSIMDRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
12095 SourceLocation Loc,
12096 const OMPTaskDataTy &Data) {
12097 llvm_unreachable("Not supported in SIMD-only mode");
12098}
12099
12100void CGOpenMPSIMDRuntime::emitCancellationPointCall(
12101 CodeGenFunction &CGF, SourceLocation Loc,
12102 OpenMPDirectiveKind CancelRegion) {
12103 llvm_unreachable("Not supported in SIMD-only mode");
12104}
12105
12106void CGOpenMPSIMDRuntime::emitCancelCall(CodeGenFunction &CGF,
12107 SourceLocation Loc, const Expr *IfCond,
12108 OpenMPDirectiveKind CancelRegion) {
12109 llvm_unreachable("Not supported in SIMD-only mode");
12110}
12111
12112void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction(
12113 const OMPExecutableDirective &D, StringRef ParentName,
12114 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
12115 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
12116 llvm_unreachable("Not supported in SIMD-only mode");
12117}
12118
12119void CGOpenMPSIMDRuntime::emitTargetCall(
12120 CodeGenFunction &CGF, const OMPExecutableDirective &D,
12121 llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
12122 llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
12123 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
12124 const OMPLoopDirective &D)>
12125 SizeEmitter) {
12126 llvm_unreachable("Not supported in SIMD-only mode");
12127}
12128
12129bool CGOpenMPSIMDRuntime::emitTargetFunctions(GlobalDecl GD) {
12130 llvm_unreachable("Not supported in SIMD-only mode");
12131}
12132
12133bool CGOpenMPSIMDRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
12134 llvm_unreachable("Not supported in SIMD-only mode");
12135}
12136
12137bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) {
12138 return false;
12139}
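// Note (annotation, hedged): emitTargetGlobal above returns false rather
// than trapping; in this interface, false appears to mean "not handled by
// the OpenMP runtime", letting CodeGenModule emit the global through the
// normal host path, which is the right outcome when nothing is offloaded.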
12140
12141void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF,
12142 const OMPExecutableDirective &D,
12143 SourceLocation Loc,
12144 llvm::Function *OutlinedFn,
12145 ArrayRef<llvm::Value *> CapturedVars) {
12146 llvm_unreachable("Not supported in SIMD-only mode");
12147}
12148
12149void CGOpenMPSIMDRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
12150 const Expr *NumTeams,
12151 const Expr *ThreadLimit,
12152 SourceLocation Loc) {
12153 llvm_unreachable("Not supported in SIMD-only mode");
12154}
12155
12156void CGOpenMPSIMDRuntime::emitTargetDataCalls(
12157 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
12158 const Expr *Device, const RegionCodeGenTy &CodeGen,
12159 CGOpenMPRuntime::TargetDataInfo &Info) {
12160 llvm_unreachable("Not supported in SIMD-only mode");
12161}
12162
12163void CGOpenMPSIMDRuntime::emitTargetDataStandAloneCall(
12164 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
12165 const Expr *Device) {
12166 llvm_unreachable("Not supported in SIMD-only mode");
12167}
12168
12169void CGOpenMPSIMDRuntime::emitDoacrossInit(CodeGenFunction &CGF,
12170 const OMPLoopDirective &D,
12171 ArrayRef<Expr *> NumIterations) {
12172 llvm_unreachable("Not supported in SIMD-only mode");
12173}
12174
12175void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
12176 const OMPDependClause *C) {
12177 llvm_unreachable("Not supported in SIMD-only mode");
12178}
12179
12180void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
12181 const OMPDoacrossClause *C) {
12182 llvm_unreachable("Not supported in SIMD-only mode");
12183}
12184
12185const VarDecl *
12186CGOpenMPSIMDRuntime::translateParameter(const FieldDecl *FD,
12187 const VarDecl *NativeParam) const {
12188 llvm_unreachable("Not supported in SIMD-only mode");
12189}
12190
12191Address
12192CGOpenMPSIMDRuntime::getParameterAddress(CodeGenFunction &CGF,
12193 const VarDecl *NativeParam,
12194 const VarDecl *TargetParam) const {
12195 llvm_unreachable("Not supported in SIMD-only mode");
12196}
#define V(N, I)
Definition: ASTContext.h:3259
StringRef P
#define SM(sm)
Definition: Cuda.cpp:82
Provides LLVM's BitmaskEnum facility to enumeration types declared in namespace clang.
static llvm::Value * emitCopyprivateCopyFunction(CodeGenModule &CGM, llvm::Type *ArgsElemType, ArrayRef< const Expr * > CopyprivateVars, ArrayRef< const Expr * > DestExprs, ArrayRef< const Expr * > SrcExprs, ArrayRef< const Expr * > AssignmentOps, SourceLocation Loc)
static StringRef getIdentStringFromSourceLocation(CodeGenFunction &CGF, SourceLocation Loc, SmallString< 128 > &Buffer)
llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind convertCaptureClause(const VarDecl *VD)
static void emitOffloadingArrays(CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo, CGOpenMPRuntime::TargetDataInfo &Info, llvm::OpenMPIRBuilder &OMPBuilder, bool IsNonContiguous=false)
Emit the arrays used to pass the captures and map information to the offloading runtime library.
static RecordDecl * createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy, ArrayRef< PrivateDataTy > Privates)
llvm::Value * emitDynCGGroupMem(const OMPExecutableDirective &D, CodeGenFunction &CGF)
static void emitInitWithReductionInitializer(CodeGenFunction &CGF, const OMPDeclareReductionDecl *DRD, const Expr *InitOp, Address Private, Address Original, QualType Ty)
static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy, Address OriginalBaseAddress, llvm::Value *Addr)
static void emitPrivatesInit(CodeGenFunction &CGF, const OMPExecutableDirective &D, Address KmpTaskSharedsPtr, LValue TDBase, const RecordDecl *KmpTaskTWithPrivatesQTyRD, QualType SharedsTy, QualType SharedsPtrTy, const OMPTaskDataTy &Data, ArrayRef< PrivateDataTy > Privates, bool ForDup)
Emit initialization for private variables in task-based directives.
static llvm::Value * emitDestructorsFunction(CodeGenModule &CGM, SourceLocation Loc, QualType KmpInt32Ty, QualType KmpTaskTWithPrivatesPtrQTy, QualType KmpTaskTWithPrivatesQTy)
static unsigned evaluateCDTSize(const FunctionDecl *FD, ArrayRef< ParamAttrTy > ParamAttrs)
static void EmitOMPAggregateReduction(CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar, const VarDecl *RHSVar, const llvm::function_ref< void(CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *)> &RedOpGen, const Expr *XExpr=nullptr, const Expr *EExpr=nullptr, const Expr *UpExpr=nullptr)
Emit reduction operation for each element of array (required for array sections) LHS op = RHS.
static void emitTargetCallFallback(CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn, const OMPExecutableDirective &D, llvm::SmallVectorImpl< llvm::Value * > &CapturedVars, bool RequiresOuterTask, const CapturedStmt &CS, bool OffloadingMandatory, CodeGenFunction &CGF)
static llvm::Value * emitReduceInitFunction(CodeGenModule &CGM, SourceLocation Loc, ReductionCodeGen &RCG, unsigned N)
Emits reduction initializer function:
static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion)
static void emitDependData(CodeGenFunction &CGF, QualType &KmpDependInfoTy, llvm::PointerUnion< unsigned *, LValue * > Pos, const OMPTaskDataTy::DependData &Data, Address DependenciesArray)
static llvm::Value * emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc, const OMPTaskDataTy &Data, QualType PrivatesQTy, ArrayRef< PrivateDataTy > Privates)
Emit a privates mapping function for correct handling of private and firstprivate variables.
static llvm::Value * emitReduceCombFunction(CodeGenModule &CGM, SourceLocation Loc, ReductionCodeGen &RCG, unsigned N, const Expr *ReductionOp, const Expr *LHS, const Expr *RHS, const Expr *PrivateRef)
Emits reduction combiner function:
static RecordDecl * createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef< PrivateDataTy > Privates)
static llvm::Value * getAllocatorVal(CodeGenFunction &CGF, const Expr *Allocator)
Return allocator value from expression, or return a null allocator (default when no allocator specifi...
static llvm::Function * emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc, OpenMPDirectiveKind Kind, QualType KmpInt32Ty, QualType KmpTaskTWithPrivatesPtrQTy, QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy, QualType SharedsPtrTy, llvm::Function *TaskFunction, llvm::Value *TaskPrivatesMap)
Emit a proxy function which accepts kmp_task_t as the second argument.
static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix, char ISA, StringRef ParSeq, StringRef MangledName, bool OutputBecomesInput, llvm::Function *Fn)
static bool isAllocatableDecl(const VarDecl *VD)
static llvm::Value * getAlignmentValue(CodeGenModule &CGM, const VarDecl *VD)
Return the alignment from an allocate directive if present.
static void emitTargetCallKernelLaunch(CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn, const OMPExecutableDirective &D, llvm::SmallVectorImpl< llvm::Value * > &CapturedVars, bool RequiresOuterTask, const CapturedStmt &CS, bool OffloadingMandatory, llvm::PointerIntPair< const Expr *, 2, OpenMPDeviceClauseModifier > Device, llvm::Value *OutlinedFnID, CodeGenFunction::OMPTargetDataInfo &InputInfo, llvm::Value *&MapTypesArray, llvm::Value *&MapNamesArray, llvm::function_ref< llvm::Value *(CodeGenFunction &CGF, const OMPLoopDirective &D)> SizeEmitter, CodeGenFunction &CGF, CodeGenModule &CGM)
static std::tuple< unsigned, unsigned, bool > getNDSWDS(const FunctionDecl *FD, ArrayRef< ParamAttrTy > ParamAttrs)
static const OMPExecutableDirective * getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D)
Check for inner distribute directive.
static std::pair< llvm::Value *, llvm::Value * > getPointerAndSize(CodeGenFunction &CGF, const Expr *E)
static const VarDecl * getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE)
static bool isTrivial(ASTContext &Ctx, const Expr *E)
Checks if the expression is constant or does not have non-trivial function calls.
static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind, bool Chunked, bool Ordered)
Map the OpenMP loop schedule to the runtime enumeration.
static void getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS, const Expr **E, int32_t &UpperBound, bool UpperBoundOnly, llvm::Value **CondVal)
Check for a num threads constant value (stored in DefaultVal), or expression (stored in E).
static llvm::Value * emitDeviceID(llvm::PointerIntPair< const Expr *, 2, OpenMPDeviceClauseModifier > Device, CodeGenFunction &CGF)
static const OMPDeclareReductionDecl * getReductionInit(const Expr *ReductionOp)
Check if the combiner is a call to UDR combiner and if it is so return the UDR decl used for reductio...
static bool checkInitIsRequired(CodeGenFunction &CGF, ArrayRef< PrivateDataTy > Privates)
Check if duplication function is required for taskloops.
static bool checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD, ArrayRef< PrivateDataTy > Privates)
Checks if destructor function is required to be generated.
static llvm::TargetRegionEntryInfo getEntryInfoFromPresumedLoc(CodeGenModule &CGM, llvm::OpenMPIRBuilder &OMPBuilder, SourceLocation BeginLoc, llvm::StringRef ParentName="")
static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix, const Expr *Ref)
Generates unique name for artificial threadprivate variables.
static void emitForStaticInitCall(CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId, llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule, OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2, const CGOpenMPRuntime::StaticRTInput &Values)
static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy, LValue BaseLV)
static void getKmpAffinityType(ASTContext &C, QualType &KmpTaskAffinityInfoTy)
Builds kmp_depend_info, if it is not built yet, and builds flags type.
static void getDependTypes(ASTContext &C, QualType &KmpDependInfoTy, QualType &FlagsTy)
Builds kmp_depend_info, if it is not built yet, and builds flags type.
static llvm::Value * emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc, const OMPExecutableDirective &D, QualType KmpTaskTWithPrivatesPtrQTy, const RecordDecl *KmpTaskTWithPrivatesQTyRD, const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy, QualType SharedsPtrTy, const OMPTaskDataTy &Data, ArrayRef< PrivateDataTy > Privates, bool WithLastIter)
Emit task_dup function (for initialization of private/firstprivate/lastprivate vars and last_iter fla...
static llvm::Value * emitReduceFiniFunction(CodeGenModule &CGM, SourceLocation Loc, ReductionCodeGen &RCG, unsigned N)
Emits reduction finalizer function:
static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr, QualType Type, bool EmitDeclareReductionInit, const Expr *Init, const OMPDeclareReductionDecl *DRD, Address SrcAddr=Address::invalid())
Emit initialization of arrays of complex types.
static bool getAArch64PBV(QualType QT, ASTContext &C)
Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI.
static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C)
Computes the lane size (LS) of a return type or of an input parameter, as defined by LS(P) in 3....
static void EmitDoacrossOrdered(CodeGenFunction &CGF, CodeGenModule &CGM, const T *C, llvm::Value *ULoc, llvm::Value *ThreadID)
static RTLDependenceKindTy translateDependencyKind(OpenMPDependClauseKind K)
Translates internal dependency kind into the runtime kind.
static void emitTargetCallElse(CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn, const OMPExecutableDirective &D, llvm::SmallVectorImpl< llvm::Value * > &CapturedVars, bool RequiresOuterTask, const CapturedStmt &CS, bool OffloadingMandatory, CodeGenFunction &CGF)
static llvm::Function * emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty, const Expr *CombinerInitializer, const VarDecl *In, const VarDecl *Out, bool IsCombiner)
static void emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn, const llvm::APSInt &VLENVal, ArrayRef< ParamAttrTy > ParamAttrs, OMPDeclareSimdDeclAttr::BranchStateTy State)
static void emitReductionCombiner(CodeGenFunction &CGF, const Expr *ReductionOp)
Emit reduction combiner.
static std::string mangleVectorParameters(ArrayRef< ParamAttrTy > ParamAttrs)
Mangle the parameter part of the vector function name according to their OpenMP classification.
static llvm::Function * emitParallelOrTeamsOutlinedFunction(CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS, const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen)
static void emitAArch64DeclareSimdFunction(CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN, ArrayRef< ParamAttrTy > ParamAttrs, OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName, char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc)
Emit vector function attributes for AArch64, as defined in the AAVFABI.
static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array, unsigned Index, const VarDecl *Var)
Given an array of pointers to variables, project the address of a given variable.
llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseKind convertDeviceClause(const VarDecl *VD)
static bool isAssumedToBeNotEmitted(const ValueDecl *VD, bool IsDevice)
static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask, StringRef Prefix, char ISA, StringRef ParSeq, StringRef MangledName, bool OutputBecomesInput, llvm::Function *Fn)
static FieldDecl * addFieldToRecordDecl(ASTContext &C, DeclContext *DC, QualType FieldTy)
static ValueDecl * getDeclFromThisExpr(const Expr *E)
llvm::Constant * emitMappingInformation(CodeGenFunction &CGF, llvm::OpenMPIRBuilder &OMPBuilder, MappableExprsHandler::MappingExprInfo &MapExprs)
Emit a string constant containing the names of the values mapped to the offloading runtime library.
static RecordDecl * createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind, QualType KmpInt32Ty, QualType KmpRoutineEntryPointerQTy)
static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule, OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2)
static bool getAArch64MTV(QualType QT, ParamKindTy Kind)
Maps To Vector (MTV), as defined in 4.1.1 of the AAVFABI (2021Q1).
Defines the clang::FileManager interface and associated types.
int Priority
Definition: Format.cpp:2956
#define X(type, name)
Definition: Value.h:142
This file defines OpenMP AST classes for clauses.
Defines some OpenMP-specific enums and functions.
Defines the SourceManager interface.
const char * Data
This file defines OpenMP AST classes for executable directives and clauses.
SourceLocation Begin
__DEVICE__ int max(int __a, int __b)
Holds long-lived AST nodes (such as types and decls) that can be referred to throughout the semantic ...
Definition: ASTContext.h:182
SourceManager & getSourceManager()
Definition: ASTContext.h:700
const ConstantArrayType * getAsConstantArrayType(QualType T) const
Definition: ASTContext.h:2742
CharUnits getTypeAlignInChars(QualType T) const
Return the ABI-specified alignment of a (complete) type T, in characters.
const ASTRecordLayout & getASTRecordLayout(const RecordDecl *D) const
Get or compute information about the layout of the specified record (struct/union/class) D,...
bool hasSameType(QualType T1, QualType T2) const
Determine whether the given types T1 and T2 are equivalent.
Definition: ASTContext.h:2565
QualType getPointerType(QualType T) const
Return the uniqued reference to the type for a pointer to the specified type.
CanQualType VoidPtrTy
Definition: ASTContext.h:1113
QualType getConstantArrayType(QualType EltTy, const llvm::APInt &ArySize, const Expr *SizeExpr, ArraySizeModifier ASM, unsigned IndexTypeQuals) const
Return the unique reference to the type for a constant array of the specified element type.
const LangOptions & getLangOpts() const
Definition: ASTContext.h:770
CanQualType BoolTy
Definition: ASTContext.h:1087
QualType getIntTypeForBitwidth(unsigned DestWidth, unsigned Signed) const
getIntTypeForBitwidth - sets integer QualTy according to specified details: bitwidth,...
CanQualType getSizeType() const
Return the unique type for "size_t" (C99 7.17), defined in <stddef.h>.
CharUnits getDeclAlign(const Decl *D, bool ForAlignof=false) const
Return a conservative estimate of the alignment of the specified decl D.
const ArrayType * getAsArrayType(QualType T) const
Type Query functions.
CharUnits getTypeSizeInChars(QualType T) const
Return the size of the specified (complete) type T, in characters.
CanQualType VoidTy
Definition: ASTContext.h:1086
const VariableArrayType * getAsVariableArrayType(QualType T) const
Definition: ASTContext.h:2745
const TargetInfo & getTargetInfo() const
Definition: ASTContext.h:752
CharUnits getNonVirtualSize() const
getNonVirtualSize - Get the non-virtual size (in chars) of an object, which is the size of the object...
Definition: RecordLayout.h:210
Represents an array type, per C99 6.7.5.2 - Array Declarators.
Definition: Type.h:3147
Attr - This represents one attribute.
Definition: Attr.h:42
Represents a C++ constructor within a class.
Definition: DeclCXX.h:2528
Represents a C++ destructor within a class.
Definition: DeclCXX.h:2792
Represents a static or instance method of a struct/union/class.
Definition: DeclCXX.h:2053
const CXXRecordDecl * getParent() const
Return the parent of this method declaration, which is the class in which this method is defined.
Definition: DeclCXX.h:2179
QualType getFunctionObjectParameterType() const
Definition: DeclCXX.h:2203
Represents a C++ struct/union/class.
Definition: DeclCXX.h:258
base_class_range bases()
Definition: DeclCXX.h:618
bool isLambda() const
Determine whether this class describes a lambda function object.
Definition: DeclCXX.h:1021
void getCaptureFields(llvm::DenseMap< const ValueDecl *, FieldDecl * > &Captures, FieldDecl *&ThisCapture) const
For a closure type, retrieve the mapping from captured variables and this to the non-static data memb...
Definition: DeclCXX.cpp:1640
unsigned getNumBases() const
Retrieves the number of base classes of this class.
Definition: DeclCXX.h:612
base_class_range vbases()
Definition: DeclCXX.h:635
capture_const_range captures() const
Definition: DeclCXX.h:1100
ctor_range ctors() const
Definition: DeclCXX.h:680
CXXDestructorDecl * getDestructor() const
Returns the destructor decl for this class.
Definition: DeclCXX.cpp:1974
CanProxy< U > castAs() const
A wrapper class around a pointer that always points to its canonical declaration.
Definition: Redeclarable.h:349
Describes the capture of either a variable, or 'this', or variable-length array type.
Definition: Stmt.h:3768
bool capturesVariableByCopy() const
Determine whether this capture handles a variable by copy.
Definition: Stmt.h:3802
VarDecl * getCapturedVar() const
Retrieve the declaration of the variable being captured.
Definition: Stmt.cpp:1305
bool capturesVariableArrayType() const
Determine whether this capture handles a variable-length array type.
Definition: Stmt.h:3808
bool capturesThis() const
Determine whether this capture handles the C++ 'this' pointer.
Definition: Stmt.h:3796
bool capturesVariable() const
Determine whether this capture handles a variable (by reference).
Definition: Stmt.h:3799
This captures a statement into a function.
Definition: Stmt.h:3755
capture_iterator capture_end() const
Retrieve an iterator pointing past the end of the sequence of captures.
Definition: Stmt.h:3906
const RecordDecl * getCapturedRecordDecl() const
Retrieve the record declaration for captured variables.
Definition: Stmt.h:3876
Stmt * getCapturedStmt()
Retrieve the statement being captured.
Definition: Stmt.h:3859
bool capturesVariable(const VarDecl *Var) const
True if this variable has been captured.
Definition: Stmt.cpp:1431
capture_iterator capture_begin()
Retrieve an iterator pointing to the first capture.
Definition: Stmt.h:3901
capture_range captures()
Definition: Stmt.h:3893
CharUnits - This is an opaque type for sizes expressed in character units.
Definition: CharUnits.h:38
bool isZero() const
isZero - Test whether the quantity equals zero.
Definition: CharUnits.h:122
llvm::Align getAsAlign() const
getAsAlign - Returns Quantity as a valid llvm::Align, Beware llvm::Align assumes power of two 8-bit b...
Definition: CharUnits.h:189
QuantityType getQuantity() const
getQuantity - Get the raw integer representation of this quantity.
Definition: CharUnits.h:185
CharUnits alignmentOfArrayElement(CharUnits elementSize) const
Given that this is the alignment of the first element of an array, return the minimum alignment of an...
Definition: CharUnits.h:214
static CharUnits fromQuantity(QuantityType Quantity)
fromQuantity - Construct a CharUnits quantity from a raw integer type.
Definition: CharUnits.h:63
CharUnits alignTo(const CharUnits &Align) const
alignTo - Returns the next integer (mod 2**64) that is greater than or equal to this quantity and is ...
Definition: CharUnits.h:201
An aligned address.
Definition: Address.h:29
static Address invalid()
Definition: Address.h:46
CharUnits getAlignment() const
Return the alignment of this pointer.
Definition: Address.h:78
llvm::Type * getElementType() const
Return the type of the values stored in this address.
Definition: Address.h:62
Address withPointer(llvm::Value *NewPointer, KnownNonNull_t IsKnownNonNull) const
Return address with different pointer, but same element type and alignment.
Definition: Address.h:85
Address withElementType(llvm::Type *ElemTy) const
Return address with different element type, but same pointer and alignment.
Definition: Address.h:100
Address withAlignment(CharUnits NewAlignment) const
Return address with different alignment, but same pointer and element type.
Definition: Address.h:93
llvm::Value * getPointer() const
Definition: Address.h:51
bool isValid() const
Definition: Address.h:47
llvm::PointerType * getType() const
Return the type of the pointer value.
Definition: Address.h:57
static ApplyDebugLocation CreateArtificial(CodeGenFunction &CGF)
Apply TemporaryLocation if it is valid.
Definition: CGDebugInfo.h:863
static ApplyDebugLocation CreateDefaultArtificial(CodeGenFunction &CGF, SourceLocation TemporaryLocation)
Apply TemporaryLocation if it is valid.
Definition: CGDebugInfo.h:870
static ApplyDebugLocation CreateEmpty(CodeGenFunction &CGF)
Set the IRBuilder to not attach debug locations.
Definition: CGDebugInfo.h:880
CGBlockInfo - Information to generate a block literal.
Definition: CGBlocks.h:156
llvm::StoreInst * CreateStore(llvm::Value *Val, Address Addr, bool IsVolatile=false)
Definition: CGBuilder.h:97
Address CreatePointerBitCastOrAddrSpaceCast(Address Addr, llvm::Type *Ty, llvm::Type *ElementTy, const llvm::Twine &Name="")
Definition: CGBuilder.h:156
Address CreateConstArrayGEP(Address Addr, uint64_t Index, const llvm::Twine &Name="")
Given addr = [n x T]* ... produce name = getelementptr inbounds addr, i64 0, i64 index where i64 is a...
Definition: CGBuilder.h:193
llvm::LoadInst * CreateLoad(Address Addr, const llvm::Twine &Name="")
Definition: CGBuilder.h:71
llvm::CallInst * CreateMemCpy(Address Dest, Address Src, llvm::Value *Size, bool IsVolatile=false)
Definition: CGBuilder.h:297
Address CreateConstGEP(Address Addr, uint64_t Index, const llvm::Twine &Name="")
Given addr = T* ... produce name = getelementptr inbounds addr, i64 index where i64 is actually the t...
Definition: CGBuilder.h:230
Address CreateGEP(Address Addr, llvm::Value *Index, const llvm::Twine &Name="")
Definition: CGBuilder.h:246
MangleContext & getMangleContext()
Gets the mangle context.
Definition: CGCXXABI.h:117
CGFunctionInfo - Class to encapsulate the information about a function definition.
Manages list of lastprivate conditional decls for the specified directive.
static LastprivateConditionalRAII disable(CodeGenFunction &CGF, const OMPExecutableDirective &S)
NontemporalDeclsRAII(CodeGenModule &CGM, const OMPLoopDirective &S)
Struct that keeps all the relevant information that should be kept throughout a 'target data' region.
llvm::DenseMap< const ValueDecl *, llvm::Value * > CaptureDeviceAddrMap
Map between the a declaration of a capture and the corresponding new llvm address where the runtime r...
UntiedTaskLocalDeclsRAII(CodeGenFunction &CGF, const llvm::MapVector< CanonicalDeclPtr< const VarDecl >, std::pair< Address, Address > > &LocalVars)
virtual Address emitThreadIDAddress(CodeGenFunction &CGF, SourceLocation Loc)
Emits address of the word in a memory where current thread id is stored.
llvm::FunctionType * Kmpc_MicroTy
The type for a microtask which gets passed to __kmpc_fork_call().
llvm::StringSet ThreadPrivateWithDefinition
Set of threadprivate variables with the generated initializer.
virtual void emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, const OMPExecutableDirective &D, llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds, const Expr *IfCond, const OMPTaskDataTy &Data)
Emit task region for the task directive.
void createOffloadEntriesAndInfoMetadata()
Creates all the offload entries in the current compilation unit along with the associated metadata.
const Expr * getNumTeamsExprForTargetDirective(CodeGenFunction &CGF, const OMPExecutableDirective &D, int32_t &MinTeamsVal, int32_t &MaxTeamsVal)
Emit the number of teams for a target directive.
virtual Address getAddrOfThreadPrivate(CodeGenFunction &CGF, const VarDecl *VD, Address VDAddr, SourceLocation Loc)
Returns address of the threadprivate variable for the current thread.
void emitDeferredTargetDecls() const
Emit deferred declare target variables marked for deferred emission.
virtual llvm::Value * emitForNext(CodeGenFunction &CGF, SourceLocation Loc, unsigned IVSize, bool IVSigned, Address IL, Address LB, Address UB, Address ST)
Call __kmpc_dispatch_next( ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter, kmp_int[32|64] *p_lowe...
bool markAsGlobalTarget(GlobalDecl GD)
Marks the declaration as already emitted for the device code and returns true, if it was marked alrea...
virtual void emitTargetDataStandAloneCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, const Expr *Device)
Emit the data mapping/movement code associated with the directive D that should be of the form 'targe...
virtual void emitNumThreadsClause(CodeGenFunction &CGF, llvm::Value *NumThreads, SourceLocation Loc)
Emits call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_threads)...
QualType SavedKmpTaskloopTQTy
Saved kmp_task_t for taskloop-based directive.
virtual void emitSingleRegion(CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen, SourceLocation Loc, ArrayRef< const Expr * > CopyprivateVars, ArrayRef< const Expr * > DestExprs, ArrayRef< const Expr * > SrcExprs, ArrayRef< const Expr * > AssignmentOps)
Emits a single region.
virtual bool emitTargetGlobal(GlobalDecl GD)
Emit the global GD if it is meaningful for the target.
void setLocThreadIdInsertPt(CodeGenFunction &CGF, bool AtCurrentPoint=false)
std::string getOutlinedHelperName(StringRef Name) const
Get the function name of an outlined region.
bool HasEmittedDeclareTargetRegion
Flag for keeping track of weather a device routine has been emitted.
llvm::Constant * getOrCreateThreadPrivateCache(const VarDecl *VD)
If the specified mangled name is not in the module, create and return threadprivate cache object.
virtual Address getTaskReductionItem(CodeGenFunction &CGF, SourceLocation Loc, llvm::Value *ReductionsPtr, LValue SharedLVal)
Get the address of void * type of the privatue copy of the reduction item specified by the SharedLVal...
virtual void getDefaultScheduleAndChunk(CodeGenFunction &CGF, const OMPLoopDirective &S, OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const
Choose default schedule type and chunk value for the schedule clause.
virtual std::pair< llvm::Function *, llvm::Function * > getUserDefinedReduction(const OMPDeclareReductionDecl *D)
Get combiner/initializer for the specified user-defined reduction, if any.
virtual bool isGPU() const
Returns true if the current target is a GPU.
static const Stmt * getSingleCompoundChild(ASTContext &Ctx, const Stmt *Body)
Checks if the Body is the CompoundStmt and returns its child statement iff there is only one that is ...
virtual void emitDeclareTargetFunction(const FunctionDecl *FD, llvm::GlobalValue *GV)
Emit code for handling declare target functions in the runtime.
llvm::Type * getKmpc_MicroPointerTy()
Returns pointer to kmpc_micro type.
bool HasRequiresUnifiedSharedMemory
Flag for keeping track of weather a requires unified_shared_memory directive is present.
llvm::Value * emitUpdateLocation(CodeGenFunction &CGF, SourceLocation Loc, unsigned Flags=0, bool EmitLoc=false)
Emits object of ident_t type with info for source location.
bool isLocalVarInUntiedTask(CodeGenFunction &CGF, const VarDecl *VD) const
Returns true if the variable is a local variable in untied task.
virtual void emitTeamsCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, SourceLocation Loc, llvm::Function *OutlinedFn, ArrayRef< llvm::Value * > CapturedVars)
Emits code for teams call of the OutlinedFn with variables captured in a record which address is stor...
virtual void emitCancellationPointCall(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind CancelRegion)
Emit code for 'cancellation point' construct.
void emitUDMapperArrayInitOrDel(CodeGenFunction &MapperCGF, llvm::Value *Handle, llvm::Value *BasePtr, llvm::Value *Ptr, llvm::Value *Size, llvm::Value *MapType, llvm::Value *MapName, CharUnits ElementSize, llvm::BasicBlock *ExitBB, bool IsInit)
Emit the array initialization or deletion portion for user-defined mapper code generation.
virtual Address getAddrOfDeclareTargetVar(const VarDecl *VD)
Returns the address of the variable marked as declare target with link clause OR as declare target wi...
virtual llvm::Function * emitThreadPrivateVarDefinition(const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit, CodeGenFunction *CGF=nullptr)
Emit a code for initialization of threadprivate variable.
llvm::MapVector< CanonicalDeclPtr< const VarDecl >, std::pair< Address, Address > > UntiedLocalVarsAddressesMap
llvm::Function * getOrCreateUserDefinedMapperFunc(const OMPDeclareMapperDecl *D)
Get the function for the specified user-defined mapper.
OpenMPLocThreadIDMapTy OpenMPLocThreadIDMap
virtual void functionFinished(CodeGenFunction &CGF)
Cleans up references to the objects in finished function.
void emitCall(CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee Callee, ArrayRef< llvm::Value * > Args=std::nullopt) const
Emits Callee function call with arguments Args with location Loc.
virtual llvm::Function * emitTeamsOutlinedFunction(CodeGenFunction &CGF, const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen)
Emits outlined function for the specified OpenMP teams directive D.
QualType KmpTaskTQTy
Type typedef struct kmp_task { void * shareds; /‍**< pointer to block of pointers to shared vars ‍/ k...
llvm::OpenMPIRBuilder OMPBuilder
An OpenMP-IR-Builder instance.
virtual void emitDoacrossInit(CodeGenFunction &CGF, const OMPLoopDirective &D, ArrayRef< Expr * > NumIterations)
Emit initialization for doacross loop nesting support.
virtual void adjustTargetSpecificDataForLambdas(CodeGenFunction &CGF, const OMPExecutableDirective &D) const
Adjust some parameters for the target-based directives, like addresses of the variables captured by r...
virtual void emitTargetDataCalls(CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, const Expr *Device, const RegionCodeGenTy &CodeGen, CGOpenMPRuntime::TargetDataInfo &Info)
Emit the target data mapping code associated with D.
virtual unsigned getDefaultLocationReserved2Flags() const
Returns additional flags that can be stored in reserved_2 field of the default location.
void computeMinAndMaxThreadsAndTeams(const OMPExecutableDirective &D, CodeGenFunction &CGF, int32_t &MinThreadsVal, int32_t &MaxThreadsVal, int32_t &MinTeamsVal, int32_t &MaxTeamsVal)
Helper to determine the min/max number of threads/teams for D.
virtual Address getParameterAddress(CodeGenFunction &CGF, const VarDecl *NativeParam, const VarDecl *TargetParam) const
Gets the address of the native argument basing on the address of the target-specific parameter.
void emitUsesAllocatorsFini(CodeGenFunction &CGF, const Expr *Allocator)
Destroys user defined allocators specified in the uses_allocators clause.
QualType KmpTaskAffinityInfoTy
Type typedef struct kmp_task_affinity_info { kmp_intptr_t base_addr; size_t len; struct { bool flag1 ...
llvm::SmallVector< NontemporalDeclsSet, 4 > NontemporalDeclsStack
Stack for list of declarations in current context marked as nontemporal.
llvm::Value * emitNumTeamsForTargetDirective(CodeGenFunction &CGF, const OMPExecutableDirective &D)
virtual void emitTargetOutlinedFunctionHelper(const OMPExecutableDirective &D, StringRef ParentName, llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, bool IsOffloadEntry, const RegionCodeGenTy &CodeGen)
Helper to emit outlined function for 'target' directive.
void scanForTargetRegionsFunctions(const Stmt *S, StringRef ParentName)
Start scanning from statement S and emit all target regions found along the way.
SmallVector< llvm::Value *, 4 > emitDepobjElementsSizes(CodeGenFunction &CGF, QualType &KmpDependInfoTy, const OMPTaskDataTy::DependData &Data)
virtual void emitTaskgroupRegion(CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen, SourceLocation Loc)
Emit a taskgroup region.
llvm::DenseMap< llvm::Function *, llvm::DenseMap< CanonicalDeclPtr< const Decl >, std::tuple< QualType, const FieldDecl *, const FieldDecl *, LValue > > > LastprivateConditionalToTypes
Maps local variables marked as lastprivate conditional to their internal types.
virtual bool emitTargetGlobalVariable(GlobalDecl GD)
Emit the global variable if it is a valid device global variable.
virtual void emitNumTeamsClause(CodeGenFunction &CGF, const Expr *NumTeams, const Expr *ThreadLimit, SourceLocation Loc)
Emits call to void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_teams,...
bool hasRequiresUnifiedSharedMemory() const
Return whether the unified_shared_memory has been specified.
virtual Address getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF, QualType VarType, StringRef Name)
Creates artificial threadprivate variable with name Name and type VarType.
void emitUserDefinedMapper(const OMPDeclareMapperDecl *D, CodeGenFunction *CGF=nullptr)
Emit the function for the user defined mapper construct.
bool HasEmittedTargetRegion
Flag for keeping track of weather a target region has been emitted.
void emitDepobjElements(CodeGenFunction &CGF, QualType &KmpDependInfoTy, LValue PosLVal, const OMPTaskDataTy::DependData &Data, Address DependenciesArray)
std::string getReductionFuncName(StringRef Name) const
Get the function name of a reduction function.
virtual void processRequiresDirective(const OMPRequiresDecl *D)
Perform check on requires decl to ensure that target architecture supports unified addressing.
llvm::DenseSet< CanonicalDeclPtr< const Decl > > AlreadyEmittedTargetDecls
List of the emitted declarations.
virtual llvm::Value * emitTaskReductionInit(CodeGenFunction &CGF, SourceLocation Loc, ArrayRef< const Expr * > LHSExprs, ArrayRef< const Expr * > RHSExprs, const OMPTaskDataTy &Data)
Emit a code for initialization of task reduction clause.
llvm::Value * getThreadID(CodeGenFunction &CGF, SourceLocation Loc)
Gets thread id value for the current thread.
void emitUpdateClause(CodeGenFunction &CGF, LValue DepobjLVal, OpenMPDependClauseKind NewDepKind, SourceLocation Loc)
Updates the dependency kind in the specified depobj object.
virtual void emitLastprivateConditionalFinalUpdate(CodeGenFunction &CGF, LValue PrivLVal, const VarDecl *VD, SourceLocation Loc)
Gets the address of the global copy used for lastprivate conditional update, if any.
virtual void emitErrorCall(CodeGenFunction &CGF, SourceLocation Loc, Expr *ME, bool IsFatal)
Emit __kmpc_error call for error directive extern void __kmpc_error(ident_t *loc, int severity,...
void clearLocThreadIdInsertPt(CodeGenFunction &CGF)
virtual void emitTaskyieldCall(CodeGenFunction &CGF, SourceLocation Loc)
Emits code for a taskyield directive.
std::string getName(ArrayRef< StringRef > Parts) const
Get the platform-specific name separator.
virtual void emitFlush(CodeGenFunction &CGF, ArrayRef< const Expr * > Vars, SourceLocation Loc, llvm::AtomicOrdering AO)
Emit flush of the variables specified in 'omp flush' directive.
virtual void emitTaskwaitCall(CodeGenFunction &CGF, SourceLocation Loc, const OMPTaskDataTy &Data)
Emit code for 'taskwait' directive.
virtual void emitProcBindClause(CodeGenFunction &CGF, llvm::omp::ProcBindKind ProcBind, SourceLocation Loc)
Emit call to void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid, int proc_bind) to generat...
void emitLastprivateConditionalUpdate(CodeGenFunction &CGF, LValue IVLVal, StringRef UniqueDeclName, LValue LVal, SourceLocation Loc)
Emit update for lastprivate conditional data.
virtual void emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D, llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds, const Expr *IfCond, const OMPTaskDataTy &Data)
Emit task region for the taskloop directive.
virtual void emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind Kind, bool EmitChecks=true, bool ForceSimpleCall=false)
Emit an implicit/explicit barrier for OpenMP threads.
static unsigned getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind)
Returns default flags for the barriers depending on the directive, for which this barier is going to ...
virtual bool emitTargetFunctions(GlobalDecl GD)
Emit the target regions enclosed in GD function definition or the function itself in case it is a val...
TaskResultTy emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc, const OMPExecutableDirective &D, llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds, const OMPTaskDataTy &Data)
Emit task region for the task directive.
llvm::Value * emitTargetNumIterationsCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, llvm::function_ref< llvm::Value *(CodeGenFunction &CGF, const OMPLoopDirective &D)> SizeEmitter)
Return the trip count of loops associated with constructs / 'target teams distribute' and 'teams dist...
llvm::StringMap< llvm::AssertingVH< llvm::GlobalVariable >, llvm::BumpPtrAllocator > InternalVars
An ordered map of auto-generated variables to their unique names.
virtual void emitDistributeStaticInit(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values)
llvm::SmallVector< UntiedLocalVarsAddressesMap, 4 > UntiedLocalVarsStack
virtual void emitForStaticFinish(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind)
Call the appropriate runtime routine to notify that we finished all the work with current loop.
virtual void emitThreadLimitClause(CodeGenFunction &CGF, const Expr *ThreadLimit, SourceLocation Loc)
Emits call to void __kmpc_set_thread_limit(ident_t *loc, kmp_int32 global_tid, kmp_int32 thread_limit...
void emitIfClause(CodeGenFunction &CGF, const Expr *Cond, const RegionCodeGenTy &ThenGen, const RegionCodeGenTy &ElseGen)
Emits code for OpenMP 'if' clause using specified CodeGen function.
Address emitDepobjDependClause(CodeGenFunction &CGF, const OMPTaskDataTy::DependData &Dependencies, SourceLocation Loc)
Emits list of dependecies based on the provided data (array of dependence/expression pairs) for depob...
bool isNontemporalDecl(const ValueDecl *VD) const
Checks if the VD variable is marked as nontemporal declaration in current context.
virtual llvm::Function * emitParallelOutlinedFunction(CodeGenFunction &CGF, const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen)
Emits outlined function for the specified OpenMP parallel directive D.
const Expr * getNumThreadsExprForTargetDirective(CodeGenFunction &CGF, const OMPExecutableDirective &D, int32_t &UpperBound, bool UpperBoundOnly, llvm::Value **CondExpr=nullptr, const Expr **ThreadLimitExpr=nullptr)
Check for a number of threads upper bound constant value (stored in UpperBound), or expression (retur...
llvm::SmallVector< LastprivateConditionalData, 4 > LastprivateConditionalStack
Stack for list of addresses of declarations in current context marked as lastprivate conditional.
virtual void emitForStaticInit(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind, const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values)
Call the appropriate runtime routine to initialize it before start of loop.
virtual void emitDeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn)
Marks function Fn with properly mangled versions of vector functions.
llvm::AtomicOrdering getDefaultMemoryOrdering() const
Gets default memory ordering as specified in requires directive.
llvm::SmallDenseSet< CanonicalDeclPtr< const Decl > > NontemporalDeclsSet
virtual bool isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind, bool Chunked) const
Check if the specified ScheduleKind is static non-chunked.
llvm::Value * getCriticalRegionLock(StringRef CriticalName)
Returns corresponding lock object for the specified critical region name.
virtual void emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc, const Expr *IfCond, OpenMPDirectiveKind CancelRegion)
Emit code for 'cancel' construct.
QualType SavedKmpTaskTQTy
Saved kmp_task_t for task directive.
virtual void emitMasterRegion(CodeGenFunction &CGF, const RegionCodeGenTy &MasterOpGen, SourceLocation Loc)
Emits a master region.
virtual llvm::Function * emitTaskOutlinedFunction(const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, const VarDecl *PartIDVar, const VarDecl *TaskTVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen, bool Tied, unsigned &NumberOfParts)
Emits outlined function for the OpenMP task directive D.
llvm::DenseMap< llvm::Function *, unsigned > FunctionToUntiedTaskStackMap
Maps function to the position of the untied task locals stack.
void emitDestroyClause(CodeGenFunction &CGF, LValue DepobjLVal, SourceLocation Loc)
Emits the code to destroy the dependency object provided in depobj directive.
virtual void emitTaskReductionFixups(CodeGenFunction &CGF, SourceLocation Loc, ReductionCodeGen &RCG, unsigned N)
Required to resolve existing problems in the runtime.
llvm::ArrayType * KmpCriticalNameTy
Type kmp_critical_name, originally defined as typedef kmp_int32 kmp_critical_name[8];.
virtual void emitDoacrossOrdered(CodeGenFunction &CGF, const OMPDependClause *C)
Emit code for doacross ordered directive with 'depend' clause.
llvm::DenseMap< const OMPDeclareMapperDecl *, llvm::Function * > UDMMap
Map from the user-defined mapper declaration to its corresponding functions.
virtual void checkAndEmitLastprivateConditional(CodeGenFunction &CGF, const Expr *LHS)
Checks if the provided LVal is lastprivate conditional and emits the code to update the value of the ...
std::pair< llvm::Value *, LValue > getDepobjElements(CodeGenFunction &CGF, LValue DepobjLVal, SourceLocation Loc)
Returns the number of the elements and the address of the depobj dependency array.
llvm::SmallDenseSet< const VarDecl * > DeferredGlobalVariables
List of variables that can become declare target implicitly and, thus, must be emitted.
void emitUsesAllocatorsInit(CodeGenFunction &CGF, const Expr *Allocator, const Expr *AllocatorTraits)
Initializes user defined allocators specified in the uses_allocators clauses.
llvm::Type * KmpRoutineEntryPtrTy
Type typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *);.
llvm::Type * getIdentTyPointerTy()
Returns pointer to ident_t type.
void emitSingleReductionCombiner(CodeGenFunction &CGF, const Expr *ReductionOp, const Expr *PrivateRef, const DeclRefExpr *LHS, const DeclRefExpr *RHS)
Emits single reduction combiner.
llvm::OpenMPIRBuilder & getOMPBuilder()
virtual void emitTargetOutlinedFunction(const OMPExecutableDirective &D, StringRef ParentName, llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, bool IsOffloadEntry, const RegionCodeGenTy &CodeGen)
Emit outilined function for 'target' directive.
virtual void emitCriticalRegion(CodeGenFunction &CGF, StringRef CriticalName, const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc, const Expr *Hint=nullptr)
Emits a critical region.
virtual void emitForOrderedIterationEnd(CodeGenFunction &CGF, SourceLocation Loc, unsigned IVSize, bool IVSigned)
Call the appropriate runtime routine to notify that we finished iteration of the ordered loop with th...
llvm::Value * emitNumThreadsForTargetDirective(CodeGenFunction &CGF, const OMPExecutableDirective &D)
Emit an expression that denotes the number of threads a target region shall use.
void emitThreadPrivateVarInit(CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor, llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc)
Emits initialization code for the threadprivate variables.
virtual void emitUserDefinedReduction(CodeGenFunction *CGF, const OMPDeclareReductionDecl *D)
Emit code for the specified user defined reduction construct.
virtual void checkAndEmitSharedLastprivateConditional(CodeGenFunction &CGF, const OMPExecutableDirective &D, const llvm::DenseSet< CanonicalDeclPtr< const VarDecl > > &IgnoredDecls)
Checks if the lastprivate conditional was updated in inner region and writes the value.
QualType KmpDimTy
struct kmp_dim { // loop bounds info casted to kmp_int64 kmp_int64 lo; // lower kmp_int64 up; // uppe...
virtual void emitInlinedDirective(CodeGenFunction &CGF, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen, bool HasCancel=false)
Emit code for the directive that does not require outlining.
virtual void registerTargetGlobalVariable(const VarDecl *VD, llvm::Constant *Addr)
Checks if the provided global decl GD is a declare target variable and registers it when emitting cod...
virtual void emitFunctionProlog(CodeGenFunction &CGF, const Decl *D)
Emits OpenMP-specific function prolog.
virtual void emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc, llvm::Function *OutlinedFn, ArrayRef< llvm::Value * > CapturedVars, const Expr *IfCond, llvm::Value *NumThreads)
Emits code for parallel or serial call of the OutlinedFn with variables captured in a record which ad...
void emitKmpRoutineEntryT(QualType KmpInt32Ty)
Build type kmp_routine_entry_t (if not built yet).
virtual bool isStaticChunked(OpenMPScheduleClauseKind ScheduleKind, bool Chunked) const
Check if the specified ScheduleKind is static chunked.
virtual void emitTargetCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond, llvm::PointerIntPair< const Expr *, 2, OpenMPDeviceClauseModifier > Device, llvm::function_ref< llvm::Value *(CodeGenFunction &CGF, const OMPLoopDirective &D)> SizeEmitter)
Emit the target offloading code associated with D.
virtual bool hasAllocateAttributeForGlobalVar(const VarDecl *VD, LangAS &AS)
Checks if the variable has associated OMPAllocateDeclAttr attribute with the predefined allocator and...
llvm::AtomicOrdering RequiresAtomicOrdering
Atomic ordering from the omp requires directive.
virtual void emitReduction(CodeGenFunction &CGF, SourceLocation Loc, ArrayRef< const Expr * > Privates, ArrayRef< const Expr * > LHSExprs, ArrayRef< const Expr * > RHSExprs, ArrayRef< const Expr * > ReductionOps, ReductionOptionsTy Options)
Emit a code for reduction clause.
std::pair< llvm::Value *, Address > emitDependClause(CodeGenFunction &CGF, ArrayRef< OMPTaskDataTy::DependData > Dependencies, SourceLocation Loc)
Emits list of dependecies based on the provided data (array of dependence/expression pairs).
llvm::StringMap< llvm::WeakTrackingVH > EmittedNonTargetVariables
List of the global variables with their addresses that should not be emitted for the target.
virtual bool isDynamic(OpenMPScheduleClauseKind ScheduleKind) const
Check if the specified ScheduleKind is dynamic.
Address emitLastprivateConditionalInit(CodeGenFunction &CGF, const VarDecl *VD)
Create specialized alloca to handle lastprivate conditionals.
virtual void emitOrderedRegion(CodeGenFunction &CGF, const RegionCodeGenTy &OrderedOpGen, SourceLocation Loc, bool IsThreads)
Emit an ordered region.
virtual void emitOutlinedFunctionCall(CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn, ArrayRef< llvm::Value * > Args=std::nullopt) const
Emits call of the outlined function with the provided arguments, translating these arguments to corre...
virtual Address getAddressOfLocalVariable(CodeGenFunction &CGF, const VarDecl *VD)
Gets the OpenMP-specific address of the local variable.
virtual void emitTaskReductionFini(CodeGenFunction &CGF, SourceLocation Loc, bool IsWorksharingReduction)
Emits the following code for reduction clause with task modifier:
virtual void emitMaskedRegion(CodeGenFunction &CGF, const RegionCodeGenTy &MaskedOpGen, SourceLocation Loc, const Expr *Filter=nullptr)
Emits a masked region.
QualType KmpDependInfoTy
Type typedef struct kmp_depend_info { kmp_intptr_t base_addr; size_t len; struct { bool in:1; bool ou...
llvm::Function * emitReductionFunction(StringRef ReducerName, SourceLocation Loc, llvm::Type *ArgsElemType, ArrayRef< const Expr * > Privates, ArrayRef< const Expr * > LHSExprs, ArrayRef< const Expr * > RHSExprs, ArrayRef< const Expr * > ReductionOps)
Emits reduction function.
virtual void emitForDispatchInit(CodeGenFunction &CGF, SourceLocation Loc, const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned, bool Ordered, const DispatchRTInput &DispatchValues)
Call the appropriate runtime routine to initialize it before start of loop.
Address getTaskReductionItem(CodeGenFunction &CGF, SourceLocation Loc, llvm::Value *ReductionsPtr, LValue SharedLVal) override
Get the address of void * type of the privatue copy of the reduction item specified by the SharedLVal...
void emitCriticalRegion(CodeGenFunction &CGF, StringRef CriticalName, const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc, const Expr *Hint=nullptr) override
Emits a critical region.
void emitDistributeStaticInit(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) override
void emitForStaticInit(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind, const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) override
Call the appropriate runtime routine to initialize it before start of loop.
bool emitTargetGlobalVariable(GlobalDecl GD) override
Emit the global variable if it is a valid device global variable.
llvm::Value * emitForNext(CodeGenFunction &CGF, SourceLocation Loc, unsigned IVSize, bool IVSigned, Address IL, Address LB, Address UB, Address ST) override
Call __kmpc_dispatch_next( ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter, kmp_int[32|64] *p_lowe...
llvm::Function * emitThreadPrivateVarDefinition(const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit, CodeGenFunction *CGF=nullptr) override
Emit a code for initialization of threadprivate variable.
void emitTargetDataStandAloneCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, const Expr *Device) override
Emit the data mapping/movement code associated with the directive D that should be of the form 'targe...
llvm::Function * emitTeamsOutlinedFunction(CodeGenFunction &CGF, const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) override
Emits outlined function for the specified OpenMP teams directive D.
void emitReduction(CodeGenFunction &CGF, SourceLocation Loc, ArrayRef< const Expr * > Privates, ArrayRef< const Expr * > LHSExprs, ArrayRef< const Expr * > RHSExprs, ArrayRef< const Expr * > ReductionOps, ReductionOptionsTy Options) override
Emit a code for reduction clause.
void emitFlush(CodeGenFunction &CGF, ArrayRef< const Expr * > Vars, SourceLocation Loc, llvm::AtomicOrdering AO) override
Emit flush of the variables specified in 'omp flush' directive.
void emitDoacrossOrdered(CodeGenFunction &CGF, const OMPDependClause *C) override
Emit code for doacross ordered directive with 'depend' clause.
void emitTaskyieldCall(CodeGenFunction &CGF, SourceLocation Loc) override
Emits a masked region.
Address getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF, QualType VarType, StringRef Name) override
Creates artificial threadprivate variable with name Name and type VarType.
Address getAddrOfThreadPrivate(CodeGenFunction &CGF, const VarDecl *VD, Address VDAddr, SourceLocation Loc) override
Returns address of the threadprivate variable for the current thread.
void emitSingleRegion(CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen, SourceLocation Loc, ArrayRef< const Expr * > CopyprivateVars, ArrayRef< const Expr * > DestExprs, ArrayRef< const Expr * > SrcExprs, ArrayRef< const Expr * > AssignmentOps) override
Emits a single region.
void emitTaskReductionFixups(CodeGenFunction &CGF, SourceLocation Loc, ReductionCodeGen &RCG, unsigned N) override
Required to resolve existing problems in the runtime.
llvm::Function * emitParallelOutlinedFunction(CodeGenFunction &CGF, const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) override
Emits outlined function for the specified OpenMP parallel directive D.
void emitCancellationPointCall(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind CancelRegion) override
Emit code for 'cancellation point' construct.
void emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind Kind, bool EmitChecks=true, bool ForceSimpleCall=false) override
Emit an implicit/explicit barrier for OpenMP threads.
Address getParameterAddress(CodeGenFunction &CGF, const VarDecl *NativeParam, const VarDecl *TargetParam) const override
Gets the address of the native argument based on the address of the target-specific parameter.
void emitTeamsCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, SourceLocation Loc, llvm::Function *OutlinedFn, ArrayRef< llvm::Value * > CapturedVars) override
Emits code for a teams call of the OutlinedFn with variables captured in a record whose address is stor...
void emitForOrderedIterationEnd(CodeGenFunction &CGF, SourceLocation Loc, unsigned IVSize, bool IVSigned) override
Call the appropriate runtime routine to notify that we finished iteration of the ordered loop with th...
bool emitTargetGlobal(GlobalDecl GD) override
Emit the global GD if it is meaningful for the target.
void emitTaskReductionFini(CodeGenFunction &CGF, SourceLocation Loc, bool IsWorksharingReduction) override
Emits the finalization code for a reduction clause with the task modifier.
void emitOrderedRegion(CodeGenFunction &CGF, const RegionCodeGenTy &OrderedOpGen, SourceLocation Loc, bool IsThreads) override
Emit an ordered region.
void emitForStaticFinish(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind) override
Call the appropriate runtime routine to notify that we finished all the work with the current loop.
llvm::Value * emitTaskReductionInit(CodeGenFunction &CGF, SourceLocation Loc, ArrayRef< const Expr * > LHSExprs, ArrayRef< const Expr * > RHSExprs, const OMPTaskDataTy &Data) override
Emit code for the initialization of the task reduction clause.
void emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc, llvm::Function *OutlinedFn, ArrayRef< llvm::Value * > CapturedVars, const Expr *IfCond, llvm::Value *NumThreads) override
Emits code for a parallel or serial call of the OutlinedFn with variables captured in a record whose ad...
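IfCond and NumThreads carry the if and num_threads clauses; when IfCond evaluates to false the outlined function is invoked serially on the encountering thread. A hedged sketch (hypothetical code):
#include <cstdio>
int main(int argc, char **) {
  // With argc <= 1 the region runs serially (the 'serial call' case);
  // otherwise it forks with at most four threads.
#pragma omp parallel if(argc > 1) num_threads(4)
  std::printf("hello from the parallel region\n");
}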
void emitProcBindClause(CodeGenFunction &CGF, llvm::omp::ProcBindKind ProcBind, SourceLocation Loc) override
Emit call to void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid, int proc_bind) to generat...
void emitTargetOutlinedFunction(const OMPExecutableDirective &D, StringRef ParentName, llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) override
Emit the outlined function for the 'target' directive.
void emitMasterRegion(CodeGenFunction &CGF, const RegionCodeGenTy &MasterOpGen, SourceLocation Loc) override
Emits a master region.
void emitNumTeamsClause(CodeGenFunction &CGF, const Expr *NumTeams, const Expr *ThreadLimit, SourceLocation Loc) override
Emits call to void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_teams,...
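A hedged sketch of the clauses this entry point lowers (hypothetical code; on offloading targets the push call precedes the teams launch):
#include <omp.h>
#include <cstdio>
int main() {
  int teams = 0;
#pragma omp target teams num_teams(8) thread_limit(64) map(tofrom : teams)
  {
    if (omp_get_team_num() == 0)
      teams = omp_get_num_teams(); // at most 8, per the clause
  }
  std::printf("teams = %d\n", teams);
}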
const VarDecl * translateParameter(const FieldDecl *FD, const VarDecl *NativeParam) const override
Translates the native parameter of outlined function if this is required for target.
void emitMaskedRegion(CodeGenFunction &CGF, const RegionCodeGenTy &MaskedOpGen, SourceLocation Loc, const Expr *Filter=nullptr) override
Emits a masked region.
void emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, const OMPExecutableDirective &D, llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds, const Expr *IfCond, const OMPTaskDataTy &Data) override
Emit task region for the task directive.
void emitTargetCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond, llvm::PointerIntPair< const Expr *, 2, OpenMPDeviceClauseModifier > Device, llvm::function_ref< llvm::Value *(CodeGenFunction &CGF, const OMPLoopDirective &D)> SizeEmitter) override
Emit the target offloading code associated with D.
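OutlinedFn is the host fallback, OutlinedFnID identifies the device entry point, and IfCond/Device carry the respective clauses. A minimal hypothetical sketch:
#include <cstdio>
int main() {
  int x = 1;
  // Offload the region to device 0 when available; otherwise the
  // host version of the outlined function runs the same code.
#pragma omp target device(0) map(tofrom : x)
  x *= 2;
  std::printf("x = %d\n", x); // 2
}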
bool emitTargetFunctions(GlobalDecl GD) override
Emit the target regions enclosed in the GD function definition, or the function itself in case it is a val...
void emitNumThreadsClause(CodeGenFunction &CGF, llvm::Value *NumThreads, SourceLocation Loc) override
Emits call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_threads)...
void emitDoacrossInit(CodeGenFunction &CGF, const OMPLoopDirective &D, ArrayRef< Expr * > NumIterations) override
Emit initialization for doacross loop nesting support.
void emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc, const Expr *IfCond, OpenMPDirectiveKind CancelRegion) override
Emit code for 'cancel' construct.
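A hedged sketch of cancellation (hypothetical code; note that the runtime honors cancellation only when OMP_CANCELLATION=true is set in the environment):
#include <cstdio>
int main() {
  int found = -1;
#pragma omp parallel for
  for (int i = 0; i < 1000; ++i) {
    if (i == 123) {
      found = i;
#pragma omp cancel for // request cancellation of the worksharing loop
    }
#pragma omp cancellation point for // other threads may exit here
  }
  std::printf("found = %d\n", found); // 123
}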
void emitTaskwaitCall(CodeGenFunction &CGF, SourceLocation Loc, const OMPTaskDataTy &Data) override
Emit code for 'taskwait' directive.
void emitTaskgroupRegion(CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen, SourceLocation Loc) override
Emit a taskgroup region.
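Together with emitTaskReductionInit and emitTaskReductionFini above, this supports the OpenMP 5.0 task_reduction/in_reduction pattern. A minimal hypothetical sketch:
#include <cstdio>
int main() {
  int sum = 0;
#pragma omp parallel
#pragma omp single
  {
#pragma omp taskgroup task_reduction(+ : sum)
    {
      for (int i = 0; i < 8; ++i) {
#pragma omp task in_reduction(+ : sum)
        sum += i;
      }
    }
  }
  std::printf("sum = %d\n", sum); // 28
}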
void emitTargetDataCalls(CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, const Expr *Device, const RegionCodeGenTy &CodeGen, CGOpenMPRuntime::TargetDataInfo &Info) override
Emit the target data mapping code associated with D.
void emitForDispatchInit(CodeGenFunction &CGF, SourceLocation Loc, const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned, bool Ordered, const DispatchRTInput &DispatchValues) override
This is used for non-static scheduled types and when the ordered clause is present on the loop constr...
llvm::Function * emitTaskOutlinedFunction(const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, const VarDecl *PartIDVar, const VarDecl *TaskTVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen, bool Tied, unsigned &NumberOfParts) override
Emits outlined function for the OpenMP task directive D.
void emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D, llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds, const Expr *IfCond, const OMPTaskDataTy &Data) override
Emit task region for the taskloop directive.
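A hedged sketch of the directive (hypothetical code); grainsize/num_tasks and the other scheduling inputs arrive packed in the OMPTaskDataTy argument:
#include <cstdio>
int main() {
  int a[256];
#pragma omp parallel
#pragma omp single
  {
    // The iteration space is split into tasks of roughly 16
    // iterations each.
#pragma omp taskloop grainsize(16)
    for (int i = 0; i < 256; ++i)
      a[i] = i;
  }
  std::printf("a[255] = %d\n", a[255]); // 255
}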
CGRecordLayout - This class handles struct and union layout info while lowering AST types to LLVM typ...
unsigned getNonVirtualBaseLLVMFieldNo(const CXXRecordDecl *RD) const
llvm::StructType * getLLVMType() const
Return the "complete object" LLVM type associated with this record.
llvm::StructType * getBaseSubobjectLLVMType() const
Return the "base subobject" LLVM type associated with this record.
unsigned getLLVMFieldNo(const FieldDecl *FD) const
Return llvm::StructType element number that corresponds to the field FD.
unsigned getVirtualBaseIndex(const CXXRecordDecl *base) const
Return the LLVM field index corresponding to the given virtual base.
virtual void EmitBody(CodeGenFunction &CGF, const Stmt *S)
Emit the captured statement body.
RAII for correct setting/restoring of CapturedStmtInfo.
The scope used to remap some variables as private in the OpenMP loop body (or other captured region e...
bool Privatize()
Privatizes local variables previously registered as private.
bool addPrivate(const VarDecl *LocalVD, Address Addr)
Registers the LocalVD variable as private, with Addr as the address of the corresponding private variabl...
An RAII object to set (and then clear) a mapping for an OpaqueValueExpr.
Enters a new scope for capturing cleanups, all of which will be executed once the scope is exited.
CodeGenFunction - This class organizes the per-function state that is used while generating LLVM code...
void FinishFunction(SourceLocation EndLoc=SourceLocation())
FinishFunction - Complete IR generation of the current function.
void EmitNullInitialization(Address DestPtr, QualType Ty)
EmitNullInitialization - Generate code to set a value of the given type to null; if the type contains...
void EmitOMPAggregateAssign(Address DestAddr, Address SrcAddr, QualType OriginalType, const llvm::function_ref< void(Address, Address)> CopyGen)
Perform element-by-element copying of arrays with type OriginalType from SrcAddr to DestAddr using co...
LValue EmitOMPArraySectionExpr(const OMPArraySectionExpr *E, bool IsLowerBound=true)
static TypeEvaluationKind getEvaluationKind(QualType T)
getEvaluationKind - Return the TypeEvaluationKind of QualType T.
void EmitBranchOnBoolExpr(const Expr *Cond, llvm::BasicBlock *TrueBlock, llvm::BasicBlock *FalseBlock, uint64_t TrueCount, Stmt::Likelihood LH=Stmt::LH_None, const Expr *ConditionalOp=nullptr)
EmitBranchOnBoolExpr - Emit a branch on a boolean condition (e.g.
JumpDest getJumpDestInCurrentScope(llvm::BasicBlock *Target)
The given basic block lies in the current EH scope, but may be a target of a potentially scope-crossi...
void EmitOMPCopy(QualType OriginalType, Address DestAddr, Address SrcAddr, const VarDecl *DestVD, const VarDecl *SrcVD, const Expr *Copy)
Emit proper copying of data from one variable to another.
void EmitStoreThroughLValue(RValue Src, LValue Dst, bool isInit=false)
EmitStoreThroughLValue - Store the specified rvalue into the specified lvalue, where both are guarant...
static void EmitOMPTargetTeamsDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetTeamsDirective &S)
Emit device code for the target teams directive.
CGCapturedStmtInfo * CapturedStmtInfo
static void EmitOMPTargetTeamsDistributeDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetTeamsDistributeDirective &S)
Emit device code for the target teams distribute directive.
Address EmitLoadOfPointer(Address Ptr, const PointerType *PtrTy, LValueBaseInfo *BaseInfo=nullptr, TBAAAccessInfo *TBAAInfo=nullptr)
Load a pointer with type PtrTy stored at address Ptr.
static void EmitOMPTargetParallelForSimdDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetParallelForSimdDirective &S)
Emit device code for the target parallel for simd directive.
llvm::Value * emitArrayLength(const ArrayType *arrayType, QualType &baseType, Address &addr)
emitArrayLength - Compute the length of an array, even if it's a VLA, and drill down to the base elem...
VlaSizePair getVLASize(const VariableArrayType *vla)
Returns an LLVM value that corresponds to the size, in non-variably-sized elements,...
JumpDest getOMPCancelDestination(OpenMPDirectiveKind Kind)
LValue EmitLValue(const Expr *E, KnownNonNull_t IsKnownNonNull=NotKnownNonNull)
EmitLValue - Emit code to compute a designator that specifies the location of the expression.
static void EmitOMPTargetDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetDirective &S)
Emit device code for the target directive.
void EmitVariablyModifiedType(QualType Ty)
EmitVariablyModifiedType - Capture all the sizes for the VLA expressions in the given variably-modified type and s...
llvm::BasicBlock * createBasicBlock(const Twine &name="", llvm::Function *parent=nullptr, llvm::BasicBlock *before=nullptr)
createBasicBlock - Create an LLVM basic block.
const LangOptions & getLangOpts() const
static void EmitOMPTargetSimdDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetSimdDirective &S)
Emit device code for the target simd directive.
LValue EmitLValueForFieldInitialization(LValue Base, const FieldDecl *Field)
EmitLValueForFieldInitialization - Like EmitLValueForField, except that if the Field is a reference,...
void EmitBlock(llvm::BasicBlock *BB, bool IsFinished=false)
EmitBlock - Emit the given block.
const CodeGen::CGBlockInfo * BlockInfo
Address EmitLoadOfReference(LValue RefLVal, LValueBaseInfo *PointeeBaseInfo=nullptr, TBAAAccessInfo *PointeeTBAAInfo=nullptr)
void EmitExprAsInit(const Expr *init, const ValueDecl *D, LValue lvalue, bool capturedByInit)
EmitExprAsInit - Emits the code necessary to initialize a location in memory with the given initializ...
RValue EmitLoadOfLValue(LValue V, SourceLocation Loc)
EmitLoadOfLValue - Given an expression that represents a value lvalue, this method emits the address ...
void EmitAnyExprToMem(const Expr *E, Address Location, Qualifiers Quals, bool IsInitializer)
EmitAnyExprToMem - Emits the code necessary to evaluate an arbitrary expression into the given memory...
void EmitIgnoredExpr(const Expr *E)
EmitIgnoredExpr - Emit an expression in a context which ignores the result.
llvm::Type * ConvertTypeForMem(QualType T)
const Decl * CurCodeDecl
CurCodeDecl - This is the inner-most code context, which includes blocks.
llvm::AssertingVH< llvm::Instruction > AllocaInsertPt
AllocaInsertPoint - This is an instruction in the entry block before which we prefer to insert alloca...
void EmitAggregateAssign(LValue Dest, LValue Src, QualType EltTy)
Emit an aggregate assignment.
void GenerateOpenMPCapturedVars(const CapturedStmt &S, SmallVectorImpl< llvm::Value * > &CapturedVars)
JumpDest ReturnBlock
ReturnBlock - Unified return block.
static void EmitOMPTargetTeamsGenericLoopDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetTeamsGenericLoopDirective &S)
Emit device code for the target teams loop directive.
LValue EmitLValueForField(LValue Base, const FieldDecl *Field)
Destroyer * getDestroyer(QualType::DestructionKind destructionKind)
static void EmitOMPTargetTeamsDistributeParallelForDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetTeamsDistributeParallelForDirective &S)
llvm::Value * getTypeSize(QualType Ty)
Returns calculated size of the specified type.
void emitDestroy(Address addr, QualType type, Destroyer *destroyer, bool useEHCleanupForArray)
Address EmitPointerWithAlignment(const Expr *Addr, LValueBaseInfo *BaseInfo=nullptr, TBAAAccessInfo *TBAAInfo=nullptr, KnownNonNull_t IsKnownNonNull=NotKnownNonNull)
EmitPointerWithAlignment - Given an expression with a pointer type, emit the value and compute our be...
void StartFunction(GlobalDecl GD, QualType RetTy, llvm::Function *Fn, const CGFunctionInfo &FnInfo, const FunctionArgList &Args, SourceLocation Loc=SourceLocation(), SourceLocation StartLoc=SourceLocation())
Emit code for the start of a function.
ComplexPairTy EmitLoadOfComplex(LValue src, SourceLocation loc)
EmitLoadOfComplex - Load a complex number from the specified l-value.
bool HaveInsertPoint() const
HaveInsertPoint - True if an insertion point is defined.
bool isTrivialInitializer(const Expr *Init)
Determine whether the given initializer is trivial in the sense that it requires no code to be genera...
void EmitBranch(llvm::BasicBlock *Block)
EmitBranch - Emit a branch to the specified basic block from the current insert block,...
void EmitAggregateCopy(LValue Dest, LValue Src, QualType EltTy, AggValueSlot::Overlap_t MayOverlap, bool isVolatile=false)
EmitAggregateCopy - Emit an aggregate copy.
Address CreateDefaultAlignTempAlloca(llvm::Type *Ty, const Twine &Name="tmp")
CreateDefaultAlignTempAlloca - This creates an alloca with the default ABI alignment of the given L...
void EmitOMPTargetTaskBasedDirective(const OMPExecutableDirective &S, const RegionCodeGenTy &BodyGen, OMPTargetDataInfo &InputInfo)
static void EmitOMPTargetParallelForDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetParallelForDirective &S)
Emit device code for the target parallel for directive.
void EmitVarDecl(const VarDecl &D)
EmitVarDecl - Emit a local variable declaration.
llvm::CallInst * EmitNounwindRuntimeCall(llvm::FunctionCallee callee, const Twine &name="")
llvm::Value * EmitLoadOfScalar(Address Addr, bool Volatile, QualType Ty, SourceLocation Loc, AlignmentSource Source=AlignmentSource::Type, bool isNontemporal=false)
EmitLoadOfScalar - Load a scalar value from an address, taking care to appropriately convert from the...
Address CreateMemTemp(QualType T, const Twine &Name="tmp", Address *Alloca=nullptr)
CreateMemTemp - Create a temporary memory object of the given type, with appropriate alignment and cas...
const Decl * CurFuncDecl
CurFuncDecl - Holds the Decl for the current outermost non-closure context.
LValue EmitLoadOfPointerLValue(Address Ptr, const PointerType *PtrTy)
static void EmitOMPTargetTeamsDistributeSimdDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetTeamsDistributeSimdDirective &S)
Emit device code for the target teams distribute simd directive.
void EmitBranchThroughCleanup(JumpDest Dest)
EmitBranchThroughCleanup - Emit a branch from the current insert block through the normal cleanup han...
AutoVarEmission EmitAutoVarAlloca(const VarDecl &var)
void pushDestroy(QualType::DestructionKind dtorKind, Address addr, QualType type)
bool ConstantFoldsToSimpleInteger(const Expr *Cond, bool &Result, bool AllowLabels=false)
ConstantFoldsToSimpleInteger - If the specified expression does not fold to a constant,...
void EmitAutoVarCleanups(const AutoVarEmission &emission)
bool needsEHCleanup(QualType::DestructionKind kind)
Determines whether an EH cleanup is required to destroy a type with the given destruction kind.
llvm::DenseMap< const ValueDecl *, FieldDecl * > LambdaCaptureFields
llvm::CallInst * EmitRuntimeCall(llvm::FunctionCallee callee, const Twine &name="")
llvm::Type * ConvertType(QualType T)
CodeGenTypes & getTypes() const
static void EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetTeamsDistributeParallelForSimdDirective &S)
Emit device code for the target teams distribute parallel for simd directive.
LValue MakeNaturalAlignAddrLValue(llvm::Value *V, QualType T)
LValue EmitStringLiteralLValue(const StringLiteral *E)
llvm::Value * EvaluateExprAsBool(const Expr *E)
EvaluateExprAsBool - Perform the usual unary conversions on the specified expression and compare the ...
LValue EmitOMPSharedLValue(const Expr *E)
Emits the lvalue for an expression with a possibly captured variable.
llvm::Value * EmitCheckedInBoundsGEP(llvm::Type *ElemTy, llvm::Value *Ptr, ArrayRef< llvm::Value * > IdxList, bool SignedIndices, bool IsSubtraction, SourceLocation Loc, const Twine &Name="")
Same as IRBuilder::CreateInBoundsGEP, but additionally emits a check to detect undefined behavior whe...
llvm::Function * GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S, SourceLocation Loc)
LValue MakeAddrLValue(Address Addr, QualType T, AlignmentSource Source=AlignmentSource::Type)
void EmitStoreOfComplex(ComplexPairTy V, LValue dest, bool isInit)
EmitStoreOfComplex - Store a complex number into the specified l-value.
static void EmitOMPTargetParallelGenericLoopDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetParallelGenericLoopDirective &S)
Emit device code for the target parallel loop directive.
LValue EmitLoadOfReferenceLValue(LValue RefLVal)
Address GetAddrOfLocalVar(const VarDecl *VD)
GetAddrOfLocalVar - Return the address of a local variable.
void EmitAtomicStore(RValue rvalue, LValue lvalue, bool isInit)
llvm::Value * EmitScalarConversion(llvm::Value *Src, QualType SrcTy, QualType DstTy, SourceLocation Loc)
Emit a conversion from the specified type to the specified destination type, both of which are LLVM s...
std::pair< llvm::Value *, llvm::Value * > ComplexPairTy
Address ReturnValue
ReturnValue - The temporary alloca to hold the return value.
static void EmitOMPTargetParallelDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetParallelDirective &S)
llvm::Value * EmitScalarExpr(const Expr *E, bool IgnoreResultAssign=false)
EmitScalarExpr - Emit the computation of the specified expression of LLVM scalar type,...
void incrementProfileCounter(const Stmt *S, llvm::Value *StepV=nullptr)
Increment the profiler's counter for the given statement by StepV.
void EmitStoreOfScalar(llvm::Value *Value, Address Addr, bool Volatile, QualType Ty, AlignmentSource Source=AlignmentSource::Type, bool isInit=false, bool isNontemporal=false)
EmitStoreOfScalar - Store a scalar value to an address, taking care to appropriately convert from the...
This class organizes the cross-function state that is used while generating LLVM code.
void handleCUDALaunchBoundsAttr(llvm::Function *F, const CUDALaunchBoundsAttr *A, int32_t *MaxThreadsVal=nullptr, int32_t *MinBlocksVal=nullptr, int32_t *MaxClusterRankVal=nullptr)
Emit the IR encoding to attach the CUDA launch bounds attribute to F.
Definition: NVPTX.cpp:308
void SetInternalFunctionAttributes(GlobalDecl GD, llvm::Function *F, const CGFunctionInfo &FI)
Set the attributes on the LLVM function for the given decl and function info.
llvm::Module & getModule() const
void addCompilerUsedGlobal(llvm::GlobalValue *GV)
Add a global to a list to be added to the llvm.compiler.used metadata.
CharUnits GetTargetTypeStoreSize(llvm::Type *Ty) const
Return the store size, in character units, of the given LLVM type.
void handleAMDGPUWavesPerEUAttr(llvm::Function *F, const AMDGPUWavesPerEUAttr *A)
Emit the IR encoding to attach the AMD GPU waves-per-eu attribute to F.
Definition: AMDGPU.cpp:654
DiagnosticsEngine & getDiags() const
const LangOptions & getLangOpts() const
CharUnits getNaturalTypeAlignment(QualType T, LValueBaseInfo *BaseInfo=nullptr, TBAAAccessInfo *TBAAInfo=nullptr, bool forPointeeType=false)
const TargetInfo & getTarget() const
void EmitGlobal(GlobalDecl D)
Emit code for a single global function or var decl.
void handleAMDGPUFlatWorkGroupSizeAttr(llvm::Function *F, const AMDGPUFlatWorkGroupSizeAttr *A, const ReqdWorkGroupSizeAttr *ReqdWGS=nullptr, int32_t *MinThreadsVal=nullptr, int32_t *MaxThreadsVal=nullptr)
Emit the IR encoding to attach the AMD GPU flat-work-group-size attribute to F.
Definition: AMDGPU.cpp:627
llvm::GlobalValue::LinkageTypes getLLVMLinkageVarDefinition(const VarDecl *VD)
Returns LLVM linkage for a declarator.
CGCXXABI & getCXXABI() const
CGOpenMPRuntime & getOpenMPRuntime()
Return a reference to the configured OpenMP runtime.
const llvm::Triple & getTriple() const
TBAAAccessInfo getTBAAInfoForSubobject(LValue Base, QualType AccessType)
getTBAAInfoForSubobject - Get TBAA information for an access with a given base lvalue.
llvm::Constant * GetAddrOfGlobal(GlobalDecl GD, ForDefinition_t IsForDefinition=NotForDefinition)
ASTContext & getContext() const
const TargetCodeGenInfo & getTargetCodeGenInfo()
const CodeGenOptions & getCodeGenOpts() const
StringRef getMangledName(GlobalDecl GD)
std::optional< CharUnits > getOMPAllocateAlignment(const VarDecl *VD)
Return the alignment specified in an allocate directive, if present.
Definition: CGDecl.cpp:2777
llvm::GlobalValue * GetGlobalValue(StringRef Ref)
llvm::Constant * EmitNullConstant(QualType T)
Return the result of value-initializing the given type, i.e.
llvm::Function * CreateGlobalInitOrCleanUpFunction(llvm::FunctionType *ty, const Twine &name, const CGFunctionInfo &FI, SourceLocation Loc=SourceLocation(), bool TLS=false, llvm::GlobalVariable::LinkageTypes Linkage=llvm::GlobalVariable::InternalLinkage)
Definition: CGDeclCXX.cpp:436
llvm::ConstantInt * getSize(CharUnits numChars)
Emit the given number of characters as a value of type size_t.
llvm::Type * ConvertType(QualType T)
ConvertType - Convert type T into a llvm::Type.
llvm::FunctionType * GetFunctionType(const CGFunctionInfo &Info)
GetFunctionType - Get the LLVM function type for the given function info.
Definition: CGCall.cpp:1625
const CGFunctionInfo & arrangeBuiltinFunctionDeclaration(QualType resultType, const FunctionArgList &args)
A builtin function is a freestanding function using the default C conventions.
Definition: CGCall.cpp:674
const CGRecordLayout & getCGRecordLayout(const RecordDecl *)
getCGRecordLayout - Return record layout info for the given record decl.
llvm::Type * ConvertTypeForMem(QualType T, bool ForBitField=false)
ConvertTypeForMem - Convert type T into a llvm::Type.
const CGFunctionInfo & arrangeNullaryFunction()
A nullary function is a freestanding function of type 'void ()'.
Definition: CGCall.cpp:716
Information for lazily generating a cleanup.
Definition: EHScopeStack.h:141
void popTerminate()
Pops a terminate handler off the stack.
Definition: CGCleanup.h:576
void pushTerminate()
Push a terminate handler on the stack.
Definition: CGCleanup.cpp:272
FunctionArgList - Type for representing both the decl and type of parameters to a function.
Definition: CGCall.h:352
LValue - This represents an lvalue reference.
Definition: CGValue.h:171
CharUnits getAlignment() const
Definition: CGValue.h:331
Address getAddress(CodeGenFunction &CGF) const
Definition: CGValue.h:350
const Qualifiers & getQuals() const
Definition: CGValue.h:326
LValueBaseInfo getBaseInfo() const
Definition: CGValue.h:334
llvm::Value * getPointer(CodeGenFunction &CGF) const
Definition: CGValue.h:346
QualType getType() const
Definition: CGValue.h:279
TBAAAccessInfo getTBAAInfo() const
Definition: CGValue.h:323
A basic class for pre- and post-actions in an advanced codegen sequence for an OpenMP region.
virtual void Enter(CodeGenFunction &CGF)
RValue - This trivial value class is used to represent the result of an expression that is evaluated.
Definition: CGValue.h:39
static RValue get(llvm::Value *V)
Definition: CGValue.h:89
static RValue getComplex(llvm::Value *V1, llvm::Value *V2)
Definition: CGValue.h:96
llvm::Value * getScalarVal() const
getScalarVal() - Return the Value* of this scalar value.
Definition: CGValue.h:61
Class intended to support codegen of all kinds of reduction clauses.
LValue getSharedLValue(unsigned N) const
Returns LValue for the reduction item.
const Expr * getRefExpr(unsigned N) const
Returns the reference expression for the reduction item.
LValue getOrigLValue(unsigned N) const
Returns LValue for the original reduction item.
bool needCleanups(unsigned N)
Returns true if the private copy requires cleanups.
void emitAggregateType(CodeGenFunction &CGF, unsigned N)
Emits the code for the variably-modified type, if required.
const VarDecl * getBaseDecl(unsigned N) const
Returns the base declaration of the reduction item.
QualType getPrivateType(unsigned N) const
Return the type of the private item.
bool usesReductionInitializer(unsigned N) const
Returns true if the initialization of the reduction item uses initializer from declare reduction cons...
void emitSharedOrigLValue(CodeGenFunction &CGF, unsigned N)
Emits lvalue for the shared and original reduction item.
void emitInitialization(CodeGenFunction &CGF, unsigned N, Address PrivateAddr, Address SharedAddr, llvm::function_ref< bool(CodeGenFunction &)> DefaultInit)
Performs initialization of the private copy for the reduction item.
std::pair< llvm::Value *, llvm::Value * > getSizes(unsigned N) const
Returns the size of the reduction item (in chars and total number of elements in the item),...
ReductionCodeGen(ArrayRef< const Expr * > Shareds, ArrayRef< const Expr * > Origs, ArrayRef< const Expr * > Privates, ArrayRef< const Expr * > ReductionOps)
void emitCleanups(CodeGenFunction &CGF, unsigned N, Address PrivateAddr)
Emits cleanup code for the reduction item.
Address adjustPrivateAddress(CodeGenFunction &CGF, unsigned N, Address PrivateAddr)
Adjusts PrivateAddr to be used in place of the original variable address in normal operations.
Class that provides a way to call the simple version of codegen for an OpenMP region, or an advanced one with possibl...
void operator()(CodeGenFunction &CGF) const
void setAction(PrePostActionTy &Action) const
virtual void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M) const
setTargetAttributes - Provides a convenient hook to handle extra target-specific attributes for the g...
Definition: TargetInfo.h:75
ConstStmtVisitor - This class implements a simple visitor for Stmt subclasses.
Definition: StmtVisitor.h:195
DeclContext - This is used only as base class of specific decl types that can act as declaration cont...
Definition: DeclBase.h:1446
void addDecl(Decl *D)
Add the declaration D into this context.
Definition: DeclBase.cpp:1699
A reference to a declared variable, function, enum, etc.
Definition: Expr.h:1260
ValueDecl * getDecl()
Definition: Expr.h:1328
Decl - This represents one declaration (or definition), e.g.
Definition: DeclBase.h:85
T * getAttr() const
Definition: DeclBase.h:578
bool hasAttrs() const
Definition: DeclBase.h:523
ASTContext & getASTContext() const LLVM_READONLY
Definition: DeclBase.cpp:501
void addAttr(Attr *A)
Definition: DeclBase.cpp:975
llvm::iterator_range< specific_attr_iterator< T > > specific_attrs() const
Definition: DeclBase.h:564
SourceLocation getLocation() const
Definition: DeclBase.h:444
DeclContext * getDeclContext()
Definition: DeclBase.h:453
AttrVec & getAttrs()
Definition: DeclBase.h:529
bool hasAttr() const
Definition: DeclBase.h:582
virtual Decl * getCanonicalDecl()
Retrieves the "canonical" declaration of the given declaration.
Definition: DeclBase.h:978
SourceLocation getBeginLoc() const LLVM_READONLY
Definition: Decl.h:822
DiagnosticBuilder Report(SourceLocation Loc, unsigned DiagID)
Issue the message to the client.
Definition: Diagnostic.h:1547
unsigned getCustomDiagID(Level L, const char(&FormatString)[N])
Return an ID for a diagnostic with the specified format string and level.
Definition: Diagnostic.h:873
The return type of classify().
Definition: Expr.h:330
This represents one expression.
Definition: Expr.h:110
bool isGLValue() const
Definition: Expr.h:280
@ SE_AllowSideEffects
Allow any unmodeled side effect.
Definition: Expr.h:671
@ SE_AllowUndefinedBehavior
Allow UB that we can give a value, but not arbitrary unmodeled side effects.
Definition: Expr.h:669
Expr * IgnoreParenCasts() LLVM_READONLY
Skip past any parentheses and casts which might surround this expression until reaching a fixed point...
Definition: Expr.cpp:3050
llvm::APSInt EvaluateKnownConstInt(const ASTContext &Ctx, SmallVectorImpl< PartialDiagnosticAt > *Diag=nullptr) const
EvaluateKnownConstInt - Call EvaluateAsRValue and return the folded integer.
Expr * IgnoreParenImpCasts() LLVM_READONLY
Skip past any parentheses and implicit casts which might surround this expression until reaching a fi...
Definition: Expr.cpp:3045
bool isEvaluatable(const ASTContext &Ctx, SideEffectsKind AllowSideEffects=SE_NoSideEffects) const
isEvaluatable - Call EvaluateAsRValue to see if this expression can be constant folded without side-e...
bool HasSideEffects(const ASTContext &Ctx, bool IncludePossibleEffects=true) const
HasSideEffects - This routine returns true for all those expressions which have any effect other than...
Definition: Expr.cpp:3542
bool isIntegerConstantExpr(const ASTContext &Ctx, SourceLocation *Loc=nullptr) const
bool EvaluateAsBooleanCondition(bool &Result, const ASTContext &Ctx, bool InConstantContext=false) const
EvaluateAsBooleanCondition - Return true if this is a constant which we can fold and convert to a boo...
SourceLocation getExprLoc() const LLVM_READONLY
getExprLoc - Return the preferred location for the arrow when diagnosing a problem with a generic exp...
Definition: Expr.cpp:277
std::optional< llvm::APSInt > getIntegerConstantExpr(const ASTContext &Ctx, SourceLocation *Loc=nullptr) const
getIntegerConstantExpr - Return the value if this expression is a valid integer constant expression.
QualType getType() const
Definition: Expr.h:142
bool hasNonTrivialCall(const ASTContext &Ctx) const
Determine whether this expression involves a call to any function that is not trivial.
Definition: Expr.cpp:3892
Represents a member of a struct/union/class.
Definition: Decl.h:3025
static FieldDecl * Create(const ASTContext &C, DeclContext *DC, SourceLocation StartLoc, SourceLocation IdLoc, IdentifierInfo *Id, QualType T, TypeSourceInfo *TInfo, Expr *BW, bool Mutable, InClassInitStyle InitStyle)
Definition: Decl.cpp:4512
Represents a function declaration or definition.
Definition: Decl.h:1959
const ParmVarDecl * getParamDecl(unsigned i) const
Definition: Decl.h:2674
QualType getReturnType() const
Definition: Decl.h:2722
ArrayRef< ParmVarDecl * > parameters() const
Definition: Decl.h:2651
FunctionDecl * getCanonicalDecl() override
Retrieves the "canonical" declaration of the given declaration.
Definition: Decl.cpp:3582
unsigned getNumParams() const
Return the number of parameters this function must have based on its FunctionType.
Definition: Decl.cpp:3657
GlobalDecl - represents a global declaration.
Definition: GlobalDecl.h:56
const Decl * getDecl() const
Definition: GlobalDecl.h:103
static ImplicitParamDecl * Create(ASTContext &C, DeclContext *DC, SourceLocation IdLoc, IdentifierInfo *Id, QualType T, ImplicitParamKind ParamKind)
Create implicit parameter.
Definition: Decl.cpp:5338
static IntegerLiteral * Create(const ASTContext &C, const llvm::APInt &V, QualType type, SourceLocation l)
Returns a new integer literal with value 'V' and type 'type'.
Definition: Expr.cpp:957
Describes the capture of a variable or of this, or of a C++1y init-capture.
Definition: LambdaCapture.h:25
std::string OMPHostIRFile
Name of the IR file that contains the result of the OpenMP target host code generation.
Definition: LangOptions.h:496
std::vector< llvm::Triple > OMPTargetTriples
Triples of the OpenMP targets that the host code codegen should take into account in order to generat...
Definition: LangOptions.h:492
virtual void mangleCanonicalTypeName(QualType T, raw_ostream &, bool NormalizeIntegers=false)=0
Generates a unique string for an externally visible type for use with TBAA or type uniquing.
MemberExpr - [C99 6.5.2.3] Structure and Union Members.
Definition: Expr.h:3182
ValueDecl * getMemberDecl() const
Retrieve the member declaration to which this expression refers.
Definition: Expr.h:3261
Expr * getBase() const
Definition: Expr.h:3255
StringRef getName() const
Get the name of identifier for this declaration as a StringRef.
Definition: Decl.h:276
bool isExternallyVisible() const
Definition: Decl.h:408
This represents clause 'affinity' in the '#pragma omp task'-based directives.
static QualType getBaseOriginalType(const Expr *Base)
Return the original type of the base expression for an array section.
Definition: Expr.cpp:5049
Class that represents a component of a mappable expression.
ArrayRef< MappableComponent > MappableExprComponentListRef
const Stmt * getPreInitStmt() const
Get pre-initialization statement for the clause.
Definition: OpenMPClause.h:219
This is a basic class for representing single OpenMP clause.
Definition: OpenMPClause.h:55
This represents '#pragma omp declare mapper ...' directive.
Definition: DeclOpenMP.h:287
Expr * getMapperVarRef()
Get the variable declared in the mapper.
Definition: DeclOpenMP.h:349
This represents '#pragma omp declare reduction ...' directive.
Definition: DeclOpenMP.h:177
Expr * getInitializer()
Get initializer expression (if specified) of the declare reduction construct.
Definition: DeclOpenMP.h:238
Expr * getInitPriv()
Get Priv variable of the initializer.
Definition: DeclOpenMP.h:249
Expr * getCombinerOut()
Get Out variable of the combiner.
Definition: DeclOpenMP.h:226
Expr * getCombinerIn()
Get In variable of the combiner.
Definition: DeclOpenMP.h:223
Expr * getCombiner()
Get combiner expression of the declare reduction construct.
Definition: DeclOpenMP.h:220
Expr * getInitOrig()
Get Orig variable of the initializer.
Definition: DeclOpenMP.h:246
OMPDeclareReductionInitKind getInitializerKind() const
Get initializer kind.
Definition: DeclOpenMP.h:241
This represents implicit clause 'depend' for the '#pragma omp task' directive.
This represents 'detach' clause in the '#pragma omp task' directive.
This represents 'device' clause in the '#pragma omp ...' directive.
This represents the 'doacross' clause for the '#pragma omp ordered' directive.
This is a basic class for representing single OpenMP executable directive.
Definition: StmtOpenMP.h:266
CapturedStmt * getInnermostCapturedStmt()
Get innermost captured statement for the construct.
Definition: StmtOpenMP.h:556
const CapturedStmt * getCapturedStmt(OpenMPDirectiveKind RegionKind) const
Returns the captured statement associated with the component region within the (combined) directive.
Definition: StmtOpenMP.h:547
OpenMPDirectiveKind getDirectiveKind() const
Definition: StmtOpenMP.h:569
SourceLocation getBeginLoc() const
Returns starting location of directive kind.
Definition: StmtOpenMP.h:502
bool hasClausesOfKind() const
Returns true if the current directive has one or more clauses of a specific kind.
Definition: StmtOpenMP.h:496
SourceLocation getEndLoc() const
Returns ending location of directive.
Definition: StmtOpenMP.h:504
static const SpecificClause * getSingleClause(ArrayRef< OMPClause * > Clauses)
Gets a single clause of the specified kind associated with the current directive iff there is only on...
Definition: StmtOpenMP.h:477
static llvm::iterator_range< specific_clause_iterator< SpecificClause > > getClausesOfKind(ArrayRef< OMPClause * > Clauses)
Definition: StmtOpenMP.h:459
This represents clause 'firstprivate' in the '#pragma omp ...' directives.
This represents clause 'has_device_ptr' in the '#pragma omp ...' directives.
This represents 'if' clause in the '#pragma omp ...' directive.
Definition: OpenMPClause.h:527
Expr * getCondition() const
Returns condition.
Definition: OpenMPClause.h:596
This represents clause 'in_reduction' in the '#pragma omp task' directives.
This represents clause 'is_device_ptr' in the '#pragma omp ...' directives.
OpenMP 5.0 [2.1.6 Iterators] Iterators are identifiers that expand to multiple values in the clause o...
Definition: ExprOpenMP.h:275
OMPIteratorHelperData & getHelper(unsigned I)
Fetches helper data for the specified iteration space.
Definition: Expr.cpp:5232
unsigned numOfIterators() const
Returns number of iterator definitions.
Definition: ExprOpenMP.h:399
Decl * getIteratorDecl(unsigned I)
Gets the iterator declaration for the given iterator.
Definition: Expr.cpp:5189
This represents clause 'lastprivate' in the '#pragma omp ...' directives.
This represents clause 'linear' in the '#pragma omp ...' directives.
This is a common base class for loop directives ('omp simd', 'omp for', 'omp for simd' etc....
Definition: StmtOpenMP.h:1018
Expr * getStrideVariable() const
Definition: StmtOpenMP.h:1376
Expr * getUpperBoundVariable() const
Definition: StmtOpenMP.h:1368
Expr * getLowerBoundVariable() const
Definition: StmtOpenMP.h:1360
This represents clause 'map' in the '#pragma omp ...' directives.
This represents clause 'nontemporal' in the '#pragma omp ...' directives.
This represents 'nowait' clause in the '#pragma omp ...' directive.
This represents 'num_teams' clause in the '#pragma omp ...' directive.
This represents 'num_threads' clause in the '#pragma omp ...' directive.
Definition: OpenMPClause.h:676
This represents 'ordered' clause in the '#pragma omp ...' directive.
This represents clause 'private' in the '#pragma omp ...' directives.
This represents clause 'reduction' in the '#pragma omp ...' directives.
This represents '#pragma omp requires...' directive.
Definition: DeclOpenMP.h:416
clauselist_range clauselists()
Definition: DeclOpenMP.h:441
This represents 'thread_limit' clause in the '#pragma omp ...' directive.
This represents clause 'uses_allocators' in the '#pragma omp target'-based directives.
This represents 'ompx_attribute' clause in a directive that might generate an outlined function.
This represents 'ompx_dyn_cgroup_mem' clause in the '#pragma omp target ...' directive.
OpaqueValueExpr - An expression referring to an opaque object of a fixed type and value class.
Definition: Expr.h:1168
Represents a parameter to a function.
Definition: Decl.h:1749
PointerType - C99 6.7.5.1 - Pointer Declarators.
Definition: Type.h:2898
Represents an unpacked "presumed" location which can be presented to the user.
unsigned getColumn() const
Return the presumed column number of this location.
const char * getFilename() const
Return the presumed filename of this location.
unsigned getLine() const
Return the presumed line number of this location.
A (possibly-)qualified type.
Definition: Type.h:737
void addRestrict()
Add the restrict qualifier to this QualType.
Definition: Type.h:964
QualType withRestrict() const
Definition: Type.h:967
bool isNull() const
Return true if this QualType doesn't point to a type yet.
Definition: Type.h:804
const Type * getTypePtr() const
Retrieves a pointer to the underlying (unqualified) type.
Definition: Type.h:6902
Qualifiers getQualifiers() const
Retrieve the set of qualifiers applied to this type.
Definition: Type.h:6942
QualType getNonReferenceType() const
If Type is a reference type (e.g., const int&), returns the type that the reference refers to ("const...
Definition: Type.h:7102
QualType getCanonicalType() const
Definition: Type.h:6954
DestructionKind isDestructedType() const
Returns a nonzero value if objects of this type require non-trivial work to clean up after.
Definition: Type.h:1323
Represents a struct/union/class.
Definition: Decl.h:4133
field_iterator field_end() const
Definition: Decl.h:4342
field_range fields() const
Definition: Decl.h:4339
virtual void completeDefinition()
Note that the definition of this type is now complete.
Definition: Decl.cpp:5047
bool field_empty() const
Definition: Decl.h:4347
field_iterator field_begin() const
Definition: Decl.cpp:5035
RecordDecl * getDecl() const
Definition: Type.h:5102
decl_type * getPreviousDecl()
Return the previous declaration of this declaration or NULL if this is the first declaration.
Definition: Redeclarable.h:204
decl_type * getMostRecentDecl()
Returns the most recent (re)declaration of this declaration.
Definition: Redeclarable.h:226
Base for LValueReferenceType and RValueReferenceType.
Definition: Type.h:3009
Scope - A scope is a transient data structure that is used while parsing the program.
Definition: Scope.h:41
Encodes a location in the source.
static SourceLocation getFromRawEncoding(UIntTy Encoding)
Turn a raw encoding of a SourceLocation object into a real SourceLocation.
bool isValid() const
Return true if this is a valid SourceLocation object.
UIntTy getRawEncoding() const
When a SourceLocation itself cannot be used, this returns an (opaque) 32-bit integer encoding for it.
This class handles loading and caching of source files into memory.
PresumedLoc getPresumedLoc(SourceLocation Loc, bool UseLineDirectives=true) const
Returns the "presumed" location of a SourceLocation specifies.
fileinfo_iterator fileinfo_end() const
SourceLocation translateFileLineCol(const FileEntry *SourceFile, unsigned Line, unsigned Col) const
Get the source location for the given file:line:col triplet.
fileinfo_iterator fileinfo_begin() const
A trivial tuple used to represent a source range.
Stmt - This represents one statement.
Definition: Stmt.h:84
SourceRange getSourceRange() const LLVM_READONLY
SourceLocation tokens are not useful in isolation - they are low level value objects created/interpre...
Definition: Stmt.cpp:326
Stmt * IgnoreContainers(bool IgnoreCaptured=false)
Skip no-op (attributed, compound) container stmts and skip captured stmt at the top,...
Definition: Stmt.cpp:197
SourceLocation getBeginLoc() const LLVM_READONLY
Definition: Stmt.cpp:338
void startDefinition()
Starts the definition of this tag declaration.
Definition: Decl.cpp:4704
bool isUnion() const
Definition: Decl.h:3755
bool isTLSSupported() const
Whether the target supports thread-local storage.
Definition: TargetInfo.h:1525
virtual bool hasFeature(StringRef Feature) const
Determine whether the given target has the given feature.
Definition: TargetInfo.h:1436
The base class of the type hierarchy.
Definition: Type.h:1606
CXXRecordDecl * getAsCXXRecordDecl() const
Retrieves the CXXRecordDecl that this type refers to, either because the type is a RecordType or beca...
Definition: Type.cpp:1819
bool isVoidType() const
Definition: Type.h:7443
bool isSignedIntegerOrEnumerationType() const
Determines whether this is an integer type that is signed or an enumeration type whose underlying ty...
Definition: Type.cpp:2104
const Type * getPointeeOrArrayElementType() const
If this is a pointer type, return the pointee type.
Definition: Type.h:7614
bool isSignedIntegerType() const
Return true if this is an integer type that is signed, according to C99 6.2.5p4 [char,...
Definition: Type.cpp:2083
bool isArrayType() const
Definition: Type.h:7220
bool isPointerType() const
Definition: Type.h:7154
CanQualType getCanonicalTypeUnqualified() const
bool isIntegerType() const
isIntegerType() does not include complex integers (a GCC extension).
Definition: Type.h:7479
const T * castAs() const
Member-template castAs<specific type>.
Definition: Type.h:7724
bool isReferenceType() const
Definition: Type.h:7166
QualType getPointeeType() const
If this is a pointer, ObjC object pointer, or block pointer, this returns the respective pointee.
Definition: Type.cpp:651
bool isLValueReferenceType() const
Definition: Type.h:7170
QualType getCanonicalTypeInternal() const
Definition: Type.h:2703
const RecordType * getAsStructureType() const
Definition: Type.cpp:667
const Type * getBaseElementTypeUnsafe() const
Get the base element type of this type, potentially discarding type qualifiers.
Definition: Type.h:7607
bool isVariablyModifiedType() const
Whether this type is a variably-modified type (C99 6.7.5).
Definition: Type.h:2438
const ArrayType * getAsArrayTypeUnsafe() const
A variant of getAs<> for array types which silently discards qualifiers from the outermost type.
Definition: Type.h:7710
bool isFloatingType() const
Definition: Type.cpp:2186
bool isUnsignedIntegerType() const
Return true if this is an integer type that is unsigned, according to C99 6.2.5p6 [which returns true...
Definition: Type.cpp:2133
bool isAnyPointerType() const
Definition: Type.h:7158
const T * getAs() const
Member-template getAs<specific type>.
Definition: Type.h:7657
bool isRecordType() const
Definition: Type.h:7244
bool isUnionType() const
Definition: Type.cpp:621
TagDecl * getAsTagDecl() const
Retrieves the TagDecl that this type refers to, either because the type is a TagType or because it is...
Definition: Type.cpp:1827
RecordDecl * getAsRecordDecl() const
Retrieves the RecordDecl this type refers to.
Definition: Type.cpp:1823
Represents the declaration of a variable (in which case it is an lvalue) or a function (in which case it ...
Definition: Decl.h:706
QualType getType() const
Definition: Decl.h:717
Represents a variable declaration or definition.
Definition: Decl.h:918
VarDecl * getCanonicalDecl() override
Retrieves the "canonical" declaration of the given declaration.
Definition: Decl.cpp:2254
VarDecl * getDefinition(ASTContext &)
Get the real (not just tentative) definition for this declaration.
Definition: Decl.cpp:2363
bool hasExternalStorage() const
Returns true if a variable has extern or private_extern storage.
Definition: Decl.h:1201
bool hasLocalStorage() const
Returns true if a variable with function scope is a non-static local variable.
Definition: Decl.h:1168
@ DeclarationOnly
This declaration is only a declaration.
Definition: Decl.h:1279
DefinitionKind hasDefinition(ASTContext &) const
Check whether this variable is defined in this translation unit.
Definition: Decl.cpp:2372
bool isLocalVarDeclOrParm() const
Similar to isLocalVarDecl but also includes parameters.
Definition: Decl.h:1246
const Expr * getAnyInitializer() const
Get the initializer for this variable, no matter which declaration it is attached to.
Definition: Decl.h:1342
Represents a C array with a specified size that is not an integer-constant-expression.
Definition: Type.h:3290
Expr * getSizeExpr() const
Definition: Type.h:3309
specific_attr_iterator - Iterates over a subrange of an AttrVec, only providing attributes that are o...
Definition: AttrIterator.h:33
@ Decl
The l-value was an access to a declared entity or something equivalently strong, like the address of ...
@ NotKnownNonNull
Definition: Address.h:26
The JSON file list parser is used to communicate input to InstallAPI.
bool isOpenMPWorksharingDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a worksharing directive.
@ Private
'private' clause, allowed on 'parallel', 'serial', 'loop', 'parallel loop', and 'serial loop' constru...
@ Vector
'vector' clause, allowed on 'loop', Combined, and 'routine' directives.
@ Reduction
'reduction' clause, allowed on Parallel, Serial, Loop, and the combined constructs.
@ Present
'present' clause, allowed on Compute and Combined constructs, plus 'data' and 'declare'.
@ Device
'device' clause, allowed on the 'update' construct.
bool needsTaskBasedThreadLimit(OpenMPDirectiveKind DKind)
Checks if the specified target directive, combined or not, needs task based thread_limit.
@ Ctor_Complete
Complete object ctor.
Definition: ABI.h:25
bool isOpenMPTargetDataManagementDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a target data offload directive.
llvm::omp::Directive OpenMPDirectiveKind
OpenMP directives.
Definition: OpenMPKinds.h:24
@ ICIS_NoInit
No in-class initializer.
Definition: Specifiers.h:269
bool isOpenMPDistributeDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a distribute directive.
@ LCK_ByRef
Capturing by reference.
Definition: Lambda.h:37
BinaryOperatorKind
LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE()
OpenMPScheduleClauseModifier
OpenMP modifiers for 'schedule' clause.
Definition: OpenMPKinds.h:38
@ OMPC_SCHEDULE_MODIFIER_last
Definition: OpenMPKinds.h:43
@ OMPC_SCHEDULE_MODIFIER_unknown
Definition: OpenMPKinds.h:39
@ CR_OpenMP
Definition: CapturedStmt.h:19
bool isOpenMPParallelDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a parallel-kind directive.
OpenMPDistScheduleClauseKind
OpenMP attributes for 'dist_schedule' clause.
Definition: OpenMPKinds.h:103
bool isOpenMPTaskingDirective(OpenMPDirectiveKind Kind)
Checks if the specified directive kind is one of tasking directives - task, taskloop,...
bool isOpenMPTargetExecutionDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a target code offload directive.
@ Result
The result type of a method or function.
bool isOpenMPTeamsDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a teams-kind directive.
OpenMPDependClauseKind
OpenMP attributes for 'depend' clause.
Definition: OpenMPKinds.h:54
@ OMPC_DEPEND_unknown
Definition: OpenMPKinds.h:58
@ Dtor_Complete
Complete object dtor.
Definition: ABI.h:35
@ Union
The "union" keyword.
bool isOpenMPLoopDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a directive with an associated loop construct.
LangAS
Defines the address space values used by the address space qualifier of QualType.
Definition: AddressSpaces.h:25
bool isOpenMPSimdDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a simd directive.
@ VK_PRValue
A pr-value expression (in the C++11 taxonomy) produces a temporary value.
Definition: Specifiers.h:132
@ VK_LValue
An l-value expression is a reference to an object with independent storage.
Definition: Specifiers.h:136
void getOpenMPCaptureRegions(llvm::SmallVectorImpl< OpenMPDirectiveKind > &CaptureRegions, OpenMPDirectiveKind DKind)
Return the captured regions of an OpenMP directive.
@ OMPC_ATOMIC_DEFAULT_MEM_ORDER_unknown
Definition: OpenMPKinds.h:131
if (T->getSizeExpr()) TRY_TO(TraverseStmt(T->getSizeExpr()))
@ OMPC_DEVICE_unknown
Definition: OpenMPKinds.h:50
OpenMPMapModifierKind
OpenMP modifier kind for 'map' clause.
Definition: OpenMPKinds.h:78
@ OMPC_MAP_MODIFIER_unknown
Definition: OpenMPKinds.h:79
@ Other
Other implicit parameter.
OpenMPScheduleClauseKind
OpenMP attributes for 'schedule' clause.
Definition: OpenMPKinds.h:30
@ OMPC_SCHEDULE_unknown
Definition: OpenMPKinds.h:34
@ AS_public
Definition: Specifiers.h:121
bool isOpenMPTaskLoopDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a taskloop directive.
OpenMPMapClauseKind
OpenMP mapping kind for 'map' clause.
Definition: OpenMPKinds.h:70
@ OMPC_MAP_unknown
Definition: OpenMPKinds.h:74
unsigned long uint64_t
YAML serialization mapping.
Definition: Dominators.h:30
#define false
Definition: stdbool.h:22
#define bool
Definition: stdbool.h:20
Struct with the values to be passed to the dispatch runtime function.
llvm::Value * Chunk
Chunk size specified using 'schedule' clause (nullptr if chunk was not specified)
Maps the expression for the lastprivate variable to the global copy used to store new value because o...
Struct with the values to be passed to the static runtime function.
bool IVSigned
Sign of the iteration variable.
Address UB
Address of the output variable in which the upper iteration number is returned.
Address IL
Address of the output variable in which the flag of the last iteration is returned.
llvm::Value * Chunk
Value of the chunk for the static_chunked scheduled loop.
unsigned IVSize
Size of the iteration variable in bits.
Address ST
Address of the output variable in which the stride value is returned, necessary to generate the stati...
bool Ordered
true if loop is ordered, false otherwise.
Address LB
Address of the output variable in which the lower iteration number is returned.
A jump destination is an abstract label, branching to which may require a jump out through normal cle...
llvm::IntegerType * Int8Ty
i8, i16, i32, and i64
llvm::CallingConv::ID getRuntimeCC() const
llvm::IntegerType * IntTy
int
SmallVector< const Expr *, 4 > DepExprs
EvalResult is a struct with detailed info about an evaluated expression.
Definition: Expr.h:642
Extra information about a function prototype.
Definition: Type.h:4278
Helper expressions and declaration for OMPIteratorExpr class for each iteration space.
Definition: ExprOpenMP.h:235
Expr * CounterUpdate
Updater for the internal counter: ++CounterVD.
Definition: ExprOpenMP.h:245
Expr * Upper
Normalized upper bound.
Definition: ExprOpenMP.h:240
Expr * Update
Update expression for the originally specified iteration variable, calculated as VD = Begin + Counter...
Definition: ExprOpenMP.h:243
VarDecl * CounterVD
Internal normalized counter.
Definition: ExprOpenMP.h:237
Data for list of allocators.
Expr * AllocatorTraits
Allocator traits.
Scheduling data for loop-based OpenMP directives.
Definition: OpenMPKinds.h:179
OpenMPScheduleClauseModifier M2
Definition: OpenMPKinds.h:182
OpenMPScheduleClauseModifier M1
Definition: OpenMPKinds.h:181
OpenMPScheduleClauseKind Schedule
Definition: OpenMPKinds.h:180
Describes how types, statements, expressions, and declarations should be printed.
Definition: PrettyPrinter.h:57