clang 23.0.0git
CGOpenMPRuntime.cpp
Go to the documentation of this file.
1//===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This provides a class for OpenMP runtime code generation.
10//
11//===----------------------------------------------------------------------===//
12
13#include "CGOpenMPRuntime.h"
14#include "ABIInfoImpl.h"
15#include "CGCXXABI.h"
16#include "CGCleanup.h"
17#include "CGDebugInfo.h"
18#include "CGRecordLayout.h"
19#include "CodeGenFunction.h"
20#include "TargetInfo.h"
21#include "clang/AST/APValue.h"
22#include "clang/AST/Attr.h"
23#include "clang/AST/Decl.h"
31#include "llvm/ADT/ArrayRef.h"
32#include "llvm/ADT/SmallSet.h"
33#include "llvm/ADT/SmallVector.h"
34#include "llvm/ADT/StringExtras.h"
35#include "llvm/Bitcode/BitcodeReader.h"
36#include "llvm/IR/Constants.h"
37#include "llvm/IR/DerivedTypes.h"
38#include "llvm/IR/GlobalValue.h"
39#include "llvm/IR/InstrTypes.h"
40#include "llvm/IR/Value.h"
41#include "llvm/Support/AtomicOrdering.h"
42#include "llvm/Support/raw_ostream.h"
43#include <cassert>
44#include <cstdint>
45#include <numeric>
46#include <optional>
47
48using namespace clang;
49using namespace CodeGen;
50using namespace llvm::omp;
51
52namespace {
53/// Base class for handling code generation inside OpenMP regions.
54class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
55public:
56 /// Kinds of OpenMP regions used in codegen.
57 enum CGOpenMPRegionKind {
58 /// Region with outlined function for standalone 'parallel'
59 /// directive.
60 ParallelOutlinedRegion,
61 /// Region with outlined function for standalone 'task' directive.
62 TaskOutlinedRegion,
63 /// Region for constructs that do not require function outlining,
64 /// like 'for', 'sections', 'atomic' etc. directives.
65 InlinedRegion,
66 /// Region with outlined function for standalone 'target' directive.
67 TargetRegion,
68 };
69
70 CGOpenMPRegionInfo(const CapturedStmt &CS,
71 const CGOpenMPRegionKind RegionKind,
72 const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
73 bool HasCancel)
74 : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
75 CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}
76
77 CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
78 const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
79 bool HasCancel)
80 : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
81 Kind(Kind), HasCancel(HasCancel) {}
82
83 /// Get a variable or parameter for storing global thread id
84 /// inside OpenMP construct.
85 virtual const VarDecl *getThreadIDVariable() const = 0;
86
87 /// Emit the captured statement body.
88 void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;
89
90 /// Get an LValue for the current ThreadID variable.
91 /// \return LValue for thread id variable. This LValue always has type int32*.
92 virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);
93
94 virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}
95
96 CGOpenMPRegionKind getRegionKind() const { return RegionKind; }
97
98 OpenMPDirectiveKind getDirectiveKind() const { return Kind; }
99
100 bool hasCancel() const { return HasCancel; }
101
102 static bool classof(const CGCapturedStmtInfo *Info) {
103 return Info->getKind() == CR_OpenMP;
104 }
105
106 ~CGOpenMPRegionInfo() override = default;
107
108protected:
109 CGOpenMPRegionKind RegionKind;
110 RegionCodeGenTy CodeGen;
112 bool HasCancel;
113};
114
115/// API for captured statement code generation in OpenMP constructs.
116class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo {
117public:
118 CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
119 const RegionCodeGenTy &CodeGen,
120 OpenMPDirectiveKind Kind, bool HasCancel,
121 StringRef HelperName)
122 : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
123 HasCancel),
124 ThreadIDVar(ThreadIDVar), HelperName(HelperName) {
125 assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
126 }
127
128 /// Get a variable or parameter for storing global thread id
129 /// inside OpenMP construct.
130 const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }
131
132 /// Get the name of the capture helper.
133 StringRef getHelperName() const override { return HelperName; }
134
135 static bool classof(const CGCapturedStmtInfo *Info) {
136 return CGOpenMPRegionInfo::classof(Info) &&
137 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
138 ParallelOutlinedRegion;
139 }
140
141private:
142 /// A variable or parameter storing global thread id for OpenMP
143 /// constructs.
144 const VarDecl *ThreadIDVar;
145 StringRef HelperName;
146};
147
148/// API for captured statement code generation in OpenMP constructs.
/// This variant is constructed with the TaskOutlinedRegion kind and forwards
/// emitUntiedSwitch() to an UntiedTaskActionTy that it holds by reference.
149class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo {
150public:
  /// Pre/post action that emits a switch over the value loaded through
  /// PartIDVar and adds one switch case per emitted switching point. It does
  /// nothing when constructed with Tied == true.
151 class UntiedTaskActionTy final : public PrePostActionTy {
  // True when the action was constructed with Tied == false.
152 bool Untied;
  // Pointer-typed variable through which the part id is loaded and stored.
153 const VarDecl *PartIDVar;
  // Callback invoked at every switching point, after the part id is stored.
154 const RegionCodeGenTy UntiedCodeGen;
  // Switch created by Enter(); remains null unless Enter() ran while Untied.
155 llvm::SwitchInst *UntiedSwitch = nullptr;
156
157 public:
158 UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar,
159 const RegionCodeGenTy &UntiedCodeGen)
160 : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {}
  /// Loads the part id, creates the switch on it (default destination
  /// ".untied.done."), registers case 0 for the following ".untied.jmp."
  /// block and then emits the first switching point.
161 void Enter(CodeGenFunction &CGF) override {
162 if (Untied) {
163 // Emit task switching point.
164 LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
165 CGF.GetAddrOfLocalVar(PartIDVar),
166 PartIDVar->getType()->castAs<PointerType>());
167 llvm::Value *Res =
168 CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation());
169 llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done.");
170 UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB);
171 CGF.EmitBlock(DoneBB);
  // NOTE(review): the listing numbering jumps from 171 to 173 here, so a
  // source line appears to be missing from this view -- confirm against the
  // real file before relying on this control flow.
173 CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
174 UntiedSwitch->addCase(CGF.Builder.getInt32(0),
175 CGF.Builder.GetInsertBlock());
176 emitUntiedSwitch(CGF);
177 }
178 }
  /// Emits one switching point: stores the current case count as the new part
  /// id, runs UntiedCodeGen, then opens a new ".untied.jmp." block, registers
  /// it as the next switch case and branches through cleanups to
  /// ".untied.next.". Uses UntiedSwitch, so Enter() must have run first.
179 void emitUntiedSwitch(CodeGenFunction &CGF) const {
180 if (Untied) {
181 LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
182 CGF.GetAddrOfLocalVar(PartIDVar),
183 PartIDVar->getType()->castAs<PointerType>());
184 CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
185 PartIdLVal);
186 UntiedCodeGen(CGF);
187 CodeGenFunction::JumpDest CurPoint =
188 CGF.getJumpDestInCurrentScope(".untied.next.");
  // NOTE(review): the listing numbering jumps from 188 to 190 here, so a
  // source line appears to be missing from this view -- confirm against the
  // real file.
190 CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
191 UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
192 CGF.Builder.GetInsertBlock());
193 CGF.EmitBranchThroughCleanup(CurPoint);
194 CGF.EmitBlock(CurPoint.getBlock());
195 }
196 }
  /// Returns the number of cases added to the switch so far. Dereferences
  /// UntiedSwitch unconditionally, so it is only valid after Enter() has
  /// created the switch (i.e. for an untied task).
197 unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); }
198 };
  /// \param ThreadIDVar Variable holding the global thread id; must be
  /// non-null (asserted).
  /// \param Action Stored by reference, so it has to outlive this object.
199 CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
200 const VarDecl *ThreadIDVar,
201 const RegionCodeGenTy &CodeGen,
202 OpenMPDirectiveKind Kind, bool HasCancel,
203 const UntiedTaskActionTy &Action)
204 : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
205 ThreadIDVar(ThreadIDVar), Action(Action) {
206 assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
207 }
208
209 /// Get a variable or parameter for storing global thread id
210 /// inside OpenMP construct.
211 const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }
212
213 /// Get an LValue for the current ThreadID variable.
214 LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;
215
216 /// Get the name of the capture helper.
217 StringRef getHelperName() const override { return ".omp_outlined."; }
218
  /// Forwards to the stored untied-task action.
219 void emitUntiedSwitch(CodeGenFunction &CGF) override {
220 Action.emitUntiedSwitch(CGF);
221 }
222
223 static bool classof(const CGCapturedStmtInfo *Info) {
224 return CGOpenMPRegionInfo::classof(Info) &&
225 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
226 TaskOutlinedRegion;
227 }
228
229private:
230 /// A variable or parameter storing global thread id for OpenMP
231 /// constructs.
232 const VarDecl *ThreadIDVar;
233 /// Action for emitting code for untied tasks.
234 const UntiedTaskActionTy &Action;
235};
236
237/// API for inlined captured statement code generation in OpenMP
238/// constructs.
239class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
240public:
241 CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
242 const RegionCodeGenTy &CodeGen,
243 OpenMPDirectiveKind Kind, bool HasCancel)
244 : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
245 OldCSI(OldCSI),
246 OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}
247
248 // Retrieve the value of the context parameter.
249 llvm::Value *getContextValue() const override {
250 if (OuterRegionInfo)
251 return OuterRegionInfo->getContextValue();
252 llvm_unreachable("No context value for inlined OpenMP region");
253 }
254
255 void setContextValue(llvm::Value *V) override {
256 if (OuterRegionInfo) {
257 OuterRegionInfo->setContextValue(V);
258 return;
259 }
260 llvm_unreachable("No context value for inlined OpenMP region");
261 }
262
263 /// Lookup the captured field decl for a variable.
264 const FieldDecl *lookup(const VarDecl *VD) const override {
265 if (OuterRegionInfo)
266 return OuterRegionInfo->lookup(VD);
267 // If there is no outer outlined region,no need to lookup in a list of
268 // captured variables, we can use the original one.
269 return nullptr;
270 }
271
272 FieldDecl *getThisFieldDecl() const override {
273 if (OuterRegionInfo)
274 return OuterRegionInfo->getThisFieldDecl();
275 return nullptr;
276 }
277
278 /// Get a variable or parameter for storing global thread id
279 /// inside OpenMP construct.
280 const VarDecl *getThreadIDVariable() const override {
281 if (OuterRegionInfo)
282 return OuterRegionInfo->getThreadIDVariable();
283 return nullptr;
284 }
285
286 /// Get an LValue for the current ThreadID variable.
287 LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override {
288 if (OuterRegionInfo)
289 return OuterRegionInfo->getThreadIDVariableLValue(CGF);
290 llvm_unreachable("No LValue for inlined OpenMP construct");
291 }
292
293 /// Get the name of the capture helper.
294 StringRef getHelperName() const override {
295 if (auto *OuterRegionInfo = getOldCSI())
296 return OuterRegionInfo->getHelperName();
297 llvm_unreachable("No helper name for inlined OpenMP construct");
298 }
299
300 void emitUntiedSwitch(CodeGenFunction &CGF) override {
301 if (OuterRegionInfo)
302 OuterRegionInfo->emitUntiedSwitch(CGF);
303 }
304
305 CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }
306
307 static bool classof(const CGCapturedStmtInfo *Info) {
308 return CGOpenMPRegionInfo::classof(Info) &&
309 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
310 }
311
312 ~CGOpenMPInlinedRegionInfo() override = default;
313
314private:
315 /// CodeGen info about outer OpenMP region.
316 CodeGenFunction::CGCapturedStmtInfo *OldCSI;
317 CGOpenMPRegionInfo *OuterRegionInfo;
318};
319
320/// API for captured statement code generation in OpenMP target
321/// constructs. For this captures, implicit parameters are used instead of the
322/// captured fields. The name of the target region has to be unique in a given
323/// application so it is provided by the client, because only the client has
324/// the information to generate that.
325class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo {
326public:
327 CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
328 const RegionCodeGenTy &CodeGen, StringRef HelperName)
329 : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
330 /*HasCancel=*/false),
331 HelperName(HelperName) {}
332
333 /// This is unused for target regions because each starts executing
334 /// with a single thread.
335 const VarDecl *getThreadIDVariable() const override { return nullptr; }
336
337 /// Get the name of the capture helper.
338 StringRef getHelperName() const override { return HelperName; }
339
340 static bool classof(const CGCapturedStmtInfo *Info) {
341 return CGOpenMPRegionInfo::classof(Info) &&
342 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;
343 }
344
345private:
346 StringRef HelperName;
347};
348
349static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) {
350 llvm_unreachable("No codegen for expressions");
351}
352/// API for generation of expressions captured in a innermost OpenMP
353/// region.
354class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo {
355public:
356 CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS)
357 : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen,
358 OMPD_unknown,
359 /*HasCancel=*/false),
360 PrivScope(CGF) {
361 // Make sure the globals captured in the provided statement are local by
362 // using the privatization logic. We assume the same variable is not
363 // captured more than once.
364 for (const auto &C : CS.captures()) {
365 if (!C.capturesVariable() && !C.capturesVariableByCopy())
366 continue;
367
368 const VarDecl *VD = C.getCapturedVar();
369 if (VD->isLocalVarDeclOrParm())
370 continue;
371
372 DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
373 /*RefersToEnclosingVariableOrCapture=*/false,
374 VD->getType().getNonReferenceType(), VK_LValue,
375 C.getLocation());
376 PrivScope.addPrivate(VD, CGF.EmitLValue(&DRE).getAddress());
377 }
378 (void)PrivScope.Privatize();
379 }
380
381 /// Lookup the captured field decl for a variable.
382 const FieldDecl *lookup(const VarDecl *VD) const override {
383 if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD))
384 return FD;
385 return nullptr;
386 }
387
388 /// Emit the captured statement body.
389 void EmitBody(CodeGenFunction &CGF, const Stmt *S) override {
390 llvm_unreachable("No body for expressions");
391 }
392
393 /// Get a variable or parameter for storing global thread id
394 /// inside OpenMP construct.
395 const VarDecl *getThreadIDVariable() const override {
396 llvm_unreachable("No thread id for expressions");
397 }
398
399 /// Get the name of the capture helper.
400 StringRef getHelperName() const override {
401 llvm_unreachable("No helper name for expressions");
402 }
403
404 static bool classof(const CGCapturedStmtInfo *Info) { return false; }
405
406private:
407 /// Private scope to capture global variables.
408 CodeGenFunction::OMPPrivateScope PrivScope;
409};
410
411/// RAII for emitting code of OpenMP constructs.
412class InlinedOpenMPRegionRAII {
413 CodeGenFunction &CGF;
414 llvm::DenseMap<const ValueDecl *, FieldDecl *> LambdaCaptureFields;
415 FieldDecl *LambdaThisCaptureField = nullptr;
416 const CodeGen::CGBlockInfo *BlockInfo = nullptr;
417 bool NoInheritance = false;
418
419public:
420 /// Constructs region for combined constructs.
421 /// \param CodeGen Code generation sequence for combined directives. Includes
422 /// a list of functions used for code generation of implicitly inlined
423 /// regions.
424 InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
425 OpenMPDirectiveKind Kind, bool HasCancel,
426 bool NoInheritance = true)
427 : CGF(CGF), NoInheritance(NoInheritance) {
428 // Start emission for the construct.
429 CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
430 CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
431 if (NoInheritance) {
432 std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
433 LambdaThisCaptureField = CGF.LambdaThisCaptureField;
434 CGF.LambdaThisCaptureField = nullptr;
435 BlockInfo = CGF.BlockInfo;
436 CGF.BlockInfo = nullptr;
437 }
438 }
439
440 ~InlinedOpenMPRegionRAII() {
441 // Restore original CapturedStmtInfo only if we're done with code emission.
442 auto *OldCSI =
443 cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
444 delete CGF.CapturedStmtInfo;
445 CGF.CapturedStmtInfo = OldCSI;
446 if (NoInheritance) {
447 std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
448 CGF.LambdaThisCaptureField = LambdaThisCaptureField;
449 CGF.BlockInfo = BlockInfo;
450 }
451 }
452};
453
454/// Values for bit flags used in the ident_t to describe the fields.
455/// All enumeric elements are named and described in accordance with the code
456/// from https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
457enum OpenMPLocationFlags : unsigned {
458 /// Use trampoline for internal microtask.
459 OMP_IDENT_IMD = 0x01,
460 /// Use c-style ident structure.
461 OMP_IDENT_KMPC = 0x02,
462 /// Atomic reduction option for kmpc_reduce.
463 OMP_ATOMIC_REDUCE = 0x10,
464 /// Explicit 'barrier' directive.
465 OMP_IDENT_BARRIER_EXPL = 0x20,
466 /// Implicit barrier in code.
467 OMP_IDENT_BARRIER_IMPL = 0x40,
468 /// Implicit barrier in 'for' directive.
469 OMP_IDENT_BARRIER_IMPL_FOR = 0x40,
470 /// Implicit barrier in 'sections' directive.
471 OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0,
472 /// Implicit barrier in 'single' directive.
473 OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140,
474 /// Call of __kmp_for_static_init for static loop.
475 OMP_IDENT_WORK_LOOP = 0x200,
476 /// Call of __kmp_for_static_init for sections.
477 OMP_IDENT_WORK_SECTIONS = 0x400,
478 /// Call of __kmp_for_static_init for distribute.
479 OMP_IDENT_WORK_DISTRIBUTE = 0x800,
480 LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE)
481};
482
/// Field indices of the ident structure that describes a source location.
/// All descriptions are taken from
/// https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
/// Original structure:
/// typedef struct ident {
///    kmp_int32 reserved_1;   /**<  might be used in Fortran;
///                                  see above  */
///    kmp_int32 flags;        /**<  also f.flags; KMP_IDENT_xxx flags;
///                                  KMP_IDENT_KMPC identifies this union
///                                  member  */
///    kmp_int32 reserved_2;   /**<  not really used in Fortran any more;
///                                  see above */
///#if USE_ITT_BUILD
///                            /*  but currently used for storing
///                                region-specific ITT */
///                            /*  contextual information. */
///#endif /* USE_ITT_BUILD */
///    kmp_int32 reserved_3;   /**< source[4] in Fortran, do not use for
///                                 C++  */
///    char const *psource;    /**< String describing the source location.
///                            The string is composed of semi-colon separated
///                            fields which describe the source file,
///                            the function and a pair of line numbers that
///                            delimit the construct.
///                             */
/// } ident_t;
enum IdentFieldIndex {
  /// reserved_1: might be used in Fortran.
  IdentField_Reserved_1 = 0,
  /// flags: OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
  IdentField_Flags = 1,
  /// reserved_2: not really used in Fortran any more.
  IdentField_Reserved_2 = 2,
  /// reserved_3: source[4] in Fortran, do not use for C++.
  IdentField_Reserved_3 = 3,
  /// psource: string describing the source location. The string is composed
  /// of semi-colon separated fields which describe the source file, the
  /// function and a pair of line numbers that delimit the construct.
  IdentField_PSource = 4
};
523
/// Schedule types for 'omp for' loops (these enumerators are taken from
/// the enum sched_type in kmp.h).
enum OpenMPSchedType {
  // Default (unordered) versions.
  /// Lower bound for default (unordered) versions.
  OMP_sch_lower = 32,
  OMP_sch_static_chunked = 33,
  OMP_sch_static = 34,
  OMP_sch_dynamic_chunked = 35,
  OMP_sch_guided_chunked = 36,
  OMP_sch_runtime = 37,
  OMP_sch_auto = 38,
  /// Static with chunk adjustment (e.g., simd).
  OMP_sch_static_balanced_chunked = 45,

  // 'ordered' versions.
  /// Lower bound for 'ordered' versions.
  OMP_ord_lower = 64,
  OMP_ord_static_chunked = 65,
  OMP_ord_static = 66,
  OMP_ord_dynamic_chunked = 67,
  OMP_ord_guided_chunked = 68,
  OMP_ord_runtime = 69,
  OMP_ord_auto = 70,

  /// The default schedule is an alias of the unordered static schedule.
  OMP_sch_default = OMP_sch_static,

  // dist_schedule types.
  OMP_dist_sch_static_chunked = 91,
  OMP_dist_sch_static = 92,

  // Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
  /// Set if the monotonic schedule modifier was present.
  OMP_sch_modifier_monotonic = (1 << 29),
  /// Set if the nonmonotonic schedule modifier was present.
  OMP_sch_modifier_nonmonotonic = (1 << 30),
};
555
556/// A basic class for pre|post-action for advanced codegen sequence for OpenMP
557/// region.
558class CleanupTy final : public EHScopeStack::Cleanup {
559 PrePostActionTy *Action;
560
561public:
562 explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {}
563 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
564 if (!CGF.HaveInsertPoint())
565 return;
566 Action->Exit(CGF);
567 }
568};
569
570} // anonymous namespace
571
574 if (PrePostAction) {
575 CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction);
576 Callback(CodeGen, CGF, *PrePostAction);
577 } else {
578 PrePostActionTy Action;
579 Callback(CodeGen, CGF, Action);
580 }
581}
582
583/// Check if the combiner is a call to UDR combiner and if it is so return the
584/// UDR decl used for reduction.
585static const OMPDeclareReductionDecl *
586getReductionInit(const Expr *ReductionOp) {
587 if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
588 if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
589 if (const auto *DRE =
590 dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
591 if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl()))
592 return DRD;
593 return nullptr;
594}
595
597 const OMPDeclareReductionDecl *DRD,
598 const Expr *InitOp,
599 Address Private, Address Original,
600 QualType Ty) {
601 if (DRD->getInitializer()) {
602 std::pair<llvm::Function *, llvm::Function *> Reduction =
604 const auto *CE = cast<CallExpr>(InitOp);
605 const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee());
606 const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
607 const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
608 const auto *LHSDRE =
609 cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr());
610 const auto *RHSDRE =
611 cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr());
612 CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
613 PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()), Private);
614 PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()), Original);
615 (void)PrivateScope.Privatize();
618 CGF.EmitIgnoredExpr(InitOp);
619 } else {
620 llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty);
621 std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"});
622 auto *GV = new llvm::GlobalVariable(
623 CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
624 llvm::GlobalValue::PrivateLinkage, Init, Name);
625 LValue LV = CGF.MakeNaturalAlignRawAddrLValue(GV, Ty);
626 RValue InitRVal;
627 switch (CGF.getEvaluationKind(Ty)) {
628 case TEK_Scalar:
629 InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation());
630 break;
631 case TEK_Complex:
632 InitRVal =
634 break;
635 case TEK_Aggregate: {
636 OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_LValue);
637 CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, LV);
638 CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
639 /*IsInitializer=*/false);
640 return;
641 }
642 }
643 OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_PRValue);
644 CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
645 CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
646 /*IsInitializer=*/false);
647 }
648}
649
650/// Emit initialization of arrays of complex types.
651/// \param DestAddr Address of the array.
652/// \param Type Type of array.
653/// \param Init Initial expression of array.
654/// \param SrcAddr Address of the original array.
656 QualType Type, bool EmitDeclareReductionInit,
657 const Expr *Init,
658 const OMPDeclareReductionDecl *DRD,
659 Address SrcAddr = Address::invalid()) {
660 // Perform element-by-element initialization.
661 QualType ElementTy;
662
663 // Drill down to the base element type on both arrays.
664 const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
665 llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
666 if (DRD)
667 SrcAddr = SrcAddr.withElementType(DestAddr.getElementType());
668
669 llvm::Value *SrcBegin = nullptr;
670 if (DRD)
671 SrcBegin = SrcAddr.emitRawPointer(CGF);
672 llvm::Value *DestBegin = DestAddr.emitRawPointer(CGF);
673 // Cast from pointer to array type to pointer to single element.
674 llvm::Value *DestEnd =
675 CGF.Builder.CreateGEP(DestAddr.getElementType(), DestBegin, NumElements);
676 // The basic structure here is a while-do loop.
677 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
678 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
679 llvm::Value *IsEmpty =
680 CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
681 CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
682
683 // Enter the loop body, making that address the current address.
684 llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
685 CGF.EmitBlock(BodyBB);
686
687 CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);
688
689 llvm::PHINode *SrcElementPHI = nullptr;
690 Address SrcElementCurrent = Address::invalid();
691 if (DRD) {
692 SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2,
693 "omp.arraycpy.srcElementPast");
694 SrcElementPHI->addIncoming(SrcBegin, EntryBB);
695 SrcElementCurrent =
696 Address(SrcElementPHI, SrcAddr.getElementType(),
697 SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
698 }
699 llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
700 DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
701 DestElementPHI->addIncoming(DestBegin, EntryBB);
702 Address DestElementCurrent =
703 Address(DestElementPHI, DestAddr.getElementType(),
704 DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));
705
706 // Emit copy.
707 {
709 if (EmitDeclareReductionInit) {
710 emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent,
711 SrcElementCurrent, ElementTy);
712 } else
713 CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
714 /*IsInitializer=*/false);
715 }
716
717 if (DRD) {
718 // Shift the address forward by one element.
719 llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32(
720 SrcAddr.getElementType(), SrcElementPHI, /*Idx0=*/1,
721 "omp.arraycpy.dest.element");
722 SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock());
723 }
724
725 // Shift the address forward by one element.
726 llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32(
727 DestAddr.getElementType(), DestElementPHI, /*Idx0=*/1,
728 "omp.arraycpy.dest.element");
729 // Check whether we've reached the end.
730 llvm::Value *Done =
731 CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
732 CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
733 DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());
734
735 // Done.
736 CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
737}
738
739LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) {
740 return CGF.EmitOMPSharedLValue(E);
741}
742
743LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF,
744 const Expr *E) {
745 if (const auto *OASE = dyn_cast<ArraySectionExpr>(E))
746 return CGF.EmitArraySectionExpr(OASE, /*IsLowerBound=*/false);
747 return LValue();
748}
749
750void ReductionCodeGen::emitAggregateInitialization(
751 CodeGenFunction &CGF, unsigned N, Address PrivateAddr, Address SharedAddr,
752 const OMPDeclareReductionDecl *DRD) {
753 // Emit VarDecl with copy init for arrays.
754 // Get the address of the original variable captured in current
755 // captured region.
756 const auto *PrivateVD =
757 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
758 bool EmitDeclareReductionInit =
759 DRD && (DRD->getInitializer() || !PrivateVD->hasInit());
760 EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(),
761 EmitDeclareReductionInit,
762 EmitDeclareReductionInit ? ClausesData[N].ReductionOp
763 : PrivateVD->getInit(),
764 DRD, SharedAddr);
765}
766
770 ArrayRef<const Expr *> ReductionOps) {
771 ClausesData.reserve(Shareds.size());
772 SharedAddresses.reserve(Shareds.size());
773 Sizes.reserve(Shareds.size());
774 BaseDecls.reserve(Shareds.size());
775 const auto *IOrig = Origs.begin();
776 const auto *IPriv = Privates.begin();
777 const auto *IRed = ReductionOps.begin();
778 for (const Expr *Ref : Shareds) {
779 ClausesData.emplace_back(Ref, *IOrig, *IPriv, *IRed);
780 std::advance(IOrig, 1);
781 std::advance(IPriv, 1);
782 std::advance(IRed, 1);
783 }
784}
785
787 assert(SharedAddresses.size() == N && OrigAddresses.size() == N &&
788 "Number of generated lvalues must be exactly N.");
789 LValue First = emitSharedLValue(CGF, ClausesData[N].Shared);
790 LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Shared);
791 SharedAddresses.emplace_back(First, Second);
792 if (ClausesData[N].Shared == ClausesData[N].Ref) {
793 OrigAddresses.emplace_back(First, Second);
794 } else {
795 LValue First = emitSharedLValue(CGF, ClausesData[N].Ref);
796 LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref);
797 OrigAddresses.emplace_back(First, Second);
798 }
799}
800
802 QualType PrivateType = getPrivateType(N);
803 bool AsArraySection = isa<ArraySectionExpr>(ClausesData[N].Ref);
804 if (!PrivateType->isVariablyModifiedType()) {
805 Sizes.emplace_back(
806 CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType()),
807 nullptr);
808 return;
809 }
810 llvm::Value *Size;
811 llvm::Value *SizeInChars;
812 auto *ElemType = OrigAddresses[N].first.getAddress().getElementType();
813 auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType);
814 if (AsArraySection) {
815 Size = CGF.Builder.CreatePtrDiff(ElemType,
816 OrigAddresses[N].second.getPointer(CGF),
817 OrigAddresses[N].first.getPointer(CGF));
818 Size = CGF.Builder.CreateZExtOrTrunc(Size, ElemSizeOf->getType());
819 Size = CGF.Builder.CreateNUWAdd(
820 Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
821 SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf);
822 } else {
823 SizeInChars =
824 CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType());
825 Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf);
826 }
827 Sizes.emplace_back(SizeInChars, Size);
829 CGF,
831 CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
832 RValue::get(Size));
833 CGF.EmitVariablyModifiedType(PrivateType);
834}
835
837 llvm::Value *Size) {
838 QualType PrivateType = getPrivateType(N);
839 if (!PrivateType->isVariablyModifiedType()) {
840 assert(!Size && !Sizes[N].second &&
841 "Size should be nullptr for non-variably modified reduction "
842 "items.");
843 return;
844 }
846 CGF,
848 CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
849 RValue::get(Size));
850 CGF.EmitVariablyModifiedType(PrivateType);
851}
852
854 CodeGenFunction &CGF, unsigned N, Address PrivateAddr, Address SharedAddr,
855 llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) {
856 assert(SharedAddresses.size() > N && "No variable was generated");
857 const auto *PrivateVD =
858 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
859 const OMPDeclareReductionDecl *DRD =
860 getReductionInit(ClausesData[N].ReductionOp);
861 if (CGF.getContext().getAsArrayType(PrivateVD->getType())) {
862 if (DRD && DRD->getInitializer())
863 (void)DefaultInit(CGF);
864 emitAggregateInitialization(CGF, N, PrivateAddr, SharedAddr, DRD);
865 } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
866 (void)DefaultInit(CGF);
867 QualType SharedType = SharedAddresses[N].first.getType();
868 emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp,
869 PrivateAddr, SharedAddr, SharedType);
870 } else if (!DefaultInit(CGF) && PrivateVD->hasInit() &&
871 !CGF.isTrivialInitializer(PrivateVD->getInit())) {
872 CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr,
873 PrivateVD->getType().getQualifiers(),
874 /*IsInitializer=*/false);
875 }
876}
877
879 QualType PrivateType = getPrivateType(N);
880 QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
881 return DTorKind != QualType::DK_none;
882}
883
885 Address PrivateAddr) {
886 QualType PrivateType = getPrivateType(N);
887 QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
888 if (needCleanups(N)) {
889 PrivateAddr =
890 PrivateAddr.withElementType(CGF.ConvertTypeForMem(PrivateType));
891 CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType);
892 }
893}
894
895static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
896 LValue BaseLV) {
897 BaseTy = BaseTy.getNonReferenceType();
898 while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
899 !CGF.getContext().hasSameType(BaseTy, ElTy)) {
900 if (const auto *PtrTy = BaseTy->getAs<PointerType>()) {
901 BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(), PtrTy);
902 } else {
903 LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(), BaseTy);
904 BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal);
905 }
906 BaseTy = BaseTy->getPointeeType();
907 }
908 return CGF.MakeAddrLValue(
909 BaseLV.getAddress().withElementType(CGF.ConvertTypeForMem(ElTy)),
910 BaseLV.getType(), BaseLV.getBaseInfo(),
911 CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType()));
912}
913
915 Address OriginalBaseAddress, llvm::Value *Addr) {
917 Address TopTmp = Address::invalid();
918 Address MostTopTmp = Address::invalid();
919 BaseTy = BaseTy.getNonReferenceType();
920 while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
921 !CGF.getContext().hasSameType(BaseTy, ElTy)) {
922 Tmp = CGF.CreateMemTempWithoutCast(BaseTy);
923 if (TopTmp.isValid())
924 CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
925 else
926 MostTopTmp = Tmp;
927 TopTmp = Tmp;
928 BaseTy = BaseTy->getPointeeType();
929 }
930
931 if (Tmp.isValid()) {
933 Addr, Tmp.getElementType());
934 CGF.Builder.CreateStore(Addr, Tmp);
935 return MostTopTmp;
936 }
937
939 Addr, OriginalBaseAddress.getType());
940 return OriginalBaseAddress.withPointer(Addr, NotKnownNonNull);
941}
942
943static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) {
944 const VarDecl *OrigVD = nullptr;
945 if (const auto *OASE = dyn_cast<ArraySectionExpr>(Ref)) {
946 const Expr *Base = OASE->getBase()->IgnoreParenImpCasts();
947 while (const auto *TempOASE = dyn_cast<ArraySectionExpr>(Base))
948 Base = TempOASE->getBase()->IgnoreParenImpCasts();
949 while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
950 Base = TempASE->getBase()->IgnoreParenImpCasts();
952 OrigVD = cast<VarDecl>(DE->getDecl());
953 } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) {
954 const Expr *Base = ASE->getBase()->IgnoreParenImpCasts();
955 while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
956 Base = TempASE->getBase()->IgnoreParenImpCasts();
958 OrigVD = cast<VarDecl>(DE->getDecl());
959 }
960 return OrigVD;
961}
962
964 Address PrivateAddr) {
965 const DeclRefExpr *DE;
966 if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) {
967 BaseDecls.emplace_back(OrigVD);
968 LValue OriginalBaseLValue = CGF.EmitLValue(DE);
969 LValue BaseLValue =
970 loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(),
971 OriginalBaseLValue);
972 Address SharedAddr = SharedAddresses[N].first.getAddress();
973 llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff(
974 SharedAddr.getElementType(), BaseLValue.getPointer(CGF),
975 SharedAddr.emitRawPointer(CGF));
976 llvm::Value *PrivatePointer =
978 PrivateAddr.emitRawPointer(CGF), SharedAddr.getType());
979 llvm::Value *Ptr = CGF.Builder.CreateGEP(
980 SharedAddr.getElementType(), PrivatePointer, Adjustment);
981 return castToBase(CGF, OrigVD->getType(),
982 SharedAddresses[N].first.getType(),
983 OriginalBaseLValue.getAddress(), Ptr);
984 }
985 BaseDecls.emplace_back(
986 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl()));
987 return PrivateAddr;
988}
989
991 const OMPDeclareReductionDecl *DRD =
992 getReductionInit(ClausesData[N].ReductionOp);
993 return DRD && DRD->getInitializer();
994}
995
996LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
997 return CGF.EmitLoadOfPointerLValue(
998 CGF.GetAddrOfLocalVar(getThreadIDVariable()),
999 getThreadIDVariable()->getType()->castAs<PointerType>());
1000}
1001
1002void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt *S) {
1003 if (!CGF.HaveInsertPoint())
1004 return;
1005 // 1.2.2 OpenMP Language Terminology
1006 // Structured block - An executable statement with a single entry at the
1007 // top and a single exit at the bottom.
1008 // The point of exit cannot be a branch out of the structured block.
1009 // longjmp() and throw() must not violate the entry/exit criteria.
1010 CGF.EHStack.pushTerminate();
1011 if (S)
1013 CodeGen(CGF);
1014 CGF.EHStack.popTerminate();
1015}
1016
1017LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
1018 CodeGenFunction &CGF) {
1019 return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
1020 getThreadIDVariable()->getType(),
1022}
1023
1025 QualType FieldTy) {
1026 auto *Field = FieldDecl::Create(
1027 C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
1028 C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
1029 /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
1030 Field->setAccess(AS_public);
1031 DC->addDecl(Field);
1032 return Field;
1033}
1034
1036 : CGM(CGM), OMPBuilder(CGM.getModule()) {
1037 KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);
1038 llvm::OpenMPIRBuilderConfig Config(
1039 CGM.getLangOpts().OpenMPIsTargetDevice, isGPU(),
1040 CGM.getLangOpts().OpenMPOffloadMandatory,
1041 /*HasRequiresReverseOffload*/ false, /*HasRequiresUnifiedAddress*/ false,
1042 hasRequiresUnifiedSharedMemory(), /*HasRequiresDynamicAllocators*/ false);
1043 Config.setDefaultTargetAS(
1044 CGM.getContext().getTargetInfo().getTargetAddressSpace(LangAS::Default));
1045 Config.setRuntimeCC(CGM.getRuntimeCC());
1046
1047 OMPBuilder.setConfig(Config);
1048 OMPBuilder.initialize();
1049 OMPBuilder.loadOffloadInfoMetadata(*CGM.getFileSystem(),
1050 CGM.getLangOpts().OpenMPIsTargetDevice
1051 ? CGM.getLangOpts().OMPHostIRFile
1052 : StringRef{});
1053
1054 // The user forces the compiler to behave as if omp requires
1055 // unified_shared_memory was given.
1056 if (CGM.getLangOpts().OpenMPForceUSM) {
1058 OMPBuilder.Config.setHasRequiresUnifiedSharedMemory(true);
1059 }
1060}
1061
1063 InternalVars.clear();
1064 // Clean non-target variable declarations possibly used only in debug info.
1065 for (const auto &Data : EmittedNonTargetVariables) {
1066 if (!Data.getValue().pointsToAliveValue())
1067 continue;
1068 auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue());
1069 if (!GV)
1070 continue;
1071 if (!GV->isDeclaration() || GV->getNumUses() > 0)
1072 continue;
1073 GV->eraseFromParent();
1074 }
1075}
1076
1078 return OMPBuilder.createPlatformSpecificName(Parts);
1079}
1080
1081static llvm::Function *
1083 const Expr *CombinerInitializer, const VarDecl *In,
1084 const VarDecl *Out, bool IsCombiner) {
1085 // void .omp_combiner.(Ty *in, Ty *out);
1086 ASTContext &C = CGM.getContext();
1087 QualType PtrTy = C.getPointerType(Ty).withRestrict();
1088 auto *OmpOutParm = ImplicitParamDecl::Create(
1089 C, /*DC=*/nullptr, Out->getLocation(),
1090 /*Id=*/nullptr, PtrTy, ImplicitParamKind::Other);
1091 auto *OmpInParm = ImplicitParamDecl::Create(
1092 C, /*DC=*/nullptr, In->getLocation(),
1093 /*Id=*/nullptr, PtrTy, ImplicitParamKind::Other);
1094 FunctionArgList Args{OmpOutParm, OmpInParm};
1095 const CGFunctionInfo &FnInfo =
1096 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
1097 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
1098 std::string Name = CGM.getOpenMPRuntime().getName(
1099 {IsCombiner ? "omp_combiner" : "omp_initializer", ""});
1100 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
1101 Name, &CGM.getModule());
1102 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
1103 if (!CGM.getCodeGenOpts().SampleProfileFile.empty())
1104 Fn->addFnAttr("sample-profile-suffix-elision-policy", "selected");
1105 if (CGM.getCodeGenOpts().OptimizationLevel != 0) {
1106 Fn->removeFnAttr(llvm::Attribute::NoInline);
1107 Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
1108 Fn->addFnAttr(llvm::Attribute::AlwaysInline);
1109 }
1110 CodeGenFunction CGF(CGM);
1111 // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
1112 // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
1113 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(),
1114 Out->getLocation());
1116 Address AddrIn = CGF.GetAddrOfLocalVar(OmpInParm);
1117 Scope.addPrivate(
1118 In, CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>())
1119 .getAddress());
1120 Address AddrOut = CGF.GetAddrOfLocalVar(OmpOutParm);
1121 Scope.addPrivate(
1122 Out, CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>())
1123 .getAddress());
1124 (void)Scope.Privatize();
1125 if (!IsCombiner && Out->hasInit() &&
1126 !CGF.isTrivialInitializer(Out->getInit())) {
1127 CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out),
1128 Out->getType().getQualifiers(),
1129 /*IsInitializer=*/true);
1130 }
1131 if (CombinerInitializer)
1132 CGF.EmitIgnoredExpr(CombinerInitializer);
1133 Scope.ForceCleanup();
1134 CGF.FinishFunction();
1135 return Fn;
1136}
1137
1140 if (UDRMap.count(D) > 0)
1141 return;
1142 llvm::Function *Combiner = emitCombinerOrInitializer(
1143 CGM, D->getType(), D->getCombiner(),
1146 /*IsCombiner=*/true);
1147 llvm::Function *Initializer = nullptr;
1148 if (const Expr *Init = D->getInitializer()) {
1150 CGM, D->getType(),
1152 : nullptr,
1155 /*IsCombiner=*/false);
1156 }
1157 UDRMap.try_emplace(D, Combiner, Initializer);
1158 if (CGF)
1159 FunctionUDRMap[CGF->CurFn].push_back(D);
1160}
1161
1162std::pair<llvm::Function *, llvm::Function *>
1164 auto I = UDRMap.find(D);
1165 if (I != UDRMap.end())
1166 return I->second;
1167 emitUserDefinedReduction(/*CGF=*/nullptr, D);
1168 return UDRMap.lookup(D);
1169}
1170
1171namespace {
1172// Temporary RAII solution to perform a push/pop stack event on the OpenMP IR
1173// Builder if one is present.
1174struct PushAndPopStackRAII {
1175 PushAndPopStackRAII(llvm::OpenMPIRBuilder *OMPBuilder, CodeGenFunction &CGF,
1176 bool HasCancel, llvm::omp::Directive Kind)
1177 : OMPBuilder(OMPBuilder) {
1178 if (!OMPBuilder)
1179 return;
1180
1181 // The following callback is the crucial part of clang's cleanup process.
1182 //
1183 // NOTE:
1184 // Once the OpenMPIRBuilder is used to create parallel regions (and
1185 // similar), the cancellation destination (Dest below) is determined via
1186 // IP. That means if we have variables to finalize we split the block at IP,
1187 // use the new block (=BB) as destination to build a JumpDest (via
1188 // getJumpDestInCurrentScope(BB)) which then is fed to
1189 // EmitBranchThroughCleanup. Furthermore, there will not be the need
1190 // to push & pop a FinalizationInfo object.
1191 // The FiniCB will still be needed but at the point where the
1192 // OpenMPIRBuilder is asked to construct a parallel (or similar) construct.
1193 auto FiniCB = [&CGF](llvm::OpenMPIRBuilder::InsertPointTy IP) {
1194 assert(IP.getBlock()->end() == IP.getPoint() &&
1195 "Clang CG should cause non-terminated block!");
1196 CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
1197 CGF.Builder.restoreIP(IP);
1199 CGF.getOMPCancelDestination(OMPD_parallel);
1200 CGF.EmitBranchThroughCleanup(Dest);
1201 return llvm::Error::success();
1202 };
1203
1204 // TODO: Remove this once we emit parallel regions through the
1205 // OpenMPIRBuilder as it can do this setup internally.
1206 llvm::OpenMPIRBuilder::FinalizationInfo FI({FiniCB, Kind, HasCancel});
1207 OMPBuilder->pushFinalizationCB(std::move(FI));
1208 }
1209 ~PushAndPopStackRAII() {
1210 if (OMPBuilder)
1211 OMPBuilder->popFinalizationCB();
1212 }
1213 llvm::OpenMPIRBuilder *OMPBuilder;
1214};
1215} // namespace
1216
1218 CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS,
1219 const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
1220 const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) {
1221 assert(ThreadIDVar->getType()->isPointerType() &&
1222 "thread id variable must be of type kmp_int32 *");
1223 CodeGenFunction CGF(CGM, true);
1224 bool HasCancel = false;
1225 if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D))
1226 HasCancel = OPD->hasCancel();
1227 else if (const auto *OPD = dyn_cast<OMPTargetParallelDirective>(&D))
1228 HasCancel = OPD->hasCancel();
1229 else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D))
1230 HasCancel = OPSD->hasCancel();
1231 else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
1232 HasCancel = OPFD->hasCancel();
1233 else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D))
1234 HasCancel = OPFD->hasCancel();
1235 else if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D))
1236 HasCancel = OPFD->hasCancel();
1237 else if (const auto *OPFD =
1238 dyn_cast<OMPTeamsDistributeParallelForDirective>(&D))
1239 HasCancel = OPFD->hasCancel();
1240 else if (const auto *OPFD =
1241 dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D))
1242 HasCancel = OPFD->hasCancel();
1243
1244 // TODO: Temporarily inform the OpenMPIRBuilder, if any, about the new
1245 // parallel region to make cancellation barriers work properly.
1246 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
1247 PushAndPopStackRAII PSR(&OMPBuilder, CGF, HasCancel, InnermostKind);
1248 CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
1249 HasCancel, OutlinedHelperName);
1250 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
1251 return CGF.GenerateOpenMPCapturedStmtFunction(*CS, D);
1252}
1253
1254std::string CGOpenMPRuntime::getOutlinedHelperName(StringRef Name) const {
1255 std::string Suffix = getName({"omp_outlined"});
1256 return (Name + Suffix).str();
1257}
1258
1260 return getOutlinedHelperName(CGF.CurFn->getName());
1261}
1262
1263std::string CGOpenMPRuntime::getReductionFuncName(StringRef Name) const {
1264 std::string Suffix = getName({"omp", "reduction", "reduction_func"});
1265 return (Name + Suffix).str();
1266}
1267
1270 const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
1271 const RegionCodeGenTy &CodeGen) {
1272 const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel);
1274 CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(CGF),
1275 CodeGen);
1276}
1277
1280 const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
1281 const RegionCodeGenTy &CodeGen) {
1282 const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams);
1284 CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(CGF),
1285 CodeGen);
1286}
1287
1289 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1290 const VarDecl *PartIDVar, const VarDecl *TaskTVar,
1291 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
1292 bool Tied, unsigned &NumberOfParts) {
1293 auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF,
1294 PrePostActionTy &) {
1295 llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc());
1296 llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getBeginLoc());
1297 llvm::Value *TaskArgs[] = {
1298 UpLoc, ThreadID,
1299 CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar),
1300 TaskTVar->getType()->castAs<PointerType>())
1301 .getPointer(CGF)};
1302 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
1303 CGM.getModule(), OMPRTL___kmpc_omp_task),
1304 TaskArgs);
1305 };
1306 CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar,
1307 UntiedCodeGen);
1308 CodeGen.setAction(Action);
1309 assert(!ThreadIDVar->getType()->isPointerType() &&
1310 "thread id variable must be of type kmp_int32 for tasks");
1311 const OpenMPDirectiveKind Region =
1312 isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop
1313 : OMPD_task;
1314 const CapturedStmt *CS = D.getCapturedStmt(Region);
1315 bool HasCancel = false;
1316 if (const auto *TD = dyn_cast<OMPTaskDirective>(&D))
1317 HasCancel = TD->hasCancel();
1318 else if (const auto *TD = dyn_cast<OMPTaskLoopDirective>(&D))
1319 HasCancel = TD->hasCancel();
1320 else if (const auto *TD = dyn_cast<OMPMasterTaskLoopDirective>(&D))
1321 HasCancel = TD->hasCancel();
1322 else if (const auto *TD = dyn_cast<OMPParallelMasterTaskLoopDirective>(&D))
1323 HasCancel = TD->hasCancel();
1324
1325 CodeGenFunction CGF(CGM, true);
1326 CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
1327 InnermostKind, HasCancel, Action);
1328 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
1329 llvm::Function *Res = CGF.GenerateCapturedStmtFunction(*CS);
1330 if (!Tied)
1331 NumberOfParts = Action.getNumberOfParts();
1332 return Res;
1333}
1334
1336 bool AtCurrentPoint) {
1337 auto &Elem = OpenMPLocThreadIDMap[CGF.CurFn];
1338 assert(!Elem.ServiceInsertPt && "Insert point is set already.");
1339
1340 llvm::Value *Undef = llvm::UndefValue::get(CGF.Int32Ty);
1341 if (AtCurrentPoint) {
1342 Elem.ServiceInsertPt = new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt",
1343 CGF.Builder.GetInsertBlock());
1344 } else {
1345 Elem.ServiceInsertPt = new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt");
1346 Elem.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt->getIterator());
1347 }
1348}
1349
1351 auto &Elem = OpenMPLocThreadIDMap[CGF.CurFn];
1352 if (Elem.ServiceInsertPt) {
1353 llvm::Instruction *Ptr = Elem.ServiceInsertPt;
1354 Elem.ServiceInsertPt = nullptr;
1355 Ptr->eraseFromParent();
1356 }
1357}
1358
1360 SourceLocation Loc,
1361 SmallString<128> &Buffer) {
1362 llvm::raw_svector_ostream OS(Buffer);
1363 // Build debug location
1365 OS << ";";
1366 if (auto *DbgInfo = CGF.getDebugInfo())
1367 OS << DbgInfo->remapDIPath(PLoc.getFilename());
1368 else
1369 OS << PLoc.getFilename();
1370 OS << ";";
1371 if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
1372 OS << FD->getQualifiedNameAsString();
1373 OS << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
1374 return OS.str();
1375}
1376
1378 SourceLocation Loc,
1379 unsigned Flags, bool EmitLoc) {
1380 uint32_t SrcLocStrSize;
1381 llvm::Constant *SrcLocStr;
1382 if ((!EmitLoc && CGM.getCodeGenOpts().getDebugInfo() ==
1383 llvm::codegenoptions::NoDebugInfo) ||
1384 Loc.isInvalid()) {
1385 SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize);
1386 } else {
1387 std::string FunctionName;
1388 std::string FileName;
1389 if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
1390 FunctionName = FD->getQualifiedNameAsString();
1392 if (auto *DbgInfo = CGF.getDebugInfo())
1393 FileName = DbgInfo->remapDIPath(PLoc.getFilename());
1394 else
1395 FileName = PLoc.getFilename();
1396 unsigned Line = PLoc.getLine();
1397 unsigned Column = PLoc.getColumn();
1398 SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(FunctionName, FileName, Line,
1399 Column, SrcLocStrSize);
1400 }
1401 unsigned Reserved2Flags = getDefaultLocationReserved2Flags();
1402 return OMPBuilder.getOrCreateIdent(
1403 SrcLocStr, SrcLocStrSize, llvm::omp::IdentFlag(Flags), Reserved2Flags);
1404}
1405
1407 SourceLocation Loc) {
1408 assert(CGF.CurFn && "No function in current CodeGenFunction.");
1409 // If the OpenMPIRBuilder is used we need to use it for all thread id calls as
1410 // the clang invariants used below might be broken.
1411 if (CGM.getLangOpts().OpenMPIRBuilder) {
1412 SmallString<128> Buffer;
1413 OMPBuilder.updateToLocation(CGF.Builder.saveIP());
1414 uint32_t SrcLocStrSize;
1415 auto *SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(
1416 getIdentStringFromSourceLocation(CGF, Loc, Buffer), SrcLocStrSize);
1417 return OMPBuilder.getOrCreateThreadID(
1418 OMPBuilder.getOrCreateIdent(SrcLocStr, SrcLocStrSize));
1419 }
1420
1421 llvm::Value *ThreadID = nullptr;
1422 // Check whether we've already cached a load of the thread id in this
1423 // function.
1424 auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
1425 if (I != OpenMPLocThreadIDMap.end()) {
1426 ThreadID = I->second.ThreadID;
1427 if (ThreadID != nullptr)
1428 return ThreadID;
1429 }
1430 // If exceptions are enabled, do not use parameter to avoid possible crash.
1431 if (auto *OMPRegionInfo =
1432 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
1433 if (OMPRegionInfo->getThreadIDVariable()) {
1434 // Check if this is an outlined function with thread id passed as argument.
1435 LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
1436 llvm::BasicBlock *TopBlock = CGF.AllocaInsertPt->getParent();
1437 if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions ||
1438 !CGF.getLangOpts().CXXExceptions ||
1439 CGF.Builder.GetInsertBlock() == TopBlock ||
1440 !isa<llvm::Instruction>(LVal.getPointer(CGF)) ||
1441 cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
1442 TopBlock ||
1443 cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
1444 CGF.Builder.GetInsertBlock()) {
1445 ThreadID = CGF.EmitLoadOfScalar(LVal, Loc);
1446 // If value loaded in entry block, cache it and use it everywhere in
1447 // function.
1448 if (CGF.Builder.GetInsertBlock() == TopBlock)
1449 OpenMPLocThreadIDMap[CGF.CurFn].ThreadID = ThreadID;
1450 return ThreadID;
1451 }
1452 }
1453 }
1454
1455 // This is not an outlined function region - need to call kmp_int32
1456 // __kmpc_global_thread_num(ident_t *loc).
1457 // Generate thread id value and cache this value for use across the
1458 // function.
1459 auto &Elem = OpenMPLocThreadIDMap[CGF.CurFn];
1460 if (!Elem.ServiceInsertPt)
1462 CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
1463 CGF.Builder.SetInsertPoint(Elem.ServiceInsertPt);
1465 llvm::CallInst *Call = CGF.Builder.CreateCall(
1466 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
1467 OMPRTL___kmpc_global_thread_num),
1468 emitUpdateLocation(CGF, Loc));
1469 Call->setCallingConv(CGF.getRuntimeCC());
1470 Elem.ThreadID = Call;
1471 return Call;
1472}
1473
1475 assert(CGF.CurFn && "No function in current CodeGenFunction.");
1476 if (OpenMPLocThreadIDMap.count(CGF.CurFn)) {
1478 OpenMPLocThreadIDMap.erase(CGF.CurFn);
1479 }
1480 if (auto I = FunctionUDRMap.find(CGF.CurFn); I != FunctionUDRMap.end()) {
1481 for (const auto *D : I->second)
1482 UDRMap.erase(D);
1483 FunctionUDRMap.erase(I);
1484 }
1485 if (auto I = FunctionUDMMap.find(CGF.CurFn); I != FunctionUDMMap.end()) {
1486 for (const auto *D : I->second)
1487 UDMMap.erase(D);
1488 FunctionUDMMap.erase(I);
1489 }
1492}
1493
1495 return OMPBuilder.IdentPtr;
1496}
1497
1498static llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseKind
1500 std::optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
1501 OMPDeclareTargetDeclAttr::getDeviceType(VD);
1502 if (!DevTy)
1503 return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseNone;
1504
1505 switch ((int)*DevTy) { // Avoid -Wcovered-switch-default
1506 case OMPDeclareTargetDeclAttr::DT_Host:
1507 return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseHost;
1508 break;
1509 case OMPDeclareTargetDeclAttr::DT_NoHost:
1510 return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseNoHost;
1511 break;
1512 case OMPDeclareTargetDeclAttr::DT_Any:
1513 return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseAny;
1514 break;
1515 default:
1516 return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseNone;
1517 break;
1518 }
1519}
1520
1521static llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind
1523 std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> MapType =
1524 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
1525 if (!MapType)
1526 return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryNone;
1527 switch ((int)*MapType) { // Avoid -Wcovered-switch-default
1528 case OMPDeclareTargetDeclAttr::MapTypeTy::MT_To:
1529 return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryTo;
1530 break;
1531 case OMPDeclareTargetDeclAttr::MapTypeTy::MT_Enter:
1532 return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryEnter;
1533 case OMPDeclareTargetDeclAttr::MapTypeTy::MT_Link:
1534 return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryLink;
1535 break;
1536 case OMPDeclareTargetDeclAttr::MapTypeTy::MT_Local:
1537 // MT_Local variables don't need offload entry (device-local).
1538 llvm_unreachable("MT_Local should not reach convertCaptureClause");
1539 break;
1540 default:
1541 return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryNone;
1542 break;
1543 }
1544}
1545
1546static llvm::TargetRegionEntryInfo getEntryInfoFromPresumedLoc(
1547 CodeGenModule &CGM, llvm::OpenMPIRBuilder &OMPBuilder,
1548 SourceLocation BeginLoc, llvm::StringRef ParentName = "") {
1549
1550 auto FileInfoCallBack = [&]() {
1552 PresumedLoc PLoc = SM.getPresumedLoc(BeginLoc);
1553
1554 if (!CGM.getFileSystem()->exists(PLoc.getFilename()))
1555 PLoc = SM.getPresumedLoc(BeginLoc, /*UseLineDirectives=*/false);
1556
1557 return std::pair<std::string, uint64_t>(PLoc.getFilename(), PLoc.getLine());
1558 };
1559
1560 return OMPBuilder.getTargetEntryUniqueInfo(FileInfoCallBack,
1561 *CGM.getFileSystem(), ParentName);
1562}
1563
1565 auto AddrOfGlobal = [&VD, this]() { return CGM.GetAddrOfGlobal(VD); };
1566
1567 auto LinkageForVariable = [&VD, this]() {
1568 return CGM.getLLVMLinkageVarDefinition(VD);
1569 };
1570
1571 std::vector<llvm::GlobalVariable *> GeneratedRefs;
1572
1573 llvm::Type *LlvmPtrTy = CGM.getTypes().ConvertTypeForMem(
1574 CGM.getContext().getPointerType(VD->getType()));
1575 llvm::Constant *addr = OMPBuilder.getAddrOfDeclareTargetVar(
1577 VD->hasDefinition(CGM.getContext()) == VarDecl::DeclarationOnly,
1578 VD->isExternallyVisible(),
1580 VD->getCanonicalDecl()->getBeginLoc()),
1581 CGM.getMangledName(VD), GeneratedRefs, CGM.getLangOpts().OpenMPSimd,
1582 CGM.getLangOpts().OMPTargetTriples, LlvmPtrTy, AddrOfGlobal,
1583 LinkageForVariable);
1584
1585 if (!addr)
1586 return ConstantAddress::invalid();
1587 return ConstantAddress(addr, LlvmPtrTy, CGM.getContext().getDeclAlign(VD));
1588}
1589
1590llvm::Constant *
1592 assert(!CGM.getLangOpts().OpenMPUseTLS ||
1593 !CGM.getContext().getTargetInfo().isTLSSupported());
1594 // Lookup the entry, lazily creating it if necessary.
1595 std::string Suffix = getName({"cache", ""});
1596 return OMPBuilder.getOrCreateInternalVariable(
1597 CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix).str());
1598}
1599
1601 const VarDecl *VD,
1602 Address VDAddr,
1603 SourceLocation Loc) {
1604 if (CGM.getLangOpts().OpenMPUseTLS &&
1605 CGM.getContext().getTargetInfo().isTLSSupported())
1606 return VDAddr;
1607
1608 llvm::Type *VarTy = VDAddr.getElementType();
1609 llvm::Value *Args[] = {
1610 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
1611 CGF.Builder.CreatePointerCast(VDAddr.emitRawPointer(CGF), CGM.Int8PtrTy),
1612 CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)),
1614 return Address(
1615 CGF.EmitRuntimeCall(
1616 OMPBuilder.getOrCreateRuntimeFunction(
1617 CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
1618 Args),
1619 CGF.Int8Ty, VDAddr.getAlignment());
1620}
1621
1623 CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor,
1624 llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
1625 // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime
1626 // library.
1627 llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc);
1628 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
1629 CGM.getModule(), OMPRTL___kmpc_global_thread_num),
1630 OMPLoc);
1631 // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
1632 // to register constructor/destructor for variable.
1633 llvm::Value *Args[] = {
1634 OMPLoc,
1635 CGF.Builder.CreatePointerCast(VDAddr.emitRawPointer(CGF), CGM.VoidPtrTy),
1636 Ctor, CopyCtor, Dtor};
1637 CGF.EmitRuntimeCall(
1638 OMPBuilder.getOrCreateRuntimeFunction(
1639 CGM.getModule(), OMPRTL___kmpc_threadprivate_register),
1640 Args);
1641}
1642
1644 const VarDecl *VD, Address VDAddr, SourceLocation Loc,
1645 bool PerformInit, CodeGenFunction *CGF) {
1646 if (CGM.getLangOpts().OpenMPUseTLS &&
1647 CGM.getContext().getTargetInfo().isTLSSupported())
1648 return nullptr;
1649
1650 VD = VD->getDefinition(CGM.getContext());
1651 if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) {
1652 QualType ASTTy = VD->getType();
1653
1654 llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
1655 const Expr *Init = VD->getAnyInitializer();
1656 if (CGM.getLangOpts().CPlusPlus && PerformInit) {
1657 // Generate function that re-emits the declaration's initializer into the
1658 // threadprivate copy of the variable VD
1659 CodeGenFunction CtorCGF(CGM);
1660 auto *Dst = ImplicitParamDecl::Create(
1661 CGM.getContext(), /*DC=*/nullptr, Loc,
1662 /*Id=*/nullptr, CGM.getContext().VoidPtrTy, ImplicitParamKind::Other);
1663
1664 FunctionArgList Args{Dst};
1665 const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
1666 CGM.getContext().VoidPtrTy, Args);
1667 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
1668 std::string Name = getName({"__kmpc_global_ctor_", ""});
1669 llvm::Function *Fn =
1670 CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
1671 CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
1672 Args, Loc, Loc);
1673 llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar(
1674 CtorCGF.GetAddrOfLocalVar(Dst), /*Volatile=*/false,
1675 CGM.getContext().VoidPtrTy, Dst->getLocation());
1676 Address Arg(ArgVal, CtorCGF.ConvertTypeForMem(ASTTy),
1677 VDAddr.getAlignment());
1678 CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
1679 /*IsInitializer=*/true);
1680 ArgVal = CtorCGF.EmitLoadOfScalar(
1681 CtorCGF.GetAddrOfLocalVar(Dst), /*Volatile=*/false,
1682 CGM.getContext().VoidPtrTy, Dst->getLocation());
1683 CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
1684 CtorCGF.FinishFunction();
1685 Ctor = Fn;
1686 }
1688 // Generate function that emits destructor call for the threadprivate copy
1689 // of the variable VD
1690 CodeGenFunction DtorCGF(CGM);
1691 auto *Dst = ImplicitParamDecl::Create(
1692 CGM.getContext(), /*DC=*/nullptr, Loc,
1693 /*Id=*/nullptr, CGM.getContext().VoidPtrTy, ImplicitParamKind::Other);
1694
1695 FunctionArgList Args{Dst};
1696 const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
1697 CGM.getContext().VoidTy, Args);
1698 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
1699 std::string Name = getName({"__kmpc_global_dtor_", ""});
1700 llvm::Function *Fn =
1701 CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
1702 auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
1703 DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
1704 Loc, Loc);
1705 // Create a scope with an artificial location for the body of this function.
1706 auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
1707 llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar(
1708 DtorCGF.GetAddrOfLocalVar(Dst),
1709 /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst->getLocation());
1710 DtorCGF.emitDestroy(
1711 Address(ArgVal, DtorCGF.Int8Ty, VDAddr.getAlignment()), ASTTy,
1712 DtorCGF.getDestroyer(ASTTy.isDestructedType()),
1713 DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
1714 DtorCGF.FinishFunction();
1715 Dtor = Fn;
1716 }
1717 // Do not emit init function if it is not required.
1718 if (!Ctor && !Dtor)
1719 return nullptr;
1720
1721 // Copying constructor for the threadprivate variable.
1722 // Must be NULL: this parameter is reserved by the runtime, which currently
1723 // requires it to always be NULL and fires an assertion otherwise.
1724 CopyCtor = llvm::Constant::getNullValue(CGM.DefaultPtrTy);
1725 if (Ctor == nullptr) {
1726 Ctor = llvm::Constant::getNullValue(CGM.DefaultPtrTy);
1727 }
1728 if (Dtor == nullptr) {
1729 Dtor = llvm::Constant::getNullValue(CGM.DefaultPtrTy);
1730 }
1731 if (!CGF) {
1732 auto *InitFunctionTy =
1733 llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
1734 std::string Name = getName({"__omp_threadprivate_init_", ""});
1735 llvm::Function *InitFunction = CGM.CreateGlobalInitOrCleanUpFunction(
1736 InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction());
1737 CodeGenFunction InitCGF(CGM);
1738 FunctionArgList ArgList;
1739 InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
1740 CGM.getTypes().arrangeNullaryFunction(), ArgList,
1741 Loc, Loc);
1742 emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
1743 InitCGF.FinishFunction();
1744 return InitFunction;
1745 }
1746 emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
1747 }
1748 return nullptr;
1749}
1750
1752 llvm::GlobalValue *GV) {
1753 std::optional<OMPDeclareTargetDeclAttr *> ActiveAttr =
1754 OMPDeclareTargetDeclAttr::getActiveAttr(FD);
1755
1756 // We only need to handle active 'indirect' declare target functions.
1757 if (!ActiveAttr || !(*ActiveAttr)->getIndirect())
1758 return;
1759
1760 // Get a mangled name to store the new device global in.
1761 llvm::TargetRegionEntryInfo EntryInfo = getEntryInfoFromPresumedLoc(
1763 SmallString<128> Name;
1764 OMPBuilder.OffloadInfoManager.getTargetRegionEntryFnName(Name, EntryInfo);
1765
1766 // We need to generate a new global to hold the address of the indirectly
1767 // called device function. Doing this allows us to keep the visibility and
1768 // linkage of the associated function unchanged while allowing the runtime to
1769 // access its value.
1770 llvm::GlobalValue *Addr = GV;
1771 if (CGM.getLangOpts().OpenMPIsTargetDevice) {
1772 llvm::PointerType *FnPtrTy = llvm::PointerType::get(
1773 CGM.getLLVMContext(),
1774 CGM.getModule().getDataLayout().getProgramAddressSpace());
1775 Addr = new llvm::GlobalVariable(
1776 CGM.getModule(), FnPtrTy,
1777 /*isConstant=*/true, llvm::GlobalValue::ExternalLinkage, GV, Name,
1778 nullptr, llvm::GlobalValue::NotThreadLocal,
1779 CGM.getModule().getDataLayout().getDefaultGlobalsAddressSpace());
1780 Addr->setVisibility(llvm::GlobalValue::ProtectedVisibility);
1781 }
1782
1783 // Register the indirect Vtable:
1784 // This is similar to OMPTargetGlobalVarEntryIndirect, except that the
1785 // size field refers to the size of memory pointed to, not the size of
1786 // the pointer symbol itself (which is implicitly the size of a pointer).
1787 OMPBuilder.OffloadInfoManager.registerDeviceGlobalVarEntryInfo(
1788 Name, Addr, CGM.GetTargetTypeStoreSize(CGM.VoidPtrTy).getQuantity(),
1789 llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryIndirect,
1790 llvm::GlobalValue::WeakODRLinkage);
1791}
1792
/// Registers an OpenMP offload entry for the C++ vtable \p VTable.
///
/// The entry name is derived from a target-region entry info object built by
/// getEntryInfoFromPresumedLoc() with the suffix "addr" appended. When
/// compiling for the target device, an additional constant global with
/// external linkage and protected visibility is created under that name and
/// initialized with \p VTable. The entry is then registered with the offload
/// info manager as an indirect-vtable device global.
///
/// \param VTable The vtable global to register. Its initializer is queried
///               for the entry size, so it is assumed to have one.
/// \param VD     Variable declaration associated with the entry.
///               NOTE(review): the line of the getEntryInfoFromPresumedLoc()
///               call that would show how VD is used is not visible in this
///               listing.
1793void CGOpenMPRuntime::registerVTableOffloadEntry(llvm::GlobalVariable *VTable,
1794 const VarDecl *VD) {
1795 // TODO: add logic to avoid duplicate vtable registrations per
1796 // translation unit; though for external linkage, this should no
1797 // longer be an issue - or at least we can avoid the issue by
1798 // checking for an existing offloading entry. But, perhaps the
1799 // better approach is to defer emission of the vtables and offload
1800 // entries until later (by tracking a list of items that need to be
1801 // emitted).
1802
1803 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
1804
1805 // Generate a new externally visible global to point to the
1806 // internally visible vtable. Doing this allows us to keep the
1807 // visibility and linkage of the associated vtable unchanged while
1808 // allowing the runtime to access its value. The externally
1809 // visible global var needs to be emitted with a unique mangled
1810 // name that won't conflict with similarly named (internal)
1811 // vtables in other translation units.
1812
1813 // Register vtable with source location of dynamic object in map
1814 // clause.
1815 llvm::TargetRegionEntryInfo EntryInfo = getEntryInfoFromPresumedLoc(
1817 VTable->getName());
1818
  // Addr starts out as the vtable itself and is replaced by a new global only
  // when compiling for the target device (see below).
1819 llvm::GlobalVariable *Addr = VTable;
1820 SmallString<128> AddrName;
1821 OMPBuilder.OffloadInfoManager.getTargetRegionEntryFnName(AddrName, EntryInfo);
1822 AddrName.append("addr");
1823
1824 if (CGM.getLangOpts().OpenMPIsTargetDevice) {
    // Device side: emit a constant global named AddrName whose initializer is
    // the vtable, placed in the data layout's default globals address space.
1825 Addr = new llvm::GlobalVariable(
1826 CGM.getModule(), VTable->getType(),
1827 /*isConstant=*/true, llvm::GlobalValue::ExternalLinkage, VTable,
1828 AddrName,
1829 /*InsertBefore=*/nullptr, llvm::GlobalValue::NotThreadLocal,
1830 CGM.getModule().getDataLayout().getDefaultGlobalsAddressSpace());
1831 Addr->setVisibility(llvm::GlobalValue::ProtectedVisibility);
1832 }
  // The registered size is the alloc size of the vtable initializer's type,
  // not the size of a pointer.
  // NOTE(review): the entry is registered with VTable rather than Addr, so the
  // global created above is not passed to the offload info manager here --
  // confirm this is intended.
1833 OMPBuilder.OffloadInfoManager.registerDeviceGlobalVarEntryInfo(
1834 AddrName, VTable,
1835 CGM.getDataLayout().getTypeAllocSize(VTable->getInitializer()->getType()),
1836 llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryIndirectVTable,
1837 llvm::GlobalValue::WeakODRLinkage);
1838}
1839
1842 const VarDecl *VD) {
1843 // Register C++ VTable to OpenMP Offload Entry if it's a new
1844 // CXXRecordDecl.
1845 if (CXXRecord && CXXRecord->isDynamicClass() &&
1846 !CGM.getOpenMPRuntime().VTableDeclMap.contains(CXXRecord)) {
1847 auto Res = CGM.getOpenMPRuntime().VTableDeclMap.try_emplace(CXXRecord, VD);
1848 if (Res.second) {
1849 CGM.EmitVTable(CXXRecord);
1850 CodeGenVTables VTables = CGM.getVTables();
1851 llvm::GlobalVariable *VTablesAddr = VTables.GetAddrOfVTable(CXXRecord);
1852 assert(VTablesAddr && "Expected non-null VTable address");
1853 CGM.getOpenMPRuntime().registerVTableOffloadEntry(VTablesAddr, VD);
1854 // Emit VTable for all the fields containing dynamic CXXRecord
1855 for (const FieldDecl *Field : CXXRecord->fields()) {
1856 if (CXXRecordDecl *RecordDecl = Field->getType()->getAsCXXRecordDecl())
1858 }
1859 // Emit VTable for all dynamic parent class
1860 for (CXXBaseSpecifier &Base : CXXRecord->bases()) {
1861 if (CXXRecordDecl *BaseDecl = Base.getType()->getAsCXXRecordDecl())
1862 emitAndRegisterVTable(CGM, BaseDecl, VD);
1863 }
1864 }
1865 }
1866}
1867
1869 // Register VTable by scanning through the map clause of OpenMP target region.
1870 // Get CXXRecordDecl and VarDecl from Expr.
1871 auto GetVTableDecl = [](const Expr *E) {
1872 QualType VDTy = E->getType();
1873 CXXRecordDecl *CXXRecord = nullptr;
1874 if (const auto *RefType = VDTy->getAs<LValueReferenceType>())
1875 VDTy = RefType->getPointeeType();
1876 if (VDTy->isPointerType())
1878 else
1879 CXXRecord = VDTy->getAsCXXRecordDecl();
1880
1881 const VarDecl *VD = nullptr;
1882 if (auto *DRE = dyn_cast<DeclRefExpr>(E)) {
1883 VD = cast<VarDecl>(DRE->getDecl());
1884 } else if (auto *MRE = dyn_cast<MemberExpr>(E)) {
1885 if (auto *BaseDRE = dyn_cast<DeclRefExpr>(MRE->getBase())) {
1886 if (auto *BaseVD = dyn_cast<VarDecl>(BaseDRE->getDecl()))
1887 VD = BaseVD;
1888 }
1889 }
1890 return std::pair<CXXRecordDecl *, const VarDecl *>(CXXRecord, VD);
1891 };
1892 // Collect VTable from OpenMP map clause.
1893 for (const auto *C : D.getClausesOfKind<OMPMapClause>()) {
1894 for (const auto *E : C->varlist()) {
1895 auto DeclPair = GetVTableDecl(E);
1896 // Ensure VD is not null
1897 if (DeclPair.second)
1898 emitAndRegisterVTable(CGM, DeclPair.first, DeclPair.second);
1899 }
1900 }
1901}
1902
1904 QualType VarType,
1905 StringRef Name) {
1906 std::string Suffix = getName({"artificial", ""});
1907 llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType);
1908 llvm::GlobalVariable *GAddr = OMPBuilder.getOrCreateInternalVariable(
1909 VarLVType, Twine(Name).concat(Suffix).str());
1910 if (CGM.getLangOpts().OpenMP && CGM.getLangOpts().OpenMPUseTLS &&
1911 CGM.getTarget().isTLSSupported()) {
1912 GAddr->setThreadLocal(/*Val=*/true);
1913 return Address(GAddr, GAddr->getValueType(),
1914 CGM.getContext().getTypeAlignInChars(VarType));
1915 }
1916 std::string CacheSuffix = getName({"cache", ""});
1917 llvm::Value *Args[] = {
1920 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy),
1921 CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy,
1922 /*isSigned=*/false),
1923 OMPBuilder.getOrCreateInternalVariable(
1924 CGM.VoidPtrPtrTy,
1925 Twine(Name).concat(Suffix).concat(CacheSuffix).str())};
1926 return Address(
1928 CGF.EmitRuntimeCall(
1929 OMPBuilder.getOrCreateRuntimeFunction(
1930 CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
1931 Args),
1932 CGF.Builder.getPtrTy(0)),
1933 VarLVType, CGM.getContext().getTypeAlignInChars(VarType));
1934}
1935
1937 const RegionCodeGenTy &ThenGen,
1938 const RegionCodeGenTy &ElseGen) {
1939 CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());
1940
1941 // If the condition constant folds and can be elided, try to avoid emitting
1942 // the condition and the dead arm of the if/else.
1943 bool CondConstant;
1944 if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
1945 if (CondConstant)
1946 ThenGen(CGF);
1947 else
1948 ElseGen(CGF);
1949 return;
1950 }
1951
1952 // Otherwise, the condition did not fold, or we couldn't elide it. Just
1953 // emit the conditional branch.
1954 llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then");
1955 llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else");
1956 llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end");
1957 CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0);
1958
1959 // Emit the 'then' code.
1960 CGF.EmitBlock(ThenBlock);
1961 ThenGen(CGF);
1962 CGF.EmitBranch(ContBlock);
1963 // Emit the 'else' code if present.
1964 // There is no need to emit line number for unconditional branch.
1966 CGF.EmitBlock(ElseBlock);
1967 ElseGen(CGF);
1968 // There is no need to emit line number for unconditional branch.
1970 CGF.EmitBranch(ContBlock);
1971 // Emit the continuation block for code after the if.
1972 CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
1973}
1974
1976 CodeGenFunction &CGF, SourceLocation Loc, llvm::Function *OutlinedFn,
1977 ArrayRef<llvm::Value *> CapturedVars, const Expr *IfCond,
1978 llvm::Value *NumThreads, OpenMPNumThreadsClauseModifier NumThreadsModifier,
1979 OpenMPSeverityClauseKind Severity, const Expr *Message) {
1980 if (!CGF.HaveInsertPoint())
1981 return;
1982 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
1983 auto &M = CGM.getModule();
1984 auto &&ThenGen = [&M, OutlinedFn, CapturedVars, RTLoc,
1985 this](CodeGenFunction &CGF, PrePostActionTy &) {
1986 // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
1987 llvm::Value *Args[] = {
1988 RTLoc,
1989 CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
1990 OutlinedFn};
1992 RealArgs.append(std::begin(Args), std::end(Args));
1993 RealArgs.append(CapturedVars.begin(), CapturedVars.end());
1994
1995 llvm::FunctionCallee RTLFn =
1996 OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_fork_call);
1997 CGF.EmitRuntimeCall(RTLFn, RealArgs);
1998 };
1999 auto &&ElseGen = [&M, OutlinedFn, CapturedVars, RTLoc, Loc,
2000 this](CodeGenFunction &CGF, PrePostActionTy &) {
2002 llvm::Value *ThreadID = RT.getThreadID(CGF, Loc);
2003 // Build calls:
2004 // __kmpc_serialized_parallel(&Loc, GTid);
2005 llvm::Value *Args[] = {RTLoc, ThreadID};
2006 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2007 M, OMPRTL___kmpc_serialized_parallel),
2008 Args);
2009
2010 // OutlinedFn(&GTid, &zero_bound, CapturedStruct);
2011 Address ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc);
2012 RawAddress ZeroAddrBound =
2014 /*Name=*/".bound.zero.addr");
2015 CGF.Builder.CreateStore(CGF.Builder.getInt32(/*C*/ 0), ZeroAddrBound);
2017 // ThreadId for serialized parallels is 0.
2018 OutlinedFnArgs.push_back(ThreadIDAddr.emitRawPointer(CGF));
2019 OutlinedFnArgs.push_back(ZeroAddrBound.getPointer());
2020 OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
2021
2022 // Ensure we do not inline the function. This is trivially true for the ones
2023 // passed to __kmpc_fork_call but the ones called in serialized regions
2024 // could be inlined. This is not perfect, but it is closer to the invariant
2025 // we want, namely, every data environment starts with a new function.
2026 // TODO: We should pass the if condition to the runtime function and do the
2027 // handling there. Much cleaner code.
2028 OutlinedFn->removeFnAttr(llvm::Attribute::AlwaysInline);
2029 OutlinedFn->addFnAttr(llvm::Attribute::NoInline);
2030 RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);
2031
2032 // __kmpc_end_serialized_parallel(&Loc, GTid);
2033 llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID};
2034 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2035 M, OMPRTL___kmpc_end_serialized_parallel),
2036 EndArgs);
2037 };
2038 if (IfCond) {
2039 emitIfClause(CGF, IfCond, ThenGen, ElseGen);
2040 } else {
2041 RegionCodeGenTy ThenRCG(ThenGen);
2042 ThenRCG(CGF);
2043 }
2044}
2045
2046// If we're inside an (outlined) parallel region, use the region info's
2047// thread-ID variable (it is passed as the first argument of the outlined
2048// function as "kmp_int32 *gtid"). Otherwise, if we're not inside a parallel
2049// region but in a regular serial code region, get the thread ID by calling
2050// kmp_int32 kmpc_global_thread_num(ident_t *loc), stash this thread ID in a
2051// temporary and return the address of that temp.
2053 SourceLocation Loc) {
2054 if (auto *OMPRegionInfo =
2055 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
2056 if (OMPRegionInfo->getThreadIDVariable())
2057 return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress();
2058
2059 llvm::Value *ThreadID = getThreadID(CGF, Loc);
2060 QualType Int32Ty =
2061 CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
2062 Address ThreadIDTemp =
2063 CGF.CreateMemTempWithoutCast(Int32Ty, /*Name*/ ".threadid_temp.");
2064 CGF.EmitStoreOfScalar(ThreadID,
2065 CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty));
2066
2067 return ThreadIDTemp;
2068}
2069
/// Returns the lock variable used for the critical region named
/// \p CriticalName.
///
/// The variable name is produced by getName() from two parts: the prefix
/// "gomp_critical_user_" followed by \p CriticalName, and the part "var".
/// The variable is obtained through
/// OMPBuilder.getOrCreateInternalVariable() with type KmpCriticalNameTy.
///
/// \param CriticalName Name of the critical region.
/// \returns The internal variable that serves as the region's lock.
2070llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
2071 std::string Prefix = Twine("gomp_critical_user_", CriticalName).str();
2072 std::string Name = getName({Prefix, "var"});
2073 return OMPBuilder.getOrCreateInternalVariable(KmpCriticalNameTy, Name);
2074}
2075
2076namespace {
2077/// Common pre(post)-action for different OpenMP constructs.
///
/// Enter() emits a call to EnterCallee and Exit() emits a call to ExitCallee.
/// When constructed with Conditional set, Enter() also opens an
/// "omp_if.then" block that is entered only if the enter call returned a
/// non-null value; Done() emits the matching "omp_if.end" continuation.
2078class CommonActionTy final : public PrePostActionTy {
  // Runtime function called on entry, and the arguments passed to it.
  // NOTE(review): the argument lists are held as ArrayRef, i.e. as views --
  // presumably the caller's arrays must outlive this action; confirm.
2079 llvm::FunctionCallee EnterCallee;
2080 ArrayRef<llvm::Value *> EnterArgs;
  // Runtime function called on exit, and the arguments passed to it.
2081 llvm::FunctionCallee ExitCallee;
2082 ArrayRef<llvm::Value *> ExitArgs;
  // Whether the code following Enter() is guarded by the enter call's result.
2083 bool Conditional;
  // Continuation block of the guard; assigned in Enter() only when
  // Conditional is set, otherwise it stays null.
2084 llvm::BasicBlock *ContBlock = nullptr;
2085
2086public:
  /// Stores the enter/exit callees and their argument lists.
  /// \p Conditional defaults to false (no guard is emitted).
2087 CommonActionTy(llvm::FunctionCallee EnterCallee,
2088 ArrayRef<llvm::Value *> EnterArgs,
2089 llvm::FunctionCallee ExitCallee,
2090 ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false)
2091 : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),
2092 ExitArgs(ExitArgs), Conditional(Conditional) {}
  /// Emits the enter call; for conditional actions, also emits the branch on
  /// its result and starts the "omp_if.then" block.
2093 void Enter(CodeGenFunction &CGF) override {
2094 llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs);
2095 if (Conditional) {
      // A non-null result selects the "then" block; otherwise control goes
      // straight to the continuation block.
2096 llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes);
2097 auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
2098 ContBlock = CGF.createBasicBlock("omp_if.end");
2099 // Generate the branch (If-stmt)
2100 CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
2101 CGF.EmitBlock(ThenBlock);
2102 }
2103 }
  /// Branches to the continuation block and emits it.
  /// NOTE(review): ContBlock is only assigned in Enter() when Conditional is
  /// set, so this looks intended for conditional actions only -- confirm
  /// against callers.
2104 void Done(CodeGenFunction &CGF) {
2105 // Emit the rest of blocks/branches
2106 CGF.EmitBranch(ContBlock);
2107 CGF.EmitBlock(ContBlock, true);
2108 }
  /// Emits the exit call.
2109 void Exit(CodeGenFunction &CGF) override {
2110 CGF.EmitRuntimeCall(ExitCallee, ExitArgs);
2111 }
2112};
2113} // anonymous namespace
2114
2116 StringRef CriticalName,
2117 const RegionCodeGenTy &CriticalOpGen,
2118 SourceLocation Loc, const Expr *Hint) {
2119 // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]);
2120 // CriticalOpGen();
2121 // __kmpc_end_critical(ident_t *, gtid, Lock);
2122 // Prepare arguments and build a call to __kmpc_critical
2123 if (!CGF.HaveInsertPoint())
2124 return;
2125 llvm::FunctionCallee RuntimeFcn = OMPBuilder.getOrCreateRuntimeFunction(
2126 CGM.getModule(),
2127 Hint ? OMPRTL___kmpc_critical_with_hint : OMPRTL___kmpc_critical);
2128 llvm::Value *LockVar = getCriticalRegionLock(CriticalName);
2129 unsigned LockVarArgIdx = 2;
2130 if (cast<llvm::GlobalVariable>(LockVar)->getAddressSpace() !=
2131 RuntimeFcn.getFunctionType()
2132 ->getParamType(LockVarArgIdx)
2133 ->getPointerAddressSpace())
2134 LockVar = CGF.Builder.CreateAddrSpaceCast(
2135 LockVar, RuntimeFcn.getFunctionType()->getParamType(LockVarArgIdx));
2136 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2137 LockVar};
2138 llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args),
2139 std::end(Args));
2140 if (Hint) {
2141 EnterArgs.push_back(CGF.Builder.CreateIntCast(
2142 CGF.EmitScalarExpr(Hint), CGM.Int32Ty, /*isSigned=*/false));
2143 }
2144 CommonActionTy Action(RuntimeFcn, EnterArgs,
2145 OMPBuilder.getOrCreateRuntimeFunction(
2146 CGM.getModule(), OMPRTL___kmpc_end_critical),
2147 Args);
2148 CriticalOpGen.setAction(Action);
2149 emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
2150}
2151
2153 const RegionCodeGenTy &MasterOpGen,
2154 SourceLocation Loc) {
2155 if (!CGF.HaveInsertPoint())
2156 return;
2157 // if(__kmpc_master(ident_t *, gtid)) {
2158 // MasterOpGen();
2159 // __kmpc_end_master(ident_t *, gtid);
2160 // }
2161 // Prepare arguments and build a call to __kmpc_master
2162 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2163 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2164 CGM.getModule(), OMPRTL___kmpc_master),
2165 Args,
2166 OMPBuilder.getOrCreateRuntimeFunction(
2167 CGM.getModule(), OMPRTL___kmpc_end_master),
2168 Args,
2169 /*Conditional=*/true);
2170 MasterOpGen.setAction(Action);
2171 emitInlinedDirective(CGF, OMPD_master, MasterOpGen);
2172 Action.Done(CGF);
2173}
2174
2176 const RegionCodeGenTy &MaskedOpGen,
2177 SourceLocation Loc, const Expr *Filter) {
2178 if (!CGF.HaveInsertPoint())
2179 return;
2180 // if(__kmpc_masked(ident_t *, gtid, filter)) {
2181 // MaskedOpGen();
2182 // __kmpc_end_masked(ident_t *, gtid);
2183 // }
2184 // Prepare arguments and build a call to __kmpc_masked
2185 llvm::Value *FilterVal = Filter
2186 ? CGF.EmitScalarExpr(Filter, CGF.Int32Ty)
2187 : llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/0);
2188 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2189 FilterVal};
2190 llvm::Value *ArgsEnd[] = {emitUpdateLocation(CGF, Loc),
2191 getThreadID(CGF, Loc)};
2192 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2193 CGM.getModule(), OMPRTL___kmpc_masked),
2194 Args,
2195 OMPBuilder.getOrCreateRuntimeFunction(
2196 CGM.getModule(), OMPRTL___kmpc_end_masked),
2197 ArgsEnd,
2198 /*Conditional=*/true);
2199 MaskedOpGen.setAction(Action);
2200 emitInlinedDirective(CGF, OMPD_masked, MaskedOpGen);
2201 Action.Done(CGF);
2202}
2203
2205 SourceLocation Loc) {
2206 if (!CGF.HaveInsertPoint())
2207 return;
2208 if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2209 OMPBuilder.createTaskyield(CGF.Builder);
2210 } else {
2211 // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
2212 llvm::Value *Args[] = {
2213 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2214 llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
2215 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2216 CGM.getModule(), OMPRTL___kmpc_omp_taskyield),
2217 Args);
2218 }
2219
2220 if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
2221 Region->emitUntiedSwitch(CGF);
2222}
2223
2225 const RegionCodeGenTy &TaskgroupOpGen,
2226 SourceLocation Loc) {
2227 if (!CGF.HaveInsertPoint())
2228 return;
2229 // __kmpc_taskgroup(ident_t *, gtid);
2230 // TaskgroupOpGen();
2231 // __kmpc_end_taskgroup(ident_t *, gtid);
2232 // Prepare arguments and build a call to __kmpc_taskgroup
2233 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2234 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2235 CGM.getModule(), OMPRTL___kmpc_taskgroup),
2236 Args,
2237 OMPBuilder.getOrCreateRuntimeFunction(
2238 CGM.getModule(), OMPRTL___kmpc_end_taskgroup),
2239 Args);
2240 TaskgroupOpGen.setAction(Action);
2241 emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen);
2242}
2243
2244/// Given an array of pointers to variables, project the address of a
2245/// given variable.
2247 unsigned Index, const VarDecl *Var) {
2248 // Pull out the pointer to the variable.
2249 Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index);
2250 llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr);
2251
2252 llvm::Type *ElemTy = CGF.ConvertTypeForMem(Var->getType());
2253 return Address(Ptr, ElemTy, CGF.getContext().getDeclAlign(Var));
2254}
2255
2257 CodeGenModule &CGM, llvm::Type *ArgsElemType,
2258 ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
2259 ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps,
2260 SourceLocation Loc) {
2261 ASTContext &C = CGM.getContext();
2262 // void copy_func(void *LHSArg, void *RHSArg);
2263
2264 auto *LHSArg =
2265 ImplicitParamDecl::Create(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
2266 C.VoidPtrTy, ImplicitParamKind::Other);
2267 auto *RHSArg =
2268 ImplicitParamDecl::Create(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
2269 C.VoidPtrTy, ImplicitParamKind::Other);
2270 FunctionArgList Args{LHSArg, RHSArg};
2271 const auto &CGFI =
2272 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
2273 std::string Name =
2274 CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"});
2275 auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
2276 llvm::GlobalValue::InternalLinkage, Name,
2277 &CGM.getModule());
2279 if (!CGM.getCodeGenOpts().SampleProfileFile.empty())
2280 Fn->addFnAttr("sample-profile-suffix-elision-policy", "selected");
2281 Fn->setDoesNotRecurse();
2282 CodeGenFunction CGF(CGM);
2283 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
2284 // Dest = (void*[n])(LHSArg);
2285 // Src = (void*[n])(RHSArg);
2287 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(LHSArg)),
2288 CGF.Builder.getPtrTy(0)),
2289 ArgsElemType, CGF.getPointerAlign());
2291 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(RHSArg)),
2292 CGF.Builder.getPtrTy(0)),
2293 ArgsElemType, CGF.getPointerAlign());
2294 // *(Type0*)Dst[0] = *(Type0*)Src[0];
2295 // *(Type1*)Dst[1] = *(Type1*)Src[1];
2296 // ...
2297 // *(Typen*)Dst[n] = *(Typen*)Src[n];
2298 for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
2299 const auto *DestVar =
2300 cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
2301 Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);
2302
2303 const auto *SrcVar =
2304 cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
2305 Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);
2306
2307 const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
2308 QualType Type = VD->getType();
2309 CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);
2310 }
2311 CGF.FinishFunction();
2312 return Fn;
2313}
2314
2316 const RegionCodeGenTy &SingleOpGen,
2317 SourceLocation Loc,
2318 ArrayRef<const Expr *> CopyprivateVars,
2319 ArrayRef<const Expr *> SrcExprs,
2320 ArrayRef<const Expr *> DstExprs,
2321 ArrayRef<const Expr *> AssignmentOps) {
2322 if (!CGF.HaveInsertPoint())
2323 return;
2324 assert(CopyprivateVars.size() == SrcExprs.size() &&
2325 CopyprivateVars.size() == DstExprs.size() &&
2326 CopyprivateVars.size() == AssignmentOps.size());
2327 ASTContext &C = CGM.getContext();
2328 // int32 did_it = 0;
2329 // if(__kmpc_single(ident_t *, gtid)) {
2330 // SingleOpGen();
2331 // __kmpc_end_single(ident_t *, gtid);
2332 // did_it = 1;
2333 // }
2334 // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
2335 // <copy_func>, did_it);
2336
2337 Address DidIt = Address::invalid();
2338 if (!CopyprivateVars.empty()) {
2339 // int32 did_it = 0;
2340 QualType KmpInt32Ty =
2341 C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
2342 DidIt = CGF.CreateMemTempWithoutCast(KmpInt32Ty, ".omp.copyprivate.did_it");
2343 CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
2344 }
2345 // Prepare arguments and build a call to __kmpc_single
2346 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2347 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2348 CGM.getModule(), OMPRTL___kmpc_single),
2349 Args,
2350 OMPBuilder.getOrCreateRuntimeFunction(
2351 CGM.getModule(), OMPRTL___kmpc_end_single),
2352 Args,
2353 /*Conditional=*/true);
2354 SingleOpGen.setAction(Action);
2355 emitInlinedDirective(CGF, OMPD_single, SingleOpGen);
2356 if (DidIt.isValid()) {
2357 // did_it = 1;
2358 CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
2359 }
2360 Action.Done(CGF);
2361 // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
2362 // <copy_func>, did_it);
2363 if (DidIt.isValid()) {
2364 llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
2365 QualType CopyprivateArrayTy = C.getConstantArrayType(
2366 C.VoidPtrTy, ArraySize, nullptr, ArraySizeModifier::Normal,
2367 /*IndexTypeQuals=*/0);
2368 // Create a list of all private variables for copyprivate.
2369 Address CopyprivateList = CGF.CreateMemTempWithoutCast(
2370 CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
2371 for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
2372 Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I);
2373 CGF.Builder.CreateStore(
2375 CGF.EmitLValue(CopyprivateVars[I]).getPointer(CGF),
2376 CGF.VoidPtrTy),
2377 Elem);
2378 }
2379 // Build function that copies private values from single region to all other
2380 // threads in the corresponding parallel region.
2381 llvm::Value *CpyFn = emitCopyprivateCopyFunction(
2382 CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy), CopyprivateVars,
2383 SrcExprs, DstExprs, AssignmentOps, Loc);
2384 llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy);
2386 CopyprivateList, CGF.VoidPtrTy, CGF.Int8Ty);
2387 llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt);
2388 llvm::Value *Args[] = {
2389 emitUpdateLocation(CGF, Loc), // ident_t *<loc>
2390 getThreadID(CGF, Loc), // i32 <gtid>
2391 BufSize, // size_t <buf_size>
2392 CL.emitRawPointer(CGF), // void *<copyprivate list>
2393 CpyFn, // void (*) (void *, void *) <copy_func>
2394 DidItVal // i32 did_it
2395 };
2396 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2397 CGM.getModule(), OMPRTL___kmpc_copyprivate),
2398 Args);
2399 }
2400}
2401
2403 const RegionCodeGenTy &OrderedOpGen,
2404 SourceLocation Loc, bool IsThreads) {
2405 if (!CGF.HaveInsertPoint())
2406 return;
2407 // __kmpc_ordered(ident_t *, gtid);
2408 // OrderedOpGen();
2409 // __kmpc_end_ordered(ident_t *, gtid);
2410 // Prepare arguments and build a call to __kmpc_ordered
2411 if (IsThreads) {
2412 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2413 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2414 CGM.getModule(), OMPRTL___kmpc_ordered),
2415 Args,
2416 OMPBuilder.getOrCreateRuntimeFunction(
2417 CGM.getModule(), OMPRTL___kmpc_end_ordered),
2418 Args);
2419 OrderedOpGen.setAction(Action);
2420 emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
2421 return;
2422 }
2423 emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
2424}
2425
2427 unsigned Flags;
2428 if (Kind == OMPD_for)
2429 Flags = OMP_IDENT_BARRIER_IMPL_FOR;
2430 else if (Kind == OMPD_sections)
2431 Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS;
2432 else if (Kind == OMPD_single)
2433 Flags = OMP_IDENT_BARRIER_IMPL_SINGLE;
2434 else if (Kind == OMPD_barrier)
2435 Flags = OMP_IDENT_BARRIER_EXPL;
2436 else
2437 Flags = OMP_IDENT_BARRIER_IMPL;
2438 return Flags;
2439}
2440
2442 CodeGenFunction &CGF, const OMPLoopDirective &S,
2443 OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const {
2444 // Check if the loop directive is actually a doacross loop directive. In this
2445 // case choose static, 1 schedule.
2446 if (llvm::any_of(
2447 S.getClausesOfKind<OMPOrderedClause>(),
2448 [](const OMPOrderedClause *C) { return C->getNumForLoops(); })) {
2449 ScheduleKind = OMPC_SCHEDULE_static;
2450 // Chunk size is 1 in this case.
2451 llvm::APInt ChunkSize(32, 1);
2452 ChunkExpr = IntegerLiteral::Create(
2453 CGF.getContext(), ChunkSize,
2454 CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
2455 SourceLocation());
2456 }
2457}
2458
2460 OpenMPDirectiveKind Kind, bool EmitChecks,
2461 bool ForceSimpleCall) {
2462 // Check if we should use the OMPBuilder
2463 auto *OMPRegionInfo =
2464 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo);
2465 if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2466 llvm::OpenMPIRBuilder::InsertPointTy AfterIP =
2467 cantFail(OMPBuilder.createBarrier(CGF.Builder, Kind, ForceSimpleCall,
2468 EmitChecks));
2469 CGF.Builder.restoreIP(AfterIP);
2470 return;
2471 }
2472
2473 if (!CGF.HaveInsertPoint())
2474 return;
2475 // Build call __kmpc_cancel_barrier(loc, thread_id);
2476 // Build call __kmpc_barrier(loc, thread_id);
2477 unsigned Flags = getDefaultFlagsForBarriers(Kind);
2478 // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc,
2479 // thread_id);
2480 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
2481 getThreadID(CGF, Loc)};
2482 if (OMPRegionInfo) {
2483 if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) {
2484 llvm::Value *Result = CGF.EmitRuntimeCall(
2485 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
2486 OMPRTL___kmpc_cancel_barrier),
2487 Args);
2488 if (EmitChecks) {
2489 // if (__kmpc_cancel_barrier()) {
2490 // exit from construct;
2491 // }
2492 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
2493 llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
2494 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
2495 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
2496 CGF.EmitBlock(ExitBB);
2497 // exit from construct;
2498 CodeGenFunction::JumpDest CancelDestination =
2499 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
2500 CGF.EmitBranchThroughCleanup(CancelDestination);
2501 CGF.EmitBlock(ContBB, /*IsFinished=*/true);
2502 }
2503 return;
2504 }
2505 }
2506 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2507 CGM.getModule(), OMPRTL___kmpc_barrier),
2508 Args);
2509}
2510
2512 Expr *ME, bool IsFatal) {
2513 llvm::Value *MVL = ME ? CGF.EmitScalarExpr(ME)
2514 : llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
2515 // Build call void __kmpc_error(ident_t *loc, int severity, const char
2516 // *message)
2517 llvm::Value *Args[] = {
2518 emitUpdateLocation(CGF, Loc, /*Flags=*/0, /*GenLoc=*/true),
2519 llvm::ConstantInt::get(CGM.Int32Ty, IsFatal ? 2 : 1),
2520 CGF.Builder.CreatePointerCast(MVL, CGM.Int8PtrTy)};
2521 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2522 CGM.getModule(), OMPRTL___kmpc_error),
2523 Args);
2524}
2525
2526/// Map the OpenMP loop schedule to the runtime enumeration.
///
/// \param ScheduleKind Schedule kind from the 'schedule' clause.
/// \param Chunked      Whether a chunk size was specified. Only consulted for
///                     the static schedule and in the fallback assertion.
/// \param Ordered      Selects the OMP_ord_* enumerator instead of the
///                     corresponding OMP_sch_* one.
/// \returns The OpenMPSchedType enumerator for the given combination.
2527static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
2528 bool Chunked, bool Ordered) {
2529 switch (ScheduleKind) {
2530 case OMPC_SCHEDULE_static:
2531 return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked)
2532 : (Ordered ? OMP_ord_static : OMP_sch_static);
  // Dynamic and guided map to the *_chunked enumerators whether or not
  // Chunked is set.
2533 case OMPC_SCHEDULE_dynamic:
2534 return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked;
2535 case OMPC_SCHEDULE_guided:
2536 return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked;
2537 case OMPC_SCHEDULE_runtime:
2538 return Ordered ? OMP_ord_runtime : OMP_sch_runtime;
2539 case OMPC_SCHEDULE_auto:
2540 return Ordered ? OMP_ord_auto : OMP_sch_auto;
  // NOTE(review): the case label for the following fallback arm is not
  // visible in this listing. The arm asserts that no chunk was specified and
  // falls back to the non-chunked static schedule.
2542 assert(!Chunked && "chunk was specified but schedule kind not known");
2543 return Ordered ? OMP_ord_static : OMP_sch_static;
2544 }
2545 llvm_unreachable("Unexpected runtime schedule");
2546}
2547
2548/// Map the OpenMP distribute schedule to the runtime enumeration.
/// The result is OMP_dist_sch_static_chunked when Chunked is true and
/// OMP_dist_sch_static otherwise.
2549static OpenMPSchedType
2551 // only static is allowed for dist_schedule
2552 return Chunked ? OMP_dist_sch_static_chunked : OMP_dist_sch_static;
2553}
2554
2556 bool Chunked) const {
  // Resolve the unordered runtime schedule and report whether it is the
  // plain (non-chunked) static schedule.
2557 OpenMPSchedType Schedule =
2558 getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2559 return Schedule == OMP_sch_static;
2560}
2561
2563 OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
  // dist_schedule flavor: true only for the non-chunked distribute static
  // schedule.
2564 OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2565 return Schedule == OMP_dist_sch_static;
2566}
2567
2569 bool Chunked) const {
  // Resolve the unordered runtime schedule and report whether it is the
  // chunked static schedule.
2570 OpenMPSchedType Schedule =
2571 getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2572 return Schedule == OMP_sch_static_chunked;
2573}
2574
2576 OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
  // dist_schedule flavor: true only for the chunked distribute static
  // schedule.
2577 OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2578 return Schedule == OMP_dist_sch_static_chunked;
2579}
2580
  // The schedule is resolved as unchunked and unordered, so the chunked
  // static value cannot be produced; every result other than plain static is
  // reported as dynamic.
2582 OpenMPSchedType Schedule =
2583 getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
2584 assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
2585 return Schedule != OMP_sch_static;
2586}
2587
/// Combine a runtime schedule value with the monotonic/nonmonotonic modifier
/// bits derived from the two schedule modifiers, and return the OR of both.
2588static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule,
2591 int Modifier = 0;
  // First modifier: monotonic/nonmonotonic select the modifier bits, while
  // simd turns a chunked static schedule into the balanced chunked one.
2592 switch (M1) {
2593 case OMPC_SCHEDULE_MODIFIER_monotonic:
2594 Modifier = OMP_sch_modifier_monotonic;
2595 break;
2596 case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2597 Modifier = OMP_sch_modifier_nonmonotonic;
2598 break;
2599 case OMPC_SCHEDULE_MODIFIER_simd:
2600 if (Schedule == OMP_sch_static_chunked)
2601 Schedule = OMP_sch_static_balanced_chunked;
2602 break;
2605 break;
2606 }
  // Second modifier is handled the same way; a monotonic/nonmonotonic value
  // here overwrites whatever the first modifier stored in Modifier.
2607 switch (M2) {
2608 case OMPC_SCHEDULE_MODIFIER_monotonic:
2609 Modifier = OMP_sch_modifier_monotonic;
2610 break;
2611 case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2612 Modifier = OMP_sch_modifier_nonmonotonic;
2613 break;
2614 case OMPC_SCHEDULE_MODIFIER_simd:
2615 if (Schedule == OMP_sch_static_chunked)
2616 Schedule = OMP_sch_static_balanced_chunked;
2617 break;
2620 break;
2621 }
2622 // OpenMP 5.0, 2.9.2 Worksharing-Loop Construct, Description.
2623 // If the static schedule kind is specified or if the ordered clause is
2624 // specified, and if the nonmonotonic modifier is not specified, the effect is
2625 // as if the monotonic modifier is specified. Otherwise, unless the monotonic
2626 // modifier is specified, the effect is as if the nonmonotonic modifier is
2627 // specified.
  // Only applied for OpenMP >= 5.0 and only when no modifier was chosen
  // above: every schedule outside the static family listed below defaults to
  // nonmonotonic.
2628 if (CGM.getLangOpts().OpenMP >= 50 && Modifier == 0) {
2629 if (!(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static ||
2630 Schedule == OMP_sch_static_balanced_chunked ||
2631 Schedule == OMP_ord_static_chunked || Schedule == OMP_ord_static ||
2632 Schedule == OMP_dist_sch_static_chunked ||
2633 Schedule == OMP_dist_sch_static))
2634 Modifier = OMP_sch_modifier_nonmonotonic;
2635 }
2636 return Schedule | Modifier;
2637}
2638
2641 const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
2642 bool Ordered, const DispatchRTInput &DispatchValues) {
  // Nothing is emitted when there is no insertion point.
2643 if (!CGF.HaveInsertPoint())
2644 return;
  // A non-null chunk value in DispatchValues selects the chunked mapping.
2645 OpenMPSchedType Schedule = getRuntimeSchedule(
2646 ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered);
  // Static schedules are only expected on this path for ordered loops.
2647 assert(Ordered ||
2648 (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
2649 Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked &&
2650 Schedule != OMP_sch_static_balanced_chunked));
2651 // Call __kmpc_dispatch_init(
2652 // ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
2653 // kmp_int[32|64] lower, kmp_int[32|64] upper,
2654 // kmp_int[32|64] stride, kmp_int[32|64] chunk);
2655
2656 // If the Chunk was not specified in the clause - use default value 1.
2657 llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk
2658 : CGF.Builder.getIntN(IVSize, 1);
2659 llvm::Value *Args[] = {
2660 emitUpdateLocation(CGF, Loc),
2661 getThreadID(CGF, Loc),
2662 CGF.Builder.getInt32(addMonoNonMonoModifier(
2663 CGM, Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type
2664 DispatchValues.LB, // Lower
2665 DispatchValues.UB, // Upper
2666 CGF.Builder.getIntN(IVSize, 1), // Stride
2667 Chunk // Chunk
2668 };
  // The callee is chosen by the induction variable width and signedness.
2669 CGF.EmitRuntimeCall(OMPBuilder.createDispatchInitFunction(IVSize, IVSigned),
2670 Args);
2671}
2672
2674 SourceLocation Loc) {
  // Nothing is emitted when there is no insertion point.
2675 if (!CGF.HaveInsertPoint())
2676 return;
2677 // Call __kmpc_dispatch_deinit(ident_t *loc, kmp_int32 tid);
2678 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2679 CGF.EmitRuntimeCall(OMPBuilder.createDispatchDeinitFunction(), Args);
2680}
2681
2683 CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId,
2684 llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule,
2686 const CGOpenMPRuntime::StaticRTInput &Values) {
  // Nothing is emitted when there is no insertion point.
2687 if (!CGF.HaveInsertPoint())
2688 return;
2689
  // Only non-ordered loops with a schedule from the static family are
  // accepted.
2690 assert(!Values.Ordered);
2691 assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
2692 Schedule == OMP_sch_static_balanced_chunked ||
2693 Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked ||
2694 Schedule == OMP_dist_sch_static ||
2695 Schedule == OMP_dist_sch_static_chunked);
2696
2697 // Call __kmpc_for_static_init(
2698 // ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
2699 // kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
2700 // kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
2701 // kmp_int[32|64] incr, kmp_int[32|64] chunk);
  // The presence of a chunk value must agree with the chunked/non-chunked
  // flavor of Schedule; both branches assert this.
2702 llvm::Value *Chunk = Values.Chunk;
2703 if (Chunk == nullptr) {
2704 assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static ||
2705 Schedule == OMP_dist_sch_static) &&
2706 "expected static non-chunked schedule");
2707 // If the Chunk was not specified in the clause - use default value 1.
2708 Chunk = CGF.Builder.getIntN(Values.IVSize, 1);
2709 } else {
2710 assert((Schedule == OMP_sch_static_chunked ||
2711 Schedule == OMP_sch_static_balanced_chunked ||
2712 Schedule == OMP_ord_static_chunked ||
2713 Schedule == OMP_dist_sch_static_chunked) &&
2714 "expected static chunked schedule");
2715 }
  // IL, LB, UB and ST are passed by address; the increment is the constant 1.
2716 llvm::Value *Args[] = {
2717 UpdateLocation,
2718 ThreadId,
2719 CGF.Builder.getInt32(addMonoNonMonoModifier(CGF.CGM, Schedule, M1,
2720 M2)), // Schedule type
2721 Values.IL.emitRawPointer(CGF), // &isLastIter
2722 Values.LB.emitRawPointer(CGF), // &LB
2723 Values.UB.emitRawPointer(CGF), // &UB
2724 Values.ST.emitRawPointer(CGF), // &Stride
2725 CGF.Builder.getIntN(Values.IVSize, 1), // Incr
2726 Chunk // Chunk
2727 };
2728 CGF.EmitRuntimeCall(ForStaticInitFunction, Args);
2729}
2730
2732 SourceLocation Loc,
2733 OpenMPDirectiveKind DKind,
2734 const OpenMPScheduleTy &ScheduleKind,
2735 const StaticRTInput &Values) {
  // Translate the clause schedule into the runtime enumeration; chunked-ness
  // and ordering are taken from Values.
2736 OpenMPSchedType ScheduleNum = getRuntimeSchedule(
2737 ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered);
2738 assert((isOpenMPWorksharingDirective(DKind) || (DKind == OMPD_loop)) &&
2739 "Expected loop-based or sections-based directive.");
  // The location is tagged as either a loop or a sections work item.
2740 llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc,
2742 ? OMP_IDENT_WORK_LOOP
2743 : OMP_IDENT_WORK_SECTIONS);
2744 llvm::Value *ThreadId = getThreadID(CGF, Loc);
2745 llvm::FunctionCallee StaticInitFunction =
2746 OMPBuilder.createForStaticInitFunction(Values.IVSize, Values.IVSigned,
2747 false);
  // Forward to the shared helper together with both schedule modifiers.
2749 emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
2750 ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values);
2751}
2752
2756 const CGOpenMPRuntime::StaticRTInput &Values) {
  // dist_schedule mapping: only chunked-ness matters.
2757 OpenMPSchedType ScheduleNum =
2758 getRuntimeSchedule(SchedKind, Values.Chunk != nullptr);
2759 llvm::Value *UpdatedLocation =
2760 emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE);
2761 llvm::Value *ThreadId = getThreadID(CGF, Loc);
2762 llvm::FunctionCallee StaticInitFunction;
  // The GPU flag is set only when compiling for a target device whose triple
  // is a GPU; it is forwarded when selecting the init function.
2763 bool isGPUDistribute =
2764 CGM.getLangOpts().OpenMPIsTargetDevice && CGM.getTriple().isGPU();
2765 StaticInitFunction = OMPBuilder.createForStaticInitFunction(
2766 Values.IVSize, Values.IVSigned, isGPUDistribute);
2767
2768 emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
2769 ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown,
2771}
2772
2774 SourceLocation Loc,
2775 OpenMPDirectiveKind DKind) {
2776 assert((DKind == OMPD_distribute || DKind == OMPD_for ||
2777 DKind == OMPD_sections) &&
2778 "Expected distribute, for, or sections directive kind");
  // Nothing is emitted when there is no insertion point.
2779 if (!CGF.HaveInsertPoint())
2780 return;
2781 // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
  // The location flag distinguishes distribute, loop and sections work.
2782 llvm::Value *Args[] = {
2783 emitUpdateLocation(CGF, Loc,
2785 (DKind == OMPD_target_teams_loop)
2786 ? OMP_IDENT_WORK_DISTRIBUTE
2787 : isOpenMPLoopDirective(DKind)
2788 ? OMP_IDENT_WORK_LOOP
2789 : OMP_IDENT_WORK_SECTIONS),
2790 getThreadID(CGF, Loc)};
  // Distribute directives compiled for a GPU target device use
  // __kmpc_distribute_static_fini; everything else uses
  // __kmpc_for_static_fini.
2792 if (isOpenMPDistributeDirective(DKind) &&
2793 CGM.getLangOpts().OpenMPIsTargetDevice && CGM.getTriple().isGPU())
2794 CGF.EmitRuntimeCall(
2795 OMPBuilder.getOrCreateRuntimeFunction(
2796 CGM.getModule(), OMPRTL___kmpc_distribute_static_fini),
2797 Args);
2798 else
2799 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2800 CGM.getModule(), OMPRTL___kmpc_for_static_fini),
2801 Args);
2802}
2803
2805 SourceLocation Loc,
2806 unsigned IVSize,
2807 bool IVSigned) {
  // Nothing is emitted when there is no insertion point.
2808 if (!CGF.HaveInsertPoint())
2809 return;
2810 // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
  // The concrete callee is picked by induction variable width and
  // signedness.
2811 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2812 CGF.EmitRuntimeCall(OMPBuilder.createDispatchFiniFunction(IVSize, IVSigned),
2813 Args);
2814}
2815
2817 SourceLocation Loc, unsigned IVSize,
2818 bool IVSigned, Address IL,
2819 Address LB, Address UB,
2820 Address ST) {
2821 // Call __kmpc_dispatch_next(
2822 // ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
2823 // kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
2824 // kmp_int[32|64] *p_stride);
  // IL, LB, UB and ST are passed by address.
2825 llvm::Value *Args[] = {
2826 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2827 IL.emitRawPointer(CGF), // &isLastIter
2828 LB.emitRawPointer(CGF), // &Lower
2829 UB.emitRawPointer(CGF), // &Upper
2830 ST.emitRawPointer(CGF) // &Stride
2831 };
2832 llvm::Value *Call = CGF.EmitRuntimeCall(
2833 OMPBuilder.createDispatchNextFunction(IVSize, IVSigned), Args);
  // The call result is converted from a signed 32-bit integer to bool before
  // being returned.
2834 return CGF.EmitScalarConversion(
2835 Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1),
2836 CGF.getContext().BoolTy, Loc);
2837}
2838
2840 const Expr *Message,
2841 SourceLocation Loc) {
  // Without a message expression the result is a null void pointer;
  // otherwise it is the evaluated expression.
2842 if (!Message)
2843 return llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
2844 return CGF.EmitScalarExpr(Message);
2845}
2846
2847llvm::Value *
2849 SourceLocation Loc) {
2850 // OpenMP 6.0, 10.4: "If no severity clause is specified then the effect is
2851 // as if sev-level is fatal."
  // Encoded as a 32-bit constant: 1 for warning, 2 for every other value.
2852 return llvm::ConstantInt::get(CGM.Int32Ty,
2853 Severity == OMPC_SEVERITY_warning ? 1 : 2);
2854}
2855
2857 CodeGenFunction &CGF, llvm::Value *NumThreads, SourceLocation Loc,
2859 SourceLocation SeverityLoc, const Expr *Message,
2860 SourceLocation MessageLoc) {
  // Nothing is emitted when there is no insertion point.
2861 if (!CGF.HaveInsertPoint())
2862 return;
  // Base arguments: location, thread id, and NumThreads cast to a signed
  // 32-bit integer.
2864 {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2865 CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)});
2866 // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
2867 // or __kmpc_push_num_threads_strict(&loc, global_tid, num_threads, severity,
2868 // message) if strict modifier is used.
2869 RuntimeFunction FnID = OMPRTL___kmpc_push_num_threads;
2870 if (Modifier == OMPC_NUMTHREADS_strict) {
  // The strict entry point takes two extra arguments: severity, then
  // message.
2871 FnID = OMPRTL___kmpc_push_num_threads_strict;
2872 Args.push_back(emitSeverityClause(Severity, SeverityLoc));
2873 Args.push_back(emitMessageClause(CGF, Message, MessageLoc));
2874 }
2875 CGF.EmitRuntimeCall(
2876 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), FnID), Args);
2877}
2878
2880 ProcBindKind ProcBind,
2881 SourceLocation Loc) {
  // Nothing is emitted when there is no insertion point.
2882 if (!CGF.HaveInsertPoint())
2883 return;
2884 assert(ProcBind != OMP_PROC_BIND_unknown && "Unsupported proc_bind value.");
2885 // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind)
  // The proc_bind kind is passed as an integer constant of CGM.IntTy.
2886 llvm::Value *Args[] = {
2887 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2888 llvm::ConstantInt::get(CGM.IntTy, unsigned(ProcBind), /*isSigned=*/true)};
2889 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2890 CGM.getModule(), OMPRTL___kmpc_push_proc_bind),
2891 Args);
2892}
2893
2895 SourceLocation Loc, llvm::AtomicOrdering AO) {
  // NOTE(review): AO is not referenced anywhere in this body.
  // With the OpenMPIRBuilder language option the flush is created through
  // OMPBuilder; otherwise a direct __kmpc_flush runtime call is emitted.
2896 if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2897 OMPBuilder.createFlush(CGF.Builder);
2898 } else {
2899 if (!CGF.HaveInsertPoint())
2900 return;
2901 // Build call void __kmpc_flush(ident_t *loc)
2902 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2903 CGM.getModule(), OMPRTL___kmpc_flush),
2904 emitUpdateLocation(CGF, Loc));
2905 }
2906}
2907
namespace {
/// Positions of the fields inside the kmp_task_t record. The enumerators are
/// used as offsets from the record's first field, so their numeric values are
/// spelled out explicitly.
enum KmpTaskTFields {
  /// Pointer to the list of shared variables.
  KmpTaskTShareds = 0,
  /// Entry routine of the task.
  KmpTaskTRoutine = 1,
  /// Partition id, used for untied tasks.
  KmpTaskTPartId = 2,
  /// Function that calls the destructors of the private variables.
  Data1 = 3,
  /// Priority of the task.
  Data2 = 4,
  /// Lower bound (taskloops only).
  KmpTaskTLowerBound = 5,
  /// Upper bound (taskloops only).
  KmpTaskTUpperBound = 6,
  /// Stride (taskloops only).
  KmpTaskTStride = 7,
  /// "Is last iteration" flag (taskloops only).
  KmpTaskTLastIter = 8,
  /// Reduction data (taskloops only).
  KmpTaskTReductions = 9,
};
} // anonymous namespace
2933
2935 // If we are in simd mode or there are no entries, we don't need to do
2936 // anything.
2937 if (CGM.getLangOpts().OpenMPSimd || OMPBuilder.OffloadInfoManager.empty())
2938 return;
2939
  // Callback passed to the metadata creation call below; it turns each
  // reported error kind into a clang diagnostic.
2940 llvm::OpenMPIRBuilder::EmitMetadataErrorReportFunctionTy &&ErrorReportFn =
2941 [this](llvm::OpenMPIRBuilder::EmitMetadataErrorKind Kind,
2942 const llvm::TargetRegionEntryInfo &EntryInfo) -> void {
2943 SourceLocation Loc;
  // Except for the global-var-link error, try to recover a source location:
  // find the file whose unique id matches the entry's device and file ids
  // and translate the entry's line, using column 1. Loc stays
  // default-constructed if no file matches.
2944 if (Kind != llvm::OpenMPIRBuilder::EMIT_MD_GLOBAL_VAR_LINK_ERROR) {
2945 for (auto I = CGM.getContext().getSourceManager().fileinfo_begin(),
2946 E = CGM.getContext().getSourceManager().fileinfo_end();
2947 I != E; ++I) {
2948 if (I->getFirst().getUniqueID().getDevice() == EntryInfo.DeviceID &&
2949 I->getFirst().getUniqueID().getFile() == EntryInfo.FileID) {
2950 Loc = CGM.getContext().getSourceManager().translateFileLineCol(
2951 I->getFirst(), EntryInfo.Line, 1);
2952 break;
2953 }
2954 }
2955 }
2956 switch (Kind) {
2957 case llvm::OpenMPIRBuilder::EMIT_MD_TARGET_REGION_ERROR: {
2958 CGM.getDiags().Report(Loc,
2959 diag::err_target_region_offloading_entry_incorrect)
2960 << EntryInfo.ParentName;
2961 } break;
2962 case llvm::OpenMPIRBuilder::EMIT_MD_DECLARE_TARGET_ERROR: {
2963 CGM.getDiags().Report(
2964 Loc, diag::err_target_var_offloading_entry_incorrect_with_parent)
2965 << EntryInfo.ParentName;
2966 } break;
  // The two global-variable errors are reported without a location.
2967 case llvm::OpenMPIRBuilder::EMIT_MD_GLOBAL_VAR_LINK_ERROR: {
2968 CGM.getDiags().Report(diag::err_target_var_offloading_entry_incorrect);
2969 } break;
2970 case llvm::OpenMPIRBuilder::EMIT_MD_GLOBAL_VAR_INDIRECT_ERROR: {
2971 unsigned DiagID = CGM.getDiags().getCustomDiagID(
2972 DiagnosticsEngine::Error, "Offloading entry for indirect declare "
2973 "target variable is incorrect: the "
2974 "address is invalid.");
2975 CGM.getDiags().Report(DiagID);
2976 } break;
2977 }
2978 };
2979
2980 OMPBuilder.createOffloadEntriesAndInfoMetadata(ErrorReportFn);
2981}
2982
  // The routine-entry pointer type is built once; later calls find
  // KmpRoutineEntryPtrTy already set and do nothing.
2984 if (!KmpRoutineEntryPtrTy) {
2985 // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.
2986 ASTContext &C = CGM.getContext();
2987 QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};
  // Both the AST pointer type and its converted LLVM type are cached.
2989 KmpRoutineEntryPtrQTy = C.getPointerType(
2990 C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI));
2991 KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy);
2992 }
2993}
2994
2995namespace {
2996struct PrivateHelpersTy {
2997 PrivateHelpersTy(const Expr *OriginalRef, const VarDecl *Original,
2998 const VarDecl *PrivateCopy, const VarDecl *PrivateElemInit)
2999 : OriginalRef(OriginalRef), Original(Original), PrivateCopy(PrivateCopy),
3000 PrivateElemInit(PrivateElemInit) {}
3001 PrivateHelpersTy(const VarDecl *Original) : Original(Original) {}
3002 const Expr *OriginalRef = nullptr;
3003 const VarDecl *Original = nullptr;
3004 const VarDecl *PrivateCopy = nullptr;
3005 const VarDecl *PrivateElemInit = nullptr;
3006 bool isLocalPrivate() const {
3007 return !OriginalRef && !PrivateCopy && !PrivateElemInit;
3008 }
3009};
3010typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy;
3011} // anonymous namespace
3012
3013static bool isAllocatableDecl(const VarDecl *VD) {
3014 const VarDecl *CVD = VD->getCanonicalDecl();
3015 if (!CVD->hasAttr<OMPAllocateDeclAttr>())
3016 return false;
3017 const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
3018 // Use the default allocation.
3019 return !(AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc &&
3020 !AA->getAllocator());
3021}
3022
/// Build the implicit ".kmp_privates.t" record with one field per entry of
/// Privates. Returns nullptr when Privates is empty.
3023static RecordDecl *
3025 if (!Privates.empty()) {
3026 ASTContext &C = CGM.getContext();
3027 // Build struct .kmp_privates_t. {
3028 // /* private vars */
3029 // };
3030 RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t");
3031 RD->startDefinition();
3032 for (const auto &Pair : Privates) {
3033 const VarDecl *VD = Pair.second.Original;
3035 // If the private variable is a local variable with lvalue ref type,
3036 // allocate the pointer instead of the pointee type.
  // For local privates, one pointer level is added for an lvalue-reference
  // type and one more for an allocatable declaration; both may apply.
3037 if (Pair.second.isLocalPrivate()) {
3038 if (VD->getType()->isLValueReferenceType())
3039 Type = C.getPointerType(Type);
3040 if (isAllocatableDecl(VD))
3041 Type = C.getPointerType(Type);
3042 }
  // Copy every AlignedAttr of the variable onto the field FD.
3044 if (VD->hasAttrs()) {
3045 for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()),
3046 E(VD->getAttrs().end());
3047 I != E; ++I)
3048 FD->addAttr(*I);
3049 }
3050 }
3051 RD->completeDefinition();
3052 return RD;
3053 }
3054 return nullptr;
3055}
3056
/// Build the implicit "kmp_task_t" record, including the extra taskloop
/// fields when Kind is a taskloop directive.
3057static RecordDecl *
3059 QualType KmpInt32Ty,
3060 QualType KmpRoutineEntryPointerQTy) {
3061 ASTContext &C = CGM.getContext();
3062 // Build struct kmp_task_t {
3063 // void * shareds;
3064 // kmp_routine_entry_t routine;
3065 // kmp_int32 part_id;
3066 // kmp_cmplrdata_t data1;
3067 // kmp_cmplrdata_t data2;
3068 // For taskloops additional fields:
3069 // kmp_uint64 lb;
3070 // kmp_uint64 ub;
3071 // kmp_int64 st;
3072 // kmp_int32 liter;
3073 // void * reductions;
3074 // };
  // kmp_cmplrdata_t is a union of a kmp_int32 and a routine-entry pointer.
3075 RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TagTypeKind::Union);
3076 UD->startDefinition();
3077 addFieldToRecordDecl(C, UD, KmpInt32Ty);
3078 addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy);
3079 UD->completeDefinition();
3080 CanQualType KmpCmplrdataTy = C.getCanonicalTagType(UD);
  // Fields are added in the order shown in the struct comment above.
3081 RecordDecl *RD = C.buildImplicitRecord("kmp_task_t");
3082 RD->startDefinition();
3083 addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3084 addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
3085 addFieldToRecordDecl(C, RD, KmpInt32Ty);
3086 addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
3087 addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
3088 if (isOpenMPTaskLoopDirective(Kind)) {
  // Taskloop-only tail: lb and ub are unsigned 64-bit, st is signed 64-bit,
  // followed by the kmp_int32 liter flag and the reductions pointer.
3089 QualType KmpUInt64Ty =
3090 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
3091 QualType KmpInt64Ty =
3092 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
3093 addFieldToRecordDecl(C, RD, KmpUInt64Ty);
3094 addFieldToRecordDecl(C, RD, KmpUInt64Ty);
3095 addFieldToRecordDecl(C, RD, KmpInt64Ty);
3096 addFieldToRecordDecl(C, RD, KmpInt32Ty);
3097 addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3098 }
3099 RD->completeDefinition();
3100 return RD;
3101}
3102
/// Build the implicit "kmp_task_t_with_privates" record: the task data
/// followed, when there are privates, by the privates record.
3103static RecordDecl *
3106 ASTContext &C = CGM.getContext();
3107 // Build struct kmp_task_t_with_privates {
3108 // kmp_task_t task_data;
3109 // .kmp_privates_t. privates;
3110 // };
3111 RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates");
3112 RD->startDefinition();
3113 addFieldToRecordDecl(C, RD, KmpTaskTQTy);
  // The second field exists only when a privates record was created, i.e.
  // createPrivatesRecordDecl returned non-null.
3114 if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates))
3115 addFieldToRecordDecl(C, RD, C.getCanonicalTagType(PrivateRD));
3116 RD->completeDefinition();
3117 return RD;
3118}
3119
3120/// Emit a proxy function which accepts kmp_task_t as the second
3121/// argument.
3122/// \code
3123/// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
3124/// TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt,
3125/// For taskloops:
3126/// tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
3127/// tt->reductions, tt->shareds);
3128/// return 0;
3129/// }
3130/// \endcode
3131static llvm::Function *
3133 OpenMPDirectiveKind Kind, QualType KmpInt32Ty,
3134 QualType KmpTaskTWithPrivatesPtrQTy,
3135 QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
3136 QualType SharedsPtrTy, llvm::Function *TaskFunction,
3137 llvm::Value *TaskPrivatesMap) {
3138 ASTContext &C = CGM.getContext();
  // Two implicit parameters: the gtid and a restrict-qualified pointer to
  // the task-with-privates record.
3139 auto *GtidArg =
3140 ImplicitParamDecl::Create(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3141 KmpInt32Ty, ImplicitParamKind::Other);
3142 auto *TaskTypeArg = ImplicitParamDecl::Create(
3143 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3144 KmpTaskTWithPrivatesPtrQTy.withRestrict(), ImplicitParamKind::Other);
3145 FunctionArgList Args{GtidArg, TaskTypeArg};
3146 const auto &TaskEntryFnInfo =
3147 CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
3148 llvm::FunctionType *TaskEntryTy =
3149 CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
  // The entry function gets internal linkage and is marked as not recursing.
3150 std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""});
3151 auto *TaskEntry = llvm::Function::Create(
3152 TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
3153 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo);
3154 if (!CGM.getCodeGenOpts().SampleProfileFile.empty())
3155 TaskEntry->addFnAttr("sample-profile-suffix-elision-policy", "selected");
3156 TaskEntry->setDoesNotRecurse();
3157 CodeGenFunction CGF(CGM);
3158 CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args,
3159 Loc, Loc);
3160
3161 // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map,
3162 // tt,
3163 // For taskloops:
3164 // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
3165 // tt->task_data.shareds);
3166 llvm::Value *GtidParam = CGF.EmitLoadOfScalar(
3167 CGF.GetAddrOfLocalVar(GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc);
  // TDBase is the whole task-with-privates object; Base is its first field,
  // the task data record.
3168 LValue TDBase = CGF.EmitLoadOfPointerLValue(
3169 CGF.GetAddrOfLocalVar(TaskTypeArg),
3170 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3171 const auto *KmpTaskTWithPrivatesQTyRD =
3172 KmpTaskTWithPrivatesQTy->castAsRecordDecl();
3173 LValue Base =
3174 CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
3175 const auto *KmpTaskTQTyRD = KmpTaskTQTy->castAsRecordDecl();
  // part_id is passed by address, not by value.
3176 auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
3177 LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
3178 llvm::Value *PartidParam = PartIdLVal.getPointer(CGF);
3179
  // The shareds pointer is loaded and cast to the memory type of
  // SharedsPtrTy.
3180 auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
3181 LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
3182 llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3183 CGF.EmitLoadOfScalar(SharedsLVal, Loc),
3184 CGF.ConvertTypeForMem(SharedsPtrTy));
3185
  // The privates field is optional: when the record has no second field a
  // null pointer is passed instead of its address.
3186 auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
3187 llvm::Value *PrivatesParam;
3188 if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
3189 LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
3190 PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3191 PrivatesLVal.getPointer(CGF), CGF.VoidPtrTy);
3192 } else {
3193 PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
3194 }
3195
  // Arguments common to all task kinds; the last one is the task object
  // itself as a void pointer.
3196 llvm::Value *CommonArgs[] = {
3197 GtidParam, PartidParam, PrivatesParam, TaskPrivatesMap,
3198 CGF.Builder
3199 .CreatePointerBitCastOrAddrSpaceCast(TDBase.getAddress(),
3200 CGF.VoidPtrTy, CGF.Int8Ty)
3201 .emitRawPointer(CGF)};
3202 SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs),
3203 std::end(CommonArgs));
  // Taskloops additionally pass lb, ub, stride, last-iteration flag and
  // reductions, each loaded by value from the task data.
3204 if (isOpenMPTaskLoopDirective(Kind)) {
3205 auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound);
3206 LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI);
3207 llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc);
3208 auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound);
3209 LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI);
3210 llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc);
3211 auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride);
3212 LValue StLVal = CGF.EmitLValueForField(Base, *StFI);
3213 llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc);
3214 auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
3215 LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
3216 llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc);
3217 auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions);
3218 LValue RLVal = CGF.EmitLValueForField(Base, *RFI);
3219 llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc);
3220 CallArgs.push_back(LBParam);
3221 CallArgs.push_back(UBParam);
3222 CallArgs.push_back(StParam);
3223 CallArgs.push_back(LIParam);
3224 CallArgs.push_back(RParam);
3225 }
  // The shareds pointer is always the final argument.
3226 CallArgs.push_back(SharedsParam);
3227
3228 CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction,
3229 CallArgs);
  // The proxy always returns 0.
3230 CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)),
3231 CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty));
3232 CGF.FinishFunction();
3233 return TaskEntry;
3234}
3235
3237 SourceLocation Loc,
3238 QualType KmpInt32Ty,
3239 QualType KmpTaskTWithPrivatesPtrQTy,
3240 QualType KmpTaskTWithPrivatesQTy) {
3241 ASTContext &C = CGM.getContext();
  // Two implicit parameters: the gtid and a restrict-qualified pointer to
  // the task-with-privates record.
3242 auto *GtidArg =
3243 ImplicitParamDecl::Create(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3244 KmpInt32Ty, ImplicitParamKind::Other);
3245 auto *TaskTypeArg = ImplicitParamDecl::Create(
3246 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3247 KmpTaskTWithPrivatesPtrQTy.withRestrict(), ImplicitParamKind::Other);
3248 FunctionArgList Args{GtidArg, TaskTypeArg};
3249 const auto &DestructorFnInfo =
3250 CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
3251 llvm::FunctionType *DestructorFnTy =
3252 CGM.getTypes().GetFunctionType(DestructorFnInfo);
  // The destructor function gets internal linkage and is marked as not
  // recursing.
3253 std::string Name =
3254 CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""});
3255 auto *DestructorFn =
3256 llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage,
3257 Name, &CGM.getModule());
3258 CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn,
3259 DestructorFnInfo);
3260 if (!CGM.getCodeGenOpts().SampleProfileFile.empty())
3261 DestructorFn->addFnAttr("sample-profile-suffix-elision-policy", "selected");
3262 DestructorFn->setDoesNotRecurse();
3263 CodeGenFunction CGF(CGM);
3264 CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo,
3265 Args, Loc, Loc);
3266
3267 LValue Base = CGF.EmitLoadOfPointerLValue(
3268 CGF.GetAddrOfLocalVar(TaskTypeArg),
3269 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3270 const auto *KmpTaskTWithPrivatesQTyRD =
3271 KmpTaskTWithPrivatesQTy->castAsRecordDecl();
  // Step to the second field of the record (the privates) and rebase on it.
  // NOTE(review): this dereferences the second field unconditionally, so the
  // record is presumably expected to have a privates field here - confirm
  // against the caller.
3272 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
3273 Base = CGF.EmitLValueForField(Base, *FI);
  // Register a destroy cleanup for every privates field whose type reports a
  // destruction kind.
3274 for (const auto *Field : FI->getType()->castAsRecordDecl()->fields()) {
3275 if (QualType::DestructionKind DtorKind =
3276 Field->getType().isDestructedType()) {
3277 LValue FieldLValue = CGF.EmitLValueForField(Base, Field);
3278 CGF.pushDestroy(DtorKind, FieldLValue.getAddress(), Field->getType());
3279 }
3280 }
3281 CGF.FinishFunction();
3282 return DestructorFn;
3283}
3284
3285/// Emit a privates mapping function for correct handling of private and
3286/// firstprivate variables.
3287/// \code
3288/// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1>
3289/// **noalias priv1,..., <tyn> **noalias privn) {
3290/// *priv1 = &.privates.priv1;
3291/// ...;
3292/// *privn = &.privates.privn;
3293/// }
3294/// \endcode
3295static llvm::Value *
3297 const OMPTaskDataTy &Data, QualType PrivatesQTy,
3299 ASTContext &C = CGM.getContext();
3300 FunctionArgList Args;
  // Args[0] is the pointer to the privates record.
3301 auto *TaskPrivatesArg = ImplicitParamDecl::Create(
3302 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3303 C.getPointerType(PrivatesQTy).withConst().withRestrict(),
3305 Args.push_back(TaskPrivatesArg);
  // PrivateVarsPos maps each variable to the index of its out-parameter in
  // Args. Counting starts at 1 because index 0 is the privates record.
3306 llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, unsigned> PrivateVarsPos;
3307 unsigned Counter = 1;
  // One pointer-to-pointer parameter per private, firstprivate and
  // lastprivate variable, in that order.
3308 for (const Expr *E : Data.PrivateVars) {
3309 Args.push_back(ImplicitParamDecl::Create(
3310 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3311 C.getPointerType(C.getPointerType(E->getType()))
3312 .withConst()
3313 .withRestrict(),
3315 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3316 PrivateVarsPos[VD] = Counter;
3317 ++Counter;
3318 }
3319 for (const Expr *E : Data.FirstprivateVars) {
3320 Args.push_back(ImplicitParamDecl::Create(
3321 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3322 C.getPointerType(C.getPointerType(E->getType()))
3323 .withConst()
3324 .withRestrict(),
3326 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3327 PrivateVarsPos[VD] = Counter;
3328 ++Counter;
3329 }
3330 for (const Expr *E : Data.LastprivateVars) {
3331 Args.push_back(ImplicitParamDecl::Create(
3332 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3333 C.getPointerType(C.getPointerType(E->getType()))
3334 .withConst()
3335 .withRestrict(),
3337 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3338 PrivateVarsPos[VD] = Counter;
3339 ++Counter;
3340 }
  // Local privates: an extra pointer level is added for an lvalue-reference
  // type and for an allocatable declaration before the parameter type is
  // formed.
3341 for (const VarDecl *VD : Data.PrivateLocals) {
3343 if (VD->getType()->isLValueReferenceType())
3344 Ty = C.getPointerType(Ty);
3345 if (isAllocatableDecl(VD))
3346 Ty = C.getPointerType(Ty);
3347 Args.push_back(ImplicitParamDecl::Create(
3348 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3349 C.getPointerType(C.getPointerType(Ty)).withConst().withRestrict(),
3351 PrivateVarsPos[VD] = Counter;
3352 ++Counter;
3353 }
3354 const auto &TaskPrivatesMapFnInfo =
3355 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
3356 llvm::FunctionType *TaskPrivatesMapTy =
3357 CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo);
3358 std::string Name =
3359 CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""});
3360 auto *TaskPrivatesMap = llvm::Function::Create(
3361 TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name,
3362 &CGM.getModule());
3363 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap,
3364 TaskPrivatesMapFnInfo);
3365 if (!CGM.getCodeGenOpts().SampleProfileFile.empty())
3366 TaskPrivatesMap->addFnAttr("sample-profile-suffix-elision-policy",
3367 "selected");
  // When optimizing, drop NoInline/OptimizeNone and mark the map function
  // AlwaysInline instead.
3368 if (CGM.getCodeGenOpts().OptimizationLevel != 0) {
3369 TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline);
3370 TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone);
3371 TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline);
3372 }
3373 CodeGenFunction CGF(CGM);
3374 CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap,
3375 TaskPrivatesMapFnInfo, Args, Loc, Loc);
3376
3377 // *privi = &.privates.privi;
3378 LValue Base = CGF.EmitLoadOfPointerLValue(
3379 CGF.GetAddrOfLocalVar(TaskPrivatesArg),
3380 TaskPrivatesArg->getType()->castAs<PointerType>());
3381 const auto *PrivatesQTyRD = PrivatesQTy->castAsRecordDecl();
  // Walk the record fields in step with Privates: the i-th field is assumed
  // to correspond to Privates[i], whose original variable selects the
  // out-parameter that receives the field's address.
3382 Counter = 0;
3383 for (const FieldDecl *Field : PrivatesQTyRD->fields()) {
3384 LValue FieldLVal = CGF.EmitLValueForField(Base, Field);
3385 const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]];
3386 LValue RefLVal =
3387 CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType());
3388 LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue(
3389 RefLVal.getAddress(), RefLVal.getType()->castAs<PointerType>());
3390 CGF.EmitStoreOfScalar(FieldLVal.getPointer(CGF), RefLoadLVal);
3391 ++Counter;
3392 }
3393 CGF.FinishFunction();
3394 return TaskPrivatesMap;
3395}
3396
3397/// Emit initialization for private variables in task-based directives.
///
/// Walks \p Privates in step with the fields of the privates record (the
/// second field of \p KmpTaskTWithPrivatesQTyRD) and, for every entry that is
/// not a local private and has an initializer, emits that initializer into the
/// matching field. When \p ForDup is true only initializers that are
/// non-trivial CXXConstructExprs are emitted.
///
/// \param KmpTaskSharedsPtr Address of the shareds block. It becomes the
/// source base for non-target tasks when there are firstprivates and \p ForDup
/// is set, and for target tasks whenever the address is valid.
/// \param TDBase LValue of the task-with-privates record being initialized.
/// \param ForDup True when called from the task duplication function, false
/// when called while initializing a freshly allocated task.
3399 const OMPExecutableDirective &D,
3400 Address KmpTaskSharedsPtr, LValue TDBase,
3401 const RecordDecl *KmpTaskTWithPrivatesQTyRD,
3402 QualType SharedsTy, QualType SharedsPtrTy,
3403 const OMPTaskDataTy &Data,
3404 ArrayRef<PrivateDataTy> Privates, bool ForDup) {
3405 ASTContext &C = CGF.getContext();
 // The privates record is the second field of the task-with-privates record.
3406 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
3407 LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI);
3408 OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind())
3409 ? OMPD_taskloop
3410 : OMPD_task;
3411 const CapturedStmt &CS = *D.getCapturedStmt(Kind);
3412 CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS);
3413 LValue SrcBase;
3414 bool IsTargetTask =
3415 isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) ||
3416 isOpenMPTargetExecutionDirective(D.getDirectiveKind());
3417 // For target-based directives skip 4 firstprivate arrays BasePointersArray,
3418 // PointersArray, SizesArray, and MappersArray. The original variables for
3419 // these arrays are not captured and we get their addresses explicitly.
3420 if ((!IsTargetTask && !Data.FirstprivateVars.empty() && ForDup) ||
3421 (IsTargetTask && KmpTaskSharedsPtr.isValid())) {
3422 SrcBase = CGF.MakeAddrLValue(
3424 KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy),
3425 CGF.ConvertTypeForMem(SharedsTy)),
3426 SharedsTy);
3427 }
 // Re-point FI at the first field of the privates record so that it advances
 // together with the entries of Privates in the loop below.
3428 FI = FI->getType()->castAsRecordDecl()->field_begin();
3429 for (const PrivateDataTy &Pair : Privates) {
3430 // Do not initialize private locals.
3431 if (Pair.second.isLocalPrivate()) {
3432 ++FI;
3433 continue;
3434 }
3435 const VarDecl *VD = Pair.second.PrivateCopy;
3436 const Expr *Init = VD->getAnyInitializer();
 // Outside the dup function every initializer is emitted; inside it only
 // non-trivial constructor calls are.
3437 if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) &&
3438 !CGF.isTrivialInitializer(Init)))) {
3439 LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
 // A non-null PrivateElemInit means the initializer reads the original
 // variable, so an lvalue for that original has to be produced first.
3440 if (const VarDecl *Elem = Pair.second.PrivateElemInit) {
3441 const VarDecl *OriginalVD = Pair.second.Original;
3442 // Check if the variable is the target-based BasePointersArray,
3443 // PointersArray, SizesArray, or MappersArray.
3444 LValue SharedRefLValue;
3445 QualType Type = PrivateLValue.getType();
3446 const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD);
3447 if (IsTargetTask && !SharedField) {
3448 assert(isa<ImplicitParamDecl>(OriginalVD) &&
3449 isa<CapturedDecl>(OriginalVD->getDeclContext()) &&
3450 cast<CapturedDecl>(OriginalVD->getDeclContext())
3451 ->getNumParams() == 0 &&
3453 cast<CapturedDecl>(OriginalVD->getDeclContext())
3454 ->getDeclContext()) &&
3455 "Expected artificial target data variable.");
3456 SharedRefLValue =
3457 CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type);
3458 } else if (ForDup) {
 // In the dup function the original is read from the source task's
 // shareds, re-aligned to the declared alignment of the original.
3459 SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField);
3460 SharedRefLValue = CGF.MakeAddrLValue(
3461 SharedRefLValue.getAddress().withAlignment(
3462 C.getDeclAlign(OriginalVD)),
3463 SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl),
3464 SharedRefLValue.getTBAAInfo());
3465 } else if (CGF.LambdaCaptureFields.count(
3466 Pair.second.Original->getCanonicalDecl()) > 0 ||
3467 isa_and_nonnull<BlockDecl>(CGF.CurCodeDecl)) {
3468 SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
3469 } else {
3470 // Processing for implicitly captured variables.
3471 InlinedOpenMPRegionRAII Region(
3472 CGF, [](CodeGenFunction &, PrePostActionTy &) {}, OMPD_unknown,
3473 /*HasCancel=*/false, /*NoInheritance=*/true);
3474 SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
3475 }
3476 if (Type->isArrayType()) {
3477 // Initialize firstprivate array.
3479 // Perform simple memcpy.
3480 CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type);
3481 } else {
3482 // Initialize firstprivate array using element-by-element
3483 // initialization.
3485 PrivateLValue.getAddress(), SharedRefLValue.getAddress(), Type,
3486 [&CGF, Elem, Init, &CapturesInfo](Address DestElement,
3487 Address SrcElement) {
3488 // Clean up any temporaries needed by the initialization.
3489 CodeGenFunction::OMPPrivateScope InitScope(CGF);
3490 InitScope.addPrivate(Elem, SrcElement);
3491 (void)InitScope.Privatize();
3492 // Emit initialization for single element.
3493 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(
3494 CGF, &CapturesInfo);
3495 CGF.EmitAnyExprToMem(Init, DestElement,
3496 Init->getType().getQualifiers(),
3497 /*IsInitializer=*/false);
3498 });
3499 }
3500 } else {
 // Non-array case: map Elem onto the original's address and emit the
 // initializer directly into the private field.
3501 CodeGenFunction::OMPPrivateScope InitScope(CGF);
3502 InitScope.addPrivate(Elem, SharedRefLValue.getAddress());
3503 (void)InitScope.Privatize();
3504 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
3505 CGF.EmitExprAsInit(Init, VD, PrivateLValue,
3506 /*capturedByInit=*/false);
3507 }
3508 } else {
 // No element initializer: the initializer does not depend on the original.
3509 CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
3510 }
3511 }
3512 ++FI;
3513 }
3514}
3515
3516/// Check if duplication function is required for taskloops.
///
/// Scans \p Privates, ignoring local privates, and inspects the initializer of
/// each private copy. The result starts out false and the scan stops as soon
/// as one entry sets it.
/// NOTE(review): part of the condition below is not present in this listing
/// (the embedded numbering skips 3526); only the CXXConstructExpr test is
/// visible here.
/// \return true if some private copy requires initialization in the
/// duplication function, false otherwise.
3519 bool InitRequired = false;
3520 for (const PrivateDataTy &Pair : Privates) {
3521 if (Pair.second.isLocalPrivate())
3522 continue;
3523 const VarDecl *VD = Pair.second.PrivateCopy;
3524 const Expr *Init = VD->getAnyInitializer();
3525 InitRequired = InitRequired || (isa_and_nonnull<CXXConstructExpr>(Init) &&
3527 if (InitRequired)
3528 break;
3529 }
3530 return InitRequired;
3531}
3532
3533
3534/// Emit task_dup function (for initialization of
3535/// private/firstprivate/lastprivate vars and last_iter flag)
3536/// \code
3537/// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int
3538/// lastpriv) {
3539/// // setup lastprivate flag
3540/// task_dst->last = lastpriv;
3541/// // could be constructor calls here...
3542/// }
3543/// \endcode
///
/// The function is created with internal linkage under the name produced by
/// getName({"omp_task_dup", ""}) and is marked as non-recursing. The store of
/// the last-iteration flag is emitted only when \p WithLastIter is true. The
/// private copies in the destination task are initialized through
/// emitPrivatesInit with ForDup set; when there are firstprivates, the shareds
/// pointer is loaded from the source task first.
/// \return The generated duplication function.
3544static llvm::Value *
3546 const OMPExecutableDirective &D,
3547 QualType KmpTaskTWithPrivatesPtrQTy,
3548 const RecordDecl *KmpTaskTWithPrivatesQTyRD,
3549 const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy,
3550 QualType SharedsPtrTy, const OMPTaskDataTy &Data,
3551 ArrayRef<PrivateDataTy> Privates, bool WithLastIter) {
3552 ASTContext &C = CGM.getContext();
 // Three implicit parameters: destination task, source task, lastpriv flag.
3553 auto *DstArg = ImplicitParamDecl::Create(
3554 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpTaskTWithPrivatesPtrQTy,
3556 auto *SrcArg = ImplicitParamDecl::Create(
3557 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpTaskTWithPrivatesPtrQTy,
3559 auto *LastprivArg =
3560 ImplicitParamDecl::Create(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy,
3562 FunctionArgList Args{DstArg, SrcArg, LastprivArg};
3563 const auto &TaskDupFnInfo =
3564 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
3565 llvm::FunctionType *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo);
3566 std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_dup", ""});
3567 auto *TaskDup = llvm::Function::Create(
3568 TaskDupTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
3569 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo);
 // The attribute is only attached when a sample profile file is configured.
3570 if (!CGM.getCodeGenOpts().SampleProfileFile.empty())
3571 TaskDup->addFnAttr("sample-profile-suffix-elision-policy", "selected");
3572 TaskDup->setDoesNotRecurse();
3573 CodeGenFunction CGF(CGM);
3574 CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc,
3575 Loc);
3576
 // TDBase refers to the destination task (*task_dst).
3577 LValue TDBase = CGF.EmitLoadOfPointerLValue(
3578 CGF.GetAddrOfLocalVar(DstArg),
3579 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3580 // task_dst->liter = lastpriv;
3581 if (WithLastIter) {
3582 auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
3583 LValue Base = CGF.EmitLValueForField(
3584 TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
3585 LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
3586 llvm::Value *Lastpriv = CGF.EmitLoadOfScalar(
3587 CGF.GetAddrOfLocalVar(LastprivArg), /*Volatile=*/false, C.IntTy, Loc);
3588 CGF.EmitStoreOfScalar(Lastpriv, LILVal);
3589 }
3590
3591 // Emit initial values for private copies (if any).
3592 assert(!Privates.empty());
3593 Address KmpTaskSharedsPtr = Address::invalid();
3594 if (!Data.FirstprivateVars.empty()) {
 // This inner TDBase shadows the outer one and refers to the source task;
 // the shareds pointer is taken from the source, not the destination.
3595 LValue TDBase = CGF.EmitLoadOfPointerLValue(
3596 CGF.GetAddrOfLocalVar(SrcArg),
3597 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3598 LValue Base = CGF.EmitLValueForField(
3599 TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
3600 KmpTaskSharedsPtr = Address(
3602 Base, *std::next(KmpTaskTQTyRD->field_begin(),
3603 KmpTaskTShareds)),
3604 Loc),
3605 CGF.Int8Ty, CGM.getNaturalTypeAlignment(SharedsTy));
3606 }
 // The outer TDBase (destination task) is the one passed here.
3607 emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD,
3608 SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true);
3609 CGF.FinishFunction();
3610 return TaskDup;
3611}
3612
3613/// Checks if destructor function is required to be generated.
///
/// Local privates are skipped. For every other entry the type of the original
/// variable, with any reference stripped, is tested with isDestructedType();
/// the first such type makes the result true. \p KmpTaskTWithPrivatesQTyRD is
/// not referenced by the visible body.
3614/// \return true if cleanups are required, false otherwise.
3615static bool
3616checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD,
3618 for (const PrivateDataTy &P : Privates) {
3619 if (P.second.isLocalPrivate())
3620 continue;
3621 QualType Ty = P.second.Original->getType().getNonReferenceType();
3622 if (Ty.isDestructedType())
3623 return true;
3624 }
3625 return false;
3626}
3627
3628namespace {
3629/// Loop generator for OpenMP iterator expression.
///
/// The constructor emits, for each iterator of the expression in order, the
/// opening part of a counted loop (counter initialization, the "iter.cont"
/// condition block and the "iter.body" block with the iterator update). The
/// destructor emits the closing part of each loop in reverse order (counter
/// increment, branch back to "iter.cont", then the "iter.exit" block). Code
/// emitted between construction and destruction therefore lands in the body
/// of the innermost loop. When the expression is null, neither the
/// constructor nor the destructor emits anything.
3630class OMPIteratorGeneratorScope final
3632 CodeGenFunction &CGF;
3633 const OMPIteratorExpr *E = nullptr;
 // Per-iterator jump destinations, indexed by iterator number.
3634 SmallVector<CodeGenFunction::JumpDest, 4> ContDests;
3635 SmallVector<CodeGenFunction::JumpDest, 4> ExitDests;
3636 OMPIteratorGeneratorScope() = delete;
3637 OMPIteratorGeneratorScope(OMPIteratorGeneratorScope &) = delete;
3638
3639public:
3640 OMPIteratorGeneratorScope(CodeGenFunction &CGF, const OMPIteratorExpr *E)
3641 : CodeGenFunction::OMPPrivateScope(CGF), CGF(CGF), E(E) {
3642 if (!E)
3643 return;
 // First pass: evaluate every upper bound and register private storage for
 // each iterator variable and its counter. All upper bounds are emitted
 // before Privatize() is called.
3644 SmallVector<llvm::Value *, 4> Uppers;
3645 for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
3646 Uppers.push_back(CGF.EmitScalarExpr(E->getHelper(I).Upper));
3647 const auto *VD = cast<VarDecl>(E->getIteratorDecl(I));
3648 addPrivate(VD, CGF.CreateMemTemp(VD->getType(), VD->getName()));
3649 const OMPIteratorHelperData &HelperData = E->getHelper(I);
3650 addPrivate(
3651 HelperData.CounterVD,
3652 CGF.CreateMemTemp(HelperData.CounterVD->getType(), "counter.addr"));
3653 }
3654 Privatize();
3655
 // Second pass: open one loop per iterator, outermost first.
3656 for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
3657 const OMPIteratorHelperData &HelperData = E->getHelper(I);
3658 LValue CLVal =
3659 CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(HelperData.CounterVD),
3660 HelperData.CounterVD->getType());
3661 // Counter = 0;
3662 CGF.EmitStoreOfScalar(
3663 llvm::ConstantInt::get(CLVal.getAddress().getElementType(), 0),
3664 CLVal);
3665 CodeGenFunction::JumpDest &ContDest =
3666 ContDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.cont"));
3667 CodeGenFunction::JumpDest &ExitDest =
3668 ExitDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.exit"));
3669 // N = <number-of_iterations>;
3670 llvm::Value *N = Uppers[I];
3671 // cont:
3672 // if (Counter < N) goto body; else goto exit;
3673 CGF.EmitBlock(ContDest.getBlock());
3674 auto *CVal =
3675 CGF.EmitLoadOfScalar(CLVal, HelperData.CounterVD->getLocation());
 // The comparison signedness follows the counter variable's type.
3676 llvm::Value *Cmp =
3677 HelperData.CounterVD->getType()->isSignedIntegerOrEnumerationType()
3678 ? CGF.Builder.CreateICmpSLT(CVal, N)
3679 : CGF.Builder.CreateICmpULT(CVal, N);
3680 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("iter.body");
3681 CGF.Builder.CreateCondBr(Cmp, BodyBB, ExitDest.getBlock());
3682 // body:
3683 CGF.EmitBlock(BodyBB);
3684 // Iteri = Begini + Counter * Stepi;
3685 CGF.EmitIgnoredExpr(HelperData.Update);
3686 }
3687 }
3688 ~OMPIteratorGeneratorScope() {
3689 if (!E)
3690 return;
 // Close the loops innermost first; I runs from numOfIterators() down to 1
 // so that I - 1 is a valid index for an unsigned counter.
3691 for (unsigned I = E->numOfIterators(); I > 0; --I) {
3692 // Counter = Counter + 1;
3693 const OMPIteratorHelperData &HelperData = E->getHelper(I - 1);
3694 CGF.EmitIgnoredExpr(HelperData.CounterUpdate);
3695 // goto cont;
3696 CGF.EmitBranchThroughCleanup(ContDests[I - 1]);
3697 // exit:
 // Only the outermost loop's exit block (I == 1) is emitted as finished.
3698 CGF.EmitBlock(ExitDests[I - 1].getBlock(), /*IsFinished=*/I == 1);
3699 }
3700 }
3701};
3702} // namespace
3703
/// Computes the address and the size of the object designated by the
/// expression E.
///
/// - For an array shaping expression the address is the value of the base
///   expression, and the size is the size of the base's pointee type
///   multiplied by every dimension (each converted to the size type).
/// - For an array section the address is that of the expression's lvalue, and
///   the size is the pointer difference between one element past the section's
///   upper-bound element and that address.
/// - Otherwise the address is that of the expression's lvalue and the size is
///   the size of the expression's type.
/// \return A pair of (address, size) values.
3704static std::pair<llvm::Value *, llvm::Value *>
3706 const auto *OASE = dyn_cast<OMPArrayShapingExpr>(E);
3707 llvm::Value *Addr;
3708 if (OASE) {
3709 const Expr *Base = OASE->getBase();
3710 Addr = CGF.EmitScalarExpr(Base);
3711 } else {
3712 Addr = CGF.EmitLValue(E).getPointer(CGF);
3713 }
3714 llvm::Value *SizeVal;
3715 QualType Ty = E->getType();
3716 if (OASE) {
3717 SizeVal = CGF.getTypeSize(OASE->getBase()->getType()->getPointeeType());
3718 for (const Expr *SE : OASE->getDimensions()) {
3719 llvm::Value *Sz = CGF.EmitScalarExpr(SE);
3720 Sz = CGF.EmitScalarConversion(
3721 Sz, SE->getType(), CGF.getContext().getSizeType(), SE->getExprLoc());
 // The multiplication is emitted with the no-unsigned-wrap flag.
3722 SizeVal = CGF.Builder.CreateNUWMul(SizeVal, Sz);
3723 }
3724 } else if (const auto *ASE =
3725 dyn_cast<ArraySectionExpr>(E->IgnoreParenImpCasts())) {
 // Take the address of the section's upper-bound element, step one element
 // past it, and subtract the start address.
3726 LValue UpAddrLVal = CGF.EmitArraySectionExpr(ASE, /*IsLowerBound=*/false);
3727 Address UpAddrAddress = UpAddrLVal.getAddress();
3728 llvm::Value *UpAddr = CGF.Builder.CreateConstGEP1_32(
3729 UpAddrAddress.getElementType(), UpAddrAddress.emitRawPointer(CGF),
3730 /*Idx0=*/1);
3731 SizeVal = CGF.Builder.CreatePtrDiff(UpAddr, Addr, "", /*IsNUW=*/true);
3732 } else {
3733 SizeVal = CGF.getTypeSize(Ty);
3734 }
3735 return std::make_pair(Addr, SizeVal);
3736}
3737
3738/// Builds the kmp_task_affinity_info_t record type, if it is not built yet.
///
/// The record has three fields, in this order: an intptr-typed value, a
/// size_t-typed value and a 32-bit unsigned flags value. When
/// \p KmpTaskAffinityInfoTy is already non-null it is left untouched.
3739static void getKmpAffinityType(ASTContext &C, QualType &KmpTaskAffinityInfoTy) {
3740 QualType FlagsTy = C.getIntTypeForBitwidth(32, /*Signed=*/false);
3741 if (KmpTaskAffinityInfoTy.isNull()) {
3742 RecordDecl *KmpAffinityInfoRD =
3743 C.buildImplicitRecord("kmp_task_affinity_info_t");
3744 KmpAffinityInfoRD->startDefinition();
3745 addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getIntPtrType());
3746 addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getSizeType());
3747 addFieldToRecordDecl(C, KmpAffinityInfoRD, FlagsTy);
3748 KmpAffinityInfoRD->completeDefinition();
3749 KmpTaskAffinityInfoTy = C.getCanonicalTagType(KmpAffinityInfoRD);
3750 }
3751}
3752
3755 const OMPExecutableDirective &D,
3756 llvm::Function *TaskFunction, QualType SharedsTy,
3757 Address Shareds, const OMPTaskDataTy &Data) {
 // Overview of the visible steps:
 //  1. collect private/firstprivate/lastprivate/local entries into Privates
 //     and sort them by descending alignment;
 //  2. build the kmp_task_t-based record types for this task;
 //  3. emit the privates mapping function and the proxy task entry;
 //  4. compute the task flags and call the runtime task allocation function;
 //  5. handle the detach and affinity clauses;
 //  6. copy the shareds and initialize the private copies (emitting a task
 //     duplication function for taskloops when needed);
 //  7. store the destructors function and the priority, then fill Result.
3758 ASTContext &C = CGM.getContext();
3760 // Aggregate privates and sort them by the alignment.
3761 const auto *I = Data.PrivateCopies.begin();
3762 for (const Expr *E : Data.PrivateVars) {
3763 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3764 Privates.emplace_back(
3765 C.getDeclAlign(VD),
3766 PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
3767 /*PrivateElemInit=*/nullptr));
3768 ++I;
3769 }
 // Only firstprivates carry a non-null element initializer variable.
3770 I = Data.FirstprivateCopies.begin();
3771 const auto *IElemInitRef = Data.FirstprivateInits.begin();
3772 for (const Expr *E : Data.FirstprivateVars) {
3773 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3774 Privates.emplace_back(
3775 C.getDeclAlign(VD),
3776 PrivateHelpersTy(
3777 E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
3778 cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl())));
3779 ++I;
3780 ++IElemInitRef;
3781 }
3782 I = Data.LastprivateCopies.begin();
3783 for (const Expr *E : Data.LastprivateVars) {
3784 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3785 Privates.emplace_back(
3786 C.getDeclAlign(VD),
3787 PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
3788 /*PrivateElemInit=*/nullptr));
3789 ++I;
3790 }
 // Allocatable local privates are recorded with pointer alignment instead of
 // the declared alignment of the variable.
3791 for (const VarDecl *VD : Data.PrivateLocals) {
3792 if (isAllocatableDecl(VD))
3793 Privates.emplace_back(CGM.getPointerAlign(), PrivateHelpersTy(VD));
3794 else
3795 Privates.emplace_back(C.getDeclAlign(VD), PrivateHelpersTy(VD));
3796 }
 // Descending alignment; the sort is stable, so entries with equal alignment
 // keep the insertion order established above.
3797 llvm::stable_sort(Privates,
3798 [](const PrivateDataTy &L, const PrivateDataTy &R) {
3799 return L.first > R.first;
3800 });
3801 QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
3802 // Build type kmp_routine_entry_t (if not built yet).
3803 emitKmpRoutineEntryT(KmpInt32Ty);
3804 // Build type kmp_task_t (if not built yet).
 // Taskloop directives and other task-generating directives cache their
 // record types separately (SavedKmpTaskloopTQTy vs. SavedKmpTaskTQTy).
3805 if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) {
3806 if (SavedKmpTaskloopTQTy.isNull()) {
3807 SavedKmpTaskloopTQTy = C.getCanonicalTagType(createKmpTaskTRecordDecl(
3808 CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
3809 }
3811 } else {
3812 assert((D.getDirectiveKind() == OMPD_task ||
3813 isOpenMPTargetExecutionDirective(D.getDirectiveKind()) ||
3814 isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) &&
3815 "Expected taskloop, task or target directive");
3816 if (SavedKmpTaskTQTy.isNull()) {
3817 SavedKmpTaskTQTy = C.getCanonicalTagType(createKmpTaskTRecordDecl(
3818 CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
3819 }
3821 }
3822 const auto *KmpTaskTQTyRD = KmpTaskTQTy->castAsRecordDecl();
3823 // Build particular struct kmp_task_t for the given task.
3824 const RecordDecl *KmpTaskTWithPrivatesQTyRD =
3826 CanQualType KmpTaskTWithPrivatesQTy =
3827 C.getCanonicalTagType(KmpTaskTWithPrivatesQTyRD);
3828 QualType KmpTaskTWithPrivatesPtrQTy =
3829 C.getPointerType(KmpTaskTWithPrivatesQTy);
3830 llvm::Type *KmpTaskTWithPrivatesPtrTy = CGF.Builder.getPtrTy(0);
3831 llvm::Value *KmpTaskTWithPrivatesTySize =
3832 CGF.getTypeSize(KmpTaskTWithPrivatesQTy);
3833 QualType SharedsPtrTy = C.getPointerType(SharedsTy);
3834
3835 // Emit initial values for private copies (if any).
3836 llvm::Value *TaskPrivatesMap = nullptr;
 // The privates map is passed as the fourth parameter (index 3) of the
 // outlined task function, so its type is taken from there.
3837 llvm::Type *TaskPrivatesMapTy =
3838 std::next(TaskFunction->arg_begin(), 3)->getType();
3839 if (!Privates.empty()) {
3840 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
3841 TaskPrivatesMap =
3842 emitTaskPrivateMappingFunction(CGM, Loc, Data, FI->getType(), Privates);
3843 TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3844 TaskPrivatesMap, TaskPrivatesMapTy);
3845 } else {
 // Without privates a null pointer is passed in place of the map function.
3846 TaskPrivatesMap = llvm::ConstantPointerNull::get(
3847 cast<llvm::PointerType>(TaskPrivatesMapTy));
3848 }
3849 // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
3850 // kmp_task_t *tt);
3851 llvm::Function *TaskEntry = emitProxyTaskFunction(
3852 CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
3853 KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction,
3854 TaskPrivatesMap);
3855
3856 // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
3857 // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
3858 // kmp_routine_entry_t *task_entry);
3859 // Task flags. Format is taken from
3860 // https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h,
3861 // description of kmp_tasking_flags struct.
3862 enum {
3863 TiedFlag = 0x1,
3864 FinalFlag = 0x2,
3865 DestructorsFlag = 0x8,
3866 PriorityFlag = 0x20,
3867 DetachableFlag = 0x40,
3868 FreeAgentFlag = 0x80,
3869 TransparentFlag = 0x100,
3870 };
 // Flags collects every bit known at compile time; the final bit may depend
 // on a runtime value and is merged in separately below.
3871 unsigned Flags = Data.Tied ? TiedFlag : 0;
3872 bool NeedsCleanup = false;
3873 if (!Privates.empty()) {
3874 NeedsCleanup =
3875 checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD, Privates);
3876 if (NeedsCleanup)
3877 Flags = Flags | DestructorsFlag;
3878 }
3879 if (const auto *Clause = D.getSingleClause<OMPThreadsetClause>()) {
3880 OpenMPThreadsetKind Kind = Clause->getThreadsetKind();
3881 if (Kind == OMPC_THREADSET_omp_pool)
3882 Flags = Flags | FreeAgentFlag;
3883 }
3884 if (D.getSingleClause<OMPTransparentClause>())
3885 Flags |= TransparentFlag;
3886
3887 if (Data.Priority.getInt())
3888 Flags = Flags | PriorityFlag;
3889 if (D.hasClausesOfKind<OMPDetachClause>())
3890 Flags = Flags | DetachableFlag;
 // If Data.Final carries a pointer (a runtime condition), the final bit is
 // selected at run time; otherwise the stored integer decides it statically.
3891 llvm::Value *TaskFlags =
3892 Data.Final.getPointer()
3893 ? CGF.Builder.CreateSelect(Data.Final.getPointer(),
3894 CGF.Builder.getInt32(FinalFlag),
3895 CGF.Builder.getInt32(/*C=*/0))
3896 : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0);
3897 TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
3898 llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy));
3900 getThreadID(CGF, Loc), TaskFlags, KmpTaskTWithPrivatesTySize,
3902 TaskEntry, KmpRoutineEntryPtrTy)};
3903 llvm::Value *NewTask;
 // With a nowait clause the target-task allocation entry point is used and
 // receives one extra trailing argument, the device ID.
3904 if (D.hasClausesOfKind<OMPNowaitClause>()) {
3905 // Check if we have any device clause associated with the directive.
3906 const Expr *Device = nullptr;
3907 if (auto *C = D.getSingleClause<OMPDeviceClause>())
3908 Device = C->getDevice();
3909 // Emit device ID if any otherwise use default value.
3910 llvm::Value *DeviceID;
3911 if (Device)
3912 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
3913 CGF.Int64Ty, /*isSigned=*/true);
3914 else
3915 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
3916 AllocArgs.push_back(DeviceID);
3917 NewTask = CGF.EmitRuntimeCall(
3918 OMPBuilder.getOrCreateRuntimeFunction(
3919 CGM.getModule(), OMPRTL___kmpc_omp_target_task_alloc),
3920 AllocArgs);
3921 } else {
3922 NewTask =
3923 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
3924 CGM.getModule(), OMPRTL___kmpc_omp_task_alloc),
3925 AllocArgs);
3926 }
3927 // Emit detach clause initialization.
3928 // evt = (typeof(evt))__kmpc_task_allow_completion_event(loc, tid,
3929 // task_descriptor);
3930 if (const auto *DC = D.getSingleClause<OMPDetachClause>()) {
3931 const Expr *Evt = DC->getEventHandler()->IgnoreParenImpCasts();
3932 LValue EvtLVal = CGF.EmitLValue(Evt);
3933
3934 // Build kmp_event_t *__kmpc_task_allow_completion_event(ident_t *loc_ref,
3935 // int gtid, kmp_task_t *task);
 // Note: this Loc is a local llvm::Value that shadows the function's Loc
 // parameter within this block.
3936 llvm::Value *Loc = emitUpdateLocation(CGF, DC->getBeginLoc());
3937 llvm::Value *Tid = getThreadID(CGF, DC->getBeginLoc());
3938 Tid = CGF.Builder.CreateIntCast(Tid, CGF.IntTy, /*isSigned=*/false);
3939 llvm::Value *EvtVal = CGF.EmitRuntimeCall(
3940 OMPBuilder.getOrCreateRuntimeFunction(
3941 CGM.getModule(), OMPRTL___kmpc_task_allow_completion_event),
3942 {Loc, Tid, NewTask});
3943 EvtVal = CGF.EmitScalarConversion(EvtVal, C.VoidPtrTy, Evt->getType(),
3944 Evt->getExprLoc());
3945 CGF.EmitStoreOfScalar(EvtVal, EvtLVal);
3946 }
3947 // Process affinity clauses.
3948 if (D.hasClausesOfKind<OMPAffinityClause>()) {
3949 // Process list of affinity data.
3950 ASTContext &C = CGM.getContext();
3951 Address AffinitiesArray = Address::invalid();
3952 // Calculate number of elements to form the array of affinity data.
 // Clauses with an iterator modifier contribute a runtime product of their
 // iterator upper bounds (NumOfElements); clauses without one contribute a
 // compile-time count of their list items (NumAffinities).
3953 llvm::Value *NumOfElements = nullptr;
3954 unsigned NumAffinities = 0;
3955 for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
3956 if (const Expr *Modifier = C->getModifier()) {
3957 const auto *IE = cast<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts());
3958 for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
3959 llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
3960 Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
3961 NumOfElements =
3962 NumOfElements ? CGF.Builder.CreateNUWMul(NumOfElements, Sz) : Sz;
3963 }
3964 } else {
3965 NumAffinities += C->varlist_size();
3966 }
3967 }
3969 // Fields ids in kmp_task_affinity_info record.
3970 enum RTLAffinityInfoFieldsTy { BaseAddr, Len, Flags };
3971
3972 QualType KmpTaskAffinityInfoArrayTy;
 // A runtime element count requires a variable-length array; otherwise a
 // constant-size temporary is enough.
3973 if (NumOfElements) {
3974 NumOfElements = CGF.Builder.CreateNUWAdd(
3975 llvm::ConstantInt::get(CGF.SizeTy, NumAffinities), NumOfElements);
3976 auto *OVE = new (C) OpaqueValueExpr(
3977 Loc,
3978 C.getIntTypeForBitwidth(C.getTypeSize(C.getSizeType()), /*Signed=*/0),
3979 VK_PRValue);
3980 CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE,
3981 RValue::get(NumOfElements));
3982 KmpTaskAffinityInfoArrayTy = C.getVariableArrayType(
3984 /*IndexTypeQuals=*/0);
3985 // Properly emit variable-sized array.
3986 auto *PD = ImplicitParamDecl::Create(C, KmpTaskAffinityInfoArrayTy,
3988 CGF.EmitVarDecl(*PD);
3989 AffinitiesArray = CGF.GetAddrOfLocalVar(PD);
3990 NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
3991 /*isSigned=*/false);
3992 } else {
3993 KmpTaskAffinityInfoArrayTy = C.getConstantArrayType(
3995 llvm::APInt(C.getTypeSize(C.getSizeType()), NumAffinities), nullptr,
3996 ArraySizeModifier::Normal, /*IndexTypeQuals=*/0);
3997 AffinitiesArray = CGF.CreateMemTempWithoutCast(KmpTaskAffinityInfoArrayTy,
3998 ".affs.arr.addr");
3999 AffinitiesArray = CGF.Builder.CreateConstArrayGEP(AffinitiesArray, 0);
4000 NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumAffinities,
4001 /*isSigned=*/false);
4002 }
4003
4004 const auto *KmpAffinityInfoRD = KmpTaskAffinityInfoTy->getAsRecordDecl();
4005 // Fill array by elements without iterators.
 // These occupy the leading slots [0, Pos); only the base_addr and len fields
 // are written in the visible code, the flags field is not stored.
4006 unsigned Pos = 0;
4007 bool HasIterator = false;
4008 for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
4009 if (C->getModifier()) {
4010 HasIterator = true;
4011 continue;
4012 }
4013 for (const Expr *E : C->varlist()) {
4014 llvm::Value *Addr;
4015 llvm::Value *Size;
4016 std::tie(Addr, Size) = getPointerAndSize(CGF, E);
4017 LValue Base =
4018 CGF.MakeAddrLValue(CGF.Builder.CreateConstGEP(AffinitiesArray, Pos),
4020 // affs[i].base_addr = &<Affinities[i].second>;
4021 LValue BaseAddrLVal = CGF.EmitLValueForField(
4022 Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
4023 CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
4024 BaseAddrLVal);
4025 // affs[i].len = sizeof(<Affinities[i].second>);
4026 LValue LenLVal = CGF.EmitLValueForField(
4027 Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
4028 CGF.EmitStoreOfScalar(Size, LenLVal);
4029 ++Pos;
4030 }
4031 }
 // Entries produced under iterators are indexed through a runtime counter
 // that starts at Pos, i.e. right after the statically placed entries.
4032 LValue PosLVal;
4033 if (HasIterator) {
4034 PosLVal = CGF.MakeAddrLValue(
4035 CGF.CreateMemTempWithoutCast(C.getSizeType(), "affs.counter.addr"),
4036 C.getSizeType());
4037 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
4038 }
4039 // Process elements with iterators.
4040 for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
4041 const Expr *Modifier = C->getModifier();
4042 if (!Modifier)
4043 continue;
 // The scope object opens the iterator loops here and closes them when it
 // is destroyed at the end of this loop iteration.
4044 OMPIteratorGeneratorScope IteratorScope(
4045 CGF, cast_or_null<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts()));
4046 for (const Expr *E : C->varlist()) {
4047 llvm::Value *Addr;
4048 llvm::Value *Size;
4049 std::tie(Addr, Size) = getPointerAndSize(CGF, E);
4050 llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4051 LValue Base =
4052 CGF.MakeAddrLValue(CGF.Builder.CreateGEP(CGF, AffinitiesArray, Idx),
4054 // affs[i].base_addr = &<Affinities[i].second>;
4055 LValue BaseAddrLVal = CGF.EmitLValueForField(
4056 Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
4057 CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
4058 BaseAddrLVal);
4059 // affs[i].len = sizeof(<Affinities[i].second>);
4060 LValue LenLVal = CGF.EmitLValueForField(
4061 Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
4062 CGF.EmitStoreOfScalar(Size, LenLVal);
4063 Idx = CGF.Builder.CreateNUWAdd(
4064 Idx, llvm::ConstantInt::get(Idx->getType(), 1));
4065 CGF.EmitStoreOfScalar(Idx, PosLVal);
4066 }
4067 }
4068 // Call to kmp_int32 __kmpc_omp_reg_task_with_affinity(ident_t *loc_ref,
4069 // kmp_int32 gtid, kmp_task_t *new_task, kmp_int32
4070 // naffins, kmp_task_affinity_info_t *affin_list);
4071 llvm::Value *LocRef = emitUpdateLocation(CGF, Loc);
4072 llvm::Value *GTid = getThreadID(CGF, Loc);
4073 llvm::Value *AffinListPtr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4074 AffinitiesArray.emitRawPointer(CGF), CGM.VoidPtrTy);
4075 // FIXME: Emit the function and ignore its result for now unless the
4076 // runtime function is properly implemented.
4077 (void)CGF.EmitRuntimeCall(
4078 OMPBuilder.getOrCreateRuntimeFunction(
4079 CGM.getModule(), OMPRTL___kmpc_omp_reg_task_with_affinity),
4080 {LocRef, GTid, NewTask, NumOfElements, AffinListPtr});
4081 }
4082 llvm::Value *NewTaskNewTaskTTy =
4084 NewTask, KmpTaskTWithPrivatesPtrTy);
 // Base is the whole task-with-privates record; TDBase is its first field.
4085 LValue Base = CGF.MakeNaturalAlignRawAddrLValue(NewTaskNewTaskTTy,
4086 KmpTaskTWithPrivatesQTy);
4087 LValue TDBase =
4088 CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin());
4089 // Fill the data in the resulting kmp_task_t record.
4090 // Copy shareds if there are any.
4091 Address KmpTaskSharedsPtr = Address::invalid();
4092 if (!SharedsTy->castAsRecordDecl()->field_empty()) {
4093 KmpTaskSharedsPtr = Address(
4094 CGF.EmitLoadOfScalar(
4096 TDBase,
4097 *std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds)),
4098 Loc),
4099 CGF.Int8Ty, CGM.getNaturalTypeAlignment(SharedsTy));
4100 LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy);
4101 LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy);
4102 CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap);
4103 }
4104 // Emit initial values for private copies (if any).
4106 if (!Privates.empty()) {
 // The whole record (Base), not TDBase, is passed: emitPrivatesInit looks up
 // the privates field as the second field of the record it is given.
4107 emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD,
4108 SharedsTy, SharedsPtrTy, Data, Privates,
4109 /*ForDup=*/false);
 // A duplication function is emitted only for taskloops, and only when there
 // are lastprivates or checkInitIsRequired reports that it is needed.
4110 if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
4111 (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) {
4112 Result.TaskDupFn = emitTaskDupFunction(
4113 CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD,
4114 KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates,
4115 /*WithLastIter=*/!Data.LastprivateVars.empty());
4116 }
4117 }
4118 // Fields of union "kmp_cmplrdata_t" for destructors and priority.
4119 enum { Priority = 0, Destructors = 1 };
4120 // Provide pointer to function with destructors for privates.
 // The union declaration is looked up through the Data1 field and reused
 // below for the Data2 field as well.
4121 auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1);
4122 const auto *KmpCmplrdataUD = (*FI)->getType()->castAsRecordDecl();
4123 assert(KmpCmplrdataUD->isUnion());
4124 if (NeedsCleanup) {
4125 llvm::Value *DestructorFn = emitDestructorsFunction(
4126 CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
4127 KmpTaskTWithPrivatesQTy);
4128 LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI);
4129 LValue DestructorsLV = CGF.EmitLValueForField(
4130 Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors));
4132 DestructorFn, KmpRoutineEntryPtrTy),
4133 DestructorsLV);
4134 }
4135 // Set priority.
4136 if (Data.Priority.getInt()) {
4137 LValue Data2LV = CGF.EmitLValueForField(
4138 TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2));
4139 LValue PriorityLV = CGF.EmitLValueForField(
4140 Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority));
4141 CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV);
4142 }
4143 Result.NewTask = NewTask;
4144 Result.TaskEntry = TaskEntry;
4145 Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy;
4146 Result.TDBase = TDBase;
4147 Result.KmpTaskTQTyRD = KmpTaskTQTyRD;
4148 return Result;
4149}
4150
4151/// Translates internal dependency kind into the runtime kind.
///
/// Mapping implemented by the switch below:
///   in            -> DepIn
///   out, inout    -> DepInOut
///   mutexinoutset -> DepMutexInOutSet
///   inoutset      -> DepInOutSet
///   outallmemory  -> DepOmpAllMem
/// The remaining listed kinds (source, sink, depobj, inoutallmemory) are not
/// task dependence kinds for this purpose and reach llvm_unreachable.
4153 RTLDependenceKindTy DepKind;
4154 switch (K) {
4155 case OMPC_DEPEND_in:
4156 DepKind = RTLDependenceKindTy::DepIn;
4157 break;
4158 // Out and InOut dependencies must use the same code.
4159 case OMPC_DEPEND_out:
4160 case OMPC_DEPEND_inout:
4161 DepKind = RTLDependenceKindTy::DepInOut;
4162 break;
4163 case OMPC_DEPEND_mutexinoutset:
4164 DepKind = RTLDependenceKindTy::DepMutexInOutSet;
4165 break;
4166 case OMPC_DEPEND_inoutset:
4167 DepKind = RTLDependenceKindTy::DepInOutSet;
4168 break;
4169 case OMPC_DEPEND_outallmemory:
4170 DepKind = RTLDependenceKindTy::DepOmpAllMem;
4171 break;
4172 case OMPC_DEPEND_source:
4173 case OMPC_DEPEND_sink:
4174 case OMPC_DEPEND_depobj:
4175 case OMPC_DEPEND_inoutallmemory:
4177 llvm_unreachable("Unknown task dependence type");
4178 }
4179 return DepKind;
4180}
4181
4182/// Builds kmp_depend_info, if it is not built yet, and builds flags type.
4183static void getDependTypes(ASTContext &C, QualType &KmpDependInfoTy,
4184 QualType &FlagsTy) {
4185 FlagsTy = C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false);
4186 if (KmpDependInfoTy.isNull()) {
4187 RecordDecl *KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info");
4188 KmpDependInfoRD->startDefinition();
4189 addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType());
4190 addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType());
4191 addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy);
4192 KmpDependInfoRD->completeDefinition();
4193 KmpDependInfoTy = C.getCanonicalTagType(KmpDependInfoRD);
4194 }
4195}
4196
/// Reads the element count and the base lvalue of a depobj's dependence array.
///
/// Returns the pair (NumDeps, Base). NumDeps is loaded, at Loc, from the
/// BaseAddr field of the kmp_depend_info record located one element before
/// Base's address (GEP index -1).
// NOTE(review): listing lines 4198 (declarator with the leading parameters)
// and 4205 (start of the statement that defines 'Base') are absent from this
// extract. 'Base' presumably results from loading the pointer stored in
// DepobjLVal, viewed as 'kmp_depend_info *' - confirm against upstream.
4197 std::pair<llvm::Value *, LValue>
4199 SourceLocation Loc) {
4200 ASTContext &C = CGM.getContext();
4201 QualType FlagsTy;
4202 getDependTypes(C, KmpDependInfoTy, FlagsTy);
4203 auto *KmpDependInfoRD = KmpDependInfoTy->castAsRecordDecl();
4204 QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
4206 DepobjLVal.getAddress().withElementType(
4207 CGF.ConvertTypeForMem(KmpDependInfoPtrTy)),
4208 KmpDependInfoPtrTy->castAs<PointerType>());
// Step back one record from Base: that slot carries the element count.
4209 Address DepObjAddr = CGF.Builder.CreateGEP(
4210 CGF, Base.getAddress(),
4211 llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
4212 LValue NumDepsBase = CGF.MakeAddrLValue(
4213 DepObjAddr, KmpDependInfoTy, Base.getBaseInfo(), Base.getTBAAInfo());
4214 // NumDeps = deps[-1].base_addr;
4215 LValue BaseAddrLVal = CGF.EmitLValueForField(
4216 NumDepsBase,
4217 *std::next(KmpDependInfoRD->field_begin(),
4218 static_cast<unsigned int>(RTLDependInfoFields::BaseAddr)));
4219 llvm::Value *NumDeps = CGF.EmitLoadOfScalar(BaseAddrLVal, Loc);
4220 return std::make_pair(NumDeps, Base);
4221 }
4222
/// Emits the stores that fill one kmp_depend_info record per entry of
/// Data.DepExprs in DependenciesArray (base_addr, len and flags fields).
///
/// \param Pos Source of the record index. An 'unsigned *' is an index known
/// while generating code and is incremented in place after each record; an
/// 'LValue *' names a counter in memory that is loaded to index the array and
/// stored back incremented by one.
/// \param DependenciesArray Address of the first record to index from.
// NOTE(review): listing lines 4225 (a parameter, evidently the 'Data' used
// below) and 4278 (start of the statement that stores the flags) are absent
// from this extract.
4223 static void emitDependData(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
4224 llvm::PointerUnion<unsigned *, LValue *> Pos,
4226 Address DependenciesArray) {
4227 CodeGenModule &CGM = CGF.CGM;
4228 ASTContext &C = CGM.getContext();
4229 QualType FlagsTy;
4230 getDependTypes(C, KmpDependInfoTy, FlagsTy);
4231 auto *KmpDependInfoRD = KmpDependInfoTy->castAsRecordDecl();
4232 llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
4233
// The scope object is alive for the whole loop below; it is constructed from
// the clause's iterator expression, or from nullptr when there is none.
4234 OMPIteratorGeneratorScope IteratorScope(
4235 CGF, cast_or_null<OMPIteratorExpr>(
4236 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
4237 : nullptr));
4238 for (const Expr *E : Data.DepExprs) {
4239 llvm::Value *Addr;
4240 llvm::Value *Size;
4241
4242 // The expression will be a nullptr in the 'omp_all_memory' case.
4243 if (E) {
4244 std::tie(Addr, Size) = getPointerAndSize(CGF, E);
4245 Addr = CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy);
4246 } else {
// No expression: record a zero address and a zero length.
4247 Addr = llvm::ConstantInt::get(CGF.IntPtrTy, 0);
4248 Size = llvm::ConstantInt::get(CGF.SizeTy, 0);
4249 }
// Select the record to fill: constant index or index loaded from memory.
4250 LValue Base;
4251 if (unsigned *P = dyn_cast<unsigned *>(Pos)) {
4252 Base = CGF.MakeAddrLValue(
4253 CGF.Builder.CreateConstGEP(DependenciesArray, *P), KmpDependInfoTy);
4254 } else {
4255 assert(E && "Expected a non-null expression");
4256 LValue &PosLVal = *cast<LValue *>(Pos);
4257 llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4258 Base = CGF.MakeAddrLValue(
4259 CGF.Builder.CreateGEP(CGF, DependenciesArray, Idx), KmpDependInfoTy);
4260 }
4261 // deps[i].base_addr = &<Dependencies[i].second>;
4262 LValue BaseAddrLVal = CGF.EmitLValueForField(
4263 Base,
4264 *std::next(KmpDependInfoRD->field_begin(),
4265 static_cast<unsigned int>(RTLDependInfoFields::BaseAddr)));
4266 CGF.EmitStoreOfScalar(Addr, BaseAddrLVal);
4267 // deps[i].len = sizeof(<Dependencies[i].second>);
4268 LValue LenLVal = CGF.EmitLValueForField(
4269 Base, *std::next(KmpDependInfoRD->field_begin(),
4270 static_cast<unsigned int>(RTLDependInfoFields::Len)));
4271 CGF.EmitStoreOfScalar(Size, LenLVal);
4272 // deps[i].flags = <Dependencies[i].first>;
4273 RTLDependenceKindTy DepKind = translateDependencyKind(Data.DepKind);
4274 LValue FlagsLVal = CGF.EmitLValueForField(
4275 Base,
4276 *std::next(KmpDependInfoRD->field_begin(),
4277 static_cast<unsigned int>(RTLDependInfoFields::Flags)));
4279 llvm::ConstantInt::get(LLVMFlagsTy, static_cast<unsigned int>(DepKind)),
4280 FlagsLVal);
// Advance to the next record, matching the kind of position that was given.
4281 if (unsigned *P = dyn_cast<unsigned *>(Pos)) {
4282 ++(*P);
4283 } else {
4284 LValue &PosLVal = *cast<LValue *>(Pos);
4285 llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4286 Idx = CGF.Builder.CreateNUWAdd(Idx,
4287 llvm::ConstantInt::get(Idx->getType(), 1));
4288 CGF.EmitStoreOfScalar(Idx, PosLVal);
4289 }
4290 }
4291 }
4292
/// For every depobj expression in Data.DepExprs, obtains the element count
/// through getDepobjElements and parks it in a "depobj.size.addr" temporary
/// while the iterator scope is open. After the scope is closed the stored
/// values are reloaded and appended to 'Sizes', which is returned.
// NOTE(review): the declarator (listing lines 4293-4295) and line 4298, which
// presumably declares 'Sizes', are absent from this extract.
4296 assert(Data.DepKind == OMPC_DEPEND_depobj &&
4297 "Expected depobj dependency kind.");
4299 SmallVector<LValue, 4> SizeLVals;
4300 ASTContext &C = CGF.getContext();
4301 {
4302 OMPIteratorGeneratorScope IteratorScope(
4303 CGF, cast_or_null<OMPIteratorExpr>(
4304 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
4305 : nullptr));
4306 for (const Expr *E : Data.DepExprs) {
4307 llvm::Value *NumDeps;
4308 LValue Base;
4309 LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
4310 std::tie(NumDeps, Base) =
4311 getDepobjElements(CGF, DepobjLVal, E->getExprLoc());
// One temporary per expression: zero it, then add this depobj's count.
4312 LValue NumLVal = CGF.MakeAddrLValue(
4313 CGF.CreateMemTempWithoutCast(C.getUIntPtrType(), "depobj.size.addr"),
4314 C.getUIntPtrType());
4315 CGF.Builder.CreateStore(llvm::ConstantInt::get(CGF.IntPtrTy, 0),
4316 NumLVal.getAddress());
4317 llvm::Value *PrevVal = CGF.EmitLoadOfScalar(NumLVal, E->getExprLoc());
4318 llvm::Value *Add = CGF.Builder.CreateNUWAdd(PrevVal, NumDeps);
4319 CGF.EmitStoreOfScalar(Add, NumLVal);
4320 SizeLVals.push_back(NumLVal);
4321 }
4322 }
// Read the per-expression totals back once the iterator scope has ended.
4323 for (unsigned I = 0, E = SizeLVals.size(); I < E; ++I) {
4324 llvm::Value *Size =
4325 CGF.EmitLoadOfScalar(SizeLVals[I], Data.DepExprs[I]->getExprLoc());
4326 Sizes.push_back(Size);
4327 }
4328 return Sizes;
4329 }
4330
/// Copies the dependence records of each depobj in Data.DepExprs into
/// DependenciesArray, starting at the index held in PosLVal, and advances
/// PosLVal by the number of records copied.
// NOTE(review): listing lines 4331, 4332 and 4334 (parts of the declarator)
// are absent from this extract.
4333 LValue PosLVal,
4335 Address DependenciesArray) {
4336 assert(Data.DepKind == OMPC_DEPEND_depobj &&
4337 "Expected depobj dependency kind.");
4338 llvm::Value *ElSize = CGF.getTypeSize(KmpDependInfoTy);
4339 {
4340 OMPIteratorGeneratorScope IteratorScope(
4341 CGF, cast_or_null<OMPIteratorExpr>(
4342 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
4343 : nullptr));
4344 for (const Expr *E : Data.DepExprs) {
4345 llvm::Value *NumDeps;
4346 LValue Base;
4347 LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
4348 std::tie(NumDeps, Base) =
4349 getDepobjElements(CGF, DepobjLVal, E->getExprLoc());
4350
4351 // memcpy the dependency data: Size = sizeof(kmp_depend_info) * NumDeps.
4352 llvm::Value *Size = CGF.Builder.CreateNUWMul(
4353 ElSize,
4354 CGF.Builder.CreateIntCast(NumDeps, CGF.SizeTy, /*isSigned=*/false));
4355 llvm::Value *Pos = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4356 Address DepAddr = CGF.Builder.CreateGEP(CGF, DependenciesArray, Pos);
4357 CGF.Builder.CreateMemCpy(DepAddr, Base.getAddress(), Size);
4358
4359 // Increase pos.
4360 // pos += NumDeps (a record count, not the byte size computed above);
4361 llvm::Value *Add = CGF.Builder.CreateNUWAdd(Pos, NumDeps);
4362 CGF.EmitStoreOfScalar(Add, PosLVal);
4363 }
4364 }
4365 }
4366
/// Builds the array of kmp_depend_info records for a list of depend clauses.
///
/// Returns (element count as a 32-bit value, address of the array cast to
/// void *), or (nullptr, Address::invalid()) when no clause has expressions.
/// The array has a constant size unless depobj clauses or clauses with
/// iterators are present; in that case it is emitted as a variable-length
/// local. Records are written in three passes: plain dependencies, then
/// dependencies with iterators, then the contents of depobj objects.
// NOTE(review): listing lines 4368 (declarator parameters), 4396-4397 (the
// statement that defines 'Sizes') and 4448 (remaining arguments of
// ImplicitParamDecl::Create) are absent from this extract.
4367 std::pair<llvm::Value *, Address> CGOpenMPRuntime::emitDependClause(
4369 SourceLocation Loc) {
4370 if (llvm::all_of(Dependencies, [](const OMPTaskDataTy::DependData &D) {
4371 return D.DepExprs.empty();
4372 }))
4373 return std::make_pair(nullptr, Address::invalid());
4374 // Process list of dependencies.
4375 ASTContext &C = CGM.getContext();
4376 Address DependenciesArray = Address::invalid();
4377 llvm::Value *NumOfElements = nullptr;
// Count only the dependencies whose number is known here: depobj clauses and
// clauses with an iterator contribute nothing to this constant.
4378 unsigned NumDependencies = std::accumulate(
4379 Dependencies.begin(), Dependencies.end(), 0,
4380 [](unsigned V, const OMPTaskDataTy::DependData &D) {
4381 return D.DepKind == OMPC_DEPEND_depobj
4382 ? V
4383 : (V + (D.IteratorExpr ? 0 : D.DepExprs.size()));
4384 });
4385 QualType FlagsTy;
4386 getDependTypes(C, KmpDependInfoTy, FlagsTy);
4387 bool HasDepobjDeps = false;
4388 bool HasRegularWithIterators = false;
4389 llvm::Value *NumOfDepobjElements = llvm::ConstantInt::get(CGF.IntPtrTy, 0);
4390 llvm::Value *NumOfRegularWithIterators =
4391 llvm::ConstantInt::get(CGF.IntPtrTy, 0);
4392 // Calculate number of depobj dependencies and regular deps with the
4393 // iterators.
4394 for (const OMPTaskDataTy::DependData &D : Dependencies) {
4395 if (D.DepKind == OMPC_DEPEND_depobj) {
4398 for (llvm::Value *Size : Sizes) {
4399 NumOfDepobjElements =
4400 CGF.Builder.CreateNUWAdd(NumOfDepobjElements, Size);
4401 }
4402 HasDepobjDeps = true;
4403 continue;
4404 }
4405 // Include number of iterations, if any.
4406
4407 if (const auto *IE = cast_or_null<OMPIteratorExpr>(D.IteratorExpr)) {
// Iteration space = product of the 'Upper' helper value of every iterator.
4408 llvm::Value *ClauseIteratorSpace =
4409 llvm::ConstantInt::get(CGF.IntPtrTy, 1);
4410 for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
4411 llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
4412 Sz = CGF.Builder.CreateIntCast(Sz, CGF.IntPtrTy, /*isSigned=*/false);
4413 ClauseIteratorSpace = CGF.Builder.CreateNUWMul(Sz, ClauseIteratorSpace);
4414 }
// Each iteration produces one record per expression of the clause.
4415 llvm::Value *NumClauseDeps = CGF.Builder.CreateNUWMul(
4416 ClauseIteratorSpace,
4417 llvm::ConstantInt::get(CGF.IntPtrTy, D.DepExprs.size()));
4418 NumOfRegularWithIterators =
4419 CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumClauseDeps);
4420 HasRegularWithIterators = true;
4421 continue;
4422 }
4423 }
4424
4425 QualType KmpDependInfoArrayTy;
4426 if (HasDepobjDeps || HasRegularWithIterators) {
// Element count is only known at run time: constant part + dynamic parts.
4427 NumOfElements = llvm::ConstantInt::get(CGM.IntPtrTy, NumDependencies,
4428 /*isSigned=*/false);
4429 if (HasDepobjDeps) {
4430 NumOfElements =
4431 CGF.Builder.CreateNUWAdd(NumOfDepobjElements, NumOfElements);
4432 }
4433 if (HasRegularWithIterators) {
4434 NumOfElements =
4435 CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumOfElements);
4436 }
// NOTE(review): the opaque size expression is typed as a 64-bit unsigned
// integer while NumOfElements is built in IntPtrTy; verify this on targets
// where the two widths differ.
4437 auto *OVE = new (C) OpaqueValueExpr(
4438 Loc, C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0),
4439 VK_PRValue);
4440 CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE,
4441 RValue::get(NumOfElements));
4442 KmpDependInfoArrayTy =
4443 C.getVariableArrayType(KmpDependInfoTy, OVE, ArraySizeModifier::Normal,
4444 /*IndexTypeQuals=*/0);
4445 // CGF.EmitVariablyModifiedType(KmpDependInfoArrayTy);
4446 // Properly emit variable-sized array.
4447 auto *PD = ImplicitParamDecl::Create(C, KmpDependInfoArrayTy,
4449 CGF.EmitVarDecl(*PD);
4450 DependenciesArray = CGF.GetAddrOfLocalVar(PD);
4451 NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
4452 /*isSigned=*/false);
4453 } else {
// Every dependency is counted in NumDependencies: use a fixed-size temporary.
4454 KmpDependInfoArrayTy = C.getConstantArrayType(
4455 KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies), nullptr,
4456 ArraySizeModifier::Normal, /*IndexTypeQuals=*/0);
4457 DependenciesArray =
4458 CGF.CreateMemTempWithoutCast(KmpDependInfoArrayTy, ".dep.arr.addr");
4459 DependenciesArray = CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0);
4460 NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumDependencies,
4461 /*isSigned=*/false);
4462 }
// Pass 1: dependencies without iterators, indexed by the constant 'Pos'.
4463 unsigned Pos = 0;
4464 for (const OMPTaskDataTy::DependData &Dep : Dependencies) {
4465 if (Dep.DepKind == OMPC_DEPEND_depobj || Dep.IteratorExpr)
4466 continue;
4467 emitDependData(CGF, KmpDependInfoTy, &Pos, Dep, DependenciesArray);
4468 }
4469 // Copy regular dependencies with iterators.
// Pass 2 continues from 'Pos' using a counter kept in memory.
4470 LValue PosLVal = CGF.MakeAddrLValue(
4471 CGF.CreateMemTempWithoutCast(C.getSizeType(), "dep.counter.addr"),
4472 C.getSizeType());
4473 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
4474 for (const OMPTaskDataTy::DependData &Dep : Dependencies) {
4475 if (Dep.DepKind == OMPC_DEPEND_depobj || !Dep.IteratorExpr)
4476 continue;
4477 emitDependData(CGF, KmpDependInfoTy, &PosLVal, Dep, DependenciesArray);
4478 }
4479 // Copy final depobj arrays without iterators.
4480 if (HasDepobjDeps) {
4481 for (const OMPTaskDataTy::DependData &Dep : Dependencies) {
4482 if (Dep.DepKind != OMPC_DEPEND_depobj)
4483 continue;
4484 emitDepobjElements(CGF, KmpDependInfoTy, PosLVal, Dep, DependenciesArray);
4485 }
4486 }
4487 DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4488 DependenciesArray, CGF.VoidPtrTy, CGF.Int8Ty);
4489 return std::make_pair(NumOfElements, DependenciesArray);
4490 }
4491
/// Allocates and fills the dependence array that backs a depobj.
///
/// The storage is obtained from __kmpc_alloc with a null allocator argument.
/// The first record stores the number of dependencies in its BaseAddr field;
/// the remaining records are written by emitDependData starting at index 1.
/// Returns the address of record 1 cast to void *, or Address::invalid() when
/// Dependencies.DepExprs is empty.
// NOTE(review): listing lines 4492 (start of the declarator) and 4547 (start
// of the statement that casts 'Addr') are absent from this extract.
4493 CodeGenFunction &CGF, const OMPTaskDataTy::DependData &Dependencies,
4494 SourceLocation Loc) {
4495 if (Dependencies.DepExprs.empty())
4496 return Address::invalid();
4497 // Process list of dependencies.
4498 ASTContext &C = CGM.getContext();
4499 Address DependenciesArray = Address::invalid();
4500 unsigned NumDependencies = Dependencies.DepExprs.size();
4501 QualType FlagsTy;
4502 getDependTypes(C, KmpDependInfoTy, FlagsTy);
4503 auto *KmpDependInfoRD = KmpDependInfoTy->castAsRecordDecl();
4504
4505 llvm::Value *Size;
4506 // Define type kmp_depend_info[<Dependencies.size()>];
4507 // For depobj reserve one extra element to store the number of elements.
4508 // It is required to handle depobj(x) update(in) construct.
4509 // kmp_depend_info[<Dependencies.size()>] deps;
4510 llvm::Value *NumDepsVal;
4511 CharUnits Align = C.getTypeAlignInChars(KmpDependInfoTy);
4512 if (const auto *IE =
4513 cast_or_null<OMPIteratorExpr>(Dependencies.IteratorExpr)) {
// With iterators the count is the product of the iterators' 'Upper' values,
// so the allocation size (count + 1 records) is computed at run time.
4514 NumDepsVal = llvm::ConstantInt::get(CGF.SizeTy, 1);
4515 for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
4516 llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
4517 Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
4518 NumDepsVal = CGF.Builder.CreateNUWMul(NumDepsVal, Sz);
4519 }
4520 Size = CGF.Builder.CreateNUWAdd(llvm::ConstantInt::get(CGF.SizeTy, 1),
4521 NumDepsVal);
4522 CharUnits SizeInBytes =
4523 C.getTypeSizeInChars(KmpDependInfoTy).alignTo(Align);
4524 llvm::Value *RecSize = CGM.getSize(SizeInBytes);
4525 Size = CGF.Builder.CreateNUWMul(Size, RecSize);
4526 NumDepsVal =
4527 CGF.Builder.CreateIntCast(NumDepsVal, CGF.IntPtrTy, /*isSigned=*/false);
4528 } else {
// Without iterators the size is a constant: NumDependencies + 1 records.
4529 QualType KmpDependInfoArrayTy = C.getConstantArrayType(
4530 KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies + 1),
4531 nullptr, ArraySizeModifier::Normal, /*IndexTypeQuals=*/0);
4532 CharUnits Sz = C.getTypeSizeInChars(KmpDependInfoArrayTy);
4533 Size = CGM.getSize(Sz.alignTo(Align));
4534 NumDepsVal = llvm::ConstantInt::get(CGF.IntPtrTy, NumDependencies);
4535 }
4536 // Need to allocate on the dynamic memory.
4537 llvm::Value *ThreadID = getThreadID(CGF, Loc);
4538 // Use default allocator.
4539 llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
4540 llvm::Value *Args[] = {ThreadID, Size, Allocator};
4541
4542 llvm::Value *Addr =
4543 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
4544 CGM.getModule(), OMPRTL___kmpc_alloc),
4545 Args, ".dep.arr.addr");
4546 llvm::Type *KmpDependInfoLlvmTy = CGF.ConvertTypeForMem(KmpDependInfoTy);
4548 Addr, CGF.Builder.getPtrTy(0));
4549 DependenciesArray = Address(Addr, KmpDependInfoLlvmTy, Align);
4550 // Write number of elements in the first element of array for depobj.
4551 LValue Base = CGF.MakeAddrLValue(DependenciesArray, KmpDependInfoTy);
4552 // deps[0].base_addr = number of dependencies (NumDepsVal);
4553 LValue BaseAddrLVal = CGF.EmitLValueForField(
4554 Base,
4555 *std::next(KmpDependInfoRD->field_begin(),
4556 static_cast<unsigned int>(RTLDependInfoFields::BaseAddr)));
4557 CGF.EmitStoreOfScalar(NumDepsVal, BaseAddrLVal);
// Real dependence records start at index 1; the index is kept in memory when
// an iterator is present and as a plain constant otherwise.
4558 llvm::PointerUnion<unsigned *, LValue *> Pos;
4559 unsigned Idx = 1;
4560 LValue PosLVal;
4561 if (Dependencies.IteratorExpr) {
4562 PosLVal = CGF.MakeAddrLValue(
4563 CGF.CreateMemTempWithoutCast(C.getSizeType(), "iterator.counter.addr"),
4564 C.getSizeType());
4565 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Idx), PosLVal,
4566 /*IsInit=*/true);
4567 Pos = &PosLVal;
4568 } else {
4569 Pos = &Idx;
4570 }
4571 emitDependData(CGF, KmpDependInfoTy, Pos, Dependencies, DependenciesArray);
// Hand out the address of record 1, i.e. just past the count record.
4572 DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4573 CGF.Builder.CreateConstGEP(DependenciesArray, 1), CGF.VoidPtrTy,
4574 CGF.Int8Ty);
4575 return DependenciesArray;
4576 }
4577
/// Releases the storage of a depobj: steps one record back from the pointer
/// loaded from DepobjLVal and passes that address to __kmpc_free together with
/// the thread id and a null allocator argument.
// NOTE(review): listing lines 4578 (start of the declarator), 4586 and 4588
// (first and last line of the statement that defines 'Addr') are absent from
// this extract.
4579 SourceLocation Loc) {
4580 ASTContext &C = CGM.getContext();
4581 QualType FlagsTy;
4582 getDependTypes(C, KmpDependInfoTy, FlagsTy);
4583 LValue Base = CGF.EmitLoadOfPointerLValue(DepobjLVal.getAddress(),
4584 C.VoidPtrTy.castAs<PointerType>());
4585 QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
4587 Base.getAddress(), CGF.ConvertTypeForMem(KmpDependInfoPtrTy),
// Index -1: the address one record before the loaded pointer is the one freed.
4589 llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
4590 Addr.getElementType(), Addr.emitRawPointer(CGF),
4591 llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
4592 DepObjAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(DepObjAddr,
4593 CGF.VoidPtrTy);
4594 llvm::Value *ThreadID = getThreadID(CGF, Loc);
4595 // Use default allocator.
4596 llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
4597 llvm::Value *Args[] = {ThreadID, DepObjAddr, Allocator};
4598
4599 // __kmpc_free(gtid, addr, nullptr);
4600 (void)CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
4601 CGM.getModule(), OMPRTL___kmpc_free),
4602 Args);
4603 }
4604
/// Rewrites the flags field of every record of a depobj to the runtime kind
/// that corresponds to NewDepKind, by emitting a loop over the records
/// returned by getDepobjElements.
// NOTE(review): listing lines 4605 (start of the declarator) and 4637 (start
// of the statement that stores the flags) are absent from this extract.
4606 OpenMPDependClauseKind NewDepKind,
4607 SourceLocation Loc) {
4608 ASTContext &C = CGM.getContext();
4609 QualType FlagsTy;
4610 getDependTypes(C, KmpDependInfoTy, FlagsTy);
4611 auto *KmpDependInfoRD = KmpDependInfoTy->castAsRecordDecl();
4612 llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
4613 llvm::Value *NumDeps;
4614 LValue Base;
4615 std::tie(NumDeps, Base) = getDepobjElements(CGF, DepobjLVal, Loc);
4616
4617 Address Begin = Base.getAddress();
4618 // End = address one past the last record (Begin + NumDeps).
4619 llvm::Value *End = CGF.Builder.CreateGEP(Begin.getElementType(),
4620 Begin.emitRawPointer(CGF), NumDeps);
4621 // The basic structure here is a while-do loop.
// NOTE(review): no emptiness check is emitted before the body, so the first
// iteration always executes; the exit test only compares the next element
// with End. Confirm that NumDeps can never be zero here.
4622 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.body");
4623 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.done");
4624 llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
4625 CGF.EmitBlock(BodyBB);
// The PHI yields Begin on entry and the next element on the back edge.
4626 llvm::PHINode *ElementPHI =
4627 CGF.Builder.CreatePHI(Begin.getType(), 2, "omp.elementPast");
4628 ElementPHI->addIncoming(Begin.emitRawPointer(CGF), EntryBB);
4629 Begin = Begin.withPointer(ElementPHI, KnownNonNull);
4630 Base = CGF.MakeAddrLValue(Begin, KmpDependInfoTy, Base.getBaseInfo(),
4631 Base.getTBAAInfo());
4632 // deps[i].flags = NewDepKind;
4633 RTLDependenceKindTy DepKind = translateDependencyKind(NewDepKind);
4634 LValue FlagsLVal = CGF.EmitLValueForField(
4635 Base, *std::next(KmpDependInfoRD->field_begin(),
4636 static_cast<unsigned int>(RTLDependInfoFields::Flags)));
4638 llvm::ConstantInt::get(LLVMFlagsTy, static_cast<unsigned int>(DepKind)),
4639 FlagsLVal);
4640
4641 // Shift the address forward by one element.
4642 llvm::Value *ElementNext =
4643 CGF.Builder.CreateConstGEP(Begin, /*Index=*/1, "omp.elementNext")
4644 .emitRawPointer(CGF);
4645 ElementPHI->addIncoming(ElementNext, CGF.Builder.GetInsertBlock());
// Leave the loop once the next element is End.
4646 llvm::Value *IsEmpty =
4647 CGF.Builder.CreateICmpEQ(ElementNext, End, "omp.isempty");
4648 CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
4649 // Done.
4650 CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
4651 }
4652
/// Emits the runtime calls that launch a task prepared by emitTaskInit.
///
/// On the 'then' path the task is submitted with __kmpc_omp_task_with_deps
/// when Data.Dependences is non-empty and with __kmpc_omp_task otherwise.
/// When IfCond is given, emitIfClause additionally receives an 'else'
/// generator that optionally waits on the dependences
/// (__kmpc_omp_taskwait_deps_51) and then calls the task entry directly, with
/// a CommonActionTy built from __kmpc_omp_task_begin_if0 and
/// __kmpc_omp_task_complete_if0. Without IfCond only the 'then' path is
/// emitted. Nothing is emitted when CGF has no insert point.
// NOTE(review): listing lines 4653 (start of the declarator), 4662 (start of
// the statement that defines 'Result') and 4754 (presumably the definition of
// 'RCG') are absent from this extract.
4654 const OMPExecutableDirective &D,
4655 llvm::Function *TaskFunction,
4656 QualType SharedsTy, Address Shareds,
4657 const Expr *IfCond,
4658 const OMPTaskDataTy &Data) {
4659 if (!CGF.HaveInsertPoint())
4660 return;
4661
4663 emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
4664 llvm::Value *NewTask = Result.NewTask;
4665 llvm::Function *TaskEntry = Result.TaskEntry;
4666 llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy;
4667 LValue TDBase = Result.TDBase;
4668 const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD;
4669 // Process list of dependences.
4670 Address DependenciesArray = Address::invalid();
4671 llvm::Value *NumOfElements;
4672 std::tie(NumOfElements, DependenciesArray) =
4673 emitDependClause(CGF, Data.Dependences, Loc);
4674
4675 // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
4676 // libcall.
4677 // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
4678 // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
4679 // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence
4680 // list is not empty
4681 llvm::Value *ThreadID = getThreadID(CGF, Loc);
4682 llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
4683 llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask };
// DepTaskArgs is only filled in, and only read, when dependences exist.
4684 llvm::Value *DepTaskArgs[7];
4685 if (!Data.Dependences.empty()) {
4686 DepTaskArgs[0] = UpLoc;
4687 DepTaskArgs[1] = ThreadID;
4688 DepTaskArgs[2] = NewTask;
4689 DepTaskArgs[3] = NumOfElements;
4690 DepTaskArgs[4] = DependenciesArray.emitRawPointer(CGF);
4691 DepTaskArgs[5] = CGF.Builder.getInt32(0);
4692 DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
4693 }
4694 auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, &TaskArgs,
4695 &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) {
// For an untied task, store 0 into the part id field before submitting.
4696 if (!Data.Tied) {
4697 auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
4698 LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI);
4699 CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal);
4700 }
4701 if (!Data.Dependences.empty()) {
4702 CGF.EmitRuntimeCall(
4703 OMPBuilder.getOrCreateRuntimeFunction(
4704 CGM.getModule(), OMPRTL___kmpc_omp_task_with_deps),
4705 DepTaskArgs);
4706 } else {
4707 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
4708 CGM.getModule(), OMPRTL___kmpc_omp_task),
4709 TaskArgs);
4710 }
4711 // Check if parent region is untied and build return for untied task;
4712 if (auto *Region =
4713 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
4714 Region->emitUntiedSwitch(CGF);
4715 };
4716
// Arguments for the wait call of the 'else' path; the last one carries
// Data.HasNowaitClause as a 32-bit constant.
4717 llvm::Value *DepWaitTaskArgs[7];
4718 if (!Data.Dependences.empty()) {
4719 DepWaitTaskArgs[0] = UpLoc;
4720 DepWaitTaskArgs[1] = ThreadID;
4721 DepWaitTaskArgs[2] = NumOfElements;
4722 DepWaitTaskArgs[3] = DependenciesArray.emitRawPointer(CGF);
4723 DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
4724 DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
4725 DepWaitTaskArgs[6] =
4726 llvm::ConstantInt::get(CGF.Int32Ty, Data.HasNowaitClause);
4727 }
4728 auto &M = CGM.getModule();
4729 auto &&ElseCodeGen = [this, &M, &TaskArgs, ThreadID, NewTaskNewTaskTTy,
4730 TaskEntry, &Data, &DepWaitTaskArgs,
4731 Loc](CodeGenFunction &CGF, PrePostActionTy &) {
4732 CodeGenFunction::RunCleanupsScope LocalScope(CGF);
4733 // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
4734 // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
4735 // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
4736 // is specified.
// NOTE(review): the function requested below is __kmpc_omp_taskwait_deps_51
// and it is called with all seven DepWaitTaskArgs, one more than the
// prototype quoted above lists.
4737 if (!Data.Dependences.empty())
4738 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
4739 M, OMPRTL___kmpc_omp_taskwait_deps_51),
4740 DepWaitTaskArgs);
4741 // Call proxy_task_entry(gtid, new_task);
4742 auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy,
4743 Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
4744 Action.Enter(CGF);
4745 llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
4746 CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry,
4747 OutlinedFnArgs);
4748 };
4749
4750 // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
4751 // kmp_task_t *new_task);
4752 // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
4753 // kmp_task_t *new_task);
4755 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
4756 M, OMPRTL___kmpc_omp_task_begin_if0),
4757 TaskArgs,
4758 OMPBuilder.getOrCreateRuntimeFunction(
4759 M, OMPRTL___kmpc_omp_task_complete_if0),
4760 TaskArgs);
4761 RCG.setAction(Action);
4762 RCG(CGF);
4763 };
4764
// Without an 'if' clause only the 'then' path is generated.
4765 if (IfCond) {
4766 emitIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen);
4767 } else {
4768 RegionCodeGenTy ThenRCG(ThenCodeGen);
4769 ThenRCG(CGF);
4770 }
4771 }
4772
/// Emits the __kmpc_taskloop (or, when Data.HasModifier is set,
/// __kmpc_taskloop_5) call for a taskloop directive.
///
/// The lower bound, upper bound and stride fields of the task record are
/// initialized from the initializers of the directive's corresponding
/// variables, and the reductions field is set from Data.Reductions or
/// null-initialized. Nothing is emitted when CGF has no insert point.
// NOTE(review): listing lines 4773 (start of the declarator), 4781 (start of
// the statement that defines 'Result'), 4830 (declaration of 'TaskArgs') and
// 4852 (first branch of the task_dup conditional) are absent from this
// extract.
4774 const OMPLoopDirective &D,
4775 llvm::Function *TaskFunction,
4776 QualType SharedsTy, Address Shareds,
4777 const Expr *IfCond,
4778 const OMPTaskDataTy &Data) {
4779 if (!CGF.HaveInsertPoint())
4780 return;
4782 emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
4783 // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
4784 // libcall.
4785 // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
4786 // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
4787 // sched, kmp_uint64 grainsize, void *task_dup);
4788 llvm::Value *ThreadID = getThreadID(CGF, Loc);
4789 llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
// if_val: the evaluated 'if' condition widened to int, or 1 when absent.
4790 llvm::Value *IfVal;
4791 if (IfCond) {
4792 IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy,
4793 /*isSigned=*/true);
4794 } else {
4795 IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1);
4796 }
4797
// Initialize the lower bound, upper bound and stride fields of the task.
4798 LValue LBLVal = CGF.EmitLValueForField(
4799 Result.TDBase,
4800 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound));
4801 const auto *LBVar =
4802 cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl());
4803 CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(), LBLVal.getQuals(),
4804 /*IsInitializer=*/true);
4805 LValue UBLVal = CGF.EmitLValueForField(
4806 Result.TDBase,
4807 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound));
4808 const auto *UBVar =
4809 cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl());
4810 CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(), UBLVal.getQuals(),
4811 /*IsInitializer=*/true);
4812 LValue StLVal = CGF.EmitLValueForField(
4813 Result.TDBase,
4814 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride));
4815 const auto *StVar =
4816 cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl());
4817 CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(), StLVal.getQuals(),
4818 /*IsInitializer=*/true);
4819 // Store reductions address.
4820 LValue RedLVal = CGF.EmitLValueForField(
4821 Result.TDBase,
4822 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions));
4823 if (Data.Reductions) {
4824 CGF.EmitStoreOfScalar(Data.Reductions, RedLVal);
4825 } else {
4826 CGF.EmitNullInitialization(RedLVal.getAddress(),
4827 CGF.getContext().VoidPtrTy);
4828 }
// Values passed as 'sched': Data.Schedule's pointer part is the schedule
// value and its int part selects NumTasks over Grainsize.
4829 enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 };
4831 UpLoc,
4832 ThreadID,
4833 Result.NewTask,
4834 IfVal,
4835 LBLVal.getPointer(CGF),
4836 UBLVal.getPointer(CGF),
4837 CGF.EmitLoadOfScalar(StLVal, Loc),
4838 llvm::ConstantInt::getSigned(
4839 CGF.IntTy, 1), // Always 1 because taskgroup emitted by the compiler
4840 llvm::ConstantInt::getSigned(
4841 CGF.IntTy, Data.Schedule.getPointer()
4842 ? Data.Schedule.getInt() ? NumTasks : Grainsize
4843 : NoSchedule),
4844 Data.Schedule.getPointer()
4845 ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty,
4846 /*isSigned=*/false)
4847 : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0)};
// With a modifier, an extra 32-bit constant 1 precedes the task_dup argument.
4848 if (Data.HasModifier)
4849 TaskArgs.push_back(llvm::ConstantInt::get(CGF.Int32Ty, 1));
4850
// Last argument: the task duplication function, or null when there is none.
4851 TaskArgs.push_back(Result.TaskDupFn
4853 Result.TaskDupFn, CGF.VoidPtrTy)
4854 : llvm::ConstantPointerNull::get(CGF.VoidPtrTy));
4855 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
4856 CGM.getModule(), Data.HasModifier
4857 ? OMPRTL___kmpc_taskloop_5
4858 : OMPRTL___kmpc_taskloop),
4859 TaskArgs);
4860 }
4861
4862 /// Emit reduction operation for each element of array (required for
4863 /// array sections) LHS op = RHS.
4864 /// \param Type Type of array.
4865 /// \param LHSVar Variable on the left side of the reduction operation
4866 /// (references element of array in original variable).
4867 /// \param RHSVar Variable on the right side of the reduction operation
4868 /// (references element of array in original variable).
4869 /// \param RedOpGen Generator of reduction operation with use of LHSVar and
4870 /// RHSVar.
/// \param XExpr Forwarded unchanged to RedOpGen.
/// \param EExpr Forwarded unchanged to RedOpGen.
/// \param UpExpr Forwarded unchanged to RedOpGen.
// NOTE(review): listing lines 4871 (start of the declarator) and 4920
// (presumably the declaration of 'Scope') are absent from this extract.
4872 CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar,
4873 const VarDecl *RHSVar,
4874 const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *,
4875 const Expr *, const Expr *)> &RedOpGen,
4876 const Expr *XExpr = nullptr, const Expr *EExpr = nullptr,
4877 const Expr *UpExpr = nullptr) {
4878 // Perform the reduction element by element.
4879 QualType ElementTy;
4880 Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar);
4881 Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar);
4882
4883 // Drill down to the base element type on both arrays.
4884 const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
4885 llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr);
4886
4887 llvm::Value *RHSBegin = RHSAddr.emitRawPointer(CGF);
4888 llvm::Value *LHSBegin = LHSAddr.emitRawPointer(CGF);
4889 // LHSEnd = address one past the last LHS element (LHSBegin + NumElements).
4890 llvm::Value *LHSEnd =
4891 CGF.Builder.CreateGEP(LHSAddr.getElementType(), LHSBegin, NumElements);
4892 // The basic structure here is a while-do loop.
4893 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arraycpy.body");
4894 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arraycpy.done");
// Skip the loop entirely when the array has no elements.
4895 llvm::Value *IsEmpty =
4896 CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty");
4897 CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
4898
4899 // Enter the loop body, making that address the current address.
4900 llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
4901 CGF.EmitBlock(BodyBB);
4902
4903 CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);
4904
// PHIs hold the current RHS and LHS element pointers across iterations.
4905 llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI(
4906 RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast");
4907 RHSElementPHI->addIncoming(RHSBegin, EntryBB);
4908 Address RHSElementCurrent(
4909 RHSElementPHI, RHSAddr.getElementType(),
4910 RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));
4911
4912 llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI(
4913 LHSBegin->getType(), 2, "omp.arraycpy.destElementPast");
4914 LHSElementPHI->addIncoming(LHSBegin, EntryBB);
4915 Address LHSElementCurrent(
4916 LHSElementPHI, LHSAddr.getElementType(),
4917 LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));
4918
4919 // Emit the reduction operation with LHSVar/RHSVar bound to the current elements.
4921 Scope.addPrivate(LHSVar, LHSElementCurrent);
4922 Scope.addPrivate(RHSVar, RHSElementCurrent);
4923 Scope.Privatize();
4924 RedOpGen(CGF, XExpr, EExpr, UpExpr);
4925 Scope.ForceCleanup();
4926
4927 // Shift the address forward by one element.
4928 llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32(
4929 LHSAddr.getElementType(), LHSElementPHI, /*Idx0=*/1,
4930 "omp.arraycpy.dest.element");
4931 llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32(
4932 RHSAddr.getElementType(), RHSElementPHI, /*Idx0=*/1,
4933 "omp.arraycpy.src.element");
4934 // Check whether we've reached the end.
4935 llvm::Value *Done =
4936 CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done");
4937 CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
4938 LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock());
4939 RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock());
4940
4941 // Done.
4942 CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
4943 }
4944
4945 /// Emit reduction combiner. If the combiner is a simple expression emit it as
4946 /// is, otherwise consider it as combiner of UDR decl and emit it as a call of
4947 /// UDR combiner function.
///
/// The user-defined-reduction path is taken when ReductionOp is a call whose
/// callee is an OpaqueValueExpr whose source expression, ignoring implicit
/// casts, is a reference to an OMPDeclareReductionDecl. Both paths end by
/// emitting ReductionOp with EmitIgnoredExpr.
// NOTE(review): listing lines 4948 (start of the declarator) and 4957-4959
// (the initializer of 'Reduction' and whatever precedes the EmitIgnoredExpr
// call in the UDR branch) are absent from this extract.
4949 const Expr *ReductionOp) {
4950 if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
4951 if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
4952 if (const auto *DRE =
4953 dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
4954 if (const auto *DRD =
4955 dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) {
4956 std::pair<llvm::Function *, llvm::Function *> Reduction =
4960 CGF.EmitIgnoredExpr(ReductionOp);
4961 return;
4962 }
// Not a user-defined reduction: emit the combiner expression as is.
4963 CGF.EmitIgnoredExpr(ReductionOp);
4964 }
4965
/// Emits the outlined helper `void reduction_func(void *LHSArg, void *RHSArg)`
/// that applies every combiner in \p ReductionOps to the items of two
/// reduction lists.
///
/// \param ReducerName Passed to getReductionFuncName() to form the function
/// name.
/// \param Loc Source location used for the implicit parameters and for
/// StartFunction.
/// \param ArgsElemType LLVM element type given to the LHS/RHS list addresses.
/// \param RHSExprs Per-item references whose declarations are remapped to the
/// entries of the RHS list.
/// \param ReductionOps Per-item combiner expressions.
/// \returns the newly created internal-linkage function.
// NOTE(review): the embedded numbering skips 4966 and 4968, so the first
// signature line and the declarations of `Privates`/`LHSExprs` (both used
// below) are not visible in this listing. Lines 4995, 4999, 5007, 5026 and
// 5040 are absent as well.
4967 StringRef ReducerName, SourceLocation Loc, llvm::Type *ArgsElemType,
4969 ArrayRef<const Expr *> RHSExprs, ArrayRef<const Expr *> ReductionOps) {
4970 ASTContext &C = CGM.getContext();
4971 
4972 // void reduction_func(void *LHSArg, void *RHSArg);
4973 auto *LHSArg =
4974 ImplicitParamDecl::Create(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4975 C.VoidPtrTy, ImplicitParamKind::Other);
4976 auto *RHSArg =
4977 ImplicitParamDecl::Create(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4978 C.VoidPtrTy, ImplicitParamKind::Other);
4979 FunctionArgList Args{LHSArg, RHSArg};
4980 const auto &CGFI =
4981 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
4982 std::string Name = getReductionFuncName(ReducerName);
4983 auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
4984 llvm::GlobalValue::InternalLinkage, Name,
4985 &CGM.getModule());
4986 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
// The attribute is added only when a sample profile file is configured.
4987 if (!CGM.getCodeGenOpts().SampleProfileFile.empty())
4988 Fn->addFnAttr("sample-profile-suffix-elision-policy", "selected");
4989 Fn->setDoesNotRecurse();
// The helper body is emitted with its own CodeGenFunction.
4990 CodeGenFunction CGF(CGM);
4991 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
4992 
4993 // Dst = (void*[n])(LHSArg);
4994 // Src = (void*[n])(RHSArg);
// NOTE(review): the lines that open these two expressions (and presumably
// declare the `LHS`/`RHS` addresses used below) are missing from the listing.
4996 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(LHSArg)),
4997 CGF.Builder.getPtrTy(0)),
4998 ArgsElemType, CGF.getPointerAlign());
5000 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(RHSArg)),
5001 CGF.Builder.getPtrTy(0)),
5002 ArgsElemType, CGF.getPointerAlign());
5003 
5004 // ...
5005 // *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
5006 // ...
// NOTE(review): `Scope` is used below but its declaration (listing line 5007)
// is not visible.
5008 const auto *IPriv = Privates.begin();
// `I` indexes the expression arrays; `Idx` indexes list slots and advances one
// extra step for every variably modified item (see below).
5009 unsigned Idx = 0;
5010 for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) {
5011 const auto *RHSVar =
5012 cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl());
5013 Scope.addPrivate(RHSVar, emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar));
5014 const auto *LHSVar =
5015 cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl());
5016 Scope.addPrivate(LHSVar, emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar));
5017 QualType PrivTy = (*IPriv)->getType();
5018 if (PrivTy->isVariablyModifiedType()) {
5019 // Get array size and emit VLA type.
// The slot following the item in the LHS list is loaded and converted with
// CreatePtrToInt to SizeTy, then associated with the VLA size expression.
5020 ++Idx;
5021 Address Elem = CGF.Builder.CreateConstArrayGEP(LHS, Idx);
5022 llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem);
5023 const VariableArrayType *VLA =
5024 CGF.getContext().getAsVariableArrayType(PrivTy);
5025 const auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr());
5027 CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy)));
5028 CGF.EmitVariablyModifiedType(PrivTy);
5029 }
5030 }
5031 Scope.Privatize();
// Second pass: walk the three per-item sequences in lock step and emit each
// combiner.
5032 IPriv = Privates.begin();
5033 const auto *ILHS = LHSExprs.begin();
5034 const auto *IRHS = RHSExprs.begin();
5035 for (const Expr *E : ReductionOps) {
5036 if ((*IPriv)->getType()->isArrayType()) {
5037 // Emit reduction for array section.
5038 const auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
5039 const auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
// NOTE(review): the callee (listing line 5040) is not visible; presumably
// EmitOMPAggregateReduction, which is called with the same argument shape
// elsewhere in this file - confirm.
5041 CGF, (*IPriv)->getType(), LHSVar, RHSVar,
5042 [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
5043 emitReductionCombiner(CGF, E);
5044 });
5045 } else {
5046 // Emit reduction for array subscript or single variable.
5047 emitReductionCombiner(CGF, E);
5048 }
5049 ++IPriv;
5050 ++ILHS;
5051 ++IRHS;
5052 }
5053 Scope.ForceCleanup();
5054 CGF.FinishFunction();
5055 return Fn;
5056 }
5057
/// Emits the combiner for a single reduction item.
///
/// When \p PrivateRef has array type the combiner is wrapped in a callback and
/// handed to the array path; otherwise it is emitted once through
/// emitReductionCombiner().
///
/// \param ReductionOp Combiner expression to emit.
/// \param PrivateRef Expression whose type selects the array or non-array path.
/// \param LHS Reference whose declaration is passed to the array path.
/// \param RHS Reference whose declaration is passed to the array path.
// NOTE(review): the embedded numbering skips 5058 (first signature line) and
// 5067 (the callee of the array path). The callee is presumably
// EmitOMPAggregateReduction, which is called with the same argument shape
// elsewhere in this file - confirm.
5059 const Expr *ReductionOp,
5060 const Expr *PrivateRef,
5061 const DeclRefExpr *LHS,
5062 const DeclRefExpr *RHS) {
5063 if (PrivateRef->getType()->isArrayType()) {
5064 // Emit reduction for array section.
5065 const auto *LHSVar = cast<VarDecl>(LHS->getDecl());
5066 const auto *RHSVar = cast<VarDecl>(RHS->getDecl());
5068 CGF, PrivateRef->getType(), LHSVar, RHSVar,
// The callback ignores its three expression arguments and always emits the
// captured ReductionOp.
5069 [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
5070 emitReductionCombiner(CGF, ReductionOp);
5071 });
5072 } else {
5073 // Emit reduction for array subscript or single variable.
5074 emitReductionCombiner(CGF, ReductionOp);
5075 }
5076 }
5077
// Forward declaration: the definition appears later in this file, but the
// function is already needed by the code that follows.
5078 static std::string generateUniqueName(CodeGenModule &CGM,
5079 llvm::StringRef Prefix, const Expr *Ref);
5080
/// Emits the reduction sequence for one list item through a shared internal
/// variable; the individual steps are listed in the comment at the top of the
/// body.
///
/// \param Loc Location used for the thread id, barriers and critical regions.
/// \param Privates Expression for the value each thread contributes; its type
/// is also the type of the shared variable.
/// \param LHSExprs Reference remapped to the shared variable while combining
/// built-in operations.
/// \param RHSExprs Reference that is remapped to the private value while
/// combining built-in operations and that later receives the broadcast result.
/// \param ReductionOps Combiner expression for the item.
// NOTE(review): the embedded numbering skips 5081 (first signature line), 5146
// and 5162, so those lines are not visible in this listing.
5082 CodeGenFunction &CGF, SourceLocation Loc, const Expr *Privates,
5083 const Expr *LHSExprs, const Expr *RHSExprs, const Expr *ReductionOps) {
5084 
5085 // Create a shared global variable (__shared_reduction_var) to accumulate the
5086 // final result.
5087 //
5088 // The thread whose thread id is 0 initializes __shared_reduction_var from
5089 // the UDR initializer, the private variable's initializer, or by null
5090 // initialization.
5091 //
5092 // Call __kmpc_barrier after the initialization.
5093 //
5094 // Inside a critical region the executing thread emits the combiner with its
5095 // output operand remapped to __shared_reduction_var and its input operand
5096 // remapped to the address of Privates.
5097 // Call __kmpc_barrier after combining.
5098 //
5099 // Each thread copies __shared_reduction_var into the lvalue of RHSExprs.
5100 //
5101 // Call __kmpc_barrier after broadcasting.
5102 //
5103 // Finally, inside the same named critical region, emit the regular combiner
5104 // (emitSingleReductionCombiner) so the result is combined with the original
5105 // list item.
5106 QualType PrivateType = Privates->getType();
5107 llvm::Type *LLVMType = CGF.ConvertTypeForMem(PrivateType);
5108 
5109 const OMPDeclareReductionDecl *UDR = getReductionInit(ReductionOps);
// Name of the shared variable: derived from the referenced declaration when
// Privates is a DeclRefExpr, otherwise a fixed fallback string.
5110 std::string ReductionVarNameStr;
5111 if (const auto *DRE = dyn_cast<DeclRefExpr>(Privates->IgnoreParenCasts()))
5112 ReductionVarNameStr =
5113 generateUniqueName(CGM, DRE->getDecl()->getNameAsString(), Privates);
5114 else
5115 ReductionVarNameStr = "unnamed_priv_var";
5116 
5117 // Create an internal shared variable
5118 std::string SharedName =
5119 CGM.getOpenMPRuntime().getName({"internal_pivate_", ReductionVarNameStr});
5120 llvm::GlobalVariable *SharedVar = OMPBuilder.getOrCreateInternalVariable(
5121 LLVMType, ".omp.reduction." + SharedName);
5122 
// NOTE(review): the division by 8 presumably converts an alignment in bits to
// bytes - confirm against getTypeAlign().
5123 SharedVar->setAlignment(
5124 llvm::MaybeAlign(CGF.getContext().getTypeAlign(PrivateType) / 8));
5125 
5126 Address SharedResult =
5127 CGF.MakeNaturalAlignRawAddrLValue(SharedVar, PrivateType).getAddress();
5128 
// The same {location, thread id} pair is reused for every barrier call below.
5129 llvm::Value *ThreadId = getThreadID(CGF, Loc);
5130 llvm::Value *BarrierLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
5131 llvm::Value *BarrierArgs[] = {BarrierLoc, ThreadId};
5132 
5133 llvm::BasicBlock *InitBB = CGF.createBasicBlock("init");
5134 llvm::BasicBlock *InitEndBB = CGF.createBasicBlock("init.end");
5135 
// Despite its name, IsWorker is true exactly when the thread id equals 0; only
// that thread runs the "init" block.
5136 llvm::Value *IsWorker = CGF.Builder.CreateICmpEQ(
5137 ThreadId, llvm::ConstantInt::get(ThreadId->getType(), 0));
5138 CGF.Builder.CreateCondBr(IsWorker, InitBB, InitEndBB);
5139 
5140 CGF.EmitBlock(InitBB);
5141 
// Initializes SharedResult. Order of preference: UDR initializer (called
// through the initializer function when one exists), the initializer of the
// variable referenced by Privates, and finally null initialization.
5142 auto EmitSharedInit = [&]() {
5143 if (UDR) { // Check if it's a User-Defined Reduction
5144 if (const Expr *UDRInitExpr = UDR->getInitializer()) {
5145 std::pair<llvm::Function *, llvm::Function *> FnPair =
// NOTE(review): the right-hand side of this initialization (listing line 5146)
// is not visible.
5147 llvm::Function *InitializerFn = FnPair.second;
5148 if (InitializerFn) {
5149 if (const auto *CE =
5150 dyn_cast<CallExpr>(UDRInitExpr->IgnoreParenImpCasts())) {
// The first call argument is expected to be a unary operator applied to a
// DeclRefExpr; that declaration is remapped onto the shared variable.
5151 const auto *OutDRE = cast<DeclRefExpr>(
5152 cast<UnaryOperator>(CE->getArg(0)->IgnoreParenImpCasts())
5153 ->getSubExpr());
5154 const VarDecl *OutVD = cast<VarDecl>(OutDRE->getDecl());
5155 
5156 CodeGenFunction::OMPPrivateScope LocalScope(CGF);
5157 LocalScope.addPrivate(OutVD, SharedResult);
5158 
5159 (void)LocalScope.Privatize();
5160 if (const auto *OVE = dyn_cast<OpaqueValueExpr>(
5161 CE->getCallee()->IgnoreParenImpCasts())) {
// NOTE(review): the statement opening on listing line 5162 is not visible; it
// receives the opaque callee and InitializerFn as arguments.
5163 CGF, OVE, RValue::get(InitializerFn));
5164 CGF.EmitIgnoredExpr(CE);
5165 } else {
5166 CGF.EmitAnyExprToMem(UDRInitExpr, SharedResult,
5167 PrivateType.getQualifiers(),
5168 /*IsInitializer=*/true);
5169 }
5170 } else {
5171 CGF.EmitAnyExprToMem(UDRInitExpr, SharedResult,
5172 PrivateType.getQualifiers(),
5173 /*IsInitializer=*/true);
5174 }
5175 } else {
5176 CGF.EmitAnyExprToMem(UDRInitExpr, SharedResult,
5177 PrivateType.getQualifiers(),
5178 /*IsInitializer=*/true);
5179 }
5180 } else {
5181 // UDR without an initializer: fall back to EmitNullInitialization.
5182 // NOTE(review): what this does for C++ class types is not shown here.
5183 CGF.EmitNullInitialization(SharedResult, PrivateType);
5184 }
5185 return; // UDR initialization handled
5186 }
// Not a UDR: reuse the initializer of the variable referenced by Privates, if
// there is one.
5187 if (const auto *DRE = dyn_cast<DeclRefExpr>(Privates)) {
5188 if (const auto *VD = dyn_cast<VarDecl>(DRE->getDecl())) {
5189 if (const Expr *InitExpr = VD->getInit()) {
5190 CGF.EmitAnyExprToMem(InitExpr, SharedResult,
5191 PrivateType.getQualifiers(), true);
5192 return;
5193 }
5194 }
5195 }
5196 CGF.EmitNullInitialization(SharedResult, PrivateType);
5197 };
5198 EmitSharedInit();
5199 CGF.Builder.CreateBr(InitEndBB);
5200 CGF.EmitBlock(InitEndBB);
5201 
// Barrier emitted after the initialization block.
5202 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
5203 CGM.getModule(), OMPRTL___kmpc_barrier),
5204 BarrierArgs);
5205 
5206 const Expr *ReductionOp = ReductionOps;
5207 const OMPDeclareReductionDecl *CurrentUDR = getReductionInit(ReductionOp);
5208 LValue SharedLV = CGF.MakeAddrLValue(SharedResult, PrivateType);
// Despite the name, LHSLV is the lvalue of Privates (this thread's value).
5209 LValue LHSLV = CGF.EmitLValue(Privates);
5210 
// Wraps a region generator in the critical region named "reduction_critical".
5211 auto EmitCriticalReduction = [&](auto ReductionGen) {
5212 std::string CriticalName = getName({"reduction_critical"});
5213 emitCriticalRegion(CGF, CriticalName, ReductionGen, Loc);
5214 };
5215 
5216 if (CurrentUDR) {
5217 // Handle user-defined reduction.
5218 auto ReductionGen = [&](CodeGenFunction &CGF, PrePostActionTy &Action) {
5219 Action.Enter(CGF);
5220 std::pair<llvm::Function *, llvm::Function *> FnPair =
5221 getUserDefinedReduction(CurrentUDR);
// Nothing is emitted unless a combiner function exists and ReductionOp is a
// CallExpr.
5222 if (FnPair.first) {
5223 if (const auto *CE = dyn_cast<CallExpr>(ReductionOp)) {
5224 const auto *OutDRE = cast<DeclRefExpr>(
5225 cast<UnaryOperator>(CE->getArg(0)->IgnoreParenImpCasts())
5226 ->getSubExpr());
5227 const auto *InDRE = cast<DeclRefExpr>(
5228 cast<UnaryOperator>(CE->getArg(1)->IgnoreParenImpCasts())
5229 ->getSubExpr());
// First argument -> shared variable, second argument -> this thread's value.
5230 CodeGenFunction::OMPPrivateScope LocalScope(CGF);
5231 LocalScope.addPrivate(cast<VarDecl>(OutDRE->getDecl()),
5232 SharedLV.getAddress());
5233 LocalScope.addPrivate(cast<VarDecl>(InDRE->getDecl()),
5234 LHSLV.getAddress());
5235 (void)LocalScope.Privatize();
5236 emitReductionCombiner(CGF, ReductionOp);
5237 }
5238 }
5239 };
5240 EmitCriticalReduction(ReductionGen);
5241 } else {
5242 // Handle built-in reduction operations.
// Debug-only sanity check: AssignRHS is computed solely to feed the assert
// below and is not used for code generation.
5243 #ifndef NDEBUG
5244 const Expr *ReductionClauseExpr = ReductionOp->IgnoreParenCasts();
5245 if (const auto *Cleanup = dyn_cast<ExprWithCleanups>(ReductionClauseExpr))
5246 ReductionClauseExpr = Cleanup->getSubExpr()->IgnoreParenCasts();
5247 
5248 const Expr *AssignRHS = nullptr;
5249 if (const auto *BinOp = dyn_cast<BinaryOperator>(ReductionClauseExpr)) {
5250 if (BinOp->getOpcode() == BO_Assign)
5251 AssignRHS = BinOp->getRHS();
5252 } else if (const auto *OpCall =
5253 dyn_cast<CXXOperatorCallExpr>(ReductionClauseExpr)) {
5254 if (OpCall->getOperator() == OO_Equal)
5255 AssignRHS = OpCall->getArg(1);
5256 }
5257 
5258 assert(AssignRHS &&
5259 "Private Variable Reduction : Invalid ReductionOp expression");
5260 #endif
5261 
5262 auto ReductionGen = [&](CodeGenFunction &CGF, PrePostActionTy &Action) {
5263 Action.Enter(CGF);
// The dyn_casts are checked only by the assert; in builds without asserts a
// mismatch reaches the OmpOutDRE/OmpInDRE dereferences below unchecked.
5264 const auto *OmpOutDRE =
5265 dyn_cast<DeclRefExpr>(LHSExprs->IgnoreParenImpCasts());
5266 const auto *OmpInDRE =
5267 dyn_cast<DeclRefExpr>(RHSExprs->IgnoreParenImpCasts());
5268 assert(
5269 OmpOutDRE && OmpInDRE &&
5270 "Private Variable Reduction : LHSExpr/RHSExpr must be DeclRefExprs");
5271 const VarDecl *OmpOutVD = cast<VarDecl>(OmpOutDRE->getDecl());
5272 const VarDecl *OmpInVD = cast<VarDecl>(OmpInDRE->getDecl());
// LHSExprs' declaration -> shared variable, RHSExprs' declaration -> this
// thread's value.
5273 CodeGenFunction::OMPPrivateScope LocalScope(CGF);
5274 LocalScope.addPrivate(OmpOutVD, SharedLV.getAddress());
5275 LocalScope.addPrivate(OmpInVD, LHSLV.getAddress());
5276 (void)LocalScope.Privatize();
5277 // Emit the actual reduction operation
5278 CGF.EmitIgnoredExpr(ReductionOp);
5279 };
5280 EmitCriticalReduction(ReductionGen);
5281 }
5282 
// Barrier emitted after the combine step.
5283 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
5284 CGM.getModule(), OMPRTL___kmpc_barrier),
5285 BarrierArgs);
5286 
5287 // Broadcast final result
// Aggregates are copied from the shared address; everything else is loaded as
// a scalar and stored.
5288 bool IsAggregate = PrivateType->isAggregateType();
5289 LValue SharedLV1 = CGF.MakeAddrLValue(SharedResult, PrivateType);
5290 llvm::Value *FinalResultVal = nullptr;
5291 Address FinalResultAddr = Address::invalid();
5292 
5293 if (IsAggregate)
5294 FinalResultAddr = SharedResult;
5295 else
5296 FinalResultVal = CGF.EmitLoadOfScalar(SharedLV1, Loc);
5297 
// Despite the name, TargetLHSLV is the lvalue of RHSExprs.
5298 LValue TargetLHSLV = CGF.EmitLValue(RHSExprs);
5299 if (IsAggregate) {
5300 CGF.EmitAggregateCopy(TargetLHSLV,
5301 CGF.MakeAddrLValue(FinalResultAddr, PrivateType),
5302 PrivateType, AggValueSlot::DoesNotOverlap, false);
5303 } else {
5304 CGF.EmitStoreOfScalar(FinalResultVal, TargetLHSLV);
5305 }
5306 // Final synchronization barrier
5307 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
5308 CGM.getModule(), OMPRTL___kmpc_barrier),
5309 BarrierArgs);
5310 
5311 // Combiner with original list item
// LHSExprs and RHSExprs must be DeclRefExprs here (cast<> asserts on
// mismatch).
5312 auto OriginalListCombiner = [&](CodeGenFunction &CGF,
5313 PrePostActionTy &Action) {
5314 Action.Enter(CGF);
5315 emitSingleReductionCombiner(CGF, ReductionOps, Privates,
5316 cast<DeclRefExpr>(LHSExprs),
5317 cast<DeclRefExpr>(RHSExprs));
5318 };
5319 EmitCriticalReduction(OriginalListCombiner);
5320 }
5321
/// Emits the code for a reduction over the given list items; the generated
/// code shape is described in the large comment at the top of the body.
///
/// \param OrgPrivates Private copies, one per list item.
/// \param OrgLHSExprs Per-item LHS references (must be DeclRefExprs).
/// \param OrgRHSExprs Per-item RHS references (must be DeclRefExprs).
/// \param OrgReductionOps Per-item combiner expressions.
/// \param Options Supplies WithNowait, SimpleReduction and the per-item
/// IsPrivateVarReduction flags read below.
// NOTE(review): the embedded numbering skips 5322, so the first signature line
// (function name and the declarations of `CGF`/`Loc`) is not visible in this
// listing. Lines 5372, 5425, 5496, 5508 and 5600 are absent as well.
5323 ArrayRef<const Expr *> OrgPrivates,
5324 ArrayRef<const Expr *> OrgLHSExprs,
5325 ArrayRef<const Expr *> OrgRHSExprs,
5326 ArrayRef<const Expr *> OrgReductionOps,
5327 ReductionOptionsTy Options) {
// Nothing to emit without an insertion point.
5328 if (!CGF.HaveInsertPoint())
5329 return;
5330 
5331 bool WithNowait = Options.WithNowait;
5332 bool SimpleReduction = Options.SimpleReduction;
5333 
5334 // Next code should be emitted for reduction:
5335 //
5336 // static kmp_critical_name lock = { 0 };
5337 //
5338 // void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
5339 // *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
5340 // ...
5341 // *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1],
5342 // *(Type<n>-1*)rhs[<n>-1]);
5343 // }
5344 //
5345 // ...
5346 // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
5347 // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
5348 // RedList, reduce_func, &<lock>)) {
5349 // case 1:
5350 // ...
5351 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
5352 // ...
5353 // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
5354 // break;
5355 // case 2:
5356 // ...
5357 // Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
5358 // ...
5359 // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);]
5360 // break;
5361 // default:;
5362 // }
5363 //
5364 // if SimpleReduction is true, only the next code is generated:
5365 // ...
5366 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
5367 // ...
5368 
5369 ASTContext &C = CGM.getContext();
5370 
// Simple reduction: emit each combiner inline and return; no runtime call is
// generated and IsPrivateVarReduction is not consulted.
5371 if (SimpleReduction) {
5373 const auto *IPriv = OrgPrivates.begin();
5374 const auto *ILHS = OrgLHSExprs.begin();
5375 const auto *IRHS = OrgRHSExprs.begin();
5376 for (const Expr *E : OrgReductionOps) {
5377 emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
5378 cast<DeclRefExpr>(*IRHS));
5379 ++IPriv;
5380 ++ILHS;
5381 ++IRHS;
5382 }
5383 return;
5384 }
5385 
5386 // Filter out shared reduction variables based on IsPrivateVarReduction flag.
5387 // Only keep entries where the corresponding variable is not private.
// NOTE(review): this loop indexes all four Org* arrays and
// Options.IsPrivateVarReduction with the same I, but the size-agreement asserts
// appear only near the end of the function - consider checking earlier.
5388 SmallVector<const Expr *> FilteredPrivates, FilteredLHSExprs,
5389 FilteredRHSExprs, FilteredReductionOps;
5390 for (unsigned I : llvm::seq<unsigned>(
5391 std::min(OrgReductionOps.size(), OrgLHSExprs.size()))) {
5392 if (!Options.IsPrivateVarReduction[I]) {
5393 FilteredPrivates.emplace_back(OrgPrivates[I]);
5394 FilteredLHSExprs.emplace_back(OrgLHSExprs[I]);
5395 FilteredRHSExprs.emplace_back(OrgRHSExprs[I]);
5396 FilteredReductionOps.emplace_back(OrgReductionOps[I]);
5397 }
5398 }
5399 // Wrap filtered vectors in ArrayRef for downstream shared reduction
5400 // processing.
5401 ArrayRef<const Expr *> Privates = FilteredPrivates;
5402 ArrayRef<const Expr *> LHSExprs = FilteredLHSExprs;
5403 ArrayRef<const Expr *> RHSExprs = FilteredRHSExprs;
5404 ArrayRef<const Expr *> ReductionOps = FilteredReductionOps;
5405 
5406 // 1. Build a list of reduction variables.
5407 // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
// One slot per item, plus one extra slot for each variably modified item.
5408 auto Size = RHSExprs.size();
5409 for (const Expr *E : Privates) {
5410 if (E->getType()->isVariablyModifiedType())
5411 // Reserve place for array size.
5412 ++Size;
5413 }
5414 llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size);
5415 QualType ReductionArrayTy = C.getConstantArrayType(
5416 C.VoidPtrTy, ArraySize, nullptr, ArraySizeModifier::Normal,
5417 /*IndexTypeQuals=*/0);
5418 RawAddress ReductionList =
5419 CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
5420 const auto *IPriv = Privates.begin();
// `I` indexes RHSExprs; `Idx` indexes list slots and runs ahead of `I` after
// each variably modified item.
5421 unsigned Idx = 0;
5422 for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) {
5423 Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
5424 CGF.Builder.CreateStore(
// NOTE(review): the cast opening the stored value (listing line 5425) is not
// visible; the operand is the pointer of the RHS item's lvalue.
5426 CGF.EmitLValue(RHSExprs[I]).getPointer(CGF), CGF.VoidPtrTy),
5427 Elem);
5428 if ((*IPriv)->getType()->isVariablyModifiedType()) {
5429 // Store array size.
// The element count is cast to SizeTy and stored in the next slot as a
// pointer-typed value (CreateIntToPtr).
5430 ++Idx;
5431 Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
5432 llvm::Value *Size = CGF.Builder.CreateIntCast(
5433 CGF.getVLASize(
5434 CGF.getContext().getAsVariableArrayType((*IPriv)->getType()))
5435 .NumElts,
5436 CGF.SizeTy, /*isSigned=*/false);
5437 CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy),
5438 Elem);
5439 }
5440 }
5441 
5442 // 2. Emit reduce_func().
5443 llvm::Function *ReductionFn = emitReductionFunction(
5444 CGF.CurFn->getName(), Loc, CGF.ConvertTypeForMem(ReductionArrayTy),
5445 Privates, LHSExprs, RHSExprs, ReductionOps);
5446 
5447 // 3. Create static kmp_critical_name lock = { 0 };
5448 std::string Name = getName({"reduction"});
5449 llvm::Value *Lock = getCriticalRegionLock(Name);
5450 
5451 // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
5452 // RedList, reduce_func, &<lock>);
5453 llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
5454 llvm::Value *ThreadId = getThreadID(CGF, Loc);
5455 llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy);
5456 llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5457 ReductionList.getPointer(), CGF.VoidPtrTy);
5458 llvm::Value *Args[] = {
5459 IdentTLoc, // ident_t *<loc>
5460 ThreadId, // i32 <gtid>
5461 CGF.Builder.getInt32(RHSExprs.size()), // i32 <n>
5462 ReductionArrayTySize, // size_type sizeof(RedList)
5463 RL, // void *RedList
5464 ReductionFn, // void (*) (void *, void *) <reduce_func>
5465 Lock // kmp_critical_name *&<lock>
5466 };
5467 llvm::Value *Res = CGF.EmitRuntimeCall(
5468 OMPBuilder.getOrCreateRuntimeFunction(
5469 CGM.getModule(),
5470 WithNowait ? OMPRTL___kmpc_reduce_nowait : OMPRTL___kmpc_reduce),
5471 Args);
5472 
5473 // 5. Build switch(res)
5474 llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(".omp.reduction.default");
5475 llvm::SwitchInst *SwInst =
5476 CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2);
5477 
5478 // 6. Build case 1:
5479 // ...
5480 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
5481 // ...
5482 // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
5483 // break;
5484 llvm::BasicBlock *Case1BB = CGF.createBasicBlock(".omp.reduction.case1");
5485 SwInst->addCase(CGF.Builder.getInt32(1), Case1BB);
5486 CGF.EmitBlock(Case1BB);
5487 
5488 // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
5489 llvm::Value *EndArgs[] = {
5490 IdentTLoc, // ident_t *<loc>
5491 ThreadId, // i32 <gtid>
5492 Lock // kmp_critical_name *&<lock>
5493 };
// Case-1 body: emit every combiner in sequence.
// NOTE(review): `RT` (used inside) and `RCG` (used after the lambda) are
// declared on listing lines 5496 and 5508, which are not visible.
5494 auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps](
5495 CodeGenFunction &CGF, PrePostActionTy &Action) {
5497 const auto *IPriv = Privates.begin();
5498 const auto *ILHS = LHSExprs.begin();
5499 const auto *IRHS = RHSExprs.begin();
5500 for (const Expr *E : ReductionOps) {
5501 RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
5502 cast<DeclRefExpr>(*IRHS));
5503 ++IPriv;
5504 ++ILHS;
5505 ++IRHS;
5506 }
5507 };
// The action carries only the end-reduce runtime function and its arguments;
// the first two constructor arguments are left empty.
5509 CommonActionTy Action(
5510 nullptr, {},
5511 OMPBuilder.getOrCreateRuntimeFunction(
5512 CGM.getModule(), WithNowait ? OMPRTL___kmpc_end_reduce_nowait
5513 : OMPRTL___kmpc_end_reduce),
5514 EndArgs);
5515 RCG.setAction(Action);
5516 RCG(CGF);
5517 
5518 CGF.EmitBranch(DefaultBB);
5519 
5520 // 7. Build case 2:
5521 // ...
5522 // Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
5523 // ...
5524 // break;
5525 llvm::BasicBlock *Case2BB = CGF.createBasicBlock(".omp.reduction.case2");
5526 SwInst->addCase(CGF.Builder.getInt32(2), Case2BB);
5527 CGF.EmitBlock(Case2BB);
5528 
// Case-2 body: for each item, try the atomic-update form when the combiner is
// a plain assignment; otherwise fall back to a critical region.
5529 auto &&AtomicCodeGen = [Loc, Privates, LHSExprs, RHSExprs, ReductionOps](
5530 CodeGenFunction &CGF, PrePostActionTy &Action) {
5531 const auto *ILHS = LHSExprs.begin();
5532 const auto *IRHS = RHSExprs.begin();
5533 const auto *IPriv = Privates.begin();
5534 for (const Expr *E : ReductionOps) {
5535 const Expr *XExpr = nullptr;
5536 const Expr *EExpr = nullptr;
5537 const Expr *UpExpr = nullptr;
// BO_Comma is the initial value; it is replaced below only when the update
// expression's RHS is a binary operator.
5538 BinaryOperatorKind BO = BO_Comma;
// Note: the `BO` declared in this condition shadows the BinaryOperatorKind
// above for the duration of the if-statement.
5539 if (const auto *BO = dyn_cast<BinaryOperator>(E)) {
5540 if (BO->getOpcode() == BO_Assign) {
5541 XExpr = BO->getLHS();
5542 UpExpr = BO->getRHS();
5543 }
5544 }
5545 // Try to emit update expression as a simple atomic.
5546 const Expr *RHSExpr = UpExpr;
5547 if (RHSExpr) {
5548 // Analyze RHS part of the whole expression.
5549 if (const auto *ACO = dyn_cast<AbstractConditionalOperator>(
5550 RHSExpr->IgnoreParenImpCasts())) {
5551 // If this is a conditional operator, analyze its condition for
5552 // min/max reduction operator.
5553 RHSExpr = ACO->getCond();
5554 }
5555 if (const auto *BORHS =
5556 dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) {
5557 EExpr = BORHS->getRHS();
5558 BO = BORHS->getOpcode();
5559 }
5560 }
5561 if (XExpr) {
5562 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
// Emits X as an atomic update. The nested callback stores the value it is
// given into a temporary, remaps VD to that temporary and evaluates UpExpr.
5563 auto &&AtomicRedGen = [BO, VD,
5564 Loc](CodeGenFunction &CGF, const Expr *XExpr,
5565 const Expr *EExpr, const Expr *UpExpr) {
5566 LValue X = CGF.EmitLValue(XExpr);
5567 RValue E;
5568 if (EExpr)
5569 E = CGF.EmitAnyExpr(EExpr);
5570 CGF.EmitOMPAtomicSimpleUpdateExpr(
5571 X, E, BO, /*IsXLHSInRHSPart=*/true,
5572 llvm::AtomicOrdering::Monotonic, Loc,
5573 [&CGF, UpExpr, VD, Loc](RValue XRValue) {
5574 CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
5575 Address LHSTemp = CGF.CreateMemTemp(VD->getType());
5576 CGF.emitOMPSimpleStore(
5577 CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue,
5578 VD->getType().getNonReferenceType(), Loc);
5579 PrivateScope.addPrivate(VD, LHSTemp);
5580 (void)PrivateScope.Privatize();
5581 return CGF.EmitAnyExpr(UpExpr);
5582 });
5583 };
5584 if ((*IPriv)->getType()->isArrayType()) {
5585 // Emit atomic reduction for array section.
5586 const auto *RHSVar =
5587 cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
5588 EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar,
5589 AtomicRedGen, XExpr, EExpr, UpExpr);
5590 } else {
5591 // Emit atomic reduction for array subscript or single variable.
5592 AtomicRedGen(CGF, XExpr, EExpr, UpExpr);
5593 }
5594 } else {
5595 // Emit as a critical region.
5596 auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *,
5597 const Expr *, const Expr *) {
5598 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
5599 std::string Name = RT.getName({"atomic_reduction"});
// NOTE(review): the callee (listing line 5600) is not visible; it receives the
// region name, the combiner callback and Loc.
5601 CGF, Name,
5602 [=](CodeGenFunction &CGF, PrePostActionTy &Action) {
5603 Action.Enter(CGF);
5604 emitReductionCombiner(CGF, E);
5605 },
5606 Loc);
5607 };
5608 if ((*IPriv)->getType()->isArrayType()) {
5609 const auto *LHSVar =
5610 cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
5611 const auto *RHSVar =
5612 cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
5613 EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar,
5614 CritRedGen);
5615 } else {
5616 CritRedGen(CGF, nullptr, nullptr, nullptr);
5617 }
5618 }
5619 ++ILHS;
5620 ++IRHS;
5621 ++IPriv;
5622 }
5623 };
5624 RegionCodeGenTy AtomicRCG(AtomicCodeGen);
// Only the non-nowait form attaches __kmpc_end_reduce to the atomic region.
5625 if (!WithNowait) {
5626 // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>);
5627 llvm::Value *EndArgs[] = {
5628 IdentTLoc, // ident_t *<loc>
5629 ThreadId, // i32 <gtid>
5630 Lock // kmp_critical_name *&<lock>
5631 };
5632 CommonActionTy Action(nullptr, {},
5633 OMPBuilder.getOrCreateRuntimeFunction(
5634 CGM.getModule(), OMPRTL___kmpc_end_reduce),
5635 EndArgs);
5636 AtomicRCG.setAction(Action);
5637 AtomicRCG(CGF);
5638 } else {
5639 AtomicRCG(CGF);
5640 }
5641 
5642 CGF.EmitBranch(DefaultBB);
5643 CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
// NOTE(review): these size checks run after the filtering loop above has
// already indexed the arrays; OrgRHSExprs' size is not checked at all.
5644 assert(OrgLHSExprs.size() == OrgPrivates.size() &&
5645 "PrivateVarReduction: Privates size mismatch");
5646 assert(OrgLHSExprs.size() == OrgReductionOps.size() &&
5647 "PrivateVarReduction: ReductionOps size mismatch");
// Items flagged as private-variable reductions were skipped above and are
// handled here, one emitPrivateReduction call per item.
5648 for (unsigned I : llvm::seq<unsigned>(
5649 std::min(OrgReductionOps.size(), OrgLHSExprs.size()))) {
5650 if (Options.IsPrivateVarReduction[I])
5651 emitPrivateReduction(CGF, Loc, OrgPrivates[I], OrgLHSExprs[I],
5652 OrgRHSExprs[I], OrgReductionOps[I]);
5653 }
5654 }
5655
5656 /// Generates unique name for artificial threadprivate variables.
5657 /// Format is: <Prefix> "." <Decl_mangled_name> "_" "<Decl_start_loc_raw_enc>"
///
/// \param Prefix Text written first into the result.
/// \param Ref Expression identifying the variable; its base declaration is
/// looked up with getBaseDecl(), falling back to treating \p Ref itself as a
/// DeclRefExpr to a VarDecl.
/// \returns the assembled name as a std::string.
// NOTE(review): the embedded numbering skips 5670, so the tail of the stream
// expression (presumably the location encoding named in the format above) is
// not visible in this listing.
5658 static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix,
5659 const Expr *Ref) {
5660 SmallString<256> Buffer;
5661 llvm::raw_svector_ostream Out(Buffer);
5662 const clang::DeclRefExpr *DE;
5663 const VarDecl *D = ::getBaseDecl(Ref, DE);
// No base declaration found: Ref must itself be a DeclRefExpr to a VarDecl
// (cast<> asserts otherwise).
5664 if (!D)
5665 D = cast<VarDecl>(cast<DeclRefExpr>(Ref)->getDecl());
5666 D = D->getCanonicalDecl();
// Local variables and parameters contribute their plain name; every other
// declaration contributes its mangled name.
5667 std::string Name = CGM.getOpenMPRuntime().getName(
5668 {D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(D)});
5669 Out << Prefix << Name << "_"
5671 return std::string(Out.str());
5672 }
5673
5674 /// Emits reduction initializer function:
5675 /// \code
5676 /// void @.red_init(void* %arg, void* %orig) {
5677 /// %0 = bitcast void* %arg to <type>*
5678 /// store <type> <init>, <type>* %0
5679 /// ret void
5680 /// }
5681 /// \endcode
///
/// \param Loc Source location used for the implicit parameters and for
/// StartFunction.
/// \param RCG Reduction code generator queried for item \p N.
/// \param N Index of the reduction item within \p RCG.
/// \returns the newly created internal-linkage function.
// NOTE(review): the embedded numbering skips 5717, so the line that declares
// `SizeAddr` is not visible in this listing.
5682 static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM,
5683 SourceLocation Loc,
5684 ReductionCodeGen &RCG, unsigned N) {
5685 ASTContext &C = CGM.getContext();
// Both parameters are declared as restrict-qualified void pointers.
5686 QualType VoidPtrTy = C.VoidPtrTy;
5687 VoidPtrTy.addRestrict();
5688 FunctionArgList Args;
5689 auto *Param =
5690 ImplicitParamDecl::Create(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
5691 VoidPtrTy, ImplicitParamKind::Other);
5692 auto *ParamOrig =
5693 ImplicitParamDecl::Create(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
5694 VoidPtrTy, ImplicitParamKind::Other);
5695 Args.emplace_back(Param);
5696 Args.emplace_back(ParamOrig);
5697 const auto &FnInfo =
5698 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5699 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
5700 std::string Name = CGM.getOpenMPRuntime().getName({"red_init", ""});
5701 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
5702 Name, &CGM.getModule());
5703 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
// The attribute is added only when a sample profile file is configured.
5704 if (!CGM.getCodeGenOpts().SampleProfileFile.empty())
5705 Fn->addFnAttr("sample-profile-suffix-elision-policy", "selected");
5706 Fn->setDoesNotRecurse();
5707 CodeGenFunction CGF(CGM);
5708 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
// Load the first parameter and treat it as a pointer to the private type of
// item N.
5709 QualType PrivateType = RCG.getPrivateType(N);
5710 Address PrivateAddr = CGF.EmitLoadOfPointer(
5711 CGF.GetAddrOfLocalVar(Param).withElementType(CGF.Builder.getPtrTy(0)),
5712 C.getPointerType(PrivateType)->castAs<PointerType>());
5713 llvm::Value *Size = nullptr;
5714 // If the size of the reduction item is non-constant, load it from global
5715 // threadprivate variable.
5716 if (RCG.getSizes(N).second) {
5718 CGF, CGM.getContext().getSizeType(),
5719 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
5720 Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
5721 CGM.getContext().getSizeType(), Loc);
5722 }
// Size stays null when the item's size is constant.
5723 RCG.emitAggregateType(CGF, N, Size);
5724 Address OrigAddr = Address::invalid();
5725 // If initializer uses initializer from declare reduction construct, emit a
5726 // pointer to the address of the original reduction item (required by
5727 // reduction initializer)
5728 if (RCG.usesReductionInitializer(N)) {
5729 Address SharedAddr = CGF.GetAddrOfLocalVar(ParamOrig);
5730 OrigAddr = CGF.EmitLoadOfPointer(
5731 SharedAddr,
5732 CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr());
5733 }
5734 // Emit the initializer:
5735 // %0 = bitcast void* %arg to <type>*
5736 // store <type> <init>, <type>* %0
// The callback passed as the last argument always returns false.
5737 RCG.emitInitialization(CGF, N, PrivateAddr, OrigAddr,
5738 [](CodeGenFunction &) { return false; });
5739 CGF.FinishFunction();
5740 return Fn;
5741 }
5742
5743 /// Emits reduction combiner function:
5744 /// \code
5745 /// void @.red_comb(void* %arg0, void* %arg1) {
5746 /// %lhs = bitcast void* %arg0 to <type>*
5747 /// %rhs = bitcast void* %arg1 to <type>*
5748 /// %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs)
5749 /// store <type> %2, <type>* %lhs
5750 /// ret void
5751 /// }
5752 /// \endcode
///
/// \param Loc Source location used for the implicit parameters and for
/// StartFunction.
/// \param RCG Reduction code generator queried for item \p N.
/// \param N Index of the reduction item within \p RCG.
/// \param ReductionOp Combiner expression emitted as the function body.
/// \param LHS Must be a DeclRefExpr to a VarDecl; remapped to the first
/// argument.
/// \param RHS Must be a DeclRefExpr to a VarDecl; remapped to the second
/// argument.
/// \param PrivateRef Forwarded to the combiner emission on the last call.
/// \returns the newly created internal-linkage function.
// NOTE(review): the embedded numbering skips 5787, 5801, 5808-5809 and 5816,
// so the `SizeAddr` declaration, the openings of both pointer loads and the
// callee of the final combiner call are not visible in this listing.
5753 static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM,
5754 SourceLocation Loc,
5755 ReductionCodeGen &RCG, unsigned N,
5756 const Expr *ReductionOp,
5757 const Expr *LHS, const Expr *RHS,
5758 const Expr *PrivateRef) {
5759 ASTContext &C = CGM.getContext();
5760 const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl());
5761 const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl());
5762 FunctionArgList Args;
5763 auto *ParamInOut =
5764 ImplicitParamDecl::Create(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
5765 C.VoidPtrTy, ImplicitParamKind::Other);
5766 auto *ParamIn =
5767 ImplicitParamDecl::Create(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
5768 C.VoidPtrTy, ImplicitParamKind::Other);
5769 Args.emplace_back(ParamInOut);
5770 Args.emplace_back(ParamIn);
5771 const auto &FnInfo =
5772 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5773 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
5774 std::string Name = CGM.getOpenMPRuntime().getName({"red_comb", ""});
5775 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
5776 Name, &CGM.getModule());
5777 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
// The attribute is added only when a sample profile file is configured.
5778 if (!CGM.getCodeGenOpts().SampleProfileFile.empty())
5779 Fn->addFnAttr("sample-profile-suffix-elision-policy", "selected");
5780 Fn->setDoesNotRecurse();
5781 CodeGenFunction CGF(CGM);
5782 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
5783 llvm::Value *Size = nullptr;
5784 // If the size of the reduction item is non-constant, load it from global
5785 // threadprivate variable.
5786 if (RCG.getSizes(N).second) {
5788 CGF, CGM.getContext().getSizeType(),
5789 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
5790 Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
5791 CGM.getContext().getSizeType(), Loc);
5792 }
// Size stays null when the item's size is constant.
5793 RCG.emitAggregateType(CGF, N, Size);
5794 // Remap lhs and rhs variables to the addresses of the function arguments.
5795 // %lhs = bitcast void* %arg0 to <type>*
5796 // %rhs = bitcast void* %arg1 to <type>*
5797 CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
5798 PrivateScope.addPrivate(
5799 LHSVD,
5800 // Pull out the pointer to the variable.
5802 CGF.GetAddrOfLocalVar(ParamInOut)
5803 .withElementType(CGF.Builder.getPtrTy(0)),
5804 C.getPointerType(LHSVD->getType())->castAs<PointerType>()));
5805 PrivateScope.addPrivate(
5806 RHSVD,
5807 // Pull out the pointer to the variable.
5810 CGF.Builder.getPtrTy(0)),
5811 C.getPointerType(RHSVD->getType())->castAs<PointerType>()));
5812 PrivateScope.Privatize();
5813 // Emit the combiner body:
5814 // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs)
5815 // store <type> %2, <type>* %lhs
// NOTE(review): the callee is not visible; the argument list matches
// emitSingleReductionCombiner(CGF, ReductionOp, PrivateRef, LHS, RHS) -
// confirm.
5817 CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS),
5818 cast<DeclRefExpr>(RHS));
5819 CGF.FinishFunction();
5820 return Fn;
5821 }
5822
5823/// Emits reduction finalizer function:
5824/// \code
5825/// void @.red_fini(void* %arg) {
5826/// %0 = bitcast void* %arg to <type>*
5827/// <destroy>(<type>* %0)
5828/// ret void
5829/// }
5830/// \endcode
/// \param CGM Module the finalizer is created in.
/// \param Loc Source location used for the implicit parameter and for the
/// start/end of the emitted function.
/// \param RCG Reduction codegen helper that owns the reduction items.
/// \param N Index of the reduction item the finalizer is emitted for.
/// \returns The emitted function, or nullptr when RCG.needCleanups(N) is false.
5831static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM,
5832 SourceLocation Loc,
5833 ReductionCodeGen &RCG, unsigned N) {
// Nothing to emit when the item needs no cleanups; callers get nullptr.
5834 if (!RCG.needCleanups(N))
5835 return nullptr;
5836 ASTContext &C = CGM.getContext();
// The finalizer takes exactly one implicit void* parameter.
5837 FunctionArgList Args;
5838 auto *Param =
5839 ImplicitParamDecl::Create(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
5840 C.VoidPtrTy, ImplicitParamKind::Other);
5841 Args.emplace_back(Param);
5842 const auto &FnInfo =
5843 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5844 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
// Create an internal-linkage function whose name is built from "red_fini".
5845 std::string Name = CGM.getOpenMPRuntime().getName({"red_fini", ""});
5846 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
5847 Name, &CGM.getModule());
5848 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
// The extra attribute is only attached when a sample profile file is set.
5849 if (!CGM.getCodeGenOpts().SampleProfileFile.empty())
5850 Fn->addFnAttr("sample-profile-suffix-elision-policy", "selected");
5851 Fn->setDoesNotRecurse();
// The body is emitted with a fresh CodeGenFunction, separate from any caller.
5852 CodeGenFunction CGF(CGM);
5853 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
// Load the void* argument; the resulting address is what gets cleaned up.
5854 Address PrivateAddr = CGF.EmitLoadOfPointer(
5855 CGF.GetAddrOfLocalVar(Param), C.VoidPtrTy.castAs<PointerType>());
5856 llvm::Value *Size = nullptr;
5857 // If the size of the reduction item is non-constant, load it from global
5858 // threadprivate variable.
5859 if (RCG.getSizes(N).second) {
// NOTE(review): the line that declares SizeAddr (listing line 5860) is absent
// from this listing; the two lines below are the tail of its initializer.
// Confirm against the upstream file.
5861 CGF, CGM.getContext().getSizeType(),
5862 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
5863 Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
5864 CGM.getContext().getSizeType(), Loc);
5865 }
// Size stays nullptr when the item's size is constant.
5866 RCG.emitAggregateType(CGF, N, Size);
5867 // Emit the finalizer body:
5868 // <destroy>(<type>* %0)
5869 RCG.emitCleanups(CGF, N, PrivateAddr);
5870 CGF.FinishFunction(Loc);
5871 return Fn;
5872}
5873
5876 ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
5877 if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty())
5878 return nullptr;
5879
5880 // Build typedef struct:
5881 // kmp_taskred_input {
5882 // void *reduce_shar; // shared reduction item
5883 // void *reduce_orig; // original reduction item used for initialization
5884 // size_t reduce_size; // size of data item
5885 // void *reduce_init; // data initialization routine
5886 // void *reduce_fini; // data finalization routine
5887 // void *reduce_comb; // data combiner routine
5888 // kmp_task_red_flags_t flags; // flags for additional info from compiler
5889 // } kmp_taskred_input_t;
5890 ASTContext &C = CGM.getContext();
5891 RecordDecl *RD = C.buildImplicitRecord("kmp_taskred_input_t");
5892 RD->startDefinition();
5893 const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
5894 const FieldDecl *OrigFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
5895 const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType());
5896 const FieldDecl *InitFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
5897 const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
5898 const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
5899 const FieldDecl *FlagsFD = addFieldToRecordDecl(
5900 C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false));
5901 RD->completeDefinition();
5902 CanQualType RDType = C.getCanonicalTagType(RD);
5903 unsigned Size = Data.ReductionVars.size();
5904 llvm::APInt ArraySize(/*numBits=*/64, Size);
5905 QualType ArrayRDType =
5906 C.getConstantArrayType(RDType, ArraySize, nullptr,
5907 ArraySizeModifier::Normal, /*IndexTypeQuals=*/0);
5908 // kmp_taskred_input_t .rd_input.[Size];
5909 RawAddress TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input.");
5910 ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionOrigs,
5911 Data.ReductionCopies, Data.ReductionOps);
5912 for (unsigned Cnt = 0; Cnt < Size; ++Cnt) {
5913 // kmp_taskred_input_t &ElemLVal = .rd_input.[Cnt];
5914 llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0),
5915 llvm::ConstantInt::get(CGM.SizeTy, Cnt)};
5916 llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP(
5917 TaskRedInput.getElementType(), TaskRedInput.getPointer(), Idxs,
5918 /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc,
5919 ".rd_input.gep.");
5920 LValue ElemLVal = CGF.MakeNaturalAlignRawAddrLValue(GEP, RDType);
5921 // ElemLVal.reduce_shar = &Shareds[Cnt];
5922 LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD);
5923 RCG.emitSharedOrigLValue(CGF, Cnt);
5924 llvm::Value *Shared = RCG.getSharedLValue(Cnt).getPointer(CGF);
5925 CGF.EmitStoreOfScalar(Shared, SharedLVal);
5926 // ElemLVal.reduce_orig = &Origs[Cnt];
5927 LValue OrigLVal = CGF.EmitLValueForField(ElemLVal, OrigFD);
5928 llvm::Value *Orig = RCG.getOrigLValue(Cnt).getPointer(CGF);
5929 CGF.EmitStoreOfScalar(Orig, OrigLVal);
5930 RCG.emitAggregateType(CGF, Cnt);
5931 llvm::Value *SizeValInChars;
5932 llvm::Value *SizeVal;
5933 std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt);
5934 // We use delayed creation/initialization for VLAs and array sections. It is
5935 // required because runtime does not provide the way to pass the sizes of
5936 // VLAs/array sections to initializer/combiner/finalizer functions. Instead
5937 // threadprivate global variables are used to store these values and use
5938 // them in the functions.
5939 bool DelayedCreation = !!SizeVal;
5940 SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy,
5941 /*isSigned=*/false);
5942 LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD);
5943 CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal);
5944 // ElemLVal.reduce_init = init;
5945 LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD);
5946 llvm::Value *InitAddr = emitReduceInitFunction(CGM, Loc, RCG, Cnt);
5947 CGF.EmitStoreOfScalar(InitAddr, InitLVal);
5948 // ElemLVal.reduce_fini = fini;
5949 LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD);
5950 llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt);
5951 llvm::Value *FiniAddr =
5952 Fini ? Fini : llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
5953 CGF.EmitStoreOfScalar(FiniAddr, FiniLVal);
5954 // ElemLVal.reduce_comb = comb;
5955 LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD);
5956 llvm::Value *CombAddr = emitReduceCombFunction(
5957 CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt],
5958 RHSExprs[Cnt], Data.ReductionCopies[Cnt]);
5959 CGF.EmitStoreOfScalar(CombAddr, CombLVal);
5960 // ElemLVal.flags = 0;
5961 LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD);
5962 if (DelayedCreation) {
5964 llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*isSigned=*/true),
5965 FlagsLVal);
5966 } else
5967 CGF.EmitNullInitialization(FlagsLVal.getAddress(), FlagsLVal.getType());
5968 }
5969 if (Data.IsReductionWithTaskMod) {
5970 // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int
5971 // is_ws, int num, void *data);
5972 llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
5973 llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
5974 CGM.IntTy, /*isSigned=*/true);
5975 llvm::Value *Args[] = {
5976 IdentTLoc, GTid,
5977 llvm::ConstantInt::get(CGM.IntTy, Data.IsWorksharingReduction ? 1 : 0,
5978 /*isSigned=*/true),
5979 llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
5981 TaskRedInput.getPointer(), CGM.VoidPtrTy)};
5982 return CGF.EmitRuntimeCall(
5983 OMPBuilder.getOrCreateRuntimeFunction(
5984 CGM.getModule(), OMPRTL___kmpc_taskred_modifier_init),
5985 Args);
5986 }
5987 // Build call void *__kmpc_taskred_init(int gtid, int num_data, void *data);
5988 llvm::Value *Args[] = {
5989 CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
5990 /*isSigned=*/true),
5991 llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
5993 CGM.VoidPtrTy)};
5994 return CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
5995 CGM.getModule(), OMPRTL___kmpc_taskred_init),
5996 Args);
5997}
5998
6000 SourceLocation Loc,
6001 bool IsWorksharingReduction) {
6002 // Build call void __kmpc_task_reduction_modifier_fini(ident_t *loc, int
6003 // gtid, int is_ws);
6004 llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
6005 llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
6006 CGM.IntTy, /*isSigned=*/true);
6007 llvm::Value *Args[] = {IdentTLoc, GTid,
6008 llvm::ConstantInt::get(CGM.IntTy,
6009 IsWorksharingReduction ? 1 : 0,
6010 /*isSigned=*/true)};
6011 (void)CGF.EmitRuntimeCall(
6012 OMPBuilder.getOrCreateRuntimeFunction(
6013 CGM.getModule(), OMPRTL___kmpc_task_reduction_modifier_fini),
6014 Args);
6015}
6016
6018 SourceLocation Loc,
6019 ReductionCodeGen &RCG,
6020 unsigned N) {
6021 auto Sizes = RCG.getSizes(N);
6022 // Emit threadprivate global variable if the type is non-constant
6023 // (Sizes.second != nullptr).
6024 if (Sizes.second) {
6025 llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy,
6026 /*isSigned=*/false);
6028 CGF, CGM.getContext().getSizeType(),
6029 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
6030 CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false);
6031 }
6032}
6033
6035 SourceLocation Loc,
6036 llvm::Value *ReductionsPtr,
6037 LValue SharedLVal) {
6038 // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
6039 // *d);
6040 llvm::Value *Args[] = {CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
6041 CGM.IntTy,
6042 /*isSigned=*/true),
6043 ReductionsPtr,
6045 SharedLVal.getPointer(CGF), CGM.VoidPtrTy)};
6046 return Address(
6047 CGF.EmitRuntimeCall(
6048 OMPBuilder.getOrCreateRuntimeFunction(
6049 CGM.getModule(), OMPRTL___kmpc_task_reduction_get_th_data),
6050 Args),
6051 CGF.Int8Ty, SharedLVal.getAlignment());
6052}
6053
6055 const OMPTaskDataTy &Data) {
6056 if (!CGF.HaveInsertPoint())
6057 return;
6058
6059 if (CGF.CGM.getLangOpts().OpenMPIRBuilder && Data.Dependences.empty()) {
6060 // TODO: Need to support taskwait with dependences in the OpenMPIRBuilder.
6061 OMPBuilder.createTaskwait(CGF.Builder);
6062 } else {
6063 llvm::Value *ThreadID = getThreadID(CGF, Loc);
6064 llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
6065 auto &M = CGM.getModule();
6066 Address DependenciesArray = Address::invalid();
6067 llvm::Value *NumOfElements;
6068 std::tie(NumOfElements, DependenciesArray) =
6069 emitDependClause(CGF, Data.Dependences, Loc);
6070 if (!Data.Dependences.empty()) {
6071 llvm::Value *DepWaitTaskArgs[7];
6072 DepWaitTaskArgs[0] = UpLoc;
6073 DepWaitTaskArgs[1] = ThreadID;
6074 DepWaitTaskArgs[2] = NumOfElements;
6075 DepWaitTaskArgs[3] = DependenciesArray.emitRawPointer(CGF);
6076 DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
6077 DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
6078 DepWaitTaskArgs[6] =
6079 llvm::ConstantInt::get(CGF.Int32Ty, Data.HasNowaitClause);
6080
6081 CodeGenFunction::RunCleanupsScope LocalScope(CGF);
6082
6083 // Build void __kmpc_omp_taskwait_deps_51(ident_t *, kmp_int32 gtid,
6084 // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
6085 // ndeps_noalias, kmp_depend_info_t *noalias_dep_list,
6086 // kmp_int32 has_no_wait); if dependence info is specified.
6087 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
6088 M, OMPRTL___kmpc_omp_taskwait_deps_51),
6089 DepWaitTaskArgs);
6090
6091 } else {
6092
6093 // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
6094 // global_tid);
6095 llvm::Value *Args[] = {UpLoc, ThreadID};
6096 // Ignore return result until untied tasks are supported.
6097 CGF.EmitRuntimeCall(
6098 OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_taskwait),
6099 Args);
6100 }
6101 }
6102
6103 if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
6104 Region->emitUntiedSwitch(CGF);
6105}
6106
6108 OpenMPDirectiveKind InnerKind,
6109 const RegionCodeGenTy &CodeGen,
6110 bool HasCancel) {
6111 if (!CGF.HaveInsertPoint())
6112 return;
6113 InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel,
6114 InnerKind != OMPD_critical &&
6115 InnerKind != OMPD_master &&
6116 InnerKind != OMPD_masked);
6117 CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);
6118}
6119
namespace {
/// Cancellation kind codes. getCancellationKind produces them and the result
/// is passed as the `cncl_kind` argument of the __kmpc_cancel and
/// __kmpc_cancellationpoint runtime calls. The numeric values are spelled out
/// because they are handed to the runtime as plain 32-bit integers.
enum RTCancelKind {
  /// Initial value only; getCancellationKind never returns it.
  CancelNoreq = 0,
  /// Cancel region is 'parallel'.
  CancelParallel = 1,
  /// Cancel region is 'for'.
  CancelLoop = 2,
  /// Cancel region is 'sections'.
  CancelSections = 3,
  /// Cancel region is 'taskgroup'.
  CancelTaskgroup = 4
};
} // anonymous namespace
6129
6130static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) {
6131 RTCancelKind CancelKind = CancelNoreq;
6132 if (CancelRegion == OMPD_parallel)
6133 CancelKind = CancelParallel;
6134 else if (CancelRegion == OMPD_for)
6135 CancelKind = CancelLoop;
6136 else if (CancelRegion == OMPD_sections)
6137 CancelKind = CancelSections;
6138 else {
6139 assert(CancelRegion == OMPD_taskgroup);
6140 CancelKind = CancelTaskgroup;
6141 }
6142 return CancelKind;
6143}
6144
6147 OpenMPDirectiveKind CancelRegion) {
6148 if (!CGF.HaveInsertPoint())
6149 return;
6150 // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
6151 // global_tid, kmp_int32 cncl_kind);
6152 if (auto *OMPRegionInfo =
6153 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
6154 // For 'cancellation point taskgroup', the task region info may not have a
6155 // cancel. This may instead happen in another adjacent task.
6156 if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) {
6157 llvm::Value *Args[] = {
6158 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
6159 CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
6160 // Ignore return result until untied tasks are supported.
6161 llvm::Value *Result = CGF.EmitRuntimeCall(
6162 OMPBuilder.getOrCreateRuntimeFunction(
6163 CGM.getModule(), OMPRTL___kmpc_cancellationpoint),
6164 Args);
6165 // if (__kmpc_cancellationpoint()) {
6166 // call i32 @__kmpc_cancel_barrier( // for parallel cancellation only
6167 // exit from construct;
6168 // }
6169 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
6170 llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
6171 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
6172 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
6173 CGF.EmitBlock(ExitBB);
6174 if (CancelRegion == OMPD_parallel)
6175 emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
6176 // exit from construct;
6177 CodeGenFunction::JumpDest CancelDest =
6178 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
6179 CGF.EmitBranchThroughCleanup(CancelDest);
6180 CGF.EmitBlock(ContBB, /*IsFinished=*/true);
6181 }
6182 }
6183}
6184
6186 const Expr *IfCond,
6187 OpenMPDirectiveKind CancelRegion) {
6188 if (!CGF.HaveInsertPoint())
6189 return;
6190 // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
6191 // kmp_int32 cncl_kind);
6192 auto &M = CGM.getModule();
6193 if (auto *OMPRegionInfo =
6194 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
6195 auto &&ThenGen = [this, &M, Loc, CancelRegion,
6196 OMPRegionInfo](CodeGenFunction &CGF, PrePostActionTy &) {
6197 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
6198 llvm::Value *Args[] = {
6199 RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc),
6200 CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
6201 // Ignore return result until untied tasks are supported.
6202 llvm::Value *Result = CGF.EmitRuntimeCall(
6203 OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_cancel), Args);
6204 // if (__kmpc_cancel()) {
6205 // call i32 @__kmpc_cancel_barrier( // for parallel cancellation only
6206 // exit from construct;
6207 // }
6208 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
6209 llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
6210 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
6211 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
6212 CGF.EmitBlock(ExitBB);
6213 if (CancelRegion == OMPD_parallel)
6214 RT.emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
6215 // exit from construct;
6216 CodeGenFunction::JumpDest CancelDest =
6217 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
6218 CGF.EmitBranchThroughCleanup(CancelDest);
6219 CGF.EmitBlock(ContBB, /*IsFinished=*/true);
6220 };
6221 if (IfCond) {
6222 emitIfClause(CGF, IfCond, ThenGen,
6223 [](CodeGenFunction &, PrePostActionTy &) {});
6224 } else {
6225 RegionCodeGenTy ThenRCG(ThenGen);
6226 ThenRCG(CGF);
6227 }
6228 }
6229}
6230
6231namespace {
6232/// Cleanup action for uses_allocators support.
/// Holds a list of (first, second) expression pairs and walks it once in
/// Enter() and once in Exit(); both hooks do nothing when CGF has no insert
/// point.
6233class OMPUsesAllocatorsActionTy final : public PrePostActionTy {
// NOTE(review): the declaration of the Allocators member (listing line 6234)
// is absent from this listing; the constructor below initializes it.
6235
6236public:
/// \param Allocators Expression pairs to process; Enter() uses both
/// elements of each pair, Exit() uses only the first.
6237 OMPUsesAllocatorsActionTy(
6238 ArrayRef<std::pair<const Expr *, const Expr *>> Allocators)
6239 : Allocators(Allocators) {}
/// Pre-action: issues one call per pair, passing (CGF, first, second).
6240 void Enter(CodeGenFunction &CGF) override {
6241 if (!CGF.HaveInsertPoint())
6242 return;
6243 for (const auto &AllocatorData : Allocators) {
// NOTE(review): the callee line (listing line 6244) is absent; presumably
// this emits the allocator initialization. Confirm against upstream.
6245 CGF, AllocatorData.first, AllocatorData.second);
6246 }
6247 }
/// Post-action: issues one call per pair, passing only the first element.
6248 void Exit(CodeGenFunction &CGF) override {
6249 if (!CGF.HaveInsertPoint())
6250 return;
6251 for (const auto &AllocatorData : Allocators) {
// NOTE(review): the callee line (listing line 6252) is absent; presumably
// this emits the allocator finalization. Confirm against upstream.
6253 AllocatorData.first);
6254 }
6255 }
6256};
6257} // namespace
6258
6260 const OMPExecutableDirective &D, StringRef ParentName,
6261 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
6262 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
6263 assert(!ParentName.empty() && "Invalid target entry parent name!");
6266 for (const auto *C : D.getClausesOfKind<OMPUsesAllocatorsClause>()) {
6267 for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
6268 const OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
6269 if (!D.AllocatorTraits)
6270 continue;
6271 Allocators.emplace_back(D.Allocator, D.AllocatorTraits);
6272 }
6273 }
6274 OMPUsesAllocatorsActionTy UsesAllocatorAction(Allocators);
6275 CodeGen.setAction(UsesAllocatorAction);
6276 emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
6277 IsOffloadEntry, CodeGen);
6278}
6279
6281 const Expr *Allocator,
6282 const Expr *AllocatorTraits) {
6283 llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
6284 ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
6285 // Use default memspace handle.
6286 llvm::Value *MemSpaceHandle = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
6287 llvm::Value *NumTraits = llvm::ConstantInt::get(
6289 AllocatorTraits->getType()->getAsArrayTypeUnsafe())
6290 ->getSize()
6291 .getLimitedValue());
6292 LValue AllocatorTraitsLVal = CGF.EmitLValue(AllocatorTraits);
6294 AllocatorTraitsLVal.getAddress(), CGF.VoidPtrPtrTy, CGF.VoidPtrTy);
6295 AllocatorTraitsLVal = CGF.MakeAddrLValue(Addr, CGF.getContext().VoidPtrTy,
6296 AllocatorTraitsLVal.getBaseInfo(),
6297 AllocatorTraitsLVal.getTBAAInfo());
6298 llvm::Value *Traits = Addr.emitRawPointer(CGF);
6299
6300 llvm::Value *AllocatorVal =
6301 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
6302 CGM.getModule(), OMPRTL___kmpc_init_allocator),
6303 {ThreadId, MemSpaceHandle, NumTraits, Traits});
6304 // Store to allocator.
6306 cast<DeclRefExpr>(Allocator->IgnoreParenImpCasts())->getDecl()));
6307 LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
6308 AllocatorVal =
6309 CGF.EmitScalarConversion(AllocatorVal, CGF.getContext().VoidPtrTy,
6310 Allocator->getType(), Allocator->getExprLoc());
6311 CGF.EmitStoreOfScalar(AllocatorVal, AllocatorLVal);
6312}
6313
6315 const Expr *Allocator) {
6316 llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
6317 ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
6318 LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
6319 llvm::Value *AllocatorVal =
6320 CGF.EmitLoadOfScalar(AllocatorLVal, Allocator->getExprLoc());
6321 AllocatorVal = CGF.EmitScalarConversion(AllocatorVal, Allocator->getType(),
6322 CGF.getContext().VoidPtrTy,
6323 Allocator->getExprLoc());
6324 (void)CGF.EmitRuntimeCall(
6325 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
6326 OMPRTL___kmpc_destroy_allocator),
6327 {ThreadId, AllocatorVal});
6328}
6329
6332 llvm::OpenMPIRBuilder::TargetKernelDefaultAttrs &Attrs) {
6333 assert(Attrs.MaxTeams.size() == 1 && Attrs.MaxThreads.size() == 1 &&
6334 "invalid default attrs structure");
6335 int32_t &MaxTeamsVal = Attrs.MaxTeams.front();
6336 int32_t &MaxThreadsVal = Attrs.MaxThreads.front();
6337
6338 getNumTeamsExprForTargetDirective(CGF, D, Attrs.MinTeams, MaxTeamsVal);
6339 getNumThreadsExprForTargetDirective(CGF, D, MaxThreadsVal,
6340 /*UpperBoundOnly=*/true);
6341
6342 for (auto *C : D.getClausesOfKind<OMPXAttributeClause>()) {
6343 for (auto *A : C->getAttrs()) {
6344 int32_t AttrMinThreadsVal = 1, AttrMaxThreadsVal = -1;
6345 int32_t AttrMinBlocksVal = 1, AttrMaxBlocksVal = -1;
6346 if (auto *Attr = dyn_cast<CUDALaunchBoundsAttr>(A))
6347 CGM.handleCUDALaunchBoundsAttr(nullptr, Attr, &AttrMaxThreadsVal,
6348 &AttrMinBlocksVal, &AttrMaxBlocksVal);
6349 else if (auto *Attr = dyn_cast<AMDGPUFlatWorkGroupSizeAttr>(A))
6350 CGM.handleAMDGPUFlatWorkGroupSizeAttr(
6351 nullptr, Attr, /*ReqdWGS=*/nullptr, &AttrMinThreadsVal,
6352 &AttrMaxThreadsVal);
6353 else
6354 continue;
6355
6356 Attrs.MinThreads = std::max(Attrs.MinThreads, AttrMinThreadsVal);
6357 if (AttrMaxThreadsVal > 0)
6358 MaxThreadsVal = MaxThreadsVal > 0
6359 ? std::min(MaxThreadsVal, AttrMaxThreadsVal)
6360 : AttrMaxThreadsVal;
6361 Attrs.MinTeams = std::max(Attrs.MinTeams, AttrMinBlocksVal);
6362 if (AttrMaxBlocksVal > 0)
6363 MaxTeamsVal = MaxTeamsVal > 0 ? std::min(MaxTeamsVal, AttrMaxBlocksVal)
6364 : AttrMaxBlocksVal;
6365 }
6366 }
6367}
6368
6370 const OMPExecutableDirective &D, StringRef ParentName,
6371 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
6372 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
6373
6374 llvm::TargetRegionEntryInfo EntryInfo =
6375 getEntryInfoFromPresumedLoc(CGM, OMPBuilder, D.getBeginLoc(), ParentName);
6376
6377 CodeGenFunction CGF(CGM, true);
6378 llvm::OpenMPIRBuilder::FunctionGenCallback &&GenerateOutlinedFunction =
6379 [&CGF, &D, &CodeGen, this](StringRef EntryFnName) {
6380 const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
6381
6382 CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName);
6383 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6384 if (CGM.getLangOpts().OpenMPIsTargetDevice && !isGPU())
6386 return CGF.GenerateOpenMPCapturedStmtFunction(CS, D);
6387 };
6388
6389 cantFail(OMPBuilder.emitTargetRegionFunction(
6390 EntryInfo, GenerateOutlinedFunction, IsOffloadEntry, OutlinedFn,
6391 OutlinedFnID));
6392
6393 if (!OutlinedFn)
6394 return;
6395
6396 CGM.getTargetCodeGenInfo().setTargetAttributes(nullptr, OutlinedFn, CGM);
6397
6398 for (auto *C : D.getClausesOfKind<OMPXAttributeClause>()) {
6399 for (auto *A : C->getAttrs()) {
6400 if (auto *Attr = dyn_cast<AMDGPUWavesPerEUAttr>(A))
6401 CGM.handleAMDGPUWavesPerEUAttr(OutlinedFn, Attr);
6402 }
6403 }
6404 registerVTable(D);
6405}
6406
6407/// Checks if the expression is constant or does not have non-trivial function
6408/// calls.
/// \param Ctx AST context used by the expression queries.
/// \param E Expression to classify.
/// \returns true only if, in addition to the first condition of the return
/// expression, \p E has no side effects (possible effects included).
6409static bool isTrivial(ASTContext &Ctx, const Expr * E) {
6410 // We can skip constant expressions.
6411 // We can skip expressions with trivial calls or simple expressions.
// NOTE(review): the first line of the return expression (listing line 6412)
// is absent from this listing. The visible remainder shows a parenthesized
// condition ending in "no non-trivial call", AND-ed with "no side effects".
6413 !E->hasNonTrivialCall(Ctx)) &&
6414 !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true);
6415}
6416
6418 const Stmt *Body) {
6419 const Stmt *Child = Body->IgnoreContainers();
6420 while (const auto *C = dyn_cast_or_null<CompoundStmt>(Child)) {
6421 Child = nullptr;
6422 for (const Stmt *S : C->body()) {
6423 if (const auto *E = dyn_cast<Expr>(S)) {
6424 if (isTrivial(Ctx, E))
6425 continue;
6426 }
6427 // Some of the statements can be ignored.
6430 continue;
6431 // Analyze declarations.
6432 if (const auto *DS = dyn_cast<DeclStmt>(S)) {
6433 if (llvm::all_of(DS->decls(), [](const Decl *D) {
6434 if (isa<EmptyDecl>(D) || isa<DeclContext>(D) ||
6435 isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) ||
6436 isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) ||
6437 isa<UsingDirectiveDecl>(D) ||
6438 isa<OMPDeclareReductionDecl>(D) ||
6439 isa<OMPThreadPrivateDecl>(D) || isa<OMPAllocateDecl>(D))
6440 return true;
6441 const auto *VD = dyn_cast<VarDecl>(D);
6442 if (!VD)
6443 return false;
6444 return VD->hasGlobalStorage() || !VD->isUsed();
6445 }))
6446 continue;
6447 }
6448 // Found multiple children - cannot get the one child only.
6449 if (Child)
6450 return nullptr;
6451 Child = S;
6452 }
6453 if (Child)
6454 Child = Child->IgnoreContainers();
6455 }
6456 return Child;
6457}
6458
6460 CodeGenFunction &CGF, const OMPExecutableDirective &D, int32_t &MinTeamsVal,
6461 int32_t &MaxTeamsVal) {
6462
6463 OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
6464 assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
6465 "Expected target-based executable directive.");
6466 switch (DirectiveKind) {
6467 case OMPD_target: {
6468 const auto *CS = D.getInnermostCapturedStmt();
6469 const auto *Body =
6470 CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
6471 const Stmt *ChildStmt =
6473 if (const auto *NestedDir =
6474 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
6475 if (isOpenMPTeamsDirective(NestedDir->getDirectiveKind())) {
6476 if (NestedDir->hasClausesOfKind<OMPNumTeamsClause>()) {
6477 const Expr *NumTeams = NestedDir->getSingleClause<OMPNumTeamsClause>()
6478 ->getNumTeams()
6479 .front();
6480 if (NumTeams->isIntegerConstantExpr(CGF.getContext()))
6481 if (auto Constant =
6482 NumTeams->getIntegerConstantExpr(CGF.getContext()))
6483 MinTeamsVal = MaxTeamsVal = Constant->getExtValue();
6484 return NumTeams;
6485 }
6486 MinTeamsVal = MaxTeamsVal = 0;
6487 return nullptr;
6488 }
6489 MinTeamsVal = MaxTeamsVal = 1;
6490 return nullptr;
6491 }
6492 // A value of -1 is used to check if we need to emit no teams region
6493 MinTeamsVal = MaxTeamsVal = -1;
6494 return nullptr;
6495 }
6496 case OMPD_target_teams_loop:
6497 case OMPD_target_teams:
6498 case OMPD_target_teams_distribute:
6499 case OMPD_target_teams_distribute_simd:
6500 case OMPD_target_teams_distribute_parallel_for:
6501 case OMPD_target_teams_distribute_parallel_for_simd: {
6502 if (D.hasClausesOfKind<OMPNumTeamsClause>()) {
6503 const Expr *NumTeams =
6504 D.getSingleClause<OMPNumTeamsClause>()->getNumTeams().front();
6505 if (NumTeams->isIntegerConstantExpr(CGF.getContext()))
6506 if (auto Constant = NumTeams->getIntegerConstantExpr(CGF.getContext()))
6507 MinTeamsVal = MaxTeamsVal = Constant->getExtValue();
6508 return NumTeams;
6509 }
6510 MinTeamsVal = MaxTeamsVal = 0;
6511 return nullptr;
6512 }
6513 case OMPD_target_parallel:
6514 case OMPD_target_parallel_for:
6515 case OMPD_target_parallel_for_simd:
6516 case OMPD_target_parallel_loop:
6517 case OMPD_target_simd:
6518 MinTeamsVal = MaxTeamsVal = 1;
6519 return nullptr;
6520 case OMPD_parallel:
6521 case OMPD_for:
6522 case OMPD_parallel_for:
6523 case OMPD_parallel_loop:
6524 case OMPD_parallel_master:
6525 case OMPD_parallel_sections:
6526 case OMPD_for_simd:
6527 case OMPD_parallel_for_simd:
6528 case OMPD_cancel:
6529 case OMPD_cancellation_point:
6530 case OMPD_ordered:
6531 case OMPD_threadprivate:
6532 case OMPD_allocate:
6533 case OMPD_task:
6534 case OMPD_simd:
6535 case OMPD_tile:
6536 case OMPD_unroll:
6537 case OMPD_sections:
6538 case OMPD_section:
6539 case OMPD_single:
6540 case OMPD_master:
6541 case OMPD_critical:
6542 case OMPD_taskyield:
6543 case OMPD_barrier:
6544 case OMPD_taskwait:
6545 case OMPD_taskgroup:
6546 case OMPD_atomic:
6547 case OMPD_flush:
6548 case OMPD_depobj:
6549 case OMPD_scan:
6550 case OMPD_teams:
6551 case OMPD_target_data:
6552 case OMPD_target_exit_data:
6553 case OMPD_target_enter_data:
6554 case OMPD_distribute:
6555 case OMPD_distribute_simd:
6556 case OMPD_distribute_parallel_for:
6557 case OMPD_distribute_parallel_for_simd:
6558 case OMPD_teams_distribute:
6559 case OMPD_teams_distribute_simd:
6560 case OMPD_teams_distribute_parallel_for:
6561 case OMPD_teams_distribute_parallel_for_simd:
6562 case OMPD_target_update:
6563 case OMPD_declare_simd:
6564 case OMPD_declare_variant:
6565 case OMPD_begin_declare_variant:
6566 case OMPD_end_declare_variant:
6567 case OMPD_declare_target:
6568 case OMPD_end_declare_target:
6569 case OMPD_declare_reduction:
6570 case OMPD_declare_mapper:
6571 case OMPD_taskloop:
6572 case OMPD_taskloop_simd:
6573 case OMPD_master_taskloop:
6574 case OMPD_master_taskloop_simd:
6575 case OMPD_parallel_master_taskloop:
6576 case OMPD_parallel_master_taskloop_simd:
6577 case OMPD_requires:
6578 case OMPD_metadirective:
6579 case OMPD_unknown:
6580 break;
6581 default:
6582 break;
6583 }
6584 llvm_unreachable("Unexpected directive kind.");
6585}
6586
6588 CodeGenFunction &CGF, const OMPExecutableDirective &D) {
6589 assert(!CGF.getLangOpts().OpenMPIsTargetDevice &&
6590 "Clauses associated with the teams directive expected to be emitted "
6591 "only for the host!");
6592 CGBuilderTy &Bld = CGF.Builder;
6593 int32_t MinNT = -1, MaxNT = -1;
6594 const Expr *NumTeams =
6595 getNumTeamsExprForTargetDirective(CGF, D, MinNT, MaxNT);
6596 if (NumTeams != nullptr) {
6597 OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
6598
6599 switch (DirectiveKind) {
6600 case OMPD_target: {
6601 const auto *CS = D.getInnermostCapturedStmt();
6602 CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6603 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6604 llvm::Value *NumTeamsVal = CGF.EmitScalarExpr(NumTeams,
6605 /*IgnoreResultAssign*/ true);
6606 return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
6607 /*isSigned=*/true);
6608 }
6609 case OMPD_target_teams:
6610 case OMPD_target_teams_distribute:
6611 case OMPD_target_teams_distribute_simd:
6612 case OMPD_target_teams_distribute_parallel_for:
6613 case OMPD_target_teams_distribute_parallel_for_simd: {
6614 CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF);
6615 llvm::Value *NumTeamsVal = CGF.EmitScalarExpr(NumTeams,
6616 /*IgnoreResultAssign*/ true);
6617 return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
6618 /*isSigned=*/true);
6619 }
6620 default:
6621 break;
6622 }
6623 }
6624
6625 assert(MinNT == MaxNT && "Num threads ranges require handling here.");
6626 return llvm::ConstantInt::getSigned(CGF.Int32Ty, MinNT);
6627}
6628
6629/// Check for a num threads constant value (stored in \p DefaultVal), or
6630/// expression (stored in \p E). If the value is conditional (via an if-clause),
6631/// store the condition in \p CondVal. If \p E, and \p CondVal respectively, are
6632/// nullptr, no expression evaluation is performed.
6633static void getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS,
6634 const Expr **E, int32_t &UpperBound,
6635 bool UpperBoundOnly, llvm::Value **CondVal) {
6637 CGF.getContext(), CS->getCapturedStmt());
6638 const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child);
6639 if (!Dir)
6640 return;
6641
6642 if (isOpenMPParallelDirective(Dir->getDirectiveKind())) {
6643 // Handle if clause. If if clause present, the number of threads is
6644 // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
6645 if (CondVal && Dir->hasClausesOfKind<OMPIfClause>()) {
6646 CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6647 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6648 const OMPIfClause *IfClause = nullptr;
6649 for (const auto *C : Dir->getClausesOfKind<OMPIfClause>()) {
6650 if (C->getNameModifier() == OMPD_unknown ||
6651 C->getNameModifier() == OMPD_parallel) {
6652 IfClause = C;
6653 break;
6654 }
6655 }
6656 if (IfClause) {
6657 const Expr *CondExpr = IfClause->getCondition();
6658 bool Result;
6659 if (CondExpr->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
6660 if (!Result) {
6661 UpperBound = 1;
6662 return;
6663 }
6664 } else {
6666 if (const auto *PreInit =
6667 cast_or_null<DeclStmt>(IfClause->getPreInitStmt())) {
6668 for (const auto *I : PreInit->decls()) {
6669 if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
6670 CGF.EmitVarDecl(cast<VarDecl>(*I));
6671 } else {
6674 CGF.EmitAutoVarCleanups(Emission);
6675 }
6676 }
6677 *CondVal = CGF.EvaluateExprAsBool(CondExpr);
6678 }
6679 }
6680 }
6681 }
6682 // Check the value of num_threads clause iff if clause was not specified
6683 // or is not evaluated to false.
6684 if (Dir->hasClausesOfKind<OMPNumThreadsClause>()) {
6685 CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6686 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6687 const auto *NumThreadsClause =
6688 Dir->getSingleClause<OMPNumThreadsClause>();
6689 const Expr *NTExpr = NumThreadsClause->getNumThreads();
6690 if (NTExpr->isIntegerConstantExpr(CGF.getContext()))
6691 if (auto Constant = NTExpr->getIntegerConstantExpr(CGF.getContext()))
6692 UpperBound =
6693 UpperBound
6694 ? Constant->getZExtValue()
6695 : std::min(UpperBound,
6696 static_cast<int32_t>(Constant->getZExtValue()));
6697 // If we haven't found an upper bound, remember we saw a thread limiting
6698 // clause.
6699 if (UpperBound == -1)
6700 UpperBound = 0;
6701 if (!E)
6702 return;
6703 CodeGenFunction::LexicalScope Scope(CGF, NTExpr->getSourceRange());
6704 if (const auto *PreInit =
6705 cast_or_null<DeclStmt>(NumThreadsClause->getPreInitStmt())) {
6706 for (const auto *I : PreInit->decls()) {
6707 if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
6708 CGF.EmitVarDecl(cast<VarDecl>(*I));
6709 } else {
6712 CGF.EmitAutoVarCleanups(Emission);
6713 }
6714 }
6715 }
6716 *E = NTExpr;
6717 }
6718 return;
6719 }
6720 if (isOpenMPSimdDirective(Dir->getDirectiveKind()))
6721 UpperBound = 1;
6722}
6723
6725 CodeGenFunction &CGF, const OMPExecutableDirective &D, int32_t &UpperBound,
6726 bool UpperBoundOnly, llvm::Value **CondVal, const Expr **ThreadLimitExpr) {
6727 assert((!CGF.getLangOpts().OpenMPIsTargetDevice || UpperBoundOnly) &&
6728 "Clauses associated with the teams directive expected to be emitted "
6729 "only for the host!");
6730 OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
6731 assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
6732 "Expected target-based executable directive.");
6733
6734 const Expr *NT = nullptr;
6735 const Expr **NTPtr = UpperBoundOnly ? nullptr : &NT;
6736
6737 auto CheckForConstExpr = [&](const Expr *E, const Expr **EPtr) {
6738 if (E->isIntegerConstantExpr(CGF.getContext())) {
6739 if (auto Constant = E->getIntegerConstantExpr(CGF.getContext()))
6740 UpperBound = UpperBound ? Constant->getZExtValue()
6741 : std::min(UpperBound,
6742 int32_t(Constant->getZExtValue()));
6743 }
6744 // If we haven't found an upper bound, remember we saw a thread limiting
6745 // clause.
6746 if (UpperBound == -1)
6747 UpperBound = 0;
6748 if (EPtr)
6749 *EPtr = E;
6750 };
6751
6752 auto ReturnSequential = [&]() {
6753 UpperBound = 1;
6754 return NT;
6755 };
6756
6757 switch (DirectiveKind) {
6758 case OMPD_target: {
6759 const CapturedStmt *CS = D.getInnermostCapturedStmt();
6760 getNumThreads(CGF, CS, NTPtr, UpperBound, UpperBoundOnly, CondVal);
6762 CGF.getContext(), CS->getCapturedStmt());
6763 // TODO: The standard is not clear how to resolve two thread limit clauses,
6764 // let's pick the teams one if it's present, otherwise the target one.
6765 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6766 if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
6767 if (const auto *TLC = Dir->getSingleClause<OMPThreadLimitClause>()) {
6768 ThreadLimitClause = TLC;
6769 if (ThreadLimitExpr) {
6770 CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6771 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6773 CGF,
6774 ThreadLimitClause->getThreadLimit().front()->getSourceRange());
6775 if (const auto *PreInit =
6776 cast_or_null<DeclStmt>(ThreadLimitClause->getPreInitStmt())) {
6777 for (const auto *I : PreInit->decls()) {
6778 if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
6779 CGF.EmitVarDecl(cast<VarDecl>(*I));
6780 } else {
6783 CGF.EmitAutoVarCleanups(Emission);
6784 }
6785 }
6786 }
6787 }
6788 }
6789 }
6790 if (ThreadLimitClause)
6791 CheckForConstExpr(ThreadLimitClause->getThreadLimit().front(),
6792 ThreadLimitExpr);
6793 if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
6794 if (isOpenMPTeamsDirective(Dir->getDirectiveKind()) &&
6795 !isOpenMPDistributeDirective(Dir->getDirectiveKind())) {
6796 CS = Dir->getInnermostCapturedStmt();
6798 CGF.getContext(), CS->getCapturedStmt());
6799 Dir = dyn_cast_or_null<OMPExecutableDirective>(Child);
6800 }
6801 if (Dir && isOpenMPParallelDirective(Dir->getDirectiveKind())) {
6802 CS = Dir->getInnermostCapturedStmt();
6803 getNumThreads(CGF, CS, NTPtr, UpperBound, UpperBoundOnly, CondVal);
6804 } else if (Dir && isOpenMPSimdDirective(Dir->getDirectiveKind()))
6805 return ReturnSequential();
6806 }
6807 return NT;
6808 }
6809 case OMPD_target_teams: {
6810 if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
6811 CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
6812 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6813 CheckForConstExpr(ThreadLimitClause->getThreadLimit().front(),
6814 ThreadLimitExpr);
6815 }
6816 const CapturedStmt *CS = D.getInnermostCapturedStmt();
6817 getNumThreads(CGF, CS, NTPtr, UpperBound, UpperBoundOnly, CondVal);
6819 CGF.getContext(), CS->getCapturedStmt());
6820 if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
6821 if (Dir->getDirectiveKind() == OMPD_distribute) {
6822 CS = Dir->getInnermostCapturedStmt();
6823 getNumThreads(CGF, CS, NTPtr, UpperBound, UpperBoundOnly, CondVal);
6824 }
6825 }
6826 return NT;
6827 }
6828 case OMPD_target_teams_distribute:
6829 if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
6830 CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
6831 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6832 CheckForConstExpr(ThreadLimitClause->getThreadLimit().front(),
6833 ThreadLimitExpr);
6834 }
6835 getNumThreads(CGF, D.getInnermostCapturedStmt(), NTPtr, UpperBound,
6836 UpperBoundOnly, CondVal);
6837 return NT;
6838 case OMPD_target_teams_loop:
6839 case OMPD_target_parallel_loop:
6840 case OMPD_target_parallel:
6841 case OMPD_target_parallel_for:
6842 case OMPD_target_parallel_for_simd:
6843 case OMPD_target_teams_distribute_parallel_for:
6844 case OMPD_target_teams_distribute_parallel_for_simd: {
6845 if (CondVal && D.hasClausesOfKind<OMPIfClause>()) {
6846 const OMPIfClause *IfClause = nullptr;
6847 for (const auto *C : D.getClausesOfKind<OMPIfClause>()) {
6848 if (C->getNameModifier() == OMPD_unknown ||
6849 C->getNameModifier() == OMPD_parallel) {
6850 IfClause = C;
6851 break;
6852 }
6853 }
6854 if (IfClause) {
6855 const Expr *Cond = IfClause->getCondition();
6856 bool Result;
6857 if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
6858 if (!Result)
6859 return ReturnSequential();
6860 } else {
6862 *CondVal = CGF.EvaluateExprAsBool(Cond);
6863 }
6864 }
6865 }
6866 if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
6867 CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
6868 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6869 CheckForConstExpr(ThreadLimitClause->getThreadLimit().front(),
6870 ThreadLimitExpr);
6871 }
6872 if (D.hasClausesOfKind<OMPNumThreadsClause>()) {
6873 CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
6874 const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>();
6875 CheckForConstExpr(NumThreadsClause->getNumThreads(), nullptr);
6876 return NumThreadsClause->getNumThreads();
6877 }
6878 return NT;
6879 }
6880 case OMPD_target_teams_distribute_simd:
6881 case OMPD_target_simd:
6882 return ReturnSequential();
6883 default:
6884 break;
6885 }
6886 llvm_unreachable("Unsupported directive kind.");
6887}
6888
6890 CodeGenFunction &CGF, const OMPExecutableDirective &D) {
6891 llvm::Value *NumThreadsVal = nullptr;
6892 llvm::Value *CondVal = nullptr;
6893 llvm::Value *ThreadLimitVal = nullptr;
6894 const Expr *ThreadLimitExpr = nullptr;
6895 int32_t UpperBound = -1;
6896
6898 CGF, D, UpperBound, /* UpperBoundOnly */ false, &CondVal,
6899 &ThreadLimitExpr);
6900
6901 // Thread limit expressions are used below, emit them.
6902 if (ThreadLimitExpr) {
6903 ThreadLimitVal =
6904 CGF.EmitScalarExpr(ThreadLimitExpr, /*IgnoreResultAssign=*/true);
6905 ThreadLimitVal = CGF.Builder.CreateIntCast(ThreadLimitVal, CGF.Int32Ty,
6906 /*isSigned=*/false);
6907 }
6908
6909 // Generate the num threads expression.
6910 if (UpperBound == 1) {
6911 NumThreadsVal = CGF.Builder.getInt32(UpperBound);
6912 } else if (NT) {
6913 NumThreadsVal = CGF.EmitScalarExpr(NT, /*IgnoreResultAssign=*/true);
6914 NumThreadsVal = CGF.Builder.CreateIntCast(NumThreadsVal, CGF.Int32Ty,
6915 /*isSigned=*/false);
6916 } else if (ThreadLimitVal) {
6917 // If we do not have a num threads value but a thread limit, replace the
6918 // former with the latter. We have now handled the thread limit expression.
6919 NumThreadsVal = ThreadLimitVal;
6920 ThreadLimitVal = nullptr;
6921 } else {
6922 // Default to "0" which means runtime choice.
6923 assert(!ThreadLimitVal && "Default not applicable with thread limit value");
6924 NumThreadsVal = CGF.Builder.getInt32(0);
6925 }
6926
6927 // Handle if clause. If if clause present, the number of threads is
6928 // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
6929 if (CondVal) {
6931 NumThreadsVal = CGF.Builder.CreateSelect(CondVal, NumThreadsVal,
6932 CGF.Builder.getInt32(1));
6933 }
6934
6935 // If the thread limit and num threads expression were present, take the
6936 // minimum.
6937 if (ThreadLimitVal) {
6938 NumThreadsVal = CGF.Builder.CreateSelect(
6939 CGF.Builder.CreateICmpULT(ThreadLimitVal, NumThreadsVal),
6940 ThreadLimitVal, NumThreadsVal);
6941 }
6942
6943 return NumThreadsVal;
6944}
6945
6946namespace {
6948
6949// Utility to handle information from clauses associated with a given
6950// construct that use mappable expressions (e.g. 'map' clause, 'to' clause).
6951// It provides a convenient interface to obtain the information and generate
6952// code for that information.
6953class MappableExprsHandler {
6954public:
6955 /// Custom comparator for attach-pointer expressions that compares them by
6956 /// complexity (i.e. their component-depth) first, then by the order in which
6957 /// they were computed by collectAttachPtrExprInfo(), if they are semantically
6958 /// different.
6959 struct AttachPtrExprComparator {
6960 const MappableExprsHandler &Handler;
6961 // Cache of previous equality comparison results.
6962 mutable llvm::DenseMap<std::pair<const Expr *, const Expr *>, bool>
6963 CachedEqualityComparisons;
6964
6965 AttachPtrExprComparator(const MappableExprsHandler &H) : Handler(H) {}
6966 AttachPtrExprComparator() = delete;
6967
6968 // Return true iff LHS is "less than" RHS.
6969 bool operator()(const Expr *LHS, const Expr *RHS) const {
6970 if (LHS == RHS)
6971 return false;
6972
6973 // First, compare by complexity (depth)
6974 const auto ItLHS = Handler.AttachPtrComponentDepthMap.find(LHS);
6975 const auto ItRHS = Handler.AttachPtrComponentDepthMap.find(RHS);
6976
6977 std::optional<size_t> DepthLHS =
6978 (ItLHS != Handler.AttachPtrComponentDepthMap.end()) ? ItLHS->second
6979 : std::nullopt;
6980 std::optional<size_t> DepthRHS =
6981 (ItRHS != Handler.AttachPtrComponentDepthMap.end()) ? ItRHS->second
6982 : std::nullopt;
6983
6984 // std::nullopt (no attach pointer) has lowest complexity
6985 if (!DepthLHS.has_value() && !DepthRHS.has_value()) {
6986 // Both have same complexity, now check semantic equality
6987 if (areEqual(LHS, RHS))
6988 return false;
6989 // Different semantically, compare by computation order
6990 return wasComputedBefore(LHS, RHS);
6991 }
6992 if (!DepthLHS.has_value())
6993 return true; // LHS has lower complexity
6994 if (!DepthRHS.has_value())
6995 return false; // RHS has lower complexity
6996
6997 // Both have values, compare by depth (lower depth = lower complexity)
6998 if (DepthLHS.value() != DepthRHS.value())
6999 return DepthLHS.value() < DepthRHS.value();
7000
7001 // Same complexity, now check semantic equality
7002 if (areEqual(LHS, RHS))
7003 return false;
7004 // Different semantically, compare by computation order
7005 return wasComputedBefore(LHS, RHS);
7006 }
7007
7008 public:
7009 /// Return true if \p LHS and \p RHS are semantically equal. Uses pre-cached
7010 /// results, if available, otherwise does a recursive semantic comparison.
7011 bool areEqual(const Expr *LHS, const Expr *RHS) const {
7012 // Check cache first for faster lookup
7013 const auto CachedResultIt = CachedEqualityComparisons.find({LHS, RHS});
7014 if (CachedResultIt != CachedEqualityComparisons.end())
7015 return CachedResultIt->second;
7016
7017 bool ComparisonResult = areSemanticallyEqual(LHS, RHS);
7018
7019 // Cache the result for future lookups (both orders since semantic
7020 // equality is commutative)
7021 CachedEqualityComparisons[{LHS, RHS}] = ComparisonResult;
7022 CachedEqualityComparisons[{RHS, LHS}] = ComparisonResult;
7023 return ComparisonResult;
7024 }
7025
7026 /// Compare the two attach-ptr expressions by their computation order.
7027 /// Returns true iff LHS was computed before RHS by
7028 /// collectAttachPtrExprInfo().
7029 bool wasComputedBefore(const Expr *LHS, const Expr *RHS) const {
7030 const size_t &OrderLHS = Handler.AttachPtrComputationOrderMap.at(LHS);
7031 const size_t &OrderRHS = Handler.AttachPtrComputationOrderMap.at(RHS);
7032
7033 return OrderLHS < OrderRHS;
7034 }
7035
7036 private:
7037 /// Helper function to compare attach-pointer expressions semantically.
7038 /// This function handles various expression types that can be part of an
7039 /// attach-pointer.
7040 /// TODO: Not urgent, but we should ideally return true when comparing
7041 /// `p[10]`, `*(p + 10)`, `*(p + 5 + 5)`, `p[10:1]` etc.
7042 bool areSemanticallyEqual(const Expr *LHS, const Expr *RHS) const {
7043 if (LHS == RHS)
7044 return true;
7045
7046 // If only one is null, they aren't equal
7047 if (!LHS || !RHS)
7048 return false;
7049
7050 ASTContext &Ctx = Handler.CGF.getContext();
7051 // Strip away parentheses and no-op casts to get to the core expression
7052 LHS = LHS->IgnoreParenNoopCasts(Ctx);
7053 RHS = RHS->IgnoreParenNoopCasts(Ctx);
7054
7055 // Direct pointer comparison of the underlying expressions
7056 if (LHS == RHS)
7057 return true;
7058
7059 // Check if the expression classes match
7060 if (LHS->getStmtClass() != RHS->getStmtClass())
7061 return false;
7062
7063 // Handle DeclRefExpr (variable references)
7064 if (const auto *LD = dyn_cast<DeclRefExpr>(LHS)) {
7065 const auto *RD = dyn_cast<DeclRefExpr>(RHS);
7066 if (!RD)
7067 return false;
7068 return LD->getDecl()->getCanonicalDecl() ==
7069 RD->getDecl()->getCanonicalDecl();
7070 }
7071
7072 // Handle ArraySubscriptExpr (array indexing like a[i])
7073 if (const auto *LA = dyn_cast<ArraySubscriptExpr>(LHS)) {
7074 const auto *RA = dyn_cast<ArraySubscriptExpr>(RHS);
7075 if (!RA)
7076 return false;
7077 return areSemanticallyEqual(LA->getBase(), RA->getBase()) &&
7078 areSemanticallyEqual(LA->getIdx(), RA->getIdx());
7079 }
7080
7081 // Handle MemberExpr (member access like s.m or p->m)
7082 if (const auto *LM = dyn_cast<MemberExpr>(LHS)) {
7083 const auto *RM = dyn_cast<MemberExpr>(RHS);
7084 if (!RM)
7085 return false;
7086 if (LM->getMemberDecl()->getCanonicalDecl() !=
7087 RM->getMemberDecl()->getCanonicalDecl())
7088 return false;
7089 return areSemanticallyEqual(LM->getBase(), RM->getBase());
7090 }
7091
7092 // Handle UnaryOperator (unary operations like *p, &x, etc.)
7093 if (const auto *LU = dyn_cast<UnaryOperator>(LHS)) {
7094 const auto *RU = dyn_cast<UnaryOperator>(RHS);
7095 if (!RU)
7096 return false;
7097 if (LU->getOpcode() != RU->getOpcode())
7098 return false;
7099 return areSemanticallyEqual(LU->getSubExpr(), RU->getSubExpr());
7100 }
7101
7102 // Handle BinaryOperator (binary operations like p + offset)
7103 if (const auto *LB = dyn_cast<BinaryOperator>(LHS)) {
7104 const auto *RB = dyn_cast<BinaryOperator>(RHS);
7105 if (!RB)
7106 return false;
7107 if (LB->getOpcode() != RB->getOpcode())
7108 return false;
7109 return areSemanticallyEqual(LB->getLHS(), RB->getLHS()) &&
7110 areSemanticallyEqual(LB->getRHS(), RB->getRHS());
7111 }
7112
7113 // Handle ArraySectionExpr (array sections like a[0:1])
7114 // Attach pointers should not contain array-sections, but currently we
7115 // don't emit an error.
7116 if (const auto *LAS = dyn_cast<ArraySectionExpr>(LHS)) {
7117 const auto *RAS = dyn_cast<ArraySectionExpr>(RHS);
7118 if (!RAS)
7119 return false;
7120 return areSemanticallyEqual(LAS->getBase(), RAS->getBase()) &&
7121 areSemanticallyEqual(LAS->getLowerBound(),
7122 RAS->getLowerBound()) &&
7123 areSemanticallyEqual(LAS->getLength(), RAS->getLength());
7124 }
7125
7126 // Handle CastExpr (explicit casts)
7127 if (const auto *LC = dyn_cast<CastExpr>(LHS)) {
7128 const auto *RC = dyn_cast<CastExpr>(RHS);
7129 if (!RC)
7130 return false;
7131 if (LC->getCastKind() != RC->getCastKind())
7132 return false;
7133 return areSemanticallyEqual(LC->getSubExpr(), RC->getSubExpr());
7134 }
7135
7136 // Handle CXXThisExpr (this pointer)
7137 if (isa<CXXThisExpr>(LHS) && isa<CXXThisExpr>(RHS))
7138 return true;
7139
7140 // Handle IntegerLiteral (integer constants)
7141 if (const auto *LI = dyn_cast<IntegerLiteral>(LHS)) {
7142 const auto *RI = dyn_cast<IntegerLiteral>(RHS);
7143 if (!RI)
7144 return false;
7145 return LI->getValue() == RI->getValue();
7146 }
7147
7148 // Handle CharacterLiteral (character constants)
7149 if (const auto *LC = dyn_cast<CharacterLiteral>(LHS)) {
7150 const auto *RC = dyn_cast<CharacterLiteral>(RHS);
7151 if (!RC)
7152 return false;
7153 return LC->getValue() == RC->getValue();
7154 }
7155
7156 // Handle FloatingLiteral (floating point constants)
7157 if (const auto *LF = dyn_cast<FloatingLiteral>(LHS)) {
7158 const auto *RF = dyn_cast<FloatingLiteral>(RHS);
7159 if (!RF)
7160 return false;
7161 // Use bitwise comparison for floating point literals
7162 return LF->getValue().bitwiseIsEqual(RF->getValue());
7163 }
7164
7165 // Handle StringLiteral (string constants)
7166 if (const auto *LS = dyn_cast<StringLiteral>(LHS)) {
7167 const auto *RS = dyn_cast<StringLiteral>(RHS);
7168 if (!RS)
7169 return false;
7170 return LS->getString() == RS->getString();
7171 }
7172
7173 // Handle CXXNullPtrLiteralExpr (nullptr)
7175 return true;
7176
7177 // Handle CXXBoolLiteralExpr (true/false)
7178 if (const auto *LB = dyn_cast<CXXBoolLiteralExpr>(LHS)) {
7179 const auto *RB = dyn_cast<CXXBoolLiteralExpr>(RHS);
7180 if (!RB)
7181 return false;
7182 return LB->getValue() == RB->getValue();
7183 }
7184
7185 // Fallback for other forms - use the existing comparison method
7186 return Expr::isSameComparisonOperand(LHS, RHS);
7187 }
7188 };
7189
7190 /// Get the offset of the OMP_MAP_MEMBER_OF field.
7191 static unsigned getFlagMemberOffset() {
7192 unsigned Offset = 0;
7193 for (uint64_t Remain =
7194 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
7195 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF);
7196 !(Remain & 1); Remain = Remain >> 1)
7197 Offset++;
7198 return Offset;
7199 }
7200
7201 /// Class that holds debugging information for a data mapping to be passed to
7202 /// the runtime library.
7203 class MappingExprInfo {
7204 /// The variable declaration used for the data mapping.
7205 const ValueDecl *MapDecl = nullptr;
7206 /// The original expression used in the map clause, or null if there is
7207 /// none.
7208 const Expr *MapExpr = nullptr;
7209
7210 public:
7211 MappingExprInfo(const ValueDecl *MapDecl, const Expr *MapExpr = nullptr)
7212 : MapDecl(MapDecl), MapExpr(MapExpr) {}
7213
7214 const ValueDecl *getMapDecl() const { return MapDecl; }
7215 const Expr *getMapExpr() const { return MapExpr; }
7216 };
7217
7218 using DeviceInfoTy = llvm::OpenMPIRBuilder::DeviceInfoTy;
7219 using MapBaseValuesArrayTy = llvm::OpenMPIRBuilder::MapValuesArrayTy;
7220 using MapValuesArrayTy = llvm::OpenMPIRBuilder::MapValuesArrayTy;
7221 using MapFlagsArrayTy = llvm::OpenMPIRBuilder::MapFlagsArrayTy;
7222 using MapDimArrayTy = llvm::OpenMPIRBuilder::MapDimArrayTy;
7223 using MapNonContiguousArrayTy =
7224 llvm::OpenMPIRBuilder::MapNonContiguousArrayTy;
7225 using MapExprsArrayTy = SmallVector<MappingExprInfo, 4>;
7226 using MapValueDeclsArrayTy = SmallVector<const ValueDecl *, 4>;
7227 using MapData =
7229 OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>,
7230 bool /*IsImplicit*/, const ValueDecl *, const Expr *>;
7231 using MapDataArrayTy = SmallVector<MapData, 4>;
7232
7233 /// This structure contains combined information generated for mappable
7234 /// clauses, including base pointers, pointers, sizes, map types, user-defined
7235 /// mappers, and non-contiguous information.
7236 struct MapCombinedInfoTy : llvm::OpenMPIRBuilder::MapInfosTy {
7237 MapExprsArrayTy Exprs;
7238 MapValueDeclsArrayTy Mappers;
7239 MapValueDeclsArrayTy DevicePtrDecls;
7240
7241 /// Append arrays in \a CurInfo.
7242 void append(MapCombinedInfoTy &CurInfo) {
7243 Exprs.append(CurInfo.Exprs.begin(), CurInfo.Exprs.end());
7244 DevicePtrDecls.append(CurInfo.DevicePtrDecls.begin(),
7245 CurInfo.DevicePtrDecls.end());
7246 Mappers.append(CurInfo.Mappers.begin(), CurInfo.Mappers.end());
7247 llvm::OpenMPIRBuilder::MapInfosTy::append(CurInfo);
7248 }
7249 };
7250
7251 /// Map between a struct and its lowest & highest elements which have been
7252 /// mapped.
7253 /// [ValueDecl *] --> {LE(FieldIndex, Pointer),
7254 /// HE(FieldIndex, Pointer)}
7255 struct StructRangeInfoTy {
7256 MapCombinedInfoTy PreliminaryMapData;
7257 std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> LowestElem = {
7258 0, Address::invalid()};
7259 std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> HighestElem = {
7260 0, Address::invalid()};
7263 bool IsArraySection = false;
7264 bool HasCompleteRecord = false;
7265 };
7266
7267 /// A struct to store the attach pointer and pointee information, to be used
7268 /// when emitting an attach entry.
7269 struct AttachInfoTy {
7270 Address AttachPtrAddr = Address::invalid();
7271 Address AttachPteeAddr = Address::invalid();
7272 const ValueDecl *AttachPtrDecl = nullptr;
7273 const Expr *AttachMapExpr = nullptr;
7274
7275 bool isValid() const {
7276 return AttachPtrAddr.isValid() && AttachPteeAddr.isValid();
7277 }
7278 };
7279
7280 /// Check if there's any component list where the attach pointer expression
7281 /// matches the given captured variable.
7282 bool hasAttachEntryForCapturedVar(const ValueDecl *VD) const {
7283 for (const auto &AttachEntry : AttachPtrExprMap) {
7284 if (AttachEntry.second) {
7285 // Check if the attach pointer expression is a DeclRefExpr that
7286 // references the captured variable
7287 if (const auto *DRE = dyn_cast<DeclRefExpr>(AttachEntry.second))
7288 if (DRE->getDecl() == VD)
7289 return true;
7290 }
7291 }
7292 return false;
7293 }
7294
7295 /// Get the previously-cached attach pointer for a component list, if-any.
7296 const Expr *getAttachPtrExpr(
7298 const {
7299 const auto It = AttachPtrExprMap.find(Components);
7300 if (It != AttachPtrExprMap.end())
7301 return It->second;
7302
7303 return nullptr;
7304 }
7305
7306private:
7307 /// Kind that defines how a device pointer has to be returned.
7308 struct MapInfo {
7311 ArrayRef<OpenMPMapModifierKind> MapModifiers;
7312 ArrayRef<OpenMPMotionModifierKind> MotionModifiers;
7313 bool ReturnDevicePointer = false;
7314 bool IsImplicit = false;
7315 const ValueDecl *Mapper = nullptr;
7316 const Expr *VarRef = nullptr;
7317 bool ForDeviceAddr = false;
7318 bool HasUdpFbNullify = false;
7319
7320 MapInfo() = default;
7321 MapInfo(
7323 OpenMPMapClauseKind MapType,
7324 ArrayRef<OpenMPMapModifierKind> MapModifiers,
7325 ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
7326 bool ReturnDevicePointer, bool IsImplicit,
7327 const ValueDecl *Mapper = nullptr, const Expr *VarRef = nullptr,
7328 bool ForDeviceAddr = false, bool HasUdpFbNullify = false)
7329 : Components(Components), MapType(MapType), MapModifiers(MapModifiers),
7330 MotionModifiers(MotionModifiers),
7331 ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit),
7332 Mapper(Mapper), VarRef(VarRef), ForDeviceAddr(ForDeviceAddr),
7333 HasUdpFbNullify(HasUdpFbNullify) {}
7334 };
7335
7336 /// The target directive from where the mappable clauses were extracted. It
7337 /// is either an executable directive or a user-defined mapper directive.
7338 llvm::PointerUnion<const OMPExecutableDirective *,
7339 const OMPDeclareMapperDecl *>
7340 CurDir;
7341
7342 /// Function the directive is being generated for.
7343 CodeGenFunction &CGF;
7344
7345 /// Set of all first private variables in the current directive.
7346 /// bool data is set to true if the variable is implicitly marked as
7347 /// firstprivate, false otherwise.
7348 llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, bool> FirstPrivateDecls;
7349
7350 /// Set of defaultmap clause kinds that use firstprivate behavior.
7351 llvm::SmallSet<OpenMPDefaultmapClauseKind, 4> DefaultmapFirstprivateKinds;
7352
7353 /// Map between device pointer declarations and their expression components.
7354 /// The key value for declarations in 'this' is null.
7355 llvm::DenseMap<
7356 const ValueDecl *,
7357 SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
7358 DevPointersMap;
7359
7360 /// Map between device addr declarations and their expression components.
7361 /// The key value for declarations in 'this' is null.
7362 llvm::DenseMap<
7363 const ValueDecl *,
7364 SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
7365 HasDevAddrsMap;
7366
7367 /// Map between lambda declarations and their map type.
7368 llvm::DenseMap<const ValueDecl *, const OMPMapClause *> LambdasMap;
7369
7370 /// Map from component lists to their attach pointer expressions.
7372 const Expr *>
7373 AttachPtrExprMap;
7374
7375 /// Map from attach pointer expressions to their component depth.
7376 /// nullptr key has std::nullopt depth. This can be used to order attach-ptr
7377 /// expressions with increasing/decreasing depth.
7378 /// The component-depth of `nullptr` (i.e. no attach-ptr) is `std::nullopt`.
7379 /// TODO: Not urgent, but we should ideally use the number of pointer
7380 /// dereferences in an expr as an indicator of its complexity, instead of the
7381 /// component-depth. That would be needed for us to treat `p[1]`, `*(p + 10)`,
7382 /// `*(p + 5 + 5)` together.
7383 llvm::DenseMap<const Expr *, std::optional<size_t>>
7384 AttachPtrComponentDepthMap = {{nullptr, std::nullopt}};
7385
7386 /// Map from attach pointer expressions to the order they were computed in, in
7387 /// collectAttachPtrExprInfo().
7388 llvm::DenseMap<const Expr *, size_t> AttachPtrComputationOrderMap = {
7389 {nullptr, 0}};
7390
7391 /// An instance of attach-ptr-expr comparator that can be used throughout the
7392 /// lifetime of this handler.
7393 AttachPtrExprComparator AttachPtrComparator;
7394
7395 llvm::Value *getExprTypeSize(const Expr *E) const {
7396 QualType ExprTy = E->getType().getCanonicalType();
7397
7398 // Calculate the size for array shaping expression.
7399 if (const auto *OAE = dyn_cast<OMPArrayShapingExpr>(E)) {
7400 llvm::Value *Size =
7401 CGF.getTypeSize(OAE->getBase()->getType()->getPointeeType());
7402 for (const Expr *SE : OAE->getDimensions()) {
7403 llvm::Value *Sz = CGF.EmitScalarExpr(SE);
7404 Sz = CGF.EmitScalarConversion(Sz, SE->getType(),
7405 CGF.getContext().getSizeType(),
7406 SE->getExprLoc());
7407 Size = CGF.Builder.CreateNUWMul(Size, Sz);
7408 }
7409 return Size;
7410 }
7411
7412 // Reference types are ignored for mapping purposes.
7413 if (const auto *RefTy = ExprTy->getAs<ReferenceType>())
7414 ExprTy = RefTy->getPointeeType().getCanonicalType();
7415
7416 // Given that an array section is considered a built-in type, we need to
7417 // do the calculation based on the length of the section instead of relying
7418 // on CGF.getTypeSize(E->getType()).
7419 if (const auto *OAE = dyn_cast<ArraySectionExpr>(E)) {
7420 QualType BaseTy = ArraySectionExpr::getBaseOriginalType(
7421 OAE->getBase()->IgnoreParenImpCasts())
7423
7424 // If there is no length associated with the expression and lower bound is
7425 // not specified too, that means we are using the whole length of the
7426 // base.
7427 if (!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
7428 !OAE->getLowerBound())
7429 return CGF.getTypeSize(BaseTy);
7430
7431 llvm::Value *ElemSize;
7432 if (const auto *PTy = BaseTy->getAs<PointerType>()) {
7433 ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType());
7434 } else {
7435 const auto *ATy = cast<ArrayType>(BaseTy.getTypePtr());
7436 assert(ATy && "Expecting array type if not a pointer type.");
7437 ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType());
7438 }
7439
7440 // If we don't have a length at this point, that is because we have an
7441 // array section with a single element.
7442 if (!OAE->getLength() && OAE->getColonLocFirst().isInvalid())
7443 return ElemSize;
7444
7445 if (const Expr *LenExpr = OAE->getLength()) {
7446 llvm::Value *LengthVal = CGF.EmitScalarExpr(LenExpr);
7447 LengthVal = CGF.EmitScalarConversion(LengthVal, LenExpr->getType(),
7448 CGF.getContext().getSizeType(),
7449 LenExpr->getExprLoc());
7450 return CGF.Builder.CreateNUWMul(LengthVal, ElemSize);
7451 }
7452 assert(!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
7453 OAE->getLowerBound() && "expected array_section[lb:].");
7454 // Size = sizetype - lb * elemtype;
7455 llvm::Value *LengthVal = CGF.getTypeSize(BaseTy);
7456 llvm::Value *LBVal = CGF.EmitScalarExpr(OAE->getLowerBound());
7457 LBVal = CGF.EmitScalarConversion(LBVal, OAE->getLowerBound()->getType(),
7458 CGF.getContext().getSizeType(),
7459 OAE->getLowerBound()->getExprLoc());
7460 LBVal = CGF.Builder.CreateNUWMul(LBVal, ElemSize);
7461 llvm::Value *Cmp = CGF.Builder.CreateICmpUGT(LengthVal, LBVal);
7462 llvm::Value *TrueVal = CGF.Builder.CreateNUWSub(LengthVal, LBVal);
7463 LengthVal = CGF.Builder.CreateSelect(
7464 Cmp, TrueVal, llvm::ConstantInt::get(CGF.SizeTy, 0));
7465 return LengthVal;
7466 }
7467 return CGF.getTypeSize(ExprTy);
7468 }
7469
7470 /// Return the corresponding bits for a given map clause modifier. Add
7471 /// a flag marking the map as a pointer if requested. Add a flag marking the
7472 /// map as the first one of a series of maps that relate to the same map
7473 /// expression.
7474 OpenMPOffloadMappingFlags getMapTypeBits(
7475 OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
7476 ArrayRef<OpenMPMotionModifierKind> MotionModifiers, bool IsImplicit,
7477 bool AddPtrFlag, bool AddIsTargetParamFlag, bool IsNonContiguous) const {
7478 OpenMPOffloadMappingFlags Bits =
7479 IsImplicit ? OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT
7480 : OpenMPOffloadMappingFlags::OMP_MAP_NONE;
7481 switch (MapType) {
7482 case OMPC_MAP_alloc:
7483 case OMPC_MAP_release:
7484 // alloc and release is the default behavior in the runtime library, i.e.
7485 // if we don't pass any bits alloc/release that is what the runtime is
7486 // going to do. Therefore, we don't need to signal anything for these two
7487 // type modifiers.
7488 break;
7489 case OMPC_MAP_to:
7490 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_TO;
7491 break;
7492 case OMPC_MAP_from:
7493 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_FROM;
7494 break;
7495 case OMPC_MAP_tofrom:
7496 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_TO |
7497 OpenMPOffloadMappingFlags::OMP_MAP_FROM;
7498 break;
7499 case OMPC_MAP_delete:
7500 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_DELETE;
7501 break;
7502 case OMPC_MAP_unknown:
7503 llvm_unreachable("Unexpected map type!");
7504 }
7505 if (AddPtrFlag)
7506 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ;
7507 if (AddIsTargetParamFlag)
7508 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;
7509 if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_always))
7510 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_ALWAYS;
7511 if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_close))
7512 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_CLOSE;
7513 if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_present) ||
7514 llvm::is_contained(MotionModifiers, OMPC_MOTION_MODIFIER_present))
7515 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_PRESENT;
7516 if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_ompx_hold))
7517 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD;
7518 if (IsNonContiguous)
7519 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_NON_CONTIG;
7520 return Bits;
7521 }
7522
7523 /// Return true if the provided expression is a final array section. A
7524 /// final array section, is one whose length can't be proved to be one.
7525 bool isFinalArraySectionExpression(const Expr *E) const {
7526 const auto *OASE = dyn_cast<ArraySectionExpr>(E);
7527
7528 // It is not an array section and therefore not a unity-size one.
7529 if (!OASE)
7530 return false;
7531
7532 // An array section with no colon always refer to a single element.
7533 if (OASE->getColonLocFirst().isInvalid())
7534 return false;
7535
7536 const Expr *Length = OASE->getLength();
7537
7538 // If we don't have a length we have to check if the array has size 1
7539 // for this dimension. Also, we should always expect a length if the
7540 // base type is pointer.
7541 if (!Length) {
7542 QualType BaseQTy = ArraySectionExpr::getBaseOriginalType(
7543 OASE->getBase()->IgnoreParenImpCasts())
7545 if (const auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr()))
7546 return ATy->getSExtSize() != 1;
7547 // If we don't have a constant dimension length, we have to consider
7548 // the current section as having any size, so it is not necessarily
7549 // unitary. If it happen to be unity size, that's user fault.
7550 return true;
7551 }
7552
7553 // Check if the length evaluates to 1.
7554 Expr::EvalResult Result;
7555 if (!Length->EvaluateAsInt(Result, CGF.getContext()))
7556 return true; // Can have more that size 1.
7557
7558 llvm::APSInt ConstLength = Result.Val.getInt();
7559 return ConstLength.getSExtValue() != 1;
7560 }
7561
7562 /// Emit an attach entry into \p CombinedInfo, using the information from \p
7563 /// AttachInfo. For example, for a map of form `int *p; ... map(p[1:10])`,
7564 /// an attach entry has the following form:
7565 /// &p, &p[1], sizeof(void*), ATTACH
7566 void emitAttachEntry(CodeGenFunction &CGF, MapCombinedInfoTy &CombinedInfo,
7567 const AttachInfoTy &AttachInfo) const {
7568 assert(AttachInfo.isValid() &&
7569 "Expected valid attach pointer/pointee information!");
7570
7571 // Size is the size of the pointer itself - use pointer size, not BaseDecl
7572 // size
7573 llvm::Value *PointerSize = CGF.Builder.CreateIntCast(
7574 llvm::ConstantInt::get(
7575 CGF.CGM.SizeTy, CGF.getContext()
7577 .getQuantity()),
7578 CGF.Int64Ty, /*isSigned=*/true);
7579
7580 CombinedInfo.Exprs.emplace_back(AttachInfo.AttachPtrDecl,
7581 AttachInfo.AttachMapExpr);
7582 CombinedInfo.BasePointers.push_back(
7583 AttachInfo.AttachPtrAddr.emitRawPointer(CGF));
7584 CombinedInfo.DevicePtrDecls.push_back(nullptr);
7585 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
7586 CombinedInfo.Pointers.push_back(
7587 AttachInfo.AttachPteeAddr.emitRawPointer(CGF));
7588 CombinedInfo.Sizes.push_back(PointerSize);
7589 CombinedInfo.Types.push_back(OpenMPOffloadMappingFlags::OMP_MAP_ATTACH);
7590 CombinedInfo.Mappers.push_back(nullptr);
7591 CombinedInfo.NonContigInfo.Dims.push_back(1);
7592 }
7593
7594 /// A helper class to copy structures with overlapped elements, i.e. those
7595 /// which have mappings of both "s" and "s.mem". Consecutive elements that
7596 /// are not explicitly copied have mapping nodes synthesized for them,
7597 /// taking care to avoid generating zero-sized copies.
7598 class CopyOverlappedEntryGaps {
7599 CodeGenFunction &CGF;
7600 MapCombinedInfoTy &CombinedInfo;
7601 OpenMPOffloadMappingFlags Flags = OpenMPOffloadMappingFlags::OMP_MAP_NONE;
7602 const ValueDecl *MapDecl = nullptr;
7603 const Expr *MapExpr = nullptr;
7605 bool IsNonContiguous = false;
7606 uint64_t DimSize = 0;
7607 // These elements track the position as the struct is iterated over
7608 // (in order of increasing element address).
7609 const RecordDecl *LastParent = nullptr;
7610 uint64_t Cursor = 0;
7611 unsigned LastIndex = -1u;
7613
7614 public:
7615 CopyOverlappedEntryGaps(CodeGenFunction &CGF,
7616 MapCombinedInfoTy &CombinedInfo,
7617 OpenMPOffloadMappingFlags Flags,
7618 const ValueDecl *MapDecl, const Expr *MapExpr,
7619 Address BP, Address LB, bool IsNonContiguous,
7620 uint64_t DimSize)
7621 : CGF(CGF), CombinedInfo(CombinedInfo), Flags(Flags), MapDecl(MapDecl),
7622 MapExpr(MapExpr), BP(BP), IsNonContiguous(IsNonContiguous),
7623 DimSize(DimSize), LB(LB) {}
7624
7625 void processField(
7626 const OMPClauseMappableExprCommon::MappableComponent &MC,
7627 const FieldDecl *FD,
7628 llvm::function_ref<LValue(CodeGenFunction &, const MemberExpr *)>
7629 EmitMemberExprBase) {
7630 const RecordDecl *RD = FD->getParent();
7631 const ASTRecordLayout &RL = CGF.getContext().getASTRecordLayout(RD);
7632 uint64_t FieldOffset = RL.getFieldOffset(FD->getFieldIndex());
7633 uint64_t FieldSize =
7635 Address ComponentLB = Address::invalid();
7636
7637 if (FD->getType()->isLValueReferenceType()) {
7638 const auto *ME = cast<MemberExpr>(MC.getAssociatedExpression());
7639 LValue BaseLVal = EmitMemberExprBase(CGF, ME);
7640 ComponentLB =
7641 CGF.EmitLValueForFieldInitialization(BaseLVal, FD).getAddress();
7642 } else {
7643 ComponentLB =
7645 }
7646
7647 if (!LastParent)
7648 LastParent = RD;
7649 if (FD->getParent() == LastParent) {
7650 if (FD->getFieldIndex() != LastIndex + 1)
7651 copyUntilField(FD, ComponentLB);
7652 } else {
7653 LastParent = FD->getParent();
7654 if (((int64_t)FieldOffset - (int64_t)Cursor) > 0)
7655 copyUntilField(FD, ComponentLB);
7656 }
7657 Cursor = FieldOffset + FieldSize;
7658 LastIndex = FD->getFieldIndex();
7659 LB = CGF.Builder.CreateConstGEP(ComponentLB, 1);
7660 }
7661
7662 void copyUntilField(const FieldDecl *FD, Address ComponentLB) {
7663 llvm::Value *ComponentLBPtr = ComponentLB.emitRawPointer(CGF);
7664 llvm::Value *LBPtr = LB.emitRawPointer(CGF);
7665 llvm::Value *Size = CGF.Builder.CreatePtrDiff(ComponentLBPtr, LBPtr);
7666 copySizedChunk(LBPtr, Size);
7667 }
7668
7669 void copyUntilEnd(Address HB) {
7670 if (LastParent) {
7671 const ASTRecordLayout &RL =
7672 CGF.getContext().getASTRecordLayout(LastParent);
7673 if ((uint64_t)CGF.getContext().toBits(RL.getSize()) <= Cursor)
7674 return;
7675 }
7676 llvm::Value *LBPtr = LB.emitRawPointer(CGF);
7677 llvm::Value *Size = CGF.Builder.CreatePtrDiff(
7678 CGF.Builder.CreateConstGEP(HB, 1).emitRawPointer(CGF), LBPtr);
7679 copySizedChunk(LBPtr, Size);
7680 }
7681
7682 void copySizedChunk(llvm::Value *Base, llvm::Value *Size) {
7683 CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
7684 CombinedInfo.BasePointers.push_back(BP.emitRawPointer(CGF));
7685 CombinedInfo.DevicePtrDecls.push_back(nullptr);
7686 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
7687 CombinedInfo.Pointers.push_back(Base);
7688 CombinedInfo.Sizes.push_back(
7689 CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/false));
7690 CombinedInfo.Types.push_back(Flags);
7691 CombinedInfo.Mappers.push_back(nullptr);
7692 CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize : 1);
7693 }
7694 };
7695
7696 /// Generate the base pointers, section pointers, sizes, map type bits, and
7697 /// user-defined mappers (all included in \a CombinedInfo) for the provided
7698 /// map type, map or motion modifiers, and expression components.
7699 /// \a IsFirstComponent should be set to true if the provided set of
7700 /// components is the first associated with a capture.
7701 void generateInfoForComponentList(
7702 OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
7703 ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
7705 MapCombinedInfoTy &CombinedInfo,
7706 MapCombinedInfoTy &StructBaseCombinedInfo,
7707 StructRangeInfoTy &PartialStruct, AttachInfoTy &AttachInfo,
7708 bool IsFirstComponentList, bool IsImplicit,
7709 bool GenerateAllInfoForClauses, const ValueDecl *Mapper = nullptr,
7710 bool ForDeviceAddr = false, const ValueDecl *BaseDecl = nullptr,
7711 const Expr *MapExpr = nullptr,
7712 ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
7713 OverlappedElements = {}) const {
7714
7715 // The following summarizes what has to be generated for each map and the
7716 // types below. The generated information is expressed in this order:
7717 // base pointer, section pointer, size, flags
7718 // (to add to the ones that come from the map type and modifier).
7719 // Entries annotated with (+) are only generated for "target" constructs,
7720 // and only if the variable at the beginning of the expression is used in
7721 // the region.
7722 //
7723 // double d;
7724 // int i[100];
7725 // float *p;
7726 // int **a = &i;
7727 //
7728 // struct S1 {
7729 // int i;
7730 // float f[50];
7731 // }
7732 // struct S2 {
7733 // int i;
7734 // float f[50];
7735 // S1 s;
7736 // double *p;
7737 // double *&pref;
7738 // struct S2 *ps;
7739 // int &ref;
7740 // }
7741 // S2 s;
7742 // S2 *ps;
7743 //
7744 // map(d)
7745 // &d, &d, sizeof(double), TARGET_PARAM | TO | FROM
7746 //
7747 // map(i)
7748 // &i, &i, 100*sizeof(int), TARGET_PARAM | TO | FROM
7749 //
7750 // map(i[1:23])
7751 // &i(=&i[0]), &i[1], 23*sizeof(int), TARGET_PARAM | TO | FROM
7752 //
7753 // map(p)
7754 // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM
7755 //
7756 // map(p[1:24])
7757 // p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM // map pointee
7758 // &p, &p[1], sizeof(void*), ATTACH // attach pointer/pointee, if both
7759 // // are present, and either is new
7760 //
7761 // map(([22])p)
7762 // p, p, 22*sizeof(float), TARGET_PARAM | TO | FROM
7763 // &p, p, sizeof(void*), ATTACH
7764 //
7765 // map((*a)[0:3])
7766 // a, a, 0, TARGET_PARAM | IMPLICIT // (+)
7767 // (*a)[0], &(*a)[0], 3 * sizeof(int), TO | FROM
7768 // &(*a), &(*a)[0], sizeof(void*), ATTACH
7769 // (+) Only on target, if a is used in the region
7770 // Note: Since the attach base-pointer is `*a`, which is not a scalar
7771 // variable, it doesn't determine the clause on `a`. `a` is mapped using
7772 // a zero-length-array-section map by generateDefaultMapInfo, if it is
7773 // referenced in the target region, because it is a pointer.
7774 //
7775 // map(**a)
7776 // a, a, 0, TARGET_PARAM | IMPLICIT // (+)
7777 // &(*a)[0], &(*a)[0], sizeof(int), TO | FROM
7778 // &(*a), &(*a)[0], sizeof(void*), ATTACH
7779 // (+) Only on target, if a is used in the region
7780 //
7781 // map(s)
7782 // FIXME: This needs to also imply map(ref_ptr_ptee: s.ref), since the
7783 // effect is supposed to be same as if the user had a map for every element
7784 // of the struct. We currently do a shallow-map of s.
7785 // &s, &s, sizeof(S2), TARGET_PARAM | TO | FROM
7786 //
7787 // map(s.i)
7788 // &s, &(s.i), sizeof(int), TARGET_PARAM | TO | FROM
7789 //
7790 // map(s.s.f)
7791 // &s, &(s.s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
7792 //
7793 // map(s.p)
7794 // &s, &(s.p), sizeof(double*), TARGET_PARAM | TO | FROM
7795 //
7796 // map(to: s.p[:22])
7797 // &s, &(s.p), sizeof(double*), TARGET_PARAM | IMPLICIT // (+)
7798 // &(s.p[0]), &(s.p[0]), 22 * sizeof(double), TO
7799 // &(s.p), &(s.p[0]), sizeof(void*), ATTACH
7800 //
7801 // map(to: s.ref)
7802 // &s, &(ptr(s.ref)), sizeof(int*), TARGET_PARAM (*)
7803 // &s, &(ptee(s.ref)), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
7804 // (*) alloc space for struct members, only this is a target parameter.
7805 // (**) map the pointer (nothing to be mapped in this example) (the compiler
7806 // optimizes this entry out, same in the examples below)
7807 // (***) map the pointee (map: to)
7808 // Note: ptr(s.ref) represents the referring pointer of s.ref
7809 // ptee(s.ref) represents the referenced pointee of s.ref
7810 //
7811 // map(to: s.pref)
7812 // &s, &(ptr(s.pref)), sizeof(double**), TARGET_PARAM
7813 // &s, &(ptee(s.pref)), sizeof(double*), MEMBER_OF(1) | PTR_AND_OBJ | TO
7814 //
7815 // map(to: s.pref[:22])
7816 // &s, &(ptr(s.pref)), sizeof(double**), TARGET_PARAM | IMPLICIT // (+)
7817 // &s, &(ptee(s.pref)), sizeof(double*), MEMBER_OF(1) | PTR_AND_OBJ | TO |
7818 // FROM | IMPLICIT // (+)
7819 // &(ptee(s.pref)[0]), &(ptee(s.pref)[0]), 22 * sizeof(double), TO
7820 // &(ptee(s.pref)), &(ptee(s.pref)[0]), sizeof(void*), ATTACH
7821 //
7822 // map(s.ps)
7823 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM
7824 //
7825 // map(from: s.ps->s.i)
7826 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM | IMPLICIT // (+)
7827 // &(s.ps[0]), &(s.ps->s.i), sizeof(int), FROM
7828 // &(s.ps), &(s.ps->s.i), sizeof(void*), ATTACH
7829 //
7830 // map(to: s.ps->ps)
7831 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM | IMPLICIT // (+)
7832 // &(s.ps[0]), &(s.ps->ps), sizeof(S2*), TO
7833 // &(s.ps), &(s.ps->ps), sizeof(void*), ATTACH
7834 //
7835 // map(s.ps->ps->ps)
7836 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM | IMPLICIT // (+)
7837 // &(s.ps->ps[0]), &(s.ps->ps->ps), sizeof(S2*), TO | FROM
7838 // &(s.ps->ps), &(s.ps->ps->ps), sizeof(void*), ATTACH
7839 //
7840 // map(to: s.ps->ps->s.f[:22])
7841 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM | IMPLICIT // (+)
7842 // &(s.ps->ps[0]), &(s.ps->ps->s.f[0]), 22*sizeof(float), TO
7843 // &(s.ps->ps), &(s.ps->ps->s.f[0]), sizeof(void*), ATTACH
7844 //
7845 // map(ps)
7846 // &ps, &ps, sizeof(S2*), TARGET_PARAM | TO | FROM
7847 //
7848 // map(ps->i)
7849 // ps, &(ps->i), sizeof(int), TARGET_PARAM | TO | FROM
7850 // &ps, &(ps->i), sizeof(void*), ATTACH
7851 //
7852 // map(ps->s.f)
7853 // ps, &(ps->s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
7854 // &ps, &(ps->s.f[0]), sizeof(ps), ATTACH
7855 //
7856 // map(from: ps->p)
7857 // ps, &(ps->p), sizeof(double*), TARGET_PARAM | FROM
7858 // &ps, &(ps->p), sizeof(ps), ATTACH
7859 //
7860 // map(to: ps->p[:22])
7861 // ps, &(ps[0]), 0, TARGET_PARAM | IMPLICIT // (+)
7862 // &(ps->p[0]), &(ps->p[0]), 22*sizeof(double), TO
7863 // &(ps->p), &(ps->p[0]), sizeof(void*), ATTACH
7864 //
7865 // map(ps->ps)
7866 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM | TO | FROM
7867 // &ps, &(ps->ps), sizeof(ps), ATTACH
7868 //
7869 // map(from: ps->ps->s.i)
7870 // ps, &(ps[0]), 0, TARGET_PARAM | IMPLICIT // (+)
7871 // &(ps->ps[0]), &(ps->ps->s.i), sizeof(int), FROM
7872 // &(ps->ps), &(ps->ps->s.i), sizeof(void*), ATTACH
7873 //
7874 // map(from: ps->ps->ps)
7875 // ps, &ps[0], 0, TARGET_PARAM | IMPLICIT // (+)
7876 // &(ps->ps[0]), &(ps->ps->ps), sizeof(S2*), FROM
7877 // &(ps->ps), &(ps->ps->ps), sizeof(void*), ATTACH
7878 //
7879 // map(ps->ps->ps->ps)
7880 // ps, &ps[0], 0, TARGET_PARAM | IMPLICIT // (+)
7881 // &(ps->ps->ps[0]), &(ps->ps->ps->ps), sizeof(S2*), TO | FROM
7882 // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(void*), ATTACH
7883 //
7884 // map(to: ps->ps->ps->s.f[:22])
7885 // ps, &ps[0], 0, TARGET_PARAM | IMPLICIT // (+)
7886 // &(ps->ps->ps[0]), &(ps->ps->ps->s.f[0]), 22*sizeof(float), TO
7887 // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), sizeof(void*), ATTACH
7888 //
7889 // map(to: s.f[:22]) map(from: s.p[:33])
7890 // On target, and if s is used in the region:
7891 //
7892 // &s, &(s.f[0]), 50*sizeof(float) +
7893 // sizeof(struct S1) +
7894 // sizeof(double*) (**), TARGET_PARAM
7895 // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | TO
7896 // &s, &(s.p), sizeof(double*), MEMBER_OF(1) | TO |
7897 // FROM | IMPLICIT
7898 // &(s.p[0]), &(s.p[0]), 33*sizeof(double), FROM
7899 // &(s.p), &(s.p[0]), sizeof(void*), ATTACH
7900 // (**) allocate contiguous space needed to fit all mapped members even if
7901 // we allocate space for members not mapped (in this example,
7902 // s.f[22..49] and s.s are not mapped, yet we must allocate space for
7903 // them as well because they fall between &s.f[0] and &s.p)
7904 //
7905 // On other constructs, and, if s is not used in the region, on target:
7906 // &s, &(s.f[0]), 22*sizeof(float), TO
7907 // &(s.p[0]), &(s.p[0]), 33*sizeof(double), FROM
7908 // &(s.p), &(s.p[0]), sizeof(void*), ATTACH
7909 //
7910 // map(from: s.f[:22]) map(to: ps->p[:33])
7911 // &s, &(s.f[0]), 22*sizeof(float), TARGET_PARAM | FROM
7912 // &ps[0], &ps[0], 0, TARGET_PARAM | IMPLICIT // (+)
7913 // &(ps->p[0]), &(ps->p[0]), 33*sizeof(double), TO
7914 // &(ps->p), &(ps->p[0]), sizeof(void*), ATTACH
7915 //
7916 // map(from: s.f[:22], s.s) map(to: ps->p[:33])
7917 // &s, &(s.f[0]), 50*sizeof(float) +
7918 // sizeof(struct S1), TARGET_PARAM
7919 // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | FROM
7920 // &s, &(s.s), sizeof(struct S1), MEMBER_OF(1) | FROM
7921 // ps, &ps[0], 0, TARGET_PARAM | IMPLICIT // (+)
7922 // &(ps->p[0]), &(ps->p[0]), 33*sizeof(double), TO
7923 // &(ps->p), &(ps->p[0]), sizeof(void*), ATTACH
7924 //
7925 // map(p[:100], p)
7926 // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM
7927 // p, &p[0], 100*sizeof(float), TO | FROM
7928 // &p, &p[0], sizeof(float*), ATTACH
7929
7930 // Track if the map information being generated is the first for a capture.
7931 bool IsCaptureFirstInfo = IsFirstComponentList;
7932 // When the variable is on a declare target link or in a to clause with
7933 // unified memory, a reference is needed to hold the host/device address
7934 // of the variable.
7935 bool RequiresReference = false;
7936
7937 // Scan the components from the base to the complete expression.
7938 auto CI = Components.rbegin();
7939 auto CE = Components.rend();
7940 auto I = CI;
7941
7942 // Track if the map information being generated is the first for a list of
7943 // components.
7944 bool IsExpressionFirstInfo = true;
7945 bool FirstPointerInComplexData = false;
7947 Address FinalLowestElem = Address::invalid();
7948 const Expr *AssocExpr = I->getAssociatedExpression();
7949 const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr);
7950 const auto *OASE = dyn_cast<ArraySectionExpr>(AssocExpr);
7951 const auto *OAShE = dyn_cast<OMPArrayShapingExpr>(AssocExpr);
7952
7953 // Get the pointer-attachment base-pointer for the given list, if any.
7954 const Expr *AttachPtrExpr = getAttachPtrExpr(Components);
7955 auto [AttachPtrAddr, AttachPteeBaseAddr] =
7956 getAttachPtrAddrAndPteeBaseAddr(AttachPtrExpr, CGF);
7957
7958 bool HasAttachPtr = AttachPtrExpr != nullptr;
7959 bool FirstComponentIsForAttachPtr = AssocExpr == AttachPtrExpr;
7960 bool SeenAttachPtr = FirstComponentIsForAttachPtr;
7961
7962 if (FirstComponentIsForAttachPtr) {
7963 // No need to process AttachPtr here. It will be processed at the end
7964 // after we have computed the pointee's address.
7965 ++I;
7966 } else if (isa<MemberExpr>(AssocExpr)) {
7967 // The base is the 'this' pointer. The content of the pointer is going
7968 // to be the base of the field being mapped.
7969 BP = CGF.LoadCXXThisAddress();
7970 } else if ((AE && isa<CXXThisExpr>(AE->getBase()->IgnoreParenImpCasts())) ||
7971 (OASE &&
7972 isa<CXXThisExpr>(OASE->getBase()->IgnoreParenImpCasts()))) {
7973 BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress();
7974 } else if (OAShE &&
7975 isa<CXXThisExpr>(OAShE->getBase()->IgnoreParenCasts())) {
7976 BP = Address(
7977 CGF.EmitScalarExpr(OAShE->getBase()),
7978 CGF.ConvertTypeForMem(OAShE->getBase()->getType()->getPointeeType()),
7979 CGF.getContext().getTypeAlignInChars(OAShE->getBase()->getType()));
7980 } else {
7981 // The base is the reference to the variable.
7982 // BP = &Var.
7983 BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress();
7984 if (const auto *VD =
7985 dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) {
7986 if (std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
7987 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) {
7988 if ((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
7989 ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
7990 *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
7992 RequiresReference = true;
7994 }
7995 }
7996 }
7997
7998 // If the variable is a pointer and is being dereferenced (i.e. is not
7999 // the last component), the base has to be the pointer itself, not its
8000 // reference. References are ignored for mapping purposes.
8001 QualType Ty =
8002 I->getAssociatedDeclaration()->getType().getNonReferenceType();
8003 if (Ty->isAnyPointerType() && std::next(I) != CE) {
8004 // No need to generate individual map information for the pointer, it
8005 // can be associated with the combined storage if shared memory mode is
8006 // active or the base declaration is not global variable.
8007 const auto *VD = dyn_cast<VarDecl>(I->getAssociatedDeclaration());
8009 !VD || VD->hasLocalStorage() || HasAttachPtr)
8010 BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
8011 else
8012 FirstPointerInComplexData = true;
8013 ++I;
8014 }
8015 }
8016
8017 // Track whether a component of the list should be marked as MEMBER_OF some
8018 // combined entry (for partial structs). Only the first PTR_AND_OBJ entry
8019 // in a component list should be marked as MEMBER_OF, all subsequent entries
8020 // do not belong to the base struct. E.g.
8021 // struct S2 s;
8022 // s.ps->ps->ps->f[:]
8023 // (1) (2) (3) (4)
8024 // ps(1) is a member pointer, ps(2) is a pointee of ps(1), so it is a
8025 // PTR_AND_OBJ entry; the PTR is ps(1), so MEMBER_OF the base struct. ps(3)
8026 // is the pointee of ps(2) which is not member of struct s, so it should not
8027 // be marked as such (it is still PTR_AND_OBJ).
8028 // The variable is initialized to false so that PTR_AND_OBJ entries which
8029 // are not struct members are not considered (e.g. array of pointers to
8030 // data).
8031 bool ShouldBeMemberOf = false;
8032
8033 // Variable keeping track of whether or not we have encountered a component
8034 // in the component list which is a member expression. Useful when we have a
8035 // pointer or a final array section, in which case it is the previous
8036 // component in the list which tells us whether we have a member expression.
8037 // E.g. X.f[:]
8038 // While processing the final array section "[:]" it is "f" which tells us
8039 // whether we are dealing with a member of a declared struct.
8040 const MemberExpr *EncounteredME = nullptr;
8041
8042 // Track the total number of dimensions. Start from one for the dummy
8043 // dimension.
8044 uint64_t DimSize = 1;
8045
8046 // Detects non-contiguous updates due to strided accesses.
8047 // Sets the 'IsNonContiguous' flag so that the 'MapType' bits are set
8048 // correctly when generating information to be passed to the runtime. The
8049 // flag is set to true if any array section has a stride not equal to 1, or
8050 // if the stride is not a constant expression (conservatively assumed
8051 // non-contiguous).
8052 bool IsNonContiguous =
8053 CombinedInfo.NonContigInfo.IsNonContiguous ||
8054 any_of(Components, [&](const auto &Component) {
8055 const auto *OASE =
8056 dyn_cast<ArraySectionExpr>(Component.getAssociatedExpression());
8057 if (!OASE)
8058 return false;
8059
8060 const Expr *StrideExpr = OASE->getStride();
8061 if (!StrideExpr)
8062 return false;
8063
8064 assert(StrideExpr->getType()->isIntegerType() &&
8065 "Stride expression must be of integer type");
8066
8067 // If stride is not evaluatable as a constant, treat as
8068 // non-contiguous.
8069 const auto Constant =
8070 StrideExpr->getIntegerConstantExpr(CGF.getContext());
8071 if (!Constant)
8072 return true;
8073
8074 // Treat non-unitary strides as non-contiguous.
8075 return !Constant->isOne();
8076 });
8077
8078 bool IsPrevMemberReference = false;
8079
8080 bool IsPartialMapped =
8081 !PartialStruct.PreliminaryMapData.BasePointers.empty();
8082
8083 // We need to check if we will be encountering any MEs. If we do not
8084 // encounter any ME expression it means we will be mapping the whole struct.
8085 // In that case we need to skip adding an entry for the struct to the
8086 // CombinedInfo list and instead add an entry to the StructBaseCombinedInfo
8087 // list only when generating all info for clauses.
8088 bool IsMappingWholeStruct = true;
8089 if (!GenerateAllInfoForClauses) {
8090 IsMappingWholeStruct = false;
8091 } else {
8092 for (auto TempI = I; TempI != CE; ++TempI) {
8093 const MemberExpr *PossibleME =
8094 dyn_cast<MemberExpr>(TempI->getAssociatedExpression());
8095 if (PossibleME) {
8096 IsMappingWholeStruct = false;
8097 break;
8098 }
8099 }
8100 }
8101
8102 bool SeenFirstNonBinOpExprAfterAttachPtr = false;
8103 for (; I != CE; ++I) {
8104 // If we have a valid attach-ptr, we skip processing all components until
8105 // after the attach-ptr.
8106 if (HasAttachPtr && !SeenAttachPtr) {
8107 SeenAttachPtr = I->getAssociatedExpression() == AttachPtrExpr;
8108 continue;
8109 }
8110
8111 // After finding the attach pointer, skip binary-ops, to skip past
8112 // expressions like (p + 10), for a map like map(*(p + 10)), where p is
8113 // the attach-ptr.
8114 if (HasAttachPtr && !SeenFirstNonBinOpExprAfterAttachPtr) {
8115 const auto *BO = dyn_cast<BinaryOperator>(I->getAssociatedExpression());
8116 if (BO)
8117 continue;
8118
8119 // Found the first non-binary-operator component after attach
8120 SeenFirstNonBinOpExprAfterAttachPtr = true;
8121 BP = AttachPteeBaseAddr;
8122 }
8123
8124 // If the current component is member of a struct (parent struct) mark it.
8125 if (!EncounteredME) {
8126 EncounteredME = dyn_cast<MemberExpr>(I->getAssociatedExpression());
8127 // If we encounter a PTR_AND_OBJ entry from now on it should be marked
8128 // as MEMBER_OF the parent struct.
8129 if (EncounteredME) {
8130 ShouldBeMemberOf = true;
8131 // Do not emit as complex pointer if this is actually not array-like
8132 // expression.
8133 if (FirstPointerInComplexData) {
8134 QualType Ty = std::prev(I)
8135 ->getAssociatedDeclaration()
8136 ->getType()
8137 .getNonReferenceType();
8138 BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
8139 FirstPointerInComplexData = false;
8140 }
8141 }
8142 }
8143
8144 auto Next = std::next(I);
8145
8146 // We need to generate the addresses and sizes if this is the last
8147 // component, if the component is a pointer or if it is an array section
8148 // whose length can't be proved to be one. If this is a pointer, it
8149 // becomes the base address for the following components.
8150
8151 // A final array section, is one whose length can't be proved to be one.
8152 // If the map item is non-contiguous then we don't treat any array section
8153 // as final array section.
8154 bool IsFinalArraySection =
8155 !IsNonContiguous &&
8156 isFinalArraySectionExpression(I->getAssociatedExpression());
8157
8158 // If we have a declaration for the mapping use that, otherwise use
8159 // the base declaration of the map clause.
8160 const ValueDecl *MapDecl = (I->getAssociatedDeclaration())
8161 ? I->getAssociatedDeclaration()
8162 : BaseDecl;
8163 MapExpr = (I->getAssociatedExpression()) ? I->getAssociatedExpression()
8164 : MapExpr;
8165
8166 // Get information on whether the element is a pointer. Have to do a
8167 // special treatment for array sections given that they are built-in
8168 // types.
8169 const auto *OASE =
8170 dyn_cast<ArraySectionExpr>(I->getAssociatedExpression());
8171 const auto *OAShE =
8172 dyn_cast<OMPArrayShapingExpr>(I->getAssociatedExpression());
8173 const auto *UO = dyn_cast<UnaryOperator>(I->getAssociatedExpression());
8174 const auto *BO = dyn_cast<BinaryOperator>(I->getAssociatedExpression());
8175 bool IsPointer =
8176 OAShE ||
8179 ->isAnyPointerType()) ||
8180 I->getAssociatedExpression()->getType()->isAnyPointerType();
8181 bool IsMemberReference = isa<MemberExpr>(I->getAssociatedExpression()) &&
8182 MapDecl &&
8183 MapDecl->getType()->isLValueReferenceType();
8184 bool IsNonDerefPointer = IsPointer &&
8185 !(UO && UO->getOpcode() != UO_Deref) && !BO &&
8186 !IsNonContiguous;
8187
8188 if (OASE)
8189 ++DimSize;
8190
8191 if (Next == CE || IsMemberReference || IsNonDerefPointer ||
8192 IsFinalArraySection) {
8193 // If this is not the last component, we expect the pointer to be
8194 // associated with an array expression or member expression.
8195 assert((Next == CE ||
8196 isa<MemberExpr>(Next->getAssociatedExpression()) ||
8197 isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) ||
8198 isa<ArraySectionExpr>(Next->getAssociatedExpression()) ||
8199 isa<OMPArrayShapingExpr>(Next->getAssociatedExpression()) ||
8200 isa<UnaryOperator>(Next->getAssociatedExpression()) ||
8201 isa<BinaryOperator>(Next->getAssociatedExpression())) &&
8202 "Unexpected expression");
8203
8205 Address LowestElem = Address::invalid();
8206 auto &&EmitMemberExprBase = [](CodeGenFunction &CGF,
8207 const MemberExpr *E) {
8208 const Expr *BaseExpr = E->getBase();
8209 // If this is s.x, emit s as an lvalue. If it is s->x, emit s as a
8210 // scalar.
8211 LValue BaseLV;
8212 if (E->isArrow()) {
8213 LValueBaseInfo BaseInfo;
8214 TBAAAccessInfo TBAAInfo;
8215 Address Addr =
8216 CGF.EmitPointerWithAlignment(BaseExpr, &BaseInfo, &TBAAInfo);
8217 QualType PtrTy = BaseExpr->getType()->getPointeeType();
8218 BaseLV = CGF.MakeAddrLValue(Addr, PtrTy, BaseInfo, TBAAInfo);
8219 } else {
8220 BaseLV = CGF.EmitOMPSharedLValue(BaseExpr);
8221 }
8222 return BaseLV;
8223 };
8224 if (OAShE) {
8225 LowestElem = LB =
8226 Address(CGF.EmitScalarExpr(OAShE->getBase()),
8228 OAShE->getBase()->getType()->getPointeeType()),
8230 OAShE->getBase()->getType()));
8231 } else if (IsMemberReference) {
8232 const auto *ME = cast<MemberExpr>(I->getAssociatedExpression());
8233 LValue BaseLVal = EmitMemberExprBase(CGF, ME);
8234 LowestElem = CGF.EmitLValueForFieldInitialization(
8235 BaseLVal, cast<FieldDecl>(MapDecl))
8236 .getAddress();
8237 LB = CGF.EmitLoadOfReferenceLValue(LowestElem, MapDecl->getType())
8238 .getAddress();
8239 } else {
8240 LowestElem = LB =
8241 CGF.EmitOMPSharedLValue(I->getAssociatedExpression())
8242 .getAddress();
8243 }
8244
8245 // Save the final LowestElem, to use it as the pointee in attach maps,
8246 // if emitted.
8247 if (Next == CE)
8248 FinalLowestElem = LowestElem;
8249
8250 // If this component is a pointer inside the base struct then we don't
8251 // need to create any entry for it - it will be combined with the object
8252 // it is pointing to into a single PTR_AND_OBJ entry.
8253 bool IsMemberPointerOrAddr =
8254 EncounteredME &&
8255 (((IsPointer || ForDeviceAddr) &&
8256 I->getAssociatedExpression() == EncounteredME) ||
8257 (IsPrevMemberReference && !IsPointer) ||
8258 (IsMemberReference && Next != CE &&
8259 !Next->getAssociatedExpression()->getType()->isPointerType()));
8260 if (!OverlappedElements.empty() && Next == CE) {
8261 // Handle base element with the info for overlapped elements.
8262 assert(!PartialStruct.Base.isValid() && "The base element is set.");
8263 assert(!IsPointer &&
8264 "Unexpected base element with the pointer type.");
8265 // Mark the whole struct as the struct that requires allocation on the
8266 // device.
8267 PartialStruct.LowestElem = {0, LowestElem};
8268 CharUnits TypeSize = CGF.getContext().getTypeSizeInChars(
8269 I->getAssociatedExpression()->getType());
8272 LowestElem, CGF.VoidPtrTy, CGF.Int8Ty),
8273 TypeSize.getQuantity() - 1);
8274 PartialStruct.HighestElem = {
8275 std::numeric_limits<decltype(
8276 PartialStruct.HighestElem.first)>::max(),
8277 HB};
8278 PartialStruct.Base = BP;
8279 PartialStruct.LB = LB;
8280 assert(
8281 PartialStruct.PreliminaryMapData.BasePointers.empty() &&
8282 "Overlapped elements must be used only once for the variable.");
8283 std::swap(PartialStruct.PreliminaryMapData, CombinedInfo);
8284 // Emit data for non-overlapped data.
8285 OpenMPOffloadMappingFlags Flags =
8286 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF |
8287 getMapTypeBits(MapType, MapModifiers, MotionModifiers, IsImplicit,
8288 /*AddPtrFlag=*/false,
8289 /*AddIsTargetParamFlag=*/false, IsNonContiguous);
8290 CopyOverlappedEntryGaps CopyGaps(CGF, CombinedInfo, Flags, MapDecl,
8291 MapExpr, BP, LB, IsNonContiguous,
8292 DimSize);
8293 // Do bitcopy of all non-overlapped structure elements.
8295 Component : OverlappedElements) {
8296 for (const OMPClauseMappableExprCommon::MappableComponent &MC :
8297 Component) {
8298 if (const ValueDecl *VD = MC.getAssociatedDeclaration()) {
8299 if (const auto *FD = dyn_cast<FieldDecl>(VD)) {
8300 CopyGaps.processField(MC, FD, EmitMemberExprBase);
8301 }
8302 }
8303 }
8304 }
8305 CopyGaps.copyUntilEnd(HB);
8306 break;
8307 }
8308 llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression());
8309 // Skip adding an entry in the CurInfo of this combined entry if the
8310 // whole struct is currently being mapped. The struct needs to be added
8311 // in the first position before any data internal to the struct is being
8312 // mapped.
8313 // Skip adding an entry in the CurInfo of this combined entry if the
8314 // PartialStruct.PreliminaryMapData.BasePointers has been mapped.
8315 if ((!IsMemberPointerOrAddr && !IsPartialMapped) ||
8316 (Next == CE && MapType != OMPC_MAP_unknown)) {
8317 if (!IsMappingWholeStruct) {
8318 CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
8319 CombinedInfo.BasePointers.push_back(BP.emitRawPointer(CGF));
8320 CombinedInfo.DevicePtrDecls.push_back(nullptr);
8321 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
8322 CombinedInfo.Pointers.push_back(LB.emitRawPointer(CGF));
8323 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
8324 Size, CGF.Int64Ty, /*isSigned=*/true));
8325 CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
8326 : 1);
8327 } else {
8328 StructBaseCombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
8329 StructBaseCombinedInfo.BasePointers.push_back(
8330 BP.emitRawPointer(CGF));
8331 StructBaseCombinedInfo.DevicePtrDecls.push_back(nullptr);
8332 StructBaseCombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
8333 StructBaseCombinedInfo.Pointers.push_back(LB.emitRawPointer(CGF));
8334 StructBaseCombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
8335 Size, CGF.Int64Ty, /*isSigned=*/true));
8336 StructBaseCombinedInfo.NonContigInfo.Dims.push_back(
8337 IsNonContiguous ? DimSize : 1);
8338 }
8339
8340 // If Mapper is valid, the last component inherits the mapper.
8341 bool HasMapper = Mapper && Next == CE;
8342 if (!IsMappingWholeStruct)
8343 CombinedInfo.Mappers.push_back(HasMapper ? Mapper : nullptr);
8344 else
8345 StructBaseCombinedInfo.Mappers.push_back(HasMapper ? Mapper
8346 : nullptr);
8347
8348 // We need to add a pointer flag for each map that comes from the
8349 // same expression except for the first one. We also need to signal
8350 // this map is the first one that relates with the current capture
8351 // (there is a set of entries for each capture).
8352 OpenMPOffloadMappingFlags Flags = getMapTypeBits(
8353 MapType, MapModifiers, MotionModifiers, IsImplicit,
8354 !IsExpressionFirstInfo || RequiresReference ||
8355 FirstPointerInComplexData || IsMemberReference,
8356 IsCaptureFirstInfo && !RequiresReference, IsNonContiguous);
8357
8358 if (!IsExpressionFirstInfo || IsMemberReference) {
8359 // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as well,
8360 // then we reset the TO/FROM/ALWAYS/DELETE/CLOSE flags.
8361 if (IsPointer || (IsMemberReference && Next != CE))
8362 Flags &= ~(OpenMPOffloadMappingFlags::OMP_MAP_TO |
8363 OpenMPOffloadMappingFlags::OMP_MAP_FROM |
8364 OpenMPOffloadMappingFlags::OMP_MAP_ALWAYS |
8365 OpenMPOffloadMappingFlags::OMP_MAP_DELETE |
8366 OpenMPOffloadMappingFlags::OMP_MAP_CLOSE);
8367
8368 if (ShouldBeMemberOf) {
8369 // Set placeholder value MEMBER_OF=FFFF to indicate that the flag
8370 // should be later updated with the correct value of MEMBER_OF.
8371 Flags |= OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF;
8372 // From now on, all subsequent PTR_AND_OBJ entries should not be
8373 // marked as MEMBER_OF.
8374 ShouldBeMemberOf = false;
8375 }
8376 }
8377
8378 if (!IsMappingWholeStruct)
8379 CombinedInfo.Types.push_back(Flags);
8380 else
8381 StructBaseCombinedInfo.Types.push_back(Flags);
8382 }
8383
8384 // If we have encountered a member expression so far, keep track of the
8385 // mapped member. If the parent is "*this", then the value declaration
8386 // is nullptr.
8387 if (EncounteredME) {
8388 const auto *FD = cast<FieldDecl>(EncounteredME->getMemberDecl());
8389 unsigned FieldIndex = FD->getFieldIndex();
8390
8391 // Update info about the lowest and highest elements for this struct
8392 if (!PartialStruct.Base.isValid()) {
8393 PartialStruct.LowestElem = {FieldIndex, LowestElem};
8394 if (IsFinalArraySection && OASE) {
8395 Address HB =
8396 CGF.EmitArraySectionExpr(OASE, /*IsLowerBound=*/false)
8397 .getAddress();
8398 PartialStruct.HighestElem = {FieldIndex, HB};
8399 } else {
8400 PartialStruct.HighestElem = {FieldIndex, LowestElem};
8401 }
8402 PartialStruct.Base = BP;
8403 PartialStruct.LB = BP;
8404 } else if (FieldIndex < PartialStruct.LowestElem.first) {
8405 PartialStruct.LowestElem = {FieldIndex, LowestElem};
8406 } else if (FieldIndex > PartialStruct.HighestElem.first) {
8407 if (IsFinalArraySection && OASE) {
8408 Address HB =
8409 CGF.EmitArraySectionExpr(OASE, /*IsLowerBound=*/false)
8410 .getAddress();
8411 PartialStruct.HighestElem = {FieldIndex, HB};
8412 } else {
8413 PartialStruct.HighestElem = {FieldIndex, LowestElem};
8414 }
8415 }
8416 }
8417
8418 // Need to emit combined struct for array sections.
8419 if (IsFinalArraySection || IsNonContiguous)
8420 PartialStruct.IsArraySection = true;
8421
8422 // If we have a final array section, we are done with this expression.
8423 if (IsFinalArraySection)
8424 break;
8425
8426 // The pointer becomes the base for the next element.
8427 if (Next != CE)
8428 BP = IsMemberReference ? LowestElem : LB;
8429 if (!IsPartialMapped)
8430 IsExpressionFirstInfo = false;
8431 IsCaptureFirstInfo = false;
8432 FirstPointerInComplexData = false;
8433 IsPrevMemberReference = IsMemberReference;
8434 } else if (FirstPointerInComplexData) {
8435 QualType Ty = Components.rbegin()
8436 ->getAssociatedDeclaration()
8437 ->getType()
8438 .getNonReferenceType();
8439 BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
8440 FirstPointerInComplexData = false;
8441 }
8442 }
8443 // If ran into the whole component - allocate the space for the whole
8444 // record.
8445 if (!EncounteredME)
8446 PartialStruct.HasCompleteRecord = true;
8447
8448 // Populate ATTACH information for later processing by emitAttachEntry.
8449 if (shouldEmitAttachEntry(AttachPtrExpr, BaseDecl, CGF, CurDir)) {
8450 AttachInfo.AttachPtrAddr = AttachPtrAddr;
8451 AttachInfo.AttachPteeAddr = FinalLowestElem;
8452 AttachInfo.AttachPtrDecl = BaseDecl;
8453 AttachInfo.AttachMapExpr = MapExpr;
8454 }
8455
8456 if (!IsNonContiguous)
8457 return;
8458
8459 const ASTContext &Context = CGF.getContext();
8460
8461 // For supporting stride in array section, we need to initialize the first
8462 // dimension size as 1, first offset as 0, and first count as 1
8463 MapValuesArrayTy CurOffsets = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 0)};
8464 MapValuesArrayTy CurCounts;
8465 MapValuesArrayTy CurStrides = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)};
8466 MapValuesArrayTy DimSizes{llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)};
8467 uint64_t ElementTypeSize;
8468
8469 // Collect Size information for each dimension and get the element size as
8470 // the first Stride. For example, for `int arr[10][10]`, the DimSizes
8471 // should be [10, 10] and the first stride is 4 bytes.
8472 for (const OMPClauseMappableExprCommon::MappableComponent &Component :
8473 Components) {
8474 const Expr *AssocExpr = Component.getAssociatedExpression();
8475 const auto *OASE = dyn_cast<ArraySectionExpr>(AssocExpr);
8476
8477 if (!OASE)
8478 continue;
8479
8480 QualType Ty = ArraySectionExpr::getBaseOriginalType(OASE->getBase());
8481 auto *CAT = Context.getAsConstantArrayType(Ty);
8482 auto *VAT = Context.getAsVariableArrayType(Ty);
8483
8484 // We need all the dimension size except for the last dimension.
8485 assert((VAT || CAT || &Component == &*Components.begin()) &&
8486 "Should be either ConstantArray or VariableArray if not the "
8487 "first Component");
8488
8489 // Get element size if CurCounts is empty.
8490 if (CurCounts.empty()) {
8491 const Type *ElementType = nullptr;
8492 if (CAT)
8493 ElementType = CAT->getElementType().getTypePtr();
8494 else if (VAT)
8495 ElementType = VAT->getElementType().getTypePtr();
8496 else if (&Component == &*Components.begin()) {
8497 // If the base is a raw pointer (e.g. T *data with data[a:b:c]),
8498 // there was no earlier CAT/VAT/array handling to establish
8499 // ElementType. Capture the pointee type now so that subsequent
8500 // components (offset/length/stride) have a concrete element type to
8501 // work with. This makes pointer-backed sections behave consistently
8502 // with CAT/VAT/array bases.
8503 if (const auto *PtrType = Ty->getAs<PointerType>())
8504 ElementType = PtrType->getPointeeType().getTypePtr();
8505 } else {
8506 // Any component after the first should never have a raw pointer type;
8507 // by this point. ElementType must already be known (set above or in
8508 // prior array / CAT / VAT handling).
8509 assert(!Ty->isPointerType() &&
8510 "Non-first components should not be raw pointers");
8511 }
8512
8513 // At this stage, if ElementType was a base pointer and we are in the
8514 // first iteration, it has been computed.
8515 if (ElementType) {
8516 // For the case that having pointer as base, we need to remove one
8517 // level of indirection.
8518 if (&Component != &*Components.begin())
8519 ElementType = ElementType->getPointeeOrArrayElementType();
8520 ElementTypeSize =
8521 Context.getTypeSizeInChars(ElementType).getQuantity();
8522 CurCounts.push_back(
8523 llvm::ConstantInt::get(CGF.Int64Ty, ElementTypeSize));
8524 }
8525 }
8526 // Get dimension value except for the last dimension since we don't need
8527 // it.
8528 if (DimSizes.size() < Components.size() - 1) {
8529 if (CAT)
8530 DimSizes.push_back(
8531 llvm::ConstantInt::get(CGF.Int64Ty, CAT->getZExtSize()));
8532 else if (VAT)
8533 DimSizes.push_back(CGF.Builder.CreateIntCast(
8534 CGF.EmitScalarExpr(VAT->getSizeExpr()), CGF.Int64Ty,
8535 /*IsSigned=*/false));
8536 }
8537 }
8538
8539 // Skip the dummy dimension since we already have its information.
8540 auto *DI = DimSizes.begin() + 1;
8541 // Product of dimension.
8542 llvm::Value *DimProd =
8543 llvm::ConstantInt::get(CGF.CGM.Int64Ty, ElementTypeSize);
8544
8545 // Collect info for non-contiguous. Notice that offset, count, and stride
8546 // are only meaningful for array-section, so we insert a null for anything
8547 // other than array-section.
8548 // Also, the size of offset, count, and stride are not the same as
8549 // pointers, base_pointers, sizes, or dims. Instead, the size of offset,
8550 // count, and stride are the same as the number of non-contiguous
8551 // declaration in target update to/from clause.
8552 for (const OMPClauseMappableExprCommon::MappableComponent &Component :
8553 Components) {
8554 const Expr *AssocExpr = Component.getAssociatedExpression();
8555
8556 if (const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr)) {
8557 llvm::Value *Offset = CGF.Builder.CreateIntCast(
8558 CGF.EmitScalarExpr(AE->getIdx()), CGF.Int64Ty,
8559 /*isSigned=*/false);
8560 CurOffsets.push_back(Offset);
8561 CurCounts.push_back(llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/1));
8562 CurStrides.push_back(CurStrides.back());
8563 continue;
8564 }
8565
8566 const auto *OASE = dyn_cast<ArraySectionExpr>(AssocExpr);
8567
8568 if (!OASE)
8569 continue;
8570
8571 // Offset
8572 const Expr *OffsetExpr = OASE->getLowerBound();
8573 llvm::Value *Offset = nullptr;
8574 if (!OffsetExpr) {
8575 // If offset is absent, then we just set it to zero.
8576 Offset = llvm::ConstantInt::get(CGF.Int64Ty, 0);
8577 } else {
8578 Offset = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(OffsetExpr),
8579 CGF.Int64Ty,
8580 /*isSigned=*/false);
8581 }
8582
8583 // Count
8584 const Expr *CountExpr = OASE->getLength();
8585 llvm::Value *Count = nullptr;
8586 if (!CountExpr) {
8587 // In Clang, once a high dimension is an array section, we construct all
8588 // the lower dimension as array section, however, for case like
8589 // arr[0:2][2], Clang construct the inner dimension as an array section
8590 // but it actually is not in an array section form according to spec.
8591 if (!OASE->getColonLocFirst().isValid() &&
8592 !OASE->getColonLocSecond().isValid()) {
8593 Count = llvm::ConstantInt::get(CGF.Int64Ty, 1);
8594 } else {
8595 // OpenMP 5.0, 2.1.5 Array Sections, Description.
8596 // When the length is absent it defaults to ⌈(size −
8597 // lower-bound)/stride⌉, where size is the size of the array
8598 // dimension.
8599 const Expr *StrideExpr = OASE->getStride();
8600 llvm::Value *Stride =
8601 StrideExpr
8602 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr),
8603 CGF.Int64Ty, /*isSigned=*/false)
8604 : nullptr;
8605 if (Stride)
8606 Count = CGF.Builder.CreateUDiv(
8607 CGF.Builder.CreateNUWSub(*DI, Offset), Stride);
8608 else
8609 Count = CGF.Builder.CreateNUWSub(*DI, Offset);
8610 }
8611 } else {
8612 Count = CGF.EmitScalarExpr(CountExpr);
8613 }
8614 Count = CGF.Builder.CreateIntCast(Count, CGF.Int64Ty, /*isSigned=*/false);
8615 CurCounts.push_back(Count);
8616
8617 // Stride_n' = Stride_n * (D_0 * D_1 ... * D_n-1) * Unit size
8618 // Offset_n' = Offset_n * (D_0 * D_1 ... * D_n-1) * Unit size
8619 // Take `int arr[5][5][5]` and `arr[0:2:2][1:2:1][0:2:2]` as an example:
8620 // Offset Count Stride
8621 // D0 0 4 1 (int) <- dummy dimension
8622 // D1 0 2 8 (2 * (1) * 4)
8623 // D2 100 2 20 (1 * (1 * 5) * 4)
8624 // D3 0 2 200 (2 * (1 * 5 * 4) * 4)
8625 const Expr *StrideExpr = OASE->getStride();
8626 llvm::Value *Stride =
8627 StrideExpr
8628 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr),
8629 CGF.Int64Ty, /*isSigned=*/false)
8630 : nullptr;
8631 DimProd = CGF.Builder.CreateNUWMul(DimProd, *(DI - 1));
8632 if (Stride)
8633 CurStrides.push_back(CGF.Builder.CreateNUWMul(DimProd, Stride));
8634 else
8635 CurStrides.push_back(DimProd);
8636
8637 Offset = CGF.Builder.CreateNUWMul(DimProd, Offset);
8638 CurOffsets.push_back(Offset);
8639
8640 if (DI != DimSizes.end())
8641 ++DI;
8642 }
8643
8644 CombinedInfo.NonContigInfo.Offsets.push_back(CurOffsets);
8645 CombinedInfo.NonContigInfo.Counts.push_back(CurCounts);
8646 CombinedInfo.NonContigInfo.Strides.push_back(CurStrides);
8647 }
8648
8649 /// Return the adjusted map modifiers if the declaration a capture refers to
8650 /// appears in a first-private clause. This is expected to be used only with
8651 /// directives that start with 'target'.
8652 OpenMPOffloadMappingFlags
8653 getMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap) const {
8654 assert(Cap.capturesVariable() && "Expected capture by reference only!");
8655
8656 // A first private variable captured by reference will use only the
8657 // 'private ptr' and 'map to' flag. Return the right flags if the captured
8658 // declaration is known as first-private in this handler.
8659 if (FirstPrivateDecls.count(Cap.getCapturedVar())) {
8660 if (Cap.getCapturedVar()->getType()->isAnyPointerType())
8661 return OpenMPOffloadMappingFlags::OMP_MAP_TO |
8662 OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ;
8663 return OpenMPOffloadMappingFlags::OMP_MAP_PRIVATE |
8664 OpenMPOffloadMappingFlags::OMP_MAP_TO;
8665 }
8666 auto I = LambdasMap.find(Cap.getCapturedVar()->getCanonicalDecl());
8667 if (I != LambdasMap.end())
8668 // for map(to: lambda): using user specified map type.
8669 return getMapTypeBits(
8670 I->getSecond()->getMapType(), I->getSecond()->getMapTypeModifiers(),
8671 /*MotionModifiers=*/{}, I->getSecond()->isImplicit(),
8672 /*AddPtrFlag=*/false,
8673 /*AddIsTargetParamFlag=*/false,
8674 /*isNonContiguous=*/false);
8675 return OpenMPOffloadMappingFlags::OMP_MAP_TO |
8676 OpenMPOffloadMappingFlags::OMP_MAP_FROM;
8677 }
8678
8679 void getPlainLayout(const CXXRecordDecl *RD,
8680 llvm::SmallVectorImpl<const FieldDecl *> &Layout,
8681 bool AsBase) const {
8682 const CGRecordLayout &RL = CGF.getTypes().getCGRecordLayout(RD);
8683
8684 llvm::StructType *St =
8685 AsBase ? RL.getBaseSubobjectLLVMType() : RL.getLLVMType();
8686
8687 unsigned NumElements = St->getNumElements();
8688 llvm::SmallVector<
8689 llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>, 4>
8690 RecordLayout(NumElements);
8691
8692 // Fill bases.
8693 for (const auto &I : RD->bases()) {
8694 if (I.isVirtual())
8695 continue;
8696
8697 QualType BaseTy = I.getType();
8698 const auto *Base = BaseTy->getAsCXXRecordDecl();
8699 // Ignore empty bases.
8700 if (isEmptyRecordForLayout(CGF.getContext(), BaseTy) ||
8701 CGF.getContext()
8702 .getASTRecordLayout(Base)
8704 .isZero())
8705 continue;
8706
8707 unsigned FieldIndex = RL.getNonVirtualBaseLLVMFieldNo(Base);
8708 RecordLayout[FieldIndex] = Base;
8709 }
8710 // Fill in virtual bases.
8711 for (const auto &I : RD->vbases()) {
8712 QualType BaseTy = I.getType();
8713 // Ignore empty bases.
8714 if (isEmptyRecordForLayout(CGF.getContext(), BaseTy))
8715 continue;
8716
8717 const auto *Base = BaseTy->getAsCXXRecordDecl();
8718 unsigned FieldIndex = RL.getVirtualBaseIndex(Base);
8719 if (RecordLayout[FieldIndex])
8720 continue;
8721 RecordLayout[FieldIndex] = Base;
8722 }
8723 // Fill in all the fields.
8724 assert(!RD->isUnion() && "Unexpected union.");
8725 for (const auto *Field : RD->fields()) {
8726 // Fill in non-bitfields. (Bitfields always use a zero pattern, which we
8727 // will fill in later.)
8728 if (!Field->isBitField() &&
8729 !isEmptyFieldForLayout(CGF.getContext(), Field)) {
8730 unsigned FieldIndex = RL.getLLVMFieldNo(Field);
8731 RecordLayout[FieldIndex] = Field;
8732 }
8733 }
8734 for (const llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>
8735 &Data : RecordLayout) {
8736 if (Data.isNull())
8737 continue;
8738 if (const auto *Base = dyn_cast<const CXXRecordDecl *>(Data))
8739 getPlainLayout(Base, Layout, /*AsBase=*/true);
8740 else
8741 Layout.push_back(cast<const FieldDecl *>(Data));
8742 }
8743 }
8744
8745 /// Returns the address corresponding to \p PointerExpr.
8746 static Address getAttachPtrAddr(const Expr *PointerExpr,
8747 CodeGenFunction &CGF) {
8748 assert(PointerExpr && "Cannot get addr from null attach-ptr expr");
8749 Address AttachPtrAddr = Address::invalid();
8750
8751 if (auto *DRE = dyn_cast<DeclRefExpr>(PointerExpr)) {
8752 // If the pointer is a variable, we can use its address directly.
8753 AttachPtrAddr = CGF.EmitLValue(DRE).getAddress();
8754 } else if (auto *OASE = dyn_cast<ArraySectionExpr>(PointerExpr)) {
8755 AttachPtrAddr =
8756 CGF.EmitArraySectionExpr(OASE, /*IsLowerBound=*/true).getAddress();
8757 } else if (auto *ASE = dyn_cast<ArraySubscriptExpr>(PointerExpr)) {
8758 AttachPtrAddr = CGF.EmitLValue(ASE).getAddress();
8759 } else if (auto *ME = dyn_cast<MemberExpr>(PointerExpr)) {
8760 AttachPtrAddr = CGF.EmitMemberExpr(ME).getAddress();
8761 } else if (auto *UO = dyn_cast<UnaryOperator>(PointerExpr)) {
8762 assert(UO->getOpcode() == UO_Deref &&
8763 "Unexpected unary-operator on attach-ptr-expr");
8764 AttachPtrAddr = CGF.EmitLValue(UO).getAddress();
8765 }
8766 assert(AttachPtrAddr.isValid() &&
8767 "Failed to get address for attach pointer expression");
8768 return AttachPtrAddr;
8769 }
8770
8771 /// Get the address of the attach pointer, and a load from it, to get the
8772 /// pointee base address.
8773 /// \return A pair containing AttachPtrAddr and AttachPteeBaseAddr. The pair
8774 /// contains invalid addresses if \p AttachPtrExpr is null.
8775 static std::pair<Address, Address>
8776 getAttachPtrAddrAndPteeBaseAddr(const Expr *AttachPtrExpr,
8777 CodeGenFunction &CGF) {
8778
8779 if (!AttachPtrExpr)
8780 return {Address::invalid(), Address::invalid()};
8781
8782 Address AttachPtrAddr = getAttachPtrAddr(AttachPtrExpr, CGF);
8783 assert(AttachPtrAddr.isValid() && "Invalid attach pointer addr");
8784
8785 QualType AttachPtrType =
8788
8789 Address AttachPteeBaseAddr = CGF.EmitLoadOfPointer(
8790 AttachPtrAddr, AttachPtrType->castAs<PointerType>());
8791 assert(AttachPteeBaseAddr.isValid() && "Invalid attach pointee base addr");
8792
8793 return {AttachPtrAddr, AttachPteeBaseAddr};
8794 }
8795
8796 /// Returns whether an attach entry should be emitted for a map on
8797 /// \p MapBaseDecl on the directive \p CurDir.
8798 static bool
8799 shouldEmitAttachEntry(const Expr *PointerExpr, const ValueDecl *MapBaseDecl,
8800 CodeGenFunction &CGF,
8801 llvm::PointerUnion<const OMPExecutableDirective *,
8802 const OMPDeclareMapperDecl *>
8803 CurDir) {
8804 if (!PointerExpr)
8805 return false;
8806
8807 // Pointer attachment is needed at map-entering time or for declare
8808 // mappers.
8809 return isa<const OMPDeclareMapperDecl *>(CurDir) ||
8812 ->getDirectiveKind());
8813 }
8814
8815 /// Computes the attach-ptr expr for \p Components, and updates various maps
8816 /// with the information.
8817 /// It internally calls OMPClauseMappableExprCommon::findAttachPtrExpr()
8818 /// with the OpenMPDirectiveKind extracted from \p CurDir.
8819 /// It updates AttachPtrComputationOrderMap, AttachPtrComponentDepthMap, and
8820 /// AttachPtrExprMap.
8821 void collectAttachPtrExprInfo(
8823 llvm::PointerUnion<const OMPExecutableDirective *,
8824 const OMPDeclareMapperDecl *>
8825 CurDir) {
8826
8827 OpenMPDirectiveKind CurDirectiveID =
8829 ? OMPD_declare_mapper
8830 : cast<const OMPExecutableDirective *>(CurDir)->getDirectiveKind();
8831
8832 const auto &[AttachPtrExpr, Depth] =
8834 CurDirectiveID);
8835
8836 AttachPtrComputationOrderMap.try_emplace(
8837 AttachPtrExpr, AttachPtrComputationOrderMap.size());
8838 AttachPtrComponentDepthMap.try_emplace(AttachPtrExpr, Depth);
8839 AttachPtrExprMap.try_emplace(Components, AttachPtrExpr);
8840 }
8841
8842 /// Generate all the base pointers, section pointers, sizes, map types, and
8843 /// mappers for the extracted mappable expressions (all included in \a
8844 /// CombinedInfo). Also, for each item that relates with a device pointer, a
8845 /// pair of the relevant declaration and index where it occurs is appended to
8846 /// the device pointers info array.
8847 void generateAllInfoForClauses(
8848 ArrayRef<const OMPClause *> Clauses, MapCombinedInfoTy &CombinedInfo,
8849 llvm::OpenMPIRBuilder &OMPBuilder,
8850 const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
8851 llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
8852 // We have to process the component lists that relate with the same
8853 // declaration in a single chunk so that we can generate the map flags
8854 // correctly. Therefore, we organize all lists in a map.
8855 enum MapKind { Present, Allocs, Other, Total };
8856 llvm::MapVector<CanonicalDeclPtr<const Decl>,
8857 SmallVector<SmallVector<MapInfo, 8>, 4>>
8858 Info;
8859
8860 // Helper function to fill the information map for the different supported
8861 // clauses.
8862 auto &&InfoGen =
8863 [&Info, &SkipVarSet](
8864 const ValueDecl *D, MapKind Kind,
8866 OpenMPMapClauseKind MapType,
8867 ArrayRef<OpenMPMapModifierKind> MapModifiers,
8868 ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
8869 bool ReturnDevicePointer, bool IsImplicit, const ValueDecl *Mapper,
8870 const Expr *VarRef = nullptr, bool ForDeviceAddr = false) {
8871 if (SkipVarSet.contains(D))
8872 return;
8873 auto It = Info.try_emplace(D, Total).first;
8874 It->second[Kind].emplace_back(
8875 L, MapType, MapModifiers, MotionModifiers, ReturnDevicePointer,
8876 IsImplicit, Mapper, VarRef, ForDeviceAddr);
8877 };
8878
8879 for (const auto *Cl : Clauses) {
8880 const auto *C = dyn_cast<OMPMapClause>(Cl);
8881 if (!C)
8882 continue;
8883 MapKind Kind = Other;
8884 if (llvm::is_contained(C->getMapTypeModifiers(),
8885 OMPC_MAP_MODIFIER_present))
8886 Kind = Present;
8887 else if (C->getMapType() == OMPC_MAP_alloc)
8888 Kind = Allocs;
8889 const auto *EI = C->getVarRefs().begin();
8890 for (const auto L : C->component_lists()) {
8891 const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
8892 InfoGen(std::get<0>(L), Kind, std::get<1>(L), C->getMapType(),
8893 C->getMapTypeModifiers(), {},
8894 /*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(L),
8895 E);
8896 ++EI;
8897 }
8898 }
8899 for (const auto *Cl : Clauses) {
8900 const auto *C = dyn_cast<OMPToClause>(Cl);
8901 if (!C)
8902 continue;
8903 MapKind Kind = Other;
8904 if (llvm::is_contained(C->getMotionModifiers(),
8905 OMPC_MOTION_MODIFIER_present))
8906 Kind = Present;
8907 if (llvm::is_contained(C->getMotionModifiers(),
8908 OMPC_MOTION_MODIFIER_iterator)) {
8909 if (auto *IteratorExpr = dyn_cast<OMPIteratorExpr>(
8910 C->getIteratorModifier()->IgnoreParenImpCasts())) {
8911 const auto *VD = cast<VarDecl>(IteratorExpr->getIteratorDecl(0));
8912 CGF.EmitVarDecl(*VD);
8913 }
8914 }
8915
8916 const auto *EI = C->getVarRefs().begin();
8917 for (const auto L : C->component_lists()) {
8918 InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_to, {},
8919 C->getMotionModifiers(), /*ReturnDevicePointer=*/false,
8920 C->isImplicit(), std::get<2>(L), *EI);
8921 ++EI;
8922 }
8923 }
8924 for (const auto *Cl : Clauses) {
8925 const auto *C = dyn_cast<OMPFromClause>(Cl);
8926 if (!C)
8927 continue;
8928 MapKind Kind = Other;
8929 if (llvm::is_contained(C->getMotionModifiers(),
8930 OMPC_MOTION_MODIFIER_present))
8931 Kind = Present;
8932 if (llvm::is_contained(C->getMotionModifiers(),
8933 OMPC_MOTION_MODIFIER_iterator)) {
8934 if (auto *IteratorExpr = dyn_cast<OMPIteratorExpr>(
8935 C->getIteratorModifier()->IgnoreParenImpCasts())) {
8936 const auto *VD = cast<VarDecl>(IteratorExpr->getIteratorDecl(0));
8937 CGF.EmitVarDecl(*VD);
8938 }
8939 }
8940
8941 const auto *EI = C->getVarRefs().begin();
8942 for (const auto L : C->component_lists()) {
8943 InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_from, {},
8944 C->getMotionModifiers(),
8945 /*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(L),
8946 *EI);
8947 ++EI;
8948 }
8949 }
8950
8951 // Look at the use_device_ptr and use_device_addr clauses information and
8952 // mark the existing map entries as such. If there is no map information for
8953 // an entry in the use_device_ptr and use_device_addr list, we create one
8954 // with map type 'return_param' and zero size section. It is the user's
8955 // fault if that was not mapped before. If there is no map information, then
8956 // we defer the emission of that entry until all the maps for the same VD
8957 // have been handled.
8958 MapCombinedInfoTy UseDeviceDataCombinedInfo;
8959
8960 auto &&UseDeviceDataCombinedInfoGen =
8961 [&UseDeviceDataCombinedInfo](const ValueDecl *VD, llvm::Value *Ptr,
8962 CodeGenFunction &CGF, bool IsDevAddr,
8963 bool HasUdpFbNullify = false) {
8964 UseDeviceDataCombinedInfo.Exprs.push_back(VD);
8965 UseDeviceDataCombinedInfo.BasePointers.emplace_back(Ptr);
8966 UseDeviceDataCombinedInfo.DevicePtrDecls.emplace_back(VD);
8967 UseDeviceDataCombinedInfo.DevicePointers.emplace_back(
8968 IsDevAddr ? DeviceInfoTy::Address : DeviceInfoTy::Pointer);
8969 // FIXME: For use_device_addr on array-sections, this should
8970 // be the starting address of the section.
8971 // e.g. int *p;
8972 // ... use_device_addr(p[3])
8973 // &p[0], &p[3], /*size=*/0, RETURN_PARAM
8974 UseDeviceDataCombinedInfo.Pointers.push_back(Ptr);
8975 UseDeviceDataCombinedInfo.Sizes.push_back(
8976 llvm::Constant::getNullValue(CGF.Int64Ty));
8977 OpenMPOffloadMappingFlags Flags =
8978 OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM;
8979 if (HasUdpFbNullify)
8980 Flags |= OpenMPOffloadMappingFlags::OMP_MAP_FB_NULLIFY;
8981 UseDeviceDataCombinedInfo.Types.push_back(Flags);
8982 UseDeviceDataCombinedInfo.Mappers.push_back(nullptr);
8983 };
8984
8985 auto &&MapInfoGen =
8986 [&UseDeviceDataCombinedInfoGen](
8987 CodeGenFunction &CGF, const Expr *IE, const ValueDecl *VD,
8989 Components,
8990 bool IsDevAddr, bool IEIsAttachPtrForDevAddr = false,
8991 bool HasUdpFbNullify = false) {
8992 // We didn't find any match in our map information - generate a zero
8993 // size array section.
8994 llvm::Value *Ptr;
8995 if (IsDevAddr && !IEIsAttachPtrForDevAddr) {
8996 if (IE->isGLValue())
8997 Ptr = CGF.EmitLValue(IE).getPointer(CGF);
8998 else
8999 Ptr = CGF.EmitScalarExpr(IE);
9000 } else {
9001 Ptr = CGF.EmitLoadOfScalar(CGF.EmitLValue(IE), IE->getExprLoc());
9002 }
9003 bool TreatDevAddrAsDevPtr = IEIsAttachPtrForDevAddr;
9004 // For the purpose of address-translation, treat something like the
9005 // following:
9006 // int *p;
9007 // ... use_device_addr(p[1])
9008 // equivalent to
9009 // ... use_device_ptr(p)
9010 UseDeviceDataCombinedInfoGen(VD, Ptr, CGF, /*IsDevAddr=*/IsDevAddr &&
9011 !TreatDevAddrAsDevPtr,
9012 HasUdpFbNullify);
9013 };
9014
9015 auto &&IsMapInfoExist =
9016 [&Info, this](CodeGenFunction &CGF, const ValueDecl *VD, const Expr *IE,
9017 const Expr *DesiredAttachPtrExpr, bool IsDevAddr,
9018 bool HasUdpFbNullify = false) -> bool {
9019 // We potentially have map information for this declaration already.
9020 // Look for the first set of components that refer to it. If found,
9021 // return true.
9022 // If the first component is a member expression, we have to look into
9023 // 'this', which maps to null in the map of map information. Otherwise
9024 // look directly for the information.
9025 auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);
9026 if (It != Info.end()) {
9027 bool Found = false;
9028 for (auto &Data : It->second) {
9029 MapInfo *CI = nullptr;
9030 // We potentially have multiple maps for the same decl. We need to
9031 // only consider those for which the attach-ptr matches the desired
9032 // attach-ptr.
9033 auto *It = llvm::find_if(Data, [&](const MapInfo &MI) {
9034 if (MI.Components.back().getAssociatedDeclaration() != VD)
9035 return false;
9036
9037 const Expr *MapAttachPtr = getAttachPtrExpr(MI.Components);
9038 bool Match = AttachPtrComparator.areEqual(MapAttachPtr,
9039 DesiredAttachPtrExpr);
9040 return Match;
9041 });
9042
9043 if (It != Data.end())
9044 CI = &*It;
9045
9046 if (CI) {
9047 if (IsDevAddr) {
9048 CI->ForDeviceAddr = true;
9049 CI->ReturnDevicePointer = true;
9050 CI->HasUdpFbNullify = HasUdpFbNullify;
9051 Found = true;
9052 break;
9053 } else {
9054 auto PrevCI = std::next(CI->Components.rbegin());
9055 const auto *VarD = dyn_cast<VarDecl>(VD);
9056 const Expr *AttachPtrExpr = getAttachPtrExpr(CI->Components);
9057 if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
9058 isa<MemberExpr>(IE) ||
9059 !VD->getType().getNonReferenceType()->isPointerType() ||
9060 PrevCI == CI->Components.rend() ||
9061 isa<MemberExpr>(PrevCI->getAssociatedExpression()) || !VarD ||
9062 VarD->hasLocalStorage() ||
9063 (isa_and_nonnull<DeclRefExpr>(AttachPtrExpr) &&
9064 VD == cast<DeclRefExpr>(AttachPtrExpr)->getDecl())) {
9065 CI->ForDeviceAddr = IsDevAddr;
9066 CI->ReturnDevicePointer = true;
9067 CI->HasUdpFbNullify = HasUdpFbNullify;
9068 Found = true;
9069 break;
9070 }
9071 }
9072 }
9073 }
9074 return Found;
9075 }
9076 return false;
9077 };
9078
9079 // Look at the use_device_ptr clause information and mark the existing map
9080 // entries as such. If there is no map information for an entry in the
9081 // use_device_ptr list, we create one with map type 'alloc' and zero size
9082 // section. It is the user fault if that was not mapped before. If there is
9083 // no map information and the pointer is a struct member, then we defer the
9084 // emission of that entry until the whole struct has been processed.
9085 for (const auto *Cl : Clauses) {
9086 const auto *C = dyn_cast<OMPUseDevicePtrClause>(Cl);
9087 if (!C)
9088 continue;
9089 bool HasUdpFbNullify =
9090 C->getFallbackModifier() == OMPC_USE_DEVICE_PTR_FALLBACK_fb_nullify;
9091 for (const auto L : C->component_lists()) {
9093 std::get<1>(L);
9094 assert(!Components.empty() &&
9095 "Not expecting empty list of components!");
9096 const ValueDecl *VD = Components.back().getAssociatedDeclaration();
9098 const Expr *IE = Components.back().getAssociatedExpression();
9099 // For use_device_ptr, we match an existing map clause if its attach-ptr
9100 // is same as the use_device_ptr operand. e.g.
9101 // map expr | use_device_ptr expr | current behavior
9102 // ---------|---------------------|-----------------
9103 // p[1] | p | match
9104 // ps->a | ps | match
9105 // p | p | no match
9106 const Expr *UDPOperandExpr =
9107 Components.front().getAssociatedExpression();
9108 if (IsMapInfoExist(CGF, VD, IE,
9109 /*DesiredAttachPtrExpr=*/UDPOperandExpr,
9110 /*IsDevAddr=*/false, HasUdpFbNullify))
9111 continue;
9112 MapInfoGen(CGF, IE, VD, Components, /*IsDevAddr=*/false,
9113 /*IEIsAttachPtrForDevAddr=*/false, HasUdpFbNullify);
9114 }
9115 }
9116
9117 llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>, 4> Processed;
9118 for (const auto *Cl : Clauses) {
9119 const auto *C = dyn_cast<OMPUseDeviceAddrClause>(Cl);
9120 if (!C)
9121 continue;
9122 for (const auto L : C->component_lists()) {
9124 std::get<1>(L);
9125 assert(!std::get<1>(L).empty() &&
9126 "Not expecting empty list of components!");
9127 const ValueDecl *VD = std::get<1>(L).back().getAssociatedDeclaration();
9128 if (!Processed.insert(VD).second)
9129 continue;
9131 // For use_device_addr, we match an existing map clause if the
9132 // use_device_addr operand's attach-ptr matches the map operand's
9133 // attach-ptr.
9134 // We could also restrict to only match cases when there is a full
9135 // match between the map/use_device_addr clause exprs, but that may be
9136 // unnecessary.
9137 //
9138 // map expr | use_device_addr expr | current | possible restrictive/
9139 // | | behavior | safer behavior
9140 // ---------|----------------------|-----------|-----------------------
9141 // p | p | match | match
9142 // p[0] | p[0] | match | match
9143 // p[0:1] | p[0] | match | no match
9144 // p[0:1] | p[2:1] | match | no match
9145 // p[1] | p[0] | match | no match
9146 // ps->a | ps->b | match | no match
9147 // p | p[0] | no match | no match
9148 // pp | pp[0][0] | no match | no match
9149 const Expr *UDAAttachPtrExpr = getAttachPtrExpr(Components);
9150 const Expr *IE = std::get<1>(L).back().getAssociatedExpression();
9151 assert((!UDAAttachPtrExpr || UDAAttachPtrExpr == IE) &&
9152 "use_device_addr operand has an attach-ptr, but does not match "
9153 "last component's expr.");
9154 if (IsMapInfoExist(CGF, VD, IE,
9155 /*DesiredAttachPtrExpr=*/UDAAttachPtrExpr,
9156 /*IsDevAddr=*/true))
9157 continue;
9158 MapInfoGen(CGF, IE, VD, Components,
9159 /*IsDevAddr=*/true,
9160 /*IEIsAttachPtrForDevAddr=*/UDAAttachPtrExpr != nullptr);
9161 }
9162 }
9163
9164 for (const auto &Data : Info) {
9165 MapCombinedInfoTy CurInfo;
9166 const Decl *D = Data.first;
9167 const ValueDecl *VD = cast_or_null<ValueDecl>(D);
9168 // Group component lists by their AttachPtrExpr and process them in order
9169 // of increasing complexity (nullptr first, then simple expressions like
9170 // p, then more complex ones like p[0], etc.)
9171 //
9172 // This is similar to how generateInfoForCaptureFromClauseInfo handles
9173 // grouping for target constructs.
9174 SmallVector<std::pair<const Expr *, MapInfo>, 16> AttachPtrMapInfoPairs;
9175
9176 // First, collect all MapData entries with their attach-ptr exprs.
9177 for (const auto &M : Data.second) {
9178 for (const MapInfo &L : M) {
9179 assert(!L.Components.empty() &&
9180 "Not expecting declaration with no component lists.");
9181
9182 const Expr *AttachPtrExpr = getAttachPtrExpr(L.Components);
9183 AttachPtrMapInfoPairs.emplace_back(AttachPtrExpr, L);
9184 }
9185 }
9186
9187 // Next, sort by increasing order of their complexity.
9188 llvm::stable_sort(AttachPtrMapInfoPairs,
9189 [this](const auto &LHS, const auto &RHS) {
9190 return AttachPtrComparator(LHS.first, RHS.first);
9191 });
9192
9193 // And finally, process them all in order, grouping those with
9194 // equivalent attach-ptr exprs together.
9195 auto *It = AttachPtrMapInfoPairs.begin();
9196 while (It != AttachPtrMapInfoPairs.end()) {
9197 const Expr *AttachPtrExpr = It->first;
9198
9199 SmallVector<MapInfo, 8> GroupLists;
9200 while (It != AttachPtrMapInfoPairs.end() &&
9201 (It->first == AttachPtrExpr ||
9202 AttachPtrComparator.areEqual(It->first, AttachPtrExpr))) {
9203 GroupLists.push_back(It->second);
9204 ++It;
9205 }
9206 assert(!GroupLists.empty() && "GroupLists should not be empty");
9207
9208 StructRangeInfoTy PartialStruct;
9209 AttachInfoTy AttachInfo;
9210 MapCombinedInfoTy GroupCurInfo;
9211 // Current group's struct base information:
9212 MapCombinedInfoTy GroupStructBaseCurInfo;
9213 for (const MapInfo &L : GroupLists) {
9214 // Remember the current base pointer index.
9215 unsigned CurrentBasePointersIdx = GroupCurInfo.BasePointers.size();
9216 unsigned StructBasePointersIdx =
9217 GroupStructBaseCurInfo.BasePointers.size();
9218
9219 GroupCurInfo.NonContigInfo.IsNonContiguous =
9220 L.Components.back().isNonContiguous();
9221 generateInfoForComponentList(
9222 L.MapType, L.MapModifiers, L.MotionModifiers, L.Components,
9223 GroupCurInfo, GroupStructBaseCurInfo, PartialStruct, AttachInfo,
9224 /*IsFirstComponentList=*/false, L.IsImplicit,
9225 /*GenerateAllInfoForClauses*/ true, L.Mapper, L.ForDeviceAddr, VD,
9226 L.VarRef, /*OverlappedElements*/ {});
9227
9228 // If this entry relates to a device pointer, set the relevant
9229 // declaration and add the 'return pointer' flag.
9230 if (L.ReturnDevicePointer) {
9231 // Check whether a value was added to either GroupCurInfo or
9232 // GroupStructBaseCurInfo and error if no value was added to either
9233 // of them:
9234 assert((CurrentBasePointersIdx < GroupCurInfo.BasePointers.size() ||
9235 StructBasePointersIdx <
9236 GroupStructBaseCurInfo.BasePointers.size()) &&
9237 "Unexpected number of mapped base pointers.");
9238
9239 // Choose a base pointer index which is always valid:
9240 const ValueDecl *RelevantVD =
9241 L.Components.back().getAssociatedDeclaration();
9242 assert(RelevantVD &&
9243 "No relevant declaration related with device pointer??");
9244
9245 // If GroupStructBaseCurInfo has been updated this iteration then
9246 // work on the first new entry added to it i.e. make sure that when
9247 // multiple values are added to any of the lists, the first value
9248 // added is being modified by the assignments below (not the last
9249 // value added).
9250 auto SetDevicePointerInfo = [&](MapCombinedInfoTy &Info,
9251 unsigned Idx) {
9252 Info.DevicePtrDecls[Idx] = RelevantVD;
9253 Info.DevicePointers[Idx] = L.ForDeviceAddr
9254 ? DeviceInfoTy::Address
9255 : DeviceInfoTy::Pointer;
9256 Info.Types[Idx] |=
9257 OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM;
9258 if (L.HasUdpFbNullify)
9259 Info.Types[Idx] |=
9260 OpenMPOffloadMappingFlags::OMP_MAP_FB_NULLIFY;
9261 };
9262
9263 if (StructBasePointersIdx <
9264 GroupStructBaseCurInfo.BasePointers.size())
9265 SetDevicePointerInfo(GroupStructBaseCurInfo,
9266 StructBasePointersIdx);
9267 else
9268 SetDevicePointerInfo(GroupCurInfo, CurrentBasePointersIdx);
9269 }
9270 }
9271
9272 // Unify entries in one list making sure the struct mapping precedes the
9273 // individual fields:
9274 MapCombinedInfoTy GroupUnionCurInfo;
9275 GroupUnionCurInfo.append(GroupStructBaseCurInfo);
9276 GroupUnionCurInfo.append(GroupCurInfo);
9277
9278 // If there is an entry in PartialStruct it means we have a struct with
9279 // individual members mapped. Emit an extra combined entry.
9280 if (PartialStruct.Base.isValid()) {
9281 // Prepend a synthetic dimension of length 1 to represent the
9282 // aggregated struct object. Using 1 (not 0, as 0 produced an
9283 // incorrect non-contiguous descriptor (DimSize==1), causing the
9284 // non-contiguous motion clause path to be skipped.) is important:
9285 // * It preserves the correct rank so targetDataUpdate() computes
9286 // DimSize == 2 for cases like strided array sections originating
9287 // from user-defined mappers (e.g. test with s.data[0:8:2]).
9288 GroupUnionCurInfo.NonContigInfo.Dims.insert(
9289 GroupUnionCurInfo.NonContigInfo.Dims.begin(), 1);
9290 emitCombinedEntry(
9291 CurInfo, GroupUnionCurInfo.Types, PartialStruct, AttachInfo,
9292 /*IsMapThis=*/!VD, OMPBuilder, VD,
9293 /*OffsetForMemberOfFlag=*/CombinedInfo.BasePointers.size(),
9294 /*NotTargetParams=*/true);
9295 }
9296
9297 // Append this group's results to the overall CurInfo in the correct
9298 // order: combined-entry -> original-field-entries -> attach-entry
9299 CurInfo.append(GroupUnionCurInfo);
9300 if (AttachInfo.isValid())
9301 emitAttachEntry(CGF, CurInfo, AttachInfo);
9302 }
9303
9304 // We need to append the results of this capture to what we already have.
9305 CombinedInfo.append(CurInfo);
9306 }
9307 // Append data for use_device_ptr/addr clauses.
9308 CombinedInfo.append(UseDeviceDataCombinedInfo);
9309 }
9310
9311public:
9312 MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF)
9313 : CurDir(&Dir), CGF(CGF), AttachPtrComparator(*this) {
9314 // Extract firstprivate clause information.
9315 for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>())
9316 for (const auto *D : C->varlist())
9317 FirstPrivateDecls.try_emplace(
9318 cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl()), C->isImplicit());
9319 // Extract implicit firstprivates from uses_allocators clauses.
9320 for (const auto *C : Dir.getClausesOfKind<OMPUsesAllocatorsClause>()) {
9321 for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
9322 OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
9323 if (const auto *DRE = dyn_cast_or_null<DeclRefExpr>(D.AllocatorTraits))
9324 FirstPrivateDecls.try_emplace(cast<VarDecl>(DRE->getDecl()),
9325 /*Implicit=*/true);
9326 else if (const auto *VD = dyn_cast<VarDecl>(
9327 cast<DeclRefExpr>(D.Allocator->IgnoreParenImpCasts())
9328 ->getDecl()))
9329 FirstPrivateDecls.try_emplace(VD, /*Implicit=*/true);
9330 }
9331 }
9332 // Extract defaultmap clause information.
9333 for (const auto *C : Dir.getClausesOfKind<OMPDefaultmapClause>())
9334 if (C->getDefaultmapModifier() == OMPC_DEFAULTMAP_MODIFIER_firstprivate)
9335 DefaultmapFirstprivateKinds.insert(C->getDefaultmapKind());
9336 // Extract device pointer clause information.
9337 for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>())
9338 for (auto L : C->component_lists())
9339 DevPointersMap[std::get<0>(L)].push_back(std::get<1>(L));
9340 // Extract device addr clause information.
9341 for (const auto *C : Dir.getClausesOfKind<OMPHasDeviceAddrClause>())
9342 for (auto L : C->component_lists())
9343 HasDevAddrsMap[std::get<0>(L)].push_back(std::get<1>(L));
9344 // Extract map information.
9345 for (const auto *C : Dir.getClausesOfKind<OMPMapClause>()) {
9346 if (C->getMapType() != OMPC_MAP_to)
9347 continue;
9348 for (auto L : C->component_lists()) {
9349 const ValueDecl *VD = std::get<0>(L);
9350 const auto *RD = VD ? VD->getType()
9351 .getCanonicalType()
9352 .getNonReferenceType()
9353 ->getAsCXXRecordDecl()
9354 : nullptr;
9355 if (RD && RD->isLambda())
9356 LambdasMap.try_emplace(std::get<0>(L), C);
9357 }
9358 }
9359
9360 auto CollectAttachPtrExprsForClauseComponents = [this](const auto *C) {
9361 for (auto L : C->component_lists()) {
9363 std::get<1>(L);
9364 if (!Components.empty())
9365 collectAttachPtrExprInfo(Components, CurDir);
9366 }
9367 };
9368
9369 // Populate the AttachPtrExprMap for all component lists from map-related
9370 // clauses.
9371 for (const auto *C : Dir.getClausesOfKind<OMPMapClause>())
9372 CollectAttachPtrExprsForClauseComponents(C);
9373 for (const auto *C : Dir.getClausesOfKind<OMPToClause>())
9374 CollectAttachPtrExprsForClauseComponents(C);
9375 for (const auto *C : Dir.getClausesOfKind<OMPFromClause>())
9376 CollectAttachPtrExprsForClauseComponents(C);
9377 for (const auto *C : Dir.getClausesOfKind<OMPUseDevicePtrClause>())
9378 CollectAttachPtrExprsForClauseComponents(C);
9379 for (const auto *C : Dir.getClausesOfKind<OMPUseDeviceAddrClause>())
9380 CollectAttachPtrExprsForClauseComponents(C);
9381 for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>())
9382 CollectAttachPtrExprsForClauseComponents(C);
9383 for (const auto *C : Dir.getClausesOfKind<OMPHasDeviceAddrClause>())
9384 CollectAttachPtrExprsForClauseComponents(C);
9385 }
9386
9387 /// Constructor for the declare mapper directive.
9388 MappableExprsHandler(const OMPDeclareMapperDecl &Dir, CodeGenFunction &CGF)
9389 : CurDir(&Dir), CGF(CGF), AttachPtrComparator(*this) {}
9390
9391 /// Generate code for the combined entry if we have a partially mapped struct
9392 /// and take care of the mapping flags of the arguments corresponding to
9393 /// individual struct members.
9394 /// If a valid \p AttachInfo exists, its pointee addr will be updated to point
9395 /// to the combined-entry's begin address, if emitted.
9396 /// \p PartialStruct contains attach base-pointer information.
9397 /// \returns The index of the combined entry if one was added, std::nullopt
9398 /// otherwise.
9399 void emitCombinedEntry(MapCombinedInfoTy &CombinedInfo,
9400 MapFlagsArrayTy &CurTypes,
9401 const StructRangeInfoTy &PartialStruct,
9402 AttachInfoTy &AttachInfo, bool IsMapThis,
9403 llvm::OpenMPIRBuilder &OMPBuilder, const ValueDecl *VD,
9404 unsigned OffsetForMemberOfFlag,
9405 bool NotTargetParams) const {
9406 if (CurTypes.size() == 1 &&
9407 ((CurTypes.back() & OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF) !=
9408 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF) &&
9409 !PartialStruct.IsArraySection)
9410 return;
9411 Address LBAddr = PartialStruct.LowestElem.second;
9412 Address HBAddr = PartialStruct.HighestElem.second;
9413 if (PartialStruct.HasCompleteRecord) {
9414 LBAddr = PartialStruct.LB;
9415 HBAddr = PartialStruct.LB;
9416 }
9417 CombinedInfo.Exprs.push_back(VD);
9418 // Base is the base of the struct
9419 CombinedInfo.BasePointers.push_back(PartialStruct.Base.emitRawPointer(CGF));
9420 CombinedInfo.DevicePtrDecls.push_back(nullptr);
9421 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
9422 // Pointer is the address of the lowest element
9423 llvm::Value *LB = LBAddr.emitRawPointer(CGF);
9424 const CXXMethodDecl *MD =
9425 CGF.CurFuncDecl ? dyn_cast<CXXMethodDecl>(CGF.CurFuncDecl) : nullptr;
9426 const CXXRecordDecl *RD = MD ? MD->getParent() : nullptr;
9427 bool HasBaseClass = RD && IsMapThis ? RD->getNumBases() > 0 : false;
9428 // There should not be a mapper for a combined entry.
9429 if (HasBaseClass) {
9430 // OpenMP 5.2 148:21:
9431 // If the target construct is within a class non-static member function,
9432 // and a variable is an accessible data member of the object for which the
9433 // non-static data member function is invoked, the variable is treated as
9434 // if the this[:1] expression had appeared in a map clause with a map-type
9435 // of tofrom.
9436 // Emit this[:1]
9437 CombinedInfo.Pointers.push_back(PartialStruct.Base.emitRawPointer(CGF));
9438 QualType Ty = MD->getFunctionObjectParameterType();
9439 llvm::Value *Size =
9440 CGF.Builder.CreateIntCast(CGF.getTypeSize(Ty), CGF.Int64Ty,
9441 /*isSigned=*/true);
9442 CombinedInfo.Sizes.push_back(Size);
9443 } else {
9444 CombinedInfo.Pointers.push_back(LB);
9445 // Size is (addr of {highest+1} element) - (addr of lowest element)
9446 llvm::Value *HB = HBAddr.emitRawPointer(CGF);
9447 llvm::Value *HAddr = CGF.Builder.CreateConstGEP1_32(
9448 HBAddr.getElementType(), HB, /*Idx0=*/1);
9449 llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(LB, CGF.VoidPtrTy);
9450 llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy);
9451 llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CHAddr, CLAddr);
9452 llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.Int64Ty,
9453 /*isSigned=*/false);
9454 CombinedInfo.Sizes.push_back(Size);
9455 }
9456 CombinedInfo.Mappers.push_back(nullptr);
9457 // Map type is always TARGET_PARAM, if generate info for captures.
9458 CombinedInfo.Types.push_back(
9459 NotTargetParams ? OpenMPOffloadMappingFlags::OMP_MAP_NONE
9460 : !PartialStruct.PreliminaryMapData.BasePointers.empty()
9461 ? OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ
9462 : OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM);
9463 // If any element has the present modifier, then make sure the runtime
9464 // doesn't attempt to allocate the struct.
9465 if (CurTypes.end() !=
9466 llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) {
9467 return static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
9468 Type & OpenMPOffloadMappingFlags::OMP_MAP_PRESENT);
9469 }))
9470 CombinedInfo.Types.back() |= OpenMPOffloadMappingFlags::OMP_MAP_PRESENT;
9471 // Remove TARGET_PARAM flag from the first element
9472 (*CurTypes.begin()) &= ~OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;
9473 // If any element has the ompx_hold modifier, then make sure the runtime
9474 // uses the hold reference count for the struct as a whole so that it won't
9475 // be unmapped by an extra dynamic reference count decrement. Add it to all
9476 // elements as well so the runtime knows which reference count to check
9477 // when determining whether it's time for device-to-host transfers of
9478 // individual elements.
9479 if (CurTypes.end() !=
9480 llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) {
9481 return static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
9482 Type & OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD);
9483 })) {
9484 CombinedInfo.Types.back() |= OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD;
9485 for (auto &M : CurTypes)
9486 M |= OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD;
9487 }
9488
9489 // All other current entries will be MEMBER_OF the combined entry
9490 // (except for PTR_AND_OBJ entries which do not have a placeholder value
9491 // 0xFFFF in the MEMBER_OF field, or ATTACH entries since they are expected
9492 // to be handled by themselves, after all other maps).
9493 OpenMPOffloadMappingFlags MemberOfFlag = OMPBuilder.getMemberOfFlag(
9494 OffsetForMemberOfFlag + CombinedInfo.BasePointers.size() - 1);
9495 for (auto &M : CurTypes)
9496 OMPBuilder.setCorrectMemberOfFlag(M, MemberOfFlag);
9497
9498 // When we are emitting a combined entry. If there were any pending
9499 // attachments to be done, we do them to the begin address of the combined
9500 // entry. Note that this means only one attachment per combined-entry will
9501 // be done. So, for instance, if we have:
9502 // S *ps;
9503 // ... map(ps->a, ps->b)
9504 // When we are emitting a combined entry. If AttachInfo is valid,
9505 // update the pointee address to point to the begin address of the combined
9506 // entry. This ensures that if we have multiple maps like:
9507 // `map(ps->a, ps->b)`, we still get a single ATTACH entry, like:
9508 //
9509 // &ps[0], &ps->a, sizeof(ps->a to ps->b), ALLOC // combined-entry
9510 // &ps[0], &ps->a, sizeof(ps->a), TO | FROM
9511 // &ps[0], &ps->b, sizeof(ps->b), TO | FROM
9512 // &ps, &ps->a, sizeof(void*), ATTACH // Use combined-entry's LB
9513 if (AttachInfo.isValid())
9514 AttachInfo.AttachPteeAddr = LBAddr;
9515 }
9516
9517 /// Generate all the base pointers, section pointers, sizes, map types, and
9518 /// mappers for the extracted mappable expressions (all included in \a
9519 /// CombinedInfo). Also, for each item that relates with a device pointer, a
9520 /// pair of the relevant declaration and index where it occurs is appended to
9521 /// the device pointers info array.
9522 void generateAllInfo(
9523 MapCombinedInfoTy &CombinedInfo, llvm::OpenMPIRBuilder &OMPBuilder,
9524 const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
9525 llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
9526 assert(isa<const OMPExecutableDirective *>(CurDir) &&
9527 "Expect a executable directive");
9528 const auto *CurExecDir = cast<const OMPExecutableDirective *>(CurDir);
9529 generateAllInfoForClauses(CurExecDir->clauses(), CombinedInfo, OMPBuilder,
9530 SkipVarSet);
9531 }
9532
9533 /// Generate all the base pointers, section pointers, sizes, map types, and
9534 /// mappers for the extracted map clauses of user-defined mapper (all included
9535 /// in \a CombinedInfo).
9536 void generateAllInfoForMapper(MapCombinedInfoTy &CombinedInfo,
9537 llvm::OpenMPIRBuilder &OMPBuilder) const {
9538 assert(isa<const OMPDeclareMapperDecl *>(CurDir) &&
9539 "Expect a declare mapper directive");
9540 const auto *CurMapperDir = cast<const OMPDeclareMapperDecl *>(CurDir);
9541 generateAllInfoForClauses(CurMapperDir->clauses(), CombinedInfo,
9542 OMPBuilder);
9543 }
9544
9545 /// Emit capture info for lambdas for variables captured by reference.
9546 void generateInfoForLambdaCaptures(
9547 const ValueDecl *VD, llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
9548 llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers) const {
9549 QualType VDType = VD->getType().getCanonicalType().getNonReferenceType();
9550 const auto *RD = VDType->getAsCXXRecordDecl();
9551 if (!RD || !RD->isLambda())
9552 return;
9553 Address VDAddr(Arg, CGF.ConvertTypeForMem(VDType),
9554 CGF.getContext().getDeclAlign(VD));
9555 LValue VDLVal = CGF.MakeAddrLValue(VDAddr, VDType);
9556 llvm::DenseMap<const ValueDecl *, FieldDecl *> Captures;
9557 FieldDecl *ThisCapture = nullptr;
9558 RD->getCaptureFields(Captures, ThisCapture);
9559 if (ThisCapture) {
9560 LValue ThisLVal =
9561 CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture);
9562 LValue ThisLValVal = CGF.EmitLValueForField(VDLVal, ThisCapture);
9563 LambdaPointers.try_emplace(ThisLVal.getPointer(CGF),
9564 VDLVal.getPointer(CGF));
9565 CombinedInfo.Exprs.push_back(VD);
9566 CombinedInfo.BasePointers.push_back(ThisLVal.getPointer(CGF));
9567 CombinedInfo.DevicePtrDecls.push_back(nullptr);
9568 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
9569 CombinedInfo.Pointers.push_back(ThisLValVal.getPointer(CGF));
9570 CombinedInfo.Sizes.push_back(
9571 CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy),
9572 CGF.Int64Ty, /*isSigned=*/true));
9573 CombinedInfo.Types.push_back(
9574 OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ |
9575 OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
9576 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF |
9577 OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT);
9578 CombinedInfo.Mappers.push_back(nullptr);
9579 }
9580 for (const LambdaCapture &LC : RD->captures()) {
9581 if (!LC.capturesVariable())
9582 continue;
9583 const VarDecl *VD = cast<VarDecl>(LC.getCapturedVar());
9584 if (LC.getCaptureKind() != LCK_ByRef && !VD->getType()->isPointerType())
9585 continue;
9586 auto It = Captures.find(VD);
9587 assert(It != Captures.end() && "Found lambda capture without field.");
9588 LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second);
9589 if (LC.getCaptureKind() == LCK_ByRef) {
9590 LValue VarLValVal = CGF.EmitLValueForField(VDLVal, It->second);
9591 LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
9592 VDLVal.getPointer(CGF));
9593 CombinedInfo.Exprs.push_back(VD);
9594 CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
9595 CombinedInfo.DevicePtrDecls.push_back(nullptr);
9596 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
9597 CombinedInfo.Pointers.push_back(VarLValVal.getPointer(CGF));
9598 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
9599 CGF.getTypeSize(
9601 CGF.Int64Ty, /*isSigned=*/true));
9602 } else {
9603 RValue VarRVal = CGF.EmitLoadOfLValue(VarLVal, RD->getLocation());
9604 LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
9605 VDLVal.getPointer(CGF));
9606 CombinedInfo.Exprs.push_back(VD);
9607 CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
9608 CombinedInfo.DevicePtrDecls.push_back(nullptr);
9609 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
9610 CombinedInfo.Pointers.push_back(VarRVal.getScalarVal());
9611 CombinedInfo.Sizes.push_back(llvm::ConstantInt::get(CGF.Int64Ty, 0));
9612 }
9613 CombinedInfo.Types.push_back(
9614 OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ |
9615 OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
9616 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF |
9617 OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT);
9618 CombinedInfo.Mappers.push_back(nullptr);
9619 }
9620 }
9621
9622 /// Set correct indices for lambdas captures.
9623 void adjustMemberOfForLambdaCaptures(
9624 llvm::OpenMPIRBuilder &OMPBuilder,
9625 const llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers,
9626 MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
9627 MapFlagsArrayTy &Types) const {
9628 for (unsigned I = 0, E = Types.size(); I < E; ++I) {
9629 // Set correct member_of idx for all implicit lambda captures.
9630 if (Types[I] != (OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ |
9631 OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
9632 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF |
9633 OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT))
9634 continue;
9635 llvm::Value *BasePtr = LambdaPointers.lookup(BasePointers[I]);
9636 assert(BasePtr && "Unable to find base lambda address.");
9637 int TgtIdx = -1;
9638 for (unsigned J = I; J > 0; --J) {
9639 unsigned Idx = J - 1;
9640 if (Pointers[Idx] != BasePtr)
9641 continue;
9642 TgtIdx = Idx;
9643 break;
9644 }
9645 assert(TgtIdx != -1 && "Unable to find parent lambda.");
9646 // All other current entries will be MEMBER_OF the combined entry
9647 // (except for PTR_AND_OBJ entries which do not have a placeholder value
9648 // 0xFFFF in the MEMBER_OF field).
9649 OpenMPOffloadMappingFlags MemberOfFlag =
9650 OMPBuilder.getMemberOfFlag(TgtIdx);
9651 OMPBuilder.setCorrectMemberOfFlag(Types[I], MemberOfFlag);
9652 }
9653 }
9654
9655 /// Populate component lists for non-lambda captured variables from map,
9656 /// is_device_ptr and has_device_addr clause info.
9657 void populateComponentListsForNonLambdaCaptureFromClauses(
9658 const ValueDecl *VD, MapDataArrayTy &DeclComponentLists,
9659 SmallVectorImpl<
9660 SmallVector<OMPClauseMappableExprCommon::MappableComponent, 8>>
9661 &StorageForImplicitlyAddedComponentLists) const {
9662 if (VD && LambdasMap.count(VD))
9663 return;
9664
9665 // For member fields list in is_device_ptr, store it in
9666 // DeclComponentLists for generating components info.
9668 auto It = DevPointersMap.find(VD);
9669 if (It != DevPointersMap.end())
9670 for (const auto &MCL : It->second)
9671 DeclComponentLists.emplace_back(MCL, OMPC_MAP_to, Unknown,
9672 /*IsImpicit = */ true, nullptr,
9673 nullptr);
9674 auto I = HasDevAddrsMap.find(VD);
9675 if (I != HasDevAddrsMap.end())
9676 for (const auto &MCL : I->second)
9677 DeclComponentLists.emplace_back(MCL, OMPC_MAP_tofrom, Unknown,
9678 /*IsImpicit = */ true, nullptr,
9679 nullptr);
9680 assert(isa<const OMPExecutableDirective *>(CurDir) &&
9681 "Expect a executable directive");
9682 const auto *CurExecDir = cast<const OMPExecutableDirective *>(CurDir);
9683 for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
9684 const auto *EI = C->getVarRefs().begin();
9685 for (const auto L : C->decl_component_lists(VD)) {
9686 const ValueDecl *VDecl, *Mapper;
9687 // The Expression is not correct if the mapping is implicit
9688 const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
9690 std::tie(VDecl, Components, Mapper) = L;
9691 assert(VDecl == VD && "We got information for the wrong declaration??");
9692 assert(!Components.empty() &&
9693 "Not expecting declaration with no component lists.");
9694 DeclComponentLists.emplace_back(Components, C->getMapType(),
9695 C->getMapTypeModifiers(),
9696 C->isImplicit(), Mapper, E);
9697 ++EI;
9698 }
9699 }
9700
9701 // For the target construct, if there's a map with a base-pointer that's
9702 // a member of an implicitly captured struct, of the current class,
9703 // we need to emit an implicit map on the pointer.
9704 if (isOpenMPTargetExecutionDirective(CurExecDir->getDirectiveKind()))
9705 addImplicitMapForAttachPtrBaseIfMemberOfCapturedVD(
9706 VD, DeclComponentLists, StorageForImplicitlyAddedComponentLists);
9707
9708 llvm::stable_sort(DeclComponentLists, [](const MapData &LHS,
9709 const MapData &RHS) {
9710 ArrayRef<OpenMPMapModifierKind> MapModifiers = std::get<2>(LHS);
9711 OpenMPMapClauseKind MapType = std::get<1>(RHS);
9712 bool HasPresent =
9713 llvm::is_contained(MapModifiers, clang::OMPC_MAP_MODIFIER_present);
9714 bool HasAllocs = MapType == OMPC_MAP_alloc;
9715 MapModifiers = std::get<2>(RHS);
9716 MapType = std::get<1>(LHS);
9717 bool HasPresentR =
9718 llvm::is_contained(MapModifiers, clang::OMPC_MAP_MODIFIER_present);
9719 bool HasAllocsR = MapType == OMPC_MAP_alloc;
9720 return (HasPresent && !HasPresentR) || (HasAllocs && !HasAllocsR);
9721 });
9722 }
9723
9724 /// On a target construct, if there's an implicit map on a struct, or that of
9725 /// this[:], and an explicit map with a member of that struct/class as the
9726 /// base-pointer, we need to make sure that base-pointer is implicitly mapped,
9727 /// to make sure we don't map the full struct/class. For example:
9728 ///
9729 /// \code
9730 /// struct S {
9731 /// int dummy[10000];
9732 /// int *p;
9733 /// void f1() {
9734 /// #pragma omp target map(p[0:1])
9735 /// (void)this;
9736 /// }
9737 /// }; S s;
9738 ///
9739 /// void f2() {
9740 /// #pragma omp target map(s.p[0:10])
9741 /// (void)s;
9742 /// }
9743 /// \endcode
9744 ///
9745 /// Only `this->p` and `s.p` should be mapped in the two cases above.
9746 //
9747 // OpenMP 6.0: 7.9.6 map clause, pg 285
9748 // If a list item with an implicitly determined data-mapping attribute does
9749 // not have any corresponding storage in the device data environment prior to
9750 // a task encountering the construct associated with the map clause, and one
9751 // or more contiguous parts of the original storage are either list items or
9752 // base pointers to list items that are explicitly mapped on the construct,
9753 // only those parts of the original storage will have corresponding storage in
9754 // the device data environment as a result of the map clauses on the
9755 // construct.
// Implementation summary: walks AttachPtrExprMap and, for every attach-ptr
// that is a MemberExpr whose base is the captured entity (`this` when
// CapturedVD is null, otherwise a DeclRefExpr naming CapturedVD), appends an
// implicit `tofrom` component list for that attach-ptr to DeclComponentLists,
// unless an existing list already maps the attach-ptr itself, `this[...]`, or
// the captured variable. The component vector backing each new list is
// appended to ComponentVectorStorage.
9756 void addImplicitMapForAttachPtrBaseIfMemberOfCapturedVD(
9757 const ValueDecl *CapturedVD, MapDataArrayTy &DeclComponentLists,
9758 SmallVectorImpl<
9759 SmallVector<OMPClauseMappableExprCommon::MappableComponent, 8>>
9760 &ComponentVectorStorage) const {
// A null CapturedVD stands for the `this` capture.
9761 bool IsThisCapture = CapturedVD == nullptr;
9762
9763 for (const auto &ComponentsAndAttachPtr : AttachPtrExprMap) {
// NOTE(review): listing line 9764 (the declared type of
// `ComponentsWithAttachPtr`) is not visible in this listing; the statement
// below starts mid-declaration. Confirm against the original file.
9765 ComponentsWithAttachPtr = ComponentsAndAttachPtr.first;
9766 const Expr *AttachPtrExpr = ComponentsAndAttachPtr.second;
9767 if (!AttachPtrExpr)
9768 continue;
9769
// Only member accesses (`x.p` / `x->p`) are candidates.
9770 const auto *ME = dyn_cast<MemberExpr>(AttachPtrExpr);
9771 if (!ME)
9772 continue;
9773
9774 const Expr *Base = ME->getBase()->IgnoreParenImpCasts();
9775
9776 // If we are handling a "this" capture, then we are looking for
9777 // attach-ptrs of form `this->p`, either explicitly or implicitly.
9778 if (IsThisCapture && !ME->isImplicitCXXThis() && !isa<CXXThisExpr>(Base))
9779 continue;
9780
// NOTE(review): this check is subsumed by the stricter one a few lines below
// (same condition plus `ME->isArrow()`); it looks redundant.
9781 if (!IsThisCapture && (!isa<DeclRefExpr>(Base) ||
9782 cast<DeclRefExpr>(Base)->getDecl() != CapturedVD))
9783 continue;
9784
9785 // For non-this captures, we are looking for attach-ptrs of form
9786 // `s.p`,
9787 // i.e. a non-arrow member access whose base names the captured variable.
9788 if (!IsThisCapture && (ME->isArrow() || !isa<DeclRefExpr>(Base) ||
9789 cast<DeclRefExpr>(Base)->getDecl() != CapturedVD))
9790 continue;
9791
9792 // Check if we have an existing map on either:
9793 // this[:], s, this->p, or s.p, in which case, we don't need to add
9794 // an implicit one for the attach-ptr s.p/this->p.
9795 bool FoundExistingMap = false;
9796 for (const MapData &ExistingL : DeclComponentLists) {
// NOTE(review): listing line 9797 (the declared type of
// `ExistingComponents`) is not visible in this listing.
9798 ExistingComponents = std::get<0>(ExistingL);
9799
9800 if (ExistingComponents.empty())
9801 continue;
9802
9803 // Only the first component of each existing list is inspected.
9804 const auto &FirstComponent = ExistingComponents.front();
9805 const Expr *FirstExpr = FirstComponent.getAssociatedExpression();
9806
9807 if (!FirstExpr)
9808 continue;
9809
9810 // First check if we have a map like map(this->p) or map(s.p).
9811 if (AttachPtrComparator.areEqual(FirstExpr, AttachPtrExpr)) {
9812 FoundExistingMap = true;
9813 break;
9814 }
9815
9816 // Check if we have a map like this[0:1]
9817 if (IsThisCapture) {
9818 if (const auto *OASE = dyn_cast<ArraySectionExpr>(FirstExpr)) {
9819 if (isa<CXXThisExpr>(OASE->getBase()->IgnoreParenImpCasts())) {
9820 FoundExistingMap = true;
9821 break;
9822 }
9823 }
// For a `this` capture the DeclRefExpr check below does not apply.
9824 continue;
9825 }
9826
9827 // When the attach-ptr is something like `s.p`, check if
9828 // `s` itself is mapped explicitly.
9829 if (const auto *DRE = dyn_cast<DeclRefExpr>(FirstExpr)) {
9830 if (DRE->getDecl() == CapturedVD) {
9831 FoundExistingMap = true;
9832 break;
9833 }
9834 }
9835 }
9836
9837 if (FoundExistingMap)
9838 continue;
9839
9840 // If no base map is found, we need to create an implicit map for the
9841 // attach-pointer expr.
9842
9843 ComponentVectorStorage.emplace_back();
9844 auto &AttachPtrComponents = ComponentVectorStorage.back();
9845
// NOTE(review): listing line 9846 is not visible in this listing; it
// presumably declares `Unknown`, which is used in the emplace_back below.
// Confirm against the original file.
9847 bool SeenAttachPtrComponent = false;
9848 // For creating a map on the attach-ptr `s.p/this->p`, we copy all
9849 // components from the component-list which has `s.p/this->p`
9850 // as the attach-ptr, starting from the component which matches
9851 // `s.p/this->p`. This way, we'll have component-lists of
9852 // `s.p` -> `s`, and `this->p` -> `this`.
9853 for (size_t i = 0; i < ComponentsWithAttachPtr.size(); ++i) {
9854 const auto &Component = ComponentsWithAttachPtr[i];
9855 const Expr *ComponentExpr = Component.getAssociatedExpression();
9856
// Skip components until the one whose expression is (by pointer identity)
// the attach-ptr; copy that one and everything after it.
9857 if (!SeenAttachPtrComponent && ComponentExpr != AttachPtrExpr)
9858 continue;
9859 SeenAttachPtrComponent = true;
9860
9861 AttachPtrComponents.emplace_back(Component.getAssociatedExpression(),
9862 Component.getAssociatedDeclaration(),
9863 Component.isNonContiguous());
9864 }
9865 assert(!AttachPtrComponents.empty() &&
9866 "Could not populate component-lists for mapping attach-ptr");
9867
// Record the new list as an implicit `tofrom` map with no mapper, using the
// attach-ptr expression as the variable reference.
9868 DeclComponentLists.emplace_back(
9869 AttachPtrComponents, OMPC_MAP_tofrom, Unknown,
9870 /*IsImplicit=*/true, /*mapper=*/nullptr, AttachPtrExpr);
9871 }
9872 }
9873
9874 /// For a capture that has an associated clause, generate the base pointers,
9875 /// section pointers, sizes, map types, and mappers (all included in
9876 /// \a CurCaptureVarInfo).
///
/// Captures found in LambdasMap are skipped. Captures found in DevPointersMap
/// or HasDevAddrsMap produce a single literal, target-parameter entry built
/// from \a Arg. All other captures have their component lists grouped by
/// attach-pointer expression and processed group by group.
9877 void generateInfoForCaptureFromClauseInfo(
9878 const MapDataArrayTy &DeclComponentListsFromClauses,
9879 const CapturedStmt::Capture *Cap, llvm::Value *Arg,
9880 MapCombinedInfoTy &CurCaptureVarInfo, llvm::OpenMPIRBuilder &OMPBuilder,
9881 unsigned OffsetForMemberOfFlag) const {
9882 assert(!Cap->capturesVariableArrayType() &&
9883 "Not expecting to generate map info for a variable array type!");
9884
9885 // Canonical decl of the captured variable; null when the capture is `this`.
9886 const ValueDecl *VD = Cap->capturesThis()
9887 ? nullptr
9888 : Cap->getCapturedVar()->getCanonicalDecl();
9889
9890 // For map(to: lambda): skip here, processing it in
9891 // generateDefaultMapInfo
9892 if (LambdasMap.count(VD))
9893 return;
9894
9895 // If this declaration appears in an is_device_ptr clause we just have to
9896 // pass the pointer by value. If it is a reference to a declaration, we just
9897 // pass its value. Declarations recorded in HasDevAddrsMap take this path too.
9898 if (VD && (DevPointersMap.count(VD) || HasDevAddrsMap.count(VD))) {
9899 CurCaptureVarInfo.Exprs.push_back(VD);
9900 CurCaptureVarInfo.BasePointers.emplace_back(Arg);
9901 CurCaptureVarInfo.DevicePtrDecls.emplace_back(VD);
9902 CurCaptureVarInfo.DevicePointers.emplace_back(DeviceInfoTy::Pointer);
9903 CurCaptureVarInfo.Pointers.push_back(Arg);
9904 CurCaptureVarInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
9905 CGF.getTypeSize(CGF.getContext().VoidPtrTy), CGF.Int64Ty,
9906 /*isSigned=*/true));
9907 CurCaptureVarInfo.Types.push_back(
9908 OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
9909 OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM);
9910 CurCaptureVarInfo.Mappers.push_back(nullptr);
9911 return;
9912 }
9913
// Generates the entries for one group of component lists and appends them to
// CurCaptureVarInfo. Note that the lambda parameter shadows the enclosing
// function's DeclComponentListsFromClauses.
9914 auto GenerateInfoForComponentLists =
9915 [&](ArrayRef<MapData> DeclComponentListsFromClauses,
9916 bool IsEligibleForTargetParamFlag) {
9917 MapCombinedInfoTy CurInfoForComponentLists;
9918 StructRangeInfoTy PartialStruct;
9919 AttachInfoTy AttachInfo;
9920
9921 if (DeclComponentListsFromClauses.empty())
9922 return;
9923
9924 generateInfoForCaptureFromComponentLists(
9925 VD, DeclComponentListsFromClauses, CurInfoForComponentLists,
9926 PartialStruct, AttachInfo, IsEligibleForTargetParamFlag);
9927
9928 // If there is an entry in PartialStruct it means we have a
9929 // struct with individual members mapped. Emit an extra combined
9930 // entry.
9931 if (PartialStruct.Base.isValid()) {
9932 CurCaptureVarInfo.append(PartialStruct.PreliminaryMapData);
9933 emitCombinedEntry(
9934 CurCaptureVarInfo, CurInfoForComponentLists.Types,
9935 PartialStruct, AttachInfo, Cap->capturesThis(), OMPBuilder,
9936 /*VD=*/nullptr, OffsetForMemberOfFlag,
9937 /*NotTargetParams*/ !IsEligibleForTargetParamFlag);
9938 }
9939
9940 // We do the appends to get the entries in the following order:
9941 // combined-entry -> individual-field-entries -> attach-entry,
9942 CurCaptureVarInfo.append(CurInfoForComponentLists);
9943 if (AttachInfo.isValid())
9944 emitAttachEntry(CGF, CurCaptureVarInfo, AttachInfo);
9945 };
9946
9947 // Group component lists by their AttachPtrExpr and process them in order
9948 // of increasing complexity (nullptr first, then simple expressions like p,
9949 // then more complex ones like p[0], etc.)
9950 //
9951 // This ensures that we:
9952 // * handle maps that can contribute towards setting the kernel argument,
9953 // (e.g. map(ps), or map(ps[0])), before any that cannot (e.g. ps->pt->d).
9954 // * allocate a single contiguous storage for all exprs with the same
9955 // captured var and having the same attach-ptr.
9956 //
9957 // Example: The map clauses below should be handled grouped together based
9958 // on their attachable-base-pointers:
9959 // map-clause | attachable-base-pointer
9960 // --------------------------+------------------------
9961 // map(p, ps) | nullptr
9962 // map(p[0]) | p
9963 // map(p[0]->b, p[0]->c) | p[0]
9964 // map(ps->d, ps->e, ps->pt) | ps
9965 // map(ps->pt->d, ps->pt->e) | ps->pt
9966
9967 // First, collect all MapData entries with their attach-ptr exprs.
9968 SmallVector<std::pair<const Expr *, MapData>, 16> AttachPtrMapDataPairs;
9969
9970 for (const MapData &L : DeclComponentListsFromClauses) {
// NOTE(review): listing line 9971 (the declaration of `Components`, which the
// expression below initializes) is not visible in this listing.
9972 std::get<0>(L);
9973 const Expr *AttachPtrExpr = getAttachPtrExpr(Components);
9974 AttachPtrMapDataPairs.emplace_back(AttachPtrExpr, L);
9975 }
9976
9977 // Next, sort by increasing order of their complexity. The sort is stable,
// so entries that compare equivalent keep their clause order.
9978 llvm::stable_sort(AttachPtrMapDataPairs,
9979 [this](const auto &LHS, const auto &RHS) {
9980 return AttachPtrComparator(LHS.first, RHS.first);
9981 });
9982
// True when nothing has been recorded for this capture before this call.
9983 bool NoDefaultMappingDoneForVD = CurCaptureVarInfo.BasePointers.empty();
9984 bool IsFirstGroup = true;
9985
9986 // And finally, process them all in order, grouping those with
9987 // equivalent attach-ptr exprs together.
9988 auto *It = AttachPtrMapDataPairs.begin();
9989 while (It != AttachPtrMapDataPairs.end()) {
9990 const Expr *AttachPtrExpr = It->first;
9991
// Collect the run of consecutive entries whose attach-ptr is identical or
// compares equal to the first one of the run.
9992 MapDataArrayTy GroupLists;
9993 while (It != AttachPtrMapDataPairs.end() &&
9994 (It->first == AttachPtrExpr ||
9995 AttachPtrComparator.areEqual(It->first, AttachPtrExpr))) {
9996 GroupLists.push_back(It->second);
9997 ++It;
9998 }
9999 assert(!GroupLists.empty() && "GroupLists should not be empty");
10000
10001 // Determine if this group of component-lists is eligible for TARGET_PARAM
10002 // flag. Only the first group processed should be eligible, and only if no
10003 // default mapping was done.
10004 bool IsEligibleForTargetParamFlag =
10005 IsFirstGroup && NoDefaultMappingDoneForVD;
10006
10007 GenerateInfoForComponentLists(GroupLists, IsEligibleForTargetParamFlag);
10008 IsFirstGroup = false;
10009 }
10010 }
10011
10012 /// Generate the base pointers, section pointers, sizes, map types, and
10013 /// mappers associated to \a DeclComponentLists for a given capture
10014 /// \a VD (all included in \a CurComponentListInfo).
///
/// The work is done in three steps: (1) find pairs of component lists where
/// one overlaps the other, (2) sort the overlapped lists of each base by field
/// layout order, and (3) call generateInfoForComponentList first for every
/// list that has overlapped elements and then for all remaining lists.
10015 void generateInfoForCaptureFromComponentLists(
10016 const ValueDecl *VD, ArrayRef<MapData> DeclComponentLists,
10017 MapCombinedInfoTy &CurComponentListInfo, StructRangeInfoTy &PartialStruct,
10018 AttachInfoTy &AttachInfo, bool IsListEligibleForTargetParamFlag) const {
10019 // Find overlapping elements (including the offset from the base element).
10020 llvm::SmallDenseMap<
10021 const MapData *,
10022 llvm::SmallVector<
// NOTE(review): listing line 10023 (the element type of the inner
// SmallVector) is not visible in this listing.
10024 4>
10025 OverlappedData;
10026 size_t Count = 0;
10027 for (const MapData &L : DeclComponentLists) {
// NOTE(review): listing line 10028 (the declaration of `Components`) is not
// visible in this listing.
10029 OpenMPMapClauseKind MapType;
10030 ArrayRef<OpenMPMapModifierKind> MapModifiers;
10031 bool IsImplicit;
10032 const ValueDecl *Mapper;
10033 const Expr *VarRef;
10034 std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
10035 L;
// Compare L only against the lists that follow it, so each unordered pair is
// examined once.
10036 ++Count;
10037 for (const MapData &L1 : ArrayRef(DeclComponentLists).slice(Count)) {
// NOTE(review): listing line 10038 (the declaration of `Components1`) is not
// visible in this listing.
10039 std::tie(Components1, MapType, MapModifiers, IsImplicit, Mapper,
10040 VarRef) = L1;
// Walk both lists in reverse while the components agree in expression class
// and associated declaration.
10041 auto CI = Components.rbegin();
10042 auto CE = Components.rend();
10043 auto SI = Components1.rbegin();
10044 auto SE = Components1.rend();
10045 for (; CI != CE && SI != SE; ++CI, ++SI) {
10046 if (CI->getAssociatedExpression()->getStmtClass() !=
10047 SI->getAssociatedExpression()->getStmtClass())
10048 break;
10049 // Are we dealing with different variables/fields?
10050 if (CI->getAssociatedDeclaration() != SI->getAssociatedDeclaration())
10051 break;
10052 }
10053 // Found overlapping if, at least for one component, reached the head
10054 // of the components list.
10055 if (CI == CE || SI == SE) {
10056 // Ignore it if it is the same component.
10057 if (CI == CE && SI == SE)
10058 continue;
// `It` points at the first extra component of the longer list.
10059 const auto It = (SI == SE) ? CI : SI;
10060 // If one component is a pointer and another one is a kind of
10061 // dereference of this pointer (array subscript, section, dereference,
10062 // etc.), it is not an overlapping.
10063 // Same, if one component is a base and another component is a
10064 // dereferenced pointer memberexpr with the same base.
10065 if (!isa<MemberExpr>(It->getAssociatedExpression()) ||
10066 (std::prev(It)->getAssociatedDeclaration() &&
10067 std::prev(It)
10068 ->getAssociatedDeclaration()
10069 ->getType()
10070 ->isPointerType()) ||
10071 (It->getAssociatedDeclaration() &&
10072 It->getAssociatedDeclaration()->getType()->isPointerType() &&
10073 std::next(It) != CE && std::next(It) != SE))
10074 continue;
// The list that was fully consumed is the base; the other one is recorded as
// overlapping it.
10075 const MapData &BaseData = CI == CE ? L : L1;
// NOTE(review): listing line 10076 (the declaration of `SubData`, which the
// expression below initializes) is not visible in this listing.
10077 SI == SE ? Components : Components1;
10078 OverlappedData[&BaseData].push_back(SubData);
10079 }
10080 }
10081 }
10082 // Sort the overlapped elements for each item.
10083 llvm::SmallVector<const FieldDecl *, 4> Layout;
10084 if (!OverlappedData.empty()) {
// Strip pointer/array levels from VD's type until the underlying type is
// reached. NOTE(review): VD is dereferenced here without a null check —
// confirm callers never reach this point with a null VD.
10085 const Type *BaseType = VD->getType().getCanonicalType().getTypePtr();
10086 const Type *OrigType = BaseType->getPointeeOrArrayElementType();
10087 while (BaseType != OrigType) {
10088 BaseType = OrigType->getCanonicalTypeInternal().getTypePtr();
10089 OrigType = BaseType->getPointeeOrArrayElementType();
10090 }
10091
10092 if (const auto *CRD = BaseType->getAsCXXRecordDecl())
10093 getPlainLayout(CRD, Layout, /*AsBase=*/false);
10094 else {
10095 const auto *RD = BaseType->getAsRecordDecl();
10096 Layout.append(RD->field_begin(), RD->field_end());
10097 }
10098 }
10099 for (auto &Pair : OverlappedData) {
10100 llvm::stable_sort(
10101 Pair.getSecond(),
10102 [&Layout](
// NOTE(review): listing lines 10103-10104 (the comparator's parameter
// declarations for `First` and the type of `Second`) are not visible in this
// listing.
10105 Second) {
10106 auto CI = First.rbegin();
10107 auto CE = First.rend();
10108 auto SI = Second.rbegin();
10109 auto SE = Second.rend();
10110 for (; CI != CE && SI != SE; ++CI, ++SI) {
10111 if (CI->getAssociatedExpression()->getStmtClass() !=
10112 SI->getAssociatedExpression()->getStmtClass())
10113 break;
10114 // Are we dealing with different variables/fields?
10115 if (CI->getAssociatedDeclaration() !=
10116 SI->getAssociatedDeclaration())
10117 break;
10118 }
10119
10120 // Lists contain the same elements.
10121 if (CI == CE && SI == SE)
10122 return false;
10123
10124 // List with fewer elements is less than list with more elements.
10125 if (CI == CE || SI == SE)
10126 return CI == CE;
10127
// Both lists diverge at a field: order by field index when the fields share a
// parent, otherwise by whichever field appears first in Layout.
10128 const auto *FD1 = cast<FieldDecl>(CI->getAssociatedDeclaration());
10129 const auto *FD2 = cast<FieldDecl>(SI->getAssociatedDeclaration());
10130 if (FD1->getParent() == FD2->getParent())
10131 return FD1->getFieldIndex() < FD2->getFieldIndex();
10132 const auto *It =
10133 llvm::find_if(Layout, [FD1, FD2](const FieldDecl *FD) {
10134 return FD == FD1 || FD == FD2;
10135 });
10136 return *It == FD1;
10137 });
10138 }
10139
10140 // Associated with a capture, because the mapping flags depend on it.
10141 // Go through all of the elements with the overlapped elements.
10142 bool AddTargetParamFlag = IsListEligibleForTargetParamFlag;
10143 MapCombinedInfoTy StructBaseCombinedInfo;
10144 for (const auto &Pair : OverlappedData) {
10145 const MapData &L = *Pair.getFirst();
// NOTE(review): listing line 10146 (the declaration of `Components`) is not
// visible in this listing.
10147 OpenMPMapClauseKind MapType;
10148 ArrayRef<OpenMPMapModifierKind> MapModifiers;
10149 bool IsImplicit;
10150 const ValueDecl *Mapper;
10151 const Expr *VarRef;
10152 std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
10153 L;
10154 ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
10155 OverlappedComponents = Pair.getSecond();
10156 generateInfoForComponentList(
10157 MapType, MapModifiers, {}, Components, CurComponentListInfo,
10158 StructBaseCombinedInfo, PartialStruct, AttachInfo, AddTargetParamFlag,
10159 IsImplicit, /*GenerateAllInfoForClauses*/ false, Mapper,
10160 /*ForDeviceAddr=*/false, VD, VarRef, OverlappedComponents);
// Only the first processed list may carry the target-param flag.
10161 AddTargetParamFlag = false;
10162 }
10163 // Go through other elements without overlapped elements.
10164 for (const MapData &L : DeclComponentLists) {
// NOTE(review): listing line 10165 (the declaration of `Components`) is not
// visible in this listing.
10166 OpenMPMapClauseKind MapType;
10167 ArrayRef<OpenMPMapModifierKind> MapModifiers;
10168 bool IsImplicit;
10169 const ValueDecl *Mapper;
10170 const Expr *VarRef;
10171 std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
10172 L;
10173 auto It = OverlappedData.find(&L);
10174 if (It == OverlappedData.end())
10175 generateInfoForComponentList(
10176 MapType, MapModifiers, {}, Components, CurComponentListInfo,
10177 StructBaseCombinedInfo, PartialStruct, AttachInfo,
10178 AddTargetParamFlag, IsImplicit, /*GenerateAllInfoForClauses*/ false,
10179 Mapper, /*ForDeviceAddr=*/false, VD, VarRef,
10180 /*OverlappedElements*/ {});
// The `if` above has no braces, so the flag is cleared after every iteration,
// including iterations that were skipped because L has overlapped elements.
10181 AddTargetParamFlag = false;
10182 }
10183 }
10184
10185 /// Check if a variable should be treated as firstprivate due to explicit
10186 /// firstprivate clause or defaultmap(firstprivate:...).
10187 bool isEffectivelyFirstprivate(const VarDecl *VD, QualType Type) const {
10188 // Check explicit firstprivate clauses (not implicit from defaultmap)
10189 auto I = FirstPrivateDecls.find(VD);
10190 if (I != FirstPrivateDecls.end() && !I->getSecond())
10191 return true; // Explicit firstprivate only
10192
10193 // Check defaultmap(firstprivate:scalar) for scalar types
10194 if (DefaultmapFirstprivateKinds.count(OMPC_DEFAULTMAP_scalar)) {
10195 if (Type->isScalarType())
10196 return true;
10197 }
10198
10199 // Check defaultmap(firstprivate:pointer) for pointer types
10200 if (DefaultmapFirstprivateKinds.count(OMPC_DEFAULTMAP_pointer)) {
10201 if (Type->isAnyPointerType())
10202 return true;
10203 }
10204
10205 // Check defaultmap(firstprivate:aggregate) for aggregate types
10206 if (DefaultmapFirstprivateKinds.count(OMPC_DEFAULTMAP_aggregate)) {
10207 if (Type->isAggregateType())
10208 return true;
10209 }
10210
10211 // Check defaultmap(firstprivate:all) for all types
10212 return DefaultmapFirstprivateKinds.count(OMPC_DEFAULTMAP_all);
10213 }
10214
10215 /// Generate the default map information for a given capture \a CI,
10216 /// record field declaration \a RI and captured value \a CV.
10217 void generateDefaultMapInfo(const CapturedStmt::Capture &CI,
10218 const FieldDecl &RI, llvm::Value *CV,
10219 MapCombinedInfoTy &CombinedInfo) const {
10220 bool IsImplicit = true;
10221 // Do the default mapping.
10222 if (CI.capturesThis()) {
10223 CombinedInfo.Exprs.push_back(nullptr);
10224 CombinedInfo.BasePointers.push_back(CV);
10225 CombinedInfo.DevicePtrDecls.push_back(nullptr);
10226 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
10227 CombinedInfo.Pointers.push_back(CV);
10228 const auto *PtrTy = cast<PointerType>(RI.getType().getTypePtr());
10229 CombinedInfo.Sizes.push_back(
10230 CGF.Builder.CreateIntCast(CGF.getTypeSize(PtrTy->getPointeeType()),
10231 CGF.Int64Ty, /*isSigned=*/true));
10232 // Default map type.
10233 CombinedInfo.Types.push_back(OpenMPOffloadMappingFlags::OMP_MAP_TO |
10234 OpenMPOffloadMappingFlags::OMP_MAP_FROM);
10235 } else if (CI.capturesVariableByCopy()) {
10236 const VarDecl *VD = CI.getCapturedVar();
10237 CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
10238 CombinedInfo.BasePointers.push_back(CV);
10239 CombinedInfo.DevicePtrDecls.push_back(nullptr);
10240 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
10241 CombinedInfo.Pointers.push_back(CV);
10242 bool IsFirstprivate =
10243 isEffectivelyFirstprivate(VD, RI.getType().getNonReferenceType());
10244
10245 if (!RI.getType()->isAnyPointerType()) {
10246 // We have to signal to the runtime captures passed by value that are
10247 // not pointers.
10248 CombinedInfo.Types.push_back(
10249 OpenMPOffloadMappingFlags::OMP_MAP_LITERAL);
10250 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
10251 CGF.getTypeSize(RI.getType()), CGF.Int64Ty, /*isSigned=*/true));
10252 } else if (IsFirstprivate) {
10253 // Firstprivate pointers should be passed by value (as literals)
10254 // without performing a present table lookup at runtime.
10255 CombinedInfo.Types.push_back(
10256 OpenMPOffloadMappingFlags::OMP_MAP_LITERAL);
10257 // Use zero size for pointer literals (just passing the pointer value)
10258 CombinedInfo.Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
10259 } else {
10260 // Pointers are implicitly mapped with a zero size and no flags
10261 // (other than first map that is added for all implicit maps).
10262 CombinedInfo.Types.push_back(OpenMPOffloadMappingFlags::OMP_MAP_NONE);
10263 CombinedInfo.Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
10264 }
10265 auto I = FirstPrivateDecls.find(VD);
10266 if (I != FirstPrivateDecls.end())
10267 IsImplicit = I->getSecond();
10268 } else {
10269 assert(CI.capturesVariable() && "Expected captured reference.");
10270 const auto *PtrTy = cast<ReferenceType>(RI.getType().getTypePtr());
10271 QualType ElementType = PtrTy->getPointeeType();
10272 const VarDecl *VD = CI.getCapturedVar();
10273 bool IsFirstprivate = isEffectivelyFirstprivate(VD, ElementType);
10274 CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
10275 CombinedInfo.BasePointers.push_back(CV);
10276 CombinedInfo.DevicePtrDecls.push_back(nullptr);
10277 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
10278
10279 // For firstprivate pointers, pass by value instead of dereferencing
10280 if (IsFirstprivate && ElementType->isAnyPointerType()) {
10281 // Treat as a literal value (pass the pointer value itself)
10282 CombinedInfo.Pointers.push_back(CV);
10283 // Use zero size for pointer literals
10284 CombinedInfo.Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
10285 CombinedInfo.Types.push_back(
10286 OpenMPOffloadMappingFlags::OMP_MAP_LITERAL);
10287 } else {
10288 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
10289 CGF.getTypeSize(ElementType), CGF.Int64Ty, /*isSigned=*/true));
10290 // The default map type for a scalar/complex type is 'to' because by
10291 // default the value doesn't have to be retrieved. For an aggregate
10292 // type, the default is 'tofrom'.
10293 CombinedInfo.Types.push_back(getMapModifiersForPrivateClauses(CI));
10294 CombinedInfo.Pointers.push_back(CV);
10295 }
10296 auto I = FirstPrivateDecls.find(VD);
10297 if (I != FirstPrivateDecls.end())
10298 IsImplicit = I->getSecond();
10299 }
10300 // Every default map produces a single argument which is a target parameter.
10301 CombinedInfo.Types.back() |=
10302 OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;
10303
10304 // Add flag stating this is an implicit map.
10305 if (IsImplicit)
10306 CombinedInfo.Types.back() |= OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT;
10307
10308 // No user-defined mapper for default mapping.
10309 CombinedInfo.Mappers.push_back(nullptr);
10310 }
10311};
10312} // anonymous namespace
10313
10314 // Try to extract the base declaration from a `this->x` expression if possible.
// Returns the member declaration when \p E (ignoring parentheses and casts) is
// an array section whose base is a member expression; returns nullptr for a
// null \p E and for every other expression form.
// NOTE(review): listing line 10315 (the function's signature) is not visible
// in this listing; the body below starts right after it.
10316 if (!E)
10317 return nullptr;
10318
10319 if (const auto *OASE = dyn_cast<ArraySectionExpr>(E->IgnoreParenCasts()))
10320 if (const MemberExpr *ME =
10321 dyn_cast<MemberExpr>(OASE->getBase()->IgnoreParenImpCasts()))
10322 return ME->getMemberDecl();
10323 return nullptr;
10324 }
10325
10326 /// Emit a string constant containing the names of the values mapped to the
10327 /// offloading runtime library.
///
/// The string is built from the file name, the printed map expression (or the
/// declaration's name when there is no expression), and the line and column of
/// the chosen source location. When \a MapExprs has neither a declaration nor
/// an expression, the default source-location string is returned instead.
10328 static llvm::Constant *
10329 emitMappingInformation(CodeGenFunction &CGF, llvm::OpenMPIRBuilder &OMPBuilder,
10330 MappableExprsHandler::MappingExprInfo &MapExprs) {
10331
10332 uint32_t SrcLocStrSize;
10333 if (!MapExprs.getMapDecl() && !MapExprs.getMapExpr())
10334 return OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize);
10335
// Pick the location: with no declaration, prefer the member declaration
// extracted from the expression and fall back to the expression's own
// location; otherwise use the declaration's location.
10336 SourceLocation Loc;
10337 if (!MapExprs.getMapDecl() && MapExprs.getMapExpr()) {
10338 if (const ValueDecl *VD = getDeclFromThisExpr(MapExprs.getMapExpr()))
10339 Loc = VD->getLocation();
10340 else
10341 Loc = MapExprs.getMapExpr()->getExprLoc();
10342 } else {
10343 Loc = MapExprs.getMapDecl()->getLocation();
10344 }
10345
10346 std::string ExprName;
10347 if (MapExprs.getMapExpr()) {
// NOTE(review): listing line 10348 is not visible in this listing; it
// presumably declares `P`, which is passed to printPretty below.
10349 llvm::raw_string_ostream OS(ExprName);
10350 MapExprs.getMapExpr()->printPretty(OS, nullptr, P);
10351 } else {
10352 ExprName = MapExprs.getMapDecl()->getNameAsString();
10353 }
10354
10355 std::string FileName;
// NOTE(review): listing line 10356 is not visible in this listing; it
// presumably declares `PLoc`, which is used below.
// With debug info available, the file name goes through remapDIPath.
10357 if (auto *DbgInfo = CGF.getDebugInfo())
10358 FileName = DbgInfo->remapDIPath(PLoc.getFilename());
10359 else
10360 FileName = PLoc.getFilename();
10361 return OMPBuilder.getOrCreateSrcLocStr(FileName, ExprName, PLoc.getLine(),
10362 PLoc.getColumn(), SrcLocStrSize);
10363 }
10364 /// Emit the arrays used to pass the captures and map information to the
10365 /// offloading runtime library. If there is no map or capture information,
10366 /// return nullptr by reference.
///
/// The emission itself is delegated to
/// OpenMPIRBuilder::emitOffloadingArraysAndArgs; this wrapper supplies the
/// insertion points and the two callbacks defined below.
// NOTE(review): listing line 10367 (return type and function name) is not
// visible in this listing; the parameter list below continues it.
10368 CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
10369 CGOpenMPRuntime::TargetDataInfo &Info, llvm::OpenMPIRBuilder &OMPBuilder,
10370 bool IsNonContiguous = false, bool ForEndCall = false) {
10371 CodeGenModule &CGM = CGF.CGM;
10372
// Allocas go at the function's alloca insertion point; other code goes at the
// builder's current position.
10373 using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
10374 InsertPointTy AllocaIP(CGF.AllocaInsertPt->getParent(),
10375 CGF.AllocaInsertPt->getIterator());
10376 InsertPointTy CodeGenIP(CGF.Builder.GetInsertBlock(),
10377 CGF.Builder.GetInsertPoint());
10378
// Records the value handed back for entry I against its device-pointer
// declaration, if the entry has one. try_emplace keeps an existing mapping.
10379 auto DeviceAddrCB = [&](unsigned int I, llvm::Value *NewDecl) {
10380 if (const ValueDecl *DevVD = CombinedInfo.DevicePtrDecls[I]) {
10381 Info.CaptureDeviceAddrMap.try_emplace(DevVD, NewDecl);
10382 }
10383 };
10384
// Returns the mapper function for entry I, or nullptr when the entry has no
// user-defined mapper. Also notes in Info that a mapper is in use.
10385 auto CustomMapperCB = [&](unsigned int I) {
10386 llvm::Function *MFunc = nullptr;
10387 if (CombinedInfo.Mappers[I]) {
10388 Info.HasMapper = true;
// NOTE(review): listing line 10389 (the start of the assignment to `MFunc`,
// i.e. the call whose argument follows) is not visible in this listing.
10390 cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I]));
10391 }
10392 return MFunc;
10393 };
// cantFail: an error from the builder is treated as a fatal condition here.
10394 cantFail(OMPBuilder.emitOffloadingArraysAndArgs(
10395 AllocaIP, CodeGenIP, Info, Info.RTArgs, CombinedInfo, CustomMapperCB,
10396 IsNonContiguous, ForEndCall, DeviceAddrCB));
10397 }
10398
10399 /// Check for inner distribute directive.
///
/// Looks at the single child statement of \a D's innermost captured statement.
/// For 'target' it returns a nested distribute or 'teams loop' directive, or a
/// distribute directive nested one level further inside a 'teams' directive.
/// For 'target teams' it returns a directly nested distribute directive.
/// Returns nullptr when no such directive is found. Any other kind of \a D
/// with a nested executable directive is treated as unreachable, except the
/// 'target parallel'/'target simd' family, which yields nullptr.
10400 static const OMPExecutableDirective *
// NOTE(review): listing line 10401 (function name and parameter list) is not
// visible in this listing; `Ctx` and `D` used below presumably come from it.
10402 const auto *CS = D.getInnermostCapturedStmt();
10403 const auto *Body =
10404 CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
10405 const Stmt *ChildStmt =
// NOTE(review): listing line 10406 (the initializer of `ChildStmt`) is not
// visible in this listing.
10407
10408 if (const auto *NestedDir =
10409 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
10410 OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind();
10411 switch (D.getDirectiveKind()) {
10412 case OMPD_target:
10413 // For now, treat 'target' with nested 'teams loop' as if it's
10414 // distributed (target teams distribute).
10415 if (isOpenMPDistributeDirective(DKind) || DKind == OMPD_teams_loop)
10416 return NestedDir;
// 'target' > 'teams': look one level deeper for a distribute directive.
10417 if (DKind == OMPD_teams) {
10418 Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(
10419 /*IgnoreCaptured=*/true);
10420 if (!Body)
10421 return nullptr;
10422 ChildStmt = CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);
10423 if (const auto *NND =
10424 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
10425 DKind = NND->getDirectiveKind();
10426 if (isOpenMPDistributeDirective(DKind))
10427 return NND;
10428 }
10429 }
10430 return nullptr;
10431 case OMPD_target_teams:
10432 if (isOpenMPDistributeDirective(DKind))
10433 return NestedDir;
10434 return nullptr;
// These target forms yield no nested distribute directive.
10435 case OMPD_target_parallel:
10436 case OMPD_target_simd:
10437 case OMPD_target_parallel_for:
10438 case OMPD_target_parallel_for_simd:
10439 return nullptr;
// Every remaining directive kind is not expected as \a D here.
10440 case OMPD_target_teams_distribute:
10441 case OMPD_target_teams_distribute_simd:
10442 case OMPD_target_teams_distribute_parallel_for:
10443 case OMPD_target_teams_distribute_parallel_for_simd:
10444 case OMPD_parallel:
10445 case OMPD_for:
10446 case OMPD_parallel_for:
10447 case OMPD_parallel_master:
10448 case OMPD_parallel_sections:
10449 case OMPD_for_simd:
10450 case OMPD_parallel_for_simd:
10451 case OMPD_cancel:
10452 case OMPD_cancellation_point:
10453 case OMPD_ordered:
10454 case OMPD_threadprivate:
10455 case OMPD_allocate:
10456 case OMPD_task:
10457 case OMPD_simd:
10458 case OMPD_tile:
10459 case OMPD_unroll:
10460 case OMPD_sections:
10461 case OMPD_section:
10462 case OMPD_single:
10463 case OMPD_master:
10464 case OMPD_critical:
10465 case OMPD_taskyield:
10466 case OMPD_barrier:
10467 case OMPD_taskwait:
10468 case OMPD_taskgroup:
10469 case OMPD_atomic:
10470 case OMPD_flush:
10471 case OMPD_depobj:
10472 case OMPD_scan:
10473 case OMPD_teams:
10474 case OMPD_target_data:
10475 case OMPD_target_exit_data:
10476 case OMPD_target_enter_data:
10477 case OMPD_distribute:
10478 case OMPD_distribute_simd:
10479 case OMPD_distribute_parallel_for:
10480 case OMPD_distribute_parallel_for_simd:
10481 case OMPD_teams_distribute:
10482 case OMPD_teams_distribute_simd:
10483 case OMPD_teams_distribute_parallel_for:
10484 case OMPD_teams_distribute_parallel_for_simd:
10485 case OMPD_target_update:
10486 case OMPD_declare_simd:
10487 case OMPD_declare_variant:
10488 case OMPD_begin_declare_variant:
10489 case OMPD_end_declare_variant:
10490 case OMPD_declare_target:
10491 case OMPD_end_declare_target:
10492 case OMPD_declare_reduction:
10493 case OMPD_declare_mapper:
10494 case OMPD_taskloop:
10495 case OMPD_taskloop_simd:
10496 case OMPD_master_taskloop:
10497 case OMPD_master_taskloop_simd:
10498 case OMPD_parallel_master_taskloop:
10499 case OMPD_parallel_master_taskloop_simd:
10500 case OMPD_requires:
10501 case OMPD_metadirective:
10502 case OMPD_unknown:
10503 default:
10504 llvm_unreachable("Unexpected directive.");
10505 }
10506 }
10507
// The single child is not an executable directive (or there is none).
10508 return nullptr;
10509 }
10510
10511 /// Emit the user-defined mapper function. The code generation follows the
10512 /// pattern in the example below.
10513 /// \code
10514 /// void .omp_mapper.<type_name>.<mapper_id>.(void *rt_mapper_handle,
10515 /// void *base, void *begin,
10516 /// int64_t size, int64_t type,
10517 /// void *name = nullptr) {
10518 /// // Allocate space for an array section first.
10519 /// if ((size > 1 || (base != begin)) && !maptype.IsDelete)
10520 /// __tgt_push_mapper_component(rt_mapper_handle, base, begin,
10521 /// size*sizeof(Ty), clearToFromMember(type));
10522 /// // Map members.
10523 /// for (unsigned i = 0; i < size; i++) {
10524 /// // For each component specified by this mapper:
10525 /// for (auto c : begin[i]->all_components) {
10526 /// if (c.hasMapper())
10527 /// (*c.Mapper())(rt_mapper_handle, c.arg_base, c.arg_begin, c.arg_size,
10528 /// c.arg_type, c.arg_name);
10529 /// else
10530 /// __tgt_push_mapper_component(rt_mapper_handle, c.arg_base,
10531 /// c.arg_begin, c.arg_size, c.arg_type,
10532 /// c.arg_name);
10533 /// }
10534 /// }
10535 /// // Delete the array section.
10536 /// if (size > 1 && maptype.IsDelete)
10537 /// __tgt_push_mapper_component(rt_mapper_handle, base, begin,
10538 /// size*sizeof(Ty), clearToFromMember(type));
10539 /// }
10540 /// \endcode
///
/// Does nothing when a function for \a D is already recorded in UDMMap. The
/// emitted function is stored in UDMMap and, when \a CGF is non-null, \a D is
/// also recorded in FunctionUDMMap under CGF->CurFn.
// NOTE(review): listing line 10541 (the start of the signature, which
// presumably declares `D`) is not visible in this listing.
10542 CodeGenFunction *CGF) {
// Each mapper declaration is emitted at most once.
10543 if (UDMMap.count(D) > 0)
10544 return;
10545 ASTContext &C = CGM.getContext();
10546 QualType Ty = D->getType();
10547 auto *MapperVarDecl =
// NOTE(review): listing line 10548 (the initializer of `MapperVarDecl`) is
// not visible in this listing.
10549 CharUnits ElementSize = C.getTypeSizeInChars(Ty);
10550 llvm::Type *ElemTy = CGM.getTypes().ConvertTypeForMem(Ty);
10551
// The map information is generated with a separate CodeGenFunction object.
10552 CodeGenFunction MapperCGF(CGM);
10553 MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
// Callback for the builder: binds the mapper's declared variable to the
// current array element and fills CombinedInfo with the mapper's map clauses.
10554 auto PrivatizeAndGenMapInfoCB =
10555 [&](llvm::OpenMPIRBuilder::InsertPointTy CodeGenIP, llvm::Value *PtrPHI,
10556 llvm::Value *BeginArg) -> llvm::OpenMPIRBuilder::MapInfosTy & {
10557 MapperCGF.Builder.restoreIP(CodeGenIP);
10558
10559 // Privatize the declared variable of mapper to be the current array
10560 // element.
10561 Address PtrCurrent(
10562 PtrPHI, ElemTy,
10563 Address(BeginArg, MapperCGF.VoidPtrTy, CGM.getPointerAlign())
10564 .getAlignment()
10565 .alignmentOfArrayElement(ElementSize));
// NOTE(review): listing line 10566 is not visible in this listing; it
// presumably declares `Scope`, which is used on the next two lines.
10567 Scope.addPrivate(MapperVarDecl, PtrCurrent);
10568 (void)Scope.Privatize();
10569
10570 // Get map clause information.
10571 MappableExprsHandler MEHandler(*D, MapperCGF);
10572 MEHandler.generateAllInfoForMapper(CombinedInfo, OMPBuilder);
10573
// Name strings are only produced when debug info is enabled.
10574 auto FillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) {
10575 return emitMappingInformation(MapperCGF, OMPBuilder, MapExpr);
10576 };
10577 if (CGM.getCodeGenOpts().getDebugInfo() !=
10578 llvm::codegenoptions::NoDebugInfo) {
10579 CombinedInfo.Names.resize(CombinedInfo.Exprs.size());
10580 llvm::transform(CombinedInfo.Exprs, CombinedInfo.Names.begin(),
10581 FillInfoMap);
10582 }
10583
10584 return CombinedInfo;
10585 };
10586
// Callback for the builder: returns the mapper function for component I, or
// nullptr when that component has no user-defined mapper.
10587 auto CustomMapperCB = [&](unsigned I) {
10588 llvm::Function *MapperFunc = nullptr;
10589 if (CombinedInfo.Mappers[I]) {
10590 // Call the corresponding mapper function.
// NOTE(review): listing line 10591 (the start of the assignment to
// `MapperFunc`) is not visible in this listing.
10592 cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I]));
10593 assert(MapperFunc && "Expect a valid mapper function is available.");
10594 }
10595 return MapperFunc;
10596 };
10597
// The function name is built from "omp_mapper", the mangled canonical type
// name and the mapper's own name.
10598 SmallString<64> TyStr;
10599 llvm::raw_svector_ostream Out(TyStr);
10600 CGM.getCXXABI().getMangleContext().mangleCanonicalTypeName(Ty, Out);
10601 std::string Name = getName({"omp_mapper", TyStr, D->getName()});
10602
10603 llvm::Function *NewFn = cantFail(OMPBuilder.emitUserDefinedMapper(
10604 PrivatizeAndGenMapInfoCB, ElemTy, Name, CustomMapperCB));
10605 UDMMap.try_emplace(D, NewFn);
10606 if (CGF)
10607 FunctionUDMMap[CGF->CurFn].push_back(D);
10608 }
10609
// Returns the function recorded in UDMMap for \p D when there is one;
// otherwise falls through to a second lookup of \p D in UDMMap.
// NOTE(review): listing lines 10610 (return type and function name) and 10615
// (a statement between the early return and the final lookup, presumably the
// one that emits the mapper) are not visible in this listing.
10611 const OMPDeclareMapperDecl *D) {
10612 auto I = UDMMap.find(D);
10613 if (I != UDMMap.end())
10614 return I->second;
10616 return UDMMap.lookup(D);
10617 }
10618
10621 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
10622 const OMPLoopDirective &D)>
10623 SizeEmitter) {
10624 OpenMPDirectiveKind Kind = D.getDirectiveKind();
10625 const OMPExecutableDirective *TD = &D;
10626 // Get nested teams distribute kind directive, if any. For now, treat
10627 // 'target_teams_loop' as if it's really a target_teams_distribute.
10628 if ((!isOpenMPDistributeDirective(Kind) || !isOpenMPTeamsDirective(Kind)) &&
10629 Kind != OMPD_target_teams_loop)
10630 TD = getNestedDistributeDirective(CGM.getContext(), D);
10631 if (!TD)
10632 return llvm::ConstantInt::get(CGF.Int64Ty, 0);
10633
10634 const auto *LD = cast<OMPLoopDirective>(TD);
10635 if (llvm::Value *NumIterations = SizeEmitter(CGF, *LD))
10636 return NumIterations;
10637 return llvm::ConstantInt::get(CGF.Int64Ty, 0);
10638}
10639
/// Emit the host-side fallback for a target region.
///
/// If \p OffloadingMandatory is set, no host call is produced and an
/// `unreachable` instruction is emitted instead. Otherwise \p OutlinedFn is
/// called through \p OMPRuntime with the captured variables followed by one
/// extra null pointer argument.
///
/// NOTE(review): `CapturedVars` is used below but its declaration is not
/// visible in this listing; a parameter line appears to be missing between
/// `D` and `RequiresOuterTask`.
10640static void
10641emitTargetCallFallback(CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn,
10642 const OMPExecutableDirective &D,
10644 bool RequiresOuterTask, const CapturedStmt &CS,
10645 bool OffloadingMandatory, CodeGenFunction &CGF) {
10646 if (OffloadingMandatory) {
10647 CGF.Builder.CreateUnreachable();
10648 } else {
      // When an outer task is required, discard the existing captured values
      // and regenerate them from CS at the current insertion point.
10649 if (RequiresOuterTask) {
10650 CapturedVars.clear();
10651 CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
10652 }
10653 llvm::SmallVector<llvm::Value *, 16> Args(CapturedVars.begin(),
10654 CapturedVars.end());
      // Trailing null pointer argument; presumably the implicit dyn_ptr
      // argument of the outlined function -- confirm against the outliner.
10655 Args.push_back(llvm::Constant::getNullValue(CGF.Builder.getPtrTy()));
10656 OMPRuntime->emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn,
10657 Args);
10658 }
10659}
10660
10661static llvm::Value *emitDeviceID(
10662 llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
10663 CodeGenFunction &CGF) {
10664 // Emit device ID if any.
10665 llvm::Value *DeviceID;
10666 if (Device.getPointer()) {
10667 assert((Device.getInt() == OMPC_DEVICE_unknown ||
10668 Device.getInt() == OMPC_DEVICE_device_num) &&
10669 "Expected device_num modifier.");
10670 llvm::Value *DevVal = CGF.EmitScalarExpr(Device.getPointer());
10671 DeviceID =
10672 CGF.Builder.CreateIntCast(DevVal, CGF.Int64Ty, /*isSigned=*/true);
10673 } else {
10674 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
10675 }
10676 return DeviceID;
10677}
10678
/// Compute the dynamic group-private memory size and the fallback policy for
/// a directive.
///
/// The result defaults to an i32 zero with the `Abort` fallback. A
/// `dyn_groupprivate` clause supplies both the size (cast to \c Int32Ty as
/// unsigned) and the fallback; otherwise an `ompx_dyn_cgroup_mem` clause
/// supplies only the size and the fallback stays `Abort`.
///
/// NOTE(review): the line carrying this function's name and parameter list is
/// missing from this listing. The call site elsewhere in the file suggests
/// `emitDynCGroupMem(D, CGF)` -- confirm against the original file.
10679static std::pair<llvm::Value *, OMPDynGroupprivateFallbackType>
10681 llvm::Value *DynGP = CGF.Builder.getInt32(0);
10682 auto DynGPFallback = OMPDynGroupprivateFallbackType::Abort;
10683
10684 if (auto *DynGPClause = D.getSingleClause<OMPDynGroupprivateClause>()) {
      // Scope the cleanups created while evaluating the size expression.
10685 CodeGenFunction::RunCleanupsScope DynGPScope(CGF);
10686 llvm::Value *DynGPVal =
10687 CGF.EmitScalarExpr(DynGPClause->getSize(), /*IgnoreResultAssign=*/true);
10688 DynGP = CGF.Builder.CreateIntCast(DynGPVal, CGF.Int32Ty,
10689 /*isSigned=*/false);
      // Translate the clause's fallback modifier into the runtime enum.
10690 auto FallbackModifier = DynGPClause->getDynGroupprivateFallbackModifier();
10691 switch (FallbackModifier) {
10692 case OMPC_DYN_GROUPPRIVATE_FALLBACK_abort:
10693 DynGPFallback = OMPDynGroupprivateFallbackType::Abort;
10694 break;
10695 case OMPC_DYN_GROUPPRIVATE_FALLBACK_null:
10696 DynGPFallback = OMPDynGroupprivateFallbackType::Null;
10697 break;
10698 case OMPC_DYN_GROUPPRIVATE_FALLBACK_default_mem:
      // NOTE(review): a line is missing from the listing here; it may be an
      // additional case label sharing this branch -- confirm.
10700 // This is the default for dyn_groupprivate.
10701 DynGPFallback = OMPDynGroupprivateFallbackType::DefaultMem;
10702 break;
10703 default:
10704 llvm_unreachable("Unknown fallback modifier for OpenMP dyn_groupprivate");
10705 }
10706 } else if (auto *OMPXDynCGClause =
10707 D.getSingleClause<OMPXDynCGroupMemClause>()) {
      // Extension clause: only the size is taken from the clause.
10708 CodeGenFunction::RunCleanupsScope DynCGMemScope(CGF);
10709 llvm::Value *DynCGMemVal = CGF.EmitScalarExpr(OMPXDynCGClause->getSize(),
10710 /*IgnoreResultAssign=*/true);
10711 DynGP = CGF.Builder.CreateIntCast(DynCGMemVal, CGF.Int32Ty,
10712 /*isSigned=*/false);
10713 }
10714 return {DynGP, DynGPFallback};
10715}
10716
10718 MappableExprsHandler &MEHandler, CodeGenFunction &CGF,
10719 const CapturedStmt &CS, llvm::SmallVectorImpl<llvm::Value *> &CapturedVars,
10720 llvm::OpenMPIRBuilder &OMPBuilder,
10721 llvm::DenseSet<CanonicalDeclPtr<const Decl>> &MappedVarSet,
10722 MappableExprsHandler::MapCombinedInfoTy &CombinedInfo) {
10723
10724 llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers;
10725 auto RI = CS.getCapturedRecordDecl()->field_begin();
10726 auto *CV = CapturedVars.begin();
10728 CE = CS.capture_end();
10729 CI != CE; ++CI, ++RI, ++CV) {
10730 MappableExprsHandler::MapCombinedInfoTy CurInfo;
10731
10732 // VLA sizes are passed to the outlined region by copy and do not have map
10733 // information associated.
10734 if (CI->capturesVariableArrayType()) {
10735 CurInfo.Exprs.push_back(nullptr);
10736 CurInfo.BasePointers.push_back(*CV);
10737 CurInfo.DevicePtrDecls.push_back(nullptr);
10738 CurInfo.DevicePointers.push_back(
10739 MappableExprsHandler::DeviceInfoTy::None);
10740 CurInfo.Pointers.push_back(*CV);
10741 CurInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
10742 CGF.getTypeSize(RI->getType()), CGF.Int64Ty, /*isSigned=*/true));
10743 // Copy to the device as an argument. No need to retrieve it.
10744 CurInfo.Types.push_back(OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
10745 OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM |
10746 OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT);
10747 CurInfo.Mappers.push_back(nullptr);
10748 } else {
10749 const ValueDecl *CapturedVD =
10750 CI->capturesThis() ? nullptr
10752 bool HasEntryWithCVAsAttachPtr = false;
10753 if (CapturedVD)
10754 HasEntryWithCVAsAttachPtr =
10755 MEHandler.hasAttachEntryForCapturedVar(CapturedVD);
10756
10757 // Populate component lists for the captured variable from clauses.
10758 MappableExprsHandler::MapDataArrayTy DeclComponentLists;
10761 StorageForImplicitlyAddedComponentLists;
10762 MEHandler.populateComponentListsForNonLambdaCaptureFromClauses(
10763 CapturedVD, DeclComponentLists,
10764 StorageForImplicitlyAddedComponentLists);
10765
10766 // OpenMP 6.0, 15.8, target construct, restrictions:
10767 // * A list item in a map clause that is specified on a target construct
10768 // must have a base variable or base pointer.
10769 //
10770 // Map clauses on a target construct must either have a base pointer, or a
10771 // base-variable. So, if we don't have a base-pointer, that means that it
10772 // must have a base-variable, i.e. we have a map like `map(s)`, `map(s.x)`
10773 // etc. In such cases, we do not need to handle default map generation
10774 // for `s`.
10775 bool HasEntryWithoutAttachPtr =
10776 llvm::any_of(DeclComponentLists, [&](const auto &MapData) {
10778 Components = std::get<0>(MapData);
10779 return !MEHandler.getAttachPtrExpr(Components);
10780 });
10781
10782 // Generate default map info first if there's no direct map with CV as
10783 // the base-variable, or attach pointer.
10784 if (DeclComponentLists.empty() ||
10785 (!HasEntryWithCVAsAttachPtr && !HasEntryWithoutAttachPtr))
10786 MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurInfo);
10787
10788 // If we have any information in the map clause, we use it, otherwise we
10789 // just do a default mapping.
10790 MEHandler.generateInfoForCaptureFromClauseInfo(
10791 DeclComponentLists, CI, *CV, CurInfo, OMPBuilder,
10792 /*OffsetForMemberOfFlag=*/CombinedInfo.BasePointers.size());
10793
10794 if (!CI->capturesThis())
10795 MappedVarSet.insert(CI->getCapturedVar());
10796 else
10797 MappedVarSet.insert(nullptr);
10798
10799 // Generate correct mapping for variables captured by reference in
10800 // lambdas.
10801 if (CI->capturesVariable())
10802 MEHandler.generateInfoForLambdaCaptures(CI->getCapturedVar(), *CV,
10803 CurInfo, LambdaPointers);
10804 }
10805 // We expect to have at least an element of information for this capture.
10806 assert(!CurInfo.BasePointers.empty() &&
10807 "Non-existing map pointer for capture!");
10808 assert(CurInfo.BasePointers.size() == CurInfo.Pointers.size() &&
10809 CurInfo.BasePointers.size() == CurInfo.Sizes.size() &&
10810 CurInfo.BasePointers.size() == CurInfo.Types.size() &&
10811 CurInfo.BasePointers.size() == CurInfo.Mappers.size() &&
10812 "Inconsistent map information sizes!");
10813
10814 // We need to append the results of this capture to what we already have.
10815 CombinedInfo.append(CurInfo);
10816 }
10817 // Adjust MEMBER_OF flags for the lambdas captures.
10818 MEHandler.adjustMemberOfForLambdaCaptures(
10819 OMPBuilder, LambdaPointers, CombinedInfo.BasePointers,
10820 CombinedInfo.Pointers, CombinedInfo.Types);
10821}
10822static void
10823genMapInfo(MappableExprsHandler &MEHandler, CodeGenFunction &CGF,
10824 MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
10825 llvm::OpenMPIRBuilder &OMPBuilder,
10826 const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkippedVarSet =
10827 llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) {
10828
10829 CodeGenModule &CGM = CGF.CGM;
10830 // Map any list items in a map clause that were not captures because they
10831 // weren't referenced within the construct.
10832 MEHandler.generateAllInfo(CombinedInfo, OMPBuilder, SkippedVarSet);
10833
10834 auto FillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) {
10835 return emitMappingInformation(CGF, OMPBuilder, MapExpr);
10836 };
10837 if (CGM.getCodeGenOpts().getDebugInfo() !=
10838 llvm::codegenoptions::NoDebugInfo) {
10839 CombinedInfo.Names.resize(CombinedInfo.Exprs.size());
10840 llvm::transform(CombinedInfo.Exprs, CombinedInfo.Names.begin(),
10841 FillInfoMap);
10842 }
10843}
10844
10846 const CapturedStmt &CS,
10848 llvm::OpenMPIRBuilder &OMPBuilder,
10849 MappableExprsHandler::MapCombinedInfoTy &CombinedInfo) {
10850 // Get mappable expression information.
10851 MappableExprsHandler MEHandler(D, CGF);
10852 llvm::DenseSet<CanonicalDeclPtr<const Decl>> MappedVarSet;
10853
10854 genMapInfoForCaptures(MEHandler, CGF, CS, CapturedVars, OMPBuilder,
10855 MappedVarSet, CombinedInfo);
10856 genMapInfo(MEHandler, CGF, CombinedInfo, OMPBuilder, MappedVarSet);
10857}
10858
/// Evaluate every expression in the variable list of the directive's single
/// \c ClauseTy clause, cast each result to \c Int32Ty as a signed integer and
/// append it to `Values`.
///
/// The clause is dereferenced without a null check, and the assertion requires
/// its variable list to be non-empty.
///
/// NOTE(review): the line carrying this function's name, the declaration of
/// `Values` and one interior line are missing from this listing -- confirm
/// against the original file.
10859template <typename ClauseTy>
10860static void
10862 const OMPExecutableDirective &D,
10864 const auto *C = D.getSingleClause<ClauseTy>();
10865 assert(!C->varlist_empty() &&
10866 "ompx_bare requires explicit num_teams and thread_limit");
10868 for (auto *E : C->varlist()) {
10869 llvm::Value *V = CGF.EmitScalarExpr(E);
10870 Values.push_back(
10871 CGF.Builder.CreateIntCast(V, CGF.Int32Ty, /*isSigned=*/true));
10872 }
10873}
10874
10876 CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn,
10877 const OMPExecutableDirective &D,
10878 llvm::SmallVectorImpl<llvm::Value *> &CapturedVars, bool RequiresOuterTask,
10879 const CapturedStmt &CS, bool OffloadingMandatory,
10880 llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
10881 llvm::Value *OutlinedFnID, CodeGenFunction::OMPTargetDataInfo &InputInfo,
10882 llvm::Value *&MapTypesArray, llvm::Value *&MapNamesArray,
10883 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
10884 const OMPLoopDirective &D)>
10885 SizeEmitter,
10886 CodeGenFunction &CGF, CodeGenModule &CGM) {
10887 llvm::OpenMPIRBuilder &OMPBuilder = OMPRuntime->getOMPBuilder();
10888
10889 // Fill up the arrays with all the captured variables.
10890 MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
10892 genMapInfo(D, CGF, CS, CapturedVars, OMPBuilder, CombinedInfo);
10893
10894 // Append a null entry for the implicit dyn_ptr argument.
10895 using OpenMPOffloadMappingFlags = llvm::omp::OpenMPOffloadMappingFlags;
10896 auto *NullPtr = llvm::Constant::getNullValue(CGF.Builder.getPtrTy());
10897 CombinedInfo.BasePointers.push_back(NullPtr);
10898 CombinedInfo.Pointers.push_back(NullPtr);
10899 CombinedInfo.DevicePointers.push_back(
10900 llvm::OpenMPIRBuilder::DeviceInfoTy::None);
10901 CombinedInfo.Sizes.push_back(CGF.Builder.getInt64(0));
10902 CombinedInfo.Types.push_back(OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM |
10903 OpenMPOffloadMappingFlags::OMP_MAP_LITERAL);
10904 if (!CombinedInfo.Names.empty())
10905 CombinedInfo.Names.push_back(NullPtr);
10906 CombinedInfo.Exprs.push_back(nullptr);
10907 CombinedInfo.Mappers.push_back(nullptr);
10908 CombinedInfo.DevicePtrDecls.push_back(nullptr);
10909
10910 emitOffloadingArraysAndArgs(CGF, CombinedInfo, Info, OMPBuilder,
10911 /*IsNonContiguous=*/true, /*ForEndCall=*/false);
10912
10913 InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
10914 InputInfo.BasePointersArray = Address(Info.RTArgs.BasePointersArray,
10915 CGF.VoidPtrTy, CGM.getPointerAlign());
10916 InputInfo.PointersArray =
10917 Address(Info.RTArgs.PointersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
10918 InputInfo.SizesArray =
10919 Address(Info.RTArgs.SizesArray, CGF.Int64Ty, CGM.getPointerAlign());
10920 InputInfo.MappersArray =
10921 Address(Info.RTArgs.MappersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
10922 MapTypesArray = Info.RTArgs.MapTypesArray;
10923 MapNamesArray = Info.RTArgs.MapNamesArray;
10924
10925 auto &&ThenGen = [&OMPRuntime, OutlinedFn, &D, &CapturedVars,
10926 RequiresOuterTask, &CS, OffloadingMandatory, Device,
10927 OutlinedFnID, &InputInfo, &MapTypesArray, &MapNamesArray,
10928 SizeEmitter](CodeGenFunction &CGF, PrePostActionTy &) {
10929 bool IsReverseOffloading = Device.getInt() == OMPC_DEVICE_ancestor;
10930
10931 if (IsReverseOffloading) {
10932 // Reverse offloading is not supported, so just execute on the host.
10933 // FIXME: This fallback solution is incorrect since it ignores the
10934 // OMP_TARGET_OFFLOAD environment variable. Instead it would be better to
10935 // assert here and ensure SEMA emits an error.
10936 emitTargetCallFallback(OMPRuntime, OutlinedFn, D, CapturedVars,
10937 RequiresOuterTask, CS, OffloadingMandatory, CGF);
10938 return;
10939 }
10940
10941 bool HasNoWait = D.hasClausesOfKind<OMPNowaitClause>();
10942 unsigned NumTargetItems = InputInfo.NumberOfTargetItems;
10943
10944 llvm::Value *BasePointersArray =
10945 InputInfo.BasePointersArray.emitRawPointer(CGF);
10946 llvm::Value *PointersArray = InputInfo.PointersArray.emitRawPointer(CGF);
10947 llvm::Value *SizesArray = InputInfo.SizesArray.emitRawPointer(CGF);
10948 llvm::Value *MappersArray = InputInfo.MappersArray.emitRawPointer(CGF);
10949
10950 auto &&EmitTargetCallFallbackCB =
10951 [&OMPRuntime, OutlinedFn, &D, &CapturedVars, RequiresOuterTask, &CS,
10952 OffloadingMandatory, &CGF](llvm::OpenMPIRBuilder::InsertPointTy IP)
10953 -> llvm::OpenMPIRBuilder::InsertPointTy {
10954 CGF.Builder.restoreIP(IP);
10955 emitTargetCallFallback(OMPRuntime, OutlinedFn, D, CapturedVars,
10956 RequiresOuterTask, CS, OffloadingMandatory, CGF);
10957 return CGF.Builder.saveIP();
10958 };
10959
10960 bool IsBare = D.hasClausesOfKind<OMPXBareClause>();
10963 if (IsBare) {
10966 NumThreads);
10967 } else {
10968 NumTeams.push_back(OMPRuntime->emitNumTeamsForTargetDirective(CGF, D));
10969 NumThreads.push_back(
10970 OMPRuntime->emitNumThreadsForTargetDirective(CGF, D));
10971 }
10972
10973 llvm::Value *DeviceID = emitDeviceID(Device, CGF);
10974 llvm::Value *RTLoc = OMPRuntime->emitUpdateLocation(CGF, D.getBeginLoc());
10975 llvm::Value *NumIterations =
10976 OMPRuntime->emitTargetNumIterationsCall(CGF, D, SizeEmitter);
10977 auto [DynCGroupMem, DynCGroupMemFallback] = emitDynCGroupMem(D, CGF);
10978 llvm::OpenMPIRBuilder::InsertPointTy AllocaIP(
10979 CGF.AllocaInsertPt->getParent(), CGF.AllocaInsertPt->getIterator());
10980
10981 llvm::OpenMPIRBuilder::TargetDataRTArgs RTArgs(
10982 BasePointersArray, PointersArray, SizesArray, MapTypesArray,
10983 nullptr /* MapTypesArrayEnd */, MappersArray, MapNamesArray);
10984
10985 llvm::OpenMPIRBuilder::TargetKernelArgs Args(
10986 NumTargetItems, RTArgs, NumIterations, NumTeams, NumThreads,
10987 DynCGroupMem, HasNoWait, DynCGroupMemFallback);
10988
10989 llvm::OpenMPIRBuilder::InsertPointTy AfterIP =
10990 cantFail(OMPRuntime->getOMPBuilder().emitKernelLaunch(
10991 CGF.Builder, OutlinedFnID, EmitTargetCallFallbackCB, Args, DeviceID,
10992 RTLoc, AllocaIP));
10993 CGF.Builder.restoreIP(AfterIP);
10994 };
10995
10996 if (RequiresOuterTask)
10997 CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
10998 else
10999 OMPRuntime->emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
11000}
11001
/// Emit the "else" (host execution) path of a target call.
///
/// The host fallback generated by emitTargetCallFallback is wrapped in a
/// region callback. That callback is emitted through
/// EmitOMPTargetTaskBasedDirective when \p RequiresOuterTask is set, and as an
/// inlined directive otherwise.
///
/// NOTE(review): `CapturedVars` and `InputInfo` are used below but their
/// declarations are not visible in this listing; a parameter line and a local
/// declaration appear to be missing.
11002static void
11003emitTargetCallElse(CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn,
11004 const OMPExecutableDirective &D,
11006 bool RequiresOuterTask, const CapturedStmt &CS,
11007 bool OffloadingMandatory, CodeGenFunction &CGF) {
11008
11009 // Notify that the host version must be executed.
11010 auto &&ElseGen =
11011 [&OMPRuntime, OutlinedFn, &D, &CapturedVars, RequiresOuterTask, &CS,
11012 OffloadingMandatory](CodeGenFunction &CGF, PrePostActionTy &) {
11013 emitTargetCallFallback(OMPRuntime, OutlinedFn, D, CapturedVars,
11014 RequiresOuterTask, CS, OffloadingMandatory, CGF);
11015 };
11016
      // Choose how the fallback region is emitted: task-based or inlined.
11017 if (RequiresOuterTask) {
11019 CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo);
11020 } else {
11021 OMPRuntime->emitInlinedDirective(CGF, D.getDirectiveKind(), ElseGen);
11022 }
11023}
11024
11027 llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
11028 llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
11029 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
11030 const OMPLoopDirective &D)>
11031 SizeEmitter) {
11032 if (!CGF.HaveInsertPoint())
11033 return;
11034
11035 const bool OffloadingMandatory = !CGM.getLangOpts().OpenMPIsTargetDevice &&
11036 CGM.getLangOpts().OpenMPOffloadMandatory;
11037
11038 assert((OffloadingMandatory || OutlinedFn) && "Invalid outlined function!");
11039
11040 const bool RequiresOuterTask =
11041 D.hasClausesOfKind<OMPDependClause>() ||
11042 D.hasClausesOfKind<OMPNowaitClause>() ||
11043 D.hasClausesOfKind<OMPInReductionClause>() ||
11044 (CGM.getLangOpts().OpenMP >= 51 &&
11045 needsTaskBasedThreadLimit(D.getDirectiveKind()) &&
11046 D.hasClausesOfKind<OMPThreadLimitClause>());
11048 const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
11049 auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF,
11050 PrePostActionTy &) {
11051 CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
11052 };
11053 emitInlinedDirective(CGF, OMPD_unknown, ArgsCodegen);
11054
11056 llvm::Value *MapTypesArray = nullptr;
11057 llvm::Value *MapNamesArray = nullptr;
11058
11059 auto &&TargetThenGen = [this, OutlinedFn, &D, &CapturedVars,
11060 RequiresOuterTask, &CS, OffloadingMandatory, Device,
11061 OutlinedFnID, &InputInfo, &MapTypesArray,
11062 &MapNamesArray, SizeEmitter](CodeGenFunction &CGF,
11063 PrePostActionTy &) {
11064 emitTargetCallKernelLaunch(this, OutlinedFn, D, CapturedVars,
11065 RequiresOuterTask, CS, OffloadingMandatory,
11066 Device, OutlinedFnID, InputInfo, MapTypesArray,
11067 MapNamesArray, SizeEmitter, CGF, CGM);
11068 };
11069
11070 auto &&TargetElseGen =
11071 [this, OutlinedFn, &D, &CapturedVars, RequiresOuterTask, &CS,
11072 OffloadingMandatory](CodeGenFunction &CGF, PrePostActionTy &) {
11073 emitTargetCallElse(this, OutlinedFn, D, CapturedVars, RequiresOuterTask,
11074 CS, OffloadingMandatory, CGF);
11075 };
11076
11077 // If we have a target function ID it means that we need to support
11078 // offloading, otherwise, just execute on the host. We need to execute on host
11079 // regardless of the conditional in the if clause if, e.g., the user does not
11080 // specify target triples.
11081 if (OutlinedFnID) {
11082 if (IfCond) {
11083 emitIfClause(CGF, IfCond, TargetThenGen, TargetElseGen);
11084 } else {
11085 RegionCodeGenTy ThenRCG(TargetThenGen);
11086 ThenRCG(CGF);
11087 }
11088 } else {
11089 RegionCodeGenTy ElseRCG(TargetElseGen);
11090 ElseRCG(CGF);
11091 }
11092}
11093
11095 StringRef ParentName) {
11096 if (!S)
11097 return;
11098
11099 // Register vtable from device for target data and target directives.
11100 // Add this block here since scanForTargetRegionsFunctions ignores
11101 // target data by checking if S is an executable directive (target).
11102 if (auto *E = dyn_cast<OMPExecutableDirective>(S);
11103 E && isOpenMPTargetDataManagementDirective(E->getDirectiveKind())) {
11104 // Don't need to check if it's device compile
11105 // since scanForTargetRegionsFunctions currently only called
11106 // in device compilation.
11107 registerVTable(*E);
11108 }
11109
11110 // Codegen OMP target directives that offload compute to the device.
11111 bool RequiresDeviceCodegen =
11114 cast<OMPExecutableDirective>(S)->getDirectiveKind());
11115
11116 if (RequiresDeviceCodegen) {
11117 const auto &E = *cast<OMPExecutableDirective>(S);
11118
11119 llvm::TargetRegionEntryInfo EntryInfo = getEntryInfoFromPresumedLoc(
11120 CGM, OMPBuilder, E.getBeginLoc(), ParentName);
11121
11122 // Is this a target region that should not be emitted as an entry point? If
11123 // so just signal we are done with this target region.
11124 if (!OMPBuilder.OffloadInfoManager.hasTargetRegionEntryInfo(EntryInfo))
11125 return;
11126
11127 switch (E.getDirectiveKind()) {
11128 case OMPD_target:
11131 break;
11132 case OMPD_target_parallel:
11134 CGM, ParentName, cast<OMPTargetParallelDirective>(E));
11135 break;
11136 case OMPD_target_teams:
11138 CGM, ParentName, cast<OMPTargetTeamsDirective>(E));
11139 break;
11140 case OMPD_target_teams_distribute:
11143 break;
11144 case OMPD_target_teams_distribute_simd:
11147 break;
11148 case OMPD_target_parallel_for:
11151 break;
11152 case OMPD_target_parallel_for_simd:
11155 break;
11156 case OMPD_target_simd:
11158 CGM, ParentName, cast<OMPTargetSimdDirective>(E));
11159 break;
11160 case OMPD_target_teams_distribute_parallel_for:
11162 CGM, ParentName,
11164 break;
11165 case OMPD_target_teams_distribute_parallel_for_simd:
11168 CGM, ParentName,
11170 break;
11171 case OMPD_target_teams_loop:
11174 break;
11175 case OMPD_target_parallel_loop:
11178 break;
11179 case OMPD_parallel:
11180 case OMPD_for:
11181 case OMPD_parallel_for:
11182 case OMPD_parallel_master:
11183 case OMPD_parallel_sections:
11184 case OMPD_for_simd:
11185 case OMPD_parallel_for_simd:
11186 case OMPD_cancel:
11187 case OMPD_cancellation_point:
11188 case OMPD_ordered:
11189 case OMPD_threadprivate:
11190 case OMPD_allocate:
11191 case OMPD_task:
11192 case OMPD_simd:
11193 case OMPD_tile:
11194 case OMPD_unroll:
11195 case OMPD_sections:
11196 case OMPD_section:
11197 case OMPD_single:
11198 case OMPD_master:
11199 case OMPD_critical:
11200 case OMPD_taskyield:
11201 case OMPD_barrier:
11202 case OMPD_taskwait:
11203 case OMPD_taskgroup:
11204 case OMPD_atomic:
11205 case OMPD_flush:
11206 case OMPD_depobj:
11207 case OMPD_scan:
11208 case OMPD_teams:
11209 case OMPD_target_data:
11210 case OMPD_target_exit_data:
11211 case OMPD_target_enter_data:
11212 case OMPD_distribute:
11213 case OMPD_distribute_simd:
11214 case OMPD_distribute_parallel_for:
11215 case OMPD_distribute_parallel_for_simd:
11216 case OMPD_teams_distribute:
11217 case OMPD_teams_distribute_simd:
11218 case OMPD_teams_distribute_parallel_for:
11219 case OMPD_teams_distribute_parallel_for_simd:
11220 case OMPD_target_update:
11221 case OMPD_declare_simd:
11222 case OMPD_declare_variant:
11223 case OMPD_begin_declare_variant:
11224 case OMPD_end_declare_variant:
11225 case OMPD_declare_target:
11226 case OMPD_end_declare_target:
11227 case OMPD_declare_reduction:
11228 case OMPD_declare_mapper:
11229 case OMPD_taskloop:
11230 case OMPD_taskloop_simd:
11231 case OMPD_master_taskloop:
11232 case OMPD_master_taskloop_simd:
11233 case OMPD_parallel_master_taskloop:
11234 case OMPD_parallel_master_taskloop_simd:
11235 case OMPD_requires:
11236 case OMPD_metadirective:
11237 case OMPD_unknown:
11238 default:
11239 llvm_unreachable("Unknown target directive for OpenMP device codegen.");
11240 }
11241 return;
11242 }
11243
11244 if (const auto *E = dyn_cast<OMPExecutableDirective>(S)) {
11245 if (!E->hasAssociatedStmt() || !E->getAssociatedStmt())
11246 return;
11247
11248 scanForTargetRegionsFunctions(E->getRawStmt(), ParentName);
11249 return;
11250 }
11251
11252 // If this is a lambda function, look into its body.
11253 if (const auto *L = dyn_cast<LambdaExpr>(S))
11254 S = L->getBody();
11255
11256 // Keep looking for target regions recursively.
11257 for (const Stmt *II : S->children())
11258 scanForTargetRegionsFunctions(II, ParentName);
11259}
11260
11261static bool isAssumedToBeNotEmitted(const ValueDecl *VD, bool IsDevice) {
11262 std::optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
11263 OMPDeclareTargetDeclAttr::getDeviceType(VD);
11264 if (!DevTy)
11265 return false;
11266 // Do not emit device_type(nohost) functions for the host.
11267 if (!IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_NoHost)
11268 return true;
11269 // Do not emit device_type(host) functions for the device.
11270 if (IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_Host)
11271 return true;
11272 return false;
11273}
11274
11276 // If emitting code for the host, we do not process FD here. Instead we do
11277 // the normal code generation.
11278 if (!CGM.getLangOpts().OpenMPIsTargetDevice) {
11279 if (const auto *FD = dyn_cast<FunctionDecl>(GD.getDecl()))
11281 CGM.getLangOpts().OpenMPIsTargetDevice))
11282 return true;
11283 return false;
11284 }
11285
11286 const ValueDecl *VD = cast<ValueDecl>(GD.getDecl());
11287 // Try to detect target regions in the function.
11288 if (const auto *FD = dyn_cast<FunctionDecl>(VD)) {
11289 StringRef Name = CGM.getMangledName(GD);
11292 CGM.getLangOpts().OpenMPIsTargetDevice))
11293 return true;
11294 }
11295
11296 // Do not emit the function if it is not marked as declare target.
11297 return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) &&
11298 AlreadyEmittedTargetDecls.count(VD) == 0;
11299}
11300
11303 CGM.getLangOpts().OpenMPIsTargetDevice))
11304 return true;
11305
11306 if (!CGM.getLangOpts().OpenMPIsTargetDevice)
11307 return false;
11308
11309 // Check if there are Ctors/Dtors in this declaration and look for target
11310 // regions in it. We use the complete variant to produce the kernel name
11311 // mangling.
11312 QualType RDTy = cast<VarDecl>(GD.getDecl())->getType();
11313 if (const auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) {
11314 for (const CXXConstructorDecl *Ctor : RD->ctors()) {
11315 StringRef ParentName =
11316 CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete));
11317 scanForTargetRegionsFunctions(Ctor->getBody(), ParentName);
11318 }
11319 if (const CXXDestructorDecl *Dtor = RD->getDestructor()) {
11320 StringRef ParentName =
11321 CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete));
11322 scanForTargetRegionsFunctions(Dtor->getBody(), ParentName);
11323 }
11324 }
11325
11326 // Do not emit the variable if it is not marked as declare target.
11327 std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
11328 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(
11329 cast<VarDecl>(GD.getDecl()));
11330 if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
11331 ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
11332 *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
11335 return true;
11336 }
11337 return false;
11338}
11339
11341 llvm::Constant *Addr) {
11342 if (CGM.getLangOpts().OMPTargetTriples.empty() &&
11343 !CGM.getLangOpts().OpenMPIsTargetDevice)
11344 return;
11345
11346 std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
11347 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
11348
11349 // If this is an 'extern' declaration we defer to the canonical definition and
11350 // do not emit an offloading entry.
11351 if (Res && *Res != OMPDeclareTargetDeclAttr::MT_Link &&
11352 VD->hasExternalStorage())
11353 return;
11354
11355 // MT_Local variables use direct access with no host-device mapping.
11356 // No offload entry needed — the device global keeps its own initializer.
11357 if (Res && *Res == OMPDeclareTargetDeclAttr::MT_Local)
11358 return;
11359
11360 if (!Res) {
11361 if (CGM.getLangOpts().OpenMPIsTargetDevice) {
11362 // Register non-target variables being emitted in device code (debug info
11363 // may cause this).
11364 StringRef VarName = CGM.getMangledName(VD);
11365 EmittedNonTargetVariables.try_emplace(VarName, Addr);
11366 }
11367 return;
11368 }
11369
11370 auto AddrOfGlobal = [&VD, this]() { return CGM.GetAddrOfGlobal(VD); };
11371 auto LinkageForVariable = [&VD, this]() {
11372 return CGM.getLLVMLinkageVarDefinition(VD);
11373 };
11374
11375 std::vector<llvm::GlobalVariable *> GeneratedRefs;
11376 OMPBuilder.registerTargetGlobalVariable(
11378 VD->hasDefinition(CGM.getContext()) == VarDecl::DeclarationOnly,
11379 VD->isExternallyVisible(),
11381 VD->getCanonicalDecl()->getBeginLoc()),
11382 CGM.getMangledName(VD), GeneratedRefs, CGM.getLangOpts().OpenMPSimd,
11383 CGM.getLangOpts().OMPTargetTriples, AddrOfGlobal, LinkageForVariable,
11384 CGM.getTypes().ConvertTypeForMem(
11385 CGM.getContext().getPointerType(VD->getType())),
11386 Addr);
11387
11388 for (auto *ref : GeneratedRefs)
11389 CGM.addCompilerUsedGlobal(ref);
11390}
11391
11393 if (isa<FunctionDecl>(GD.getDecl()) ||
11395 return emitTargetFunctions(GD);
11396
11397 return emitTargetGlobalVariable(GD);
11398}
11399
11401 for (const VarDecl *VD : DeferredGlobalVariables) {
11402 std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
11403 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
11404 if (!Res)
11405 continue;
11406 // MT_Local and MT_To/MT_Enter without USM are always emitted.
11407 if (*Res == OMPDeclareTargetDeclAttr::MT_Local ||
11408 ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
11409 *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
11411 CGM.EmitGlobal(VD);
11412 } else {
11413 assert((*Res == OMPDeclareTargetDeclAttr::MT_Link ||
11414 ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
11415 *Res == OMPDeclareTargetDeclAttr::MT_Enter ||
11416 *Res == OMPDeclareTargetDeclAttr::MT_Local) &&
11418 "Expected link clause or to clause with unified memory.");
11419 (void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
11420 }
11421 }
11422}
11423
11425 CodeGenFunction &CGF, const OMPExecutableDirective &D) const {
11426 assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) &&
11427 " Expected target-based directive.");
11428}
11429
11431 for (const OMPClause *Clause : D->clauselists()) {
11432 if (Clause->getClauseKind() == OMPC_unified_shared_memory) {
11434 OMPBuilder.Config.setHasRequiresUnifiedSharedMemory(true);
11435 } else if (const auto *AC =
11436 dyn_cast<OMPAtomicDefaultMemOrderClause>(Clause)) {
11437 switch (AC->getAtomicDefaultMemOrderKind()) {
11438 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_acq_rel:
11439 RequiresAtomicOrdering = llvm::AtomicOrdering::AcquireRelease;
11440 break;
11441 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_seq_cst:
11442 RequiresAtomicOrdering = llvm::AtomicOrdering::SequentiallyConsistent;
11443 break;
11444 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_relaxed:
11445 RequiresAtomicOrdering = llvm::AtomicOrdering::Monotonic;
11446 break;
11448 break;
11449 }
11450 }
11451 }
11452}
11453
11454llvm::AtomicOrdering CGOpenMPRuntime::getDefaultMemoryOrdering() const {
11456}
11457
11459 LangAS &AS) {
11460 if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())
11461 return false;
11462 const auto *A = VD->getAttr<OMPAllocateDeclAttr>();
11463 switch(A->getAllocatorType()) {
11464 case OMPAllocateDeclAttr::OMPNullMemAlloc:
11465 case OMPAllocateDeclAttr::OMPDefaultMemAlloc:
11466 // Not supported, fallback to the default mem space.
11467 case OMPAllocateDeclAttr::OMPLargeCapMemAlloc:
11468 case OMPAllocateDeclAttr::OMPCGroupMemAlloc:
11469 case OMPAllocateDeclAttr::OMPHighBWMemAlloc:
11470 case OMPAllocateDeclAttr::OMPLowLatMemAlloc:
11471 case OMPAllocateDeclAttr::OMPThreadMemAlloc:
11472 case OMPAllocateDeclAttr::OMPConstMemAlloc:
11473 case OMPAllocateDeclAttr::OMPPTeamMemAlloc:
11474 AS = LangAS::Default;
11475 return true;
11476 case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc:
11477 llvm_unreachable("Expected predefined allocator for the variables with the "
11478 "static storage.");
11479 }
11480 return false;
11481}
11482
11486
11488 CodeGenModule &CGM)
11489 : CGM(CGM) {
11490 if (CGM.getLangOpts().OpenMPIsTargetDevice) {
11491 SavedShouldMarkAsGlobal = CGM.getOpenMPRuntime().ShouldMarkAsGlobal;
11492 CGM.getOpenMPRuntime().ShouldMarkAsGlobal = false;
11493 }
11494}
11495
11497 if (CGM.getLangOpts().OpenMPIsTargetDevice)
11498 CGM.getOpenMPRuntime().ShouldMarkAsGlobal = SavedShouldMarkAsGlobal;
11499}
11500
11502 if (!CGM.getLangOpts().OpenMPIsTargetDevice || !ShouldMarkAsGlobal)
11503 return true;
11504
11505 const auto *D = cast<FunctionDecl>(GD.getDecl());
11506 // Do not emit the function if it is marked as declare target, as it was
11507 // already emitted.
11508 if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(D)) {
11509 if (D->hasBody() && AlreadyEmittedTargetDecls.count(D) == 0) {
11510 if (auto *F = dyn_cast_or_null<llvm::Function>(
11511 CGM.GetGlobalValue(CGM.getMangledName(GD))))
11512 return !F->isDeclaration();
11513 return false;
11514 }
11515 return true;
11516 }
11517
11518 return !AlreadyEmittedTargetDecls.insert(D).second;
11519}
11520
11522 const OMPExecutableDirective &D,
11523 SourceLocation Loc,
11524 llvm::Function *OutlinedFn,
11525 ArrayRef<llvm::Value *> CapturedVars) {
11526 if (!CGF.HaveInsertPoint())
11527 return;
11528
11529 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
11531
11532 // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn);
11533 llvm::Value *Args[] = {
11534 RTLoc,
11535 CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
11536 OutlinedFn};
11538 RealArgs.append(std::begin(Args), std::end(Args));
11539 RealArgs.append(CapturedVars.begin(), CapturedVars.end());
11540
11541 llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
11542 CGM.getModule(), OMPRTL___kmpc_fork_teams);
11543 CGF.EmitRuntimeCall(RTLFn, RealArgs);
11544}
11545
11547 const Expr *NumTeams,
11548 const Expr *ThreadLimit,
11549 SourceLocation Loc) {
11550 if (!CGF.HaveInsertPoint())
11551 return;
11552
11553 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
11554
11555 llvm::Value *NumTeamsVal =
11556 NumTeams
11557 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams),
11558 CGF.CGM.Int32Ty, /* isSigned = */ true)
11559 : CGF.Builder.getInt32(0);
11560
11561 llvm::Value *ThreadLimitVal =
11562 ThreadLimit
11563 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
11564 CGF.CGM.Int32Ty, /* isSigned = */ true)
11565 : CGF.Builder.getInt32(0);
11566
11567 // Build call __kmpc_push_num_teams(&loc, global_tid, num_teams, thread_limit)
11568 llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal,
11569 ThreadLimitVal};
11570 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
11571 CGM.getModule(), OMPRTL___kmpc_push_num_teams),
11572 PushNumTeamsArgs);
11573}
11574
11576 const Expr *ThreadLimit,
11577 SourceLocation Loc) {
11578 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
11579 llvm::Value *ThreadLimitVal =
11580 ThreadLimit
11581 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
11582 CGF.CGM.Int32Ty, /* isSigned = */ true)
11583 : CGF.Builder.getInt32(0);
11584
11585 // Build call __kmpc_set_thread_limit(&loc, global_tid, thread_limit)
11586 llvm::Value *ThreadLimitArgs[] = {RTLoc, getThreadID(CGF, Loc),
11587 ThreadLimitVal};
11588 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
11589 CGM.getModule(), OMPRTL___kmpc_set_thread_limit),
11590 ThreadLimitArgs);
11591}
11592
11594 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
11595 const Expr *Device, const RegionCodeGenTy &CodeGen,
11597 if (!CGF.HaveInsertPoint())
11598 return;
11599
11600 // Action used to replace the default codegen action and turn privatization
11601 // off.
11602 PrePostActionTy NoPrivAction;
11603
11604 using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
11605
11606 llvm::Value *IfCondVal = nullptr;
11607 if (IfCond)
11608 IfCondVal = CGF.EvaluateExprAsBool(IfCond);
11609
11610 // Emit device ID if any.
11611 llvm::Value *DeviceID = nullptr;
11612 if (Device) {
11613 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
11614 CGF.Int64Ty, /*isSigned=*/true);
11615 } else {
11616 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
11617 }
11618
11619 // Fill up the arrays with all the mapped variables.
11620 MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
11621 auto GenMapInfoCB =
11622 [&](InsertPointTy CodeGenIP) -> llvm::OpenMPIRBuilder::MapInfosTy & {
11623 CGF.Builder.restoreIP(CodeGenIP);
11624 // Get map clause information.
11625 MappableExprsHandler MEHandler(D, CGF);
11626 MEHandler.generateAllInfo(CombinedInfo, OMPBuilder);
11627
11628 auto FillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) {
11629 return emitMappingInformation(CGF, OMPBuilder, MapExpr);
11630 };
11631 if (CGM.getCodeGenOpts().getDebugInfo() !=
11632 llvm::codegenoptions::NoDebugInfo) {
11633 CombinedInfo.Names.resize(CombinedInfo.Exprs.size());
11634 llvm::transform(CombinedInfo.Exprs, CombinedInfo.Names.begin(),
11635 FillInfoMap);
11636 }
11637
11638 return CombinedInfo;
11639 };
11640 using BodyGenTy = llvm::OpenMPIRBuilder::BodyGenTy;
11641 auto BodyCB = [&](InsertPointTy CodeGenIP, BodyGenTy BodyGenType) {
11642 CGF.Builder.restoreIP(CodeGenIP);
11643 switch (BodyGenType) {
11644 case BodyGenTy::Priv:
11645 if (!Info.CaptureDeviceAddrMap.empty())
11646 CodeGen(CGF);
11647 break;
11648 case BodyGenTy::DupNoPriv:
11649 if (!Info.CaptureDeviceAddrMap.empty()) {
11650 CodeGen.setAction(NoPrivAction);
11651 CodeGen(CGF);
11652 }
11653 break;
11654 case BodyGenTy::NoPriv:
11655 if (Info.CaptureDeviceAddrMap.empty()) {
11656 CodeGen.setAction(NoPrivAction);
11657 CodeGen(CGF);
11658 }
11659 break;
11660 }
11661 return InsertPointTy(CGF.Builder.GetInsertBlock(),
11662 CGF.Builder.GetInsertPoint());
11663 };
11664
11665 auto DeviceAddrCB = [&](unsigned int I, llvm::Value *NewDecl) {
11666 if (const ValueDecl *DevVD = CombinedInfo.DevicePtrDecls[I]) {
11667 Info.CaptureDeviceAddrMap.try_emplace(DevVD, NewDecl);
11668 }
11669 };
11670
11671 auto CustomMapperCB = [&](unsigned int I) {
11672 llvm::Function *MFunc = nullptr;
11673 if (CombinedInfo.Mappers[I]) {
11674 Info.HasMapper = true;
11676 cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I]));
11677 }
11678 return MFunc;
11679 };
11680
11681 // Source location for the ident struct
11682 llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
11683
11684 InsertPointTy AllocaIP(CGF.AllocaInsertPt->getParent(),
11685 CGF.AllocaInsertPt->getIterator());
11686 InsertPointTy CodeGenIP(CGF.Builder.GetInsertBlock(),
11687 CGF.Builder.GetInsertPoint());
11688 llvm::OpenMPIRBuilder::LocationDescription OmpLoc(CodeGenIP);
11689 llvm::OpenMPIRBuilder::InsertPointTy AfterIP =
11690 cantFail(OMPBuilder.createTargetData(
11691 OmpLoc, AllocaIP, CodeGenIP, /*DeallocBlocks=*/{}, DeviceID,
11692 IfCondVal, Info, GenMapInfoCB, CustomMapperCB,
11693 /*MapperFunc=*/nullptr, BodyCB, DeviceAddrCB, RTLoc));
11694 CGF.Builder.restoreIP(AfterIP);
11695}
11696
11698 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
11699 const Expr *Device) {
11700 if (!CGF.HaveInsertPoint())
11701 return;
11702
11706 "Expecting either target enter, exit data, or update directives.");
11707
11709 llvm::Value *MapTypesArray = nullptr;
11710 llvm::Value *MapNamesArray = nullptr;
11711 // Generate the code for the opening of the data environment.
11712 auto &&ThenGen = [this, &D, Device, &InputInfo, &MapTypesArray,
11713 &MapNamesArray](CodeGenFunction &CGF, PrePostActionTy &) {
11714 // Emit device ID if any.
11715 llvm::Value *DeviceID = nullptr;
11716 if (Device) {
11717 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
11718 CGF.Int64Ty, /*isSigned=*/true);
11719 } else {
11720 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
11721 }
11722
11723 // Emit the number of elements in the offloading arrays.
11724 llvm::Constant *PointerNum =
11725 CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);
11726
11727 // Source location for the ident struct
11728 llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
11729
11730 SmallVector<llvm::Value *, 13> OffloadingArgs(
11731 {RTLoc, DeviceID, PointerNum,
11732 InputInfo.BasePointersArray.emitRawPointer(CGF),
11733 InputInfo.PointersArray.emitRawPointer(CGF),
11734 InputInfo.SizesArray.emitRawPointer(CGF), MapTypesArray, MapNamesArray,
11735 InputInfo.MappersArray.emitRawPointer(CGF)});
11736
11737 // Select the right runtime function call for each standalone
11738 // directive.
11739 const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
11740 RuntimeFunction RTLFn;
11741 switch (D.getDirectiveKind()) {
11742 case OMPD_target_enter_data:
11743 RTLFn = HasNowait ? OMPRTL___tgt_target_data_begin_nowait_mapper
11744 : OMPRTL___tgt_target_data_begin_mapper;
11745 break;
11746 case OMPD_target_exit_data:
11747 RTLFn = HasNowait ? OMPRTL___tgt_target_data_end_nowait_mapper
11748 : OMPRTL___tgt_target_data_end_mapper;
11749 break;
11750 case OMPD_target_update:
11751 RTLFn = HasNowait ? OMPRTL___tgt_target_data_update_nowait_mapper
11752 : OMPRTL___tgt_target_data_update_mapper;
11753 break;
11754 case OMPD_parallel:
11755 case OMPD_for:
11756 case OMPD_parallel_for:
11757 case OMPD_parallel_master:
11758 case OMPD_parallel_sections:
11759 case OMPD_for_simd:
11760 case OMPD_parallel_for_simd:
11761 case OMPD_cancel:
11762 case OMPD_cancellation_point:
11763 case OMPD_ordered:
11764 case OMPD_threadprivate:
11765 case OMPD_allocate:
11766 case OMPD_task:
11767 case OMPD_simd:
11768 case OMPD_tile:
11769 case OMPD_unroll:
11770 case OMPD_sections:
11771 case OMPD_section:
11772 case OMPD_single:
11773 case OMPD_master:
11774 case OMPD_critical:
11775 case OMPD_taskyield:
11776 case OMPD_barrier:
11777 case OMPD_taskwait:
11778 case OMPD_taskgroup:
11779 case OMPD_atomic:
11780 case OMPD_flush:
11781 case OMPD_depobj:
11782 case OMPD_scan:
11783 case OMPD_teams:
11784 case OMPD_target_data:
11785 case OMPD_distribute:
11786 case OMPD_distribute_simd:
11787 case OMPD_distribute_parallel_for:
11788 case OMPD_distribute_parallel_for_simd:
11789 case OMPD_teams_distribute:
11790 case OMPD_teams_distribute_simd:
11791 case OMPD_teams_distribute_parallel_for:
11792 case OMPD_teams_distribute_parallel_for_simd:
11793 case OMPD_declare_simd:
11794 case OMPD_declare_variant:
11795 case OMPD_begin_declare_variant:
11796 case OMPD_end_declare_variant:
11797 case OMPD_declare_target:
11798 case OMPD_end_declare_target:
11799 case OMPD_declare_reduction:
11800 case OMPD_declare_mapper:
11801 case OMPD_taskloop:
11802 case OMPD_taskloop_simd:
11803 case OMPD_master_taskloop:
11804 case OMPD_master_taskloop_simd:
11805 case OMPD_parallel_master_taskloop:
11806 case OMPD_parallel_master_taskloop_simd:
11807 case OMPD_target:
11808 case OMPD_target_simd:
11809 case OMPD_target_teams_distribute:
11810 case OMPD_target_teams_distribute_simd:
11811 case OMPD_target_teams_distribute_parallel_for:
11812 case OMPD_target_teams_distribute_parallel_for_simd:
11813 case OMPD_target_teams:
11814 case OMPD_target_parallel:
11815 case OMPD_target_parallel_for:
11816 case OMPD_target_parallel_for_simd:
11817 case OMPD_requires:
11818 case OMPD_metadirective:
11819 case OMPD_unknown:
11820 default:
11821 llvm_unreachable("Unexpected standalone target data directive.");
11822 break;
11823 }
11824 if (HasNowait) {
11825 OffloadingArgs.push_back(llvm::Constant::getNullValue(CGF.Int32Ty));
11826 OffloadingArgs.push_back(llvm::Constant::getNullValue(CGF.VoidPtrTy));
11827 OffloadingArgs.push_back(llvm::Constant::getNullValue(CGF.Int32Ty));
11828 OffloadingArgs.push_back(llvm::Constant::getNullValue(CGF.VoidPtrTy));
11829 }
11830 CGF.EmitRuntimeCall(
11831 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), RTLFn),
11832 OffloadingArgs);
11833 };
11834
11835 auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
11836 &MapNamesArray](CodeGenFunction &CGF,
11837 PrePostActionTy &) {
11838 // Fill up the arrays with all the mapped variables.
11839 MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
11841 MappableExprsHandler MEHandler(D, CGF);
11842 genMapInfo(MEHandler, CGF, CombinedInfo, OMPBuilder);
11843 emitOffloadingArraysAndArgs(CGF, CombinedInfo, Info, OMPBuilder,
11844 /*IsNonContiguous=*/true, /*ForEndCall=*/false);
11845
11846 bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() ||
11847 D.hasClausesOfKind<OMPNowaitClause>();
11848
11849 InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
11850 InputInfo.BasePointersArray = Address(Info.RTArgs.BasePointersArray,
11851 CGF.VoidPtrTy, CGM.getPointerAlign());
11852 InputInfo.PointersArray = Address(Info.RTArgs.PointersArray, CGF.VoidPtrTy,
11853 CGM.getPointerAlign());
11854 InputInfo.SizesArray =
11855 Address(Info.RTArgs.SizesArray, CGF.Int64Ty, CGM.getPointerAlign());
11856 InputInfo.MappersArray =
11857 Address(Info.RTArgs.MappersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
11858 MapTypesArray = Info.RTArgs.MapTypesArray;
11859 MapNamesArray = Info.RTArgs.MapNamesArray;
11860 if (RequiresOuterTask)
11861 CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
11862 else
11863 emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
11864 };
11865
11866 if (IfCond) {
11867 emitIfClause(CGF, IfCond, TargetThenGen,
11868 [](CodeGenFunction &CGF, PrePostActionTy &) {});
11869 } else {
11870 RegionCodeGenTy ThenRCG(TargetThenGen);
11871 ThenRCG(CGF);
11872 }
11873}
11874
11875static unsigned
11878 // Every vector variant of a SIMD-enabled function has a vector length (VLEN).
11879 // If OpenMP clause "simdlen" is used, the VLEN is the value of the argument
11880 // of that clause. The VLEN value must be a power of 2.
11881 // Otherwise, the notion of the function's "characteristic data type" (CDT)
11882 // is used to compute the vector length.
11883 // CDT is defined in the following order:
11884 // a) For non-void function, the CDT is the return type.
11885 // b) If the function has any non-uniform, non-linear parameters, then the
11886 // CDT is the type of the first such parameter.
11887 // c) If the CDT determined by a) or b) above is struct, union, or class
11888 // type which is pass-by-value (except for the type that maps to the
11889 // built-in complex data type), the characteristic data type is int.
11890 // d) If none of the above three cases is applicable, the CDT is int.
11891 // The VLEN is then determined based on the CDT and the size of vector
11892 // register of that ISA for which current vector version is generated. The
11893 // VLEN is computed using the formula below:
11894 // VLEN = sizeof(vector_register) / sizeof(CDT),
11895 // where vector register size specified in section 3.2.1 Registers and the
11896 // Stack Frame of original AMD64 ABI document.
11897 QualType RetType = FD->getReturnType();
11898 if (RetType.isNull())
11899 return 0;
11900 ASTContext &C = FD->getASTContext();
11901 QualType CDT;
11902 if (!RetType.isNull() && !RetType->isVoidType()) {
11903 CDT = RetType;
11904 } else {
11905 unsigned Offset = 0;
11906 if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) {
11907 if (ParamAttrs[Offset].Kind ==
11908 llvm::OpenMPIRBuilder::DeclareSimdKindTy::Vector)
11909 CDT = C.getPointerType(C.getCanonicalTagType(MD->getParent()));
11910 ++Offset;
11911 }
11912 if (CDT.isNull()) {
11913 for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
11914 if (ParamAttrs[I + Offset].Kind ==
11915 llvm::OpenMPIRBuilder::DeclareSimdKindTy::Vector) {
11916 CDT = FD->getParamDecl(I)->getType();
11917 break;
11918 }
11919 }
11920 }
11921 }
11922 if (CDT.isNull())
11923 CDT = C.IntTy;
11924 CDT = CDT->getCanonicalTypeUnqualified();
11925 if (CDT->isRecordType() || CDT->isUnionType())
11926 CDT = C.IntTy;
11927 return C.getTypeSize(CDT);
11928}
11929
11930// These are the functions that are needed to mangle the name of the
11931// vector functions generated by the compiler, according to the rules
11932// defined in the "Vector Function ABI specifications for AArch64",
11933// available at
11934// https://developer.arm.com/products/software-development-tools/hpc/arm-compiler-for-hpc/vector-function-abi.
11935
11936/// Maps To Vector (MTV), as defined in 4.1.1 of the AAVFABI (2021Q1).
11938 llvm::OpenMPIRBuilder::DeclareSimdKindTy Kind) {
11939 QT = QT.getCanonicalType();
11940
11941 if (QT->isVoidType())
11942 return false;
11943
11944 if (Kind == llvm::OpenMPIRBuilder::DeclareSimdKindTy::Uniform)
11945 return false;
11946
11947 if (Kind == llvm::OpenMPIRBuilder::DeclareSimdKindTy::LinearUVal ||
11948 Kind == llvm::OpenMPIRBuilder::DeclareSimdKindTy::LinearRef)
11949 return false;
11950
11951 if ((Kind == llvm::OpenMPIRBuilder::DeclareSimdKindTy::Linear ||
11952 Kind == llvm::OpenMPIRBuilder::DeclareSimdKindTy::LinearVal) &&
11953 !QT->isReferenceType())
11954 return false;
11955
11956 return true;
11957}
11958
11959/// Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI.
11961 QT = QT.getCanonicalType();
11962 unsigned Size = C.getTypeSize(QT);
11963
11964 // Only scalars and complex types up to 16 bytes wide set PBV to true.
11965 if (Size != 8 && Size != 16 && Size != 32 && Size != 64 && Size != 128)
11966 return false;
11967
11968 if (QT->isFloatingType())
11969 return true;
11970
11971 if (QT->isIntegerType())
11972 return true;
11973
11974 if (QT->isPointerType())
11975 return true;
11976
11977 // TODO: Add support for complex types (section 3.1.2, item 2).
11978
11979 return false;
11980}
11981
11982/// Computes the lane size (LS) of a return type or of an input parameter,
11983/// as defined by `LS(P)` in 3.2.1 of the AAVFABI.
11984/// TODO: Add support for references, section 3.2.1, item 1.
11985static unsigned getAArch64LS(QualType QT,
11986 llvm::OpenMPIRBuilder::DeclareSimdKindTy Kind,
11987 ASTContext &C) {
11988 if (!getAArch64MTV(QT, Kind) && QT.getCanonicalType()->isPointerType()) {
11990 if (getAArch64PBV(PTy, C))
11991 return C.getTypeSize(PTy);
11992 }
11993 if (getAArch64PBV(QT, C))
11994 return C.getTypeSize(QT);
11995
11996 return C.getTypeSize(C.getUIntPtrType());
11997}
11998
11999// Get Narrowest Data Size (NDS) and Widest Data Size (WDS) from the
12000// signature of the scalar function, as defined in 3.2.2 of the
12001// AAVFABI.
12002static std::tuple<unsigned, unsigned, bool>
12005 QualType RetType = FD->getReturnType().getCanonicalType();
12006
12007 ASTContext &C = FD->getASTContext();
12008
12009 bool OutputBecomesInput = false;
12010
12012 if (!RetType->isVoidType()) {
12013 Sizes.push_back(getAArch64LS(
12014 RetType, llvm::OpenMPIRBuilder::DeclareSimdKindTy::Vector, C));
12015 if (!getAArch64PBV(RetType, C) && getAArch64MTV(RetType, {}))
12016 OutputBecomesInput = true;
12017 }
12018 for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
12020 Sizes.push_back(getAArch64LS(QT, ParamAttrs[I].Kind, C));
12021 }
12022
12023 assert(!Sizes.empty() && "Unable to determine NDS and WDS.");
12024 // The LS of a function parameter / return value can only be a power
12025 // of 2, starting from 8 bits, up to 128.
12026 assert(llvm::all_of(Sizes,
12027 [](unsigned Size) {
12028 return Size == 8 || Size == 16 || Size == 32 ||
12029 Size == 64 || Size == 128;
12030 }) &&
12031 "Invalid size");
12032
12033 return std::make_tuple(*llvm::min_element(Sizes), *llvm::max_element(Sizes),
12034 OutputBecomesInput);
12035}
12036
12037static llvm::OpenMPIRBuilder::DeclareSimdBranch
12038convertDeclareSimdBranch(OMPDeclareSimdDeclAttr::BranchStateTy State) {
12039 switch (State) {
12040 case OMPDeclareSimdDeclAttr::BS_Undefined:
12041 return llvm::OpenMPIRBuilder::DeclareSimdBranch::Undefined;
12042 case OMPDeclareSimdDeclAttr::BS_Inbranch:
12043 return llvm::OpenMPIRBuilder::DeclareSimdBranch::Inbranch;
12044 case OMPDeclareSimdDeclAttr::BS_Notinbranch:
12045 return llvm::OpenMPIRBuilder::DeclareSimdBranch::Notinbranch;
12046 }
12047 llvm_unreachable("unexpected declare simd branch state");
12048}
12049
12050// Check the values provided via `simdlen` by the user.
12052 unsigned UserVLEN, unsigned WDS, char ISA) {
12053 // 1. A `simdlen(1)` doesn't produce vector signatures.
12054 if (UserVLEN == 1) {
12055 CGM.getDiags().Report(SLoc, diag::warn_simdlen_1_no_effect);
12056 return false;
12057 }
12058
12059 // 2. Section 3.3.1, item 1: user input must be a power of 2 for Advanced
12060 // SIMD.
12061 if (ISA == 'n' && UserVLEN && !llvm::isPowerOf2_32(UserVLEN)) {
12062 CGM.getDiags().Report(SLoc, diag::warn_simdlen_requires_power_of_2);
12063 return false;
12064 }
12065
12066 // 3. Section 3.4.1: SVE fixed length must obey the architectural limits.
12067 if (ISA == 's' && UserVLEN != 0 &&
12068 ((UserVLEN * WDS > 2048) || (UserVLEN * WDS % 128 != 0))) {
12069 CGM.getDiags().Report(SLoc, diag::warn_simdlen_must_fit_lanes) << WDS;
12070 return false;
12071 }
12072
12073 return true;
12074}
12075
12077 llvm::Function *Fn) {
12078 ASTContext &C = CGM.getContext();
12079 FD = FD->getMostRecentDecl();
12080 while (FD) {
12081 // Map params to their positions in function decl.
12082 llvm::DenseMap<const Decl *, unsigned> ParamPositions;
12083 if (isa<CXXMethodDecl>(FD))
12084 ParamPositions.try_emplace(FD, 0);
12085 unsigned ParamPos = ParamPositions.size();
12086 for (const ParmVarDecl *P : FD->parameters()) {
12087 ParamPositions.try_emplace(P->getCanonicalDecl(), ParamPos);
12088 ++ParamPos;
12089 }
12090 for (const auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) {
12092 ParamPositions.size());
12093 // Mark uniform parameters.
12094 for (const Expr *E : Attr->uniforms()) {
12095 E = E->IgnoreParenImpCasts();
12096 unsigned Pos;
12097 if (isa<CXXThisExpr>(E)) {
12098 Pos = ParamPositions[FD];
12099 } else {
12100 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
12101 ->getCanonicalDecl();
12102 auto It = ParamPositions.find(PVD);
12103 assert(It != ParamPositions.end() && "Function parameter not found");
12104 Pos = It->second;
12105 }
12106 ParamAttrs[Pos].Kind =
12107 llvm::OpenMPIRBuilder::DeclareSimdKindTy::Uniform;
12108 }
12109 // Get alignment info.
12110 auto *NI = Attr->alignments_begin();
12111 for (const Expr *E : Attr->aligneds()) {
12112 E = E->IgnoreParenImpCasts();
12113 unsigned Pos;
12114 QualType ParmTy;
12115 if (isa<CXXThisExpr>(E)) {
12116 Pos = ParamPositions[FD];
12117 ParmTy = E->getType();
12118 } else {
12119 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
12120 ->getCanonicalDecl();
12121 auto It = ParamPositions.find(PVD);
12122 assert(It != ParamPositions.end() && "Function parameter not found");
12123 Pos = It->second;
12124 ParmTy = PVD->getType();
12125 }
12126 ParamAttrs[Pos].Alignment =
12127 (*NI)
12128 ? (*NI)->EvaluateKnownConstInt(C)
12129 : llvm::APSInt::getUnsigned(
12130 C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy))
12131 .getQuantity());
12132 ++NI;
12133 }
12134 // Mark linear parameters.
12135 auto *SI = Attr->steps_begin();
12136 auto *MI = Attr->modifiers_begin();
12137 for (const Expr *E : Attr->linears()) {
12138 E = E->IgnoreParenImpCasts();
12139 unsigned Pos;
12140 bool IsReferenceType = false;
12141 // Rescaling factor needed to compute the linear parameter
12142 // value in the mangled name.
12143 unsigned PtrRescalingFactor = 1;
12144 if (isa<CXXThisExpr>(E)) {
12145 Pos = ParamPositions[FD];
12146 auto *P = cast<PointerType>(E->getType());
12147 PtrRescalingFactor = CGM.getContext()
12148 .getTypeSizeInChars(P->getPointeeType())
12149 .getQuantity();
12150 } else {
12151 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
12152 ->getCanonicalDecl();
12153 auto It = ParamPositions.find(PVD);
12154 assert(It != ParamPositions.end() && "Function parameter not found");
12155 Pos = It->second;
12156 if (auto *P = dyn_cast<PointerType>(PVD->getType()))
12157 PtrRescalingFactor = CGM.getContext()
12158 .getTypeSizeInChars(P->getPointeeType())
12159 .getQuantity();
12160 else if (PVD->getType()->isReferenceType()) {
12161 IsReferenceType = true;
12162 PtrRescalingFactor =
12163 CGM.getContext()
12164 .getTypeSizeInChars(PVD->getType().getNonReferenceType())
12165 .getQuantity();
12166 }
12167 }
12168 llvm::OpenMPIRBuilder::DeclareSimdAttrTy &ParamAttr = ParamAttrs[Pos];
12169 if (*MI == OMPC_LINEAR_ref)
12170 ParamAttr.Kind = llvm::OpenMPIRBuilder::DeclareSimdKindTy::LinearRef;
12171 else if (*MI == OMPC_LINEAR_uval)
12172 ParamAttr.Kind = llvm::OpenMPIRBuilder::DeclareSimdKindTy::LinearUVal;
12173 else if (IsReferenceType)
12174 ParamAttr.Kind = llvm::OpenMPIRBuilder::DeclareSimdKindTy::LinearVal;
12175 else
12176 ParamAttr.Kind = llvm::OpenMPIRBuilder::DeclareSimdKindTy::Linear;
12177 // Assuming a stride of 1, for `linear` without modifiers.
12178 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(1);
12179 if (*SI) {
12181 if (!(*SI)->EvaluateAsInt(Result, C, Expr::SE_AllowSideEffects)) {
12182 if (const auto *DRE =
12183 cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) {
12184 if (const auto *StridePVD =
12185 dyn_cast<ParmVarDecl>(DRE->getDecl())) {
12186 ParamAttr.HasVarStride = true;
12187 auto It = ParamPositions.find(StridePVD->getCanonicalDecl());
12188 assert(It != ParamPositions.end() &&
12189 "Function parameter not found");
12190 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(It->second);
12191 }
12192 }
12193 } else {
12194 ParamAttr.StrideOrArg = Result.Val.getInt();
12195 }
12196 }
12197 // If we are using a linear clause on a pointer, we need to
12198 // rescale the value of linear_step with the byte size of the
12199 // pointee type.
12200 if (!ParamAttr.HasVarStride &&
12201 (ParamAttr.Kind ==
12202 llvm::OpenMPIRBuilder::DeclareSimdKindTy::Linear ||
12203 ParamAttr.Kind ==
12204 llvm::OpenMPIRBuilder::DeclareSimdKindTy::LinearRef))
12205 ParamAttr.StrideOrArg = ParamAttr.StrideOrArg * PtrRescalingFactor;
12206 ++SI;
12207 ++MI;
12208 }
12209 llvm::APSInt VLENVal;
12210 SourceLocation ExprLoc;
12211 const Expr *VLENExpr = Attr->getSimdlen();
12212 if (VLENExpr) {
12213 VLENVal = VLENExpr->EvaluateKnownConstInt(C);
12214 ExprLoc = VLENExpr->getExprLoc();
12215 }
12216 llvm::OpenMPIRBuilder::DeclareSimdBranch State =
12217 convertDeclareSimdBranch(Attr->getBranchState());
12218 if (CGM.getTriple().isX86()) {
12219 unsigned NumElts = evaluateCDTSize(FD, ParamAttrs);
12220 assert(NumElts && "Non-zero simdlen/cdtsize expected");
12221 OMPBuilder.emitX86DeclareSimdFunction(Fn, NumElts, VLENVal, ParamAttrs,
12222 State);
12223 } else if (CGM.getTriple().getArch() == llvm::Triple::aarch64) {
12224 unsigned VLEN = VLENVal.getExtValue();
12225 // Get basic data for building the vector signature.
12226 const auto Data = getNDSWDS(FD, ParamAttrs);
12227 const unsigned NDS = std::get<0>(Data);
12228 const unsigned WDS = std::get<1>(Data);
12229 const bool OutputBecomesInput = std::get<2>(Data);
12230 if (CGM.getTarget().hasFeature("sve")) {
12231 if (validateAArch64Simdlen(CGM, ExprLoc, VLEN, WDS, 's'))
12232 OMPBuilder.emitAArch64DeclareSimdFunction(
12233 Fn, VLEN, ParamAttrs, State, 's', NDS, OutputBecomesInput);
12234 } else if (CGM.getTarget().hasFeature("neon")) {
12235 if (validateAArch64Simdlen(CGM, ExprLoc, VLEN, WDS, 'n'))
12236 OMPBuilder.emitAArch64DeclareSimdFunction(
12237 Fn, VLEN, ParamAttrs, State, 'n', NDS, OutputBecomesInput);
12238 }
12239 }
12240 }
12241 FD = FD->getPreviousDecl();
12242 }
12243}
12244
12245namespace {
12246/// Cleanup action for doacross support.
12247class DoacrossCleanupTy final : public EHScopeStack::Cleanup {
12248public:
12249 static const int DoacrossFinArgs = 2;
12250
12251private:
12252 llvm::FunctionCallee RTLFn;
12253 llvm::Value *Args[DoacrossFinArgs];
12254
12255public:
12256 DoacrossCleanupTy(llvm::FunctionCallee RTLFn,
12257 ArrayRef<llvm::Value *> CallArgs)
12258 : RTLFn(RTLFn) {
12259 assert(CallArgs.size() == DoacrossFinArgs);
12260 std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
12261 }
12262 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
12263 if (!CGF.HaveInsertPoint())
12264 return;
12265 CGF.EmitRuntimeCall(RTLFn, Args);
12266 }
12267};
12268} // namespace
12269
12271 const OMPLoopDirective &D,
12272 ArrayRef<Expr *> NumIterations) {
12273 if (!CGF.HaveInsertPoint())
12274 return;
12275
12276 ASTContext &C = CGM.getContext();
12277 QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
12278 RecordDecl *RD;
12279 if (KmpDimTy.isNull()) {
12280 // Build struct kmp_dim { // loop bounds info casted to kmp_int64
12281 // kmp_int64 lo; // lower
12282 // kmp_int64 up; // upper
12283 // kmp_int64 st; // stride
12284 // };
12285 RD = C.buildImplicitRecord("kmp_dim");
12286 RD->startDefinition();
12287 addFieldToRecordDecl(C, RD, Int64Ty);
12288 addFieldToRecordDecl(C, RD, Int64Ty);
12289 addFieldToRecordDecl(C, RD, Int64Ty);
12290 RD->completeDefinition();
12291 KmpDimTy = C.getCanonicalTagType(RD);
12292 } else {
12293 RD = KmpDimTy->castAsRecordDecl();
12294 }
12295 llvm::APInt Size(/*numBits=*/32, NumIterations.size());
12296 QualType ArrayTy = C.getConstantArrayType(KmpDimTy, Size, nullptr,
12298
12299 Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
12300 CGF.EmitNullInitialization(DimsAddr, ArrayTy);
12301 enum { LowerFD = 0, UpperFD, StrideFD };
12302 // Fill dims with data.
12303 for (unsigned I = 0, E = NumIterations.size(); I < E; ++I) {
12304 LValue DimsLVal = CGF.MakeAddrLValue(
12305 CGF.Builder.CreateConstArrayGEP(DimsAddr, I), KmpDimTy);
12306 // dims.upper = num_iterations;
12307 LValue UpperLVal = CGF.EmitLValueForField(
12308 DimsLVal, *std::next(RD->field_begin(), UpperFD));
12309 llvm::Value *NumIterVal = CGF.EmitScalarConversion(
12310 CGF.EmitScalarExpr(NumIterations[I]), NumIterations[I]->getType(),
12311 Int64Ty, NumIterations[I]->getExprLoc());
12312 CGF.EmitStoreOfScalar(NumIterVal, UpperLVal);
12313 // dims.stride = 1;
12314 LValue StrideLVal = CGF.EmitLValueForField(
12315 DimsLVal, *std::next(RD->field_begin(), StrideFD));
12316 CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1),
12317 StrideLVal);
12318 }
12319
12320 // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid,
12321 // kmp_int32 num_dims, struct kmp_dim * dims);
12322 llvm::Value *Args[] = {
12323 emitUpdateLocation(CGF, D.getBeginLoc()),
12324 getThreadID(CGF, D.getBeginLoc()),
12325 llvm::ConstantInt::getSigned(CGM.Int32Ty, NumIterations.size()),
12327 CGF.Builder.CreateConstArrayGEP(DimsAddr, 0).emitRawPointer(CGF),
12328 CGM.VoidPtrTy)};
12329
12330 llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
12331 CGM.getModule(), OMPRTL___kmpc_doacross_init);
12332 CGF.EmitRuntimeCall(RTLFn, Args);
12333 llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = {
12334 emitUpdateLocation(CGF, D.getEndLoc()), getThreadID(CGF, D.getEndLoc())};
12335 llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
12336 CGM.getModule(), OMPRTL___kmpc_doacross_fini);
12337 CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
12338 llvm::ArrayRef(FiniArgs));
12339}
12340
12341template <typename T>
12343 const T *C, llvm::Value *ULoc,
12344 llvm::Value *ThreadID) {
12345 QualType Int64Ty =
12346 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
12347 llvm::APInt Size(/*numBits=*/32, C->getNumLoops());
12349 Int64Ty, Size, nullptr, ArraySizeModifier::Normal, 0);
12350 Address CntAddr = CGF.CreateMemTemp(ArrayTy, ".cnt.addr");
12351 for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) {
12352 const Expr *CounterVal = C->getLoopData(I);
12353 assert(CounterVal);
12354 llvm::Value *CntVal = CGF.EmitScalarConversion(
12355 CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty,
12356 CounterVal->getExprLoc());
12357 CGF.EmitStoreOfScalar(CntVal, CGF.Builder.CreateConstArrayGEP(CntAddr, I),
12358 /*Volatile=*/false, Int64Ty);
12359 }
12360 llvm::Value *Args[] = {
12361 ULoc, ThreadID,
12362 CGF.Builder.CreateConstArrayGEP(CntAddr, 0).emitRawPointer(CGF)};
12363 llvm::FunctionCallee RTLFn;
12364 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
12365 OMPDoacrossKind<T> ODK;
12366 if (ODK.isSource(C)) {
12367 RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
12368 OMPRTL___kmpc_doacross_post);
12369 } else {
12370 assert(ODK.isSink(C) && "Expect sink modifier.");
12371 RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
12372 OMPRTL___kmpc_doacross_wait);
12373 }
12374 CGF.EmitRuntimeCall(RTLFn, Args);
12375}
12376
12378 const OMPDependClause *C) {
12380 CGF, CGM, C, emitUpdateLocation(CGF, C->getBeginLoc()),
12381 getThreadID(CGF, C->getBeginLoc()));
12382}
12383
12385 const OMPDoacrossClause *C) {
12387 CGF, CGM, C, emitUpdateLocation(CGF, C->getBeginLoc()),
12388 getThreadID(CGF, C->getBeginLoc()));
12389}
12390
12392 llvm::FunctionCallee Callee,
12393 ArrayRef<llvm::Value *> Args) const {
12394 assert(Loc.isValid() && "Outlined function call location must be valid.");
12396
12397 if (auto *Fn = dyn_cast<llvm::Function>(Callee.getCallee())) {
12398 if (Fn->doesNotThrow()) {
12399 CGF.EmitNounwindRuntimeCall(Fn, Args);
12400 return;
12401 }
12402 }
12403 CGF.EmitRuntimeCall(Callee, Args);
12404}
12405
12407 CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn,
12408 ArrayRef<llvm::Value *> Args) const {
12409 emitCall(CGF, Loc, OutlinedFn, Args);
12410}
12411
12413 if (const auto *FD = dyn_cast<FunctionDecl>(D))
12414 if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(FD))
12416}
12417
12419 const VarDecl *NativeParam,
12420 const VarDecl *TargetParam) const {
12421 return CGF.GetAddrOfLocalVar(NativeParam);
12422}
12423
12424/// Return the allocator value emitted from the \p Allocator expression, or a
12425/// null allocator (the default) when no allocator expression is specified.
12426static llvm::Value *getAllocatorVal(CodeGenFunction &CGF,
12427 const Expr *Allocator) {
12428 llvm::Value *AllocVal;
12429 if (Allocator) {
12430 AllocVal = CGF.EmitScalarExpr(Allocator);
12431 // According to the standard, the original allocator type is an enum
12432 // (integer). Convert to pointer type, if required.
12433 AllocVal = CGF.EmitScalarConversion(AllocVal, Allocator->getType(),
12434 CGF.getContext().VoidPtrTy,
12435 Allocator->getExprLoc());
12436 } else {
12437 // If no allocator is specified, it defaults to the null allocator.
 // NOTE(review): listing line 12439 (the argument of getNullValue and the end
 // of this statement) is absent from this extract - restore it from upstream.
12438 AllocVal = llvm::Constant::getNullValue(
12440 }
12441 return AllocVal;
12442}
12443
12444/// Return the alignment from an allocate directive if present.
/// The alignment is looked up with CGM.getOMPAllocateAlignment(VD). The result
/// is nullptr when that lookup yields no value; otherwise it is a constant of
/// type CGM.SizeTy holding the alignment quantity.
12445static llvm::Value *getAlignmentValue(CodeGenModule &CGM, const VarDecl *VD) {
12446 std::optional<CharUnits> AllocateAlignment = CGM.getOMPAllocateAlignment(VD);
12447
 // No alignment recorded for this declaration: the caller gets a null value.
12448 if (!AllocateAlignment)
12449 return nullptr;
12450
12451 return llvm::ConstantInt::get(CGM.SizeTy, AllocateAlignment->getQuantity());
12452}
12453
12455 const VarDecl *VD) {
12456 if (!VD)
12457 return Address::invalid();
12458 Address UntiedAddr = Address::invalid();
12459 Address UntiedRealAddr = Address::invalid();
12460 auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
12461 if (It != FunctionToUntiedTaskStackMap.end()) {
12462 const UntiedLocalVarsAddressesMap &UntiedData =
12463 UntiedLocalVarsStack[It->second];
12464 auto I = UntiedData.find(VD);
12465 if (I != UntiedData.end()) {
12466 UntiedAddr = I->second.first;
12467 UntiedRealAddr = I->second.second;
12468 }
12469 }
12470 const VarDecl *CVD = VD->getCanonicalDecl();
12471 if (CVD->hasAttr<OMPAllocateDeclAttr>()) {
12472 // Use the default allocation.
12473 if (!isAllocatableDecl(VD))
12474 return UntiedAddr;
12475 llvm::Value *Size;
12476 CharUnits Align = CGM.getContext().getDeclAlign(CVD);
12477 if (CVD->getType()->isVariablyModifiedType()) {
12478 Size = CGF.getTypeSize(CVD->getType());
12479 // Align the size: ((size + align - 1) / align) * align
12480 Size = CGF.Builder.CreateNUWAdd(
12481 Size, CGM.getSize(Align - CharUnits::fromQuantity(1)));
12482 Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align));
12483 Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align));
12484 } else {
12485 CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType());
12486 Size = CGM.getSize(Sz.alignTo(Align));
12487 }
12488 llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc());
12489 const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
12490 const Expr *Allocator = AA->getAllocator();
12491 llvm::Value *AllocVal = getAllocatorVal(CGF, Allocator);
12492 llvm::Value *Alignment = getAlignmentValue(CGM, CVD);
12494 Args.push_back(ThreadID);
12495 if (Alignment)
12496 Args.push_back(Alignment);
12497 Args.push_back(Size);
12498 Args.push_back(AllocVal);
12499 llvm::omp::RuntimeFunction FnID =
12500 Alignment ? OMPRTL___kmpc_aligned_alloc : OMPRTL___kmpc_alloc;
12501 llvm::Value *Addr = CGF.EmitRuntimeCall(
12502 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), FnID), Args,
12503 getName({CVD->getName(), ".void.addr"}));
12504 llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
12505 CGM.getModule(), OMPRTL___kmpc_free);
12506 QualType Ty = CGM.getContext().getPointerType(CVD->getType());
12508 Addr, CGF.ConvertTypeForMem(Ty), getName({CVD->getName(), ".addr"}));
12509 if (UntiedAddr.isValid())
12510 CGF.EmitStoreOfScalar(Addr, UntiedAddr, /*Volatile=*/false, Ty);
12511
12512 // Cleanup action for allocate support.
12513 class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup {
12514 llvm::FunctionCallee RTLFn;
12515 SourceLocation::UIntTy LocEncoding;
12516 Address Addr;
12517 const Expr *AllocExpr;
12518
12519 public:
12520 OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn,
12521 SourceLocation::UIntTy LocEncoding, Address Addr,
12522 const Expr *AllocExpr)
12523 : RTLFn(RTLFn), LocEncoding(LocEncoding), Addr(Addr),
12524 AllocExpr(AllocExpr) {}
12525 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
12526 if (!CGF.HaveInsertPoint())
12527 return;
12528 llvm::Value *Args[3];
12529 Args[0] = CGF.CGM.getOpenMPRuntime().getThreadID(
12530 CGF, SourceLocation::getFromRawEncoding(LocEncoding));
12532 Addr.emitRawPointer(CGF), CGF.VoidPtrTy);
12533 llvm::Value *AllocVal = getAllocatorVal(CGF, AllocExpr);
12534 Args[2] = AllocVal;
12535 CGF.EmitRuntimeCall(RTLFn, Args);
12536 }
12537 };
12538 Address VDAddr =
12539 UntiedRealAddr.isValid()
12540 ? UntiedRealAddr
12541 : Address(Addr, CGF.ConvertTypeForMem(CVD->getType()), Align);
12542 CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(
12543 NormalAndEHCleanup, FiniRTLFn, CVD->getLocation().getRawEncoding(),
12544 VDAddr, Allocator);
12545 if (UntiedRealAddr.isValid())
12546 if (auto *Region =
12547 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
12548 Region->emitUntiedSwitch(CGF);
12549 return VDAddr;
12550 }
12551 return UntiedAddr;
12552}
12553
12555 const VarDecl *VD) const {
12556 auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
12557 if (It == FunctionToUntiedTaskStackMap.end())
12558 return false;
12559 return UntiedLocalVarsStack[It->second].count(VD) > 0;
12560}
12561
12563 CodeGenModule &CGM, const OMPLoopDirective &S)
12564 : CGM(CGM), NeedToPush(S.hasClausesOfKind<OMPNontemporalClause>()) {
12565 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
12566 if (!NeedToPush)
12567 return;
12569 CGM.getOpenMPRuntime().NontemporalDeclsStack.emplace_back();
12570 for (const auto *C : S.getClausesOfKind<OMPNontemporalClause>()) {
12571 for (const Stmt *Ref : C->private_refs()) {
12572 const auto *SimpleRefExpr = cast<Expr>(Ref)->IgnoreParenImpCasts();
12573 const ValueDecl *VD;
12574 if (const auto *DRE = dyn_cast<DeclRefExpr>(SimpleRefExpr)) {
12575 VD = DRE->getDecl();
12576 } else {
12577 const auto *ME = cast<MemberExpr>(SimpleRefExpr);
12578 assert((ME->isImplicitCXXThis() ||
12579 isa<CXXThisExpr>(ME->getBase()->IgnoreParenImpCasts())) &&
12580 "Expected member of current class.");
12581 VD = ME->getMemberDecl();
12582 }
12583 DS.insert(VD);
12584 }
12585 }
12586}
12587
12589 if (!NeedToPush)
12590 return;
12591 CGM.getOpenMPRuntime().NontemporalDeclsStack.pop_back();
12592}
12593
12595 CodeGenFunction &CGF,
12596 const llvm::MapVector<CanonicalDeclPtr<const VarDecl>,
12597 std::pair<Address, Address>> &LocalVars)
12598 : CGM(CGF.CGM), NeedToPush(!LocalVars.empty()) {
12599 if (!NeedToPush)
12600 return;
12601 CGM.getOpenMPRuntime().FunctionToUntiedTaskStackMap.try_emplace(
12602 CGF.CurFn, CGM.getOpenMPRuntime().UntiedLocalVarsStack.size());
12603 CGM.getOpenMPRuntime().UntiedLocalVarsStack.push_back(LocalVars);
12604}
12605
12607 if (!NeedToPush)
12608 return;
12609 CGM.getOpenMPRuntime().UntiedLocalVarsStack.pop_back();
12610}
12611
12613 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
12614
12615 return llvm::any_of(
12616 CGM.getOpenMPRuntime().NontemporalDeclsStack,
12617 [VD](const NontemporalDeclsSet &Set) { return Set.contains(VD); });
12618}
12619
/// Collect the declarations referenced by directive \p S for which the
/// lastprivate conditional analysis has to be disabled.
///
/// Candidates are gathered from the captures of target/tasking directives and
/// from the scalar variables listed in private, firstprivate, lastprivate,
/// reduction and linear clauses. A candidate is added to
/// \p NeedToAddForLPCsAsDisabled when the innermost entry of
/// LastprivateConditionalStack that mentions it is not already disabled.
12620void CGOpenMPRuntime::LastprivateConditionalRAII::tryToDisableInnerAnalysis(
12621 const OMPExecutableDirective &S,
12622 llvm::DenseSet<CanonicalDeclPtr<const Decl>> &NeedToAddForLPCsAsDisabled)
12623 const {
12624 llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToCheckForLPCs;
12625 // Vars in target/task regions must be excluded completely.
12626 if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()) ||
12627 isOpenMPTaskingDirective(S.getDirectiveKind())) {
 // NOTE(review): listing line 12628 (presumably the declaration of
 // CaptureRegions) is absent from this extract - restore it from upstream.
12629 getOpenMPCaptureRegions(CaptureRegions, S.getDirectiveKind());
12630 const CapturedStmt *CS = S.getCapturedStmt(CaptureRegions.front());
 // Only captures of variables (by reference or by copy) are candidates.
12631 for (const CapturedStmt::Capture &Cap : CS->captures()) {
12632 if (Cap.capturesVariable() || Cap.capturesVariableByCopy())
12633 NeedToCheckForLPCs.insert(Cap.getCapturedVar());
12634 }
12635 }
12636 // Exclude vars in private clauses.
 // Only scalar-typed list items that are plain DeclRefExprs (after stripping
 // parentheses and implicit casts) are considered; the same filter is applied
 // to every clause kind below.
12637 for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
12638 for (const Expr *Ref : C->varlist()) {
12639 if (!Ref->getType()->isScalarType())
12640 continue;
12641 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12642 if (!DRE)
12643 continue;
12644 NeedToCheckForLPCs.insert(DRE->getDecl());
12645 }
12646 }
 // Same for firstprivate clauses.
12647 for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
12648 for (const Expr *Ref : C->varlist()) {
12649 if (!Ref->getType()->isScalarType())
12650 continue;
12651 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12652 if (!DRE)
12653 continue;
12654 NeedToCheckForLPCs.insert(DRE->getDecl());
12655 }
12656 }
 // Same for lastprivate clauses.
12657 for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
12658 for (const Expr *Ref : C->varlist()) {
12659 if (!Ref->getType()->isScalarType())
12660 continue;
12661 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12662 if (!DRE)
12663 continue;
12664 NeedToCheckForLPCs.insert(DRE->getDecl());
12665 }
12666 }
 // Same for reduction clauses.
12667 for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
12668 for (const Expr *Ref : C->varlist()) {
12669 if (!Ref->getType()->isScalarType())
12670 continue;
12671 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12672 if (!DRE)
12673 continue;
12674 NeedToCheckForLPCs.insert(DRE->getDecl());
12675 }
12676 }
 // Same for linear clauses.
12677 for (const auto *C : S.getClausesOfKind<OMPLinearClause>()) {
12678 for (const Expr *Ref : C->varlist()) {
12679 if (!Ref->getType()->isScalarType())
12680 continue;
12681 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12682 if (!DRE)
12683 continue;
12684 NeedToCheckForLPCs.insert(DRE->getDecl());
12685 }
12686 }
 // For every candidate, walk the stack from its last entry backwards and stop
 // at the first entry that knows the declaration. The candidate is reported
 // only if that entry is not marked as disabled.
12687 for (const Decl *VD : NeedToCheckForLPCs) {
12688 for (const LastprivateConditionalData &Data :
12689 llvm::reverse(CGM.getOpenMPRuntime().LastprivateConditionalStack)) {
12690 if (Data.DeclToUniqueName.count(VD) > 0) {
12691 if (!Data.Disabled)
12692 NeedToAddForLPCsAsDisabled.insert(VD);
12693 break;
12694 }
12695 }
12696 }
12697}
12698
/// Constructor that registers the lastprivate conditional variables of \p S.
///
/// The action is PushAsLastprivateConditional only when the OpenMP version is
/// at least 50 and \p S has a lastprivate clause of the conditional kind;
/// otherwise it is DoNotPush and the constructor returns early. When pushing,
/// a new entry is appended to LastprivateConditionalStack and filled with one
/// unique name per listed variable, the loop counter lvalue \p IVLVal and the
/// current function.
12699CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
12700 CodeGenFunction &CGF, const OMPExecutableDirective &S, LValue IVLVal)
12701 : CGM(CGF.CGM),
12702 Action((CGM.getLangOpts().OpenMP >= 50 &&
12703 llvm::any_of(S.getClausesOfKind<OMPLastprivateClause>(),
12704 [](const OMPLastprivateClause *C) {
12705 return C->getKind() ==
12706 OMPC_LASTPRIVATE_conditional;
12707 }))
12708 ? ActionToDo::PushAsLastprivateConditional
12709 : ActionToDo::DoNotPush) {
12710 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
12711 if (CGM.getLangOpts().OpenMP < 50 || Action == ActionToDo::DoNotPush)
12712 return;
12713 assert(Action == ActionToDo::PushAsLastprivateConditional &&
12714 "Expected a push action.");
 // NOTE(review): listing line 12715 (presumably the start of the declaration
 // of Data, bound to the new stack entry) is absent from this extract.
12716 CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
12717 for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
 // Non-conditional lastprivate clauses are not tracked.
12718 if (C->getKind() != OMPC_LASTPRIVATE_conditional)
12719 continue;
12720
 // Map each listed declaration to a unique name built with the "pl_cond"
 // prefix.
12721 for (const Expr *Ref : C->varlist()) {
12722 Data.DeclToUniqueName.insert(std::make_pair(
12723 cast<DeclRefExpr>(Ref->IgnoreParenImpCasts())->getDecl(),
12724 SmallString<16>(generateUniqueName(CGM, "pl_cond", Ref))));
12725 }
12726 }
12727 Data.IVLVal = IVLVal;
12728 Data.Fn = CGF.CurFn;
12729}
12730
/// Constructor that disables the lastprivate conditional analysis for the
/// declarations reported by tryToDisableInnerAnalysis.
///
/// Does nothing for OpenMP versions below 50. If the reported set is not
/// empty, the action becomes DisableLastprivateConditional and a stack entry
/// marked as Disabled is filled with those declarations.
/// NOTE(review): listing line 12732 (the parameter list; the body uses CGF and
/// S) is absent from this extract - restore it from upstream.
12731CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
12733 : CGM(CGF.CGM), Action(ActionToDo::DoNotPush) {
12734 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
12735 if (CGM.getLangOpts().OpenMP < 50)
12736 return;
12737 llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToAddForLPCsAsDisabled;
12738 tryToDisableInnerAnalysis(S, NeedToAddForLPCsAsDisabled);
12739 if (!NeedToAddForLPCsAsDisabled.empty()) {
12740 Action = ActionToDo::DisableLastprivateConditional;
 // NOTE(review): listing line 12742 (the initializer of Data) is absent from
 // this extract; presumably it appends a new entry to
 // LastprivateConditionalStack - confirm against upstream.
12741 LastprivateConditionalData &Data =
12743 for (const Decl *VD : NeedToAddForLPCsAsDisabled)
12744 Data.DeclToUniqueName.try_emplace(VD);
12745 Data.Fn = CGF.CurFn;
12746 Data.Disabled = true;
12747 }
12748}
12749
12750CGOpenMPRuntime::LastprivateConditionalRAII
12752 CodeGenFunction &CGF, const OMPExecutableDirective &S) {
12753 return LastprivateConditionalRAII(CGF, S);
12754}
12755
12757 if (CGM.getLangOpts().OpenMP < 50)
12758 return;
12759 if (Action == ActionToDo::DisableLastprivateConditional) {
12760 assert(CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
12761 "Expected list of disabled private vars.");
12762 CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
12763 }
12764 if (Action == ActionToDo::PushAsLastprivateConditional) {
12765 assert(
12766 !CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
12767 "Expected list of lastprivate conditional vars.");
12768 CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
12769 }
12770}
12771
12773 const VarDecl *VD) {
12774 ASTContext &C = CGM.getContext();
12775 auto I = LastprivateConditionalToTypes.try_emplace(CGF.CurFn).first;
12776 QualType NewType;
12777 const FieldDecl *VDField;
12778 const FieldDecl *FiredField;
12779 LValue BaseLVal;
12780 auto VI = I->getSecond().find(VD);
12781 if (VI == I->getSecond().end()) {
12782 RecordDecl *RD = C.buildImplicitRecord("lasprivate.conditional");
12783 RD->startDefinition();
12784 VDField = addFieldToRecordDecl(C, RD, VD->getType().getNonReferenceType());
12785 FiredField = addFieldToRecordDecl(C, RD, C.CharTy);
12786 RD->completeDefinition();
12787 NewType = C.getCanonicalTagType(RD);
12788 Address Addr = CGF.CreateMemTemp(NewType, C.getDeclAlign(VD), VD->getName());
12789 BaseLVal = CGF.MakeAddrLValue(Addr, NewType, AlignmentSource::Decl);
12790 I->getSecond().try_emplace(VD, NewType, VDField, FiredField, BaseLVal);
12791 } else {
12792 NewType = std::get<0>(VI->getSecond());
12793 VDField = std::get<1>(VI->getSecond());
12794 FiredField = std::get<2>(VI->getSecond());
12795 BaseLVal = std::get<3>(VI->getSecond());
12796 }
12797 LValue FiredLVal =
12798 CGF.EmitLValueForField(BaseLVal, FiredField);
12800 llvm::ConstantInt::getNullValue(CGF.ConvertTypeForMem(C.CharTy)),
12801 FiredLVal);
12802 return CGF.EmitLValueForField(BaseLVal, VDField).getAddress();
12803}
12804
12805namespace {
12806/// Checks if the lastprivate conditional variable is referenced in LHS.
/// The visitor returns true when the visited expression refers to a
/// declaration registered in a non-disabled entry of the list passed to the
/// constructor; details of the match are then available via getFoundData().
12807class LastprivateConditionalRefChecker final
12808 : public ConstStmtVisitor<LastprivateConditionalRefChecker, bool> {
 // NOTE(review): listing line 12809 (presumably the declaration of the LPM
 // member initialized by the constructor) is absent from this extract.
 // Expression and canonical declaration of the match, if any.
12810 const Expr *FoundE = nullptr;
12811 const Decl *FoundD = nullptr;
 // Unique name, loop counter lvalue and function copied from the matching
 // entry.
12812 StringRef UniqueDeclName;
12813 LValue IVLVal;
12814 llvm::Function *FoundFn = nullptr;
12815 SourceLocation Loc;
12816
12817public:
 /// Look up the referenced declaration in the entries; a hit in a disabled
 /// entry yields false, a hit in an enabled entry records the match.
12818 bool VisitDeclRefExpr(const DeclRefExpr *E) {
 // NOTE(review): listing line 12819 (the head of the loop declaring D) is
 // absent from this extract; the visible tail iterates llvm::reverse(LPM).
12820 llvm::reverse(LPM)) {
12821 auto It = D.DeclToUniqueName.find(E->getDecl());
12822 if (It == D.DeclToUniqueName.end())
12823 continue;
12824 if (D.Disabled)
12825 return false;
12826 FoundE = E;
12827 FoundD = E->getDecl()->getCanonicalDecl();
12828 UniqueDeclName = It->second;
12829 IVLVal = D.IVLVal;
12830 FoundFn = D.Fn;
12831 break;
12832 }
12833 return FoundE == E;
12834 }
 /// Same lookup as for DeclRefExpr, keyed by the member declaration.
12835 bool VisitMemberExpr(const MemberExpr *E) {
 // NOTE(review): listing line 12836 (the condition guarding this early
 // return) and line 12838 (the head of the loop declaring D) are absent from
 // this extract - restore them from upstream.
12837 return false;
12839 llvm::reverse(LPM)) {
12840 auto It = D.DeclToUniqueName.find(E->getMemberDecl());
12841 if (It == D.DeclToUniqueName.end())
12842 continue;
12843 if (D.Disabled)
12844 return false;
12845 FoundE = E;
12846 FoundD = E->getMemberDecl()->getCanonicalDecl();
12847 UniqueDeclName = It->second;
12848 IVLVal = D.IVLVal;
12849 FoundFn = D.Fn;
12850 break;
12851 }
12852 return FoundE == E;
12853 }
 /// Generic traversal: visit the children, skipping null children and
 /// expressions that are not glvalues, and stop at the first child for which
 /// the visit returns true.
12854 bool VisitStmt(const Stmt *S) {
12855 for (const Stmt *Child : S->children()) {
12856 if (!Child)
12857 continue;
12858 if (const auto *E = dyn_cast<Expr>(Child))
12859 if (!E->isGLValue())
12860 continue;
12861 if (Visit(Child))
12862 return true;
12863 }
12864 return false;
12865 }
12866 explicit LastprivateConditionalRefChecker(
12867 ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM)
12868 : LPM(LPM) {}
 /// Return the recorded match as (expression, canonical declaration, unique
 /// name, loop counter lvalue, function).
12869 std::tuple<const Expr *, const Decl *, StringRef, LValue, llvm::Function *>
12870 getFoundData() const {
12871 return std::make_tuple(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn);
12872 }
12873};
12874} // namespace
12875
12877 LValue IVLVal,
12878 StringRef UniqueDeclName,
12879 LValue LVal,
12880 SourceLocation Loc) {
12881 // Last updated loop counter for the lastprivate conditional var.
12882 // int<xx> last_iv = 0;
12883 llvm::Type *LLIVTy = CGF.ConvertTypeForMem(IVLVal.getType());
12884 llvm::Constant *LastIV = OMPBuilder.getOrCreateInternalVariable(
12885 LLIVTy, getName({UniqueDeclName, "iv"}));
12886 cast<llvm::GlobalVariable>(LastIV)->setAlignment(
12887 IVLVal.getAlignment().getAsAlign());
12888 LValue LastIVLVal =
12889 CGF.MakeNaturalAlignRawAddrLValue(LastIV, IVLVal.getType());
12890
12891 // Last value of the lastprivate conditional.
12892 // decltype(priv_a) last_a;
12893 llvm::GlobalVariable *Last = OMPBuilder.getOrCreateInternalVariable(
12894 CGF.ConvertTypeForMem(LVal.getType()), UniqueDeclName);
12895 cast<llvm::GlobalVariable>(Last)->setAlignment(
12896 LVal.getAlignment().getAsAlign());
12897 LValue LastLVal =
12898 CGF.MakeRawAddrLValue(Last, LVal.getType(), LVal.getAlignment());
12899
12900 // Global loop counter. Required to handle inner parallel-for regions.
12901 // iv
12902 llvm::Value *IVVal = CGF.EmitLoadOfScalar(IVLVal, Loc);
12903
12904 // #pragma omp critical(a)
12905 // if (last_iv <= iv) {
12906 // last_iv = iv;
12907 // last_a = priv_a;
12908 // }
12909 auto &&CodeGen = [&LastIVLVal, &IVLVal, IVVal, &LVal, &LastLVal,
12910 Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
12911 Action.Enter(CGF);
12912 llvm::Value *LastIVVal = CGF.EmitLoadOfScalar(LastIVLVal, Loc);
12913 // (last_iv <= iv) ? Check if the variable is updated and store new
12914 // value in global var.
12915 llvm::Value *CmpRes;
12916 if (IVLVal.getType()->isSignedIntegerType()) {
12917 CmpRes = CGF.Builder.CreateICmpSLE(LastIVVal, IVVal);
12918 } else {
12919 assert(IVLVal.getType()->isUnsignedIntegerType() &&
12920 "Loop iteration variable must be integer.");
12921 CmpRes = CGF.Builder.CreateICmpULE(LastIVVal, IVVal);
12922 }
12923 llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lp_cond_then");
12924 llvm::BasicBlock *ExitBB = CGF.createBasicBlock("lp_cond_exit");
12925 CGF.Builder.CreateCondBr(CmpRes, ThenBB, ExitBB);
12926 // {
12927 CGF.EmitBlock(ThenBB);
12928
12929 // last_iv = iv;
12930 CGF.EmitStoreOfScalar(IVVal, LastIVLVal);
12931
12932 // last_a = priv_a;
12933 switch (CGF.getEvaluationKind(LVal.getType())) {
12934 case TEK_Scalar: {
12935 llvm::Value *PrivVal = CGF.EmitLoadOfScalar(LVal, Loc);
12936 CGF.EmitStoreOfScalar(PrivVal, LastLVal);
12937 break;
12938 }
12939 case TEK_Complex: {
12940 CodeGenFunction::ComplexPairTy PrivVal = CGF.EmitLoadOfComplex(LVal, Loc);
12941 CGF.EmitStoreOfComplex(PrivVal, LastLVal, /*isInit=*/false);
12942 break;
12943 }
12944 case TEK_Aggregate:
12945 llvm_unreachable(
12946 "Aggregates are not supported in lastprivate conditional.");
12947 }
12948 // }
12949 CGF.EmitBranch(ExitBB);
12950 // There is no need to emit line number for unconditional branch.
12952 CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
12953 };
12954
12955 if (CGM.getLangOpts().OpenMPSimd) {
12956 // Do not emit as a critical region as no parallel region could be emitted.
12957 RegionCodeGenTy ThenRCG(CodeGen);
12958 ThenRCG(CGF);
12959 } else {
12960 emitCriticalRegion(CGF, UniqueDeclName, CodeGen, Loc);
12961 }
12962}
12963
12965 const Expr *LHS) {
12966 if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
12967 return;
12968 LastprivateConditionalRefChecker Checker(LastprivateConditionalStack);
12969 if (!Checker.Visit(LHS))
12970 return;
12971 const Expr *FoundE;
12972 const Decl *FoundD;
12973 StringRef UniqueDeclName;
12974 LValue IVLVal;
12975 llvm::Function *FoundFn;
12976 std::tie(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn) =
12977 Checker.getFoundData();
12978 if (FoundFn != CGF.CurFn) {
12979 // Special codegen for inner parallel regions.
12980 // ((struct.lastprivate.conditional*)&priv_a)->Fired = 1;
12981 auto It = LastprivateConditionalToTypes[FoundFn].find(FoundD);
12982 assert(It != LastprivateConditionalToTypes[FoundFn].end() &&
12983 "Lastprivate conditional is not found in outer region.");
12984 QualType StructTy = std::get<0>(It->getSecond());
12985 const FieldDecl* FiredDecl = std::get<2>(It->getSecond());
12986 LValue PrivLVal = CGF.EmitLValue(FoundE);
12988 PrivLVal.getAddress(),
12989 CGF.ConvertTypeForMem(CGF.getContext().getPointerType(StructTy)),
12990 CGF.ConvertTypeForMem(StructTy));
12991 LValue BaseLVal =
12992 CGF.MakeAddrLValue(StructAddr, StructTy, AlignmentSource::Decl);
12993 LValue FiredLVal = CGF.EmitLValueForField(BaseLVal, FiredDecl);
12994 CGF.EmitAtomicStore(RValue::get(llvm::ConstantInt::get(
12995 CGF.ConvertTypeForMem(FiredDecl->getType()), 1)),
12996 FiredLVal, llvm::AtomicOrdering::Unordered,
12997 /*IsVolatile=*/true, /*isInit=*/false);
12998 return;
12999 }
13000
13001 // Private address of the lastprivate conditional in the current context.
13002 // priv_a
13003 LValue LVal = CGF.EmitLValue(FoundE);
13004 emitLastprivateConditionalUpdate(CGF, IVLVal, UniqueDeclName, LVal,
13005 FoundE->getExprLoc());
13006}
13007
13010 const llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> &IgnoredDecls) {
13011 if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
13012 return;
13013 auto Range = llvm::reverse(LastprivateConditionalStack);
13014 auto It = llvm::find_if(
13015 Range, [](const LastprivateConditionalData &D) { return !D.Disabled; });
13016 if (It == Range.end() || It->Fn != CGF.CurFn)
13017 return;
13018 auto LPCI = LastprivateConditionalToTypes.find(It->Fn);
13019 assert(LPCI != LastprivateConditionalToTypes.end() &&
13020 "Lastprivates must be registered already.");
13022 getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind());
13023 const CapturedStmt *CS = D.getCapturedStmt(CaptureRegions.back());
13024 for (const auto &Pair : It->DeclToUniqueName) {
13025 const auto *VD = cast<VarDecl>(Pair.first->getCanonicalDecl());
13026 if (!CS->capturesVariable(VD) || IgnoredDecls.contains(VD))
13027 continue;
13028 auto I = LPCI->getSecond().find(Pair.first);
13029 assert(I != LPCI->getSecond().end() &&
13030 "Lastprivate must be rehistered already.");
13031 // bool Cmp = priv_a.Fired != 0;
13032 LValue BaseLVal = std::get<3>(I->getSecond());
13033 LValue FiredLVal =
13034 CGF.EmitLValueForField(BaseLVal, std::get<2>(I->getSecond()));
13035 llvm::Value *Res = CGF.EmitLoadOfScalar(FiredLVal, D.getBeginLoc());
13036 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Res);
13037 llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lpc.then");
13038 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("lpc.done");
13039 // if (Cmp) {
13040 CGF.Builder.CreateCondBr(Cmp, ThenBB, DoneBB);
13041 CGF.EmitBlock(ThenBB);
13042 Address Addr = CGF.GetAddrOfLocalVar(VD);
13043 LValue LVal;
13044 if (VD->getType()->isReferenceType())
13045 LVal = CGF.EmitLoadOfReferenceLValue(Addr, VD->getType(),
13047 else
13048 LVal = CGF.MakeAddrLValue(Addr, VD->getType().getNonReferenceType(),
13050 emitLastprivateConditionalUpdate(CGF, It->IVLVal, Pair.second, LVal,
13051 D.getBeginLoc());
13053 CGF.EmitBlock(DoneBB, /*IsFinal=*/true);
13054 // }
13055 }
13056}
13057
13059 CodeGenFunction &CGF, LValue PrivLVal, const VarDecl *VD,
13060 SourceLocation Loc) {
13061 if (CGF.getLangOpts().OpenMP < 50)
13062 return;
13063 auto It = LastprivateConditionalStack.back().DeclToUniqueName.find(VD);
13064 assert(It != LastprivateConditionalStack.back().DeclToUniqueName.end() &&
13065 "Unknown lastprivate conditional variable.");
13066 StringRef UniqueName = It->second;
13067 llvm::GlobalVariable *GV = CGM.getModule().getNamedGlobal(UniqueName);
13068 // The variable was not updated in the region - exit.
13069 if (!GV)
13070 return;
13071 LValue LPLVal = CGF.MakeRawAddrLValue(
13072 GV, PrivLVal.getType().getNonReferenceType(), PrivLVal.getAlignment());
13073 llvm::Value *Res = CGF.EmitLoadOfScalar(LPLVal, Loc);
13074 CGF.EmitStoreOfScalar(Res, PrivLVal);
13075}
13076
13079 const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
13080 const RegionCodeGenTy &CodeGen) {
13081 llvm_unreachable("Not supported in SIMD-only mode");
13082}
13083
13086 const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
13087 const RegionCodeGenTy &CodeGen) {
13088 llvm_unreachable("Not supported in SIMD-only mode");
13089}
13090
13092 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
13093 const VarDecl *PartIDVar, const VarDecl *TaskTVar,
13094 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
13095 bool Tied, unsigned &NumberOfParts) {
13096 llvm_unreachable("Not supported in SIMD-only mode");
13097}
13098
13100 CodeGenFunction &CGF, SourceLocation Loc, llvm::Function *OutlinedFn,
13101 ArrayRef<llvm::Value *> CapturedVars, const Expr *IfCond,
13102 llvm::Value *NumThreads, OpenMPNumThreadsClauseModifier NumThreadsModifier,
13103 OpenMPSeverityClauseKind Severity, const Expr *Message) {
13104 llvm_unreachable("Not supported in SIMD-only mode");
13105}
13106
13108 CodeGenFunction &CGF, StringRef CriticalName,
13109 const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc,
13110 const Expr *Hint) {
13111 llvm_unreachable("Not supported in SIMD-only mode");
13112}
13113
13115 const RegionCodeGenTy &MasterOpGen,
13116 SourceLocation Loc) {
13117 llvm_unreachable("Not supported in SIMD-only mode");
13118}
13119
13121 const RegionCodeGenTy &MasterOpGen,
13122 SourceLocation Loc,
13123 const Expr *Filter) {
13124 llvm_unreachable("Not supported in SIMD-only mode");
13125}
13126
13128 SourceLocation Loc) {
13129 llvm_unreachable("Not supported in SIMD-only mode");
13130}
13131
13133 CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen,
13134 SourceLocation Loc) {
13135 llvm_unreachable("Not supported in SIMD-only mode");
13136}
13137
13139 CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen,
13140 SourceLocation Loc, ArrayRef<const Expr *> CopyprivateVars,
13142 ArrayRef<const Expr *> AssignmentOps) {
13143 llvm_unreachable("Not supported in SIMD-only mode");
13144}
13145
13147 const RegionCodeGenTy &OrderedOpGen,
13148 SourceLocation Loc,
13149 bool IsThreads) {
13150 llvm_unreachable("Not supported in SIMD-only mode");
13151}
13152
13154 SourceLocation Loc,
13156 bool EmitChecks,
13157 bool ForceSimpleCall) {
13158 llvm_unreachable("Not supported in SIMD-only mode");
13159}
13160
13163 const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
13164 bool Ordered, const DispatchRTInput &DispatchValues) {
13165 llvm_unreachable("Not supported in SIMD-only mode");
13166}
13167
13169 SourceLocation Loc) {
13170 llvm_unreachable("Not supported in SIMD-only mode");
13171}
13172
13175 const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) {
13176 llvm_unreachable("Not supported in SIMD-only mode");
13177}
13178
13181 OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) {
13182 llvm_unreachable("Not supported in SIMD-only mode");
13183}
13184
13186 SourceLocation Loc,
13187 unsigned IVSize,
13188 bool IVSigned) {
13189 llvm_unreachable("Not supported in SIMD-only mode");
13190}
13191
13193 SourceLocation Loc,
13194 OpenMPDirectiveKind DKind) {
13195 llvm_unreachable("Not supported in SIMD-only mode");
13196}
13197
13199 SourceLocation Loc,
13200 unsigned IVSize, bool IVSigned,
13201 Address IL, Address LB,
13202 Address UB, Address ST) {
13203 llvm_unreachable("Not supported in SIMD-only mode");
13204}
13205
13207 CodeGenFunction &CGF, llvm::Value *NumThreads, SourceLocation Loc,
13209 SourceLocation SeverityLoc, const Expr *Message,
13210 SourceLocation MessageLoc) {
13211 llvm_unreachable("Not supported in SIMD-only mode");
13212}
13213
13215 ProcBindKind ProcBind,
13216 SourceLocation Loc) {
13217 llvm_unreachable("Not supported in SIMD-only mode");
13218}
13219
13221 const VarDecl *VD,
13222 Address VDAddr,
13223 SourceLocation Loc) {
13224 llvm_unreachable("Not supported in SIMD-only mode");
13225}
13226
13228 const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit,
13229 CodeGenFunction *CGF) {
13230 llvm_unreachable("Not supported in SIMD-only mode");
13231}
13232
13234 CodeGenFunction &CGF, QualType VarType, StringRef Name) {
13235 llvm_unreachable("Not supported in SIMD-only mode");
13236}
13237
13240 SourceLocation Loc,
13241 llvm::AtomicOrdering AO) {
13242 llvm_unreachable("Not supported in SIMD-only mode");
13243}
13244
13246 const OMPExecutableDirective &D,
13247 llvm::Function *TaskFunction,
13248 QualType SharedsTy, Address Shareds,
13249 const Expr *IfCond,
13250 const OMPTaskDataTy &Data) {
13251 llvm_unreachable("Not supported in SIMD-only mode");
13252}
13253
13256 llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds,
13257 const Expr *IfCond, const OMPTaskDataTy &Data) {
13258 llvm_unreachable("Not supported in SIMD-only mode");
13259}
13260
13264 ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) {
13265 assert(Options.SimpleReduction && "Only simple reduction is expected.");
13266 CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs,
13267 ReductionOps, Options);
13268}
13269
13272 ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
13273 llvm_unreachable("Not supported in SIMD-only mode");
13274}
13275
13277 SourceLocation Loc,
13278 bool IsWorksharingReduction) {
13279 llvm_unreachable("Not supported in SIMD-only mode");
13280}
13281
13283 SourceLocation Loc,
13284 ReductionCodeGen &RCG,
13285 unsigned N) {
13286 llvm_unreachable("Not supported in SIMD-only mode");
13287}
13288
13290 SourceLocation Loc,
13291 llvm::Value *ReductionsPtr,
13292 LValue SharedLVal) {
13293 llvm_unreachable("Not supported in SIMD-only mode");
13294}
13295
13297 SourceLocation Loc,
13298 const OMPTaskDataTy &Data) {
13299 llvm_unreachable("Not supported in SIMD-only mode");
13300}
13301
13304 OpenMPDirectiveKind CancelRegion) {
13305 llvm_unreachable("Not supported in SIMD-only mode");
13306}
13307
13309 SourceLocation Loc, const Expr *IfCond,
13310 OpenMPDirectiveKind CancelRegion) {
13311 llvm_unreachable("Not supported in SIMD-only mode");
13312}
13313
13315 const OMPExecutableDirective &D, StringRef ParentName,
13316 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
13317 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
13318 llvm_unreachable("Not supported in SIMD-only mode");
13319}
13320
13323 llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
13324 llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
13325 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
13326 const OMPLoopDirective &D)>
13327 SizeEmitter) {
13328 llvm_unreachable("Not supported in SIMD-only mode");
13329}
13330
13332 llvm_unreachable("Not supported in SIMD-only mode");
13333}
13334
13336 llvm_unreachable("Not supported in SIMD-only mode");
13337}
13338
13340 return false;
13341}
13342
13344 const OMPExecutableDirective &D,
13345 SourceLocation Loc,
13346 llvm::Function *OutlinedFn,
13347 ArrayRef<llvm::Value *> CapturedVars) {
13348 llvm_unreachable("Not supported in SIMD-only mode");
13349}
13350
13352 const Expr *NumTeams,
13353 const Expr *ThreadLimit,
13354 SourceLocation Loc) {
13355 llvm_unreachable("Not supported in SIMD-only mode");
13356}
13357
13359 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
13360 const Expr *Device, const RegionCodeGenTy &CodeGen,
13362 llvm_unreachable("Not supported in SIMD-only mode");
13363}
13364
13366 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
13367 const Expr *Device) {
13368 llvm_unreachable("Not supported in SIMD-only mode");
13369}
13370
13372 const OMPLoopDirective &D,
13373 ArrayRef<Expr *> NumIterations) {
13374 llvm_unreachable("Not supported in SIMD-only mode");
13375}
13376
13378 const OMPDependClause *C) {
13379 llvm_unreachable("Not supported in SIMD-only mode");
13380}
13381
13383 const OMPDoacrossClause *C) {
13384 llvm_unreachable("Not supported in SIMD-only mode");
13385}
13386
13387const VarDecl *
13389 const VarDecl *NativeParam) const {
13390 llvm_unreachable("Not supported in SIMD-only mode");
13391}
13392
13393Address
13395 const VarDecl *NativeParam,
13396 const VarDecl *TargetParam) const {
13397 llvm_unreachable("Not supported in SIMD-only mode");
13398}
#define V(N, I)
static llvm::Value * emitCopyprivateCopyFunction(CodeGenModule &CGM, llvm::Type *ArgsElemType, ArrayRef< const Expr * > CopyprivateVars, ArrayRef< const Expr * > DestExprs, ArrayRef< const Expr * > SrcExprs, ArrayRef< const Expr * > AssignmentOps, SourceLocation Loc)
static StringRef getIdentStringFromSourceLocation(CodeGenFunction &CGF, SourceLocation Loc, SmallString< 128 > &Buffer)
static void emitOffloadingArraysAndArgs(CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo, CGOpenMPRuntime::TargetDataInfo &Info, llvm::OpenMPIRBuilder &OMPBuilder, bool IsNonContiguous=false, bool ForEndCall=false)
Emit the arrays used to pass the captures and map information to the offloading runtime library.
static RecordDecl * createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy, ArrayRef< PrivateDataTy > Privates)
static void emitInitWithReductionInitializer(CodeGenFunction &CGF, const OMPDeclareReductionDecl *DRD, const Expr *InitOp, Address Private, Address Original, QualType Ty)
static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy, Address OriginalBaseAddress, llvm::Value *Addr)
static void emitPrivatesInit(CodeGenFunction &CGF, const OMPExecutableDirective &D, Address KmpTaskSharedsPtr, LValue TDBase, const RecordDecl *KmpTaskTWithPrivatesQTyRD, QualType SharedsTy, QualType SharedsPtrTy, const OMPTaskDataTy &Data, ArrayRef< PrivateDataTy > Privates, bool ForDup)
Emit initialization for private variables in task-based directives.
static void emitClauseForBareTargetDirective(CodeGenFunction &CGF, const OMPExecutableDirective &D, llvm::SmallVectorImpl< llvm::Value * > &Values)
static llvm::Value * emitDestructorsFunction(CodeGenModule &CGM, SourceLocation Loc, QualType KmpInt32Ty, QualType KmpTaskTWithPrivatesPtrQTy, QualType KmpTaskTWithPrivatesQTy)
static void EmitOMPAggregateReduction(CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar, const VarDecl *RHSVar, const llvm::function_ref< void(CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *)> &RedOpGen, const Expr *XExpr=nullptr, const Expr *EExpr=nullptr, const Expr *UpExpr=nullptr)
Emit reduction operation for each element of array (required for array sections) LHS op = RHS.
static void emitTargetCallFallback(CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn, const OMPExecutableDirective &D, llvm::SmallVectorImpl< llvm::Value * > &CapturedVars, bool RequiresOuterTask, const CapturedStmt &CS, bool OffloadingMandatory, CodeGenFunction &CGF)
static llvm::Value * emitReduceInitFunction(CodeGenModule &CGM, SourceLocation Loc, ReductionCodeGen &RCG, unsigned N)
Emits reduction initializer function:
static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion)
static void emitDependData(CodeGenFunction &CGF, QualType &KmpDependInfoTy, llvm::PointerUnion< unsigned *, LValue * > Pos, const OMPTaskDataTy::DependData &Data, Address DependenciesArray)
static llvm::Value * emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc, const OMPTaskDataTy &Data, QualType PrivatesQTy, ArrayRef< PrivateDataTy > Privates)
Emit a privates mapping function for correct handling of private and firstprivate variables.
static llvm::Value * emitReduceCombFunction(CodeGenModule &CGM, SourceLocation Loc, ReductionCodeGen &RCG, unsigned N, const Expr *ReductionOp, const Expr *LHS, const Expr *RHS, const Expr *PrivateRef)
Emits reduction combiner function:
static RecordDecl * createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef< PrivateDataTy > Privates)
static llvm::Value * getAllocatorVal(CodeGenFunction &CGF, const Expr *Allocator)
Return allocator value from expression, or return a null allocator (default when no allocator specifi...
static llvm::Function * emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc, OpenMPDirectiveKind Kind, QualType KmpInt32Ty, QualType KmpTaskTWithPrivatesPtrQTy, QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy, QualType SharedsPtrTy, llvm::Function *TaskFunction, llvm::Value *TaskPrivatesMap)
Emit a proxy function which accepts kmp_task_t as the second argument.
static bool isAllocatableDecl(const VarDecl *VD)
static llvm::Value * getAlignmentValue(CodeGenModule &CGM, const VarDecl *VD)
Return the alignment from an allocate directive if present.
static void emitTargetCallKernelLaunch(CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn, const OMPExecutableDirective &D, llvm::SmallVectorImpl< llvm::Value * > &CapturedVars, bool RequiresOuterTask, const CapturedStmt &CS, bool OffloadingMandatory, llvm::PointerIntPair< const Expr *, 2, OpenMPDeviceClauseModifier > Device, llvm::Value *OutlinedFnID, CodeGenFunction::OMPTargetDataInfo &InputInfo, llvm::Value *&MapTypesArray, llvm::Value *&MapNamesArray, llvm::function_ref< llvm::Value *(CodeGenFunction &CGF, const OMPLoopDirective &D)> SizeEmitter, CodeGenFunction &CGF, CodeGenModule &CGM)
static const OMPExecutableDirective * getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D)
Check for inner distribute directive.
static std::pair< llvm::Value *, llvm::Value * > getPointerAndSize(CodeGenFunction &CGF, const Expr *E)
static const VarDecl * getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE)
static bool getAArch64MTV(QualType QT, llvm::OpenMPIRBuilder::DeclareSimdKindTy Kind)
Maps To Vector (MTV), as defined in 4.1.1 of the AAVFABI (2021Q1).
static bool isTrivial(ASTContext &Ctx, const Expr *E)
Checks if the expression is constant or does not have non-trivial function calls.
static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind, bool Chunked, bool Ordered)
Map the OpenMP loop schedule to the runtime enumeration.
static void getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS, const Expr **E, int32_t &UpperBound, bool UpperBoundOnly, llvm::Value **CondVal)
Check for a num threads constant value (stored in DefaultVal), or expression (stored in E).
static llvm::Value * emitDeviceID(llvm::PointerIntPair< const Expr *, 2, OpenMPDeviceClauseModifier > Device, CodeGenFunction &CGF)
static const OMPDeclareReductionDecl * getReductionInit(const Expr *ReductionOp)
Check if the combiner is a call to UDR combiner and if it is so return the UDR decl used for reductio...
static bool checkInitIsRequired(CodeGenFunction &CGF, ArrayRef< PrivateDataTy > Privates)
Check if duplication function is required for taskloops.
static bool validateAArch64Simdlen(CodeGenModule &CGM, SourceLocation SLoc, unsigned UserVLEN, unsigned WDS, char ISA)
static bool checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD, ArrayRef< PrivateDataTy > Privates)
Checks if destructor function is required to be generated.
static llvm::TargetRegionEntryInfo getEntryInfoFromPresumedLoc(CodeGenModule &CGM, llvm::OpenMPIRBuilder &OMPBuilder, SourceLocation BeginLoc, llvm::StringRef ParentName="")
static void genMapInfo(MappableExprsHandler &MEHandler, CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo, llvm::OpenMPIRBuilder &OMPBuilder, const llvm::DenseSet< CanonicalDeclPtr< const Decl > > &SkippedVarSet=llvm::DenseSet< CanonicalDeclPtr< const Decl > >())
static unsigned getAArch64LS(QualType QT, llvm::OpenMPIRBuilder::DeclareSimdKindTy Kind, ASTContext &C)
Computes the lane size (LS) of a return type or of an input parameter, as defined by LS(P) in 3....
static llvm::OpenMPIRBuilder::DeclareSimdBranch convertDeclareSimdBranch(OMPDeclareSimdDeclAttr::BranchStateTy State)
static void emitForStaticInitCall(CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId, llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule, OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2, const CGOpenMPRuntime::StaticRTInput &Values)
static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy, LValue BaseLV)
static void getKmpAffinityType(ASTContext &C, QualType &KmpTaskAffinityInfoTy)
Builds kmp_task_affinity_info, if it is not built yet.
static llvm::Constant * emitMappingInformation(CodeGenFunction &CGF, llvm::OpenMPIRBuilder &OMPBuilder, MappableExprsHandler::MappingExprInfo &MapExprs)
Emit a string constant containing the names of the values mapped to the offloading runtime library.
static void getDependTypes(ASTContext &C, QualType &KmpDependInfoTy, QualType &FlagsTy)
Builds kmp_depend_info, if it is not built yet, and builds flags type.
static llvm::Value * emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc, const OMPExecutableDirective &D, QualType KmpTaskTWithPrivatesPtrQTy, const RecordDecl *KmpTaskTWithPrivatesQTyRD, const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy, QualType SharedsPtrTy, const OMPTaskDataTy &Data, ArrayRef< PrivateDataTy > Privates, bool WithLastIter)
Emit task_dup function (for initialization of private/firstprivate/lastprivate vars and last_iter fla...
static std::pair< llvm::Value *, OMPDynGroupprivateFallbackType > emitDynCGroupMem(const OMPExecutableDirective &D, CodeGenFunction &CGF)
static llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseKind convertDeviceClause(const VarDecl *VD)
static llvm::Value * emitReduceFiniFunction(CodeGenModule &CGM, SourceLocation Loc, ReductionCodeGen &RCG, unsigned N)
Emits reduction finalizer function:
static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr, QualType Type, bool EmitDeclareReductionInit, const Expr *Init, const OMPDeclareReductionDecl *DRD, Address SrcAddr=Address::invalid())
Emit initialization of arrays of complex types.
static bool getAArch64PBV(QualType QT, ASTContext &C)
Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI.
static void EmitDoacrossOrdered(CodeGenFunction &CGF, CodeGenModule &CGM, const T *C, llvm::Value *ULoc, llvm::Value *ThreadID)
static RTLDependenceKindTy translateDependencyKind(OpenMPDependClauseKind K)
Translates internal dependency kind into the runtime kind.
static void emitTargetCallElse(CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn, const OMPExecutableDirective &D, llvm::SmallVectorImpl< llvm::Value * > &CapturedVars, bool RequiresOuterTask, const CapturedStmt &CS, bool OffloadingMandatory, CodeGenFunction &CGF)
static llvm::Function * emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty, const Expr *CombinerInitializer, const VarDecl *In, const VarDecl *Out, bool IsCombiner)
static void emitReductionCombiner(CodeGenFunction &CGF, const Expr *ReductionOp)
Emit reduction combiner.
static std::tuple< unsigned, unsigned, bool > getNDSWDS(const FunctionDecl *FD, ArrayRef< llvm::OpenMPIRBuilder::DeclareSimdAttrTy > ParamAttrs)
static std::string generateUniqueName(CodeGenModule &CGM, llvm::StringRef Prefix, const Expr *Ref)
static llvm::Function * emitParallelOrTeamsOutlinedFunction(CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS, const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen)
static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array, unsigned Index, const VarDecl *Var)
Given an array of pointers to variables, project the address of a given variable.
static FieldDecl * addFieldToRecordDecl(ASTContext &C, DeclContext *DC, QualType FieldTy)
static unsigned evaluateCDTSize(const FunctionDecl *FD, ArrayRef< llvm::OpenMPIRBuilder::DeclareSimdAttrTy > ParamAttrs)
static ValueDecl * getDeclFromThisExpr(const Expr *E)
static void genMapInfoForCaptures(MappableExprsHandler &MEHandler, CodeGenFunction &CGF, const CapturedStmt &CS, llvm::SmallVectorImpl< llvm::Value * > &CapturedVars, llvm::OpenMPIRBuilder &OMPBuilder, llvm::DenseSet< CanonicalDeclPtr< const Decl > > &MappedVarSet, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo)
static RecordDecl * createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind, QualType KmpInt32Ty, QualType KmpRoutineEntryPointerQTy)
static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule, OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2)
static mlir::omp::DeclareTargetCaptureClause convertCaptureClause(OMPDeclareTargetDeclAttr::MapTypeTy mapTy)
static bool isAssumedToBeNotEmitted(const ValueDecl *vd, bool isDevice)
Returns true if the declaration should be skipped based on its device_type attribute and the current ...
Expr::Classification Cl
TokenType getType() const
Returns the token's type, e.g.
FormatToken * Next
The next token in the unwrapped line.
Result
Implement __builtin_bit_cast and related operations.
#define X(type, name)
Definition Value.h:97
#define SM(sm)
This file defines OpenMP AST classes for clauses.
Defines some OpenMP-specific enums and functions.
llvm::json::Array Array
Defines the SourceManager interface.
This file defines OpenMP AST classes for executable directives and clauses.
__DEVICE__ int max(int __a, int __b)
This represents clause 'affinity' in the 'pragma omp task'-based directives.
static std::pair< const Expr *, std::optional< size_t > > findAttachPtrExpr(MappableExprComponentListRef Components, OpenMPDirectiveKind CurDirKind)
Find the attach pointer expression from a list of mappable expression components.
static QualType getComponentExprElementType(const Expr *Exp)
Get the type of an element of a ComponentList Expr Exp.
ArrayRef< MappableComponent > MappableExprComponentListRef
This represents implicit clause 'depend' for the 'pragma omp task' directive.
This represents 'detach' clause in the 'pragma omp task' directive.
This represents 'device' clause in the 'pragma omp ...' directive.
This represents the 'doacross' clause for the 'pragma omp ordered' directive.
This represents 'dyn_groupprivate' clause in 'pragma omp target ...' and 'pragma omp teams ....
This represents clause 'map' in the 'pragma omp ...' directives.
This represents clause 'nontemporal' in the 'pragma omp ...' directives.
This represents 'num_teams' clause in the 'pragma omp ...' directive.
This represents 'thread_limit' clause in the 'pragma omp ...' directive.
This represents clause 'uses_allocators' in the 'pragma omp target'-based directives.
This represents 'ompx_attribute' clause in a directive that might generate an outlined function.
This represents 'ompx_bare' clause in the 'pragma omp target teams ...' directive.
This represents 'ompx_dyn_cgroup_mem' clause in the 'pragma omp target ...' directive.
Holds long-lived AST nodes (such as types and decls) that can be referred to throughout the semantic ...
Definition ASTContext.h:227
SourceManager & getSourceManager()
Definition ASTContext.h:872
const ConstantArrayType * getAsConstantArrayType(QualType T) const
CharUnits getTypeAlignInChars(QualType T) const
Return the ABI-specified alignment of a (complete) type T, in characters.
const ASTRecordLayout & getASTRecordLayout(const RecordDecl *D) const
Get or compute information about the layout of the specified record (struct/union/class) D,...
QualType getPointerType(QualType T) const
Return the uniqued reference to the type for a pointer to the specified type.
CanQualType VoidPtrTy
QualType getConstantArrayType(QualType EltTy, const llvm::APInt &ArySize, const Expr *SizeExpr, ArraySizeModifier ASM, unsigned IndexTypeQuals) const
Return the unique reference to the type for a constant array of the specified element type.
const LangOptions & getLangOpts() const
Definition ASTContext.h:965
CanQualType BoolTy
QualType getIntTypeForBitwidth(unsigned DestWidth, unsigned Signed) const
getIntTypeForBitwidth - sets integer QualTy according to specified details: bitwidth,...
CharUnits getDeclAlign(const Decl *D, bool ForAlignof=false) const
Return a conservative estimate of the alignment of the specified decl D.
int64_t toBits(CharUnits CharSize) const
Convert a size in characters to a size in bits.
const ArrayType * getAsArrayType(QualType T) const
Type Query functions.
uint64_t getTypeSize(QualType T) const
Return the size of the specified (complete) type T, in bits.
CharUnits getTypeSizeInChars(QualType T) const
Return the size of the specified (complete) type T, in characters.
static bool hasSameType(QualType T1, QualType T2)
Determine whether the given types T1 and T2 are equivalent.
const VariableArrayType * getAsVariableArrayType(QualType T) const
QualType getSizeType() const
Return the unique type for "size_t" (C99 7.17), defined in <stddef.h>.
unsigned getTypeAlign(QualType T) const
Return the ABI-specified alignment of a (complete) type T, in bits.
CharUnits getSize() const
getSize - Get the record size in characters.
uint64_t getFieldOffset(unsigned FieldNo) const
getFieldOffset - Get the offset of the given field index, in bits.
CharUnits getNonVirtualSize() const
getNonVirtualSize - Get the non-virtual size (in chars) of an object, which is the size of the object...
static QualType getBaseOriginalType(const Expr *Base)
Return original type of the base expression for array section.
Definition Expr.cpp:5392
Represents an array type, per C99 6.7.5.2 - Array Declarators.
Definition TypeBase.h:3784
Attr - This represents one attribute.
Definition Attr.h:46
Represents a base class of a C++ class.
Definition DeclCXX.h:146
Represents a C++ constructor within a class.
Definition DeclCXX.h:2620
Represents a C++ destructor within a class.
Definition DeclCXX.h:2882
const CXXRecordDecl * getParent() const
Return the parent of this method declaration, which is the class in which this method is defined.
Definition DeclCXX.h:2271
QualType getFunctionObjectParameterType() const
Definition DeclCXX.h:2295
Represents a C++ struct/union/class.
Definition DeclCXX.h:258
base_class_range bases()
Definition DeclCXX.h:608
bool isLambda() const
Determine whether this class describes a lambda function object.
Definition DeclCXX.h:1018
void getCaptureFields(llvm::DenseMap< const ValueDecl *, FieldDecl * > &Captures, FieldDecl *&ThisCapture) const
For a closure type, retrieve the mapping from captured variables and this to the non-static data memb...
Definition DeclCXX.cpp:1790
unsigned getNumBases() const
Retrieves the number of base classes of this class.
Definition DeclCXX.h:602
base_class_range vbases()
Definition DeclCXX.h:625
capture_const_range captures() const
Definition DeclCXX.h:1097
ctor_range ctors() const
Definition DeclCXX.h:670
CXXDestructorDecl * getDestructor() const
Returns the destructor decl for this class.
Definition DeclCXX.cpp:2131
CanProxy< U > castAs() const
A wrapper class around a pointer that always points to its canonical declaration.
Describes the capture of either a variable, or 'this', or variable-length array type.
Definition Stmt.h:3960
bool capturesVariableByCopy() const
Determine whether this capture handles a variable by copy.
Definition Stmt.h:3994
VarDecl * getCapturedVar() const
Retrieve the declaration of the variable being captured.
Definition Stmt.cpp:1391
bool capturesVariableArrayType() const
Determine whether this capture handles a variable-length array type.
Definition Stmt.h:4000
bool capturesThis() const
Determine whether this capture handles the C++ 'this' pointer.
Definition Stmt.h:3988
bool capturesVariable() const
Determine whether this capture handles a variable (by reference).
Definition Stmt.h:3991
This captures a statement into a function.
Definition Stmt.h:3947
const Capture * const_capture_iterator
Definition Stmt.h:4081
capture_iterator capture_end() const
Retrieve an iterator pointing past the end of the sequence of captures.
Definition Stmt.h:4098
const RecordDecl * getCapturedRecordDecl() const
Retrieve the record declaration for captured variables.
Definition Stmt.h:4068
Stmt * getCapturedStmt()
Retrieve the statement being captured.
Definition Stmt.h:4051
bool capturesVariable(const VarDecl *Var) const
True if this variable has been captured.
Definition Stmt.cpp:1517
capture_iterator capture_begin()
Retrieve an iterator pointing to the first capture.
Definition Stmt.h:4093
capture_range captures()
Definition Stmt.h:4085
CharUnits - This is an opaque type for sizes expressed in character units.
Definition CharUnits.h:38
bool isZero() const
isZero - Test whether the quantity equals zero.
Definition CharUnits.h:122
llvm::Align getAsAlign() const
getAsAlign - Returns Quantity as a valid llvm::Align. Beware: llvm::Align assumes power of two 8-bit b...
Definition CharUnits.h:189
QuantityType getQuantity() const
getQuantity - Get the raw integer representation of this quantity.
Definition CharUnits.h:185
CharUnits alignmentOfArrayElement(CharUnits elementSize) const
Given that this is the alignment of the first element of an array, return the minimum alignment of an...
Definition CharUnits.h:214
static CharUnits fromQuantity(QuantityType Quantity)
fromQuantity - Construct a CharUnits quantity from a raw integer type.
Definition CharUnits.h:63
CharUnits alignTo(const CharUnits &Align) const
alignTo - Returns the next integer (mod 2**64) that is greater than or equal to this quantity and is ...
Definition CharUnits.h:201
std::string SampleProfileFile
Name of the profile file to use with -fprofile-sample-use.
Like RawAddress, an abstract representation of an aligned address, but the pointer contained in this ...
Definition Address.h:128
static Address invalid()
Definition Address.h:176
llvm::Value * emitRawPointer(CodeGenFunction &CGF) const
Return the pointer contained in this class after authenticating it and adding offset to it if necessa...
Definition Address.h:253
CharUnits getAlignment() const
Definition Address.h:194
llvm::Type * getElementType() const
Return the type of the values stored in this address.
Definition Address.h:209
Address withPointer(llvm::Value *NewPointer, KnownNonNull_t IsKnownNonNull) const
Return address with different pointer, but same element type and alignment.
Definition Address.h:261
Address withElementType(llvm::Type *ElemTy) const
Return address with different element type, but same pointer and alignment.
Definition Address.h:276
bool isValid() const
Definition Address.h:177
llvm::PointerType * getType() const
Return the type of the pointer value.
Definition Address.h:204
static ApplyDebugLocation CreateArtificial(CodeGenFunction &CGF)
Apply TemporaryLocation if it is valid.
static ApplyDebugLocation CreateDefaultArtificial(CodeGenFunction &CGF, SourceLocation TemporaryLocation)
Apply TemporaryLocation if it is valid.
static ApplyDebugLocation CreateEmpty(CodeGenFunction &CGF)
Set the IRBuilder to not attach debug locations.
llvm::StoreInst * CreateStore(llvm::Value *Val, Address Addr, bool IsVolatile=false)
Definition CGBuilder.h:146
Address CreateGEP(CodeGenFunction &CGF, Address Addr, llvm::Value *Index, const llvm::Twine &Name="")
Definition CGBuilder.h:302
Address CreatePointerBitCastOrAddrSpaceCast(Address Addr, llvm::Type *Ty, llvm::Type *ElementTy, const llvm::Twine &Name="")
Definition CGBuilder.h:213
Address CreateConstArrayGEP(Address Addr, uint64_t Index, const llvm::Twine &Name="")
Given addr = [n x T]* ... produce name = getelementptr inbounds addr, i64 0, i64 index where i64 is a...
Definition CGBuilder.h:251
llvm::LoadInst * CreateLoad(Address Addr, const llvm::Twine &Name="")
Definition CGBuilder.h:118
llvm::CallInst * CreateMemCpy(Address Dest, Address Src, llvm::Value *Size, bool IsVolatile=false)
Definition CGBuilder.h:397
Address CreateConstGEP(Address Addr, uint64_t Index, const llvm::Twine &Name="")
Given addr = T* ... produce name = getelementptr inbounds addr, i64 index where i64 is actually the t...
Definition CGBuilder.h:288
Address CreateAddrSpaceCast(Address Addr, llvm::Type *Ty, llvm::Type *ElementTy, const llvm::Twine &Name="")
Definition CGBuilder.h:199
CGFunctionInfo - Class to encapsulate the information about a function definition.
static LastprivateConditionalRAII disable(CodeGenFunction &CGF, const OMPExecutableDirective &S)
NontemporalDeclsRAII(CodeGenModule &CGM, const OMPLoopDirective &S)
Struct that keeps all the relevant information that should be kept throughout a 'target data' region.
llvm::DenseMap< const ValueDecl *, llvm::Value * > CaptureDeviceAddrMap
Map between a declaration of a capture and the corresponding new llvm address where the runtime r...
UntiedTaskLocalDeclsRAII(CodeGenFunction &CGF, const llvm::MapVector< CanonicalDeclPtr< const VarDecl >, std::pair< Address, Address > > &LocalVars)
virtual Address emitThreadIDAddress(CodeGenFunction &CGF, SourceLocation Loc)
Emits address of the word in a memory where current thread id is stored.
llvm::StringSet ThreadPrivateWithDefinition
Set of threadprivate variables with the generated initializer.
virtual void emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, const OMPExecutableDirective &D, llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds, const Expr *IfCond, const OMPTaskDataTy &Data)
Emit task region for the task directive.
void createOffloadEntriesAndInfoMetadata()
Creates all the offload entries in the current compilation unit along with the associated metadata.
const Expr * getNumTeamsExprForTargetDirective(CodeGenFunction &CGF, const OMPExecutableDirective &D, int32_t &MinTeamsVal, int32_t &MaxTeamsVal)
Emit the number of teams for a target directive.
virtual Address getAddrOfThreadPrivate(CodeGenFunction &CGF, const VarDecl *VD, Address VDAddr, SourceLocation Loc)
Returns address of the threadprivate variable for the current thread.
void emitDeferredTargetDecls() const
Emit deferred declare target variables marked for deferred emission.
virtual llvm::Value * emitForNext(CodeGenFunction &CGF, SourceLocation Loc, unsigned IVSize, bool IVSigned, Address IL, Address LB, Address UB, Address ST)
Call __kmpc_dispatch_next( ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter, kmp_int[32|64] *p_lowe...
bool markAsGlobalTarget(GlobalDecl GD)
Marks the declaration as already emitted for the device code and returns true, if it was marked alrea...
virtual void emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc, llvm::Function *OutlinedFn, ArrayRef< llvm::Value * > CapturedVars, const Expr *IfCond, llvm::Value *NumThreads, OpenMPNumThreadsClauseModifier NumThreadsModifier=OMPC_NUMTHREADS_unknown, OpenMPSeverityClauseKind Severity=OMPC_SEVERITY_fatal, const Expr *Message=nullptr)
Emits code for parallel or serial call of the OutlinedFn with variables captured in a record which ad...
llvm::SmallDenseSet< CanonicalDeclPtr< const Decl > > NontemporalDeclsSet
virtual void emitTargetDataStandAloneCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, const Expr *Device)
Emit the data mapping/movement code associated with the directive D that should be of the form 'targe...
virtual void emitNumThreadsClause(CodeGenFunction &CGF, llvm::Value *NumThreads, SourceLocation Loc, OpenMPNumThreadsClauseModifier Modifier=OMPC_NUMTHREADS_unknown, OpenMPSeverityClauseKind Severity=OMPC_SEVERITY_fatal, SourceLocation SeverityLoc=SourceLocation(), const Expr *Message=nullptr, SourceLocation MessageLoc=SourceLocation())
Emits call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_threads) ...
QualType SavedKmpTaskloopTQTy
Saved kmp_task_t for taskloop-based directive.
virtual void emitSingleRegion(CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen, SourceLocation Loc, ArrayRef< const Expr * > CopyprivateVars, ArrayRef< const Expr * > DestExprs, ArrayRef< const Expr * > SrcExprs, ArrayRef< const Expr * > AssignmentOps)
Emits a single region.
virtual bool emitTargetGlobal(GlobalDecl GD)
Emit the global GD if it is meaningful for the target.
void setLocThreadIdInsertPt(CodeGenFunction &CGF, bool AtCurrentPoint=false)
std::string getOutlinedHelperName(StringRef Name) const
Get the function name of an outlined region.
bool HasEmittedDeclareTargetRegion
Flag for keeping track of whether a device routine has been emitted.
llvm::Constant * getOrCreateThreadPrivateCache(const VarDecl *VD)
If the specified mangled name is not in the module, create and return threadprivate cache object.
virtual Address getTaskReductionItem(CodeGenFunction &CGF, SourceLocation Loc, llvm::Value *ReductionsPtr, LValue SharedLVal)
Get the address of void * type of the private copy of the reduction item specified by the SharedLVal...
virtual void emitForDispatchDeinit(CodeGenFunction &CGF, SourceLocation Loc)
This is used for non static scheduled types and when the ordered clause is present on the loop constr...
void emitCall(CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee Callee, ArrayRef< llvm::Value * > Args={}) const
Emits Callee function call with arguments Args with location Loc.
virtual void getDefaultScheduleAndChunk(CodeGenFunction &CGF, const OMPLoopDirective &S, OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const
Choose default schedule type and chunk value for the schedule clause.
virtual std::pair< llvm::Function *, llvm::Function * > getUserDefinedReduction(const OMPDeclareReductionDecl *D)
Get combiner/initializer for the specified user-defined reduction, if any.
virtual bool isGPU() const
Returns true if the current target is a GPU.
static const Stmt * getSingleCompoundChild(ASTContext &Ctx, const Stmt *Body)
Checks if the Body is the CompoundStmt and returns its child statement iff there is only one that is ...
virtual void emitDeclareTargetFunction(const FunctionDecl *FD, llvm::GlobalValue *GV)
Emit code for handling declare target functions in the runtime.
bool HasRequiresUnifiedSharedMemory
Flag for keeping track of whether a requires unified_shared_memory directive is present.
llvm::Value * emitUpdateLocation(CodeGenFunction &CGF, SourceLocation Loc, unsigned Flags=0, bool EmitLoc=false)
Emits object of ident_t type with info for source location.
bool isLocalVarInUntiedTask(CodeGenFunction &CGF, const VarDecl *VD) const
Returns true if the variable is a local variable in untied task.
virtual void emitTeamsCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, SourceLocation Loc, llvm::Function *OutlinedFn, ArrayRef< llvm::Value * > CapturedVars)
Emits code for teams call of the OutlinedFn with variables captured in a record which address is stor...
virtual void emitCancellationPointCall(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind CancelRegion)
Emit code for 'cancellation point' construct.
virtual llvm::Function * emitThreadPrivateVarDefinition(const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit, CodeGenFunction *CGF=nullptr)
Emit code for initialization of a threadprivate variable.
virtual ConstantAddress getAddrOfDeclareTargetVar(const VarDecl *VD)
Returns the address of the variable marked as declare target with link clause OR as declare target wi...
llvm::Function * getOrCreateUserDefinedMapperFunc(const OMPDeclareMapperDecl *D)
Get the function for the specified user-defined mapper.
OpenMPLocThreadIDMapTy OpenMPLocThreadIDMap
virtual void functionFinished(CodeGenFunction &CGF)
Cleans up references to the objects in finished function.
virtual llvm::Function * emitTeamsOutlinedFunction(CodeGenFunction &CGF, const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen)
Emits outlined function for the specified OpenMP teams directive D.
QualType KmpTaskTQTy
Type typedef struct kmp_task { void * shareds; /**< pointer to block of pointers to shared vars */ k...
llvm::OpenMPIRBuilder OMPBuilder
An OpenMP-IR-Builder instance.
virtual void emitDoacrossInit(CodeGenFunction &CGF, const OMPLoopDirective &D, ArrayRef< Expr * > NumIterations)
Emit initialization for doacross loop nesting support.
virtual void adjustTargetSpecificDataForLambdas(CodeGenFunction &CGF, const OMPExecutableDirective &D) const
Adjust some parameters for the target-based directives, like addresses of the variables captured by r...
virtual void emitTargetDataCalls(CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, const Expr *Device, const RegionCodeGenTy &CodeGen, CGOpenMPRuntime::TargetDataInfo &Info)
Emit the target data mapping code associated with D.
virtual unsigned getDefaultLocationReserved2Flags() const
Returns additional flags that can be stored in reserved_2 field of the default location.
virtual Address getParameterAddress(CodeGenFunction &CGF, const VarDecl *NativeParam, const VarDecl *TargetParam) const
Gets the address of the native argument based on the address of the target-specific parameter.
void emitUsesAllocatorsFini(CodeGenFunction &CGF, const Expr *Allocator)
Destroys user defined allocators specified in the uses_allocators clause.
QualType KmpTaskAffinityInfoTy
Type typedef struct kmp_task_affinity_info { kmp_intptr_t base_addr; size_t len; struct { bool flag1 ...
void emitPrivateReduction(CodeGenFunction &CGF, SourceLocation Loc, const Expr *Privates, const Expr *LHSExprs, const Expr *RHSExprs, const Expr *ReductionOps)
Emits code for private variable reduction.
llvm::Value * emitNumTeamsForTargetDirective(CodeGenFunction &CGF, const OMPExecutableDirective &D)
virtual void emitTargetOutlinedFunctionHelper(const OMPExecutableDirective &D, StringRef ParentName, llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, bool IsOffloadEntry, const RegionCodeGenTy &CodeGen)
Helper to emit outlined function for 'target' directive.
void scanForTargetRegionsFunctions(const Stmt *S, StringRef ParentName)
Start scanning from statement S and emit all target regions found along the way.
SmallVector< llvm::Value *, 4 > emitDepobjElementsSizes(CodeGenFunction &CGF, QualType &KmpDependInfoTy, const OMPTaskDataTy::DependData &Data)
virtual llvm::Value * emitMessageClause(CodeGenFunction &CGF, const Expr *Message, SourceLocation Loc)
virtual void emitTaskgroupRegion(CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen, SourceLocation Loc)
Emit a taskgroup region.
llvm::DenseMap< llvm::Function *, llvm::DenseMap< CanonicalDeclPtr< const Decl >, std::tuple< QualType, const FieldDecl *, const FieldDecl *, LValue > > > LastprivateConditionalToTypes
Maps local variables marked as lastprivate conditional to their internal types.
virtual bool emitTargetGlobalVariable(GlobalDecl GD)
Emit the global variable if it is a valid device global variable.
virtual void emitNumTeamsClause(CodeGenFunction &CGF, const Expr *NumTeams, const Expr *ThreadLimit, SourceLocation Loc)
Emits call to void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_teams,...
bool hasRequiresUnifiedSharedMemory() const
Return whether the unified_shared_memory has been specified.
virtual Address getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF, QualType VarType, StringRef Name)
Creates artificial threadprivate variable with name Name and type VarType.
void emitUserDefinedMapper(const OMPDeclareMapperDecl *D, CodeGenFunction *CGF=nullptr)
Emit the function for the user defined mapper construct.
bool HasEmittedTargetRegion
Flag for keeping track of whether a target region has been emitted.
void emitDepobjElements(CodeGenFunction &CGF, QualType &KmpDependInfoTy, LValue PosLVal, const OMPTaskDataTy::DependData &Data, Address DependenciesArray)
std::string getReductionFuncName(StringRef Name) const
Get the function name of a reduction function.
virtual void processRequiresDirective(const OMPRequiresDecl *D)
Perform check on requires decl to ensure that target architecture supports unified addressing.
llvm::DenseSet< CanonicalDeclPtr< const Decl > > AlreadyEmittedTargetDecls
List of the emitted declarations.
virtual llvm::Value * emitTaskReductionInit(CodeGenFunction &CGF, SourceLocation Loc, ArrayRef< const Expr * > LHSExprs, ArrayRef< const Expr * > RHSExprs, const OMPTaskDataTy &Data)
Emit code for initialization of the task reduction clause.
llvm::Value * getThreadID(CodeGenFunction &CGF, SourceLocation Loc)
Gets thread id value for the current thread.
void emitUpdateClause(CodeGenFunction &CGF, LValue DepobjLVal, OpenMPDependClauseKind NewDepKind, SourceLocation Loc)
Updates the dependency kind in the specified depobj object.
virtual void emitLastprivateConditionalFinalUpdate(CodeGenFunction &CGF, LValue PrivLVal, const VarDecl *VD, SourceLocation Loc)
Gets the address of the global copy used for lastprivate conditional update, if any.
llvm::MapVector< CanonicalDeclPtr< const VarDecl >, std::pair< Address, Address > > UntiedLocalVarsAddressesMap
virtual void emitErrorCall(CodeGenFunction &CGF, SourceLocation Loc, Expr *ME, bool IsFatal)
Emit __kmpc_error call for error directive extern void __kmpc_error(ident_t *loc, int severity,...
void clearLocThreadIdInsertPt(CodeGenFunction &CGF)
virtual void emitTaskyieldCall(CodeGenFunction &CGF, SourceLocation Loc)
Emits code for a taskyield directive.
std::string getName(ArrayRef< StringRef > Parts) const
Get the platform-specific name separator.
void computeMinAndMaxThreadsAndTeams(const OMPExecutableDirective &D, CodeGenFunction &CGF, llvm::OpenMPIRBuilder::TargetKernelDefaultAttrs &Attrs)
Helper to determine the min/max number of threads/teams for D.
virtual void emitFlush(CodeGenFunction &CGF, ArrayRef< const Expr * > Vars, SourceLocation Loc, llvm::AtomicOrdering AO)
Emit flush of the variables specified in 'omp flush' directive.
virtual void emitTaskwaitCall(CodeGenFunction &CGF, SourceLocation Loc, const OMPTaskDataTy &Data)
Emit code for 'taskwait' directive.
virtual void emitProcBindClause(CodeGenFunction &CGF, llvm::omp::ProcBindKind ProcBind, SourceLocation Loc)
Emit call to void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid, int proc_bind) to generate...
void emitLastprivateConditionalUpdate(CodeGenFunction &CGF, LValue IVLVal, StringRef UniqueDeclName, LValue LVal, SourceLocation Loc)
Emit update for lastprivate conditional data.
virtual void emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D, llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds, const Expr *IfCond, const OMPTaskDataTy &Data)
Emit task region for the taskloop directive.
virtual void emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind Kind, bool EmitChecks=true, bool ForceSimpleCall=false)
Emit an implicit/explicit barrier for OpenMP threads.
static unsigned getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind)
Returns default flags for the barriers depending on the directive, for which this barrier is going to ...
virtual bool emitTargetFunctions(GlobalDecl GD)
Emit the target regions enclosed in GD function definition or the function itself in case it is a val...
TaskResultTy emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc, const OMPExecutableDirective &D, llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds, const OMPTaskDataTy &Data)
Emit task region for the task directive.
llvm::Value * emitTargetNumIterationsCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, llvm::function_ref< llvm::Value *(CodeGenFunction &CGF, const OMPLoopDirective &D)> SizeEmitter)
Return the trip count of loops associated with constructs 'target teams distribute' and 'teams dist...
llvm::StringMap< llvm::AssertingVH< llvm::GlobalVariable >, llvm::BumpPtrAllocator > InternalVars
An ordered map of auto-generated variables to their unique names.
virtual void emitDistributeStaticInit(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values)
llvm::SmallVector< UntiedLocalVarsAddressesMap, 4 > UntiedLocalVarsStack
virtual void emitForStaticFinish(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind)
Call the appropriate runtime routine to notify that we finished all the work with current loop.
virtual void emitThreadLimitClause(CodeGenFunction &CGF, const Expr *ThreadLimit, SourceLocation Loc)
Emits call to void __kmpc_set_thread_limit(ident_t *loc, kmp_int32 global_tid, kmp_int32 thread_limit)...
void emitIfClause(CodeGenFunction &CGF, const Expr *Cond, const RegionCodeGenTy &ThenGen, const RegionCodeGenTy &ElseGen)
Emits code for OpenMP 'if' clause using specified CodeGen function.
Address emitDepobjDependClause(CodeGenFunction &CGF, const OMPTaskDataTy::DependData &Dependencies, SourceLocation Loc)
Emits list of dependencies based on the provided data (array of dependence/expression pairs) for depob...
bool isNontemporalDecl(const ValueDecl *VD) const
Checks if the VD variable is marked as nontemporal declaration in current context.
virtual llvm::Function * emitParallelOutlinedFunction(CodeGenFunction &CGF, const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen)
Emits outlined function for the specified OpenMP parallel directive D.
const Expr * getNumThreadsExprForTargetDirective(CodeGenFunction &CGF, const OMPExecutableDirective &D, int32_t &UpperBound, bool UpperBoundOnly, llvm::Value **CondExpr=nullptr, const Expr **ThreadLimitExpr=nullptr)
Check for a number of threads upper bound constant value (stored in UpperBound), or expression (retur...
virtual void registerVTableOffloadEntry(llvm::GlobalVariable *VTable, const VarDecl *VD)
Register VTable to OpenMP offload entry.
virtual llvm::Value * emitSeverityClause(OpenMPSeverityClauseKind Severity, SourceLocation Loc)
llvm::SmallVector< LastprivateConditionalData, 4 > LastprivateConditionalStack
Stack for list of addresses of declarations in current context marked as lastprivate conditional.
virtual void emitForStaticInit(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind, const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values)
Call the appropriate runtime routine to initialize it before start of loop.
virtual void emitDeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn)
Marks function Fn with properly mangled versions of vector functions.
llvm::AtomicOrdering getDefaultMemoryOrdering() const
Gets default memory ordering as specified in requires directive.
virtual bool isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind, bool Chunked) const
Check if the specified ScheduleKind is static non-chunked.
virtual void emitAndRegisterVTable(CodeGenModule &CGM, CXXRecordDecl *CXXRecord, const VarDecl *VD)
Emit and register VTable for the C++ class in OpenMP offload entry.
llvm::Value * getCriticalRegionLock(StringRef CriticalName)
Returns corresponding lock object for the specified critical region name.
virtual void emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc, const Expr *IfCond, OpenMPDirectiveKind CancelRegion)
Emit code for 'cancel' construct.
QualType SavedKmpTaskTQTy
Saved kmp_task_t for task directive.
virtual void emitMasterRegion(CodeGenFunction &CGF, const RegionCodeGenTy &MasterOpGen, SourceLocation Loc)
Emits a master region.
virtual llvm::Function * emitTaskOutlinedFunction(const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, const VarDecl *PartIDVar, const VarDecl *TaskTVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen, bool Tied, unsigned &NumberOfParts)
Emits outlined function for the OpenMP task directive D.
llvm::DenseMap< llvm::Function *, unsigned > FunctionToUntiedTaskStackMap
Maps function to the position of the untied task locals stack.
void emitDestroyClause(CodeGenFunction &CGF, LValue DepobjLVal, SourceLocation Loc)
Emits the code to destroy the dependency object provided in depobj directive.
virtual void emitTaskReductionFixups(CodeGenFunction &CGF, SourceLocation Loc, ReductionCodeGen &RCG, unsigned N)
Required to resolve existing problems in the runtime.
llvm::ArrayType * KmpCriticalNameTy
Type kmp_critical_name, originally defined as typedef kmp_int32 kmp_critical_name[8];.
virtual void emitDoacrossOrdered(CodeGenFunction &CGF, const OMPDependClause *C)
Emit code for doacross ordered directive with 'depend' clause.
llvm::DenseMap< const OMPDeclareMapperDecl *, llvm::Function * > UDMMap
Map from the user-defined mapper declaration to its corresponding functions.
virtual void checkAndEmitLastprivateConditional(CodeGenFunction &CGF, const Expr *LHS)
Checks if the provided LVal is lastprivate conditional and emits the code to update the value of the ...
std::pair< llvm::Value *, LValue > getDepobjElements(CodeGenFunction &CGF, LValue DepobjLVal, SourceLocation Loc)
Returns the number of the elements and the address of the depobj dependency array.
llvm::SmallDenseSet< const VarDecl * > DeferredGlobalVariables
List of variables that can become declare target implicitly and, thus, must be emitted.
void emitUsesAllocatorsInit(CodeGenFunction &CGF, const Expr *Allocator, const Expr *AllocatorTraits)
Initializes user defined allocators specified in the uses_allocators clauses.
virtual void registerVTable(const OMPExecutableDirective &D)
Emit code for registering vtable by scanning through map clause in OpenMP target region.
llvm::Type * KmpRoutineEntryPtrTy
Type typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *);.
llvm::Type * getIdentTyPointerTy()
Returns pointer to ident_t type.
void emitSingleReductionCombiner(CodeGenFunction &CGF, const Expr *ReductionOp, const Expr *PrivateRef, const DeclRefExpr *LHS, const DeclRefExpr *RHS)
Emits single reduction combiner.
llvm::OpenMPIRBuilder & getOMPBuilder()
virtual void emitTargetOutlinedFunction(const OMPExecutableDirective &D, StringRef ParentName, llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, bool IsOffloadEntry, const RegionCodeGenTy &CodeGen)
Emit outlined function for 'target' directive.
virtual void emitCriticalRegion(CodeGenFunction &CGF, StringRef CriticalName, const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc, const Expr *Hint=nullptr)
Emits a critical region.
virtual void emitForOrderedIterationEnd(CodeGenFunction &CGF, SourceLocation Loc, unsigned IVSize, bool IVSigned)
Call the appropriate runtime routine to notify that we finished iteration of the ordered loop with th...
virtual void emitOutlinedFunctionCall(CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn, ArrayRef< llvm::Value * > Args={}) const
Emits call of the outlined function with the provided arguments, translating these arguments to corre...
llvm::Value * emitNumThreadsForTargetDirective(CodeGenFunction &CGF, const OMPExecutableDirective &D)
Emit an expression that denotes the number of threads a target region shall use.
void emitThreadPrivateVarInit(CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor, llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc)
Emits initialization code for the threadprivate variables.
virtual void emitUserDefinedReduction(CodeGenFunction *CGF, const OMPDeclareReductionDecl *D)
Emit code for the specified user defined reduction construct.
virtual void checkAndEmitSharedLastprivateConditional(CodeGenFunction &CGF, const OMPExecutableDirective &D, const llvm::DenseSet< CanonicalDeclPtr< const VarDecl > > &IgnoredDecls)
Checks if the lastprivate conditional was updated in inner region and writes the value.
QualType KmpDimTy
struct kmp_dim { // loop bounds info casted to kmp_int64 kmp_int64 lo; // lower kmp_int64 up; // uppe...
virtual void emitInlinedDirective(CodeGenFunction &CGF, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen, bool HasCancel=false)
Emit code for the directive that does not require outlining.
virtual void registerTargetGlobalVariable(const VarDecl *VD, llvm::Constant *Addr)
Checks if the provided global decl GD is a declare target variable and registers it when emitting cod...
virtual void emitFunctionProlog(CodeGenFunction &CGF, const Decl *D)
Emits OpenMP-specific function prolog.
void emitKmpRoutineEntryT(QualType KmpInt32Ty)
Build type kmp_routine_entry_t (if not built yet).
virtual bool isStaticChunked(OpenMPScheduleClauseKind ScheduleKind, bool Chunked) const
Check if the specified ScheduleKind is static chunked.
virtual void emitTargetCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond, llvm::PointerIntPair< const Expr *, 2, OpenMPDeviceClauseModifier > Device, llvm::function_ref< llvm::Value *(CodeGenFunction &CGF, const OMPLoopDirective &D)> SizeEmitter)
Emit the target offloading code associated with D.
virtual bool hasAllocateAttributeForGlobalVar(const VarDecl *VD, LangAS &AS)
Checks if the variable has associated OMPAllocateDeclAttr attribute with the predefined allocator and...
llvm::AtomicOrdering RequiresAtomicOrdering
Atomic ordering from the omp requires directive.
virtual void emitReduction(CodeGenFunction &CGF, SourceLocation Loc, ArrayRef< const Expr * > Privates, ArrayRef< const Expr * > LHSExprs, ArrayRef< const Expr * > RHSExprs, ArrayRef< const Expr * > ReductionOps, ReductionOptionsTy Options)
Emit code for the reduction clause.
std::pair< llvm::Value *, Address > emitDependClause(CodeGenFunction &CGF, ArrayRef< OMPTaskDataTy::DependData > Dependencies, SourceLocation Loc)
Emits list of dependencies based on the provided data (array of dependence/expression pairs).
llvm::StringMap< llvm::WeakTrackingVH > EmittedNonTargetVariables
List of the global variables with their addresses that should not be emitted for the target.
virtual bool isDynamic(OpenMPScheduleClauseKind ScheduleKind) const
Check if the specified ScheduleKind is dynamic.
Address emitLastprivateConditionalInit(CodeGenFunction &CGF, const VarDecl *VD)
Create specialized alloca to handle lastprivate conditionals.
virtual void emitOrderedRegion(CodeGenFunction &CGF, const RegionCodeGenTy &OrderedOpGen, SourceLocation Loc, bool IsThreads)
Emit an ordered region.
virtual Address getAddressOfLocalVariable(CodeGenFunction &CGF, const VarDecl *VD)
Gets the OpenMP-specific address of the local variable.
virtual void emitTaskReductionFini(CodeGenFunction &CGF, SourceLocation Loc, bool IsWorksharingReduction)
Emits the following code for reduction clause with task modifier:
virtual void emitMaskedRegion(CodeGenFunction &CGF, const RegionCodeGenTy &MaskedOpGen, SourceLocation Loc, const Expr *Filter=nullptr)
Emits a masked region.
QualType KmpDependInfoTy
Type typedef struct kmp_depend_info { kmp_intptr_t base_addr; size_t len; struct { bool in:1; bool ou...
llvm::Function * emitReductionFunction(StringRef ReducerName, SourceLocation Loc, llvm::Type *ArgsElemType, ArrayRef< const Expr * > Privates, ArrayRef< const Expr * > LHSExprs, ArrayRef< const Expr * > RHSExprs, ArrayRef< const Expr * > ReductionOps)
Emits reduction function.
virtual void emitForDispatchInit(CodeGenFunction &CGF, SourceLocation Loc, const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned, bool Ordered, const DispatchRTInput &DispatchValues)
Call the appropriate runtime routine to initialize it before start of loop.
Address getTaskReductionItem(CodeGenFunction &CGF, SourceLocation Loc, llvm::Value *ReductionsPtr, LValue SharedLVal) override
Get the address of void * type of the private copy of the reduction item specified by the SharedLVal...
void emitCriticalRegion(CodeGenFunction &CGF, StringRef CriticalName, const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc, const Expr *Hint=nullptr) override
Emits a critical region.
void emitDistributeStaticInit(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) override
void emitForStaticInit(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind, const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) override
Call the appropriate runtime routine to initialize it before start of loop.
bool emitTargetGlobalVariable(GlobalDecl GD) override
Emit the global variable if it is a valid device global variable.
llvm::Value * emitForNext(CodeGenFunction &CGF, SourceLocation Loc, unsigned IVSize, bool IVSigned, Address IL, Address LB, Address UB, Address ST) override
Call __kmpc_dispatch_next( ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter, kmp_int[32|64] *p_lowe...
llvm::Function * emitThreadPrivateVarDefinition(const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit, CodeGenFunction *CGF=nullptr) override
Emit code for initialization of a threadprivate variable.
void emitTargetDataStandAloneCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, const Expr *Device) override
Emit the data mapping/movement code associated with the directive D that should be of the form 'targe...
llvm::Function * emitTeamsOutlinedFunction(CodeGenFunction &CGF, const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) override
Emits outlined function for the specified OpenMP teams directive D.
void emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc, llvm::Function *OutlinedFn, ArrayRef< llvm::Value * > CapturedVars, const Expr *IfCond, llvm::Value *NumThreads, OpenMPNumThreadsClauseModifier NumThreadsModifier=OMPC_NUMTHREADS_unknown, OpenMPSeverityClauseKind Severity=OMPC_SEVERITY_fatal, const Expr *Message=nullptr) override
Emits code for parallel or serial call of the OutlinedFn with variables captured in a record which ad...
void emitReduction(CodeGenFunction &CGF, SourceLocation Loc, ArrayRef< const Expr * > Privates, ArrayRef< const Expr * > LHSExprs, ArrayRef< const Expr * > RHSExprs, ArrayRef< const Expr * > ReductionOps, ReductionOptionsTy Options) override
Emit code for the reduction clause.
void emitFlush(CodeGenFunction &CGF, ArrayRef< const Expr * > Vars, SourceLocation Loc, llvm::AtomicOrdering AO) override
Emit flush of the variables specified in 'omp flush' directive.
void emitDoacrossOrdered(CodeGenFunction &CGF, const OMPDependClause *C) override
Emit code for doacross ordered directive with 'depend' clause.
void emitTaskyieldCall(CodeGenFunction &CGF, SourceLocation Loc) override
Emits code for a taskyield directive.
Address getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF, QualType VarType, StringRef Name) override
Creates artificial threadprivate variable with name Name and type VarType.
Address getAddrOfThreadPrivate(CodeGenFunction &CGF, const VarDecl *VD, Address VDAddr, SourceLocation Loc) override
Returns address of the threadprivate variable for the current thread.
void emitSingleRegion(CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen, SourceLocation Loc, ArrayRef< const Expr * > CopyprivateVars, ArrayRef< const Expr * > DestExprs, ArrayRef< const Expr * > SrcExprs, ArrayRef< const Expr * > AssignmentOps) override
Emits a single region.
void emitTaskReductionFixups(CodeGenFunction &CGF, SourceLocation Loc, ReductionCodeGen &RCG, unsigned N) override
Required to resolve existing problems in the runtime.
llvm::Function * emitParallelOutlinedFunction(CodeGenFunction &CGF, const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) override
Emits outlined function for the specified OpenMP parallel directive D.
void emitCancellationPointCall(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind CancelRegion) override
Emit code for 'cancellation point' construct.
void emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind Kind, bool EmitChecks=true, bool ForceSimpleCall=false) override
Emit an implicit/explicit barrier for OpenMP threads.
Address getParameterAddress(CodeGenFunction &CGF, const VarDecl *NativeParam, const VarDecl *TargetParam) const override
Gets the address of the native argument basing on the address of the target-specific parameter.
void emitTeamsCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, SourceLocation Loc, llvm::Function *OutlinedFn, ArrayRef< llvm::Value * > CapturedVars) override
Emits code for teams call of the OutlinedFn with variables captured in a record which address is stor...
void emitForOrderedIterationEnd(CodeGenFunction &CGF, SourceLocation Loc, unsigned IVSize, bool IVSigned) override
Call the appropriate runtime routine to notify that we finished iteration of the ordered loop with th...
bool emitTargetGlobal(GlobalDecl GD) override
Emit the global GD if it is meaningful for the target.
void emitTaskReductionFini(CodeGenFunction &CGF, SourceLocation Loc, bool IsWorksharingReduction) override
Emits the following code for reduction clause with task modifier:
void emitOrderedRegion(CodeGenFunction &CGF, const RegionCodeGenTy &OrderedOpGen, SourceLocation Loc, bool IsThreads) override
Emit an ordered region.
void emitForStaticFinish(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind) override
Call the appropriate runtime routine to notify that we finished all the work with current loop.
llvm::Value * emitTaskReductionInit(CodeGenFunction &CGF, SourceLocation Loc, ArrayRef< const Expr * > LHSExprs, ArrayRef< const Expr * > RHSExprs, const OMPTaskDataTy &Data) override
Emit code for initialization of the task reduction clause.
void emitProcBindClause(CodeGenFunction &CGF, llvm::omp::ProcBindKind ProcBind, SourceLocation Loc) override
Emit call to void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid, int proc_bind) to generate...
void emitTargetOutlinedFunction(const OMPExecutableDirective &D, StringRef ParentName, llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) override
Emit outlined function for 'target' directive.
void emitMasterRegion(CodeGenFunction &CGF, const RegionCodeGenTy &MasterOpGen, SourceLocation Loc) override
Emits a master region.
void emitNumTeamsClause(CodeGenFunction &CGF, const Expr *NumTeams, const Expr *ThreadLimit, SourceLocation Loc) override
Emits call to void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_teams,...
void emitForDispatchDeinit(CodeGenFunction &CGF, SourceLocation Loc) override
This is used for non static scheduled types and when the ordered clause is present on the loop constr...
const VarDecl * translateParameter(const FieldDecl *FD, const VarDecl *NativeParam) const override
Translates the native parameter of outlined function if this is required for target.
void emitNumThreadsClause(CodeGenFunction &CGF, llvm::Value *NumThreads, SourceLocation Loc, OpenMPNumThreadsClauseModifier Modifier=OMPC_NUMTHREADS_unknown, OpenMPSeverityClauseKind Severity=OMPC_SEVERITY_fatal, SourceLocation SeverityLoc=SourceLocation(), const Expr *Message=nullptr, SourceLocation MessageLoc=SourceLocation()) override
Emits call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_threads) ...
void emitMaskedRegion(CodeGenFunction &CGF, const RegionCodeGenTy &MaskedOpGen, SourceLocation Loc, const Expr *Filter=nullptr) override
Emits a masked region.
void emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, const OMPExecutableDirective &D, llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds, const Expr *IfCond, const OMPTaskDataTy &Data) override
Emit task region for the task directive.
void emitTargetCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond, llvm::PointerIntPair< const Expr *, 2, OpenMPDeviceClauseModifier > Device, llvm::function_ref< llvm::Value *(CodeGenFunction &CGF, const OMPLoopDirective &D)> SizeEmitter) override
Emit the target offloading code associated with D.
bool emitTargetFunctions(GlobalDecl GD) override
Emit the target regions enclosed in GD function definition or the function itself in case it is a val...
void emitDoacrossInit(CodeGenFunction &CGF, const OMPLoopDirective &D, ArrayRef< Expr * > NumIterations) override
Emit initialization for doacross loop nesting support.
void emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc, const Expr *IfCond, OpenMPDirectiveKind CancelRegion) override
Emit code for 'cancel' construct.
void emitTaskwaitCall(CodeGenFunction &CGF, SourceLocation Loc, const OMPTaskDataTy &Data) override
Emit code for 'taskwait' directive.
void emitTaskgroupRegion(CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen, SourceLocation Loc) override
Emit a taskgroup region.
void emitTargetDataCalls(CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, const Expr *Device, const RegionCodeGenTy &CodeGen, CGOpenMPRuntime::TargetDataInfo &Info) override
Emit the target data mapping code associated with D.
void emitForDispatchInit(CodeGenFunction &CGF, SourceLocation Loc, const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned, bool Ordered, const DispatchRTInput &DispatchValues) override
This is used for non static scheduled types and when the ordered clause is present on the loop constr...
llvm::Function * emitTaskOutlinedFunction(const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, const VarDecl *PartIDVar, const VarDecl *TaskTVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen, bool Tied, unsigned &NumberOfParts) override
Emits outlined function for the OpenMP task directive D.
void emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D, llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds, const Expr *IfCond, const OMPTaskDataTy &Data) override
Emit task region for the taskloop directive.
unsigned getNonVirtualBaseLLVMFieldNo(const CXXRecordDecl *RD) const
llvm::StructType * getLLVMType() const
Return the "complete object" LLVM type associated with this record.
llvm::StructType * getBaseSubobjectLLVMType() const
Return the "base subobject" LLVM type associated with this record.
unsigned getLLVMFieldNo(const FieldDecl *FD) const
Return llvm::StructType element number that corresponds to the field FD.
unsigned getVirtualBaseIndex(const CXXRecordDecl *base) const
Return the LLVM field index corresponding to the given virtual base.
API for captured statement code generation.
virtual void EmitBody(CodeGenFunction &CGF, const Stmt *S)
Emit the captured statement body.
virtual const FieldDecl * lookup(const VarDecl *VD) const
Lookup the captured field decl for a variable.
RAII for correct setting/restoring of CapturedStmtInfo.
The scope used to remap some variables as private in the OpenMP loop body (or other captured region e...
bool Privatize()
Privatizes local variables previously registered as private.
bool addPrivate(const VarDecl *LocalVD, Address Addr)
Registers LocalVD variable as a private with Addr as the address of the corresponding private variabl...
An RAII object to set (and then clear) a mapping for an OpaqueValueExpr.
Enters a new scope for capturing cleanups, all of which will be executed once the scope is exited.
CodeGenFunction - This class organizes the per-function state that is used while generating LLVM code...
LValue EmitLoadOfReferenceLValue(LValue RefLVal)
Definition CGExpr.cpp:3420
void EmitBranchOnBoolExpr(const Expr *Cond, llvm::BasicBlock *TrueBlock, llvm::BasicBlock *FalseBlock, uint64_t TrueCount, Stmt::Likelihood LH=Stmt::LH_None, const Expr *ConditionalOp=nullptr, const VarDecl *ConditionalDecl=nullptr)
EmitBranchOnBoolExpr - Emit a branch on a boolean condition (e.g.
void emitDestroy(Address addr, QualType type, Destroyer *destroyer, bool useEHCleanupForArray)
emitDestroy - Immediately perform the destruction of the given object.
Definition CGDecl.cpp:2415
JumpDest getJumpDestInCurrentScope(llvm::BasicBlock *Target)
The given basic block lies in the current EH scope, but may be a target of a potentially scope-crossi...
static void EmitOMPTargetParallelDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetParallelDirective &S)
void EmitNullInitialization(Address DestPtr, QualType Ty)
EmitNullInitialization - Generate code to set a value of the given type to null, if the type contains...
CGCapturedStmtInfo * CapturedStmtInfo
ComplexPairTy EmitLoadOfComplex(LValue src, SourceLocation loc)
EmitLoadOfComplex - Load a complex number from the specified l-value.
static void EmitOMPTargetDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetDirective &S)
Emit device code for the target directive.
static void EmitOMPTargetTeamsDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetTeamsDirective &S)
Emit device code for the target teams directive.
static void EmitOMPTargetTeamsDistributeDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetTeamsDistributeDirective &S)
Emit device code for the target teams distribute directive.
llvm::Function * GenerateOpenMPCapturedStmtFunctionAggregate(const CapturedStmt &S, const OMPExecutableDirective &D)
llvm::BasicBlock * createBasicBlock(const Twine &name="", llvm::Function *parent=nullptr, llvm::BasicBlock *before=nullptr)
createBasicBlock - Create an LLVM basic block.
const LangOptions & getLangOpts() const
AutoVarEmission EmitAutoVarAlloca(const VarDecl &var)
EmitAutoVarAlloca - Emit the alloca and debug information for a local variable.
Definition CGDecl.cpp:1483
void pushDestroy(QualType::DestructionKind dtorKind, Address addr, QualType type)
pushDestroy - Push the standard destructor for the given type as at least a normal cleanup.
Definition CGDecl.cpp:2299
Address EmitLoadOfPointer(Address Ptr, const PointerType *PtrTy, LValueBaseInfo *BaseInfo=nullptr, TBAAAccessInfo *TBAAInfo=nullptr)
Load a pointer with type PtrTy stored at address Ptr.
Definition CGExpr.cpp:3429
void EmitBranchThroughCleanup(JumpDest Dest)
EmitBranchThroughCleanup - Emit a branch from the current insert block through the normal cleanup han...
const Decl * CurCodeDecl
CurCodeDecl - This is the inner-most code context, which includes blocks.
Destroyer * getDestroyer(QualType::DestructionKind destructionKind)
Definition CGDecl.cpp:2272
llvm::AssertingVH< llvm::Instruction > AllocaInsertPt
AllocaInsertPoint - This is an instruction in the entry block before which we prefer to insert alloca...
void EmitAggregateAssign(LValue Dest, LValue Src, QualType EltTy)
Emit an aggregate assignment.
JumpDest ReturnBlock
ReturnBlock - Unified return block.
void EmitAggregateCopy(LValue Dest, LValue Src, QualType EltTy, AggValueSlot::Overlap_t MayOverlap, bool isVolatile=false)
EmitAggregateCopy - Emit an aggregate copy.
LValue EmitLValueForField(LValue Base, const FieldDecl *Field, bool IsInBounds=true)
Definition CGExpr.cpp:5765
RawAddress CreateDefaultAlignTempAlloca(llvm::Type *Ty, const Twine &Name="tmp")
CreateDefaultAlignTempAlloca - This creates an alloca with the default ABI alignment of the given L...
Definition CGExpr.cpp:181
void GenerateOpenMPCapturedVars(const CapturedStmt &S, SmallVectorImpl< llvm::Value * > &CapturedVars)
void EmitIgnoredExpr(const Expr *E)
EmitIgnoredExpr - Emit an expression in a context which ignores the result.
Definition CGExpr.cpp:257
RValue EmitLoadOfLValue(LValue V, SourceLocation Loc)
EmitLoadOfLValue - Given an expression that represents a value lvalue, this method emits the address ...
Definition CGExpr.cpp:2524
LValue EmitArraySectionExpr(const ArraySectionExpr *E, bool IsLowerBound=true)
Definition CGExpr.cpp:5274
LValue EmitOMPSharedLValue(const Expr *E)
Emits the lvalue for the expression with possibly captured variable.
void StartFunction(GlobalDecl GD, QualType RetTy, llvm::Function *Fn, const CGFunctionInfo &FnInfo, const FunctionArgList &Args, SourceLocation Loc=SourceLocation(), SourceLocation StartLoc=SourceLocation())
Emit code for the start of a function.
void EmitOMPCopy(QualType OriginalType, Address DestAddr, Address SrcAddr, const VarDecl *DestVD, const VarDecl *SrcVD, const Expr *Copy)
Emit proper copying of data from one variable to another.
llvm::Value * EvaluateExprAsBool(const Expr *E)
EvaluateExprAsBool - Perform the usual unary conversions on the specified expression and compare the ...
Definition CGExpr.cpp:238
JumpDest getOMPCancelDestination(OpenMPDirectiveKind Kind)
llvm::Value * emitArrayLength(const ArrayType *arrayType, QualType &baseType, Address &addr)
emitArrayLength - Compute the length of an array, even if it's a VLA, and drill down to the base elem...
void EmitOMPAggregateAssign(Address DestAddr, Address SrcAddr, QualType OriginalType, const llvm::function_ref< void(Address, Address)> CopyGen)
Perform element by element copying of arrays with type OriginalType from SrcAddr to DestAddr using co...
bool HaveInsertPoint() const
HaveInsertPoint - True if an insertion point is defined.
llvm::Value * getTypeSize(QualType Ty)
Returns calculated size of the specified type.
LValue MakeRawAddrLValue(llvm::Value *V, QualType T, CharUnits Alignment, AlignmentSource Source=AlignmentSource::Type)
Same as MakeAddrLValue above except that the pointer is known to be unsigned.
LValue EmitLValueForFieldInitialization(LValue Base, const FieldDecl *Field)
EmitLValueForFieldInitialization - Like EmitLValueForField, except that if the Field is a reference,...
Definition CGExpr.cpp:5939
RawAddress CreateMemTempWithoutCast(QualType T, const Twine &Name="tmp")
CreateMemTempWithoutCast - Create a temporary memory object of the given type, with appropriate alignment without...
Definition CGExpr.cpp:230
VlaSizePair getVLASize(const VariableArrayType *vla)
Returns an LLVM value that corresponds to the size, in non-variably-sized elements,...
llvm::CallInst * EmitNounwindRuntimeCall(llvm::FunctionCallee callee, const Twine &name="")
llvm::Value * EmitLoadOfScalar(Address Addr, bool Volatile, QualType Ty, SourceLocation Loc, AlignmentSource Source=AlignmentSource::Type, bool isNontemporal=false)
EmitLoadOfScalar - Load a scalar value from an address, taking care to appropriately convert from the...
void EmitStoreOfComplex(ComplexPairTy V, LValue dest, bool isInit)
EmitStoreOfComplex - Store a complex number into the specified l-value.
const Decl * CurFuncDecl
CurFuncDecl - Holds the Decl for the current outermost non-closure context.
void EmitAutoVarCleanups(const AutoVarEmission &emission)
Definition CGDecl.cpp:2218
void EmitStoreThroughLValue(RValue Src, LValue Dst, bool isInit=false)
EmitStoreThroughLValue - Store the specified rvalue into the specified lvalue, where both are guarant...
Definition CGExpr.cpp:2776
LValue EmitLoadOfPointerLValue(Address Ptr, const PointerType *PtrTy)
Definition CGExpr.cpp:3439
void EmitAnyExprToMem(const Expr *E, Address Location, Qualifiers Quals, bool IsInitializer)
EmitAnyExprToMem - Emits the code necessary to evaluate an arbitrary expression into the given memory...
Definition CGExpr.cpp:308
bool needsEHCleanup(QualType::DestructionKind kind)
Determines whether an EH cleanup is required to destroy a type with the given destruction kind.
llvm::DenseMap< const ValueDecl *, FieldDecl * > LambdaCaptureFields
llvm::CallInst * EmitRuntimeCall(llvm::FunctionCallee callee, const Twine &name="")
llvm::Type * ConvertTypeForMem(QualType T)
static void EmitOMPTargetTeamsDistributeParallelForDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetTeamsDistributeParallelForDirective &S)
static void EmitOMPTargetParallelForSimdDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetParallelForSimdDirective &S)
Emit device code for the target parallel for simd directive.
CodeGenTypes & getTypes() const
static TypeEvaluationKind getEvaluationKind(QualType T)
getEvaluationKind - Return the TypeEvaluationKind of QualType T.
void EmitOMPTargetTaskBasedDirective(const OMPExecutableDirective &S, const RegionCodeGenTy &BodyGen, OMPTargetDataInfo &InputInfo)
Address EmitPointerWithAlignment(const Expr *Addr, LValueBaseInfo *BaseInfo=nullptr, TBAAAccessInfo *TBAAInfo=nullptr, KnownNonNull_t IsKnownNonNull=NotKnownNonNull)
EmitPointerWithAlignment - Given an expression with a pointer type, emit the value and compute our be...
Definition CGExpr.cpp:1600
static void EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetTeamsDistributeParallelForSimdDirective &S)
Emit device code for the target teams distribute parallel for simd directive.
void EmitBranch(llvm::BasicBlock *Block)
EmitBranch - Emit a branch to the specified basic block from the current insert block,...
Definition CGStmt.cpp:663
llvm::Function * GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S, const OMPExecutableDirective &D)
RawAddress CreateMemTemp(QualType T, const Twine &Name="tmp", RawAddress *Alloca=nullptr)
CreateMemTemp - Create a temporary memory object of the given type, with appropriate alignment and cas...
Definition CGExpr.cpp:194
void EmitVarDecl(const VarDecl &D)
EmitVarDecl - Emit a local variable declaration.
Definition CGDecl.cpp:204
llvm::Value * EmitCheckedInBoundsGEP(llvm::Type *ElemTy, llvm::Value *Ptr, ArrayRef< llvm::Value * > IdxList, bool SignedIndices, bool IsSubtraction, SourceLocation Loc, const Twine &Name="")
Same as IRBuilder::CreateInBoundsGEP, but additionally emits a check to detect undefined behavior whe...
static void EmitOMPTargetParallelGenericLoopDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetParallelGenericLoopDirective &S)
Emit device code for the target parallel loop directive.
llvm::Value * EmitScalarExpr(const Expr *E, bool IgnoreResultAssign=false)
EmitScalarExpr - Emit the computation of the specified expression of LLVM scalar type,...
static bool IsWrappedCXXThis(const Expr *E)
Check if E is a C++ "this" pointer wrapped in value-preserving casts.
Definition CGExpr.cpp:1658
LValue MakeAddrLValue(Address Addr, QualType T, AlignmentSource Source=AlignmentSource::Type)
void FinishFunction(SourceLocation EndLoc=SourceLocation())
FinishFunction - Complete IR generation of the current function.
void EmitAtomicStore(RValue rvalue, LValue lvalue, bool isInit)
static void EmitOMPTargetSimdDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetSimdDirective &S)
Emit device code for the target simd directive.
static void EmitOMPTargetParallelForDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetParallelForDirective &S)
Emit device code for the target parallel for directive.
Address GetAddrOfLocalVar(const VarDecl *VD)
GetAddrOfLocalVar - Return the address of a local variable.
bool ConstantFoldsToSimpleInteger(const Expr *Cond, bool &Result, bool AllowLabels=false)
ConstantFoldsToSimpleInteger - If the specified expression does not fold to a constant,...
static void EmitOMPTargetTeamsGenericLoopDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetTeamsGenericLoopDirective &S)
Emit device code for the target teams loop directive.
LValue EmitMemberExpr(const MemberExpr *E)
Definition CGExpr.cpp:5543
std::pair< llvm::Value *, llvm::Value * > ComplexPairTy
Address ReturnValue
ReturnValue - The temporary alloca to hold the return value.
LValue EmitLValue(const Expr *E, KnownNonNull_t IsKnownNonNull=NotKnownNonNull)
EmitLValue - Emit code to compute a designator that specifies the location of the expression.
Definition CGExpr.cpp:1716
void incrementProfileCounter(const Stmt *S, llvm::Value *StepV=nullptr)
Increment the profiler's counter for the given statement by StepV.
static void EmitOMPTargetTeamsDistributeSimdDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetTeamsDistributeSimdDirective &S)
Emit device code for the target teams distribute simd directive.
llvm::Value * EmitScalarConversion(llvm::Value *Src, QualType SrcTy, QualType DstTy, SourceLocation Loc)
Emit a conversion from the specified type to the specified destination type, both of which are LLVM s...
void EmitVariablyModifiedType(QualType Ty)
EmitVariablyModifiedType - Capture all the sizes for the VLA expressions in the given variably-modified type and s...
bool isTrivialInitializer(const Expr *Init)
Determine whether the given initializer is trivial in the sense that it requires no code to be genera...
Definition CGDecl.cpp:1823
void EmitStoreOfScalar(llvm::Value *Value, Address Addr, bool Volatile, QualType Ty, AlignmentSource Source=AlignmentSource::Type, bool isInit=false, bool isNontemporal=false)
EmitStoreOfScalar - Store a scalar value to an address, taking care to appropriately convert from the...
void EmitBlock(llvm::BasicBlock *BB, bool IsFinished=false)
EmitBlock - Emit the given block.
Definition CGStmt.cpp:643
void EmitExprAsInit(const Expr *init, const ValueDecl *D, LValue lvalue, bool capturedByInit)
EmitExprAsInit - Emits the code necessary to initialize a location in memory with the given initializ...
Definition CGDecl.cpp:2108
LValue MakeNaturalAlignRawAddrLValue(llvm::Value *V, QualType T)
This class organizes the cross-function state that is used while generating LLVM code.
void SetInternalFunctionAttributes(GlobalDecl GD, llvm::Function *F, const CGFunctionInfo &FI)
Set the attributes on the LLVM function for the given decl and function info.
llvm::Module & getModule() const
const IntrusiveRefCntPtr< llvm::vfs::FileSystem > & getFileSystem() const
DiagnosticsEngine & getDiags() const
const LangOptions & getLangOpts() const
CharUnits getNaturalTypeAlignment(QualType T, LValueBaseInfo *BaseInfo=nullptr, TBAAAccessInfo *TBAAInfo=nullptr, bool forPointeeType=false)
CGOpenMPRuntime & getOpenMPRuntime()
Return a reference to the configured OpenMP runtime.
TBAAAccessInfo getTBAAInfoForSubobject(LValue Base, QualType AccessType)
getTBAAInfoForSubobject - Get TBAA information for an access with a given base lvalue.
ASTContext & getContext() const
const CodeGenOptions & getCodeGenOpts() const
StringRef getMangledName(GlobalDecl GD)
std::optional< CharUnits > getOMPAllocateAlignment(const VarDecl *VD)
Return the alignment specified in an allocate directive, if present.
Definition CGDecl.cpp:2967
llvm::Constant * EmitNullConstant(QualType T)
Return the result of value-initializing the given type, i.e.
llvm::Type * ConvertType(QualType T)
ConvertType - Convert type T into a llvm::Type.
llvm::FunctionType * GetFunctionType(const CGFunctionInfo &Info)
GetFunctionType - Get the LLVM function type for.
Definition CGCall.cpp:1873
const CGFunctionInfo & arrangeBuiltinFunctionDeclaration(QualType resultType, const FunctionArgList &args)
A builtin function is a freestanding function using the default C conventions.
Definition CGCall.cpp:747
const CGRecordLayout & getCGRecordLayout(const RecordDecl *)
getCGRecordLayout - Return record layout info for the given record decl.
llvm::GlobalVariable * GetAddrOfVTable(const CXXRecordDecl *RD)
GetAddrOfVTable - Get the address of the VTable for the given record decl.
Definition CGVTables.cpp:41
A specialization of Address that requires the address to be an LLVM Constant.
Definition Address.h:296
static ConstantAddress invalid()
Definition Address.h:304
void pushTerminate()
Push a terminate handler on the stack.
void popTerminate()
Pops a terminate handler off the stack.
Definition CGCleanup.h:646
FunctionArgList - Type for representing both the decl and type of parameters to a function.
Definition CGCall.h:375
LValue - This represents an lvalue reference.
Definition CGValue.h:183
CharUnits getAlignment() const
Definition CGValue.h:355
llvm::Value * getPointer(CodeGenFunction &CGF) const
const Qualifiers & getQuals() const
Definition CGValue.h:350
Address getAddress() const
Definition CGValue.h:373
LValueBaseInfo getBaseInfo() const
Definition CGValue.h:358
QualType getType() const
Definition CGValue.h:303
TBAAAccessInfo getTBAAInfo() const
Definition CGValue.h:347
A basic class for pre|post-action for advanced codegen sequence for OpenMP region.
virtual void Enter(CodeGenFunction &CGF)
RValue - This trivial value class is used to represent the result of an expression that is evaluated.
Definition CGValue.h:42
static RValue get(llvm::Value *V)
Definition CGValue.h:99
static RValue getComplex(llvm::Value *V1, llvm::Value *V2)
Definition CGValue.h:109
llvm::Value * getScalarVal() const
getScalarVal() - Return the Value* of this scalar value.
Definition CGValue.h:72
An abstract representation of an aligned address.
Definition Address.h:42
llvm::Type * getElementType() const
Return the type of the values stored in this address.
Definition Address.h:77
llvm::Value * getPointer() const
Definition Address.h:66
static RawAddress invalid()
Definition Address.h:61
Class intended to support codegen of all kinds of reduction clauses.
LValue getSharedLValue(unsigned N) const
Returns LValue for the reduction item.
const Expr * getRefExpr(unsigned N) const
Returns the reference expression of the reduction item.
LValue getOrigLValue(unsigned N) const
Returns LValue for the original reduction item.
bool needCleanups(unsigned N)
Returns true if the private copy requires cleanups.
void emitAggregateType(CodeGenFunction &CGF, unsigned N)
Emits the code for the variable-modified type, if required.
const VarDecl * getBaseDecl(unsigned N) const
Returns the base declaration of the reduction item.
QualType getPrivateType(unsigned N) const
Return the type of the private item.
bool usesReductionInitializer(unsigned N) const
Returns true if the initialization of the reduction item uses initializer from declare reduction cons...
void emitSharedOrigLValue(CodeGenFunction &CGF, unsigned N)
Emits lvalue for the shared and original reduction item.
void emitInitialization(CodeGenFunction &CGF, unsigned N, Address PrivateAddr, Address SharedAddr, llvm::function_ref< bool(CodeGenFunction &)> DefaultInit)
Performs initialization of the private copy for the reduction item.
std::pair< llvm::Value *, llvm::Value * > getSizes(unsigned N) const
Returns the size of the reduction item (in chars and total number of elements in the item),...
ReductionCodeGen(ArrayRef< const Expr * > Shareds, ArrayRef< const Expr * > Origs, ArrayRef< const Expr * > Privates, ArrayRef< const Expr * > ReductionOps)
void emitCleanups(CodeGenFunction &CGF, unsigned N, Address PrivateAddr)
Emits cleanup code for the reduction item.
Address adjustPrivateAddress(CodeGenFunction &CGF, unsigned N, Address PrivateAddr)
Adjusts PrivateAddr for use in place of the original variable address in normal operations.
Class provides a way to call simple version of codegen for OpenMP region, or an advanced with possibl...
void operator()(CodeGenFunction &CGF) const
void setAction(PrePostActionTy &Action) const
ConstStmtVisitor - This class implements a simple visitor for Stmt subclasses.
DeclContext - This is used only as base class of specific decl types that can act as declaration cont...
Definition DeclBase.h:1462
void addDecl(Decl *D)
Add the declaration D into this context.
A reference to a declared variable, function, enum, etc.
Definition Expr.h:1273
ValueDecl * getDecl()
Definition Expr.h:1341
Decl - This represents one declaration (or definition), e.g.
Definition DeclBase.h:86
T * getAttr() const
Definition DeclBase.h:581
bool hasAttrs() const
Definition DeclBase.h:526
ASTContext & getASTContext() const LLVM_READONLY
Definition DeclBase.cpp:547
void addAttr(Attr *A)
virtual Stmt * getBody() const
getBody - If this Decl represents a declaration for a body of code, such as a function or method defi...
Definition DeclBase.h:1100
llvm::iterator_range< specific_attr_iterator< T > > specific_attrs() const
Definition DeclBase.h:567
SourceLocation getLocation() const
Definition DeclBase.h:447
DeclContext * getDeclContext()
Definition DeclBase.h:456
AttrVec & getAttrs()
Definition DeclBase.h:532
bool hasAttr() const
Definition DeclBase.h:585
virtual Decl * getCanonicalDecl()
Retrieves the "canonical" declaration of the given declaration.
Definition DeclBase.h:991
SourceLocation getBeginLoc() const LLVM_READONLY
Definition Decl.h:831
DiagnosticBuilder Report(SourceLocation Loc, unsigned DiagID)
Issue the message to the client.
This represents one expression.
Definition Expr.h:112
bool isIntegerConstantExpr(const ASTContext &Ctx) const
bool isGLValue() const
Definition Expr.h:287
Expr * IgnoreParenNoopCasts(const ASTContext &Ctx) LLVM_READONLY
Skip past any parentheses and casts which do not change the value (including ptr->int casts of the sa...
Definition Expr.cpp:3124
@ SE_AllowSideEffects
Allow any unmodeled side effect.
Definition Expr.h:677
@ SE_AllowUndefinedBehavior
Allow UB that we can give a value, but not arbitrary unmodeled side effects.
Definition Expr.h:675
Expr * IgnoreParenCasts() LLVM_READONLY
Skip past any parentheses and casts which might surround this expression until reaching a fixed point...
Definition Expr.cpp:3102
llvm::APSInt EvaluateKnownConstInt(const ASTContext &Ctx) const
EvaluateKnownConstInt - Call EvaluateAsRValue and return the folded integer.
Expr * IgnoreParenImpCasts() LLVM_READONLY
Skip past any parentheses and implicit casts which might surround this expression until reaching a fi...
Definition Expr.cpp:3097
bool isEvaluatable(const ASTContext &Ctx, SideEffectsKind AllowSideEffects=SE_NoSideEffects) const
isEvaluatable - Call EvaluateAsRValue to see if this expression can be constant folded without side-e...
std::optional< llvm::APSInt > getIntegerConstantExpr(const ASTContext &Ctx) const
getIntegerConstantExpr - Return the value if this expression is a valid integer constant expression.
bool HasSideEffects(const ASTContext &Ctx, bool IncludePossibleEffects=true) const
HasSideEffects - This routine returns true for all those expressions which have any effect other than...
Definition Expr.cpp:3695
bool EvaluateAsBooleanCondition(bool &Result, const ASTContext &Ctx, bool InConstantContext=false) const
EvaluateAsBooleanCondition - Return true if this is a constant which we can fold and convert to a boo...
SourceLocation getExprLoc() const LLVM_READONLY
getExprLoc - Return the preferred location for the arrow when diagnosing a problem with a generic exp...
Definition Expr.cpp:282
static bool isSameComparisonOperand(const Expr *E1, const Expr *E2)
Checks that the two Expr's will refer to the same value as a comparison operand.
Definition Expr.cpp:4327
QualType getType() const
Definition Expr.h:144
bool hasNonTrivialCall(const ASTContext &Ctx) const
Determine whether this expression involves a call to any function that is not trivial.
Definition Expr.cpp:4063
Represents a member of a struct/union/class.
Definition Decl.h:3178
unsigned getFieldIndex() const
Returns the index of this field within its record, as appropriate for passing to ASTRecordLayout::get...
Definition Decl.h:3263
const RecordDecl * getParent() const
Returns the parent of this field declaration, which is the struct in which this field is defined.
Definition Decl.h:3414
static FieldDecl * Create(const ASTContext &C, DeclContext *DC, SourceLocation StartLoc, SourceLocation IdLoc, const IdentifierInfo *Id, QualType T, TypeSourceInfo *TInfo, Expr *BW, bool Mutable, InClassInitStyle InitStyle)
Definition Decl.cpp:4695
Represents a function declaration or definition.
Definition Decl.h:2018
const ParmVarDecl * getParamDecl(unsigned i) const
Definition Decl.h:2815
QualType getReturnType() const
Definition Decl.h:2863
ArrayRef< ParmVarDecl * > parameters() const
Definition Decl.h:2792
FunctionDecl * getCanonicalDecl() override
Retrieves the "canonical" declaration of the given declaration.
Definition Decl.cpp:3742
FunctionDecl * getMostRecentDecl()
Returns the most recent (re)declaration of this declaration.
unsigned getNumParams() const
Return the number of parameters this function must have based on its FunctionType.
Definition Decl.cpp:3821
FunctionDecl * getPreviousDecl()
Return the previous declaration of this declaration or NULL if this is the first declaration.
GlobalDecl - represents a global declaration.
Definition GlobalDecl.h:57
const Decl * getDecl() const
Definition GlobalDecl.h:106
static ImplicitParamDecl * Create(ASTContext &C, DeclContext *DC, SourceLocation IdLoc, const IdentifierInfo *Id, QualType T, ImplicitParamKind ParamKind)
Create implicit parameter.
Definition Decl.cpp:5597
static IntegerLiteral * Create(const ASTContext &C, const llvm::APInt &V, QualType type, SourceLocation l)
Returns a new integer literal with value 'V' and type 'type'.
Definition Expr.cpp:980
An lvalue reference type, per C++11 [dcl.ref].
Definition TypeBase.h:3679
MemberExpr - [C99 6.5.2.3] Structure and Union Members.
Definition Expr.h:3367
ValueDecl * getMemberDecl() const
Retrieve the member declaration to which this expression refers.
Definition Expr.h:3450
Expr * getBase() const
Definition Expr.h:3444
StringRef getName() const
Get the name of identifier for this declaration as a StringRef.
Definition Decl.h:301
bool isExternallyVisible() const
Definition Decl.h:433
const Stmt * getPreInitStmt() const
Get pre-initialization statement for the clause.
This is a basic class for representing single OpenMP clause.
This represents 'pragma omp declare mapper ...' directive.
Definition DeclOpenMP.h:349
Expr * getMapperVarRef()
Get the variable declared in the mapper.
Definition DeclOpenMP.h:411
This represents 'pragma omp declare reduction ...' directive.
Definition DeclOpenMP.h:239
Expr * getInitializer()
Get initializer expression (if specified) of the declare reduction construct.
Definition DeclOpenMP.h:300
Expr * getInitPriv()
Get Priv variable of the initializer.
Definition DeclOpenMP.h:311
Expr * getCombinerOut()
Get Out variable of the combiner.
Definition DeclOpenMP.h:288
Expr * getCombinerIn()
Get In variable of the combiner.
Definition DeclOpenMP.h:285
Expr * getCombiner()
Get combiner expression of the declare reduction construct.
Definition DeclOpenMP.h:282
Expr * getInitOrig()
Get Orig variable of the initializer.
Definition DeclOpenMP.h:308
OMPDeclareReductionInitKind getInitializerKind() const
Get initializer kind.
Definition DeclOpenMP.h:303
This represents 'if' clause in the 'pragma omp ...' directive.
Expr * getCondition() const
Returns condition.
OMPIteratorHelperData & getHelper(unsigned I)
Fetches helper data for the specified iteration space.
Definition Expr.cpp:5600
unsigned numOfIterators() const
Returns number of iterator definitions.
Definition ExprOpenMP.h:275
This represents 'num_threads' clause in the 'pragma omp ...' directive.
This represents 'pragma omp requires...' directive.
Definition DeclOpenMP.h:479
clauselist_range clauselists()
Definition DeclOpenMP.h:504
This represents 'threadset' clause in the 'pragma omp task ...' directive.
OpaqueValueExpr - An expression referring to an opaque object of a fixed type and value class.
Definition Expr.h:1181
Represents a parameter to a function.
Definition Decl.h:1808
PointerType - C99 6.7.5.1 - Pointer Declarators.
Definition TypeBase.h:3390
Represents an unpacked "presumed" location which can be presented to the user.
unsigned getColumn() const
Return the presumed column number of this location.
const char * getFilename() const
Return the presumed filename of this location.
unsigned getLine() const
Return the presumed line number of this location.
A (possibly-)qualified type.
Definition TypeBase.h:937
void addRestrict()
Add the restrict qualifier to this QualType.
Definition TypeBase.h:1187
QualType withRestrict() const
Definition TypeBase.h:1190
bool isNull() const
Return true if this QualType doesn't point to a type yet.
Definition TypeBase.h:1004
const Type * getTypePtr() const
Retrieves a pointer to the underlying (unqualified) type.
Definition TypeBase.h:8445
Qualifiers getQualifiers() const
Retrieve the set of qualifiers applied to this type.
Definition TypeBase.h:8485
QualType getNonReferenceType() const
If Type is a reference type (e.g., const int&), returns the type that the reference refers to ("const...
Definition TypeBase.h:8630
QualType getCanonicalType() const
Definition TypeBase.h:8497
DestructionKind isDestructedType() const
Returns a nonzero value if objects of this type require non-trivial work to clean up after.
Definition TypeBase.h:1560
Represents a struct/union/class.
Definition Decl.h:4343
field_iterator field_end() const
Definition Decl.h:4549
field_range fields() const
Definition Decl.h:4546
virtual void completeDefinition()
Note that the definition of this type is now complete.
Definition Decl.cpp:5286
bool field_empty() const
Definition Decl.h:4554
field_iterator field_begin() const
Definition Decl.cpp:5270
Scope - A scope is a transient data structure that is used while parsing the program.
Definition Scope.h:41
Encodes a location in the source.
static SourceLocation getFromRawEncoding(UIntTy Encoding)
Turn a raw encoding of a SourceLocation object into a real SourceLocation.
bool isValid() const
Return true if this is a valid SourceLocation object.
UIntTy getRawEncoding() const
When a SourceLocation itself cannot be used, this returns an (opaque) 32-bit integer encoding for it.
This class handles loading and caching of source files into memory.
PresumedLoc getPresumedLoc(SourceLocation Loc, bool UseLineDirectives=true) const
Returns the "presumed" location that a SourceLocation specifies.
Stmt - This represents one statement.
Definition Stmt.h:86
child_range children()
Definition Stmt.cpp:304
StmtClass getStmtClass() const
Definition Stmt.h:1503
SourceRange getSourceRange() const LLVM_READONLY
SourceLocation tokens are not useful in isolation - they are low level value objects created/interpre...
Definition Stmt.cpp:343
Stmt * IgnoreContainers(bool IgnoreCaptured=false)
Skip no-op (attributed, compound) container stmts and skip captured stmt at the top,...
Definition Stmt.cpp:210
SourceLocation getBeginLoc() const LLVM_READONLY
Definition Stmt.cpp:355
void startDefinition()
Starts the definition of this tag declaration.
Definition Decl.cpp:4901
bool isUnion() const
Definition Decl.h:3946
The base class of the type hierarchy.
Definition TypeBase.h:1875
bool isVoidType() const
Definition TypeBase.h:9048
const Type * getPointeeOrArrayElementType() const
If this is a pointer type, return the pointee type.
Definition TypeBase.h:9235
bool isSignedIntegerType() const
Return true if this is an integer type that is signed, according to C99 6.2.5p4 [char,...
Definition Type.cpp:2266
CXXRecordDecl * getAsCXXRecordDecl() const
Retrieves the CXXRecordDecl that this type refers to, either because the type is a RecordType or beca...
Definition Type.h:26
RecordDecl * getAsRecordDecl() const
Retrieves the RecordDecl this type refers to.
Definition Type.h:41
bool isArrayType() const
Definition TypeBase.h:8781
bool isPointerType() const
Definition TypeBase.h:8682
CanQualType getCanonicalTypeUnqualified() const
bool isIntegerType() const
isIntegerType() does not include complex integers (a GCC extension).
Definition TypeBase.h:9092
const T * castAs() const
Member-template castAs<specific type>.
Definition TypeBase.h:9342
bool isReferenceType() const
Definition TypeBase.h:8706
QualType getPointeeType() const
If this is a pointer, ObjC object pointer, or block pointer, this returns the respective pointee.
Definition Type.cpp:789
bool isLValueReferenceType() const
Definition TypeBase.h:8710
bool isAggregateType() const
Determines whether the type is a C++ aggregate type or C aggregate or union type.
Definition Type.cpp:2503
RecordDecl * castAsRecordDecl() const
Definition Type.h:48
QualType getCanonicalTypeInternal() const
Definition TypeBase.h:3181
const Type * getBaseElementTypeUnsafe() const
Get the base element type of this type, potentially discarding type qualifiers.
Definition TypeBase.h:9228
bool isVariablyModifiedType() const
Whether this type is a variably-modified type (C99 6.7.5).
Definition TypeBase.h:2862
const ArrayType * getAsArrayTypeUnsafe() const
A variant of getAs<> for array types which silently discards qualifiers from the outermost type.
Definition TypeBase.h:9328
bool isFloatingType() const
Definition Type.cpp:2389
bool isUnsignedIntegerType() const
Return true if this is an integer type that is unsigned, according to C99 6.2.5p6 [which returns true...
Definition Type.cpp:2332
bool isAnyPointerType() const
Definition TypeBase.h:8690
const T * getAs() const
Member-template getAs<specific type>.
Definition TypeBase.h:9275
bool isRecordType() const
Definition TypeBase.h:8809
bool isUnionType() const
Definition Type.cpp:755
Represent the declaration of a variable (in which case it is an lvalue) a function (in which case it ...
Definition Decl.h:712
QualType getType() const
Definition Decl.h:723
Represents a variable declaration or definition.
Definition Decl.h:924
VarDecl * getCanonicalDecl() override
Retrieves the "canonical" declaration of the given declaration.
Definition Decl.cpp:2236
VarDecl * getDefinition(ASTContext &)
Get the real (not just tentative) definition for this declaration.
Definition Decl.cpp:2345
const Expr * getInit() const
Definition Decl.h:1381
bool hasExternalStorage() const
Returns true if a variable has extern or private_extern storage.
Definition Decl.h:1230
@ DeclarationOnly
This declaration is only a declaration.
Definition Decl.h:1308
DefinitionKind hasDefinition(ASTContext &) const
Check whether this variable is defined in this translation unit.
Definition Decl.cpp:2354
bool isLocalVarDeclOrParm() const
Similar to isLocalVarDecl but also includes parameters.
Definition Decl.h:1275
const Expr * getAnyInitializer() const
Get the initializer for this variable, no matter which declaration it is attached to.
Definition Decl.h:1371
Represents a C array with a specified size that is not an integer-constant-expression.
Definition TypeBase.h:4028
Expr * getSizeExpr() const
Definition TypeBase.h:4042
specific_attr_iterator - Iterates over a subrange of an AttrVec, only providing attributes that are o...
Definition SPIR.cpp:35
bool isEmptyRecordForLayout(const ASTContext &Context, QualType T)
isEmptyRecordForLayout - Return true iff a structure contains only empty base classes (per isEmptyRec...
@ Type
The l-value was considered opaque, so the alignment was determined from a type.
Definition CGValue.h:155
@ Decl
The l-value was an access to a declared entity or something equivalently strong, like the address of ...
Definition CGValue.h:146
bool isEmptyFieldForLayout(const ASTContext &Context, const FieldDecl *FD)
isEmptyFieldForLayout - Return true iff the field is "empty", that is, either a zero-width bit-field ...
ComparisonResult
Indicates the result of a tentative comparison.
@ Address
A pointer to a ValueDecl.
Definition Primitives.h:28
The JSON file list parser is used to communicate input to InstallAPI.
bool isOpenMPWorksharingDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a worksharing directive.
CanQual< Type > CanQualType
Represents a canonical, potentially-qualified type.
bool needsTaskBasedThreadLimit(OpenMPDirectiveKind DKind)
Checks if the specified target directive, combined or not, needs task based thread_limit.
@ Match
This is not an overload because the signature exactly matches an existing declaration.
Definition Sema.h:830
@ Ctor_Complete
Complete object ctor.
Definition ABI.h:25
Privates[]
This class represents the 'transparent' clause in the 'pragma omp task' directive.
bool isa(CodeGen::Address addr)
Definition Address.h:330
if(T->getSizeExpr()) TRY_TO(TraverseStmt(const_cast< Expr * >(T -> getSizeExpr())))
bool isOpenMPTargetDataManagementDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a target data offload directive.
static bool classof(const OMPClause *T)
@ Conditional
A conditional (?:) operator.
Definition Sema.h:669
@ ICIS_NoInit
No in-class initializer.
Definition Specifiers.h:273
bool isOpenMPDistributeDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a distribute directive.
@ LCK_ByRef
Capturing by reference.
Definition Lambda.h:37
LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE()
@ Private
'private' clause, allowed on 'parallel', 'serial', 'loop', 'parallel loop', and 'serial loop' constru...
@ Reduction
'reduction' clause, allowed on Parallel, Serial, Loop, and the combined constructs.
@ Present
'present' clause, allowed on Compute and Combined constructs, plus 'data' and 'declare'.
OpenMPScheduleClauseModifier
OpenMP modifiers for 'schedule' clause.
Definition OpenMPKinds.h:39
@ OMPC_SCHEDULE_MODIFIER_last
Definition OpenMPKinds.h:44
@ OMPC_SCHEDULE_MODIFIER_unknown
Definition OpenMPKinds.h:40
@ AS_public
Definition Specifiers.h:125
nullptr
This class represents a compute construct, representing a 'Kind' of 'parallel', 'serial',...
@ CR_OpenMP
bool isOpenMPParallelDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a parallel-kind directive.
OpenMPDistScheduleClauseKind
OpenMP attributes for 'dist_schedule' clause.
Expr * Cond
};
bool isOpenMPTaskingDirective(OpenMPDirectiveKind Kind)
Checks if the specified directive kind is one of the tasking directives - task, taskloop,...
bool isOpenMPTargetExecutionDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a target code offload directive.
@ OMPC_DYN_GROUPPRIVATE_FALLBACK_unknown
@ Result
The result type of a method or function.
Definition TypeBase.h:905
bool isOpenMPTeamsDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a teams-kind directive.
OpenMPDependClauseKind
OpenMP attributes for 'depend' clause.
Definition OpenMPKinds.h:55
@ OMPC_DEPEND_unknown
Definition OpenMPKinds.h:59
@ Dtor_Complete
Complete object dtor.
Definition ABI.h:36
@ Union
The "union" keyword.
Definition TypeBase.h:6001
bool isOpenMPTargetMapEnteringDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a map-entering target directive.
@ Type
The name was classified as a type.
Definition Sema.h:564
bool isOpenMPLoopDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a directive with an associated loop construct.
OpenMPSeverityClauseKind
OpenMP attributes for 'severity' clause.
LangAS
Defines the address space values used by the address space qualifier of QualType.
llvm::omp::Directive OpenMPDirectiveKind
OpenMP directives.
Definition OpenMPKinds.h:25
bool isOpenMPSimdDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a simd directive.
@ VK_PRValue
A pr-value expression (in the C++11 taxonomy) produces a temporary value.
Definition Specifiers.h:136
@ VK_LValue
An l-value expression is a reference to an object with independent storage.
Definition Specifiers.h:140
for(const auto &A :T->param_types())
void getOpenMPCaptureRegions(llvm::SmallVectorImpl< OpenMPDirectiveKind > &CaptureRegions, OpenMPDirectiveKind DKind)
Return the captured regions of an OpenMP directive.
OpenMPNumThreadsClauseModifier
@ OMPC_ATOMIC_DEFAULT_MEM_ORDER_unknown
U cast(CodeGen::Address addr)
Definition Address.h:327
@ OMPC_DEVICE_unknown
Definition OpenMPKinds.h:51
OpenMPMapModifierKind
OpenMP modifier kind for 'map' clause.
Definition OpenMPKinds.h:79
@ OMPC_MAP_MODIFIER_unknown
Definition OpenMPKinds.h:80
@ Other
Other implicit parameter.
Definition Decl.h:1763
OpenMPScheduleClauseKind
OpenMP attributes for 'schedule' clause.
Definition OpenMPKinds.h:31
@ OMPC_SCHEDULE_unknown
Definition OpenMPKinds.h:35
bool isOpenMPTaskLoopDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a taskloop directive.
OpenMPThreadsetKind
OpenMP modifiers for 'threadset' clause.
OpenMPMapClauseKind
OpenMP mapping kind for 'map' clause.
Definition OpenMPKinds.h:71
@ OMPC_MAP_unknown
Definition OpenMPKinds.h:75
unsigned long uint64_t
Diagnostic wrappers for TextAPI types for error reporting.
Definition Dominators.h:30
#define false
Definition stdbool.h:26
Data for list of allocators.
Expr * AllocatorTraits
Allocator traits.
Struct with the values to be passed to the dispatch runtime function.
llvm::Value * Chunk
Chunk size specified using 'schedule' clause (nullptr if chunk was not specified)
Maps the expression for the lastprivate variable to the global copy used to store new value because o...
Struct with the values to be passed to the static runtime function.
bool IVSigned
Sign of the iteration variable.
Address UB
Address of the output variable in which the upper iteration number is returned.
Address IL
Address of the output variable in which the flag of the last iteration is returned.
llvm::Value * Chunk
Value of the chunk for the static_chunked scheduled loop.
unsigned IVSize
Size of the iteration variable in bits.
Address ST
Address of the output variable in which the stride value is returned necessary to generate the stati...
bool Ordered
true if loop is ordered, false otherwise.
Address LB
Address of the output variable in which the lower iteration number is returned.
A jump destination is an abstract label, branching to which may require a jump out through normal cle...
llvm::IntegerType * Int8Ty
i8, i16, i32, and i64
llvm::CallingConv::ID getRuntimeCC() const
SmallVector< const Expr *, 4 > DepExprs
EvalResult is a struct with detailed info about an evaluated expression.
Definition Expr.h:648
Extra information about a function prototype.
Definition TypeBase.h:5454
Expr * CounterUpdate
Updater for the internal counter: ++CounterVD;.
Definition ExprOpenMP.h:121
Scheduling data for loop-based OpenMP directives.
OpenMPScheduleClauseModifier M2
OpenMPScheduleClauseModifier M1
OpenMPScheduleClauseKind Schedule
Describes how types, statements, expressions, and declarations should be printed.