clang 22.0.0git
CGOpenMPRuntime.cpp
Go to the documentation of this file.
1//===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This provides a class for OpenMP runtime code generation.
10//
11//===----------------------------------------------------------------------===//
12
13#include "CGOpenMPRuntime.h"
14#include "ABIInfoImpl.h"
15#include "CGCXXABI.h"
16#include "CGCleanup.h"
17#include "CGDebugInfo.h"
18#include "CGRecordLayout.h"
19#include "CodeGenFunction.h"
20#include "TargetInfo.h"
21#include "clang/AST/APValue.h"
22#include "clang/AST/Attr.h"
23#include "clang/AST/Decl.h"
31#include "llvm/ADT/ArrayRef.h"
32#include "llvm/ADT/SmallSet.h"
33#include "llvm/ADT/SmallVector.h"
34#include "llvm/ADT/StringExtras.h"
35#include "llvm/Bitcode/BitcodeReader.h"
36#include "llvm/IR/Constants.h"
37#include "llvm/IR/DerivedTypes.h"
38#include "llvm/IR/GlobalValue.h"
39#include "llvm/IR/InstrTypes.h"
40#include "llvm/IR/Value.h"
41#include "llvm/Support/AtomicOrdering.h"
42#include "llvm/Support/raw_ostream.h"
43#include <cassert>
44#include <cstdint>
45#include <numeric>
46#include <optional>
47
48using namespace clang;
49using namespace CodeGen;
50using namespace llvm::omp;
51
52namespace {
53/// Base class for handling code generation inside OpenMP regions.
54class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
55public:
56 /// Kinds of OpenMP regions used in codegen.
57 enum CGOpenMPRegionKind {
58 /// Region with outlined function for standalone 'parallel'
59 /// directive.
60 ParallelOutlinedRegion,
61 /// Region with outlined function for standalone 'task' directive.
62 TaskOutlinedRegion,
63 /// Region for constructs that do not require function outlining,
64 /// like 'for', 'sections', 'atomic' etc. directives.
65 InlinedRegion,
66 /// Region with outlined function for standalone 'target' directive.
67 TargetRegion,
68 };
69
70 CGOpenMPRegionInfo(const CapturedStmt &CS,
71 const CGOpenMPRegionKind RegionKind,
72 const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
73 bool HasCancel)
74 : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
75 CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}
76
77 CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
78 const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
79 bool HasCancel)
80 : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
81 Kind(Kind), HasCancel(HasCancel) {}
82
83 /// Get a variable or parameter for storing global thread id
84 /// inside OpenMP construct.
85 virtual const VarDecl *getThreadIDVariable() const = 0;
86
87 /// Emit the captured statement body.
88 void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;
89
90 /// Get an LValue for the current ThreadID variable.
91 /// \return LValue for thread id variable. This LValue always has type int32*.
92 virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);
93
94 virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}
95
96 CGOpenMPRegionKind getRegionKind() const { return RegionKind; }
97
98 OpenMPDirectiveKind getDirectiveKind() const { return Kind; }
99
100 bool hasCancel() const { return HasCancel; }
101
102 static bool classof(const CGCapturedStmtInfo *Info) {
103 return Info->getKind() == CR_OpenMP;
104 }
105
106 ~CGOpenMPRegionInfo() override = default;
107
108protected:
109 CGOpenMPRegionKind RegionKind;
110 RegionCodeGenTy CodeGen;
112 bool HasCancel;
113};
114
115/// API for captured statement code generation in OpenMP constructs.
116class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo {
117public:
118 CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
119 const RegionCodeGenTy &CodeGen,
120 OpenMPDirectiveKind Kind, bool HasCancel,
121 StringRef HelperName)
122 : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
123 HasCancel),
124 ThreadIDVar(ThreadIDVar), HelperName(HelperName) {
125 assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
126 }
127
128 /// Get a variable or parameter for storing global thread id
129 /// inside OpenMP construct.
130 const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }
131
132 /// Get the name of the capture helper.
133 StringRef getHelperName() const override { return HelperName; }
134
135 static bool classof(const CGCapturedStmtInfo *Info) {
136 return CGOpenMPRegionInfo::classof(Info) &&
137 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
138 ParallelOutlinedRegion;
139 }
140
141private:
142 /// A variable or parameter storing global thread id for OpenMP
143 /// constructs.
144 const VarDecl *ThreadIDVar;
145 StringRef HelperName;
146};
147
148/// API for captured statement code generation in OpenMP constructs.
149class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo {
150public:
151 class UntiedTaskActionTy final : public PrePostActionTy {
152 bool Untied;
153 const VarDecl *PartIDVar;
154 const RegionCodeGenTy UntiedCodeGen;
155 llvm::SwitchInst *UntiedSwitch = nullptr;
156
157 public:
158 UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar,
159 const RegionCodeGenTy &UntiedCodeGen)
160 : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {}
161 void Enter(CodeGenFunction &CGF) override {
162 if (Untied) {
163 // Emit task switching point.
164 LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
165 CGF.GetAddrOfLocalVar(PartIDVar),
166 PartIDVar->getType()->castAs<PointerType>());
167 llvm::Value *Res =
168 CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation());
169 llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done.");
170 UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB);
171 CGF.EmitBlock(DoneBB);
173 CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
174 UntiedSwitch->addCase(CGF.Builder.getInt32(0),
175 CGF.Builder.GetInsertBlock());
176 emitUntiedSwitch(CGF);
177 }
178 }
179 void emitUntiedSwitch(CodeGenFunction &CGF) const {
180 if (Untied) {
181 LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
182 CGF.GetAddrOfLocalVar(PartIDVar),
183 PartIDVar->getType()->castAs<PointerType>());
184 CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
185 PartIdLVal);
186 UntiedCodeGen(CGF);
187 CodeGenFunction::JumpDest CurPoint =
188 CGF.getJumpDestInCurrentScope(".untied.next.");
190 CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
191 UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
192 CGF.Builder.GetInsertBlock());
193 CGF.EmitBranchThroughCleanup(CurPoint);
194 CGF.EmitBlock(CurPoint.getBlock());
195 }
196 }
197 unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); }
198 };
199 CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
200 const VarDecl *ThreadIDVar,
201 const RegionCodeGenTy &CodeGen,
202 OpenMPDirectiveKind Kind, bool HasCancel,
203 const UntiedTaskActionTy &Action)
204 : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
205 ThreadIDVar(ThreadIDVar), Action(Action) {
206 assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
207 }
208
209 /// Get a variable or parameter for storing global thread id
210 /// inside OpenMP construct.
211 const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }
212
213 /// Get an LValue for the current ThreadID variable.
214 LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;
215
216 /// Get the name of the capture helper.
217 StringRef getHelperName() const override { return ".omp_outlined."; }
218
219 void emitUntiedSwitch(CodeGenFunction &CGF) override {
220 Action.emitUntiedSwitch(CGF);
221 }
222
223 static bool classof(const CGCapturedStmtInfo *Info) {
224 return CGOpenMPRegionInfo::classof(Info) &&
225 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
226 TaskOutlinedRegion;
227 }
228
229private:
230 /// A variable or parameter storing global thread id for OpenMP
231 /// constructs.
232 const VarDecl *ThreadIDVar;
233 /// Action for emitting code for untied tasks.
234 const UntiedTaskActionTy &Action;
235};
236
237/// API for inlined captured statement code generation in OpenMP
238/// constructs.
239class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
240public:
241 CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
242 const RegionCodeGenTy &CodeGen,
243 OpenMPDirectiveKind Kind, bool HasCancel)
244 : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
245 OldCSI(OldCSI),
246 OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}
247
248 // Retrieve the value of the context parameter.
249 llvm::Value *getContextValue() const override {
250 if (OuterRegionInfo)
251 return OuterRegionInfo->getContextValue();
252 llvm_unreachable("No context value for inlined OpenMP region");
253 }
254
255 void setContextValue(llvm::Value *V) override {
256 if (OuterRegionInfo) {
257 OuterRegionInfo->setContextValue(V);
258 return;
259 }
260 llvm_unreachable("No context value for inlined OpenMP region");
261 }
262
263 /// Lookup the captured field decl for a variable.
264 const FieldDecl *lookup(const VarDecl *VD) const override {
265 if (OuterRegionInfo)
266 return OuterRegionInfo->lookup(VD);
267 // If there is no outer outlined region,no need to lookup in a list of
268 // captured variables, we can use the original one.
269 return nullptr;
270 }
271
272 FieldDecl *getThisFieldDecl() const override {
273 if (OuterRegionInfo)
274 return OuterRegionInfo->getThisFieldDecl();
275 return nullptr;
276 }
277
278 /// Get a variable or parameter for storing global thread id
279 /// inside OpenMP construct.
280 const VarDecl *getThreadIDVariable() const override {
281 if (OuterRegionInfo)
282 return OuterRegionInfo->getThreadIDVariable();
283 return nullptr;
284 }
285
286 /// Get an LValue for the current ThreadID variable.
287 LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override {
288 if (OuterRegionInfo)
289 return OuterRegionInfo->getThreadIDVariableLValue(CGF);
290 llvm_unreachable("No LValue for inlined OpenMP construct");
291 }
292
293 /// Get the name of the capture helper.
294 StringRef getHelperName() const override {
295 if (auto *OuterRegionInfo = getOldCSI())
296 return OuterRegionInfo->getHelperName();
297 llvm_unreachable("No helper name for inlined OpenMP construct");
298 }
299
300 void emitUntiedSwitch(CodeGenFunction &CGF) override {
301 if (OuterRegionInfo)
302 OuterRegionInfo->emitUntiedSwitch(CGF);
303 }
304
305 CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }
306
307 static bool classof(const CGCapturedStmtInfo *Info) {
308 return CGOpenMPRegionInfo::classof(Info) &&
309 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
310 }
311
312 ~CGOpenMPInlinedRegionInfo() override = default;
313
314private:
315 /// CodeGen info about outer OpenMP region.
316 CodeGenFunction::CGCapturedStmtInfo *OldCSI;
317 CGOpenMPRegionInfo *OuterRegionInfo;
318};
319
320/// API for captured statement code generation in OpenMP target
321/// constructs. For this captures, implicit parameters are used instead of the
322/// captured fields. The name of the target region has to be unique in a given
323/// application so it is provided by the client, because only the client has
324/// the information to generate that.
325class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo {
326public:
327 CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
328 const RegionCodeGenTy &CodeGen, StringRef HelperName)
329 : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
330 /*HasCancel=*/false),
331 HelperName(HelperName) {}
332
333 /// This is unused for target regions because each starts executing
334 /// with a single thread.
335 const VarDecl *getThreadIDVariable() const override { return nullptr; }
336
337 /// Get the name of the capture helper.
338 StringRef getHelperName() const override { return HelperName; }
339
340 static bool classof(const CGCapturedStmtInfo *Info) {
341 return CGOpenMPRegionInfo::classof(Info) &&
342 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;
343 }
344
345private:
346 StringRef HelperName;
347};
348
349static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) {
350 llvm_unreachable("No codegen for expressions");
351}
352/// API for generation of expressions captured in a innermost OpenMP
353/// region.
354class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo {
355public:
356 CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS)
357 : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen,
358 OMPD_unknown,
359 /*HasCancel=*/false),
360 PrivScope(CGF) {
361 // Make sure the globals captured in the provided statement are local by
362 // using the privatization logic. We assume the same variable is not
363 // captured more than once.
364 for (const auto &C : CS.captures()) {
365 if (!C.capturesVariable() && !C.capturesVariableByCopy())
366 continue;
367
368 const VarDecl *VD = C.getCapturedVar();
369 if (VD->isLocalVarDeclOrParm())
370 continue;
371
372 DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
373 /*RefersToEnclosingVariableOrCapture=*/false,
374 VD->getType().getNonReferenceType(), VK_LValue,
375 C.getLocation());
376 PrivScope.addPrivate(VD, CGF.EmitLValue(&DRE).getAddress());
377 }
378 (void)PrivScope.Privatize();
379 }
380
381 /// Lookup the captured field decl for a variable.
382 const FieldDecl *lookup(const VarDecl *VD) const override {
383 if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD))
384 return FD;
385 return nullptr;
386 }
387
388 /// Emit the captured statement body.
389 void EmitBody(CodeGenFunction &CGF, const Stmt *S) override {
390 llvm_unreachable("No body for expressions");
391 }
392
393 /// Get a variable or parameter for storing global thread id
394 /// inside OpenMP construct.
395 const VarDecl *getThreadIDVariable() const override {
396 llvm_unreachable("No thread id for expressions");
397 }
398
399 /// Get the name of the capture helper.
400 StringRef getHelperName() const override {
401 llvm_unreachable("No helper name for expressions");
402 }
403
404 static bool classof(const CGCapturedStmtInfo *Info) { return false; }
405
406private:
407 /// Private scope to capture global variables.
408 CodeGenFunction::OMPPrivateScope PrivScope;
409};
410
411/// RAII for emitting code of OpenMP constructs.
412class InlinedOpenMPRegionRAII {
413 CodeGenFunction &CGF;
414 llvm::DenseMap<const ValueDecl *, FieldDecl *> LambdaCaptureFields;
415 FieldDecl *LambdaThisCaptureField = nullptr;
416 const CodeGen::CGBlockInfo *BlockInfo = nullptr;
417 bool NoInheritance = false;
418
419public:
420 /// Constructs region for combined constructs.
421 /// \param CodeGen Code generation sequence for combined directives. Includes
422 /// a list of functions used for code generation of implicitly inlined
423 /// regions.
424 InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
425 OpenMPDirectiveKind Kind, bool HasCancel,
426 bool NoInheritance = true)
427 : CGF(CGF), NoInheritance(NoInheritance) {
428 // Start emission for the construct.
429 CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
430 CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
431 if (NoInheritance) {
432 std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
433 LambdaThisCaptureField = CGF.LambdaThisCaptureField;
434 CGF.LambdaThisCaptureField = nullptr;
435 BlockInfo = CGF.BlockInfo;
436 CGF.BlockInfo = nullptr;
437 }
438 }
439
440 ~InlinedOpenMPRegionRAII() {
441 // Restore original CapturedStmtInfo only if we're done with code emission.
442 auto *OldCSI =
443 cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
444 delete CGF.CapturedStmtInfo;
445 CGF.CapturedStmtInfo = OldCSI;
446 if (NoInheritance) {
447 std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
448 CGF.LambdaThisCaptureField = LambdaThisCaptureField;
449 CGF.BlockInfo = BlockInfo;
450 }
451 }
452};
453
454/// Values for bit flags used in the ident_t to describe the fields.
455/// All enumeric elements are named and described in accordance with the code
456/// from https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
457enum OpenMPLocationFlags : unsigned {
458 /// Use trampoline for internal microtask.
459 OMP_IDENT_IMD = 0x01,
460 /// Use c-style ident structure.
461 OMP_IDENT_KMPC = 0x02,
462 /// Atomic reduction option for kmpc_reduce.
463 OMP_ATOMIC_REDUCE = 0x10,
464 /// Explicit 'barrier' directive.
465 OMP_IDENT_BARRIER_EXPL = 0x20,
466 /// Implicit barrier in code.
467 OMP_IDENT_BARRIER_IMPL = 0x40,
468 /// Implicit barrier in 'for' directive.
469 OMP_IDENT_BARRIER_IMPL_FOR = 0x40,
470 /// Implicit barrier in 'sections' directive.
471 OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0,
472 /// Implicit barrier in 'single' directive.
473 OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140,
474 /// Call of __kmp_for_static_init for static loop.
475 OMP_IDENT_WORK_LOOP = 0x200,
476 /// Call of __kmp_for_static_init for sections.
477 OMP_IDENT_WORK_SECTIONS = 0x400,
478 /// Call of __kmp_for_static_init for distribute.
479 OMP_IDENT_WORK_DISTRIBUTE = 0x800,
480 LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE)
481};
482
483/// Describes ident structure that describes a source location.
484/// All descriptions are taken from
485/// https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
486/// Original structure:
487/// typedef struct ident {
488/// kmp_int32 reserved_1; /**< might be used in Fortran;
489/// see above */
490/// kmp_int32 flags; /**< also f.flags; KMP_IDENT_xxx flags;
491/// KMP_IDENT_KMPC identifies this union
492/// member */
493/// kmp_int32 reserved_2; /**< not really used in Fortran any more;
494/// see above */
495///#if USE_ITT_BUILD
496/// /* but currently used for storing
497/// region-specific ITT */
498/// /* contextual information. */
499///#endif /* USE_ITT_BUILD */
500/// kmp_int32 reserved_3; /**< source[4] in Fortran, do not use for
501/// C++ */
502/// char const *psource; /**< String describing the source location.
503/// The string is composed of semi-colon separated
504// fields which describe the source file,
505/// the function and a pair of line numbers that
506/// delimit the construct.
507/// */
508/// } ident_t;
509enum IdentFieldIndex {
510 /// might be used in Fortran
511 IdentField_Reserved_1,
512 /// OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
513 IdentField_Flags,
514 /// Not really used in Fortran any more
515 IdentField_Reserved_2,
516 /// Source[4] in Fortran, do not use for C++
517 IdentField_Reserved_3,
518 /// String describing the source location. The string is composed of
519 /// semi-colon separated fields which describe the source file, the function
520 /// and a pair of line numbers that delimit the construct.
521 IdentField_PSource
522};
523
524/// Schedule types for 'omp for' loops (these enumerators are taken from
525/// the enum sched_type in kmp.h).
526enum OpenMPSchedType {
527 /// Lower bound for default (unordered) versions.
528 OMP_sch_lower = 32,
529 OMP_sch_static_chunked = 33,
530 OMP_sch_static = 34,
531 OMP_sch_dynamic_chunked = 35,
532 OMP_sch_guided_chunked = 36,
533 OMP_sch_runtime = 37,
534 OMP_sch_auto = 38,
535 /// static with chunk adjustment (e.g., simd)
536 OMP_sch_static_balanced_chunked = 45,
537 /// Lower bound for 'ordered' versions.
538 OMP_ord_lower = 64,
539 OMP_ord_static_chunked = 65,
540 OMP_ord_static = 66,
541 OMP_ord_dynamic_chunked = 67,
542 OMP_ord_guided_chunked = 68,
543 OMP_ord_runtime = 69,
544 OMP_ord_auto = 70,
545 OMP_sch_default = OMP_sch_static,
546 /// dist_schedule types
547 OMP_dist_sch_static_chunked = 91,
548 OMP_dist_sch_static = 92,
549 /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
550 /// Set if the monotonic schedule modifier was present.
551 OMP_sch_modifier_monotonic = (1 << 29),
552 /// Set if the nonmonotonic schedule modifier was present.
553 OMP_sch_modifier_nonmonotonic = (1 << 30),
554};
555
556/// A basic class for pre|post-action for advanced codegen sequence for OpenMP
557/// region.
558class CleanupTy final : public EHScopeStack::Cleanup {
559 PrePostActionTy *Action;
560
561public:
562 explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {}
563 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
564 if (!CGF.HaveInsertPoint())
565 return;
566 Action->Exit(CGF);
567 }
568};
569
570} // anonymous namespace
571
574 if (PrePostAction) {
575 CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction);
576 Callback(CodeGen, CGF, *PrePostAction);
577 } else {
578 PrePostActionTy Action;
579 Callback(CodeGen, CGF, Action);
580 }
581}
582
583/// Check if the combiner is a call to UDR combiner and if it is so return the
584/// UDR decl used for reduction.
585static const OMPDeclareReductionDecl *
586getReductionInit(const Expr *ReductionOp) {
587 if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
588 if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
589 if (const auto *DRE =
590 dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
591 if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl()))
592 return DRD;
593 return nullptr;
594}
595
597 const OMPDeclareReductionDecl *DRD,
598 const Expr *InitOp,
599 Address Private, Address Original,
600 QualType Ty) {
601 if (DRD->getInitializer()) {
602 std::pair<llvm::Function *, llvm::Function *> Reduction =
604 const auto *CE = cast<CallExpr>(InitOp);
605 const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee());
606 const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
607 const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
608 const auto *LHSDRE =
609 cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr());
610 const auto *RHSDRE =
611 cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr());
612 CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
613 PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()), Private);
614 PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()), Original);
615 (void)PrivateScope.Privatize();
618 CGF.EmitIgnoredExpr(InitOp);
619 } else {
620 llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty);
621 std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"});
622 auto *GV = new llvm::GlobalVariable(
623 CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
624 llvm::GlobalValue::PrivateLinkage, Init, Name);
625 LValue LV = CGF.MakeNaturalAlignRawAddrLValue(GV, Ty);
626 RValue InitRVal;
627 switch (CGF.getEvaluationKind(Ty)) {
628 case TEK_Scalar:
629 InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation());
630 break;
631 case TEK_Complex:
632 InitRVal =
634 break;
635 case TEK_Aggregate: {
636 OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_LValue);
637 CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, LV);
638 CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
639 /*IsInitializer=*/false);
640 return;
641 }
642 }
643 OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_PRValue);
644 CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
645 CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
646 /*IsInitializer=*/false);
647 }
648}
649
650/// Emit initialization of arrays of complex types.
651/// \param DestAddr Address of the array.
652/// \param Type Type of array.
653/// \param Init Initial expression of array.
654/// \param SrcAddr Address of the original array.
656 QualType Type, bool EmitDeclareReductionInit,
657 const Expr *Init,
658 const OMPDeclareReductionDecl *DRD,
659 Address SrcAddr = Address::invalid()) {
660 // Perform element-by-element initialization.
661 QualType ElementTy;
662
663 // Drill down to the base element type on both arrays.
664 const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
665 llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
666 if (DRD)
667 SrcAddr = SrcAddr.withElementType(DestAddr.getElementType());
668
669 llvm::Value *SrcBegin = nullptr;
670 if (DRD)
671 SrcBegin = SrcAddr.emitRawPointer(CGF);
672 llvm::Value *DestBegin = DestAddr.emitRawPointer(CGF);
673 // Cast from pointer to array type to pointer to single element.
674 llvm::Value *DestEnd =
675 CGF.Builder.CreateGEP(DestAddr.getElementType(), DestBegin, NumElements);
676 // The basic structure here is a while-do loop.
677 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
678 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
679 llvm::Value *IsEmpty =
680 CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
681 CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
682
683 // Enter the loop body, making that address the current address.
684 llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
685 CGF.EmitBlock(BodyBB);
686
687 CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);
688
689 llvm::PHINode *SrcElementPHI = nullptr;
690 Address SrcElementCurrent = Address::invalid();
691 if (DRD) {
692 SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2,
693 "omp.arraycpy.srcElementPast");
694 SrcElementPHI->addIncoming(SrcBegin, EntryBB);
695 SrcElementCurrent =
696 Address(SrcElementPHI, SrcAddr.getElementType(),
697 SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
698 }
699 llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
700 DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
701 DestElementPHI->addIncoming(DestBegin, EntryBB);
702 Address DestElementCurrent =
703 Address(DestElementPHI, DestAddr.getElementType(),
704 DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));
705
706 // Emit copy.
707 {
709 if (EmitDeclareReductionInit) {
710 emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent,
711 SrcElementCurrent, ElementTy);
712 } else
713 CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
714 /*IsInitializer=*/false);
715 }
716
717 if (DRD) {
718 // Shift the address forward by one element.
719 llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32(
720 SrcAddr.getElementType(), SrcElementPHI, /*Idx0=*/1,
721 "omp.arraycpy.dest.element");
722 SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock());
723 }
724
725 // Shift the address forward by one element.
726 llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32(
727 DestAddr.getElementType(), DestElementPHI, /*Idx0=*/1,
728 "omp.arraycpy.dest.element");
729 // Check whether we've reached the end.
730 llvm::Value *Done =
731 CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
732 CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
733 DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());
734
735 // Done.
736 CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
737}
738
739LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) {
740 return CGF.EmitOMPSharedLValue(E);
741}
742
743LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF,
744 const Expr *E) {
745 if (const auto *OASE = dyn_cast<ArraySectionExpr>(E))
746 return CGF.EmitArraySectionExpr(OASE, /*IsLowerBound=*/false);
747 return LValue();
748}
749
750void ReductionCodeGen::emitAggregateInitialization(
751 CodeGenFunction &CGF, unsigned N, Address PrivateAddr, Address SharedAddr,
752 const OMPDeclareReductionDecl *DRD) {
753 // Emit VarDecl with copy init for arrays.
754 // Get the address of the original variable captured in current
755 // captured region.
756 const auto *PrivateVD =
757 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
758 bool EmitDeclareReductionInit =
759 DRD && (DRD->getInitializer() || !PrivateVD->hasInit());
760 EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(),
761 EmitDeclareReductionInit,
762 EmitDeclareReductionInit ? ClausesData[N].ReductionOp
763 : PrivateVD->getInit(),
764 DRD, SharedAddr);
765}
766
769 ArrayRef<const Expr *> Privates,
770 ArrayRef<const Expr *> ReductionOps) {
771 ClausesData.reserve(Shareds.size());
772 SharedAddresses.reserve(Shareds.size());
773 Sizes.reserve(Shareds.size());
774 BaseDecls.reserve(Shareds.size());
775 const auto *IOrig = Origs.begin();
776 const auto *IPriv = Privates.begin();
777 const auto *IRed = ReductionOps.begin();
778 for (const Expr *Ref : Shareds) {
779 ClausesData.emplace_back(Ref, *IOrig, *IPriv, *IRed);
780 std::advance(IOrig, 1);
781 std::advance(IPriv, 1);
782 std::advance(IRed, 1);
783 }
784}
785
787 assert(SharedAddresses.size() == N && OrigAddresses.size() == N &&
788 "Number of generated lvalues must be exactly N.");
789 LValue First = emitSharedLValue(CGF, ClausesData[N].Shared);
790 LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Shared);
791 SharedAddresses.emplace_back(First, Second);
792 if (ClausesData[N].Shared == ClausesData[N].Ref) {
793 OrigAddresses.emplace_back(First, Second);
794 } else {
795 LValue First = emitSharedLValue(CGF, ClausesData[N].Ref);
796 LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref);
797 OrigAddresses.emplace_back(First, Second);
798 }
799}
800
802 QualType PrivateType = getPrivateType(N);
803 bool AsArraySection = isa<ArraySectionExpr>(ClausesData[N].Ref);
804 if (!PrivateType->isVariablyModifiedType()) {
805 Sizes.emplace_back(
806 CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType()),
807 nullptr);
808 return;
809 }
810 llvm::Value *Size;
811 llvm::Value *SizeInChars;
812 auto *ElemType = OrigAddresses[N].first.getAddress().getElementType();
813 auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType);
814 if (AsArraySection) {
815 Size = CGF.Builder.CreatePtrDiff(ElemType,
816 OrigAddresses[N].second.getPointer(CGF),
817 OrigAddresses[N].first.getPointer(CGF));
818 Size = CGF.Builder.CreateNUWAdd(
819 Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
820 SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf);
821 } else {
822 SizeInChars =
823 CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType());
824 Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf);
825 }
826 Sizes.emplace_back(SizeInChars, Size);
828 CGF,
830 CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
831 RValue::get(Size));
832 CGF.EmitVariablyModifiedType(PrivateType);
833}
834
836 llvm::Value *Size) {
837 QualType PrivateType = getPrivateType(N);
838 if (!PrivateType->isVariablyModifiedType()) {
839 assert(!Size && !Sizes[N].second &&
840 "Size should be nullptr for non-variably modified reduction "
841 "items.");
842 return;
843 }
845 CGF,
847 CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
848 RValue::get(Size));
849 CGF.EmitVariablyModifiedType(PrivateType);
850}
851
// ReductionCodeGen::emitInitialization: emits the initializer for the private
// copy of reduction item N, choosing between aggregate initialization, a
// user-defined (declare reduction) initializer, or the private variable's own
// default initializer.
// NOTE(review): the leading signature line (doxygen line 852) is missing in
// this extraction.
853 CodeGenFunction &CGF, unsigned N, Address PrivateAddr, Address SharedAddr,
854 llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) {
855 assert(SharedAddresses.size() > N && "No variable was generated");
856 const auto *PrivateVD =
857 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
858 const OMPDeclareReductionDecl *DRD =
859 getReductionInit(ClausesData[N].ReductionOp);
// Array items are initialized element-wise.
860 if (CGF.getContext().getAsArrayType(PrivateVD->getType())) {
861 if (DRD && DRD->getInitializer())
862 (void)DefaultInit(CGF);
863 emitAggregateInitialization(CGF, N, PrivateAddr, SharedAddr, DRD);
864 } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
// Scalar with a user-defined reduction initializer.
865 (void)DefaultInit(CGF);
866 QualType SharedType = SharedAddresses[N].first.getType();
867 emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp,
868 PrivateAddr, SharedAddr, SharedType);
869 } else if (!DefaultInit(CGF) && PrivateVD->hasInit() &&
870 !CGF.isTrivialInitializer(PrivateVD->getInit())) {
// Fall back to the private variable's own non-trivial initializer.
871 CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr,
872 PrivateVD->getType().getQualifiers(),
873 /*IsInitializer=*/false);
874 }
875}
876
// ReductionCodeGen::needCleanups: true if the private copy of reduction item
// N has a destruction kind other than DK_none and therefore needs a cleanup.
// NOTE(review): the signature line (doxygen line 877) is missing here.
878 QualType PrivateType = getPrivateType(N);
879 QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
880 return DTorKind != QualType::DK_none;
881}
882
// ReductionCodeGen::emitCleanups: pushes a destructor cleanup for the private
// copy of reduction item N when its type requires destruction.
// NOTE(review): the first signature line (doxygen line 883) is missing here.
884 Address PrivateAddr) {
885 QualType PrivateType = getPrivateType(N);
886 QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
887 if (needCleanups(N)) {
// Recast the address to the memory type of the item before registering the
// destroy cleanup.
888 PrivateAddr =
889 PrivateAddr.withElementType(CGF.ConvertTypeForMem(PrivateType));
890 CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType);
891 }
892}
893
894static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
895 LValue BaseLV) {
896 BaseTy = BaseTy.getNonReferenceType();
897 while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
898 !CGF.getContext().hasSameType(BaseTy, ElTy)) {
899 if (const auto *PtrTy = BaseTy->getAs<PointerType>()) {
900 BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(), PtrTy);
901 } else {
902 LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(), BaseTy);
903 BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal);
904 }
905 BaseTy = BaseTy->getPointeeType();
906 }
907 return CGF.MakeAddrLValue(
908 BaseLV.getAddress().withElementType(CGF.ConvertTypeForMem(ElTy)),
909 BaseLV.getType(), BaseLV.getBaseInfo(),
910 CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType()));
911}
912
// castToBase: rebuilds the chain of indirections for a reduction base so that
// \p Addr (the adjusted private pointer) is reachable through the same number
// of loads as the original base address.
// NOTE(review): this extraction is missing the signature line, the local
// 'Tmp' declaration, and two pointer-cast lines (doxygen lines 913/915/931/
// 937) — confirm against the upstream file.
914 Address OriginalBaseAddress, llvm::Value *Addr) {
916 Address TopTmp = Address::invalid();
917 Address MostTopTmp = Address::invalid();
918 BaseTy = BaseTy.getNonReferenceType();
// Create one memory temporary per indirection level, chaining each new temp
// into the previous one; MostTopTmp ends up as the outermost temporary.
919 while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
920 !CGF.getContext().hasSameType(BaseTy, ElTy)) {
921 Tmp = CGF.CreateMemTemp(BaseTy);
922 if (TopTmp.isValid())
923 CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
924 else
925 MostTopTmp = Tmp;
926 TopTmp = Tmp;
927 BaseTy = BaseTy->getPointeeType();
928 }
929
930 if (Tmp.isValid()) {
// Store the adjusted address into the innermost temporary and return the
// outermost one.
932 Addr, Tmp.getElementType());
933 CGF.Builder.CreateStore(Addr, Tmp);
934 return MostTopTmp;
935 }
936
// No indirection: reuse the original base address with the new pointer.
938 Addr, OriginalBaseAddress.getType());
939 return OriginalBaseAddress.withPointer(Addr, NotKnownNonNull);
940}
941
/// getBaseDecl: strips array-section and array-subscript layers off \p Ref to
/// find the underlying base VarDecl; \p DE receives the base DeclRefExpr.
/// Returns nullptr when \p Ref is neither an array section nor a subscript.
/// NOTE(review): the lines assigning DE (doxygen lines 950/956) are missing
/// from this extraction — confirm upstream.
942static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) {
943 const VarDecl *OrigVD = nullptr;
944 if (const auto *OASE = dyn_cast<ArraySectionExpr>(Ref)) {
// Peel nested sections, then nested subscripts, to reach the base expr.
945 const Expr *Base = OASE->getBase()->IgnoreParenImpCasts();
946 while (const auto *TempOASE = dyn_cast<ArraySectionExpr>(Base))
947 Base = TempOASE->getBase()->IgnoreParenImpCasts();
948 while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
949 Base = TempASE->getBase()->IgnoreParenImpCasts();
951 OrigVD = cast<VarDecl>(DE->getDecl());
952 } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) {
953 const Expr *Base = ASE->getBase()->IgnoreParenImpCasts();
954 while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
955 Base = TempASE->getBase()->IgnoreParenImpCasts();
957 OrigVD = cast<VarDecl>(DE->getDecl());
958 }
959 return OrigVD;
960}
961
// ReductionCodeGen::adjustPrivateAddress: for array-section/subscript
// reduction items, offsets the private address by the distance between the
// section start and the shared base so private elements line up with their
// shared counterparts; otherwise returns PrivateAddr unchanged.
// NOTE(review): the first signature line (doxygen line 962) and a
// pointer-cast line (976) are missing in this extraction.
963 Address PrivateAddr) {
964 const DeclRefExpr *DE;
965 if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) {
966 BaseDecls.emplace_back(OrigVD);
967 LValue OriginalBaseLValue = CGF.EmitLValue(DE);
968 LValue BaseLValue =
969 loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(),
970 OriginalBaseLValue);
971 Address SharedAddr = SharedAddresses[N].first.getAddress();
// Distance (in elements) from the shared address to the loaded base.
972 llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff(
973 SharedAddr.getElementType(), BaseLValue.getPointer(CGF),
974 SharedAddr.emitRawPointer(CGF));
975 llvm::Value *PrivatePointer =
977 PrivateAddr.emitRawPointer(CGF), SharedAddr.getType());
978 llvm::Value *Ptr = CGF.Builder.CreateGEP(
979 SharedAddr.getElementType(), PrivatePointer, Adjustment);
// Rebuild the original chain of indirections on top of the adjusted ptr.
980 return castToBase(CGF, OrigVD->getType(),
981 SharedAddresses[N].first.getType(),
982 OriginalBaseLValue.getAddress(), Ptr);
983 }
984 BaseDecls.emplace_back(
985 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl()));
986 return PrivateAddr;
987}
988
// ReductionCodeGen::usesReductionInitializer: true if reduction item N has a
// user-defined reduction with an explicit initializer.
// NOTE(review): the signature line (doxygen line 989) is missing here.
990 const OMPDeclareReductionDecl *DRD =
991 getReductionInit(ClausesData[N].ReductionOp);
992 return DRD && DRD->getInitializer();
993}
994
995LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
996 return CGF.EmitLoadOfPointerLValue(
997 CGF.GetAddrOfLocalVar(getThreadIDVariable()),
998 getThreadIDVariable()->getType()->castAs<PointerType>());
999}
1000
// CGOpenMPRegionInfo::EmitBody: emits the region's structured block inside a
// terminate scope so exceptional exits terminate instead of unwinding out of
// the OpenMP region.
// NOTE(review): one line after 'if (S)' (doxygen line 1011) is missing from
// this extraction — confirm upstream.
1001void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt *S) {
1002 if (!CGF.HaveInsertPoint())
1003 return;
1004 // 1.2.2 OpenMP Language Terminology
1005 // Structured block - An executable statement with a single entry at the
1006 // top and a single exit at the bottom.
1007 // The point of exit cannot be a branch out of the structured block.
1008 // longjmp() and throw() must not violate the entry/exit criteria.
1009 CGF.EHStack.pushTerminate();
1010 if (S)
1012 CodeGen(CGF);
1013 CGF.EHStack.popTerminate();
1014}
1015
// CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue: task outlined
// functions take the thread id by value (see the non-pointer assert in
// emitTaskOutlinedFunction), so the lvalue is formed directly over the local.
// NOTE(review): the trailing argument line (doxygen line 1020) is missing in
// this extraction.
1016LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
1017 CodeGenFunction &CGF) {
1018 return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
1019 getThreadIDVariable()->getType(),
1021}
1022
// addFieldToRecordDecl: appends a public, non-mutable, unnamed field of type
// FieldTy to the decl context DC and returns the new FieldDecl.
// NOTE(review): the first signature line (doxygen line 1023) is missing in
// this extraction.
1024 QualType FieldTy) {
1025 auto *Field = FieldDecl::Create(
1026 C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
1027 C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
1028 /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
1029 Field->setAccess(AS_public);
1030 DC->addDecl(Field);
1031 return Field;
1032}
1033
// CGOpenMPRuntime constructor: configures and initializes the shared
// OpenMPIRBuilder (device/GPU flags, default target address space, runtime
// calling convention) and loads host-side offload metadata for device builds.
// NOTE(review): the constructor's signature line (doxygen line 1034) and one
// line inside the OpenMPForceUSM branch (1056) are missing here.
1035 : CGM(CGM), OMPBuilder(CGM.getModule()) {
1036 KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);
1037 llvm::OpenMPIRBuilderConfig Config(
1038 CGM.getLangOpts().OpenMPIsTargetDevice, isGPU(),
1039 CGM.getLangOpts().OpenMPOffloadMandatory,
1040 /*HasRequiresReverseOffload*/ false, /*HasRequiresUnifiedAddress*/ false,
1041 hasRequiresUnifiedSharedMemory(), /*HasRequiresDynamicAllocators*/ false);
1042 Config.setDefaultTargetAS(
1043 CGM.getContext().getTargetInfo().getTargetAddressSpace(LangAS::Default));
1044 Config.setRuntimeCC(CGM.getRuntimeCC());
1045
1046 OMPBuilder.setConfig(Config);
1047 OMPBuilder.initialize();
// For device compiles, seed the builder with offload entries from the host
// IR file so device entry names line up with the host.
1048 OMPBuilder.loadOffloadInfoMetadata(*CGM.getFileSystem(),
1049 CGM.getLangOpts().OpenMPIsTargetDevice
1050 ? CGM.getLangOpts().OMPHostIRFile
1051 : StringRef{});
1052
1053 // The user forces the compiler to behave as if omp requires
1054 // unified_shared_memory was given.
1055 if (CGM.getLangOpts().OpenMPForceUSM) {
1057 OMPBuilder.Config.setHasRequiresUnifiedSharedMemory(true);
1058 }
1059}
1060
// CGOpenMPRuntime::clear: drops cached internal variables and erases
// now-unused non-target global variable declarations that were kept only for
// debug info.
// NOTE(review): the signature line (doxygen line 1061) is missing here.
1062 InternalVars.clear();
1063 // Clean non-target variable declarations possibly used only in debug info.
1064 for (const auto &Data : EmittedNonTargetVariables) {
1065 if (!Data.getValue().pointsToAliveValue())
1066 continue;
1067 auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue());
1068 if (!GV)
1069 continue;
// Only remove pure declarations that have no remaining uses.
1070 if (!GV->isDeclaration() || GV->getNumUses() > 0)
1071 continue;
1072 GV->eraseFromParent();
1073 }
1074}
1075
// CGOpenMPRuntime::getName: builds a platform-specific mangled name from the
// given parts via the OpenMPIRBuilder.
// NOTE(review): the signature line (doxygen line 1076) is missing here.
1077 return OMPBuilder.createPlatformSpecificName(Parts);
1078}
1079
// emitCombinerOrInitializer: emits the internal helper function for a
// 'declare reduction' combiner or initializer, taking restrict-qualified
// in/out pointers, privatizing omp_in/omp_out over them, and emitting the
// combiner/initializer expression.
// NOTE(review): the function-name line and the OMPPrivateScope declaration
// (doxygen lines 1081/1112) are missing in this extraction.
1080static llvm::Function *
1082 const Expr *CombinerInitializer, const VarDecl *In,
1083 const VarDecl *Out, bool IsCombiner) {
1084 // void .omp_combiner.(Ty *in, Ty *out);
1085 ASTContext &C = CGM.getContext();
1086 QualType PtrTy = C.getPointerType(Ty).withRestrict();
1087 FunctionArgList Args;
1088 ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(),
1089 /*Id=*/nullptr, PtrTy, ImplicitParamKind::Other);
1090 ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(),
1091 /*Id=*/nullptr, PtrTy, ImplicitParamKind::Other);
1092 Args.push_back(&OmpOutParm);
1093 Args.push_back(&OmpInParm);
1094 const CGFunctionInfo &FnInfo =
1095 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
1096 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
1097 std::string Name = CGM.getOpenMPRuntime().getName(
1098 {IsCombiner ? "omp_combiner" : "omp_initializer", ""});
1099 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
1100 Name, &CGM.getModule());
1101 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
// At -O1 and above these tiny helpers are marked always_inline so the
// reduction combiner disappears into its callers.
1102 if (CGM.getCodeGenOpts().OptimizationLevel != 0) {
1103 Fn->removeFnAttr(llvm::Attribute::NoInline);
1104 Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
1105 Fn->addFnAttr(llvm::Attribute::AlwaysInline);
1106 }
1107 CodeGenFunction CGF(CGM);
1108 // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
1109 // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
1110 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(),
1111 Out->getLocation());
1113 Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm);
1114 Scope.addPrivate(
1115 In, CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>())
1116 .getAddress());
1117 Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm);
1118 Scope.addPrivate(
1119 Out, CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>())
1120 .getAddress());
1121 (void)Scope.Privatize();
// For initializers, emit omp_priv's (Out's) own non-trivial initializer
// before the initializer expression, if any.
1122 if (!IsCombiner && Out->hasInit() &&
1123 !CGF.isTrivialInitializer(Out->getInit())) {
1124 CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out),
1125 Out->getType().getQualifiers(),
1126 /*IsInitializer=*/true);
1127 }
1128 if (CombinerInitializer)
1129 CGF.EmitIgnoredExpr(CombinerInitializer);
1130 Scope.ForceCleanup();
1131 CGF.FinishFunction();
1132 return Fn;
1133}
1134
// CGOpenMPRuntime::emitUserDefinedReduction: lazily emits the combiner (and
// optional initializer) helpers for a declare-reduction decl and caches them
// in UDRMap; if emitted inside a function, also records the decl so
// functionFinished() can drop the cache entry.
// NOTE(review): the signature line and several argument lines of the
// emitCombinerOrInitializer calls (doxygen lines 1135-1136/1141-1142/
// 1146/1148/1150-1151) are missing in this extraction.
1137 if (UDRMap.count(D) > 0)
1138 return;
1139 llvm::Function *Combiner = emitCombinerOrInitializer(
1140 CGM, D->getType(), D->getCombiner(),
1143 /*IsCombiner=*/true);
1144 llvm::Function *Initializer = nullptr;
1145 if (const Expr *Init = D->getInitializer()) {
1147 CGM, D->getType(),
1149 : nullptr,
1152 /*IsCombiner=*/false);
1153 }
1154 UDRMap.try_emplace(D, Combiner, Initializer);
1155 if (CGF)
1156 FunctionUDRMap[CGF->CurFn].push_back(D);
1158
// CGOpenMPRuntime::getUserDefinedReduction: returns the cached
// (combiner, initializer) pair for a declare-reduction decl, emitting the
// helpers on first request.
// NOTE(review): the signature's second line (doxygen line 1160) is missing in
// this extraction.
1159std::pair<llvm::Function *, llvm::Function *>
1161 auto I = UDRMap.find(D);
1162 if (I != UDRMap.end())
1163 return I->second;
1164 emitUserDefinedReduction(/*CGF=*/nullptr, D);
1165 return UDRMap.lookup(D);
1166}
1167
// RAII helper that pushes a finalization callback onto the OpenMPIRBuilder's
// stack for the lifetime of a parallel-like region and pops it on
// destruction. The callback routes cancellation exits through clang's
// cleanup machinery.
// NOTE(review): the JumpDest declaration inside FiniCB (doxygen line 1195)
// is missing in this extraction.
1168namespace {
1169// Temporary RAII solution to perform a push/pop stack event on the OpenMP IR
1170// Builder if one is present.
1171struct PushAndPopStackRAII {
1172 PushAndPopStackRAII(llvm::OpenMPIRBuilder *OMPBuilder, CodeGenFunction &CGF,
1173 bool HasCancel, llvm::omp::Directive Kind)
1174 : OMPBuilder(OMPBuilder) {
1175 if (!OMPBuilder)
1176 return;
1177
1178 // The following callback is the crucial part of clangs cleanup process.
1179 //
1180 // NOTE:
1181 // Once the OpenMPIRBuilder is used to create parallel regions (and
1182 // similar), the cancellation destination (Dest below) is determined via
1183 // IP. That means if we have variables to finalize we split the block at IP,
1184 // use the new block (=BB) as destination to build a JumpDest (via
1185 // getJumpDestInCurrentScope(BB)) which then is fed to
1186 // EmitBranchThroughCleanup. Furthermore, there will not be the need
1187 // to push & pop an FinalizationInfo object.
1188 // The FiniCB will still be needed but at the point where the
1189 // OpenMPIRBuilder is asked to construct a parallel (or similar) construct.
1190 auto FiniCB = [&CGF](llvm::OpenMPIRBuilder::InsertPointTy IP) {
1191 assert(IP.getBlock()->end() == IP.getPoint() &&
1192 "Clang CG should cause non-terminated block!");
1193 CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
1194 CGF.Builder.restoreIP(IP);
1196 CGF.getOMPCancelDestination(OMPD_parallel);
1197 CGF.EmitBranchThroughCleanup(Dest);
1198 return llvm::Error::success();
1199 };
1200
1201 // TODO: Remove this once we emit parallel regions through the
1202 // OpenMPIRBuilder as it can do this setup internally.
1203 llvm::OpenMPIRBuilder::FinalizationInfo FI({FiniCB, Kind, HasCancel});
1204 OMPBuilder->pushFinalizationCB(std::move(FI));
1205 }
1206 ~PushAndPopStackRAII() {
1207 if (OMPBuilder)
1208 OMPBuilder->popFinalizationCB();
1209 }
// Null when no OpenMPIRBuilder is in use; then push/pop are no-ops.
1210 llvm::OpenMPIRBuilder *OMPBuilder;
1211};
1212} // namespace
1213
// emitParallelOrTeamsOutlinedFunction: outlines the captured statement of a
// parallel/teams region into a helper function, computing whether the region
// may be cancelled (needed for correct cancellation barriers).
// NOTE(review): the function-name line (doxygen line 1214) is missing in
// this extraction.
1215 CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS,
1216 const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
1217 const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) {
1218 assert(ThreadIDVar->getType()->isPointerType() &&
1219 "thread id variable must be of type kmp_int32 *");
1220 CodeGenFunction CGF(CGM, true);
// Collect the hasCancel() flag from whichever parallel-like directive kind
// this actually is.
1221 bool HasCancel = false;
1222 if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D))
1223 HasCancel = OPD->hasCancel();
1224 else if (const auto *OPD = dyn_cast<OMPTargetParallelDirective>(&D))
1225 HasCancel = OPD->hasCancel();
1226 else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D))
1227 HasCancel = OPSD->hasCancel();
1228 else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
1229 HasCancel = OPFD->hasCancel();
1230 else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D))
1231 HasCancel = OPFD->hasCancel();
1232 else if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D))
1233 HasCancel = OPFD->hasCancel();
1234 else if (const auto *OPFD =
1235 dyn_cast<OMPTeamsDistributeParallelForDirective>(&D))
1236 HasCancel = OPFD->hasCancel();
1237 else if (const auto *OPFD =
1238 dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D))
1239 HasCancel = OPFD->hasCancel();
1240
1241 // TODO: Temporarily inform the OpenMPIRBuilder, if any, about the new
1242 // parallel region to make cancellation barriers work properly.
1243 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
1244 PushAndPopStackRAII PSR(&OMPBuilder, CGF, HasCancel, InnermostKind);
1245 CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
1246 HasCancel, OutlinedHelperName);
1247 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
1248 return CGF.GenerateOpenMPCapturedStmtFunction(*CS, D);
1249}
1250
1251std::string CGOpenMPRuntime::getOutlinedHelperName(StringRef Name) const {
1252 std::string Suffix = getName({"omp_outlined"});
1253 return (Name + Suffix).str();
1254}
1255
// CGOpenMPRuntime::getOutlinedHelperName (CodeGenFunction overload): derives
// the helper name from the function currently being emitted.
// NOTE(review): the signature line (doxygen line 1256) is missing here.
1257 return getOutlinedHelperName(CGF.CurFn->getName());
1258}
1259
1260std::string CGOpenMPRuntime::getReductionFuncName(StringRef Name) const {
1261 std::string Suffix = getName({"omp", "reduction", "reduction_func"});
1262 return (Name + Suffix).str();
1263}
1264
// CGOpenMPRuntime::emitParallelOutlinedFunction: outlines the 'parallel'
// captured statement of directive D into a helper function.
// NOTE(review): the signature lines and the call to the static outlining
// helper (doxygen lines 1265-1266/1270) are partially missing here.
1267 const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
1268 const RegionCodeGenTy &CodeGen) {
1269 const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel);
1271 CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(CGF),
1272 CodeGen);
1273}
1274
// CGOpenMPRuntime::emitTeamsOutlinedFunction: outlines the 'teams' captured
// statement of directive D into a helper function.
// NOTE(review): the signature lines and the call to the static outlining
// helper (doxygen lines 1275-1276/1280) are partially missing here.
1277 const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
1278 const RegionCodeGenTy &CodeGen) {
1279 const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams);
1281 CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(CGF),
1282 CodeGen);
1283}
1284
// CGOpenMPRuntime::emitTaskOutlinedFunction: outlines a task/taskloop region.
// For untied tasks, installs an action that re-enqueues the task via
// __kmpc_omp_task and reports the number of task parts back to the caller.
// NOTE(review): the function-name line (doxygen line 1285) is missing in
// this extraction.
1286 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1287 const VarDecl *PartIDVar, const VarDecl *TaskTVar,
1288 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
1289 bool Tied, unsigned &NumberOfParts) {
// Callback used for untied tasks: re-enqueue the task by calling
// __kmpc_omp_task with the task descriptor loaded from TaskTVar.
1290 auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF,
1291 PrePostActionTy &) {
1292 llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc());
1293 llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getBeginLoc());
1294 llvm::Value *TaskArgs[] = {
1295 UpLoc, ThreadID,
1296 CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar),
1297 TaskTVar->getType()->castAs<PointerType>())
1298 .getPointer(CGF)};
1299 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
1300 CGM.getModule(), OMPRTL___kmpc_omp_task),
1301 TaskArgs);
1302 };
1303 CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar,
1304 UntiedCodeGen);
1305 CodeGen.setAction(Action);
1306 assert(!ThreadIDVar->getType()->isPointerType() &&
1307 "thread id variable must be of type kmp_int32 for tasks");
1308 const OpenMPDirectiveKind Region =
1309 isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop
1310 : OMPD_task;
1311 const CapturedStmt *CS = D.getCapturedStmt(Region);
// Collect hasCancel() from whichever task-like directive kind this is.
1312 bool HasCancel = false;
1313 if (const auto *TD = dyn_cast<OMPTaskDirective>(&D))
1314 HasCancel = TD->hasCancel();
1315 else if (const auto *TD = dyn_cast<OMPTaskLoopDirective>(&D))
1316 HasCancel = TD->hasCancel();
1317 else if (const auto *TD = dyn_cast<OMPMasterTaskLoopDirective>(&D))
1318 HasCancel = TD->hasCancel();
1319 else if (const auto *TD = dyn_cast<OMPParallelMasterTaskLoopDirective>(&D))
1320 HasCancel = TD->hasCancel();
1321
1322 CodeGenFunction CGF(CGM, true);
1323 CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
1324 InnermostKind, HasCancel, Action);
1325 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
1326 llvm::Function *Res = CGF.GenerateCapturedStmtFunction(*CS);
// Untied tasks report how many parts the outlined body was split into.
1327 if (!Tied)
1328 NumberOfParts = Action.getNumberOfParts();
1329 return Res;
1330}
1331
// CGOpenMPRuntime::setLocThreadIdInsertPt: installs a dummy no-op bitcast as
// a stable "service" insert point for thread-id/location setup code, either
// at the current builder position or right after the alloca insert point.
// NOTE(review): the function-name line (doxygen line 1332) is missing here.
1333 bool AtCurrentPoint) {
1334 auto &Elem = OpenMPLocThreadIDMap[CGF.CurFn];
1335 assert(!Elem.ServiceInsertPt && "Insert point is set already.");
1336
// A trivial bitcast of undef serves as a removable placeholder instruction.
1337 llvm::Value *Undef = llvm::UndefValue::get(CGF.Int32Ty);
1338 if (AtCurrentPoint) {
1339 Elem.ServiceInsertPt = new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt",
1340 CGF.Builder.GetInsertBlock());
1341 } else {
1342 Elem.ServiceInsertPt = new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt");
1343 Elem.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt->getIterator());
1344 }
1345}
1346
// CGOpenMPRuntime::clearLocThreadIdInsertPt: removes the placeholder service
// insert point (if any) installed by setLocThreadIdInsertPt.
// NOTE(review): the function-name line (doxygen line 1347) is missing here.
1348 auto &Elem = OpenMPLocThreadIDMap[CGF.CurFn];
1349 if (Elem.ServiceInsertPt) {
// Clear the map entry before erasing so no dangling pointer remains.
1350 llvm::Instruction *Ptr = Elem.ServiceInsertPt;
1351 Elem.ServiceInsertPt = nullptr;
1352 Ptr->eraseFromParent();
1353 }
1354}
1355
// getIdentStringFromSourceLocation: builds the ";file;function;line;col;;"
// ident string used by the OpenMP runtime for a source location, remapping
// the file path through debug info when available.
// NOTE(review): the function-name line and the PresumedLoc computation
// (doxygen lines 1356/1361) are missing in this extraction.
1357 SourceLocation Loc,
1358 SmallString<128> &Buffer) {
1359 llvm::raw_svector_ostream OS(Buffer);
1360 // Build debug location
1362 OS << ";";
1363 if (auto *DbgInfo = CGF.getDebugInfo())
1364 OS << DbgInfo->remapDIPath(PLoc.getFilename());
1365 else
1366 OS << PLoc.getFilename();
1367 OS << ";";
1368 if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
1369 OS << FD->getQualifiedNameAsString();
1370 OS << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
1371 return OS.str();
1372}
1373
// CGOpenMPRuntime::emitUpdateLocation: emits (or reuses) the ident_t source
// location descriptor for Loc. Falls back to the default descriptor when the
// location is invalid or debug info is disabled and EmitLoc is false.
// NOTE(review): the function-name line and the PresumedLoc computation
// (doxygen lines 1373-1374/1388) are missing in this extraction.
1375 SourceLocation Loc,
1376 unsigned Flags, bool EmitLoc) {
1377 uint32_t SrcLocStrSize;
1378 llvm::Constant *SrcLocStr;
1379 if ((!EmitLoc && CGM.getCodeGenOpts().getDebugInfo() ==
1380 llvm::codegenoptions::NoDebugInfo) ||
1381 Loc.isInvalid()) {
1382 SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize);
1383 } else {
// Assemble function/file/line/column for a precise location string, with
// the filename remapped through debug info when available.
1384 std::string FunctionName;
1385 std::string FileName;
1386 if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
1387 FunctionName = FD->getQualifiedNameAsString();
1389 if (auto *DbgInfo = CGF.getDebugInfo())
1390 FileName = DbgInfo->remapDIPath(PLoc.getFilename());
1391 else
1392 FileName = PLoc.getFilename();
1393 unsigned Line = PLoc.getLine();
1394 unsigned Column = PLoc.getColumn();
1395 SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(FunctionName, FileName, Line,
1396 Column, SrcLocStrSize);
1397 }
1398 unsigned Reserved2Flags = getDefaultLocationReserved2Flags();
1399 return OMPBuilder.getOrCreateIdent(
1400 SrcLocStr, SrcLocStrSize, llvm::omp::IdentFlag(Flags), Reserved2Flags);
1401}
1402
// CGOpenMPRuntime::getThreadID: returns the OpenMP thread id for the current
// function, preferring (in order) the OpenMPIRBuilder's cached id, a value
// cached in OpenMPLocThreadIDMap, the thread-id argument of an outlined
// region, and finally a fresh __kmpc_global_thread_num call emitted at the
// service insert point.
// NOTE(review): the function-name line and two inner lines (doxygen lines
// 1402-1403/1458/1461) are missing in this extraction.
1404 SourceLocation Loc) {
1405 assert(CGF.CurFn && "No function in current CodeGenFunction.");
1406 // If the OpenMPIRBuilder is used we need to use it for all thread id calls as
1407 // the clang invariants used below might be broken.
1408 if (CGM.getLangOpts().OpenMPIRBuilder) {
1409 SmallString<128> Buffer;
1410 OMPBuilder.updateToLocation(CGF.Builder.saveIP());
1411 uint32_t SrcLocStrSize;
1412 auto *SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(
1413 getIdentStringFromSourceLocation(CGF, Loc, Buffer), SrcLocStrSize);
1414 return OMPBuilder.getOrCreateThreadID(
1415 OMPBuilder.getOrCreateIdent(SrcLocStr, SrcLocStrSize));
1416 }
1417
1418 llvm::Value *ThreadID = nullptr;
1419 // Check whether we've already cached a load of the thread id in this
1420 // function.
1421 auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
1422 if (I != OpenMPLocThreadIDMap.end()) {
1423 ThreadID = I->second.ThreadID;
1424 if (ThreadID != nullptr)
1425 return ThreadID;
1426 }
1427 // If exceptions are enabled, do not use parameter to avoid possible crash.
1428 if (auto *OMPRegionInfo =
1429 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
1430 if (OMPRegionInfo->getThreadIDVariable()) {
1431 // Check if this an outlined function with thread id passed as argument.
1432 LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
1433 llvm::BasicBlock *TopBlock = CGF.AllocaInsertPt->getParent();
// Only reuse the argument when it is safe w.r.t. exception handling: no
// landing pads required, or the pointer lives in the entry/current block.
1434 if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions ||
1435 !CGF.getLangOpts().CXXExceptions ||
1436 CGF.Builder.GetInsertBlock() == TopBlock ||
1437 !isa<llvm::Instruction>(LVal.getPointer(CGF)) ||
1438 cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
1439 TopBlock ||
1440 cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
1441 CGF.Builder.GetInsertBlock()) {
1442 ThreadID = CGF.EmitLoadOfScalar(LVal, Loc);
1443 // If value loaded in entry block, cache it and use it everywhere in
1444 // function.
1445 if (CGF.Builder.GetInsertBlock() == TopBlock)
1446 OpenMPLocThreadIDMap[CGF.CurFn].ThreadID = ThreadID;
1447 return ThreadID;
1448 }
1449 }
1450 }
1451
1452 // This is not an outlined function region - need to call __kmpc_int32
1453 // kmpc_global_thread_num(ident_t *loc).
1454 // Generate thread id value and cache this value for use across the
1455 // function.
1456 auto &Elem = OpenMPLocThreadIDMap[CGF.CurFn];
1457 if (!Elem.ServiceInsertPt)
1459 CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
1460 CGF.Builder.SetInsertPoint(Elem.ServiceInsertPt);
1462 llvm::CallInst *Call = CGF.Builder.CreateCall(
1463 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
1464 OMPRTL___kmpc_global_thread_num),
1465 emitUpdateLocation(CGF, Loc));
1466 Call->setCallingConv(CGF.getRuntimeCC());
1467 Elem.ThreadID = Call;
1468 return Call;
1469}
1470
// CGOpenMPRuntime::functionFinished: tears down per-function caches once
// codegen of CGF.CurFn completes (thread-id cache, per-function UDR and UDM
// entries).
// NOTE(review): the function-name line and some lines (doxygen lines
// 1470-1471/1474/1487-1488) are missing in this extraction.
1472 assert(CGF.CurFn && "No function in current CodeGenFunction.");
1473 if (OpenMPLocThreadIDMap.count(CGF.CurFn)) {
1475 OpenMPLocThreadIDMap.erase(CGF.CurFn);
1476 }
// Drop user-defined reduction helpers registered for this function.
1477 if (auto I = FunctionUDRMap.find(CGF.CurFn); I != FunctionUDRMap.end()) {
1478 for (const auto *D : I->second)
1479 UDRMap.erase(D);
1480 FunctionUDRMap.erase(I);
1481 }
// Drop user-defined mapper entries registered for this function.
1482 if (auto I = FunctionUDMMap.find(CGF.CurFn); I != FunctionUDMMap.end()) {
1483 for (const auto *D : I->second)
1484 UDMMap.erase(D);
1485 FunctionUDMMap.erase(I);
1486 }
1489}
1490
// CGOpenMPRuntime::getIdentTyPointerTy: pointer type for ident_t, as owned by
// the OpenMPIRBuilder.
// NOTE(review): the signature line (doxygen line 1491) is missing here.
1492 return OMPBuilder.IdentPtr;
1493}
1494
// Maps a declare-target 'device_type' clause on VD to the corresponding
// OffloadEntriesInfoManager device-clause kind (None when absent).
// NOTE(review): the function-name/parameter line (doxygen line 1496) is
// missing in this extraction.
1495static llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseKind
1497 std::optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
1498 OMPDeclareTargetDeclAttr::getDeviceType(VD);
1499 if (!DevTy)
1500 return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseNone;
1501
1502 switch ((int)*DevTy) { // Avoid -Wcovered-switch-default
1503 case OMPDeclareTargetDeclAttr::DT_Host:
1504 return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseHost;
1505 break;
1506 case OMPDeclareTargetDeclAttr::DT_NoHost:
1507 return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseNoHost;
1508 break;
1509 case OMPDeclareTargetDeclAttr::DT_Any:
1510 return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseAny;
1511 break;
1512 default:
1513 return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseNone;
1514 break;
1515 }
1516}
1517
// Maps a declare-target map-type (to/enter/link) on VD to the corresponding
// OffloadEntriesInfoManager global-var entry kind (None when VD is not a
// declare-target declaration).
// NOTE(review): the function-name/parameter line (doxygen line 1519) is
// missing in this extraction.
1518static llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind
1520 std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> MapType =
1521 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
1522 if (!MapType)
1523 return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryNone;
1524 switch ((int)*MapType) { // Avoid -Wcovered-switch-default
1525 case OMPDeclareTargetDeclAttr::MapTypeTy::MT_To:
1526 return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryTo;
1527 break;
1528 case OMPDeclareTargetDeclAttr::MapTypeTy::MT_Enter:
1529 return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryEnter;
1530 break;
1531 case OMPDeclareTargetDeclAttr::MapTypeTy::MT_Link:
1532 return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryLink;
1533 break;
1534 default:
1535 return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryNone;
1536 break;
1537 }
1538}
1539
// getEntryInfoFromPresumedLoc: computes a unique target-region entry info
// (file/line-based) for BeginLoc via the OpenMPIRBuilder, retrying without
// #line directives when the presumed file does not exist on disk.
// NOTE(review): the SourceManager declaration inside the lambda (doxygen
// line 1545) is missing in this extraction.
1540static llvm::TargetRegionEntryInfo getEntryInfoFromPresumedLoc(
1541 CodeGenModule &CGM, llvm::OpenMPIRBuilder &OMPBuilder,
1542 SourceLocation BeginLoc, llvm::StringRef ParentName = "") {
1543
1544 auto FileInfoCallBack = [&]() {
1546 PresumedLoc PLoc = SM.getPresumedLoc(BeginLoc);
1547
// #line-directive filenames may not exist; fall back to the real location.
1548 if (!CGM.getFileSystem()->exists(PLoc.getFilename()))
1549 PLoc = SM.getPresumedLoc(BeginLoc, /*UseLineDirectives=*/false);
1550
1551 return std::pair<std::string, uint64_t>(PLoc.getFilename(), PLoc.getLine());
1552 };
1553
1554 return OMPBuilder.getTargetEntryUniqueInfo(FileInfoCallBack,
1555 *CGM.getFileSystem(), ParentName);
1556}
1557
// CGOpenMPRuntime::getAddrOfDeclareTargetVar: returns the (possibly
// runtime-managed) address of a declare-target variable via the
// OpenMPIRBuilder, or an invalid address when the builder produces none.
// NOTE(review): the function signature line and two argument lines of the
// getAddrOfDeclareTargetVar call (doxygen lines 1558/1570/1573) are missing
// in this extraction.
1559 auto AddrOfGlobal = [&VD, this]() { return CGM.GetAddrOfGlobal(VD); };
1560
1561 auto LinkageForVariable = [&VD, this]() {
1562 return CGM.getLLVMLinkageVarDefinition(VD);
1563 };
1564
1565 std::vector<llvm::GlobalVariable *> GeneratedRefs;
1566
// The returned address has pointer-to-VD's-type memory representation.
1567 llvm::Type *LlvmPtrTy = CGM.getTypes().ConvertTypeForMem(
1568 CGM.getContext().getPointerType(VD->getType()));
1569 llvm::Constant *addr = OMPBuilder.getAddrOfDeclareTargetVar(
1571 VD->hasDefinition(CGM.getContext()) == VarDecl::DeclarationOnly,
1572 VD->isExternallyVisible(),
1574 VD->getCanonicalDecl()->getBeginLoc()),
1575 CGM.getMangledName(VD), GeneratedRefs, CGM.getLangOpts().OpenMPSimd,
1576 CGM.getLangOpts().OMPTargetTriples, LlvmPtrTy, AddrOfGlobal,
1577 LinkageForVariable);
1578
1579 if (!addr)
1580 return ConstantAddress::invalid();
1581 return ConstantAddress(addr, LlvmPtrTy, CGM.getContext().getDeclAlign(VD));
1582}
1583
// CGOpenMPRuntime::getOrCreateThreadPrivateCache: returns (lazily creating)
// the internal "<mangled>.cache." variable used by
// __kmpc_threadprivate_cached; only valid when TLS is not used/supported.
// NOTE(review): the function-name line (doxygen line 1585) is missing here.
1584llvm::Constant *
1586 assert(!CGM.getLangOpts().OpenMPUseTLS ||
1587 !CGM.getContext().getTargetInfo().isTLSSupported());
1588 // Lookup the entry, lazily creating it if necessary.
1589 std::string Suffix = getName({"cache", ""});
1590 return OMPBuilder.getOrCreateInternalVariable(
1591 CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix).str());
1592}
1593
// CGOpenMPRuntime::getAddrOfThreadPrivate: returns the address of the
// threadprivate copy of VD. With TLS support the original address is already
// per-thread; otherwise the copy is obtained from the runtime via
// __kmpc_threadprivate_cached.
// NOTE(review): the function-name line and the cache-argument line (doxygen
// lines 1594/1607) are missing in this extraction.
1595 const VarDecl *VD,
1596 Address VDAddr,
1597 SourceLocation Loc) {
1598 if (CGM.getLangOpts().OpenMPUseTLS &&
1599 CGM.getContext().getTargetInfo().isTLSSupported())
1600 return VDAddr;
1601
1602 llvm::Type *VarTy = VDAddr.getElementType();
// Args: location, thread id, address of the master copy, its size (and the
// per-variable cache, on the truncated line).
1603 llvm::Value *Args[] = {
1604 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
1605 CGF.Builder.CreatePointerCast(VDAddr.emitRawPointer(CGF), CGM.Int8PtrTy),
1606 CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)),
1608 return Address(
1609 CGF.EmitRuntimeCall(
1610 OMPBuilder.getOrCreateRuntimeFunction(
1611 CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
1612 Args),
1613 CGF.Int8Ty, VDAddr.getAlignment());
1614}
1615
// CGOpenMPRuntime::emitThreadPrivateVarInit: initializes the OpenMP runtime
// and registers constructor/copy-constructor/destructor callbacks for a
// threadprivate variable via __kmpc_threadprivate_register.
// NOTE(review): the function-name line (doxygen line 1616) is missing here.
1617 CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor,
1618 llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
1619 // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime
1620 // library.
1621 llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc);
1622 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
1623 CGM.getModule(), OMPRTL___kmpc_global_thread_num),
1624 OMPLoc);
1625 // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
1626 // to register constructor/destructor for variable.
1627 llvm::Value *Args[] = {
1628 OMPLoc,
1629 CGF.Builder.CreatePointerCast(VDAddr.emitRawPointer(CGF), CGM.VoidPtrTy),
1630 Ctor, CopyCtor, Dtor};
1631 CGF.EmitRuntimeCall(
1632 OMPBuilder.getOrCreateRuntimeFunction(
1633 CGM.getModule(), OMPRTL___kmpc_threadprivate_register),
1634 Args);
1635}
1636
// CGOpenMPRuntime::emitThreadPrivateVarDefinition: emits the ctor/dtor helper
// functions for a threadprivate variable definition (when TLS is not used)
// and registers them with the runtime. If no enclosing CGF is supplied, the
// registration is wrapped in a global init function which is returned so it
// can be run at startup; otherwise registration is emitted inline and
// nullptr is returned.
// NOTE(review): the function-name line and several lines (doxygen lines
// 1636-1637/1657/1682/1689) are missing in this extraction.
1638 const VarDecl *VD, Address VDAddr, SourceLocation Loc,
1639 bool PerformInit, CodeGenFunction *CGF) {
1640 if (CGM.getLangOpts().OpenMPUseTLS &&
1641 CGM.getContext().getTargetInfo().isTLSSupported())
1642 return nullptr;
1643
// Emit helpers only once per variable, keyed by mangled name.
1644 VD = VD->getDefinition(CGM.getContext());
1645 if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) {
1646 QualType ASTTy = VD->getType();
1647
1648 llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
1649 const Expr *Init = VD->getAnyInitializer();
1650 if (CGM.getLangOpts().CPlusPlus && PerformInit) {
1651 // Generate function that re-emits the declaration's initializer into the
1652 // threadprivate copy of the variable VD
1653 CodeGenFunction CtorCGF(CGM);
1654 FunctionArgList Args;
1655 ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
1656 /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
1658 Args.push_back(&Dst);
1659
1660 const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
1661 CGM.getContext().VoidPtrTy, Args);
1662 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
1663 std::string Name = getName({"__kmpc_global_ctor_", ""});
1664 llvm::Function *Fn =
1665 CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
1666 CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
1667 Args, Loc, Loc);
// The ctor receives the destination pointer, runs the initializer into it,
// and returns the same pointer.
1668 llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar(
1669 CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
1670 CGM.getContext().VoidPtrTy, Dst.getLocation());
1671 Address Arg(ArgVal, CtorCGF.ConvertTypeForMem(ASTTy),
1672 VDAddr.getAlignment());
1673 CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
1674 /*IsInitializer=*/true);
1675 ArgVal = CtorCGF.EmitLoadOfScalar(
1676 CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
1677 CGM.getContext().VoidPtrTy, Dst.getLocation());
1678 CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
1679 CtorCGF.FinishFunction();
1680 Ctor = Fn;
1681 }
1683 // Generate function that emits destructor call for the threadprivate copy
1684 // of the variable VD
1685 CodeGenFunction DtorCGF(CGM);
1686 FunctionArgList Args;
1687 ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
1688 /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
1690 Args.push_back(&Dst);
1691
1692 const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
1693 CGM.getContext().VoidTy, Args);
1694 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
1695 std::string Name = getName({"__kmpc_global_dtor_", ""});
1696 llvm::Function *Fn =
1697 CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
1698 auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
1699 DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
1700 Loc, Loc);
1701 // Create a scope with an artificial location for the body of this function.
1702 auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
1703 llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar(
1704 DtorCGF.GetAddrOfLocalVar(&Dst),
1705 /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation());
1706 DtorCGF.emitDestroy(
1707 Address(ArgVal, DtorCGF.Int8Ty, VDAddr.getAlignment()), ASTTy,
1708 DtorCGF.getDestroyer(ASTTy.isDestructedType()),
1709 DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
1710 DtorCGF.FinishFunction();
1711 Dtor = Fn;
1712 }
1713 // Do not emit init function if it is not required.
1714 if (!Ctor && !Dtor)
1715 return nullptr;
1716
1717 // Copying constructor for the threadprivate variable.
1718 // Must be NULL - reserved by runtime, but currently it requires that this
1719 // parameter is always NULL. Otherwise it fires assertion.
1720 CopyCtor = llvm::Constant::getNullValue(CGM.DefaultPtrTy);
1721 if (Ctor == nullptr) {
1722 Ctor = llvm::Constant::getNullValue(CGM.DefaultPtrTy);
1723 }
1724 if (Dtor == nullptr) {
1725 Dtor = llvm::Constant::getNullValue(CGM.DefaultPtrTy);
1726 }
// Without an enclosing function, wrap the registration into a dedicated
// global init function and hand it back to the caller.
1727 if (!CGF) {
1728 auto *InitFunctionTy =
1729 llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
1730 std::string Name = getName({"__omp_threadprivate_init_", ""});
1731 llvm::Function *InitFunction = CGM.CreateGlobalInitOrCleanUpFunction(
1732 InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction());
1733 CodeGenFunction InitCGF(CGM);
1734 FunctionArgList ArgList;
1735 InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
1736 CGM.getTypes().arrangeNullaryFunction(), ArgList,
1737 Loc, Loc);
1738 emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
1739 InitCGF.FinishFunction();
1740 return InitFunction;
1741 }
1742 emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
1743 }
1744 return nullptr;
1745}
1746
// NOTE(review): original line 1747 (the function name and first parameter)
// was lost in extraction; the body reads a FunctionDecl *FD, so this is
// presumably CGOpenMPRuntime::emitDeclareTargetFunction(const FunctionDecl
// *FD, llvm::GlobalValue *GV) -- confirm against the pristine source.
// Purpose (grounded in the body below): for an active 'indirect' declare
// target function, create (on the device) a protected global holding the
// function's address and register it with the offload info manager so the
// runtime can resolve the function by mangled name.
1748 llvm::GlobalValue *GV) {
1749 std::optional<OMPDeclareTargetDeclAttr *> ActiveAttr =
1750 OMPDeclareTargetDeclAttr::getActiveAttr(FD);
1751
1752 // We only need to handle active 'indirect' declare target functions.
1753 if (!ActiveAttr || !(*ActiveAttr)->getIndirect())
1754 return;
1755
1756 // Get a mangled name to store the new device global in.
1757 llvm::TargetRegionEntryInfo EntryInfo = getEntryInfoFromPresumedLoc(
// NOTE(review): original line 1758 (the arguments to
// getEntryInfoFromPresumedLoc) was lost in extraction.
1759 SmallString<128> Name;
1760 OMPBuilder.OffloadInfoManager.getTargetRegionEntryFnName(Name, EntryInfo);
1761
1762 // We need to generate a new global to hold the address of the indirectly
1763 // called device function. Doing this allows us to keep the visibility and
1764 // linkage of the associated function unchanged while allowing the runtime to
1765 // access its value.
1766 llvm::GlobalValue *Addr = GV;
1767 if (CGM.getLangOpts().OpenMPIsTargetDevice) {
// The function pointer lives in the program address space, while the
// global that stores it goes into the default globals address space.
1768 llvm::PointerType *FnPtrTy = llvm::PointerType::get(
1769 CGM.getLLVMContext(),
1770 CGM.getModule().getDataLayout().getProgramAddressSpace());
1771 Addr = new llvm::GlobalVariable(
1772 CGM.getModule(), FnPtrTy,
1773 /*isConstant=*/true, llvm::GlobalValue::ExternalLinkage, GV, Name,
1774 nullptr, llvm::GlobalValue::NotThreadLocal,
1775 CGM.getModule().getDataLayout().getDefaultGlobalsAddressSpace());
1776 Addr->setVisibility(llvm::GlobalValue::ProtectedVisibility);
1777 }
1778
1779 OMPBuilder.OffloadInfoManager.registerDeviceGlobalVarEntryInfo(
1780 Name, Addr, CGM.GetTargetTypeStoreSize(CGM.VoidPtrTy).getQuantity(),
1781 llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryIndirect,
1782 llvm::GlobalValue::WeakODRLinkage);
1783}
1784
// NOTE(review): original line 1785 (function name and first parameter, the
// CodeGenFunction &CGF used below) was lost in extraction -- presumably
// CGOpenMPRuntime::getAddrOfArtificialThreadPrivate.
// Purpose (grounded in the body): create/reuse an internal global named
// "<Name><artificial suffix>" for a compiler-generated threadprivate value.
// If the target supports TLS (and -fopenmp-use-tls is on), the global is
// simply marked thread_local; otherwise the address is obtained through the
// runtime's per-thread cache via __kmpc_threadprivate_cached.
1786 QualType VarType,
1787 StringRef Name) {
1788 std::string Suffix = getName({"artificial", ""});
1789 llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType);
1790 llvm::GlobalVariable *GAddr = OMPBuilder.getOrCreateInternalVariable(
1791 VarLVType, Twine(Name).concat(Suffix).str());
1792 if (CGM.getLangOpts().OpenMP && CGM.getLangOpts().OpenMPUseTLS &&
1793 CGM.getTarget().isTLSSupported()) {
1794 GAddr->setThreadLocal(/*Val=*/true);
1795 return Address(GAddr, GAddr->getValueType(),
1796 CGM.getContext().getTypeAlignInChars(VarType));
1797 }
1798 std::string CacheSuffix = getName({"cache", ""});
1799 llvm::Value *Args[] = {
// NOTE(review): original lines 1800-1801 (the first argument expressions,
// presumably the update-location and thread-id values) were lost in
// extraction -- confirm against the pristine source.
1802 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy),
1803 CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy,
1804 /*isSigned=*/false),
1805 OMPBuilder.getOrCreateInternalVariable(
1806 CGM.VoidPtrPtrTy,
1807 Twine(Name).concat(Suffix).concat(CacheSuffix).str())};
1808 return Address(
// NOTE(review): original line 1809 (start of the expression wrapping the
// runtime call, likely a pointer cast) was lost in extraction.
1810 CGF.EmitRuntimeCall(
1811 OMPBuilder.getOrCreateRuntimeFunction(
1812 CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
1813 Args),
1814 CGF.Builder.getPtrTy(0)),
1815 VarLVType, CGM.getContext().getTypeAlignInChars(VarType));
1816}
1817
// NOTE(review): original line 1818 (function name and first parameters) was
// lost in extraction -- presumably CGOpenMPRuntime::emitIfClause taking the
// CodeGenFunction &CGF and const Expr *Cond used below.
// Emits either ThenGen or ElseGen depending on Cond: if the condition
// constant-folds, only the live arm is emitted; otherwise a conditional
// branch selects between the two generated blocks at run time.
1819 const RegionCodeGenTy &ThenGen,
1820 const RegionCodeGenTy &ElseGen) {
1821 CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());
1822
1823 // If the condition constant folds and can be elided, try to avoid emitting
1824 // the condition and the dead arm of the if/else.
1825 bool CondConstant;
1826 if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
1827 if (CondConstant)
1828 ThenGen(CGF);
1829 else
1830 ElseGen(CGF);
1831 return;
1832 }
1833
1834 // Otherwise, the condition did not fold, or we couldn't elide it. Just
1835 // emit the conditional branch.
1836 llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then");
1837 llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else");
1838 llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end");
1839 CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0);
1840
1841 // Emit the 'then' code.
1842 CGF.EmitBlock(ThenBlock);
1843 ThenGen(CGF);
1844 CGF.EmitBranch(ContBlock);
1845 // Emit the 'else' code if present.
1846 // There is no need to emit line number for unconditional branch.
// NOTE(review): original line 1847 was lost in extraction (likely a
// debug-location suppression statement matching the comment above).
1848 CGF.EmitBlock(ElseBlock);
1849 ElseGen(CGF);
1850 // There is no need to emit line number for unconditional branch.
// NOTE(review): original line 1851 was lost in extraction (likely the same
// debug-location suppression as above).
1852 CGF.EmitBranch(ContBlock);
1853 // Emit the continuation block for code after the if.
1854 CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
1855}
1856
// NOTE(review): original line 1857 (the function name, presumably
// CGOpenMPRuntime::emitParallelCall) was lost in extraction.
// Emits a 'parallel' region invocation: without an if-clause (or when it is
// true) it forwards the outlined function and captured variables to
// __kmpc_fork_call; when the if-clause is false it serializes the region via
// __kmpc_serialized_parallel / __kmpc_end_serialized_parallel and calls the
// outlined function directly on the current thread.
// NOTE(review): NumThreads, NumThreadsModifier, Severity and Message are not
// used in the visible portion of this body -- confirm against the pristine
// source whether they are consumed by code lost in extraction.
1858 CodeGenFunction &CGF, SourceLocation Loc, llvm::Function *OutlinedFn,
1859 ArrayRef<llvm::Value *> CapturedVars, const Expr *IfCond,
1860 llvm::Value *NumThreads, OpenMPNumThreadsClauseModifier NumThreadsModifier,
1861 OpenMPSeverityClauseKind Severity, const Expr *Message) {
1862 if (!CGF.HaveInsertPoint())
1863 return;
1864 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
1865 auto &M = CGM.getModule();
1866 auto &&ThenGen = [&M, OutlinedFn, CapturedVars, RTLoc,
1867 this](CodeGenFunction &CGF, PrePostActionTy &) {
1868 // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
1869 llvm::Value *Args[] = {
1870 RTLoc,
1871 CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
1872 OutlinedFn};
// NOTE(review): original line 1873 (the declaration of RealArgs, presumably
// a SmallVector of llvm::Value*) was lost in extraction.
1874 RealArgs.append(std::begin(Args), std::end(Args));
1875 RealArgs.append(CapturedVars.begin(), CapturedVars.end());
1876
1877 llvm::FunctionCallee RTLFn =
1878 OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_fork_call);
1879 CGF.EmitRuntimeCall(RTLFn, RealArgs);
1880 };
1881 auto &&ElseGen = [&M, OutlinedFn, CapturedVars, RTLoc, Loc,
1882 this](CodeGenFunction &CGF, PrePostActionTy &) {
// NOTE(review): original line 1883 (the declaration of RT, presumably a
// reference to the CGOpenMPRuntime) was lost in extraction.
1884 llvm::Value *ThreadID = RT.getThreadID(CGF, Loc);
1885 // Build calls:
1886 // __kmpc_serialized_parallel(&Loc, GTid);
1887 llvm::Value *Args[] = {RTLoc, ThreadID};
1888 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
1889 M, OMPRTL___kmpc_serialized_parallel),
1890 Args);
1891
1892 // OutlinedFn(&GTid, &zero_bound, CapturedStruct);
1893 Address ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc);
1894 RawAddress ZeroAddrBound =
// NOTE(review): original line 1895 (the temp-alloca call producing
// ZeroAddrBound) was lost in extraction.
1896 /*Name=*/".bound.zero.addr");
1897 CGF.Builder.CreateStore(CGF.Builder.getInt32(/*C*/ 0), ZeroAddrBound);
// NOTE(review): original line 1898 (the declaration of OutlinedFnArgs) was
// lost in extraction.
1899 // ThreadId for serialized parallels is 0.
1900 OutlinedFnArgs.push_back(ThreadIDAddr.emitRawPointer(CGF));
1901 OutlinedFnArgs.push_back(ZeroAddrBound.getPointer());
1902 OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
1903
1904 // Ensure we do not inline the function. This is trivially true for the ones
1905 // passed to __kmpc_fork_call but the ones called in serialized regions
1906 // could be inlined. This is not a perfect but it is closer to the invariant
1907 // we want, namely, every data environment starts with a new function.
1908 // TODO: We should pass the if condition to the runtime function and do the
1909 // handling there. Much cleaner code.
1910 OutlinedFn->removeFnAttr(llvm::Attribute::AlwaysInline);
1911 OutlinedFn->addFnAttr(llvm::Attribute::NoInline);
1912 RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);
1913
1914 // __kmpc_end_serialized_parallel(&Loc, GTid);
1915 llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID};
1916 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
1917 M, OMPRTL___kmpc_end_serialized_parallel),
1918 EndArgs);
1919 };
1920 if (IfCond) {
1921 emitIfClause(CGF, IfCond, ThenGen, ElseGen);
1922 } else {
// No if-clause: unconditionally emit the forked (parallel) path.
1923 RegionCodeGenTy ThenRCG(ThenGen);
1924 ThenRCG(CGF);
1925 }
1926}
1927
1928// If we're inside an (outlined) parallel region, use the region info's
1929// thread-ID variable (it is passed in a first argument of the outlined function
1930// as "kmp_int32 *gtid"). Otherwise, if we're not inside parallel region, but in
1931// regular serial code region, get thread ID by calling kmp_int32
1932// kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary and
1933// return the address of that temp.
// NOTE(review): original line 1934 (the function name and first parameter,
// presumably CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF, ...))
// was lost in extraction.
1935 SourceLocation Loc) {
1936 if (auto *OMPRegionInfo =
1937 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
1938 if (OMPRegionInfo->getThreadIDVariable())
1939 return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress();
1940
// Serial code path: materialize the thread ID into a named temporary so an
// address can be returned.
1941 llvm::Value *ThreadID = getThreadID(CGF, Loc);
1942 QualType Int32Ty =
1943 CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
1944 Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
1945 CGF.EmitStoreOfScalar(ThreadID,
1946 CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty));
1947
1948 return ThreadIDTemp;
1949}
1950
1951llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
1952 std::string Prefix = Twine("gomp_critical_user_", CriticalName).str();
1953 std::string Name = getName({Prefix, "var"});
1954 return OMPBuilder.getOrCreateInternalVariable(KmpCriticalNameTy, Name);
1955}
1956
1957namespace {
1958/// Common pre(post)-action for different OpenMP constructs.
1959class CommonActionTy final : public PrePostActionTy {
1960 llvm::FunctionCallee EnterCallee;
1961 ArrayRef<llvm::Value *> EnterArgs;
1962 llvm::FunctionCallee ExitCallee;
1963 ArrayRef<llvm::Value *> ExitArgs;
1964 bool Conditional;
1965 llvm::BasicBlock *ContBlock = nullptr;
1966
1967public:
1968 CommonActionTy(llvm::FunctionCallee EnterCallee,
1969 ArrayRef<llvm::Value *> EnterArgs,
1970 llvm::FunctionCallee ExitCallee,
1971 ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false)
1972 : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),
1973 ExitArgs(ExitArgs), Conditional(Conditional) {}
1974 void Enter(CodeGenFunction &CGF) override {
1975 llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs);
1976 if (Conditional) {
1977 llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes);
1978 auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
1979 ContBlock = CGF.createBasicBlock("omp_if.end");
1980 // Generate the branch (If-stmt)
1981 CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
1982 CGF.EmitBlock(ThenBlock);
1983 }
1984 }
1985 void Done(CodeGenFunction &CGF) {
1986 // Emit the rest of blocks/branches
1987 CGF.EmitBranch(ContBlock);
1988 CGF.EmitBlock(ContBlock, true);
1989 }
1990 void Exit(CodeGenFunction &CGF) override {
1991 CGF.EmitRuntimeCall(ExitCallee, ExitArgs);
1992 }
1993};
1994} // anonymous namespace
1995
// NOTE(review): original line 1996 (the function name and first parameter,
// presumably CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF, ...))
// was lost in extraction.
// Emits a 'critical' construct: the region body is bracketed by
// __kmpc_critical (or __kmpc_critical_with_hint when a hint expression is
// given) and __kmpc_end_critical, all keyed on the named lock variable.
1997 StringRef CriticalName,
1998 const RegionCodeGenTy &CriticalOpGen,
1999 SourceLocation Loc, const Expr *Hint) {
2000 // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]);
2001 // CriticalOpGen();
2002 // __kmpc_end_critical(ident_t *, gtid, Lock);
2003 // Prepare arguments and build a call to __kmpc_critical
2004 if (!CGF.HaveInsertPoint())
2005 return;
2006 llvm::FunctionCallee RuntimeFcn = OMPBuilder.getOrCreateRuntimeFunction(
2007 CGM.getModule(),
2008 Hint ? OMPRTL___kmpc_critical_with_hint : OMPRTL___kmpc_critical);
2009 llvm::Value *LockVar = getCriticalRegionLock(CriticalName);
// The lock global may live in a different address space than the runtime
// entry expects (e.g. on GPU targets); cast it to the parameter's space.
2010 unsigned LockVarArgIdx = 2;
2011 if (cast<llvm::GlobalVariable>(LockVar)->getAddressSpace() !=
2012 RuntimeFcn.getFunctionType()
2013 ->getParamType(LockVarArgIdx)
2014 ->getPointerAddressSpace())
2015 LockVar = CGF.Builder.CreateAddrSpaceCast(
2016 LockVar, RuntimeFcn.getFunctionType()->getParamType(LockVarArgIdx));
2017 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2018 LockVar};
2019 llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args),
2020 std::end(Args));
2021 if (Hint) {
// The hint is only passed to the enter call; __kmpc_end_critical keeps the
// plain three-argument list.
2022 EnterArgs.push_back(CGF.Builder.CreateIntCast(
2023 CGF.EmitScalarExpr(Hint), CGM.Int32Ty, /*isSigned=*/false));
2024 }
2025 CommonActionTy Action(RuntimeFcn, EnterArgs,
2026 OMPBuilder.getOrCreateRuntimeFunction(
2027 CGM.getModule(), OMPRTL___kmpc_end_critical),
2028 Args);
2029 CriticalOpGen.setAction(Action);
2030 emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
2031}
2032
// NOTE(review): original line 2033 (the function name and first parameter,
// presumably CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF, ...))
// was lost in extraction.
// Emits a 'master' construct: the body runs only when __kmpc_master returns
// non-zero (the conditional CommonActionTy), and is closed by
// __kmpc_end_master.
2034 const RegionCodeGenTy &MasterOpGen,
2035 SourceLocation Loc) {
2036 if (!CGF.HaveInsertPoint())
2037 return;
2038 // if(__kmpc_master(ident_t *, gtid)) {
2039 // MasterOpGen();
2040 // __kmpc_end_master(ident_t *, gtid);
2041 // }
2042 // Prepare arguments and build a call to __kmpc_master
2043 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2044 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2045 CGM.getModule(), OMPRTL___kmpc_master),
2046 Args,
2047 OMPBuilder.getOrCreateRuntimeFunction(
2048 CGM.getModule(), OMPRTL___kmpc_end_master),
2049 Args,
2050 /*Conditional=*/true);
2051 MasterOpGen.setAction(Action);
2052 emitInlinedDirective(CGF, OMPD_master, MasterOpGen);
2053 Action.Done(CGF);
2054}
2055
// NOTE(review): original line 2056 (the function name and first parameter,
// presumably CGOpenMPRuntime::emitMaskedRegion(CodeGenFunction &CGF, ...))
// was lost in extraction.
// Emits a 'masked' construct: like 'master' but the executing thread is
// selected by the filter expression (defaulting to thread 0 when no filter
// clause is present).
2057 const RegionCodeGenTy &MaskedOpGen,
2058 SourceLocation Loc, const Expr *Filter) {
2059 if (!CGF.HaveInsertPoint())
2060 return;
2061 // if(__kmpc_masked(ident_t *, gtid, filter)) {
2062 // MaskedOpGen();
2063 // __kmpc_end_masked(iden_t *, gtid);
2064 // }
2065 // Prepare arguments and build a call to __kmpc_masked
2066 llvm::Value *FilterVal = Filter
2067 ? CGF.EmitScalarExpr(Filter, CGF.Int32Ty)
2068 : llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/0);
2069 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2070 FilterVal};
// __kmpc_end_masked takes no filter, hence the separate two-element list.
2071 llvm::Value *ArgsEnd[] = {emitUpdateLocation(CGF, Loc),
2072 getThreadID(CGF, Loc)};
2073 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2074 CGM.getModule(), OMPRTL___kmpc_masked),
2075 Args,
2076 OMPBuilder.getOrCreateRuntimeFunction(
2077 CGM.getModule(), OMPRTL___kmpc_end_masked),
2078 ArgsEnd,
2079 /*Conditional=*/true);
2080 MaskedOpGen.setAction(Action);
2081 emitInlinedDirective(CGF, OMPD_masked, MaskedOpGen);
2082 Action.Done(CGF);
2083}
2084
// NOTE(review): original line 2085 (the function name and first parameter,
// presumably CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF, ...))
// was lost in extraction.
// Emits a 'taskyield' directive, delegating to the OpenMPIRBuilder when it
// is enabled and otherwise calling __kmpc_omp_taskyield directly; afterwards
// the enclosing untied-task switch (if any) is advanced.
2086 SourceLocation Loc) {
2087 if (!CGF.HaveInsertPoint())
2088 return;
2089 if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2090 OMPBuilder.createTaskyield(CGF.Builder);
2091 } else {
2092 // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
2093 llvm::Value *Args[] = {
2094 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2095 llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
2096 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2097 CGM.getModule(), OMPRTL___kmpc_omp_taskyield),
2098 Args);
2099 }
2100
2101 if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
2102 Region->emitUntiedSwitch(CGF);
2103}
2104
// NOTE(review): original line 2105 (the function name and first parameter,
// presumably CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF, ...))
// was lost in extraction.
// Emits a 'taskgroup' construct: the region body is bracketed by
// __kmpc_taskgroup and __kmpc_end_taskgroup (unconditional CommonActionTy).
2106 const RegionCodeGenTy &TaskgroupOpGen,
2107 SourceLocation Loc) {
2108 if (!CGF.HaveInsertPoint())
2109 return;
2110 // __kmpc_taskgroup(ident_t *, gtid);
2111 // TaskgroupOpGen();
2112 // __kmpc_end_taskgroup(ident_t *, gtid);
2113 // Prepare arguments and build a call to __kmpc_taskgroup
2114 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2115 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2116 CGM.getModule(), OMPRTL___kmpc_taskgroup),
2117 Args,
2118 OMPBuilder.getOrCreateRuntimeFunction(
2119 CGM.getModule(), OMPRTL___kmpc_end_taskgroup),
2120 Args);
2121 TaskgroupOpGen.setAction(Action);
2122 emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen);
2123}
2124
2125/// Given an array of pointers to variables, project the address of a
2126/// given variable.
// NOTE(review): original line 2127 (the static function header, presumably
// emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array, ...)) was lost
// in extraction.
2128 unsigned Index, const VarDecl *Var) {
2129 // Pull out the pointer to the variable.
2130 Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index);
2131 llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr);
2132
// Rebuild a typed Address using the variable's memory type and declared
// alignment.
2133 llvm::Type *ElemTy = CGF.ConvertTypeForMem(Var->getType());
2134 return Address(Ptr, ElemTy, CGF.getContext().getDeclAlign(Var));
2135}
2136
// NOTE(review): original line 2137 (the static function header, presumably
// emitCopyprivateCopyFunction() was lost in extraction.
// Builds the internal helper "void copy_func(void *LHSArg, void *RHSArg)"
// used by __kmpc_copyprivate: both arguments are arrays of pointers to the
// copyprivate variables, and the helper performs the element-wise assignment
// *(Ti*)Dst[i] = *(Ti*)Src[i] via the pre-built AssignmentOps expressions.
2138 CodeGenModule &CGM, llvm::Type *ArgsElemType,
2139 ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
2140 ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps,
2141 SourceLocation Loc) {
2142 ASTContext &C = CGM.getContext();
2143 // void copy_func(void *LHSArg, void *RHSArg);
2144 FunctionArgList Args;
2145 ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
// NOTE(review): original line 2146 (the trailing constructor argument,
// presumably the ImplicitParamKind) was lost in extraction.
2147 ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
// NOTE(review): original line 2148 (same trailing constructor argument) was
// lost in extraction.
2149 Args.push_back(&LHSArg);
2150 Args.push_back(&RHSArg);
2151 const auto &CGFI =
2152 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
2153 std::string Name =
2154 CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"});
2155 auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
2156 llvm::GlobalValue::InternalLinkage, Name,
2157 &CGM.getModule());
// NOTE(review): original line 2158 (likely an attribute-setting call on Fn)
// was lost in extraction.
2159 Fn->setDoesNotRecurse();
2160 CodeGenFunction CGF(CGM);
2161 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
2162 // Dest = (void*[n])(LHSArg);
2163 // Src = (void*[n])(RHSArg);
// NOTE(review): original line 2164 (start of the LHS Address construction,
// wrapping the load in a pointer cast) was lost in extraction.
2165 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
2166 CGF.Builder.getPtrTy(0)),
2167 ArgsElemType, CGF.getPointerAlign());
// NOTE(review): original line 2168 (start of the matching RHS Address
// construction) was lost in extraction.
2169 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
2170 CGF.Builder.getPtrTy(0)),
2171 ArgsElemType, CGF.getPointerAlign());
2172 // *(Type0*)Dst[0] = *(Type0*)Src[0];
2173 // *(Type1*)Dst[1] = *(Type1*)Src[1];
2174 // ...
2175 // *(Typen*)Dst[n] = *(Typen*)Src[n];
2176 for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
2177 const auto *DestVar =
2178 cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
2179 Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);
2180
2181 const auto *SrcVar =
2182 cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
2183 Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);
2184
2185 const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
2186 QualType Type = VD->getType();
2187 CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);
2188 }
2189 CGF.FinishFunction();
2190 return Fn;
2191}
2192
// NOTE(review): original line 2193 (the function name and first parameter,
// presumably CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF, ...))
// was lost in extraction.
// Emits a 'single' construct: the body runs on one thread (guarded by
// __kmpc_single/__kmpc_end_single); if copyprivate variables are present, a
// did_it flag plus a generated copy helper broadcast the single thread's
// values to the team via __kmpc_copyprivate.
2194 const RegionCodeGenTy &SingleOpGen,
2195 SourceLocation Loc,
2196 ArrayRef<const Expr *> CopyprivateVars,
2197 ArrayRef<const Expr *> SrcExprs,
2198 ArrayRef<const Expr *> DstExprs,
2199 ArrayRef<const Expr *> AssignmentOps) {
2200 if (!CGF.HaveInsertPoint())
2201 return;
2202 assert(CopyprivateVars.size() == SrcExprs.size() &&
2203 CopyprivateVars.size() == DstExprs.size() &&
2204 CopyprivateVars.size() == AssignmentOps.size());
2205 ASTContext &C = CGM.getContext();
2206 // int32 did_it = 0;
2207 // if(__kmpc_single(ident_t *, gtid)) {
2208 // SingleOpGen();
2209 // __kmpc_end_single(ident_t *, gtid);
2210 // did_it = 1;
2211 // }
2212 // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
2213 // <copy_func>, did_it);
2214
2215 Address DidIt = Address::invalid();
2216 if (!CopyprivateVars.empty()) {
2217 // int32 did_it = 0;
2218 QualType KmpInt32Ty =
2219 C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
2220 DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
2221 CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
2222 }
2223 // Prepare arguments and build a call to __kmpc_single
2224 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2225 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2226 CGM.getModule(), OMPRTL___kmpc_single),
2227 Args,
2228 OMPBuilder.getOrCreateRuntimeFunction(
2229 CGM.getModule(), OMPRTL___kmpc_end_single),
2230 Args,
2231 /*Conditional=*/true);
2232 SingleOpGen.setAction(Action);
2233 emitInlinedDirective(CGF, OMPD_single, SingleOpGen);
2234 if (DidIt.isValid()) {
// Still inside the single-thread arm: record that this thread executed the
// region so __kmpc_copyprivate knows who provides the values.
2235 // did_it = 1;
2236 CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
2237 }
2238 Action.Done(CGF);
2239 // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
2240 // <copy_func>, did_it);
2241 if (DidIt.isValid()) {
2242 llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
2243 QualType CopyprivateArrayTy = C.getConstantArrayType(
2244 C.VoidPtrTy, ArraySize, nullptr, ArraySizeModifier::Normal,
2245 /*IndexTypeQuals=*/0);
2246 // Create a list of all private variables for copyprivate.
2247 Address CopyprivateList =
2248 CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
2249 for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
2250 Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I);
2251 CGF.Builder.CreateStore(
// NOTE(review): original line 2252 (the pointer cast wrapping the stored
// value) was lost in extraction.
2253 CGF.EmitLValue(CopyprivateVars[I]).getPointer(CGF),
2254 CGF.VoidPtrTy),
2255 Elem);
2256 }
2257 // Build function that copies private values from single region to all other
2258 // threads in the corresponding parallel region.
2259 llvm::Value *CpyFn = emitCopyprivateCopyFunction(
2260 CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy), CopyprivateVars,
2261 SrcExprs, DstExprs, AssignmentOps, Loc);
2262 llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy);
// NOTE(review): original line 2263 (the declaration of CL, a cast of
// CopyprivateList used below) was lost in extraction.
2264 CopyprivateList, CGF.VoidPtrTy, CGF.Int8Ty);
2265 llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt);
2266 llvm::Value *Args[] = {
2267 emitUpdateLocation(CGF, Loc), // ident_t *<loc>
2268 getThreadID(CGF, Loc), // i32 <gtid>
2269 BufSize, // size_t <buf_size>
2270 CL.emitRawPointer(CGF), // void *<copyprivate list>
2271 CpyFn, // void (*) (void *, void *) <copy_func>
2272 DidItVal // i32 did_it
2273 };
2274 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2275 CGM.getModule(), OMPRTL___kmpc_copyprivate),
2276 Args);
2277 }
2278}
2279
// NOTE(review): original line 2280 (the function name and first parameter,
// presumably CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF, ...))
// was lost in extraction.
// Emits an 'ordered' construct.  With the threads clause (IsThreads) the
// body is bracketed by __kmpc_ordered/__kmpc_end_ordered; otherwise it is
// emitted inline with no runtime calls.
2281 const RegionCodeGenTy &OrderedOpGen,
2282 SourceLocation Loc, bool IsThreads) {
2283 if (!CGF.HaveInsertPoint())
2284 return;
2285 // __kmpc_ordered(ident_t *, gtid);
2286 // OrderedOpGen();
2287 // __kmpc_end_ordered(ident_t *, gtid);
2288 // Prepare arguments and build a call to __kmpc_ordered
2289 if (IsThreads) {
2290 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2291 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2292 CGM.getModule(), OMPRTL___kmpc_ordered),
2293 Args,
2294 OMPBuilder.getOrCreateRuntimeFunction(
2295 CGM.getModule(), OMPRTL___kmpc_end_ordered),
2296 Args);
2297 OrderedOpGen.setAction(Action);
2298 emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
2299 return;
2300 }
2301 emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
2302}
2303
// NOTE(review): original line 2304 (the static function header, presumably
// getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) -- the name is
// visible at its call site in emitBarrierCall) was lost in extraction.
// Maps the directive kind of an (implicit or explicit) barrier to the
// OMP_IDENT_BARRIER_* flag encoded into the ident_t location.
2305 unsigned Flags;
2306 if (Kind == OMPD_for)
2307 Flags = OMP_IDENT_BARRIER_IMPL_FOR;
2308 else if (Kind == OMPD_sections)
2309 Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS;
2310 else if (Kind == OMPD_single)
2311 Flags = OMP_IDENT_BARRIER_IMPL_SINGLE;
2312 else if (Kind == OMPD_barrier)
2313 Flags = OMP_IDENT_BARRIER_EXPL;
2314 else
2315 Flags = OMP_IDENT_BARRIER_IMPL;
2316 return Flags;
2317}
2318
// NOTE(review): original line 2319 (the function name, presumably
// CGOpenMPRuntime::getDefaultScheduleAndChunk) was lost in extraction.
// For a loop directive that carries an ordered(n) clause (a doacross loop),
// forces schedule(static, 1) by rewriting ScheduleKind and synthesizing a
// literal 1 chunk expression; otherwise leaves the outputs untouched.
2320 CodeGenFunction &CGF, const OMPLoopDirective &S,
2321 OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const {
2322 // Check if the loop directive is actually a doacross loop directive. In this
2323 // case choose static, 1 schedule.
2324 if (llvm::any_of(
2325 S.getClausesOfKind<OMPOrderedClause>(),
2326 [](const OMPOrderedClause *C) { return C->getNumForLoops(); })) {
2327 ScheduleKind = OMPC_SCHEDULE_static;
2328 // Chunk size is 1 in this case.
2329 llvm::APInt ChunkSize(32, 1);
2330 ChunkExpr = IntegerLiteral::Create(
2331 CGF.getContext(), ChunkSize,
2332 CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
2333 SourceLocation());
2334 }
2335}
2336
// NOTE(review): original line 2337 (the function name and first parameters,
// presumably CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF,
// SourceLocation Loc, ...)) was lost in extraction.
// Emits a barrier.  Delegates to OpenMPIRBuilder when enabled; otherwise
// calls __kmpc_cancel_barrier inside cancellable regions (optionally
// checking its result and branching to the cancellation destination) and
// plain __kmpc_barrier everywhere else.
2338 OpenMPDirectiveKind Kind, bool EmitChecks,
2339 bool ForceSimpleCall) {
2340 // Check if we should use the OMPBuilder
2341 auto *OMPRegionInfo =
2342 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo);
2343 if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2344 llvm::OpenMPIRBuilder::InsertPointTy AfterIP =
2345 cantFail(OMPBuilder.createBarrier(CGF.Builder, Kind, ForceSimpleCall,
2346 EmitChecks));
2347 CGF.Builder.restoreIP(AfterIP);
2348 return;
2349 }
2350
2351 if (!CGF.HaveInsertPoint())
2352 return;
2353 // Build call __kmpc_cancel_barrier(loc, thread_id);
2354 // Build call __kmpc_barrier(loc, thread_id);
2355 unsigned Flags = getDefaultFlagsForBarriers(Kind);
2356 // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc,
2357 // thread_id);
2358 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
2359 getThreadID(CGF, Loc)};
2360 if (OMPRegionInfo) {
2361 if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) {
2362 llvm::Value *Result = CGF.EmitRuntimeCall(
2363 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
2364 OMPRTL___kmpc_cancel_barrier),
2365 Args);
2366 if (EmitChecks) {
2367 // if (__kmpc_cancel_barrier()) {
2368 // exit from construct;
2369 // }
2370 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
2371 llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
2372 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
2373 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
2374 CGF.EmitBlock(ExitBB);
2375 // exit from construct;
2376 CodeGenFunction::JumpDest CancelDestination =
2377 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
2378 CGF.EmitBranchThroughCleanup(CancelDestination);
2379 CGF.EmitBlock(ContBB, /*IsFinished=*/true);
2380 }
2381 return;
2382 }
2383 }
2384 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2385 CGM.getModule(), OMPRTL___kmpc_barrier),
2386 Args);
2387}
2388
// NOTE(review): original line 2389 (the function name and first parameters,
// presumably CGOpenMPRuntime::emitErrorCall(CodeGenFunction &CGF,
// SourceLocation Loc, ...)) was lost in extraction.
// Emits a call to __kmpc_error for the 'error' directive: severity 2 for
// fatal, 1 for warning, with the optional message expression (or a null
// pointer when absent).
2390 Expr *ME, bool IsFatal) {
2391 llvm::Value *MVL = ME ? CGF.EmitScalarExpr(ME)
2392 : llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
2393 // Build call void __kmpc_error(ident_t *loc, int severity, const char
2394 // *message)
2395 llvm::Value *Args[] = {
2396 emitUpdateLocation(CGF, Loc, /*Flags=*/0, /*GenLoc=*/true),
2397 llvm::ConstantInt::get(CGM.Int32Ty, IsFatal ? 2 : 1),
2398 CGF.Builder.CreatePointerCast(MVL, CGM.Int8PtrTy)};
2399 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2400 CGM.getModule(), OMPRTL___kmpc_error),
2401 Args);
2402}
2403
2404/// Map the OpenMP loop schedule to the runtime enumeration.
2405static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
2406 bool Chunked, bool Ordered) {
2407 switch (ScheduleKind) {
2408 case OMPC_SCHEDULE_static:
2409 return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked)
2410 : (Ordered ? OMP_ord_static : OMP_sch_static);
2411 case OMPC_SCHEDULE_dynamic:
2412 return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked;
2413 case OMPC_SCHEDULE_guided:
2414 return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked;
2415 case OMPC_SCHEDULE_runtime:
2416 return Ordered ? OMP_ord_runtime : OMP_sch_runtime;
2417 case OMPC_SCHEDULE_auto:
2418 return Ordered ? OMP_ord_auto : OMP_sch_auto;
// NOTE(review): original line 2419 (a case label, presumably
// OMPC_SCHEDULE_unknown -- consistent with the assert message below) was
// lost in extraction.
2420 assert(!Chunked && "chunk was specified but schedule kind not known");
2421 return Ordered ? OMP_ord_static : OMP_sch_static;
2422 }
2423 llvm_unreachable("Unexpected runtime schedule");
2424}
2425
2426/// Map the OpenMP distribute schedule to the runtime enumeration.
2427static OpenMPSchedType
// NOTE(review): original line 2428 (the parameter list, presumably
// getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool
// Chunked) -- consistent with the overload's call sites below) was lost in
// extraction.
2429 // only static is allowed for dist_schedule
2430 return Chunked ? OMP_dist_sch_static_chunked : OMP_dist_sch_static;
2431}
2432
// NOTE(review): original line 2433 (the function name and first parameter,
// presumably CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind
// ScheduleKind, ...)) was lost in extraction.
// Returns true iff the schedule maps to the non-chunked, non-ordered static
// runtime schedule (OMP_sch_static).
2434 bool Chunked) const {
2435 OpenMPSchedType Schedule =
2436 getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2437 return Schedule == OMP_sch_static;
2438}
2439
// NOTE(review): original line 2440 (the function name, presumably the
// dist_schedule overload of CGOpenMPRuntime::isStaticNonchunked) was lost in
// extraction.
// Returns true iff the distribute schedule maps to OMP_dist_sch_static.
2441 OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
2442 OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2443 return Schedule == OMP_dist_sch_static;
2444}
2445
// NOTE(review): original line 2446 (the function name and first parameter,
// presumably CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind
// ScheduleKind, ...)) was lost in extraction.
// Returns true iff the schedule maps to the chunked, non-ordered static
// runtime schedule (OMP_sch_static_chunked).
2447 bool Chunked) const {
2448 OpenMPSchedType Schedule =
2449 getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2450 return Schedule == OMP_sch_static_chunked;
2451}
2452
// NOTE(review): original line 2453 (the function name, presumably the
// dist_schedule overload of CGOpenMPRuntime::isStaticChunked) was lost in
// extraction.
// Returns true iff the distribute schedule maps to
// OMP_dist_sch_static_chunked.
2454 OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
2455 OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2456 return Schedule == OMP_dist_sch_static_chunked;
2457}
2458
// NOTE(review): original line 2459 (the function name and parameter,
// presumably CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind
// ScheduleKind)) was lost in extraction.
// Returns true iff the (unchunked, unordered) schedule maps to anything
// other than OMP_sch_static, i.e. requires the dynamic dispatch runtime
// path.
2460 OpenMPSchedType Schedule =
2461 getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
2462 assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
2463 return Schedule != OMP_sch_static;
2464}
2465
// Combines a runtime schedule with the monotonic/nonmonotonic/simd schedule
// modifiers: returns Schedule | <modifier bits>, possibly upgrading a static
// chunked schedule to the "balanced chunked" variant for the simd modifier,
// and applying the OpenMP 5.0 default-nonmonotonic rule when no modifier was
// written.
2466static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule,
// NOTE(review): original lines 2467-2468 (the remaining parameters -- the M1
// and M2 schedule-modifier values switched on below) were lost in
// extraction.
2469 int Modifier = 0;
2470 switch (M1) {
2471 case OMPC_SCHEDULE_MODIFIER_monotonic:
2472 Modifier = OMP_sch_modifier_monotonic;
2473 break;
2474 case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2475 Modifier = OMP_sch_modifier_nonmonotonic;
2476 break;
2477 case OMPC_SCHEDULE_MODIFIER_simd:
2478 if (Schedule == OMP_sch_static_chunked)
2479 Schedule = OMP_sch_static_balanced_chunked;
2480 break;
// NOTE(review): original lines 2481-2482 (the remaining case labels for this
// switch, before the shared break below) were lost in extraction.
2483 break;
2484 }
2485 switch (M2) {
2486 case OMPC_SCHEDULE_MODIFIER_monotonic:
2487 Modifier = OMP_sch_modifier_monotonic;
2488 break;
2489 case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2490 Modifier = OMP_sch_modifier_nonmonotonic;
2491 break;
2492 case OMPC_SCHEDULE_MODIFIER_simd:
2493 if (Schedule == OMP_sch_static_chunked)
2494 Schedule = OMP_sch_static_balanced_chunked;
2495 break;
// NOTE(review): original lines 2496-2497 (the remaining case labels,
// mirroring the first switch) were lost in extraction.
2498 break;
2499 }
2500 // OpenMP 5.0, 2.9.2 Worksharing-Loop Construct, Desription.
2501 // If the static schedule kind is specified or if the ordered clause is
2502 // specified, and if the nonmonotonic modifier is not specified, the effect is
2503 // as if the monotonic modifier is specified. Otherwise, unless the monotonic
2504 // modifier is specified, the effect is as if the nonmonotonic modifier is
2505 // specified.
2506 if (CGM.getLangOpts().OpenMP >= 50 && Modifier == 0) {
2507 if (!(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static ||
2508 Schedule == OMP_sch_static_balanced_chunked ||
2509 Schedule == OMP_ord_static_chunked || Schedule == OMP_ord_static ||
2510 Schedule == OMP_dist_sch_static_chunked ||
2511 Schedule == OMP_dist_sch_static))
2512 Modifier = OMP_sch_modifier_nonmonotonic;
2513 }
2514 return Schedule | Modifier;
2515}
2516
// NOTE(review): original lines 2517-2518 (the function name and first
// parameters, presumably CGOpenMPRuntime::emitForDispatchInit(
// CodeGenFunction &CGF, SourceLocation Loc, ...)) were lost in extraction.
// Emits the __kmpc_dispatch_init_<size> call that initializes dynamic
// dispatch for a worksharing loop, encoding the schedule (plus
// monotonic/nonmonotonic modifier bits) and the loop bounds/stride/chunk.
2519 const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
2520 bool Ordered, const DispatchRTInput &DispatchValues) {
2521 if (!CGF.HaveInsertPoint())
2522 return;
2523 OpenMPSchedType Schedule = getRuntimeSchedule(
2524 ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered);
// Static schedules must not reach the dynamic-dispatch path unless the loop
// is ordered.
2525 assert(Ordered ||
2526 (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
2527 Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked &&
2528 Schedule != OMP_sch_static_balanced_chunked));
2529 // Call __kmpc_dispatch_init(
2530 // ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
2531 // kmp_int[32|64] lower, kmp_int[32|64] upper,
2532 // kmp_int[32|64] stride, kmp_int[32|64] chunk);
2533
2534 // If the Chunk was not specified in the clause - use default value 1.
2535 llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk
2536 : CGF.Builder.getIntN(IVSize, 1);
2537 llvm::Value *Args[] = {
2538 emitUpdateLocation(CGF, Loc),
2539 getThreadID(CGF, Loc),
2540 CGF.Builder.getInt32(addMonoNonMonoModifier(
2541 CGM, Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type
2542 DispatchValues.LB, // Lower
2543 DispatchValues.UB, // Upper
2544 CGF.Builder.getIntN(IVSize, 1), // Stride
2545 Chunk // Chunk
2546 };
2547 CGF.EmitRuntimeCall(OMPBuilder.createDispatchInitFunction(IVSize, IVSigned),
2548 Args);
2549}
2550
// NOTE(review): original line 2551 (the function name and first parameter,
// presumably CGOpenMPRuntime::emitForDispatchDeinit(CodeGenFunction &CGF,
// ...)) was lost in extraction.
// Emits the __kmpc_dispatch_deinit call tearing down dynamic dispatch state.
2552 SourceLocation Loc) {
2553 if (!CGF.HaveInsertPoint())
2554 return;
2555 // Call __kmpc_dispatch_deinit(ident_t *loc, kmp_int32 tid);
2556 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2557 CGF.EmitRuntimeCall(OMPBuilder.createDispatchDeinitFunction(), Args);
2558}
2559
// NOTE(review): original line 2560 (the static function header, presumably
// emitForStaticInitCall() and line 2563 (additional parameters -- the M1 and
// M2 modifier values used below) were lost in extraction.
// Emits the __kmpc_for_static_init_<size> call for a statically scheduled
// worksharing loop, defaulting the chunk to 1 for non-chunked schedules and
// passing the lastiter/LB/UB/stride out-parameters by address.
2561 CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId,
2562 llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule,
2564 const CGOpenMPRuntime::StaticRTInput &Values) {
2565 if (!CGF.HaveInsertPoint())
2566 return;
2567
2568 assert(!Values.Ordered);
2569 assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
2570 Schedule == OMP_sch_static_balanced_chunked ||
2571 Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked ||
2572 Schedule == OMP_dist_sch_static ||
2573 Schedule == OMP_dist_sch_static_chunked);
2574
2575 // Call __kmpc_for_static_init(
2576 // ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
2577 // kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
2578 // kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
2579 // kmp_int[32|64] incr, kmp_int[32|64] chunk);
2580 llvm::Value *Chunk = Values.Chunk;
2581 if (Chunk == nullptr) {
2582 assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static ||
2583 Schedule == OMP_dist_sch_static) &&
2584 "expected static non-chunked schedule");
2585 // If the Chunk was not specified in the clause - use default value 1.
2586 Chunk = CGF.Builder.getIntN(Values.IVSize, 1);
2587 } else {
2588 assert((Schedule == OMP_sch_static_chunked ||
2589 Schedule == OMP_sch_static_balanced_chunked ||
2590 Schedule == OMP_ord_static_chunked ||
2591 Schedule == OMP_dist_sch_static_chunked) &&
2592 "expected static chunked schedule");
2593 }
2594 llvm::Value *Args[] = {
2595 UpdateLocation,
2596 ThreadId,
2597 CGF.Builder.getInt32(addMonoNonMonoModifier(CGF.CGM, Schedule, M1,
2598 M2)), // Schedule type
2599 Values.IL.emitRawPointer(CGF), // &isLastIter
2600 Values.LB.emitRawPointer(CGF), // &LB
2601 Values.UB.emitRawPointer(CGF), // &UB
2602 Values.ST.emitRawPointer(CGF), // &Stride
2603 CGF.Builder.getIntN(Values.IVSize, 1), // Incr
2604 Chunk // Chunk
2605 };
2606 CGF.EmitRuntimeCall(ForStaticInitFunction, Args);
2607}
2608
2610 SourceLocation Loc,
2611 OpenMPDirectiveKind DKind,
2612 const OpenMPScheduleTy &ScheduleKind,
2613 const StaticRTInput &Values) {
2614 OpenMPSchedType ScheduleNum = getRuntimeSchedule(
2615 ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered);
2616 assert((isOpenMPWorksharingDirective(DKind) || (DKind == OMPD_loop)) &&
2617 "Expected loop-based or sections-based directive.");
2618 llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc,
2620 ? OMP_IDENT_WORK_LOOP
2621 : OMP_IDENT_WORK_SECTIONS);
2622 llvm::Value *ThreadId = getThreadID(CGF, Loc);
2623 llvm::FunctionCallee StaticInitFunction =
2624 OMPBuilder.createForStaticInitFunction(Values.IVSize, Values.IVSigned,
2625 false);
2627 emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
2628 ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values);
2629}
2630
2634 const CGOpenMPRuntime::StaticRTInput &Values) {
2635 OpenMPSchedType ScheduleNum =
2636 getRuntimeSchedule(SchedKind, Values.Chunk != nullptr);
2637 llvm::Value *UpdatedLocation =
2638 emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE);
2639 llvm::Value *ThreadId = getThreadID(CGF, Loc);
2640 llvm::FunctionCallee StaticInitFunction;
2641 bool isGPUDistribute =
2642 CGM.getLangOpts().OpenMPIsTargetDevice && CGM.getTriple().isGPU();
2643 StaticInitFunction = OMPBuilder.createForStaticInitFunction(
2644 Values.IVSize, Values.IVSigned, isGPUDistribute);
2645
2646 emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
2647 ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown,
2649}
2650
2652 SourceLocation Loc,
2653 OpenMPDirectiveKind DKind) {
2654 assert((DKind == OMPD_distribute || DKind == OMPD_for ||
2655 DKind == OMPD_sections) &&
2656 "Expected distribute, for, or sections directive kind");
2657 if (!CGF.HaveInsertPoint())
2658 return;
2659 // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
2660 llvm::Value *Args[] = {
2661 emitUpdateLocation(CGF, Loc,
2663 (DKind == OMPD_target_teams_loop)
2664 ? OMP_IDENT_WORK_DISTRIBUTE
2665 : isOpenMPLoopDirective(DKind)
2666 ? OMP_IDENT_WORK_LOOP
2667 : OMP_IDENT_WORK_SECTIONS),
2668 getThreadID(CGF, Loc)};
2670 if (isOpenMPDistributeDirective(DKind) &&
2671 CGM.getLangOpts().OpenMPIsTargetDevice && CGM.getTriple().isGPU())
2672 CGF.EmitRuntimeCall(
2673 OMPBuilder.getOrCreateRuntimeFunction(
2674 CGM.getModule(), OMPRTL___kmpc_distribute_static_fini),
2675 Args);
2676 else
2677 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2678 CGM.getModule(), OMPRTL___kmpc_for_static_fini),
2679 Args);
2680}
2681
2683 SourceLocation Loc,
2684 unsigned IVSize,
2685 bool IVSigned) {
2686 if (!CGF.HaveInsertPoint())
2687 return;
2688 // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
2689 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2690 CGF.EmitRuntimeCall(OMPBuilder.createDispatchFiniFunction(IVSize, IVSigned),
2691 Args);
2692}
2693
2695 SourceLocation Loc, unsigned IVSize,
2696 bool IVSigned, Address IL,
2697 Address LB, Address UB,
2698 Address ST) {
2699 // Call __kmpc_dispatch_next(
2700 // ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
2701 // kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
2702 // kmp_int[32|64] *p_stride);
2703 llvm::Value *Args[] = {
2704 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2705 IL.emitRawPointer(CGF), // &isLastIter
2706 LB.emitRawPointer(CGF), // &Lower
2707 UB.emitRawPointer(CGF), // &Upper
2708 ST.emitRawPointer(CGF) // &Stride
2709 };
2710 llvm::Value *Call = CGF.EmitRuntimeCall(
2711 OMPBuilder.createDispatchNextFunction(IVSize, IVSigned), Args);
2712 return CGF.EmitScalarConversion(
2713 Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1),
2714 CGF.getContext().BoolTy, Loc);
2715}
2716
2718 const Expr *Message,
2719 SourceLocation Loc) {
2720 if (!Message)
2721 return llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
2722 return CGF.EmitScalarExpr(Message);
2723}
2724
2725llvm::Value *
2727 SourceLocation Loc) {
2728 // OpenMP 6.0, 10.4: "If no severity clause is specified then the effect is
2729 // as if sev-level is fatal."
2730 return llvm::ConstantInt::get(CGM.Int32Ty,
2731 Severity == OMPC_SEVERITY_warning ? 1 : 2);
2732}
2733
2735 CodeGenFunction &CGF, llvm::Value *NumThreads, SourceLocation Loc,
2737 SourceLocation SeverityLoc, const Expr *Message,
2738 SourceLocation MessageLoc) {
2739 if (!CGF.HaveInsertPoint())
2740 return;
2742 {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2743 CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)});
2744 // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
2745 // or __kmpc_push_num_threads_strict(&loc, global_tid, num_threads, severity,
2746 // messsage) if strict modifier is used.
2747 RuntimeFunction FnID = OMPRTL___kmpc_push_num_threads;
2748 if (Modifier == OMPC_NUMTHREADS_strict) {
2749 FnID = OMPRTL___kmpc_push_num_threads_strict;
2750 Args.push_back(emitSeverityClause(Severity, SeverityLoc));
2751 Args.push_back(emitMessageClause(CGF, Message, MessageLoc));
2752 }
2753 CGF.EmitRuntimeCall(
2754 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), FnID), Args);
2755}
2756
2758 ProcBindKind ProcBind,
2759 SourceLocation Loc) {
2760 if (!CGF.HaveInsertPoint())
2761 return;
2762 assert(ProcBind != OMP_PROC_BIND_unknown && "Unsupported proc_bind value.");
2763 // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind)
2764 llvm::Value *Args[] = {
2765 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2766 llvm::ConstantInt::get(CGM.IntTy, unsigned(ProcBind), /*isSigned=*/true)};
2767 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2768 CGM.getModule(), OMPRTL___kmpc_push_proc_bind),
2769 Args);
2770}
2771
2773 SourceLocation Loc, llvm::AtomicOrdering AO) {
2774 if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2775 OMPBuilder.createFlush(CGF.Builder);
2776 } else {
2777 if (!CGF.HaveInsertPoint())
2778 return;
2779 // Build call void __kmpc_flush(ident_t *loc)
2780 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2781 CGM.getModule(), OMPRTL___kmpc_flush),
2782 emitUpdateLocation(CGF, Loc));
2783 }
2784}
2785
namespace {
/// Indexes of fields for type kmp_task_t.
/// The order must stay in sync with the field order produced by
/// createKmpTaskTRecordDecl below; the taskloop-only fields exist only when
/// the record is built for a taskloop directive.
enum KmpTaskTFields {
  /// List of shared variables.
  KmpTaskTShareds,
  /// Task routine.
  KmpTaskTRoutine,
  /// Partition id for the untied tasks.
  KmpTaskTPartId,
  /// Function with call of destructors for private variables.
  Data1,
  /// Task priority.
  Data2,
  /// (Taskloops only) Lower bound.
  KmpTaskTLowerBound,
  /// (Taskloops only) Upper bound.
  KmpTaskTUpperBound,
  /// (Taskloops only) Stride.
  KmpTaskTStride,
  /// (Taskloops only) Is last iteration flag.
  KmpTaskTLastIter,
  /// (Taskloops only) Reduction data.
  KmpTaskTReductions,
};
} // anonymous namespace
2811
2813 // If we are in simd mode or there are no entries, we don't need to do
2814 // anything.
2815 if (CGM.getLangOpts().OpenMPSimd || OMPBuilder.OffloadInfoManager.empty())
2816 return;
2817
2818 llvm::OpenMPIRBuilder::EmitMetadataErrorReportFunctionTy &&ErrorReportFn =
2819 [this](llvm::OpenMPIRBuilder::EmitMetadataErrorKind Kind,
2820 const llvm::TargetRegionEntryInfo &EntryInfo) -> void {
2821 SourceLocation Loc;
2822 if (Kind != llvm::OpenMPIRBuilder::EMIT_MD_GLOBAL_VAR_LINK_ERROR) {
2823 for (auto I = CGM.getContext().getSourceManager().fileinfo_begin(),
2824 E = CGM.getContext().getSourceManager().fileinfo_end();
2825 I != E; ++I) {
2826 if (I->getFirst().getUniqueID().getDevice() == EntryInfo.DeviceID &&
2827 I->getFirst().getUniqueID().getFile() == EntryInfo.FileID) {
2828 Loc = CGM.getContext().getSourceManager().translateFileLineCol(
2829 I->getFirst(), EntryInfo.Line, 1);
2830 break;
2831 }
2832 }
2833 }
2834 switch (Kind) {
2835 case llvm::OpenMPIRBuilder::EMIT_MD_TARGET_REGION_ERROR: {
2836 CGM.getDiags().Report(Loc,
2837 diag::err_target_region_offloading_entry_incorrect)
2838 << EntryInfo.ParentName;
2839 } break;
2840 case llvm::OpenMPIRBuilder::EMIT_MD_DECLARE_TARGET_ERROR: {
2841 CGM.getDiags().Report(
2842 Loc, diag::err_target_var_offloading_entry_incorrect_with_parent)
2843 << EntryInfo.ParentName;
2844 } break;
2845 case llvm::OpenMPIRBuilder::EMIT_MD_GLOBAL_VAR_LINK_ERROR: {
2846 CGM.getDiags().Report(diag::err_target_var_offloading_entry_incorrect);
2847 } break;
2848 }
2849 };
2850
2851 OMPBuilder.createOffloadEntriesAndInfoMetadata(ErrorReportFn);
2852}
2853
2855 if (!KmpRoutineEntryPtrTy) {
2856 // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.
2857 ASTContext &C = CGM.getContext();
2858 QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};
2860 KmpRoutineEntryPtrQTy = C.getPointerType(
2861 C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI));
2862 KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy);
2863 }
2864}
2865
2866namespace {
2867struct PrivateHelpersTy {
2868 PrivateHelpersTy(const Expr *OriginalRef, const VarDecl *Original,
2869 const VarDecl *PrivateCopy, const VarDecl *PrivateElemInit)
2870 : OriginalRef(OriginalRef), Original(Original), PrivateCopy(PrivateCopy),
2871 PrivateElemInit(PrivateElemInit) {}
2872 PrivateHelpersTy(const VarDecl *Original) : Original(Original) {}
2873 const Expr *OriginalRef = nullptr;
2874 const VarDecl *Original = nullptr;
2875 const VarDecl *PrivateCopy = nullptr;
2876 const VarDecl *PrivateElemInit = nullptr;
2877 bool isLocalPrivate() const {
2878 return !OriginalRef && !PrivateCopy && !PrivateElemInit;
2879 }
2880};
2881typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy;
2882} // anonymous namespace
2883
2884static bool isAllocatableDecl(const VarDecl *VD) {
2885 const VarDecl *CVD = VD->getCanonicalDecl();
2886 if (!CVD->hasAttr<OMPAllocateDeclAttr>())
2887 return false;
2888 const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
2889 // Use the default allocation.
2890 return !(AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc &&
2891 !AA->getAllocator());
2892}
2893
2894static RecordDecl *
2896 if (!Privates.empty()) {
2897 ASTContext &C = CGM.getContext();
2898 // Build struct .kmp_privates_t. {
2899 // /* private vars */
2900 // };
2901 RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t");
2902 RD->startDefinition();
2903 for (const auto &Pair : Privates) {
2904 const VarDecl *VD = Pair.second.Original;
2906 // If the private variable is a local variable with lvalue ref type,
2907 // allocate the pointer instead of the pointee type.
2908 if (Pair.second.isLocalPrivate()) {
2909 if (VD->getType()->isLValueReferenceType())
2910 Type = C.getPointerType(Type);
2911 if (isAllocatableDecl(VD))
2912 Type = C.getPointerType(Type);
2913 }
2915 if (VD->hasAttrs()) {
2916 for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()),
2917 E(VD->getAttrs().end());
2918 I != E; ++I)
2919 FD->addAttr(*I);
2920 }
2921 }
2922 RD->completeDefinition();
2923 return RD;
2924 }
2925 return nullptr;
2926}
2927
2928static RecordDecl *
2930 QualType KmpInt32Ty,
2931 QualType KmpRoutineEntryPointerQTy) {
2932 ASTContext &C = CGM.getContext();
2933 // Build struct kmp_task_t {
2934 // void * shareds;
2935 // kmp_routine_entry_t routine;
2936 // kmp_int32 part_id;
2937 // kmp_cmplrdata_t data1;
2938 // kmp_cmplrdata_t data2;
2939 // For taskloops additional fields:
2940 // kmp_uint64 lb;
2941 // kmp_uint64 ub;
2942 // kmp_int64 st;
2943 // kmp_int32 liter;
2944 // void * reductions;
2945 // };
2946 RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TagTypeKind::Union);
2947 UD->startDefinition();
2948 addFieldToRecordDecl(C, UD, KmpInt32Ty);
2949 addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy);
2950 UD->completeDefinition();
2951 CanQualType KmpCmplrdataTy = C.getCanonicalTagType(UD);
2952 RecordDecl *RD = C.buildImplicitRecord("kmp_task_t");
2953 RD->startDefinition();
2954 addFieldToRecordDecl(C, RD, C.VoidPtrTy);
2955 addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
2956 addFieldToRecordDecl(C, RD, KmpInt32Ty);
2957 addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
2958 addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
2959 if (isOpenMPTaskLoopDirective(Kind)) {
2960 QualType KmpUInt64Ty =
2961 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
2962 QualType KmpInt64Ty =
2963 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
2964 addFieldToRecordDecl(C, RD, KmpUInt64Ty);
2965 addFieldToRecordDecl(C, RD, KmpUInt64Ty);
2966 addFieldToRecordDecl(C, RD, KmpInt64Ty);
2967 addFieldToRecordDecl(C, RD, KmpInt32Ty);
2968 addFieldToRecordDecl(C, RD, C.VoidPtrTy);
2969 }
2970 RD->completeDefinition();
2971 return RD;
2972}
2973
2974static RecordDecl *
2976 ArrayRef<PrivateDataTy> Privates) {
2977 ASTContext &C = CGM.getContext();
2978 // Build struct kmp_task_t_with_privates {
2979 // kmp_task_t task_data;
2980 // .kmp_privates_t. privates;
2981 // };
2982 RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates");
2983 RD->startDefinition();
2984 addFieldToRecordDecl(C, RD, KmpTaskTQTy);
2985 if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates))
2986 addFieldToRecordDecl(C, RD, C.getCanonicalTagType(PrivateRD));
2987 RD->completeDefinition();
2988 return RD;
2989}
2990
2991/// Emit a proxy function which accepts kmp_task_t as the second
2992/// argument.
2993/// \code
2994/// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
2995/// TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt,
2996/// For taskloops:
2997/// tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
2998/// tt->reductions, tt->shareds);
2999/// return 0;
3000/// }
3001/// \endcode
3002static llvm::Function *
3004 OpenMPDirectiveKind Kind, QualType KmpInt32Ty,
3005 QualType KmpTaskTWithPrivatesPtrQTy,
3006 QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
3007 QualType SharedsPtrTy, llvm::Function *TaskFunction,
3008 llvm::Value *TaskPrivatesMap) {
3009 ASTContext &C = CGM.getContext();
3010 FunctionArgList Args;
3011 ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
3013 ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3014 KmpTaskTWithPrivatesPtrQTy.withRestrict(),
3016 Args.push_back(&GtidArg);
3017 Args.push_back(&TaskTypeArg);
3018 const auto &TaskEntryFnInfo =
3019 CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
3020 llvm::FunctionType *TaskEntryTy =
3021 CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
3022 std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""});
3023 auto *TaskEntry = llvm::Function::Create(
3024 TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
3025 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo);
3026 TaskEntry->setDoesNotRecurse();
3027 CodeGenFunction CGF(CGM);
3028 CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args,
3029 Loc, Loc);
3030
3031 // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map,
3032 // tt,
3033 // For taskloops:
3034 // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
3035 // tt->task_data.shareds);
3036 llvm::Value *GtidParam = CGF.EmitLoadOfScalar(
3037 CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc);
3038 LValue TDBase = CGF.EmitLoadOfPointerLValue(
3039 CGF.GetAddrOfLocalVar(&TaskTypeArg),
3040 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3041 const auto *KmpTaskTWithPrivatesQTyRD =
3042 KmpTaskTWithPrivatesQTy->castAsRecordDecl();
3043 LValue Base =
3044 CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
3045 const auto *KmpTaskTQTyRD = KmpTaskTQTy->castAsRecordDecl();
3046 auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
3047 LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
3048 llvm::Value *PartidParam = PartIdLVal.getPointer(CGF);
3049
3050 auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
3051 LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
3052 llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3053 CGF.EmitLoadOfScalar(SharedsLVal, Loc),
3054 CGF.ConvertTypeForMem(SharedsPtrTy));
3055
3056 auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
3057 llvm::Value *PrivatesParam;
3058 if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
3059 LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
3060 PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3061 PrivatesLVal.getPointer(CGF), CGF.VoidPtrTy);
3062 } else {
3063 PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
3064 }
3065
3066 llvm::Value *CommonArgs[] = {
3067 GtidParam, PartidParam, PrivatesParam, TaskPrivatesMap,
3068 CGF.Builder
3069 .CreatePointerBitCastOrAddrSpaceCast(TDBase.getAddress(),
3070 CGF.VoidPtrTy, CGF.Int8Ty)
3071 .emitRawPointer(CGF)};
3072 SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs),
3073 std::end(CommonArgs));
3074 if (isOpenMPTaskLoopDirective(Kind)) {
3075 auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound);
3076 LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI);
3077 llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc);
3078 auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound);
3079 LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI);
3080 llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc);
3081 auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride);
3082 LValue StLVal = CGF.EmitLValueForField(Base, *StFI);
3083 llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc);
3084 auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
3085 LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
3086 llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc);
3087 auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions);
3088 LValue RLVal = CGF.EmitLValueForField(Base, *RFI);
3089 llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc);
3090 CallArgs.push_back(LBParam);
3091 CallArgs.push_back(UBParam);
3092 CallArgs.push_back(StParam);
3093 CallArgs.push_back(LIParam);
3094 CallArgs.push_back(RParam);
3095 }
3096 CallArgs.push_back(SharedsParam);
3097
3098 CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction,
3099 CallArgs);
3100 CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)),
3101 CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty));
3102 CGF.FinishFunction();
3103 return TaskEntry;
3104}
3105
3107 SourceLocation Loc,
3108 QualType KmpInt32Ty,
3109 QualType KmpTaskTWithPrivatesPtrQTy,
3110 QualType KmpTaskTWithPrivatesQTy) {
3111 ASTContext &C = CGM.getContext();
3112 FunctionArgList Args;
3113 ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
3115 ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3116 KmpTaskTWithPrivatesPtrQTy.withRestrict(),
3118 Args.push_back(&GtidArg);
3119 Args.push_back(&TaskTypeArg);
3120 const auto &DestructorFnInfo =
3121 CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
3122 llvm::FunctionType *DestructorFnTy =
3123 CGM.getTypes().GetFunctionType(DestructorFnInfo);
3124 std::string Name =
3125 CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""});
3126 auto *DestructorFn =
3127 llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage,
3128 Name, &CGM.getModule());
3129 CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn,
3130 DestructorFnInfo);
3131 DestructorFn->setDoesNotRecurse();
3132 CodeGenFunction CGF(CGM);
3133 CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo,
3134 Args, Loc, Loc);
3135
3136 LValue Base = CGF.EmitLoadOfPointerLValue(
3137 CGF.GetAddrOfLocalVar(&TaskTypeArg),
3138 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3139 const auto *KmpTaskTWithPrivatesQTyRD =
3140 KmpTaskTWithPrivatesQTy->castAsRecordDecl();
3141 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
3142 Base = CGF.EmitLValueForField(Base, *FI);
3143 for (const auto *Field : FI->getType()->castAsRecordDecl()->fields()) {
3144 if (QualType::DestructionKind DtorKind =
3145 Field->getType().isDestructedType()) {
3146 LValue FieldLValue = CGF.EmitLValueForField(Base, Field);
3147 CGF.pushDestroy(DtorKind, FieldLValue.getAddress(), Field->getType());
3148 }
3149 }
3150 CGF.FinishFunction();
3151 return DestructorFn;
3152}
3153
3154/// Emit a privates mapping function for correct handling of private and
3155/// firstprivate variables.
3156/// \code
3157/// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1>
3158/// **noalias priv1,..., <tyn> **noalias privn) {
3159/// *priv1 = &.privates.priv1;
3160/// ...;
3161/// *privn = &.privates.privn;
3162/// }
3163/// \endcode
3164static llvm::Value *
3166 const OMPTaskDataTy &Data, QualType PrivatesQTy,
3167 ArrayRef<PrivateDataTy> Privates) {
3168 ASTContext &C = CGM.getContext();
3169 FunctionArgList Args;
3170 ImplicitParamDecl TaskPrivatesArg(
3171 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3172 C.getPointerType(PrivatesQTy).withConst().withRestrict(),
3174 Args.push_back(&TaskPrivatesArg);
3175 llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, unsigned> PrivateVarsPos;
3176 unsigned Counter = 1;
3177 for (const Expr *E : Data.PrivateVars) {
3178 Args.push_back(ImplicitParamDecl::Create(
3179 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3180 C.getPointerType(C.getPointerType(E->getType()))
3181 .withConst()
3182 .withRestrict(),
3184 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3185 PrivateVarsPos[VD] = Counter;
3186 ++Counter;
3187 }
3188 for (const Expr *E : Data.FirstprivateVars) {
3189 Args.push_back(ImplicitParamDecl::Create(
3190 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3191 C.getPointerType(C.getPointerType(E->getType()))
3192 .withConst()
3193 .withRestrict(),
3195 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3196 PrivateVarsPos[VD] = Counter;
3197 ++Counter;
3198 }
3199 for (const Expr *E : Data.LastprivateVars) {
3200 Args.push_back(ImplicitParamDecl::Create(
3201 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3202 C.getPointerType(C.getPointerType(E->getType()))
3203 .withConst()
3204 .withRestrict(),
3206 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3207 PrivateVarsPos[VD] = Counter;
3208 ++Counter;
3209 }
3210 for (const VarDecl *VD : Data.PrivateLocals) {
3212 if (VD->getType()->isLValueReferenceType())
3213 Ty = C.getPointerType(Ty);
3214 if (isAllocatableDecl(VD))
3215 Ty = C.getPointerType(Ty);
3216 Args.push_back(ImplicitParamDecl::Create(
3217 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3218 C.getPointerType(C.getPointerType(Ty)).withConst().withRestrict(),
3220 PrivateVarsPos[VD] = Counter;
3221 ++Counter;
3222 }
3223 const auto &TaskPrivatesMapFnInfo =
3224 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
3225 llvm::FunctionType *TaskPrivatesMapTy =
3226 CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo);
3227 std::string Name =
3228 CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""});
3229 auto *TaskPrivatesMap = llvm::Function::Create(
3230 TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name,
3231 &CGM.getModule());
3232 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap,
3233 TaskPrivatesMapFnInfo);
3234 if (CGM.getCodeGenOpts().OptimizationLevel != 0) {
3235 TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline);
3236 TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone);
3237 TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline);
3238 }
3239 CodeGenFunction CGF(CGM);
3240 CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap,
3241 TaskPrivatesMapFnInfo, Args, Loc, Loc);
3242
3243 // *privi = &.privates.privi;
3244 LValue Base = CGF.EmitLoadOfPointerLValue(
3245 CGF.GetAddrOfLocalVar(&TaskPrivatesArg),
3246 TaskPrivatesArg.getType()->castAs<PointerType>());
3247 const auto *PrivatesQTyRD = PrivatesQTy->castAsRecordDecl();
3248 Counter = 0;
3249 for (const FieldDecl *Field : PrivatesQTyRD->fields()) {
3250 LValue FieldLVal = CGF.EmitLValueForField(Base, Field);
3251 const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]];
3252 LValue RefLVal =
3253 CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType());
3254 LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue(
3255 RefLVal.getAddress(), RefLVal.getType()->castAs<PointerType>());
3256 CGF.EmitStoreOfScalar(FieldLVal.getPointer(CGF), RefLoadLVal);
3257 ++Counter;
3258 }
3259 CGF.FinishFunction();
3260 return TaskPrivatesMap;
3261}
3262
3263/// Emit initialization for private variables in task-based directives.
3265 const OMPExecutableDirective &D,
3266 Address KmpTaskSharedsPtr, LValue TDBase,
3267 const RecordDecl *KmpTaskTWithPrivatesQTyRD,
3268 QualType SharedsTy, QualType SharedsPtrTy,
3269 const OMPTaskDataTy &Data,
3270 ArrayRef<PrivateDataTy> Privates, bool ForDup) {
3271 ASTContext &C = CGF.getContext();
3272 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
3273 LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI);
3274 OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind())
3275 ? OMPD_taskloop
3276 : OMPD_task;
3277 const CapturedStmt &CS = *D.getCapturedStmt(Kind);
3278 CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS);
3279 LValue SrcBase;
3280 bool IsTargetTask =
3281 isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) ||
3282 isOpenMPTargetExecutionDirective(D.getDirectiveKind());
3283 // For target-based directives skip 4 firstprivate arrays BasePointersArray,
3284 // PointersArray, SizesArray, and MappersArray. The original variables for
3285 // these arrays are not captured and we get their addresses explicitly.
3286 if ((!IsTargetTask && !Data.FirstprivateVars.empty() && ForDup) ||
3287 (IsTargetTask && KmpTaskSharedsPtr.isValid())) {
3288 SrcBase = CGF.MakeAddrLValue(
3290 KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy),
3291 CGF.ConvertTypeForMem(SharedsTy)),
3292 SharedsTy);
3293 }
3294 FI = FI->getType()->castAsRecordDecl()->field_begin();
3295 for (const PrivateDataTy &Pair : Privates) {
3296 // Do not initialize private locals.
3297 if (Pair.second.isLocalPrivate()) {
3298 ++FI;
3299 continue;
3300 }
3301 const VarDecl *VD = Pair.second.PrivateCopy;
3302 const Expr *Init = VD->getAnyInitializer();
3303 if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) &&
3304 !CGF.isTrivialInitializer(Init)))) {
3305 LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
3306 if (const VarDecl *Elem = Pair.second.PrivateElemInit) {
3307 const VarDecl *OriginalVD = Pair.second.Original;
3308 // Check if the variable is the target-based BasePointersArray,
3309 // PointersArray, SizesArray, or MappersArray.
3310 LValue SharedRefLValue;
3311 QualType Type = PrivateLValue.getType();
3312 const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD);
3313 if (IsTargetTask && !SharedField) {
3314 assert(isa<ImplicitParamDecl>(OriginalVD) &&
3315 isa<CapturedDecl>(OriginalVD->getDeclContext()) &&
3316 cast<CapturedDecl>(OriginalVD->getDeclContext())
3317 ->getNumParams() == 0 &&
3319 cast<CapturedDecl>(OriginalVD->getDeclContext())
3320 ->getDeclContext()) &&
3321 "Expected artificial target data variable.");
3322 SharedRefLValue =
3323 CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type);
3324 } else if (ForDup) {
3325 SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField);
3326 SharedRefLValue = CGF.MakeAddrLValue(
3327 SharedRefLValue.getAddress().withAlignment(
3328 C.getDeclAlign(OriginalVD)),
3329 SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl),
3330 SharedRefLValue.getTBAAInfo());
3331 } else if (CGF.LambdaCaptureFields.count(
3332 Pair.second.Original->getCanonicalDecl()) > 0 ||
3333 isa_and_nonnull<BlockDecl>(CGF.CurCodeDecl)) {
3334 SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
3335 } else {
3336 // Processing for implicitly captured variables.
3337 InlinedOpenMPRegionRAII Region(
3338 CGF, [](CodeGenFunction &, PrePostActionTy &) {}, OMPD_unknown,
3339 /*HasCancel=*/false, /*NoInheritance=*/true);
3340 SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
3341 }
3342 if (Type->isArrayType()) {
3343 // Initialize firstprivate array.
3345 // Perform simple memcpy.
3346 CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type);
3347 } else {
3348 // Initialize firstprivate array using element-by-element
3349 // initialization.
3351 PrivateLValue.getAddress(), SharedRefLValue.getAddress(), Type,
3352 [&CGF, Elem, Init, &CapturesInfo](Address DestElement,
3353 Address SrcElement) {
3354 // Clean up any temporaries needed by the initialization.
3355 CodeGenFunction::OMPPrivateScope InitScope(CGF);
3356 InitScope.addPrivate(Elem, SrcElement);
3357 (void)InitScope.Privatize();
3358 // Emit initialization for single element.
3359 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(
3360 CGF, &CapturesInfo);
3361 CGF.EmitAnyExprToMem(Init, DestElement,
3362 Init->getType().getQualifiers(),
3363 /*IsInitializer=*/false);
3364 });
3365 }
3366 } else {
3367 CodeGenFunction::OMPPrivateScope InitScope(CGF);
3368 InitScope.addPrivate(Elem, SharedRefLValue.getAddress());
3369 (void)InitScope.Privatize();
3370 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
3371 CGF.EmitExprAsInit(Init, VD, PrivateLValue,
3372 /*capturedByInit=*/false);
3373 }
3374 } else {
3375 CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
3376 }
3377 }
3378 ++FI;
3379 }
3380}
3381
3382/// Check if duplication function is required for taskloops.
3384 ArrayRef<PrivateDataTy> Privates) {
3385 bool InitRequired = false;
3386 for (const PrivateDataTy &Pair : Privates) {
3387 if (Pair.second.isLocalPrivate())
3388 continue;
3389 const VarDecl *VD = Pair.second.PrivateCopy;
3390 const Expr *Init = VD->getAnyInitializer();
3391 InitRequired = InitRequired || (isa_and_nonnull<CXXConstructExpr>(Init) &&
3393 if (InitRequired)
3394 break;
3395 }
3396 return InitRequired;
3397}
3398
3399
3400/// Emit task_dup function (for initialization of
3401/// private/firstprivate/lastprivate vars and last_iter flag)
3402/// \code
3403/// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int
3404/// lastpriv) {
3405/// // setup lastprivate flag
3406/// task_dst->last = lastpriv;
3407/// // could be constructor calls here...
3408/// }
3409/// \endcode
3410static llvm::Value *
3412 const OMPExecutableDirective &D,
3413 QualType KmpTaskTWithPrivatesPtrQTy,
3414 const RecordDecl *KmpTaskTWithPrivatesQTyRD,
3415 const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy,
3416 QualType SharedsPtrTy, const OMPTaskDataTy &Data,
3417 ArrayRef<PrivateDataTy> Privates, bool WithLastIter) {
3418 ASTContext &C = CGM.getContext();
3419 FunctionArgList Args;
3420 ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3421 KmpTaskTWithPrivatesPtrQTy,
3423 ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3424 KmpTaskTWithPrivatesPtrQTy,
3426 ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy,
3428 Args.push_back(&DstArg);
3429 Args.push_back(&SrcArg);
3430 Args.push_back(&LastprivArg);
3431 const auto &TaskDupFnInfo =
3432 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
3433 llvm::FunctionType *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo);
3434 std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_dup", ""});
3435 auto *TaskDup = llvm::Function::Create(
3436 TaskDupTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
3437 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo);
3438 TaskDup->setDoesNotRecurse();
3439 CodeGenFunction CGF(CGM);
3440 CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc,
3441 Loc);
3442
3443 LValue TDBase = CGF.EmitLoadOfPointerLValue(
3444 CGF.GetAddrOfLocalVar(&DstArg),
3445 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3446 // task_dst->liter = lastpriv;
3447 if (WithLastIter) {
3448 auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
3449 LValue Base = CGF.EmitLValueForField(
3450 TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
3451 LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
3452 llvm::Value *Lastpriv = CGF.EmitLoadOfScalar(
3453 CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc);
3454 CGF.EmitStoreOfScalar(Lastpriv, LILVal);
3455 }
3456
3457 // Emit initial values for private copies (if any).
3458 assert(!Privates.empty());
3459 Address KmpTaskSharedsPtr = Address::invalid();
3460 if (!Data.FirstprivateVars.empty()) {
3461 LValue TDBase = CGF.EmitLoadOfPointerLValue(
3462 CGF.GetAddrOfLocalVar(&SrcArg),
3463 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3464 LValue Base = CGF.EmitLValueForField(
3465 TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
3466 KmpTaskSharedsPtr = Address(
3468 Base, *std::next(KmpTaskTQTyRD->field_begin(),
3469 KmpTaskTShareds)),
3470 Loc),
3471 CGF.Int8Ty, CGM.getNaturalTypeAlignment(SharedsTy));
3472 }
3473 emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD,
3474 SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true);
3475 CGF.FinishFunction();
3476 return TaskDup;
3477}
3478
3479/// Checks if destructor function is required to be generated.
3480/// \return true if cleanups are required, false otherwise.
3481static bool
3482checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD,
3483 ArrayRef<PrivateDataTy> Privates) {
3484 for (const PrivateDataTy &P : Privates) {
3485 if (P.second.isLocalPrivate())
3486 continue;
3487 QualType Ty = P.second.Original->getType().getNonReferenceType();
3488 if (Ty.isDestructedType())
3489 return true;
3490 }
3491 return false;
3492}
3493
3494namespace {
3495/// Loop generator for OpenMP iterator expression.
3496class OMPIteratorGeneratorScope final
3498 CodeGenFunction &CGF;
3499 const OMPIteratorExpr *E = nullptr;
3500 SmallVector<CodeGenFunction::JumpDest, 4> ContDests;
3501 SmallVector<CodeGenFunction::JumpDest, 4> ExitDests;
3502 OMPIteratorGeneratorScope() = delete;
3503 OMPIteratorGeneratorScope(OMPIteratorGeneratorScope &) = delete;
3504
3505public:
3506 OMPIteratorGeneratorScope(CodeGenFunction &CGF, const OMPIteratorExpr *E)
3507 : CodeGenFunction::OMPPrivateScope(CGF), CGF(CGF), E(E) {
3508 if (!E)
3509 return;
3510 SmallVector<llvm::Value *, 4> Uppers;
3511 for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
3512 Uppers.push_back(CGF.EmitScalarExpr(E->getHelper(I).Upper));
3513 const auto *VD = cast<VarDecl>(E->getIteratorDecl(I));
3514 addPrivate(VD, CGF.CreateMemTemp(VD->getType(), VD->getName()));
3515 const OMPIteratorHelperData &HelperData = E->getHelper(I);
3516 addPrivate(
3517 HelperData.CounterVD,
3518 CGF.CreateMemTemp(HelperData.CounterVD->getType(), "counter.addr"));
3519 }
3520 Privatize();
3521
3522 for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
3523 const OMPIteratorHelperData &HelperData = E->getHelper(I);
3524 LValue CLVal =
3525 CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(HelperData.CounterVD),
3526 HelperData.CounterVD->getType());
3527 // Counter = 0;
3528 CGF.EmitStoreOfScalar(
3529 llvm::ConstantInt::get(CLVal.getAddress().getElementType(), 0),
3530 CLVal);
3531 CodeGenFunction::JumpDest &ContDest =
3532 ContDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.cont"));
3533 CodeGenFunction::JumpDest &ExitDest =
3534 ExitDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.exit"));
3535 // N = <number-of_iterations>;
3536 llvm::Value *N = Uppers[I];
3537 // cont:
3538 // if (Counter < N) goto body; else goto exit;
3539 CGF.EmitBlock(ContDest.getBlock());
3540 auto *CVal =
3541 CGF.EmitLoadOfScalar(CLVal, HelperData.CounterVD->getLocation());
3542 llvm::Value *Cmp =
3543 HelperData.CounterVD->getType()->isSignedIntegerOrEnumerationType()
3544 ? CGF.Builder.CreateICmpSLT(CVal, N)
3545 : CGF.Builder.CreateICmpULT(CVal, N);
3546 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("iter.body");
3547 CGF.Builder.CreateCondBr(Cmp, BodyBB, ExitDest.getBlock());
3548 // body:
3549 CGF.EmitBlock(BodyBB);
3550 // Iteri = Begini + Counter * Stepi;
3551 CGF.EmitIgnoredExpr(HelperData.Update);
3552 }
3553 }
3554 ~OMPIteratorGeneratorScope() {
3555 if (!E)
3556 return;
3557 for (unsigned I = E->numOfIterators(); I > 0; --I) {
3558 // Counter = Counter + 1;
3559 const OMPIteratorHelperData &HelperData = E->getHelper(I - 1);
3560 CGF.EmitIgnoredExpr(HelperData.CounterUpdate);
3561 // goto cont;
3562 CGF.EmitBranchThroughCleanup(ContDests[I - 1]);
3563 // exit:
3564 CGF.EmitBlock(ExitDests[I - 1].getBlock(), /*IsFinished=*/I == 1);
3565 }
3566 }
3567};
3568} // namespace
3569
3570static std::pair<llvm::Value *, llvm::Value *>
3572 const auto *OASE = dyn_cast<OMPArrayShapingExpr>(E);
3573 llvm::Value *Addr;
3574 if (OASE) {
3575 const Expr *Base = OASE->getBase();
3576 Addr = CGF.EmitScalarExpr(Base);
3577 } else {
3578 Addr = CGF.EmitLValue(E).getPointer(CGF);
3579 }
3580 llvm::Value *SizeVal;
3581 QualType Ty = E->getType();
3582 if (OASE) {
3583 SizeVal = CGF.getTypeSize(OASE->getBase()->getType()->getPointeeType());
3584 for (const Expr *SE : OASE->getDimensions()) {
3585 llvm::Value *Sz = CGF.EmitScalarExpr(SE);
3586 Sz = CGF.EmitScalarConversion(
3587 Sz, SE->getType(), CGF.getContext().getSizeType(), SE->getExprLoc());
3588 SizeVal = CGF.Builder.CreateNUWMul(SizeVal, Sz);
3589 }
3590 } else if (const auto *ASE =
3591 dyn_cast<ArraySectionExpr>(E->IgnoreParenImpCasts())) {
3592 LValue UpAddrLVal = CGF.EmitArraySectionExpr(ASE, /*IsLowerBound=*/false);
3593 Address UpAddrAddress = UpAddrLVal.getAddress();
3594 llvm::Value *UpAddr = CGF.Builder.CreateConstGEP1_32(
3595 UpAddrAddress.getElementType(), UpAddrAddress.emitRawPointer(CGF),
3596 /*Idx0=*/1);
3597 llvm::Value *LowIntPtr = CGF.Builder.CreatePtrToInt(Addr, CGF.SizeTy);
3598 llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGF.SizeTy);
3599 SizeVal = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr);
3600 } else {
3601 SizeVal = CGF.getTypeSize(Ty);
3602 }
3603 return std::make_pair(Addr, SizeVal);
3604}
3605
3606/// Builds kmp_depend_info, if it is not built yet, and builds flags type.
3607static void getKmpAffinityType(ASTContext &C, QualType &KmpTaskAffinityInfoTy) {
3608 QualType FlagsTy = C.getIntTypeForBitwidth(32, /*Signed=*/false);
3609 if (KmpTaskAffinityInfoTy.isNull()) {
3610 RecordDecl *KmpAffinityInfoRD =
3611 C.buildImplicitRecord("kmp_task_affinity_info_t");
3612 KmpAffinityInfoRD->startDefinition();
3613 addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getIntPtrType());
3614 addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getSizeType());
3615 addFieldToRecordDecl(C, KmpAffinityInfoRD, FlagsTy);
3616 KmpAffinityInfoRD->completeDefinition();
3617 KmpTaskAffinityInfoTy = C.getCanonicalTagType(KmpAffinityInfoRD);
3618 }
3619}
3620
3623 const OMPExecutableDirective &D,
3624 llvm::Function *TaskFunction, QualType SharedsTy,
3625 Address Shareds, const OMPTaskDataTy &Data) {
3626 ASTContext &C = CGM.getContext();
3628 // Aggregate privates and sort them by the alignment.
3629 const auto *I = Data.PrivateCopies.begin();
3630 for (const Expr *E : Data.PrivateVars) {
3631 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3632 Privates.emplace_back(
3633 C.getDeclAlign(VD),
3634 PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
3635 /*PrivateElemInit=*/nullptr));
3636 ++I;
3637 }
3638 I = Data.FirstprivateCopies.begin();
3639 const auto *IElemInitRef = Data.FirstprivateInits.begin();
3640 for (const Expr *E : Data.FirstprivateVars) {
3641 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3642 Privates.emplace_back(
3643 C.getDeclAlign(VD),
3644 PrivateHelpersTy(
3645 E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
3646 cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl())));
3647 ++I;
3648 ++IElemInitRef;
3649 }
3650 I = Data.LastprivateCopies.begin();
3651 for (const Expr *E : Data.LastprivateVars) {
3652 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3653 Privates.emplace_back(
3654 C.getDeclAlign(VD),
3655 PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
3656 /*PrivateElemInit=*/nullptr));
3657 ++I;
3658 }
3659 for (const VarDecl *VD : Data.PrivateLocals) {
3660 if (isAllocatableDecl(VD))
3661 Privates.emplace_back(CGM.getPointerAlign(), PrivateHelpersTy(VD));
3662 else
3663 Privates.emplace_back(C.getDeclAlign(VD), PrivateHelpersTy(VD));
3664 }
3665 llvm::stable_sort(Privates,
3666 [](const PrivateDataTy &L, const PrivateDataTy &R) {
3667 return L.first > R.first;
3668 });
3669 QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
3670 // Build type kmp_routine_entry_t (if not built yet).
3671 emitKmpRoutineEntryT(KmpInt32Ty);
3672 // Build type kmp_task_t (if not built yet).
3673 if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) {
3674 if (SavedKmpTaskloopTQTy.isNull()) {
3675 SavedKmpTaskloopTQTy = C.getCanonicalTagType(createKmpTaskTRecordDecl(
3676 CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
3677 }
3679 } else {
3680 assert((D.getDirectiveKind() == OMPD_task ||
3681 isOpenMPTargetExecutionDirective(D.getDirectiveKind()) ||
3682 isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) &&
3683 "Expected taskloop, task or target directive");
3684 if (SavedKmpTaskTQTy.isNull()) {
3685 SavedKmpTaskTQTy = C.getCanonicalTagType(createKmpTaskTRecordDecl(
3686 CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
3687 }
3689 }
3690 const auto *KmpTaskTQTyRD = KmpTaskTQTy->castAsRecordDecl();
3691 // Build particular struct kmp_task_t for the given task.
3692 const RecordDecl *KmpTaskTWithPrivatesQTyRD =
3694 CanQualType KmpTaskTWithPrivatesQTy =
3695 C.getCanonicalTagType(KmpTaskTWithPrivatesQTyRD);
3696 QualType KmpTaskTWithPrivatesPtrQTy =
3697 C.getPointerType(KmpTaskTWithPrivatesQTy);
3698 llvm::Type *KmpTaskTWithPrivatesPtrTy = CGF.Builder.getPtrTy(0);
3699 llvm::Value *KmpTaskTWithPrivatesTySize =
3700 CGF.getTypeSize(KmpTaskTWithPrivatesQTy);
3701 QualType SharedsPtrTy = C.getPointerType(SharedsTy);
3702
3703 // Emit initial values for private copies (if any).
3704 llvm::Value *TaskPrivatesMap = nullptr;
3705 llvm::Type *TaskPrivatesMapTy =
3706 std::next(TaskFunction->arg_begin(), 3)->getType();
3707 if (!Privates.empty()) {
3708 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
3709 TaskPrivatesMap =
3710 emitTaskPrivateMappingFunction(CGM, Loc, Data, FI->getType(), Privates);
3711 TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3712 TaskPrivatesMap, TaskPrivatesMapTy);
3713 } else {
3714 TaskPrivatesMap = llvm::ConstantPointerNull::get(
3715 cast<llvm::PointerType>(TaskPrivatesMapTy));
3716 }
3717 // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
3718 // kmp_task_t *tt);
3719 llvm::Function *TaskEntry = emitProxyTaskFunction(
3720 CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
3721 KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction,
3722 TaskPrivatesMap);
3723
3724 // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
3725 // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
3726 // kmp_routine_entry_t *task_entry);
3727 // Task flags. Format is taken from
3728 // https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h,
3729 // description of kmp_tasking_flags struct.
3730 enum {
3731 TiedFlag = 0x1,
3732 FinalFlag = 0x2,
3733 DestructorsFlag = 0x8,
3734 PriorityFlag = 0x20,
3735 DetachableFlag = 0x40,
3736 FreeAgentFlag = 0x80,
3737 };
3738 unsigned Flags = Data.Tied ? TiedFlag : 0;
3739 bool NeedsCleanup = false;
3740 if (!Privates.empty()) {
3741 NeedsCleanup =
3742 checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD, Privates);
3743 if (NeedsCleanup)
3744 Flags = Flags | DestructorsFlag;
3745 }
3746 if (const auto *Clause = D.getSingleClause<OMPThreadsetClause>()) {
3747 OpenMPThreadsetKind Kind = Clause->getThreadsetKind();
3748 if (Kind == OMPC_THREADSET_omp_pool)
3749 Flags = Flags | FreeAgentFlag;
3750 }
3751 if (Data.Priority.getInt())
3752 Flags = Flags | PriorityFlag;
3753 if (D.hasClausesOfKind<OMPDetachClause>())
3754 Flags = Flags | DetachableFlag;
3755 llvm::Value *TaskFlags =
3756 Data.Final.getPointer()
3757 ? CGF.Builder.CreateSelect(Data.Final.getPointer(),
3758 CGF.Builder.getInt32(FinalFlag),
3759 CGF.Builder.getInt32(/*C=*/0))
3760 : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0);
3761 TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
3762 llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy));
3764 getThreadID(CGF, Loc), TaskFlags, KmpTaskTWithPrivatesTySize,
3766 TaskEntry, KmpRoutineEntryPtrTy)};
3767 llvm::Value *NewTask;
3768 if (D.hasClausesOfKind<OMPNowaitClause>()) {
3769 // Check if we have any device clause associated with the directive.
3770 const Expr *Device = nullptr;
3771 if (auto *C = D.getSingleClause<OMPDeviceClause>())
3772 Device = C->getDevice();
3773 // Emit device ID if any otherwise use default value.
3774 llvm::Value *DeviceID;
3775 if (Device)
3776 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
3777 CGF.Int64Ty, /*isSigned=*/true);
3778 else
3779 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
3780 AllocArgs.push_back(DeviceID);
3781 NewTask = CGF.EmitRuntimeCall(
3782 OMPBuilder.getOrCreateRuntimeFunction(
3783 CGM.getModule(), OMPRTL___kmpc_omp_target_task_alloc),
3784 AllocArgs);
3785 } else {
3786 NewTask =
3787 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
3788 CGM.getModule(), OMPRTL___kmpc_omp_task_alloc),
3789 AllocArgs);
3790 }
3791 // Emit detach clause initialization.
3792 // evt = (typeof(evt))__kmpc_task_allow_completion_event(loc, tid,
3793 // task_descriptor);
3794 if (const auto *DC = D.getSingleClause<OMPDetachClause>()) {
3795 const Expr *Evt = DC->getEventHandler()->IgnoreParenImpCasts();
3796 LValue EvtLVal = CGF.EmitLValue(Evt);
3797
3798 // Build kmp_event_t *__kmpc_task_allow_completion_event(ident_t *loc_ref,
3799 // int gtid, kmp_task_t *task);
3800 llvm::Value *Loc = emitUpdateLocation(CGF, DC->getBeginLoc());
3801 llvm::Value *Tid = getThreadID(CGF, DC->getBeginLoc());
3802 Tid = CGF.Builder.CreateIntCast(Tid, CGF.IntTy, /*isSigned=*/false);
3803 llvm::Value *EvtVal = CGF.EmitRuntimeCall(
3804 OMPBuilder.getOrCreateRuntimeFunction(
3805 CGM.getModule(), OMPRTL___kmpc_task_allow_completion_event),
3806 {Loc, Tid, NewTask});
3807 EvtVal = CGF.EmitScalarConversion(EvtVal, C.VoidPtrTy, Evt->getType(),
3808 Evt->getExprLoc());
3809 CGF.EmitStoreOfScalar(EvtVal, EvtLVal);
3810 }
3811 // Process affinity clauses.
3812 if (D.hasClausesOfKind<OMPAffinityClause>()) {
3813 // Process list of affinity data.
3814 ASTContext &C = CGM.getContext();
3815 Address AffinitiesArray = Address::invalid();
3816 // Calculate number of elements to form the array of affinity data.
3817 llvm::Value *NumOfElements = nullptr;
3818 unsigned NumAffinities = 0;
3819 for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
3820 if (const Expr *Modifier = C->getModifier()) {
3821 const auto *IE = cast<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts());
3822 for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
3823 llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
3824 Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
3825 NumOfElements =
3826 NumOfElements ? CGF.Builder.CreateNUWMul(NumOfElements, Sz) : Sz;
3827 }
3828 } else {
3829 NumAffinities += C->varlist_size();
3830 }
3831 }
3833 // Fields ids in kmp_task_affinity_info record.
3834 enum RTLAffinityInfoFieldsTy { BaseAddr, Len, Flags };
3835
3836 QualType KmpTaskAffinityInfoArrayTy;
3837 if (NumOfElements) {
3838 NumOfElements = CGF.Builder.CreateNUWAdd(
3839 llvm::ConstantInt::get(CGF.SizeTy, NumAffinities), NumOfElements);
3840 auto *OVE = new (C) OpaqueValueExpr(
3841 Loc,
3842 C.getIntTypeForBitwidth(C.getTypeSize(C.getSizeType()), /*Signed=*/0),
3843 VK_PRValue);
3844 CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE,
3845 RValue::get(NumOfElements));
3846 KmpTaskAffinityInfoArrayTy = C.getVariableArrayType(
3848 /*IndexTypeQuals=*/0);
3849 // Properly emit variable-sized array.
3850 auto *PD = ImplicitParamDecl::Create(C, KmpTaskAffinityInfoArrayTy,
3852 CGF.EmitVarDecl(*PD);
3853 AffinitiesArray = CGF.GetAddrOfLocalVar(PD);
3854 NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
3855 /*isSigned=*/false);
3856 } else {
3857 KmpTaskAffinityInfoArrayTy = C.getConstantArrayType(
3859 llvm::APInt(C.getTypeSize(C.getSizeType()), NumAffinities), nullptr,
3860 ArraySizeModifier::Normal, /*IndexTypeQuals=*/0);
3861 AffinitiesArray =
3862 CGF.CreateMemTemp(KmpTaskAffinityInfoArrayTy, ".affs.arr.addr");
3863 AffinitiesArray = CGF.Builder.CreateConstArrayGEP(AffinitiesArray, 0);
3864 NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumAffinities,
3865 /*isSigned=*/false);
3866 }
3867
3868 const auto *KmpAffinityInfoRD = KmpTaskAffinityInfoTy->getAsRecordDecl();
3869 // Fill array by elements without iterators.
3870 unsigned Pos = 0;
3871 bool HasIterator = false;
3872 for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
3873 if (C->getModifier()) {
3874 HasIterator = true;
3875 continue;
3876 }
3877 for (const Expr *E : C->varlist()) {
3878 llvm::Value *Addr;
3879 llvm::Value *Size;
3880 std::tie(Addr, Size) = getPointerAndSize(CGF, E);
3881 LValue Base =
3882 CGF.MakeAddrLValue(CGF.Builder.CreateConstGEP(AffinitiesArray, Pos),
3884 // affs[i].base_addr = &<Affinities[i].second>;
3885 LValue BaseAddrLVal = CGF.EmitLValueForField(
3886 Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
3887 CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
3888 BaseAddrLVal);
3889 // affs[i].len = sizeof(<Affinities[i].second>);
3890 LValue LenLVal = CGF.EmitLValueForField(
3891 Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
3892 CGF.EmitStoreOfScalar(Size, LenLVal);
3893 ++Pos;
3894 }
3895 }
3896 LValue PosLVal;
3897 if (HasIterator) {
3898 PosLVal = CGF.MakeAddrLValue(
3899 CGF.CreateMemTemp(C.getSizeType(), "affs.counter.addr"),
3900 C.getSizeType());
3901 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
3902 }
3903 // Process elements with iterators.
3904 for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
3905 const Expr *Modifier = C->getModifier();
3906 if (!Modifier)
3907 continue;
3908 OMPIteratorGeneratorScope IteratorScope(
3909 CGF, cast_or_null<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts()));
3910 for (const Expr *E : C->varlist()) {
3911 llvm::Value *Addr;
3912 llvm::Value *Size;
3913 std::tie(Addr, Size) = getPointerAndSize(CGF, E);
3914 llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
3915 LValue Base =
3916 CGF.MakeAddrLValue(CGF.Builder.CreateGEP(CGF, AffinitiesArray, Idx),
3918 // affs[i].base_addr = &<Affinities[i].second>;
3919 LValue BaseAddrLVal = CGF.EmitLValueForField(
3920 Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
3921 CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
3922 BaseAddrLVal);
3923 // affs[i].len = sizeof(<Affinities[i].second>);
3924 LValue LenLVal = CGF.EmitLValueForField(
3925 Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
3926 CGF.EmitStoreOfScalar(Size, LenLVal);
3927 Idx = CGF.Builder.CreateNUWAdd(
3928 Idx, llvm::ConstantInt::get(Idx->getType(), 1));
3929 CGF.EmitStoreOfScalar(Idx, PosLVal);
3930 }
3931 }
3932 // Call to kmp_int32 __kmpc_omp_reg_task_with_affinity(ident_t *loc_ref,
3933 // kmp_int32 gtid, kmp_task_t *new_task, kmp_int32
3934 // naffins, kmp_task_affinity_info_t *affin_list);
3935 llvm::Value *LocRef = emitUpdateLocation(CGF, Loc);
3936 llvm::Value *GTid = getThreadID(CGF, Loc);
3937 llvm::Value *AffinListPtr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3938 AffinitiesArray.emitRawPointer(CGF), CGM.VoidPtrTy);
3939 // FIXME: Emit the function and ignore its result for now unless the
3940 // runtime function is properly implemented.
3941 (void)CGF.EmitRuntimeCall(
3942 OMPBuilder.getOrCreateRuntimeFunction(
3943 CGM.getModule(), OMPRTL___kmpc_omp_reg_task_with_affinity),
3944 {LocRef, GTid, NewTask, NumOfElements, AffinListPtr});
3945 }
3946 llvm::Value *NewTaskNewTaskTTy =
3948 NewTask, KmpTaskTWithPrivatesPtrTy);
3949 LValue Base = CGF.MakeNaturalAlignRawAddrLValue(NewTaskNewTaskTTy,
3950 KmpTaskTWithPrivatesQTy);
3951 LValue TDBase =
3952 CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin());
3953 // Fill the data in the resulting kmp_task_t record.
3954 // Copy shareds if there are any.
3955 Address KmpTaskSharedsPtr = Address::invalid();
3956 if (!SharedsTy->castAsRecordDecl()->field_empty()) {
3957 KmpTaskSharedsPtr = Address(
3958 CGF.EmitLoadOfScalar(
3960 TDBase,
3961 *std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds)),
3962 Loc),
3963 CGF.Int8Ty, CGM.getNaturalTypeAlignment(SharedsTy));
3964 LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy);
3965 LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy);
3966 CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap);
3967 }
3968 // Emit initial values for private copies (if any).
3970 if (!Privates.empty()) {
3971 emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD,
3972 SharedsTy, SharedsPtrTy, Data, Privates,
3973 /*ForDup=*/false);
3974 if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
3975 (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) {
3976 Result.TaskDupFn = emitTaskDupFunction(
3977 CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD,
3978 KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates,
3979 /*WithLastIter=*/!Data.LastprivateVars.empty());
3980 }
3981 }
3982 // Fields of union "kmp_cmplrdata_t" for destructors and priority.
3983 enum { Priority = 0, Destructors = 1 };
3984 // Provide pointer to function with destructors for privates.
3985 auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1);
3986 const auto *KmpCmplrdataUD = (*FI)->getType()->castAsRecordDecl();
3987 assert(KmpCmplrdataUD->isUnion());
3988 if (NeedsCleanup) {
3989 llvm::Value *DestructorFn = emitDestructorsFunction(
3990 CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
3991 KmpTaskTWithPrivatesQTy);
3992 LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI);
3993 LValue DestructorsLV = CGF.EmitLValueForField(
3994 Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors));
3996 DestructorFn, KmpRoutineEntryPtrTy),
3997 DestructorsLV);
3998 }
3999 // Set priority.
4000 if (Data.Priority.getInt()) {
4001 LValue Data2LV = CGF.EmitLValueForField(
4002 TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2));
4003 LValue PriorityLV = CGF.EmitLValueForField(
4004 Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority));
4005 CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV);
4006 }
4007 Result.NewTask = NewTask;
4008 Result.TaskEntry = TaskEntry;
4009 Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy;
4010 Result.TDBase = TDBase;
4011 Result.KmpTaskTQTyRD = KmpTaskTQTyRD;
4012 return Result;
4013}
4014
4015/// Translates internal dependency kind into the runtime kind.
4017 RTLDependenceKindTy DepKind;
4018 switch (K) {
4019 case OMPC_DEPEND_in:
4020 DepKind = RTLDependenceKindTy::DepIn;
4021 break;
4022 // Out and InOut dependencies must use the same code.
4023 case OMPC_DEPEND_out:
4024 case OMPC_DEPEND_inout:
4025 DepKind = RTLDependenceKindTy::DepInOut;
4026 break;
4027 case OMPC_DEPEND_mutexinoutset:
4028 DepKind = RTLDependenceKindTy::DepMutexInOutSet;
4029 break;
4030 case OMPC_DEPEND_inoutset:
4031 DepKind = RTLDependenceKindTy::DepInOutSet;
4032 break;
4033 case OMPC_DEPEND_outallmemory:
4034 DepKind = RTLDependenceKindTy::DepOmpAllMem;
4035 break;
4036 case OMPC_DEPEND_source:
4037 case OMPC_DEPEND_sink:
4038 case OMPC_DEPEND_depobj:
4039 case OMPC_DEPEND_inoutallmemory:
4041 llvm_unreachable("Unknown task dependence type");
4042 }
4043 return DepKind;
4044}
4045
4046/// Builds kmp_depend_info, if it is not built yet, and builds flags type.
4047static void getDependTypes(ASTContext &C, QualType &KmpDependInfoTy,
4048 QualType &FlagsTy) {
4049 FlagsTy = C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false);
4050 if (KmpDependInfoTy.isNull()) {
4051 RecordDecl *KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info");
4052 KmpDependInfoRD->startDefinition();
4053 addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType());
4054 addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType());
4055 addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy);
4056 KmpDependInfoRD->completeDefinition();
4057 KmpDependInfoTy = C.getCanonicalTagType(KmpDependInfoRD);
4058 }
4059}
4060
4061std::pair<llvm::Value *, LValue>
4063 SourceLocation Loc) {
4064 ASTContext &C = CGM.getContext();
4065 QualType FlagsTy;
4066 getDependTypes(C, KmpDependInfoTy, FlagsTy);
4067 auto *KmpDependInfoRD = KmpDependInfoTy->castAsRecordDecl();
4068 QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
4070 DepobjLVal.getAddress().withElementType(
4071 CGF.ConvertTypeForMem(KmpDependInfoPtrTy)),
4072 KmpDependInfoPtrTy->castAs<PointerType>());
4073 Address DepObjAddr = CGF.Builder.CreateGEP(
4074 CGF, Base.getAddress(),
4075 llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
4076 LValue NumDepsBase = CGF.MakeAddrLValue(
4077 DepObjAddr, KmpDependInfoTy, Base.getBaseInfo(), Base.getTBAAInfo());
4078 // NumDeps = deps[i].base_addr;
4079 LValue BaseAddrLVal = CGF.EmitLValueForField(
4080 NumDepsBase,
4081 *std::next(KmpDependInfoRD->field_begin(),
4082 static_cast<unsigned int>(RTLDependInfoFields::BaseAddr)));
4083 llvm::Value *NumDeps = CGF.EmitLoadOfScalar(BaseAddrLVal, Loc);
4084 return std::make_pair(NumDeps, Base);
4085}
4086
4087static void emitDependData(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
4088 llvm::PointerUnion<unsigned *, LValue *> Pos,
4090 Address DependenciesArray) {
4091 CodeGenModule &CGM = CGF.CGM;
4092 ASTContext &C = CGM.getContext();
4093 QualType FlagsTy;
4094 getDependTypes(C, KmpDependInfoTy, FlagsTy);
4095 auto *KmpDependInfoRD = KmpDependInfoTy->castAsRecordDecl();
4096 llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
4097
4098 OMPIteratorGeneratorScope IteratorScope(
4099 CGF, cast_or_null<OMPIteratorExpr>(
4100 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
4101 : nullptr));
4102 for (const Expr *E : Data.DepExprs) {
4103 llvm::Value *Addr;
4104 llvm::Value *Size;
4105
4106 // The expression will be a nullptr in the 'omp_all_memory' case.
4107 if (E) {
4108 std::tie(Addr, Size) = getPointerAndSize(CGF, E);
4109 Addr = CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy);
4110 } else {
4111 Addr = llvm::ConstantInt::get(CGF.IntPtrTy, 0);
4112 Size = llvm::ConstantInt::get(CGF.SizeTy, 0);
4113 }
4114 LValue Base;
4115 if (unsigned *P = dyn_cast<unsigned *>(Pos)) {
4116 Base = CGF.MakeAddrLValue(
4117 CGF.Builder.CreateConstGEP(DependenciesArray, *P), KmpDependInfoTy);
4118 } else {
4119 assert(E && "Expected a non-null expression");
4120 LValue &PosLVal = *cast<LValue *>(Pos);
4121 llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4122 Base = CGF.MakeAddrLValue(
4123 CGF.Builder.CreateGEP(CGF, DependenciesArray, Idx), KmpDependInfoTy);
4124 }
4125 // deps[i].base_addr = &<Dependencies[i].second>;
4126 LValue BaseAddrLVal = CGF.EmitLValueForField(
4127 Base,
4128 *std::next(KmpDependInfoRD->field_begin(),
4129 static_cast<unsigned int>(RTLDependInfoFields::BaseAddr)));
4130 CGF.EmitStoreOfScalar(Addr, BaseAddrLVal);
4131 // deps[i].len = sizeof(<Dependencies[i].second>);
4132 LValue LenLVal = CGF.EmitLValueForField(
4133 Base, *std::next(KmpDependInfoRD->field_begin(),
4134 static_cast<unsigned int>(RTLDependInfoFields::Len)));
4135 CGF.EmitStoreOfScalar(Size, LenLVal);
4136 // deps[i].flags = <Dependencies[i].first>;
4137 RTLDependenceKindTy DepKind = translateDependencyKind(Data.DepKind);
4138 LValue FlagsLVal = CGF.EmitLValueForField(
4139 Base,
4140 *std::next(KmpDependInfoRD->field_begin(),
4141 static_cast<unsigned int>(RTLDependInfoFields::Flags)));
4143 llvm::ConstantInt::get(LLVMFlagsTy, static_cast<unsigned int>(DepKind)),
4144 FlagsLVal);
4145 if (unsigned *P = dyn_cast<unsigned *>(Pos)) {
4146 ++(*P);
4147 } else {
4148 LValue &PosLVal = *cast<LValue *>(Pos);
4149 llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4150 Idx = CGF.Builder.CreateNUWAdd(Idx,
4151 llvm::ConstantInt::get(Idx->getType(), 1));
4152 CGF.EmitStoreOfScalar(Idx, PosLVal);
4153 }
4154 }
4155}
4156
4160 assert(Data.DepKind == OMPC_DEPEND_depobj &&
4161 "Expected depobj dependency kind.");
4163 SmallVector<LValue, 4> SizeLVals;
4164 ASTContext &C = CGF.getContext();
4165 {
4166 OMPIteratorGeneratorScope IteratorScope(
4167 CGF, cast_or_null<OMPIteratorExpr>(
4168 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
4169 : nullptr));
4170 for (const Expr *E : Data.DepExprs) {
4171 llvm::Value *NumDeps;
4172 LValue Base;
4173 LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
4174 std::tie(NumDeps, Base) =
4175 getDepobjElements(CGF, DepobjLVal, E->getExprLoc());
4176 LValue NumLVal = CGF.MakeAddrLValue(
4177 CGF.CreateMemTemp(C.getUIntPtrType(), "depobj.size.addr"),
4178 C.getUIntPtrType());
4179 CGF.Builder.CreateStore(llvm::ConstantInt::get(CGF.IntPtrTy, 0),
4180 NumLVal.getAddress());
4181 llvm::Value *PrevVal = CGF.EmitLoadOfScalar(NumLVal, E->getExprLoc());
4182 llvm::Value *Add = CGF.Builder.CreateNUWAdd(PrevVal, NumDeps);
4183 CGF.EmitStoreOfScalar(Add, NumLVal);
4184 SizeLVals.push_back(NumLVal);
4185 }
4186 }
4187 for (unsigned I = 0, E = SizeLVals.size(); I < E; ++I) {
4188 llvm::Value *Size =
4189 CGF.EmitLoadOfScalar(SizeLVals[I], Data.DepExprs[I]->getExprLoc());
4190 Sizes.push_back(Size);
4191 }
4192 return Sizes;
4193}
4194
4197 LValue PosLVal,
4199 Address DependenciesArray) {
4200 assert(Data.DepKind == OMPC_DEPEND_depobj &&
4201 "Expected depobj dependency kind.");
 // Size in bytes of a single kmp_depend_info record; used to scale the
 // per-depobj element counts into byte counts for the memcpy below.
4202 llvm::Value *ElSize = CGF.getTypeSize(KmpDependInfoTy);
4203 {
4204 OMPIteratorGeneratorScope IteratorScope(
4205 CGF, cast_or_null<OMPIteratorExpr>(
4206 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
4207 : nullptr));
4208 for (const Expr *E : Data.DepExprs) {
4209 llvm::Value *NumDeps;
4210 LValue Base;
4211 LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
4212 std::tie(NumDeps, Base) =
4213 getDepobjElements(CGF, DepobjLVal, E->getExprLoc());
4214
4215 // memcopy dependency data: copy all records of this depobj into the
4215 // destination array starting at the current position.
4216 llvm::Value *Size = CGF.Builder.CreateNUWMul(
4217 ElSize,
4218 CGF.Builder.CreateIntCast(NumDeps, CGF.SizeTy, /*isSigned=*/false));
4219 llvm::Value *Pos = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4220 Address DepAddr = CGF.Builder.CreateGEP(CGF, DependenciesArray, Pos);
4221 CGF.Builder.CreateMemCpy(DepAddr, Base.getAddress(), Size);
4222
4223 // Advance the running position by the number of copied records
4224 // (element count, not byte count): pos += NumDeps;
4225 llvm::Value *Add = CGF.Builder.CreateNUWAdd(Pos, NumDeps);
4226 CGF.EmitStoreOfScalar(Add, PosLVal);
4227 }
4228 }
4229}
4230
// Emit the kmp_depend_info array for all 'depend' clauses of a task and
// return {number of records, address of the array cast to void*}. Returns
// {nullptr, invalid} when every clause has an empty expression list.
4231std::pair<llvm::Value *, Address> CGOpenMPRuntime::emitDependClause(
4233 SourceLocation Loc) {
4234 if (llvm::all_of(Dependencies, [](const OMPTaskDataTy::DependData &D) {
4235 return D.DepExprs.empty();
4236 }))
4237 return std::make_pair(nullptr, Address::invalid());
4238 // Process list of dependencies.
4239 ASTContext &C = CGM.getContext();
4240 Address DependenciesArray = Address::invalid();
4241 llvm::Value *NumOfElements = nullptr;
 // Compile-time count: only plain (non-depobj, non-iterator) dependencies
 // contribute here; the other kinds have runtime-computed counts.
4242 unsigned NumDependencies = std::accumulate(
4243 Dependencies.begin(), Dependencies.end(), 0,
4244 [](unsigned V, const OMPTaskDataTy::DependData &D) {
4245 return D.DepKind == OMPC_DEPEND_depobj
4246 ? V
4247 : (V + (D.IteratorExpr ? 0 : D.DepExprs.size()));
4248 });
4249 QualType FlagsTy;
4250 getDependTypes(C, KmpDependInfoTy, FlagsTy);
4251 bool HasDepobjDeps = false;
4252 bool HasRegularWithIterators = false;
4253 llvm::Value *NumOfDepobjElements = llvm::ConstantInt::get(CGF.IntPtrTy, 0);
4254 llvm::Value *NumOfRegularWithIterators =
4255 llvm::ConstantInt::get(CGF.IntPtrTy, 0);
4256 // Calculate number of depobj dependencies and regular deps with the
4257 // iterators.
4258 for (const OMPTaskDataTy::DependData &D : Dependencies) {
4259 if (D.DepKind == OMPC_DEPEND_depobj) {
4262 for (llvm::Value *Size : Sizes) {
4263 NumOfDepobjElements =
4264 CGF.Builder.CreateNUWAdd(NumOfDepobjElements, Size);
4265 }
4266 HasDepobjDeps = true;
4267 continue;
4268 }
4269 // Include number of iterations, if any.
4270
4271 if (const auto *IE = cast_or_null<OMPIteratorExpr>(D.IteratorExpr)) {
 // Total records for this clause = product of all iterator trip counts
 // times the number of dependency expressions in the clause.
4272 llvm::Value *ClauseIteratorSpace =
4273 llvm::ConstantInt::get(CGF.IntPtrTy, 1);
4274 for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
4275 llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
4276 Sz = CGF.Builder.CreateIntCast(Sz, CGF.IntPtrTy, /*isSigned=*/false);
4277 ClauseIteratorSpace = CGF.Builder.CreateNUWMul(Sz, ClauseIteratorSpace);
4278 }
4279 llvm::Value *NumClauseDeps = CGF.Builder.CreateNUWMul(
4280 ClauseIteratorSpace,
4281 llvm::ConstantInt::get(CGF.IntPtrTy, D.DepExprs.size()));
4282 NumOfRegularWithIterators =
4283 CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumClauseDeps);
4284 HasRegularWithIterators = true;
4285 continue;
4286 }
4287 }
4288
4289 QualType KmpDependInfoArrayTy;
4290 if (HasDepobjDeps || HasRegularWithIterators) {
 // Runtime-sized case: total = static count + depobj counts + iterator
 // counts; allocate the array as a VLA.
4291 NumOfElements = llvm::ConstantInt::get(CGM.IntPtrTy, NumDependencies,
4292 /*isSigned=*/false);
4293 if (HasDepobjDeps) {
4294 NumOfElements =
4295 CGF.Builder.CreateNUWAdd(NumOfDepobjElements, NumOfElements);
4296 }
4297 if (HasRegularWithIterators) {
4298 NumOfElements =
4299 CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumOfElements);
4300 }
 // Wrap the computed size in an OpaqueValueExpr so it can serve as the
 // VLA size expression.
4301 auto *OVE = new (C) OpaqueValueExpr(
4302 Loc, C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0),
4303 VK_PRValue);
4304 CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE,
4305 RValue::get(NumOfElements));
4306 KmpDependInfoArrayTy =
4307 C.getVariableArrayType(KmpDependInfoTy, OVE, ArraySizeModifier::Normal,
4308 /*IndexTypeQuals=*/0);
4309 // CGF.EmitVariablyModifiedType(KmpDependInfoArrayTy);
4310 // Properly emit variable-sized array.
4311 auto *PD = ImplicitParamDecl::Create(C, KmpDependInfoArrayTy,
4313 CGF.EmitVarDecl(*PD);
4314 DependenciesArray = CGF.GetAddrOfLocalVar(PD);
4315 NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
4316 /*isSigned=*/false);
4317 } else {
 // Fully static case: emit a constant-sized array on the stack.
4318 KmpDependInfoArrayTy = C.getConstantArrayType(
4319 KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies), nullptr,
4320 ArraySizeModifier::Normal, /*IndexTypeQuals=*/0);
4321 DependenciesArray =
4322 CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr");
4323 DependenciesArray = CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0);
4324 NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumDependencies,
4325 /*isSigned=*/false);
4326 }
 // Pass 1: plain dependencies (no depobj, no iterators) tracked with a
 // compile-time position.
4327 unsigned Pos = 0;
4328 for (const OMPTaskDataTy::DependData &Dep : Dependencies) {
4329 if (Dep.DepKind == OMPC_DEPEND_depobj || Dep.IteratorExpr)
4330 continue;
4331 emitDependData(CGF, KmpDependInfoTy, &Pos, Dep, DependenciesArray);
4332 }
4333 // Copy regular dependencies with iterators (pass 2); from here on the
4333 // position must be tracked in memory because counts are runtime values.
4334 LValue PosLVal = CGF.MakeAddrLValue(
4335 CGF.CreateMemTemp(C.getSizeType(), "dep.counter.addr"), C.getSizeType());
4336 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
4337 for (const OMPTaskDataTy::DependData &Dep : Dependencies) {
4338 if (Dep.DepKind == OMPC_DEPEND_depobj || !Dep.IteratorExpr)
4339 continue;
4340 emitDependData(CGF, KmpDependInfoTy, &PosLVal, Dep, DependenciesArray);
4341 }
4342 // Copy final depobj arrays without iterators (pass 3).
4343 if (HasDepobjDeps) {
4344 for (const OMPTaskDataTy::DependData &Dep : Dependencies) {
4345 if (Dep.DepKind != OMPC_DEPEND_depobj)
4346 continue;
4347 emitDepobjElements(CGF, KmpDependInfoTy, PosLVal, Dep, DependenciesArray);
4348 }
4349 }
4350 DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4351 DependenciesArray, CGF.VoidPtrTy, CGF.Int8Ty);
4352 return std::make_pair(NumOfElements, DependenciesArray);
4353}
4354
4356 CodeGenFunction &CGF, const OMPTaskDataTy::DependData &Dependencies,
4357 SourceLocation Loc) {
4358 if (Dependencies.DepExprs.empty())
4359 return Address::invalid();
4360 // Process list of dependencies.
4361 ASTContext &C = CGM.getContext();
4362 Address DependenciesArray = Address::invalid();
4363 unsigned NumDependencies = Dependencies.DepExprs.size();
4364 QualType FlagsTy;
4365 getDependTypes(C, KmpDependInfoTy, FlagsTy);
4366 auto *KmpDependInfoRD = KmpDependInfoTy->castAsRecordDecl();
4367
4368 llvm::Value *Size;
4369 // Define type kmp_depend_info[<Dependencies.size()>];
4370 // For depobj reserve one extra element to store the number of elements.
4371 // It is required to handle depobj(x) update(in) construct.
4372 // kmp_depend_info[<Dependencies.size()>] deps;
4373 llvm::Value *NumDepsVal;
4374 CharUnits Align = C.getTypeAlignInChars(KmpDependInfoTy);
4375 if (const auto *IE =
4376 cast_or_null<OMPIteratorExpr>(Dependencies.IteratorExpr)) {
 // Iterator modifier: the record count is the product of all iterator
 // trip counts, computed at runtime.
4377 NumDepsVal = llvm::ConstantInt::get(CGF.SizeTy, 1);
4378 for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
4379 llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
4380 Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
4381 NumDepsVal = CGF.Builder.CreateNUWMul(NumDepsVal, Sz);
4382 }
 // +1 for the leading element-count record; scale by the aligned record
 // size to get the allocation size in bytes.
4383 Size = CGF.Builder.CreateNUWAdd(llvm::ConstantInt::get(CGF.SizeTy, 1),
4384 NumDepsVal);
4385 CharUnits SizeInBytes =
4386 C.getTypeSizeInChars(KmpDependInfoTy).alignTo(Align);
4387 llvm::Value *RecSize = CGM.getSize(SizeInBytes);
4388 Size = CGF.Builder.CreateNUWMul(Size, RecSize);
4389 NumDepsVal =
4390 CGF.Builder.CreateIntCast(NumDepsVal, CGF.IntPtrTy, /*isSigned=*/false);
4391 } else {
 // No iterators: the count is known at compile time (+1 extra record).
4392 QualType KmpDependInfoArrayTy = C.getConstantArrayType(
4393 KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies + 1),
4394 nullptr, ArraySizeModifier::Normal, /*IndexTypeQuals=*/0);
4395 CharUnits Sz = C.getTypeSizeInChars(KmpDependInfoArrayTy);
4396 Size = CGM.getSize(Sz.alignTo(Align));
4397 NumDepsVal = llvm::ConstantInt::get(CGF.IntPtrTy, NumDependencies);
4398 }
4399 // Need to allocate on the dynamic memory.
4400 llvm::Value *ThreadID = getThreadID(CGF, Loc);
4401 // Use default allocator.
4402 llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
4403 llvm::Value *Args[] = {ThreadID, Size, Allocator};
4404
4405 llvm::Value *Addr =
4406 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
4407 CGM.getModule(), OMPRTL___kmpc_alloc),
4408 Args, ".dep.arr.addr");
4409 llvm::Type *KmpDependInfoLlvmTy = CGF.ConvertTypeForMem(KmpDependInfoTy);
4411 Addr, CGF.Builder.getPtrTy(0));
4412 DependenciesArray = Address(Addr, KmpDependInfoLlvmTy, Align);
4413 // Write number of elements in the first element of array for depobj.
4414 LValue Base = CGF.MakeAddrLValue(DependenciesArray, KmpDependInfoTy);
4415 // deps[i].base_addr = NumDependencies;
4416 LValue BaseAddrLVal = CGF.EmitLValueForField(
4417 Base,
4418 *std::next(KmpDependInfoRD->field_begin(),
4419 static_cast<unsigned int>(RTLDependInfoFields::BaseAddr)));
4420 CGF.EmitStoreOfScalar(NumDepsVal, BaseAddrLVal);
 // Fill the records starting at index 1 (index 0 holds the count above).
 // With iterators the position must live in memory; otherwise a plain
 // compile-time counter suffices.
4421 llvm::PointerUnion<unsigned *, LValue *> Pos;
4422 unsigned Idx = 1;
4423 LValue PosLVal;
4424 if (Dependencies.IteratorExpr) {
4425 PosLVal = CGF.MakeAddrLValue(
4426 CGF.CreateMemTemp(C.getSizeType(), "iterator.counter.addr"),
4427 C.getSizeType());
4428 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Idx), PosLVal,
4429 /*IsInit=*/true);
4430 Pos = &PosLVal;
4431 } else {
4432 Pos = &Idx;
4433 }
4434 emitDependData(CGF, KmpDependInfoTy, Pos, Dependencies, DependenciesArray);
 // The depobj handle points past the count record, at the first real
 // dependency entry.
4435 DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4436 CGF.Builder.CreateConstGEP(DependenciesArray, 1), CGF.VoidPtrTy,
4437 CGF.Int8Ty);
4438 return DependenciesArray;
4439}
4440
4442 SourceLocation Loc) {
4443 ASTContext &C = CGM.getContext();
4444 QualType FlagsTy;
4445 getDependTypes(C, KmpDependInfoTy, FlagsTy);
 // Load the pointer stored in the depobj variable.
4446 LValue Base = CGF.EmitLoadOfPointerLValue(DepobjLVal.getAddress(),
4447 C.VoidPtrTy.castAs<PointerType>());
4448 QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
4450 Base.getAddress(), CGF.ConvertTypeForMem(KmpDependInfoPtrTy),
 // Step back one record: the allocation begins one kmp_depend_info before
 // the stored pointer (that slot holds the element count), so the address
 // passed to the runtime for freeing is at index -1.
4452 llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
4453 Addr.getElementType(), Addr.emitRawPointer(CGF),
4454 llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
4455 DepObjAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(DepObjAddr,
4456 CGF.VoidPtrTy);
4457 llvm::Value *ThreadID = getThreadID(CGF, Loc);
4458 // Use default allocator.
4459 llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
4460 llvm::Value *Args[] = {ThreadID, DepObjAddr, Allocator};
4461
4462 // _kmpc_free(gtid, addr, nullptr);
4463 (void)CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
4464 CGM.getModule(), OMPRTL___kmpc_free),
4465 Args);
4466}
4467
4469 OpenMPDependClauseKind NewDepKind,
4470 SourceLocation Loc) {
4471 ASTContext &C = CGM.getContext();
4472 QualType FlagsTy;
4473 getDependTypes(C, KmpDependInfoTy, FlagsTy);
4474 auto *KmpDependInfoRD = KmpDependInfoTy->castAsRecordDecl();
4475 llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
 // Fetch the record count and the start of the dependency array stored in
 // the depobj.
4476 llvm::Value *NumDeps;
4477 LValue Base;
4478 std::tie(NumDeps, Base) = getDepobjElements(CGF, DepobjLVal, Loc);
4479
4480 Address Begin = Base.getAddress();
4481 // Cast from pointer to array type to pointer to single element.
4482 llvm::Value *End = CGF.Builder.CreateGEP(Begin.getElementType(),
4483 Begin.emitRawPointer(CGF), NumDeps);
4484 // The basic structure here is a while-do loop: walk every record and
4484 // overwrite its flags field with the new dependency kind.
4485 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.body");
4486 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.done");
4487 llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
4488 CGF.EmitBlock(BodyBB);
4489 llvm::PHINode *ElementPHI =
4490 CGF.Builder.CreatePHI(Begin.getType(), 2, "omp.elementPast");
4491 ElementPHI->addIncoming(Begin.emitRawPointer(CGF), EntryBB);
4492 Begin = Begin.withPointer(ElementPHI, KnownNonNull);
4493 Base = CGF.MakeAddrLValue(Begin, KmpDependInfoTy, Base.getBaseInfo(),
4494 Base.getTBAAInfo());
4495 // deps[i].flags = NewDepKind;
4496 RTLDependenceKindTy DepKind = translateDependencyKind(NewDepKind);
4497 LValue FlagsLVal = CGF.EmitLValueForField(
4498 Base, *std::next(KmpDependInfoRD->field_begin(),
4499 static_cast<unsigned int>(RTLDependInfoFields::Flags)));
4501 llvm::ConstantInt::get(LLVMFlagsTy, static_cast<unsigned int>(DepKind)),
4502 FlagsLVal);
4503
4504 // Shift the address forward by one element.
4505 llvm::Value *ElementNext =
4506 CGF.Builder.CreateConstGEP(Begin, /*Index=*/1, "omp.elementNext")
4507 .emitRawPointer(CGF);
4508 ElementPHI->addIncoming(ElementNext, CGF.Builder.GetInsertBlock());
4509 llvm::Value *IsEmpty =
4510 CGF.Builder.CreateICmpEQ(ElementNext, End, "omp.isempty");
4511 CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
4512 // Done.
4513 CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
4514}
4515
4517 const OMPExecutableDirective &D,
4518 llvm::Function *TaskFunction,
4519 QualType SharedsTy, Address Shareds,
4520 const Expr *IfCond,
4521 const OMPTaskDataTy &Data) {
4522 if (!CGF.HaveInsertPoint())
4523 return;
4524
 // Allocate and initialize the kmp_task_t object, outlined entry, etc.
4526 emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
4527 llvm::Value *NewTask = Result.NewTask;
4528 llvm::Function *TaskEntry = Result.TaskEntry;
4529 llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy;
4530 LValue TDBase = Result.TDBase;
4531 const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD;
4532 // Process list of dependences.
4533 Address DependenciesArray = Address::invalid();
4534 llvm::Value *NumOfElements;
4535 std::tie(NumOfElements, DependenciesArray) =
4536 emitDependClause(CGF, Data.Dependences, Loc);
4537
4538 // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
4539 // libcall.
4540 // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
4541 // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
4542 // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence
4543 // list is not empty
4544 llvm::Value *ThreadID = getThreadID(CGF, Loc);
4545 llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
4546 llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask };
4547 llvm::Value *DepTaskArgs[7];
4548 if (!Data.Dependences.empty()) {
4549 DepTaskArgs[0] = UpLoc;
4550 DepTaskArgs[1] = ThreadID;
4551 DepTaskArgs[2] = NewTask;
4552 DepTaskArgs[3] = NumOfElements;
4553 DepTaskArgs[4] = DependenciesArray.emitRawPointer(CGF);
4554 DepTaskArgs[5] = CGF.Builder.getInt32(0);
4555 DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
4556 }
 // 'then' branch of the if clause (or unconditional path): enqueue the
 // task via __kmpc_omp_task_with_deps / __kmpc_omp_task.
4557 auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, &TaskArgs,
4558 &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) {
4559 if (!Data.Tied) {
 // Untied tasks restart from part 0 each time they are scheduled.
4560 auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
4561 LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI);
4562 CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal);
4563 }
4564 if (!Data.Dependences.empty()) {
4565 CGF.EmitRuntimeCall(
4566 OMPBuilder.getOrCreateRuntimeFunction(
4567 CGM.getModule(), OMPRTL___kmpc_omp_task_with_deps),
4568 DepTaskArgs);
4569 } else {
4570 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
4571 CGM.getModule(), OMPRTL___kmpc_omp_task),
4572 TaskArgs);
4573 }
4574 // Check if parent region is untied and build return for untied task;
4575 if (auto *Region =
4576 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
4577 Region->emitUntiedSwitch(CGF);
4578 };
4579
4580 llvm::Value *DepWaitTaskArgs[7];
4581 if (!Data.Dependences.empty()) {
4582 DepWaitTaskArgs[0] = UpLoc;
4583 DepWaitTaskArgs[1] = ThreadID;
4584 DepWaitTaskArgs[2] = NumOfElements;
4585 DepWaitTaskArgs[3] = DependenciesArray.emitRawPointer(CGF);
4586 DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
4587 DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
4588 DepWaitTaskArgs[6] =
4589 llvm::ConstantInt::get(CGF.Int32Ty, Data.HasNowaitClause);
4590 }
4591 auto &M = CGM.getModule();
 // 'else' branch (if(false) path): execute the task body immediately in
 // the current thread, waiting for dependences first and bracketing the
 // proxy call with task_begin_if0/task_complete_if0.
4592 auto &&ElseCodeGen = [this, &M, &TaskArgs, ThreadID, NewTaskNewTaskTTy,
4593 TaskEntry, &Data, &DepWaitTaskArgs,
4594 Loc](CodeGenFunction &CGF, PrePostActionTy &) {
4595 CodeGenFunction::RunCleanupsScope LocalScope(CGF);
4596 // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
4597 // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
4598 // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
4599 // is specified.
4600 if (!Data.Dependences.empty())
4601 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
4602 M, OMPRTL___kmpc_omp_taskwait_deps_51),
4603 DepWaitTaskArgs);
4604 // Call proxy_task_entry(gtid, new_task);
4605 auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy,
4606 Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
4607 Action.Enter(CGF);
4608 llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
4609 CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry,
4610 OutlinedFnArgs);
4611 };
4612
4613 // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
4614 // kmp_task_t *new_task);
4615 // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
4616 // kmp_task_t *new_task);
4618 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
4619 M, OMPRTL___kmpc_omp_task_begin_if0),
4620 TaskArgs,
4621 OMPBuilder.getOrCreateRuntimeFunction(
4622 M, OMPRTL___kmpc_omp_task_complete_if0),
4623 TaskArgs);
4624 RCG.setAction(Action);
4625 RCG(CGF);
4626 };
4627
4628 if (IfCond) {
4629 emitIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen);
4630 } else {
4631 RegionCodeGenTy ThenRCG(ThenCodeGen);
4632 ThenRCG(CGF);
4633 }
4634}
4635
4637 const OMPLoopDirective &D,
4638 llvm::Function *TaskFunction,
4639 QualType SharedsTy, Address Shareds,
4640 const Expr *IfCond,
4641 const OMPTaskDataTy &Data) {
4642 if (!CGF.HaveInsertPoint())
4643 return;
4645 emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
4646 // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
4647 // libcall.
4648 // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
4649 // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
4650 // sched, kmp_uint64 grainsize, void *task_dup);
4651 llvm::Value *ThreadID = getThreadID(CGF, Loc);
4652 llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
 // if_val: evaluated if clause, or 1 when no if clause is present.
4653 llvm::Value *IfVal;
4654 if (IfCond) {
4655 IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy,
4656 /*isSigned=*/true);
4657 } else {
4658 IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1);
4659 }
4660
 // Initialize the lb/ub/st fields of the kmp_task_t object from the loop
 // directive's bound/stride variables.
4661 LValue LBLVal = CGF.EmitLValueForField(
4662 Result.TDBase,
4663 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound));
4664 const auto *LBVar =
4665 cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl());
4666 CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(), LBLVal.getQuals(),
4667 /*IsInitializer=*/true);
4668 LValue UBLVal = CGF.EmitLValueForField(
4669 Result.TDBase,
4670 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound));
4671 const auto *UBVar =
4672 cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl());
4673 CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(), UBLVal.getQuals(),
4674 /*IsInitializer=*/true);
4675 LValue StLVal = CGF.EmitLValueForField(
4676 Result.TDBase,
4677 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride));
4678 const auto *StVar =
4679 cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl());
4680 CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(), StLVal.getQuals(),
4681 /*IsInitializer=*/true);
4682 // Store reductions address.
4683 LValue RedLVal = CGF.EmitLValueForField(
4684 Result.TDBase,
4685 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions));
4686 if (Data.Reductions) {
4687 CGF.EmitStoreOfScalar(Data.Reductions, RedLVal);
4688 } else {
4689 CGF.EmitNullInitialization(RedLVal.getAddress(),
4690 CGF.getContext().VoidPtrTy);
4691 }
 // Scheduling kind encoding for the 'sched' argument of __kmpc_taskloop.
4692 enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 };
4694 UpLoc,
4695 ThreadID,
4696 Result.NewTask,
4697 IfVal,
4698 LBLVal.getPointer(CGF),
4699 UBLVal.getPointer(CGF),
4700 CGF.EmitLoadOfScalar(StLVal, Loc),
4701 llvm::ConstantInt::getSigned(
4702 CGF.IntTy, 1), // Always 1 because taskgroup emitted by the compiler
4703 llvm::ConstantInt::getSigned(
4704 CGF.IntTy, Data.Schedule.getPointer()
4705 ? Data.Schedule.getInt() ? NumTasks : Grainsize
4706 : NoSchedule),
4707 Data.Schedule.getPointer()
4708 ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty,
4709 /*isSigned=*/false)
4710 : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0)};
 // The '_5' entry point takes an extra 'modifier' argument.
4711 if (Data.HasModifier)
4712 TaskArgs.push_back(llvm::ConstantInt::get(CGF.Int32Ty, 1));
4713
4714 TaskArgs.push_back(Result.TaskDupFn
4716 Result.TaskDupFn, CGF.VoidPtrTy)
4717 : llvm::ConstantPointerNull::get(CGF.VoidPtrTy));
4718 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
4719 CGM.getModule(), Data.HasModifier
4720 ? OMPRTL___kmpc_taskloop_5
4721 : OMPRTL___kmpc_taskloop),
4722 TaskArgs);
4723}
4724
4725/// Emit reduction operation for each element of array (required for
4726/// array sections) LHS op = RHS.
4727/// \param Type Type of array.
4728/// \param LHSVar Variable on the left side of the reduction operation
4729/// (references element of array in original variable).
4730/// \param RHSVar Variable on the right side of the reduction operation
4731/// (references element of array in original variable).
4732/// \param RedOpGen Generator of reduction operation with use of LHSVar and
4733/// RHSVar.
4735 CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar,
4736 const VarDecl *RHSVar,
4737 const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *,
4738 const Expr *, const Expr *)> &RedOpGen,
4739 const Expr *XExpr = nullptr, const Expr *EExpr = nullptr,
4740 const Expr *UpExpr = nullptr) {
4741 // Perform element-by-element initialization.
4742 QualType ElementTy;
4743 Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar);
4744 Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar);
4745
4746 // Drill down to the base element type on both arrays.
4747 const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
4748 llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr);
4749
4750 llvm::Value *RHSBegin = RHSAddr.emitRawPointer(CGF);
4751 llvm::Value *LHSBegin = LHSAddr.emitRawPointer(CGF);
4752 // Cast from pointer to array type to pointer to single element.
4753 llvm::Value *LHSEnd =
4754 CGF.Builder.CreateGEP(LHSAddr.getElementType(), LHSBegin, NumElements);
4755 // The basic structure here is a while-do loop; the zero-element case
4755 // branches straight to DoneBB.
4756 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arraycpy.body");
4757 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arraycpy.done");
4758 llvm::Value *IsEmpty =
4759 CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty");
4760 CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
4761
4762 // Enter the loop body, making that address the current address.
4763 llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
4764 CGF.EmitBlock(BodyBB);
4765
4766 CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);
4767
 // PHIs track the current source/destination element across iterations.
4768 llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI(
4769 RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast");
4770 RHSElementPHI->addIncoming(RHSBegin, EntryBB);
4771 Address RHSElementCurrent(
4772 RHSElementPHI, RHSAddr.getElementType(),
4773 RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));
4774
4775 llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI(
4776 LHSBegin->getType(), 2, "omp.arraycpy.destElementPast");
4777 LHSElementPHI->addIncoming(LHSBegin, EntryBB);
4778 Address LHSElementCurrent(
4779 LHSElementPHI, LHSAddr.getElementType(),
4780 LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));
4781
4782 // Emit copy: temporarily remap LHSVar/RHSVar to the current elements so
4782 // RedOpGen operates element-wise.
4784 Scope.addPrivate(LHSVar, LHSElementCurrent);
4785 Scope.addPrivate(RHSVar, RHSElementCurrent);
4786 Scope.Privatize();
4787 RedOpGen(CGF, XExpr, EExpr, UpExpr);
4788 Scope.ForceCleanup();
4789
4790 // Shift the address forward by one element.
4791 llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32(
4792 LHSAddr.getElementType(), LHSElementPHI, /*Idx0=*/1,
4793 "omp.arraycpy.dest.element");
4794 llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32(
4795 RHSAddr.getElementType(), RHSElementPHI, /*Idx0=*/1,
4796 "omp.arraycpy.src.element");
4797 // Check whether we've reached the end.
4798 llvm::Value *Done =
4799 CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done");
4800 CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
4801 LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock());
4802 RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock());
4803
4804 // Done.
4805 CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
4806}
4807
4808/// Emit reduction combiner. If the combiner is a simple expression emit it as
4809/// is, otherwise consider it as combiner of UDR decl and emit it as a call of
4810/// UDR combiner function.
4812 const Expr *ReductionOp) {
 // Detect the UDR pattern: a call whose callee is an opaque reference to an
 // OMPDeclareReductionDecl. Everything else is emitted unchanged.
4813 if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
4814 if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
4815 if (const auto *DRE =
4816 dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
4817 if (const auto *DRD =
4818 dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) {
4819 std::pair<llvm::Function *, llvm::Function *> Reduction =
4823 CGF.EmitIgnoredExpr(ReductionOp);
4824 return;
4825 }
4826 CGF.EmitIgnoredExpr(ReductionOp);
4827}
4828
4830 StringRef ReducerName, SourceLocation Loc, llvm::Type *ArgsElemType,
4832 ArrayRef<const Expr *> RHSExprs, ArrayRef<const Expr *> ReductionOps) {
4833 ASTContext &C = CGM.getContext();
4834
4835 // void reduction_func(void *LHSArg, void *RHSArg);
4836 FunctionArgList Args;
4837 ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
4839 ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
4841 Args.push_back(&LHSArg);
4842 Args.push_back(&RHSArg);
4843 const auto &CGFI =
4844 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
4845 std::string Name = getReductionFuncName(ReducerName);
4846 auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
4847 llvm::GlobalValue::InternalLinkage, Name,
4848 &CGM.getModule());
4849 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
4850 Fn->setDoesNotRecurse();
4851 CodeGenFunction CGF(CGM);
4852 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
4853
4854 // Dst = (void*[n])(LHSArg);
4855 // Src = (void*[n])(RHSArg);
4857 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
4858 CGF.Builder.getPtrTy(0)),
4859 ArgsElemType, CGF.getPointerAlign());
4861 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
4862 CGF.Builder.getPtrTy(0)),
4863 ArgsElemType, CGF.getPointerAlign());
4864
4865 // ...
4866 // *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
4867 // ...
 // Map each LHS/RHS variable to the corresponding slot of the incoming
 // pointer arrays; VLA-typed privates consume an extra slot carrying the
 // runtime size.
4869 const auto *IPriv = Privates.begin();
4870 unsigned Idx = 0;
4871 for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) {
4872 const auto *RHSVar =
4873 cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl());
4874 Scope.addPrivate(RHSVar, emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar));
4875 const auto *LHSVar =
4876 cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl());
4877 Scope.addPrivate(LHSVar, emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar));
4878 QualType PrivTy = (*IPriv)->getType();
4879 if (PrivTy->isVariablyModifiedType()) {
4880 // Get array size and emit VLA type.
4881 ++Idx;
4882 Address Elem = CGF.Builder.CreateConstArrayGEP(LHS, Idx);
4883 llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem);
4884 const VariableArrayType *VLA =
4885 CGF.getContext().getAsVariableArrayType(PrivTy);
4886 const auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr());
4888 CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy)));
4889 CGF.EmitVariablyModifiedType(PrivTy);
4890 }
4891 }
4892 Scope.Privatize();
 // Emit each combiner; array-typed privates are reduced element-by-element.
4893 IPriv = Privates.begin();
4894 const auto *ILHS = LHSExprs.begin();
4895 const auto *IRHS = RHSExprs.begin();
4896 for (const Expr *E : ReductionOps) {
4897 if ((*IPriv)->getType()->isArrayType()) {
4898 // Emit reduction for array section.
4899 const auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
4900 const auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
4902 CGF, (*IPriv)->getType(), LHSVar, RHSVar,
4903 [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
4904 emitReductionCombiner(CGF, E);
4905 });
4906 } else {
4907 // Emit reduction for array subscript or single variable.
4908 emitReductionCombiner(CGF, E);
4909 }
4910 ++IPriv;
4911 ++ILHS;
4912 ++IRHS;
4913 }
4914 Scope.ForceCleanup();
4915 CGF.FinishFunction();
4916 return Fn;
4917}
4918
4920 const Expr *ReductionOp,
4921 const Expr *PrivateRef,
4922 const DeclRefExpr *LHS,
4923 const DeclRefExpr *RHS) {
 // Dispatch on the private variable's type: arrays reduce element-wise,
 // everything else combines directly.
4924 if (PrivateRef->getType()->isArrayType()) {
4925 // Emit reduction for array section.
4926 const auto *LHSVar = cast<VarDecl>(LHS->getDecl());
4927 const auto *RHSVar = cast<VarDecl>(RHS->getDecl());
4929 CGF, PrivateRef->getType(), LHSVar, RHSVar,
4930 [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
4931 emitReductionCombiner(CGF, ReductionOp);
4932 });
4933 } else {
4934 // Emit reduction for array subscript or single variable.
4935 emitReductionCombiner(CGF, ReductionOp);
4936 }
4937}
4938
4939static std::string generateUniqueName(CodeGenModule &CGM,
4940 llvm::StringRef Prefix, const Expr *Ref);
4941
4943 CodeGenFunction &CGF, SourceLocation Loc, const Expr *Privates,
4944 const Expr *LHSExprs, const Expr *RHSExprs, const Expr *ReductionOps) {
4945
4946 // Create a shared global variable (__shared_reduction_var) to accumulate the
4947 // final result.
4948 //
4949 // Call __kmpc_barrier to synchronize threads before initialization.
4950 //
4951 // The master thread (thread_id == 0) initializes __shared_reduction_var
4952 // with the identity value or initializer.
4953 //
4954 // Call __kmpc_barrier to synchronize before combining.
4955 // For each i:
4956 // - Thread enters critical section.
4957 // - Reads its private value from LHSExprs[i].
4958 // - Updates __shared_reduction_var[i] = RedOp_i(__shared_reduction_var[i],
4959 // Privates[i]).
4960 // - Exits critical section.
4961 //
4962 // Call __kmpc_barrier after combining.
4963 //
4964 // Each thread copies __shared_reduction_var[i] back to RHSExprs[i].
4965 //
4966 // Final __kmpc_barrier to synchronize after broadcasting
4967 QualType PrivateType = Privates->getType();
4968 llvm::Type *LLVMType = CGF.ConvertTypeForMem(PrivateType);
4969
4970 const OMPDeclareReductionDecl *UDR = getReductionInit(ReductionOps);
4971 std::string ReductionVarNameStr;
4972 if (const auto *DRE = dyn_cast<DeclRefExpr>(Privates->IgnoreParenCasts()))
4973 ReductionVarNameStr =
4974 generateUniqueName(CGM, DRE->getDecl()->getNameAsString(), Privates);
4975 else
4976 ReductionVarNameStr = "unnamed_priv_var";
4977
4978 // Create an internal shared variable
4979 std::string SharedName =
4980 CGM.getOpenMPRuntime().getName({"internal_pivate_", ReductionVarNameStr});
4981 llvm::GlobalVariable *SharedVar = OMPBuilder.getOrCreateInternalVariable(
4982 LLVMType, ".omp.reduction." + SharedName);
4983
4984 SharedVar->setAlignment(
4985 llvm::MaybeAlign(CGF.getContext().getTypeAlign(PrivateType) / 8));
4986
4987 Address SharedResult =
4988 CGF.MakeNaturalAlignRawAddrLValue(SharedVar, PrivateType).getAddress();
4989
4990 llvm::Value *ThreadId = getThreadID(CGF, Loc);
4991 llvm::Value *BarrierLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
4992 llvm::Value *BarrierArgs[] = {BarrierLoc, ThreadId};
4993
4994 llvm::BasicBlock *InitBB = CGF.createBasicBlock("init");
4995 llvm::BasicBlock *InitEndBB = CGF.createBasicBlock("init.end");
4996
4997 llvm::Value *IsWorker = CGF.Builder.CreateICmpEQ(
4998 ThreadId, llvm::ConstantInt::get(ThreadId->getType(), 0));
4999 CGF.Builder.CreateCondBr(IsWorker, InitBB, InitEndBB);
5000
5001 CGF.EmitBlock(InitBB);
5002
5003 auto EmitSharedInit = [&]() {
5004 if (UDR) { // Check if it's a User-Defined Reduction
5005 if (const Expr *UDRInitExpr = UDR->getInitializer()) {
5006 std::pair<llvm::Function *, llvm::Function *> FnPair =
5008 llvm::Function *InitializerFn = FnPair.second;
5009 if (InitializerFn) {
5010 if (const auto *CE =
5011 dyn_cast<CallExpr>(UDRInitExpr->IgnoreParenImpCasts())) {
5012 const auto *OutDRE = cast<DeclRefExpr>(
5013 cast<UnaryOperator>(CE->getArg(0)->IgnoreParenImpCasts())
5014 ->getSubExpr());
5015 const VarDecl *OutVD = cast<VarDecl>(OutDRE->getDecl());
5016
5017 CodeGenFunction::OMPPrivateScope LocalScope(CGF);
5018 LocalScope.addPrivate(OutVD, SharedResult);
5019
5020 (void)LocalScope.Privatize();
5021 if (const auto *OVE = dyn_cast<OpaqueValueExpr>(
5022 CE->getCallee()->IgnoreParenImpCasts())) {
5024 CGF, OVE, RValue::get(InitializerFn));
5025 CGF.EmitIgnoredExpr(CE);
5026 } else {
5027 CGF.EmitAnyExprToMem(UDRInitExpr, SharedResult,
5028 PrivateType.getQualifiers(),
5029 /*IsInitializer=*/true);
5030 }
5031 } else {
5032 CGF.EmitAnyExprToMem(UDRInitExpr, SharedResult,
5033 PrivateType.getQualifiers(),
5034 /*IsInitializer=*/true);
5035 }
5036 } else {
5037 CGF.EmitAnyExprToMem(UDRInitExpr, SharedResult,
5038 PrivateType.getQualifiers(),
5039 /*IsInitializer=*/true);
5040 }
5041 } else {
5042 // EmitNullInitialization handles default construction for C++ classes
5043 // and zeroing for scalars, which is a reasonable default.
5044 CGF.EmitNullInitialization(SharedResult, PrivateType);
5045 }
5046 return; // UDR initialization handled
5047 }
5048 if (const auto *DRE = dyn_cast<DeclRefExpr>(Privates)) {
5049 if (const auto *VD = dyn_cast<VarDecl>(DRE->getDecl())) {
5050 if (const Expr *InitExpr = VD->getInit()) {
5051 CGF.EmitAnyExprToMem(InitExpr, SharedResult,
5052 PrivateType.getQualifiers(), true);
5053 return;
5054 }
5055 }
5056 }
5057 CGF.EmitNullInitialization(SharedResult, PrivateType);
5058 };
5059 EmitSharedInit();
5060 CGF.Builder.CreateBr(InitEndBB);
5061 CGF.EmitBlock(InitEndBB);
5062
5063 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
5064 CGM.getModule(), OMPRTL___kmpc_barrier),
5065 BarrierArgs);
5066
5067 const Expr *ReductionOp = ReductionOps;
5068 const OMPDeclareReductionDecl *CurrentUDR = getReductionInit(ReductionOp);
5069 LValue SharedLV = CGF.MakeAddrLValue(SharedResult, PrivateType);
5070 LValue LHSLV = CGF.EmitLValue(Privates);
5071
5072 auto EmitCriticalReduction = [&](auto ReductionGen) {
5073 std::string CriticalName = getName({"reduction_critical"});
5074 emitCriticalRegion(CGF, CriticalName, ReductionGen, Loc);
5075 };
5076
5077 if (CurrentUDR) {
5078 // Handle user-defined reduction.
5079 auto ReductionGen = [&](CodeGenFunction &CGF, PrePostActionTy &Action) {
5080 Action.Enter(CGF);
5081 std::pair<llvm::Function *, llvm::Function *> FnPair =
5082 getUserDefinedReduction(CurrentUDR);
5083 if (FnPair.first) {
5084 if (const auto *CE = dyn_cast<CallExpr>(ReductionOp)) {
5085 const auto *OutDRE = cast<DeclRefExpr>(
5086 cast<UnaryOperator>(CE->getArg(0)->IgnoreParenImpCasts())
5087 ->getSubExpr());
5088 const auto *InDRE = cast<DeclRefExpr>(
5089 cast<UnaryOperator>(CE->getArg(1)->IgnoreParenImpCasts())
5090 ->getSubExpr());
5091 CodeGenFunction::OMPPrivateScope LocalScope(CGF);
5092 LocalScope.addPrivate(cast<VarDecl>(OutDRE->getDecl()),
5093 SharedLV.getAddress());
5094 LocalScope.addPrivate(cast<VarDecl>(InDRE->getDecl()),
5095 LHSLV.getAddress());
5096 (void)LocalScope.Privatize();
5097 emitReductionCombiner(CGF, ReductionOp);
5098 }
5099 }
5100 };
5101 EmitCriticalReduction(ReductionGen);
5102 } else {
5103 // Handle built-in reduction operations.
5104#ifndef NDEBUG
5105 const Expr *ReductionClauseExpr = ReductionOp->IgnoreParenCasts();
5106 if (const auto *Cleanup = dyn_cast<ExprWithCleanups>(ReductionClauseExpr))
5107 ReductionClauseExpr = Cleanup->getSubExpr()->IgnoreParenCasts();
5108
5109 const Expr *AssignRHS = nullptr;
5110 if (const auto *BinOp = dyn_cast<BinaryOperator>(ReductionClauseExpr)) {
5111 if (BinOp->getOpcode() == BO_Assign)
5112 AssignRHS = BinOp->getRHS();
5113 } else if (const auto *OpCall =
5114 dyn_cast<CXXOperatorCallExpr>(ReductionClauseExpr)) {
5115 if (OpCall->getOperator() == OO_Equal)
5116 AssignRHS = OpCall->getArg(1);
5117 }
5118
5119 assert(AssignRHS &&
5120 "Private Variable Reduction : Invalid ReductionOp expression");
5121#endif
5122
5123 auto ReductionGen = [&](CodeGenFunction &CGF, PrePostActionTy &Action) {
5124 Action.Enter(CGF);
5125 const auto *OmpOutDRE =
5126 dyn_cast<DeclRefExpr>(LHSExprs->IgnoreParenImpCasts());
5127 const auto *OmpInDRE =
5128 dyn_cast<DeclRefExpr>(RHSExprs->IgnoreParenImpCasts());
5129 assert(
5130 OmpOutDRE && OmpInDRE &&
5131 "Private Variable Reduction : LHSExpr/RHSExpr must be DeclRefExprs");
5132 const VarDecl *OmpOutVD = cast<VarDecl>(OmpOutDRE->getDecl());
5133 const VarDecl *OmpInVD = cast<VarDecl>(OmpInDRE->getDecl());
5134 CodeGenFunction::OMPPrivateScope LocalScope(CGF);
5135 LocalScope.addPrivate(OmpOutVD, SharedLV.getAddress());
5136 LocalScope.addPrivate(OmpInVD, LHSLV.getAddress());
5137 (void)LocalScope.Privatize();
5138 // Emit the actual reduction operation
5139 CGF.EmitIgnoredExpr(ReductionOp);
5140 };
5141 EmitCriticalReduction(ReductionGen);
5142 }
5143
5144 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
5145 CGM.getModule(), OMPRTL___kmpc_barrier),
5146 BarrierArgs);
5147
5148 // Broadcast final result
5149 bool IsAggregate = PrivateType->isAggregateType();
5150 LValue SharedLV1 = CGF.MakeAddrLValue(SharedResult, PrivateType);
5151 llvm::Value *FinalResultVal = nullptr;
5152 Address FinalResultAddr = Address::invalid();
5153
5154 if (IsAggregate)
5155 FinalResultAddr = SharedResult;
5156 else
5157 FinalResultVal = CGF.EmitLoadOfScalar(SharedLV1, Loc);
5158
5159 LValue TargetLHSLV = CGF.EmitLValue(RHSExprs);
5160 if (IsAggregate) {
5161 CGF.EmitAggregateCopy(TargetLHSLV,
5162 CGF.MakeAddrLValue(FinalResultAddr, PrivateType),
5163 PrivateType, AggValueSlot::DoesNotOverlap, false);
5164 } else {
5165 CGF.EmitStoreOfScalar(FinalResultVal, TargetLHSLV);
5166 }
5167 // Final synchronization barrier
5168 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
5169 CGM.getModule(), OMPRTL___kmpc_barrier),
5170 BarrierArgs);
5171
5172 // Combiner with original list item
5173 auto OriginalListCombiner = [&](CodeGenFunction &CGF,
5174 PrePostActionTy &Action) {
5175 Action.Enter(CGF);
5176 emitSingleReductionCombiner(CGF, ReductionOps, Privates,
5177 cast<DeclRefExpr>(LHSExprs),
5178 cast<DeclRefExpr>(RHSExprs));
5179 };
5180 EmitCriticalReduction(OriginalListCombiner);
5181}
5182
5184 ArrayRef<const Expr *> OrgPrivates,
5185 ArrayRef<const Expr *> OrgLHSExprs,
5186 ArrayRef<const Expr *> OrgRHSExprs,
5187 ArrayRef<const Expr *> OrgReductionOps,
5188 ReductionOptionsTy Options) {
5189 if (!CGF.HaveInsertPoint())
5190 return;
5191
5192 bool WithNowait = Options.WithNowait;
5193 bool SimpleReduction = Options.SimpleReduction;
5194
5195 // Next code should be emitted for reduction:
5196 //
5197 // static kmp_critical_name lock = { 0 };
5198 //
5199 // void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
5200 // *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
5201 // ...
5202 // *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1],
5203 // *(Type<n>-1*)rhs[<n>-1]);
5204 // }
5205 //
5206 // ...
5207 // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
5208 // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
5209 // RedList, reduce_func, &<lock>)) {
5210 // case 1:
5211 // ...
5212 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
5213 // ...
5214 // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
5215 // break;
5216 // case 2:
5217 // ...
5218 // Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
5219 // ...
5220 // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);]
5221 // break;
5222 // default:;
5223 // }
5224 //
5225 // if SimpleReduction is true, only the next code is generated:
5226 // ...
5227 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
5228 // ...
5229
5230 ASTContext &C = CGM.getContext();
5231
5232 if (SimpleReduction) {
5234 const auto *IPriv = OrgPrivates.begin();
5235 const auto *ILHS = OrgLHSExprs.begin();
5236 const auto *IRHS = OrgRHSExprs.begin();
5237 for (const Expr *E : OrgReductionOps) {
5238 emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
5239 cast<DeclRefExpr>(*IRHS));
5240 ++IPriv;
5241 ++ILHS;
5242 ++IRHS;
5243 }
5244 return;
5245 }
5246
5247 // Filter out shared reduction variables based on IsPrivateVarReduction flag.
5248 // Only keep entries where the corresponding variable is not private.
5249 SmallVector<const Expr *> FilteredPrivates, FilteredLHSExprs,
5250 FilteredRHSExprs, FilteredReductionOps;
5251 for (unsigned I : llvm::seq<unsigned>(
5252 std::min(OrgReductionOps.size(), OrgLHSExprs.size()))) {
5253 if (!Options.IsPrivateVarReduction[I]) {
5254 FilteredPrivates.emplace_back(OrgPrivates[I]);
5255 FilteredLHSExprs.emplace_back(OrgLHSExprs[I]);
5256 FilteredRHSExprs.emplace_back(OrgRHSExprs[I]);
5257 FilteredReductionOps.emplace_back(OrgReductionOps[I]);
5258 }
5259 }
5260 // Wrap filtered vectors in ArrayRef for downstream shared reduction
5261 // processing.
5262 ArrayRef<const Expr *> Privates = FilteredPrivates;
5263 ArrayRef<const Expr *> LHSExprs = FilteredLHSExprs;
5264 ArrayRef<const Expr *> RHSExprs = FilteredRHSExprs;
5265 ArrayRef<const Expr *> ReductionOps = FilteredReductionOps;
5266
5267 // 1. Build a list of reduction variables.
5268 // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
5269 auto Size = RHSExprs.size();
5270 for (const Expr *E : Privates) {
5271 if (E->getType()->isVariablyModifiedType())
5272 // Reserve place for array size.
5273 ++Size;
5274 }
5275 llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size);
5276 QualType ReductionArrayTy = C.getConstantArrayType(
5277 C.VoidPtrTy, ArraySize, nullptr, ArraySizeModifier::Normal,
5278 /*IndexTypeQuals=*/0);
5279 RawAddress ReductionList =
5280 CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
5281 const auto *IPriv = Privates.begin();
5282 unsigned Idx = 0;
5283 for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) {
5284 Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
5285 CGF.Builder.CreateStore(
5287 CGF.EmitLValue(RHSExprs[I]).getPointer(CGF), CGF.VoidPtrTy),
5288 Elem);
5289 if ((*IPriv)->getType()->isVariablyModifiedType()) {
5290 // Store array size.
5291 ++Idx;
5292 Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
5293 llvm::Value *Size = CGF.Builder.CreateIntCast(
5294 CGF.getVLASize(
5295 CGF.getContext().getAsVariableArrayType((*IPriv)->getType()))
5296 .NumElts,
5297 CGF.SizeTy, /*isSigned=*/false);
5298 CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy),
5299 Elem);
5300 }
5301 }
5302
5303 // 2. Emit reduce_func().
5304 llvm::Function *ReductionFn = emitReductionFunction(
5305 CGF.CurFn->getName(), Loc, CGF.ConvertTypeForMem(ReductionArrayTy),
5306 Privates, LHSExprs, RHSExprs, ReductionOps);
5307
5308 // 3. Create static kmp_critical_name lock = { 0 };
5309 std::string Name = getName({"reduction"});
5310 llvm::Value *Lock = getCriticalRegionLock(Name);
5311
5312 // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
5313 // RedList, reduce_func, &<lock>);
5314 llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
5315 llvm::Value *ThreadId = getThreadID(CGF, Loc);
5316 llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy);
5317 llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5318 ReductionList.getPointer(), CGF.VoidPtrTy);
5319 llvm::Value *Args[] = {
5320 IdentTLoc, // ident_t *<loc>
5321 ThreadId, // i32 <gtid>
5322 CGF.Builder.getInt32(RHSExprs.size()), // i32 <n>
5323 ReductionArrayTySize, // size_type sizeof(RedList)
5324 RL, // void *RedList
5325 ReductionFn, // void (*) (void *, void *) <reduce_func>
5326 Lock // kmp_critical_name *&<lock>
5327 };
5328 llvm::Value *Res = CGF.EmitRuntimeCall(
5329 OMPBuilder.getOrCreateRuntimeFunction(
5330 CGM.getModule(),
5331 WithNowait ? OMPRTL___kmpc_reduce_nowait : OMPRTL___kmpc_reduce),
5332 Args);
5333
5334 // 5. Build switch(res)
5335 llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(".omp.reduction.default");
5336 llvm::SwitchInst *SwInst =
5337 CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2);
5338
5339 // 6. Build case 1:
5340 // ...
5341 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
5342 // ...
5343 // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
5344 // break;
5345 llvm::BasicBlock *Case1BB = CGF.createBasicBlock(".omp.reduction.case1");
5346 SwInst->addCase(CGF.Builder.getInt32(1), Case1BB);
5347 CGF.EmitBlock(Case1BB);
5348
5349 // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
5350 llvm::Value *EndArgs[] = {
5351 IdentTLoc, // ident_t *<loc>
5352 ThreadId, // i32 <gtid>
5353 Lock // kmp_critical_name *&<lock>
5354 };
5355 auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps](
5356 CodeGenFunction &CGF, PrePostActionTy &Action) {
5358 const auto *IPriv = Privates.begin();
5359 const auto *ILHS = LHSExprs.begin();
5360 const auto *IRHS = RHSExprs.begin();
5361 for (const Expr *E : ReductionOps) {
5362 RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
5363 cast<DeclRefExpr>(*IRHS));
5364 ++IPriv;
5365 ++ILHS;
5366 ++IRHS;
5367 }
5368 };
5370 CommonActionTy Action(
5371 nullptr, {},
5372 OMPBuilder.getOrCreateRuntimeFunction(
5373 CGM.getModule(), WithNowait ? OMPRTL___kmpc_end_reduce_nowait
5374 : OMPRTL___kmpc_end_reduce),
5375 EndArgs);
5376 RCG.setAction(Action);
5377 RCG(CGF);
5378
5379 CGF.EmitBranch(DefaultBB);
5380
5381 // 7. Build case 2:
5382 // ...
5383 // Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
5384 // ...
5385 // break;
5386 llvm::BasicBlock *Case2BB = CGF.createBasicBlock(".omp.reduction.case2");
5387 SwInst->addCase(CGF.Builder.getInt32(2), Case2BB);
5388 CGF.EmitBlock(Case2BB);
5389
5390 auto &&AtomicCodeGen = [Loc, Privates, LHSExprs, RHSExprs, ReductionOps](
5391 CodeGenFunction &CGF, PrePostActionTy &Action) {
5392 const auto *ILHS = LHSExprs.begin();
5393 const auto *IRHS = RHSExprs.begin();
5394 const auto *IPriv = Privates.begin();
5395 for (const Expr *E : ReductionOps) {
5396 const Expr *XExpr = nullptr;
5397 const Expr *EExpr = nullptr;
5398 const Expr *UpExpr = nullptr;
5399 BinaryOperatorKind BO = BO_Comma;
5400 if (const auto *BO = dyn_cast<BinaryOperator>(E)) {
5401 if (BO->getOpcode() == BO_Assign) {
5402 XExpr = BO->getLHS();
5403 UpExpr = BO->getRHS();
5404 }
5405 }
5406 // Try to emit update expression as a simple atomic.
5407 const Expr *RHSExpr = UpExpr;
5408 if (RHSExpr) {
5409 // Analyze RHS part of the whole expression.
5410 if (const auto *ACO = dyn_cast<AbstractConditionalOperator>(
5411 RHSExpr->IgnoreParenImpCasts())) {
5412 // If this is a conditional operator, analyze its condition for
5413 // min/max reduction operator.
5414 RHSExpr = ACO->getCond();
5415 }
5416 if (const auto *BORHS =
5417 dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) {
5418 EExpr = BORHS->getRHS();
5419 BO = BORHS->getOpcode();
5420 }
5421 }
5422 if (XExpr) {
5423 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
5424 auto &&AtomicRedGen = [BO, VD,
5425 Loc](CodeGenFunction &CGF, const Expr *XExpr,
5426 const Expr *EExpr, const Expr *UpExpr) {
5427 LValue X = CGF.EmitLValue(XExpr);
5428 RValue E;
5429 if (EExpr)
5430 E = CGF.EmitAnyExpr(EExpr);
5431 CGF.EmitOMPAtomicSimpleUpdateExpr(
5432 X, E, BO, /*IsXLHSInRHSPart=*/true,
5433 llvm::AtomicOrdering::Monotonic, Loc,
5434 [&CGF, UpExpr, VD, Loc](RValue XRValue) {
5435 CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
5436 Address LHSTemp = CGF.CreateMemTemp(VD->getType());
5437 CGF.emitOMPSimpleStore(
5438 CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue,
5439 VD->getType().getNonReferenceType(), Loc);
5440 PrivateScope.addPrivate(VD, LHSTemp);
5441 (void)PrivateScope.Privatize();
5442 return CGF.EmitAnyExpr(UpExpr);
5443 });
5444 };
5445 if ((*IPriv)->getType()->isArrayType()) {
5446 // Emit atomic reduction for array section.
5447 const auto *RHSVar =
5448 cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
5449 EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar,
5450 AtomicRedGen, XExpr, EExpr, UpExpr);
5451 } else {
5452 // Emit atomic reduction for array subscript or single variable.
5453 AtomicRedGen(CGF, XExpr, EExpr, UpExpr);
5454 }
5455 } else {
5456 // Emit as a critical region.
5457 auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *,
5458 const Expr *, const Expr *) {
5459 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
5460 std::string Name = RT.getName({"atomic_reduction"});
5462 CGF, Name,
5463 [=](CodeGenFunction &CGF, PrePostActionTy &Action) {
5464 Action.Enter(CGF);
5465 emitReductionCombiner(CGF, E);
5466 },
5467 Loc);
5468 };
5469 if ((*IPriv)->getType()->isArrayType()) {
5470 const auto *LHSVar =
5471 cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
5472 const auto *RHSVar =
5473 cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
5474 EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar,
5475 CritRedGen);
5476 } else {
5477 CritRedGen(CGF, nullptr, nullptr, nullptr);
5478 }
5479 }
5480 ++ILHS;
5481 ++IRHS;
5482 ++IPriv;
5483 }
5484 };
5485 RegionCodeGenTy AtomicRCG(AtomicCodeGen);
5486 if (!WithNowait) {
5487 // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>);
5488 llvm::Value *EndArgs[] = {
5489 IdentTLoc, // ident_t *<loc>
5490 ThreadId, // i32 <gtid>
5491 Lock // kmp_critical_name *&<lock>
5492 };
5493 CommonActionTy Action(nullptr, {},
5494 OMPBuilder.getOrCreateRuntimeFunction(
5495 CGM.getModule(), OMPRTL___kmpc_end_reduce),
5496 EndArgs);
5497 AtomicRCG.setAction(Action);
5498 AtomicRCG(CGF);
5499 } else {
5500 AtomicRCG(CGF);
5501 }
5502
5503 CGF.EmitBranch(DefaultBB);
5504 CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
5505 assert(OrgLHSExprs.size() == OrgPrivates.size() &&
5506 "PrivateVarReduction: Privates size mismatch");
5507 assert(OrgLHSExprs.size() == OrgReductionOps.size() &&
5508 "PrivateVarReduction: ReductionOps size mismatch");
5509 for (unsigned I : llvm::seq<unsigned>(
5510 std::min(OrgReductionOps.size(), OrgLHSExprs.size()))) {
5511 if (Options.IsPrivateVarReduction[I])
5512 emitPrivateReduction(CGF, Loc, OrgPrivates[I], OrgLHSExprs[I],
5513 OrgRHSExprs[I], OrgReductionOps[I]);
5514 }
5515}
5516
5517/// Generates unique name for artificial threadprivate variables.
5518/// Format is: <Prefix> "." <Decl_mangled_name> "_" "<Decl_start_loc_raw_enc>"
5519 static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix,
5520 const Expr *Ref) {
5521 SmallString<256> Buffer;
5522 llvm::raw_svector_ostream Out(Buffer);
 // Resolve the underlying variable: prefer the base declaration of the
 // reference expression, otherwise fall back to treating Ref as a plain
 // DeclRefExpr.
5523 const clang::DeclRefExpr *DE;
5524 const VarDecl *D = ::getBaseDecl(Ref, DE);
5525 if (!D)
5526 D = cast<VarDecl>(cast<DeclRefExpr>(Ref)->getDecl());
 // Use the canonical declaration so every redeclaration maps to one name.
5527 D = D->getCanonicalDecl();
 // Locals/parameters keep their plain name; globals use the mangled name so
 // the generated name is unique across translation units.
5528 std::string Name = CGM.getOpenMPRuntime().getName(
5529 {D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(D)});
5530 Out << Prefix << Name << "_"
 // NOTE(review): the operand streamed after "_" (per the doc comment above,
 // the raw-encoded start location of the declaration) is elided in this
 // extraction — confirm against the upstream source.
5532 return std::string(Out.str());
5533}
5534
5535/// Emits reduction initializer function:
5536/// \code
5537/// void @.red_init(void* %arg, void* %orig) {
5538/// %0 = bitcast void* %arg to <type>*
5539/// store <type> <init>, <type>* %0
5540/// ret void
5541/// }
5542/// \endcode
5543 static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM,
5544 SourceLocation Loc,
5545 ReductionCodeGen &RCG, unsigned N) {
5546 ASTContext &C = CGM.getContext();
 // Both implicit parameters are void* restrict: %arg (the private copy to
 // initialize) and %orig (the original shared reduction item).
5547 QualType VoidPtrTy = C.VoidPtrTy;
5548 VoidPtrTy.addRestrict();
5549 FunctionArgList Args;
5550 ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
5552 ImplicitParamDecl ParamOrig(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
5554 Args.emplace_back(&Param);
5555 Args.emplace_back(&ParamOrig);
 // Create the internal-linkage ".red_init." helper function.
5556 const auto &FnInfo =
5557 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5558 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
5559 std::string Name = CGM.getOpenMPRuntime().getName({"red_init", ""});
5560 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
5561 Name, &CGM.getModule());
5562 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
5563 Fn->setDoesNotRecurse();
5564 CodeGenFunction CGF(CGM);
5565 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
 // %arg points at the private reduction item; load it with the item's type.
5566 QualType PrivateType = RCG.getPrivateType(N);
5567 Address PrivateAddr = CGF.EmitLoadOfPointer(
5568 CGF.GetAddrOfLocalVar(&Param).withElementType(CGF.Builder.getPtrTy(0)),
5569 C.getPointerType(PrivateType)->castAs<PointerType>());
5570 llvm::Value *Size = nullptr;
5571 // If the size of the reduction item is non-constant, load it from global
5572 // threadprivate variable.
5573 if (RCG.getSizes(N).second) {
 // NOTE(review): the declaration of SizeAddr (an artificial threadprivate
 // variable named via "reduction_size") is elided in this extraction.
5575 CGF, CGM.getContext().getSizeType(),
5576 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
5577 Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
5578 CGM.getContext().getSizeType(), Loc);
5579 }
5580 RCG.emitAggregateType(CGF, N, Size);
5581 Address OrigAddr = Address::invalid();
5582 // If initializer uses initializer from declare reduction construct, emit a
5583 // pointer to the address of the original reduction item (required by reduction
5584 // initializer)
5585 if (RCG.usesReductionInitializer(N)) {
5586 Address SharedAddr = CGF.GetAddrOfLocalVar(&ParamOrig);
5587 OrigAddr = CGF.EmitLoadOfPointer(
5588 SharedAddr,
5589 CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr());
5590 }
5591 // Emit the initializer:
5592 // %0 = bitcast void* %arg to <type>*
5593 // store <type> <init>, <type>* %0
5594 RCG.emitInitialization(CGF, N, PrivateAddr, OrigAddr,
5595 [](CodeGenFunction &) { return false; });
5596 CGF.FinishFunction();
5597 return Fn;
5598}
5599
5600/// Emits reduction combiner function:
5601/// \code
5602/// void @.red_comb(void* %arg0, void* %arg1) {
5603/// %lhs = bitcast void* %arg0 to <type>*
5604/// %rhs = bitcast void* %arg1 to <type>*
5605/// %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs)
5606/// store <type> %2, <type>* %lhs
5607/// ret void
5608/// }
5609/// \endcode
5610 static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM,
5611 SourceLocation Loc,
5612 ReductionCodeGen &RCG, unsigned N,
5613 const Expr *ReductionOp,
5614 const Expr *LHS, const Expr *RHS,
5615 const Expr *PrivateRef) {
5616 ASTContext &C = CGM.getContext();
 // LHS/RHS are the compiler-generated placeholder variables referenced by
 // the combiner expression; they are remapped to the function arguments
 // below via an OMPPrivateScope.
5617 const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl());
5618 const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl());
5619 FunctionArgList Args;
5620 ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
5621 C.VoidPtrTy, ImplicitParamKind::Other);
5622 ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
5624 Args.emplace_back(&ParamInOut);
5625 Args.emplace_back(&ParamIn);
 // Create the internal-linkage ".red_comb." helper function.
5626 const auto &FnInfo =
5627 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5628 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
5629 std::string Name = CGM.getOpenMPRuntime().getName({"red_comb", ""});
5630 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
5631 Name, &CGM.getModule());
5632 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
5633 Fn->setDoesNotRecurse();
5634 CodeGenFunction CGF(CGM);
5635 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
5636 llvm::Value *Size = nullptr;
5637 // If the size of the reduction item is non-constant, load it from global
5638 // threadprivate variable.
5639 if (RCG.getSizes(N).second) {
 // NOTE(review): the declaration of SizeAddr (artificial threadprivate
 // "reduction_size" variable) is elided in this extraction.
5641 CGF, CGM.getContext().getSizeType(),
5642 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
5643 Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
5644 CGM.getContext().getSizeType(), Loc);
5645 }
5646 RCG.emitAggregateType(CGF, N, Size);
5647 // Remap lhs and rhs variables to the addresses of the function arguments.
5648 // %lhs = bitcast void* %arg0 to <type>*
5649 // %rhs = bitcast void* %arg1 to <type>*
5650 CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
5651 PrivateScope.addPrivate(
5652 LHSVD,
5653 // Pull out the pointer to the variable.
5655 CGF.GetAddrOfLocalVar(&ParamInOut)
5656 .withElementType(CGF.Builder.getPtrTy(0)),
5657 C.getPointerType(LHSVD->getType())->castAs<PointerType>()));
5658 PrivateScope.addPrivate(
5659 RHSVD,
5660 // Pull out the pointer to the variable.
5662 CGF.GetAddrOfLocalVar(&ParamIn).withElementType(
5663 CGF.Builder.getPtrTy(0)),
5664 C.getPointerType(RHSVD->getType())->castAs<PointerType>()));
5665 PrivateScope.Privatize();
5666 // Emit the combiner body:
5667 // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs)
5668 // store <type> %2, <type>* %lhs
 // NOTE(review): the callee line of this combiner emission (by the pattern
 // used elsewhere in this file, emitSingleReductionCombiner) is elided here
 // — confirm against the upstream source.
5670 CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS),
5671 cast<DeclRefExpr>(RHS));
5672 CGF.FinishFunction();
5673 return Fn;
5674}
5675
5676/// Emits reduction finalizer function:
5677/// \code
5678/// void @.red_fini(void* %arg) {
5679/// %0 = bitcast void* %arg to <type>*
5680/// <destroy>(<type>* %0)
5681/// ret void
5682/// }
5683/// \endcode
5684 static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM,
5685 SourceLocation Loc,
5686 ReductionCodeGen &RCG, unsigned N) {
 // No finalizer is required when the reduction item needs no cleanups; the
 // caller stores a null pointer in reduce_fini in that case.
5687 if (!RCG.needCleanups(N))
5688 return nullptr;
5689 ASTContext &C = CGM.getContext();
5690 FunctionArgList Args;
 // Single void* parameter: the private reduction item to destroy.
5691 ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
5693 Args.emplace_back(&Param);
 // Create the internal-linkage ".red_fini." helper function.
5694 const auto &FnInfo =
5695 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5696 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
5697 std::string Name = CGM.getOpenMPRuntime().getName({"red_fini", ""});
5698 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
5699 Name, &CGM.getModule());
5700 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
5701 Fn->setDoesNotRecurse();
5702 CodeGenFunction CGF(CGM);
5703 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
5704 Address PrivateAddr = CGF.EmitLoadOfPointer(
5705 CGF.GetAddrOfLocalVar(&Param), C.VoidPtrTy.castAs<PointerType>());
5706 llvm::Value *Size = nullptr;
5707 // If the size of the reduction item is non-constant, load it from global
5708 // threadprivate variable.
5709 if (RCG.getSizes(N).second) {
 // NOTE(review): the declaration of SizeAddr (artificial threadprivate
 // "reduction_size" variable) is elided in this extraction.
5711 CGF, CGM.getContext().getSizeType(),
5712 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
5713 Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
5714 CGM.getContext().getSizeType(), Loc);
5715 }
5716 RCG.emitAggregateType(CGF, N, Size);
5717 // Emit the finalizer body:
5718 // <destroy>(<type>* %0)
5719 RCG.emitCleanups(CGF, N, PrivateAddr);
5720 CGF.FinishFunction(Loc);
5721 return Fn;
5722}
5723
5726 ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
5727 if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty())
5728 return nullptr;
5729
5730 // Build typedef struct:
5731 // kmp_taskred_input {
5732 // void *reduce_shar; // shared reduction item
5733 // void *reduce_orig; // original reduction item used for initialization
5734 // size_t reduce_size; // size of data item
5735 // void *reduce_init; // data initialization routine
5736 // void *reduce_fini; // data finalization routine
5737 // void *reduce_comb; // data combiner routine
5738 // kmp_task_red_flags_t flags; // flags for additional info from compiler
5739 // } kmp_taskred_input_t;
5740 ASTContext &C = CGM.getContext();
5741 RecordDecl *RD = C.buildImplicitRecord("kmp_taskred_input_t");
5742 RD->startDefinition();
5743 const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
5744 const FieldDecl *OrigFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
5745 const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType());
5746 const FieldDecl *InitFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
5747 const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
5748 const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
5749 const FieldDecl *FlagsFD = addFieldToRecordDecl(
5750 C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false));
5751 RD->completeDefinition();
5752 CanQualType RDType = C.getCanonicalTagType(RD);
5753 unsigned Size = Data.ReductionVars.size();
5754 llvm::APInt ArraySize(/*numBits=*/64, Size);
5755 QualType ArrayRDType =
5756 C.getConstantArrayType(RDType, ArraySize, nullptr,
5757 ArraySizeModifier::Normal, /*IndexTypeQuals=*/0);
5758 // kmp_task_red_input_t .rd_input.[Size];
5759 RawAddress TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input.");
5760 ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionOrigs,
5761 Data.ReductionCopies, Data.ReductionOps);
5762 for (unsigned Cnt = 0; Cnt < Size; ++Cnt) {
5763 // kmp_task_red_input_t &ElemLVal = .rd_input.[Cnt];
5764 llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0),
5765 llvm::ConstantInt::get(CGM.SizeTy, Cnt)};
5766 llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP(
5767 TaskRedInput.getElementType(), TaskRedInput.getPointer(), Idxs,
5768 /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc,
5769 ".rd_input.gep.");
5770 LValue ElemLVal = CGF.MakeNaturalAlignRawAddrLValue(GEP, RDType);
5771 // ElemLVal.reduce_shar = &Shareds[Cnt];
5772 LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD);
5773 RCG.emitSharedOrigLValue(CGF, Cnt);
5774 llvm::Value *Shared = RCG.getSharedLValue(Cnt).getPointer(CGF);
5775 CGF.EmitStoreOfScalar(Shared, SharedLVal);
5776 // ElemLVal.reduce_orig = &Origs[Cnt];
5777 LValue OrigLVal = CGF.EmitLValueForField(ElemLVal, OrigFD);
5778 llvm::Value *Orig = RCG.getOrigLValue(Cnt).getPointer(CGF);
5779 CGF.EmitStoreOfScalar(Orig, OrigLVal);
5780 RCG.emitAggregateType(CGF, Cnt);
5781 llvm::Value *SizeValInChars;
5782 llvm::Value *SizeVal;
5783 std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt);
5784 // We use delayed creation/initialization for VLAs and array sections. It is
5785 // required because runtime does not provide the way to pass the sizes of
5786 // VLAs/array sections to initializer/combiner/finalizer functions. Instead
5787 // threadprivate global variables are used to store these values and use
5788 // them in the functions.
5789 bool DelayedCreation = !!SizeVal;
5790 SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy,
5791 /*isSigned=*/false);
5792 LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD);
5793 CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal);
5794 // ElemLVal.reduce_init = init;
5795 LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD);
5796 llvm::Value *InitAddr = emitReduceInitFunction(CGM, Loc, RCG, Cnt);
5797 CGF.EmitStoreOfScalar(InitAddr, InitLVal);
5798 // ElemLVal.reduce_fini = fini;
5799 LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD);
5800 llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt);
5801 llvm::Value *FiniAddr =
5802 Fini ? Fini : llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
5803 CGF.EmitStoreOfScalar(FiniAddr, FiniLVal);
5804 // ElemLVal.reduce_comb = comb;
5805 LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD);
5806 llvm::Value *CombAddr = emitReduceCombFunction(
5807 CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt],
5808 RHSExprs[Cnt], Data.ReductionCopies[Cnt]);
5809 CGF.EmitStoreOfScalar(CombAddr, CombLVal);
5810 // ElemLVal.flags = 0;
5811 LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD);
5812 if (DelayedCreation) {
5814 llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*isSigned=*/true),
5815 FlagsLVal);
5816 } else
5817 CGF.EmitNullInitialization(FlagsLVal.getAddress(), FlagsLVal.getType());
5818 }
5819 if (Data.IsReductionWithTaskMod) {
5820 // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int
5821 // is_ws, int num, void *data);
5822 llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
5823 llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
5824 CGM.IntTy, /*isSigned=*/true);
5825 llvm::Value *Args[] = {
5826 IdentTLoc, GTid,
5827 llvm::ConstantInt::get(CGM.IntTy, Data.IsWorksharingReduction ? 1 : 0,
5828 /*isSigned=*/true),
5829 llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
5831 TaskRedInput.getPointer(), CGM.VoidPtrTy)};
5832 return CGF.EmitRuntimeCall(
5833 OMPBuilder.getOrCreateRuntimeFunction(
5834 CGM.getModule(), OMPRTL___kmpc_taskred_modifier_init),
5835 Args);
5836 }
5837 // Build call void *__kmpc_taskred_init(int gtid, int num_data, void *data);
5838 llvm::Value *Args[] = {
5839 CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
5840 /*isSigned=*/true),
5841 llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
5843 CGM.VoidPtrTy)};
5844 return CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
5845 CGM.getModule(), OMPRTL___kmpc_taskred_init),
5846 Args);
5847}
5848
5850 SourceLocation Loc,
5851 bool IsWorksharingReduction) {
5852 // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int
5853 // is_ws, int num, void *data);
5854 llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
5855 llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
5856 CGM.IntTy, /*isSigned=*/true);
5857 llvm::Value *Args[] = {IdentTLoc, GTid,
5858 llvm::ConstantInt::get(CGM.IntTy,
5859 IsWorksharingReduction ? 1 : 0,
5860 /*isSigned=*/true)};
5861 (void)CGF.EmitRuntimeCall(
5862 OMPBuilder.getOrCreateRuntimeFunction(
5863 CGM.getModule(), OMPRTL___kmpc_task_reduction_modifier_fini),
5864 Args);
5865}
5866
5868 SourceLocation Loc,
5869 ReductionCodeGen &RCG,
5870 unsigned N) {
5871 auto Sizes = RCG.getSizes(N);
5872 // Emit threadprivate global variable if the type is non-constant
5873 // (Sizes.second = nullptr).
5874 if (Sizes.second) {
5875 llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy,
5876 /*isSigned=*/false);
5878 CGF, CGM.getContext().getSizeType(),
5879 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
5880 CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false);
5881 }
5882}
5883
5885 SourceLocation Loc,
5886 llvm::Value *ReductionsPtr,
5887 LValue SharedLVal) {
5888 // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
5889 // *d);
5890 llvm::Value *Args[] = {CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
5891 CGM.IntTy,
5892 /*isSigned=*/true),
5893 ReductionsPtr,
5895 SharedLVal.getPointer(CGF), CGM.VoidPtrTy)};
5896 return Address(
5897 CGF.EmitRuntimeCall(
5898 OMPBuilder.getOrCreateRuntimeFunction(
5899 CGM.getModule(), OMPRTL___kmpc_task_reduction_get_th_data),
5900 Args),
5901 CGF.Int8Ty, SharedLVal.getAlignment());
5902}
5903
5905 const OMPTaskDataTy &Data) {
5906 if (!CGF.HaveInsertPoint())
5907 return;
5908
5909 if (CGF.CGM.getLangOpts().OpenMPIRBuilder && Data.Dependences.empty()) {
5910 // TODO: Need to support taskwait with dependences in the OpenMPIRBuilder.
5911 OMPBuilder.createTaskwait(CGF.Builder);
5912 } else {
5913 llvm::Value *ThreadID = getThreadID(CGF, Loc);
5914 llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
5915 auto &M = CGM.getModule();
5916 Address DependenciesArray = Address::invalid();
5917 llvm::Value *NumOfElements;
5918 std::tie(NumOfElements, DependenciesArray) =
5919 emitDependClause(CGF, Data.Dependences, Loc);
5920 if (!Data.Dependences.empty()) {
5921 llvm::Value *DepWaitTaskArgs[7];
5922 DepWaitTaskArgs[0] = UpLoc;
5923 DepWaitTaskArgs[1] = ThreadID;
5924 DepWaitTaskArgs[2] = NumOfElements;
5925 DepWaitTaskArgs[3] = DependenciesArray.emitRawPointer(CGF);
5926 DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
5927 DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
5928 DepWaitTaskArgs[6] =
5929 llvm::ConstantInt::get(CGF.Int32Ty, Data.HasNowaitClause);
5930
5931 CodeGenFunction::RunCleanupsScope LocalScope(CGF);
5932
5933 // Build void __kmpc_omp_taskwait_deps_51(ident_t *, kmp_int32 gtid,
5934 // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
5935 // ndeps_noalias, kmp_depend_info_t *noalias_dep_list,
5936 // kmp_int32 has_no_wait); if dependence info is specified.
5937 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
5938 M, OMPRTL___kmpc_omp_taskwait_deps_51),
5939 DepWaitTaskArgs);
5940
5941 } else {
5942
5943 // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
5944 // global_tid);
5945 llvm::Value *Args[] = {UpLoc, ThreadID};
5946 // Ignore return result until untied tasks are supported.
5947 CGF.EmitRuntimeCall(
5948 OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_taskwait),
5949 Args);
5950 }
5951 }
5952
5953 if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
5954 Region->emitUntiedSwitch(CGF);
5955}
5956
5958 OpenMPDirectiveKind InnerKind,
5959 const RegionCodeGenTy &CodeGen,
5960 bool HasCancel) {
5961 if (!CGF.HaveInsertPoint())
5962 return;
5963 InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel,
5964 InnerKind != OMPD_critical &&
5965 InnerKind != OMPD_master &&
5966 InnerKind != OMPD_masked);
5967 CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);
5968}
5969
5970namespace {
/// Integer codes identifying which construct kind a cancellation applies to.
/// These values are passed verbatim as the kmp_int32 `cncl_kind` argument of
/// the runtime entry points __kmpc_cancel and __kmpc_cancellationpoint (see
/// the call sites below), so the numeric values must match what the OpenMP
/// runtime expects — do not renumber.
5971enum RTCancelKind {
  /// No cancellation requested.
5972 CancelNoreq = 0,
  /// Cancel the innermost enclosing 'parallel' region.
5973 CancelParallel = 1,
  /// Cancel the innermost enclosing worksharing loop ('for').
5974 CancelLoop = 2,
  /// Cancel the innermost enclosing 'sections' region.
5975 CancelSections = 3,
  /// Cancel the innermost enclosing 'taskgroup'.
5976 CancelTaskgroup = 4
5977};
5978} // anonymous namespace
5979
5980static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) {
5981 RTCancelKind CancelKind = CancelNoreq;
5982 if (CancelRegion == OMPD_parallel)
5983 CancelKind = CancelParallel;
5984 else if (CancelRegion == OMPD_for)
5985 CancelKind = CancelLoop;
5986 else if (CancelRegion == OMPD_sections)
5987 CancelKind = CancelSections;
5988 else {
5989 assert(CancelRegion == OMPD_taskgroup);
5990 CancelKind = CancelTaskgroup;
5991 }
5992 return CancelKind;
5993}
5994
5997 OpenMPDirectiveKind CancelRegion) {
5998 if (!CGF.HaveInsertPoint())
5999 return;
6000 // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
6001 // global_tid, kmp_int32 cncl_kind);
6002 if (auto *OMPRegionInfo =
6003 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
6004 // For 'cancellation point taskgroup', the task region info may not have a
6005 // cancel. This may instead happen in another adjacent task.
6006 if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) {
6007 llvm::Value *Args[] = {
6008 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
6009 CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
6010 // Ignore return result until untied tasks are supported.
6011 llvm::Value *Result = CGF.EmitRuntimeCall(
6012 OMPBuilder.getOrCreateRuntimeFunction(
6013 CGM.getModule(), OMPRTL___kmpc_cancellationpoint),
6014 Args);
6015 // if (__kmpc_cancellationpoint()) {
6016 // call i32 @__kmpc_cancel_barrier( // for parallel cancellation only
6017 // exit from construct;
6018 // }
6019 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
6020 llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
6021 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
6022 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
6023 CGF.EmitBlock(ExitBB);
6024 if (CancelRegion == OMPD_parallel)
6025 emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
6026 // exit from construct;
6027 CodeGenFunction::JumpDest CancelDest =
6028 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
6029 CGF.EmitBranchThroughCleanup(CancelDest);
6030 CGF.EmitBlock(ContBB, /*IsFinished=*/true);
6031 }
6032 }
6033}
6034
6036 const Expr *IfCond,
6037 OpenMPDirectiveKind CancelRegion) {
6038 if (!CGF.HaveInsertPoint())
6039 return;
6040 // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
6041 // kmp_int32 cncl_kind);
6042 auto &M = CGM.getModule();
6043 if (auto *OMPRegionInfo =
6044 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
6045 auto &&ThenGen = [this, &M, Loc, CancelRegion,
6046 OMPRegionInfo](CodeGenFunction &CGF, PrePostActionTy &) {
6047 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
6048 llvm::Value *Args[] = {
6049 RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc),
6050 CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
6051 // Ignore return result until untied tasks are supported.
6052 llvm::Value *Result = CGF.EmitRuntimeCall(
6053 OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_cancel), Args);
6054 // if (__kmpc_cancel()) {
6055 // call i32 @__kmpc_cancel_barrier( // for parallel cancellation only
6056 // exit from construct;
6057 // }
6058 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
6059 llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
6060 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
6061 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
6062 CGF.EmitBlock(ExitBB);
6063 if (CancelRegion == OMPD_parallel)
6064 RT.emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
6065 // exit from construct;
6066 CodeGenFunction::JumpDest CancelDest =
6067 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
6068 CGF.EmitBranchThroughCleanup(CancelDest);
6069 CGF.EmitBlock(ContBB, /*IsFinished=*/true);
6070 };
6071 if (IfCond) {
6072 emitIfClause(CGF, IfCond, ThenGen,
6073 [](CodeGenFunction &, PrePostActionTy &) {});
6074 } else {
6075 RegionCodeGenTy ThenRCG(ThenGen);
6076 ThenRCG(CGF);
6077 }
6078 }
6079}
6080
6081namespace {
6082/// Cleanup action for uses_allocators support.
6083class OMPUsesAllocatorsActionTy final : public PrePostActionTy {
6085
6086public:
6087 OMPUsesAllocatorsActionTy(
6088 ArrayRef<std::pair<const Expr *, const Expr *>> Allocators)
6089 : Allocators(Allocators) {}
6090 void Enter(CodeGenFunction &CGF) override {
6091 if (!CGF.HaveInsertPoint())
6092 return;
6093 for (const auto &AllocatorData : Allocators) {
6095 CGF, AllocatorData.first, AllocatorData.second);
6096 }
6097 }
6098 void Exit(CodeGenFunction &CGF) override {
6099 if (!CGF.HaveInsertPoint())
6100 return;
6101 for (const auto &AllocatorData : Allocators) {
6103 AllocatorData.first);
6104 }
6105 }
6106};
6107} // namespace
6108
6110 const OMPExecutableDirective &D, StringRef ParentName,
6111 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
6112 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
6113 assert(!ParentName.empty() && "Invalid target entry parent name!");
6116 for (const auto *C : D.getClausesOfKind<OMPUsesAllocatorsClause>()) {
6117 for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
6118 const OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
6119 if (!D.AllocatorTraits)
6120 continue;
6121 Allocators.emplace_back(D.Allocator, D.AllocatorTraits);
6122 }
6123 }
6124 OMPUsesAllocatorsActionTy UsesAllocatorAction(Allocators);
6125 CodeGen.setAction(UsesAllocatorAction);
6126 emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
6127 IsOffloadEntry, CodeGen);
6128}
6129
6131 const Expr *Allocator,
6132 const Expr *AllocatorTraits) {
6133 llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
6134 ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
6135 // Use default memspace handle.
6136 llvm::Value *MemSpaceHandle = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
6137 llvm::Value *NumTraits = llvm::ConstantInt::get(
6139 AllocatorTraits->getType()->getAsArrayTypeUnsafe())
6140 ->getSize()
6141 .getLimitedValue());
6142 LValue AllocatorTraitsLVal = CGF.EmitLValue(AllocatorTraits);
6144 AllocatorTraitsLVal.getAddress(), CGF.VoidPtrPtrTy, CGF.VoidPtrTy);
6145 AllocatorTraitsLVal = CGF.MakeAddrLValue(Addr, CGF.getContext().VoidPtrTy,
6146 AllocatorTraitsLVal.getBaseInfo(),
6147 AllocatorTraitsLVal.getTBAAInfo());
6148 llvm::Value *Traits = Addr.emitRawPointer(CGF);
6149
6150 llvm::Value *AllocatorVal =
6151 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
6152 CGM.getModule(), OMPRTL___kmpc_init_allocator),
6153 {ThreadId, MemSpaceHandle, NumTraits, Traits});
6154 // Store to allocator.
6156 cast<DeclRefExpr>(Allocator->IgnoreParenImpCasts())->getDecl()));
6157 LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
6158 AllocatorVal =
6159 CGF.EmitScalarConversion(AllocatorVal, CGF.getContext().VoidPtrTy,
6160 Allocator->getType(), Allocator->getExprLoc());
6161 CGF.EmitStoreOfScalar(AllocatorVal, AllocatorLVal);
6162}
6163
6165 const Expr *Allocator) {
6166 llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
6167 ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
6168 LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
6169 llvm::Value *AllocatorVal =
6170 CGF.EmitLoadOfScalar(AllocatorLVal, Allocator->getExprLoc());
6171 AllocatorVal = CGF.EmitScalarConversion(AllocatorVal, Allocator->getType(),
6172 CGF.getContext().VoidPtrTy,
6173 Allocator->getExprLoc());
6174 (void)CGF.EmitRuntimeCall(
6175 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
6176 OMPRTL___kmpc_destroy_allocator),
6177 {ThreadId, AllocatorVal});
6178}
6179
6182 llvm::OpenMPIRBuilder::TargetKernelDefaultAttrs &Attrs) {
6183 assert(Attrs.MaxTeams.size() == 1 && Attrs.MaxThreads.size() == 1 &&
6184 "invalid default attrs structure");
6185 int32_t &MaxTeamsVal = Attrs.MaxTeams.front();
6186 int32_t &MaxThreadsVal = Attrs.MaxThreads.front();
6187
6188 getNumTeamsExprForTargetDirective(CGF, D, Attrs.MinTeams, MaxTeamsVal);
6189 getNumThreadsExprForTargetDirective(CGF, D, MaxThreadsVal,
6190 /*UpperBoundOnly=*/true);
6191
6192 for (auto *C : D.getClausesOfKind<OMPXAttributeClause>()) {
6193 for (auto *A : C->getAttrs()) {
6194 int32_t AttrMinThreadsVal = 1, AttrMaxThreadsVal = -1;
6195 int32_t AttrMinBlocksVal = 1, AttrMaxBlocksVal = -1;
6196 if (auto *Attr = dyn_cast<CUDALaunchBoundsAttr>(A))
6197 CGM.handleCUDALaunchBoundsAttr(nullptr, Attr, &AttrMaxThreadsVal,
6198 &AttrMinBlocksVal, &AttrMaxBlocksVal);
6199 else if (auto *Attr = dyn_cast<AMDGPUFlatWorkGroupSizeAttr>(A))
6200 CGM.handleAMDGPUFlatWorkGroupSizeAttr(
6201 nullptr, Attr, /*ReqdWGS=*/nullptr, &AttrMinThreadsVal,
6202 &AttrMaxThreadsVal);
6203 else
6204 continue;
6205
6206 Attrs.MinThreads = std::max(Attrs.MinThreads, AttrMinThreadsVal);
6207 if (AttrMaxThreadsVal > 0)
6208 MaxThreadsVal = MaxThreadsVal > 0
6209 ? std::min(MaxThreadsVal, AttrMaxThreadsVal)
6210 : AttrMaxThreadsVal;
6211 Attrs.MinTeams = std::max(Attrs.MinTeams, AttrMinBlocksVal);
6212 if (AttrMaxBlocksVal > 0)
6213 MaxTeamsVal = MaxTeamsVal > 0 ? std::min(MaxTeamsVal, AttrMaxBlocksVal)
6214 : AttrMaxBlocksVal;
6215 }
6216 }
6217}
6218
6220 const OMPExecutableDirective &D, StringRef ParentName,
6221 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
6222 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
6223
6224 llvm::TargetRegionEntryInfo EntryInfo =
6225 getEntryInfoFromPresumedLoc(CGM, OMPBuilder, D.getBeginLoc(), ParentName);
6226
6227 CodeGenFunction CGF(CGM, true);
6228 llvm::OpenMPIRBuilder::FunctionGenCallback &&GenerateOutlinedFunction =
6229 [&CGF, &D, &CodeGen](StringRef EntryFnName) {
6230 const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
6231
6232 CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName);
6233 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6234 return CGF.GenerateOpenMPCapturedStmtFunction(CS, D);
6235 };
6236
6237 cantFail(OMPBuilder.emitTargetRegionFunction(
6238 EntryInfo, GenerateOutlinedFunction, IsOffloadEntry, OutlinedFn,
6239 OutlinedFnID));
6240
6241 if (!OutlinedFn)
6242 return;
6243
6244 CGM.getTargetCodeGenInfo().setTargetAttributes(nullptr, OutlinedFn, CGM);
6245
6246 for (auto *C : D.getClausesOfKind<OMPXAttributeClause>()) {
6247 for (auto *A : C->getAttrs()) {
6248 if (auto *Attr = dyn_cast<AMDGPUWavesPerEUAttr>(A))
6249 CGM.handleAMDGPUWavesPerEUAttr(OutlinedFn, Attr);
6250 }
6251 }
6252}
6253
6254/// Checks if the expression is constant or does not have non-trivial function
6255/// calls.
6256static bool isTrivial(ASTContext &Ctx, const Expr * E) {
6257 // We can skip constant expressions.
6258 // We can skip expressions with trivial calls or simple expressions.
6260 !E->hasNonTrivialCall(Ctx)) &&
6261 !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true);
6262}
6263
6265 const Stmt *Body) {
6266 const Stmt *Child = Body->IgnoreContainers();
6267 while (const auto *C = dyn_cast_or_null<CompoundStmt>(Child)) {
6268 Child = nullptr;
6269 for (const Stmt *S : C->body()) {
6270 if (const auto *E = dyn_cast<Expr>(S)) {
6271 if (isTrivial(Ctx, E))
6272 continue;
6273 }
6274 // Some of the statements can be ignored.
6277 continue;
6278 // Analyze declarations.
6279 if (const auto *DS = dyn_cast<DeclStmt>(S)) {
6280 if (llvm::all_of(DS->decls(), [](const Decl *D) {
6281 if (isa<EmptyDecl>(D) || isa<DeclContext>(D) ||
6282 isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) ||
6283 isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) ||
6284 isa<UsingDirectiveDecl>(D) ||
6285 isa<OMPDeclareReductionDecl>(D) ||
6286 isa<OMPThreadPrivateDecl>(D) || isa<OMPAllocateDecl>(D))
6287 return true;
6288 const auto *VD = dyn_cast<VarDecl>(D);
6289 if (!VD)
6290 return false;
6291 return VD->hasGlobalStorage() || !VD->isUsed();
6292 }))
6293 continue;
6294 }
6295 // Found multiple children - cannot get the one child only.
6296 if (Child)
6297 return nullptr;
6298 Child = S;
6299 }
6300 if (Child)
6301 Child = Child->IgnoreContainers();
6302 }
6303 return Child;
6304}
6305
6307 CodeGenFunction &CGF, const OMPExecutableDirective &D, int32_t &MinTeamsVal,
6308 int32_t &MaxTeamsVal) {
6309
6310 OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
6311 assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
6312 "Expected target-based executable directive.");
6313 switch (DirectiveKind) {
6314 case OMPD_target: {
6315 const auto *CS = D.getInnermostCapturedStmt();
6316 const auto *Body =
6317 CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
6318 const Stmt *ChildStmt =
6320 if (const auto *NestedDir =
6321 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
6322 if (isOpenMPTeamsDirective(NestedDir->getDirectiveKind())) {
6323 if (NestedDir->hasClausesOfKind<OMPNumTeamsClause>()) {
6324 const Expr *NumTeams = NestedDir->getSingleClause<OMPNumTeamsClause>()
6325 ->getNumTeams()
6326 .front();
6327 if (NumTeams->isIntegerConstantExpr(CGF.getContext()))
6328 if (auto Constant =
6329 NumTeams->getIntegerConstantExpr(CGF.getContext()))
6330 MinTeamsVal = MaxTeamsVal = Constant->getExtValue();
6331 return NumTeams;
6332 }
6333 MinTeamsVal = MaxTeamsVal = 0;
6334 return nullptr;
6335 }
6336 MinTeamsVal = MaxTeamsVal = 1;
6337 return nullptr;
6338 }
6339 // A value of -1 is used to check if we need to emit no teams region
6340 MinTeamsVal = MaxTeamsVal = -1;
6341 return nullptr;
6342 }
6343 case OMPD_target_teams_loop:
6344 case OMPD_target_teams:
6345 case OMPD_target_teams_distribute:
6346 case OMPD_target_teams_distribute_simd:
6347 case OMPD_target_teams_distribute_parallel_for:
6348 case OMPD_target_teams_distribute_parallel_for_simd: {
6349 if (D.hasClausesOfKind<OMPNumTeamsClause>()) {
6350 const Expr *NumTeams =
6351 D.getSingleClause<OMPNumTeamsClause>()->getNumTeams().front();
6352 if (NumTeams->isIntegerConstantExpr(CGF.getContext()))
6353 if (auto Constant = NumTeams->getIntegerConstantExpr(CGF.getContext()))
6354 MinTeamsVal = MaxTeamsVal = Constant->getExtValue();
6355 return NumTeams;
6356 }
6357 MinTeamsVal = MaxTeamsVal = 0;
6358 return nullptr;
6359 }
6360 case OMPD_target_parallel:
6361 case OMPD_target_parallel_for:
6362 case OMPD_target_parallel_for_simd:
6363 case OMPD_target_parallel_loop:
6364 case OMPD_target_simd:
6365 MinTeamsVal = MaxTeamsVal = 1;
6366 return nullptr;
6367 case OMPD_parallel:
6368 case OMPD_for:
6369 case OMPD_parallel_for:
6370 case OMPD_parallel_loop:
6371 case OMPD_parallel_master:
6372 case OMPD_parallel_sections:
6373 case OMPD_for_simd:
6374 case OMPD_parallel_for_simd:
6375 case OMPD_cancel:
6376 case OMPD_cancellation_point:
6377 case OMPD_ordered:
6378 case OMPD_threadprivate:
6379 case OMPD_allocate:
6380 case OMPD_task:
6381 case OMPD_simd:
6382 case OMPD_tile:
6383 case OMPD_unroll:
6384 case OMPD_sections:
6385 case OMPD_section:
6386 case OMPD_single:
6387 case OMPD_master:
6388 case OMPD_critical:
6389 case OMPD_taskyield:
6390 case OMPD_barrier:
6391 case OMPD_taskwait:
6392 case OMPD_taskgroup:
6393 case OMPD_atomic:
6394 case OMPD_flush:
6395 case OMPD_depobj:
6396 case OMPD_scan:
6397 case OMPD_teams:
6398 case OMPD_target_data:
6399 case OMPD_target_exit_data:
6400 case OMPD_target_enter_data:
6401 case OMPD_distribute:
6402 case OMPD_distribute_simd:
6403 case OMPD_distribute_parallel_for:
6404 case OMPD_distribute_parallel_for_simd:
6405 case OMPD_teams_distribute:
6406 case OMPD_teams_distribute_simd:
6407 case OMPD_teams_distribute_parallel_for:
6408 case OMPD_teams_distribute_parallel_for_simd:
6409 case OMPD_target_update:
6410 case OMPD_declare_simd:
6411 case OMPD_declare_variant:
6412 case OMPD_begin_declare_variant:
6413 case OMPD_end_declare_variant:
6414 case OMPD_declare_target:
6415 case OMPD_end_declare_target:
6416 case OMPD_declare_reduction:
6417 case OMPD_declare_mapper:
6418 case OMPD_taskloop:
6419 case OMPD_taskloop_simd:
6420 case OMPD_master_taskloop:
6421 case OMPD_master_taskloop_simd:
6422 case OMPD_parallel_master_taskloop:
6423 case OMPD_parallel_master_taskloop_simd:
6424 case OMPD_requires:
6425 case OMPD_metadirective:
6426 case OMPD_unknown:
6427 break;
6428 default:
6429 break;
6430 }
6431 llvm_unreachable("Unexpected directive kind.");
6432}
6433
6435 CodeGenFunction &CGF, const OMPExecutableDirective &D) {
6436 assert(!CGF.getLangOpts().OpenMPIsTargetDevice &&
6437 "Clauses associated with the teams directive expected to be emitted "
6438 "only for the host!");
6439 CGBuilderTy &Bld = CGF.Builder;
6440 int32_t MinNT = -1, MaxNT = -1;
6441 const Expr *NumTeams =
6442 getNumTeamsExprForTargetDirective(CGF, D, MinNT, MaxNT);
6443 if (NumTeams != nullptr) {
6444 OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
6445
6446 switch (DirectiveKind) {
6447 case OMPD_target: {
6448 const auto *CS = D.getInnermostCapturedStmt();
6449 CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6450 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6451 llvm::Value *NumTeamsVal = CGF.EmitScalarExpr(NumTeams,
6452 /*IgnoreResultAssign*/ true);
6453 return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
6454 /*isSigned=*/true);
6455 }
6456 case OMPD_target_teams:
6457 case OMPD_target_teams_distribute:
6458 case OMPD_target_teams_distribute_simd:
6459 case OMPD_target_teams_distribute_parallel_for:
6460 case OMPD_target_teams_distribute_parallel_for_simd: {
6461 CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF);
6462 llvm::Value *NumTeamsVal = CGF.EmitScalarExpr(NumTeams,
6463 /*IgnoreResultAssign*/ true);
6464 return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
6465 /*isSigned=*/true);
6466 }
6467 default:
6468 break;
6469 }
6470 }
6471
6472 assert(MinNT == MaxNT && "Num threads ranges require handling here.");
6473 return llvm::ConstantInt::getSigned(CGF.Int32Ty, MinNT);
6474}
6475
6476/// Check for a constant num_threads value (stored in \p UpperBound), or an
6477/// expression (stored in \p E). If the value is conditional (via an if-clause),
6478/// store the condition in \p CondVal. If \p E or \p CondVal is nullptr, the
6479/// corresponding expression evaluation is not performed.
6480static void getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS,
6481 const Expr **E, int32_t &UpperBound,
6482 bool UpperBoundOnly, llvm::Value **CondVal) {
6484 CGF.getContext(), CS->getCapturedStmt());
6485 const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child);
6486 if (!Dir)
6487 return;
6488
6489 if (isOpenMPParallelDirective(Dir->getDirectiveKind())) {
6490 // Handle if clause. If if clause present, the number of threads is
6491 // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
6492 if (CondVal && Dir->hasClausesOfKind<OMPIfClause>()) {
6493 CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6494 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6495 const OMPIfClause *IfClause = nullptr;
6496 for (const auto *C : Dir->getClausesOfKind<OMPIfClause>()) {
6497 if (C->getNameModifier() == OMPD_unknown ||
6498 C->getNameModifier() == OMPD_parallel) {
6499 IfClause = C;
6500 break;
6501 }
6502 }
6503 if (IfClause) {
6504 const Expr *CondExpr = IfClause->getCondition();
6505 bool Result;
6506 if (CondExpr->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
6507 if (!Result) {
6508 UpperBound = 1;
6509 return;
6510 }
6511 } else {
6513 if (const auto *PreInit =
6514 cast_or_null<DeclStmt>(IfClause->getPreInitStmt())) {
6515 for (const auto *I : PreInit->decls()) {
6516 if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
6517 CGF.EmitVarDecl(cast<VarDecl>(*I));
6518 } else {
6521 CGF.EmitAutoVarCleanups(Emission);
6522 }
6523 }
6524 *CondVal = CGF.EvaluateExprAsBool(CondExpr);
6525 }
6526 }
6527 }
6528 }
6529 // Check the value of num_threads clause iff if clause was not specified
6530 // or is not evaluated to false.
6531 if (Dir->hasClausesOfKind<OMPNumThreadsClause>()) {
6532 CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6533 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6534 const auto *NumThreadsClause =
6535 Dir->getSingleClause<OMPNumThreadsClause>();
6536 const Expr *NTExpr = NumThreadsClause->getNumThreads();
6537 if (NTExpr->isIntegerConstantExpr(CGF.getContext()))
6538 if (auto Constant = NTExpr->getIntegerConstantExpr(CGF.getContext()))
6539 UpperBound =
6540 UpperBound
6541 ? Constant->getZExtValue()
6542 : std::min(UpperBound,
6543 static_cast<int32_t>(Constant->getZExtValue()));
6544 // If we haven't found a upper bound, remember we saw a thread limiting
6545 // clause.
6546 if (UpperBound == -1)
6547 UpperBound = 0;
6548 if (!E)
6549 return;
6550 CodeGenFunction::LexicalScope Scope(CGF, NTExpr->getSourceRange());
6551 if (const auto *PreInit =
6552 cast_or_null<DeclStmt>(NumThreadsClause->getPreInitStmt())) {
6553 for (const auto *I : PreInit->decls()) {
6554 if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
6555 CGF.EmitVarDecl(cast<VarDecl>(*I));
6556 } else {
6559 CGF.EmitAutoVarCleanups(Emission);
6560 }
6561 }
6562 }
6563 *E = NTExpr;
6564 }
6565 return;
6566 }
6567 if (isOpenMPSimdDirective(Dir->getDirectiveKind()))
6568 UpperBound = 1;
6569}
6570
6572 CodeGenFunction &CGF, const OMPExecutableDirective &D, int32_t &UpperBound,
6573 bool UpperBoundOnly, llvm::Value **CondVal, const Expr **ThreadLimitExpr) {
6574 assert((!CGF.getLangOpts().OpenMPIsTargetDevice || UpperBoundOnly) &&
6575 "Clauses associated with the teams directive expected to be emitted "
6576 "only for the host!");
6577 OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
6578 assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
6579 "Expected target-based executable directive.");
6580
6581 const Expr *NT = nullptr;
6582 const Expr **NTPtr = UpperBoundOnly ? nullptr : &NT;
6583
6584 auto CheckForConstExpr = [&](const Expr *E, const Expr **EPtr) {
6585 if (E->isIntegerConstantExpr(CGF.getContext())) {
6586 if (auto Constant = E->getIntegerConstantExpr(CGF.getContext()))
6587 UpperBound = UpperBound ? Constant->getZExtValue()
6588 : std::min(UpperBound,
6589 int32_t(Constant->getZExtValue()));
6590 }
6591 // If we haven't found a upper bound, remember we saw a thread limiting
6592 // clause.
6593 if (UpperBound == -1)
6594 UpperBound = 0;
6595 if (EPtr)
6596 *EPtr = E;
6597 };
6598
6599 auto ReturnSequential = [&]() {
6600 UpperBound = 1;
6601 return NT;
6602 };
6603
6604 switch (DirectiveKind) {
6605 case OMPD_target: {
6606 const CapturedStmt *CS = D.getInnermostCapturedStmt();
6607 getNumThreads(CGF, CS, NTPtr, UpperBound, UpperBoundOnly, CondVal);
6609 CGF.getContext(), CS->getCapturedStmt());
6610 // TODO: The standard is not clear how to resolve two thread limit clauses,
6611 // let's pick the teams one if it's present, otherwise the target one.
6612 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6613 if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
6614 if (const auto *TLC = Dir->getSingleClause<OMPThreadLimitClause>()) {
6615 ThreadLimitClause = TLC;
6616 if (ThreadLimitExpr) {
6617 CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6618 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6620 CGF,
6621 ThreadLimitClause->getThreadLimit().front()->getSourceRange());
6622 if (const auto *PreInit =
6623 cast_or_null<DeclStmt>(ThreadLimitClause->getPreInitStmt())) {
6624 for (const auto *I : PreInit->decls()) {
6625 if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
6626 CGF.EmitVarDecl(cast<VarDecl>(*I));
6627 } else {
6630 CGF.EmitAutoVarCleanups(Emission);
6631 }
6632 }
6633 }
6634 }
6635 }
6636 }
6637 if (ThreadLimitClause)
6638 CheckForConstExpr(ThreadLimitClause->getThreadLimit().front(),
6639 ThreadLimitExpr);
6640 if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
6641 if (isOpenMPTeamsDirective(Dir->getDirectiveKind()) &&
6642 !isOpenMPDistributeDirective(Dir->getDirectiveKind())) {
6643 CS = Dir->getInnermostCapturedStmt();
6645 CGF.getContext(), CS->getCapturedStmt());
6646 Dir = dyn_cast_or_null<OMPExecutableDirective>(Child);
6647 }
6648 if (Dir && isOpenMPParallelDirective(Dir->getDirectiveKind())) {
6649 CS = Dir->getInnermostCapturedStmt();
6650 getNumThreads(CGF, CS, NTPtr, UpperBound, UpperBoundOnly, CondVal);
6651 } else if (Dir && isOpenMPSimdDirective(Dir->getDirectiveKind()))
6652 return ReturnSequential();
6653 }
6654 return NT;
6655 }
6656 case OMPD_target_teams: {
6657 if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
6658 CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
6659 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6660 CheckForConstExpr(ThreadLimitClause->getThreadLimit().front(),
6661 ThreadLimitExpr);
6662 }
6663 const CapturedStmt *CS = D.getInnermostCapturedStmt();
6664 getNumThreads(CGF, CS, NTPtr, UpperBound, UpperBoundOnly, CondVal);
6666 CGF.getContext(), CS->getCapturedStmt());
6667 if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
6668 if (Dir->getDirectiveKind() == OMPD_distribute) {
6669 CS = Dir->getInnermostCapturedStmt();
6670 getNumThreads(CGF, CS, NTPtr, UpperBound, UpperBoundOnly, CondVal);
6671 }
6672 }
6673 return NT;
6674 }
6675 case OMPD_target_teams_distribute:
6676 if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
6677 CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
6678 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6679 CheckForConstExpr(ThreadLimitClause->getThreadLimit().front(),
6680 ThreadLimitExpr);
6681 }
6682 getNumThreads(CGF, D.getInnermostCapturedStmt(), NTPtr, UpperBound,
6683 UpperBoundOnly, CondVal);
6684 return NT;
6685 case OMPD_target_teams_loop:
6686 case OMPD_target_parallel_loop:
6687 case OMPD_target_parallel:
6688 case OMPD_target_parallel_for:
6689 case OMPD_target_parallel_for_simd:
6690 case OMPD_target_teams_distribute_parallel_for:
6691 case OMPD_target_teams_distribute_parallel_for_simd: {
6692 if (CondVal && D.hasClausesOfKind<OMPIfClause>()) {
6693 const OMPIfClause *IfClause = nullptr;
6694 for (const auto *C : D.getClausesOfKind<OMPIfClause>()) {
6695 if (C->getNameModifier() == OMPD_unknown ||
6696 C->getNameModifier() == OMPD_parallel) {
6697 IfClause = C;
6698 break;
6699 }
6700 }
6701 if (IfClause) {
6702 const Expr *Cond = IfClause->getCondition();
6703 bool Result;
6704 if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
6705 if (!Result)
6706 return ReturnSequential();
6707 } else {
6709 *CondVal = CGF.EvaluateExprAsBool(Cond);
6710 }
6711 }
6712 }
6713 if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
6714 CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
6715 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6716 CheckForConstExpr(ThreadLimitClause->getThreadLimit().front(),
6717 ThreadLimitExpr);
6718 }
6719 if (D.hasClausesOfKind<OMPNumThreadsClause>()) {
6720 CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
6721 const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>();
6722 CheckForConstExpr(NumThreadsClause->getNumThreads(), nullptr);
6723 return NumThreadsClause->getNumThreads();
6724 }
6725 return NT;
6726 }
6727 case OMPD_target_teams_distribute_simd:
6728 case OMPD_target_simd:
6729 return ReturnSequential();
6730 default:
6731 break;
6732 }
6733 llvm_unreachable("Unsupported directive kind.");
6734}
6735
6737 CodeGenFunction &CGF, const OMPExecutableDirective &D) {
6738 llvm::Value *NumThreadsVal = nullptr;
6739 llvm::Value *CondVal = nullptr;
6740 llvm::Value *ThreadLimitVal = nullptr;
6741 const Expr *ThreadLimitExpr = nullptr;
6742 int32_t UpperBound = -1;
6743
6745 CGF, D, UpperBound, /* UpperBoundOnly */ false, &CondVal,
6746 &ThreadLimitExpr);
6747
6748 // Thread limit expressions are used below, emit them.
6749 if (ThreadLimitExpr) {
6750 ThreadLimitVal =
6751 CGF.EmitScalarExpr(ThreadLimitExpr, /*IgnoreResultAssign=*/true);
6752 ThreadLimitVal = CGF.Builder.CreateIntCast(ThreadLimitVal, CGF.Int32Ty,
6753 /*isSigned=*/false);
6754 }
6755
6756 // Generate the num teams expression.
6757 if (UpperBound == 1) {
6758 NumThreadsVal = CGF.Builder.getInt32(UpperBound);
6759 } else if (NT) {
6760 NumThreadsVal = CGF.EmitScalarExpr(NT, /*IgnoreResultAssign=*/true);
6761 NumThreadsVal = CGF.Builder.CreateIntCast(NumThreadsVal, CGF.Int32Ty,
6762 /*isSigned=*/false);
6763 } else if (ThreadLimitVal) {
6764 // If we do not have a num threads value but a thread limit, replace the
6765 // former with the latter. We know handled the thread limit expression.
6766 NumThreadsVal = ThreadLimitVal;
6767 ThreadLimitVal = nullptr;
6768 } else {
6769 // Default to "0" which means runtime choice.
6770 assert(!ThreadLimitVal && "Default not applicable with thread limit value");
6771 NumThreadsVal = CGF.Builder.getInt32(0);
6772 }
6773
6774 // Handle if clause. If if clause present, the number of threads is
6775 // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
6776 if (CondVal) {
6778 NumThreadsVal = CGF.Builder.CreateSelect(CondVal, NumThreadsVal,
6779 CGF.Builder.getInt32(1));
6780 }
6781
6782 // If the thread limit and num teams expression were present, take the
6783 // minimum.
6784 if (ThreadLimitVal) {
6785 NumThreadsVal = CGF.Builder.CreateSelect(
6786 CGF.Builder.CreateICmpULT(ThreadLimitVal, NumThreadsVal),
6787 ThreadLimitVal, NumThreadsVal);
6788 }
6789
6790 return NumThreadsVal;
6791}
6792
6793namespace {
6795
6796// Utility to handle information from clauses associated with a given
6797// construct that use mappable expressions (e.g. 'map' clause, 'to' clause).
6798// It provides a convenient interface to obtain the information and generate
6799// code for that information.
6800class MappableExprsHandler {
6801public:
6802 /// Custom comparator for attach-pointer expressions that compares them by
6803 /// complexity (i.e. their component-depth) first, then by the order in which
6804 /// they were computed by collectAttachPtrExprInfo(), if they are semantically
6805 /// different.
6806 struct AttachPtrExprComparator {
6807 const MappableExprsHandler &Handler;
6808 // Cache of previous equality comparison results.
6809 mutable llvm::DenseMap<std::pair<const Expr *, const Expr *>, bool>
6810 CachedEqualityComparisons;
6811
6812 AttachPtrExprComparator(const MappableExprsHandler &H) : Handler(H) {}
6813 AttachPtrExprComparator() = delete;
6814
6815 // Return true iff LHS is "less than" RHS.
6816 bool operator()(const Expr *LHS, const Expr *RHS) const {
6817 if (LHS == RHS)
6818 return false;
6819
6820 // First, compare by complexity (depth)
6821 const auto ItLHS = Handler.AttachPtrComponentDepthMap.find(LHS);
6822 const auto ItRHS = Handler.AttachPtrComponentDepthMap.find(RHS);
6823
6824 std::optional<size_t> DepthLHS =
6825 (ItLHS != Handler.AttachPtrComponentDepthMap.end()) ? ItLHS->second
6826 : std::nullopt;
6827 std::optional<size_t> DepthRHS =
6828 (ItRHS != Handler.AttachPtrComponentDepthMap.end()) ? ItRHS->second
6829 : std::nullopt;
6830
6831 // std::nullopt (no attach pointer) has lowest complexity
6832 if (!DepthLHS.has_value() && !DepthRHS.has_value()) {
6833 // Both have same complexity, now check semantic equality
6834 if (areEqual(LHS, RHS))
6835 return false;
6836 // Different semantically, compare by computation order
6837 return wasComputedBefore(LHS, RHS);
6838 }
6839 if (!DepthLHS.has_value())
6840 return true; // LHS has lower complexity
6841 if (!DepthRHS.has_value())
6842 return false; // RHS has lower complexity
6843
6844 // Both have values, compare by depth (lower depth = lower complexity)
6845 if (DepthLHS.value() != DepthRHS.value())
6846 return DepthLHS.value() < DepthRHS.value();
6847
6848 // Same complexity, now check semantic equality
6849 if (areEqual(LHS, RHS))
6850 return false;
6851 // Different semantically, compare by computation order
6852 return wasComputedBefore(LHS, RHS);
6853 }
6854
6855 public:
6856 /// Return true if \p LHS and \p RHS are semantically equal. Uses pre-cached
6857 /// results, if available, otherwise does a recursive semantic comparison.
6858 bool areEqual(const Expr *LHS, const Expr *RHS) const {
6859 // Check cache first for faster lookup
6860 const auto CachedResultIt = CachedEqualityComparisons.find({LHS, RHS});
6861 if (CachedResultIt != CachedEqualityComparisons.end())
6862 return CachedResultIt->second;
6863
6864 bool ComparisonResult = areSemanticallyEqual(LHS, RHS);
6865
6866 // Cache the result for future lookups (both orders since semantic
6867 // equality is commutative)
6868 CachedEqualityComparisons[{LHS, RHS}] = ComparisonResult;
6869 CachedEqualityComparisons[{RHS, LHS}] = ComparisonResult;
6870 return ComparisonResult;
6871 }
6872
6873 /// Compare the two attach-ptr expressions by their computation order.
6874 /// Returns true iff LHS was computed before RHS by
6875 /// collectAttachPtrExprInfo().
6876 bool wasComputedBefore(const Expr *LHS, const Expr *RHS) const {
6877 const size_t &OrderLHS = Handler.AttachPtrComputationOrderMap.at(LHS);
6878 const size_t &OrderRHS = Handler.AttachPtrComputationOrderMap.at(RHS);
6879
6880 return OrderLHS < OrderRHS;
6881 }
6882
6883 private:
6884 /// Helper function to compare attach-pointer expressions semantically.
6885 /// This function handles various expression types that can be part of an
6886 /// attach-pointer.
6887 /// TODO: Not urgent, but we should ideally return true when comparing
6888 /// `p[10]`, `*(p + 10)`, `*(p + 5 + 5)`, `p[10:1]` etc.
6889 bool areSemanticallyEqual(const Expr *LHS, const Expr *RHS) const {
6890 if (LHS == RHS)
6891 return true;
6892
6893 // If only one is null, they aren't equal
6894 if (!LHS || !RHS)
6895 return false;
6896
6897 ASTContext &Ctx = Handler.CGF.getContext();
6898 // Strip away parentheses and no-op casts to get to the core expression
6899 LHS = LHS->IgnoreParenNoopCasts(Ctx);
6900 RHS = RHS->IgnoreParenNoopCasts(Ctx);
6901
6902 // Direct pointer comparison of the underlying expressions
6903 if (LHS == RHS)
6904 return true;
6905
6906 // Check if the expression classes match
6907 if (LHS->getStmtClass() != RHS->getStmtClass())
6908 return false;
6909
6910 // Handle DeclRefExpr (variable references)
6911 if (const auto *LD = dyn_cast<DeclRefExpr>(LHS)) {
6912 const auto *RD = dyn_cast<DeclRefExpr>(RHS);
6913 if (!RD)
6914 return false;
6915 return LD->getDecl()->getCanonicalDecl() ==
6916 RD->getDecl()->getCanonicalDecl();
6917 }
6918
6919 // Handle ArraySubscriptExpr (array indexing like a[i])
6920 if (const auto *LA = dyn_cast<ArraySubscriptExpr>(LHS)) {
6921 const auto *RA = dyn_cast<ArraySubscriptExpr>(RHS);
6922 if (!RA)
6923 return false;
6924 return areSemanticallyEqual(LA->getBase(), RA->getBase()) &&
6925 areSemanticallyEqual(LA->getIdx(), RA->getIdx());
6926 }
6927
6928 // Handle MemberExpr (member access like s.m or p->m)
6929 if (const auto *LM = dyn_cast<MemberExpr>(LHS)) {
6930 const auto *RM = dyn_cast<MemberExpr>(RHS);
6931 if (!RM)
6932 return false;
6933 if (LM->getMemberDecl()->getCanonicalDecl() !=
6934 RM->getMemberDecl()->getCanonicalDecl())
6935 return false;
6936 return areSemanticallyEqual(LM->getBase(), RM->getBase());
6937 }
6938
6939 // Handle UnaryOperator (unary operations like *p, &x, etc.)
6940 if (const auto *LU = dyn_cast<UnaryOperator>(LHS)) {
6941 const auto *RU = dyn_cast<UnaryOperator>(RHS);
6942 if (!RU)
6943 return false;
6944 if (LU->getOpcode() != RU->getOpcode())
6945 return false;
6946 return areSemanticallyEqual(LU->getSubExpr(), RU->getSubExpr());
6947 }
6948
6949 // Handle BinaryOperator (binary operations like p + offset)
6950 if (const auto *LB = dyn_cast<BinaryOperator>(LHS)) {
6951 const auto *RB = dyn_cast<BinaryOperator>(RHS);
6952 if (!RB)
6953 return false;
6954 if (LB->getOpcode() != RB->getOpcode())
6955 return false;
6956 return areSemanticallyEqual(LB->getLHS(), RB->getLHS()) &&
6957 areSemanticallyEqual(LB->getRHS(), RB->getRHS());
6958 }
6959
6960 // Handle ArraySectionExpr (array sections like a[0:1])
6961 // Attach pointers should not contain array-sections, but currently we
6962 // don't emit an error.
6963 if (const auto *LAS = dyn_cast<ArraySectionExpr>(LHS)) {
6964 const auto *RAS = dyn_cast<ArraySectionExpr>(RHS);
6965 if (!RAS)
6966 return false;
6967 return areSemanticallyEqual(LAS->getBase(), RAS->getBase()) &&
6968 areSemanticallyEqual(LAS->getLowerBound(),
6969 RAS->getLowerBound()) &&
6970 areSemanticallyEqual(LAS->getLength(), RAS->getLength());
6971 }
6972
6973 // Handle CastExpr (explicit casts)
6974 if (const auto *LC = dyn_cast<CastExpr>(LHS)) {
6975 const auto *RC = dyn_cast<CastExpr>(RHS);
6976 if (!RC)
6977 return false;
6978 if (LC->getCastKind() != RC->getCastKind())
6979 return false;
6980 return areSemanticallyEqual(LC->getSubExpr(), RC->getSubExpr());
6981 }
6982
6983 // Handle CXXThisExpr (this pointer)
6984 if (isa<CXXThisExpr>(LHS) && isa<CXXThisExpr>(RHS))
6985 return true;
6986
6987 // Handle IntegerLiteral (integer constants)
6988 if (const auto *LI = dyn_cast<IntegerLiteral>(LHS)) {
6989 const auto *RI = dyn_cast<IntegerLiteral>(RHS);
6990 if (!RI)
6991 return false;
6992 return LI->getValue() == RI->getValue();
6993 }
6994
6995 // Handle CharacterLiteral (character constants)
6996 if (const auto *LC = dyn_cast<CharacterLiteral>(LHS)) {
6997 const auto *RC = dyn_cast<CharacterLiteral>(RHS);
6998 if (!RC)
6999 return false;
7000 return LC->getValue() == RC->getValue();
7001 }
7002
7003 // Handle FloatingLiteral (floating point constants)
7004 if (const auto *LF = dyn_cast<FloatingLiteral>(LHS)) {
7005 const auto *RF = dyn_cast<FloatingLiteral>(RHS);
7006 if (!RF)
7007 return false;
7008 // Use bitwise comparison for floating point literals
7009 return LF->getValue().bitwiseIsEqual(RF->getValue());
7010 }
7011
7012 // Handle StringLiteral (string constants)
7013 if (const auto *LS = dyn_cast<StringLiteral>(LHS)) {
7014 const auto *RS = dyn_cast<StringLiteral>(RHS);
7015 if (!RS)
7016 return false;
7017 return LS->getString() == RS->getString();
7018 }
7019
7020 // Handle CXXNullPtrLiteralExpr (nullptr)
7022 return true;
7023
7024 // Handle CXXBoolLiteralExpr (true/false)
7025 if (const auto *LB = dyn_cast<CXXBoolLiteralExpr>(LHS)) {
7026 const auto *RB = dyn_cast<CXXBoolLiteralExpr>(RHS);
7027 if (!RB)
7028 return false;
7029 return LB->getValue() == RB->getValue();
7030 }
7031
7032 // Fallback for other forms - use the existing comparison method
7033 return Expr::isSameComparisonOperand(LHS, RHS);
7034 }
7035 };
7036
7037 /// Get the offset of the OMP_MAP_MEMBER_OF field.
7038 static unsigned getFlagMemberOffset() {
7039 unsigned Offset = 0;
7040 for (uint64_t Remain =
7041 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
7042 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF);
7043 !(Remain & 1); Remain = Remain >> 1)
7044 Offset++;
7045 return Offset;
7046 }
7047
7048 /// Class that holds debugging information for a data mapping to be passed to
7049 /// the runtime library.
  class MappingExprInfo {
    /// The variable declaration used for the data mapping.
    const ValueDecl *MapDecl = nullptr;
    /// The original expression used in the map clause, or null if there is
    /// none.
    const Expr *MapExpr = nullptr;

  public:
    /// \param MapDecl the declaration this mapping refers to (may be null).
    /// \param MapExpr the original clause expression; defaults to null when
    /// no user-written expression exists (e.g. implicit maps).
    MappingExprInfo(const ValueDecl *MapDecl, const Expr *MapExpr = nullptr)
        : MapDecl(MapDecl), MapExpr(MapExpr) {}

    /// Declaration being mapped, or null.
    const ValueDecl *getMapDecl() const { return MapDecl; }
    /// Original map-clause expression, or null.
    const Expr *getMapExpr() const { return MapExpr; }
  };
7064
7065 using DeviceInfoTy = llvm::OpenMPIRBuilder::DeviceInfoTy;
7066 using MapBaseValuesArrayTy = llvm::OpenMPIRBuilder::MapValuesArrayTy;
7067 using MapValuesArrayTy = llvm::OpenMPIRBuilder::MapValuesArrayTy;
7068 using MapFlagsArrayTy = llvm::OpenMPIRBuilder::MapFlagsArrayTy;
7069 using MapDimArrayTy = llvm::OpenMPIRBuilder::MapDimArrayTy;
7070 using MapNonContiguousArrayTy =
7071 llvm::OpenMPIRBuilder::MapNonContiguousArrayTy;
7072 using MapExprsArrayTy = SmallVector<MappingExprInfo, 4>;
7073 using MapValueDeclsArrayTy = SmallVector<const ValueDecl *, 4>;
7074 using MapData =
7076 OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>,
7077 bool /*IsImplicit*/, const ValueDecl *, const Expr *>;
7078 using MapDataArrayTy = SmallVector<MapData, 4>;
7079
7080 /// This structure contains combined information generated for mappable
7081 /// clauses, including base pointers, pointers, sizes, map types, user-defined
7082 /// mappers, and non-contiguous information.
7083 struct MapCombinedInfoTy : llvm::OpenMPIRBuilder::MapInfosTy {
7084 MapExprsArrayTy Exprs;
7085 MapValueDeclsArrayTy Mappers;
7086 MapValueDeclsArrayTy DevicePtrDecls;
7087
7088 /// Append arrays in \a CurInfo.
7089 void append(MapCombinedInfoTy &CurInfo) {
7090 Exprs.append(CurInfo.Exprs.begin(), CurInfo.Exprs.end());
7091 DevicePtrDecls.append(CurInfo.DevicePtrDecls.begin(),
7092 CurInfo.DevicePtrDecls.end());
7093 Mappers.append(CurInfo.Mappers.begin(), CurInfo.Mappers.end());
7094 llvm::OpenMPIRBuilder::MapInfosTy::append(CurInfo);
7095 }
7096 };
7097
7098 /// Map between a struct and the its lowest & highest elements which have been
7099 /// mapped.
7100 /// [ValueDecl *] --> {LE(FieldIndex, Pointer),
7101 /// HE(FieldIndex, Pointer)}
  struct StructRangeInfoTy {
    /// Per-member map entries gathered before the combined struct entry is
    /// synthesized.
    MapCombinedInfoTy PreliminaryMapData;
    /// Lowest mapped element: its field index and address.
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> LowestElem = {
        0, Address::invalid()};
    /// Highest mapped element: its field index and address.
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> HighestElem = {
        0, Address::invalid()};
    /// Base address of the whole struct.
    Address Base = Address::invalid();
    /// Lower bound address of the mapped range.
    Address LB = Address::invalid();
    /// True if the lowest element was an array section.
    bool IsArraySection = false;
    /// True if the entire record is mapped.
    bool HasCompleteRecord = false;
  };
7113
7114 /// A struct to store the attach pointer and pointee information, to be used
7115 /// when emitting an attach entry.
7116 struct AttachInfoTy {
7117 Address AttachPtrAddr = Address::invalid();
7118 Address AttachPteeAddr = Address::invalid();
7119 const ValueDecl *AttachPtrDecl = nullptr;
7120 const Expr *AttachMapExpr = nullptr;
7121
7122 bool isValid() const {
7123 return AttachPtrAddr.isValid() && AttachPteeAddr.isValid();
7124 }
7125 };
7126
7127 /// Check if there's any component list where the attach pointer expression
7128 /// matches the given captured variable.
7129 bool hasAttachEntryForCapturedVar(const ValueDecl *VD) const {
7130 for (const auto &AttachEntry : AttachPtrExprMap) {
7131 if (AttachEntry.second) {
7132 // Check if the attach pointer expression is a DeclRefExpr that
7133 // references the captured variable
7134 if (const auto *DRE = dyn_cast<DeclRefExpr>(AttachEntry.second))
7135 if (DRE->getDecl() == VD)
7136 return true;
7137 }
7138 }
7139 return false;
7140 }
7141
7142 /// Get the previously-cached attach pointer for a component list, if-any.
7143 const Expr *getAttachPtrExpr(
7145 const {
7146 const auto It = AttachPtrExprMap.find(Components);
7147 if (It != AttachPtrExprMap.end())
7148 return It->second;
7149
7150 return nullptr;
7151 }
7152
7153private:
7154 /// Kind that defines how a device pointer has to be returned.
7155 struct MapInfo {
7158 ArrayRef<OpenMPMapModifierKind> MapModifiers;
7159 ArrayRef<OpenMPMotionModifierKind> MotionModifiers;
7160 bool ReturnDevicePointer = false;
7161 bool IsImplicit = false;
7162 const ValueDecl *Mapper = nullptr;
7163 const Expr *VarRef = nullptr;
7164 bool ForDeviceAddr = false;
7165
7166 MapInfo() = default;
7167 MapInfo(
7169 OpenMPMapClauseKind MapType,
7170 ArrayRef<OpenMPMapModifierKind> MapModifiers,
7171 ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
7172 bool ReturnDevicePointer, bool IsImplicit,
7173 const ValueDecl *Mapper = nullptr, const Expr *VarRef = nullptr,
7174 bool ForDeviceAddr = false)
7175 : Components(Components), MapType(MapType), MapModifiers(MapModifiers),
7176 MotionModifiers(MotionModifiers),
7177 ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit),
7178 Mapper(Mapper), VarRef(VarRef), ForDeviceAddr(ForDeviceAddr) {}
7179 };
7180
7181 /// The target directive from where the mappable clauses were extracted. It
7182 /// is either a executable directive or a user-defined mapper directive.
7183 llvm::PointerUnion<const OMPExecutableDirective *,
7184 const OMPDeclareMapperDecl *>
7185 CurDir;
7186
7187 /// Function the directive is being generated for.
7188 CodeGenFunction &CGF;
7189
7190 /// Set of all first private variables in the current directive.
7191 /// bool data is set to true if the variable is implicitly marked as
7192 /// firstprivate, false otherwise.
7193 llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, bool> FirstPrivateDecls;
7194
7195 /// Set of defaultmap clause kinds that use firstprivate behavior.
7196 llvm::SmallSet<OpenMPDefaultmapClauseKind, 4> DefaultmapFirstprivateKinds;
7197
7198 /// Map between device pointer declarations and their expression components.
7199 /// The key value for declarations in 'this' is null.
7200 llvm::DenseMap<
7201 const ValueDecl *,
7202 SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
7203 DevPointersMap;
7204
7205 /// Map between device addr declarations and their expression components.
7206 /// The key value for declarations in 'this' is null.
7207 llvm::DenseMap<
7208 const ValueDecl *,
7209 SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
7210 HasDevAddrsMap;
7211
7212 /// Map between lambda declarations and their map type.
7213 llvm::DenseMap<const ValueDecl *, const OMPMapClause *> LambdasMap;
7214
7215 /// Map from component lists to their attach pointer expressions.
7217 const Expr *>
7218 AttachPtrExprMap;
7219
7220 /// Map from attach pointer expressions to their component depth.
7221 /// nullptr key has std::nullopt depth. This can be used to order attach-ptr
7222 /// expressions with increasing/decreasing depth.
7223 /// The component-depth of `nullptr` (i.e. no attach-ptr) is `std::nullopt`.
7224 /// TODO: Not urgent, but we should ideally use the number of pointer
7225 /// dereferences in an expr as an indicator of its complexity, instead of the
7226 /// component-depth. That would be needed for us to treat `p[1]`, `*(p + 10)`,
7227 /// `*(p + 5 + 5)` together.
7228 llvm::DenseMap<const Expr *, std::optional<size_t>>
7229 AttachPtrComponentDepthMap = {{nullptr, std::nullopt}};
7230
7231 /// Map from attach pointer expressions to the order they were computed in, in
7232 /// collectAttachPtrExprInfo().
7233 llvm::DenseMap<const Expr *, size_t> AttachPtrComputationOrderMap = {
7234 {nullptr, 0}};
7235
7236 /// An instance of attach-ptr-expr comparator that can be used throughout the
7237 /// lifetime of this handler.
7238 AttachPtrExprComparator AttachPtrComparator;
7239
7240 llvm::Value *getExprTypeSize(const Expr *E) const {
7241 QualType ExprTy = E->getType().getCanonicalType();
7242
7243 // Calculate the size for array shaping expression.
7244 if (const auto *OAE = dyn_cast<OMPArrayShapingExpr>(E)) {
7245 llvm::Value *Size =
7246 CGF.getTypeSize(OAE->getBase()->getType()->getPointeeType());
7247 for (const Expr *SE : OAE->getDimensions()) {
7248 llvm::Value *Sz = CGF.EmitScalarExpr(SE);
7249 Sz = CGF.EmitScalarConversion(Sz, SE->getType(),
7250 CGF.getContext().getSizeType(),
7251 SE->getExprLoc());
7252 Size = CGF.Builder.CreateNUWMul(Size, Sz);
7253 }
7254 return Size;
7255 }
7256
7257 // Reference types are ignored for mapping purposes.
7258 if (const auto *RefTy = ExprTy->getAs<ReferenceType>())
7259 ExprTy = RefTy->getPointeeType().getCanonicalType();
7260
7261 // Given that an array section is considered a built-in type, we need to
7262 // do the calculation based on the length of the section instead of relying
7263 // on CGF.getTypeSize(E->getType()).
7264 if (const auto *OAE = dyn_cast<ArraySectionExpr>(E)) {
7265 QualType BaseTy = ArraySectionExpr::getBaseOriginalType(
7266 OAE->getBase()->IgnoreParenImpCasts())
7268
7269 // If there is no length associated with the expression and lower bound is
7270 // not specified too, that means we are using the whole length of the
7271 // base.
7272 if (!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
7273 !OAE->getLowerBound())
7274 return CGF.getTypeSize(BaseTy);
7275
7276 llvm::Value *ElemSize;
7277 if (const auto *PTy = BaseTy->getAs<PointerType>()) {
7278 ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType());
7279 } else {
7280 const auto *ATy = cast<ArrayType>(BaseTy.getTypePtr());
7281 assert(ATy && "Expecting array type if not a pointer type.");
7282 ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType());
7283 }
7284
7285 // If we don't have a length at this point, that is because we have an
7286 // array section with a single element.
7287 if (!OAE->getLength() && OAE->getColonLocFirst().isInvalid())
7288 return ElemSize;
7289
7290 if (const Expr *LenExpr = OAE->getLength()) {
7291 llvm::Value *LengthVal = CGF.EmitScalarExpr(LenExpr);
7292 LengthVal = CGF.EmitScalarConversion(LengthVal, LenExpr->getType(),
7293 CGF.getContext().getSizeType(),
7294 LenExpr->getExprLoc());
7295 return CGF.Builder.CreateNUWMul(LengthVal, ElemSize);
7296 }
7297 assert(!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
7298 OAE->getLowerBound() && "expected array_section[lb:].");
7299 // Size = sizetype - lb * elemtype;
7300 llvm::Value *LengthVal = CGF.getTypeSize(BaseTy);
7301 llvm::Value *LBVal = CGF.EmitScalarExpr(OAE->getLowerBound());
7302 LBVal = CGF.EmitScalarConversion(LBVal, OAE->getLowerBound()->getType(),
7303 CGF.getContext().getSizeType(),
7304 OAE->getLowerBound()->getExprLoc());
7305 LBVal = CGF.Builder.CreateNUWMul(LBVal, ElemSize);
7306 llvm::Value *Cmp = CGF.Builder.CreateICmpUGT(LengthVal, LBVal);
7307 llvm::Value *TrueVal = CGF.Builder.CreateNUWSub(LengthVal, LBVal);
7308 LengthVal = CGF.Builder.CreateSelect(
7309 Cmp, TrueVal, llvm::ConstantInt::get(CGF.SizeTy, 0));
7310 return LengthVal;
7311 }
7312 return CGF.getTypeSize(ExprTy);
7313 }
7314
7315 /// Return the corresponding bits for a given map clause modifier. Add
7316 /// a flag marking the map as a pointer if requested. Add a flag marking the
7317 /// map as the first one of a series of maps that relate to the same map
7318 /// expression.
7319 OpenMPOffloadMappingFlags getMapTypeBits(
7320 OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
7321 ArrayRef<OpenMPMotionModifierKind> MotionModifiers, bool IsImplicit,
7322 bool AddPtrFlag, bool AddIsTargetParamFlag, bool IsNonContiguous) const {
7323 OpenMPOffloadMappingFlags Bits =
7324 IsImplicit ? OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT
7325 : OpenMPOffloadMappingFlags::OMP_MAP_NONE;
7326 switch (MapType) {
7327 case OMPC_MAP_alloc:
7328 case OMPC_MAP_release:
7329 // alloc and release is the default behavior in the runtime library, i.e.
7330 // if we don't pass any bits alloc/release that is what the runtime is
7331 // going to do. Therefore, we don't need to signal anything for these two
7332 // type modifiers.
7333 break;
7334 case OMPC_MAP_to:
7335 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_TO;
7336 break;
7337 case OMPC_MAP_from:
7338 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_FROM;
7339 break;
7340 case OMPC_MAP_tofrom:
7341 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_TO |
7342 OpenMPOffloadMappingFlags::OMP_MAP_FROM;
7343 break;
7344 case OMPC_MAP_delete:
7345 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_DELETE;
7346 break;
7347 case OMPC_MAP_unknown:
7348 llvm_unreachable("Unexpected map type!");
7349 }
7350 if (AddPtrFlag)
7351 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ;
7352 if (AddIsTargetParamFlag)
7353 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;
7354 if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_always))
7355 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_ALWAYS;
7356 if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_close))
7357 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_CLOSE;
7358 if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_present) ||
7359 llvm::is_contained(MotionModifiers, OMPC_MOTION_MODIFIER_present))
7360 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_PRESENT;
7361 if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_ompx_hold))
7362 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD;
7363 if (IsNonContiguous)
7364 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_NON_CONTIG;
7365 return Bits;
7366 }
7367
7368 /// Return true if the provided expression is a final array section. A
7369 /// final array section, is one whose length can't be proved to be one.
7370 bool isFinalArraySectionExpression(const Expr *E) const {
7371 const auto *OASE = dyn_cast<ArraySectionExpr>(E);
7372
7373 // It is not an array section and therefore not a unity-size one.
7374 if (!OASE)
7375 return false;
7376
7377 // An array section with no colon always refer to a single element.
7378 if (OASE->getColonLocFirst().isInvalid())
7379 return false;
7380
7381 const Expr *Length = OASE->getLength();
7382
7383 // If we don't have a length we have to check if the array has size 1
7384 // for this dimension. Also, we should always expect a length if the
7385 // base type is pointer.
7386 if (!Length) {
7387 QualType BaseQTy = ArraySectionExpr::getBaseOriginalType(
7388 OASE->getBase()->IgnoreParenImpCasts())
7390 if (const auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr()))
7391 return ATy->getSExtSize() != 1;
7392 // If we don't have a constant dimension length, we have to consider
7393 // the current section as having any size, so it is not necessarily
7394 // unitary. If it happen to be unity size, that's user fault.
7395 return true;
7396 }
7397
7398 // Check if the length evaluates to 1.
7399 Expr::EvalResult Result;
7400 if (!Length->EvaluateAsInt(Result, CGF.getContext()))
7401 return true; // Can have more that size 1.
7402
7403 llvm::APSInt ConstLength = Result.Val.getInt();
7404 return ConstLength.getSExtValue() != 1;
7405 }
7406
7407 /// Emit an attach entry into \p CombinedInfo, using the information from \p
7408 /// AttachInfo. For example, for a map of form `int *p; ... map(p[1:10])`,
7409 /// an attach entry has the following form:
7410 /// &p, &p[1], sizeof(void*), ATTACH
7411 void emitAttachEntry(CodeGenFunction &CGF, MapCombinedInfoTy &CombinedInfo,
7412 const AttachInfoTy &AttachInfo) const {
7413 assert(AttachInfo.isValid() &&
7414 "Expected valid attach pointer/pointee information!");
7415
7416 // Size is the size of the pointer itself - use pointer size, not BaseDecl
7417 // size
7418 llvm::Value *PointerSize = CGF.Builder.CreateIntCast(
7419 llvm::ConstantInt::get(
7420 CGF.CGM.SizeTy, CGF.getContext()
7422 .getQuantity()),
7423 CGF.Int64Ty, /*isSigned=*/true);
7424
7425 CombinedInfo.Exprs.emplace_back(AttachInfo.AttachPtrDecl,
7426 AttachInfo.AttachMapExpr);
7427 CombinedInfo.BasePointers.push_back(
7428 AttachInfo.AttachPtrAddr.emitRawPointer(CGF));
7429 CombinedInfo.DevicePtrDecls.push_back(nullptr);
7430 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
7431 CombinedInfo.Pointers.push_back(
7432 AttachInfo.AttachPteeAddr.emitRawPointer(CGF));
7433 CombinedInfo.Sizes.push_back(PointerSize);
7434 CombinedInfo.Types.push_back(OpenMPOffloadMappingFlags::OMP_MAP_ATTACH);
7435 CombinedInfo.Mappers.push_back(nullptr);
7436 CombinedInfo.NonContigInfo.Dims.push_back(1);
7437 }
7438
7439 /// A helper class to copy structures with overlapped elements, i.e. those
7440 /// which have mappings of both "s" and "s.mem". Consecutive elements that
7441 /// are not explicitly copied have mapping nodes synthesized for them,
7442 /// taking care to avoid generating zero-sized copies.
7443 class CopyOverlappedEntryGaps {
7444 CodeGenFunction &CGF;
7445 MapCombinedInfoTy &CombinedInfo;
7446 OpenMPOffloadMappingFlags Flags = OpenMPOffloadMappingFlags::OMP_MAP_NONE;
7447 const ValueDecl *MapDecl = nullptr;
7448 const Expr *MapExpr = nullptr;
7449 Address BP = Address::invalid();
7450 bool IsNonContiguous = false;
7451 uint64_t DimSize = 0;
7452 // These elements track the position as the struct is iterated over
7453 // (in order of increasing element address).
7454 const RecordDecl *LastParent = nullptr;
7455 uint64_t Cursor = 0;
7456 unsigned LastIndex = -1u;
7457 Address LB = Address::invalid();
7458
7459 public:
7460 CopyOverlappedEntryGaps(CodeGenFunction &CGF,
7461 MapCombinedInfoTy &CombinedInfo,
7462 OpenMPOffloadMappingFlags Flags,
7463 const ValueDecl *MapDecl, const Expr *MapExpr,
7464 Address BP, Address LB, bool IsNonContiguous,
7465 uint64_t DimSize)
7466 : CGF(CGF), CombinedInfo(CombinedInfo), Flags(Flags), MapDecl(MapDecl),
7467 MapExpr(MapExpr), BP(BP), IsNonContiguous(IsNonContiguous),
7468 DimSize(DimSize), LB(LB) {}
7469
7470 void processField(
7471 const OMPClauseMappableExprCommon::MappableComponent &MC,
7472 const FieldDecl *FD,
7473 llvm::function_ref<LValue(CodeGenFunction &, const MemberExpr *)>
7474 EmitMemberExprBase) {
7475 const RecordDecl *RD = FD->getParent();
7476 const ASTRecordLayout &RL = CGF.getContext().getASTRecordLayout(RD);
7477 uint64_t FieldOffset = RL.getFieldOffset(FD->getFieldIndex());
7478 uint64_t FieldSize =
7480 Address ComponentLB = Address::invalid();
7481
7482 if (FD->getType()->isLValueReferenceType()) {
7483 const auto *ME = cast<MemberExpr>(MC.getAssociatedExpression());
7484 LValue BaseLVal = EmitMemberExprBase(CGF, ME);
7485 ComponentLB =
7486 CGF.EmitLValueForFieldInitialization(BaseLVal, FD).getAddress();
7487 } else {
7488 ComponentLB =
7490 }
7491
7492 if (!LastParent)
7493 LastParent = RD;
7494 if (FD->getParent() == LastParent) {
7495 if (FD->getFieldIndex() != LastIndex + 1)
7496 copyUntilField(FD, ComponentLB);
7497 } else {
7498 LastParent = FD->getParent();
7499 if (((int64_t)FieldOffset - (int64_t)Cursor) > 0)
7500 copyUntilField(FD, ComponentLB);
7501 }
7502 Cursor = FieldOffset + FieldSize;
7503 LastIndex = FD->getFieldIndex();
7504 LB = CGF.Builder.CreateConstGEP(ComponentLB, 1);
7505 }
7506
7507 void copyUntilField(const FieldDecl *FD, Address ComponentLB) {
7508 llvm::Value *ComponentLBPtr = ComponentLB.emitRawPointer(CGF);
7509 llvm::Value *LBPtr = LB.emitRawPointer(CGF);
7510 llvm::Value *Size =
7511 CGF.Builder.CreatePtrDiff(CGF.Int8Ty, ComponentLBPtr, LBPtr);
7512 copySizedChunk(LBPtr, Size);
7513 }
7514
7515 void copyUntilEnd(Address HB) {
7516 if (LastParent) {
7517 const ASTRecordLayout &RL =
7518 CGF.getContext().getASTRecordLayout(LastParent);
7519 if ((uint64_t)CGF.getContext().toBits(RL.getSize()) <= Cursor)
7520 return;
7521 }
7522 llvm::Value *LBPtr = LB.emitRawPointer(CGF);
7523 llvm::Value *Size = CGF.Builder.CreatePtrDiff(
7524 CGF.Int8Ty, CGF.Builder.CreateConstGEP(HB, 1).emitRawPointer(CGF),
7525 LBPtr);
7526 copySizedChunk(LBPtr, Size);
7527 }
7528
7529 void copySizedChunk(llvm::Value *Base, llvm::Value *Size) {
7530 CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
7531 CombinedInfo.BasePointers.push_back(BP.emitRawPointer(CGF));
7532 CombinedInfo.DevicePtrDecls.push_back(nullptr);
7533 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
7534 CombinedInfo.Pointers.push_back(Base);
7535 CombinedInfo.Sizes.push_back(
7536 CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
7537 CombinedInfo.Types.push_back(Flags);
7538 CombinedInfo.Mappers.push_back(nullptr);
7539 CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize : 1);
7540 }
7541 };
7542
7543 /// Generate the base pointers, section pointers, sizes, map type bits, and
7544 /// user-defined mappers (all included in \a CombinedInfo) for the provided
7545 /// map type, map or motion modifiers, and expression components.
7546 /// \a IsFirstComponent should be set to true if the provided set of
7547 /// components is the first associated with a capture.
7548 void generateInfoForComponentList(
7549 OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
7550 ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
7552 MapCombinedInfoTy &CombinedInfo,
7553 MapCombinedInfoTy &StructBaseCombinedInfo,
7554 StructRangeInfoTy &PartialStruct, AttachInfoTy &AttachInfo,
7555 bool IsFirstComponentList, bool IsImplicit,
7556 bool GenerateAllInfoForClauses, const ValueDecl *Mapper = nullptr,
7557 bool ForDeviceAddr = false, const ValueDecl *BaseDecl = nullptr,
7558 const Expr *MapExpr = nullptr,
7559 ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
7560 OverlappedElements = {}) const {
7561
7562 // The following summarizes what has to be generated for each map and the
7563 // types below. The generated information is expressed in this order:
7564 // base pointer, section pointer, size, flags
7565 // (to add to the ones that come from the map type and modifier).
7566 // Entries annotated with (+) are only generated for "target" constructs,
7567 // and only if the variable at the beginning of the expression is used in
7568 // the region.
7569 //
7570 // double d;
7571 // int i[100];
7572 // float *p;
7573 // int **a = &i;
7574 //
7575 // struct S1 {
7576 // int i;
7577 // float f[50];
7578 // }
7579 // struct S2 {
7580 // int i;
7581 // float f[50];
7582 // S1 s;
7583 // double *p;
7584 // double *&pref;
7585 // struct S2 *ps;
7586 // int &ref;
7587 // }
7588 // S2 s;
7589 // S2 *ps;
7590 //
7591 // map(d)
7592 // &d, &d, sizeof(double), TARGET_PARAM | TO | FROM
7593 //
7594 // map(i)
7595 // &i, &i, 100*sizeof(int), TARGET_PARAM | TO | FROM
7596 //
7597 // map(i[1:23])
7598 // &i(=&i[0]), &i[1], 23*sizeof(int), TARGET_PARAM | TO | FROM
7599 //
7600 // map(p)
7601 // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM
7602 //
7603 // map(p[1:24])
7604 // p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM // map pointee
7605 // &p, &p[1], sizeof(void*), ATTACH // attach pointer/pointee, if both
7606 // // are present, and either is new
7607 //
7608 // map(([22])p)
7609 // p, p, 22*sizeof(float), TARGET_PARAM | TO | FROM
7610 // &p, p, sizeof(void*), ATTACH
7611 //
7612 // map((*a)[0:3])
7613 // a, a, 0, TARGET_PARAM | IMPLICIT // (+)
7614 // (*a)[0], &(*a)[0], 3 * sizeof(int), TO | FROM
7615 // &(*a), &(*a)[0], sizeof(void*), ATTACH
7616 // (+) Only on target, if a is used in the region
7617 // Note: Since the attach base-pointer is `*a`, which is not a scalar
7618 // variable, it doesn't determine the clause on `a`. `a` is mapped using
7619 // a zero-length-array-section map by generateDefaultMapInfo, if it is
7620 // referenced in the target region, because it is a pointer.
7621 //
7622 // map(**a)
7623 // a, a, 0, TARGET_PARAM | IMPLICIT // (+)
7624 // &(*a)[0], &(*a)[0], sizeof(int), TO | FROM
7625 // &(*a), &(*a)[0], sizeof(void*), ATTACH
7626 // (+) Only on target, if a is used in the region
7627 //
7628 // map(s)
7629 // FIXME: This needs to also imply map(ref_ptr_ptee: s.ref), since the
7630 // effect is supposed to be same as if the user had a map for every element
7631 // of the struct. We currently do a shallow-map of s.
7632 // &s, &s, sizeof(S2), TARGET_PARAM | TO | FROM
7633 //
7634 // map(s.i)
7635 // &s, &(s.i), sizeof(int), TARGET_PARAM | TO | FROM
7636 //
7637 // map(s.s.f)
7638 // &s, &(s.s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
7639 //
7640 // map(s.p)
7641 // &s, &(s.p), sizeof(double*), TARGET_PARAM | TO | FROM
7642 //
7643 // map(to: s.p[:22])
7644 // &s, &(s.p), sizeof(double*), TARGET_PARAM | IMPLICIT // (+)
7645 // &(s.p[0]), &(s.p[0]), 22 * sizeof(double*), TO | FROM
7646 // &(s.p), &(s.p[0]), sizeof(void*), ATTACH
7647 //
7648 // map(to: s.ref)
7649 // &s, &(ptr(s.ref)), sizeof(int*), TARGET_PARAM (*)
7650 // &s, &(ptee(s.ref)), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
7651 // (*) alloc space for struct members, only this is a target parameter.
7652 // (**) map the pointer (nothing to be mapped in this example) (the compiler
7653 // optimizes this entry out, same in the examples below)
7654 // (***) map the pointee (map: to)
7655 // Note: ptr(s.ref) represents the referring pointer of s.ref
7656 // ptee(s.ref) represents the referenced pointee of s.ref
7657 //
7658 // map(to: s.pref)
7659 // &s, &(ptr(s.pref)), sizeof(double**), TARGET_PARAM
7660 // &s, &(ptee(s.pref)), sizeof(double*), MEMBER_OF(1) | PTR_AND_OBJ | TO
7661 //
7662 // map(to: s.pref[:22])
7663 // &s, &(ptr(s.pref)), sizeof(double**), TARGET_PARAM | IMPLICIT // (+)
7664 // &s, &(ptee(s.pref)), sizeof(double*), MEMBER_OF(1) | PTR_AND_OBJ | TO |
7665 // FROM | IMPLICIT // (+)
7666 // &(ptee(s.pref)[0]), &(ptee(s.pref)[0]), 22 * sizeof(double), TO
7667 // &(ptee(s.pref)), &(ptee(s.pref)[0]), sizeof(void*), ATTACH
7668 //
7669 // map(s.ps)
7670 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM
7671 //
7672 // map(from: s.ps->s.i)
7673 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM | IMPLICIT // (+)
7674 // &(s.ps[0]), &(s.ps->s.i), sizeof(int), FROM
7675 // &(s.ps), &(s.ps->s.i), sizeof(void*), ATTACH
7676 //
7677 // map(to: s.ps->ps)
7678 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM | IMPLICIT // (+)
7679 // &(s.ps[0]), &(s.ps->ps), sizeof(S2*), TO
7680 // &(s.ps), &(s.ps->ps), sizeof(void*), ATTACH
7681 //
7682 // map(s.ps->ps->ps)
7683 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM | IMPLICIT // (+)
7684 // &(s.ps->ps[0]), &(s.ps->ps->ps), sizeof(S2*), TO
7685 // &(s.ps->ps), &(s.ps->ps->ps), sizeof(void*), ATTACH
7686 //
7687 // map(to: s.ps->ps->s.f[:22])
7688 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM | IMPLICIT // (+)
7689 // &(s.ps->ps[0]), &(s.ps->ps->s.f[0]), 22*sizeof(float), TO
7690 // &(s.ps->ps), &(s.ps->ps->s.f[0]), sizeof(void*), ATTACH
7691 //
7692 // map(ps)
7693 // &ps, &ps, sizeof(S2*), TARGET_PARAM | TO | FROM
7694 //
7695 // map(ps->i)
7696 // ps, &(ps->i), sizeof(int), TARGET_PARAM | TO | FROM
7697 // &ps, &(ps->i), sizeof(void*), ATTACH
7698 //
7699 // map(ps->s.f)
7700 // ps, &(ps->s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
7701 // &ps, &(ps->s.f[0]), sizeof(ps), ATTACH
7702 //
7703 // map(from: ps->p)
7704 // ps, &(ps->p), sizeof(double*), TARGET_PARAM | FROM
7705 // &ps, &(ps->p), sizeof(ps), ATTACH
7706 //
7707 // map(to: ps->p[:22])
7708 // ps, &(ps[0]), 0, TARGET_PARAM | IMPLICIT // (+)
7709 // &(ps->p[0]), &(ps->p[0]), 22*sizeof(double), TO
7710 // &(ps->p), &(ps->p[0]), sizeof(void*), ATTACH
7711 //
7712 // map(ps->ps)
7713 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM | TO | FROM
7714 // &ps, &(ps->ps), sizeof(ps), ATTACH
7715 //
7716 // map(from: ps->ps->s.i)
7717 // ps, &(ps[0]), 0, TARGET_PARAM | IMPLICIT // (+)
7718 // &(ps->ps[0]), &(ps->ps->s.i), sizeof(int), FROM
7719 // &(ps->ps), &(ps->ps->s.i), sizeof(void*), ATTACH
7720 //
7721 // map(from: ps->ps->ps)
7722 // ps, &ps[0], 0, TARGET_PARAM | IMPLICIT // (+)
7723 // &(ps->ps[0]), &(ps->ps->ps), sizeof(S2*), FROM
7724 // &(ps->ps), &(ps->ps->ps), sizeof(void*), ATTACH
7725 //
7726 // map(ps->ps->ps->ps)
7727 // ps, &ps[0], 0, TARGET_PARAM | IMPLICIT // (+)
7728 // &(ps->ps->ps[0]), &(ps->ps->ps->ps), sizeof(S2*), FROM
7729 // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(void*), ATTACH
7730 //
7731 // map(to: ps->ps->ps->s.f[:22])
7732 // ps, &ps[0], 0, TARGET_PARAM | IMPLICIT // (+)
7733 // &(ps->ps->ps[0]), &(ps->ps->ps->s.f[0]), 22*sizeof(float), TO
7734 // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), sizeof(void*), ATTACH
7735 //
7736 // map(to: s.f[:22]) map(from: s.p[:33])
7737 // On target, and if s is used in the region:
7738 //
7739 // &s, &(s.f[0]), 50*sizeof(float) +
7740 // sizeof(struct S1) +
7741 // sizeof(double*) (**), TARGET_PARAM
7742 // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | TO
7743 // &s, &(s.p), sizeof(double*), MEMBER_OF(1) | TO |
7744 // FROM | IMPLICIT
7745 // &(s.p[0]), &(s.p[0]), 33*sizeof(double), FROM
7746 // &(s.p), &(s.p[0]), sizeof(void*), ATTACH
7747 // (**) allocate contiguous space needed to fit all mapped members even if
7748 // we allocate space for members not mapped (in this example,
7749 // s.f[22..49] and s.s are not mapped, yet we must allocate space for
7750 // them as well because they fall between &s.f[0] and &s.p)
7751 //
7752 // On other constructs, and, if s is not used in the region, on target:
7753 // &s, &(s.f[0]), 22*sizeof(float), TO
7754 // &(s.p[0]), &(s.p[0]), 33*sizeof(double), FROM
7755 // &(s.p), &(s.p[0]), sizeof(void*), ATTACH
7756 //
7757 // map(from: s.f[:22]) map(to: ps->p[:33])
7758 // &s, &(s.f[0]), 22*sizeof(float), TARGET_PARAM | FROM
7759 // &ps[0], &ps[0], 0, TARGET_PARAM | IMPLICIT // (+)
7760 // &(ps->p[0]), &(ps->p[0]), 33*sizeof(double), TO
7761 // &(ps->p), &(ps->p[0]), sizeof(void*), ATTACH
7762 //
7763 // map(from: s.f[:22], s.s) map(to: ps->p[:33])
7764 // &s, &(s.f[0]), 50*sizeof(float) +
7765 // sizeof(struct S1), TARGET_PARAM
7766 // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | FROM
7767 // &s, &(s.s), sizeof(struct S1), MEMBER_OF(1) | FROM
7768 // ps, &ps[0], 0, TARGET_PARAM | IMPLICIT // (+)
7769 // &(ps->p[0]), &(ps->p[0]), 33*sizeof(double), TO
7770 // &(ps->p), &(ps->p[0]), sizeof(void*), ATTACH
7771 //
7772 // map(p[:100], p)
7773 // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM
7774 // p, &p[0], 100*sizeof(float), TO | FROM
7775 // &p, &p[0], sizeof(float*), ATTACH
7776
7777 // Track if the map information being generated is the first for a capture.
7778 bool IsCaptureFirstInfo = IsFirstComponentList;
7779 // When the variable is on a declare target link or in a to clause with
7780 // unified memory, a reference is needed to hold the host/device address
7781 // of the variable.
7782 bool RequiresReference = false;
7783
7784 // Scan the components from the base to the complete expression.
7785 auto CI = Components.rbegin();
7786 auto CE = Components.rend();
7787 auto I = CI;
7788
7789 // Track if the map information being generated is the first for a list of
7790 // components.
7791 bool IsExpressionFirstInfo = true;
7792 bool FirstPointerInComplexData = false;
7793 Address BP = Address::invalid();
7794 Address FinalLowestElem = Address::invalid();
7795 const Expr *AssocExpr = I->getAssociatedExpression();
7796 const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr);
7797 const auto *OASE = dyn_cast<ArraySectionExpr>(AssocExpr);
7798 const auto *OAShE = dyn_cast<OMPArrayShapingExpr>(AssocExpr);
7799
7800 // Get the pointer-attachment base-pointer for the given list, if any.
7801 const Expr *AttachPtrExpr = getAttachPtrExpr(Components);
7802 auto [AttachPtrAddr, AttachPteeBaseAddr] =
7803 getAttachPtrAddrAndPteeBaseAddr(AttachPtrExpr, CGF);
7804
7805 bool HasAttachPtr = AttachPtrExpr != nullptr;
7806 bool FirstComponentIsForAttachPtr = AssocExpr == AttachPtrExpr;
7807 bool SeenAttachPtr = FirstComponentIsForAttachPtr;
7808
7809 if (FirstComponentIsForAttachPtr) {
7810 // No need to process AttachPtr here. It will be processed at the end
7811 // after we have computed the pointee's address.
7812 ++I;
7813 } else if (isa<MemberExpr>(AssocExpr)) {
7814 // The base is the 'this' pointer. The content of the pointer is going
7815 // to be the base of the field being mapped.
7816 BP = CGF.LoadCXXThisAddress();
7817 } else if ((AE && isa<CXXThisExpr>(AE->getBase()->IgnoreParenImpCasts())) ||
7818 (OASE &&
7819 isa<CXXThisExpr>(OASE->getBase()->IgnoreParenImpCasts()))) {
7820 BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress();
7821 } else if (OAShE &&
7822 isa<CXXThisExpr>(OAShE->getBase()->IgnoreParenCasts())) {
7823 BP = Address(
7824 CGF.EmitScalarExpr(OAShE->getBase()),
7825 CGF.ConvertTypeForMem(OAShE->getBase()->getType()->getPointeeType()),
7826 CGF.getContext().getTypeAlignInChars(OAShE->getBase()->getType()));
7827 } else {
7828 // The base is the reference to the variable.
7829 // BP = &Var.
7830 BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress();
7831 if (const auto *VD =
7832 dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) {
7833 if (std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
7834 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) {
7835 if ((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
7836 ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
7837 *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
7839 RequiresReference = true;
7841 }
7842 }
7843 }
7844
7845 // If the variable is a pointer and is being dereferenced (i.e. is not
7846 // the last component), the base has to be the pointer itself, not its
7847 // reference. References are ignored for mapping purposes.
7848 QualType Ty =
7849 I->getAssociatedDeclaration()->getType().getNonReferenceType();
7850 if (Ty->isAnyPointerType() && std::next(I) != CE) {
7851 // No need to generate individual map information for the pointer, it
7852 // can be associated with the combined storage if shared memory mode is
7853 // active or the base declaration is not global variable.
7854 const auto *VD = dyn_cast<VarDecl>(I->getAssociatedDeclaration());
7856 !VD || VD->hasLocalStorage() || HasAttachPtr)
7857 BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
7858 else
7859 FirstPointerInComplexData = true;
7860 ++I;
7861 }
7862 }
7863
7864 // Track whether a component of the list should be marked as MEMBER_OF some
7865 // combined entry (for partial structs). Only the first PTR_AND_OBJ entry
7866 // in a component list should be marked as MEMBER_OF, all subsequent entries
7867 // do not belong to the base struct. E.g.
7868 // struct S2 s;
7869 // s.ps->ps->ps->f[:]
7870 // (1) (2) (3) (4)
7871 // ps(1) is a member pointer, ps(2) is a pointee of ps(1), so it is a
7872 // PTR_AND_OBJ entry; the PTR is ps(1), so MEMBER_OF the base struct. ps(3)
7873 // is the pointee of ps(2) which is not member of struct s, so it should not
7874 // be marked as such (it is still PTR_AND_OBJ).
7875 // The variable is initialized to false so that PTR_AND_OBJ entries which
7876 // are not struct members are not considered (e.g. array of pointers to
7877 // data).
7878 bool ShouldBeMemberOf = false;
7879
7880 // Variable keeping track of whether or not we have encountered a component
7881 // in the component list which is a member expression. Useful when we have a
7882 // pointer or a final array section, in which case it is the previous
7883 // component in the list which tells us whether we have a member expression.
7884 // E.g. X.f[:]
7885 // While processing the final array section "[:]" it is "f" which tells us
7886 // whether we are dealing with a member of a declared struct.
7887 const MemberExpr *EncounteredME = nullptr;
7888
7889 // Track for the total number of dimension. Start from one for the dummy
7890 // dimension.
7891 uint64_t DimSize = 1;
7892
7893 // Detects non-contiguous updates due to strided accesses.
7894 // Sets the 'IsNonContiguous' flag so that the 'MapType' bits are set
7895 // correctly when generating information to be passed to the runtime. The
7896 // flag is set to true if any array section has a stride not equal to 1, or
7897 // if the stride is not a constant expression (conservatively assumed
7898 // non-contiguous).
7899 bool IsNonContiguous =
7900 CombinedInfo.NonContigInfo.IsNonContiguous ||
7901 any_of(Components, [&](const auto &Component) {
7902 const auto *OASE =
7903 dyn_cast<ArraySectionExpr>(Component.getAssociatedExpression());
7904 if (!OASE)
7905 return false;
7906
7907 const Expr *StrideExpr = OASE->getStride();
7908 if (!StrideExpr)
7909 return false;
7910
7911 const auto Constant =
7912 StrideExpr->getIntegerConstantExpr(CGF.getContext());
7913 if (!Constant)
7914 return false;
7915
7916 return !Constant->isOne();
7917 });
7918
7919 bool IsPrevMemberReference = false;
7920
7921 bool IsPartialMapped =
7922 !PartialStruct.PreliminaryMapData.BasePointers.empty();
7923
7924 // We need to check if we will be encountering any MEs. If we do not
7925 // encounter any ME expression it means we will be mapping the whole struct.
7926 // In that case we need to skip adding an entry for the struct to the
7927 // CombinedInfo list and instead add an entry to the StructBaseCombinedInfo
7928 // list only when generating all info for clauses.
7929 bool IsMappingWholeStruct = true;
7930 if (!GenerateAllInfoForClauses) {
7931 IsMappingWholeStruct = false;
7932 } else {
7933 for (auto TempI = I; TempI != CE; ++TempI) {
7934 const MemberExpr *PossibleME =
7935 dyn_cast<MemberExpr>(TempI->getAssociatedExpression());
7936 if (PossibleME) {
7937 IsMappingWholeStruct = false;
7938 break;
7939 }
7940 }
7941 }
7942
7943 bool SeenFirstNonBinOpExprAfterAttachPtr = false;
7944 for (; I != CE; ++I) {
7945 // If we have a valid attach-ptr, we skip processing all components until
7946 // after the attach-ptr.
7947 if (HasAttachPtr && !SeenAttachPtr) {
7948 SeenAttachPtr = I->getAssociatedExpression() == AttachPtrExpr;
7949 continue;
7950 }
7951
7952 // After finding the attach pointer, skip binary-ops, to skip past
7953 // expressions like (p + 10), for a map like map(*(p + 10)), where p is
7954 // the attach-ptr.
7955 if (HasAttachPtr && !SeenFirstNonBinOpExprAfterAttachPtr) {
7956 const auto *BO = dyn_cast<BinaryOperator>(I->getAssociatedExpression());
7957 if (BO)
7958 continue;
7959
7960 // Found the first non-binary-operator component after attach
7961 SeenFirstNonBinOpExprAfterAttachPtr = true;
7962 BP = AttachPteeBaseAddr;
7963 }
7964
7965 // If the current component is member of a struct (parent struct) mark it.
7966 if (!EncounteredME) {
7967 EncounteredME = dyn_cast<MemberExpr>(I->getAssociatedExpression());
7968 // If we encounter a PTR_AND_OBJ entry from now on it should be marked
7969 // as MEMBER_OF the parent struct.
7970 if (EncounteredME) {
7971 ShouldBeMemberOf = true;
7972 // Do not emit as complex pointer if this is actually not array-like
7973 // expression.
7974 if (FirstPointerInComplexData) {
7975 QualType Ty = std::prev(I)
7976 ->getAssociatedDeclaration()
7977 ->getType()
7978 .getNonReferenceType();
7979 BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
7980 FirstPointerInComplexData = false;
7981 }
7982 }
7983 }
7984
7985 auto Next = std::next(I);
7986
7987 // We need to generate the addresses and sizes if this is the last
7988 // component, if the component is a pointer or if it is an array section
7989 // whose length can't be proved to be one. If this is a pointer, it
7990 // becomes the base address for the following components.
7991
7992 // A final array section, is one whose length can't be proved to be one.
7993 // If the map item is non-contiguous then we don't treat any array section
7994 // as final array section.
7995 bool IsFinalArraySection =
7996 !IsNonContiguous &&
7997 isFinalArraySectionExpression(I->getAssociatedExpression());
7998
7999 // If we have a declaration for the mapping use that, otherwise use
8000 // the base declaration of the map clause.
8001 const ValueDecl *MapDecl = (I->getAssociatedDeclaration())
8002 ? I->getAssociatedDeclaration()
8003 : BaseDecl;
8004 MapExpr = (I->getAssociatedExpression()) ? I->getAssociatedExpression()
8005 : MapExpr;
8006
8007 // Get information on whether the element is a pointer. Have to do a
8008 // special treatment for array sections given that they are built-in
8009 // types.
8010 const auto *OASE =
8011 dyn_cast<ArraySectionExpr>(I->getAssociatedExpression());
8012 const auto *OAShE =
8013 dyn_cast<OMPArrayShapingExpr>(I->getAssociatedExpression());
8014 const auto *UO = dyn_cast<UnaryOperator>(I->getAssociatedExpression());
8015 const auto *BO = dyn_cast<BinaryOperator>(I->getAssociatedExpression());
8016 bool IsPointer =
8017 OAShE ||
8020 ->isAnyPointerType()) ||
8021 I->getAssociatedExpression()->getType()->isAnyPointerType();
8022 bool IsMemberReference = isa<MemberExpr>(I->getAssociatedExpression()) &&
8023 MapDecl &&
8024 MapDecl->getType()->isLValueReferenceType();
8025 bool IsNonDerefPointer = IsPointer &&
8026 !(UO && UO->getOpcode() != UO_Deref) && !BO &&
8027 !IsNonContiguous;
8028
8029 if (OASE)
8030 ++DimSize;
8031
8032 if (Next == CE || IsMemberReference || IsNonDerefPointer ||
8033 IsFinalArraySection) {
8034 // If this is not the last component, we expect the pointer to be
8035 // associated with an array expression or member expression.
8036 assert((Next == CE ||
8037 isa<MemberExpr>(Next->getAssociatedExpression()) ||
8038 isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) ||
8039 isa<ArraySectionExpr>(Next->getAssociatedExpression()) ||
8040 isa<OMPArrayShapingExpr>(Next->getAssociatedExpression()) ||
8041 isa<UnaryOperator>(Next->getAssociatedExpression()) ||
8042 isa<BinaryOperator>(Next->getAssociatedExpression())) &&
8043 "Unexpected expression");
8044
8045 Address LB = Address::invalid();
8046 Address LowestElem = Address::invalid();
8047 auto &&EmitMemberExprBase = [](CodeGenFunction &CGF,
8048 const MemberExpr *E) {
8049 const Expr *BaseExpr = E->getBase();
8050 // If this is s.x, emit s as an lvalue. If it is s->x, emit s as a
8051 // scalar.
8052 LValue BaseLV;
8053 if (E->isArrow()) {
8054 LValueBaseInfo BaseInfo;
8055 TBAAAccessInfo TBAAInfo;
8056 Address Addr =
8057 CGF.EmitPointerWithAlignment(BaseExpr, &BaseInfo, &TBAAInfo);
8058 QualType PtrTy = BaseExpr->getType()->getPointeeType();
8059 BaseLV = CGF.MakeAddrLValue(Addr, PtrTy, BaseInfo, TBAAInfo);
8060 } else {
8061 BaseLV = CGF.EmitOMPSharedLValue(BaseExpr);
8062 }
8063 return BaseLV;
8064 };
8065 if (OAShE) {
8066 LowestElem = LB =
8067 Address(CGF.EmitScalarExpr(OAShE->getBase()),
8069 OAShE->getBase()->getType()->getPointeeType()),
8071 OAShE->getBase()->getType()));
8072 } else if (IsMemberReference) {
8073 const auto *ME = cast<MemberExpr>(I->getAssociatedExpression());
8074 LValue BaseLVal = EmitMemberExprBase(CGF, ME);
8075 LowestElem = CGF.EmitLValueForFieldInitialization(
8076 BaseLVal, cast<FieldDecl>(MapDecl))
8077 .getAddress();
8078 LB = CGF.EmitLoadOfReferenceLValue(LowestElem, MapDecl->getType())
8079 .getAddress();
8080 } else {
8081 LowestElem = LB =
8082 CGF.EmitOMPSharedLValue(I->getAssociatedExpression())
8083 .getAddress();
8084 }
8085
8086 // Save the final LowestElem, to use it as the pointee in attach maps,
8087 // if emitted.
8088 if (Next == CE)
8089 FinalLowestElem = LowestElem;
8090
8091 // If this component is a pointer inside the base struct then we don't
8092 // need to create any entry for it - it will be combined with the object
8093 // it is pointing to into a single PTR_AND_OBJ entry.
8094 bool IsMemberPointerOrAddr =
8095 EncounteredME &&
8096 (((IsPointer || ForDeviceAddr) &&
8097 I->getAssociatedExpression() == EncounteredME) ||
8098 (IsPrevMemberReference && !IsPointer) ||
8099 (IsMemberReference && Next != CE &&
8100 !Next->getAssociatedExpression()->getType()->isPointerType()));
8101 if (!OverlappedElements.empty() && Next == CE) {
8102 // Handle base element with the info for overlapped elements.
8103 assert(!PartialStruct.Base.isValid() && "The base element is set.");
8104 assert(!IsPointer &&
8105 "Unexpected base element with the pointer type.");
8106 // Mark the whole struct as the struct that requires allocation on the
8107 // device.
8108 PartialStruct.LowestElem = {0, LowestElem};
8109 CharUnits TypeSize = CGF.getContext().getTypeSizeInChars(
8110 I->getAssociatedExpression()->getType());
8111 Address HB = CGF.Builder.CreateConstGEP(
8113 LowestElem, CGF.VoidPtrTy, CGF.Int8Ty),
8114 TypeSize.getQuantity() - 1);
8115 PartialStruct.HighestElem = {
8116 std::numeric_limits<decltype(
8117 PartialStruct.HighestElem.first)>::max(),
8118 HB};
8119 PartialStruct.Base = BP;
8120 PartialStruct.LB = LB;
8121 assert(
8122 PartialStruct.PreliminaryMapData.BasePointers.empty() &&
8123 "Overlapped elements must be used only once for the variable.");
8124 std::swap(PartialStruct.PreliminaryMapData, CombinedInfo);
8125 // Emit data for non-overlapped data.
8126 OpenMPOffloadMappingFlags Flags =
8127 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF |
8128 getMapTypeBits(MapType, MapModifiers, MotionModifiers, IsImplicit,
8129 /*AddPtrFlag=*/false,
8130 /*AddIsTargetParamFlag=*/false, IsNonContiguous);
8131 CopyOverlappedEntryGaps CopyGaps(CGF, CombinedInfo, Flags, MapDecl,
8132 MapExpr, BP, LB, IsNonContiguous,
8133 DimSize);
8134 // Do bitcopy of all non-overlapped structure elements.
8136 Component : OverlappedElements) {
8137 for (const OMPClauseMappableExprCommon::MappableComponent &MC :
8138 Component) {
8139 if (const ValueDecl *VD = MC.getAssociatedDeclaration()) {
8140 if (const auto *FD = dyn_cast<FieldDecl>(VD)) {
8141 CopyGaps.processField(MC, FD, EmitMemberExprBase);
8142 }
8143 }
8144 }
8145 }
8146 CopyGaps.copyUntilEnd(HB);
8147 break;
8148 }
8149 llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression());
8150 // Skip adding an entry in the CurInfo of this combined entry if the
8151 // whole struct is currently being mapped. The struct needs to be added
8152 // in the first position before any data internal to the struct is being
8153 // mapped.
8154 // Skip adding an entry in the CurInfo of this combined entry if the
8155 // PartialStruct.PreliminaryMapData.BasePointers has been mapped.
8156 if ((!IsMemberPointerOrAddr && !IsPartialMapped) ||
8157 (Next == CE && MapType != OMPC_MAP_unknown)) {
8158 if (!IsMappingWholeStruct) {
8159 CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
8160 CombinedInfo.BasePointers.push_back(BP.emitRawPointer(CGF));
8161 CombinedInfo.DevicePtrDecls.push_back(nullptr);
8162 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
8163 CombinedInfo.Pointers.push_back(LB.emitRawPointer(CGF));
8164 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
8165 Size, CGF.Int64Ty, /*isSigned=*/true));
8166 CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
8167 : 1);
8168 } else {
8169 StructBaseCombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
8170 StructBaseCombinedInfo.BasePointers.push_back(
8171 BP.emitRawPointer(CGF));
8172 StructBaseCombinedInfo.DevicePtrDecls.push_back(nullptr);
8173 StructBaseCombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
8174 StructBaseCombinedInfo.Pointers.push_back(LB.emitRawPointer(CGF));
8175 StructBaseCombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
8176 Size, CGF.Int64Ty, /*isSigned=*/true));
8177 StructBaseCombinedInfo.NonContigInfo.Dims.push_back(
8178 IsNonContiguous ? DimSize : 1);
8179 }
8180
8181 // If Mapper is valid, the last component inherits the mapper.
8182 bool HasMapper = Mapper && Next == CE;
8183 if (!IsMappingWholeStruct)
8184 CombinedInfo.Mappers.push_back(HasMapper ? Mapper : nullptr);
8185 else
8186 StructBaseCombinedInfo.Mappers.push_back(HasMapper ? Mapper
8187 : nullptr);
8188
8189 // We need to add a pointer flag for each map that comes from the
8190 // same expression except for the first one. We also need to signal
8191 // this map is the first one that relates with the current capture
8192 // (there is a set of entries for each capture).
8193 OpenMPOffloadMappingFlags Flags = getMapTypeBits(
8194 MapType, MapModifiers, MotionModifiers, IsImplicit,
8195 !IsExpressionFirstInfo || RequiresReference ||
8196 FirstPointerInComplexData || IsMemberReference,
8197 IsCaptureFirstInfo && !RequiresReference, IsNonContiguous);
8198
8199 if (!IsExpressionFirstInfo || IsMemberReference) {
8200 // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as well,
8201 // then we reset the TO/FROM/ALWAYS/DELETE/CLOSE flags.
8202 if (IsPointer || (IsMemberReference && Next != CE))
8203 Flags &= ~(OpenMPOffloadMappingFlags::OMP_MAP_TO |
8204 OpenMPOffloadMappingFlags::OMP_MAP_FROM |
8205 OpenMPOffloadMappingFlags::OMP_MAP_ALWAYS |
8206 OpenMPOffloadMappingFlags::OMP_MAP_DELETE |
8207 OpenMPOffloadMappingFlags::OMP_MAP_CLOSE);
8208
8209 if (ShouldBeMemberOf) {
8210 // Set placeholder value MEMBER_OF=FFFF to indicate that the flag
8211 // should be later updated with the correct value of MEMBER_OF.
8212 Flags |= OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF;
8213 // From now on, all subsequent PTR_AND_OBJ entries should not be
8214 // marked as MEMBER_OF.
8215 ShouldBeMemberOf = false;
8216 }
8217 }
8218
8219 if (!IsMappingWholeStruct)
8220 CombinedInfo.Types.push_back(Flags);
8221 else
8222 StructBaseCombinedInfo.Types.push_back(Flags);
8223 }
8224
8225 // If we have encountered a member expression so far, keep track of the
8226 // mapped member. If the parent is "*this", then the value declaration
8227 // is nullptr.
8228 if (EncounteredME) {
8229 const auto *FD = cast<FieldDecl>(EncounteredME->getMemberDecl());
8230 unsigned FieldIndex = FD->getFieldIndex();
8231
8232 // Update info about the lowest and highest elements for this struct
8233 if (!PartialStruct.Base.isValid()) {
8234 PartialStruct.LowestElem = {FieldIndex, LowestElem};
8235 if (IsFinalArraySection && OASE) {
8236 Address HB =
8237 CGF.EmitArraySectionExpr(OASE, /*IsLowerBound=*/false)
8238 .getAddress();
8239 PartialStruct.HighestElem = {FieldIndex, HB};
8240 } else {
8241 PartialStruct.HighestElem = {FieldIndex, LowestElem};
8242 }
8243 PartialStruct.Base = BP;
8244 PartialStruct.LB = BP;
8245 } else if (FieldIndex < PartialStruct.LowestElem.first) {
8246 PartialStruct.LowestElem = {FieldIndex, LowestElem};
8247 } else if (FieldIndex > PartialStruct.HighestElem.first) {
8248 if (IsFinalArraySection && OASE) {
8249 Address HB =
8250 CGF.EmitArraySectionExpr(OASE, /*IsLowerBound=*/false)
8251 .getAddress();
8252 PartialStruct.HighestElem = {FieldIndex, HB};
8253 } else {
8254 PartialStruct.HighestElem = {FieldIndex, LowestElem};
8255 }
8256 }
8257 }
8258
8259 // Need to emit combined struct for array sections.
8260 if (IsFinalArraySection || IsNonContiguous)
8261 PartialStruct.IsArraySection = true;
8262
8263 // If we have a final array section, we are done with this expression.
8264 if (IsFinalArraySection)
8265 break;
8266
8267 // The pointer becomes the base for the next element.
8268 if (Next != CE)
8269 BP = IsMemberReference ? LowestElem : LB;
8270 if (!IsPartialMapped)
8271 IsExpressionFirstInfo = false;
8272 IsCaptureFirstInfo = false;
8273 FirstPointerInComplexData = false;
8274 IsPrevMemberReference = IsMemberReference;
8275 } else if (FirstPointerInComplexData) {
8276 QualType Ty = Components.rbegin()
8277 ->getAssociatedDeclaration()
8278 ->getType()
8279 .getNonReferenceType();
8280 BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
8281 FirstPointerInComplexData = false;
8282 }
8283 }
8284 // If ran into the whole component - allocate the space for the whole
8285 // record.
8286 if (!EncounteredME)
8287 PartialStruct.HasCompleteRecord = true;
8288
8289 // Populate ATTACH information for later processing by emitAttachEntry.
8290 if (shouldEmitAttachEntry(AttachPtrExpr, BaseDecl, CGF, CurDir)) {
8291 AttachInfo.AttachPtrAddr = AttachPtrAddr;
8292 AttachInfo.AttachPteeAddr = FinalLowestElem;
8293 AttachInfo.AttachPtrDecl = BaseDecl;
8294 AttachInfo.AttachMapExpr = MapExpr;
8295 }
8296
8297 if (!IsNonContiguous)
8298 return;
8299
8300 const ASTContext &Context = CGF.getContext();
8301
8302 // For supporting stride in array section, we need to initialize the first
8303 // dimension size as 1, first offset as 0, and first count as 1
8304 MapValuesArrayTy CurOffsets = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 0)};
8305 MapValuesArrayTy CurCounts = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)};
8306 MapValuesArrayTy CurStrides;
8307 MapValuesArrayTy DimSizes{llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)};
8308 uint64_t ElementTypeSize;
8309
8310 // Collect Size information for each dimension and get the element size as
8311 // the first Stride. For example, for `int arr[10][10]`, the DimSizes
8312 // should be [10, 10] and the first stride is 4 btyes.
8313 for (const OMPClauseMappableExprCommon::MappableComponent &Component :
8314 Components) {
8315 const Expr *AssocExpr = Component.getAssociatedExpression();
8316 const auto *OASE = dyn_cast<ArraySectionExpr>(AssocExpr);
8317
8318 if (!OASE)
8319 continue;
8320
8321 QualType Ty = ArraySectionExpr::getBaseOriginalType(OASE->getBase());
8322 auto *CAT = Context.getAsConstantArrayType(Ty);
8323 auto *VAT = Context.getAsVariableArrayType(Ty);
8324
8325 // We need all the dimension size except for the last dimension.
8326 assert((VAT || CAT || &Component == &*Components.begin()) &&
8327 "Should be either ConstantArray or VariableArray if not the "
8328 "first Component");
8329
8330 // Get element size if CurStrides is empty.
8331 if (CurStrides.empty()) {
8332 const Type *ElementType = nullptr;
8333 if (CAT)
8334 ElementType = CAT->getElementType().getTypePtr();
8335 else if (VAT)
8336 ElementType = VAT->getElementType().getTypePtr();
8337 else if (&Component == &*Components.begin()) {
8338 // If the base is a raw pointer (e.g. T *data with data[a:b:c]),
8339 // there was no earlier CAT/VAT/array handling to establish
8340 // ElementType. Capture the pointee type now so that subsequent
8341 // components (offset/length/stride) have a concrete element type to
8342 // work with. This makes pointer-backed sections behave consistently
8343 // with CAT/VAT/array bases.
8344 if (const auto *PtrType = Ty->getAs<PointerType>())
8345 ElementType = PtrType->getPointeeType().getTypePtr();
8346 } else {
8347 // Any component after the first should never have a raw pointer type;
8348 // by this point. ElementType must already be known (set above or in
8349 // prior array / CAT / VAT handling).
8350 assert(!Ty->isPointerType() &&
8351 "Non-first components should not be raw pointers");
8352 }
8353
8354 // At this stage, if ElementType was a base pointer and we are in the
8355 // first iteration, it has been computed.
8356 if (ElementType) {
8357 // For the case that having pointer as base, we need to remove one
8358 // level of indirection.
8359 if (&Component != &*Components.begin())
8360 ElementType = ElementType->getPointeeOrArrayElementType();
8361 ElementTypeSize =
8362 Context.getTypeSizeInChars(ElementType).getQuantity();
8363 CurStrides.push_back(
8364 llvm::ConstantInt::get(CGF.Int64Ty, ElementTypeSize));
8365 }
8366 }
8367 // Get dimension value except for the last dimension since we don't need
8368 // it.
8369 if (DimSizes.size() < Components.size() - 1) {
8370 if (CAT)
8371 DimSizes.push_back(
8372 llvm::ConstantInt::get(CGF.Int64Ty, CAT->getZExtSize()));
8373 else if (VAT)
8374 DimSizes.push_back(CGF.Builder.CreateIntCast(
8375 CGF.EmitScalarExpr(VAT->getSizeExpr()), CGF.Int64Ty,
8376 /*IsSigned=*/false));
8377 }
8378 }
8379
8380 // Skip the dummy dimension since we have already have its information.
8381 auto *DI = DimSizes.begin() + 1;
8382 // Product of dimension.
8383 llvm::Value *DimProd =
8384 llvm::ConstantInt::get(CGF.CGM.Int64Ty, ElementTypeSize);
8385
8386 // Collect info for non-contiguous. Notice that offset, count, and stride
8387 // are only meaningful for array-section, so we insert a null for anything
8388 // other than array-section.
8389 // Also, the size of offset, count, and stride are not the same as
8390 // pointers, base_pointers, sizes, or dims. Instead, the size of offset,
8391 // count, and stride are the same as the number of non-contiguous
8392 // declaration in target update to/from clause.
8393 for (const OMPClauseMappableExprCommon::MappableComponent &Component :
8394 Components) {
8395 const Expr *AssocExpr = Component.getAssociatedExpression();
8396
8397 if (const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr)) {
8398 llvm::Value *Offset = CGF.Builder.CreateIntCast(
8399 CGF.EmitScalarExpr(AE->getIdx()), CGF.Int64Ty,
8400 /*isSigned=*/false);
8401 CurOffsets.push_back(Offset);
8402 CurCounts.push_back(llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/1));
8403 CurStrides.push_back(CurStrides.back());
8404 continue;
8405 }
8406
8407 const auto *OASE = dyn_cast<ArraySectionExpr>(AssocExpr);
8408
8409 if (!OASE)
8410 continue;
8411
8412 // Offset
8413 const Expr *OffsetExpr = OASE->getLowerBound();
8414 llvm::Value *Offset = nullptr;
8415 if (!OffsetExpr) {
8416 // If offset is absent, then we just set it to zero.
8417 Offset = llvm::ConstantInt::get(CGF.Int64Ty, 0);
8418 } else {
8419 Offset = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(OffsetExpr),
8420 CGF.Int64Ty,
8421 /*isSigned=*/false);
8422 }
8423 CurOffsets.push_back(Offset);
8424
8425 // Count
8426 const Expr *CountExpr = OASE->getLength();
8427 llvm::Value *Count = nullptr;
8428 if (!CountExpr) {
8429 // In Clang, once a high dimension is an array section, we construct all
8430 // the lower dimension as array section, however, for case like
8431 // arr[0:2][2], Clang construct the inner dimension as an array section
8432 // but it actually is not in an array section form according to spec.
8433 if (!OASE->getColonLocFirst().isValid() &&
8434 !OASE->getColonLocSecond().isValid()) {
8435 Count = llvm::ConstantInt::get(CGF.Int64Ty, 1);
8436 } else {
8437 // OpenMP 5.0, 2.1.5 Array Sections, Description.
8438 // When the length is absent it defaults to ⌈(size −
8439 // lower-bound)/stride⌉, where size is the size of the array
8440 // dimension.
8441 const Expr *StrideExpr = OASE->getStride();
8442 llvm::Value *Stride =
8443 StrideExpr
8444 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr),
8445 CGF.Int64Ty, /*isSigned=*/false)
8446 : nullptr;
8447 if (Stride)
8448 Count = CGF.Builder.CreateUDiv(
8449 CGF.Builder.CreateNUWSub(*DI, Offset), Stride);
8450 else
8451 Count = CGF.Builder.CreateNUWSub(*DI, Offset);
8452 }
8453 } else {
8454 Count = CGF.EmitScalarExpr(CountExpr);
8455 }
8456 Count = CGF.Builder.CreateIntCast(Count, CGF.Int64Ty, /*isSigned=*/false);
8457 CurCounts.push_back(Count);
8458
8459 // Stride_n' = Stride_n * (D_0 * D_1 ... * D_n-1) * Unit size
8460 // Take `int arr[5][5][5]` and `arr[0:2:2][1:2:1][0:2:2]` as an example:
8461 // Offset Count Stride
8462 // D0 0 1 4 (int) <- dummy dimension
8463 // D1 0 2 8 (2 * (1) * 4)
8464 // D2 1 2 20 (1 * (1 * 5) * 4)
8465 // D3 0 2 200 (2 * (1 * 5 * 4) * 4)
8466 const Expr *StrideExpr = OASE->getStride();
8467 llvm::Value *Stride =
8468 StrideExpr
8469 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr),
8470 CGF.Int64Ty, /*isSigned=*/false)
8471 : nullptr;
8472 DimProd = CGF.Builder.CreateNUWMul(DimProd, *(DI - 1));
8473 if (Stride)
8474 CurStrides.push_back(CGF.Builder.CreateNUWMul(DimProd, Stride));
8475 else
8476 CurStrides.push_back(DimProd);
8477 if (DI != DimSizes.end())
8478 ++DI;
8479 }
8480
8481 CombinedInfo.NonContigInfo.Offsets.push_back(CurOffsets);
8482 CombinedInfo.NonContigInfo.Counts.push_back(CurCounts);
8483 CombinedInfo.NonContigInfo.Strides.push_back(CurStrides);
8484 }
8485
8486 /// Return the adjusted map modifiers if the declaration a capture refers to
8487 /// appears in a first-private clause. This is expected to be used only with
8488 /// directives that start with 'target'.
8489 OpenMPOffloadMappingFlags
8490 getMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap) const {
8491 assert(Cap.capturesVariable() && "Expected capture by reference only!");
8492
8493 // A first private variable captured by reference will use only the
8494 // 'private ptr' and 'map to' flag. Return the right flags if the captured
8495 // declaration is known as first-private in this handler.
8496 if (FirstPrivateDecls.count(Cap.getCapturedVar())) {
8497 if (Cap.getCapturedVar()->getType()->isAnyPointerType())
8498 return OpenMPOffloadMappingFlags::OMP_MAP_TO |
8499 OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ;
8500 return OpenMPOffloadMappingFlags::OMP_MAP_PRIVATE |
8501 OpenMPOffloadMappingFlags::OMP_MAP_TO;
8502 }
8503 auto I = LambdasMap.find(Cap.getCapturedVar()->getCanonicalDecl());
8504 if (I != LambdasMap.end())
8505 // for map(to: lambda): using user specified map type.
8506 return getMapTypeBits(
8507 I->getSecond()->getMapType(), I->getSecond()->getMapTypeModifiers(),
8508 /*MotionModifiers=*/{}, I->getSecond()->isImplicit(),
8509 /*AddPtrFlag=*/false,
8510 /*AddIsTargetParamFlag=*/false,
8511 /*isNonContiguous=*/false);
8512 return OpenMPOffloadMappingFlags::OMP_MAP_TO |
8513 OpenMPOffloadMappingFlags::OMP_MAP_FROM;
8514 }
8515
/// Flatten the LLVM struct layout of \p RD into an ordered list of non-empty
/// fields, recursing into non-virtual and virtual base classes.
/// \param RD     the record whose layout is flattened.
/// \param Layout out-parameter receiving the fields in LLVM field order.
/// \param AsBase whether \p RD is laid out as a base subobject (uses the
///               base-subobject LLVM type).
8516 void getPlainLayout(const CXXRecordDecl *RD,
8517 llvm::SmallVectorImpl<const FieldDecl *> &Layout,
8518 bool AsBase) const {
8519 const CGRecordLayout &RL = CGF.getTypes().getCGRecordLayout(RD);
8520
// When laying out as a base subobject, use the base-subobject LLVM type.
8521 llvm::StructType *St =
8522 AsBase ? RL.getBaseSubobjectLLVMType() : RL.getLLVMType();
8523
// One slot per LLVM struct element; each slot holds either a base class or
// a field, filled in below.
8524 unsigned NumElements = St->getNumElements();
8525 llvm::SmallVector<
8526 llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>, 4>
8527 RecordLayout(NumElements);
8528
8529 // Fill bases.
8530 for (const auto &I : RD->bases()) {
8531 if (I.isVirtual())
8532 continue;
8533
8534 QualType BaseTy = I.getType();
8535 const auto *Base = BaseTy->getAsCXXRecordDecl();
8536 // Ignore empty bases.
8537 if (isEmptyRecordForLayout(CGF.getContext(), BaseTy) ||
8538 CGF.getContext()
8539 .getASTRecordLayout(Base)
8541 .isZero())
8542 continue;
8543
8544 unsigned FieldIndex = RL.getNonVirtualBaseLLVMFieldNo(Base);
8545 RecordLayout[FieldIndex] = Base;
8546 }
8547 // Fill in virtual bases.
8548 for (const auto &I : RD->vbases()) {
8549 QualType BaseTy = I.getType();
8550 // Ignore empty bases.
8551 if (isEmptyRecordForLayout(CGF.getContext(), BaseTy))
8552 continue;
8553
8554 const auto *Base = BaseTy->getAsCXXRecordDecl();
8555 unsigned FieldIndex = RL.getVirtualBaseIndex(Base);
// A slot may already be taken by a non-virtual base recorded above; keep
// the first occupant.
8556 if (RecordLayout[FieldIndex])
8557 continue;
8558 RecordLayout[FieldIndex] = Base;
8559 }
8560 // Fill in all the fields.
8561 assert(!RD->isUnion() && "Unexpected union.");
8562 for (const auto *Field : RD->fields()) {
8563 // Fill in non-bitfields. (Bitfields always use a zero pattern, which we
8564 // will fill in later.)
8565 if (!Field->isBitField() &&
8566 !isEmptyFieldForLayout(CGF.getContext(), Field)) {
8567 unsigned FieldIndex = RL.getLLVMFieldNo(Field);
8568 RecordLayout[FieldIndex] = Field;
8569 }
8570 }
// Walk the slots in layout order: recurse into base classes (as base
// subobjects) and append plain fields to the result.
8571 for (const llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>
8572 &Data : RecordLayout) {
8573 if (Data.isNull())
8574 continue;
8575 if (const auto *Base = dyn_cast<const CXXRecordDecl *>(Data))
8576 getPlainLayout(Base, Layout, /*AsBase=*/true);
8577 else
8578 Layout.push_back(cast<const FieldDecl *>(Data));
8579 }
8580 }
8581
8582 /// Returns the address corresponding to \p PointerExpr.
8583 static Address getAttachPtrAddr(const Expr *PointerExpr,
8584 CodeGenFunction &CGF) {
8585 assert(PointerExpr && "Cannot get addr from null attach-ptr expr");
8586 Address AttachPtrAddr = Address::invalid();
8587
8588 if (auto *DRE = dyn_cast<DeclRefExpr>(PointerExpr)) {
8589 // If the pointer is a variable, we can use its address directly.
8590 AttachPtrAddr = CGF.EmitLValue(DRE).getAddress();
8591 } else if (auto *OASE = dyn_cast<ArraySectionExpr>(PointerExpr)) {
8592 AttachPtrAddr =
8593 CGF.EmitArraySectionExpr(OASE, /*IsLowerBound=*/true).getAddress();
8594 } else if (auto *ASE = dyn_cast<ArraySubscriptExpr>(PointerExpr)) {
8595 AttachPtrAddr = CGF.EmitLValue(ASE).getAddress();
8596 } else if (auto *ME = dyn_cast<MemberExpr>(PointerExpr)) {
8597 AttachPtrAddr = CGF.EmitMemberExpr(ME).getAddress();
8598 } else if (auto *UO = dyn_cast<UnaryOperator>(PointerExpr)) {
8599 assert(UO->getOpcode() == UO_Deref &&
8600 "Unexpected unary-operator on attach-ptr-expr");
8601 AttachPtrAddr = CGF.EmitLValue(UO).getAddress();
8602 }
8603 assert(AttachPtrAddr.isValid() &&
8604 "Failed to get address for attach pointer expression");
8605 return AttachPtrAddr;
8606 }
8607
8608 /// Get the address of the attach pointer, and a load from it, to get the
8609 /// pointee base address.
8610 /// \return A pair containing AttachPtrAddr and AttachPteeBaseAddr. The pair
8611 /// contains invalid addresses if \p AttachPtrExpr is null.
8612 static std::pair<Address, Address>
8613 getAttachPtrAddrAndPteeBaseAddr(const Expr *AttachPtrExpr,
8614 CodeGenFunction &CGF) {
8615
// No attach pointer expression: nothing to compute.
8616 if (!AttachPtrExpr)
8617 return {Address::invalid(), Address::invalid()};
8618
// Address of the pointer itself.
8619 Address AttachPtrAddr = getAttachPtrAddr(AttachPtrExpr, CGF);
8620 assert(AttachPtrAddr.isValid() && "Invalid attach pointer addr");
8621
8622 QualType AttachPtrType =
8625
// Load through the pointer to obtain the base address of the pointee.
8626 Address AttachPteeBaseAddr = CGF.EmitLoadOfPointer(
8627 AttachPtrAddr, AttachPtrType->castAs<PointerType>());
8628 assert(AttachPteeBaseAddr.isValid() && "Invalid attach pointee base addr");
8629
8630 return {AttachPtrAddr, AttachPteeBaseAddr};
8631 }
8632
8633 /// Returns whether an attach entry should be emitted for a map on
8634 /// \p MapBaseDecl on the directive \p CurDir.
8635 static bool
8636 shouldEmitAttachEntry(const Expr *PointerExpr, const ValueDecl *MapBaseDecl,
8637 CodeGenFunction &CGF,
8638 llvm::PointerUnion<const OMPExecutableDirective *,
8639 const OMPDeclareMapperDecl *>
8640 CurDir) {
// Without an attach-ptr expression there is nothing to attach.
8641 if (!PointerExpr)
8642 return false;
8643
8644 // Pointer attachment is needed at map-entering time or for declare
8645 // mappers.
8646 return isa<const OMPDeclareMapperDecl *>(CurDir) ||
8649 ->getDirectiveKind());
8650 }
8651
8652 /// Computes the attach-ptr expr for \p Components, and updates various maps
8653 /// with the information.
8654 /// It internally calls OMPClauseMappableExprCommon::findAttachPtrExpr()
8655 /// with the OpenMPDirectiveKind extracted from \p CurDir.
8656 /// It updates AttachPtrComputationOrderMap, AttachPtrComponentDepthMap, and
8657 /// AttachPtrExprMap.
8658 void collectAttachPtrExprInfo(
8660 llvm::PointerUnion<const OMPExecutableDirective *,
8661 const OMPDeclareMapperDecl *>
8662 CurDir) {
8663
// Declare-mapper bodies have no executable directive to query, so use
// OMPD_declare_mapper as the directive kind in that case.
8664 OpenMPDirectiveKind CurDirectiveID =
8666 ? OMPD_declare_mapper
8667 : cast<const OMPExecutableDirective *>(CurDir)->getDirectiveKind();
8668
8669 const auto &[AttachPtrExpr, Depth] =
8671 CurDirectiveID);
8672
// Record first-seen order and component depth for the attach-ptr expr, and
// the expr for this component list. try_emplace keeps the existing entry
// when the key was already recorded.
8673 AttachPtrComputationOrderMap.try_emplace(
8674 AttachPtrExpr, AttachPtrComputationOrderMap.size());
8675 AttachPtrComponentDepthMap.try_emplace(AttachPtrExpr, Depth);
8676 AttachPtrExprMap.try_emplace(Components, AttachPtrExpr);
8677 }
8678
8679 /// Generate all the base pointers, section pointers, sizes, map types, and
8680 /// mappers for the extracted mappable expressions (all included in \a
8681 /// CombinedInfo). Also, for each item that relates with a device pointer, a
8682 /// pair of the relevant declaration and index where it occurs is appended to
8683 /// the device pointers info array.
8684 void generateAllInfoForClauses(
8685 ArrayRef<const OMPClause *> Clauses, MapCombinedInfoTy &CombinedInfo,
8686 llvm::OpenMPIRBuilder &OMPBuilder,
8687 const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
8688 llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
8689 // We have to process the component lists that relate with the same
8690 // declaration in a single chunk so that we can generate the map flags
8691 // correctly. Therefore, we organize all lists in a map.
8692 enum MapKind { Present, Allocs, Other, Total };
8693 llvm::MapVector<CanonicalDeclPtr<const Decl>,
8694 SmallVector<SmallVector<MapInfo, 8>, 4>>
8695 Info;
8696
8697 // Helper function to fill the information map for the different supported
8698 // clauses.
8699 auto &&InfoGen =
8700 [&Info, &SkipVarSet](
8701 const ValueDecl *D, MapKind Kind,
8703 OpenMPMapClauseKind MapType,
8704 ArrayRef<OpenMPMapModifierKind> MapModifiers,
8705 ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
8706 bool ReturnDevicePointer, bool IsImplicit, const ValueDecl *Mapper,
8707 const Expr *VarRef = nullptr, bool ForDeviceAddr = false) {
8708 if (SkipVarSet.contains(D))
8709 return;
// One bucket per MapKind ('Total' sizes the per-decl vector).
8710 auto It = Info.try_emplace(D, Total).first;
8711 It->second[Kind].emplace_back(
8712 L, MapType, MapModifiers, MotionModifiers, ReturnDevicePointer,
8713 IsImplicit, Mapper, VarRef, ForDeviceAddr);
8714 };
8715
// Pass 1: record all 'map' clause component lists, classified by kind.
8716 for (const auto *Cl : Clauses) {
8717 const auto *C = dyn_cast<OMPMapClause>(Cl);
8718 if (!C)
8719 continue;
8720 MapKind Kind = Other;
8721 if (llvm::is_contained(C->getMapTypeModifiers(),
8722 OMPC_MAP_MODIFIER_present))
8723 Kind = Present;
8724 else if (C->getMapType() == OMPC_MAP_alloc)
8725 Kind = Allocs;
8726 const auto *EI = C->getVarRefs().begin();
8727 for (const auto L : C->component_lists()) {
8728 const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
8729 InfoGen(std::get<0>(L), Kind, std::get<1>(L), C->getMapType(),
8730 C->getMapTypeModifiers(), {},
8731 /*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(L),
8732 E);
8733 ++EI;
8734 }
8735 }
// Pass 2: record 'to' motion clauses as map-to entries; iterator
// modifiers require emitting the iterator variable first.
8736 for (const auto *Cl : Clauses) {
8737 const auto *C = dyn_cast<OMPToClause>(Cl);
8738 if (!C)
8739 continue;
8740 MapKind Kind = Other;
8741 if (llvm::is_contained(C->getMotionModifiers(),
8742 OMPC_MOTION_MODIFIER_present))
8743 Kind = Present;
8744 if (llvm::is_contained(C->getMotionModifiers(),
8745 OMPC_MOTION_MODIFIER_iterator)) {
8746 if (auto *IteratorExpr = dyn_cast<OMPIteratorExpr>(
8747 C->getIteratorModifier()->IgnoreParenImpCasts())) {
8748 const auto *VD = cast<VarDecl>(IteratorExpr->getIteratorDecl(0));
8749 CGF.EmitVarDecl(*VD);
8750 }
8751 }
8752
8753 const auto *EI = C->getVarRefs().begin();
8754 for (const auto L : C->component_lists()) {
8755 InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_to, {},
8756 C->getMotionModifiers(), /*ReturnDevicePointer=*/false,
8757 C->isImplicit(), std::get<2>(L), *EI);
8758 ++EI;
8759 }
8760 }
// Pass 3: record 'from' motion clauses as map-from entries.
8761 for (const auto *Cl : Clauses) {
8762 const auto *C = dyn_cast<OMPFromClause>(Cl);
8763 if (!C)
8764 continue;
8765 MapKind Kind = Other;
8766 if (llvm::is_contained(C->getMotionModifiers(),
8767 OMPC_MOTION_MODIFIER_present))
8768 Kind = Present;
8769 if (llvm::is_contained(C->getMotionModifiers(),
8770 OMPC_MOTION_MODIFIER_iterator)) {
8771 if (auto *IteratorExpr = dyn_cast<OMPIteratorExpr>(
8772 C->getIteratorModifier()->IgnoreParenImpCasts())) {
8773 const auto *VD = cast<VarDecl>(IteratorExpr->getIteratorDecl(0));
8774 CGF.EmitVarDecl(*VD);
8775 }
8776 }
8777
8778 const auto *EI = C->getVarRefs().begin();
8779 for (const auto L : C->component_lists()) {
8780 InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_from, {},
8781 C->getMotionModifiers(),
8782 /*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(L),
8783 *EI);
8784 ++EI;
8785 }
8786 }
8787
8788 // Look at the use_device_ptr and use_device_addr clauses information and
8789 // mark the existing map entries as such. If there is no map information for
8790 // an entry in the use_device_ptr and use_device_addr list, we create one
8791 // with map type 'return_param' and zero size section. It is the user's
8792 // fault if that was not mapped before. If there is no map information, then
8793 // we defer the emission of that entry until all the maps for the same VD
8794 // have been handled.
8795 MapCombinedInfoTy UseDeviceDataCombinedInfo;
8796
8797 auto &&UseDeviceDataCombinedInfoGen =
8798 [&UseDeviceDataCombinedInfo](const ValueDecl *VD, llvm::Value *Ptr,
8799 CodeGenFunction &CGF, bool IsDevAddr) {
8800 UseDeviceDataCombinedInfo.Exprs.push_back(VD);
8801 UseDeviceDataCombinedInfo.BasePointers.emplace_back(Ptr);
8802 UseDeviceDataCombinedInfo.DevicePtrDecls.emplace_back(VD);
8803 UseDeviceDataCombinedInfo.DevicePointers.emplace_back(
8804 IsDevAddr ? DeviceInfoTy::Address : DeviceInfoTy::Pointer);
8805 // FIXME: For use_device_addr on array-sections, this should
8806 // be the starting address of the section.
8807 // e.g. int *p;
8808 // ... use_device_addr(p[3])
8809 // &p[0], &p[3], /*size=*/0, RETURN_PARAM
8810 UseDeviceDataCombinedInfo.Pointers.push_back(Ptr);
8811 UseDeviceDataCombinedInfo.Sizes.push_back(
8812 llvm::Constant::getNullValue(CGF.Int64Ty));
8813 UseDeviceDataCombinedInfo.Types.push_back(
8814 OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM);
8815 UseDeviceDataCombinedInfo.Mappers.push_back(nullptr);
8816 };
8817
8818 auto &&MapInfoGen =
8819 [&UseDeviceDataCombinedInfoGen](
8820 CodeGenFunction &CGF, const Expr *IE, const ValueDecl *VD,
8822 Components,
8823 bool IsDevAddr, bool IEIsAttachPtrForDevAddr = false) {
8824 // We didn't find any match in our map information - generate a zero
8825 // size array section.
8826 llvm::Value *Ptr;
8827 if (IsDevAddr && !IEIsAttachPtrForDevAddr) {
8828 if (IE->isGLValue())
8829 Ptr = CGF.EmitLValue(IE).getPointer(CGF);
8830 else
8831 Ptr = CGF.EmitScalarExpr(IE);
8832 } else {
8833 Ptr = CGF.EmitLoadOfScalar(CGF.EmitLValue(IE), IE->getExprLoc());
8834 }
8835 bool TreatDevAddrAsDevPtr = IEIsAttachPtrForDevAddr;
8836 // For the purpose of address-translation, treat something like the
8837 // following:
8838 // int *p;
8839 // ... use_device_addr(p[1])
8840 // equivalent to
8841 // ... use_device_ptr(p)
8842 UseDeviceDataCombinedInfoGen(VD, Ptr, CGF, /*IsDevAddr=*/IsDevAddr &&
8843 !TreatDevAddrAsDevPtr);
8844 };
8845
8846 auto &&IsMapInfoExist = [&Info, this](CodeGenFunction &CGF,
8847 const ValueDecl *VD, const Expr *IE,
8848 const Expr *DesiredAttachPtrExpr,
8849 bool IsDevAddr) -> bool {
8850 // We potentially have map information for this declaration already.
8851 // Look for the first set of components that refer to it. If found,
8852 // return true.
8853 // If the first component is a member expression, we have to look into
8854 // 'this', which maps to null in the map of map information. Otherwise
8855 // look directly for the information.
8856 auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);
8857 if (It != Info.end()) {
8858 bool Found = false;
8859 for (auto &Data : It->second) {
8860 MapInfo *CI = nullptr;
8861 // We potentially have multiple maps for the same decl. We need to
8862 // only consider those for which the attach-ptr matches the desired
8863 // attach-ptr.
8864 auto *It = llvm::find_if(Data, [&](const MapInfo &MI) {
8865 if (MI.Components.back().getAssociatedDeclaration() != VD)
8866 return false;
8867
8868 const Expr *MapAttachPtr = getAttachPtrExpr(MI.Components);
8869 bool Match = AttachPtrComparator.areEqual(MapAttachPtr,
8870 DesiredAttachPtrExpr);
8871 return Match;
8872 });
8873
8874 if (It != Data.end())
8875 CI = &*It;
8876
8877 if (CI) {
8878 if (IsDevAddr) {
8879 CI->ForDeviceAddr = true;
8880 CI->ReturnDevicePointer = true;
8881 Found = true;
8882 break;
8883 } else {
8884 auto PrevCI = std::next(CI->Components.rbegin());
8885 const auto *VarD = dyn_cast<VarDecl>(VD);
8886 const Expr *AttachPtrExpr = getAttachPtrExpr(CI->Components);
8887 if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
8888 isa<MemberExpr>(IE) ||
8889 !VD->getType().getNonReferenceType()->isPointerType() ||
8890 PrevCI == CI->Components.rend() ||
8891 isa<MemberExpr>(PrevCI->getAssociatedExpression()) || !VarD ||
8892 VarD->hasLocalStorage() ||
8893 (isa_and_nonnull<DeclRefExpr>(AttachPtrExpr) &&
8894 VD == cast<DeclRefExpr>(AttachPtrExpr)->getDecl())) {
8895 CI->ForDeviceAddr = IsDevAddr;
8896 CI->ReturnDevicePointer = true;
8897 Found = true;
8898 break;
8899 }
8900 }
8901 }
8902 }
8903 return Found;
8904 }
8905 return false;
8906 };
8907
8908 // Look at the use_device_ptr clause information and mark the existing map
8909 // entries as such. If there is no map information for an entry in the
8910 // use_device_ptr list, we create one with map type 'alloc' and zero size
8911 // section. It is the user fault if that was not mapped before. If there is
8912 // no map information and the pointer is a struct member, then we defer the
8913 // emission of that entry until the whole struct has been processed.
8914 for (const auto *Cl : Clauses) {
8915 const auto *C = dyn_cast<OMPUseDevicePtrClause>(Cl);
8916 if (!C)
8917 continue;
8918 for (const auto L : C->component_lists()) {
8920 std::get<1>(L);
8921 assert(!Components.empty() &&
8922 "Not expecting empty list of components!");
8923 const ValueDecl *VD = Components.back().getAssociatedDeclaration();
8925 const Expr *IE = Components.back().getAssociatedExpression();
8926 // For use_device_ptr, we match an existing map clause if its attach-ptr
8927 // is same as the use_device_ptr operand. e.g.
8928 // map expr | use_device_ptr expr | current behavior
8929 // ---------|---------------------|-----------------
8930 // p[1] | p | match
8931 // ps->a | ps | match
8932 // p | p | no match
8933 const Expr *UDPOperandExpr =
8934 Components.front().getAssociatedExpression();
8935 if (IsMapInfoExist(CGF, VD, IE,
8936 /*DesiredAttachPtrExpr=*/UDPOperandExpr,
8937 /*IsDevAddr=*/false))
8938 continue;
8939 MapInfoGen(CGF, IE, VD, Components, /*IsDevAddr=*/false);
8940 }
8941 }
8942
// Handle use_device_addr; 'Processed' ensures each declaration is handled
// at most once across all use_device_addr clauses.
8943 llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>, 4> Processed;
8944 for (const auto *Cl : Clauses) {
8945 const auto *C = dyn_cast<OMPUseDeviceAddrClause>(Cl);
8946 if (!C)
8947 continue;
8948 for (const auto L : C->component_lists()) {
8950 std::get<1>(L);
8951 assert(!std::get<1>(L).empty() &&
8952 "Not expecting empty list of components!");
8953 const ValueDecl *VD = std::get<1>(L).back().getAssociatedDeclaration();
8954 if (!Processed.insert(VD).second)
8955 continue;
8957 // For use_device_addr, we match an existing map clause if the
8958 // use_device_addr operand's attach-ptr matches the map operand's
8959 // attach-ptr.
8960 // We chould also restrict to only match cases when there is a full
8961 // match between the map/use_device_addr clause exprs, but that may be
8962 // unnecessary.
8963 //
8964 // map expr | use_device_addr expr | current | possible restrictive/
8965 // | | behavior | safer behavior
8966 // ---------|----------------------|-----------|-----------------------
8967 // p | p | match | match
8968 // p[0] | p[0] | match | match
8969 // p[0:1] | p[0] | match | no match
8970 // p[0:1] | p[2:1] | match | no match
8971 // p[1] | p[0] | match | no match
8972 // ps->a | ps->b | match | no match
8973 // p | p[0] | no match | no match
8974 // pp | pp[0][0] | no match | no match
8975 const Expr *UDAAttachPtrExpr = getAttachPtrExpr(Components);
8976 const Expr *IE = std::get<1>(L).back().getAssociatedExpression();
8977 assert((!UDAAttachPtrExpr || UDAAttachPtrExpr == IE) &&
8978 "use_device_addr operand has an attach-ptr, but does not match "
8979 "last component's expr.");
8980 if (IsMapInfoExist(CGF, VD, IE,
8981 /*DesiredAttachPtrExpr=*/UDAAttachPtrExpr,
8982 /*IsDevAddr=*/true))
8983 continue;
8984 MapInfoGen(CGF, IE, VD, Components,
8985 /*IsDevAddr=*/true,
8986 /*IEIsAttachPtrForDevAddr=*/UDAAttachPtrExpr != nullptr);
8987 }
8988 }
8989
// Finally, emit the actual map information for each declaration collected
// above.
8990 for (const auto &Data : Info) {
8991 MapCombinedInfoTy CurInfo;
8992 const Decl *D = Data.first;
8993 const ValueDecl *VD = cast_or_null<ValueDecl>(D);
8994 // Group component lists by their AttachPtrExpr and process them in order
8995 // of increasing complexity (nullptr first, then simple expressions like
8996 // p, then more complex ones like p[0], etc.)
8997 //
8998 // This is similar to how generateInfoForCaptureFromClauseInfo handles
8999 // grouping for target constructs.
9000 SmallVector<std::pair<const Expr *, MapInfo>, 16> AttachPtrMapInfoPairs;
9001
9002 // First, collect all MapData entries with their attach-ptr exprs.
9003 for (const auto &M : Data.second) {
9004 for (const MapInfo &L : M) {
9005 assert(!L.Components.empty() &&
9006 "Not expecting declaration with no component lists.");
9007
9008 const Expr *AttachPtrExpr = getAttachPtrExpr(L.Components);
9009 AttachPtrMapInfoPairs.emplace_back(AttachPtrExpr, L);
9010 }
9011 }
9012
9013 // Next, sort by increasing order of their complexity.
9014 llvm::stable_sort(AttachPtrMapInfoPairs,
9015 [this](const auto &LHS, const auto &RHS) {
9016 return AttachPtrComparator(LHS.first, RHS.first);
9017 });
9018
9019 // And finally, process them all in order, grouping those with
9020 // equivalent attach-ptr exprs together.
9021 auto *It = AttachPtrMapInfoPairs.begin();
9022 while (It != AttachPtrMapInfoPairs.end()) {
9023 const Expr *AttachPtrExpr = It->first;
9024
// Collect the run of entries sharing an equivalent attach-ptr expr.
9025 SmallVector<MapInfo, 8> GroupLists;
9026 while (It != AttachPtrMapInfoPairs.end() &&
9027 (It->first == AttachPtrExpr ||
9028 AttachPtrComparator.areEqual(It->first, AttachPtrExpr))) {
9029 GroupLists.push_back(It->second);
9030 ++It;
9031 }
9032 assert(!GroupLists.empty() && "GroupLists should not be empty");
9033
9034 StructRangeInfoTy PartialStruct;
9035 AttachInfoTy AttachInfo;
9036 MapCombinedInfoTy GroupCurInfo;
9037 // Current group's struct base information:
9038 MapCombinedInfoTy GroupStructBaseCurInfo;
9039 for (const MapInfo &L : GroupLists) {
9040 // Remember the current base pointer index.
9041 unsigned CurrentBasePointersIdx = GroupCurInfo.BasePointers.size();
9042 unsigned StructBasePointersIdx =
9043 GroupStructBaseCurInfo.BasePointers.size();
9044
9045 GroupCurInfo.NonContigInfo.IsNonContiguous =
9046 L.Components.back().isNonContiguous();
9047 generateInfoForComponentList(
9048 L.MapType, L.MapModifiers, L.MotionModifiers, L.Components,
9049 GroupCurInfo, GroupStructBaseCurInfo, PartialStruct, AttachInfo,
9050 /*IsFirstComponentList=*/false, L.IsImplicit,
9051 /*GenerateAllInfoForClauses*/ true, L.Mapper, L.ForDeviceAddr, VD,
9052 L.VarRef, /*OverlappedElements*/ {});
9053
9054 // If this entry relates to a device pointer, set the relevant
9055 // declaration and add the 'return pointer' flag.
9056 if (L.ReturnDevicePointer) {
9057 // Check whether a value was added to either GroupCurInfo or
9058 // GroupStructBaseCurInfo and error if no value was added to either
9059 // of them:
9060 assert((CurrentBasePointersIdx < GroupCurInfo.BasePointers.size() ||
9061 StructBasePointersIdx <
9062 GroupStructBaseCurInfo.BasePointers.size()) &&
9063 "Unexpected number of mapped base pointers.");
9064
9065 // Choose a base pointer index which is always valid:
9066 const ValueDecl *RelevantVD =
9067 L.Components.back().getAssociatedDeclaration();
9068 assert(RelevantVD &&
9069 "No relevant declaration related with device pointer??");
9070
9071 // If GroupStructBaseCurInfo has been updated this iteration then
9072 // work on the first new entry added to it i.e. make sure that when
9073 // multiple values are added to any of the lists, the first value
9074 // added is being modified by the assignments below (not the last
9075 // value added).
9076 if (StructBasePointersIdx <
9077 GroupStructBaseCurInfo.BasePointers.size()) {
9078 GroupStructBaseCurInfo.DevicePtrDecls[StructBasePointersIdx] =
9079 RelevantVD;
9080 GroupStructBaseCurInfo.DevicePointers[StructBasePointersIdx] =
9081 L.ForDeviceAddr ? DeviceInfoTy::Address
9082 : DeviceInfoTy::Pointer;
9083 GroupStructBaseCurInfo.Types[StructBasePointersIdx] |=
9084 OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM;
9085 } else {
9086 GroupCurInfo.DevicePtrDecls[CurrentBasePointersIdx] = RelevantVD;
9087 GroupCurInfo.DevicePointers[CurrentBasePointersIdx] =
9088 L.ForDeviceAddr ? DeviceInfoTy::Address
9089 : DeviceInfoTy::Pointer;
9090 GroupCurInfo.Types[CurrentBasePointersIdx] |=
9091 OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM;
9092 }
9093 }
9094 }
9095
9096 // Unify entries in one list making sure the struct mapping precedes the
9097 // individual fields:
9098 MapCombinedInfoTy GroupUnionCurInfo;
9099 GroupUnionCurInfo.append(GroupStructBaseCurInfo);
9100 GroupUnionCurInfo.append(GroupCurInfo);
9101
9102 // If there is an entry in PartialStruct it means we have a struct with
9103 // individual members mapped. Emit an extra combined entry.
9104 if (PartialStruct.Base.isValid()) {
9105 // Prepend a synthetic dimension of length 1 to represent the
9106 // aggregated struct object. Using 1 (not 0, as 0 produced an
9107 // incorrect non-contiguous descriptor (DimSize==1), causing the
9108 // non-contiguous motion clause path to be skipped.) is important:
9109 // * It preserves the correct rank so targetDataUpdate() computes
9110 // DimSize == 2 for cases like strided array sections originating
9111 // from user-defined mappers (e.g. test with s.data[0:8:2]).
9112 GroupUnionCurInfo.NonContigInfo.Dims.insert(
9113 GroupUnionCurInfo.NonContigInfo.Dims.begin(), 1);
9114 emitCombinedEntry(
9115 CurInfo, GroupUnionCurInfo.Types, PartialStruct, AttachInfo,
9116 /*IsMapThis=*/!VD, OMPBuilder, VD,
9117 /*OffsetForMemberOfFlag=*/CombinedInfo.BasePointers.size(),
9118 /*NotTargetParams=*/true);
9119 }
9120
9121 // Append this group's results to the overall CurInfo in the correct
9122 // order: combined-entry -> original-field-entries -> attach-entry
9123 CurInfo.append(GroupUnionCurInfo);
9124 if (AttachInfo.isValid())
9125 emitAttachEntry(CGF, CurInfo, AttachInfo);
9126 }
9127
9128 // We need to append the results of this capture to what we already have.
9129 CombinedInfo.append(CurInfo);
9130 }
9131 // Append data for use_device_ptr/addr clauses.
9132 CombinedInfo.append(UseDeviceDataCombinedInfo);
9133 }
9134
9135public:
9136 MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF)
9137 : CurDir(&Dir), CGF(CGF), AttachPtrComparator(*this) {
9138 // Extract firstprivate clause information.
9139 for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>())
9140 for (const auto *D : C->varlist())
9141 FirstPrivateDecls.try_emplace(
9142 cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl()), C->isImplicit());
9143 // Extract implicit firstprivates from uses_allocators clauses.
9144 for (const auto *C : Dir.getClausesOfKind<OMPUsesAllocatorsClause>()) {
9145 for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
9146 OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
9147 if (const auto *DRE = dyn_cast_or_null<DeclRefExpr>(D.AllocatorTraits))
9148 FirstPrivateDecls.try_emplace(cast<VarDecl>(DRE->getDecl()),
9149 /*Implicit=*/true);
9150 else if (const auto *VD = dyn_cast<VarDecl>(
9151 cast<DeclRefExpr>(D.Allocator->IgnoreParenImpCasts())
9152 ->getDecl()))
9153 FirstPrivateDecls.try_emplace(VD, /*Implicit=*/true);
9154 }
9155 }
9156 // Extract defaultmap clause information.
9157 for (const auto *C : Dir.getClausesOfKind<OMPDefaultmapClause>())
9158 if (C->getDefaultmapModifier() == OMPC_DEFAULTMAP_MODIFIER_firstprivate)
9159 DefaultmapFirstprivateKinds.insert(C->getDefaultmapKind());
9160 // Extract device pointer clause information.
9161 for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>())
9162 for (auto L : C->component_lists())
9163 DevPointersMap[std::get<0>(L)].push_back(std::get<1>(L));
9164 // Extract device addr clause information.
9165 for (const auto *C : Dir.getClausesOfKind<OMPHasDeviceAddrClause>())
9166 for (auto L : C->component_lists())
9167 HasDevAddrsMap[std::get<0>(L)].push_back(std::get<1>(L));
9168 // Extract map information.
9169 for (const auto *C : Dir.getClausesOfKind<OMPMapClause>()) {
9170 if (C->getMapType() != OMPC_MAP_to)
9171 continue;
9172 for (auto L : C->component_lists()) {
9173 const ValueDecl *VD = std::get<0>(L);
9174 const auto *RD = VD ? VD->getType()
9175 .getCanonicalType()
9176 .getNonReferenceType()
9177 ->getAsCXXRecordDecl()
9178 : nullptr;
9179 if (RD && RD->isLambda())
9180 LambdasMap.try_emplace(std::get<0>(L), C);
9181 }
9182 }
9183
9184 auto CollectAttachPtrExprsForClauseComponents = [this](const auto *C) {
9185 for (auto L : C->component_lists()) {
9187 std::get<1>(L);
9188 if (!Components.empty())
9189 collectAttachPtrExprInfo(Components, CurDir);
9190 }
9191 };
9192
9193 // Populate the AttachPtrExprMap for all component lists from map-related
9194 // clauses.
9195 for (const auto *C : Dir.getClausesOfKind<OMPMapClause>())
9196 CollectAttachPtrExprsForClauseComponents(C);
9197 for (const auto *C : Dir.getClausesOfKind<OMPToClause>())
9198 CollectAttachPtrExprsForClauseComponents(C);
9199 for (const auto *C : Dir.getClausesOfKind<OMPFromClause>())
9200 CollectAttachPtrExprsForClauseComponents(C);
9201 for (const auto *C : Dir.getClausesOfKind<OMPUseDevicePtrClause>())
9202 CollectAttachPtrExprsForClauseComponents(C);
9203 for (const auto *C : Dir.getClausesOfKind<OMPUseDeviceAddrClause>())
9204 CollectAttachPtrExprsForClauseComponents(C);
9205 for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>())
9206 CollectAttachPtrExprsForClauseComponents(C);
9207 for (const auto *C : Dir.getClausesOfKind<OMPHasDeviceAddrClause>())
9208 CollectAttachPtrExprsForClauseComponents(C);
9209 }
9210
  /// Constructor for the declare mapper directive.
  ///
  /// Unlike the executable-directive constructor, no per-clause state is
  /// precomputed here; the mapper's map information is gathered later via
  /// generateAllInfoForMapper().
  MappableExprsHandler(const OMPDeclareMapperDecl &Dir, CodeGenFunction &CGF)
      : CurDir(&Dir), CGF(CGF), AttachPtrComparator(*this) {}
9214
  /// Generate code for the combined entry if we have a partially mapped struct
  /// and take care of the mapping flags of the arguments corresponding to
  /// individual struct members.
  /// If a valid \p AttachInfo exists, its pointee addr will be updated to point
  /// to the combined-entry's begin address, if emitted.
  /// \p PartialStruct contains attach base-pointer information.
  /// No value is returned; the combined entry, if any, is appended directly to
  /// \p CombinedInfo.
  void emitCombinedEntry(MapCombinedInfoTy &CombinedInfo,
                         MapFlagsArrayTy &CurTypes,
                         const StructRangeInfoTy &PartialStruct,
                         AttachInfoTy &AttachInfo, bool IsMapThis,
                         llvm::OpenMPIRBuilder &OMPBuilder, const ValueDecl *VD,
                         unsigned OffsetForMemberOfFlag,
                         bool NotTargetParams) const {
    // A single entry that is not a member of anything else and not an array
    // section needs no aggregate combined entry.
    if (CurTypes.size() == 1 &&
        ((CurTypes.back() & OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF) !=
         OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF) &&
        !PartialStruct.IsArraySection)
      return;
    Address LBAddr = PartialStruct.LowestElem.second;
    Address HBAddr = PartialStruct.HighestElem.second;
    // For a completely mapped record use the record's own address as both
    // bounds.
    if (PartialStruct.HasCompleteRecord) {
      LBAddr = PartialStruct.LB;
      HBAddr = PartialStruct.LB;
    }
    CombinedInfo.Exprs.push_back(VD);
    // Base is the base of the struct
    CombinedInfo.BasePointers.push_back(PartialStruct.Base.emitRawPointer(CGF));
    CombinedInfo.DevicePtrDecls.push_back(nullptr);
    CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
    // Pointer is the address of the lowest element
    llvm::Value *LB = LBAddr.emitRawPointer(CGF);
    const CXXMethodDecl *MD =
        CGF.CurFuncDecl ? dyn_cast<CXXMethodDecl>(CGF.CurFuncDecl) : nullptr;
    const CXXRecordDecl *RD = MD ? MD->getParent() : nullptr;
    bool HasBaseClass = RD && IsMapThis ? RD->getNumBases() > 0 : false;
    // There should not be a mapper for a combined entry.
    if (HasBaseClass) {
      // OpenMP 5.2 148:21:
      // If the target construct is within a class non-static member function,
      // and a variable is an accessible data member of the object for which the
      // non-static data member function is invoked, the variable is treated as
      // if the this[:1] expression had appeared in a map clause with a map-type
      // of tofrom.
      // Emit this[:1]
      CombinedInfo.Pointers.push_back(PartialStruct.Base.emitRawPointer(CGF));
      QualType Ty = MD->getFunctionObjectParameterType();
      llvm::Value *Size =
          CGF.Builder.CreateIntCast(CGF.getTypeSize(Ty), CGF.Int64Ty,
                                    /*isSigned=*/true);
      CombinedInfo.Sizes.push_back(Size);
    } else {
      CombinedInfo.Pointers.push_back(LB);
      // Size is (addr of {highest+1} element) - (addr of lowest element)
      llvm::Value *HB = HBAddr.emitRawPointer(CGF);
      llvm::Value *HAddr = CGF.Builder.CreateConstGEP1_32(
          HBAddr.getElementType(), HB, /*Idx0=*/1);
      llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(LB, CGF.VoidPtrTy);
      llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy);
      llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CGF.Int8Ty, CHAddr, CLAddr);
      llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.Int64Ty,
                                                    /*isSigned=*/false);
      CombinedInfo.Sizes.push_back(Size);
    }
    CombinedInfo.Mappers.push_back(nullptr);
    // Map type is always TARGET_PARAM, if generate info for captures.
    CombinedInfo.Types.push_back(
        NotTargetParams ? OpenMPOffloadMappingFlags::OMP_MAP_NONE
        : !PartialStruct.PreliminaryMapData.BasePointers.empty()
            ? OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ
            : OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM);
    // If any element has the present modifier, then make sure the runtime
    // doesn't attempt to allocate the struct.
    if (CurTypes.end() !=
        llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) {
          return static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
              Type & OpenMPOffloadMappingFlags::OMP_MAP_PRESENT);
        }))
      CombinedInfo.Types.back() |= OpenMPOffloadMappingFlags::OMP_MAP_PRESENT;
    // Remove TARGET_PARAM flag from the first element
    (*CurTypes.begin()) &= ~OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;
    // If any element has the ompx_hold modifier, then make sure the runtime
    // uses the hold reference count for the struct as a whole so that it won't
    // be unmapped by an extra dynamic reference count decrement. Add it to all
    // elements as well so the runtime knows which reference count to check
    // when determining whether it's time for device-to-host transfers of
    // individual elements.
    if (CurTypes.end() !=
        llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) {
          return static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
              Type & OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD);
        })) {
      CombinedInfo.Types.back() |= OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD;
      for (auto &M : CurTypes)
        M |= OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD;
    }

    // All other current entries will be MEMBER_OF the combined entry
    // (except for PTR_AND_OBJ entries which do not have a placeholder value
    // 0xFFFF in the MEMBER_OF field, or ATTACH entries since they are expected
    // to be handled by themselves, after all other maps).
    OpenMPOffloadMappingFlags MemberOfFlag = OMPBuilder.getMemberOfFlag(
        OffsetForMemberOfFlag + CombinedInfo.BasePointers.size() - 1);
    for (auto &M : CurTypes)
      OMPBuilder.setCorrectMemberOfFlag(M, MemberOfFlag);

    // If AttachInfo is valid, update the pointee address to point to the begin
    // address of the combined entry. Only one attachment per combined entry is
    // done this way, so for multiple maps like:
    //   S *ps;
    //   ... map(ps->a, ps->b)
    // we still get a single ATTACH entry:
    //
    //   &ps[0], &ps->a, sizeof(ps->a to ps->b), ALLOC // combined-entry
    //   &ps[0], &ps->a, sizeof(ps->a), TO | FROM
    //   &ps[0], &ps->b, sizeof(ps->b), TO | FROM
    //   &ps, &ps->a, sizeof(void*), ATTACH // Use combined-entry's LB
    if (AttachInfo.isValid())
      AttachInfo.AttachPteeAddr = LBAddr;
  }
9340
9341 /// Generate all the base pointers, section pointers, sizes, map types, and
9342 /// mappers for the extracted mappable expressions (all included in \a
9343 /// CombinedInfo). Also, for each item that relates with a device pointer, a
9344 /// pair of the relevant declaration and index where it occurs is appended to
9345 /// the device pointers info array.
9346 void generateAllInfo(
9347 MapCombinedInfoTy &CombinedInfo, llvm::OpenMPIRBuilder &OMPBuilder,
9348 const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
9349 llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
9350 assert(isa<const OMPExecutableDirective *>(CurDir) &&
9351 "Expect a executable directive");
9352 const auto *CurExecDir = cast<const OMPExecutableDirective *>(CurDir);
9353 generateAllInfoForClauses(CurExecDir->clauses(), CombinedInfo, OMPBuilder,
9354 SkipVarSet);
9355 }
9356
9357 /// Generate all the base pointers, section pointers, sizes, map types, and
9358 /// mappers for the extracted map clauses of user-defined mapper (all included
9359 /// in \a CombinedInfo).
9360 void generateAllInfoForMapper(MapCombinedInfoTy &CombinedInfo,
9361 llvm::OpenMPIRBuilder &OMPBuilder) const {
9362 assert(isa<const OMPDeclareMapperDecl *>(CurDir) &&
9363 "Expect a declare mapper directive");
9364 const auto *CurMapperDir = cast<const OMPDeclareMapperDecl *>(CurDir);
9365 generateAllInfoForClauses(CurMapperDir->clauses(), CombinedInfo,
9366 OMPBuilder);
9367 }
9368
9369 /// Emit capture info for lambdas for variables captured by reference.
9370 void generateInfoForLambdaCaptures(
9371 const ValueDecl *VD, llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
9372 llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers) const {
9373 QualType VDType = VD->getType().getCanonicalType().getNonReferenceType();
9374 const auto *RD = VDType->getAsCXXRecordDecl();
9375 if (!RD || !RD->isLambda())
9376 return;
9377 Address VDAddr(Arg, CGF.ConvertTypeForMem(VDType),
9378 CGF.getContext().getDeclAlign(VD));
9379 LValue VDLVal = CGF.MakeAddrLValue(VDAddr, VDType);
9380 llvm::DenseMap<const ValueDecl *, FieldDecl *> Captures;
9381 FieldDecl *ThisCapture = nullptr;
9382 RD->getCaptureFields(Captures, ThisCapture);
9383 if (ThisCapture) {
9384 LValue ThisLVal =
9385 CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture);
9386 LValue ThisLValVal = CGF.EmitLValueForField(VDLVal, ThisCapture);
9387 LambdaPointers.try_emplace(ThisLVal.getPointer(CGF),
9388 VDLVal.getPointer(CGF));
9389 CombinedInfo.Exprs.push_back(VD);
9390 CombinedInfo.BasePointers.push_back(ThisLVal.getPointer(CGF));
9391 CombinedInfo.DevicePtrDecls.push_back(nullptr);
9392 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
9393 CombinedInfo.Pointers.push_back(ThisLValVal.getPointer(CGF));
9394 CombinedInfo.Sizes.push_back(
9395 CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy),
9396 CGF.Int64Ty, /*isSigned=*/true));
9397 CombinedInfo.Types.push_back(
9398 OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ |
9399 OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
9400 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF |
9401 OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT);
9402 CombinedInfo.Mappers.push_back(nullptr);
9403 }
9404 for (const LambdaCapture &LC : RD->captures()) {
9405 if (!LC.capturesVariable())
9406 continue;
9407 const VarDecl *VD = cast<VarDecl>(LC.getCapturedVar());
9408 if (LC.getCaptureKind() != LCK_ByRef && !VD->getType()->isPointerType())
9409 continue;
9410 auto It = Captures.find(VD);
9411 assert(It != Captures.end() && "Found lambda capture without field.");
9412 LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second);
9413 if (LC.getCaptureKind() == LCK_ByRef) {
9414 LValue VarLValVal = CGF.EmitLValueForField(VDLVal, It->second);
9415 LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
9416 VDLVal.getPointer(CGF));
9417 CombinedInfo.Exprs.push_back(VD);
9418 CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
9419 CombinedInfo.DevicePtrDecls.push_back(nullptr);
9420 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
9421 CombinedInfo.Pointers.push_back(VarLValVal.getPointer(CGF));
9422 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
9423 CGF.getTypeSize(
9425 CGF.Int64Ty, /*isSigned=*/true));
9426 } else {
9427 RValue VarRVal = CGF.EmitLoadOfLValue(VarLVal, RD->getLocation());
9428 LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
9429 VDLVal.getPointer(CGF));
9430 CombinedInfo.Exprs.push_back(VD);
9431 CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
9432 CombinedInfo.DevicePtrDecls.push_back(nullptr);
9433 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
9434 CombinedInfo.Pointers.push_back(VarRVal.getScalarVal());
9435 CombinedInfo.Sizes.push_back(llvm::ConstantInt::get(CGF.Int64Ty, 0));
9436 }
9437 CombinedInfo.Types.push_back(
9438 OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ |
9439 OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
9440 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF |
9441 OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT);
9442 CombinedInfo.Mappers.push_back(nullptr);
9443 }
9444 }
9445
9446 /// Set correct indices for lambdas captures.
9447 void adjustMemberOfForLambdaCaptures(
9448 llvm::OpenMPIRBuilder &OMPBuilder,
9449 const llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers,
9450 MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
9451 MapFlagsArrayTy &Types) const {
9452 for (unsigned I = 0, E = Types.size(); I < E; ++I) {
9453 // Set correct member_of idx for all implicit lambda captures.
9454 if (Types[I] != (OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ |
9455 OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
9456 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF |
9457 OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT))
9458 continue;
9459 llvm::Value *BasePtr = LambdaPointers.lookup(BasePointers[I]);
9460 assert(BasePtr && "Unable to find base lambda address.");
9461 int TgtIdx = -1;
9462 for (unsigned J = I; J > 0; --J) {
9463 unsigned Idx = J - 1;
9464 if (Pointers[Idx] != BasePtr)
9465 continue;
9466 TgtIdx = Idx;
9467 break;
9468 }
9469 assert(TgtIdx != -1 && "Unable to find parent lambda.");
9470 // All other current entries will be MEMBER_OF the combined entry
9471 // (except for PTR_AND_OBJ entries which do not have a placeholder value
9472 // 0xFFFF in the MEMBER_OF field).
9473 OpenMPOffloadMappingFlags MemberOfFlag =
9474 OMPBuilder.getMemberOfFlag(TgtIdx);
9475 OMPBuilder.setCorrectMemberOfFlag(Types[I], MemberOfFlag);
9476 }
9477 }
9478
9479 /// Populate component lists for non-lambda captured variables from map,
9480 /// is_device_ptr and has_device_addr clause info.
9481 void populateComponentListsForNonLambdaCaptureFromClauses(
9482 const ValueDecl *VD, MapDataArrayTy &DeclComponentLists,
9483 SmallVectorImpl<
9484 SmallVector<OMPClauseMappableExprCommon::MappableComponent, 8>>
9485 &StorageForImplicitlyAddedComponentLists) const {
9486 if (VD && LambdasMap.count(VD))
9487 return;
9488
9489 // For member fields list in is_device_ptr, store it in
9490 // DeclComponentLists for generating components info.
9492 auto It = DevPointersMap.find(VD);
9493 if (It != DevPointersMap.end())
9494 for (const auto &MCL : It->second)
9495 DeclComponentLists.emplace_back(MCL, OMPC_MAP_to, Unknown,
9496 /*IsImpicit = */ true, nullptr,
9497 nullptr);
9498 auto I = HasDevAddrsMap.find(VD);
9499 if (I != HasDevAddrsMap.end())
9500 for (const auto &MCL : I->second)
9501 DeclComponentLists.emplace_back(MCL, OMPC_MAP_tofrom, Unknown,
9502 /*IsImpicit = */ true, nullptr,
9503 nullptr);
9504 assert(isa<const OMPExecutableDirective *>(CurDir) &&
9505 "Expect a executable directive");
9506 const auto *CurExecDir = cast<const OMPExecutableDirective *>(CurDir);
9507 for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
9508 const auto *EI = C->getVarRefs().begin();
9509 for (const auto L : C->decl_component_lists(VD)) {
9510 const ValueDecl *VDecl, *Mapper;
9511 // The Expression is not correct if the mapping is implicit
9512 const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
9514 std::tie(VDecl, Components, Mapper) = L;
9515 assert(VDecl == VD && "We got information for the wrong declaration??");
9516 assert(!Components.empty() &&
9517 "Not expecting declaration with no component lists.");
9518 DeclComponentLists.emplace_back(Components, C->getMapType(),
9519 C->getMapTypeModifiers(),
9520 C->isImplicit(), Mapper, E);
9521 ++EI;
9522 }
9523 }
9524
9525 // For the target construct, if there's a map with a base-pointer that's
9526 // a member of an implicitly captured struct, of the current class,
9527 // we need to emit an implicit map on the pointer.
9528 if (isOpenMPTargetExecutionDirective(CurExecDir->getDirectiveKind()))
9529 addImplicitMapForAttachPtrBaseIfMemberOfCapturedVD(
9530 VD, DeclComponentLists, StorageForImplicitlyAddedComponentLists);
9531
9532 llvm::stable_sort(DeclComponentLists, [](const MapData &LHS,
9533 const MapData &RHS) {
9534 ArrayRef<OpenMPMapModifierKind> MapModifiers = std::get<2>(LHS);
9535 OpenMPMapClauseKind MapType = std::get<1>(RHS);
9536 bool HasPresent =
9537 llvm::is_contained(MapModifiers, clang::OMPC_MAP_MODIFIER_present);
9538 bool HasAllocs = MapType == OMPC_MAP_alloc;
9539 MapModifiers = std::get<2>(RHS);
9540 MapType = std::get<1>(LHS);
9541 bool HasPresentR =
9542 llvm::is_contained(MapModifiers, clang::OMPC_MAP_MODIFIER_present);
9543 bool HasAllocsR = MapType == OMPC_MAP_alloc;
9544 return (HasPresent && !HasPresentR) || (HasAllocs && !HasAllocsR);
9545 });
9546 }
9547
  /// On a target construct, if there's an implicit map on a struct, or that of
  /// this[:], and an explicit map with a member of that struct/class as the
  /// base-pointer, we need to make sure that base-pointer is implicitly mapped,
  /// to make sure we don't map the full struct/class. For example:
  ///
  /// \code
  /// struct S {
  ///   int dummy[10000];
  ///   int *p;
  ///   void f1() {
  ///     #pragma omp target map(p[0:1])
  ///     (void)this;
  ///   }
  /// }; S s;
  ///
  /// void f2() {
  ///   #pragma omp target map(s.p[0:10])
  ///   (void)s;
  /// }
  /// \endcode
  ///
  /// Only `this-p` and `s.p` should be mapped in the two cases above.
  //
  // OpenMP 6.0: 7.9.6 map clause, pg 285
  // If a list item with an implicitly determined data-mapping attribute does
  // not have any corresponding storage in the device data environment prior to
  // a task encountering the construct associated with the map clause, and one
  // or more contiguous parts of the original storage are either list items or
  // base pointers to list items that are explicitly mapped on the construct,
  // only those parts of the original storage will have corresponding storage in
  // the device data environment as a result of the map clauses on the
  // construct.
  void addImplicitMapForAttachPtrBaseIfMemberOfCapturedVD(
      const ValueDecl *CapturedVD, MapDataArrayTy &DeclComponentLists,
      SmallVectorImpl<
          SmallVector<OMPClauseMappableExprCommon::MappableComponent, 8>>
          &ComponentVectorStorage) const {
    // A null captured decl means this is the 'this' capture.
    bool IsThisCapture = CapturedVD == nullptr;

    for (const auto &ComponentsAndAttachPtr : AttachPtrExprMap) {
      ComponentsWithAttachPtr = ComponentsAndAttachPtr.first;
      const Expr *AttachPtrExpr = ComponentsAndAttachPtr.second;
      // Component lists with no attach-pointer need no implicit base map.
      if (!AttachPtrExpr)
        continue;

      // Only member-expression attach-ptrs (this->p / s.p) are of interest.
      const auto *ME = dyn_cast<MemberExpr>(AttachPtrExpr);
      if (!ME)
        continue;

      const Expr *Base = ME->getBase()->IgnoreParenImpCasts();

      // If we are handling a "this" capture, then we are looking for
      // attach-ptrs of form `this->p`, either explicitly or implicitly.
      if (IsThisCapture && !ME->isImplicitCXXThis() && !isa<CXXThisExpr>(Base))
        continue;

      if (!IsThisCapture && (!isa<DeclRefExpr>(Base) ||
                             cast<DeclRefExpr>(Base)->getDecl() != CapturedVD))
        continue;

      // For non-this captures, we are looking for attach-ptrs like `s.p`.
      // NOTE(review): after the guard just above, only the isArrow() test
      // here can still fire — the DeclRefExpr checks are redundant; consider
      // merging the two conditions.
      if (!IsThisCapture && (ME->isArrow() || !isa<DeclRefExpr>(Base) ||
                             cast<DeclRefExpr>(Base)->getDecl() != CapturedVD))
        continue;

      // Check if we have an existing map on either:
      // this[:], s, this->p, or s.p, in which case, we don't need to add
      // an implicit one for the attach-ptr s.p/this->p.
      bool FoundExistingMap = false;
      for (const MapData &ExistingL : DeclComponentLists) {
        ExistingComponents = std::get<0>(ExistingL);

        if (ExistingComponents.empty())
          continue;

        const auto &FirstComponent = ExistingComponents.front();
        const Expr *FirstExpr = FirstComponent.getAssociatedExpression();

        if (!FirstExpr)
          continue;

        // First check if we have a map like map(this->p) or map(s.p).
        if (AttachPtrComparator.areEqual(FirstExpr, AttachPtrExpr)) {
          FoundExistingMap = true;
          break;
        }

        // Check if we have a map like this[0:1]
        if (IsThisCapture) {
          if (const auto *OASE = dyn_cast<ArraySectionExpr>(FirstExpr)) {
            if (isa<CXXThisExpr>(OASE->getBase()->IgnoreParenImpCasts())) {
              FoundExistingMap = true;
              break;
            }
          }
          continue;
        }

        // When the attach-ptr is something like `s.p`, check if
        // `s` itself is mapped explicitly.
        if (const auto *DRE = dyn_cast<DeclRefExpr>(FirstExpr)) {
          if (DRE->getDecl() == CapturedVD) {
            FoundExistingMap = true;
            break;
          }
        }
      }

      if (FoundExistingMap)
        continue;

      // If no base map is found, we need to create an implicit map for the
      // attach-pointer expr.

      ComponentVectorStorage.emplace_back();
      auto &AttachPtrComponents = ComponentVectorStorage.back();

      bool SeenAttachPtrComponent = false;
      // For creating a map on the attach-ptr `s.p/this->p`, we copy all
      // components from the component-list which has `s.p/this->p`
      // as the attach-ptr, starting from the component which matches
      // `s.p/this->p`. This way, we'll have component-lists of
      // `s.p` -> `s`, and `this->p` -> `this`.
      for (size_t i = 0; i < ComponentsWithAttachPtr.size(); ++i) {
        const auto &Component = ComponentsWithAttachPtr[i];
        const Expr *ComponentExpr = Component.getAssociatedExpression();

        if (!SeenAttachPtrComponent && ComponentExpr != AttachPtrExpr)
          continue;
        SeenAttachPtrComponent = true;

        AttachPtrComponents.emplace_back(Component.getAssociatedExpression(),
                                         Component.getAssociatedDeclaration(),
                                         Component.isNonContiguous());
      }
      assert(!AttachPtrComponents.empty() &&
             "Could not populate component-lists for mapping attach-ptr");

      // The synthesized map is implicit tofrom with no user-defined mapper.
      DeclComponentLists.emplace_back(
          AttachPtrComponents, OMPC_MAP_tofrom, Unknown,
          /*IsImplicit=*/true, /*mapper=*/nullptr, AttachPtrExpr);
    }
  }
9697
9698 /// For a capture that has an associated clause, generate the base pointers,
9699 /// section pointers, sizes, map types, and mappers (all included in
9700 /// \a CurCaptureVarInfo).
9701 void generateInfoForCaptureFromClauseInfo(
9702 const MapDataArrayTy &DeclComponentListsFromClauses,
9703 const CapturedStmt::Capture *Cap, llvm::Value *Arg,
9704 MapCombinedInfoTy &CurCaptureVarInfo, llvm::OpenMPIRBuilder &OMPBuilder,
9705 unsigned OffsetForMemberOfFlag) const {
9706 assert(!Cap->capturesVariableArrayType() &&
9707 "Not expecting to generate map info for a variable array type!");
9708
9709 // We need to know when we generating information for the first component
9710 const ValueDecl *VD = Cap->capturesThis()
9711 ? nullptr
9712 : Cap->getCapturedVar()->getCanonicalDecl();
9713
9714 // for map(to: lambda): skip here, processing it in
9715 // generateDefaultMapInfo
9716 if (LambdasMap.count(VD))
9717 return;
9718
9719 // If this declaration appears in a is_device_ptr clause we just have to
9720 // pass the pointer by value. If it is a reference to a declaration, we just
9721 // pass its value.
9722 if (VD && (DevPointersMap.count(VD) || HasDevAddrsMap.count(VD))) {
9723 CurCaptureVarInfo.Exprs.push_back(VD);
9724 CurCaptureVarInfo.BasePointers.emplace_back(Arg);
9725 CurCaptureVarInfo.DevicePtrDecls.emplace_back(VD);
9726 CurCaptureVarInfo.DevicePointers.emplace_back(DeviceInfoTy::Pointer);
9727 CurCaptureVarInfo.Pointers.push_back(Arg);
9728 CurCaptureVarInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
9729 CGF.getTypeSize(CGF.getContext().VoidPtrTy), CGF.Int64Ty,
9730 /*isSigned=*/true));
9731 CurCaptureVarInfo.Types.push_back(
9732 OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
9733 OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM);
9734 CurCaptureVarInfo.Mappers.push_back(nullptr);
9735 return;
9736 }
9737
9738 auto GenerateInfoForComponentLists =
9739 [&](ArrayRef<MapData> DeclComponentListsFromClauses,
9740 bool IsEligibleForTargetParamFlag) {
9741 MapCombinedInfoTy CurInfoForComponentLists;
9742 StructRangeInfoTy PartialStruct;
9743 AttachInfoTy AttachInfo;
9744
9745 if (DeclComponentListsFromClauses.empty())
9746 return;
9747
9748 generateInfoForCaptureFromComponentLists(
9749 VD, DeclComponentListsFromClauses, CurInfoForComponentLists,
9750 PartialStruct, AttachInfo, IsEligibleForTargetParamFlag);
9751
9752 // If there is an entry in PartialStruct it means we have a
9753 // struct with individual members mapped. Emit an extra combined
9754 // entry.
9755 if (PartialStruct.Base.isValid()) {
9756 CurCaptureVarInfo.append(PartialStruct.PreliminaryMapData);
9757 emitCombinedEntry(
9758 CurCaptureVarInfo, CurInfoForComponentLists.Types,
9759 PartialStruct, AttachInfo, Cap->capturesThis(), OMPBuilder,
9760 /*VD=*/nullptr, OffsetForMemberOfFlag,
9761 /*NotTargetParams*/ !IsEligibleForTargetParamFlag);
9762 }
9763
9764 // We do the appends to get the entries in the following order:
9765 // combined-entry -> individual-field-entries -> attach-entry,
9766 CurCaptureVarInfo.append(CurInfoForComponentLists);
9767 if (AttachInfo.isValid())
9768 emitAttachEntry(CGF, CurCaptureVarInfo, AttachInfo);
9769 };
9770
9771 // Group component lists by their AttachPtrExpr and process them in order
9772 // of increasing complexity (nullptr first, then simple expressions like p,
9773 // then more complex ones like p[0], etc.)
9774 //
9775 // This ensure that we:
9776 // * handle maps that can contribute towards setting the kernel argument,
9777 // (e.g. map(ps), or map(ps[0])), before any that cannot (e.g. ps->pt->d).
9778 // * allocate a single contiguous storage for all exprs with the same
9779 // captured var and having the same attach-ptr.
9780 //
9781 // Example: The map clauses below should be handled grouped together based
9782 // on their attachable-base-pointers:
9783 // map-clause | attachable-base-pointer
9784 // --------------------------+------------------------
9785 // map(p, ps) | nullptr
9786 // map(p[0]) | p
9787 // map(p[0]->b, p[0]->c) | p[0]
9788 // map(ps->d, ps->e, ps->pt) | ps
9789 // map(ps->pt->d, ps->pt->e) | ps->pt
9790
9791 // First, collect all MapData entries with their attach-ptr exprs.
9792 SmallVector<std::pair<const Expr *, MapData>, 16> AttachPtrMapDataPairs;
9793
9794 for (const MapData &L : DeclComponentListsFromClauses) {
9796 std::get<0>(L);
9797 const Expr *AttachPtrExpr = getAttachPtrExpr(Components);
9798 AttachPtrMapDataPairs.emplace_back(AttachPtrExpr, L);
9799 }
9800
9801 // Next, sort by increasing order of their complexity.
9802 llvm::stable_sort(AttachPtrMapDataPairs,
9803 [this](const auto &LHS, const auto &RHS) {
9804 return AttachPtrComparator(LHS.first, RHS.first);
9805 });
9806
9807 bool NoDefaultMappingDoneForVD = CurCaptureVarInfo.BasePointers.empty();
9808 bool IsFirstGroup = true;
9809
9810 // And finally, process them all in order, grouping those with
9811 // equivalent attach-ptr exprs together.
9812 auto *It = AttachPtrMapDataPairs.begin();
9813 while (It != AttachPtrMapDataPairs.end()) {
9814 const Expr *AttachPtrExpr = It->first;
9815
9816 MapDataArrayTy GroupLists;
9817 while (It != AttachPtrMapDataPairs.end() &&
9818 (It->first == AttachPtrExpr ||
9819 AttachPtrComparator.areEqual(It->first, AttachPtrExpr))) {
9820 GroupLists.push_back(It->second);
9821 ++It;
9822 }
9823 assert(!GroupLists.empty() && "GroupLists should not be empty");
9824
9825 // Determine if this group of component-lists is eligible for TARGET_PARAM
9826 // flag. Only the first group processed should be eligible, and only if no
9827 // default mapping was done.
9828 bool IsEligibleForTargetParamFlag =
9829 IsFirstGroup && NoDefaultMappingDoneForVD;
9830
9831 GenerateInfoForComponentLists(GroupLists, IsEligibleForTargetParamFlag);
9832 IsFirstGroup = false;
9833 }
9834 }
9835
  /// Generate the base pointers, section pointers, sizes, map types, and
  /// mappers associated to \a DeclComponentLists for a given capture
  /// \a VD (all included in \a CurComponentListInfo).
  ///
  /// Component lists that map overlapping parts of the same object are
  /// detected first and emitted with their overlap information; the remaining
  /// lists are emitted afterwards. Only the first emitted entry of an
  /// eligible list may carry the TARGET_PARAM flag.
  ///
  /// NOTE(review): several declaration lines in this routine appear to have
  /// been dropped by the extraction (they are flagged inline below); the
  /// remaining code is kept verbatim.
  void generateInfoForCaptureFromComponentLists(
      const ValueDecl *VD, ArrayRef<MapData> DeclComponentLists,
      MapCombinedInfoTy &CurComponentListInfo, StructRangeInfoTy &PartialStruct,
      AttachInfoTy &AttachInfo, bool IsListEligibleForTargetParamFlag) const {
    // Find overlapping elements (including the offset from the base element).
    // NOTE(review): the SmallVector element type line appears truncated here.
    llvm::SmallDenseMap<
        const MapData *,
        llvm::SmallVector<
            4>
        OverlappedData;
    size_t Count = 0;
    // Pairwise-compare each component list against the lists after it to
    // detect common prefixes (i.e. overlapping mapped regions).
    for (const MapData &L : DeclComponentLists) {
      // NOTE(review): the declaration of `Components` appears truncated here.
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      const ValueDecl *Mapper;
      const Expr *VarRef;
      std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
          L;
      ++Count;
      for (const MapData &L1 : ArrayRef(DeclComponentLists).slice(Count)) {
        // NOTE(review): the declaration of `Components1` appears truncated.
        std::tie(Components1, MapType, MapModifiers, IsImplicit, Mapper,
                 VarRef) = L1;
        // Walk both lists from the innermost component outwards until they
        // diverge in expression kind or referenced declaration.
        auto CI = Components.rbegin();
        auto CE = Components.rend();
        auto SI = Components1.rbegin();
        auto SE = Components1.rend();
        for (; CI != CE && SI != SE; ++CI, ++SI) {
          if (CI->getAssociatedExpression()->getStmtClass() !=
              SI->getAssociatedExpression()->getStmtClass())
            break;
          // Are we dealing with different variables/fields?
          if (CI->getAssociatedDeclaration() != SI->getAssociatedDeclaration())
            break;
        }
        // Found overlapping if, at least for one component, reached the head
        // of the components list.
        if (CI == CE || SI == SE) {
          // Ignore it if it is the same component.
          if (CI == CE && SI == SE)
            continue;
          const auto It = (SI == SE) ? CI : SI;
          // If one component is a pointer and another one is a kind of
          // dereference of this pointer (array subscript, section, dereference,
          // etc.), it is not an overlapping.
          // Same, if one component is a base and another component is a
          // dereferenced pointer memberexpr with the same base.
          if (!isa<MemberExpr>(It->getAssociatedExpression()) ||
              (std::prev(It)->getAssociatedDeclaration() &&
               std::prev(It)
                   ->getAssociatedDeclaration()
                   ->getType()
                   ->isPointerType()) ||
              (It->getAssociatedDeclaration() &&
               It->getAssociatedDeclaration()->getType()->isPointerType() &&
               std::next(It) != CE && std::next(It) != SE))
            continue;
          const MapData &BaseData = CI == CE ? L : L1;
          // NOTE(review): the declaration of `SubData` appears truncated here.
          SI == SE ? Components : Components1;
          OverlappedData[&BaseData].push_back(SubData);
        }
      }
    }
    // Sort the overlapped elements for each item.
    llvm::SmallVector<const FieldDecl *, 4> Layout;
    if (!OverlappedData.empty()) {
      // Peel pointer/array types down to the underlying record whose field
      // order defines the layout used for sorting.
      const Type *BaseType = VD->getType().getCanonicalType().getTypePtr();
      const Type *OrigType = BaseType->getPointeeOrArrayElementType();
      while (BaseType != OrigType) {
        BaseType = OrigType->getCanonicalTypeInternal().getTypePtr();
        OrigType = BaseType->getPointeeOrArrayElementType();
      }

      if (const auto *CRD = BaseType->getAsCXXRecordDecl())
        getPlainLayout(CRD, Layout, /*AsBase=*/false);
      else {
        const auto *RD = BaseType->getAsRecordDecl();
        Layout.append(RD->field_begin(), RD->field_end());
      }
    }
    // Order each base's overlapped sub-lists following the record layout.
    for (auto &Pair : OverlappedData) {
      llvm::stable_sort(
          Pair.getSecond(),
          // NOTE(review): the lambda's parameter list (First/Second) appears
          // truncated here.
          [&Layout](
              Second) {
            auto CI = First.rbegin();
            auto CE = First.rend();
            auto SI = Second.rbegin();
            auto SE = Second.rend();
            // Skip the common prefix of the two component lists.
            for (; CI != CE && SI != SE; ++CI, ++SI) {
              if (CI->getAssociatedExpression()->getStmtClass() !=
                  SI->getAssociatedExpression()->getStmtClass())
                break;
              // Are we dealing with different variables/fields?
              if (CI->getAssociatedDeclaration() !=
                  SI->getAssociatedDeclaration())
                break;
            }

            // Lists contain the same elements.
            if (CI == CE && SI == SE)
              return false;

            // List with less elements is less than list with more elements.
            if (CI == CE || SI == SE)
              return CI == CE;

            // Otherwise compare the first differing fields by declaration
            // order (field index within the same parent, else layout order).
            const auto *FD1 = cast<FieldDecl>(CI->getAssociatedDeclaration());
            const auto *FD2 = cast<FieldDecl>(SI->getAssociatedDeclaration());
            if (FD1->getParent() == FD2->getParent())
              return FD1->getFieldIndex() < FD2->getFieldIndex();
            const auto *It =
                llvm::find_if(Layout, [FD1, FD2](const FieldDecl *FD) {
                  return FD == FD1 || FD == FD2;
                });
            return *It == FD1;
          });
    }

    // Associated with a capture, because the mapping flags depend on it.
    // Go through all of the elements with the overlapped elements.
    bool AddTargetParamFlag = IsListEligibleForTargetParamFlag;
    MapCombinedInfoTy StructBaseCombinedInfo;
    for (const auto &Pair : OverlappedData) {
      const MapData &L = *Pair.getFirst();
      // NOTE(review): the declaration of `Components` appears truncated here.
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      const ValueDecl *Mapper;
      const Expr *VarRef;
      std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
          L;
      ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
          OverlappedComponents = Pair.getSecond();
      generateInfoForComponentList(
          MapType, MapModifiers, {}, Components, CurComponentListInfo,
          StructBaseCombinedInfo, PartialStruct, AttachInfo, AddTargetParamFlag,
          IsImplicit, /*GenerateAllInfoForClauses*/ false, Mapper,
          /*ForDeviceAddr=*/false, VD, VarRef, OverlappedComponents);
      // Only the first emitted entry may be the target parameter.
      AddTargetParamFlag = false;
    }
    // Go through other elements without overlapped elements.
    for (const MapData &L : DeclComponentLists) {
      // NOTE(review): the declaration of `Components` appears truncated here.
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      const ValueDecl *Mapper;
      const Expr *VarRef;
      std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
          L;
      auto It = OverlappedData.find(&L);
      if (It == OverlappedData.end())
        generateInfoForComponentList(
            MapType, MapModifiers, {}, Components, CurComponentListInfo,
            StructBaseCombinedInfo, PartialStruct, AttachInfo,
            AddTargetParamFlag, IsImplicit, /*GenerateAllInfoForClauses*/ false,
            Mapper, /*ForDeviceAddr=*/false, VD, VarRef,
            /*OverlappedElements*/ {});
      AddTargetParamFlag = false;
    }
  }
10008
10009 /// Check if a variable should be treated as firstprivate due to explicit
10010 /// firstprivate clause or defaultmap(firstprivate:...).
10011 bool isEffectivelyFirstprivate(const VarDecl *VD, QualType Type) const {
10012 // Check explicit firstprivate clauses (not implicit from defaultmap)
10013 auto I = FirstPrivateDecls.find(VD);
10014 if (I != FirstPrivateDecls.end() && !I->getSecond())
10015 return true; // Explicit firstprivate only
10016
10017 // Check defaultmap(firstprivate:scalar) for scalar types
10018 if (DefaultmapFirstprivateKinds.count(OMPC_DEFAULTMAP_scalar)) {
10019 if (Type->isScalarType())
10020 return true;
10021 }
10022
10023 // Check defaultmap(firstprivate:pointer) for pointer types
10024 if (DefaultmapFirstprivateKinds.count(OMPC_DEFAULTMAP_pointer)) {
10025 if (Type->isAnyPointerType())
10026 return true;
10027 }
10028
10029 // Check defaultmap(firstprivate:aggregate) for aggregate types
10030 if (DefaultmapFirstprivateKinds.count(OMPC_DEFAULTMAP_aggregate)) {
10031 if (Type->isAggregateType())
10032 return true;
10033 }
10034
10035 // Check defaultmap(firstprivate:all) for all types
10036 return DefaultmapFirstprivateKinds.count(OMPC_DEFAULTMAP_all);
10037 }
10038
  /// Generate the default map information for a given capture \a CI,
  /// record field declaration \a RI and captured value \a CV.
  ///
  /// Produces exactly one entry in \a CombinedInfo, marked TARGET_PARAM (and
  /// IMPLICIT unless an explicit firstprivate clause is found for the
  /// variable). The entry's size/flags depend on how the value is captured:
  /// 'this', by copy, or by reference.
  void generateDefaultMapInfo(const CapturedStmt::Capture &CI,
                              const FieldDecl &RI, llvm::Value *CV,
                              MapCombinedInfoTy &CombinedInfo) const {
    bool IsImplicit = true;
    // Do the default mapping.
    if (CI.capturesThis()) {
      // Map the pointee of 'this' to and from the device.
      CombinedInfo.Exprs.push_back(nullptr);
      CombinedInfo.BasePointers.push_back(CV);
      CombinedInfo.DevicePtrDecls.push_back(nullptr);
      CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
      CombinedInfo.Pointers.push_back(CV);
      const auto *PtrTy = cast<PointerType>(RI.getType().getTypePtr());
      CombinedInfo.Sizes.push_back(
          CGF.Builder.CreateIntCast(CGF.getTypeSize(PtrTy->getPointeeType()),
                                    CGF.Int64Ty, /*isSigned=*/true));
      // Default map type.
      CombinedInfo.Types.push_back(OpenMPOffloadMappingFlags::OMP_MAP_TO |
                                   OpenMPOffloadMappingFlags::OMP_MAP_FROM);
    } else if (CI.capturesVariableByCopy()) {
      const VarDecl *VD = CI.getCapturedVar();
      CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
      CombinedInfo.BasePointers.push_back(CV);
      CombinedInfo.DevicePtrDecls.push_back(nullptr);
      CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
      CombinedInfo.Pointers.push_back(CV);
      bool IsFirstprivate =
          isEffectivelyFirstprivate(VD, RI.getType().getNonReferenceType());

      if (!RI.getType()->isAnyPointerType()) {
        // We have to signal to the runtime captures passed by value that are
        // not pointers.
        CombinedInfo.Types.push_back(
            OpenMPOffloadMappingFlags::OMP_MAP_LITERAL);
        CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
            CGF.getTypeSize(RI.getType()), CGF.Int64Ty, /*isSigned=*/true));
      } else if (IsFirstprivate) {
        // Firstprivate pointers should be passed by value (as literals)
        // without performing a present table lookup at runtime.
        CombinedInfo.Types.push_back(
            OpenMPOffloadMappingFlags::OMP_MAP_LITERAL);
        // Use zero size for pointer literals (just passing the pointer value)
        CombinedInfo.Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
      } else {
        // Pointers are implicitly mapped with a zero size and no flags
        // (other than first map that is added for all implicit maps).
        CombinedInfo.Types.push_back(OpenMPOffloadMappingFlags::OMP_MAP_NONE);
        CombinedInfo.Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
      }
      // FirstPrivateDecls records whether the firstprivate was implicit; use
      // that to decide the IMPLICIT flag below.
      auto I = FirstPrivateDecls.find(VD);
      if (I != FirstPrivateDecls.end())
        IsImplicit = I->getSecond();
    } else {
      assert(CI.capturesVariable() && "Expected captured reference.");
      const auto *PtrTy = cast<ReferenceType>(RI.getType().getTypePtr());
      QualType ElementType = PtrTy->getPointeeType();
      const VarDecl *VD = CI.getCapturedVar();
      bool IsFirstprivate = isEffectivelyFirstprivate(VD, ElementType);
      CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
      CombinedInfo.BasePointers.push_back(CV);
      CombinedInfo.DevicePtrDecls.push_back(nullptr);
      CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);

      // For firstprivate pointers, pass by value instead of dereferencing
      if (IsFirstprivate && ElementType->isAnyPointerType()) {
        // Treat as a literal value (pass the pointer value itself)
        CombinedInfo.Pointers.push_back(CV);
        // Use zero size for pointer literals
        CombinedInfo.Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
        CombinedInfo.Types.push_back(
            OpenMPOffloadMappingFlags::OMP_MAP_LITERAL);
      } else {
        CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
            CGF.getTypeSize(ElementType), CGF.Int64Ty, /*isSigned=*/true));
        // The default map type for a scalar/complex type is 'to' because by
        // default the value doesn't have to be retrieved. For an aggregate
        // type, the default is 'tofrom'.
        CombinedInfo.Types.push_back(getMapModifiersForPrivateClauses(CI));
        CombinedInfo.Pointers.push_back(CV);
      }
      auto I = FirstPrivateDecls.find(VD);
      if (I != FirstPrivateDecls.end())
        IsImplicit = I->getSecond();
    }
    // Every default map produces a single argument which is a target parameter.
    CombinedInfo.Types.back() |=
        OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;

    // Add flag stating this is an implicit map.
    if (IsImplicit)
      CombinedInfo.Types.back() |= OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT;

    // No user-defined mapper for default mapping.
    CombinedInfo.Mappers.push_back(nullptr);
  }
10135};
10136} // anonymous namespace
10137
// Try to extract the base declaration from a `this->x` expression if possible.
// Returns the member declaration for an array-section over a member access
// (e.g. `this->x[0:n]`), otherwise nullptr.
// NOTE(review): the function's signature line (taking `const Expr *E`)
// appears to have been dropped by the extraction.
  if (!E)
    return nullptr;

  if (const auto *OASE = dyn_cast<ArraySectionExpr>(E->IgnoreParenCasts()))
    if (const MemberExpr *ME =
            dyn_cast<MemberExpr>(OASE->getBase()->IgnoreParenImpCasts()))
      return ME->getMemberDecl();
  return nullptr;
}
10149
/// Emit a string constant containing the names of the values mapped to the
/// offloading runtime library.
static llvm::Constant *
emitMappingInformation(CodeGenFunction &CGF, llvm::OpenMPIRBuilder &OMPBuilder,
                       MappableExprsHandler::MappingExprInfo &MapExprs) {

  uint32_t SrcLocStrSize;
  // With neither a declaration nor an expression available, fall back to the
  // default source-location string.
  if (!MapExprs.getMapDecl() && !MapExprs.getMapExpr())
    return OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize);

  // Pick the most precise location: the underlying `this->x` member decl if
  // detectable, else the expression, else the declaration.
  SourceLocation Loc;
  if (!MapExprs.getMapDecl() && MapExprs.getMapExpr()) {
    if (const ValueDecl *VD = getDeclFromThisExpr(MapExprs.getMapExpr()))
      Loc = VD->getLocation();
    else
      Loc = MapExprs.getMapExpr()->getExprLoc();
  } else {
    Loc = MapExprs.getMapDecl()->getLocation();
  }

  // Pretty-print the mapped expression, or use the declaration's name.
  // NOTE(review): the declaration of the printing policy `P` appears to have
  // been dropped by the extraction.
  std::string ExprName;
  if (MapExprs.getMapExpr()) {
    llvm::raw_string_ostream OS(ExprName);
    MapExprs.getMapExpr()->printPretty(OS, nullptr, P);
  } else {
    ExprName = MapExprs.getMapDecl()->getNameAsString();
  }

  // NOTE(review): the declaration of `PLoc` (presumed location derived from
  // Loc) appears to have been dropped by the extraction.
  std::string FileName;
  if (auto *DbgInfo = CGF.getDebugInfo())
    FileName = DbgInfo->remapDIPath(PLoc.getFilename());
  else
    FileName = PLoc.getFilename();
  return OMPBuilder.getOrCreateSrcLocStr(FileName, ExprName, PLoc.getLine(),
                                         PLoc.getColumn(), SrcLocStrSize);
}
/// Emit the arrays used to pass the captures and map information to the
/// offloading runtime library. If there is no map or capture information,
/// return nullptr by reference.
// NOTE(review): the function signature line appears to have been dropped by
// the extraction; the parameter list continues below.
    CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
    CGOpenMPRuntime::TargetDataInfo &Info, llvm::OpenMPIRBuilder &OMPBuilder,
    bool IsNonContiguous = false, bool ForEndCall = false) {
  CodeGenModule &CGM = CGF.CGM;

  // Insertion points: allocas go to the function's alloca block, the setup
  // code is emitted at the current builder position.
  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
  InsertPointTy AllocaIP(CGF.AllocaInsertPt->getParent(),
                         CGF.AllocaInsertPt->getIterator());
  InsertPointTy CodeGenIP(CGF.Builder.GetInsertBlock(),
                          CGF.Builder.GetInsertPoint());

  // Record the runtime-provided device address for each device-pointer
  // declaration so later codegen can look it up.
  auto DeviceAddrCB = [&](unsigned int I, llvm::Value *NewDecl) {
    if (const ValueDecl *DevVD = CombinedInfo.DevicePtrDecls[I]) {
      Info.CaptureDeviceAddrMap.try_emplace(DevVD, NewDecl);
    }
  };

  // Resolve the user-defined mapper function for entry I, if any.
  // NOTE(review): the statement assigning `MFunc` appears truncated by the
  // extraction.
  auto CustomMapperCB = [&](unsigned int I) {
    llvm::Function *MFunc = nullptr;
    if (CombinedInfo.Mappers[I]) {
      Info.HasMapper = true;
      cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I]));
    }
    return MFunc;
  };
  cantFail(OMPBuilder.emitOffloadingArraysAndArgs(
      AllocaIP, CodeGenIP, Info, Info.RTArgs, CombinedInfo, CustomMapperCB,
      IsNonContiguous, ForEndCall, DeviceAddrCB));
}
10222
/// Check for inner distribute directive.
static const OMPExecutableDirective *
// NOTE(review): the line with this function's name and parameters appears to
// have been dropped by the extraction, as has the initializer of `ChildStmt`
// below.
  const auto *CS = D.getInnermostCapturedStmt();
  const auto *Body =
      CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
  const Stmt *ChildStmt =

  if (const auto *NestedDir =
          dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
    OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind();
    switch (D.getDirectiveKind()) {
    case OMPD_target:
      // For now, treat 'target' with nested 'teams loop' as if it's
      // distributed (target teams distribute).
      if (isOpenMPDistributeDirective(DKind) || DKind == OMPD_teams_loop)
        return NestedDir;
      // Look one level deeper through a nested 'teams' for a distribute
      // directive.
      if (DKind == OMPD_teams) {
        Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(
            /*IgnoreCaptured=*/true);
        if (!Body)
          return nullptr;
        ChildStmt = CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);
        if (const auto *NND =
                dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
          DKind = NND->getDirectiveKind();
          if (isOpenMPDistributeDirective(DKind))
            return NND;
        }
      }
      return nullptr;
    case OMPD_target_teams:
      if (isOpenMPDistributeDirective(DKind))
        return NestedDir;
      return nullptr;
    case OMPD_target_parallel:
    case OMPD_target_simd:
    case OMPD_target_parallel_for:
    case OMPD_target_parallel_for_simd:
      return nullptr;
    // None of the remaining directive kinds is expected here; reaching them
    // is a codegen invariant violation.
    case OMPD_target_teams_distribute:
    case OMPD_target_teams_distribute_simd:
    case OMPD_target_teams_distribute_parallel_for:
    case OMPD_target_teams_distribute_parallel_for_simd:
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_master:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_tile:
    case OMPD_unroll:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_depobj:
    case OMPD_scan:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_target_exit_data:
    case OMPD_target_enter_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_target_update:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_begin_declare_variant:
    case OMPD_end_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_requires:
    case OMPD_metadirective:
    case OMPD_unknown:
    default:
      llvm_unreachable("Unexpected directive.");
    }
  }

  return nullptr;
}
10334
/// Emit the user-defined mapper function. The code generation follows the
/// pattern in the example below.
/// \code
/// void .omp_mapper.<type_name>.<mapper_id>.(void *rt_mapper_handle,
///                                           void *base, void *begin,
///                                           int64_t size, int64_t type,
///                                           void *name = nullptr) {
///   // Allocate space for an array section first or add a base/begin for
///   // pointer dereference.
///   if ((size > 1 || (base != begin && maptype.IsPtrAndObj)) &&
///       !maptype.IsDelete)
///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
///                                 size*sizeof(Ty), clearToFromMember(type));
///   // Map members.
///   for (unsigned i = 0; i < size; i++) {
///     // For each component specified by this mapper:
///     for (auto c : begin[i]->all_components) {
///       if (c.hasMapper())
///         (*c.Mapper())(rt_mapper_handle, c.arg_base, c.arg_begin, c.arg_size,
///                       c.arg_type, c.arg_name);
///       else
///         __tgt_push_mapper_component(rt_mapper_handle, c.arg_base,
///                                     c.arg_begin, c.arg_size, c.arg_type,
///                                     c.arg_name);
///     }
///   }
///   // Delete the array section.
///   if (size > 1 && maptype.IsDelete)
///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
///                                 size*sizeof(Ty), clearToFromMember(type));
/// }
/// \endcode
// NOTE(review): the line naming this member function (taking the mapper
// declaration `D`) appears to have been dropped by the extraction.
    CodeGenFunction *CGF) {
  // Each mapper is emitted at most once; bail out if it is already cached.
  if (UDMMap.count(D) > 0)
    return;
  ASTContext &C = CGM.getContext();
  QualType Ty = D->getType();
  // NOTE(review): the initializer of `MapperVarDecl` appears truncated here.
  auto *MapperVarDecl =
  CharUnits ElementSize = C.getTypeSizeInChars(Ty);
  llvm::Type *ElemTy = CGM.getTypes().ConvertTypeForMem(Ty);

  CodeGenFunction MapperCGF(CGM);
  MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
  // Callback invoked by the OpenMPIRBuilder per array element: privatize the
  // mapper's declared variable to the current element and collect the map
  // entries produced by the mapper's map clauses.
  auto PrivatizeAndGenMapInfoCB =
      [&](llvm::OpenMPIRBuilder::InsertPointTy CodeGenIP, llvm::Value *PtrPHI,
          llvm::Value *BeginArg) -> llvm::OpenMPIRBuilder::MapInfosTy & {
    MapperCGF.Builder.restoreIP(CodeGenIP);

    // Privatize the declared variable of mapper to be the current array
    // element.
    Address PtrCurrent(
        PtrPHI, ElemTy,
        Address(BeginArg, MapperCGF.VoidPtrTy, CGM.getPointerAlign())
            .getAlignment()
            .alignmentOfArrayElement(ElementSize));
    // NOTE(review): the declaration of `Scope` appears truncated here.
    Scope.addPrivate(MapperVarDecl, PtrCurrent);
    (void)Scope.Privatize();

    // Get map clause information.
    MappableExprsHandler MEHandler(*D, MapperCGF);
    MEHandler.generateAllInfoForMapper(CombinedInfo, OMPBuilder);

    // With debug info, record a printable name/location for every entry.
    auto FillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) {
      return emitMappingInformation(MapperCGF, OMPBuilder, MapExpr);
    };
    if (CGM.getCodeGenOpts().getDebugInfo() !=
        llvm::codegenoptions::NoDebugInfo) {
      CombinedInfo.Names.resize(CombinedInfo.Exprs.size());
      llvm::transform(CombinedInfo.Exprs, CombinedInfo.Names.begin(),
                      FillInfoMap);
    }

    return CombinedInfo;
  };

  // Resolve nested user-defined mappers referenced by entry I, if any.
  // NOTE(review): the statement assigning `MapperFunc` appears truncated by
  // the extraction.
  auto CustomMapperCB = [&](unsigned I) {
    llvm::Function *MapperFunc = nullptr;
    if (CombinedInfo.Mappers[I]) {
      // Call the corresponding mapper function.
      cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I]));
      assert(MapperFunc && "Expect a valid mapper function is available.");
    }
    return MapperFunc;
  };

  // Name the emitted function "omp_mapper.<mangled type>.<mapper name>".
  SmallString<64> TyStr;
  llvm::raw_svector_ostream Out(TyStr);
  CGM.getCXXABI().getMangleContext().mangleCanonicalTypeName(Ty, Out);
  std::string Name = getName({"omp_mapper", TyStr, D->getName()});

  llvm::Function *NewFn = cantFail(OMPBuilder.emitUserDefinedMapper(
      PrivatizeAndGenMapInfoCB, ElemTy, Name, CustomMapperCB));
  // Cache the mapper; when emitted within a function, also remember the
  // association with that function.
  UDMMap.try_emplace(D, NewFn);
  if (CGF)
    FunctionUDMMap[CGF->CurFn].push_back(D);
}
10435
// Look up (and, if needed, emit) the function for a user-defined mapper.
// NOTE(review): the line with this member function's name and return type
// appears to have been dropped by the extraction.
    const OMPDeclareMapperDecl *D) {
  // Return the cached mapper function if it was already emitted.
  auto I = UDMMap.find(D);
  if (I != UDMMap.end())
    return I->second;
  // NOTE(review): a statement (presumably emitting the mapper for D) appears
  // to have been dropped here; afterwards the cache is consulted again.
  return UDMMap.lookup(D);
}
10444
// NOTE(review): the return type and name lines of this member function
// appear to have been dropped by the extraction; it takes the directive `D`
// plus the callback below, and yields the number of iterations of the
// associated distribute loop (or a 64-bit zero constant).
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter) {
  OpenMPDirectiveKind Kind = D.getDirectiveKind();
  const OMPExecutableDirective *TD = &D;
  // Get nested teams distribute kind directive, if any. For now, treat
  // 'target_teams_loop' as if it's really a target_teams_distribute.
  if ((!isOpenMPDistributeDirective(Kind) || !isOpenMPTeamsDirective(Kind)) &&
      Kind != OMPD_target_teams_loop)
    TD = getNestedDistributeDirective(CGM.getContext(), D);
  // No distribute loop found: report zero iterations.
  if (!TD)
    return llvm::ConstantInt::get(CGF.Int64Ty, 0);

  // Let the caller-provided emitter compute the iteration count of the loop.
  const auto *LD = cast<OMPLoopDirective>(TD);
  if (llvm::Value *NumIterations = SizeEmitter(CGF, *LD))
    return NumIterations;
  return llvm::ConstantInt::get(CGF.Int64Ty, 0);
}
10465
// Emit the host fallback path of a target region: either trap (when
// offloading is mandatory) or call the outlined host version of the region.
static void
emitTargetCallFallback(CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn,
                       const OMPExecutableDirective &D,
// NOTE(review): the parameter line declaring `CapturedVars` appears to have
// been dropped by the extraction.
                       bool RequiresOuterTask, const CapturedStmt &CS,
                       bool OffloadingMandatory, CodeGenFunction &CGF) {
  if (OffloadingMandatory) {
    // Offloading was required but cannot proceed: emit a trap instead of a
    // host-side execution.
    CGF.Builder.CreateUnreachable();
  } else {
    // Inside an outer task the captured variables must be re-emitted in the
    // task's context before calling the outlined host function.
    if (RequiresOuterTask) {
      CapturedVars.clear();
      CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
    }
    OMPRuntime->emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn,
                                         CapturedVars);
  }
}
10483
10484static llvm::Value *emitDeviceID(
10485 llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
10486 CodeGenFunction &CGF) {
10487 // Emit device ID if any.
10488 llvm::Value *DeviceID;
10489 if (Device.getPointer()) {
10490 assert((Device.getInt() == OMPC_DEVICE_unknown ||
10491 Device.getInt() == OMPC_DEVICE_device_num) &&
10492 "Expected device_num modifier.");
10493 llvm::Value *DevVal = CGF.EmitScalarExpr(Device.getPointer());
10494 DeviceID =
10495 CGF.Builder.CreateIntCast(DevVal, CGF.Int64Ty, /*isSigned=*/true);
10496 } else {
10497 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
10498 }
10499 return DeviceID;
10500}
10501
// Compute the dynamic group-private memory size operand and its fallback
// behavior for a target kernel launch.
static std::pair<llvm::Value *, OMPDynGroupprivateFallbackType>
// NOTE(review): the line with this function's name and parameters (CGF and
// the directive D) appears to have been dropped by the extraction.
  // Defaults: zero bytes requested, abort if the request cannot be honored.
  llvm::Value *DynGP = CGF.Builder.getInt32(0);
  auto DynGPFallback = OMPDynGroupprivateFallbackType::Abort;

  if (auto *DynGPClause = D.getSingleClause<OMPDynGroupprivateClause>()) {
    CodeGenFunction::RunCleanupsScope DynGPScope(CGF);
    llvm::Value *DynGPVal =
        CGF.EmitScalarExpr(DynGPClause->getSize(), /*IgnoreResultAssign=*/true);
    DynGP = CGF.Builder.CreateIntCast(DynGPVal, CGF.Int32Ty,
                                      /*isSigned=*/false);
    // Translate the clause's fallback modifier into the runtime enum.
    auto FallbackModifier = DynGPClause->getDynGroupprivateFallbackModifier();
    switch (FallbackModifier) {
    case OMPC_DYN_GROUPPRIVATE_FALLBACK_abort:
      DynGPFallback = OMPDynGroupprivateFallbackType::Abort;
      break;
    case OMPC_DYN_GROUPPRIVATE_FALLBACK_null:
      DynGPFallback = OMPDynGroupprivateFallbackType::Null;
      break;
    case OMPC_DYN_GROUPPRIVATE_FALLBACK_default_mem:
    // NOTE(review): an additional case label appears to have been dropped
    // here by the extraction.
      // This is the default for dyn_groupprivate.
      DynGPFallback = OMPDynGroupprivateFallbackType::DefaultMem;
      break;
    default:
      llvm_unreachable("Unknown fallback modifier for OpenMP dyn_groupprivate");
    }
  } else if (auto *OMPXDynCGClause =
                 D.getSingleClause<OMPXDynCGroupMemClause>()) {
    // Extension clause ompx_dyn_cgroup_mem: only a size is provided, the
    // fallback stays at its default (Abort).
    CodeGenFunction::RunCleanupsScope DynCGMemScope(CGF);
    llvm::Value *DynCGMemVal = CGF.EmitScalarExpr(OMPXDynCGClause->getSize(),
                                                  /*IgnoreResultAssign=*/true);
    DynGP = CGF.Builder.CreateIntCast(DynCGMemVal, CGF.Int32Ty,
                                      /*isSigned=*/false);
  }
  return {DynGP, DynGPFallback};
}
10539
// Collect map information for every capture of the target region's
// CapturedStmt, combining default maps, clause-specified maps, and lambda
// capture handling into CombinedInfo.
// NOTE(review): the line naming this function appears to have been dropped by
// the extraction; its parameters continue below. A few other lines are
// flagged inline.
    MappableExprsHandler &MEHandler, CodeGenFunction &CGF,
    const CapturedStmt &CS, llvm::SmallVectorImpl<llvm::Value *> &CapturedVars,
    llvm::OpenMPIRBuilder &OMPBuilder,
    llvm::DenseSet<CanonicalDeclPtr<const Decl>> &MappedVarSet,
    MappableExprsHandler::MapCombinedInfoTy &CombinedInfo) {

  llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers;
  // Walk the captures, record fields, and captured values in lockstep.
  auto RI = CS.getCapturedRecordDecl()->field_begin();
  auto *CV = CapturedVars.begin();
  // NOTE(review): the first line of this for-statement (declaring the CI/CE
  // capture iterators) appears truncated.
       CE = CS.capture_end();
       CI != CE; ++CI, ++RI, ++CV) {
    MappableExprsHandler::MapCombinedInfoTy CurInfo;

    // VLA sizes are passed to the outlined region by copy and do not have map
    // information associated.
    if (CI->capturesVariableArrayType()) {
      CurInfo.Exprs.push_back(nullptr);
      CurInfo.BasePointers.push_back(*CV);
      CurInfo.DevicePtrDecls.push_back(nullptr);
      CurInfo.DevicePointers.push_back(
          MappableExprsHandler::DeviceInfoTy::None);
      CurInfo.Pointers.push_back(*CV);
      CurInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
          CGF.getTypeSize(RI->getType()), CGF.Int64Ty, /*isSigned=*/true));
      // Copy to the device as an argument. No need to retrieve it.
      CurInfo.Types.push_back(OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
                              OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM |
                              OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT);
      CurInfo.Mappers.push_back(nullptr);
    } else {
      // NOTE(review): the second arm of this conditional expression appears
      // truncated here.
      const ValueDecl *CapturedVD =
          CI->capturesThis() ? nullptr
      bool HasEntryWithCVAsAttachPtr = false;
      if (CapturedVD)
        HasEntryWithCVAsAttachPtr =
            MEHandler.hasAttachEntryForCapturedVar(CapturedVD);

      // Populate component lists for the captured variable from clauses.
      MappableExprsHandler::MapDataArrayTy DeclComponentLists;
      // NOTE(review): the type of this storage declaration appears truncated.
          StorageForImplicitlyAddedComponentLists;
      MEHandler.populateComponentListsForNonLambdaCaptureFromClauses(
          CapturedVD, DeclComponentLists,
          StorageForImplicitlyAddedComponentLists);

      // OpenMP 6.0, 15.8, target construct, restrictions:
      // * A list item in a map clause that is specified on a target construct
      //   must have a base variable or base pointer.
      //
      // Map clauses on a target construct must either have a base pointer, or a
      // base-variable. So, if we don't have a base-pointer, that means that it
      // must have a base-variable, i.e. we have a map like `map(s)`, `map(s.x)`
      // etc. In such cases, we do not need to handle default map generation
      // for `s`.
      // NOTE(review): the declaration of `Components` in the lambda appears
      // truncated.
      bool HasEntryWithoutAttachPtr =
          llvm::any_of(DeclComponentLists, [&](const auto &MapData) {
            Components = std::get<0>(MapData);
            return !MEHandler.getAttachPtrExpr(Components);
          });

      // Generate default map info first if there's no direct map with CV as
      // the base-variable, or attach pointer.
      if (DeclComponentLists.empty() ||
          (!HasEntryWithCVAsAttachPtr && !HasEntryWithoutAttachPtr))
        MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurInfo);

      // If we have any information in the map clause, we use it, otherwise we
      // just do a default mapping.
      MEHandler.generateInfoForCaptureFromClauseInfo(
          DeclComponentLists, CI, *CV, CurInfo, OMPBuilder,
          /*OffsetForMemberOfFlag=*/CombinedInfo.BasePointers.size());

      if (!CI->capturesThis())
        MappedVarSet.insert(CI->getCapturedVar());
      else
        MappedVarSet.insert(nullptr);

      // Generate correct mapping for variables captured by reference in
      // lambdas.
      if (CI->capturesVariable())
        MEHandler.generateInfoForLambdaCaptures(CI->getCapturedVar(), *CV,
                                                CurInfo, LambdaPointers);
    }
    // We expect to have at least an element of information for this capture.
    assert(!CurInfo.BasePointers.empty() &&
           "Non-existing map pointer for capture!");
    assert(CurInfo.BasePointers.size() == CurInfo.Pointers.size() &&
           CurInfo.BasePointers.size() == CurInfo.Sizes.size() &&
           CurInfo.BasePointers.size() == CurInfo.Types.size() &&
           CurInfo.BasePointers.size() == CurInfo.Mappers.size() &&
           "Inconsistent map information sizes!");

    // We need to append the results of this capture to what we already have.
    CombinedInfo.append(CurInfo);
  }
  // Adjust MEMBER_OF flags for the lambdas captures.
  MEHandler.adjustMemberOfForLambdaCaptures(
      OMPBuilder, LambdaPointers, CombinedInfo.BasePointers,
      CombinedInfo.Pointers, CombinedInfo.Types);
}
10645static void
10646genMapInfo(MappableExprsHandler &MEHandler, CodeGenFunction &CGF,
10647 MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
10648 llvm::OpenMPIRBuilder &OMPBuilder,
10649 const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkippedVarSet =
10650 llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) {
10651
10652 CodeGenModule &CGM = CGF.CGM;
10653 // Map any list items in a map clause that were not captures because they
10654 // weren't referenced within the construct.
10655 MEHandler.generateAllInfo(CombinedInfo, OMPBuilder, SkippedVarSet);
10656
10657 auto FillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) {
10658 return emitMappingInformation(CGF, OMPBuilder, MapExpr);
10659 };
10660 if (CGM.getCodeGenOpts().getDebugInfo() !=
10661 llvm::codegenoptions::NoDebugInfo) {
10662 CombinedInfo.Names.resize(CombinedInfo.Exprs.size());
10663 llvm::transform(CombinedInfo.Exprs, CombinedInfo.Names.begin(),
10664 FillInfoMap);
10665 }
10666}
10667
/// genMapInfo (directive overload): build the complete map information for a
/// target-style directive. First generates per-capture entries from the
/// captured statement (recording the mapped declarations in MappedVarSet),
/// then generates entries for map-clause items that were not captured,
/// skipping everything already handled.
/// NOTE(review): the opening signature line(s) of this overload are missing
/// from this listing (doxygen extraction dropped linked tokens) — verify
/// against the upstream source before editing.
10669 const CapturedStmt &CS,
10671 llvm::OpenMPIRBuilder &OMPBuilder,
10672 MappableExprsHandler::MapCombinedInfoTy &CombinedInfo) {
10673 // Get mappable expression information.
10674 MappableExprsHandler MEHandler(D, CGF);
10675 llvm::DenseSet<CanonicalDeclPtr<const Decl>> MappedVarSet;
10676
// First pass: captured variables; populates MappedVarSet as it goes.
10677 genMapInfoForCaptures(MEHandler, CGF, CS, CapturedVars, OMPBuilder,
10678 MappedVarSet, CombinedInfo);
// Second pass: remaining map-clause items, skipping those mapped above.
10679 genMapInfo(MEHandler, CGF, CombinedInfo, OMPBuilder, MappedVarSet);
10680 }
10681
/// Evaluate every expression of the directive's single \p ClauseTy clause and
/// append each value, cast to signed i32, to the output vector. Used for
/// ompx_bare kernels, where num_teams and thread_limit must be explicit (the
/// assert below enforces a non-empty variable list).
/// NOTE(review): the function-name line and the output-parameter declaration
/// (original lines 10684/10686/10690) are missing from this listing — verify
/// against the upstream source.
10682 template <typename ClauseTy>
10683 static void
10685 const OMPExecutableDirective &D,
10687 const auto *C = D.getSingleClause<ClauseTy>();
10688 assert(!C->varlist_empty() &&
10689 "ompx_bare requires explicit num_teams and thread_limit");
10691 for (auto *E : C->varlist()) {
// Each clause expression is emitted and normalized to a signed 32-bit value.
10692 llvm::Value *V = CGF.EmitScalarExpr(E);
10693 Values.push_back(
10694 CGF.Builder.CreateIntCast(V, CGF.Int32Ty, /*isSigned=*/true));
10695 }
10696 }
10697
/// Emit the offloading arrays/arguments for a target directive and then the
/// kernel-launch sequence via OpenMPIRBuilder::emitKernelLaunch, with a
/// host-fallback callback used when the device launch fails. Results needed by
/// the caller are returned through InputInfo / MapTypesArray / MapNamesArray.
/// NOTE(review): this listing is missing the function-name signature line and
/// a few statements (e.g. the TargetDataInfo 'Info' declaration near 10714 and
/// the NumTeams/NumThreads vector declarations near 10768-10772) — verify
/// against the upstream source before editing.
10699 CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn,
10700 const OMPExecutableDirective &D,
10701 llvm::SmallVectorImpl<llvm::Value *> &CapturedVars, bool RequiresOuterTask,
10702 const CapturedStmt &CS, bool OffloadingMandatory,
10703 llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
10704 llvm::Value *OutlinedFnID, CodeGenFunction::OMPTargetDataInfo &InputInfo,
10705 llvm::Value *&MapTypesArray, llvm::Value *&MapNamesArray,
10706 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
10707 const OMPLoopDirective &D)>
10708 SizeEmitter,
10709 CodeGenFunction &CGF, CodeGenModule &CGM) {
10710 llvm::OpenMPIRBuilder &OMPBuilder = OMPRuntime->getOMPBuilder();
10711
10712 // Fill up the arrays with all the captured variables.
10713 MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
10715 genMapInfo(D, CGF, CS, CapturedVars, OMPBuilder, CombinedInfo);
10716
// Materialize the base-pointer/pointer/size/type/mapper arrays in IR and
// expose them to the caller through InputInfo and the two out-parameters.
10717 emitOffloadingArraysAndArgs(CGF, CombinedInfo, Info, OMPBuilder,
10718 /*IsNonContiguous=*/true, /*ForEndCall=*/false);
10719
10720 InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
10721 InputInfo.BasePointersArray = Address(Info.RTArgs.BasePointersArray,
10722 CGF.VoidPtrTy, CGM.getPointerAlign());
10723 InputInfo.PointersArray =
10724 Address(Info.RTArgs.PointersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
10725 InputInfo.SizesArray =
10726 Address(Info.RTArgs.SizesArray, CGF.Int64Ty, CGM.getPointerAlign());
10727 InputInfo.MappersArray =
10728 Address(Info.RTArgs.MappersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
10729 MapTypesArray = Info.RTArgs.MapTypesArray;
10730 MapNamesArray = Info.RTArgs.MapNamesArray;
10731
// ThenGen emits the actual launch; it is deferred so it can run either inside
// an outer task (depend/nowait) or inline.
10732 auto &&ThenGen = [&OMPRuntime, OutlinedFn, &D, &CapturedVars,
10733 RequiresOuterTask, &CS, OffloadingMandatory, Device,
10734 OutlinedFnID, &InputInfo, &MapTypesArray, &MapNamesArray,
10735 SizeEmitter](CodeGenFunction &CGF, PrePostActionTy &) {
10736 bool IsReverseOffloading = Device.getInt() == OMPC_DEVICE_ancestor;
10737
10738 if (IsReverseOffloading) {
10739 // Reverse offloading is not supported, so just execute on the host.
10740 // FIXME: This fallback solution is incorrect since it ignores the
10741 // OMP_TARGET_OFFLOAD environment variable. Instead it would be better to
10742 // assert here and ensure SEMA emits an error.
10743 emitTargetCallFallback(OMPRuntime, OutlinedFn, D, CapturedVars,
10744 RequiresOuterTask, CS, OffloadingMandatory, CGF);
10745 return;
10746 }
10747
10748 bool HasNoWait = D.hasClausesOfKind<OMPNowaitClause>();
10749 unsigned NumTargetItems = InputInfo.NumberOfTargetItems;
10750
10751 llvm::Value *BasePointersArray =
10752 InputInfo.BasePointersArray.emitRawPointer(CGF);
10753 llvm::Value *PointersArray = InputInfo.PointersArray.emitRawPointer(CGF);
10754 llvm::Value *SizesArray = InputInfo.SizesArray.emitRawPointer(CGF);
10755 llvm::Value *MappersArray = InputInfo.MappersArray.emitRawPointer(CGF);
10756
// Callback the IRBuilder invokes if the device launch fails at runtime:
// execute the host version of the region instead.
10757 auto &&EmitTargetCallFallbackCB =
10758 [&OMPRuntime, OutlinedFn, &D, &CapturedVars, RequiresOuterTask, &CS,
10759 OffloadingMandatory, &CGF](llvm::OpenMPIRBuilder::InsertPointTy IP)
10760 -> llvm::OpenMPIRBuilder::InsertPointTy {
10761 CGF.Builder.restoreIP(IP);
10762 emitTargetCallFallback(OMPRuntime, OutlinedFn, D, CapturedVars,
10763 RequiresOuterTask, CS, OffloadingMandatory, CGF);
10764 return CGF.Builder.saveIP();
10765 };
10766
// Bare kernels take explicit num_teams/thread_limit; otherwise derive both
// from the directive's clauses/defaults.
10767 bool IsBare = D.hasClausesOfKind<OMPXBareClause>();
10770 if (IsBare) {
10773 NumThreads);
10774 } else {
10775 NumTeams.push_back(OMPRuntime->emitNumTeamsForTargetDirective(CGF, D));
10776 NumThreads.push_back(
10777 OMPRuntime->emitNumThreadsForTargetDirective(CGF, D));
10778 }
10779
10780 llvm::Value *DeviceID = emitDeviceID(Device, CGF);
10781 llvm::Value *RTLoc = OMPRuntime->emitUpdateLocation(CGF, D.getBeginLoc());
10782 llvm::Value *NumIterations =
10783 OMPRuntime->emitTargetNumIterationsCall(CGF, D, SizeEmitter);
10784 auto [DynCGroupMem, DynCGroupMemFallback] = emitDynCGroupMem(D, CGF);
10785 llvm::OpenMPIRBuilder::InsertPointTy AllocaIP(
10786 CGF.AllocaInsertPt->getParent(), CGF.AllocaInsertPt->getIterator());
10787
10788 llvm::OpenMPIRBuilder::TargetDataRTArgs RTArgs(
10789 BasePointersArray, PointersArray, SizesArray, MapTypesArray,
10790 nullptr /* MapTypesArrayEnd */, MappersArray, MapNamesArray);
10791
10792 llvm::OpenMPIRBuilder::TargetKernelArgs Args(
10793 NumTargetItems, RTArgs, NumIterations, NumTeams, NumThreads,
10794 DynCGroupMem, HasNoWait, DynCGroupMemFallback);
10795
10796 llvm::OpenMPIRBuilder::InsertPointTy AfterIP =
10797 cantFail(OMPRuntime->getOMPBuilder().emitKernelLaunch(
10798 CGF.Builder, OutlinedFnID, EmitTargetCallFallbackCB, Args, DeviceID,
10799 RTLoc, AllocaIP));
10800 CGF.Builder.restoreIP(AfterIP);
10801 };
10802
// depend/nowait/in_reduction (and, from 5.1, task-based thread_limit) require
// the launch to run inside an implicit outer task.
10803 if (RequiresOuterTask)
10804 CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
10805 else
10806 OMPRuntime->emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
10807 }
10808
/// Emit the "else" path of a target call: unconditionally execute the host
/// fallback version of the region, wrapped in an outer task when the
/// directive requires one (depend/nowait/etc.).
/// NOTE(review): a parameter line (CapturedVars, original 10812) and the
/// InputInfo declaration (original 10825) are missing from this listing —
/// verify against the upstream source.
10809 static void
10810 emitTargetCallElse(CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn,
10811 const OMPExecutableDirective &D,
10813 bool RequiresOuterTask, const CapturedStmt &CS,
10814 bool OffloadingMandatory, CodeGenFunction &CGF) {
10815
10816 // Notify that the host version must be executed.
10817 auto &&ElseGen =
10818 [&OMPRuntime, OutlinedFn, &D, &CapturedVars, RequiresOuterTask, &CS,
10819 OffloadingMandatory](CodeGenFunction &CGF, PrePostActionTy &) {
10820 emitTargetCallFallback(OMPRuntime, OutlinedFn, D, CapturedVars,
10821 RequiresOuterTask, CS, OffloadingMandatory, CGF);
10822 };
10823
10824 if (RequiresOuterTask) {
10826 CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo);
10827 } else {
10828 OMPRuntime->emitInlinedDirective(CGF, D.getDirectiveKind(), ElseGen);
10829 }
10830 }
10831
/// CGOpenMPRuntime::emitTargetCall — top-level emission for a target
/// directive: captures the region's variables, then either emits the kernel
/// launch (with optional if-clause guard) or the host fallback, depending on
/// whether an outlined-function ID (i.e. offloading support) exists.
/// NOTE(review): the method-name signature line(s) and a couple of
/// declarations (CapturedVars near 10854, InputInfo near 10861) are missing
/// from this listing — verify against the upstream source.
10834 llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
10835 llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
10836 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
10837 const OMPLoopDirective &D)>
10838 SizeEmitter) {
10839 if (!CGF.HaveInsertPoint())
10840 return;
10841
// Offloading is mandatory when compiling the host side with
// -fopenmp-offload-mandatory.
10842 const bool OffloadingMandatory = !CGM.getLangOpts().OpenMPIsTargetDevice &&
10843 CGM.getLangOpts().OpenMPOffloadMandatory;
10844
10845 assert((OffloadingMandatory || OutlinedFn) && "Invalid outlined function!");
10846
// These clauses force the launch to run inside an implicit outer task.
10847 const bool RequiresOuterTask =
10848 D.hasClausesOfKind<OMPDependClause>() ||
10849 D.hasClausesOfKind<OMPNowaitClause>() ||
10850 D.hasClausesOfKind<OMPInReductionClause>() ||
10851 (CGM.getLangOpts().OpenMP >= 51 &&
10852 needsTaskBasedThreadLimit(D.getDirectiveKind()) &&
10853 D.hasClausesOfKind<OMPThreadLimitClause>());
10855 const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
10856 auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF,
10857 PrePostActionTy &) {
10858 CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
10859 };
10860 emitInlinedDirective(CGF, OMPD_unknown, ArgsCodegen);
10862
10863 llvm::Value *MapTypesArray = nullptr;
10864 llvm::Value *MapNamesArray = nullptr;
10865
10866 auto &&TargetThenGen = [this, OutlinedFn, &D, &CapturedVars,
10867 RequiresOuterTask, &CS, OffloadingMandatory, Device,
10868 OutlinedFnID, &InputInfo, &MapTypesArray,
10869 &MapNamesArray, SizeEmitter](CodeGenFunction &CGF,
10870 PrePostActionTy &) {
10871 emitTargetCallKernelLaunch(this, OutlinedFn, D, CapturedVars,
10872 RequiresOuterTask, CS, OffloadingMandatory,
10873 Device, OutlinedFnID, InputInfo, MapTypesArray,
10874 MapNamesArray, SizeEmitter, CGF, CGM);
10875 };
10876
10877 auto &&TargetElseGen =
10878 [this, OutlinedFn, &D, &CapturedVars, RequiresOuterTask, &CS,
10879 OffloadingMandatory](CodeGenFunction &CGF, PrePostActionTy &) {
10880 emitTargetCallElse(this, OutlinedFn, D, CapturedVars, RequiresOuterTask,
10881 CS, OffloadingMandatory, CGF);
10882 };
10883
10884 // If we have a target function ID it means that we need to support
10885 // offloading, otherwise, just execute on the host. We need to execute on host
10886 // regardless of the conditional in the if clause if, e.g., the user do not
10887 // specify target triples.
10888 if (OutlinedFnID) {
10889 if (IfCond) {
10890 emitIfClause(CGF, IfCond, TargetThenGen, TargetElseGen);
10891 } else {
10892 RegionCodeGenTy ThenRCG(TargetThenGen);
10893 ThenRCG(CGF);
10894 }
10895 } else {
10896 RegionCodeGenTy ElseRCG(TargetElseGen);
10897 ElseRCG(CGF);
10898 }
10899 }
10900
/// Recursively scan a statement tree for target directives that need device
/// code generation and dispatch each to its per-directive emitter. Non-target
/// executable directives are scanned through their raw statement; lambdas are
/// scanned through their body; everything else recurses over children.
/// NOTE(review): the method-name signature line and the per-case
/// Emit*DeviceFunction call lines inside the switch are missing from this
/// listing (doxygen extraction) — verify against the upstream source.
10902 StringRef ParentName) {
10903 if (!S)
10904 return;
10905
10906 // Codegen OMP target directives that offload compute to the device.
10907 bool RequiresDeviceCodegen =
10910 cast<OMPExecutableDirective>(S)->getDirectiveKind());
10911
10912 if (RequiresDeviceCodegen) {
10913 const auto &E = *cast<OMPExecutableDirective>(S);
10914
10915 llvm::TargetRegionEntryInfo EntryInfo = getEntryInfoFromPresumedLoc(
10916 CGM, OMPBuilder, E.getBeginLoc(), ParentName);
10917
10918 // Is this a target region that should not be emitted as an entry point? If
10919 // so just signal we are done with this target region.
10920 if (!OMPBuilder.OffloadInfoManager.hasTargetRegionEntryInfo(EntryInfo))
10921 return;
10922
// Dispatch to the directive-specific device-function emitter.
10923 switch (E.getDirectiveKind()) {
10924 case OMPD_target:
10927 break;
10928 case OMPD_target_parallel:
10930 CGM, ParentName, cast<OMPTargetParallelDirective>(E));
10931 break;
10932 case OMPD_target_teams:
10934 CGM, ParentName, cast<OMPTargetTeamsDirective>(E));
10935 break;
10936 case OMPD_target_teams_distribute:
10939 break;
10940 case OMPD_target_teams_distribute_simd:
10943 break;
10944 case OMPD_target_parallel_for:
10947 break;
10948 case OMPD_target_parallel_for_simd:
10951 break;
10952 case OMPD_target_simd:
10954 CGM, ParentName, cast<OMPTargetSimdDirective>(E));
10955 break;
10956 case OMPD_target_teams_distribute_parallel_for:
10958 CGM, ParentName,
10960 break;
10961 case OMPD_target_teams_distribute_parallel_for_simd:
10964 CGM, ParentName,
10966 break;
10967 case OMPD_target_teams_loop:
10970 break;
10971 case OMPD_target_parallel_loop:
10974 break;
// All remaining directive kinds are not target-entry directives; reaching
// them here is a front-end invariant violation.
10975 case OMPD_parallel:
10976 case OMPD_for:
10977 case OMPD_parallel_for:
10978 case OMPD_parallel_master:
10979 case OMPD_parallel_sections:
10980 case OMPD_for_simd:
10981 case OMPD_parallel_for_simd:
10982 case OMPD_cancel:
10983 case OMPD_cancellation_point:
10984 case OMPD_ordered:
10985 case OMPD_threadprivate:
10986 case OMPD_allocate:
10987 case OMPD_task:
10988 case OMPD_simd:
10989 case OMPD_tile:
10990 case OMPD_unroll:
10991 case OMPD_sections:
10992 case OMPD_section:
10993 case OMPD_single:
10994 case OMPD_master:
10995 case OMPD_critical:
10996 case OMPD_taskyield:
10997 case OMPD_barrier:
10998 case OMPD_taskwait:
10999 case OMPD_taskgroup:
11000 case OMPD_atomic:
11001 case OMPD_flush:
11002 case OMPD_depobj:
11003 case OMPD_scan:
11004 case OMPD_teams:
11005 case OMPD_target_data:
11006 case OMPD_target_exit_data:
11007 case OMPD_target_enter_data:
11008 case OMPD_distribute:
11009 case OMPD_distribute_simd:
11010 case OMPD_distribute_parallel_for:
11011 case OMPD_distribute_parallel_for_simd:
11012 case OMPD_teams_distribute:
11013 case OMPD_teams_distribute_simd:
11014 case OMPD_teams_distribute_parallel_for:
11015 case OMPD_teams_distribute_parallel_for_simd:
11016 case OMPD_target_update:
11017 case OMPD_declare_simd:
11018 case OMPD_declare_variant:
11019 case OMPD_begin_declare_variant:
11020 case OMPD_end_declare_variant:
11021 case OMPD_declare_target:
11022 case OMPD_end_declare_target:
11023 case OMPD_declare_reduction:
11024 case OMPD_declare_mapper:
11025 case OMPD_taskloop:
11026 case OMPD_taskloop_simd:
11027 case OMPD_master_taskloop:
11028 case OMPD_master_taskloop_simd:
11029 case OMPD_parallel_master_taskloop:
11030 case OMPD_parallel_master_taskloop_simd:
11031 case OMPD_requires:
11032 case OMPD_metadirective:
11033 case OMPD_unknown:
11034 default:
11035 llvm_unreachable("Unknown target directive for OpenMP device codegen.");
11036 }
11037 return;
11038 }
11039
// Non-target executable directives: scan their raw associated statement.
11040 if (const auto *E = dyn_cast<OMPExecutableDirective>(S)) {
11041 if (!E->hasAssociatedStmt() || !E->getAssociatedStmt())
11042 return;
11043
11044 scanForTargetRegionsFunctions(E->getRawStmt(), ParentName);
11045 return;
11046 }
11047
11048 // If this is a lambda function, look into its body.
11049 if (const auto *L = dyn_cast<LambdaExpr>(S))
11050 S = L->getBody();
11051
11052 // Keep looking for target regions recursively.
11053 for (const Stmt *II : S->children())
11054 scanForTargetRegionsFunctions(II, ParentName);
11055 }
11056
11057static bool isAssumedToBeNotEmitted(const ValueDecl *VD, bool IsDevice) {
11058 std::optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
11059 OMPDeclareTargetDeclAttr::getDeviceType(VD);
11060 if (!DevTy)
11061 return false;
11062 // Do not emit device_type(nohost) functions for the host.
11063 if (!IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_NoHost)
11064 return true;
11065 // Do not emit device_type(host) functions for the device.
11066 if (IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_Host)
11067 return true;
11068 return false;
11069}
11070
/// Decide whether a function's emission should be handled here instead of by
/// normal codegen: on the host, only device_type-excluded functions are
/// intercepted; on the device, target regions are scanned and functions that
/// are neither declare-target nor already emitted are suppressed.
/// NOTE(review): the method-name signature line and the calls guarded by the
/// two truncated 'if' statements (originals 11076 and 11086-11087) are
/// missing from this listing — verify against the upstream source.
11072 // If emitting code for the host, we do not process FD here. Instead we do
11073 // the normal code generation.
11074 if (!CGM.getLangOpts().OpenMPIsTargetDevice) {
11075 if (const auto *FD = dyn_cast<FunctionDecl>(GD.getDecl()))
11077 CGM.getLangOpts().OpenMPIsTargetDevice))
11078 return true;
11079 return false;
11080 }
11081
11082 const ValueDecl *VD = cast<ValueDecl>(GD.getDecl());
11083 // Try to detect target regions in the function.
11084 if (const auto *FD = dyn_cast<FunctionDecl>(VD)) {
11085 StringRef Name = CGM.getMangledName(GD);
11088 CGM.getLangOpts().OpenMPIsTargetDevice))
11089 return true;
11090 }
11091
11092 // Do not to emit function if it is not marked as declare target.
11093 return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) &&
11094 AlreadyEmittedTargetDecls.count(VD) == 0;
11095 }
11096
/// Decide whether a global variable's emission should be suppressed or
/// deferred for device compilation. Scans the element type's ctors/dtors for
/// embedded target regions (kernel-name mangling uses the complete variants),
/// then suppresses variables that are not declare-target (or are link/to with
/// unified shared memory, per the truncated condition below).
/// NOTE(review): the method-name signature line and parts of the conditions
/// (originals 11096-11098, 11129-11130) are missing from this listing —
/// verify against the upstream source.
11099 CGM.getLangOpts().OpenMPIsTargetDevice))
11100 return true;
11101
11102 if (!CGM.getLangOpts().OpenMPIsTargetDevice)
11103 return false;
11104
11105 // Check if there are Ctors/Dtors in this declaration and look for target
11106 // regions in it. We use the complete variant to produce the kernel name
11107 // mangling.
11108 QualType RDTy = cast<VarDecl>(GD.getDecl())->getType();
11109 if (const auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) {
11110 for (const CXXConstructorDecl *Ctor : RD->ctors()) {
11111 StringRef ParentName =
11112 CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete));
11113 scanForTargetRegionsFunctions(Ctor->getBody(), ParentName);
11114 }
11115 if (const CXXDestructorDecl *Dtor = RD->getDestructor()) {
11116 StringRef ParentName =
11117 CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete));
11118 scanForTargetRegionsFunctions(Dtor->getBody(), ParentName);
11119 }
11120 }
11121
11122 // Do not to emit variable if it is not marked as declare target.
11123 std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
11124 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(
11125 cast<VarDecl>(GD.getDecl()));
11126 if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
11127 ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
11128 *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
11131 return true;
11132 }
11133 return false;
11134 }
11135
/// Register a global variable with the offloading infrastructure: skips
/// extern non-link declare-target declarations, records non-declare-target
/// globals emitted on the device (e.g. for debug info), and otherwise hands
/// the variable to OMPBuilder.registerTargetGlobalVariable, marking any
/// generated reference globals as compiler-used.
/// NOTE(review): the method-name signature line and some argument lines of
/// the registerTargetGlobalVariable call (originals 11168, 11171) are
/// missing from this listing — verify against the upstream source.
11137 llvm::Constant *Addr) {
// Nothing to register when no offloading targets exist and we are on the host.
11138 if (CGM.getLangOpts().OMPTargetTriples.empty() &&
11139 !CGM.getLangOpts().OpenMPIsTargetDevice)
11140 return;
11141
11142 std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
11143 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
11144
11145 // If this is an 'extern' declaration we defer to the canonical definition and
11146 // do not emit an offloading entry.
11147 if (Res && *Res != OMPDeclareTargetDeclAttr::MT_Link &&
11148 VD->hasExternalStorage())
11149 return;
11150
11151 if (!Res) {
11152 if (CGM.getLangOpts().OpenMPIsTargetDevice) {
11153 // Register non-target variables being emitted in device code (debug info
11154 // may cause this).
11155 StringRef VarName = CGM.getMangledName(VD);
11156 EmittedNonTargetVariables.try_emplace(VarName, Addr);
11157 }
11158 return;
11159 }
11160
// Lazy accessors so the builder only materializes address/linkage if needed.
11161 auto AddrOfGlobal = [&VD, this]() { return CGM.GetAddrOfGlobal(VD); };
11162 auto LinkageForVariable = [&VD, this]() {
11163 return CGM.getLLVMLinkageVarDefinition(VD);
11164 };
11165
11166 std::vector<llvm::GlobalVariable *> GeneratedRefs;
11167 OMPBuilder.registerTargetGlobalVariable(
11169 VD->hasDefinition(CGM.getContext()) == VarDecl::DeclarationOnly,
11170 VD->isExternallyVisible(),
11172 VD->getCanonicalDecl()->getBeginLoc()),
11173 CGM.getMangledName(VD), GeneratedRefs, CGM.getLangOpts().OpenMPSimd,
11174 CGM.getLangOpts().OMPTargetTriples, AddrOfGlobal, LinkageForVariable,
11175 CGM.getTypes().ConvertTypeForMem(
11176 CGM.getContext().getPointerType(VD->getType())),
11177 Addr);
11178
// Keep builder-generated reference globals alive through optimization.
11179 for (auto *ref : GeneratedRefs)
11180 CGM.addCompilerUsedGlobal(ref);
11181 }
11182
/// Dispatch a global declaration to the function or variable handler.
/// NOTE(review): the method-name signature line and the second operand of the
/// '||' (original 11185) are missing from this listing — verify against the
/// upstream source.
11184 if (isa<FunctionDecl>(GD.getDecl()) ||
11186 return emitTargetFunctions(GD);
11187
11188 return emitTargetGlobalVariable(GD);
11189 }
11190
/// Emit every deferred declare-target global variable: to/enter-mapped
/// variables are emitted directly (subject to the truncated condition,
/// presumably the unified-shared-memory check — confirm upstream), while
/// link-mapped (or USM to/enter) variables get a declare-target reference
/// via getAddrOfDeclareTargetVar.
/// NOTE(review): the method-name signature line and condition fragments
/// (originals 11199, 11205) are missing from this listing.
11192 for (const VarDecl *VD : DeferredGlobalVariables) {
11193 std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
11194 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
11195 if (!Res)
11196 continue;
11197 if ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
11198 *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
11200 CGM.EmitGlobal(VD);
11201 } else {
11202 assert((*Res == OMPDeclareTargetDeclAttr::MT_Link ||
11203 ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
11204 *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
11206 "Expected link clause or to clause with unified memory.");
11207 (void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
11208 }
11209 }
11210 }
11211
/// Default (host) implementation: only asserts the directive is target-based;
/// target-specific runtimes override this to adjust lambda capture data.
/// NOTE(review): the method-name signature line is missing from this listing
/// — verify against the upstream source.
11213 CodeGenFunction &CGF, const OMPExecutableDirective &D) const {
11214 assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) &&
11215 " Expected target-based directive.");
11216 }
11217
/// Process a 'requires' directive's clause list: records
/// unified_shared_memory in the OMPBuilder config and translates an
/// atomic_default_mem_order clause into the corresponding LLVM atomic
/// ordering stored in RequiresAtomicOrdering.
/// NOTE(review): the method-name signature line and two lines inside the
/// bodies (originals 11221, 11235 — 11235 is presumably the 'default:' label)
/// are missing from this listing — verify against the upstream source.
11219 for (const OMPClause *Clause : D->clauselists()) {
11220 if (Clause->getClauseKind() == OMPC_unified_shared_memory) {
11222 OMPBuilder.Config.setHasRequiresUnifiedSharedMemory(true);
11223 } else if (const auto *AC =
11224 dyn_cast<OMPAtomicDefaultMemOrderClause>(Clause)) {
11225 switch (AC->getAtomicDefaultMemOrderKind()) {
11226 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_acq_rel:
11227 RequiresAtomicOrdering = llvm::AtomicOrdering::AcquireRelease;
11228 break;
11229 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_seq_cst:
11230 RequiresAtomicOrdering = llvm::AtomicOrdering::SequentiallyConsistent;
11231 break;
11232 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_relaxed:
11233 RequiresAtomicOrdering = llvm::AtomicOrdering::Monotonic;
11234 break;
11236 break;
11237 }
11238 }
11239 }
11240 }
11241
/// Accessor for the atomic ordering selected by a 'requires
/// atomic_default_mem_order' clause (see processRequiresDecl above).
/// NOTE(review): the body's return statement (original 11243, presumably
/// 'return RequiresAtomicOrdering;') is missing from this listing — verify
/// against the upstream source.
11242 llvm::AtomicOrdering CGOpenMPRuntime::getDefaultMemoryOrdering() const {
11244 }
11245
/// If \p VD has an OMPAllocateDeclAttr with a predefined allocator, set the
/// language address space the allocation should use and return true. All
/// predefined allocators currently fall back to the default address space;
/// user-defined allocators are invalid for static-storage variables.
/// NOTE(review): the function-name signature line is missing from this
/// listing — verify against the upstream source.
11247 LangAS &AS) {
11248 if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())
11249 return false;
11250 const auto *A = VD->getAttr<OMPAllocateDeclAttr>();
11251 switch(A->getAllocatorType()) {
11252 case OMPAllocateDeclAttr::OMPNullMemAlloc:
11253 case OMPAllocateDeclAttr::OMPDefaultMemAlloc:
11254 // Not supported, fallback to the default mem space.
11255 case OMPAllocateDeclAttr::OMPLargeCapMemAlloc:
11256 case OMPAllocateDeclAttr::OMPCGroupMemAlloc:
11257 case OMPAllocateDeclAttr::OMPHighBWMemAlloc:
11258 case OMPAllocateDeclAttr::OMPLowLatMemAlloc:
11259 case OMPAllocateDeclAttr::OMPThreadMemAlloc:
11260 case OMPAllocateDeclAttr::OMPConstMemAlloc:
11261 case OMPAllocateDeclAttr::OMPPTeamMemAlloc:
11262 AS = LangAS::Default;
11263 return true;
11264 case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc:
11265 llvm_unreachable("Expected predefined allocator for the variables with the "
11266 "static storage.");
11267 }
11268 return false;
11269 }
11270
11274
/// RAII constructor: when compiling for a target device, saves the current
/// ShouldMarkAsGlobal flag and clears it, temporarily disabling automatic
/// declare-target marking; the destructor restores the saved value.
/// NOTE(review): the constructor-name line is missing from this listing —
/// verify against the upstream source.
11276 CodeGenModule &CGM)
11277 : CGM(CGM) {
11278 if (CGM.getLangOpts().OpenMPIsTargetDevice) {
11279 SavedShouldMarkAsGlobal = CGM.getOpenMPRuntime().ShouldMarkAsGlobal;
11280 CGM.getOpenMPRuntime().ShouldMarkAsGlobal = false;
11281 }
11282 }
11283
/// RAII destructor: restores the ShouldMarkAsGlobal flag saved by the
/// constructor when compiling for a target device.
/// NOTE(review): the destructor-name line is missing from this listing —
/// verify against the upstream source.
11285 if (CGM.getLangOpts().OpenMPIsTargetDevice)
11286 CGM.getOpenMPRuntime().ShouldMarkAsGlobal = SavedShouldMarkAsGlobal;
11287 }
11288
/// Decide whether a function should be emitted for the device when automatic
/// declare-target marking is active. Declare-target functions with an
/// unemitted body are emitted unless an IR definition already exists; other
/// functions are emitted at most once (tracked via AlreadyEmittedTargetDecls).
/// Returns true when the function must NOT be (re-)emitted.
/// NOTE(review): the method-name signature line is missing from this listing
/// — verify against the upstream source.
11290 if (!CGM.getLangOpts().OpenMPIsTargetDevice || !ShouldMarkAsGlobal)
11291 return true;
11292
11293 const auto *D = cast<FunctionDecl>(GD.getDecl());
11294 // Do not to emit function if it is marked as declare target as it was already
11295 // emitted.
11296 if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(D)) {
11297 if (D->hasBody() && AlreadyEmittedTargetDecls.count(D) == 0) {
// Only suppress if the module already holds a real definition.
11298 if (auto *F = dyn_cast_or_null<llvm::Function>(
11299 CGM.GetGlobalValue(CGM.getMangledName(GD))))
11300 return !F->isDeclaration();
11301 return false;
11302 }
11303 return true;
11304 }
11305
// Insert returns false on the second attempt, so repeats are suppressed.
11306 return !AlreadyEmittedTargetDecls.insert(D).second;
11307 }
11308
/// Emit a call to __kmpc_fork_teams(loc, nargs, microtask, captured...) to
/// launch the outlined teams region with the given captured variables.
/// NOTE(review): the method-name signature line and two statements (originals
/// 11318 and 11325 — presumably a cleanups scope and the RealArgs vector
/// declaration) are missing from this listing — verify against the upstream
/// source.
11310 const OMPExecutableDirective &D,
11311 SourceLocation Loc,
11312 llvm::Function *OutlinedFn,
11313 ArrayRef<llvm::Value *> CapturedVars) {
11314 if (!CGF.HaveInsertPoint())
11315 return;
11316
11317 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
11319
11320 // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn);
11321 llvm::Value *Args[] = {
11322 RTLoc,
11323 CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
11324 OutlinedFn};
11326 RealArgs.append(std::begin(Args), std::end(Args));
11327 RealArgs.append(CapturedVars.begin(), CapturedVars.end());
11328
11329 llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
11330 CGM.getModule(), OMPRTL___kmpc_fork_teams);
11331 CGF.EmitRuntimeCall(RTLFn, RealArgs);
11332 }
11333
/// Emit a call to __kmpc_push_num_teams(loc, gtid, num_teams, thread_limit).
/// Null clause expressions become 0, which the runtime treats as "use the
/// default".
/// NOTE(review): the method-name signature line is missing from this listing
/// — verify against the upstream source.
11335 const Expr *NumTeams,
11336 const Expr *ThreadLimit,
11337 SourceLocation Loc) {
11338 if (!CGF.HaveInsertPoint())
11339 return;
11340
11341 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
11342
// Evaluate each clause expression to a signed i32, defaulting to 0.
11343 llvm::Value *NumTeamsVal =
11344 NumTeams
11345 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams),
11346 CGF.CGM.Int32Ty, /* isSigned = */ true)
11347 : CGF.Builder.getInt32(0);
11348
11349 llvm::Value *ThreadLimitVal =
11350 ThreadLimit
11351 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
11352 CGF.CGM.Int32Ty, /* isSigned = */ true)
11353 : CGF.Builder.getInt32(0);
11354
11355 // Build call __kmpc_push_num_teamss(&loc, global_tid, num_teams, thread_limit)
11356 llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal,
11357 ThreadLimitVal};
11358 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
11359 CGM.getModule(), OMPRTL___kmpc_push_num_teams),
11360 PushNumTeamsArgs);
11361 }
11362
/// Emit a call to __kmpc_set_thread_limit(loc, gtid, thread_limit). A null
/// clause expression becomes 0 ("use the default").
/// NOTE(review): the method-name signature line is missing from this listing
/// — verify against the upstream source.
11364 const Expr *ThreadLimit,
11365 SourceLocation Loc) {
11366 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
11367 llvm::Value *ThreadLimitVal =
11368 ThreadLimit
11369 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
11370 CGF.CGM.Int32Ty, /* isSigned = */ true)
11371 : CGF.Builder.getInt32(0);
11372
11373 // Build call __kmpc_set_thread_limit(&loc, global_tid, thread_limit)
11374 llvm::Value *ThreadLimitArgs[] = {RTLoc, getThreadID(CGF, Loc),
11375 ThreadLimitVal};
11376 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
11377 CGM.getModule(), OMPRTL___kmpc_set_thread_limit),
11378 ThreadLimitArgs);
11379 }
11380
/// Emit a 'target data' region via OpenMPIRBuilder::createTargetData: builds
/// the map information, device-id and if-clause values, then provides
/// callbacks for body generation (Priv/DupNoPriv/NoPriv phases driven by
/// whether use_device_ptr/addr captures exist), device-address rewriting, and
/// custom declare-mapper functions.
/// NOTE(review): the method-name signature line, the Info parameter line
/// (original 11384), and part of the custom-mapper call (original 11463) are
/// missing from this listing — verify against the upstream source.
11382 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
11383 const Expr *Device, const RegionCodeGenTy &CodeGen,
11385 if (!CGF.HaveInsertPoint())
11386 return;
11387
11388 // Action used to replace the default codegen action and turn privatization
11389 // off.
11390 PrePostActionTy NoPrivAction;
11391
11392 using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
11393
11394 llvm::Value *IfCondVal = nullptr;
11395 if (IfCond)
11396 IfCondVal = CGF.EvaluateExprAsBool(IfCond);
11397
11398 // Emit device ID if any.
11399 llvm::Value *DeviceID = nullptr;
11400 if (Device) {
11401 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
11402 CGF.Int64Ty, /*isSigned=*/true);
11403 } else {
11404 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
11405 }
11406
11407 // Fill up the arrays with all the mapped variables.
11408 MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
11409 auto GenMapInfoCB =
11410 [&](InsertPointTy CodeGenIP) -> llvm::OpenMPIRBuilder::MapInfosTy & {
11411 CGF.Builder.restoreIP(CodeGenIP);
11412 // Get map clause information.
11413 MappableExprsHandler MEHandler(D, CGF);
11414 MEHandler.generateAllInfo(CombinedInfo, OMPBuilder);
11415
// With debug info, record printable names for the mapped expressions.
11416 auto FillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) {
11417 return emitMappingInformation(CGF, OMPBuilder, MapExpr);
11418 };
11419 if (CGM.getCodeGenOpts().getDebugInfo() !=
11420 llvm::codegenoptions::NoDebugInfo) {
11421 CombinedInfo.Names.resize(CombinedInfo.Exprs.size());
11422 llvm::transform(CombinedInfo.Exprs, CombinedInfo.Names.begin(),
11423 FillInfoMap);
11424 }
11425
11426 return CombinedInfo;
11427 };
11428 using BodyGenTy = llvm::OpenMPIRBuilder::BodyGenTy;
// Region body callback: which phases emit the body depends on whether any
// use_device_ptr/use_device_addr captures were recorded.
11429 auto BodyCB = [&](InsertPointTy CodeGenIP, BodyGenTy BodyGenType) {
11430 CGF.Builder.restoreIP(CodeGenIP);
11431 switch (BodyGenType) {
11432 case BodyGenTy::Priv:
11433 if (!Info.CaptureDeviceAddrMap.empty())
11434 CodeGen(CGF);
11435 break;
11436 case BodyGenTy::DupNoPriv:
11437 if (!Info.CaptureDeviceAddrMap.empty()) {
11438 CodeGen.setAction(NoPrivAction);
11439 CodeGen(CGF);
11440 }
11441 break;
11442 case BodyGenTy::NoPriv:
11443 if (Info.CaptureDeviceAddrMap.empty()) {
11444 CodeGen.setAction(NoPrivAction);
11445 CodeGen(CGF);
11446 }
11447 break;
11448 }
11449 return InsertPointTy(CGF.Builder.GetInsertBlock(),
11450 CGF.Builder.GetInsertPoint());
11451 };
11452
// Record the device-side address for each use_device_* declaration.
11453 auto DeviceAddrCB = [&](unsigned int I, llvm::Value *NewDecl) {
11454 if (const ValueDecl *DevVD = CombinedInfo.DevicePtrDecls[I]) {
11455 Info.CaptureDeviceAddrMap.try_emplace(DevVD, NewDecl);
11456 }
11457 };
11458
// Provide the (possibly null) user-defined mapper function per entry.
11459 auto CustomMapperCB = [&](unsigned int I) {
11460 llvm::Function *MFunc = nullptr;
11461 if (CombinedInfo.Mappers[I]) {
11462 Info.HasMapper = true;
11464 cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I]));
11465 }
11466 return MFunc;
11467 };
11468
11469 // Source location for the ident struct
11470 llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
11471
11472 InsertPointTy AllocaIP(CGF.AllocaInsertPt->getParent(),
11473 CGF.AllocaInsertPt->getIterator());
11474 InsertPointTy CodeGenIP(CGF.Builder.GetInsertBlock(),
11475 CGF.Builder.GetInsertPoint());
11476 llvm::OpenMPIRBuilder::LocationDescription OmpLoc(CodeGenIP);
11477 llvm::OpenMPIRBuilder::InsertPointTy AfterIP =
11478 cantFail(OMPBuilder.createTargetData(
11479 OmpLoc, AllocaIP, CodeGenIP, DeviceID, IfCondVal, Info, GenMapInfoCB,
11480 CustomMapperCB,
11481 /*MapperFunc=*/nullptr, BodyCB, DeviceAddrCB, RTLoc));
11482 CGF.Builder.restoreIP(AfterIP);
11483 }
11484
11486 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
11487 const Expr *Device) {
11488 if (!CGF.HaveInsertPoint())
11489 return;
11490
11494 "Expecting either target enter, exit data, or update directives.");
11495
11497 llvm::Value *MapTypesArray = nullptr;
11498 llvm::Value *MapNamesArray = nullptr;
11499 // Generate the code for the opening of the data environment.
11500 auto &&ThenGen = [this, &D, Device, &InputInfo, &MapTypesArray,
11501 &MapNamesArray](CodeGenFunction &CGF, PrePostActionTy &) {
11502 // Emit device ID if any.
11503 llvm::Value *DeviceID = nullptr;
11504 if (Device) {
11505 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
11506 CGF.Int64Ty, /*isSigned=*/true);
11507 } else {
11508 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
11509 }
11510
11511 // Emit the number of elements in the offloading arrays.
11512 llvm::Constant *PointerNum =
11513 CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);
11514
11515 // Source location for the ident struct
11516 llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
11517
11518 SmallVector<llvm::Value *, 13> OffloadingArgs(
11519 {RTLoc, DeviceID, PointerNum,
11520 InputInfo.BasePointersArray.emitRawPointer(CGF),
11521 InputInfo.PointersArray.emitRawPointer(CGF),
11522 InputInfo.SizesArray.emitRawPointer(CGF), MapTypesArray, MapNamesArray,
11523 InputInfo.MappersArray.emitRawPointer(CGF)});
11524
11525 // Select the right runtime function call for each standalone
11526 // directive.
11527 const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
11528 RuntimeFunction RTLFn;
11529 switch (D.getDirectiveKind()) {
11530 case OMPD_target_enter_data:
11531 RTLFn = HasNowait ? OMPRTL___tgt_target_data_begin_nowait_mapper
11532 : OMPRTL___tgt_target_data_begin_mapper;
11533 break;
11534 case OMPD_target_exit_data:
11535 RTLFn = HasNowait ? OMPRTL___tgt_target_data_end_nowait_mapper
11536 : OMPRTL___tgt_target_data_end_mapper;
11537 break;
11538 case OMPD_target_update:
11539 RTLFn = HasNowait ? OMPRTL___tgt_target_data_update_nowait_mapper
11540 : OMPRTL___tgt_target_data_update_mapper;
11541 break;
11542 case OMPD_parallel:
11543 case OMPD_for:
11544 case OMPD_parallel_for:
11545 case OMPD_parallel_master:
11546 case OMPD_parallel_sections:
11547 case OMPD_for_simd:
11548 case OMPD_parallel_for_simd:
11549 case OMPD_cancel:
11550 case OMPD_cancellation_point:
11551 case OMPD_ordered:
11552 case OMPD_threadprivate:
11553 case OMPD_allocate:
11554 case OMPD_task:
11555 case OMPD_simd:
11556 case OMPD_tile:
11557 case OMPD_unroll:
11558 case OMPD_sections:
11559 case OMPD_section:
11560 case OMPD_single:
11561 case OMPD_master:
11562 case OMPD_critical:
11563 case OMPD_taskyield:
11564 case OMPD_barrier:
11565 case OMPD_taskwait:
11566 case OMPD_taskgroup:
11567 case OMPD_atomic:
11568 case OMPD_flush:
11569 case OMPD_depobj:
11570 case OMPD_scan:
11571 case OMPD_teams:
11572 case OMPD_target_data:
11573 case OMPD_distribute:
11574 case OMPD_distribute_simd:
11575 case OMPD_distribute_parallel_for:
11576 case OMPD_distribute_parallel_for_simd:
11577 case OMPD_teams_distribute:
11578 case OMPD_teams_distribute_simd:
11579 case OMPD_teams_distribute_parallel_for:
11580 case OMPD_teams_distribute_parallel_for_simd:
11581 case OMPD_declare_simd:
11582 case OMPD_declare_variant:
11583 case OMPD_begin_declare_variant:
11584 case OMPD_end_declare_variant:
11585 case OMPD_declare_target:
11586 case OMPD_end_declare_target:
11587 case OMPD_declare_reduction:
11588 case OMPD_declare_mapper:
11589 case OMPD_taskloop:
11590 case OMPD_taskloop_simd:
11591 case OMPD_master_taskloop:
11592 case OMPD_master_taskloop_simd:
11593 case OMPD_parallel_master_taskloop:
11594 case OMPD_parallel_master_taskloop_simd:
11595 case OMPD_target:
11596 case OMPD_target_simd:
11597 case OMPD_target_teams_distribute:
11598 case OMPD_target_teams_distribute_simd:
11599 case OMPD_target_teams_distribute_parallel_for:
11600 case OMPD_target_teams_distribute_parallel_for_simd:
11601 case OMPD_target_teams:
11602 case OMPD_target_parallel:
11603 case OMPD_target_parallel_for:
11604 case OMPD_target_parallel_for_simd:
11605 case OMPD_requires:
11606 case OMPD_metadirective:
11607 case OMPD_unknown:
11608 default:
11609 llvm_unreachable("Unexpected standalone target data directive.");
11610 break;
11611 }
11612 if (HasNowait) {
11613 OffloadingArgs.push_back(llvm::Constant::getNullValue(CGF.Int32Ty));
11614 OffloadingArgs.push_back(llvm::Constant::getNullValue(CGF.VoidPtrTy));
11615 OffloadingArgs.push_back(llvm::Constant::getNullValue(CGF.Int32Ty));
11616 OffloadingArgs.push_back(llvm::Constant::getNullValue(CGF.VoidPtrTy));
11617 }
11618 CGF.EmitRuntimeCall(
11619 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), RTLFn),
11620 OffloadingArgs);
11621 };
11622
11623 auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
11624 &MapNamesArray](CodeGenFunction &CGF,
11625 PrePostActionTy &) {
11626 // Fill up the arrays with all the mapped variables.
11627 MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
11629 MappableExprsHandler MEHandler(D, CGF);
11630 genMapInfo(MEHandler, CGF, CombinedInfo, OMPBuilder);
11631 emitOffloadingArraysAndArgs(CGF, CombinedInfo, Info, OMPBuilder,
11632 /*IsNonContiguous=*/true, /*ForEndCall=*/false);
11633
11634 bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() ||
11635 D.hasClausesOfKind<OMPNowaitClause>();
11636
11637 InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
11638 InputInfo.BasePointersArray = Address(Info.RTArgs.BasePointersArray,
11639 CGF.VoidPtrTy, CGM.getPointerAlign());
11640 InputInfo.PointersArray = Address(Info.RTArgs.PointersArray, CGF.VoidPtrTy,
11641 CGM.getPointerAlign());
11642 InputInfo.SizesArray =
11643 Address(Info.RTArgs.SizesArray, CGF.Int64Ty, CGM.getPointerAlign());
11644 InputInfo.MappersArray =
11645 Address(Info.RTArgs.MappersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
11646 MapTypesArray = Info.RTArgs.MapTypesArray;
11647 MapNamesArray = Info.RTArgs.MapNamesArray;
11648 if (RequiresOuterTask)
11649 CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
11650 else
11651 emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
11652 };
11653
11654 if (IfCond) {
11655 emitIfClause(CGF, IfCond, TargetThenGen,
11656 [](CodeGenFunction &CGF, PrePostActionTy &) {});
11657 } else {
11658 RegionCodeGenTy ThenRCG(TargetThenGen);
11659 ThenRCG(CGF);
11660 }
11661}
11662
namespace {
/// Kind of parameter in a function with 'declare simd' directive.
/// These mirror the vector-ABI parameter classes; mangleVectorParameters()
/// below maps them to the mangling letters 'l', 'R', 'U', 'L', 'u', 'v'
/// respectively.
enum ParamKindTy {
  Linear,     // linear(p): value advances by a fixed step per lane ('l').
  LinearRef,  // linear(ref(p)): linear by reference ('R').
  LinearUVal, // linear(uval(p)): linear by underlying value ('U').
  LinearVal,  // linear(val(p)) on a reference-typed parameter ('L').
  Uniform,    // uniform(p): same value across all lanes ('u').
  Vector,     // default: a distinct value per lane ('v').
};
/// Attribute set of the parameter.
struct ParamAttrTy {
  // Classification of the parameter; defaults to per-lane Vector.
  ParamKindTy Kind = Vector;
  // Linear step value, or (when HasVarStride) the position of the parameter
  // that carries the stride.
  llvm::APSInt StrideOrArg;
  // Alignment from an 'aligned' clause; default-constructed (zero) when absent.
  llvm::APSInt Alignment;
  // True when the linear step is given by another parameter rather than a
  // compile-time constant.
  bool HasVarStride = false;
};
} // namespace
11681
11682static unsigned evaluateCDTSize(const FunctionDecl *FD,
11683 ArrayRef<ParamAttrTy> ParamAttrs) {
11684 // Every vector variant of a SIMD-enabled function has a vector length (VLEN).
11685 // If OpenMP clause "simdlen" is used, the VLEN is the value of the argument
11686 // of that clause. The VLEN value must be power of 2.
11687 // In other case the notion of the function`s "characteristic data type" (CDT)
11688 // is used to compute the vector length.
11689 // CDT is defined in the following order:
11690 // a) For non-void function, the CDT is the return type.
11691 // b) If the function has any non-uniform, non-linear parameters, then the
11692 // CDT is the type of the first such parameter.
11693 // c) If the CDT determined by a) or b) above is struct, union, or class
11694 // type which is pass-by-value (except for the type that maps to the
11695 // built-in complex data type), the characteristic data type is int.
11696 // d) If none of the above three cases is applicable, the CDT is int.
11697 // The VLEN is then determined based on the CDT and the size of vector
11698 // register of that ISA for which current vector version is generated. The
11699 // VLEN is computed using the formula below:
11700 // VLEN = sizeof(vector_register) / sizeof(CDT),
11701 // where vector register size specified in section 3.2.1 Registers and the
11702 // Stack Frame of original AMD64 ABI document.
11703 QualType RetType = FD->getReturnType();
11704 if (RetType.isNull())
11705 return 0;
11706 ASTContext &C = FD->getASTContext();
11707 QualType CDT;
11708 if (!RetType.isNull() && !RetType->isVoidType()) {
11709 CDT = RetType;
11710 } else {
11711 unsigned Offset = 0;
11712 if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) {
11713 if (ParamAttrs[Offset].Kind == Vector)
11714 CDT = C.getPointerType(C.getCanonicalTagType(MD->getParent()));
11715 ++Offset;
11716 }
11717 if (CDT.isNull()) {
11718 for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
11719 if (ParamAttrs[I + Offset].Kind == Vector) {
11720 CDT = FD->getParamDecl(I)->getType();
11721 break;
11722 }
11723 }
11724 }
11725 }
11726 if (CDT.isNull())
11727 CDT = C.IntTy;
11728 CDT = CDT->getCanonicalTypeUnqualified();
11729 if (CDT->isRecordType() || CDT->isUnionType())
11730 CDT = C.IntTy;
11731 return C.getTypeSize(CDT);
11732}
11733
11734/// Mangle the parameter part of the vector function name according to
11735/// their OpenMP classification. The mangling function is defined in
11736/// section 4.5 of the AAVFABI(2021Q1).
11737static std::string mangleVectorParameters(ArrayRef<ParamAttrTy> ParamAttrs) {
11738 SmallString<256> Buffer;
11739 llvm::raw_svector_ostream Out(Buffer);
11740 for (const auto &ParamAttr : ParamAttrs) {
11741 switch (ParamAttr.Kind) {
11742 case Linear:
11743 Out << 'l';
11744 break;
11745 case LinearRef:
11746 Out << 'R';
11747 break;
11748 case LinearUVal:
11749 Out << 'U';
11750 break;
11751 case LinearVal:
11752 Out << 'L';
11753 break;
11754 case Uniform:
11755 Out << 'u';
11756 break;
11757 case Vector:
11758 Out << 'v';
11759 break;
11760 }
11761 if (ParamAttr.HasVarStride)
11762 Out << "s" << ParamAttr.StrideOrArg;
11763 else if (ParamAttr.Kind == Linear || ParamAttr.Kind == LinearRef ||
11764 ParamAttr.Kind == LinearUVal || ParamAttr.Kind == LinearVal) {
11765 // Don't print the step value if it is not present or if it is
11766 // equal to 1.
11767 if (ParamAttr.StrideOrArg < 0)
11768 Out << 'n' << -ParamAttr.StrideOrArg;
11769 else if (ParamAttr.StrideOrArg != 1)
11770 Out << ParamAttr.StrideOrArg;
11771 }
11772
11773 if (!!ParamAttr.Alignment)
11774 Out << 'a' << ParamAttr.Alignment;
11775 }
11776
11777 return std::string(Out.str());
11778}
11779
11780static void
11781emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn,
11782 const llvm::APSInt &VLENVal,
11783 ArrayRef<ParamAttrTy> ParamAttrs,
11784 OMPDeclareSimdDeclAttr::BranchStateTy State) {
11785 struct ISADataTy {
11786 char ISA;
11787 unsigned VecRegSize;
11788 };
11789 ISADataTy ISAData[] = {
11790 {
11791 'b', 128
11792 }, // SSE
11793 {
11794 'c', 256
11795 }, // AVX
11796 {
11797 'd', 256
11798 }, // AVX2
11799 {
11800 'e', 512
11801 }, // AVX512
11802 };
11804 switch (State) {
11805 case OMPDeclareSimdDeclAttr::BS_Undefined:
11806 Masked.push_back('N');
11807 Masked.push_back('M');
11808 break;
11809 case OMPDeclareSimdDeclAttr::BS_Notinbranch:
11810 Masked.push_back('N');
11811 break;
11812 case OMPDeclareSimdDeclAttr::BS_Inbranch:
11813 Masked.push_back('M');
11814 break;
11815 }
11816 for (char Mask : Masked) {
11817 for (const ISADataTy &Data : ISAData) {
11818 SmallString<256> Buffer;
11819 llvm::raw_svector_ostream Out(Buffer);
11820 Out << "_ZGV" << Data.ISA << Mask;
11821 if (!VLENVal) {
11822 unsigned NumElts = evaluateCDTSize(FD, ParamAttrs);
11823 assert(NumElts && "Non-zero simdlen/cdtsize expected");
11824 Out << llvm::APSInt::getUnsigned(Data.VecRegSize / NumElts);
11825 } else {
11826 Out << VLENVal;
11827 }
11828 Out << mangleVectorParameters(ParamAttrs);
11829 Out << '_' << Fn->getName();
11830 Fn->addFnAttr(Out.str());
11831 }
11832 }
11833}
11834
// These are the functions needed to mangle the names of the
// vector functions generated by the compiler, according to the rules
// defined in the "Vector Function ABI specifications for AArch64",
// available at
// https://developer.arm.com/products/software-development-tools/hpc/arm-compiler-for-hpc/vector-function-abi.
11840
11841/// Maps To Vector (MTV), as defined in 4.1.1 of the AAVFABI (2021Q1).
11842static bool getAArch64MTV(QualType QT, ParamKindTy Kind) {
11843 QT = QT.getCanonicalType();
11844
11845 if (QT->isVoidType())
11846 return false;
11847
11848 if (Kind == ParamKindTy::Uniform)
11849 return false;
11850
11851 if (Kind == ParamKindTy::LinearUVal || Kind == ParamKindTy::LinearRef)
11852 return false;
11853
11854 if ((Kind == ParamKindTy::Linear || Kind == ParamKindTy::LinearVal) &&
11855 !QT->isReferenceType())
11856 return false;
11857
11858 return true;
11859}
11860
11861/// Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI.
11863 QT = QT.getCanonicalType();
11864 unsigned Size = C.getTypeSize(QT);
11865
11866 // Only scalars and complex within 16 bytes wide set PVB to true.
11867 if (Size != 8 && Size != 16 && Size != 32 && Size != 64 && Size != 128)
11868 return false;
11869
11870 if (QT->isFloatingType())
11871 return true;
11872
11873 if (QT->isIntegerType())
11874 return true;
11875
11876 if (QT->isPointerType())
11877 return true;
11878
11879 // TODO: Add support for complex types (section 3.1.2, item 2).
11880
11881 return false;
11882}
11883
11884/// Computes the lane size (LS) of a return type or of an input parameter,
11885/// as defined by `LS(P)` in 3.2.1 of the AAVFABI.
11886/// TODO: Add support for references, section 3.2.1, item 1.
11887static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C) {
11888 if (!getAArch64MTV(QT, Kind) && QT.getCanonicalType()->isPointerType()) {
11890 if (getAArch64PBV(PTy, C))
11891 return C.getTypeSize(PTy);
11892 }
11893 if (getAArch64PBV(QT, C))
11894 return C.getTypeSize(QT);
11895
11896 return C.getTypeSize(C.getUIntPtrType());
11897}
11898
11899// Get Narrowest Data Size (NDS) and Widest Data Size (WDS) from the
11900// signature of the scalar function, as defined in 3.2.2 of the
11901// AAVFABI.
11902static std::tuple<unsigned, unsigned, bool>
11904 QualType RetType = FD->getReturnType().getCanonicalType();
11905
11906 ASTContext &C = FD->getASTContext();
11907
11908 bool OutputBecomesInput = false;
11909
11911 if (!RetType->isVoidType()) {
11912 Sizes.push_back(getAArch64LS(RetType, ParamKindTy::Vector, C));
11913 if (!getAArch64PBV(RetType, C) && getAArch64MTV(RetType, {}))
11914 OutputBecomesInput = true;
11915 }
11916 for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
11918 Sizes.push_back(getAArch64LS(QT, ParamAttrs[I].Kind, C));
11919 }
11920
11921 assert(!Sizes.empty() && "Unable to determine NDS and WDS.");
11922 // The LS of a function parameter / return value can only be a power
11923 // of 2, starting from 8 bits, up to 128.
11924 assert(llvm::all_of(Sizes,
11925 [](unsigned Size) {
11926 return Size == 8 || Size == 16 || Size == 32 ||
11927 Size == 64 || Size == 128;
11928 }) &&
11929 "Invalid size");
11930
11931 return std::make_tuple(*llvm::min_element(Sizes), *llvm::max_element(Sizes),
11932 OutputBecomesInput);
11933}
11934
11935// Function used to add the attribute. The parameter `VLEN` is
11936// templated to allow the use of "x" when targeting scalable functions
11937// for SVE.
11938template <typename T>
11939static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix,
11940 char ISA, StringRef ParSeq,
11941 StringRef MangledName, bool OutputBecomesInput,
11942 llvm::Function *Fn) {
11943 SmallString<256> Buffer;
11944 llvm::raw_svector_ostream Out(Buffer);
11945 Out << Prefix << ISA << LMask << VLEN;
11946 if (OutputBecomesInput)
11947 Out << "v";
11948 Out << ParSeq << "_" << MangledName;
11949 Fn->addFnAttr(Out.str());
11950}
11951
11952// Helper function to generate the Advanced SIMD names depending on
11953// the value of the NDS when simdlen is not present.
11954static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask,
11955 StringRef Prefix, char ISA,
11956 StringRef ParSeq, StringRef MangledName,
11957 bool OutputBecomesInput,
11958 llvm::Function *Fn) {
11959 switch (NDS) {
11960 case 8:
11961 addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
11962 OutputBecomesInput, Fn);
11963 addAArch64VectorName(16, Mask, Prefix, ISA, ParSeq, MangledName,
11964 OutputBecomesInput, Fn);
11965 break;
11966 case 16:
11967 addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
11968 OutputBecomesInput, Fn);
11969 addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
11970 OutputBecomesInput, Fn);
11971 break;
11972 case 32:
11973 addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
11974 OutputBecomesInput, Fn);
11975 addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
11976 OutputBecomesInput, Fn);
11977 break;
11978 case 64:
11979 case 128:
11980 addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
11981 OutputBecomesInput, Fn);
11982 break;
11983 default:
11984 llvm_unreachable("Scalar type is too wide.");
11985 }
11986}
11987
11988/// Emit vector function attributes for AArch64, as defined in the AAVFABI.
11990 CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN,
11991 ArrayRef<ParamAttrTy> ParamAttrs,
11992 OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName,
11993 char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc) {
11994
11995 // Get basic data for building the vector signature.
11996 const auto Data = getNDSWDS(FD, ParamAttrs);
11997 const unsigned NDS = std::get<0>(Data);
11998 const unsigned WDS = std::get<1>(Data);
11999 const bool OutputBecomesInput = std::get<2>(Data);
12000
12001 // Check the values provided via `simdlen` by the user.
12002 // 1. A `simdlen(1)` doesn't produce vector signatures,
12003 if (UserVLEN == 1) {
12004 CGM.getDiags().Report(SLoc, diag::warn_simdlen_1_no_effect);
12005 return;
12006 }
12007
12008 // 2. Section 3.3.1, item 1: user input must be a power of 2 for
12009 // Advanced SIMD output.
12010 if (ISA == 'n' && UserVLEN && !llvm::isPowerOf2_32(UserVLEN)) {
12011 CGM.getDiags().Report(SLoc, diag::warn_simdlen_requires_power_of_2);
12012 return;
12013 }
12014
12015 // 3. Section 3.4.1. SVE fixed lengh must obey the architectural
12016 // limits.
12017 if (ISA == 's' && UserVLEN != 0) {
12018 if ((UserVLEN * WDS > 2048) || (UserVLEN * WDS % 128 != 0)) {
12019 CGM.getDiags().Report(SLoc, diag::warn_simdlen_must_fit_lanes) << WDS;
12020 return;
12021 }
12022 }
12023
12024 // Sort out parameter sequence.
12025 const std::string ParSeq = mangleVectorParameters(ParamAttrs);
12026 StringRef Prefix = "_ZGV";
12027 // Generate simdlen from user input (if any).
12028 if (UserVLEN) {
12029 if (ISA == 's') {
12030 // SVE generates only a masked function.
12031 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
12032 OutputBecomesInput, Fn);
12033 } else {
12034 assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
12035 // Advanced SIMD generates one or two functions, depending on
12036 // the `[not]inbranch` clause.
12037 switch (State) {
12038 case OMPDeclareSimdDeclAttr::BS_Undefined:
12039 addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
12040 OutputBecomesInput, Fn);
12041 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
12042 OutputBecomesInput, Fn);
12043 break;
12044 case OMPDeclareSimdDeclAttr::BS_Notinbranch:
12045 addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
12046 OutputBecomesInput, Fn);
12047 break;
12048 case OMPDeclareSimdDeclAttr::BS_Inbranch:
12049 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
12050 OutputBecomesInput, Fn);
12051 break;
12052 }
12053 }
12054 } else {
12055 // If no user simdlen is provided, follow the AAVFABI rules for
12056 // generating the vector length.
12057 if (ISA == 's') {
12058 // SVE, section 3.4.1, item 1.
12059 addAArch64VectorName("x", "M", Prefix, ISA, ParSeq, MangledName,
12060 OutputBecomesInput, Fn);
12061 } else {
12062 assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
12063 // Advanced SIMD, Section 3.3.1 of the AAVFABI, generates one or
12064 // two vector names depending on the use of the clause
12065 // `[not]inbranch`.
12066 switch (State) {
12067 case OMPDeclareSimdDeclAttr::BS_Undefined:
12068 addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
12069 OutputBecomesInput, Fn);
12070 addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
12071 OutputBecomesInput, Fn);
12072 break;
12073 case OMPDeclareSimdDeclAttr::BS_Notinbranch:
12074 addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
12075 OutputBecomesInput, Fn);
12076 break;
12077 case OMPDeclareSimdDeclAttr::BS_Inbranch:
12078 addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
12079 OutputBecomesInput, Fn);
12080 break;
12081 }
12082 }
12083 }
12084}
12085
12087 llvm::Function *Fn) {
12088 ASTContext &C = CGM.getContext();
12089 FD = FD->getMostRecentDecl();
12090 while (FD) {
12091 // Map params to their positions in function decl.
12092 llvm::DenseMap<const Decl *, unsigned> ParamPositions;
12093 if (isa<CXXMethodDecl>(FD))
12094 ParamPositions.try_emplace(FD, 0);
12095 unsigned ParamPos = ParamPositions.size();
12096 for (const ParmVarDecl *P : FD->parameters()) {
12097 ParamPositions.try_emplace(P->getCanonicalDecl(), ParamPos);
12098 ++ParamPos;
12099 }
12100 for (const auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) {
12101 llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size());
12102 // Mark uniform parameters.
12103 for (const Expr *E : Attr->uniforms()) {
12104 E = E->IgnoreParenImpCasts();
12105 unsigned Pos;
12106 if (isa<CXXThisExpr>(E)) {
12107 Pos = ParamPositions[FD];
12108 } else {
12109 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
12110 ->getCanonicalDecl();
12111 auto It = ParamPositions.find(PVD);
12112 assert(It != ParamPositions.end() && "Function parameter not found");
12113 Pos = It->second;
12114 }
12115 ParamAttrs[Pos].Kind = Uniform;
12116 }
12117 // Get alignment info.
12118 auto *NI = Attr->alignments_begin();
12119 for (const Expr *E : Attr->aligneds()) {
12120 E = E->IgnoreParenImpCasts();
12121 unsigned Pos;
12122 QualType ParmTy;
12123 if (isa<CXXThisExpr>(E)) {
12124 Pos = ParamPositions[FD];
12125 ParmTy = E->getType();
12126 } else {
12127 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
12128 ->getCanonicalDecl();
12129 auto It = ParamPositions.find(PVD);
12130 assert(It != ParamPositions.end() && "Function parameter not found");
12131 Pos = It->second;
12132 ParmTy = PVD->getType();
12133 }
12134 ParamAttrs[Pos].Alignment =
12135 (*NI)
12136 ? (*NI)->EvaluateKnownConstInt(C)
12137 : llvm::APSInt::getUnsigned(
12138 C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy))
12139 .getQuantity());
12140 ++NI;
12141 }
12142 // Mark linear parameters.
12143 auto *SI = Attr->steps_begin();
12144 auto *MI = Attr->modifiers_begin();
12145 for (const Expr *E : Attr->linears()) {
12146 E = E->IgnoreParenImpCasts();
12147 unsigned Pos;
12148 bool IsReferenceType = false;
12149 // Rescaling factor needed to compute the linear parameter
12150 // value in the mangled name.
12151 unsigned PtrRescalingFactor = 1;
12152 if (isa<CXXThisExpr>(E)) {
12153 Pos = ParamPositions[FD];
12154 auto *P = cast<PointerType>(E->getType());
12155 PtrRescalingFactor = CGM.getContext()
12156 .getTypeSizeInChars(P->getPointeeType())
12157 .getQuantity();
12158 } else {
12159 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
12160 ->getCanonicalDecl();
12161 auto It = ParamPositions.find(PVD);
12162 assert(It != ParamPositions.end() && "Function parameter not found");
12163 Pos = It->second;
12164 if (auto *P = dyn_cast<PointerType>(PVD->getType()))
12165 PtrRescalingFactor = CGM.getContext()
12166 .getTypeSizeInChars(P->getPointeeType())
12167 .getQuantity();
12168 else if (PVD->getType()->isReferenceType()) {
12169 IsReferenceType = true;
12170 PtrRescalingFactor =
12171 CGM.getContext()
12172 .getTypeSizeInChars(PVD->getType().getNonReferenceType())
12173 .getQuantity();
12174 }
12175 }
12176 ParamAttrTy &ParamAttr = ParamAttrs[Pos];
12177 if (*MI == OMPC_LINEAR_ref)
12178 ParamAttr.Kind = LinearRef;
12179 else if (*MI == OMPC_LINEAR_uval)
12180 ParamAttr.Kind = LinearUVal;
12181 else if (IsReferenceType)
12182 ParamAttr.Kind = LinearVal;
12183 else
12184 ParamAttr.Kind = Linear;
12185 // Assuming a stride of 1, for `linear` without modifiers.
12186 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(1);
12187 if (*SI) {
12189 if (!(*SI)->EvaluateAsInt(Result, C, Expr::SE_AllowSideEffects)) {
12190 if (const auto *DRE =
12191 cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) {
12192 if (const auto *StridePVD =
12193 dyn_cast<ParmVarDecl>(DRE->getDecl())) {
12194 ParamAttr.HasVarStride = true;
12195 auto It = ParamPositions.find(StridePVD->getCanonicalDecl());
12196 assert(It != ParamPositions.end() &&
12197 "Function parameter not found");
12198 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(It->second);
12199 }
12200 }
12201 } else {
12202 ParamAttr.StrideOrArg = Result.Val.getInt();
12203 }
12204 }
12205 // If we are using a linear clause on a pointer, we need to
12206 // rescale the value of linear_step with the byte size of the
12207 // pointee type.
12208 if (!ParamAttr.HasVarStride &&
12209 (ParamAttr.Kind == Linear || ParamAttr.Kind == LinearRef))
12210 ParamAttr.StrideOrArg = ParamAttr.StrideOrArg * PtrRescalingFactor;
12211 ++SI;
12212 ++MI;
12213 }
12214 llvm::APSInt VLENVal;
12215 SourceLocation ExprLoc;
12216 const Expr *VLENExpr = Attr->getSimdlen();
12217 if (VLENExpr) {
12218 VLENVal = VLENExpr->EvaluateKnownConstInt(C);
12219 ExprLoc = VLENExpr->getExprLoc();
12220 }
12221 OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState();
12222 if (CGM.getTriple().isX86()) {
12223 emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State);
12224 } else if (CGM.getTriple().getArch() == llvm::Triple::aarch64) {
12225 unsigned VLEN = VLENVal.getExtValue();
12226 StringRef MangledName = Fn->getName();
12227 if (CGM.getTarget().hasFeature("sve"))
12228 emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
12229 MangledName, 's', 128, Fn, ExprLoc);
12230 else if (CGM.getTarget().hasFeature("neon"))
12231 emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
12232 MangledName, 'n', 128, Fn, ExprLoc);
12233 }
12234 }
12235 FD = FD->getPreviousDecl();
12236 }
12237}
12238
12239namespace {
12240/// Cleanup action for doacross support.
12241class DoacrossCleanupTy final : public EHScopeStack::Cleanup {
12242public:
12243 static const int DoacrossFinArgs = 2;
12244
12245private:
12246 llvm::FunctionCallee RTLFn;
12247 llvm::Value *Args[DoacrossFinArgs];
12248
12249public:
12250 DoacrossCleanupTy(llvm::FunctionCallee RTLFn,
12251 ArrayRef<llvm::Value *> CallArgs)
12252 : RTLFn(RTLFn) {
12253 assert(CallArgs.size() == DoacrossFinArgs);
12254 std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
12255 }
12256 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
12257 if (!CGF.HaveInsertPoint())
12258 return;
12259 CGF.EmitRuntimeCall(RTLFn, Args);
12260 }
12261};
12262} // namespace
12263
12265 const OMPLoopDirective &D,
12266 ArrayRef<Expr *> NumIterations) {
12267 if (!CGF.HaveInsertPoint())
12268 return;
12269
12270 ASTContext &C = CGM.getContext();
12271 QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
12272 RecordDecl *RD;
12273 if (KmpDimTy.isNull()) {
12274 // Build struct kmp_dim { // loop bounds info casted to kmp_int64
12275 // kmp_int64 lo; // lower
12276 // kmp_int64 up; // upper
12277 // kmp_int64 st; // stride
12278 // };
12279 RD = C.buildImplicitRecord("kmp_dim");
12280 RD->startDefinition();
12281 addFieldToRecordDecl(C, RD, Int64Ty);
12282 addFieldToRecordDecl(C, RD, Int64Ty);
12283 addFieldToRecordDecl(C, RD, Int64Ty);
12284 RD->completeDefinition();
12285 KmpDimTy = C.getCanonicalTagType(RD);
12286 } else {
12287 RD = KmpDimTy->castAsRecordDecl();
12288 }
12289 llvm::APInt Size(/*numBits=*/32, NumIterations.size());
12290 QualType ArrayTy = C.getConstantArrayType(KmpDimTy, Size, nullptr,
12292
12293 Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
12294 CGF.EmitNullInitialization(DimsAddr, ArrayTy);
12295 enum { LowerFD = 0, UpperFD, StrideFD };
12296 // Fill dims with data.
12297 for (unsigned I = 0, E = NumIterations.size(); I < E; ++I) {
12298 LValue DimsLVal = CGF.MakeAddrLValue(
12299 CGF.Builder.CreateConstArrayGEP(DimsAddr, I), KmpDimTy);
12300 // dims.upper = num_iterations;
12301 LValue UpperLVal = CGF.EmitLValueForField(
12302 DimsLVal, *std::next(RD->field_begin(), UpperFD));
12303 llvm::Value *NumIterVal = CGF.EmitScalarConversion(
12304 CGF.EmitScalarExpr(NumIterations[I]), NumIterations[I]->getType(),
12305 Int64Ty, NumIterations[I]->getExprLoc());
12306 CGF.EmitStoreOfScalar(NumIterVal, UpperLVal);
12307 // dims.stride = 1;
12308 LValue StrideLVal = CGF.EmitLValueForField(
12309 DimsLVal, *std::next(RD->field_begin(), StrideFD));
12310 CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1),
12311 StrideLVal);
12312 }
12313
12314 // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid,
12315 // kmp_int32 num_dims, struct kmp_dim * dims);
12316 llvm::Value *Args[] = {
12317 emitUpdateLocation(CGF, D.getBeginLoc()),
12318 getThreadID(CGF, D.getBeginLoc()),
12319 llvm::ConstantInt::getSigned(CGM.Int32Ty, NumIterations.size()),
12321 CGF.Builder.CreateConstArrayGEP(DimsAddr, 0).emitRawPointer(CGF),
12322 CGM.VoidPtrTy)};
12323
12324 llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
12325 CGM.getModule(), OMPRTL___kmpc_doacross_init);
12326 CGF.EmitRuntimeCall(RTLFn, Args);
12327 llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = {
12328 emitUpdateLocation(CGF, D.getEndLoc()), getThreadID(CGF, D.getEndLoc())};
12329 llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
12330 CGM.getModule(), OMPRTL___kmpc_doacross_fini);
12331 CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
12332 llvm::ArrayRef(FiniArgs));
12333}
12334
12335template <typename T>
12337 const T *C, llvm::Value *ULoc,
12338 llvm::Value *ThreadID) {
12339 QualType Int64Ty =
12340 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
12341 llvm::APInt Size(/*numBits=*/32, C->getNumLoops());
12343 Int64Ty, Size, nullptr, ArraySizeModifier::Normal, 0);
12344 Address CntAddr = CGF.CreateMemTemp(ArrayTy, ".cnt.addr");
12345 for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) {
12346 const Expr *CounterVal = C->getLoopData(I);
12347 assert(CounterVal);
12348 llvm::Value *CntVal = CGF.EmitScalarConversion(
12349 CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty,
12350 CounterVal->getExprLoc());
12351 CGF.EmitStoreOfScalar(CntVal, CGF.Builder.CreateConstArrayGEP(CntAddr, I),
12352 /*Volatile=*/false, Int64Ty);
12353 }
12354 llvm::Value *Args[] = {
12355 ULoc, ThreadID,
12356 CGF.Builder.CreateConstArrayGEP(CntAddr, 0).emitRawPointer(CGF)};
12357 llvm::FunctionCallee RTLFn;
12358 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
12359 OMPDoacrossKind<T> ODK;
12360 if (ODK.isSource(C)) {
12361 RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
12362 OMPRTL___kmpc_doacross_post);
12363 } else {
12364 assert(ODK.isSink(C) && "Expect sink modifier.");
12365 RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
12366 OMPRTL___kmpc_doacross_wait);
12367 }
12368 CGF.EmitRuntimeCall(RTLFn, Args);
12369}
12370
12372 const OMPDependClause *C) {
12374 CGF, CGM, C, emitUpdateLocation(CGF, C->getBeginLoc()),
12375 getThreadID(CGF, C->getBeginLoc()));
12376}
12377
12379 const OMPDoacrossClause *C) {
12381 CGF, CGM, C, emitUpdateLocation(CGF, C->getBeginLoc()),
12382 getThreadID(CGF, C->getBeginLoc()));
12383}
12384
12386 llvm::FunctionCallee Callee,
12387 ArrayRef<llvm::Value *> Args) const {
12388 assert(Loc.isValid() && "Outlined function call location must be valid.");
12390
12391 if (auto *Fn = dyn_cast<llvm::Function>(Callee.getCallee())) {
12392 if (Fn->doesNotThrow()) {
12393 CGF.EmitNounwindRuntimeCall(Fn, Args);
12394 return;
12395 }
12396 }
12397 CGF.EmitRuntimeCall(Callee, Args);
12398}
12399
12401 CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn,
12402 ArrayRef<llvm::Value *> Args) const {
12403 emitCall(CGF, Loc, OutlinedFn, Args);
12404}
12405
  // NOTE(review): the enclosing function header (source line 12406) and the
  // statement guarded by the two conditions below (source line 12409) were
  // lost in extraction, so this fragment is incomplete as shown. Presumably
  // this is CGOpenMPRuntime::emitFunctionProlog reacting to functions marked
  // with 'declare target' — TODO confirm against the full file before
  // editing this fragment.
  if (const auto *FD = dyn_cast<FunctionDecl>(D))
    if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(FD))
}
12411
12413 const VarDecl *NativeParam,
12414 const VarDecl *TargetParam) const {
12415 return CGF.GetAddrOfLocalVar(NativeParam);
12416}
12417
12418/// Return allocator value from expression, or return a null allocator (default
12419/// when no allocator specified).
12420static llvm::Value *getAllocatorVal(CodeGenFunction &CGF,
12421 const Expr *Allocator) {
12422 llvm::Value *AllocVal;
12423 if (Allocator) {
12424 AllocVal = CGF.EmitScalarExpr(Allocator);
12425 // According to the standard, the original allocator type is a enum
12426 // (integer). Convert to pointer type, if required.
12427 AllocVal = CGF.EmitScalarConversion(AllocVal, Allocator->getType(),
12428 CGF.getContext().VoidPtrTy,
12429 Allocator->getExprLoc());
12430 } else {
12431 // If no allocator specified, it defaults to the null allocator.
12432 AllocVal = llvm::Constant::getNullValue(
12434 }
12435 return AllocVal;
12436}
12437
12438/// Return the alignment from an allocate directive if present.
12439static llvm::Value *getAlignmentValue(CodeGenModule &CGM, const VarDecl *VD) {
12440 std::optional<CharUnits> AllocateAlignment = CGM.getOMPAllocateAlignment(VD);
12441
12442 if (!AllocateAlignment)
12443 return nullptr;
12444
12445 return llvm::ConstantInt::get(CGM.SizeTy, AllocateAlignment->getQuantity());
12446}
12447
12449 const VarDecl *VD) {
12450 if (!VD)
12451 return Address::invalid();
12452 Address UntiedAddr = Address::invalid();
12453 Address UntiedRealAddr = Address::invalid();
12454 auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
12455 if (It != FunctionToUntiedTaskStackMap.end()) {
12456 const UntiedLocalVarsAddressesMap &UntiedData =
12457 UntiedLocalVarsStack[It->second];
12458 auto I = UntiedData.find(VD);
12459 if (I != UntiedData.end()) {
12460 UntiedAddr = I->second.first;
12461 UntiedRealAddr = I->second.second;
12462 }
12463 }
12464 const VarDecl *CVD = VD->getCanonicalDecl();
12465 if (CVD->hasAttr<OMPAllocateDeclAttr>()) {
12466 // Use the default allocation.
12467 if (!isAllocatableDecl(VD))
12468 return UntiedAddr;
12469 llvm::Value *Size;
12470 CharUnits Align = CGM.getContext().getDeclAlign(CVD);
12471 if (CVD->getType()->isVariablyModifiedType()) {
12472 Size = CGF.getTypeSize(CVD->getType());
12473 // Align the size: ((size + align - 1) / align) * align
12474 Size = CGF.Builder.CreateNUWAdd(
12475 Size, CGM.getSize(Align - CharUnits::fromQuantity(1)));
12476 Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align));
12477 Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align));
12478 } else {
12479 CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType());
12480 Size = CGM.getSize(Sz.alignTo(Align));
12481 }
12482 llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc());
12483 const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
12484 const Expr *Allocator = AA->getAllocator();
12485 llvm::Value *AllocVal = getAllocatorVal(CGF, Allocator);
12486 llvm::Value *Alignment = getAlignmentValue(CGM, CVD);
12488 Args.push_back(ThreadID);
12489 if (Alignment)
12490 Args.push_back(Alignment);
12491 Args.push_back(Size);
12492 Args.push_back(AllocVal);
12493 llvm::omp::RuntimeFunction FnID =
12494 Alignment ? OMPRTL___kmpc_aligned_alloc : OMPRTL___kmpc_alloc;
12495 llvm::Value *Addr = CGF.EmitRuntimeCall(
12496 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), FnID), Args,
12497 getName({CVD->getName(), ".void.addr"}));
12498 llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
12499 CGM.getModule(), OMPRTL___kmpc_free);
12500 QualType Ty = CGM.getContext().getPointerType(CVD->getType());
12502 Addr, CGF.ConvertTypeForMem(Ty), getName({CVD->getName(), ".addr"}));
12503 if (UntiedAddr.isValid())
12504 CGF.EmitStoreOfScalar(Addr, UntiedAddr, /*Volatile=*/false, Ty);
12505
12506 // Cleanup action for allocate support.
12507 class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup {
12508 llvm::FunctionCallee RTLFn;
12509 SourceLocation::UIntTy LocEncoding;
12510 Address Addr;
12511 const Expr *AllocExpr;
12512
12513 public:
12514 OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn,
12515 SourceLocation::UIntTy LocEncoding, Address Addr,
12516 const Expr *AllocExpr)
12517 : RTLFn(RTLFn), LocEncoding(LocEncoding), Addr(Addr),
12518 AllocExpr(AllocExpr) {}
12519 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
12520 if (!CGF.HaveInsertPoint())
12521 return;
12522 llvm::Value *Args[3];
12523 Args[0] = CGF.CGM.getOpenMPRuntime().getThreadID(
12524 CGF, SourceLocation::getFromRawEncoding(LocEncoding));
12526 Addr.emitRawPointer(CGF), CGF.VoidPtrTy);
12527 llvm::Value *AllocVal = getAllocatorVal(CGF, AllocExpr);
12528 Args[2] = AllocVal;
12529 CGF.EmitRuntimeCall(RTLFn, Args);
12530 }
12531 };
12532 Address VDAddr =
12533 UntiedRealAddr.isValid()
12534 ? UntiedRealAddr
12535 : Address(Addr, CGF.ConvertTypeForMem(CVD->getType()), Align);
12536 CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(
12537 NormalAndEHCleanup, FiniRTLFn, CVD->getLocation().getRawEncoding(),
12538 VDAddr, Allocator);
12539 if (UntiedRealAddr.isValid())
12540 if (auto *Region =
12541 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
12542 Region->emitUntiedSwitch(CGF);
12543 return VDAddr;
12544 }
12545 return UntiedAddr;
12546}
12547
12549 const VarDecl *VD) const {
12550 auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
12551 if (It == FunctionToUntiedTaskStackMap.end())
12552 return false;
12553 return UntiedLocalVarsStack[It->second].count(VD) > 0;
12554}
12555
12557 CodeGenModule &CGM, const OMPLoopDirective &S)
12558 : CGM(CGM), NeedToPush(S.hasClausesOfKind<OMPNontemporalClause>()) {
12559 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
12560 if (!NeedToPush)
12561 return;
12563 CGM.getOpenMPRuntime().NontemporalDeclsStack.emplace_back();
12564 for (const auto *C : S.getClausesOfKind<OMPNontemporalClause>()) {
12565 for (const Stmt *Ref : C->private_refs()) {
12566 const auto *SimpleRefExpr = cast<Expr>(Ref)->IgnoreParenImpCasts();
12567 const ValueDecl *VD;
12568 if (const auto *DRE = dyn_cast<DeclRefExpr>(SimpleRefExpr)) {
12569 VD = DRE->getDecl();
12570 } else {
12571 const auto *ME = cast<MemberExpr>(SimpleRefExpr);
12572 assert((ME->isImplicitCXXThis() ||
12573 isa<CXXThisExpr>(ME->getBase()->IgnoreParenImpCasts())) &&
12574 "Expected member of current class.");
12575 VD = ME->getMemberDecl();
12576 }
12577 DS.insert(VD);
12578 }
12579 }
12580}
12581
12583 if (!NeedToPush)
12584 return;
12585 CGM.getOpenMPRuntime().NontemporalDeclsStack.pop_back();
12586}
12587
12589 CodeGenFunction &CGF,
12590 const llvm::MapVector<CanonicalDeclPtr<const VarDecl>,
12591 std::pair<Address, Address>> &LocalVars)
12592 : CGM(CGF.CGM), NeedToPush(!LocalVars.empty()) {
12593 if (!NeedToPush)
12594 return;
12595 CGM.getOpenMPRuntime().FunctionToUntiedTaskStackMap.try_emplace(
12596 CGF.CurFn, CGM.getOpenMPRuntime().UntiedLocalVarsStack.size());
12597 CGM.getOpenMPRuntime().UntiedLocalVarsStack.push_back(LocalVars);
12598}
12599
12601 if (!NeedToPush)
12602 return;
12603 CGM.getOpenMPRuntime().UntiedLocalVarsStack.pop_back();
12604}
12605
12607 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
12608
12609 return llvm::any_of(
12610 CGM.getOpenMPRuntime().NontemporalDeclsStack,
12611 [VD](const NontemporalDeclsSet &Set) { return Set.contains(VD); });
12612}
12613
12614void CGOpenMPRuntime::LastprivateConditionalRAII::tryToDisableInnerAnalysis(
12615 const OMPExecutableDirective &S,
12616 llvm::DenseSet<CanonicalDeclPtr<const Decl>> &NeedToAddForLPCsAsDisabled)
12617 const {
12618 llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToCheckForLPCs;
12619 // Vars in target/task regions must be excluded completely.
12620 if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()) ||
12621 isOpenMPTaskingDirective(S.getDirectiveKind())) {
12623 getOpenMPCaptureRegions(CaptureRegions, S.getDirectiveKind());
12624 const CapturedStmt *CS = S.getCapturedStmt(CaptureRegions.front());
12625 for (const CapturedStmt::Capture &Cap : CS->captures()) {
12626 if (Cap.capturesVariable() || Cap.capturesVariableByCopy())
12627 NeedToCheckForLPCs.insert(Cap.getCapturedVar());
12628 }
12629 }
12630 // Exclude vars in private clauses.
12631 for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
12632 for (const Expr *Ref : C->varlist()) {
12633 if (!Ref->getType()->isScalarType())
12634 continue;
12635 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12636 if (!DRE)
12637 continue;
12638 NeedToCheckForLPCs.insert(DRE->getDecl());
12639 }
12640 }
12641 for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
12642 for (const Expr *Ref : C->varlist()) {
12643 if (!Ref->getType()->isScalarType())
12644 continue;
12645 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12646 if (!DRE)
12647 continue;
12648 NeedToCheckForLPCs.insert(DRE->getDecl());
12649 }
12650 }
12651 for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
12652 for (const Expr *Ref : C->varlist()) {
12653 if (!Ref->getType()->isScalarType())
12654 continue;
12655 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12656 if (!DRE)
12657 continue;
12658 NeedToCheckForLPCs.insert(DRE->getDecl());
12659 }
12660 }
12661 for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
12662 for (const Expr *Ref : C->varlist()) {
12663 if (!Ref->getType()->isScalarType())
12664 continue;
12665 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12666 if (!DRE)
12667 continue;
12668 NeedToCheckForLPCs.insert(DRE->getDecl());
12669 }
12670 }
12671 for (const auto *C : S.getClausesOfKind<OMPLinearClause>()) {
12672 for (const Expr *Ref : C->varlist()) {
12673 if (!Ref->getType()->isScalarType())
12674 continue;
12675 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12676 if (!DRE)
12677 continue;
12678 NeedToCheckForLPCs.insert(DRE->getDecl());
12679 }
12680 }
12681 for (const Decl *VD : NeedToCheckForLPCs) {
12682 for (const LastprivateConditionalData &Data :
12683 llvm::reverse(CGM.getOpenMPRuntime().LastprivateConditionalStack)) {
12684 if (Data.DeclToUniqueName.count(VD) > 0) {
12685 if (!Data.Disabled)
12686 NeedToAddForLPCsAsDisabled.insert(VD);
12687 break;
12688 }
12689 }
12690 }
12691}
12692
12693CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
12694 CodeGenFunction &CGF, const OMPExecutableDirective &S, LValue IVLVal)
12695 : CGM(CGF.CGM),
12696 Action((CGM.getLangOpts().OpenMP >= 50 &&
12697 llvm::any_of(S.getClausesOfKind<OMPLastprivateClause>(),
12698 [](const OMPLastprivateClause *C) {
12699 return C->getKind() ==
12700 OMPC_LASTPRIVATE_conditional;
12701 }))
12702 ? ActionToDo::PushAsLastprivateConditional
12703 : ActionToDo::DoNotPush) {
12704 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
12705 if (CGM.getLangOpts().OpenMP < 50 || Action == ActionToDo::DoNotPush)
12706 return;
12707 assert(Action == ActionToDo::PushAsLastprivateConditional &&
12708 "Expected a push action.");
12710 CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
12711 for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
12712 if (C->getKind() != OMPC_LASTPRIVATE_conditional)
12713 continue;
12714
12715 for (const Expr *Ref : C->varlist()) {
12716 Data.DeclToUniqueName.insert(std::make_pair(
12717 cast<DeclRefExpr>(Ref->IgnoreParenImpCasts())->getDecl(),
12718 SmallString<16>(generateUniqueName(CGM, "pl_cond", Ref))));
12719 }
12720 }
12721 Data.IVLVal = IVLVal;
12722 Data.Fn = CGF.CurFn;
12723}
12724
12725CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
12727 : CGM(CGF.CGM), Action(ActionToDo::DoNotPush) {
12728 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
12729 if (CGM.getLangOpts().OpenMP < 50)
12730 return;
12731 llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToAddForLPCsAsDisabled;
12732 tryToDisableInnerAnalysis(S, NeedToAddForLPCsAsDisabled);
12733 if (!NeedToAddForLPCsAsDisabled.empty()) {
12734 Action = ActionToDo::DisableLastprivateConditional;
12735 LastprivateConditionalData &Data =
12737 for (const Decl *VD : NeedToAddForLPCsAsDisabled)
12738 Data.DeclToUniqueName.try_emplace(VD);
12739 Data.Fn = CGF.CurFn;
12740 Data.Disabled = true;
12741 }
12742}
12743
12744CGOpenMPRuntime::LastprivateConditionalRAII
12746 CodeGenFunction &CGF, const OMPExecutableDirective &S) {
12747 return LastprivateConditionalRAII(CGF, S);
12748}
12749
12751 if (CGM.getLangOpts().OpenMP < 50)
12752 return;
12753 if (Action == ActionToDo::DisableLastprivateConditional) {
12754 assert(CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
12755 "Expected list of disabled private vars.");
12756 CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
12757 }
12758 if (Action == ActionToDo::PushAsLastprivateConditional) {
12759 assert(
12760 !CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
12761 "Expected list of lastprivate conditional vars.");
12762 CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
12763 }
12764}
12765
12767 const VarDecl *VD) {
12768 ASTContext &C = CGM.getContext();
12769 auto I = LastprivateConditionalToTypes.try_emplace(CGF.CurFn).first;
12770 QualType NewType;
12771 const FieldDecl *VDField;
12772 const FieldDecl *FiredField;
12773 LValue BaseLVal;
12774 auto VI = I->getSecond().find(VD);
12775 if (VI == I->getSecond().end()) {
12776 RecordDecl *RD = C.buildImplicitRecord("lasprivate.conditional");
12777 RD->startDefinition();
12778 VDField = addFieldToRecordDecl(C, RD, VD->getType().getNonReferenceType());
12779 FiredField = addFieldToRecordDecl(C, RD, C.CharTy);
12780 RD->completeDefinition();
12781 NewType = C.getCanonicalTagType(RD);
12782 Address Addr = CGF.CreateMemTemp(NewType, C.getDeclAlign(VD), VD->getName());
12783 BaseLVal = CGF.MakeAddrLValue(Addr, NewType, AlignmentSource::Decl);
12784 I->getSecond().try_emplace(VD, NewType, VDField, FiredField, BaseLVal);
12785 } else {
12786 NewType = std::get<0>(VI->getSecond());
12787 VDField = std::get<1>(VI->getSecond());
12788 FiredField = std::get<2>(VI->getSecond());
12789 BaseLVal = std::get<3>(VI->getSecond());
12790 }
12791 LValue FiredLVal =
12792 CGF.EmitLValueForField(BaseLVal, FiredField);
12794 llvm::ConstantInt::getNullValue(CGF.ConvertTypeForMem(C.CharTy)),
12795 FiredLVal);
12796 return CGF.EmitLValueForField(BaseLVal, VDField).getAddress();
12797}
12798
12799namespace {
12800/// Checks if the lastprivate conditional variable is referenced in LHS.
12801class LastprivateConditionalRefChecker final
12802 : public ConstStmtVisitor<LastprivateConditionalRefChecker, bool> {
12804 const Expr *FoundE = nullptr;
12805 const Decl *FoundD = nullptr;
12806 StringRef UniqueDeclName;
12807 LValue IVLVal;
12808 llvm::Function *FoundFn = nullptr;
12809 SourceLocation Loc;
12810
12811public:
12812 bool VisitDeclRefExpr(const DeclRefExpr *E) {
12814 llvm::reverse(LPM)) {
12815 auto It = D.DeclToUniqueName.find(E->getDecl());
12816 if (It == D.DeclToUniqueName.end())
12817 continue;
12818 if (D.Disabled)
12819 return false;
12820 FoundE = E;
12821 FoundD = E->getDecl()->getCanonicalDecl();
12822 UniqueDeclName = It->second;
12823 IVLVal = D.IVLVal;
12824 FoundFn = D.Fn;
12825 break;
12826 }
12827 return FoundE == E;
12828 }
12829 bool VisitMemberExpr(const MemberExpr *E) {
12831 return false;
12833 llvm::reverse(LPM)) {
12834 auto It = D.DeclToUniqueName.find(E->getMemberDecl());
12835 if (It == D.DeclToUniqueName.end())
12836 continue;
12837 if (D.Disabled)
12838 return false;
12839 FoundE = E;
12840 FoundD = E->getMemberDecl()->getCanonicalDecl();
12841 UniqueDeclName = It->second;
12842 IVLVal = D.IVLVal;
12843 FoundFn = D.Fn;
12844 break;
12845 }
12846 return FoundE == E;
12847 }
12848 bool VisitStmt(const Stmt *S) {
12849 for (const Stmt *Child : S->children()) {
12850 if (!Child)
12851 continue;
12852 if (const auto *E = dyn_cast<Expr>(Child))
12853 if (!E->isGLValue())
12854 continue;
12855 if (Visit(Child))
12856 return true;
12857 }
12858 return false;
12859 }
12860 explicit LastprivateConditionalRefChecker(
12861 ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM)
12862 : LPM(LPM) {}
12863 std::tuple<const Expr *, const Decl *, StringRef, LValue, llvm::Function *>
12864 getFoundData() const {
12865 return std::make_tuple(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn);
12866 }
12867};
12868} // namespace
12869
12871 LValue IVLVal,
12872 StringRef UniqueDeclName,
12873 LValue LVal,
12874 SourceLocation Loc) {
12875 // Last updated loop counter for the lastprivate conditional var.
12876 // int<xx> last_iv = 0;
12877 llvm::Type *LLIVTy = CGF.ConvertTypeForMem(IVLVal.getType());
12878 llvm::Constant *LastIV = OMPBuilder.getOrCreateInternalVariable(
12879 LLIVTy, getName({UniqueDeclName, "iv"}));
12880 cast<llvm::GlobalVariable>(LastIV)->setAlignment(
12881 IVLVal.getAlignment().getAsAlign());
12882 LValue LastIVLVal =
12883 CGF.MakeNaturalAlignRawAddrLValue(LastIV, IVLVal.getType());
12884
12885 // Last value of the lastprivate conditional.
12886 // decltype(priv_a) last_a;
12887 llvm::GlobalVariable *Last = OMPBuilder.getOrCreateInternalVariable(
12888 CGF.ConvertTypeForMem(LVal.getType()), UniqueDeclName);
12889 cast<llvm::GlobalVariable>(Last)->setAlignment(
12890 LVal.getAlignment().getAsAlign());
12891 LValue LastLVal =
12892 CGF.MakeRawAddrLValue(Last, LVal.getType(), LVal.getAlignment());
12893
12894 // Global loop counter. Required to handle inner parallel-for regions.
12895 // iv
12896 llvm::Value *IVVal = CGF.EmitLoadOfScalar(IVLVal, Loc);
12897
12898 // #pragma omp critical(a)
12899 // if (last_iv <= iv) {
12900 // last_iv = iv;
12901 // last_a = priv_a;
12902 // }
12903 auto &&CodeGen = [&LastIVLVal, &IVLVal, IVVal, &LVal, &LastLVal,
12904 Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
12905 Action.Enter(CGF);
12906 llvm::Value *LastIVVal = CGF.EmitLoadOfScalar(LastIVLVal, Loc);
12907 // (last_iv <= iv) ? Check if the variable is updated and store new
12908 // value in global var.
12909 llvm::Value *CmpRes;
12910 if (IVLVal.getType()->isSignedIntegerType()) {
12911 CmpRes = CGF.Builder.CreateICmpSLE(LastIVVal, IVVal);
12912 } else {
12913 assert(IVLVal.getType()->isUnsignedIntegerType() &&
12914 "Loop iteration variable must be integer.");
12915 CmpRes = CGF.Builder.CreateICmpULE(LastIVVal, IVVal);
12916 }
12917 llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lp_cond_then");
12918 llvm::BasicBlock *ExitBB = CGF.createBasicBlock("lp_cond_exit");
12919 CGF.Builder.CreateCondBr(CmpRes, ThenBB, ExitBB);
12920 // {
12921 CGF.EmitBlock(ThenBB);
12922
12923 // last_iv = iv;
12924 CGF.EmitStoreOfScalar(IVVal, LastIVLVal);
12925
12926 // last_a = priv_a;
12927 switch (CGF.getEvaluationKind(LVal.getType())) {
12928 case TEK_Scalar: {
12929 llvm::Value *PrivVal = CGF.EmitLoadOfScalar(LVal, Loc);
12930 CGF.EmitStoreOfScalar(PrivVal, LastLVal);
12931 break;
12932 }
12933 case TEK_Complex: {
12934 CodeGenFunction::ComplexPairTy PrivVal = CGF.EmitLoadOfComplex(LVal, Loc);
12935 CGF.EmitStoreOfComplex(PrivVal, LastLVal, /*isInit=*/false);
12936 break;
12937 }
12938 case TEK_Aggregate:
12939 llvm_unreachable(
12940 "Aggregates are not supported in lastprivate conditional.");
12941 }
12942 // }
12943 CGF.EmitBranch(ExitBB);
12944 // There is no need to emit line number for unconditional branch.
12946 CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
12947 };
12948
12949 if (CGM.getLangOpts().OpenMPSimd) {
12950 // Do not emit as a critical region as no parallel region could be emitted.
12951 RegionCodeGenTy ThenRCG(CodeGen);
12952 ThenRCG(CGF);
12953 } else {
12954 emitCriticalRegion(CGF, UniqueDeclName, CodeGen, Loc);
12955 }
12956}
12957
12959 const Expr *LHS) {
12960 if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
12961 return;
12962 LastprivateConditionalRefChecker Checker(LastprivateConditionalStack);
12963 if (!Checker.Visit(LHS))
12964 return;
12965 const Expr *FoundE;
12966 const Decl *FoundD;
12967 StringRef UniqueDeclName;
12968 LValue IVLVal;
12969 llvm::Function *FoundFn;
12970 std::tie(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn) =
12971 Checker.getFoundData();
12972 if (FoundFn != CGF.CurFn) {
12973 // Special codegen for inner parallel regions.
12974 // ((struct.lastprivate.conditional*)&priv_a)->Fired = 1;
12975 auto It = LastprivateConditionalToTypes[FoundFn].find(FoundD);
12976 assert(It != LastprivateConditionalToTypes[FoundFn].end() &&
12977 "Lastprivate conditional is not found in outer region.");
12978 QualType StructTy = std::get<0>(It->getSecond());
12979 const FieldDecl* FiredDecl = std::get<2>(It->getSecond());
12980 LValue PrivLVal = CGF.EmitLValue(FoundE);
12982 PrivLVal.getAddress(),
12983 CGF.ConvertTypeForMem(CGF.getContext().getPointerType(StructTy)),
12984 CGF.ConvertTypeForMem(StructTy));
12985 LValue BaseLVal =
12986 CGF.MakeAddrLValue(StructAddr, StructTy, AlignmentSource::Decl);
12987 LValue FiredLVal = CGF.EmitLValueForField(BaseLVal, FiredDecl);
12988 CGF.EmitAtomicStore(RValue::get(llvm::ConstantInt::get(
12989 CGF.ConvertTypeForMem(FiredDecl->getType()), 1)),
12990 FiredLVal, llvm::AtomicOrdering::Unordered,
12991 /*IsVolatile=*/true, /*isInit=*/false);
12992 return;
12993 }
12994
12995 // Private address of the lastprivate conditional in the current context.
12996 // priv_a
12997 LValue LVal = CGF.EmitLValue(FoundE);
12998 emitLastprivateConditionalUpdate(CGF, IVLVal, UniqueDeclName, LVal,
12999 FoundE->getExprLoc());
13000}
13001
13004 const llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> &IgnoredDecls) {
13005 if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
13006 return;
13007 auto Range = llvm::reverse(LastprivateConditionalStack);
13008 auto It = llvm::find_if(
13009 Range, [](const LastprivateConditionalData &D) { return !D.Disabled; });
13010 if (It == Range.end() || It->Fn != CGF.CurFn)
13011 return;
13012 auto LPCI = LastprivateConditionalToTypes.find(It->Fn);
13013 assert(LPCI != LastprivateConditionalToTypes.end() &&
13014 "Lastprivates must be registered already.");
13016 getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind());
13017 const CapturedStmt *CS = D.getCapturedStmt(CaptureRegions.back());
13018 for (const auto &Pair : It->DeclToUniqueName) {
13019 const auto *VD = cast<VarDecl>(Pair.first->getCanonicalDecl());
13020 if (!CS->capturesVariable(VD) || IgnoredDecls.contains(VD))
13021 continue;
13022 auto I = LPCI->getSecond().find(Pair.first);
13023 assert(I != LPCI->getSecond().end() &&
13024 "Lastprivate must be rehistered already.");
13025 // bool Cmp = priv_a.Fired != 0;
13026 LValue BaseLVal = std::get<3>(I->getSecond());
13027 LValue FiredLVal =
13028 CGF.EmitLValueForField(BaseLVal, std::get<2>(I->getSecond()));
13029 llvm::Value *Res = CGF.EmitLoadOfScalar(FiredLVal, D.getBeginLoc());
13030 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Res);
13031 llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lpc.then");
13032 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("lpc.done");
13033 // if (Cmp) {
13034 CGF.Builder.CreateCondBr(Cmp, ThenBB, DoneBB);
13035 CGF.EmitBlock(ThenBB);
13036 Address Addr = CGF.GetAddrOfLocalVar(VD);
13037 LValue LVal;
13038 if (VD->getType()->isReferenceType())
13039 LVal = CGF.EmitLoadOfReferenceLValue(Addr, VD->getType(),
13041 else
13042 LVal = CGF.MakeAddrLValue(Addr, VD->getType().getNonReferenceType(),
13044 emitLastprivateConditionalUpdate(CGF, It->IVLVal, Pair.second, LVal,
13045 D.getBeginLoc());
13047 CGF.EmitBlock(DoneBB, /*IsFinal=*/true);
13048 // }
13049 }
13050}
13051
13053 CodeGenFunction &CGF, LValue PrivLVal, const VarDecl *VD,
13054 SourceLocation Loc) {
13055 if (CGF.getLangOpts().OpenMP < 50)
13056 return;
13057 auto It = LastprivateConditionalStack.back().DeclToUniqueName.find(VD);
13058 assert(It != LastprivateConditionalStack.back().DeclToUniqueName.end() &&
13059 "Unknown lastprivate conditional variable.");
13060 StringRef UniqueName = It->second;
13061 llvm::GlobalVariable *GV = CGM.getModule().getNamedGlobal(UniqueName);
13062 // The variable was not updated in the region - exit.
13063 if (!GV)
13064 return;
13065 LValue LPLVal = CGF.MakeRawAddrLValue(
13066 GV, PrivLVal.getType().getNonReferenceType(), PrivLVal.getAlignment());
13067 llvm::Value *Res = CGF.EmitLoadOfScalar(LPLVal, Loc);
13068 CGF.EmitStoreOfScalar(Res, PrivLVal);
13069}
13070
13073 const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
13074 const RegionCodeGenTy &CodeGen) {
13075 llvm_unreachable("Not supported in SIMD-only mode");
13076}
13077
13080 const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
13081 const RegionCodeGenTy &CodeGen) {
13082 llvm_unreachable("Not supported in SIMD-only mode");
13083}
13084
13086 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
13087 const VarDecl *PartIDVar, const VarDecl *TaskTVar,
13088 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
13089 bool Tied, unsigned &NumberOfParts) {
13090 llvm_unreachable("Not supported in SIMD-only mode");
13091}
13092
13094 CodeGenFunction &CGF, SourceLocation Loc, llvm::Function *OutlinedFn,
13095 ArrayRef<llvm::Value *> CapturedVars, const Expr *IfCond,
13096 llvm::Value *NumThreads, OpenMPNumThreadsClauseModifier NumThreadsModifier,
13097 OpenMPSeverityClauseKind Severity, const Expr *Message) {
13098 llvm_unreachable("Not supported in SIMD-only mode");
13099}
13100
13102 CodeGenFunction &CGF, StringRef CriticalName,
13103 const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc,
13104 const Expr *Hint) {
13105 llvm_unreachable("Not supported in SIMD-only mode");
13106}
13107
13109 const RegionCodeGenTy &MasterOpGen,
13110 SourceLocation Loc) {
13111 llvm_unreachable("Not supported in SIMD-only mode");
13112}
13113
13115 const RegionCodeGenTy &MasterOpGen,
13116 SourceLocation Loc,
13117 const Expr *Filter) {
13118 llvm_unreachable("Not supported in SIMD-only mode");
13119}
13120
13122 SourceLocation Loc) {
13123 llvm_unreachable("Not supported in SIMD-only mode");
13124}
13125
13127 CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen,
13128 SourceLocation Loc) {
13129 llvm_unreachable("Not supported in SIMD-only mode");
13130}
13131
13133 CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen,
13134 SourceLocation Loc, ArrayRef<const Expr *> CopyprivateVars,
13136 ArrayRef<const Expr *> AssignmentOps) {
13137 llvm_unreachable("Not supported in SIMD-only mode");
13138}
13139
13141 const RegionCodeGenTy &OrderedOpGen,
13142 SourceLocation Loc,
13143 bool IsThreads) {
13144 llvm_unreachable("Not supported in SIMD-only mode");
13145}
13146
13148 SourceLocation Loc,
13150 bool EmitChecks,
13151 bool ForceSimpleCall) {
13152 llvm_unreachable("Not supported in SIMD-only mode");
13153}
13154
13157 const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
13158 bool Ordered, const DispatchRTInput &DispatchValues) {
13159 llvm_unreachable("Not supported in SIMD-only mode");
13160}
13161
13163 SourceLocation Loc) {
13164 llvm_unreachable("Not supported in SIMD-only mode");
13165}
13166
13169 const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) {
13170 llvm_unreachable("Not supported in SIMD-only mode");
13171}
13172
13175 OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) {
13176 llvm_unreachable("Not supported in SIMD-only mode");
13177}
13178
13180 SourceLocation Loc,
13181 unsigned IVSize,
13182 bool IVSigned) {
13183 llvm_unreachable("Not supported in SIMD-only mode");
13184}
13185
13187 SourceLocation Loc,
13188 OpenMPDirectiveKind DKind) {
13189 llvm_unreachable("Not supported in SIMD-only mode");
13190}
13191
13193 SourceLocation Loc,
13194 unsigned IVSize, bool IVSigned,
13195 Address IL, Address LB,
13196 Address UB, Address ST) {
13197 llvm_unreachable("Not supported in SIMD-only mode");
13198}
13199
13201 CodeGenFunction &CGF, llvm::Value *NumThreads, SourceLocation Loc,
13203 SourceLocation SeverityLoc, const Expr *Message,
13204 SourceLocation MessageLoc) {
13205 llvm_unreachable("Not supported in SIMD-only mode");
13206}
13207
13209 ProcBindKind ProcBind,
13210 SourceLocation Loc) {
13211 llvm_unreachable("Not supported in SIMD-only mode");
13212}
13213
13215 const VarDecl *VD,
13216 Address VDAddr,
13217 SourceLocation Loc) {
13218 llvm_unreachable("Not supported in SIMD-only mode");
13219}
13220
13222 const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit,
13223 CodeGenFunction *CGF) {
13224 llvm_unreachable("Not supported in SIMD-only mode");
13225}
13226
13228 CodeGenFunction &CGF, QualType VarType, StringRef Name) {
13229 llvm_unreachable("Not supported in SIMD-only mode");
13230}
13231
13234 SourceLocation Loc,
13235 llvm::AtomicOrdering AO) {
13236 llvm_unreachable("Not supported in SIMD-only mode");
13237}
13238
13240 const OMPExecutableDirective &D,
13241 llvm::Function *TaskFunction,
13242 QualType SharedsTy, Address Shareds,
13243 const Expr *IfCond,
13244 const OMPTaskDataTy &Data) {
13245 llvm_unreachable("Not supported in SIMD-only mode");
13246}
13247
13250 llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds,
13251 const Expr *IfCond, const OMPTaskDataTy &Data) {
13252 llvm_unreachable("Not supported in SIMD-only mode");
13253}
13254
13258 ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) {
13259 assert(Options.SimpleReduction && "Only simple reduction is expected.");
13260 CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs,
13261 ReductionOps, Options);
13262}
13263
13266 ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
13267 llvm_unreachable("Not supported in SIMD-only mode");
13268}
13269
13271 SourceLocation Loc,
13272 bool IsWorksharingReduction) {
13273 llvm_unreachable("Not supported in SIMD-only mode");
13274}
13275
13277 SourceLocation Loc,
13278 ReductionCodeGen &RCG,
13279 unsigned N) {
13280 llvm_unreachable("Not supported in SIMD-only mode");
13281}
13282
13284 SourceLocation Loc,
13285 llvm::Value *ReductionsPtr,
13286 LValue SharedLVal) {
13287 llvm_unreachable("Not supported in SIMD-only mode");
13288}
13289
13291 SourceLocation Loc,
13292 const OMPTaskDataTy &Data) {
13293 llvm_unreachable("Not supported in SIMD-only mode");
13294}
13295
13298 OpenMPDirectiveKind CancelRegion) {
13299 llvm_unreachable("Not supported in SIMD-only mode");
13300}
13301
13303 SourceLocation Loc, const Expr *IfCond,
13304 OpenMPDirectiveKind CancelRegion) {
13305 llvm_unreachable("Not supported in SIMD-only mode");
13306}
13307
13309 const OMPExecutableDirective &D, StringRef ParentName,
13310 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
13311 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
13312 llvm_unreachable("Not supported in SIMD-only mode");
13313}
13314
13317 llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
13318 llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
13319 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
13320 const OMPLoopDirective &D)>
13321 SizeEmitter) {
13322 llvm_unreachable("Not supported in SIMD-only mode");
13323}
13324
13326 llvm_unreachable("Not supported in SIMD-only mode");
13327}
13328
13330 llvm_unreachable("Not supported in SIMD-only mode");
13331}
13332
13334 return false;
13335}
13336
13338 const OMPExecutableDirective &D,
13339 SourceLocation Loc,
13340 llvm::Function *OutlinedFn,
13341 ArrayRef<llvm::Value *> CapturedVars) {
13342 llvm_unreachable("Not supported in SIMD-only mode");
13343}
13344
13346 const Expr *NumTeams,
13347 const Expr *ThreadLimit,
13348 SourceLocation Loc) {
13349 llvm_unreachable("Not supported in SIMD-only mode");
13350}
13351
13353 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
13354 const Expr *Device, const RegionCodeGenTy &CodeGen,
13356 llvm_unreachable("Not supported in SIMD-only mode");
13357}
13358
13360 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
13361 const Expr *Device) {
13362 llvm_unreachable("Not supported in SIMD-only mode");
13363}
13364
13366 const OMPLoopDirective &D,
13367 ArrayRef<Expr *> NumIterations) {
13368 llvm_unreachable("Not supported in SIMD-only mode");
13369}
13370
13372 const OMPDependClause *C) {
13373 llvm_unreachable("Not supported in SIMD-only mode");
13374}
13375
13377 const OMPDoacrossClause *C) {
13378 llvm_unreachable("Not supported in SIMD-only mode");
13379}
13380
13381const VarDecl *
13383 const VarDecl *NativeParam) const {
13384 llvm_unreachable("Not supported in SIMD-only mode");
13385}
13386
13387Address
13389 const VarDecl *NativeParam,
13390 const VarDecl *TargetParam) const {
13391 llvm_unreachable("Not supported in SIMD-only mode");
13392}
#define V(N, I)
static llvm::Value * emitCopyprivateCopyFunction(CodeGenModule &CGM, llvm::Type *ArgsElemType, ArrayRef< const Expr * > CopyprivateVars, ArrayRef< const Expr * > DestExprs, ArrayRef< const Expr * > SrcExprs, ArrayRef< const Expr * > AssignmentOps, SourceLocation Loc)
static StringRef getIdentStringFromSourceLocation(CodeGenFunction &CGF, SourceLocation Loc, SmallString< 128 > &Buffer)
static void emitOffloadingArraysAndArgs(CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo, CGOpenMPRuntime::TargetDataInfo &Info, llvm::OpenMPIRBuilder &OMPBuilder, bool IsNonContiguous=false, bool ForEndCall=false)
Emit the arrays used to pass the captures and map information to the offloading runtime library.
static RecordDecl * createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy, ArrayRef< PrivateDataTy > Privates)
static void emitInitWithReductionInitializer(CodeGenFunction &CGF, const OMPDeclareReductionDecl *DRD, const Expr *InitOp, Address Private, Address Original, QualType Ty)
static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy, Address OriginalBaseAddress, llvm::Value *Addr)
static void emitPrivatesInit(CodeGenFunction &CGF, const OMPExecutableDirective &D, Address KmpTaskSharedsPtr, LValue TDBase, const RecordDecl *KmpTaskTWithPrivatesQTyRD, QualType SharedsTy, QualType SharedsPtrTy, const OMPTaskDataTy &Data, ArrayRef< PrivateDataTy > Privates, bool ForDup)
Emit initialization for private variables in task-based directives.
static void emitClauseForBareTargetDirective(CodeGenFunction &CGF, const OMPExecutableDirective &D, llvm::SmallVectorImpl< llvm::Value * > &Values)
static llvm::Value * emitDestructorsFunction(CodeGenModule &CGM, SourceLocation Loc, QualType KmpInt32Ty, QualType KmpTaskTWithPrivatesPtrQTy, QualType KmpTaskTWithPrivatesQTy)
static unsigned evaluateCDTSize(const FunctionDecl *FD, ArrayRef< ParamAttrTy > ParamAttrs)
static void EmitOMPAggregateReduction(CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar, const VarDecl *RHSVar, const llvm::function_ref< void(CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *)> &RedOpGen, const Expr *XExpr=nullptr, const Expr *EExpr=nullptr, const Expr *UpExpr=nullptr)
Emit reduction operation for each element of array (required for array sections) LHS op = RHS.
static void emitTargetCallFallback(CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn, const OMPExecutableDirective &D, llvm::SmallVectorImpl< llvm::Value * > &CapturedVars, bool RequiresOuterTask, const CapturedStmt &CS, bool OffloadingMandatory, CodeGenFunction &CGF)
static llvm::Value * emitReduceInitFunction(CodeGenModule &CGM, SourceLocation Loc, ReductionCodeGen &RCG, unsigned N)
Emits reduction initializer function:
static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion)
static void emitDependData(CodeGenFunction &CGF, QualType &KmpDependInfoTy, llvm::PointerUnion< unsigned *, LValue * > Pos, const OMPTaskDataTy::DependData &Data, Address DependenciesArray)
static llvm::Value * emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc, const OMPTaskDataTy &Data, QualType PrivatesQTy, ArrayRef< PrivateDataTy > Privates)
Emit a privates mapping function for correct handling of private and firstprivate variables.
static llvm::Value * emitReduceCombFunction(CodeGenModule &CGM, SourceLocation Loc, ReductionCodeGen &RCG, unsigned N, const Expr *ReductionOp, const Expr *LHS, const Expr *RHS, const Expr *PrivateRef)
Emits reduction combiner function:
static RecordDecl * createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef< PrivateDataTy > Privates)
static llvm::Value * getAllocatorVal(CodeGenFunction &CGF, const Expr *Allocator)
Return allocator value from expression, or return a null allocator (default when no allocator specifi...
static llvm::Function * emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc, OpenMPDirectiveKind Kind, QualType KmpInt32Ty, QualType KmpTaskTWithPrivatesPtrQTy, QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy, QualType SharedsPtrTy, llvm::Function *TaskFunction, llvm::Value *TaskPrivatesMap)
Emit a proxy function which accepts kmp_task_t as the second argument.
static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix, char ISA, StringRef ParSeq, StringRef MangledName, bool OutputBecomesInput, llvm::Function *Fn)
static bool isAllocatableDecl(const VarDecl *VD)
static llvm::Value * getAlignmentValue(CodeGenModule &CGM, const VarDecl *VD)
Return the alignment from an allocate directive if present.
static void emitTargetCallKernelLaunch(CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn, const OMPExecutableDirective &D, llvm::SmallVectorImpl< llvm::Value * > &CapturedVars, bool RequiresOuterTask, const CapturedStmt &CS, bool OffloadingMandatory, llvm::PointerIntPair< const Expr *, 2, OpenMPDeviceClauseModifier > Device, llvm::Value *OutlinedFnID, CodeGenFunction::OMPTargetDataInfo &InputInfo, llvm::Value *&MapTypesArray, llvm::Value *&MapNamesArray, llvm::function_ref< llvm::Value *(CodeGenFunction &CGF, const OMPLoopDirective &D)> SizeEmitter, CodeGenFunction &CGF, CodeGenModule &CGM)
static llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind convertCaptureClause(const VarDecl *VD)
static std::tuple< unsigned, unsigned, bool > getNDSWDS(const FunctionDecl *FD, ArrayRef< ParamAttrTy > ParamAttrs)
static const OMPExecutableDirective * getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D)
Check for inner distribute directive.
static std::pair< llvm::Value *, llvm::Value * > getPointerAndSize(CodeGenFunction &CGF, const Expr *E)
static const VarDecl * getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE)
static bool isTrivial(ASTContext &Ctx, const Expr *E)
Checks if the expression is constant or does not have non-trivial function calls.
static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind, bool Chunked, bool Ordered)
Map the OpenMP loop schedule to the runtime enumeration.
static void getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS, const Expr **E, int32_t &UpperBound, bool UpperBoundOnly, llvm::Value **CondVal)
Check for a num threads constant value (stored in DefaultVal), or expression (stored in E).
static llvm::Value * emitDeviceID(llvm::PointerIntPair< const Expr *, 2, OpenMPDeviceClauseModifier > Device, CodeGenFunction &CGF)
static const OMPDeclareReductionDecl * getReductionInit(const Expr *ReductionOp)
Check if the combiner is a call to UDR combiner and if it is so return the UDR decl used for reductio...
static bool checkInitIsRequired(CodeGenFunction &CGF, ArrayRef< PrivateDataTy > Privates)
Check if duplication function is required for taskloops.
static bool checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD, ArrayRef< PrivateDataTy > Privates)
Checks if destructor function is required to be generated.
static llvm::TargetRegionEntryInfo getEntryInfoFromPresumedLoc(CodeGenModule &CGM, llvm::OpenMPIRBuilder &OMPBuilder, SourceLocation BeginLoc, llvm::StringRef ParentName="")
static void genMapInfo(MappableExprsHandler &MEHandler, CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo, llvm::OpenMPIRBuilder &OMPBuilder, const llvm::DenseSet< CanonicalDeclPtr< const Decl > > &SkippedVarSet=llvm::DenseSet< CanonicalDeclPtr< const Decl > >())
static void emitForStaticInitCall(CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId, llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule, OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2, const CGOpenMPRuntime::StaticRTInput &Values)
static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy, LValue BaseLV)
static void getKmpAffinityType(ASTContext &C, QualType &KmpTaskAffinityInfoTy)
Builds kmp_depend_info, if it is not built yet, and builds flags type.
static llvm::Constant * emitMappingInformation(CodeGenFunction &CGF, llvm::OpenMPIRBuilder &OMPBuilder, MappableExprsHandler::MappingExprInfo &MapExprs)
Emit a string constant containing the names of the values mapped to the offloading runtime library.
static void getDependTypes(ASTContext &C, QualType &KmpDependInfoTy, QualType &FlagsTy)
Builds kmp_depend_info, if it is not built yet, and builds flags type.
static llvm::Value * emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc, const OMPExecutableDirective &D, QualType KmpTaskTWithPrivatesPtrQTy, const RecordDecl *KmpTaskTWithPrivatesQTyRD, const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy, QualType SharedsPtrTy, const OMPTaskDataTy &Data, ArrayRef< PrivateDataTy > Privates, bool WithLastIter)
Emit task_dup function (for initialization of private/firstprivate/lastprivate vars and last_iter fla...
static std::pair< llvm::Value *, OMPDynGroupprivateFallbackType > emitDynCGroupMem(const OMPExecutableDirective &D, CodeGenFunction &CGF)
static llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseKind convertDeviceClause(const VarDecl *VD)
static llvm::Value * emitReduceFiniFunction(CodeGenModule &CGM, SourceLocation Loc, ReductionCodeGen &RCG, unsigned N)
Emits reduction finalizer function:
static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr, QualType Type, bool EmitDeclareReductionInit, const Expr *Init, const OMPDeclareReductionDecl *DRD, Address SrcAddr=Address::invalid())
Emit initialization of arrays of complex types.
static bool getAArch64PBV(QualType QT, ASTContext &C)
Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI.
static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C)
Computes the lane size (LS) of a return type or of an input parameter, as defined by LS(P) in 3....
static void EmitDoacrossOrdered(CodeGenFunction &CGF, CodeGenModule &CGM, const T *C, llvm::Value *ULoc, llvm::Value *ThreadID)
static RTLDependenceKindTy translateDependencyKind(OpenMPDependClauseKind K)
Translates internal dependency kind into the runtime kind.
static void emitTargetCallElse(CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn, const OMPExecutableDirective &D, llvm::SmallVectorImpl< llvm::Value * > &CapturedVars, bool RequiresOuterTask, const CapturedStmt &CS, bool OffloadingMandatory, CodeGenFunction &CGF)
static llvm::Function * emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty, const Expr *CombinerInitializer, const VarDecl *In, const VarDecl *Out, bool IsCombiner)
static void emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn, const llvm::APSInt &VLENVal, ArrayRef< ParamAttrTy > ParamAttrs, OMPDeclareSimdDeclAttr::BranchStateTy State)
static void emitReductionCombiner(CodeGenFunction &CGF, const Expr *ReductionOp)
Emit reduction combiner.
static std::string mangleVectorParameters(ArrayRef< ParamAttrTy > ParamAttrs)
Mangle the parameter part of the vector function name according to their OpenMP classification.
static std::string generateUniqueName(CodeGenModule &CGM, llvm::StringRef Prefix, const Expr *Ref)
static llvm::Function * emitParallelOrTeamsOutlinedFunction(CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS, const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen)
static void emitAArch64DeclareSimdFunction(CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN, ArrayRef< ParamAttrTy > ParamAttrs, OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName, char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc)
Emit vector function attributes for AArch64, as defined in the AAVFABI.
static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array, unsigned Index, const VarDecl *Var)
Given an array of pointers to variables, project the address of a given variable.
static bool isAssumedToBeNotEmitted(const ValueDecl *VD, bool IsDevice)
static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask, StringRef Prefix, char ISA, StringRef ParSeq, StringRef MangledName, bool OutputBecomesInput, llvm::Function *Fn)
static FieldDecl * addFieldToRecordDecl(ASTContext &C, DeclContext *DC, QualType FieldTy)
static ValueDecl * getDeclFromThisExpr(const Expr *E)
static void genMapInfoForCaptures(MappableExprsHandler &MEHandler, CodeGenFunction &CGF, const CapturedStmt &CS, llvm::SmallVectorImpl< llvm::Value * > &CapturedVars, llvm::OpenMPIRBuilder &OMPBuilder, llvm::DenseSet< CanonicalDeclPtr< const Decl > > &MappedVarSet, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo)
static RecordDecl * createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind, QualType KmpInt32Ty, QualType KmpRoutineEntryPointerQTy)
static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule, OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2)
static bool getAArch64MTV(QualType QT, ParamKindTy Kind)
Maps To Vector (MTV), as defined in 4.1.1 of the AAVFABI (2021Q1).
Expr::Classification Cl
TokenType getType() const
Returns the token's type, e.g.
FormatToken * Next
The next token in the unwrapped line.
#define X(type, name)
Definition Value.h:97
#define SM(sm)
This file defines OpenMP AST classes for clauses.
Defines some OpenMP-specific enums and functions.
Defines the SourceManager interface.
This file defines OpenMP AST classes for executable directives and clauses.
__DEVICE__ int max(int __a, int __b)
Holds long-lived AST nodes (such as types and decls) that can be referred to throughout the semantic ...
Definition ASTContext.h:220
SourceManager & getSourceManager()
Definition ASTContext.h:851
const ConstantArrayType * getAsConstantArrayType(QualType T) const
CharUnits getTypeAlignInChars(QualType T) const
Return the ABI-specified alignment of a (complete) type T, in characters.
const ASTRecordLayout & getASTRecordLayout(const RecordDecl *D) const
Get or compute information about the layout of the specified record (struct/union/class) D,...
QualType getPointerType(QualType T) const
Return the uniqued reference to the type for a pointer to the specified type.
CanQualType VoidPtrTy
QualType getConstantArrayType(QualType EltTy, const llvm::APInt &ArySize, const Expr *SizeExpr, ArraySizeModifier ASM, unsigned IndexTypeQuals) const
Return the unique reference to the type for a constant array of the specified element type.
const LangOptions & getLangOpts() const
Definition ASTContext.h:944
CanQualType BoolTy
QualType getIntTypeForBitwidth(unsigned DestWidth, unsigned Signed) const
getIntTypeForBitwidth - sets integer QualTy according to specified details: bitwidth,...
CharUnits getDeclAlign(const Decl *D, bool ForAlignof=false) const
Return a conservative estimate of the alignment of the specified decl D.
int64_t toBits(CharUnits CharSize) const
Convert a size in characters to a size in bits.
const ArrayType * getAsArrayType(QualType T) const
Type Query functions.
uint64_t getTypeSize(QualType T) const
Return the size of the specified (complete) type T, in bits.
CharUnits getTypeSizeInChars(QualType T) const
Return the size of the specified (complete) type T, in characters.
static bool hasSameType(QualType T1, QualType T2)
Determine whether the given types T1 and T2 are equivalent.
const VariableArrayType * getAsVariableArrayType(QualType T) const
QualType getSizeType() const
Return the unique type for "size_t" (C99 7.17), defined in <stddef.h>.
unsigned getTypeAlign(QualType T) const
Return the ABI-specified alignment of a (complete) type T, in bits.
CharUnits getSize() const
getSize - Get the record size in characters.
uint64_t getFieldOffset(unsigned FieldNo) const
getFieldOffset - Get the offset of the given field index, in bits.
CharUnits getNonVirtualSize() const
getNonVirtualSize - Get the non-virtual size (in chars) of an object, which is the size of the object...
static QualType getBaseOriginalType(const Expr *Base)
Return original type of the base expression for array section.
Definition Expr.cpp:5270
Represents an array type, per C99 6.7.5.2 - Array Declarators.
Definition TypeBase.h:3723
Attr - This represents one attribute.
Definition Attr.h:45
Represents a C++ constructor within a class.
Definition DeclCXX.h:2604
Represents a C++ destructor within a class.
Definition DeclCXX.h:2869
const CXXRecordDecl * getParent() const
Return the parent of this method declaration, which is the class in which this method is defined.
Definition DeclCXX.h:2255
QualType getFunctionObjectParameterType() const
Definition DeclCXX.h:2279
base_class_range bases()
Definition DeclCXX.h:608
bool isLambda() const
Determine whether this class describes a lambda function object.
Definition DeclCXX.h:1018
void getCaptureFields(llvm::DenseMap< const ValueDecl *, FieldDecl * > &Captures, FieldDecl *&ThisCapture) const
For a closure type, retrieve the mapping from captured variables and this to the non-static data memb...
Definition DeclCXX.cpp:1784
unsigned getNumBases() const
Retrieves the number of base classes of this class.
Definition DeclCXX.h:602
base_class_range vbases()
Definition DeclCXX.h:625
capture_const_range captures() const
Definition DeclCXX.h:1097
ctor_range ctors() const
Definition DeclCXX.h:670
CXXDestructorDecl * getDestructor() const
Returns the destructor decl for this class.
Definition DeclCXX.cpp:2121
CanProxy< U > castAs() const
A wrapper class around a pointer that always points to its canonical declaration.
Describes the capture of either a variable, or 'this', or variable-length array type.
Definition Stmt.h:3931
bool capturesVariableByCopy() const
Determine whether this capture handles a variable by copy.
Definition Stmt.h:3965
VarDecl * getCapturedVar() const
Retrieve the declaration of the variable being captured.
Definition Stmt.cpp:1353
bool capturesVariableArrayType() const
Determine whether this capture handles a variable-length array type.
Definition Stmt.h:3971
bool capturesThis() const
Determine whether this capture handles the C++ 'this' pointer.
Definition Stmt.h:3959
bool capturesVariable() const
Determine whether this capture handles a variable (by reference).
Definition Stmt.h:3962
This captures a statement into a function.
Definition Stmt.h:3918
const Capture * const_capture_iterator
Definition Stmt.h:4052
capture_iterator capture_end() const
Retrieve an iterator pointing past the end of the sequence of captures.
Definition Stmt.h:4069
const RecordDecl * getCapturedRecordDecl() const
Retrieve the record declaration for captured variables.
Definition Stmt.h:4039
Stmt * getCapturedStmt()
Retrieve the statement being captured.
Definition Stmt.h:4022
bool capturesVariable(const VarDecl *Var) const
True if this variable has been captured.
Definition Stmt.cpp:1479
capture_iterator capture_begin()
Retrieve an iterator pointing to the first capture.
Definition Stmt.h:4064
capture_range captures()
Definition Stmt.h:4056
CharUnits - This is an opaque type for sizes expressed in character units.
Definition CharUnits.h:38
bool isZero() const
isZero - Test whether the quantity equals zero.
Definition CharUnits.h:122
llvm::Align getAsAlign() const
getAsAlign - Returns Quantity as a valid llvm::Align, Beware llvm::Align assumes power of two 8-bit b...
Definition CharUnits.h:189
QuantityType getQuantity() const
getQuantity - Get the raw integer representation of this quantity.
Definition CharUnits.h:185
CharUnits alignmentOfArrayElement(CharUnits elementSize) const
Given that this is the alignment of the first element of an array, return the minimum alignment of an...
Definition CharUnits.h:214
static CharUnits fromQuantity(QuantityType Quantity)
fromQuantity - Construct a CharUnits quantity from a raw integer type.
Definition CharUnits.h:63
CharUnits alignTo(const CharUnits &Align) const
alignTo - Returns the next integer (mod 2**64) that is greater than or equal to this quantity and is ...
Definition CharUnits.h:201
Like RawAddress, an abstract representation of an aligned address, but the pointer contained in this ...
Definition Address.h:128
static Address invalid()
Definition Address.h:176
llvm::Value * emitRawPointer(CodeGenFunction &CGF) const
Return the pointer contained in this class after authenticating it and adding offset to it if necessa...
Definition Address.h:253
CharUnits getAlignment() const
Definition Address.h:194
llvm::Type * getElementType() const
Return the type of the values stored in this address.
Definition Address.h:209
Address withPointer(llvm::Value *NewPointer, KnownNonNull_t IsKnownNonNull) const
Return address with different pointer, but same element type and alignment.
Definition Address.h:261
Address withElementType(llvm::Type *ElemTy) const
Return address with different element type, but same pointer and alignment.
Definition Address.h:276
bool isValid() const
Definition Address.h:177
llvm::PointerType * getType() const
Return the type of the pointer value.
Definition Address.h:204
static ApplyDebugLocation CreateArtificial(CodeGenFunction &CGF)
Apply TemporaryLocation if it is valid.
static ApplyDebugLocation CreateDefaultArtificial(CodeGenFunction &CGF, SourceLocation TemporaryLocation)
Apply TemporaryLocation if it is valid.
static ApplyDebugLocation CreateEmpty(CodeGenFunction &CGF)
Set the IRBuilder to not attach debug locations.
llvm::StoreInst * CreateStore(llvm::Value *Val, Address Addr, bool IsVolatile=false)
Definition CGBuilder.h:140
Address CreateGEP(CodeGenFunction &CGF, Address Addr, llvm::Value *Index, const llvm::Twine &Name="")
Definition CGBuilder.h:296
Address CreatePointerBitCastOrAddrSpaceCast(Address Addr, llvm::Type *Ty, llvm::Type *ElementTy, const llvm::Twine &Name="")
Definition CGBuilder.h:207
Address CreateConstArrayGEP(Address Addr, uint64_t Index, const llvm::Twine &Name="")
Given addr = [n x T]* ... produce name = getelementptr inbounds addr, i64 0, i64 index where i64 is a...
Definition CGBuilder.h:245
llvm::LoadInst * CreateLoad(Address Addr, const llvm::Twine &Name="")
Definition CGBuilder.h:112
llvm::CallInst * CreateMemCpy(Address Dest, Address Src, llvm::Value *Size, bool IsVolatile=false)
Definition CGBuilder.h:369
Address CreateConstGEP(Address Addr, uint64_t Index, const llvm::Twine &Name="")
Given addr = T* ... produce name = getelementptr inbounds addr, i64 index where i64 is actually the t...
Definition CGBuilder.h:282
Address CreateAddrSpaceCast(Address Addr, llvm::Type *Ty, llvm::Type *ElementTy, const llvm::Twine &Name="")
Definition CGBuilder.h:193
CGFunctionInfo - Class to encapsulate the information about a function definition.
static LastprivateConditionalRAII disable(CodeGenFunction &CGF, const OMPExecutableDirective &S)
NontemporalDeclsRAII(CodeGenModule &CGM, const OMPLoopDirective &S)
Struct that keeps all the relevant information that should be kept throughout a 'target data' region.
llvm::DenseMap< const ValueDecl *, llvm::Value * > CaptureDeviceAddrMap
Map between the a declaration of a capture and the corresponding new llvm address where the runtime r...
UntiedTaskLocalDeclsRAII(CodeGenFunction &CGF, const llvm::MapVector< CanonicalDeclPtr< const VarDecl >, std::pair< Address, Address > > &LocalVars)
virtual Address emitThreadIDAddress(CodeGenFunction &CGF, SourceLocation Loc)
Emits address of the word in a memory where current thread id is stored.
llvm::StringSet ThreadPrivateWithDefinition
Set of threadprivate variables with the generated initializer.
virtual void emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, const OMPExecutableDirective &D, llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds, const Expr *IfCond, const OMPTaskDataTy &Data)
Emit task region for the task directive.
void createOffloadEntriesAndInfoMetadata()
Creates all the offload entries in the current compilation unit along with the associated metadata.
const Expr * getNumTeamsExprForTargetDirective(CodeGenFunction &CGF, const OMPExecutableDirective &D, int32_t &MinTeamsVal, int32_t &MaxTeamsVal)
Emit the number of teams for a target directive.
virtual Address getAddrOfThreadPrivate(CodeGenFunction &CGF, const VarDecl *VD, Address VDAddr, SourceLocation Loc)
Returns address of the threadprivate variable for the current thread.
void emitDeferredTargetDecls() const
Emit deferred declare target variables marked for deferred emission.
virtual llvm::Value * emitForNext(CodeGenFunction &CGF, SourceLocation Loc, unsigned IVSize, bool IVSigned, Address IL, Address LB, Address UB, Address ST)
Call __kmpc_dispatch_next( ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter, kmp_int[32|64] *p_lowe...
bool markAsGlobalTarget(GlobalDecl GD)
Marks the declaration as already emitted for the device code and returns true, if it was marked alrea...
virtual void emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc, llvm::Function *OutlinedFn, ArrayRef< llvm::Value * > CapturedVars, const Expr *IfCond, llvm::Value *NumThreads, OpenMPNumThreadsClauseModifier NumThreadsModifier=OMPC_NUMTHREADS_unknown, OpenMPSeverityClauseKind Severity=OMPC_SEVERITY_fatal, const Expr *Message=nullptr)
Emits code for parallel or serial call of the OutlinedFn with variables captured in a record which ad...
llvm::SmallDenseSet< CanonicalDeclPtr< const Decl > > NontemporalDeclsSet
virtual void emitTargetDataStandAloneCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, const Expr *Device)
Emit the data mapping/movement code associated with the directive D that should be of the form 'targe...
virtual void emitNumThreadsClause(CodeGenFunction &CGF, llvm::Value *NumThreads, SourceLocation Loc, OpenMPNumThreadsClauseModifier Modifier=OMPC_NUMTHREADS_unknown, OpenMPSeverityClauseKind Severity=OMPC_SEVERITY_fatal, SourceLocation SeverityLoc=SourceLocation(), const Expr *Message=nullptr, SourceLocation MessageLoc=SourceLocation())
Emits call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_threads) ...
QualType SavedKmpTaskloopTQTy
Saved kmp_task_t for taskloop-based directive.
virtual void emitSingleRegion(CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen, SourceLocation Loc, ArrayRef< const Expr * > CopyprivateVars, ArrayRef< const Expr * > DestExprs, ArrayRef< const Expr * > SrcExprs, ArrayRef< const Expr * > AssignmentOps)
Emits a single region.
virtual bool emitTargetGlobal(GlobalDecl GD)
Emit the global GD if it is meaningful for the target.
void setLocThreadIdInsertPt(CodeGenFunction &CGF, bool AtCurrentPoint=false)
std::string getOutlinedHelperName(StringRef Name) const
Get the function name of an outlined region.
bool HasEmittedDeclareTargetRegion
Flag for keeping track of whether a device routine has been emitted.
llvm::Constant * getOrCreateThreadPrivateCache(const VarDecl *VD)
If the specified mangled name is not in the module, create and return threadprivate cache object.
virtual Address getTaskReductionItem(CodeGenFunction &CGF, SourceLocation Loc, llvm::Value *ReductionsPtr, LValue SharedLVal)
Get the address of void * type of the private copy of the reduction item specified by the SharedLVal...
virtual void emitForDispatchDeinit(CodeGenFunction &CGF, SourceLocation Loc)
This is used for non static scheduled types and when the ordered clause is present on the loop constr...
void emitCall(CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee Callee, ArrayRef< llvm::Value * > Args={}) const
Emits Callee function call with arguments Args with location Loc.
virtual void getDefaultScheduleAndChunk(CodeGenFunction &CGF, const OMPLoopDirective &S, OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const
Choose default schedule type and chunk value for the schedule clause.
virtual std::pair< llvm::Function *, llvm::Function * > getUserDefinedReduction(const OMPDeclareReductionDecl *D)
Get combiner/initializer for the specified user-defined reduction, if any.
virtual bool isGPU() const
Returns true if the current target is a GPU.
static const Stmt * getSingleCompoundChild(ASTContext &Ctx, const Stmt *Body)
Checks if the Body is the CompoundStmt and returns its child statement iff there is only one that is ...
virtual void emitDeclareTargetFunction(const FunctionDecl *FD, llvm::GlobalValue *GV)
Emit code for handling declare target functions in the runtime.
bool HasRequiresUnifiedSharedMemory
Flag for keeping track of whether a requires unified_shared_memory directive is present.
llvm::Value * emitUpdateLocation(CodeGenFunction &CGF, SourceLocation Loc, unsigned Flags=0, bool EmitLoc=false)
Emits object of ident_t type with info for source location.
bool isLocalVarInUntiedTask(CodeGenFunction &CGF, const VarDecl *VD) const
Returns true if the variable is a local variable in untied task.
virtual void emitTeamsCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, SourceLocation Loc, llvm::Function *OutlinedFn, ArrayRef< llvm::Value * > CapturedVars)
Emits code for teams call of the OutlinedFn with variables captured in a record which address is stor...
virtual void emitCancellationPointCall(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind CancelRegion)
Emit code for 'cancellation point' construct.
virtual llvm::Function * emitThreadPrivateVarDefinition(const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit, CodeGenFunction *CGF=nullptr)
Emit a code for initialization of threadprivate variable.
virtual ConstantAddress getAddrOfDeclareTargetVar(const VarDecl *VD)
Returns the address of the variable marked as declare target with link clause OR as declare target wi...
llvm::Function * getOrCreateUserDefinedMapperFunc(const OMPDeclareMapperDecl *D)
Get the function for the specified user-defined mapper.
OpenMPLocThreadIDMapTy OpenMPLocThreadIDMap
virtual void functionFinished(CodeGenFunction &CGF)
Cleans up references to the objects in finished function.
virtual llvm::Function * emitTeamsOutlinedFunction(CodeGenFunction &CGF, const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen)
Emits outlined function for the specified OpenMP teams directive D.
QualType KmpTaskTQTy
Type typedef struct kmp_task { void * shareds; /‍**< pointer to block of pointers to shared vars ‍/ k...
llvm::OpenMPIRBuilder OMPBuilder
An OpenMP-IR-Builder instance.
virtual void emitDoacrossInit(CodeGenFunction &CGF, const OMPLoopDirective &D, ArrayRef< Expr * > NumIterations)
Emit initialization for doacross loop nesting support.
virtual void adjustTargetSpecificDataForLambdas(CodeGenFunction &CGF, const OMPExecutableDirective &D) const
Adjust some parameters for the target-based directives, like addresses of the variables captured by r...
virtual void emitTargetDataCalls(CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, const Expr *Device, const RegionCodeGenTy &CodeGen, CGOpenMPRuntime::TargetDataInfo &Info)
Emit the target data mapping code associated with D.
virtual unsigned getDefaultLocationReserved2Flags() const
Returns additional flags that can be stored in reserved_2 field of the default location.
virtual Address getParameterAddress(CodeGenFunction &CGF, const VarDecl *NativeParam, const VarDecl *TargetParam) const
Gets the address of the native argument basing on the address of the target-specific parameter.
void emitUsesAllocatorsFini(CodeGenFunction &CGF, const Expr *Allocator)
Destroys user defined allocators specified in the uses_allocators clause.
QualType KmpTaskAffinityInfoTy
Type typedef struct kmp_task_affinity_info { kmp_intptr_t base_addr; size_t len; struct { bool flag1 ...
void emitPrivateReduction(CodeGenFunction &CGF, SourceLocation Loc, const Expr *Privates, const Expr *LHSExprs, const Expr *RHSExprs, const Expr *ReductionOps)
Emits code for private variable reduction.
llvm::Value * emitNumTeamsForTargetDirective(CodeGenFunction &CGF, const OMPExecutableDirective &D)
virtual void emitTargetOutlinedFunctionHelper(const OMPExecutableDirective &D, StringRef ParentName, llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, bool IsOffloadEntry, const RegionCodeGenTy &CodeGen)
Helper to emit outlined function for 'target' directive.
void scanForTargetRegionsFunctions(const Stmt *S, StringRef ParentName)
Start scanning from statement S and emit all target regions found along the way.
SmallVector< llvm::Value *, 4 > emitDepobjElementsSizes(CodeGenFunction &CGF, QualType &KmpDependInfoTy, const OMPTaskDataTy::DependData &Data)
virtual llvm::Value * emitMessageClause(CodeGenFunction &CGF, const Expr *Message, SourceLocation Loc)
virtual void emitTaskgroupRegion(CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen, SourceLocation Loc)
Emit a taskgroup region.
llvm::DenseMap< llvm::Function *, llvm::DenseMap< CanonicalDeclPtr< const Decl >, std::tuple< QualType, const FieldDecl *, const FieldDecl *, LValue > > > LastprivateConditionalToTypes
Maps local variables marked as lastprivate conditional to their internal types.
virtual bool emitTargetGlobalVariable(GlobalDecl GD)
Emit the global variable if it is a valid device global variable.
virtual void emitNumTeamsClause(CodeGenFunction &CGF, const Expr *NumTeams, const Expr *ThreadLimit, SourceLocation Loc)
Emits call to void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_teams,...
bool hasRequiresUnifiedSharedMemory() const
Return whether the unified_shared_memory has been specified.
virtual Address getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF, QualType VarType, StringRef Name)
Creates artificial threadprivate variable with name Name and type VarType.
void emitUserDefinedMapper(const OMPDeclareMapperDecl *D, CodeGenFunction *CGF=nullptr)
Emit the function for the user defined mapper construct.
bool HasEmittedTargetRegion
Flag for keeping track of whether a target region has been emitted.
void emitDepobjElements(CodeGenFunction &CGF, QualType &KmpDependInfoTy, LValue PosLVal, const OMPTaskDataTy::DependData &Data, Address DependenciesArray)
std::string getReductionFuncName(StringRef Name) const
Get the function name of a reduction function.
virtual void processRequiresDirective(const OMPRequiresDecl *D)
Perform check on requires decl to ensure that target architecture supports unified addressing.
llvm::DenseSet< CanonicalDeclPtr< const Decl > > AlreadyEmittedTargetDecls
List of the emitted declarations.
virtual llvm::Value * emitTaskReductionInit(CodeGenFunction &CGF, SourceLocation Loc, ArrayRef< const Expr * > LHSExprs, ArrayRef< const Expr * > RHSExprs, const OMPTaskDataTy &Data)
Emit a code for initialization of task reduction clause.
llvm::Value * getThreadID(CodeGenFunction &CGF, SourceLocation Loc)
Gets thread id value for the current thread.
void emitUpdateClause(CodeGenFunction &CGF, LValue DepobjLVal, OpenMPDependClauseKind NewDepKind, SourceLocation Loc)
Updates the dependency kind in the specified depobj object.
virtual void emitLastprivateConditionalFinalUpdate(CodeGenFunction &CGF, LValue PrivLVal, const VarDecl *VD, SourceLocation Loc)
Gets the address of the global copy used for lastprivate conditional update, if any.
llvm::MapVector< CanonicalDeclPtr< const VarDecl >, std::pair< Address, Address > > UntiedLocalVarsAddressesMap
virtual void emitErrorCall(CodeGenFunction &CGF, SourceLocation Loc, Expr *ME, bool IsFatal)
Emit __kmpc_error call for error directive extern void __kmpc_error(ident_t *loc, int severity,...
void clearLocThreadIdInsertPt(CodeGenFunction &CGF)
virtual void emitTaskyieldCall(CodeGenFunction &CGF, SourceLocation Loc)
Emits code for a taskyield directive.
std::string getName(ArrayRef< StringRef > Parts) const
Get the platform-specific name separator.
void computeMinAndMaxThreadsAndTeams(const OMPExecutableDirective &D, CodeGenFunction &CGF, llvm::OpenMPIRBuilder::TargetKernelDefaultAttrs &Attrs)
Helper to determine the min/max number of threads/teams for D.
virtual void emitFlush(CodeGenFunction &CGF, ArrayRef< const Expr * > Vars, SourceLocation Loc, llvm::AtomicOrdering AO)
Emit flush of the variables specified in 'omp flush' directive.
virtual void emitTaskwaitCall(CodeGenFunction &CGF, SourceLocation Loc, const OMPTaskDataTy &Data)
Emit code for 'taskwait' directive.
virtual void emitProcBindClause(CodeGenFunction &CGF, llvm::omp::ProcBindKind ProcBind, SourceLocation Loc)
Emit call to void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid, int proc_bind) to generate...
void emitLastprivateConditionalUpdate(CodeGenFunction &CGF, LValue IVLVal, StringRef UniqueDeclName, LValue LVal, SourceLocation Loc)
Emit update for lastprivate conditional data.
virtual void emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D, llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds, const Expr *IfCond, const OMPTaskDataTy &Data)
Emit task region for the taskloop directive.
virtual void emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind Kind, bool EmitChecks=true, bool ForceSimpleCall=false)
Emit an implicit/explicit barrier for OpenMP threads.
static unsigned getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind)
Returns default flags for the barriers depending on the directive, for which this barrier is going to ...
virtual bool emitTargetFunctions(GlobalDecl GD)
Emit the target regions enclosed in GD function definition or the function itself in case it is a val...
TaskResultTy emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc, const OMPExecutableDirective &D, llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds, const OMPTaskDataTy &Data)
Emit task region for the task directive.
llvm::Value * emitTargetNumIterationsCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, llvm::function_ref< llvm::Value *(CodeGenFunction &CGF, const OMPLoopDirective &D)> SizeEmitter)
Return the trip count of loops associated with constructs / 'target teams distribute' and 'teams dist...
llvm::StringMap< llvm::AssertingVH< llvm::GlobalVariable >, llvm::BumpPtrAllocator > InternalVars
An ordered map of auto-generated variables to their unique names.
virtual void emitDistributeStaticInit(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values)
llvm::SmallVector< UntiedLocalVarsAddressesMap, 4 > UntiedLocalVarsStack
virtual void emitForStaticFinish(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind)
Call the appropriate runtime routine to notify that we finished all the work with current loop.
virtual void emitThreadLimitClause(CodeGenFunction &CGF, const Expr *ThreadLimit, SourceLocation Loc)
Emits call to void __kmpc_set_thread_limit(ident_t *loc, kmp_int32 global_tid, kmp_int32 thread_limit)...
void emitIfClause(CodeGenFunction &CGF, const Expr *Cond, const RegionCodeGenTy &ThenGen, const RegionCodeGenTy &ElseGen)
Emits code for OpenMP 'if' clause using specified CodeGen function.
Address emitDepobjDependClause(CodeGenFunction &CGF, const OMPTaskDataTy::DependData &Dependencies, SourceLocation Loc)
Emits list of dependencies based on the provided data (array of dependence/expression pairs) for depob...
bool isNontemporalDecl(const ValueDecl *VD) const
Checks if the VD variable is marked as nontemporal declaration in current context.
virtual llvm::Function * emitParallelOutlinedFunction(CodeGenFunction &CGF, const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen)
Emits outlined function for the specified OpenMP parallel directive D.
const Expr * getNumThreadsExprForTargetDirective(CodeGenFunction &CGF, const OMPExecutableDirective &D, int32_t &UpperBound, bool UpperBoundOnly, llvm::Value **CondExpr=nullptr, const Expr **ThreadLimitExpr=nullptr)
Check for a number of threads upper bound constant value (stored in UpperBound), or expression (retur...
virtual llvm::Value * emitSeverityClause(OpenMPSeverityClauseKind Severity, SourceLocation Loc)
llvm::SmallVector< LastprivateConditionalData, 4 > LastprivateConditionalStack
Stack for list of addresses of declarations in current context marked as lastprivate conditional.
virtual void emitForStaticInit(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind, const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values)
Call the appropriate runtime routine to initialize it before start of loop.
virtual void emitDeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn)
Marks function Fn with properly mangled versions of vector functions.
llvm::AtomicOrdering getDefaultMemoryOrdering() const
Gets default memory ordering as specified in requires directive.
virtual bool isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind, bool Chunked) const
Check if the specified ScheduleKind is static non-chunked.
llvm::Value * getCriticalRegionLock(StringRef CriticalName)
Returns corresponding lock object for the specified critical region name.
virtual void emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc, const Expr *IfCond, OpenMPDirectiveKind CancelRegion)
Emit code for 'cancel' construct.
QualType SavedKmpTaskTQTy
Saved kmp_task_t for task directive.
virtual void emitMasterRegion(CodeGenFunction &CGF, const RegionCodeGenTy &MasterOpGen, SourceLocation Loc)
Emits a master region.
virtual llvm::Function * emitTaskOutlinedFunction(const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, const VarDecl *PartIDVar, const VarDecl *TaskTVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen, bool Tied, unsigned &NumberOfParts)
Emits outlined function for the OpenMP task directive D.
llvm::DenseMap< llvm::Function *, unsigned > FunctionToUntiedTaskStackMap
Maps function to the position of the untied task locals stack.
void emitDestroyClause(CodeGenFunction &CGF, LValue DepobjLVal, SourceLocation Loc)
Emits the code to destroy the dependency object provided in depobj directive.
virtual void emitTaskReductionFixups(CodeGenFunction &CGF, SourceLocation Loc, ReductionCodeGen &RCG, unsigned N)
Required to resolve existing problems in the runtime.
llvm::ArrayType * KmpCriticalNameTy
Type kmp_critical_name, originally defined as typedef kmp_int32 kmp_critical_name[8];.
virtual void emitDoacrossOrdered(CodeGenFunction &CGF, const OMPDependClause *C)
Emit code for doacross ordered directive with 'depend' clause.
llvm::DenseMap< const OMPDeclareMapperDecl *, llvm::Function * > UDMMap
Map from the user-defined mapper declaration to its corresponding functions.
virtual void checkAndEmitLastprivateConditional(CodeGenFunction &CGF, const Expr *LHS)
Checks if the provided LVal is lastprivate conditional and emits the code to update the value of the ...
std::pair< llvm::Value *, LValue > getDepobjElements(CodeGenFunction &CGF, LValue DepobjLVal, SourceLocation Loc)
Returns the number of the elements and the address of the depobj dependency array.
llvm::SmallDenseSet< const VarDecl * > DeferredGlobalVariables
List of variables that can become declare target implicitly and, thus, must be emitted.
void emitUsesAllocatorsInit(CodeGenFunction &CGF, const Expr *Allocator, const Expr *AllocatorTraits)
Initializes user defined allocators specified in the uses_allocators clauses.
llvm::Type * KmpRoutineEntryPtrTy
Type typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *);.
llvm::Type * getIdentTyPointerTy()
Returns pointer to ident_t type.
void emitSingleReductionCombiner(CodeGenFunction &CGF, const Expr *ReductionOp, const Expr *PrivateRef, const DeclRefExpr *LHS, const DeclRefExpr *RHS)
Emits single reduction combiner.
llvm::OpenMPIRBuilder & getOMPBuilder()
virtual void emitTargetOutlinedFunction(const OMPExecutableDirective &D, StringRef ParentName, llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, bool IsOffloadEntry, const RegionCodeGenTy &CodeGen)
Emit outlined function for 'target' directive.
virtual void emitCriticalRegion(CodeGenFunction &CGF, StringRef CriticalName, const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc, const Expr *Hint=nullptr)
Emits a critical region.
virtual void emitForOrderedIterationEnd(CodeGenFunction &CGF, SourceLocation Loc, unsigned IVSize, bool IVSigned)
Call the appropriate runtime routine to notify that we finished iteration of the ordered loop with th...
virtual void emitOutlinedFunctionCall(CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn, ArrayRef< llvm::Value * > Args={}) const
Emits call of the outlined function with the provided arguments, translating these arguments to corre...
llvm::Value * emitNumThreadsForTargetDirective(CodeGenFunction &CGF, const OMPExecutableDirective &D)
Emit an expression that denotes the number of threads a target region shall use.
void emitThreadPrivateVarInit(CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor, llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc)
Emits initialization code for the threadprivate variables.
virtual void emitUserDefinedReduction(CodeGenFunction *CGF, const OMPDeclareReductionDecl *D)
Emit code for the specified user defined reduction construct.
virtual void checkAndEmitSharedLastprivateConditional(CodeGenFunction &CGF, const OMPExecutableDirective &D, const llvm::DenseSet< CanonicalDeclPtr< const VarDecl > > &IgnoredDecls)
Checks if the lastprivate conditional was updated in inner region and writes the value.
QualType KmpDimTy
struct kmp_dim { // loop bounds info casted to kmp_int64 kmp_int64 lo; // lower kmp_int64 up; // uppe...
virtual void emitInlinedDirective(CodeGenFunction &CGF, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen, bool HasCancel=false)
Emit code for the directive that does not require outlining.
virtual void registerTargetGlobalVariable(const VarDecl *VD, llvm::Constant *Addr)
Checks if the provided global decl GD is a declare target variable and registers it when emitting cod...
virtual void emitFunctionProlog(CodeGenFunction &CGF, const Decl *D)
Emits OpenMP-specific function prolog.
void emitKmpRoutineEntryT(QualType KmpInt32Ty)
Build type kmp_routine_entry_t (if not built yet).
virtual bool isStaticChunked(OpenMPScheduleClauseKind ScheduleKind, bool Chunked) const
Check if the specified ScheduleKind is static chunked.
virtual void emitTargetCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond, llvm::PointerIntPair< const Expr *, 2, OpenMPDeviceClauseModifier > Device, llvm::function_ref< llvm::Value *(CodeGenFunction &CGF, const OMPLoopDirective &D)> SizeEmitter)
Emit the target offloading code associated with D.
virtual bool hasAllocateAttributeForGlobalVar(const VarDecl *VD, LangAS &AS)
Checks if the variable has associated OMPAllocateDeclAttr attribute with the predefined allocator and...
llvm::AtomicOrdering RequiresAtomicOrdering
Atomic ordering from the omp requires directive.
virtual void emitReduction(CodeGenFunction &CGF, SourceLocation Loc, ArrayRef< const Expr * > Privates, ArrayRef< const Expr * > LHSExprs, ArrayRef< const Expr * > RHSExprs, ArrayRef< const Expr * > ReductionOps, ReductionOptionsTy Options)
Emit a code for reduction clause.
std::pair< llvm::Value *, Address > emitDependClause(CodeGenFunction &CGF, ArrayRef< OMPTaskDataTy::DependData > Dependencies, SourceLocation Loc)
Emits list of dependencies based on the provided data (array of dependence/expression pairs).
llvm::StringMap< llvm::WeakTrackingVH > EmittedNonTargetVariables
List of the global variables with their addresses that should not be emitted for the target.
virtual bool isDynamic(OpenMPScheduleClauseKind ScheduleKind) const
Check if the specified ScheduleKind is dynamic.
Address emitLastprivateConditionalInit(CodeGenFunction &CGF, const VarDecl *VD)
Create specialized alloca to handle lastprivate conditionals.
virtual void emitOrderedRegion(CodeGenFunction &CGF, const RegionCodeGenTy &OrderedOpGen, SourceLocation Loc, bool IsThreads)
Emit an ordered region.
virtual Address getAddressOfLocalVariable(CodeGenFunction &CGF, const VarDecl *VD)
Gets the OpenMP-specific address of the local variable.
virtual void emitTaskReductionFini(CodeGenFunction &CGF, SourceLocation Loc, bool IsWorksharingReduction)
Emits the following code for reduction clause with task modifier:
virtual void emitMaskedRegion(CodeGenFunction &CGF, const RegionCodeGenTy &MaskedOpGen, SourceLocation Loc, const Expr *Filter=nullptr)
Emits a masked region.
QualType KmpDependInfoTy
Type typedef struct kmp_depend_info { kmp_intptr_t base_addr; size_t len; struct { bool in:1; bool ou...
llvm::Function * emitReductionFunction(StringRef ReducerName, SourceLocation Loc, llvm::Type *ArgsElemType, ArrayRef< const Expr * > Privates, ArrayRef< const Expr * > LHSExprs, ArrayRef< const Expr * > RHSExprs, ArrayRef< const Expr * > ReductionOps)
Emits reduction function.
virtual void emitForDispatchInit(CodeGenFunction &CGF, SourceLocation Loc, const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned, bool Ordered, const DispatchRTInput &DispatchValues)
Call the appropriate runtime routine to initialize it before start of loop.
Address getTaskReductionItem(CodeGenFunction &CGF, SourceLocation Loc, llvm::Value *ReductionsPtr, LValue SharedLVal) override
Get the address of void * type of the private copy of the reduction item specified by the SharedLVal...
void emitCriticalRegion(CodeGenFunction &CGF, StringRef CriticalName, const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc, const Expr *Hint=nullptr) override
Emits a critical region.
void emitDistributeStaticInit(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) override
void emitForStaticInit(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind, const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) override
Call the appropriate runtime routine to initialize it before start of loop.
bool emitTargetGlobalVariable(GlobalDecl GD) override
Emit the global variable if it is a valid device global variable.
llvm::Value * emitForNext(CodeGenFunction &CGF, SourceLocation Loc, unsigned IVSize, bool IVSigned, Address IL, Address LB, Address UB, Address ST) override
Call __kmpc_dispatch_next( ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter, kmp_int[32|64] *p_lowe...
llvm::Function * emitThreadPrivateVarDefinition(const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit, CodeGenFunction *CGF=nullptr) override
Emit a code for initialization of threadprivate variable.
void emitTargetDataStandAloneCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, const Expr *Device) override
Emit the data mapping/movement code associated with the directive D that should be of the form 'targe...
llvm::Function * emitTeamsOutlinedFunction(CodeGenFunction &CGF, const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) override
Emits outlined function for the specified OpenMP teams directive D.
void emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc, llvm::Function *OutlinedFn, ArrayRef< llvm::Value * > CapturedVars, const Expr *IfCond, llvm::Value *NumThreads, OpenMPNumThreadsClauseModifier NumThreadsModifier=OMPC_NUMTHREADS_unknown, OpenMPSeverityClauseKind Severity=OMPC_SEVERITY_fatal, const Expr *Message=nullptr) override
Emits code for parallel or serial call of the OutlinedFn with variables captured in a record which ad...
void emitReduction(CodeGenFunction &CGF, SourceLocation Loc, ArrayRef< const Expr * > Privates, ArrayRef< const Expr * > LHSExprs, ArrayRef< const Expr * > RHSExprs, ArrayRef< const Expr * > ReductionOps, ReductionOptionsTy Options) override
Emit a code for reduction clause.
void emitFlush(CodeGenFunction &CGF, ArrayRef< const Expr * > Vars, SourceLocation Loc, llvm::AtomicOrdering AO) override
Emit flush of the variables specified in 'omp flush' directive.
void emitDoacrossOrdered(CodeGenFunction &CGF, const OMPDependClause *C) override
Emit code for doacross ordered directive with 'depend' clause.
void emitTaskyieldCall(CodeGenFunction &CGF, SourceLocation Loc) override
Emits code for a taskyield directive.
Address getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF, QualType VarType, StringRef Name) override
Creates artificial threadprivate variable with name Name and type VarType.
Address getAddrOfThreadPrivate(CodeGenFunction &CGF, const VarDecl *VD, Address VDAddr, SourceLocation Loc) override
Returns address of the threadprivate variable for the current thread.
void emitSingleRegion(CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen, SourceLocation Loc, ArrayRef< const Expr * > CopyprivateVars, ArrayRef< const Expr * > DestExprs, ArrayRef< const Expr * > SrcExprs, ArrayRef< const Expr * > AssignmentOps) override
Emits a single region.
void emitTaskReductionFixups(CodeGenFunction &CGF, SourceLocation Loc, ReductionCodeGen &RCG, unsigned N) override
Required to resolve existing problems in the runtime.
llvm::Function * emitParallelOutlinedFunction(CodeGenFunction &CGF, const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) override
Emits outlined function for the specified OpenMP parallel directive D.
void emitCancellationPointCall(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind CancelRegion) override
Emit code for 'cancellation point' construct.
void emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind Kind, bool EmitChecks=true, bool ForceSimpleCall=false) override
Emit an implicit/explicit barrier for OpenMP threads.
Address getParameterAddress(CodeGenFunction &CGF, const VarDecl *NativeParam, const VarDecl *TargetParam) const override
Gets the address of the native argument basing on the address of the target-specific parameter.
void emitTeamsCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, SourceLocation Loc, llvm::Function *OutlinedFn, ArrayRef< llvm::Value * > CapturedVars) override
Emits code for teams call of the OutlinedFn with variables captured in a record which address is stor...
void emitForOrderedIterationEnd(CodeGenFunction &CGF, SourceLocation Loc, unsigned IVSize, bool IVSigned) override
Call the appropriate runtime routine to notify that we finished iteration of the ordered loop with th...
bool emitTargetGlobal(GlobalDecl GD) override
Emit the global GD if it is meaningful for the target.
void emitTaskReductionFini(CodeGenFunction &CGF, SourceLocation Loc, bool IsWorksharingReduction) override
Emits the following code for reduction clause with task modifier:
void emitOrderedRegion(CodeGenFunction &CGF, const RegionCodeGenTy &OrderedOpGen, SourceLocation Loc, bool IsThreads) override
Emit an ordered region.
void emitForStaticFinish(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind) override
Call the appropriate runtime routine to notify that we finished all the work with current loop.
llvm::Value * emitTaskReductionInit(CodeGenFunction &CGF, SourceLocation Loc, ArrayRef< const Expr * > LHSExprs, ArrayRef< const Expr * > RHSExprs, const OMPTaskDataTy &Data) override
Emit a code for initialization of task reduction clause.
void emitProcBindClause(CodeGenFunction &CGF, llvm::omp::ProcBindKind ProcBind, SourceLocation Loc) override
Emit call to void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid, int proc_bind) to generate...
void emitTargetOutlinedFunction(const OMPExecutableDirective &D, StringRef ParentName, llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) override
Emit outlined function for 'target' directive.
void emitMasterRegion(CodeGenFunction &CGF, const RegionCodeGenTy &MasterOpGen, SourceLocation Loc) override
Emits a master region.
void emitNumTeamsClause(CodeGenFunction &CGF, const Expr *NumTeams, const Expr *ThreadLimit, SourceLocation Loc) override
Emits call to void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_teams,...
void emitForDispatchDeinit(CodeGenFunction &CGF, SourceLocation Loc) override
This is used for non static scheduled types and when the ordered clause is present on the loop constr...
const VarDecl * translateParameter(const FieldDecl *FD, const VarDecl *NativeParam) const override
Translates the native parameter of outlined function if this is required for target.
void emitNumThreadsClause(CodeGenFunction &CGF, llvm::Value *NumThreads, SourceLocation Loc, OpenMPNumThreadsClauseModifier Modifier=OMPC_NUMTHREADS_unknown, OpenMPSeverityClauseKind Severity=OMPC_SEVERITY_fatal, SourceLocation SeverityLoc=SourceLocation(), const Expr *Message=nullptr, SourceLocation MessageLoc=SourceLocation()) override
Emits call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_threads) ...
void emitMaskedRegion(CodeGenFunction &CGF, const RegionCodeGenTy &MaskedOpGen, SourceLocation Loc, const Expr *Filter=nullptr) override
Emits a masked region.
void emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, const OMPExecutableDirective &D, llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds, const Expr *IfCond, const OMPTaskDataTy &Data) override
Emit task region for the task directive.
void emitTargetCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond, llvm::PointerIntPair< const Expr *, 2, OpenMPDeviceClauseModifier > Device, llvm::function_ref< llvm::Value *(CodeGenFunction &CGF, const OMPLoopDirective &D)> SizeEmitter) override
Emit the target offloading code associated with D.
bool emitTargetFunctions(GlobalDecl GD) override
Emit the target regions enclosed in GD function definition or the function itself in case it is a val...
void emitDoacrossInit(CodeGenFunction &CGF, const OMPLoopDirective &D, ArrayRef< Expr * > NumIterations) override
Emit initialization for doacross loop nesting support.
void emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc, const Expr *IfCond, OpenMPDirectiveKind CancelRegion) override
Emit code for 'cancel' construct.
void emitTaskwaitCall(CodeGenFunction &CGF, SourceLocation Loc, const OMPTaskDataTy &Data) override
Emit code for 'taskwait' directive.
void emitTaskgroupRegion(CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen, SourceLocation Loc) override
Emit a taskgroup region.
void emitTargetDataCalls(CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, const Expr *Device, const RegionCodeGenTy &CodeGen, CGOpenMPRuntime::TargetDataInfo &Info) override
Emit the target data mapping code associated with D.
void emitForDispatchInit(CodeGenFunction &CGF, SourceLocation Loc, const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned, bool Ordered, const DispatchRTInput &DispatchValues) override
This is used for non static scheduled types and when the ordered clause is present on the loop constr...
llvm::Function * emitTaskOutlinedFunction(const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, const VarDecl *PartIDVar, const VarDecl *TaskTVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen, bool Tied, unsigned &NumberOfParts) override
Emits outlined function for the OpenMP task directive D.
void emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D, llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds, const Expr *IfCond, const OMPTaskDataTy &Data) override
Emit task region for the taskloop directive.
unsigned getNonVirtualBaseLLVMFieldNo(const CXXRecordDecl *RD) const
llvm::StructType * getLLVMType() const
Return the "complete object" LLVM type associated with this record.
llvm::StructType * getBaseSubobjectLLVMType() const
Return the "base subobject" LLVM type associated with this record.
unsigned getLLVMFieldNo(const FieldDecl *FD) const
Return llvm::StructType element number that corresponds to the field FD.
unsigned getVirtualBaseIndex(const CXXRecordDecl *base) const
Return the LLVM field index corresponding to the given virtual base.
API for captured statement code generation.
virtual void EmitBody(CodeGenFunction &CGF, const Stmt *S)
Emit the captured statement body.
virtual const FieldDecl * lookup(const VarDecl *VD) const
Lookup the captured field decl for a variable.
RAII for correct setting/restoring of CapturedStmtInfo.
The scope used to remap some variables as private in the OpenMP loop body (or other captured region e...
bool Privatize()
Privatizes local variables previously registered as private.
bool addPrivate(const VarDecl *LocalVD, Address Addr)
Registers LocalVD variable as a private with Addr as the address of the corresponding private variabl...
An RAII object to set (and then clear) a mapping for an OpaqueValueExpr.
Enters a new scope for capturing cleanups, all of which will be executed once the scope is exited.
CodeGenFunction - This class organizes the per-function state that is used while generating LLVM code...
LValue EmitLoadOfReferenceLValue(LValue RefLVal)
Definition CGExpr.cpp:3213
void EmitBranchOnBoolExpr(const Expr *Cond, llvm::BasicBlock *TrueBlock, llvm::BasicBlock *FalseBlock, uint64_t TrueCount, Stmt::Likelihood LH=Stmt::LH_None, const Expr *ConditionalOp=nullptr, const VarDecl *ConditionalDecl=nullptr)
EmitBranchOnBoolExpr - Emit a branch on a boolean condition (e.g.
void emitDestroy(Address addr, QualType type, Destroyer *destroyer, bool useEHCleanupForArray)
emitDestroy - Immediately perform the destruction of the given object.
Definition CGDecl.cpp:2394
JumpDest getJumpDestInCurrentScope(llvm::BasicBlock *Target)
The given basic block lies in the current EH scope, but may be a target of a potentially scope-crossi...
static void EmitOMPTargetParallelDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetParallelDirective &S)
void EmitNullInitialization(Address DestPtr, QualType Ty)
EmitNullInitialization - Generate code to set a value of the given type to null, If the type contains...
CGCapturedStmtInfo * CapturedStmtInfo
ComplexPairTy EmitLoadOfComplex(LValue src, SourceLocation loc)
EmitLoadOfComplex - Load a complex number from the specified l-value.
static void EmitOMPTargetDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetDirective &S)
Emit device code for the target directive.
static void EmitOMPTargetTeamsDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetTeamsDirective &S)
Emit device code for the target teams directive.
static void EmitOMPTargetTeamsDistributeDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetTeamsDistributeDirective &S)
Emit device code for the target teams distribute directive.
llvm::BasicBlock * createBasicBlock(const Twine &name="", llvm::Function *parent=nullptr, llvm::BasicBlock *before=nullptr)
createBasicBlock - Create an LLVM basic block.
const LangOptions & getLangOpts() const
AutoVarEmission EmitAutoVarAlloca(const VarDecl &var)
EmitAutoVarAlloca - Emit the alloca and debug information for a local variable.
Definition CGDecl.cpp:1482
void pushDestroy(QualType::DestructionKind dtorKind, Address addr, QualType type)
pushDestroy - Push the standard destructor for the given type as at least a normal cleanup.
Definition CGDecl.cpp:2278
Address EmitLoadOfPointer(Address Ptr, const PointerType *PtrTy, LValueBaseInfo *BaseInfo=nullptr, TBAAAccessInfo *TBAAInfo=nullptr)
Load a pointer with type PtrTy stored at address Ptr.
Definition CGExpr.cpp:3222
void EmitBranchThroughCleanup(JumpDest Dest)
EmitBranchThroughCleanup - Emit a branch from the current insert block through the normal cleanup han...
const Decl * CurCodeDecl
CurCodeDecl - This is the inner-most code context, which includes blocks.
Destroyer * getDestroyer(QualType::DestructionKind destructionKind)
Definition CGDecl.cpp:2251
llvm::AssertingVH< llvm::Instruction > AllocaInsertPt
AllocaInsertPoint - This is an instruction in the entry block before which we prefer to insert alloca...
void EmitAggregateAssign(LValue Dest, LValue Src, QualType EltTy)
Emit an aggregate assignment.
JumpDest ReturnBlock
ReturnBlock - Unified return block.
void EmitAggregateCopy(LValue Dest, LValue Src, QualType EltTy, AggValueSlot::Overlap_t MayOverlap, bool isVolatile=false)
EmitAggregateCopy - Emit an aggregate copy.
LValue EmitLValueForField(LValue Base, const FieldDecl *Field, bool IsInBounds=true)
Definition CGExpr.cpp:5484
RawAddress CreateDefaultAlignTempAlloca(llvm::Type *Ty, const Twine &Name="tmp")
CreateDefaultAlignTempAlloca - This creates an alloca with the default ABI alignment of the given L...
Definition CGExpr.cpp:177
void GenerateOpenMPCapturedVars(const CapturedStmt &S, SmallVectorImpl< llvm::Value * > &CapturedVars)
void EmitIgnoredExpr(const Expr *E)
EmitIgnoredExpr - Emit an expression in a context which ignores the result.
Definition CGExpr.cpp:245
RValue EmitLoadOfLValue(LValue V, SourceLocation Loc)
EmitLoadOfLValue - Given an expression that represents a value lvalue, this method emits the address ...
Definition CGExpr.cpp:2402
LValue EmitArraySectionExpr(const ArraySectionExpr *E, bool IsLowerBound=true)
Definition CGExpr.cpp:5039
LValue EmitOMPSharedLValue(const Expr *E)
Emits the lvalue for the expression with possibly captured variable.
void StartFunction(GlobalDecl GD, QualType RetTy, llvm::Function *Fn, const CGFunctionInfo &FnInfo, const FunctionArgList &Args, SourceLocation Loc=SourceLocation(), SourceLocation StartLoc=SourceLocation())
Emit code for the start of a function.
void EmitOMPCopy(QualType OriginalType, Address DestAddr, Address SrcAddr, const VarDecl *DestVD, const VarDecl *SrcVD, const Expr *Copy)
Emit proper copying of data from one variable to another.
llvm::Value * EvaluateExprAsBool(const Expr *E)
EvaluateExprAsBool - Perform the usual unary conversions on the specified expression and compare the ...
Definition CGExpr.cpp:226
JumpDest getOMPCancelDestination(OpenMPDirectiveKind Kind)
llvm::Value * emitArrayLength(const ArrayType *arrayType, QualType &baseType, Address &addr)
emitArrayLength - Compute the length of an array, even if it's a VLA, and drill down to the base elem...
void EmitOMPAggregateAssign(Address DestAddr, Address SrcAddr, QualType OriginalType, const llvm::function_ref< void(Address, Address)> CopyGen)
Perform element by element copying of arrays with type OriginalType from SrcAddr to DestAddr using co...
bool HaveInsertPoint() const
HaveInsertPoint - True if an insertion point is defined.
llvm::Value * getTypeSize(QualType Ty)
Returns calculated size of the specified type.
LValue MakeRawAddrLValue(llvm::Value *V, QualType T, CharUnits Alignment, AlignmentSource Source=AlignmentSource::Type)
Same as MakeAddrLValue above except that the pointer is known to be unsigned.
LValue EmitLValueForFieldInitialization(LValue Base, const FieldDecl *Field)
EmitLValueForFieldInitialization - Like EmitLValueForField, except that if the Field is a reference,...
Definition CGExpr.cpp:5658
void incrementProfileCounter(const Stmt *S, llvm::Value *StepV=nullptr)
Increment the profiler's counter for the given statement by StepV.
VlaSizePair getVLASize(const VariableArrayType *vla)
Returns an LLVM value that corresponds to the size, in non-variably-sized elements,...
llvm::CallInst * EmitNounwindRuntimeCall(llvm::FunctionCallee callee, const Twine &name="")
llvm::Value * EmitLoadOfScalar(Address Addr, bool Volatile, QualType Ty, SourceLocation Loc, AlignmentSource Source=AlignmentSource::Type, bool isNontemporal=false)
EmitLoadOfScalar - Load a scalar value from an address, taking care to appropriately convert from the...
void EmitStoreOfComplex(ComplexPairTy V, LValue dest, bool isInit)
EmitStoreOfComplex - Store a complex number into the specified l-value.
const Decl * CurFuncDecl
CurFuncDecl - Holds the Decl for the current outermost non-closure context.
void EmitAutoVarCleanups(const AutoVarEmission &emission)
Definition CGDecl.cpp:2202
void EmitStoreThroughLValue(RValue Src, LValue Dst, bool isInit=false)
EmitStoreThroughLValue - Store the specified rvalue into the specified lvalue, where both are guarant...
Definition CGExpr.cpp:2627
LValue EmitLoadOfPointerLValue(Address Ptr, const PointerType *PtrTy)
Definition CGExpr.cpp:3232
void EmitAnyExprToMem(const Expr *E, Address Location, Qualifiers Quals, bool IsInitializer)
EmitAnyExprToMem - Emits the code necessary to evaluate an arbitrary expression into the given memory...
Definition CGExpr.cpp:296
bool needsEHCleanup(QualType::DestructionKind kind)
Determines whether an EH cleanup is required to destroy a type with the given destruction kind.
llvm::DenseMap< const ValueDecl *, FieldDecl * > LambdaCaptureFields
llvm::CallInst * EmitRuntimeCall(llvm::FunctionCallee callee, const Twine &name="")
llvm::Type * ConvertTypeForMem(QualType T)
static void EmitOMPTargetTeamsDistributeParallelForDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetTeamsDistributeParallelForDirective &S)
static void EmitOMPTargetParallelForSimdDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetParallelForSimdDirective &S)
Emit device code for the target parallel for simd directive.
CodeGenTypes & getTypes() const
static TypeEvaluationKind getEvaluationKind(QualType T)
getEvaluationKind - Return the TypeEvaluationKind of QualType T.
void EmitOMPTargetTaskBasedDirective(const OMPExecutableDirective &S, const RegionCodeGenTy &BodyGen, OMPTargetDataInfo &InputInfo)
Address EmitPointerWithAlignment(const Expr *Addr, LValueBaseInfo *BaseInfo=nullptr, TBAAAccessInfo *TBAAInfo=nullptr, KnownNonNull_t IsKnownNonNull=NotKnownNonNull)
EmitPointerWithAlignment - Given an expression with a pointer type, emit the value and compute our be...
Definition CGExpr.cpp:1575
static void EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetTeamsDistributeParallelForSimdDirective &S)
Emit device code for the target teams distribute parallel for simd directive.
void EmitBranch(llvm::BasicBlock *Block)
EmitBranch - Emit a branch to the specified basic block from the current insert block,...
Definition CGStmt.cpp:676
llvm::Function * GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S, const OMPExecutableDirective &D)
RawAddress CreateMemTemp(QualType T, const Twine &Name="tmp", RawAddress *Alloca=nullptr)
CreateMemTemp - Create a temporary memory object of the given type, with appropriate alignment and cas...
Definition CGExpr.cpp:189
void EmitVarDecl(const VarDecl &D)
EmitVarDecl - Emit a local variable declaration.
Definition CGDecl.cpp:203
llvm::Value * EmitCheckedInBoundsGEP(llvm::Type *ElemTy, llvm::Value *Ptr, ArrayRef< llvm::Value * > IdxList, bool SignedIndices, bool IsSubtraction, SourceLocation Loc, const Twine &Name="")
Same as IRBuilder::CreateInBoundsGEP, but additionally emits a check to detect undefined behavior whe...
static void EmitOMPTargetParallelGenericLoopDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetParallelGenericLoopDirective &S)
Emit device code for the target parallel loop directive.
llvm::Value * EmitScalarExpr(const Expr *E, bool IgnoreResultAssign=false)
EmitScalarExpr - Emit the computation of the specified expression of LLVM scalar type,...
static bool IsWrappedCXXThis(const Expr *E)
Check if E is a C++ "this" pointer wrapped in value-preserving casts.
Definition CGExpr.cpp:1633
LValue MakeAddrLValue(Address Addr, QualType T, AlignmentSource Source=AlignmentSource::Type)
void FinishFunction(SourceLocation EndLoc=SourceLocation())
FinishFunction - Complete IR generation of the current function.
void EmitAtomicStore(RValue rvalue, LValue lvalue, bool isInit)
static void EmitOMPTargetSimdDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetSimdDirective &S)
Emit device code for the target simd directive.
static void EmitOMPTargetParallelForDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetParallelForDirective &S)
Emit device code for the target parallel for directive.
Address GetAddrOfLocalVar(const VarDecl *VD)
GetAddrOfLocalVar - Return the address of a local variable.
bool ConstantFoldsToSimpleInteger(const Expr *Cond, bool &Result, bool AllowLabels=false)
ConstantFoldsToSimpleInteger - If the specified expression does not fold to a constant,...
static void EmitOMPTargetTeamsGenericLoopDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetTeamsGenericLoopDirective &S)
Emit device code for the target teams loop directive.
LValue EmitMemberExpr(const MemberExpr *E)
Definition CGExpr.cpp:5294
std::pair< llvm::Value *, llvm::Value * > ComplexPairTy
Address ReturnValue
ReturnValue - The temporary alloca to hold the return value.
LValue EmitLValue(const Expr *E, KnownNonNull_t IsKnownNonNull=NotKnownNonNull)
EmitLValue - Emit code to compute a designator that specifies the location of the expression.
Definition CGExpr.cpp:1691
static void EmitOMPTargetTeamsDistributeSimdDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetTeamsDistributeSimdDirective &S)
Emit device code for the target teams distribute simd directive.
llvm::Value * EmitScalarConversion(llvm::Value *Src, QualType SrcTy, QualType DstTy, SourceLocation Loc)
Emit a conversion from the specified type to the specified destination type, both of which are LLVM s...
void EmitVariablyModifiedType(QualType Ty)
EmitVLASize - Capture all the sizes for the VLA expressions in the given variably-modified type and s...
bool isTrivialInitializer(const Expr *Init)
Determine whether the given initializer is trivial in the sense that it requires no code to be genera...
Definition CGDecl.cpp:1807
void EmitStoreOfScalar(llvm::Value *Value, Address Addr, bool Volatile, QualType Ty, AlignmentSource Source=AlignmentSource::Type, bool isInit=false, bool isNontemporal=false)
EmitStoreOfScalar - Store a scalar value to an address, taking care to appropriately convert from the...
void EmitBlock(llvm::BasicBlock *BB, bool IsFinished=false)
EmitBlock - Emit the given block.
Definition CGStmt.cpp:656
void EmitExprAsInit(const Expr *init, const ValueDecl *D, LValue lvalue, bool capturedByInit)
EmitExprAsInit - Emits the code necessary to initialize a location in memory with the given initializ...
Definition CGDecl.cpp:2092
LValue MakeNaturalAlignRawAddrLValue(llvm::Value *V, QualType T)
This class organizes the cross-function state that is used while generating LLVM code.
void SetInternalFunctionAttributes(GlobalDecl GD, llvm::Function *F, const CGFunctionInfo &FI)
Set the attributes on the LLVM function for the given decl and function info.
llvm::Module & getModule() const
const IntrusiveRefCntPtr< llvm::vfs::FileSystem > & getFileSystem() const
DiagnosticsEngine & getDiags() const
const LangOptions & getLangOpts() const
CharUnits getNaturalTypeAlignment(QualType T, LValueBaseInfo *BaseInfo=nullptr, TBAAAccessInfo *TBAAInfo=nullptr, bool forPointeeType=false)
CGOpenMPRuntime & getOpenMPRuntime()
Return a reference to the configured OpenMP runtime.
TBAAAccessInfo getTBAAInfoForSubobject(LValue Base, QualType AccessType)
getTBAAInfoForSubobject - Get TBAA information for an access with a given base lvalue.
ASTContext & getContext() const
const CodeGenOptions & getCodeGenOpts() const
StringRef getMangledName(GlobalDecl GD)
std::optional< CharUnits > getOMPAllocateAlignment(const VarDecl *VD)
Return the alignment specified in an allocate directive, if present.
Definition CGDecl.cpp:2944
llvm::Constant * EmitNullConstant(QualType T)
Return the result of value-initializing the given type, i.e.
llvm::Type * ConvertType(QualType T)
ConvertType - Convert type T into a llvm::Type.
llvm::FunctionType * GetFunctionType(const CGFunctionInfo &Info)
GetFunctionType - Get the LLVM function type for.
Definition CGCall.cpp:1702
const CGFunctionInfo & arrangeBuiltinFunctionDeclaration(QualType resultType, const FunctionArgList &args)
A builtin function is a freestanding function using the default C conventions.
Definition CGCall.cpp:740
const CGRecordLayout & getCGRecordLayout(const RecordDecl *)
getCGRecordLayout - Return record layout info for the given record decl.
A specialization of Address that requires the address to be an LLVM Constant.
Definition Address.h:296
static ConstantAddress invalid()
Definition Address.h:304
void pushTerminate()
Push a terminate handler on the stack.
void popTerminate()
Pops a terminate handler off the stack.
Definition CGCleanup.h:639
FunctionArgList - Type for representing both the decl and type of parameters to a function.
Definition CGCall.h:375
LValue - This represents an lvalue reference.
Definition CGValue.h:183
CharUnits getAlignment() const
Definition CGValue.h:352
llvm::Value * getPointer(CodeGenFunction &CGF) const
const Qualifiers & getQuals() const
Definition CGValue.h:347
Address getAddress() const
Definition CGValue.h:370
LValueBaseInfo getBaseInfo() const
Definition CGValue.h:355
QualType getType() const
Definition CGValue.h:300
TBAAAccessInfo getTBAAInfo() const
Definition CGValue.h:344
A basic class for pre|post-action for advanced codegen sequence for OpenMP region.
virtual void Enter(CodeGenFunction &CGF)
RValue - This trivial value class is used to represent the result of an expression that is evaluated.
Definition CGValue.h:42
static RValue get(llvm::Value *V)
Definition CGValue.h:99
static RValue getComplex(llvm::Value *V1, llvm::Value *V2)
Definition CGValue.h:109
llvm::Value * getScalarVal() const
getScalarVal() - Return the Value* of this scalar value.
Definition CGValue.h:72
An abstract representation of an aligned address.
Definition Address.h:42
llvm::Type * getElementType() const
Return the type of the values stored in this address.
Definition Address.h:77
llvm::Value * getPointer() const
Definition Address.h:66
static RawAddress invalid()
Definition Address.h:61
Class intended to support codegen of all kind of the reduction clauses.
LValue getSharedLValue(unsigned N) const
Returns LValue for the reduction item.
const Expr * getRefExpr(unsigned N) const
Returns the base declaration of the reduction item.
LValue getOrigLValue(unsigned N) const
Returns LValue for the original reduction item.
bool needCleanups(unsigned N)
Returns true if the private copy requires cleanups.
void emitAggregateType(CodeGenFunction &CGF, unsigned N)
Emits the code for the variable-modified type, if required.
const VarDecl * getBaseDecl(unsigned N) const
Returns the base declaration of the reduction item.
QualType getPrivateType(unsigned N) const
Return the type of the private item.
bool usesReductionInitializer(unsigned N) const
Returns true if the initialization of the reduction item uses initializer from declare reduction cons...
void emitSharedOrigLValue(CodeGenFunction &CGF, unsigned N)
Emits lvalue for the shared and original reduction item.
void emitInitialization(CodeGenFunction &CGF, unsigned N, Address PrivateAddr, Address SharedAddr, llvm::function_ref< bool(CodeGenFunction &)> DefaultInit)
Performs initialization of the private copy for the reduction item.
std::pair< llvm::Value *, llvm::Value * > getSizes(unsigned N) const
Returns the size of the reduction item (in chars and total number of elements in the item),...
ReductionCodeGen(ArrayRef< const Expr * > Shareds, ArrayRef< const Expr * > Origs, ArrayRef< const Expr * > Privates, ArrayRef< const Expr * > ReductionOps)
void emitCleanups(CodeGenFunction &CGF, unsigned N, Address PrivateAddr)
Emits cleanup code for the reduction item.
Address adjustPrivateAddress(CodeGenFunction &CGF, unsigned N, Address PrivateAddr)
Adjusts PrivateAddr for use instead of the original variable address in normal operations.
Class provides a way to call simple version of codegen for OpenMP region, or an advanced with possibl...
void operator()(CodeGenFunction &CGF) const
void setAction(PrePostActionTy &Action) const
ConstStmtVisitor - This class implements a simple visitor for Stmt subclasses.
DeclContext - This is used only as base class of specific decl types that can act as declaration cont...
Definition DeclBase.h:1449
void addDecl(Decl *D)
Add the declaration D into this context.
A reference to a declared variable, function, enum, etc.
Definition Expr.h:1270
ValueDecl * getDecl()
Definition Expr.h:1338
Decl - This represents one declaration (or definition), e.g.
Definition DeclBase.h:86
T * getAttr() const
Definition DeclBase.h:573
bool hasAttrs() const
Definition DeclBase.h:518
ASTContext & getASTContext() const LLVM_READONLY
Definition DeclBase.cpp:546
void addAttr(Attr *A)
virtual Stmt * getBody() const
getBody - If this Decl represents a declaration for a body of code, such as a function or method defi...
Definition DeclBase.h:1087
llvm::iterator_range< specific_attr_iterator< T > > specific_attrs() const
Definition DeclBase.h:559
SourceLocation getLocation() const
Definition DeclBase.h:439
DeclContext * getDeclContext()
Definition DeclBase.h:448
AttrVec & getAttrs()
Definition DeclBase.h:524
bool hasAttr() const
Definition DeclBase.h:577
virtual Decl * getCanonicalDecl()
Retrieves the "canonical" declaration of the given declaration.
Definition DeclBase.h:978
SourceLocation getBeginLoc() const LLVM_READONLY
Definition Decl.h:831
DiagnosticBuilder Report(SourceLocation Loc, unsigned DiagID)
Issue the message to the client.
This represents one expression.
Definition Expr.h:112
bool isIntegerConstantExpr(const ASTContext &Ctx) const
bool isGLValue() const
Definition Expr.h:287
Expr * IgnoreParenNoopCasts(const ASTContext &Ctx) LLVM_READONLY
Skip past any parentheses and casts which do not change the value (including ptr->int casts of the sa...
Definition Expr.cpp:3116
@ SE_AllowSideEffects
Allow any unmodeled side effect.
Definition Expr.h:674
@ SE_AllowUndefinedBehavior
Allow UB that we can give a value, but not arbitrary unmodeled side effects.
Definition Expr.h:672
Expr * IgnoreParenCasts() LLVM_READONLY
Skip past any parentheses and casts which might surround this expression until reaching a fixed point...
Definition Expr.cpp:3094
llvm::APSInt EvaluateKnownConstInt(const ASTContext &Ctx) const
EvaluateKnownConstInt - Call EvaluateAsRValue and return the folded integer.
Expr * IgnoreParenImpCasts() LLVM_READONLY
Skip past any parentheses and implicit casts which might surround this expression until reaching a fi...
Definition Expr.cpp:3089
bool isEvaluatable(const ASTContext &Ctx, SideEffectsKind AllowSideEffects=SE_NoSideEffects) const
isEvaluatable - Call EvaluateAsRValue to see if this expression can be constant folded without side-e...
std::optional< llvm::APSInt > getIntegerConstantExpr(const ASTContext &Ctx) const
isIntegerConstantExpr - Return the value if this expression is a valid integer constant expression.
bool HasSideEffects(const ASTContext &Ctx, bool IncludePossibleEffects=true) const
HasSideEffects - This routine returns true for all those expressions which have any effect other than...
Definition Expr.cpp:3669
bool EvaluateAsBooleanCondition(bool &Result, const ASTContext &Ctx, bool InConstantContext=false) const
EvaluateAsBooleanCondition - Return true if this is a constant which we can fold and convert to a boo...
SourceLocation getExprLoc() const LLVM_READONLY
getExprLoc - Return the preferred location for the arrow when diagnosing a problem with a generic exp...
Definition Expr.cpp:276
static bool isSameComparisonOperand(const Expr *E1, const Expr *E2)
Checks that the two Expr's will refer to the same value as a comparison operand.
Definition Expr.cpp:4299
QualType getType() const
Definition Expr.h:144
bool hasNonTrivialCall(const ASTContext &Ctx) const
Determine whether this expression involves a call to any function that is not trivial.
Definition Expr.cpp:4035
Represents a member of a struct/union/class.
Definition Decl.h:3160
unsigned getFieldIndex() const
Returns the index of this field within its record, as appropriate for passing to ASTRecordLayout::get...
Definition Decl.h:3245
const RecordDecl * getParent() const
Returns the parent of this field declaration, which is the struct in which this field is defined.
Definition Decl.h:3396
static FieldDecl * Create(const ASTContext &C, DeclContext *DC, SourceLocation StartLoc, SourceLocation IdLoc, const IdentifierInfo *Id, QualType T, TypeSourceInfo *TInfo, Expr *BW, bool Mutable, InClassInitStyle InitStyle)
Definition Decl.cpp:4696
Represents a function declaration or definition.
Definition Decl.h:2000
const ParmVarDecl * getParamDecl(unsigned i) const
Definition Decl.h:2797
QualType getReturnType() const
Definition Decl.h:2845
ArrayRef< ParmVarDecl * > parameters() const
Definition Decl.h:2774
FunctionDecl * getCanonicalDecl() override
Retrieves the "canonical" declaration of the given declaration.
Definition Decl.cpp:3743
FunctionDecl * getMostRecentDecl()
Returns the most recent (re)declaration of this declaration.
unsigned getNumParams() const
Return the number of parameters this function must have based on its FunctionType.
Definition Decl.cpp:3822
FunctionDecl * getPreviousDecl()
Return the previous declaration of this declaration or NULL if this is the first declaration.
GlobalDecl - represents a global declaration.
Definition GlobalDecl.h:57
const Decl * getDecl() const
Definition GlobalDecl.h:106
static ImplicitParamDecl * Create(ASTContext &C, DeclContext *DC, SourceLocation IdLoc, IdentifierInfo *Id, QualType T, ImplicitParamKind ParamKind)
Create implicit parameter.
Definition Decl.cpp:5536
static IntegerLiteral * Create(const ASTContext &C, const llvm::APInt &V, QualType type, SourceLocation l)
Returns a new integer literal with value 'V' and type 'type'.
Definition Expr.cpp:974
MemberExpr - [C99 6.5.2.3] Structure and Union Members.
Definition Expr.h:3364
ValueDecl * getMemberDecl() const
Retrieve the member declaration to which this expression refers.
Definition Expr.h:3447
Expr * getBase() const
Definition Expr.h:3441
StringRef getName() const
Get the name of identifier for this declaration as a StringRef.
Definition Decl.h:301
bool isExternallyVisible() const
Definition Decl.h:433
This represents clause 'affinity' in the 'pragma omp task'-based directives.
ArrayRef< MappableComponent > MappableExprComponentListRef
static std::pair< const Expr *, std::optional< size_t > > findAttachPtrExpr(MappableExprComponentListRef Components, OpenMPDirectiveKind CurDirKind)
Find the attach pointer expression from a list of mappable expression components.
static QualType getComponentExprElementType(const Expr *Exp)
Get the type of an element of a ComponentList Expr Exp.
const Stmt * getPreInitStmt() const
Get pre-initialization statement for the clause.
This is a basic class for representing single OpenMP clause.
This represents 'pragma omp declare mapper ...' directive.
Definition DeclOpenMP.h:349
Expr * getMapperVarRef()
Get the variable declared in the mapper.
Definition DeclOpenMP.h:411
This represents 'pragma omp declare reduction ...' directive.
Definition DeclOpenMP.h:239
Expr * getInitializer()
Get initializer expression (if specified) of the declare reduction construct.
Definition DeclOpenMP.h:300
Expr * getInitPriv()
Get Priv variable of the initializer.
Definition DeclOpenMP.h:311
Expr * getCombinerOut()
Get Out variable of the combiner.
Definition DeclOpenMP.h:288
Expr * getCombinerIn()
Get In variable of the combiner.
Definition DeclOpenMP.h:285
Expr * getCombiner()
Get combiner expression of the declare reduction construct.
Definition DeclOpenMP.h:282
Expr * getInitOrig()
Get Orig variable of the initializer.
Definition DeclOpenMP.h:308
OMPDeclareReductionInitKind getInitializerKind() const
Get initializer kind.
Definition DeclOpenMP.h:303
This represents implicit clause 'depend' for the 'pragma omp task' directive.
This represents 'detach' clause in the 'pragma omp task' directive.
This represents 'device' clause in the 'pragma omp ...' directive.
This represents the 'doacross' clause for the 'pragma omp ordered' directive.
This represents 'dyn_groupprivate' clause in 'pragma omp target ...' and 'pragma omp teams ....
This represents 'if' clause in the 'pragma omp ...' directive.
Expr * getCondition() const
Returns condition.
This represents clause 'in_reduction' in the 'pragma omp task' directives.
OMPIteratorHelperData & getHelper(unsigned I)
Fetches helper data for the specified iteration space.
Definition Expr.cpp:5478
unsigned numOfIterators() const
Returns number of iterator definitions.
Definition ExprOpenMP.h:275
This represents clause 'lastprivate' in the 'pragma omp ...' directives.
This represents clause 'nontemporal' in the 'pragma omp ...' directives.
This represents 'nowait' clause in the 'pragma omp ...' directive.
This represents 'num_teams' clause in the 'pragma omp ...' directive.
This represents 'num_threads' clause in the 'pragma omp ...' directive.
This represents 'ordered' clause in the 'pragma omp ...' directive.
This represents clause 'private' in the 'pragma omp ...' directives.
This represents 'pragma omp requires...' directive.
Definition DeclOpenMP.h:479
clauselist_range clauselists()
Definition DeclOpenMP.h:504
This represents 'thread_limit' clause in the 'pragma omp ...' directive.
This represents 'threadset' clause in the 'pragma omp task ...' directive.
This represents clause 'uses_allocators' in the 'pragma omp target'-based directives.
This represents 'ompx_attribute' clause in a directive that might generate an outlined function.
This represents 'ompx_bare' clause in the 'pragma omp target teams ...' directive.
This represents 'ompx_dyn_cgroup_mem' clause in the 'pragma omp target ...' directive.
OpaqueValueExpr - An expression referring to an opaque object of a fixed type and value class.
Definition Expr.h:1178
Represents a parameter to a function.
Definition Decl.h:1790
PointerType - C99 6.7.5.1 - Pointer Declarators.
Definition TypeBase.h:3329
Represents an unpacked "presumed" location which can be presented to the user.
unsigned getColumn() const
Return the presumed column number of this location.
const char * getFilename() const
Return the presumed filename of this location.
unsigned getLine() const
Return the presumed line number of this location.
A (possibly-)qualified type.
Definition TypeBase.h:937
void addRestrict()
Add the restrict qualifier to this QualType.
Definition TypeBase.h:1172
QualType withRestrict() const
Definition TypeBase.h:1175
bool isNull() const
Return true if this QualType doesn't point to a type yet.
Definition TypeBase.h:1004
const Type * getTypePtr() const
Retrieves a pointer to the underlying (unqualified) type.
Definition TypeBase.h:8293
Qualifiers getQualifiers() const
Retrieve the set of qualifiers applied to this type.
Definition TypeBase.h:8333
QualType getNonReferenceType() const
If Type is a reference type (e.g., const int&), returns the type that the reference refers to ("const...
Definition TypeBase.h:8478
QualType getCanonicalType() const
Definition TypeBase.h:8345
DestructionKind isDestructedType() const
Returns a nonzero value if objects of this type require non-trivial work to clean up after.
Definition TypeBase.h:1545
Represents a struct/union/class.
Definition Decl.h:4321
field_iterator field_end() const
Definition Decl.h:4527
field_range fields() const
Definition Decl.h:4524
virtual void completeDefinition()
Note that the definition of this type is now complete.
Definition Decl.cpp:5225
bool field_empty() const
Definition Decl.h:4532
field_iterator field_begin() const
Definition Decl.cpp:5209
Scope - A scope is a transient data structure that is used while parsing the program.
Definition Scope.h:41
Encodes a location in the source.
static SourceLocation getFromRawEncoding(UIntTy Encoding)
Turn a raw encoding of a SourceLocation object into a real SourceLocation.
bool isValid() const
Return true if this is a valid SourceLocation object.
UIntTy getRawEncoding() const
When a SourceLocation itself cannot be used, this returns an (opaque) 32-bit integer encoding for it.
This class handles loading and caching of source files into memory.
PresumedLoc getPresumedLoc(SourceLocation Loc, bool UseLineDirectives=true) const
Returns the "presumed" location of the specified SourceLocation.
Stmt - This represents one statement.
Definition Stmt.h:85
child_range children()
Definition Stmt.cpp:299
StmtClass getStmtClass() const
Definition Stmt.h:1484
SourceRange getSourceRange() const LLVM_READONLY
SourceLocation tokens are not useful in isolation - they are low level value objects created/interpre...
Definition Stmt.cpp:338
Stmt * IgnoreContainers(bool IgnoreCaptured=false)
Skip no-op (attributed, compound) container stmts and skip captured stmt at the top,...
Definition Stmt.cpp:205
SourceLocation getBeginLoc() const LLVM_READONLY
Definition Stmt.cpp:350
void startDefinition()
Starts the definition of this tag declaration.
Definition Decl.cpp:4902
bool isUnion() const
Definition Decl.h:3922
The base class of the type hierarchy.
Definition TypeBase.h:1833
bool isVoidType() const
Definition TypeBase.h:8892
const Type * getPointeeOrArrayElementType() const
If this is a pointer type, return the pointee type.
Definition TypeBase.h:9072
bool isSignedIntegerType() const
Return true if this is an integer type that is signed, according to C99 6.2.5p4 [char,...
Definition Type.cpp:2206
CXXRecordDecl * getAsCXXRecordDecl() const
Retrieves the CXXRecordDecl that this type refers to, either because the type is a RecordType or beca...
Definition Type.h:26
RecordDecl * getAsRecordDecl() const
Retrieves the RecordDecl this type refers to.
Definition Type.h:41
bool isArrayType() const
Definition TypeBase.h:8629
bool isPointerType() const
Definition TypeBase.h:8530
CanQualType getCanonicalTypeUnqualified() const
bool isIntegerType() const
isIntegerType() does not include complex integers (a GCC extension).
Definition TypeBase.h:8936
const T * castAs() const
Member-template castAs<specific type>.
Definition TypeBase.h:9179
bool isReferenceType() const
Definition TypeBase.h:8554
QualType getPointeeType() const
If this is a pointer, ObjC object pointer, or block pointer, this returns the respective pointee.
Definition Type.cpp:753
bool isLValueReferenceType() const
Definition TypeBase.h:8558
bool isAggregateType() const
Determines whether the type is a C++ aggregate type or C aggregate or union type.
Definition Type.cpp:2412
RecordDecl * castAsRecordDecl() const
Definition Type.h:48
QualType getCanonicalTypeInternal() const
Definition TypeBase.h:3120
const Type * getBaseElementTypeUnsafe() const
Get the base element type of this type, potentially discarding type qualifiers.
Definition TypeBase.h:9065
bool isVariablyModifiedType() const
Whether this type is a variably-modified type (C99 6.7.5).
Definition TypeBase.h:2801
const ArrayType * getAsArrayTypeUnsafe() const
A variant of getAs<> for array types which silently discards qualifiers from the outermost type.
Definition TypeBase.h:9165
bool isFloatingType() const
Definition Type.cpp:2305
bool isUnsignedIntegerType() const
Return true if this is an integer type that is unsigned, according to C99 6.2.5p6 [which returns true...
Definition Type.cpp:2254
bool isAnyPointerType() const
Definition TypeBase.h:8538
const T * getAs() const
Member-template getAs<specific type>'.
Definition TypeBase.h:9112
bool isRecordType() const
Definition TypeBase.h:8657
bool isUnionType() const
Definition Type.cpp:719
Represent the declaration of a variable (in which case it is an lvalue) a function (in which case it ...
Definition Decl.h:712
QualType getType() const
Definition Decl.h:723
Represents a variable declaration or definition.
Definition Decl.h:926
VarDecl * getCanonicalDecl() override
Retrieves the "canonical" declaration of the given declaration.
Definition Decl.cpp:2264
VarDecl * getDefinition(ASTContext &)
Get the real (not just tentative) definition for this declaration.
Definition Decl.cpp:2373
const Expr * getInit() const
Definition Decl.h:1368
bool hasExternalStorage() const
Returns true if a variable has extern or private_extern storage.
Definition Decl.h:1217
@ DeclarationOnly
This declaration is only a declaration.
Definition Decl.h:1295
DefinitionKind hasDefinition(ASTContext &) const
Check whether this variable is defined in this translation unit.
Definition Decl.cpp:2382
bool isLocalVarDeclOrParm() const
Similar to isLocalVarDecl but also includes parameters.
Definition Decl.h:1262
const Expr * getAnyInitializer() const
Get the initializer for this variable, no matter which declaration it is attached to.
Definition Decl.h:1358
Represents a C array with a specified size that is not an integer-constant-expression.
Definition TypeBase.h:3967
Expr * getSizeExpr() const
Definition TypeBase.h:3981
specific_attr_iterator - Iterates over a subrange of an AttrVec, only providing attributes that are o...
Definition SPIR.cpp:35
bool isEmptyRecordForLayout(const ASTContext &Context, QualType T)
isEmptyRecordForLayout - Return true iff a structure contains only empty base classes (per isEmptyRec...
@ Type
The l-value was considered opaque, so the alignment was determined from a type.
Definition CGValue.h:155
@ Decl
The l-value was an access to a declared entity or something equivalently strong, like the address of ...
Definition CGValue.h:146
bool isEmptyFieldForLayout(const ASTContext &Context, const FieldDecl *FD)
isEmptyFieldForLayout - Return true iff the field is "empty", that is, either a zero-width bit-field ...
ComparisonResult
Indicates the result of a tentative comparison.
The JSON file list parser is used to communicate input to InstallAPI.
bool isOpenMPWorksharingDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a worksharing directive.
CanQual< Type > CanQualType
Represents a canonical, potentially-qualified type.
bool needsTaskBasedThreadLimit(OpenMPDirectiveKind DKind)
Checks if the specified target directive, combined or not, needs task based thread_limit.
@ Match
This is not an overload because the signature exactly matches an existing declaration.
Definition Sema.h:817
@ Ctor_Complete
Complete object ctor.
Definition ABI.h:25
bool isa(CodeGen::Address addr)
Definition Address.h:330
if(T->getSizeExpr()) TRY_TO(TraverseStmt(const_cast< Expr * >(T -> getSizeExpr())))
bool isOpenMPTargetDataManagementDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a target data offload directive.
@ Conditional
A conditional (?:) operator.
Definition Sema.h:668
@ ICIS_NoInit
No in-class initializer.
Definition Specifiers.h:272
bool isOpenMPDistributeDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a distribute directive.
@ LCK_ByRef
Capturing by reference.
Definition Lambda.h:37
LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE()
@ Private
'private' clause, allowed on 'parallel', 'serial', 'loop', 'parallel loop', and 'serial loop' constru...
@ Vector
'vector' clause, allowed on 'loop', Combined, and 'routine' directives.
@ Reduction
'reduction' clause, allowed on Parallel, Serial, Loop, and the combined constructs.
@ Present
'present' clause, allowed on Compute and Combined constructs, plus 'data' and 'declare'.
OpenMPScheduleClauseModifier
OpenMP modifiers for 'schedule' clause.
Definition OpenMPKinds.h:39
@ OMPC_SCHEDULE_MODIFIER_last
Definition OpenMPKinds.h:44
@ OMPC_SCHEDULE_MODIFIER_unknown
Definition OpenMPKinds.h:40
@ AS_public
Definition Specifiers.h:124
nullptr
This class represents a compute construct, representing a 'Kind' of ‘parallel’, 'serial',...
@ CR_OpenMP
bool isOpenMPParallelDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a parallel-kind directive.
OpenMPDistScheduleClauseKind
OpenMP attributes for 'dist_schedule' clause.
Expr * Cond
};
static bool classof(const Stmt *T)
bool isOpenMPTaskingDirective(OpenMPDirectiveKind Kind)
Checks if the specified directive kind is one of tasking directives - task, taskloop,...
bool isOpenMPTargetExecutionDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a target code offload directive.
@ OMPC_DYN_GROUPPRIVATE_FALLBACK_unknown
@ Result
The result type of a method or function.
Definition TypeBase.h:905
bool isOpenMPTeamsDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a teams-kind directive.
const FunctionProtoType * T
OpenMPDependClauseKind
OpenMP attributes for 'depend' clause.
Definition OpenMPKinds.h:55
@ OMPC_DEPEND_unknown
Definition OpenMPKinds.h:59
@ Dtor_Complete
Complete object dtor.
Definition ABI.h:36
@ Union
The "union" keyword.
Definition TypeBase.h:5901
bool isOpenMPTargetMapEnteringDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a map-entering target directive.
@ Type
The name was classified as a type.
Definition Sema.h:563
bool isOpenMPLoopDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a directive with an associated loop construct.
OpenMPSeverityClauseKind
OpenMP attributes for 'severity' clause.
LangAS
Defines the address space values used by the address space qualifier of QualType.
llvm::omp::Directive OpenMPDirectiveKind
OpenMP directives.
Definition OpenMPKinds.h:25
bool isOpenMPSimdDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a simd directive.
@ VK_PRValue
A pr-value expression (in the C++11 taxonomy) produces a temporary value.
Definition Specifiers.h:135
@ VK_LValue
An l-value expression is a reference to an object with independent storage.
Definition Specifiers.h:139
for(const auto &A :T->param_types())
void getOpenMPCaptureRegions(llvm::SmallVectorImpl< OpenMPDirectiveKind > &CaptureRegions, OpenMPDirectiveKind DKind)
Return the captured regions of an OpenMP directive.
OpenMPNumThreadsClauseModifier
@ OMPC_ATOMIC_DEFAULT_MEM_ORDER_unknown
U cast(CodeGen::Address addr)
Definition Address.h:327
@ OMPC_DEVICE_unknown
Definition OpenMPKinds.h:51
OpenMPMapModifierKind
OpenMP modifier kind for 'map' clause.
Definition OpenMPKinds.h:79
@ OMPC_MAP_MODIFIER_unknown
Definition OpenMPKinds.h:80
@ Other
Other implicit parameter.
Definition Decl.h:1746
OpenMPScheduleClauseKind
OpenMP attributes for 'schedule' clause.
Definition OpenMPKinds.h:31
@ OMPC_SCHEDULE_unknown
Definition OpenMPKinds.h:35
bool isOpenMPTaskLoopDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a taskloop directive.
OpenMPThreadsetKind
OpenMP modifiers for 'threadset' clause.
OpenMPMapClauseKind
OpenMP mapping kind for 'map' clause.
Definition OpenMPKinds.h:71
@ OMPC_MAP_unknown
Definition OpenMPKinds.h:75
unsigned long uint64_t
Diagnostic wrappers for TextAPI types for error reporting.
Definition Dominators.h:30
#define false
Definition stdbool.h:26
struct with the values to be passed to the dispatch runtime function
llvm::Value * Chunk
Chunk size specified using 'schedule' clause (nullptr if chunk was not specified)
Maps the expression for the lastprivate variable to the global copy used to store new value because o...
Struct with the values to be passed to the static runtime function.
bool IVSigned
Sign of the iteration variable.
Address UB
Address of the output variable in which the upper iteration number is returned.
Address IL
Address of the output variable in which the flag of the last iteration is returned.
llvm::Value * Chunk
Value of the chunk for the static_chunked scheduled loop.
unsigned IVSize
Size of the iteration variable in bits.
Address ST
Address of the output variable in which the stride value is returned necessary to generated the stati...
bool Ordered
true if loop is ordered, false otherwise.
Address LB
Address of the output variable in which the lower iteration number is returned.
A jump destination is an abstract label, branching to which may require a jump out through normal cle...
llvm::IntegerType * Int8Ty
i8, i16, i32, and i64
llvm::CallingConv::ID getRuntimeCC() const
SmallVector< const Expr *, 4 > DepExprs
EvalResult is a struct with detailed info about an evaluated expression.
Definition Expr.h:645
Extra information about a function prototype.
Definition TypeBase.h:5354
Expr * CounterUpdate
Updater for the internal counter: ++CounterVD;.
Definition ExprOpenMP.h:121
Data for list of allocators.
Expr * AllocatorTraits
Allocator traits.
Scheduling data for loop-based OpenMP directives.
OpenMPScheduleClauseModifier M2
OpenMPScheduleClauseModifier M1
OpenMPScheduleClauseKind Schedule
Describes how types, statements, expressions, and declarations should be printed.