// NOTE(review): the three lines that were here ("clang 23.0.0git", the file
// name, and "Go to the documentation of this file.") were doxygen web-page
// chrome captured by the scrape; they are not part of the source file.
//===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This provides a class for OpenMP runtime code generation.
//
//===----------------------------------------------------------------------===//
12
#include "CGOpenMPRuntime.h"
#include "ABIInfoImpl.h"
#include "CGCXXABI.h"
#include "CGCleanup.h"
#include "CGDebugInfo.h"
#include "CGRecordLayout.h"
#include "CodeGenFunction.h"
#include "TargetInfo.h"
#include "clang/AST/APValue.h"
#include "clang/AST/Attr.h"
#include "clang/AST/Decl.h"
// NOTE(review): the scrape dropped original include lines 24-30 here. The
// entries below are reconstructed from names this file visibly uses
// (OpenMP AST nodes, LLVM_MARK_AS_BITMASK_ENUM, OpenMP directive kinds,
// source locations) -- confirm against upstream before relying on them.
#include "clang/AST/OpenMPClause.h"
#include "clang/AST/StmtOpenMP.h"
#include "clang/AST/StmtVisitor.h"
#include "clang/Basic/BitmaskEnum.h"
#include "clang/Basic/OpenMPKinds.h"
#include "clang/Basic/SourceManager.h"
#include "clang/CodeGen/ConstantInitBuilder.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Bitcode/BitcodeReader.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
#include <cstdint>
#include <numeric>
#include <optional>
47
48using namespace clang;
49using namespace CodeGen;
50using namespace llvm::omp;
51
52namespace {
53/// Base class for handling code generation inside OpenMP regions.
54class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
55public:
56 /// Kinds of OpenMP regions used in codegen.
57 enum CGOpenMPRegionKind {
58 /// Region with outlined function for standalone 'parallel'
59 /// directive.
60 ParallelOutlinedRegion,
61 /// Region with outlined function for standalone 'task' directive.
62 TaskOutlinedRegion,
63 /// Region for constructs that do not require function outlining,
64 /// like 'for', 'sections', 'atomic' etc. directives.
65 InlinedRegion,
66 /// Region with outlined function for standalone 'target' directive.
67 TargetRegion,
68 };
69
70 CGOpenMPRegionInfo(const CapturedStmt &CS,
71 const CGOpenMPRegionKind RegionKind,
72 const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
73 bool HasCancel)
74 : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
75 CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}
76
77 CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
78 const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
79 bool HasCancel)
80 : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
81 Kind(Kind), HasCancel(HasCancel) {}
82
83 /// Get a variable or parameter for storing global thread id
84 /// inside OpenMP construct.
85 virtual const VarDecl *getThreadIDVariable() const = 0;
86
87 /// Emit the captured statement body.
88 void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;
89
90 /// Get an LValue for the current ThreadID variable.
91 /// \return LValue for thread id variable. This LValue always has type int32*.
92 virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);
93
94 virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}
95
96 CGOpenMPRegionKind getRegionKind() const { return RegionKind; }
97
98 OpenMPDirectiveKind getDirectiveKind() const { return Kind; }
99
100 bool hasCancel() const { return HasCancel; }
101
102 static bool classof(const CGCapturedStmtInfo *Info) {
103 return Info->getKind() == CR_OpenMP;
104 }
105
106 ~CGOpenMPRegionInfo() override = default;
107
108protected:
109 CGOpenMPRegionKind RegionKind;
110 RegionCodeGenTy CodeGen;
112 bool HasCancel;
113};
114
115/// API for captured statement code generation in OpenMP constructs.
116class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo {
117public:
118 CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
119 const RegionCodeGenTy &CodeGen,
120 OpenMPDirectiveKind Kind, bool HasCancel,
121 StringRef HelperName)
122 : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
123 HasCancel),
124 ThreadIDVar(ThreadIDVar), HelperName(HelperName) {
125 assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
126 }
127
128 /// Get a variable or parameter for storing global thread id
129 /// inside OpenMP construct.
130 const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }
131
132 /// Get the name of the capture helper.
133 StringRef getHelperName() const override { return HelperName; }
134
135 static bool classof(const CGCapturedStmtInfo *Info) {
136 return CGOpenMPRegionInfo::classof(Info) &&
137 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
138 ParallelOutlinedRegion;
139 }
140
141private:
142 /// A variable or parameter storing global thread id for OpenMP
143 /// constructs.
144 const VarDecl *ThreadIDVar;
145 StringRef HelperName;
146};
147
148/// API for captured statement code generation in OpenMP constructs.
149class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo {
150public:
151 class UntiedTaskActionTy final : public PrePostActionTy {
152 bool Untied;
153 const VarDecl *PartIDVar;
154 const RegionCodeGenTy UntiedCodeGen;
155 llvm::SwitchInst *UntiedSwitch = nullptr;
156
157 public:
158 UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar,
159 const RegionCodeGenTy &UntiedCodeGen)
160 : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {}
161 void Enter(CodeGenFunction &CGF) override {
162 if (Untied) {
163 // Emit task switching point.
164 LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
165 CGF.GetAddrOfLocalVar(PartIDVar),
166 PartIDVar->getType()->castAs<PointerType>());
167 llvm::Value *Res =
168 CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation());
169 llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done.");
170 UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB);
171 CGF.EmitBlock(DoneBB);
173 CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
174 UntiedSwitch->addCase(CGF.Builder.getInt32(0),
175 CGF.Builder.GetInsertBlock());
176 emitUntiedSwitch(CGF);
177 }
178 }
179 void emitUntiedSwitch(CodeGenFunction &CGF) const {
180 if (Untied) {
181 LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
182 CGF.GetAddrOfLocalVar(PartIDVar),
183 PartIDVar->getType()->castAs<PointerType>());
184 CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
185 PartIdLVal);
186 UntiedCodeGen(CGF);
187 CodeGenFunction::JumpDest CurPoint =
188 CGF.getJumpDestInCurrentScope(".untied.next.");
190 CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
191 UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
192 CGF.Builder.GetInsertBlock());
193 CGF.EmitBranchThroughCleanup(CurPoint);
194 CGF.EmitBlock(CurPoint.getBlock());
195 }
196 }
197 unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); }
198 };
199 CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
200 const VarDecl *ThreadIDVar,
201 const RegionCodeGenTy &CodeGen,
202 OpenMPDirectiveKind Kind, bool HasCancel,
203 const UntiedTaskActionTy &Action)
204 : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
205 ThreadIDVar(ThreadIDVar), Action(Action) {
206 assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
207 }
208
209 /// Get a variable or parameter for storing global thread id
210 /// inside OpenMP construct.
211 const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }
212
213 /// Get an LValue for the current ThreadID variable.
214 LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;
215
216 /// Get the name of the capture helper.
217 StringRef getHelperName() const override { return ".omp_outlined."; }
218
219 void emitUntiedSwitch(CodeGenFunction &CGF) override {
220 Action.emitUntiedSwitch(CGF);
221 }
222
223 static bool classof(const CGCapturedStmtInfo *Info) {
224 return CGOpenMPRegionInfo::classof(Info) &&
225 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
226 TaskOutlinedRegion;
227 }
228
229private:
230 /// A variable or parameter storing global thread id for OpenMP
231 /// constructs.
232 const VarDecl *ThreadIDVar;
233 /// Action for emitting code for untied tasks.
234 const UntiedTaskActionTy &Action;
235};
236
237/// API for inlined captured statement code generation in OpenMP
238/// constructs.
239class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
240public:
241 CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
242 const RegionCodeGenTy &CodeGen,
243 OpenMPDirectiveKind Kind, bool HasCancel)
244 : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
245 OldCSI(OldCSI),
246 OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}
247
248 // Retrieve the value of the context parameter.
249 llvm::Value *getContextValue() const override {
250 if (OuterRegionInfo)
251 return OuterRegionInfo->getContextValue();
252 llvm_unreachable("No context value for inlined OpenMP region");
253 }
254
255 void setContextValue(llvm::Value *V) override {
256 if (OuterRegionInfo) {
257 OuterRegionInfo->setContextValue(V);
258 return;
259 }
260 llvm_unreachable("No context value for inlined OpenMP region");
261 }
262
263 /// Lookup the captured field decl for a variable.
264 const FieldDecl *lookup(const VarDecl *VD) const override {
265 if (OuterRegionInfo)
266 return OuterRegionInfo->lookup(VD);
267 // If there is no outer outlined region,no need to lookup in a list of
268 // captured variables, we can use the original one.
269 return nullptr;
270 }
271
272 FieldDecl *getThisFieldDecl() const override {
273 if (OuterRegionInfo)
274 return OuterRegionInfo->getThisFieldDecl();
275 return nullptr;
276 }
277
278 /// Get a variable or parameter for storing global thread id
279 /// inside OpenMP construct.
280 const VarDecl *getThreadIDVariable() const override {
281 if (OuterRegionInfo)
282 return OuterRegionInfo->getThreadIDVariable();
283 return nullptr;
284 }
285
286 /// Get an LValue for the current ThreadID variable.
287 LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override {
288 if (OuterRegionInfo)
289 return OuterRegionInfo->getThreadIDVariableLValue(CGF);
290 llvm_unreachable("No LValue for inlined OpenMP construct");
291 }
292
293 /// Get the name of the capture helper.
294 StringRef getHelperName() const override {
295 if (auto *OuterRegionInfo = getOldCSI())
296 return OuterRegionInfo->getHelperName();
297 llvm_unreachable("No helper name for inlined OpenMP construct");
298 }
299
300 void emitUntiedSwitch(CodeGenFunction &CGF) override {
301 if (OuterRegionInfo)
302 OuterRegionInfo->emitUntiedSwitch(CGF);
303 }
304
305 CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }
306
307 static bool classof(const CGCapturedStmtInfo *Info) {
308 return CGOpenMPRegionInfo::classof(Info) &&
309 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
310 }
311
312 ~CGOpenMPInlinedRegionInfo() override = default;
313
314private:
315 /// CodeGen info about outer OpenMP region.
316 CodeGenFunction::CGCapturedStmtInfo *OldCSI;
317 CGOpenMPRegionInfo *OuterRegionInfo;
318};
319
320/// API for captured statement code generation in OpenMP target
321/// constructs. For this captures, implicit parameters are used instead of the
322/// captured fields. The name of the target region has to be unique in a given
323/// application so it is provided by the client, because only the client has
324/// the information to generate that.
325class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo {
326public:
327 CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
328 const RegionCodeGenTy &CodeGen, StringRef HelperName)
329 : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
330 /*HasCancel=*/false),
331 HelperName(HelperName) {}
332
333 /// This is unused for target regions because each starts executing
334 /// with a single thread.
335 const VarDecl *getThreadIDVariable() const override { return nullptr; }
336
337 /// Get the name of the capture helper.
338 StringRef getHelperName() const override { return HelperName; }
339
340 static bool classof(const CGCapturedStmtInfo *Info) {
341 return CGOpenMPRegionInfo::classof(Info) &&
342 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;
343 }
344
345private:
346 StringRef HelperName;
347};
348
349static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) {
350 llvm_unreachable("No codegen for expressions");
351}
352/// API for generation of expressions captured in a innermost OpenMP
353/// region.
354class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo {
355public:
356 CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS)
357 : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen,
358 OMPD_unknown,
359 /*HasCancel=*/false),
360 PrivScope(CGF) {
361 // Make sure the globals captured in the provided statement are local by
362 // using the privatization logic. We assume the same variable is not
363 // captured more than once.
364 for (const auto &C : CS.captures()) {
365 if (!C.capturesVariable() && !C.capturesVariableByCopy())
366 continue;
367
368 const VarDecl *VD = C.getCapturedVar();
369 if (VD->isLocalVarDeclOrParm())
370 continue;
371
372 DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
373 /*RefersToEnclosingVariableOrCapture=*/false,
374 VD->getType().getNonReferenceType(), VK_LValue,
375 C.getLocation());
376 PrivScope.addPrivate(VD, CGF.EmitLValue(&DRE).getAddress());
377 }
378 (void)PrivScope.Privatize();
379 }
380
381 /// Lookup the captured field decl for a variable.
382 const FieldDecl *lookup(const VarDecl *VD) const override {
383 if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD))
384 return FD;
385 return nullptr;
386 }
387
388 /// Emit the captured statement body.
389 void EmitBody(CodeGenFunction &CGF, const Stmt *S) override {
390 llvm_unreachable("No body for expressions");
391 }
392
393 /// Get a variable or parameter for storing global thread id
394 /// inside OpenMP construct.
395 const VarDecl *getThreadIDVariable() const override {
396 llvm_unreachable("No thread id for expressions");
397 }
398
399 /// Get the name of the capture helper.
400 StringRef getHelperName() const override {
401 llvm_unreachable("No helper name for expressions");
402 }
403
404 static bool classof(const CGCapturedStmtInfo *Info) { return false; }
405
406private:
407 /// Private scope to capture global variables.
408 CodeGenFunction::OMPPrivateScope PrivScope;
409};
410
411/// RAII for emitting code of OpenMP constructs.
412class InlinedOpenMPRegionRAII {
413 CodeGenFunction &CGF;
414 llvm::DenseMap<const ValueDecl *, FieldDecl *> LambdaCaptureFields;
415 FieldDecl *LambdaThisCaptureField = nullptr;
416 const CodeGen::CGBlockInfo *BlockInfo = nullptr;
417 bool NoInheritance = false;
418
419public:
420 /// Constructs region for combined constructs.
421 /// \param CodeGen Code generation sequence for combined directives. Includes
422 /// a list of functions used for code generation of implicitly inlined
423 /// regions.
424 InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
425 OpenMPDirectiveKind Kind, bool HasCancel,
426 bool NoInheritance = true)
427 : CGF(CGF), NoInheritance(NoInheritance) {
428 // Start emission for the construct.
429 CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
430 CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
431 if (NoInheritance) {
432 std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
433 LambdaThisCaptureField = CGF.LambdaThisCaptureField;
434 CGF.LambdaThisCaptureField = nullptr;
435 BlockInfo = CGF.BlockInfo;
436 CGF.BlockInfo = nullptr;
437 }
438 }
439
440 ~InlinedOpenMPRegionRAII() {
441 // Restore original CapturedStmtInfo only if we're done with code emission.
442 auto *OldCSI =
443 cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
444 delete CGF.CapturedStmtInfo;
445 CGF.CapturedStmtInfo = OldCSI;
446 if (NoInheritance) {
447 std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
448 CGF.LambdaThisCaptureField = LambdaThisCaptureField;
449 CGF.BlockInfo = BlockInfo;
450 }
451 }
452};
453
454/// Values for bit flags used in the ident_t to describe the fields.
455/// All enumeric elements are named and described in accordance with the code
456/// from https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
457enum OpenMPLocationFlags : unsigned {
458 /// Use trampoline for internal microtask.
459 OMP_IDENT_IMD = 0x01,
460 /// Use c-style ident structure.
461 OMP_IDENT_KMPC = 0x02,
462 /// Atomic reduction option for kmpc_reduce.
463 OMP_ATOMIC_REDUCE = 0x10,
464 /// Explicit 'barrier' directive.
465 OMP_IDENT_BARRIER_EXPL = 0x20,
466 /// Implicit barrier in code.
467 OMP_IDENT_BARRIER_IMPL = 0x40,
468 /// Implicit barrier in 'for' directive.
469 OMP_IDENT_BARRIER_IMPL_FOR = 0x40,
470 /// Implicit barrier in 'sections' directive.
471 OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0,
472 /// Implicit barrier in 'single' directive.
473 OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140,
474 /// Call of __kmp_for_static_init for static loop.
475 OMP_IDENT_WORK_LOOP = 0x200,
476 /// Call of __kmp_for_static_init for sections.
477 OMP_IDENT_WORK_SECTIONS = 0x400,
478 /// Call of __kmp_for_static_init for distribute.
479 OMP_IDENT_WORK_DISTRIBUTE = 0x800,
480 LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE)
481};
482
/// Describes ident structure that describes a source location.
/// All descriptions are taken from
/// https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
/// Original structure:
/// typedef struct ident {
///    kmp_int32 reserved_1;   /**<  might be used in Fortran;
///                                  see above  */
///    kmp_int32 flags;        /**<  also f.flags; KMP_IDENT_xxx flags;
///                                  KMP_IDENT_KMPC identifies this union
///                                  member  */
///    kmp_int32 reserved_2;   /**<  not really used in Fortran any more;
///                                  see above */
///#if USE_ITT_BUILD
///                            /*  but currently used for storing
///                                region-specific ITT */
///                            /*  contextual information. */
///#endif /* USE_ITT_BUILD */
///    kmp_int32 reserved_3;   /**< source[4] in Fortran, do not use for
///                                 C++  */
///    char const *psource;    /**< String describing the source location.
///                            The string is composed of semi-colon separated
///                            fields which describe the source file,
///                            the function and a pair of line numbers that
///                            delimit the construct.
///                            */
/// } ident_t;
enum IdentFieldIndex {
  /// might be used in Fortran
  IdentField_Reserved_1,
  /// OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
  IdentField_Flags,
  /// Not really used in Fortran any more
  IdentField_Reserved_2,
  /// Source[4] in Fortran, do not use for C++
  IdentField_Reserved_3,
  /// String describing the source location. The string is composed of
  /// semi-colon separated fields which describe the source file, the function
  /// and a pair of line numbers that delimit the construct.
  IdentField_PSource
};
523
/// Schedule types for 'omp for' loops (these enumerators are taken from
/// the enum sched_type in kmp.h).
enum OpenMPSchedType {
  /// Lower bound for default (unordered) versions.
  OMP_sch_lower = 32,
  OMP_sch_static_chunked = 33,
  OMP_sch_static = 34,
  OMP_sch_dynamic_chunked = 35,
  OMP_sch_guided_chunked = 36,
  OMP_sch_runtime = 37,
  OMP_sch_auto = 38,
  /// static with chunk adjustment (e.g., simd)
  OMP_sch_static_balanced_chunked = 45,
  /// Lower bound for 'ordered' versions.
  OMP_ord_lower = 64,
  OMP_ord_static_chunked = 65,
  OMP_ord_static = 66,
  OMP_ord_dynamic_chunked = 67,
  OMP_ord_guided_chunked = 68,
  OMP_ord_runtime = 69,
  OMP_ord_auto = 70,
  OMP_sch_default = OMP_sch_static,
  /// dist_schedule types
  OMP_dist_sch_static_chunked = 91,
  OMP_dist_sch_static = 92,
  /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
  /// Set if the monotonic schedule modifier was present.
  OMP_sch_modifier_monotonic = (1 << 29),
  /// Set if the nonmonotonic schedule modifier was present.
  OMP_sch_modifier_nonmonotonic = (1 << 30),
};
555
556/// A basic class for pre|post-action for advanced codegen sequence for OpenMP
557/// region.
558class CleanupTy final : public EHScopeStack::Cleanup {
559 PrePostActionTy *Action;
560
561public:
562 explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {}
563 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
564 if (!CGF.HaveInsertPoint())
565 return;
566 Action->Exit(CGF);
567 }
568};
569
570} // anonymous namespace
571
574 if (PrePostAction) {
575 CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction);
576 Callback(CodeGen, CGF, *PrePostAction);
577 } else {
578 PrePostActionTy Action;
579 Callback(CodeGen, CGF, Action);
580 }
581}
582
583/// Check if the combiner is a call to UDR combiner and if it is so return the
584/// UDR decl used for reduction.
585static const OMPDeclareReductionDecl *
586getReductionInit(const Expr *ReductionOp) {
587 if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
588 if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
589 if (const auto *DRE =
590 dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
591 if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl()))
592 return DRD;
593 return nullptr;
594}
595
597 const OMPDeclareReductionDecl *DRD,
598 const Expr *InitOp,
599 Address Private, Address Original,
600 QualType Ty) {
601 if (DRD->getInitializer()) {
602 std::pair<llvm::Function *, llvm::Function *> Reduction =
604 const auto *CE = cast<CallExpr>(InitOp);
605 const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee());
606 const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
607 const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
608 const auto *LHSDRE =
609 cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr());
610 const auto *RHSDRE =
611 cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr());
612 CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
613 PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()), Private);
614 PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()), Original);
615 (void)PrivateScope.Privatize();
618 CGF.EmitIgnoredExpr(InitOp);
619 } else {
620 llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty);
621 std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"});
622 auto *GV = new llvm::GlobalVariable(
623 CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
624 llvm::GlobalValue::PrivateLinkage, Init, Name);
625 LValue LV = CGF.MakeNaturalAlignRawAddrLValue(GV, Ty);
626 RValue InitRVal;
627 switch (CGF.getEvaluationKind(Ty)) {
628 case TEK_Scalar:
629 InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation());
630 break;
631 case TEK_Complex:
632 InitRVal =
634 break;
635 case TEK_Aggregate: {
636 OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_LValue);
637 CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, LV);
638 CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
639 /*IsInitializer=*/false);
640 return;
641 }
642 }
643 OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_PRValue);
644 CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
645 CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
646 /*IsInitializer=*/false);
647 }
648}
649
650/// Emit initialization of arrays of complex types.
651/// \param DestAddr Address of the array.
652/// \param Type Type of array.
653/// \param Init Initial expression of array.
654/// \param SrcAddr Address of the original array.
656 QualType Type, bool EmitDeclareReductionInit,
657 const Expr *Init,
658 const OMPDeclareReductionDecl *DRD,
659 Address SrcAddr = Address::invalid()) {
660 // Perform element-by-element initialization.
661 QualType ElementTy;
662
663 // Drill down to the base element type on both arrays.
664 const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
665 llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
666 if (DRD)
667 SrcAddr = SrcAddr.withElementType(DestAddr.getElementType());
668
669 llvm::Value *SrcBegin = nullptr;
670 if (DRD)
671 SrcBegin = SrcAddr.emitRawPointer(CGF);
672 llvm::Value *DestBegin = DestAddr.emitRawPointer(CGF);
673 // Cast from pointer to array type to pointer to single element.
674 llvm::Value *DestEnd =
675 CGF.Builder.CreateGEP(DestAddr.getElementType(), DestBegin, NumElements);
676 // The basic structure here is a while-do loop.
677 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
678 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
679 llvm::Value *IsEmpty =
680 CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
681 CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
682
683 // Enter the loop body, making that address the current address.
684 llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
685 CGF.EmitBlock(BodyBB);
686
687 CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);
688
689 llvm::PHINode *SrcElementPHI = nullptr;
690 Address SrcElementCurrent = Address::invalid();
691 if (DRD) {
692 SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2,
693 "omp.arraycpy.srcElementPast");
694 SrcElementPHI->addIncoming(SrcBegin, EntryBB);
695 SrcElementCurrent =
696 Address(SrcElementPHI, SrcAddr.getElementType(),
697 SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
698 }
699 llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
700 DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
701 DestElementPHI->addIncoming(DestBegin, EntryBB);
702 Address DestElementCurrent =
703 Address(DestElementPHI, DestAddr.getElementType(),
704 DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));
705
706 // Emit copy.
707 {
709 if (EmitDeclareReductionInit) {
710 emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent,
711 SrcElementCurrent, ElementTy);
712 } else
713 CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
714 /*IsInitializer=*/false);
715 }
716
717 if (DRD) {
718 // Shift the address forward by one element.
719 llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32(
720 SrcAddr.getElementType(), SrcElementPHI, /*Idx0=*/1,
721 "omp.arraycpy.dest.element");
722 SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock());
723 }
724
725 // Shift the address forward by one element.
726 llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32(
727 DestAddr.getElementType(), DestElementPHI, /*Idx0=*/1,
728 "omp.arraycpy.dest.element");
729 // Check whether we've reached the end.
730 llvm::Value *Done =
731 CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
732 CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
733 DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());
734
735 // Done.
736 CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
737}
738
739LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) {
740 return CGF.EmitOMPSharedLValue(E);
741}
742
743LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF,
744 const Expr *E) {
745 if (const auto *OASE = dyn_cast<ArraySectionExpr>(E))
746 return CGF.EmitArraySectionExpr(OASE, /*IsLowerBound=*/false);
747 return LValue();
748}
749
750void ReductionCodeGen::emitAggregateInitialization(
751 CodeGenFunction &CGF, unsigned N, Address PrivateAddr, Address SharedAddr,
752 const OMPDeclareReductionDecl *DRD) {
753 // Emit VarDecl with copy init for arrays.
754 // Get the address of the original variable captured in current
755 // captured region.
756 const auto *PrivateVD =
757 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
758 bool EmitDeclareReductionInit =
759 DRD && (DRD->getInitializer() || !PrivateVD->hasInit());
760 EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(),
761 EmitDeclareReductionInit,
762 EmitDeclareReductionInit ? ClausesData[N].ReductionOp
763 : PrivateVD->getInit(),
764 DRD, SharedAddr);
765}
766
770 ArrayRef<const Expr *> ReductionOps) {
771 ClausesData.reserve(Shareds.size());
772 SharedAddresses.reserve(Shareds.size());
773 Sizes.reserve(Shareds.size());
774 BaseDecls.reserve(Shareds.size());
775 const auto *IOrig = Origs.begin();
776 const auto *IPriv = Privates.begin();
777 const auto *IRed = ReductionOps.begin();
778 for (const Expr *Ref : Shareds) {
779 ClausesData.emplace_back(Ref, *IOrig, *IPriv, *IRed);
780 std::advance(IOrig, 1);
781 std::advance(IPriv, 1);
782 std::advance(IRed, 1);
783 }
784}
785
787 assert(SharedAddresses.size() == N && OrigAddresses.size() == N &&
788 "Number of generated lvalues must be exactly N.");
789 LValue First = emitSharedLValue(CGF, ClausesData[N].Shared);
790 LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Shared);
791 SharedAddresses.emplace_back(First, Second);
792 if (ClausesData[N].Shared == ClausesData[N].Ref) {
793 OrigAddresses.emplace_back(First, Second);
794 } else {
795 LValue First = emitSharedLValue(CGF, ClausesData[N].Ref);
796 LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref);
797 OrigAddresses.emplace_back(First, Second);
798 }
799}
800
802 QualType PrivateType = getPrivateType(N);
803 bool AsArraySection = isa<ArraySectionExpr>(ClausesData[N].Ref);
804 if (!PrivateType->isVariablyModifiedType()) {
805 Sizes.emplace_back(
806 CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType()),
807 nullptr);
808 return;
809 }
810 llvm::Value *Size;
811 llvm::Value *SizeInChars;
812 auto *ElemType = OrigAddresses[N].first.getAddress().getElementType();
813 auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType);
814 if (AsArraySection) {
815 Size = CGF.Builder.CreatePtrDiff(ElemType,
816 OrigAddresses[N].second.getPointer(CGF),
817 OrigAddresses[N].first.getPointer(CGF));
818 Size = CGF.Builder.CreateZExtOrTrunc(Size, ElemSizeOf->getType());
819 Size = CGF.Builder.CreateNUWAdd(
820 Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
821 SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf);
822 } else {
823 SizeInChars =
824 CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType());
825 Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf);
826 }
827 Sizes.emplace_back(SizeInChars, Size);
829 CGF,
831 CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
832 RValue::get(Size));
833 CGF.EmitVariablyModifiedType(PrivateType);
834}
835
837 llvm::Value *Size) {
838 QualType PrivateType = getPrivateType(N);
839 if (!PrivateType->isVariablyModifiedType()) {
840 assert(!Size && !Sizes[N].second &&
841 "Size should be nullptr for non-variably modified reduction "
842 "items.");
843 return;
844 }
846 CGF,
848 CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
849 RValue::get(Size));
850 CGF.EmitVariablyModifiedType(PrivateType);
851}
852
// Emits the initialization of the private copy for reduction item N.
// Chooses between: aggregate initialization, a user-defined
// declare-reduction initializer, or the private variable's own initializer.
// NOTE(review): the line with the qualified function name (original 853)
// was dropped by extraction.
854 CodeGenFunction &CGF, unsigned N, Address PrivateAddr, Address SharedAddr,
855 llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) {
856 assert(SharedAddresses.size() > N && "No variable was generated");
857 const auto *PrivateVD =
858 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
859 const OMPDeclareReductionDecl *DRD =
860 getReductionInit(ClausesData[N].ReductionOp);
// Array reductions are initialized element-wise.
861 if (CGF.getContext().getAsArrayType(PrivateVD->getType())) {
862 if (DRD && DRD->getInitializer())
863 (void)DefaultInit(CGF);
864 emitAggregateInitialization(CGF, N, PrivateAddr, SharedAddr, DRD);
// Scalar with a declare-reduction initializer (or no default init).
865 } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
866 (void)DefaultInit(CGF);
867 QualType SharedType = SharedAddresses[N].first.getType();
868 emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp,
869 PrivateAddr, SharedAddr, SharedType);
// Otherwise fall back to the private variable's own non-trivial initializer.
870 } else if (!DefaultInit(CGF) && PrivateVD->hasInit() &&
871 !CGF.isTrivialInitializer(PrivateVD->getInit())) {
872 CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr,
873 PrivateVD->getType().getQualifiers(),
874 /*IsInitializer=*/false);
875 }
876}
877
// Returns true when the private copy of reduction item N has a non-trivial
// destruction kind and therefore needs a destructor call on cleanup.
// NOTE(review): the signature line (original 878) was dropped by extraction.
879 QualType PrivateType = getPrivateType(N);
880 QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
881 return DTorKind != QualType::DK_none;
882}
883
// Pushes a destructor cleanup for the private copy of reduction item N, if
// its type requires destruction. The address is re-typed to the memory
// representation of the private type before registering the destroy.
// NOTE(review): the line with the function name (original 884) was dropped
// by extraction.
885 Address PrivateAddr) {
886 QualType PrivateType = getPrivateType(N);
887 QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
888 if (needCleanups(N)) {
889 PrivateAddr =
890 PrivateAddr.withElementType(CGF.ConvertTypeForMem(PrivateType));
891 CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType);
892 }
893}
894
895static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
896 LValue BaseLV) {
897 BaseTy = BaseTy.getNonReferenceType();
898 while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
899 !CGF.getContext().hasSameType(BaseTy, ElTy)) {
900 if (const auto *PtrTy = BaseTy->getAs<PointerType>()) {
901 BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(), PtrTy);
902 } else {
903 LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(), BaseTy);
904 BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal);
905 }
906 BaseTy = BaseTy->getPointeeType();
907 }
908 return CGF.MakeAddrLValue(
909 BaseLV.getAddress().withElementType(CGF.ConvertTypeForMem(ElTy)),
910 BaseLV.getType(), BaseLV.getBaseInfo(),
911 CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType()));
912}
913
// Rebuilds the chain of indirections of the original base for a private
// reduction address: allocates a temporary per pointer/reference level,
// links them top-down, stores the adjusted pointer into the innermost one,
// and returns the outermost address (or the original base re-pointed).
// NOTE(review): the first signature line (original 914) and the declaration
// of `Tmp` (original 916) were dropped by extraction — confirm upstream.
915 Address OriginalBaseAddress, llvm::Value *Addr) {
917 Address TopTmp = Address::invalid();
918 Address MostTopTmp = Address::invalid();
919 BaseTy = BaseTy.getNonReferenceType();
920 while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
921 !CGF.getContext().hasSameType(BaseTy, ElTy)) {
// One temporary per indirection level; chain each new level into the
// previous one so loads walk down to the data.
922 Tmp = CGF.CreateMemTemp(BaseTy);
923 if (TopTmp.isValid())
924 CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
925 else
926 MostTopTmp = Tmp;
927 TopTmp = Tmp;
928 BaseTy = BaseTy->getPointeeType();
929 }
930
931 if (Tmp.isValid()) {
// NOTE(review): the cast call opening line (original 932) was dropped;
// presumably a pointer bitcast/addrspace cast of Addr — confirm upstream.
933 Addr, Tmp.getElementType());
934 CGF.Builder.CreateStore(Addr, Tmp);
935 return MostTopTmp;
936 }
937
// NOTE(review): the cast call opening line (original 938) was dropped here
// as well.
939 Addr, OriginalBaseAddress.getType());
940 return OriginalBaseAddress.withPointer(Addr, NotKnownNonNull);
941}
942
// Strips array-section and array-subscript layers off \p Ref to find the
// underlying base variable; on success returns the VarDecl and sets \p DE
// to the base DeclRefExpr. Returns nullptr for other expression shapes.
static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) {
944 const VarDecl *OrigVD = nullptr;
945 if (const auto *OASE = dyn_cast<ArraySectionExpr>(Ref)) {
946 const Expr *Base = OASE->getBase()->IgnoreParenImpCasts();
947 while (const auto *TempOASE = dyn_cast<ArraySectionExpr>(Base))
948 Base = TempOASE->getBase()->IgnoreParenImpCasts();
949 while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
950 Base = TempASE->getBase()->IgnoreParenImpCasts();
// NOTE(review): the line assigning DE (original 951, presumably
// `DE = cast<DeclRefExpr>(Base);`) was dropped by extraction.
952 OrigVD = cast<VarDecl>(DE->getDecl());
953 } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) {
954 const Expr *Base = ASE->getBase()->IgnoreParenImpCasts();
955 while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
956 Base = TempASE->getBase()->IgnoreParenImpCasts();
// NOTE(review): the DE assignment line (original 957) was dropped here too.
958 OrigVD = cast<VarDecl>(DE->getDecl());
959 }
960 return OrigVD;
961}
962
// Adjusts the private address of reduction item N so it points at the same
// offset within the private copy that the shared lvalue has within the
// original base (handles array sections/subscripts on pointer bases).
// NOTE(review): the line with the function name (original 963) was dropped
// by extraction.
964 Address PrivateAddr) {
965 const DeclRefExpr *DE;
966 if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) {
967 BaseDecls.emplace_back(OrigVD);
968 LValue OriginalBaseLValue = CGF.EmitLValue(DE);
969 LValue BaseLValue =
970 loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(),
971 OriginalBaseLValue);
972 Address SharedAddr = SharedAddresses[N].first.getAddress();
// Offset of the shared element from the start of the original data; the
// same offset is applied to the private pointer below.
973 llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff(
974 SharedAddr.getElementType(), BaseLValue.getPointer(CGF),
975 SharedAddr.emitRawPointer(CGF));
976 llvm::Value *PrivatePointer =
978 PrivateAddr.emitRawPointer(CGF), SharedAddr.getType());
979 llvm::Value *Ptr = CGF.Builder.CreateGEP(
980 SharedAddr.getElementType(), PrivatePointer, Adjustment);
981 return castToBase(CGF, OrigVD->getType(),
982 SharedAddresses[N].first.getType(),
983 OriginalBaseLValue.getAddress(), Ptr);
984 }
985 BaseDecls.emplace_back(
986 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl()));
987 return PrivateAddr;
988}
989
// Returns true if reduction item N has a user-defined declare-reduction
// initializer attached to its reduction operation.
// NOTE(review): the signature line (original 990) was dropped by extraction.
991 const OMPDeclareReductionDecl *DRD =
992 getReductionInit(ClausesData[N].ReductionOp);
993 return DRD && DRD->getInitializer();
994}
995
996LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
997 return CGF.EmitLoadOfPointerLValue(
998 CGF.GetAddrOfLocalVar(getThreadIDVariable()),
999 getThreadIDVariable()->getType()->castAs<PointerType>());
1000}
1001
// Emits the body of an OpenMP region inside a terminate scope: per the
// OpenMP structured-block rules cited below, exceptions must not escape
// the region, so a terminate handler brackets the emission.
void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt *S) {
1003 if (!CGF.HaveInsertPoint())
1004 return;
1005 // 1.2.2 OpenMP Language Terminology
1006 // Structured block - An executable statement with a single entry at the
1007 // top and a single exit at the bottom.
1008 // The point of exit cannot be a branch out of the structured block.
1009 // longjmp() and throw() must not violate the entry/exit criteria.
1010 CGF.EHStack.pushTerminate();
1011 if (S)
// NOTE(review): the statement guarded by `if (S)` (original line 1012) was
// dropped by extraction — confirm against upstream.
1013 CodeGen(CGF);
1014 CGF.EHStack.popTerminate();
1015}
1016
// For task regions the thread id is a local value (kmp_int32), not a
// pointer, so the lvalue is formed directly over the local variable.
LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
1018 CodeGenFunction &CGF) {
1019 return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
1020 getThreadIDVariable()->getType(),
// NOTE(review): the final argument line (original 1021, presumably an
// AlignmentSource) was dropped by extraction.
1022}
1023
// Creates a public, non-mutable FieldDecl of the given type, appends it to
// the record \p DC, and returns it. Used to build the implicit records
// (e.g. kmp_task_t payloads) this runtime emits.
// NOTE(review): the line with the function name and leading parameters
// (original 1024) was dropped by extraction.
1025 QualType FieldTy) {
1026 auto *Field = FieldDecl::Create(
1027 C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
1028 C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
1029 /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
1030 Field->setAccess(AS_public);
1031 DC->addDecl(Field);
1032 return Field;
1033}
1034
// Constructor: configures and initializes the shared OpenMPIRBuilder,
// loads host offload metadata when compiling for a target device, and
// honors -fopenmp-force-usm.
// NOTE(review): the constructor's signature line (original 1035) was
// dropped by extraction.
1036 : CGM(CGM), OMPBuilder(CGM.getModule()) {
// kmp_critical_name is an array of 8 i32 in the KMP runtime ABI.
1037 KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);
1038 llvm::OpenMPIRBuilderConfig Config(
1039 CGM.getLangOpts().OpenMPIsTargetDevice, isGPU(),
1040 CGM.getLangOpts().OpenMPOffloadMandatory,
// The three "requires" flags start false; they are updated later from
// parsed 'omp requires' directives.
1041 /*HasRequiresReverseOffload*/ false, /*HasRequiresUnifiedAddress*/ false,
1042 hasRequiresUnifiedSharedMemory(), /*HasRequiresDynamicAllocators*/ false);
1043 Config.setDefaultTargetAS(
1044 CGM.getContext().getTargetInfo().getTargetAddressSpace(LangAS::Default));
1045 Config.setRuntimeCC(CGM.getRuntimeCC());
1046
1047 OMPBuilder.setConfig(Config);
1048 OMPBuilder.initialize();
// On the device side, pre-populate offload entries from the host IR file.
1049 OMPBuilder.loadOffloadInfoMetadata(*CGM.getFileSystem(),
1050 CGM.getLangOpts().OpenMPIsTargetDevice
1051 ? CGM.getLangOpts().OMPHostIRFile
1052 : StringRef{});
1053
1054 // The user forces the compiler to behave as if omp requires
1055 // unified_shared_memory was given.
1056 if (CGM.getLangOpts().OpenMPForceUSM) {
// NOTE(review): one statement here (original line 1057) was dropped by
// extraction.
1058 OMPBuilder.Config.setHasRequiresUnifiedSharedMemory(true);
1059 }
1060}
1061
// Resets per-module caches and erases never-used non-target variable
// declarations that were emitted only for debug info.
// NOTE(review): the function's signature line (original 1062) was dropped
// by extraction.
1063 InternalVars.clear();
1064 // Clean non-target variable declarations possibly used only in debug info.
1065 for (const auto &Data : EmittedNonTargetVariables) {
1066 if (!Data.getValue().pointsToAliveValue())
1067 continue;
1068 auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue());
1069 if (!GV)
1070 continue;
// Only prune pure declarations with no remaining uses.
1071 if (!GV->isDeclaration() || GV->getNumUses() > 0)
1072 continue;
1073 GV->eraseFromParent();
1074 }
1075}
1076
// Joins the given name parts with the platform-specific separator via the
// OpenMPIRBuilder (e.g. "." or "$" depending on target conventions).
// NOTE(review): the signature line (original 1077) was dropped by extraction.
1078 return OMPBuilder.createPlatformSpecificName(Parts);
1079}
1080
// Emits an internal helper function `void .omp_combiner.(Ty *in, Ty *out)`
// (or `.omp_initializer.`) for a user-defined reduction: maps omp_in /
// omp_out onto the two pointer parameters and emits the combiner or
// initializer expression into the body.
// NOTE(review): the line carrying the function name and first parameters
// (original 1082) was dropped by extraction.
static llvm::Function *
1083 const Expr *CombinerInitializer, const VarDecl *In,
1084 const VarDecl *Out, bool IsCombiner) {
1085 // void .omp_combiner.(Ty *in, Ty *out);
1086 ASTContext &C = CGM.getContext();
1087 QualType PtrTy = C.getPointerType(Ty).withRestrict();
1088 FunctionArgList Args;
1089 ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(),
1090 /*Id=*/nullptr, PtrTy, ImplicitParamKind::Other);
1091 ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(),
1092 /*Id=*/nullptr, PtrTy, ImplicitParamKind::Other);
1093 Args.push_back(&OmpOutParm);
1094 Args.push_back(&OmpInParm);
1095 const CGFunctionInfo &FnInfo =
1096 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
1097 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
1098 std::string Name = CGM.getOpenMPRuntime().getName(
1099 {IsCombiner ? "omp_combiner" : "omp_initializer", ""});
1100 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
1101 Name, &CGM.getModule());
1102 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
1103 if (!CGM.getCodeGenOpts().SampleProfileFile.empty())
1104 Fn->addFnAttr("sample-profile-suffix-elision-policy", "selected");
// At -O1 and above these tiny helpers are forced inline.
1105 if (CGM.getCodeGenOpts().OptimizationLevel != 0) {
1106 Fn->removeFnAttr(llvm::Attribute::NoInline);
1107 Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
1108 Fn->addFnAttr(llvm::Attribute::AlwaysInline);
1109 }
1110 CodeGenFunction CGF(CGM);
1111 // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
1112 // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
1113 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(),
1114 Out->getLocation());
// NOTE(review): the declaration of `Scope` (original line 1115, presumably
// a CodeGenFunction::OMPPrivateScope) was dropped by extraction.
1116 Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm);
1117 Scope.addPrivate(
1118 In, CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>())
1119 .getAddress());
1120 Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm);
1121 Scope.addPrivate(
1122 Out, CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>())
1123 .getAddress());
1124 (void)Scope.Privatize();
// Initializer helpers first run omp_priv's own initializer, if non-trivial.
1125 if (!IsCombiner && Out->hasInit() &&
1126 !CGF.isTrivialInitializer(Out->getInit())) {
1127 CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out),
1128 Out->getType().getQualifiers(),
1129 /*IsInitializer=*/true);
1130 }
1131 if (CombinerInitializer)
1132 CGF.EmitIgnoredExpr(CombinerInitializer);
1133 Scope.ForceCleanup();
1134 CGF.FinishFunction();
1135 return Fn;
1136}
1137
// Emits (once per declaration) the combiner and optional initializer
// helpers for a user-defined reduction and caches them in UDRMap; also
// records the mapping per-function so it can be cleaned up later.
// NOTE(review): the signature lines (original 1138-1139) were dropped by
// extraction.
1140 if (UDRMap.count(D) > 0)
1141 return;
1142 llvm::Function *Combiner = emitCombinerOrInitializer(
1143 CGM, D->getType(), D->getCombiner(),
// NOTE(review): argument lines 1144-1145 (the In/Out VarDecls) were
// dropped by extraction.
1146 /*IsCombiner=*/true);
1147 llvm::Function *Initializer = nullptr;
1148 if (const Expr *Init = D->getInitializer()) {
// NOTE(review): several argument lines (1149, 1151, 1153-1154) were
// dropped by extraction.
1150 CGM, D->getType(),
1152 : nullptr,
1155 /*IsCombiner=*/false);
1156 }
1157 UDRMap.try_emplace(D, Combiner, Initializer);
1158 if (CGF)
1159 FunctionUDRMap[CGF->CurFn].push_back(D);
1160}
1161
// Returns the cached (combiner, initializer) pair for a user-defined
// reduction, emitting the helpers on first request.
// NOTE(review): the qualified function name line (original 1163) was
// dropped by extraction.
std::pair<llvm::Function *, llvm::Function *>
1164 auto I = UDRMap.find(D);
1165 if (I != UDRMap.end())
1166 return I->second;
// Not cached yet: emit it (no CodeGenFunction context) and re-query.
1167 emitUserDefinedReduction(/*CGF=*/nullptr, D);
1168 return UDRMap.lookup(D);
1169}
1170
namespace {
1172// Temporary RAII solution to perform a push/pop stack event on the OpenMP IR
1173// Builder if one is present.
1174struct PushAndPopStackRAII {
1175 PushAndPopStackRAII(llvm::OpenMPIRBuilder *OMPBuilder, CodeGenFunction &CGF,
1176 bool HasCancel, llvm::omp::Directive Kind)
1177 : OMPBuilder(OMPBuilder) {
// No-op when no OpenMPIRBuilder is in use.
1178 if (!OMPBuilder)
1179 return;
1180
1181 // The following callback is the crucial part of clangs cleanup process.
1182 //
1183 // NOTE:
1184 // Once the OpenMPIRBuilder is used to create parallel regions (and
1185 // similar), the cancellation destination (Dest below) is determined via
1186 // IP. That means if we have variables to finalize we split the block at IP,
1187 // use the new block (=BB) as destination to build a JumpDest (via
1188 // getJumpDestInCurrentScope(BB)) which then is fed to
1189 // EmitBranchThroughCleanup. Furthermore, there will not be the need
1190 // to push & pop an FinalizationInfo object.
1191 // The FiniCB will still be needed but at the point where the
1192 // OpenMPIRBuilder is asked to construct a parallel (or similar) construct.
1193 auto FiniCB = [&CGF](llvm::OpenMPIRBuilder::InsertPointTy IP) {
1194 assert(IP.getBlock()->end() == IP.getPoint() &&
1195 "Clang CG should cause non-terminated block!");
1196 CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
1197 CGF.Builder.restoreIP(IP);
// NOTE(review): the declaration of `Dest` (original line 1198, presumably
// a CodeGenFunction::JumpDest) was dropped by extraction.
1199 CGF.getOMPCancelDestination(OMPD_parallel);
1200 CGF.EmitBranchThroughCleanup(Dest);
1201 return llvm::Error::success();
1202 };
1203
1204 // TODO: Remove this once we emit parallel regions through the
1205 // OpenMPIRBuilder as it can do this setup internally.
1206 llvm::OpenMPIRBuilder::FinalizationInfo FI({FiniCB, Kind, HasCancel});
1207 OMPBuilder->pushFinalizationCB(std::move(FI));
1208 }
// Pop the finalization callback that the constructor pushed.
1209 ~PushAndPopStackRAII() {
1210 if (OMPBuilder)
1211 OMPBuilder->popFinalizationCB();
1212 }
1213 llvm::OpenMPIRBuilder *OMPBuilder;
1214};
1215} // namespace
1216
// Common worker that outlines the captured statement of a parallel/teams
// directive into a helper function, propagating the directive's cancel
// flag so cancellation barriers are emitted correctly.
// NOTE(review): the line carrying the function name (original 1217) was
// dropped by extraction.
1218 CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS,
1219 const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
1220 const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) {
1221 assert(ThreadIDVar->getType()->isPointerType() &&
1222 "thread id variable must be of type kmp_int32 *");
1223 CodeGenFunction CGF(CGM, true);
// Determine hasCancel() across every directive kind that can carry a
// 'cancel' inside a parallel region.
1224 bool HasCancel = false;
1225 if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D))
1226 HasCancel = OPD->hasCancel();
1227 else if (const auto *OPD = dyn_cast<OMPTargetParallelDirective>(&D))
1228 HasCancel = OPD->hasCancel();
1229 else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D))
1230 HasCancel = OPSD->hasCancel();
1231 else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
1232 HasCancel = OPFD->hasCancel();
1233 else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D))
1234 HasCancel = OPFD->hasCancel();
1235 else if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D))
1236 HasCancel = OPFD->hasCancel();
1237 else if (const auto *OPFD =
1238 dyn_cast<OMPTeamsDistributeParallelForDirective>(&D))
1239 HasCancel = OPFD->hasCancel();
1240 else if (const auto *OPFD =
1241 dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D))
1242 HasCancel = OPFD->hasCancel();
1243
1244 // TODO: Temporarily inform the OpenMPIRBuilder, if any, about the new
1245 // parallel region to make cancellation barriers work properly.
1246 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
1247 PushAndPopStackRAII PSR(&OMPBuilder, CGF, HasCancel, InnermostKind);
1248 CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
1249 HasCancel, OutlinedHelperName);
1250 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
1251 return CGF.GenerateOpenMPCapturedStmtFunction(*CS, D);
1252}
1253
1254std::string CGOpenMPRuntime::getOutlinedHelperName(StringRef Name) const {
1255 std::string Suffix = getName({"omp_outlined"});
1256 return (Name + Suffix).str();
1257}
1258
// Convenience overload: derives the outlined-helper name from the current
// function being emitted.
// NOTE(review): the signature line (original 1259) was dropped by extraction.
1260 return getOutlinedHelperName(CGF.CurFn->getName());
1261}
1262
1263std::string CGOpenMPRuntime::getReductionFuncName(StringRef Name) const {
1264 std::string Suffix = getName({"omp", "reduction", "reduction_func"});
1265 return (Name + Suffix).str();
1266}
1267
// Outlines the 'parallel' captured region of the directive via the common
// parallel/teams worker.
// NOTE(review): the signature lines (original 1268-1269) and the call's
// opening line (original 1273) were dropped by extraction.
1270 const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
1271 const RegionCodeGenTy &CodeGen) {
1272 const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel);
1274 CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(CGF),
1275 CodeGen);
1276}
1277
// Outlines the 'teams' captured region of the directive via the common
// parallel/teams worker.
// NOTE(review): the signature lines (original 1278-1279) and the call's
// opening line (original 1283) were dropped by extraction.
1280 const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
1281 const RegionCodeGenTy &CodeGen) {
1282 const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams);
1284 CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(CGF),
1285 CodeGen);
1286}
1287
// Outlines the captured region of a task/taskloop directive. For untied
// tasks, installs an action that re-enqueues the task via __kmpc_omp_task
// and reports the number of task parts back through NumberOfParts.
// NOTE(review): the line with the function name (original 1288) was
// dropped by extraction.
1289 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1290 const VarDecl *PartIDVar, const VarDecl *TaskTVar,
1291 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
1292 bool Tied, unsigned &NumberOfParts) {
// Deferred codegen for untied tasks: call __kmpc_omp_task with the task
// descriptor loaded from TaskTVar.
1293 auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF,
1294 PrePostActionTy &) {
1295 llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc());
1296 llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getBeginLoc());
1297 llvm::Value *TaskArgs[] = {
1298 UpLoc, ThreadID,
1299 CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar),
1300 TaskTVar->getType()->castAs<PointerType>())
1301 .getPointer(CGF)};
1302 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
1303 CGM.getModule(), OMPRTL___kmpc_omp_task),
1304 TaskArgs);
1305 };
1306 CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar,
1307 UntiedCodeGen);
1308 CodeGen.setAction(Action);
// Unlike parallel regions, a task's thread id is a plain kmp_int32.
1309 assert(!ThreadIDVar->getType()->isPointerType() &&
1310 "thread id variable must be of type kmp_int32 for tasks");
1311 const OpenMPDirectiveKind Region =
1312 isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop
1313 : OMPD_task;
1314 const CapturedStmt *CS = D.getCapturedStmt(Region);
1315 bool HasCancel = false;
1316 if (const auto *TD = dyn_cast<OMPTaskDirective>(&D))
1317 HasCancel = TD->hasCancel();
1318 else if (const auto *TD = dyn_cast<OMPTaskLoopDirective>(&D))
1319 HasCancel = TD->hasCancel();
1320 else if (const auto *TD = dyn_cast<OMPMasterTaskLoopDirective>(&D))
1321 HasCancel = TD->hasCancel();
1322 else if (const auto *TD = dyn_cast<OMPParallelMasterTaskLoopDirective>(&D))
1323 HasCancel = TD->hasCancel();
1324
1325 CodeGenFunction CGF(CGM, true);
1326 CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
1327 InnermostKind, HasCancel, Action);
1328 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
1329 llvm::Function *Res = CGF.GenerateCapturedStmtFunction(*CS);
1330 if (!Tied)
1331 NumberOfParts = Action.getNumberOfParts();
1332 return Res;
1333}
1334
// Creates the "service" insertion point for the current function: a dummy
// no-op bitcast instruction used as an anchor for lazily-emitted runtime
// calls (thread id, location). Placed either at the current IP or right
// after the alloca insertion point.
// NOTE(review): the line with the function name (original 1335) was
// dropped by extraction.
1336 bool AtCurrentPoint) {
1337 auto &Elem = OpenMPLocThreadIDMap[CGF.CurFn];
1338 assert(!Elem.ServiceInsertPt && "Insert point is set already.");
1339
1340 llvm::Value *Undef = llvm::UndefValue::get(CGF.Int32Ty);
1341 if (AtCurrentPoint) {
1342 Elem.ServiceInsertPt = new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt",
1343 CGF.Builder.GetInsertBlock());
1344 } else {
1345 Elem.ServiceInsertPt = new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt");
1346 Elem.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt->getIterator());
1347 }
1348}
1349
// Removes the dummy service insertion point for the current function, if
// one was created.
// NOTE(review): the line with the function name (original 1350) was
// dropped by extraction.
1351 auto &Elem = OpenMPLocThreadIDMap[CGF.CurFn];
1352 if (Elem.ServiceInsertPt) {
// Clear the cached pointer before erasing the anchor instruction.
1353 llvm::Instruction *Ptr = Elem.ServiceInsertPt;
1354 Elem.ServiceInsertPt = nullptr;
1355 Ptr->eraseFromParent();
1356 }
1357}
1358
// Builds the ";file;function;line;column;;" source-location string used in
// ident_t structures, remapping the file path through debug info when
// available.
// NOTE(review): the line with the function name (original 1359) and the
// PresumedLoc computation (original 1364) were dropped by extraction.
1360 SourceLocation Loc,
1361 SmallString<128> &Buffer) {
1362 llvm::raw_svector_ostream OS(Buffer);
1363 // Build debug location
1365 OS << ";";
1366 if (auto *DbgInfo = CGF.getDebugInfo())
1367 OS << DbgInfo->remapDIPath(PLoc.getFilename());
1368 else
1369 OS << PLoc.getFilename();
1370 OS << ";";
1371 if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
1372 OS << FD->getQualifiedNameAsString();
1373 OS << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
1374 return OS.str();
1375}
1376
// Returns (creating if needed) the ident_t* location descriptor for Loc.
// Without debug info (and unless EmitLoc forces it) a default location
// string is used; otherwise function/file/line/column are encoded.
// NOTE(review): the line with the function name (original 1377) and the
// PresumedLoc computation (original 1391) were dropped by extraction.
1378 SourceLocation Loc,
1379 unsigned Flags, bool EmitLoc) {
1380 uint32_t SrcLocStrSize;
1381 llvm::Constant *SrcLocStr;
1382 if ((!EmitLoc && CGM.getCodeGenOpts().getDebugInfo() ==
1383 llvm::codegenoptions::NoDebugInfo) ||
1384 Loc.isInvalid()) {
1385 SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize);
1386 } else {
1387 std::string FunctionName;
1388 std::string FileName;
1389 if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
1390 FunctionName = FD->getQualifiedNameAsString();
1392 if (auto *DbgInfo = CGF.getDebugInfo())
1393 FileName = DbgInfo->remapDIPath(PLoc.getFilename());
1394 else
1395 FileName = PLoc.getFilename();
1396 unsigned Line = PLoc.getLine();
1397 unsigned Column = PLoc.getColumn();
1398 SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(FunctionName, FileName, Line,
1399 Column, SrcLocStrSize);
1400 }
1401 unsigned Reserved2Flags = getDefaultLocationReserved2Flags();
1402 return OMPBuilder.getOrCreateIdent(
1403 SrcLocStr, SrcLocStrSize, llvm::omp::IdentFlag(Flags), Reserved2Flags);
1404}
1405
// Returns the OpenMP thread id for the current function, caching it per
// function. Prefers (in order): the OpenMPIRBuilder path, a previously
// cached value, the thread-id parameter of an outlined region, and finally
// a __kmpc_global_thread_num call at the service insertion point.
// NOTE(review): the line with the function name (original 1406) was
// dropped by extraction.
1407 SourceLocation Loc) {
1408 assert(CGF.CurFn && "No function in current CodeGenFunction.");
1409 // If the OpenMPIRBuilder is used we need to use it for all thread id calls as
1410 // the clang invariants used below might be broken.
1411 if (CGM.getLangOpts().OpenMPIRBuilder) {
1412 SmallString<128> Buffer;
1413 OMPBuilder.updateToLocation(CGF.Builder.saveIP());
1414 uint32_t SrcLocStrSize;
1415 auto *SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(
1416 getIdentStringFromSourceLocation(CGF, Loc, Buffer), SrcLocStrSize);
1417 return OMPBuilder.getOrCreateThreadID(
1418 OMPBuilder.getOrCreateIdent(SrcLocStr, SrcLocStrSize));
1419 }
1420
1421 llvm::Value *ThreadID = nullptr;
1422 // Check whether we've already cached a load of the thread id in this
1423 // function.
1424 auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
1425 if (I != OpenMPLocThreadIDMap.end()) {
1426 ThreadID = I->second.ThreadID;
1427 if (ThreadID != nullptr)
1428 return ThreadID;
1429 }
1430 // If exceptions are enabled, do not use parameter to avoid possible crash.
1431 if (auto *OMPRegionInfo =
1432 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
1433 if (OMPRegionInfo->getThreadIDVariable()) {
1434 // Check if this an outlined function with thread id passed as argument.
1435 LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
1436 llvm::BasicBlock *TopBlock = CGF.AllocaInsertPt->getParent();
// Only reuse the parameter when the load is provably safe w.r.t. EH:
// either no landing pads are required, or the load happens in the entry
// block / current block.
1437 if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions ||
1438 !CGF.getLangOpts().CXXExceptions ||
1439 CGF.Builder.GetInsertBlock() == TopBlock ||
1440 !isa<llvm::Instruction>(LVal.getPointer(CGF)) ||
1441 cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
1442 TopBlock ||
1443 cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
1444 CGF.Builder.GetInsertBlock()) {
1445 ThreadID = CGF.EmitLoadOfScalar(LVal, Loc);
1446 // If value loaded in entry block, cache it and use it everywhere in
1447 // function.
1448 if (CGF.Builder.GetInsertBlock() == TopBlock)
1449 OpenMPLocThreadIDMap[CGF.CurFn].ThreadID = ThreadID;
1450 return ThreadID;
1451 }
1452 }
1453 }
1454
1455 // This is not an outlined function region - need to call __kmpc_int32
1456 // kmpc_global_thread_num(ident_t *loc).
1457 // Generate thread id value and cache this value for use across the
1458 // function.
1459 auto &Elem = OpenMPLocThreadIDMap[CGF.CurFn];
1460 if (!Elem.ServiceInsertPt)
// NOTE(review): the guarded statement (original line 1461, presumably
// `setLocThreadIdInsertPt(CGF);`) and line 1464 were dropped by extraction.
1462 CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
1463 CGF.Builder.SetInsertPoint(Elem.ServiceInsertPt);
1465 llvm::CallInst *Call = CGF.Builder.CreateCall(
1466 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
1467 OMPRTL___kmpc_global_thread_num),
1468 emitUpdateLocation(CGF, Loc));
1469 Call->setCallingConv(CGF.getRuntimeCC());
1470 Elem.ThreadID = Call;
1471 return Call;
1472}
1473
// Per-function teardown: drops the cached thread-id/insert-point entry and
// the user-defined reduction/mapper records registered for this function.
// NOTE(review): the line with the function name (original 1474) and lines
// 1477, 1490-1491 were dropped by extraction.
1475 assert(CGF.CurFn && "No function in current CodeGenFunction.");
1476 if (OpenMPLocThreadIDMap.count(CGF.CurFn)) {
1478 OpenMPLocThreadIDMap.erase(CGF.CurFn);
1479 }
1480 if (auto I = FunctionUDRMap.find(CGF.CurFn); I != FunctionUDRMap.end()) {
1481 for (const auto *D : I->second)
1482 UDRMap.erase(D);
1483 FunctionUDRMap.erase(I);
1484 }
// Same cleanup for user-defined mappers.
1485 if (auto I = FunctionUDMMap.find(CGF.CurFn); I != FunctionUDMMap.end()) {
1486 for (const auto *D : I->second)
1487 UDMMap.erase(D);
1488 FunctionUDMMap.erase(I);
1489 }
1492}
1493
// Returns the pointer-to-ident_t type from the OpenMPIRBuilder.
// NOTE(review): the signature line (original 1494) was dropped by extraction.
1495 return OMPBuilder.IdentPtr;
1496}
1497
// Maps a declaration's 'declare target device_type(...)' attribute to the
// OffloadEntriesInfoManager device-clause kind; None if absent/unknown.
// NOTE(review): the line with the function name and parameter (original
// 1499) was dropped by extraction.
static llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseKind
1500 std::optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
1501 OMPDeclareTargetDeclAttr::getDeviceType(VD);
1502 if (!DevTy)
1503 return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseNone;
1504
1505 switch ((int)*DevTy) { // Avoid -Wcovered-switch-default
1506 case OMPDeclareTargetDeclAttr::DT_Host:
1507 return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseHost;
1508 break;
1509 case OMPDeclareTargetDeclAttr::DT_NoHost:
1510 return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseNoHost;
1511 break;
1512 case OMPDeclareTargetDeclAttr::DT_Any:
1513 return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseAny;
1514 break;
1515 default:
1516 return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseNone;
1517 break;
1518 }
1519}
1520
// Maps a declaration's 'declare target' map-type (to/enter/link) to the
// OffloadEntriesInfoManager global-var entry kind; None if absent/unknown.
// NOTE(review): the line with the function name and parameter (original
// 1522) was dropped by extraction.
static llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind
1523 std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> MapType =
1524 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
1525 if (!MapType)
1526 return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryNone;
1527 switch ((int)*MapType) { // Avoid -Wcovered-switch-default
1528 case OMPDeclareTargetDeclAttr::MapTypeTy::MT_To:
1529 return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryTo;
1530 break;
// 'local' is treated like 'enter' for entry-kind purposes.
1531 case OMPDeclareTargetDeclAttr::MapTypeTy::MT_Enter:
1532 case OMPDeclareTargetDeclAttr::MapTypeTy::MT_Local:
1533 return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryEnter;
1534 break;
1535 case OMPDeclareTargetDeclAttr::MapTypeTy::MT_Link:
1536 return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryLink;
1537 break;
1538 default:
1539 return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryNone;
1540 break;
1541 }
1542}
1543
// Computes a unique target-region entry info (file id + line) for the
// given location, falling back to ignoring #line directives when the
// presumed file does not exist on disk.
static llvm::TargetRegionEntryInfo getEntryInfoFromPresumedLoc(
1545 CodeGenModule &CGM, llvm::OpenMPIRBuilder &OMPBuilder,
1546 SourceLocation BeginLoc, llvm::StringRef ParentName = "") {
1547
1548 auto FileInfoCallBack = [&]() {
// NOTE(review): the SourceManager declaration (original line 1549) was
// dropped by extraction.
1550 PresumedLoc PLoc = SM.getPresumedLoc(BeginLoc);
1551
1552 if (!CGM.getFileSystem()->exists(PLoc.getFilename()))
1553 PLoc = SM.getPresumedLoc(BeginLoc, /*UseLineDirectives=*/false);
1554
1555 return std::pair<std::string, uint64_t>(PLoc.getFilename(), PLoc.getLine());
1556 };
1557
1558 return OMPBuilder.getTargetEntryUniqueInfo(FileInfoCallBack,
1559 *CGM.getFileSystem(), ParentName);
1560}
1561
// Returns the address of a 'declare target' variable via the
// OpenMPIRBuilder, which may create a reference/link global as needed;
// returns an invalid ConstantAddress when no indirection is required.
// NOTE(review): the line with the function name (original 1562) and
// argument lines 1574/1577 were dropped by extraction.
1563 auto AddrOfGlobal = [&VD, this]() { return CGM.GetAddrOfGlobal(VD); };
1564
1565 auto LinkageForVariable = [&VD, this]() {
1566 return CGM.getLLVMLinkageVarDefinition(VD);
1567 };
1568
1569 std::vector<llvm::GlobalVariable *> GeneratedRefs;
1570
1571 llvm::Type *LlvmPtrTy = CGM.getTypes().ConvertTypeForMem(
1572 CGM.getContext().getPointerType(VD->getType()));
1573 llvm::Constant *addr = OMPBuilder.getAddrOfDeclareTargetVar(
1575 VD->hasDefinition(CGM.getContext()) == VarDecl::DeclarationOnly,
1576 VD->isExternallyVisible(),
1578 VD->getCanonicalDecl()->getBeginLoc()),
1579 CGM.getMangledName(VD), GeneratedRefs, CGM.getLangOpts().OpenMPSimd,
1580 CGM.getLangOpts().OMPTargetTriples, LlvmPtrTy, AddrOfGlobal,
1581 LinkageForVariable);
1582
1583 if (!addr)
1584 return ConstantAddress::invalid();
1585 return ConstantAddress(addr, LlvmPtrTy, CGM.getContext().getDeclAlign(VD));
1586}
1587
// Returns (creating lazily) the per-variable internal cache global
// ("<mangled-name>.cache.") used by __kmpc_threadprivate_cached. Only
// valid when TLS is not being used for threadprivate.
// NOTE(review): the line with the function name (original 1589) was
// dropped by extraction.
llvm::Constant *
1590 assert(!CGM.getLangOpts().OpenMPUseTLS ||
1591 !CGM.getContext().getTargetInfo().isTLSSupported());
1592 // Lookup the entry, lazily creating it if necessary.
1593 std::string Suffix = getName({"cache", ""});
1594 return OMPBuilder.getOrCreateInternalVariable(
1595 CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix).str());
1597
// Returns the address of the calling thread's copy of a threadprivate
// variable. With native TLS the original address is used directly;
// otherwise __kmpc_threadprivate_cached is called.
// NOTE(review): the line with the function name (original 1598) and the
// final argument line (original 1611, the cache global) were dropped by
// extraction.
1599 const VarDecl *VD,
1600 Address VDAddr,
1601 SourceLocation Loc) {
1602 if (CGM.getLangOpts().OpenMPUseTLS &&
1603 CGM.getContext().getTargetInfo().isTLSSupported())
1604 return VDAddr;
1605
1606 llvm::Type *VarTy = VDAddr.getElementType();
1607 llvm::Value *Args[] = {
1608 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
1609 CGF.Builder.CreatePointerCast(VDAddr.emitRawPointer(CGF), CGM.Int8PtrTy),
1610 CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)),
1612 return Address(
1613 CGF.EmitRuntimeCall(
1614 OMPBuilder.getOrCreateRuntimeFunction(
1615 CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
1616 Args),
1617 CGF.Int8Ty, VDAddr.getAlignment());
1618}
1619
// Registers ctor/cctor/dtor for a threadprivate variable with the KMP
// runtime: first forces runtime initialization via
// __kmpc_global_thread_num, then calls __kmpc_threadprivate_register.
// NOTE(review): the line with the function name (original 1620) was
// dropped by extraction.
1621 CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor,
1622 llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
1623 // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime
1624 // library.
1625 llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc);
1626 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
1627 CGM.getModule(), OMPRTL___kmpc_global_thread_num),
1628 OMPLoc);
1629 // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
1630 // to register constructor/destructor for variable.
1631 llvm::Value *Args[] = {
1632 OMPLoc,
1633 CGF.Builder.CreatePointerCast(VDAddr.emitRawPointer(CGF), CGM.VoidPtrTy),
1634 Ctor, CopyCtor, Dtor};
1635 CGF.EmitRuntimeCall(
1636 OMPBuilder.getOrCreateRuntimeFunction(
1637 CGM.getModule(), OMPRTL___kmpc_threadprivate_register),
1638 Args);
1639}
1640
// Emits registration of ctor/dtor helpers for a threadprivate definition.
// No-op with native TLS. For C++, synthesizes __kmpc_global_ctor_ /
// __kmpc_global_dtor_ functions and either registers them from the given
// CGF or wraps the registration in a global init function that is
// returned to the caller.
// NOTE(review): the line with the function name (original 1641) and lines
// 1661, 1686, 1693 were dropped by extraction.
1642 const VarDecl *VD, Address VDAddr, SourceLocation Loc,
1643 bool PerformInit, CodeGenFunction *CGF) {
1644 if (CGM.getLangOpts().OpenMPUseTLS &&
1645 CGM.getContext().getTargetInfo().isTLSSupported())
1646 return nullptr;
1647
// Only the defining declaration is registered, and only once per module.
1648 VD = VD->getDefinition(CGM.getContext());
1649 if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) {
1650 QualType ASTTy = VD->getType();
1651
1652 llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
1653 const Expr *Init = VD->getAnyInitializer();
1654 if (CGM.getLangOpts().CPlusPlus && PerformInit) {
1655 // Generate function that re-emits the declaration's initializer into the
1656 // threadprivate copy of the variable VD
1657 CodeGenFunction CtorCGF(CGM);
1658 FunctionArgList Args;
1659 ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
1660 /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
1662 Args.push_back(&Dst);
1663
1664 const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
1665 CGM.getContext().VoidPtrTy, Args);
1666 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
1667 std::string Name = getName({"__kmpc_global_ctor_", ""});
1668 llvm::Function *Fn =
1669 CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
1670 CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
1671 Args, Loc, Loc);
// The ctor receives the destination address, initializes it, and returns
// the same pointer, as the KMP runtime contract requires.
1672 llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar(
1673 CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
1674 CGM.getContext().VoidPtrTy, Dst.getLocation());
1675 Address Arg(ArgVal, CtorCGF.ConvertTypeForMem(ASTTy),
1676 VDAddr.getAlignment());
1677 CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
1678 /*IsInitializer=*/true);
1679 ArgVal = CtorCGF.EmitLoadOfScalar(
1680 CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
1681 CGM.getContext().VoidPtrTy, Dst.getLocation());
1682 CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
1683 CtorCGF.FinishFunction();
1684 Ctor = Fn;
1685 }
// NOTE(review): the guard for the dtor branch (original line 1686,
// presumably checking a non-trivial destructor) was dropped by extraction.
1687 // Generate function that emits destructor call for the threadprivate copy
1688 // of the variable VD
1689 CodeGenFunction DtorCGF(CGM);
1690 FunctionArgList Args;
1691 ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
1692 /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
1694 Args.push_back(&Dst);
1695
1696 const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
1697 CGM.getContext().VoidTy, Args);
1698 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
1699 std::string Name = getName({"__kmpc_global_dtor_", ""});
1700 llvm::Function *Fn =
1701 CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
1702 auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
1703 DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
1704 Loc, Loc);
1705 // Create a scope with an artificial location for the body of this function.
1706 auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
1707 llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar(
1708 DtorCGF.GetAddrOfLocalVar(&Dst),
1709 /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation());
1710 DtorCGF.emitDestroy(
1711 Address(ArgVal, DtorCGF.Int8Ty, VDAddr.getAlignment()), ASTTy,
1712 DtorCGF.getDestroyer(ASTTy.isDestructedType()),
1713 DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
1714 DtorCGF.FinishFunction();
1715 Dtor = Fn;
1716 }
1717 // Do not emit init function if it is not required.
1718 if (!Ctor && !Dtor)
1719 return nullptr;
1720
1721 // Copying constructor for the threadprivate variable.
1722 // Must be NULL - reserved by runtime, but currently it requires that this
1723 // parameter is always NULL. Otherwise it fires assertion.
1724 CopyCtor = llvm::Constant::getNullValue(CGM.DefaultPtrTy);
1725 if (Ctor == nullptr) {
1726 Ctor = llvm::Constant::getNullValue(CGM.DefaultPtrTy);
1727 }
1728 if (Dtor == nullptr) {
1729 Dtor = llvm::Constant::getNullValue(CGM.DefaultPtrTy);
1730 }
// Without a CodeGenFunction, wrap the registration in a standalone global
// init function and hand it back to the caller.
1731 if (!CGF) {
1732 auto *InitFunctionTy =
1733 llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
1734 std::string Name = getName({"__omp_threadprivate_init_", ""});
1735 llvm::Function *InitFunction = CGM.CreateGlobalInitOrCleanUpFunction(
1736 InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction());
1737 CodeGenFunction InitCGF(CGM);
1738 FunctionArgList ArgList;
1739 InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
1740 CGM.getTypes().arrangeNullaryFunction(), ArgList,
1741 Loc, Loc);
1742 emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
1743 InitCGF.FinishFunction();
1744 return InitFunction;
1745 }
1746 emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
1747 }
1748 return nullptr;
1749}
1750
1752 llvm::GlobalValue *GV) {
1753 std::optional<OMPDeclareTargetDeclAttr *> ActiveAttr =
1754 OMPDeclareTargetDeclAttr::getActiveAttr(FD);
1755
1756 // We only need to handle active 'indirect' declare target functions.
1757 if (!ActiveAttr || !(*ActiveAttr)->getIndirect())
1758 return;
1759
1760 // Get a mangled name to store the new device global in.
1761 llvm::TargetRegionEntryInfo EntryInfo = getEntryInfoFromPresumedLoc(
1763 SmallString<128> Name;
1764 OMPBuilder.OffloadInfoManager.getTargetRegionEntryFnName(Name, EntryInfo);
1765
1766 // We need to generate a new global to hold the address of the indirectly
1767 // called device function. Doing this allows us to keep the visibility and
1768 // linkage of the associated function unchanged while allowing the runtime to
1769 // access its value.
1770 llvm::GlobalValue *Addr = GV;
1771 if (CGM.getLangOpts().OpenMPIsTargetDevice) {
1772 llvm::PointerType *FnPtrTy = llvm::PointerType::get(
1773 CGM.getLLVMContext(),
1774 CGM.getModule().getDataLayout().getProgramAddressSpace());
1775 Addr = new llvm::GlobalVariable(
1776 CGM.getModule(), FnPtrTy,
1777 /*isConstant=*/true, llvm::GlobalValue::ExternalLinkage, GV, Name,
1778 nullptr, llvm::GlobalValue::NotThreadLocal,
1779 CGM.getModule().getDataLayout().getDefaultGlobalsAddressSpace());
1780 Addr->setVisibility(llvm::GlobalValue::ProtectedVisibility);
1781 }
1782
1783 // Register the indirect Vtable:
1784 // This is similar to OMPTargetGlobalVarEntryIndirect, except that the
1785 // size field refers to the size of memory pointed to, not the size of
1786 // the pointer symbol itself (which is implicitly the size of a pointer).
1787 OMPBuilder.OffloadInfoManager.registerDeviceGlobalVarEntryInfo(
1788 Name, Addr, CGM.GetTargetTypeStoreSize(CGM.VoidPtrTy).getQuantity(),
1789 llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryIndirect,
1790 llvm::GlobalValue::WeakODRLinkage);
1791}
1792
1793void CGOpenMPRuntime::registerVTableOffloadEntry(llvm::GlobalVariable *VTable,
1794 const VarDecl *VD) {
1795 // TODO: add logic to avoid duplicate vtable registrations per
1796 // translation unit; though for external linkage, this should no
1797 // longer be an issue - or at least we can avoid the issue by
1798 // checking for an existing offloading entry. But, perhaps the
1799 // better approach is to defer emission of the vtables and offload
1800 // entries until later (by tracking a list of items that need to be
1801 // emitted).
1802
1803 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
1804
1805 // Generate a new externally visible global to point to the
1806 // internally visible vtable. Doing this allows us to keep the
1807 // visibility and linkage of the associated vtable unchanged while
1808 // allowing the runtime to access its value. The externally
1809 // visible global var needs to be emitted with a unique mangled
1810 // name that won't conflict with similarly named (internal)
1811 // vtables in other translation units.
1812
1813 // Register vtable with source location of dynamic object in map
1814 // clause.
1815 llvm::TargetRegionEntryInfo EntryInfo = getEntryInfoFromPresumedLoc(
1817 VTable->getName());
1818
1819 llvm::GlobalVariable *Addr = VTable;
1820 SmallString<128> AddrName;
1821 OMPBuilder.OffloadInfoManager.getTargetRegionEntryFnName(AddrName, EntryInfo);
1822 AddrName.append("addr");
1823
1824 if (CGM.getLangOpts().OpenMPIsTargetDevice) {
1825 Addr = new llvm::GlobalVariable(
1826 CGM.getModule(), VTable->getType(),
1827 /*isConstant=*/true, llvm::GlobalValue::ExternalLinkage, VTable,
1828 AddrName,
1829 /*InsertBefore=*/nullptr, llvm::GlobalValue::NotThreadLocal,
1830 CGM.getModule().getDataLayout().getDefaultGlobalsAddressSpace());
1831 Addr->setVisibility(llvm::GlobalValue::ProtectedVisibility);
1832 }
1833 OMPBuilder.OffloadInfoManager.registerDeviceGlobalVarEntryInfo(
1834 AddrName, VTable,
1835 CGM.getDataLayout().getTypeAllocSize(VTable->getInitializer()->getType()),
1836 llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryIndirectVTable,
1837 llvm::GlobalValue::WeakODRLinkage);
1838}
1839
1842 const VarDecl *VD) {
1843 // Register C++ VTable to OpenMP Offload Entry if it's a new
1844 // CXXRecordDecl.
1845 if (CXXRecord && CXXRecord->isDynamicClass() &&
1846 !CGM.getOpenMPRuntime().VTableDeclMap.contains(CXXRecord)) {
1847 auto Res = CGM.getOpenMPRuntime().VTableDeclMap.try_emplace(CXXRecord, VD);
1848 if (Res.second) {
1849 CGM.EmitVTable(CXXRecord);
1850 CodeGenVTables VTables = CGM.getVTables();
1851 llvm::GlobalVariable *VTablesAddr = VTables.GetAddrOfVTable(CXXRecord);
1852 assert(VTablesAddr && "Expected non-null VTable address");
1853 CGM.getOpenMPRuntime().registerVTableOffloadEntry(VTablesAddr, VD);
1854 // Emit VTable for all the fields containing dynamic CXXRecord
1855 for (const FieldDecl *Field : CXXRecord->fields()) {
1856 if (CXXRecordDecl *RecordDecl = Field->getType()->getAsCXXRecordDecl())
1858 }
1859 // Emit VTable for all dynamic parent class
1860 for (CXXBaseSpecifier &Base : CXXRecord->bases()) {
1861 if (CXXRecordDecl *BaseDecl = Base.getType()->getAsCXXRecordDecl())
1862 emitAndRegisterVTable(CGM, BaseDecl, VD);
1863 }
1864 }
1865 }
1866}
1867
  // NOTE(review): this function's signature line (original line 1868) was
  // dropped by the source rendering; the body uses `D` (an executable
  // directive) and the member `CGM` -- consult VCS for the declaration.
  // Register VTable by scanning through the map clause of OpenMP target region.
  // Get CXXRecordDecl and VarDecl from Expr.
  auto GetVTableDecl = [](const Expr *E) {
    QualType VDTy = E->getType();
    CXXRecordDecl *CXXRecord = nullptr;
    // Look through references to the referenced object type.
    if (const auto *RefType = VDTy->getAs<LValueReferenceType>())
      VDTy = RefType->getPointeeType();
    // NOTE(review): the statement for the pointer branch (original line 1877)
    // was dropped by the source rendering; presumably
    // `CXXRecord = VDTy->getPointeeType()->getAsCXXRecordDecl();` -- confirm.
    if (VDTy->isPointerType())
    else
      CXXRecord = VDTy->getAsCXXRecordDecl();

    // Find the mapped variable: either a direct reference, or the base
    // variable of a member access.
    const VarDecl *VD = nullptr;
    if (auto *DRE = dyn_cast<DeclRefExpr>(E)) {
      VD = cast<VarDecl>(DRE->getDecl());
    } else if (auto *MRE = dyn_cast<MemberExpr>(E)) {
      if (auto *BaseDRE = dyn_cast<DeclRefExpr>(MRE->getBase())) {
        if (auto *BaseVD = dyn_cast<VarDecl>(BaseDRE->getDecl()))
          VD = BaseVD;
      }
    }
    return std::pair<CXXRecordDecl *, const VarDecl *>(CXXRecord, VD);
  };
  // Collect VTable from OpenMP map clause.
  for (const auto *C : D.getClausesOfKind<OMPMapClause>()) {
    for (const auto *E : C->varlist()) {
      auto DeclPair = GetVTableDecl(E);
      // Ensure VD is not null; CXXRecord may still be null and is filtered
      // inside emitAndRegisterVTable.
      if (DeclPair.second)
        emitAndRegisterVTable(CGM, DeclPair.first, DeclPair.second);
    }
  }
}
1902
1904 QualType VarType,
1905 StringRef Name) {
1906 std::string Suffix = getName({"artificial", ""});
1907 llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType);
1908 llvm::GlobalVariable *GAddr = OMPBuilder.getOrCreateInternalVariable(
1909 VarLVType, Twine(Name).concat(Suffix).str());
1910 if (CGM.getLangOpts().OpenMP && CGM.getLangOpts().OpenMPUseTLS &&
1911 CGM.getTarget().isTLSSupported()) {
1912 GAddr->setThreadLocal(/*Val=*/true);
1913 return Address(GAddr, GAddr->getValueType(),
1914 CGM.getContext().getTypeAlignInChars(VarType));
1915 }
1916 std::string CacheSuffix = getName({"cache", ""});
1917 llvm::Value *Args[] = {
1920 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy),
1921 CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy,
1922 /*isSigned=*/false),
1923 OMPBuilder.getOrCreateInternalVariable(
1924 CGM.VoidPtrPtrTy,
1925 Twine(Name).concat(Suffix).concat(CacheSuffix).str())};
1926 return Address(
1928 CGF.EmitRuntimeCall(
1929 OMPBuilder.getOrCreateRuntimeFunction(
1930 CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
1931 Args),
1932 CGF.Builder.getPtrTy(0)),
1933 VarLVType, CGM.getContext().getTypeAlignInChars(VarType));
1934}
1935
1937 const RegionCodeGenTy &ThenGen,
1938 const RegionCodeGenTy &ElseGen) {
1939 CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());
1940
1941 // If the condition constant folds and can be elided, try to avoid emitting
1942 // the condition and the dead arm of the if/else.
1943 bool CondConstant;
1944 if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
1945 if (CondConstant)
1946 ThenGen(CGF);
1947 else
1948 ElseGen(CGF);
1949 return;
1950 }
1951
1952 // Otherwise, the condition did not fold, or we couldn't elide it. Just
1953 // emit the conditional branch.
1954 llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then");
1955 llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else");
1956 llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end");
1957 CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0);
1958
1959 // Emit the 'then' code.
1960 CGF.EmitBlock(ThenBlock);
1961 ThenGen(CGF);
1962 CGF.EmitBranch(ContBlock);
1963 // Emit the 'else' code if present.
1964 // There is no need to emit line number for unconditional branch.
1966 CGF.EmitBlock(ElseBlock);
1967 ElseGen(CGF);
1968 // There is no need to emit line number for unconditional branch.
1970 CGF.EmitBranch(ContBlock);
1971 // Emit the continuation block for code after the if.
1972 CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
1973}
1974
1976 CodeGenFunction &CGF, SourceLocation Loc, llvm::Function *OutlinedFn,
1977 ArrayRef<llvm::Value *> CapturedVars, const Expr *IfCond,
1978 llvm::Value *NumThreads, OpenMPNumThreadsClauseModifier NumThreadsModifier,
1979 OpenMPSeverityClauseKind Severity, const Expr *Message) {
1980 if (!CGF.HaveInsertPoint())
1981 return;
1982 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
1983 auto &M = CGM.getModule();
1984 auto &&ThenGen = [&M, OutlinedFn, CapturedVars, RTLoc,
1985 this](CodeGenFunction &CGF, PrePostActionTy &) {
1986 // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
1987 llvm::Value *Args[] = {
1988 RTLoc,
1989 CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
1990 OutlinedFn};
1992 RealArgs.append(std::begin(Args), std::end(Args));
1993 RealArgs.append(CapturedVars.begin(), CapturedVars.end());
1994
1995 llvm::FunctionCallee RTLFn =
1996 OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_fork_call);
1997 CGF.EmitRuntimeCall(RTLFn, RealArgs);
1998 };
1999 auto &&ElseGen = [&M, OutlinedFn, CapturedVars, RTLoc, Loc,
2000 this](CodeGenFunction &CGF, PrePostActionTy &) {
2002 llvm::Value *ThreadID = RT.getThreadID(CGF, Loc);
2003 // Build calls:
2004 // __kmpc_serialized_parallel(&Loc, GTid);
2005 llvm::Value *Args[] = {RTLoc, ThreadID};
2006 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2007 M, OMPRTL___kmpc_serialized_parallel),
2008 Args);
2009
2010 // OutlinedFn(&GTid, &zero_bound, CapturedStruct);
2011 Address ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc);
2012 RawAddress ZeroAddrBound =
2014 /*Name=*/".bound.zero.addr");
2015 CGF.Builder.CreateStore(CGF.Builder.getInt32(/*C*/ 0), ZeroAddrBound);
2017 // ThreadId for serialized parallels is 0.
2018 OutlinedFnArgs.push_back(ThreadIDAddr.emitRawPointer(CGF));
2019 OutlinedFnArgs.push_back(ZeroAddrBound.getPointer());
2020 OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
2021
2022 // Ensure we do not inline the function. This is trivially true for the ones
2023 // passed to __kmpc_fork_call but the ones called in serialized regions
2024 // could be inlined. This is not a perfect but it is closer to the invariant
2025 // we want, namely, every data environment starts with a new function.
2026 // TODO: We should pass the if condition to the runtime function and do the
2027 // handling there. Much cleaner code.
2028 OutlinedFn->removeFnAttr(llvm::Attribute::AlwaysInline);
2029 OutlinedFn->addFnAttr(llvm::Attribute::NoInline);
2030 RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);
2031
2032 // __kmpc_end_serialized_parallel(&Loc, GTid);
2033 llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID};
2034 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2035 M, OMPRTL___kmpc_end_serialized_parallel),
2036 EndArgs);
2037 };
2038 if (IfCond) {
2039 emitIfClause(CGF, IfCond, ThenGen, ElseGen);
2040 } else {
2041 RegionCodeGenTy ThenRCG(ThenGen);
2042 ThenRCG(CGF);
2043 }
2044}
2045
2046// If we're inside an (outlined) parallel region, use the region info's
2047// thread-ID variable (it is passed in a first argument of the outlined function
2048// as "kmp_int32 *gtid"). Otherwise, if we're not inside parallel region, but in
2049// regular serial code region, get thread ID by calling kmp_int32
2050// kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary and
2051// return the address of that temp.
2053 SourceLocation Loc) {
2054 if (auto *OMPRegionInfo =
2055 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
2056 if (OMPRegionInfo->getThreadIDVariable())
2057 return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress();
2058
2059 llvm::Value *ThreadID = getThreadID(CGF, Loc);
2060 QualType Int32Ty =
2061 CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
2062 Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
2063 CGF.EmitStoreOfScalar(ThreadID,
2064 CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty));
2065
2066 return ThreadIDTemp;
2067}
2068
2069llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
2070 std::string Prefix = Twine("gomp_critical_user_", CriticalName).str();
2071 std::string Name = getName({Prefix, "var"});
2072 return OMPBuilder.getOrCreateInternalVariable(KmpCriticalNameTy, Name);
2073}
2074
2075namespace {
2076/// Common pre(post)-action for different OpenMP constructs.
2077class CommonActionTy final : public PrePostActionTy {
2078 llvm::FunctionCallee EnterCallee;
2079 ArrayRef<llvm::Value *> EnterArgs;
2080 llvm::FunctionCallee ExitCallee;
2081 ArrayRef<llvm::Value *> ExitArgs;
2082 bool Conditional;
2083 llvm::BasicBlock *ContBlock = nullptr;
2084
2085public:
2086 CommonActionTy(llvm::FunctionCallee EnterCallee,
2087 ArrayRef<llvm::Value *> EnterArgs,
2088 llvm::FunctionCallee ExitCallee,
2089 ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false)
2090 : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),
2091 ExitArgs(ExitArgs), Conditional(Conditional) {}
2092 void Enter(CodeGenFunction &CGF) override {
2093 llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs);
2094 if (Conditional) {
2095 llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes);
2096 auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
2097 ContBlock = CGF.createBasicBlock("omp_if.end");
2098 // Generate the branch (If-stmt)
2099 CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
2100 CGF.EmitBlock(ThenBlock);
2101 }
2102 }
2103 void Done(CodeGenFunction &CGF) {
2104 // Emit the rest of blocks/branches
2105 CGF.EmitBranch(ContBlock);
2106 CGF.EmitBlock(ContBlock, true);
2107 }
2108 void Exit(CodeGenFunction &CGF) override {
2109 CGF.EmitRuntimeCall(ExitCallee, ExitArgs);
2110 }
2111};
2112} // anonymous namespace
2113
2115 StringRef CriticalName,
2116 const RegionCodeGenTy &CriticalOpGen,
2117 SourceLocation Loc, const Expr *Hint) {
2118 // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]);
2119 // CriticalOpGen();
2120 // __kmpc_end_critical(ident_t *, gtid, Lock);
2121 // Prepare arguments and build a call to __kmpc_critical
2122 if (!CGF.HaveInsertPoint())
2123 return;
2124 llvm::FunctionCallee RuntimeFcn = OMPBuilder.getOrCreateRuntimeFunction(
2125 CGM.getModule(),
2126 Hint ? OMPRTL___kmpc_critical_with_hint : OMPRTL___kmpc_critical);
2127 llvm::Value *LockVar = getCriticalRegionLock(CriticalName);
2128 unsigned LockVarArgIdx = 2;
2129 if (cast<llvm::GlobalVariable>(LockVar)->getAddressSpace() !=
2130 RuntimeFcn.getFunctionType()
2131 ->getParamType(LockVarArgIdx)
2132 ->getPointerAddressSpace())
2133 LockVar = CGF.Builder.CreateAddrSpaceCast(
2134 LockVar, RuntimeFcn.getFunctionType()->getParamType(LockVarArgIdx));
2135 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2136 LockVar};
2137 llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args),
2138 std::end(Args));
2139 if (Hint) {
2140 EnterArgs.push_back(CGF.Builder.CreateIntCast(
2141 CGF.EmitScalarExpr(Hint), CGM.Int32Ty, /*isSigned=*/false));
2142 }
2143 CommonActionTy Action(RuntimeFcn, EnterArgs,
2144 OMPBuilder.getOrCreateRuntimeFunction(
2145 CGM.getModule(), OMPRTL___kmpc_end_critical),
2146 Args);
2147 CriticalOpGen.setAction(Action);
2148 emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
2149}
2150
2152 const RegionCodeGenTy &MasterOpGen,
2153 SourceLocation Loc) {
2154 if (!CGF.HaveInsertPoint())
2155 return;
2156 // if(__kmpc_master(ident_t *, gtid)) {
2157 // MasterOpGen();
2158 // __kmpc_end_master(ident_t *, gtid);
2159 // }
2160 // Prepare arguments and build a call to __kmpc_master
2161 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2162 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2163 CGM.getModule(), OMPRTL___kmpc_master),
2164 Args,
2165 OMPBuilder.getOrCreateRuntimeFunction(
2166 CGM.getModule(), OMPRTL___kmpc_end_master),
2167 Args,
2168 /*Conditional=*/true);
2169 MasterOpGen.setAction(Action);
2170 emitInlinedDirective(CGF, OMPD_master, MasterOpGen);
2171 Action.Done(CGF);
2172}
2173
2175 const RegionCodeGenTy &MaskedOpGen,
2176 SourceLocation Loc, const Expr *Filter) {
2177 if (!CGF.HaveInsertPoint())
2178 return;
2179 // if(__kmpc_masked(ident_t *, gtid, filter)) {
2180 // MaskedOpGen();
2181 // __kmpc_end_masked(iden_t *, gtid);
2182 // }
2183 // Prepare arguments and build a call to __kmpc_masked
2184 llvm::Value *FilterVal = Filter
2185 ? CGF.EmitScalarExpr(Filter, CGF.Int32Ty)
2186 : llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/0);
2187 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2188 FilterVal};
2189 llvm::Value *ArgsEnd[] = {emitUpdateLocation(CGF, Loc),
2190 getThreadID(CGF, Loc)};
2191 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2192 CGM.getModule(), OMPRTL___kmpc_masked),
2193 Args,
2194 OMPBuilder.getOrCreateRuntimeFunction(
2195 CGM.getModule(), OMPRTL___kmpc_end_masked),
2196 ArgsEnd,
2197 /*Conditional=*/true);
2198 MaskedOpGen.setAction(Action);
2199 emitInlinedDirective(CGF, OMPD_masked, MaskedOpGen);
2200 Action.Done(CGF);
2201}
2202
2204 SourceLocation Loc) {
2205 if (!CGF.HaveInsertPoint())
2206 return;
2207 if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2208 OMPBuilder.createTaskyield(CGF.Builder);
2209 } else {
2210 // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
2211 llvm::Value *Args[] = {
2212 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2213 llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
2214 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2215 CGM.getModule(), OMPRTL___kmpc_omp_taskyield),
2216 Args);
2217 }
2218
2219 if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
2220 Region->emitUntiedSwitch(CGF);
2221}
2222
2224 const RegionCodeGenTy &TaskgroupOpGen,
2225 SourceLocation Loc) {
2226 if (!CGF.HaveInsertPoint())
2227 return;
2228 // __kmpc_taskgroup(ident_t *, gtid);
2229 // TaskgroupOpGen();
2230 // __kmpc_end_taskgroup(ident_t *, gtid);
2231 // Prepare arguments and build a call to __kmpc_taskgroup
2232 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2233 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2234 CGM.getModule(), OMPRTL___kmpc_taskgroup),
2235 Args,
2236 OMPBuilder.getOrCreateRuntimeFunction(
2237 CGM.getModule(), OMPRTL___kmpc_end_taskgroup),
2238 Args);
2239 TaskgroupOpGen.setAction(Action);
2240 emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen);
2241}
2242
2243/// Given an array of pointers to variables, project the address of a
2244/// given variable.
2246 unsigned Index, const VarDecl *Var) {
2247 // Pull out the pointer to the variable.
2248 Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index);
2249 llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr);
2250
2251 llvm::Type *ElemTy = CGF.ConvertTypeForMem(Var->getType());
2252 return Address(Ptr, ElemTy, CGF.getContext().getDeclAlign(Var));
2253}
2254
2256 CodeGenModule &CGM, llvm::Type *ArgsElemType,
2257 ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
2258 ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps,
2259 SourceLocation Loc) {
2260 ASTContext &C = CGM.getContext();
2261 // void copy_func(void *LHSArg, void *RHSArg);
2262 FunctionArgList Args;
2263 ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
2265 ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
2267 Args.push_back(&LHSArg);
2268 Args.push_back(&RHSArg);
2269 const auto &CGFI =
2270 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
2271 std::string Name =
2272 CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"});
2273 auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
2274 llvm::GlobalValue::InternalLinkage, Name,
2275 &CGM.getModule());
2277 if (!CGM.getCodeGenOpts().SampleProfileFile.empty())
2278 Fn->addFnAttr("sample-profile-suffix-elision-policy", "selected");
2279 Fn->setDoesNotRecurse();
2280 CodeGenFunction CGF(CGM);
2281 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
2282 // Dest = (void*[n])(LHSArg);
2283 // Src = (void*[n])(RHSArg);
2285 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
2286 CGF.Builder.getPtrTy(0)),
2287 ArgsElemType, CGF.getPointerAlign());
2289 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
2290 CGF.Builder.getPtrTy(0)),
2291 ArgsElemType, CGF.getPointerAlign());
2292 // *(Type0*)Dst[0] = *(Type0*)Src[0];
2293 // *(Type1*)Dst[1] = *(Type1*)Src[1];
2294 // ...
2295 // *(Typen*)Dst[n] = *(Typen*)Src[n];
2296 for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
2297 const auto *DestVar =
2298 cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
2299 Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);
2300
2301 const auto *SrcVar =
2302 cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
2303 Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);
2304
2305 const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
2306 QualType Type = VD->getType();
2307 CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);
2308 }
2309 CGF.FinishFunction();
2310 return Fn;
2311}
2312
2314 const RegionCodeGenTy &SingleOpGen,
2315 SourceLocation Loc,
2316 ArrayRef<const Expr *> CopyprivateVars,
2317 ArrayRef<const Expr *> SrcExprs,
2318 ArrayRef<const Expr *> DstExprs,
2319 ArrayRef<const Expr *> AssignmentOps) {
2320 if (!CGF.HaveInsertPoint())
2321 return;
2322 assert(CopyprivateVars.size() == SrcExprs.size() &&
2323 CopyprivateVars.size() == DstExprs.size() &&
2324 CopyprivateVars.size() == AssignmentOps.size());
2325 ASTContext &C = CGM.getContext();
2326 // int32 did_it = 0;
2327 // if(__kmpc_single(ident_t *, gtid)) {
2328 // SingleOpGen();
2329 // __kmpc_end_single(ident_t *, gtid);
2330 // did_it = 1;
2331 // }
2332 // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
2333 // <copy_func>, did_it);
2334
2335 Address DidIt = Address::invalid();
2336 if (!CopyprivateVars.empty()) {
2337 // int32 did_it = 0;
2338 QualType KmpInt32Ty =
2339 C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
2340 DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
2341 CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
2342 }
2343 // Prepare arguments and build a call to __kmpc_single
2344 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2345 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2346 CGM.getModule(), OMPRTL___kmpc_single),
2347 Args,
2348 OMPBuilder.getOrCreateRuntimeFunction(
2349 CGM.getModule(), OMPRTL___kmpc_end_single),
2350 Args,
2351 /*Conditional=*/true);
2352 SingleOpGen.setAction(Action);
2353 emitInlinedDirective(CGF, OMPD_single, SingleOpGen);
2354 if (DidIt.isValid()) {
2355 // did_it = 1;
2356 CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
2357 }
2358 Action.Done(CGF);
2359 // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
2360 // <copy_func>, did_it);
2361 if (DidIt.isValid()) {
2362 llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
2363 QualType CopyprivateArrayTy = C.getConstantArrayType(
2364 C.VoidPtrTy, ArraySize, nullptr, ArraySizeModifier::Normal,
2365 /*IndexTypeQuals=*/0);
2366 // Create a list of all private variables for copyprivate.
2367 Address CopyprivateList =
2368 CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
2369 for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
2370 Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I);
2371 CGF.Builder.CreateStore(
2373 CGF.EmitLValue(CopyprivateVars[I]).getPointer(CGF),
2374 CGF.VoidPtrTy),
2375 Elem);
2376 }
2377 // Build function that copies private values from single region to all other
2378 // threads in the corresponding parallel region.
2379 llvm::Value *CpyFn = emitCopyprivateCopyFunction(
2380 CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy), CopyprivateVars,
2381 SrcExprs, DstExprs, AssignmentOps, Loc);
2382 llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy);
2384 CopyprivateList, CGF.VoidPtrTy, CGF.Int8Ty);
2385 llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt);
2386 llvm::Value *Args[] = {
2387 emitUpdateLocation(CGF, Loc), // ident_t *<loc>
2388 getThreadID(CGF, Loc), // i32 <gtid>
2389 BufSize, // size_t <buf_size>
2390 CL.emitRawPointer(CGF), // void *<copyprivate list>
2391 CpyFn, // void (*) (void *, void *) <copy_func>
2392 DidItVal // i32 did_it
2393 };
2394 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2395 CGM.getModule(), OMPRTL___kmpc_copyprivate),
2396 Args);
2397 }
2398}
2399
2401 const RegionCodeGenTy &OrderedOpGen,
2402 SourceLocation Loc, bool IsThreads) {
2403 if (!CGF.HaveInsertPoint())
2404 return;
2405 // __kmpc_ordered(ident_t *, gtid);
2406 // OrderedOpGen();
2407 // __kmpc_end_ordered(ident_t *, gtid);
2408 // Prepare arguments and build a call to __kmpc_ordered
2409 if (IsThreads) {
2410 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2411 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2412 CGM.getModule(), OMPRTL___kmpc_ordered),
2413 Args,
2414 OMPBuilder.getOrCreateRuntimeFunction(
2415 CGM.getModule(), OMPRTL___kmpc_end_ordered),
2416 Args);
2417 OrderedOpGen.setAction(Action);
2418 emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
2419 return;
2420 }
2421 emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
2422}
2423
2425 unsigned Flags;
2426 if (Kind == OMPD_for)
2427 Flags = OMP_IDENT_BARRIER_IMPL_FOR;
2428 else if (Kind == OMPD_sections)
2429 Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS;
2430 else if (Kind == OMPD_single)
2431 Flags = OMP_IDENT_BARRIER_IMPL_SINGLE;
2432 else if (Kind == OMPD_barrier)
2433 Flags = OMP_IDENT_BARRIER_EXPL;
2434 else
2435 Flags = OMP_IDENT_BARRIER_IMPL;
2436 return Flags;
2437}
2438
2440 CodeGenFunction &CGF, const OMPLoopDirective &S,
2441 OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const {
2442 // Check if the loop directive is actually a doacross loop directive. In this
2443 // case choose static, 1 schedule.
2444 if (llvm::any_of(
2445 S.getClausesOfKind<OMPOrderedClause>(),
2446 [](const OMPOrderedClause *C) { return C->getNumForLoops(); })) {
2447 ScheduleKind = OMPC_SCHEDULE_static;
2448 // Chunk size is 1 in this case.
2449 llvm::APInt ChunkSize(32, 1);
2450 ChunkExpr = IntegerLiteral::Create(
2451 CGF.getContext(), ChunkSize,
2452 CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
2453 SourceLocation());
2454 }
2455}
2456
2458 OpenMPDirectiveKind Kind, bool EmitChecks,
2459 bool ForceSimpleCall) {
2460 // Check if we should use the OMPBuilder
2461 auto *OMPRegionInfo =
2462 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo);
2463 if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2464 llvm::OpenMPIRBuilder::InsertPointTy AfterIP =
2465 cantFail(OMPBuilder.createBarrier(CGF.Builder, Kind, ForceSimpleCall,
2466 EmitChecks));
2467 CGF.Builder.restoreIP(AfterIP);
2468 return;
2469 }
2470
2471 if (!CGF.HaveInsertPoint())
2472 return;
2473 // Build call __kmpc_cancel_barrier(loc, thread_id);
2474 // Build call __kmpc_barrier(loc, thread_id);
2475 unsigned Flags = getDefaultFlagsForBarriers(Kind);
2476 // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc,
2477 // thread_id);
2478 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
2479 getThreadID(CGF, Loc)};
2480 if (OMPRegionInfo) {
2481 if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) {
2482 llvm::Value *Result = CGF.EmitRuntimeCall(
2483 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
2484 OMPRTL___kmpc_cancel_barrier),
2485 Args);
2486 if (EmitChecks) {
2487 // if (__kmpc_cancel_barrier()) {
2488 // exit from construct;
2489 // }
2490 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
2491 llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
2492 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
2493 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
2494 CGF.EmitBlock(ExitBB);
2495 // exit from construct;
2496 CodeGenFunction::JumpDest CancelDestination =
2497 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
2498 CGF.EmitBranchThroughCleanup(CancelDestination);
2499 CGF.EmitBlock(ContBB, /*IsFinished=*/true);
2500 }
2501 return;
2502 }
2503 }
2504 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2505 CGM.getModule(), OMPRTL___kmpc_barrier),
2506 Args);
2507}
2508
2510 Expr *ME, bool IsFatal) {
2511 llvm::Value *MVL = ME ? CGF.EmitScalarExpr(ME)
2512 : llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
2513 // Build call void __kmpc_error(ident_t *loc, int severity, const char
2514 // *message)
2515 llvm::Value *Args[] = {
2516 emitUpdateLocation(CGF, Loc, /*Flags=*/0, /*GenLoc=*/true),
2517 llvm::ConstantInt::get(CGM.Int32Ty, IsFatal ? 2 : 1),
2518 CGF.Builder.CreatePointerCast(MVL, CGM.Int8PtrTy)};
2519 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2520 CGM.getModule(), OMPRTL___kmpc_error),
2521 Args);
2522}
2523
2524/// Map the OpenMP loop schedule to the runtime enumeration.
2525static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
2526 bool Chunked, bool Ordered) {
2527 switch (ScheduleKind) {
// Static picks the chunked/non-chunked variant; ordered gets the OMP_ord_*
// enumerators, otherwise OMP_sch_*.
2528 case OMPC_SCHEDULE_static:
2529 return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked)
2530 : (Ordered ? OMP_ord_static : OMP_sch_static);
2531 case OMPC_SCHEDULE_dynamic:
2532 return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked;
2533 case OMPC_SCHEDULE_guided:
2534 return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked;
2535 case OMPC_SCHEDULE_runtime:
2536 return Ordered ? OMP_ord_runtime : OMP_sch_runtime;
2537 case OMPC_SCHEDULE_auto:
2538 return Ordered ? OMP_ord_auto : OMP_sch_auto;
// NOTE(review): a case label (original line 2539, the "unknown" schedule kind)
// is elided in this listing; the assert below belongs to it.
2540 assert(!Chunked && "chunk was specified but schedule kind not known");
2541 return Ordered ? OMP_ord_static : OMP_sch_static;
2542 }
2543 llvm_unreachable("Unexpected runtime schedule");
2544}
2545
2546/// Map the OpenMP distribute schedule to the runtime enumeration.
2547static OpenMPSchedType
// NOTE(review): the parameter line (original line 2548, taking the
// dist_schedule kind and a Chunked flag) is elided in this listing.
2549 // only static is allowed for dist_schedule
2550 return Chunked ? OMP_dist_sch_static_chunked : OMP_dist_sch_static;
2551}
2552
// Returns true iff the loop schedule maps to plain (non-chunked, unordered)
// static. NOTE(review): the first signature line is elided in this listing.
2554 bool Chunked) const {
2555 OpenMPSchedType Schedule =
2556 getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2557 return Schedule == OMP_sch_static;
2558}
2559
2561 OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
2562 OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2563 return Schedule == OMP_dist_sch_static;
2564}
2565
// Returns true iff the loop schedule maps to chunked (unordered) static.
// NOTE(review): the first signature line is elided in this listing.
2567 bool Chunked) const {
2568 OpenMPSchedType Schedule =
2569 getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2570 return Schedule == OMP_sch_static_chunked;
2571}
2572
// dist_schedule overload: true iff it maps to chunked distribute-static.
// NOTE(review): the first signature line is elided in this listing.
2574 OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
2575 OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2576 return Schedule == OMP_dist_sch_static_chunked;
2577}
2578
// Anything that does not map to plain static is treated as dynamic dispatch.
// NOTE(review): the signature line of this definition is elided in this
// listing; the query is made with Chunked=false and Ordered=false.
2580 OpenMPSchedType Schedule =
2581 getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
2582 assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
2583 return Schedule != OMP_sch_static;
2584}
2585
// Folds the monotonic/nonmonotonic/simd schedule modifiers into the runtime
// schedule value. Returns Schedule | Modifier (modifier bits are OR-ed in).
2586static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule,
// NOTE(review): the parameter lines for the two modifiers (M1, M2) are elided
// in this listing.
2589 int Modifier = 0;
2590 switch (M1) {
2591 case OMPC_SCHEDULE_MODIFIER_monotonic:
2592 Modifier = OMP_sch_modifier_monotonic;
2593 break;
2594 case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2595 Modifier = OMP_sch_modifier_nonmonotonic;
2596 break;
// The simd modifier upgrades chunked static to the balanced-chunked variant.
2597 case OMPC_SCHEDULE_MODIFIER_simd:
2598 if (Schedule == OMP_sch_static_chunked)
2599 Schedule = OMP_sch_static_balanced_chunked;
2600 break;
// NOTE(review): the remaining case labels (original lines 2601-2602) are
// elided in this listing.
2603 break;
2604 }
2605 switch (M2) {
2606 case OMPC_SCHEDULE_MODIFIER_monotonic:
2607 Modifier = OMP_sch_modifier_monotonic;
2608 break;
2609 case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2610 Modifier = OMP_sch_modifier_nonmonotonic;
2611 break;
2612 case OMPC_SCHEDULE_MODIFIER_simd:
2613 if (Schedule == OMP_sch_static_chunked)
2614 Schedule = OMP_sch_static_balanced_chunked;
2615 break;
// NOTE(review): the remaining case labels (original lines 2616-2617) are
// elided in this listing.
2618 break;
2619 }
2620 // OpenMP 5.0, 2.9.2 Worksharing-Loop Construct, Desription.
2621 // If the static schedule kind is specified or if the ordered clause is
2622 // specified, and if the nonmonotonic modifier is not specified, the effect is
2623 // as if the monotonic modifier is specified. Otherwise, unless the monotonic
2624 // modifier is specified, the effect is as if the nonmonotonic modifier is
2625 // specified.
2626 if (CGM.getLangOpts().OpenMP >= 50 && Modifier == 0) {
2627 if (!(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static ||
2628 Schedule == OMP_sch_static_balanced_chunked ||
2629 Schedule == OMP_ord_static_chunked || Schedule == OMP_ord_static ||
2630 Schedule == OMP_dist_sch_static_chunked ||
2631 Schedule == OMP_dist_sch_static))
2632 Modifier = OMP_sch_modifier_nonmonotonic;
2633 }
2634 return Schedule | Modifier;
2635}
2636
// Emits the __kmpc_dispatch_init_* call for dynamically scheduled loops.
// NOTE(review): the first signature line of this definition is elided in this
// listing.
2639 const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
2640 bool Ordered, const DispatchRTInput &DispatchValues) {
2641 if (!CGF.HaveInsertPoint())
2642 return;
2643 OpenMPSchedType Schedule = getRuntimeSchedule(
2644 ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered);
// Static schedules must not reach the dynamic-dispatch entry point unless
// the loop is ordered.
2645 assert(Ordered ||
2646 (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
2647 Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked &&
2648 Schedule != OMP_sch_static_balanced_chunked));
2649 // Call __kmpc_dispatch_init(
2650 // ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
2651 // kmp_int[32|64] lower, kmp_int[32|64] upper,
2652 // kmp_int[32|64] stride, kmp_int[32|64] chunk);
2653
2654 // If the Chunk was not specified in the clause - use default value 1.
2655 llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk
2656 : CGF.Builder.getIntN(IVSize, 1);
2657 llvm::Value *Args[] = {
2658 emitUpdateLocation(CGF, Loc),
2659 getThreadID(CGF, Loc),
2660 CGF.Builder.getInt32(addMonoNonMonoModifier(
2661 CGM, Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type
2662 DispatchValues.LB, // Lower
2663 DispatchValues.UB, // Upper
2664 CGF.Builder.getIntN(IVSize, 1), // Stride
2665 Chunk // Chunk
2666 };
2667 CGF.EmitRuntimeCall(OMPBuilder.createDispatchInitFunction(IVSize, IVSigned),
2668 Args);
2669}
2670
// Emits the matching __kmpc_dispatch_deinit call for a dispatched loop.
// NOTE(review): the first signature line of this definition is elided in this
// listing.
2672 SourceLocation Loc) {
2673 if (!CGF.HaveInsertPoint())
2674 return;
2675 // Call __kmpc_dispatch_deinit(ident_t *loc, kmp_int32 tid);
2676 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2677 CGF.EmitRuntimeCall(OMPBuilder.createDispatchDeinitFunction(), Args);
2678}
2679
// Shared helper that builds the argument list for a __kmpc_for_static_init_*
// (or distribute variant) call and emits it. NOTE(review): the first
// signature line and the modifier-parameter line (original line 2683) are
// elided in this listing.
2681 CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId,
2682 llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule,
2684 const CGOpenMPRuntime::StaticRTInput &Values) {
2685 if (!CGF.HaveInsertPoint())
2686 return;
2687
// Only static-family schedules are valid here; ordered loops take the
// dynamic-dispatch path instead.
2688 assert(!Values.Ordered);
2689 assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
2690 Schedule == OMP_sch_static_balanced_chunked ||
2691 Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked ||
2692 Schedule == OMP_dist_sch_static ||
2693 Schedule == OMP_dist_sch_static_chunked);
2694
2695 // Call __kmpc_for_static_init(
2696 // ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
2697 // kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
2698 // kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
2699 // kmp_int[32|64] incr, kmp_int[32|64] chunk);
2700 llvm::Value *Chunk = Values.Chunk;
2701 if (Chunk == nullptr) {
2702 assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static ||
2703 Schedule == OMP_dist_sch_static) &&
2704 "expected static non-chunked schedule");
2705 // If the Chunk was not specified in the clause - use default value 1.
2706 Chunk = CGF.Builder.getIntN(Values.IVSize, 1);
2707 } else {
2708 assert((Schedule == OMP_sch_static_chunked ||
2709 Schedule == OMP_sch_static_balanced_chunked ||
2710 Schedule == OMP_ord_static_chunked ||
2711 Schedule == OMP_dist_sch_static_chunked) &&
2712 "expected static chunked schedule");
2713 }
2714 llvm::Value *Args[] = {
2715 UpdateLocation,
2716 ThreadId,
2717 CGF.Builder.getInt32(addMonoNonMonoModifier(CGF.CGM, Schedule, M1,
2718 M2)), // Schedule type
2719 Values.IL.emitRawPointer(CGF), // &isLastIter
2720 Values.LB.emitRawPointer(CGF), // &LB
2721 Values.UB.emitRawPointer(CGF), // &UB
2722 Values.ST.emitRawPointer(CGF), // &Stride
2723 CGF.Builder.getIntN(Values.IVSize, 1), // Incr
2724 Chunk // Chunk
2725 };
2726 CGF.EmitRuntimeCall(ForStaticInitFunction, Args);
2727}
2728
// Emits static-loop initialization for worksharing directives, delegating to
// emitForStaticInitCall. NOTE(review): the first signature line and original
// lines 2739/2746 are elided in this listing.
2730 SourceLocation Loc,
2731 OpenMPDirectiveKind DKind,
2732 const OpenMPScheduleTy &ScheduleKind,
2733 const StaticRTInput &Values) {
2734 OpenMPSchedType ScheduleNum = getRuntimeSchedule(
2735 ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered);
2736 assert((isOpenMPWorksharingDirective(DKind) || (DKind == OMPD_loop)) &&
2737 "Expected loop-based or sections-based directive.");
// The ident_t flags record whether this is a loop or a sections region.
2738 llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc,
2740 ? OMP_IDENT_WORK_LOOP
2741 : OMP_IDENT_WORK_SECTIONS);
2742 llvm::Value *ThreadId = getThreadID(CGF, Loc);
2743 llvm::FunctionCallee StaticInitFunction =
2744 OMPBuilder.createForStaticInitFunction(Values.IVSize, Values.IVSigned,
2745 false);
2747 emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
2748 ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values);
2749}
2750
// Emits static initialization for 'distribute' loops; on GPU targets the
// distribute-specific runtime entry is selected. NOTE(review): the first
// signature lines and original line 2768 (second unknown-modifier argument)
// are elided in this listing.
2754 const CGOpenMPRuntime::StaticRTInput &Values) {
2755 OpenMPSchedType ScheduleNum =
2756 getRuntimeSchedule(SchedKind, Values.Chunk != nullptr);
2757 llvm::Value *UpdatedLocation =
2758 emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE);
2759 llvm::Value *ThreadId = getThreadID(CGF, Loc);
2760 llvm::FunctionCallee StaticInitFunction;
// GPU device compilation uses the distribute-specific static-init entry.
2761 bool isGPUDistribute =
2762 CGM.getLangOpts().OpenMPIsTargetDevice && CGM.getTriple().isGPU();
2763 StaticInitFunction = OMPBuilder.createForStaticInitFunction(
2764 Values.IVSize, Values.IVSigned, isGPUDistribute);
2765
2766 emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
2767 ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown,
2769}
2770
// Emits the __kmpc_for_static_fini (or __kmpc_distribute_static_fini on GPU
// devices) call that closes a static worksharing region. NOTE(review): the
// first signature line and original lines 2782/2789 are elided in this
// listing.
2772 SourceLocation Loc,
2773 OpenMPDirectiveKind DKind) {
2774 assert((DKind == OMPD_distribute || DKind == OMPD_for ||
2775 DKind == OMPD_sections) &&
2776 "Expected distribute, for, or sections directive kind");
2777 if (!CGF.HaveInsertPoint())
2778 return;
2779 // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
2780 llvm::Value *Args[] = {
2781 emitUpdateLocation(CGF, Loc,
2783 (DKind == OMPD_target_teams_loop)
2784 ? OMP_IDENT_WORK_DISTRIBUTE
2785 : isOpenMPLoopDirective(DKind)
2786 ? OMP_IDENT_WORK_LOOP
2787 : OMP_IDENT_WORK_SECTIONS),
2788 getThreadID(CGF, Loc)};
2790 if (isOpenMPDistributeDirective(DKind) &&
2791 CGM.getLangOpts().OpenMPIsTargetDevice && CGM.getTriple().isGPU())
2792 CGF.EmitRuntimeCall(
2793 OMPBuilder.getOrCreateRuntimeFunction(
2794 CGM.getModule(), OMPRTL___kmpc_distribute_static_fini),
2795 Args);
2796 else
2797 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2798 CGM.getModule(), OMPRTL___kmpc_for_static_fini),
2799 Args);
2800}
2801
// Notifies the runtime that an ordered iteration finished
// (__kmpc_dispatch_fini_*). NOTE(review): the first signature line of this
// definition is elided in this listing.
2803 SourceLocation Loc,
2804 unsigned IVSize,
2805 bool IVSigned) {
2806 if (!CGF.HaveInsertPoint())
2807 return;
2808 // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
2809 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2810 CGF.EmitRuntimeCall(OMPBuilder.createDispatchFiniFunction(IVSize, IVSigned),
2811 Args);
2812}
2813
// Fetches the next chunk of a dynamically scheduled loop via
// __kmpc_dispatch_next_*; the i32 runtime result is converted to bool
// (nonzero means more work remains). NOTE(review): the first signature line
// of this definition is elided in this listing.
2815 SourceLocation Loc, unsigned IVSize,
2816 bool IVSigned, Address IL,
2817 Address LB, Address UB,
2818 Address ST) {
2819 // Call __kmpc_dispatch_next(
2820 // ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
2821 // kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
2822 // kmp_int[32|64] *p_stride);
2823 llvm::Value *Args[] = {
2824 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2825 IL.emitRawPointer(CGF), // &isLastIter
2826 LB.emitRawPointer(CGF), // &Lower
2827 UB.emitRawPointer(CGF), // &Upper
2828 ST.emitRawPointer(CGF) // &Stride
2829 };
2830 llvm::Value *Call = CGF.EmitRuntimeCall(
2831 OMPBuilder.createDispatchNextFunction(IVSize, IVSigned), Args);
2832 return CGF.EmitScalarConversion(
2833 Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1),
2834 CGF.getContext().BoolTy, Loc);
2835}
2836
// Lowers a 'message' clause expression to an i8*-compatible value; a missing
// clause yields a null pointer. NOTE(review): the first signature line of
// this definition is elided in this listing.
2838 const Expr *Message,
2839 SourceLocation Loc) {
2840 if (!Message)
2841 return llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
2842 return CGF.EmitScalarExpr(Message);
2843}
2844
// Lowers a 'severity' clause to the runtime's i32 encoding: 1 = warning,
// 2 = fatal (the default when no clause is present).
2845llvm::Value *
2847 SourceLocation Loc) {
2848 // OpenMP 6.0, 10.4: "If no severity clause is specified then the effect is
2849 // as if sev-level is fatal."
2850 return llvm::ConstantInt::get(CGM.Int32Ty,
2851 Severity == OMPC_SEVERITY_warning ? 1 : 2);
2852}
2853
// Emits __kmpc_push_num_threads (or the _strict variant when the strict
// modifier is present, which also passes severity and message arguments).
// NOTE(review): the first signature line and original lines 2856/2861 (the
// Args container declaration) are elided in this listing.
2855 CodeGenFunction &CGF, llvm::Value *NumThreads, SourceLocation Loc,
2857 SourceLocation SeverityLoc, const Expr *Message,
2858 SourceLocation MessageLoc) {
2859 if (!CGF.HaveInsertPoint())
2860 return;
2862 {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2863 CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)});
2864 // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
2865 // or __kmpc_push_num_threads_strict(&loc, global_tid, num_threads, severity,
2866 // messsage) if strict modifier is used.
2867 RuntimeFunction FnID = OMPRTL___kmpc_push_num_threads;
2868 if (Modifier == OMPC_NUMTHREADS_strict) {
2869 FnID = OMPRTL___kmpc_push_num_threads_strict;
2870 Args.push_back(emitSeverityClause(Severity, SeverityLoc));
2871 Args.push_back(emitMessageClause(CGF, Message, MessageLoc));
2872 }
2873 CGF.EmitRuntimeCall(
2874 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), FnID), Args);
2875}
2876
// Emits __kmpc_push_proc_bind to communicate the proc_bind policy to the
// runtime. NOTE(review): the first signature line of this definition is
// elided in this listing.
2878 ProcBindKind ProcBind,
2879 SourceLocation Loc) {
2880 if (!CGF.HaveInsertPoint())
2881 return;
2882 assert(ProcBind != OMP_PROC_BIND_unknown && "Unsupported proc_bind value.");
2883 // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind)
2884 llvm::Value *Args[] = {
2885 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2886 llvm::ConstantInt::get(CGM.IntTy, unsigned(ProcBind), /*isSigned=*/true)};
2887 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2888 CGM.getModule(), OMPRTL___kmpc_push_proc_bind),
2889 Args);
2890}
2891
// Emits an OpenMP flush: via the OpenMPIRBuilder when enabled, otherwise a
// direct __kmpc_flush runtime call. NOTE(review): the first signature line of
// this definition is elided in this listing.
2893 SourceLocation Loc, llvm::AtomicOrdering AO) {
2894 if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2895 OMPBuilder.createFlush(CGF.Builder);
2896 } else {
2897 if (!CGF.HaveInsertPoint())
2898 return;
2899 // Build call void __kmpc_flush(ident_t *loc)
2900 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2901 CGM.getModule(), OMPRTL___kmpc_flush),
2902 emitUpdateLocation(CGF, Loc));
2903 }
2904}
2905
2906namespace {
2907/// Indexes of fields for type kmp_task_t.
2908/// The enumerator order must match the field order laid down by
2908/// createKmpTaskTRecordDecl below (shareds, routine, part_id, data1, data2,
2908/// then the taskloop-only fields).
2908enum KmpTaskTFields {
2909 /// List of shared variables.
2910 KmpTaskTShareds,
2911 /// Task routine.
2912 KmpTaskTRoutine,
2913 /// Partition id for the untied tasks.
2914 KmpTaskTPartId,
2915 /// Function with call of destructors for private variables.
2916 Data1,
2917 /// Task priority.
2918 Data2,
2919 /// (Taskloops only) Lower bound.
2920 KmpTaskTLowerBound,
2921 /// (Taskloops only) Upper bound.
2922 KmpTaskTUpperBound,
2923 /// (Taskloops only) Stride.
2924 KmpTaskTStride,
2925 /// (Taskloops only) Is last iteration flag.
2926 KmpTaskTLastIter,
2927 /// (Taskloops only) Reduction data.
2928 KmpTaskTReductions,
2929};
2930} // anonymous namespace
2931
// Asks the OpenMPIRBuilder to emit offload entries and their info metadata,
// supplying a callback that translates IR-level entry errors into clang
// diagnostics with a best-effort source location. NOTE(review): the signature
// line of this definition is elided in this listing.
2932 // If we are in simd mode or there are no entries, we don't need to do
2933 // anything.
2934 if (CGM.getLangOpts().OpenMPSimd || OMPBuilder.OffloadInfoManager.empty())
2935 return;
2936
2937 llvm::OpenMPIRBuilder::EmitMetadataErrorReportFunctionTy &&ErrorReportFn =
2938 [this](llvm::OpenMPIRBuilder::EmitMetadataErrorKind Kind,
2939 const llvm::TargetRegionEntryInfo &EntryInfo) -> void {
2940 SourceLocation Loc;
2941 if (Kind != llvm::OpenMPIRBuilder::EMIT_MD_GLOBAL_VAR_LINK_ERROR) {
// Recover a SourceLocation by matching the entry's device/file unique ID
// against the files known to the SourceManager.
2942 for (auto I = CGM.getContext().getSourceManager().fileinfo_begin(),
2943 E = CGM.getContext().getSourceManager().fileinfo_end();
2944 I != E; ++I) {
2945 if (I->getFirst().getUniqueID().getDevice() == EntryInfo.DeviceID &&
2946 I->getFirst().getUniqueID().getFile() == EntryInfo.FileID) {
2947 Loc = CGM.getContext().getSourceManager().translateFileLineCol(
2948 I->getFirst(), EntryInfo.Line, 1);
2949 break;
2950 }
2951 }
2952 }
2953 switch (Kind) {
2954 case llvm::OpenMPIRBuilder::EMIT_MD_TARGET_REGION_ERROR: {
2955 CGM.getDiags().Report(Loc,
2956 diag::err_target_region_offloading_entry_incorrect)
2957 << EntryInfo.ParentName;
2958 } break;
2959 case llvm::OpenMPIRBuilder::EMIT_MD_DECLARE_TARGET_ERROR: {
2960 CGM.getDiags().Report(
2961 Loc, diag::err_target_var_offloading_entry_incorrect_with_parent)
2962 << EntryInfo.ParentName;
2963 } break;
2964 case llvm::OpenMPIRBuilder::EMIT_MD_GLOBAL_VAR_LINK_ERROR: {
2965 CGM.getDiags().Report(diag::err_target_var_offloading_entry_incorrect);
2966 } break;
2967 case llvm::OpenMPIRBuilder::EMIT_MD_GLOBAL_VAR_INDIRECT_ERROR: {
2968 unsigned DiagID = CGM.getDiags().getCustomDiagID(
2969 DiagnosticsEngine::Error, "Offloading entry for indirect declare "
2970 "target variable is incorrect: the "
2971 "address is invalid.");
2972 CGM.getDiags().Report(DiagID);
2973 } break;
2974 }
2975 };
2976
2977 OMPBuilder.createOffloadEntriesAndInfoMetadata(ErrorReportFn);
2978}
2980
// Lazily builds the kmp_routine_entry_t type (QualType and LLVM type) used
// for task entry points. NOTE(review): the signature line and original line
// 2986 (the FunctionProtoType::ExtProtoInfo declaration) are elided in this
// listing.
2982 if (!KmpRoutineEntryPtrTy) {
2983 // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.
2984 ASTContext &C = CGM.getContext();
2985 QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};
2987 KmpRoutineEntryPtrQTy = C.getPointerType(
2988 C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI));
2989 KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy);
2990 }
2991}
2992
2993namespace {
2994/// Bookkeeping for one privatized variable in a task region: the original
2994/// reference/decl, its private copy, and the init source (if firstprivate).
2994struct PrivateHelpersTy {
2995 PrivateHelpersTy(const Expr *OriginalRef, const VarDecl *Original,
2996 const VarDecl *PrivateCopy, const VarDecl *PrivateElemInit)
2997 : OriginalRef(OriginalRef), Original(Original), PrivateCopy(PrivateCopy),
2998 PrivateElemInit(PrivateElemInit) {}
2999 PrivateHelpersTy(const VarDecl *Original) : Original(Original) {}
3000 const Expr *OriginalRef = nullptr;
3001 const VarDecl *Original = nullptr;
3002 const VarDecl *PrivateCopy = nullptr;
3003 const VarDecl *PrivateElemInit = nullptr;
// A "local private" was built via the VarDecl-only constructor: it has an
// original decl but no reference, copy, or element initializer.
3004 bool isLocalPrivate() const {
3005 return !OriginalRef && !PrivateCopy && !PrivateElemInit;
3006 }
3007};
3008/// A private entry paired with its required alignment.
3008typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy;
3009} // anonymous namespace
3010
3011/// Returns true if \p VD carries an OMPAllocateDeclAttr that requests a
3011/// non-default allocator (i.e. the variable needs runtime allocation).
3011static bool isAllocatableDecl(const VarDecl *VD) {
3012 const VarDecl *CVD = VD->getCanonicalDecl();
3013 if (!CVD->hasAttr<OMPAllocateDeclAttr>())
3014 return false;
3015 const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
3016 // Use the default allocation.
3017 return !(AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc &&
3018 !AA->getAllocator());
3019}
3020
3021/// Builds the implicit ".kmp_privates.t" record holding one field per
3021/// privatized variable; returns nullptr when there are no privates.
3021static RecordDecl *
3023 if (!Privates.empty()) {
3024 ASTContext &C = CGM.getContext();
3025 // Build struct .kmp_privates_t. {
3026 // /* private vars */
3027 // };
3028 RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t");
3029 RD->startDefinition();
3030 for (const auto &Pair : Privates) {
3031 const VarDecl *VD = Pair.second.Original;
// NOTE(review): the line declaring the field's QualType (original line
// 3032) is elided in this listing.
3033 // If the private variable is a local variable with lvalue ref type,
3034 // allocate the pointer instead of the pointee type.
3035 if (Pair.second.isLocalPrivate()) {
3036 if (VD->getType()->isLValueReferenceType())
3037 Type = C.getPointerType(Type);
3038 if (isAllocatableDecl(VD))
3039 Type = C.getPointerType(Type);
3040 }
// NOTE(review): the line creating the FieldDecl (original line 3041) is
// elided in this listing; aligned attributes are copied onto it below.
3042 if (VD->hasAttrs()) {
3043 for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()),
3044 E(VD->getAttrs().end());
3045 I != E; ++I)
3046 FD->addAttr(*I);
3047 }
3048 }
3049 RD->completeDefinition();
3050 return RD;
3051 }
3052 return nullptr;
3053}
3054
3055/// Builds the implicit kmp_task_t record (and the kmp_cmplrdata_t union it
3055/// embeds); taskloop directives get the extra lb/ub/st/liter/reductions
3055/// fields. Field order must match the KmpTaskTFields enumerators.
3055static RecordDecl *
3057 QualType KmpInt32Ty,
3058 QualType KmpRoutineEntryPointerQTy) {
3059 ASTContext &C = CGM.getContext();
3060 // Build struct kmp_task_t {
3061 // void * shareds;
3062 // kmp_routine_entry_t routine;
3063 // kmp_int32 part_id;
3064 // kmp_cmplrdata_t data1;
3065 // kmp_cmplrdata_t data2;
3066 // For taskloops additional fields:
3067 // kmp_uint64 lb;
3068 // kmp_uint64 ub;
3069 // kmp_int64 st;
3070 // kmp_int32 liter;
3071 // void * reductions;
3072 // };
3073 RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TagTypeKind::Union);
3074 UD->startDefinition();
3075 addFieldToRecordDecl(C, UD, KmpInt32Ty);
3076 addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy);
3077 UD->completeDefinition();
3078 CanQualType KmpCmplrdataTy = C.getCanonicalTagType(UD);
3079 RecordDecl *RD = C.buildImplicitRecord("kmp_task_t");
3080 RD->startDefinition();
3081 addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3082 addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
3083 addFieldToRecordDecl(C, RD, KmpInt32Ty);
3084 addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
3085 addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
3086 if (isOpenMPTaskLoopDirective(Kind)) {
3087 QualType KmpUInt64Ty =
3088 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
3089 QualType KmpInt64Ty =
3090 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
3091 addFieldToRecordDecl(C, RD, KmpUInt64Ty);
3092 addFieldToRecordDecl(C, RD, KmpUInt64Ty);
3093 addFieldToRecordDecl(C, RD, KmpInt64Ty);
3094 addFieldToRecordDecl(C, RD, KmpInt32Ty);
3095 addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3096 }
3097 RD->completeDefinition();
3098 return RD;
3099}
3100
3101/// Builds kmp_task_t_with_privates: the kmp_task_t header followed by an
3101/// optional privates record. NOTE(review): the signature lines (original
3101/// lines 3102-3103) are elided in this listing.
3101static RecordDecl *
3104 ASTContext &C = CGM.getContext();
3105 // Build struct kmp_task_t_with_privates {
3106 // kmp_task_t task_data;
3107 // .kmp_privates_t. privates;
3108 // };
3109 RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates");
3110 RD->startDefinition();
3111 addFieldToRecordDecl(C, RD, KmpTaskTQTy);
// The privates field is only added when there is at least one private var.
3112 if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates))
3113 addFieldToRecordDecl(C, RD, C.getCanonicalTagType(PrivateRD));
3114 RD->completeDefinition();
3115 return RD;
3116}
3117
3118/// Emit a proxy function which accepts kmp_task_t as the second
3119/// argument.
3120/// \code
3121/// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
3122/// TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt,
3123/// For taskloops:
3124/// tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
3125/// tt->reductions, tt->shareds);
3126/// return 0;
3127/// }
3128/// \endcode
3129static llvm::Function *
3131 OpenMPDirectiveKind Kind, QualType KmpInt32Ty,
3132 QualType KmpTaskTWithPrivatesPtrQTy,
3133 QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
3134 QualType SharedsPtrTy, llvm::Function *TaskFunction,
3135 llvm::Value *TaskPrivatesMap) {
3136 ASTContext &C = CGM.getContext();
3137 FunctionArgList Args;
// NOTE(review): trailing argument lines of these two declarations (original
// lines 3139 and 3142) are elided in this listing.
3138 ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
3140 ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3141 KmpTaskTWithPrivatesPtrQTy.withRestrict(),
3143 Args.push_back(&GtidArg);
3144 Args.push_back(&TaskTypeArg);
3145 const auto &TaskEntryFnInfo =
3146 CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
3147 llvm::FunctionType *TaskEntryTy =
3148 CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
3149 std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""});
3150 auto *TaskEntry = llvm::Function::Create(
3151 TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
3152 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo);
3153 if (!CGM.getCodeGenOpts().SampleProfileFile.empty())
3154 TaskEntry->addFnAttr("sample-profile-suffix-elision-policy", "selected");
3155 TaskEntry->setDoesNotRecurse();
3156 CodeGenFunction CGF(CGM);
3157 CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args,
3158 Loc, Loc);
3159
3160 // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map,
3161 // tt,
3162 // For taskloops:
3163 // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
3164 // tt->task_data.shareds);
3165 llvm::Value *GtidParam = CGF.EmitLoadOfScalar(
3166 CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc);
3167 LValue TDBase = CGF.EmitLoadOfPointerLValue(
3168 CGF.GetAddrOfLocalVar(&TaskTypeArg),
3169 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3170 const auto *KmpTaskTWithPrivatesQTyRD =
3171 KmpTaskTWithPrivatesQTy->castAsRecordDecl();
// Field 0 of kmp_task_t_with_privates is the kmp_task_t header.
3172 LValue Base =
3173 CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
3174 const auto *KmpTaskTQTyRD = KmpTaskTQTy->castAsRecordDecl();
3175 auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
3176 LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
3177 llvm::Value *PartidParam = PartIdLVal.getPointer(CGF);
3178
3179 auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
3180 LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
3181 llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3182 CGF.EmitLoadOfScalar(SharedsLVal, Loc),
3183 CGF.ConvertTypeForMem(SharedsPtrTy));
3184
// The privates field (index 1) exists only when there are privatized vars;
// otherwise a null pointer is passed.
3185 auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
3186 llvm::Value *PrivatesParam;
3187 if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
3188 LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
3189 PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3190 PrivatesLVal.getPointer(CGF), CGF.VoidPtrTy);
3191 } else {
3192 PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
3193 }
3194
3195 llvm::Value *CommonArgs[] = {
3196 GtidParam, PartidParam, PrivatesParam, TaskPrivatesMap,
3197 CGF.Builder
3198 .CreatePointerBitCastOrAddrSpaceCast(TDBase.getAddress(),
3199 CGF.VoidPtrTy, CGF.Int8Ty)
3200 .emitRawPointer(CGF)};
3201 SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs),
3202 std::end(CommonArgs));
// Taskloops additionally forward lb/ub/st/liter/reductions from kmp_task_t.
3203 if (isOpenMPTaskLoopDirective(Kind)) {
3204 auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound);
3205 LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI);
3206 llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc);
3207 auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound);
3208 LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI);
3209 llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc);
3210 auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride);
3211 LValue StLVal = CGF.EmitLValueForField(Base, *StFI);
3212 llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc);
3213 auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
3214 LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
3215 llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc);
3216 auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions);
3217 LValue RLVal = CGF.EmitLValueForField(Base, *RFI);
3218 llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc);
3219 CallArgs.push_back(LBParam);
3220 CallArgs.push_back(UBParam);
3221 CallArgs.push_back(StParam);
3222 CallArgs.push_back(LIParam);
3223 CallArgs.push_back(RParam);
3224 }
3225 CallArgs.push_back(SharedsParam);
3226
3227 CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction,
3228 CallArgs);
// The proxy always returns 0, as the runtime contract expects.
3229 CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)),
3230 CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty));
3231 CGF.FinishFunction();
3232 return TaskEntry;
3233}
3234
// Emits the internal ".omp_task_destructor." function that runs destructors
// for every privates field needing destruction. NOTE(review): the first
// signature line and trailing argument lines of the two ImplicitParamDecl
// declarations (original lines 3243/3246) are elided in this listing.
3236 SourceLocation Loc,
3237 QualType KmpInt32Ty,
3238 QualType KmpTaskTWithPrivatesPtrQTy,
3239 QualType KmpTaskTWithPrivatesQTy) {
3240 ASTContext &C = CGM.getContext();
3241 FunctionArgList Args;
3242 ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
3244 ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3245 KmpTaskTWithPrivatesPtrQTy.withRestrict(),
3247 Args.push_back(&GtidArg);
3248 Args.push_back(&TaskTypeArg);
3249 const auto &DestructorFnInfo =
3250 CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
3251 llvm::FunctionType *DestructorFnTy =
3252 CGM.getTypes().GetFunctionType(DestructorFnInfo);
3253 std::string Name =
3254 CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""});
3255 auto *DestructorFn =
3256 llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage,
3257 Name, &CGM.getModule());
3258 CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn,
3259 DestructorFnInfo);
3260 if (!CGM.getCodeGenOpts().SampleProfileFile.empty())
3261 DestructorFn->addFnAttr("sample-profile-suffix-elision-policy", "selected");
3262 DestructorFn->setDoesNotRecurse();
3263 CodeGenFunction CGF(CGM);
3264 CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo,
3265 Args, Loc, Loc);
3266
3267 LValue Base = CGF.EmitLoadOfPointerLValue(
3268 CGF.GetAddrOfLocalVar(&TaskTypeArg),
3269 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3270 const auto *KmpTaskTWithPrivatesQTyRD =
3271 KmpTaskTWithPrivatesQTy->castAsRecordDecl();
// Field 1 of kmp_task_t_with_privates is the privates record; push a destroy
// cleanup for each member whose type requires destruction.
3272 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
3273 Base = CGF.EmitLValueForField(Base, *FI);
3274 for (const auto *Field : FI->getType()->castAsRecordDecl()->fields()) {
3275 if (QualType::DestructionKind DtorKind =
3276 Field->getType().isDestructedType()) {
3277 LValue FieldLValue = CGF.EmitLValueForField(Base, Field);
3278 CGF.pushDestroy(DtorKind, FieldLValue.getAddress(), Field->getType());
3279 }
3280 }
3281 CGF.FinishFunction();
3282 return DestructorFn;
3283}
3284
3285/// Emit a privates mapping function for correct handling of private and
3286/// firstprivate variables.
3287/// \code
3288/// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1>
3289/// **noalias priv1,..., <tyn> **noalias privn) {
3290/// *priv1 = &.privates.priv1;
3291/// ...;
3292/// *privn = &.privates.privn;
3293/// }
3294/// \endcode
static llvm::Value *
// NOTE(review): the signature lines of this definition (original line 3298)
// and several trailing-argument lines of the ImplicitParamDecl::Create calls
// below (original lines 3304, 3314, 3325, 3336, 3350) are elided in this
// listing.
3299 ASTContext &C = CGM.getContext();
3300 FunctionArgList Args;
3301 ImplicitParamDecl TaskPrivatesArg(
3302 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3303 C.getPointerType(PrivatesQTy).withConst().withRestrict(),
3305 Args.push_back(&TaskPrivatesArg);
// Record the 1-based argument position of each privatized variable so the
// emission loop below can find the out-parameter for each privates field.
3306 llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, unsigned> PrivateVarsPos;
3307 unsigned Counter = 1;
3308 for (const Expr *E : Data.PrivateVars) {
3309 Args.push_back(ImplicitParamDecl::Create(
3310 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3311 C.getPointerType(C.getPointerType(E->getType()))
3312 .withConst()
3313 .withRestrict(),
3315 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3316 PrivateVarsPos[VD] = Counter;
3317 ++Counter;
3318 }
3319 for (const Expr *E : Data.FirstprivateVars) {
3320 Args.push_back(ImplicitParamDecl::Create(
3321 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3322 C.getPointerType(C.getPointerType(E->getType()))
3323 .withConst()
3324 .withRestrict(),
3326 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3327 PrivateVarsPos[VD] = Counter;
3328 ++Counter;
3329 }
3330 for (const Expr *E : Data.LastprivateVars) {
3331 Args.push_back(ImplicitParamDecl::Create(
3332 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3333 C.getPointerType(C.getPointerType(E->getType()))
3334 .withConst()
3335 .withRestrict(),
3337 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3338 PrivateVarsPos[VD] = Counter;
3339 ++Counter;
3340 }
// Local privates: lvalue-reference and allocatable decls are stored through
// an extra level of pointer (matching createPrivatesRecordDecl).
3341 for (const VarDecl *VD : Data.PrivateLocals) {
3343 if (VD->getType()->isLValueReferenceType())
3344 Ty = C.getPointerType(Ty);
3345 if (isAllocatableDecl(VD))
3346 Ty = C.getPointerType(Ty);
3347 Args.push_back(ImplicitParamDecl::Create(
3348 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3349 C.getPointerType(C.getPointerType(Ty)).withConst().withRestrict(),
3351 PrivateVarsPos[VD] = Counter;
3352 ++Counter;
3353 }
3354 const auto &TaskPrivatesMapFnInfo =
3355 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
3356 llvm::FunctionType *TaskPrivatesMapTy =
3357 CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo);
3358 std::string Name =
3359 CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""});
3360 auto *TaskPrivatesMap = llvm::Function::Create(
3361 TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name,
3362 &CGM.getModule());
3363 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap,
3364 TaskPrivatesMapFnInfo);
3365 if (!CGM.getCodeGenOpts().SampleProfileFile.empty())
3366 TaskPrivatesMap->addFnAttr("sample-profile-suffix-elision-policy",
3367 "selected");
// When optimizing, force the tiny mapping function to be inlined.
3368 if (CGM.getCodeGenOpts().OptimizationLevel != 0) {
3369 TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline);
3370 TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone);
3371 TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline);
3372 }
3373 CodeGenFunction CGF(CGM);
3374 CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap,
3375 TaskPrivatesMapFnInfo, Args, Loc, Loc);
3376
3377 // *privi = &.privates.privi;
3378 LValue Base = CGF.EmitLoadOfPointerLValue(
3379 CGF.GetAddrOfLocalVar(&TaskPrivatesArg),
3380 TaskPrivatesArg.getType()->castAs<PointerType>());
3381 const auto *PrivatesQTyRD = PrivatesQTy->castAsRecordDecl();
3382 Counter = 0;
3383 for (const FieldDecl *Field : PrivatesQTyRD->fields()) {
3384 LValue FieldLVal = CGF.EmitLValueForField(Base, Field);
3385 const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]];
3386 LValue RefLVal =
3387 CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType());
3388 LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue(
3389 RefLVal.getAddress(), RefLVal.getType()->castAs<PointerType>());
3390 CGF.EmitStoreOfScalar(FieldLVal.getPointer(CGF), RefLoadLVal);
3391 ++Counter;
3392 }
3393 CGF.FinishFunction();
3394 return TaskPrivatesMap;
3395}
3396
3397/// Emit initialization for private variables in task-based directives.
/// Walks the privates record of the task structure (reached through \p
/// TDBase) and emits an initializer for each private/firstprivate/lastprivate
/// copy. \p ForDup is true when called from the taskloop task_dup helper (see
/// emitTaskDupFunction below), in which case only non-trivial C++
/// construction is re-emitted.
/// NOTE(review): this extract is missing the function's signature line and a
/// few continuation lines of call expressions (original lines 3398, 3423,
/// 3478, 3484) — compare against the full source before editing.
3399 const OMPExecutableDirective &D,
3400 Address KmpTaskSharedsPtr, LValue TDBase,
3401 const RecordDecl *KmpTaskTWithPrivatesQTyRD,
3402 QualType SharedsTy, QualType SharedsPtrTy,
3403 const OMPTaskDataTy &Data,
3404 ArrayRef<PrivateDataTy> Privates, bool ForDup) {
3405 ASTContext &C = CGF.getContext();
// Field 0 of the task-with-privates record is kmp_task_t; field 1 holds the
// privates record.
3406 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
3407 LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI);
3408 OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind())
3409 ? OMPD_taskloop
3410 : OMPD_task;
3411 const CapturedStmt &CS = *D.getCapturedStmt(Kind);
3412 CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS);
3413 LValue SrcBase;
3414 bool IsTargetTask =
3415 isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) ||
3416 isOpenMPTargetExecutionDirective(D.getDirectiveKind());
3417 // For target-based directives skip 4 firstprivate arrays BasePointersArray,
3418 // PointersArray, SizesArray, and MappersArray. The original variables for
3419 // these arrays are not captured and we get their addresses explicitly.
3420 if ((!IsTargetTask && !Data.FirstprivateVars.empty() && ForDup) ||
3421 (IsTargetTask && KmpTaskSharedsPtr.isValid())) {
// NOTE(review): the enclosing call wrapping the next two argument lines
// (original line 3423) is missing from this extract.
3422 SrcBase = CGF.MakeAddrLValue(
3424 KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy),
3425 CGF.ConvertTypeForMem(SharedsTy)),
3426 SharedsTy);
3427 }
// Iterate the fields of the privates record in lock-step with Privates.
3428 FI = FI->getType()->castAsRecordDecl()->field_begin();
3429 for (const PrivateDataTy &Pair : Privates) {
3430 // Do not initialize private locals.
3431 if (Pair.second.isLocalPrivate()) {
3432 ++FI;
3433 continue;
3434 }
3435 const VarDecl *VD = Pair.second.PrivateCopy;
3436 const Expr *Init = VD->getAnyInitializer();
// For task_dup only non-trivial C++ construction is replayed; trivial
// initialization is presumably covered by the byte-copy of the task
// structure — confirm against the runtime's task_dup contract.
3437 if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) &&
3438 !CGF.isTrivialInitializer(Init)))) {
3439 LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
3440 if (const VarDecl *Elem = Pair.second.PrivateElemInit) {
3441 const VarDecl *OriginalVD = Pair.second.Original;
3442 // Check if the variable is the target-based BasePointersArray,
3443 // PointersArray, SizesArray, or MappersArray.
3444 LValue SharedRefLValue;
3445 QualType Type = PrivateLValue.getType();
3446 const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD);
3447 if (IsTargetTask && !SharedField) {
3448 assert(isa<ImplicitParamDecl>(OriginalVD) &&
3449 isa<CapturedDecl>(OriginalVD->getDeclContext()) &&
3450 cast<CapturedDecl>(OriginalVD->getDeclContext())
3451 ->getNumParams() == 0 &&
3453 cast<CapturedDecl>(OriginalVD->getDeclContext())
3454 ->getDeclContext()) &&
3455 "Expected artificial target data variable.");
3456 SharedRefLValue =
3457 CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type);
3458 } else if (ForDup) {
// Re-derive the shared value from the source task's shareds area, with
// the original declaration's alignment.
3459 SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField);
3460 SharedRefLValue = CGF.MakeAddrLValue(
3461 SharedRefLValue.getAddress().withAlignment(
3462 C.getDeclAlign(OriginalVD)),
3463 SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl),
3464 SharedRefLValue.getTBAAInfo());
3465 } else if (CGF.LambdaCaptureFields.count(
3466 Pair.second.Original->getCanonicalDecl()) > 0 ||
3467 isa_and_nonnull<BlockDecl>(CGF.CurCodeDecl)) {
3468 SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
3469 } else {
3470 // Processing for implicitly captured variables.
3471 InlinedOpenMPRegionRAII Region(
3472 CGF, [](CodeGenFunction &, PrePostActionTy &) {}, OMPD_unknown,
3473 /*HasCancel=*/false, /*NoInheritance=*/true);
3474 SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
3475 }
3476 if (Type->isArrayType()) {
3477 // Initialize firstprivate array.
// NOTE(review): the guarding condition on the original line 3478 is
// missing from this extract.
3479 // Perform simple memcpy.
3480 CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type);
3481 } else {
3482 // Initialize firstprivate array using element-by-element
3483 // initialization.
// NOTE(review): the call taking the three argument lines below
// (original line 3484) is missing from this extract.
3485 PrivateLValue.getAddress(), SharedRefLValue.getAddress(), Type,
3486 [&CGF, Elem, Init, &CapturesInfo](Address DestElement,
3487 Address SrcElement) {
3488 // Clean up any temporaries needed by the initialization.
3489 CodeGenFunction::OMPPrivateScope InitScope(CGF);
3490 InitScope.addPrivate(Elem, SrcElement);
3491 (void)InitScope.Privatize();
3492 // Emit initialization for single element.
3493 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(
3494 CGF, &CapturesInfo);
3495 CGF.EmitAnyExprToMem(Init, DestElement,
3496 Init->getType().getQualifiers(),
3497 /*IsInitializer=*/false);
3498 });
3499 }
3500 } else {
// Non-elementwise case: privatize the shared address and emit the
// initializer directly into the private field.
3501 CodeGenFunction::OMPPrivateScope InitScope(CGF);
3502 InitScope.addPrivate(Elem, SharedRefLValue.getAddress());
3503 (void)InitScope.Privatize();
3504 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
3505 CGF.EmitExprAsInit(Init, VD, PrivateLValue,
3506 /*capturedByInit=*/false);
3507 }
3508 } else {
3509 CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
3510 }
3511 }
3512 ++FI;
3513 }
3514}
3515
3516/// Check if duplication function is required for taskloops.
/// Returns true as soon as some private copy carries a C++ constructor
/// initializer (the visible half of the condition below); such construction
/// must be replayed by the task_dup helper.
/// NOTE(review): the signature (original lines 3517-3518) and the second line
/// of the condition (original line 3526) are missing from this extract.
3519 bool InitRequired = false;
3520 for (const PrivateDataTy &Pair : Privates) {
// Private locals never need duplication-time initialization.
3521 if (Pair.second.isLocalPrivate())
3522 continue;
3523 const VarDecl *VD = Pair.second.PrivateCopy;
3524 const Expr *Init = VD->getAnyInitializer();
3525 InitRequired = InitRequired || (isa_and_nonnull<CXXConstructExpr>(Init) &&
// Early exit: one qualifying initializer is enough.
3527 if (InitRequired)
3528 break;
3529 }
3530 return InitRequired;
3531}
3532
3533
3534/// Emit task_dup function (for initialization of
3535/// private/firstprivate/lastprivate vars and last_iter flag)
3536/// \code
3537/// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int
3538/// lastpriv) {
3539/// // setup lastprivate flag
3540/// task_dst->last = lastpriv;
3541/// // could be constructor calls here...
3542/// }
3543/// \endcode
/// Builds an internal-linkage helper ".omp_task_dup." that the runtime calls
/// when it clones a taskloop task; it stores the lastprivate flag (when
/// \p WithLastIter) and replays private-copy initialization via
/// emitPrivatesInit with ForDup=true.
/// NOTE(review): the function name line (original line 3545) and the
/// ImplicitParamDecl kind arguments (original lines 3556, 3559, 3561) are
/// missing from this extract.
3544static llvm::Value *
3546 const OMPExecutableDirective &D,
3547 QualType KmpTaskTWithPrivatesPtrQTy,
3548 const RecordDecl *KmpTaskTWithPrivatesQTyRD,
3549 const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy,
3550 QualType SharedsPtrTy, const OMPTaskDataTy &Data,
3551 ArrayRef<PrivateDataTy> Privates, bool WithLastIter) {
3552 ASTContext &C = CGM.getContext();
3553 FunctionArgList Args;
// The three runtime-mandated parameters: destination task, source task, and
// the lastprivate flag.
3554 ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3555 KmpTaskTWithPrivatesPtrQTy,
3557 ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3558 KmpTaskTWithPrivatesPtrQTy,
3560 ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy,
3562 Args.push_back(&DstArg);
3563 Args.push_back(&SrcArg);
3564 Args.push_back(&LastprivArg);
3565 const auto &TaskDupFnInfo =
3566 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
3567 llvm::FunctionType *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo);
3568 std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_dup", ""});
3569 auto *TaskDup = llvm::Function::Create(
3570 TaskDupTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
3571 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo);
3572 if (!CGM.getCodeGenOpts().SampleProfileFile.empty())
3573 TaskDup->addFnAttr("sample-profile-suffix-elision-policy", "selected");
3574 TaskDup->setDoesNotRecurse();
3575 CodeGenFunction CGF(CGM);
3576 CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc,
3577 Loc);
3578
3579 LValue TDBase = CGF.EmitLoadOfPointerLValue(
3580 CGF.GetAddrOfLocalVar(&DstArg),
3581 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3582 // task_dst->liter = lastpriv;
3583 if (WithLastIter) {
3584 auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
3585 LValue Base = CGF.EmitLValueForField(
3586 TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
3587 LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
3588 llvm::Value *Lastpriv = CGF.EmitLoadOfScalar(
3589 CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc);
3590 CGF.EmitStoreOfScalar(Lastpriv, LILVal);
3591 }
3592
3593 // Emit initial values for private copies (if any).
3594 assert(!Privates.empty());
3595 Address KmpTaskSharedsPtr = Address::invalid();
3596 if (!Data.FirstprivateVars.empty()) {
// Load the shareds pointer out of the SOURCE task so firstprivate copies
// can be re-initialized from the original shared values.
3597 LValue TDBase = CGF.EmitLoadOfPointerLValue(
3598 CGF.GetAddrOfLocalVar(&SrcArg),
3599 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3600 LValue Base = CGF.EmitLValueForField(
3601 TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
// NOTE(review): the load call wrapping the next lines (original line
// 3603) is missing from this extract.
3602 KmpTaskSharedsPtr = Address(
3604 Base, *std::next(KmpTaskTQTyRD->field_begin(),
3605 KmpTaskTShareds)),
3606 Loc),
3607 CGF.Int8Ty, CGM.getNaturalTypeAlignment(SharedsTy));
3608 }
3609 emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD,
3610 SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true);
3611 CGF.FinishFunction();
3612 return TaskDup;
3613}
3614
3615/// Checks if destructor function is required to be generated.
3616/// \return true if cleanups are required, false otherwise.
/// True when any non-local private's original (non-reference) type needs
/// destruction, so the task must be flagged with DestructorsFlag.
/// NOTE(review): the second parameter line of the signature (original line
/// 3619, presumably ArrayRef<PrivateDataTy> Privates) is missing from this
/// extract.
3617static bool
3618checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD,
3620 for (const PrivateDataTy &P : Privates) {
// Private locals are not destructed via the task destructor thunk.
3621 if (P.second.isLocalPrivate())
3622 continue;
3623 QualType Ty = P.second.Original->getType().getNonReferenceType();
3624 if (Ty.isDestructedType())
3625 return true;
3626 }
3627 return false;
3628}
3629
3630namespace {
3631/// Loop generator for OpenMP iterator expression.
/// RAII helper: the constructor privatizes the iterator and counter
/// variables and emits the loop headers (counter init, bound check, body
/// entry, iterator update); the destructor emits the counter increments,
/// back-edges, and exit blocks in reverse nesting order. The base-class
/// initializer below shows this derives from CodeGenFunction::OMPPrivateScope
/// (the base-clause line, original line 3633, is missing from this extract).
3632class OMPIteratorGeneratorScope final
3634 CodeGenFunction &CGF;
3635 const OMPIteratorExpr *E = nullptr;
// One continue/exit jump destination per iterator, indexed like E's
// iterators.
3636 SmallVector<CodeGenFunction::JumpDest, 4> ContDests;
3637 SmallVector<CodeGenFunction::JumpDest, 4> ExitDests;
3638 OMPIteratorGeneratorScope() = delete;
3639 OMPIteratorGeneratorScope(OMPIteratorGeneratorScope &) = delete;
3640
3641public:
// A null \p E makes the scope a no-op.
3642 OMPIteratorGeneratorScope(CodeGenFunction &CGF, const OMPIteratorExpr *E)
3643 : CodeGenFunction::OMPPrivateScope(CGF), CGF(CGF), E(E) {
3644 if (!E)
3645 return;
3646 SmallVector<llvm::Value *, 4> Uppers;
3647 for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
3648 Uppers.push_back(CGF.EmitScalarExpr(E->getHelper(I).Upper));
3649 const auto *VD = cast<VarDecl>(E->getIteratorDecl(I));
3650 addPrivate(VD, CGF.CreateMemTemp(VD->getType(), VD->getName()));
3651 const OMPIteratorHelperData &HelperData = E->getHelper(I);
3652 addPrivate(
3653 HelperData.CounterVD,
3654 CGF.CreateMemTemp(HelperData.CounterVD->getType(), "counter.addr"));
3655 }
3656 Privatize();
3657
3658 for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
3659 const OMPIteratorHelperData &HelperData = E->getHelper(I);
3660 LValue CLVal =
3661 CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(HelperData.CounterVD),
3662 HelperData.CounterVD->getType());
3663 // Counter = 0;
3664 CGF.EmitStoreOfScalar(
3665 llvm::ConstantInt::get(CLVal.getAddress().getElementType(), 0),
3666 CLVal);
3667 CodeGenFunction::JumpDest &ContDest =
3668 ContDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.cont"));
3669 CodeGenFunction::JumpDest &ExitDest =
3670 ExitDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.exit"));
3671 // N = <number-of_iterations>;
3672 llvm::Value *N = Uppers[I];
3673 // cont:
3674 // if (Counter < N) goto body; else goto exit;
3675 CGF.EmitBlock(ContDest.getBlock());
3676 auto *CVal =
3677 CGF.EmitLoadOfScalar(CLVal, HelperData.CounterVD->getLocation());
// Pick signed vs unsigned compare to match the counter's type.
3678 llvm::Value *Cmp =
3679 HelperData.CounterVD->getType()->isSignedIntegerOrEnumerationType()
3680 ? CGF.Builder.CreateICmpSLT(CVal, N)
3681 : CGF.Builder.CreateICmpULT(CVal, N);
3682 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("iter.body");
3683 CGF.Builder.CreateCondBr(Cmp, BodyBB, ExitDest.getBlock());
3684 // body:
3685 CGF.EmitBlock(BodyBB);
3686 // Iteri = Begini + Counter * Stepi;
3687 CGF.EmitIgnoredExpr(HelperData.Update);
3688 }
3689 }
// Close the loops innermost-first, mirroring the construction order above.
3690 ~OMPIteratorGeneratorScope() {
3691 if (!E)
3692 return;
3693 for (unsigned I = E->numOfIterators(); I > 0; --I) {
3694 // Counter = Counter + 1;
3695 const OMPIteratorHelperData &HelperData = E->getHelper(I - 1);
3696 CGF.EmitIgnoredExpr(HelperData.CounterUpdate);
3697 // goto cont;
3698 CGF.EmitBranchThroughCleanup(ContDests[I - 1]);
3699 // exit:
3700 CGF.EmitBlock(ExitDests[I - 1].getBlock(), /*IsFinished=*/I == 1);
3701 }
3702 }
3703};
3704} // namespace
3705
/// Computes the base address and the size in bytes of the storage an
/// expression designates, for three shapes: OMPArrayShapingExpr (element
/// size times all dimensions), ArraySectionExpr (pointer difference between
/// one-past-the-upper-bound and the base), and anything else (the type's
/// size). Returns {address, size}.
/// NOTE(review): the signature line naming the function and its parameters
/// (original line 3707) is missing from this extract.
3706static std::pair<llvm::Value *, llvm::Value *>
3708 const auto *OASE = dyn_cast<OMPArrayShapingExpr>(E);
3709 llvm::Value *Addr;
3710 if (OASE) {
3711 const Expr *Base = OASE->getBase();
3712 Addr = CGF.EmitScalarExpr(Base);
3713 } else {
3714 Addr = CGF.EmitLValue(E).getPointer(CGF);
3715 }
3716 llvm::Value *SizeVal;
3717 QualType Ty = E->getType();
3718 if (OASE) {
// Size = sizeof(element) * dim0 * dim1 * ... (each dim widened to size_t).
3719 SizeVal = CGF.getTypeSize(OASE->getBase()->getType()->getPointeeType());
3720 for (const Expr *SE : OASE->getDimensions()) {
3721 llvm::Value *Sz = CGF.EmitScalarExpr(SE);
3722 Sz = CGF.EmitScalarConversion(
3723 Sz, SE->getType(), CGF.getContext().getSizeType(), SE->getExprLoc());
3724 SizeVal = CGF.Builder.CreateNUWMul(SizeVal, Sz);
3725 }
3726 } else if (const auto *ASE =
3727 dyn_cast<ArraySectionExpr>(E->IgnoreParenImpCasts())) {
// Size = (&section_upper_bound + 1) - base, in bytes.
3728 LValue UpAddrLVal = CGF.EmitArraySectionExpr(ASE, /*IsLowerBound=*/false);
3729 Address UpAddrAddress = UpAddrLVal.getAddress();
3730 llvm::Value *UpAddr = CGF.Builder.CreateConstGEP1_32(
3731 UpAddrAddress.getElementType(), UpAddrAddress.emitRawPointer(CGF),
3732 /*Idx0=*/1);
3733 SizeVal = CGF.Builder.CreatePtrDiff(UpAddr, Addr, "", /*IsNUW=*/true);
3734 } else {
3735 SizeVal = CGF.getTypeSize(Ty);
3736 }
3737 return std::make_pair(Addr, SizeVal);
3738}
3739
3740/// Builds kmp_depend_info, if it is not built yet, and builds flags type.
3741static void getKmpAffinityType(ASTContext &C, QualType &KmpTaskAffinityInfoTy) {
3742 QualType FlagsTy = C.getIntTypeForBitwidth(32, /*Signed=*/false);
3743 if (KmpTaskAffinityInfoTy.isNull()) {
3744 RecordDecl *KmpAffinityInfoRD =
3745 C.buildImplicitRecord("kmp_task_affinity_info_t");
3746 KmpAffinityInfoRD->startDefinition();
3747 addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getIntPtrType());
3748 addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getSizeType());
3749 addFieldToRecordDecl(C, KmpAffinityInfoRD, FlagsTy);
3750 KmpAffinityInfoRD->completeDefinition();
3751 KmpTaskAffinityInfoTy = C.getCanonicalTagType(KmpAffinityInfoRD);
3752 }
3753}
3754
/// Sets up a kmp_task_t object for a task/taskloop/target task: aggregates
/// and alignment-sorts the privates, builds the kmp_task_t-with-privates
/// record, emits the proxy entry/privates-map/dup/destructor helpers as
/// needed, allocates the task via __kmpc_omp_task_alloc (or the target
/// variant for nowait target tasks), copies shareds, and fills priority,
/// destructor, detach-event and affinity information. Returns the populated
/// TaskResultTy.
/// NOTE(review): this extract is missing the function signature (original
/// lines 3755-3756) and several other lines (e.g. 3761, 3812, 3822, 3827,
/// 3901, 3903, 3985, 3989, 3996, 4021, 4055, 4085, 4097, 4107, 4133) —
/// declarations such as Privates, AllocArgs and Result, and parts of several
/// call expressions, are not visible here. Compare against the full source
/// before editing.
3757 const OMPExecutableDirective &D,
3758 llvm::Function *TaskFunction, QualType SharedsTy,
3759 Address Shareds, const OMPTaskDataTy &Data) {
3760 ASTContext &C = CGM.getContext();
3762 // Aggregate privates and sort them by the alignment.
3763 const auto *I = Data.PrivateCopies.begin();
3764 for (const Expr *E : Data.PrivateVars) {
3765 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3766 Privates.emplace_back(
3767 C.getDeclAlign(VD),
3768 PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
3769 /*PrivateElemInit=*/nullptr));
3770 ++I;
3771 }
3772 I = Data.FirstprivateCopies.begin();
3773 const auto *IElemInitRef = Data.FirstprivateInits.begin();
3774 for (const Expr *E : Data.FirstprivateVars) {
3775 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3776 Privates.emplace_back(
3777 C.getDeclAlign(VD),
3778 PrivateHelpersTy(
3779 E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
3780 cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl())));
3781 ++I;
3782 ++IElemInitRef;
3783 }
3784 I = Data.LastprivateCopies.begin();
3785 for (const Expr *E : Data.LastprivateVars) {
3786 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3787 Privates.emplace_back(
3788 C.getDeclAlign(VD),
3789 PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
3790 /*PrivateElemInit=*/nullptr));
3791 ++I;
3792 }
3793 for (const VarDecl *VD : Data.PrivateLocals) {
3794 if (isAllocatableDecl(VD))
3795 Privates.emplace_back(CGM.getPointerAlign(), PrivateHelpersTy(VD));
3796 else
3797 Privates.emplace_back(C.getDeclAlign(VD), PrivateHelpersTy(VD));
3798 }
// Descending alignment order minimizes padding in the privates record.
3799 llvm::stable_sort(Privates,
3800 [](const PrivateDataTy &L, const PrivateDataTy &R) {
3801 return L.first > R.first;
3802 });
3803 QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
3804 // Build type kmp_routine_entry_t (if not built yet).
3805 emitKmpRoutineEntryT(KmpInt32Ty);
3806 // Build type kmp_task_t (if not built yet).
3807 if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) {
3808 if (SavedKmpTaskloopTQTy.isNull()) {
3809 SavedKmpTaskloopTQTy = C.getCanonicalTagType(createKmpTaskTRecordDecl(
3810 CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
3811 }
3813 } else {
3814 assert((D.getDirectiveKind() == OMPD_task ||
3815 isOpenMPTargetExecutionDirective(D.getDirectiveKind()) ||
3816 isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) &&
3817 "Expected taskloop, task or target directive");
3818 if (SavedKmpTaskTQTy.isNull()) {
3819 SavedKmpTaskTQTy = C.getCanonicalTagType(createKmpTaskTRecordDecl(
3820 CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
3821 }
3823 }
3824 const auto *KmpTaskTQTyRD = KmpTaskTQTy->castAsRecordDecl();
3825 // Build particular struct kmp_task_t for the given task.
3826 const RecordDecl *KmpTaskTWithPrivatesQTyRD =
3828 CanQualType KmpTaskTWithPrivatesQTy =
3829 C.getCanonicalTagType(KmpTaskTWithPrivatesQTyRD);
3830 QualType KmpTaskTWithPrivatesPtrQTy =
3831 C.getPointerType(KmpTaskTWithPrivatesQTy);
3832 llvm::Type *KmpTaskTWithPrivatesPtrTy = CGF.Builder.getPtrTy(0);
3833 llvm::Value *KmpTaskTWithPrivatesTySize =
3834 CGF.getTypeSize(KmpTaskTWithPrivatesQTy);
3835 QualType SharedsPtrTy = C.getPointerType(SharedsTy);
3836
3837 // Emit initial values for private copies (if any).
3838 llvm::Value *TaskPrivatesMap = nullptr;
// The privates-map function pointer is the 4th parameter of TaskFunction.
3839 llvm::Type *TaskPrivatesMapTy =
3840 std::next(TaskFunction->arg_begin(), 3)->getType();
3841 if (!Privates.empty()) {
3842 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
3843 TaskPrivatesMap =
3844 emitTaskPrivateMappingFunction(CGM, Loc, Data, FI->getType(), Privates);
3845 TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3846 TaskPrivatesMap, TaskPrivatesMapTy);
3847 } else {
3848 TaskPrivatesMap = llvm::ConstantPointerNull::get(
3849 cast<llvm::PointerType>(TaskPrivatesMapTy));
3850 }
3851 // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
3852 // kmp_task_t *tt);
3853 llvm::Function *TaskEntry = emitProxyTaskFunction(
3854 CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
3855 KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction,
3856 TaskPrivatesMap);
3857
3858 // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
3859 // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
3860 // kmp_routine_entry_t *task_entry);
3861 // Task flags. Format is taken from
3862 // https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h,
3863 // description of kmp_tasking_flags struct.
3864 enum {
3865 TiedFlag = 0x1,
3866 FinalFlag = 0x2,
3867 DestructorsFlag = 0x8,
3868 PriorityFlag = 0x20,
3869 DetachableFlag = 0x40,
3870 FreeAgentFlag = 0x80,
3871 TransparentFlag = 0x100,
3872 };
3873 unsigned Flags = Data.Tied ? TiedFlag : 0;
3874 bool NeedsCleanup = false;
3875 if (!Privates.empty()) {
3876 NeedsCleanup =
3877 checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD, Privates);
3878 if (NeedsCleanup)
3879 Flags = Flags | DestructorsFlag;
3880 }
3881 if (const auto *Clause = D.getSingleClause<OMPThreadsetClause>()) {
3882 OpenMPThreadsetKind Kind = Clause->getThreadsetKind();
3883 if (Kind == OMPC_THREADSET_omp_pool)
3884 Flags = Flags | FreeAgentFlag;
3885 }
3886 if (D.getSingleClause<OMPTransparentClause>())
3887 Flags |= TransparentFlag;
3888
3889 if (Data.Priority.getInt())
3890 Flags = Flags | PriorityFlag;
3891 if (D.hasClausesOfKind<OMPDetachClause>())
3892 Flags = Flags | DetachableFlag;
// The final flag may only be known at runtime (a select on the clause's
// condition); merge it with the statically-known flag bits.
3893 llvm::Value *TaskFlags =
3894 Data.Final.getPointer()
3895 ? CGF.Builder.CreateSelect(Data.Final.getPointer(),
3896 CGF.Builder.getInt32(FinalFlag),
3897 CGF.Builder.getInt32(/*C=*/0))
3898 : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0);
3899 TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
3900 llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy));
// NOTE(review): the declaration of AllocArgs (original lines 3901, 3903) is
// missing from this extract; AllocArgs is used below.
3902 getThreadID(CGF, Loc), TaskFlags, KmpTaskTWithPrivatesTySize,
3904 TaskEntry, KmpRoutineEntryPtrTy)};
3905 llvm::Value *NewTask;
3906 if (D.hasClausesOfKind<OMPNowaitClause>()) {
3907 // Check if we have any device clause associated with the directive.
3908 const Expr *Device = nullptr;
3909 if (auto *C = D.getSingleClause<OMPDeviceClause>())
3910 Device = C->getDevice();
3911 // Emit device ID if any otherwise use default value.
3912 llvm::Value *DeviceID;
3913 if (Device)
3914 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
3915 CGF.Int64Ty, /*isSigned=*/true);
3916 else
3917 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
3918 AllocArgs.push_back(DeviceID);
3919 NewTask = CGF.EmitRuntimeCall(
3920 OMPBuilder.getOrCreateRuntimeFunction(
3921 CGM.getModule(), OMPRTL___kmpc_omp_target_task_alloc),
3922 AllocArgs);
3923 } else {
3924 NewTask =
3925 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
3926 CGM.getModule(), OMPRTL___kmpc_omp_task_alloc),
3927 AllocArgs);
3928 }
3929 // Emit detach clause initialization.
3930 // evt = (typeof(evt))__kmpc_task_allow_completion_event(loc, tid,
3931 // task_descriptor);
3932 if (const auto *DC = D.getSingleClause<OMPDetachClause>()) {
3933 const Expr *Evt = DC->getEventHandler()->IgnoreParenImpCasts();
3934 LValue EvtLVal = CGF.EmitLValue(Evt);
3935
3936 // Build kmp_event_t *__kmpc_task_allow_completion_event(ident_t *loc_ref,
3937 // int gtid, kmp_task_t *task);
3938 llvm::Value *Loc = emitUpdateLocation(CGF, DC->getBeginLoc());
3939 llvm::Value *Tid = getThreadID(CGF, DC->getBeginLoc());
3940 Tid = CGF.Builder.CreateIntCast(Tid, CGF.IntTy, /*isSigned=*/false);
3941 llvm::Value *EvtVal = CGF.EmitRuntimeCall(
3942 OMPBuilder.getOrCreateRuntimeFunction(
3943 CGM.getModule(), OMPRTL___kmpc_task_allow_completion_event),
3944 {Loc, Tid, NewTask});
3945 EvtVal = CGF.EmitScalarConversion(EvtVal, C.VoidPtrTy, Evt->getType(),
3946 Evt->getExprLoc());
3947 CGF.EmitStoreOfScalar(EvtVal, EvtLVal);
3948 }
3949 // Process affinity clauses.
3950 if (D.hasClausesOfKind<OMPAffinityClause>()) {
3951 // Process list of affinity data.
3952 ASTContext &C = CGM.getContext();
3953 Address AffinitiesArray = Address::invalid();
3954 // Calculate number of elements to form the array of affinity data.
3955 llvm::Value *NumOfElements = nullptr;
3956 unsigned NumAffinities = 0;
3957 for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
3958 if (const Expr *Modifier = C->getModifier()) {
// Iterator-modified clauses contribute a runtime-computed count (the
// product of all iterator upper bounds).
3959 const auto *IE = cast<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts());
3960 for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
3961 llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
3962 Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
3963 NumOfElements =
3964 NumOfElements ? CGF.Builder.CreateNUWMul(NumOfElements, Sz) : Sz;
3965 }
3966 } else {
3967 NumAffinities += C->varlist_size();
3968 }
3969 }
3971 // Fields ids in kmp_task_affinity_info record.
3972 enum RTLAffinityInfoFieldsTy { BaseAddr, Len, Flags };
3973
3974 QualType KmpTaskAffinityInfoArrayTy;
3975 if (NumOfElements) {
// Runtime-sized case: total = static count + iterator-derived count,
// emitted as a VLA.
3976 NumOfElements = CGF.Builder.CreateNUWAdd(
3977 llvm::ConstantInt::get(CGF.SizeTy, NumAffinities), NumOfElements);
3978 auto *OVE = new (C) OpaqueValueExpr(
3979 Loc,
3980 C.getIntTypeForBitwidth(C.getTypeSize(C.getSizeType()), /*Signed=*/0),
3981 VK_PRValue);
3982 CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE,
3983 RValue::get(NumOfElements));
3984 KmpTaskAffinityInfoArrayTy = C.getVariableArrayType(
3986 /*IndexTypeQuals=*/0);
3987 // Properly emit variable-sized array.
3988 auto *PD = ImplicitParamDecl::Create(C, KmpTaskAffinityInfoArrayTy,
3990 CGF.EmitVarDecl(*PD);
3991 AffinitiesArray = CGF.GetAddrOfLocalVar(PD);
3992 NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
3993 /*isSigned=*/false);
3994 } else {
3995 KmpTaskAffinityInfoArrayTy = C.getConstantArrayType(
3997 llvm::APInt(C.getTypeSize(C.getSizeType()), NumAffinities), nullptr,
3998 ArraySizeModifier::Normal, /*IndexTypeQuals=*/0);
3999 AffinitiesArray =
4000 CGF.CreateMemTemp(KmpTaskAffinityInfoArrayTy, ".affs.arr.addr");
4001 AffinitiesArray = CGF.Builder.CreateConstArrayGEP(AffinitiesArray, 0);
4002 NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumAffinities,
4003 /*isSigned=*/false);
4004 }
4005
4006 const auto *KmpAffinityInfoRD = KmpTaskAffinityInfoTy->getAsRecordDecl();
4007 // Fill array by elements without iterators.
4008 unsigned Pos = 0;
4009 bool HasIterator = false;
4010 for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
4011 if (C->getModifier()) {
4012 HasIterator = true;
4013 continue;
4014 }
4015 for (const Expr *E : C->varlist()) {
4016 llvm::Value *Addr;
4017 llvm::Value *Size;
4018 std::tie(Addr, Size) = getPointerAndSize(CGF, E);
4019 LValue Base =
4020 CGF.MakeAddrLValue(CGF.Builder.CreateConstGEP(AffinitiesArray, Pos),
4022 // affs[i].base_addr = &<Affinities[i].second>;
4023 LValue BaseAddrLVal = CGF.EmitLValueForField(
4024 Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
4025 CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
4026 BaseAddrLVal);
4027 // affs[i].len = sizeof(<Affinities[i].second>);
4028 LValue LenLVal = CGF.EmitLValueForField(
4029 Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
4030 CGF.EmitStoreOfScalar(Size, LenLVal);
4031 ++Pos;
4032 }
4033 }
4034 LValue PosLVal;
4035 if (HasIterator) {
// Iterator-generated entries continue from the last static position,
// tracked in a memory temp because the index is advanced inside
// runtime-generated loops.
4036 PosLVal = CGF.MakeAddrLValue(
4037 CGF.CreateMemTemp(C.getSizeType(), "affs.counter.addr"),
4038 C.getSizeType());
4039 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
4040 }
4041 // Process elements with iterators.
4042 for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
4043 const Expr *Modifier = C->getModifier();
4044 if (!Modifier)
4045 continue;
4046 OMPIteratorGeneratorScope IteratorScope(
4047 CGF, cast_or_null<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts()));
4048 for (const Expr *E : C->varlist()) {
4049 llvm::Value *Addr;
4050 llvm::Value *Size;
4051 std::tie(Addr, Size) = getPointerAndSize(CGF, E);
4052 llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4053 LValue Base =
4054 CGF.MakeAddrLValue(CGF.Builder.CreateGEP(CGF, AffinitiesArray, Idx),
4056 // affs[i].base_addr = &<Affinities[i].second>;
4057 LValue BaseAddrLVal = CGF.EmitLValueForField(
4058 Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
4059 CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
4060 BaseAddrLVal);
4061 // affs[i].len = sizeof(<Affinities[i].second>);
4062 LValue LenLVal = CGF.EmitLValueForField(
4063 Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
4064 CGF.EmitStoreOfScalar(Size, LenLVal);
4065 Idx = CGF.Builder.CreateNUWAdd(
4066 Idx, llvm::ConstantInt::get(Idx->getType(), 1));
4067 CGF.EmitStoreOfScalar(Idx, PosLVal);
4068 }
4069 }
4070 // Call to kmp_int32 __kmpc_omp_reg_task_with_affinity(ident_t *loc_ref,
4071 // kmp_int32 gtid, kmp_task_t *new_task, kmp_int32
4072 // naffins, kmp_task_affinity_info_t *affin_list);
4073 llvm::Value *LocRef = emitUpdateLocation(CGF, Loc);
4074 llvm::Value *GTid = getThreadID(CGF, Loc);
4075 llvm::Value *AffinListPtr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4076 AffinitiesArray.emitRawPointer(CGF), CGM.VoidPtrTy);
4077 // FIXME: Emit the function and ignore its result for now unless the
4078 // runtime function is properly implemented.
4079 (void)CGF.EmitRuntimeCall(
4080 OMPBuilder.getOrCreateRuntimeFunction(
4081 CGM.getModule(), OMPRTL___kmpc_omp_reg_task_with_affinity),
4082 {LocRef, GTid, NewTask, NumOfElements, AffinListPtr});
4083 }
// NOTE(review): the cast call initializing NewTaskNewTaskTTy (original line
// 4085) is missing from this extract.
4084 llvm::Value *NewTaskNewTaskTTy =
4086 NewTask, KmpTaskTWithPrivatesPtrTy);
4087 LValue Base = CGF.MakeNaturalAlignRawAddrLValue(NewTaskNewTaskTTy,
4088 KmpTaskTWithPrivatesQTy);
4089 LValue TDBase =
4090 CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin());
4091 // Fill the data in the resulting kmp_task_t record.
4092 // Copy shareds if there are any.
4093 Address KmpTaskSharedsPtr = Address::invalid();
4094 if (!SharedsTy->castAsRecordDecl()->field_empty()) {
4095 KmpTaskSharedsPtr = Address(
4096 CGF.EmitLoadOfScalar(
4098 TDBase,
4099 *std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds)),
4100 Loc),
4101 CGF.Int8Ty, CGM.getNaturalTypeAlignment(SharedsTy));
4102 LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy);
4103 LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy);
4104 CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap);
4105 }
4106 // Emit initial values for private copies (if any).
4108 if (!Privates.empty()) {
4109 emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD,
4110 SharedsTy, SharedsPtrTy, Data, Privates,
4111 /*ForDup=*/false);
// Taskloops additionally need a task_dup helper when lastprivates exist or
// some private requires non-trivial re-initialization.
4112 if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
4113 (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) {
4114 Result.TaskDupFn = emitTaskDupFunction(
4115 CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD,
4116 KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates,
4117 /*WithLastIter=*/!Data.LastprivateVars.empty());
4118 }
4119 }
4120 // Fields of union "kmp_cmplrdata_t" for destructors and priority.
4121 enum { Priority = 0, Destructors = 1 };
4122 // Provide pointer to function with destructors for privates.
4123 auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1);
4124 const auto *KmpCmplrdataUD = (*FI)->getType()->castAsRecordDecl();
4125 assert(KmpCmplrdataUD->isUnion());
4126 if (NeedsCleanup) {
4127 llvm::Value *DestructorFn = emitDestructorsFunction(
4128 CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
4129 KmpTaskTWithPrivatesQTy);
4130 LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI);
4131 LValue DestructorsLV = CGF.EmitLValueForField(
4132 Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors));
// NOTE(review): the store call wrapping the next two lines (original line
// 4133) is missing from this extract.
4134 DestructorFn, KmpRoutineEntryPtrTy),
4135 DestructorsLV);
4136 }
4137 // Set priority.
4138 if (Data.Priority.getInt()) {
4139 LValue Data2LV = CGF.EmitLValueForField(
4140 TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2));
4141 LValue PriorityLV = CGF.EmitLValueForField(
4142 Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority));
4143 CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV);
4144 }
4145 Result.NewTask = NewTask;
4146 Result.TaskEntry = TaskEntry;
4147 Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy;
4148 Result.TDBase = TDBase;
4149 Result.KmpTaskTQTyRD = KmpTaskTQTyRD;
4150 return Result;
4151}
4152
4153/// Translates internal dependency kind into the runtime kind.
/// Maps an OpenMP depend-clause kind (the switch operand K, presumably of
/// type OpenMPDependClauseKind — the signature line, original line 4154, is
/// missing from this extract) to the RTLDependenceKindTy value encoded into
/// kmp_depend_info flags. Source/sink/depobj and the catch-all kinds are not
/// valid here and hit llvm_unreachable.
4155 RTLDependenceKindTy DepKind;
4156 switch (K) {
4157 case OMPC_DEPEND_in:
4158 DepKind = RTLDependenceKindTy::DepIn;
4159 break;
4160 // Out and InOut dependencies must use the same code.
4161 case OMPC_DEPEND_out:
4162 case OMPC_DEPEND_inout:
4163 DepKind = RTLDependenceKindTy::DepInOut;
4164 break;
4165 case OMPC_DEPEND_mutexinoutset:
4166 DepKind = RTLDependenceKindTy::DepMutexInOutSet;
4167 break;
4168 case OMPC_DEPEND_inoutset:
4169 DepKind = RTLDependenceKindTy::DepInOutSet;
4170 break;
4171 case OMPC_DEPEND_outallmemory:
4172 DepKind = RTLDependenceKindTy::DepOmpAllMem;
4173 break;
4174 case OMPC_DEPEND_source:
4175 case OMPC_DEPEND_sink:
4176 case OMPC_DEPEND_depobj:
4177 case OMPC_DEPEND_inoutallmemory:
// NOTE(review): one more case label (original line 4178) is missing from
// this extract.
4179 llvm_unreachable("Unknown task dependence type");
4180 }
4181 return DepKind;
4182}
4183
4184/// Builds kmp_depend_info, if it is not built yet, and builds flags type.
4185static void getDependTypes(ASTContext &C, QualType &KmpDependInfoTy,
4186 QualType &FlagsTy) {
4187 FlagsTy = C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false);
4188 if (KmpDependInfoTy.isNull()) {
4189 RecordDecl *KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info");
4190 KmpDependInfoRD->startDefinition();
4191 addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType());
4192 addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType());
4193 addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy);
4194 KmpDependInfoRD->completeDefinition();
4195 KmpDependInfoTy = C.getCanonicalTagType(KmpDependInfoRD);
4196 }
4197}
4198
// Reads back a depobj: returns the number of dependence records it holds and
// an lvalue for its first kmp_depend_info record.  The count is stored in the
// base_addr field of the record at index -1 — a header element placed in
// front of the payload when the depobj was created (see
// emitDepobjDependClause below).
4199std::pair<llvm::Value *, LValue>
4201 SourceLocation Loc) {
4202 ASTContext &C = CGM.getContext();
4203 QualType FlagsTy;
4204 getDependTypes(C, KmpDependInfoTy, FlagsTy);
4205 auto *KmpDependInfoRD = KmpDependInfoTy->castAsRecordDecl();
4206 QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
4208 DepobjLVal.getAddress().withElementType(
4209 CGF.ConvertTypeForMem(KmpDependInfoPtrTy)),
4210 KmpDependInfoPtrTy->castAs<PointerType>());
// Step back one record from the stored pointer to reach the header element.
4211 Address DepObjAddr = CGF.Builder.CreateGEP(
4212 CGF, Base.getAddress(),
4213 llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
4214 LValue NumDepsBase = CGF.MakeAddrLValue(
4215 DepObjAddr, KmpDependInfoTy, Base.getBaseInfo(), Base.getTBAAInfo());
4216 // NumDeps = deps[i].base_addr;
4217 LValue BaseAddrLVal = CGF.EmitLValueForField(
4218 NumDepsBase,
4219 *std::next(KmpDependInfoRD->field_begin(),
4220 static_cast<unsigned int>(RTLDependInfoFields::BaseAddr)));
4221 llvm::Value *NumDeps = CGF.EmitLoadOfScalar(BaseAddrLVal, Loc);
4222 return std::make_pair(NumDeps, Base);
4223}
4224
// Fills one kmp_depend_info record per expression in Data.DepExprs into
// DependenciesArray, starting at the position designated by Pos and advancing
// it past the emitted entries.  Pos is either a compile-time index
// (unsigned *) when the slot is statically known, or a runtime counter lvalue
// (LValue *) for iterator-expanded dependence clauses.
4225static void emitDependData(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
4226 llvm::PointerUnion<unsigned *, LValue *> Pos,
4228 Address DependenciesArray) {
4229 CodeGenModule &CGM = CGF.CGM;
4230 ASTContext &C = CGM.getContext();
4231 QualType FlagsTy;
4232 getDependTypes(C, KmpDependInfoTy, FlagsTy);
4233 auto *KmpDependInfoRD = KmpDependInfoTy->castAsRecordDecl();
4234 llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
4235
// Materialize iterator variables (if the clause has an iterator modifier) so
// the dependence expressions below can reference them.
4236 OMPIteratorGeneratorScope IteratorScope(
4237 CGF, cast_or_null<OMPIteratorExpr>(
4238 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
4239 : nullptr));
4240 for (const Expr *E : Data.DepExprs) {
4241 llvm::Value *Addr;
4242 llvm::Value *Size;
4243
4244 // The expression will be a nullptr in the 'omp_all_memory' case.
4245 if (E) {
4246 std::tie(Addr, Size) = getPointerAndSize(CGF, E);
4247 Addr = CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy);
4248 } else {
4249 Addr = llvm::ConstantInt::get(CGF.IntPtrTy, 0);
4250 Size = llvm::ConstantInt::get(CGF.SizeTy, 0);
4251 }
// Compute the lvalue of deps[pos] from either the static or dynamic counter.
4252 LValue Base;
4253 if (unsigned *P = dyn_cast<unsigned *>(Pos)) {
4254 Base = CGF.MakeAddrLValue(
4255 CGF.Builder.CreateConstGEP(DependenciesArray, *P), KmpDependInfoTy);
4256 } else {
4257 assert(E && "Expected a non-null expression");
4258 LValue &PosLVal = *cast<LValue *>(Pos);
4259 llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4260 Base = CGF.MakeAddrLValue(
4261 CGF.Builder.CreateGEP(CGF, DependenciesArray, Idx), KmpDependInfoTy);
4262 }
4263 // deps[i].base_addr = &<Dependencies[i].second>;
4264 LValue BaseAddrLVal = CGF.EmitLValueForField(
4265 Base,
4266 *std::next(KmpDependInfoRD->field_begin(),
4267 static_cast<unsigned int>(RTLDependInfoFields::BaseAddr)));
4268 CGF.EmitStoreOfScalar(Addr, BaseAddrLVal);
4269 // deps[i].len = sizeof(<Dependencies[i].second>);
4270 LValue LenLVal = CGF.EmitLValueForField(
4271 Base, *std::next(KmpDependInfoRD->field_begin(),
4272 static_cast<unsigned int>(RTLDependInfoFields::Len)));
4273 CGF.EmitStoreOfScalar(Size, LenLVal);
4274 // deps[i].flags = <Dependencies[i].first>;
4275 RTLDependenceKindTy DepKind = translateDependencyKind(Data.DepKind);
4276 LValue FlagsLVal = CGF.EmitLValueForField(
4277 Base,
4278 *std::next(KmpDependInfoRD->field_begin(),
4279 static_cast<unsigned int>(RTLDependInfoFields::Flags)));
4281 llvm::ConstantInt::get(LLVMFlagsTy, static_cast<unsigned int>(DepKind)),
4282 FlagsLVal);
// Advance the position: bump the static index or increment the runtime
// counter in memory.
4283 if (unsigned *P = dyn_cast<unsigned *>(Pos)) {
4284 ++(*P);
4285 } else {
4286 LValue &PosLVal = *cast<LValue *>(Pos);
4287 llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4288 Idx = CGF.Builder.CreateNUWAdd(Idx,
4289 llvm::ConstantInt::get(Idx->getType(), 1));
4290 CGF.EmitStoreOfScalar(Idx, PosLVal);
4291 }
4292 }
4293}
4294
// Computes, for each depobj expression in Data.DepExprs, the number of
// kmp_depend_info records it carries.  Each count is accumulated into a stack
// temporary while the iterator scope is alive, then re-loaded afterwards and
// appended to the returned Sizes vector.
4298 assert(Data.DepKind == OMPC_DEPEND_depobj &&
4299 "Expected depobj dependency kind.");
4301 SmallVector<LValue, 4> SizeLVals;
4302 ASTContext &C = CGF.getContext();
4303 {
4304 OMPIteratorGeneratorScope IteratorScope(
4305 CGF, cast_or_null<OMPIteratorExpr>(
4306 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
4307 : nullptr));
4308 for (const Expr *E : Data.DepExprs) {
4309 llvm::Value *NumDeps;
4310 LValue Base;
4311 LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
4312 std::tie(NumDeps, Base) =
4313 getDepobjElements(CGF, DepobjLVal, E->getExprLoc());
// Stash the element count in a temporary so it survives the iterator scope.
4314 LValue NumLVal = CGF.MakeAddrLValue(
4315 CGF.CreateMemTemp(C.getUIntPtrType(), "depobj.size.addr"),
4316 C.getUIntPtrType());
4317 CGF.Builder.CreateStore(llvm::ConstantInt::get(CGF.IntPtrTy, 0),
4318 NumLVal.getAddress());
4319 llvm::Value *PrevVal = CGF.EmitLoadOfScalar(NumLVal, E->getExprLoc());
4320 llvm::Value *Add = CGF.Builder.CreateNUWAdd(PrevVal, NumDeps);
4321 CGF.EmitStoreOfScalar(Add, NumLVal);
4322 SizeLVals.push_back(NumLVal);
4323 }
4324 }
// Load the final counts outside the iterator scope.
4325 for (unsigned I = 0, E = SizeLVals.size(); I < E; ++I) {
4326 llvm::Value *Size =
4327 CGF.EmitLoadOfScalar(SizeLVals[I], Data.DepExprs[I]->getExprLoc());
4328 Sizes.push_back(Size);
4329 }
4330 return Sizes;
4331}
4332
// Copies the dependence records of every depobj in Data.DepExprs into
// DependenciesArray at the runtime offset held in PosLVal, advancing PosLVal
// by the number of records copied for each depobj.
4335 LValue PosLVal,
4337 Address DependenciesArray) {
4338 assert(Data.DepKind == OMPC_DEPEND_depobj &&
4339 "Expected depobj dependency kind.");
4340 llvm::Value *ElSize = CGF.getTypeSize(KmpDependInfoTy);
4341 {
4342 OMPIteratorGeneratorScope IteratorScope(
4343 CGF, cast_or_null<OMPIteratorExpr>(
4344 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
4345 : nullptr));
4346 for (const Expr *E : Data.DepExprs) {
4347 llvm::Value *NumDeps;
4348 LValue Base;
4349 LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
4350 std::tie(NumDeps, Base) =
4351 getDepobjElements(CGF, DepobjLVal, E->getExprLoc());
4352
4353 // memcopy dependency data.
4354 llvm::Value *Size = CGF.Builder.CreateNUWMul(
4355 ElSize,
4356 CGF.Builder.CreateIntCast(NumDeps, CGF.SizeTy, /*isSigned=*/false));
4357 llvm::Value *Pos = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4358 Address DepAddr = CGF.Builder.CreateGEP(CGF, DependenciesArray, Pos);
4359 CGF.Builder.CreateMemCpy(DepAddr, Base.getAddress(), Size);
4360
4361 // Increase pos.
4362 // pos += size;
4363 llvm::Value *Add = CGF.Builder.CreateNUWAdd(Pos, NumDeps);
4364 CGF.EmitStoreOfScalar(Add, PosLVal);
4365 }
4366 }
4367}
4368
// Emits the combined dependence array for a task's depend clauses and returns
// {number of records (i32), void* address of the array}, or
// {nullptr, Address::invalid()} when no clause has any dependence expression.
// A VLA is used when the total size is only known at runtime (depobj or
// iterator-modified clauses); otherwise a fixed-size alloca suffices.
4369std::pair<llvm::Value *, Address> CGOpenMPRuntime::emitDependClause(
4371 SourceLocation Loc) {
4372 if (llvm::all_of(Dependencies, [](const OMPTaskDataTy::DependData &D) {
4373 return D.DepExprs.empty();
4374 }))
4375 return std::make_pair(nullptr, Address::invalid());
4376 // Process list of dependencies.
4377 ASTContext &C = CGM.getContext();
4378 Address DependenciesArray = Address::invalid();
4379 llvm::Value *NumOfElements = nullptr;
// Statically countable dependencies: everything except depobj clauses and
// iterator-modified clauses.
4380 unsigned NumDependencies = std::accumulate(
4381 Dependencies.begin(), Dependencies.end(), 0,
4382 [](unsigned V, const OMPTaskDataTy::DependData &D) {
4383 return D.DepKind == OMPC_DEPEND_depobj
4384 ? V
4385 : (V + (D.IteratorExpr ? 0 : D.DepExprs.size()));
4386 });
4387 QualType FlagsTy;
4388 getDependTypes(C, KmpDependInfoTy, FlagsTy);
4389 bool HasDepobjDeps = false;
4390 bool HasRegularWithIterators = false;
4391 llvm::Value *NumOfDepobjElements = llvm::ConstantInt::get(CGF.IntPtrTy, 0);
4392 llvm::Value *NumOfRegularWithIterators =
4393 llvm::ConstantInt::get(CGF.IntPtrTy, 0);
4394 // Calculate number of depobj dependencies and regular deps with the
4395 // iterators.
4396 for (const OMPTaskDataTy::DependData &D : Dependencies) {
4397 if (D.DepKind == OMPC_DEPEND_depobj) {
4400 for (llvm::Value *Size : Sizes) {
4401 NumOfDepobjElements =
4402 CGF.Builder.CreateNUWAdd(NumOfDepobjElements, Size);
4403 }
4404 HasDepobjDeps = true;
4405 continue;
4406 }
4407 // Include number of iterations, if any.
4408
4409 if (const auto *IE = cast_or_null<OMPIteratorExpr>(D.IteratorExpr)) {
// Iterator space = product of all iterator upper bounds; the clause expands
// to (space * number of dependence expressions) records.
4410 llvm::Value *ClauseIteratorSpace =
4411 llvm::ConstantInt::get(CGF.IntPtrTy, 1);
4412 for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
4413 llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
4414 Sz = CGF.Builder.CreateIntCast(Sz, CGF.IntPtrTy, /*isSigned=*/false);
4415 ClauseIteratorSpace = CGF.Builder.CreateNUWMul(Sz, ClauseIteratorSpace);
4416 }
4417 llvm::Value *NumClauseDeps = CGF.Builder.CreateNUWMul(
4418 ClauseIteratorSpace,
4419 llvm::ConstantInt::get(CGF.IntPtrTy, D.DepExprs.size()));
4420 NumOfRegularWithIterators =
4421 CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumClauseDeps);
4422 HasRegularWithIterators = true;
4423 continue;
4424 }
4425 }
4426
4427 QualType KmpDependInfoArrayTy;
4428 if (HasDepobjDeps || HasRegularWithIterators) {
// Runtime-sized case: sum all contributions and emit a VLA of
// kmp_depend_info records.
4429 NumOfElements = llvm::ConstantInt::get(CGM.IntPtrTy, NumDependencies,
4430 /*isSigned=*/false);
4431 if (HasDepobjDeps) {
4432 NumOfElements =
4433 CGF.Builder.CreateNUWAdd(NumOfDepobjElements, NumOfElements);
4434 }
4435 if (HasRegularWithIterators) {
4436 NumOfElements =
4437 CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumOfElements);
4438 }
4439 auto *OVE = new (C) OpaqueValueExpr(
4440 Loc, C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0),
4441 VK_PRValue);
4442 CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE,
4443 RValue::get(NumOfElements));
4444 KmpDependInfoArrayTy =
4445 C.getVariableArrayType(KmpDependInfoTy, OVE, ArraySizeModifier::Normal,
4446 /*IndexTypeQuals=*/0);
4447 // CGF.EmitVariablyModifiedType(KmpDependInfoArrayTy);
4448 // Properly emit variable-sized array.
4449 auto *PD = ImplicitParamDecl::Create(C, KmpDependInfoArrayTy,
4451 CGF.EmitVarDecl(*PD);
4452 DependenciesArray = CGF.GetAddrOfLocalVar(PD);
4453 NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
4454 /*isSigned=*/false);
4455 } else {
// Statically-sized case: a plain constant array temporary.
4456 KmpDependInfoArrayTy = C.getConstantArrayType(
4457 KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies), nullptr,
4458 ArraySizeModifier::Normal, /*IndexTypeQuals=*/0);
4459 DependenciesArray =
4460 CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr");
4461 DependenciesArray = CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0);
4462 NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumDependencies,
4463 /*isSigned=*/false);
4464 }
// Phase 1: plain (non-depobj, non-iterator) dependencies at statically known
// slots.
4465 unsigned Pos = 0;
4466 for (const OMPTaskDataTy::DependData &Dep : Dependencies) {
4467 if (Dep.DepKind == OMPC_DEPEND_depobj || Dep.IteratorExpr)
4468 continue;
4469 emitDependData(CGF, KmpDependInfoTy, &Pos, Dep, DependenciesArray);
4470 }
4471 // Copy regular dependencies with iterators.
4472 LValue PosLVal = CGF.MakeAddrLValue(
4473 CGF.CreateMemTemp(C.getSizeType(), "dep.counter.addr"), C.getSizeType());
4474 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
4475 for (const OMPTaskDataTy::DependData &Dep : Dependencies) {
4476 if (Dep.DepKind == OMPC_DEPEND_depobj || !Dep.IteratorExpr)
4477 continue;
4478 emitDependData(CGF, KmpDependInfoTy, &PosLVal, Dep, DependenciesArray);
4479 }
4480 // Copy final depobj arrays without iterators.
4481 if (HasDepobjDeps) {
4482 for (const OMPTaskDataTy::DependData &Dep : Dependencies) {
4483 if (Dep.DepKind != OMPC_DEPEND_depobj)
4484 continue;
4485 emitDepobjElements(CGF, KmpDependInfoTy, PosLVal, Dep, DependenciesArray);
4486 }
4487 }
4488 DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4489 DependenciesArray, CGF.VoidPtrTy, CGF.Int8Ty);
4490 return std::make_pair(NumOfElements, DependenciesArray);
4491}
4492
// Emits the dependence array for an 'omp depobj' construct.  Storage is
// heap-allocated via __kmpc_alloc with one extra leading record whose
// base_addr field stores the element count (required later by
// 'depobj(x) update(...)'); the returned address points just past that
// header record.  Returns Address::invalid() for an empty clause.
4494 CodeGenFunction &CGF, const OMPTaskDataTy::DependData &Dependencies,
4495 SourceLocation Loc) {
4496 if (Dependencies.DepExprs.empty())
4497 return Address::invalid();
4498 // Process list of dependencies.
4499 ASTContext &C = CGM.getContext();
4500 Address DependenciesArray = Address::invalid();
4501 unsigned NumDependencies = Dependencies.DepExprs.size();
4502 QualType FlagsTy;
4503 getDependTypes(C, KmpDependInfoTy, FlagsTy);
4504 auto *KmpDependInfoRD = KmpDependInfoTy->castAsRecordDecl();
4505
4506 llvm::Value *Size;
4507 // Define type kmp_depend_info[<Dependencies.size()>];
4508 // For depobj reserve one extra element to store the number of elements.
4509 // It is required to handle depobj(x) update(in) construct.
4510 // kmp_depend_info[<Dependencies.size()>] deps;
4511 llvm::Value *NumDepsVal;
4512 CharUnits Align = C.getTypeAlignInChars(KmpDependInfoTy);
4513 if (const auto *IE =
4514 cast_or_null<OMPIteratorExpr>(Dependencies.IteratorExpr)) {
// Iterator-modified clause: element count = product of iterator upper
// bounds; allocation size = (count + 1 header) * record size.
4515 NumDepsVal = llvm::ConstantInt::get(CGF.SizeTy, 1);
4516 for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
4517 llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
4518 Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
4519 NumDepsVal = CGF.Builder.CreateNUWMul(NumDepsVal, Sz);
4520 }
4521 Size = CGF.Builder.CreateNUWAdd(llvm::ConstantInt::get(CGF.SizeTy, 1),
4522 NumDepsVal);
4523 CharUnits SizeInBytes =
4524 C.getTypeSizeInChars(KmpDependInfoTy).alignTo(Align);
4525 llvm::Value *RecSize = CGM.getSize(SizeInBytes);
4526 Size = CGF.Builder.CreateNUWMul(Size, RecSize);
4527 NumDepsVal =
4528 CGF.Builder.CreateIntCast(NumDepsVal, CGF.IntPtrTy, /*isSigned=*/false);
4529 } else {
// Statically known count: size of a (count + 1)-element constant array.
4530 QualType KmpDependInfoArrayTy = C.getConstantArrayType(
4531 KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies + 1),
4532 nullptr, ArraySizeModifier::Normal, /*IndexTypeQuals=*/0);
4533 CharUnits Sz = C.getTypeSizeInChars(KmpDependInfoArrayTy);
4534 Size = CGM.getSize(Sz.alignTo(Align));
4535 NumDepsVal = llvm::ConstantInt::get(CGF.IntPtrTy, NumDependencies);
4536 }
4537 // Need to allocate on the dynamic memory.
4538 llvm::Value *ThreadID = getThreadID(CGF, Loc);
4539 // Use default allocator.
4540 llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
4541 llvm::Value *Args[] = {ThreadID, Size, Allocator};
4542
4543 llvm::Value *Addr =
4544 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
4545 CGM.getModule(), OMPRTL___kmpc_alloc),
4546 Args, ".dep.arr.addr");
4547 llvm::Type *KmpDependInfoLlvmTy = CGF.ConvertTypeForMem(KmpDependInfoTy);
4549 Addr, CGF.Builder.getPtrTy(0));
4550 DependenciesArray = Address(Addr, KmpDependInfoLlvmTy, Align);
4551 // Write number of elements in the first element of array for depobj.
4552 LValue Base = CGF.MakeAddrLValue(DependenciesArray, KmpDependInfoTy);
4553 // deps[i].base_addr = NumDependencies;
4554 LValue BaseAddrLVal = CGF.EmitLValueForField(
4555 Base,
4556 *std::next(KmpDependInfoRD->field_begin(),
4557 static_cast<unsigned int>(RTLDependInfoFields::BaseAddr)));
4558 CGF.EmitStoreOfScalar(NumDepsVal, BaseAddrLVal);
// Payload records start at index 1 (index 0 is the count header).
4559 llvm::PointerUnion<unsigned *, LValue *> Pos;
4560 unsigned Idx = 1;
4561 LValue PosLVal;
4562 if (Dependencies.IteratorExpr) {
4563 PosLVal = CGF.MakeAddrLValue(
4564 CGF.CreateMemTemp(C.getSizeType(), "iterator.counter.addr"),
4565 C.getSizeType());
4566 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Idx), PosLVal,
4567 /*IsInit=*/true);
4568 Pos = &PosLVal;
4569 } else {
4570 Pos = &Idx;
4571 }
4572 emitDependData(CGF, KmpDependInfoTy, Pos, Dependencies, DependenciesArray);
4573 DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4574 CGF.Builder.CreateConstGEP(DependenciesArray, 1), CGF.VoidPtrTy,
4575 CGF.Int8Ty);
4576 return DependenciesArray;
4577}
4578
// Implements 'omp depobj(x) destroy': releases the heap storage with
// __kmpc_free.  The pointer stored in the depobj variable points just past
// the count-header record, so step back one record to recover the base
// address of the allocation.
4580 SourceLocation Loc) {
4581 ASTContext &C = CGM.getContext();
4582 QualType FlagsTy;
4583 getDependTypes(C, KmpDependInfoTy, FlagsTy);
4584 LValue Base = CGF.EmitLoadOfPointerLValue(DepobjLVal.getAddress(),
4585 C.VoidPtrTy.castAs<PointerType>());
4586 QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
4588 Base.getAddress(), CGF.ConvertTypeForMem(KmpDependInfoPtrTy),
4590 llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
4591 Addr.getElementType(), Addr.emitRawPointer(CGF),
4592 llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
4593 DepObjAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(DepObjAddr,
4594 CGF.VoidPtrTy);
4595 llvm::Value *ThreadID = getThreadID(CGF, Loc);
4596 // Use default allocator.
4597 llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
4598 llvm::Value *Args[] = {ThreadID, DepObjAddr, Allocator};
4599
4600 // _kmpc_free(gtid, addr, nullptr);
4601 (void)CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
4602 CGM.getModule(), OMPRTL___kmpc_free),
4603 Args);
4604}
4605
// Implements 'omp depobj(x) update(kind)': walks every kmp_depend_info
// record of the depobj array with a pointer-PHI loop and overwrites each
// record's flags field with the translated new dependence kind.
4607 OpenMPDependClauseKind NewDepKind,
4608 SourceLocation Loc) {
4609 ASTContext &C = CGM.getContext();
4610 QualType FlagsTy;
4611 getDependTypes(C, KmpDependInfoTy, FlagsTy);
4612 auto *KmpDependInfoRD = KmpDependInfoTy->castAsRecordDecl();
4613 llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
4614 llvm::Value *NumDeps;
4615 LValue Base;
4616 std::tie(NumDeps, Base) = getDepobjElements(CGF, DepobjLVal, Loc);
4617
4618 Address Begin = Base.getAddress();
4619 // Cast from pointer to array type to pointer to single element.
4620 llvm::Value *End = CGF.Builder.CreateGEP(Begin.getElementType(),
4621 Begin.emitRawPointer(CGF), NumDeps);
4622 // The basic structure here is a while-do loop.
4623 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.body");
4624 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.done");
4625 llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
4626 CGF.EmitBlock(BodyBB);
4627 llvm::PHINode *ElementPHI =
4628 CGF.Builder.CreatePHI(Begin.getType(), 2, "omp.elementPast");
4629 ElementPHI->addIncoming(Begin.emitRawPointer(CGF), EntryBB);
4630 Begin = Begin.withPointer(ElementPHI, KnownNonNull);
4631 Base = CGF.MakeAddrLValue(Begin, KmpDependInfoTy, Base.getBaseInfo(),
4632 Base.getTBAAInfo());
4633 // deps[i].flags = NewDepKind;
4634 RTLDependenceKindTy DepKind = translateDependencyKind(NewDepKind);
4635 LValue FlagsLVal = CGF.EmitLValueForField(
4636 Base, *std::next(KmpDependInfoRD->field_begin(),
4637 static_cast<unsigned int>(RTLDependInfoFields::Flags)));
4639 llvm::ConstantInt::get(LLVMFlagsTy, static_cast<unsigned int>(DepKind)),
4640 FlagsLVal);
4641
4642 // Shift the address forward by one element.
4643 llvm::Value *ElementNext =
4644 CGF.Builder.CreateConstGEP(Begin, /*Index=*/1, "omp.elementNext")
4645 .emitRawPointer(CGF);
4646 ElementPHI->addIncoming(ElementNext, CGF.Builder.GetInsertBlock());
4647 llvm::Value *IsEmpty =
4648 CGF.Builder.CreateICmpEQ(ElementNext, End, "omp.isempty");
4649 CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
4650 // Done.
4651 CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
4652}
4653
// Emits code for '#pragma omp task': initializes the task object via
// emitTaskInit and emits its dependence array, then either enqueues the task
// (__kmpc_omp_task / __kmpc_omp_task_with_deps) or — when the 'if' clause
// evaluates to false — waits on dependences (__kmpc_omp_taskwait_deps_51) and
// runs the task entry inline between __kmpc_omp_task_begin_if0 and
// __kmpc_omp_task_complete_if0.
4655 const OMPExecutableDirective &D,
4656 llvm::Function *TaskFunction,
4657 QualType SharedsTy, Address Shareds,
4658 const Expr *IfCond,
4659 const OMPTaskDataTy &Data) {
4660 if (!CGF.HaveInsertPoint())
4661 return;
4662
4664 emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
4665 llvm::Value *NewTask = Result.NewTask;
4666 llvm::Function *TaskEntry = Result.TaskEntry;
4667 llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy;
4668 LValue TDBase = Result.TDBase;
4669 const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD;
4670 // Process list of dependences.
4671 Address DependenciesArray = Address::invalid();
4672 llvm::Value *NumOfElements;
4673 std::tie(NumOfElements, DependenciesArray) =
4674 emitDependClause(CGF, Data.Dependences, Loc);
4675
4676 // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
4677 // libcall.
4678 // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
4679 // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
4680 // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence
4681 // list is not empty
4682 llvm::Value *ThreadID = getThreadID(CGF, Loc);
4683 llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
4684 llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask };
4685 llvm::Value *DepTaskArgs[7];
4686 if (!Data.Dependences.empty()) {
4687 DepTaskArgs[0] = UpLoc;
4688 DepTaskArgs[1] = ThreadID;
4689 DepTaskArgs[2] = NewTask;
4690 DepTaskArgs[3] = NumOfElements;
4691 DepTaskArgs[4] = DependenciesArray.emitRawPointer(CGF);
4692 DepTaskArgs[5] = CGF.Builder.getInt32(0);
4693 DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
4694 }
// 'then' branch: the task is actually deferred — enqueue it in the runtime.
4695 auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, &TaskArgs,
4696 &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) {
4697 if (!Data.Tied) {
// Untied tasks start with part_id = 0.
4698 auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
4699 LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI);
4700 CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal);
4701 }
4702 if (!Data.Dependences.empty()) {
4703 CGF.EmitRuntimeCall(
4704 OMPBuilder.getOrCreateRuntimeFunction(
4705 CGM.getModule(), OMPRTL___kmpc_omp_task_with_deps),
4706 DepTaskArgs);
4707 } else {
4708 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
4709 CGM.getModule(), OMPRTL___kmpc_omp_task),
4710 TaskArgs);
4711 }
4712 // Check if parent region is untied and build return for untied task;
4713 if (auto *Region =
4714 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
4715 Region->emitUntiedSwitch(CGF);
4716 };
4717
4718 llvm::Value *DepWaitTaskArgs[7];
4719 if (!Data.Dependences.empty()) {
4720 DepWaitTaskArgs[0] = UpLoc;
4721 DepWaitTaskArgs[1] = ThreadID;
4722 DepWaitTaskArgs[2] = NumOfElements;
4723 DepWaitTaskArgs[3] = DependenciesArray.emitRawPointer(CGF);
4724 DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
4725 DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
4726 DepWaitTaskArgs[6] =
4727 llvm::ConstantInt::get(CGF.Int32Ty, Data.HasNowaitClause);
4728 }
4729 auto &M = CGM.getModule();
// 'else' branch: 'if' clause is false — execute the task body immediately
// and undeferred on the encountering thread.
4730 auto &&ElseCodeGen = [this, &M, &TaskArgs, ThreadID, NewTaskNewTaskTTy,
4731 TaskEntry, &Data, &DepWaitTaskArgs,
4732 Loc](CodeGenFunction &CGF, PrePostActionTy &) {
4733 CodeGenFunction::RunCleanupsScope LocalScope(CGF);
4734 // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
4735 // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
4736 // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
4737 // is specified.
4738 if (!Data.Dependences.empty())
4739 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
4740 M, OMPRTL___kmpc_omp_taskwait_deps_51),
4741 DepWaitTaskArgs);
4742 // Call proxy_task_entry(gtid, new_task);
4743 auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy,
4744 Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
4745 Action.Enter(CGF);
4746 llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
4747 CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry,
4748 OutlinedFnArgs);
4749 };
4750
4751 // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
4752 // kmp_task_t *new_task);
4753 // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
4754 // kmp_task_t *new_task);
4756 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
4757 M, OMPRTL___kmpc_omp_task_begin_if0),
4758 TaskArgs,
4759 OMPBuilder.getOrCreateRuntimeFunction(
4760 M, OMPRTL___kmpc_omp_task_complete_if0),
4761 TaskArgs);
4762 RCG.setAction(Action);
4763 RCG(CGF);
4764 };
4765
4766 if (IfCond) {
4767 emitIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen);
4768 } else {
4769 RegionCodeGenTy ThenRCG(ThenCodeGen);
4770 ThenRCG(CGF);
4771 }
4772}
4773
// Emits code for '#pragma omp taskloop': initializes the task object, stores
// the loop's lower bound, upper bound, stride and the reductions pointer into
// the kmp_task_t fields, then calls __kmpc_taskloop (or __kmpc_taskloop_5
// when a grainsize/num_tasks modifier is present).
4775 const OMPLoopDirective &D,
4776 llvm::Function *TaskFunction,
4777 QualType SharedsTy, Address Shareds,
4778 const Expr *IfCond,
4779 const OMPTaskDataTy &Data) {
4780 if (!CGF.HaveInsertPoint())
4781 return;
4783 emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
4784 // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
4785 // libcall.
4786 // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
4787 // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
4788 // sched, kmp_uint64 grainsize, void *task_dup);
4789 llvm::Value *ThreadID = getThreadID(CGF, Loc);
4790 llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
// The 'if' clause is passed to the runtime as an int (1 when absent).
4791 llvm::Value *IfVal;
4792 if (IfCond) {
4793 IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy,
4794 /*isSigned=*/true);
4795 } else {
4796 IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1);
4797 }
4798
// Initialize the lb/ub/st fields of the task descriptor from the loop
// directive's helper variables.
4799 LValue LBLVal = CGF.EmitLValueForField(
4800 Result.TDBase,
4801 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound));
4802 const auto *LBVar =
4803 cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl());
4804 CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(), LBLVal.getQuals(),
4805 /*IsInitializer=*/true);
4806 LValue UBLVal = CGF.EmitLValueForField(
4807 Result.TDBase,
4808 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound));
4809 const auto *UBVar =
4810 cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl());
4811 CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(), UBLVal.getQuals(),
4812 /*IsInitializer=*/true);
4813 LValue StLVal = CGF.EmitLValueForField(
4814 Result.TDBase,
4815 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride));
4816 const auto *StVar =
4817 cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl());
4818 CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(), StLVal.getQuals(),
4819 /*IsInitializer=*/true);
4820 // Store reductions address.
4821 LValue RedLVal = CGF.EmitLValueForField(
4822 Result.TDBase,
4823 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions));
4824 if (Data.Reductions) {
4825 CGF.EmitStoreOfScalar(Data.Reductions, RedLVal);
4826 } else {
4827 CGF.EmitNullInitialization(RedLVal.getAddress(),
4828 CGF.getContext().VoidPtrTy);
4829 }
// Encoding of the runtime's 'sched' argument.
4830 enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 };
4832 UpLoc,
4833 ThreadID,
4834 Result.NewTask,
4835 IfVal,
4836 LBLVal.getPointer(CGF),
4837 UBLVal.getPointer(CGF),
4838 CGF.EmitLoadOfScalar(StLVal, Loc),
4839 llvm::ConstantInt::getSigned(
4840 CGF.IntTy, 1), // Always 1 because taskgroup emitted by the compiler
4841 llvm::ConstantInt::getSigned(
4842 CGF.IntTy, Data.Schedule.getPointer()
4843 ? Data.Schedule.getInt() ? NumTasks : Grainsize
4844 : NoSchedule),
4845 Data.Schedule.getPointer()
4846 ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty,
4847 /*isSigned=*/false)
4848 : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0)};
4849 if (Data.HasModifier)
4850 TaskArgs.push_back(llvm::ConstantInt::get(CGF.Int32Ty, 1));
4851
4852 TaskArgs.push_back(Result.TaskDupFn
4854 Result.TaskDupFn, CGF.VoidPtrTy)
4855 : llvm::ConstantPointerNull::get(CGF.VoidPtrTy));
4856 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
4857 CGM.getModule(), Data.HasModifier
4858 ? OMPRTL___kmpc_taskloop_5
4859 : OMPRTL___kmpc_taskloop),
4860 TaskArgs);
4861}
4862
4863/// Emit reduction operation for each element of array (required for
4864/// array sections) LHS op = RHS.
4865/// \param Type Type of array.
4866/// \param LHSVar Variable on the left side of the reduction operation
4867/// (references element of array in original variable).
4868/// \param RHSVar Variable on the right side of the reduction operation
4869/// (references element of array in original variable).
4870/// \param RedOpGen Generator of reduction operation with use of LHSVar and
4871/// RHSVar.
// Generates an element-wise loop over both arrays: an emptiness guard
// followed by a do-while body with two PHIs tracking the current LHS/RHS
// element pointers, privatizing LHSVar/RHSVar per element for RedOpGen.
4873 CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar,
4874 const VarDecl *RHSVar,
4875 const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *,
4876 const Expr *, const Expr *)> &RedOpGen,
4877 const Expr *XExpr = nullptr, const Expr *EExpr = nullptr,
4878 const Expr *UpExpr = nullptr) {
4879 // Perform element-by-element initialization.
4880 QualType ElementTy;
4881 Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar);
4882 Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar);
4883
4884 // Drill down to the base element type on both arrays.
4885 const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
4886 llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr);
4887
4888 llvm::Value *RHSBegin = RHSAddr.emitRawPointer(CGF);
4889 llvm::Value *LHSBegin = LHSAddr.emitRawPointer(CGF);
4890 // Cast from pointer to array type to pointer to single element.
4891 llvm::Value *LHSEnd =
4892 CGF.Builder.CreateGEP(LHSAddr.getElementType(), LHSBegin, NumElements);
4893 // The basic structure here is a while-do loop.
4894 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arraycpy.body");
4895 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arraycpy.done");
4896 llvm::Value *IsEmpty =
4897 CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty");
4898 CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
4899
4900 // Enter the loop body, making that address the current address.
4901 llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
4902 CGF.EmitBlock(BodyBB);
4903
4904 CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);
4905
4906 llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI(
4907 RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast");
4908 RHSElementPHI->addIncoming(RHSBegin, EntryBB);
4909 Address RHSElementCurrent(
4910 RHSElementPHI, RHSAddr.getElementType(),
4911 RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));
4912
4913 llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI(
4914 LHSBegin->getType(), 2, "omp.arraycpy.destElementPast");
4915 LHSElementPHI->addIncoming(LHSBegin, EntryBB);
4916 Address LHSElementCurrent(
4917 LHSElementPHI, LHSAddr.getElementType(),
4918 LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));
4919
4920 // Emit copy.
4922 Scope.addPrivate(LHSVar, LHSElementCurrent);
4923 Scope.addPrivate(RHSVar, RHSElementCurrent);
4924 Scope.Privatize();
4925 RedOpGen(CGF, XExpr, EExpr, UpExpr);
4926 Scope.ForceCleanup();
4927
4928 // Shift the address forward by one element.
4929 llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32(
4930 LHSAddr.getElementType(), LHSElementPHI, /*Idx0=*/1,
4931 "omp.arraycpy.dest.element");
4932 llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32(
4933 RHSAddr.getElementType(), RHSElementPHI, /*Idx0=*/1,
4934 "omp.arraycpy.src.element");
4935 // Check whether we've reached the end.
4936 llvm::Value *Done =
4937 CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done");
4938 CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
4939 LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock());
4940 RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock());
4941
4942 // Done.
4943 CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
4944}
4945
4946/// Emit reduction combiner. If the combiner is a simple expression emit it as
4947/// is, otherwise consider it as combiner of UDR decl and emit it as a call of
4948/// UDR combiner function.
// The UDR pattern is detected structurally: a call whose callee is an opaque
// value referring to an OMPDeclareReductionDecl.  In that case the declared
// combiner function pair is fetched before emitting the expression.
4950 const Expr *ReductionOp) {
4951 if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
4952 if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
4953 if (const auto *DRE =
4954 dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
4955 if (const auto *DRD =
4956 dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) {
4957 std::pair<llvm::Function *, llvm::Function *> Reduction =
4961 CGF.EmitIgnoredExpr(ReductionOp);
4962 return;
4963 }
// Plain (non-UDR) combiner: just emit the expression for its side effects.
4964 CGF.EmitIgnoredExpr(ReductionOp);
4965}
4966
// NOTE(review): doxygen line 4967 (start of this signature) is missing from
// this extract; presumably "llvm::Function *CGOpenMPRuntime::emitReductionFunction("
// with the Privates/LHSExprs parameters on the also-missing line 4969 -- confirm.
// Builds the outlined "reduce_func(void *lhs[<n>], void *rhs[<n>])" that the
// OpenMP runtime invokes from __kmpc_reduce to combine two threads' private
// reduction lists element by element.
4968 StringRef ReducerName, SourceLocation Loc, llvm::Type *ArgsElemType,
4970 ArrayRef<const Expr *> RHSExprs, ArrayRef<const Expr *> ReductionOps) {
4971 ASTContext &C = CGM.getContext();
4972
4973 // void reduction_func(void *LHSArg, void *RHSArg);
4974 FunctionArgList Args;
4975 ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
4977 ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
4979 Args.push_back(&LHSArg);
4980 Args.push_back(&RHSArg);
4981 const auto &CGFI =
4982 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
4983 std::string Name = getReductionFuncName(ReducerName);
4984 auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
4985 llvm::GlobalValue::InternalLinkage, Name,
4986 &CGM.getModule());
4987 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
4988 if (!CGM.getCodeGenOpts().SampleProfileFile.empty())
4989 Fn->addFnAttr("sample-profile-suffix-elision-policy", "selected");
4990 Fn->setDoesNotRecurse();
4991 CodeGenFunction CGF(CGM);
4992 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
4993
4994 // Dst = (void*[n])(LHSArg);
4995 // Src = (void*[n])(RHSArg);
 // NOTE(review): doxygen lines 4996 and 5000 are missing here (presumably the
 // "Address LHS(..." / "Address RHS(..." openings of the two reinterpreted
 // argument arrays that the loads below feed into).
4997 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
4998 CGF.Builder.getPtrTy(0)),
4999 ArgsElemType, CGF.getPointerAlign());
5001 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
5002 CGF.Builder.getPtrTy(0)),
5003 ArgsElemType, CGF.getPointerAlign());
5004
5005 // ...
5006 // *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
5007 // ...
 // Map each LHS/RHS variable onto the corresponding slot of the argument
 // arrays so that the reduction-op expressions below read/write the right
 // per-thread storage. (Line 5008, the OMPPrivateScope declaration, is
 // missing from this extract.)
5009 const auto *IPriv = Privates.begin();
5010 unsigned Idx = 0;
5011 for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) {
5012 const auto *RHSVar =
5013 cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl());
5014 Scope.addPrivate(RHSVar, emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar));
5015 const auto *LHSVar =
5016 cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl());
5017 Scope.addPrivate(LHSVar, emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar));
5018 QualType PrivTy = (*IPriv)->getType();
5019 if (PrivTy->isVariablyModifiedType()) {
5020 // Get array size and emit VLA type.
 // VLA reduction items occupy two consecutive slots in the argument
 // array: the data pointer followed by the element count (see the
 // matching packing loop in emitReduction).
5021 ++Idx;
5022 Address Elem = CGF.Builder.CreateConstArrayGEP(LHS, Idx);
5023 llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem);
5024 const VariableArrayType *VLA =
5025 CGF.getContext().getAsVariableArrayType(PrivTy);
5026 const auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr());
5028 CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy)));
5029 CGF.EmitVariablyModifiedType(PrivTy);
5030 }
5031 }
5032 Scope.Privatize();
5033 IPriv = Privates.begin();
5034 const auto *ILHS = LHSExprs.begin();
5035 const auto *IRHS = RHSExprs.begin();
5036 for (const Expr *E : ReductionOps) {
5037 if ((*IPriv)->getType()->isArrayType()) {
5038 // Emit reduction for array section.
5039 const auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
5040 const auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
5042 CGF, (*IPriv)->getType(), LHSVar, RHSVar,
5043 [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
5044 emitReductionCombiner(CGF, E);
5045 });
5046 } else {
5047 // Emit reduction for array subscript or single variable.
5048 emitReductionCombiner(CGF, E);
5049 }
5050 ++IPriv;
5051 ++ILHS;
5052 ++IRHS;
5053 }
5054 Scope.ForceCleanup();
5055 CGF.FinishFunction();
5056 return Fn;
5057}
5058
// NOTE(review): doxygen line 5059 (start of this signature) is missing; it
// presumably reads "void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF,".
// Emits the combiner for a single reduction item, dispatching between the
// aggregate (array) element-wise path and the scalar path.
5060 const Expr *ReductionOp,
5061 const Expr *PrivateRef,
5062 const DeclRefExpr *LHS,
5063 const DeclRefExpr *RHS) {
5064 if (PrivateRef->getType()->isArrayType()) {
5065 // Emit reduction for array section.
5066 const auto *LHSVar = cast<VarDecl>(LHS->getDecl());
5067 const auto *RHSVar = cast<VarDecl>(RHS->getDecl());
 // (Line 5068, the EmitOMPAggregateReduction call opening, is missing from
 // this extract; the arguments below belong to it.)
5069 CGF, PrivateRef->getType(), LHSVar, RHSVar,
5070 [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
5071 emitReductionCombiner(CGF, ReductionOp);
5072 });
5073 } else {
5074 // Emit reduction for array subscript or single variable.
5075 emitReductionCombiner(CGF, ReductionOp);
5076 }
5077}
5078
// Forward declaration; the definition (with the naming-format documentation)
// appears further down in this file. Needed here because emitPrivateReduction
// uses it before the definition.
5079static std::string generateUniqueName(CodeGenModule &CGM,
5080 llvm::StringRef Prefix, const Expr *Ref);
5081
// NOTE(review): doxygen line 5082 (start of this signature) is missing; it
// presumably reads "void CGOpenMPRuntime::emitPrivateReduction(".
5083 CodeGenFunction &CGF, SourceLocation Loc, const Expr *Privates,
5084 const Expr *LHSExprs, const Expr *RHSExprs, const Expr *ReductionOps) {
5085
5086 // Create a shared global variable (__shared_reduction_var) to accumulate the
5087 // final result.
5088 //
5089 // Call __kmpc_barrier to synchronize threads before initialization.
5090 //
5091 // The master thread (thread_id == 0) initializes __shared_reduction_var
5092 // with the identity value or initializer.
5093 //
5094 // Call __kmpc_barrier to synchronize before combining.
5095 // For each i:
5096 // - Thread enters critical section.
5097 // - Reads its private value from LHSExprs[i].
5098 // - Updates __shared_reduction_var[i] = RedOp_i(__shared_reduction_var[i],
5099 // Privates[i]).
5100 // - Exits critical section.
5101 //
5102 // Call __kmpc_barrier after combining.
5103 //
5104 // Each thread copies __shared_reduction_var[i] back to RHSExprs[i].
5105 //
5106 // Final __kmpc_barrier to synchronize after broadcasting
5107 QualType PrivateType = Privates->getType();
5108 llvm::Type *LLVMType = CGF.ConvertTypeForMem(PrivateType);
5109
5110 const OMPDeclareReductionDecl *UDR = getReductionInit(ReductionOps);
5111 std::string ReductionVarNameStr;
5112 if (const auto *DRE = dyn_cast<DeclRefExpr>(Privates->IgnoreParenCasts()))
5113 ReductionVarNameStr =
5114 generateUniqueName(CGM, DRE->getDecl()->getNameAsString(), Privates);
5115 else
5116 ReductionVarNameStr = "unnamed_priv_var";
5117
5118 // Create an internal shared variable
 // NOTE(review): "internal_pivate_" below looks like a typo for
 // "internal_private_", but it is part of the emitted symbol name; changing it
 // would alter generated IR and break FileCheck tests -- leave as-is here.
5119 std::string SharedName =
5120 CGM.getOpenMPRuntime().getName({"internal_pivate_", ReductionVarNameStr});
5121 llvm::GlobalVariable *SharedVar = OMPBuilder.getOrCreateInternalVariable(
5122 LLVMType, ".omp.reduction." + SharedName);
5123
5124 SharedVar->setAlignment(
5125 llvm::MaybeAlign(CGF.getContext().getTypeAlign(PrivateType) / 8));
5126
5127 Address SharedResult =
5128 CGF.MakeNaturalAlignRawAddrLValue(SharedVar, PrivateType).getAddress();
5129
5130 llvm::Value *ThreadId = getThreadID(CGF, Loc);
5131 llvm::Value *BarrierLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
5132 llvm::Value *BarrierArgs[] = {BarrierLoc, ThreadId};
5133
5134 llvm::BasicBlock *InitBB = CGF.createBasicBlock("init");
5135 llvm::BasicBlock *InitEndBB = CGF.createBasicBlock("init.end");
5136
 // Only the thread with id 0 runs the initialization block; everyone else
 // jumps straight to init.end and waits at the barrier below.
5137 llvm::Value *IsWorker = CGF.Builder.CreateICmpEQ(
5138 ThreadId, llvm::ConstantInt::get(ThreadId->getType(), 0));
5139 CGF.Builder.CreateCondBr(IsWorker, InitBB, InitEndBB);
5140
5141 CGF.EmitBlock(InitBB);
5142
5143 auto EmitSharedInit = [&]() {
5144 if (UDR) { // Check if it's a User-Defined Reduction
5145 if (const Expr *UDRInitExpr = UDR->getInitializer()) {
5146 std::pair<llvm::Function *, llvm::Function *> FnPair =
 // (Line 5147, presumably the getUserDefinedReduction(UDR) call, is
 // missing from this extract.)
5148 llvm::Function *InitializerFn = FnPair.second;
5149 if (InitializerFn) {
5150 if (const auto *CE =
5151 dyn_cast<CallExpr>(UDRInitExpr->IgnoreParenImpCasts())) {
 // The initializer call's first argument is "&omp_priv"; remap that
 // variable to the shared global so the UDR initializer writes
 // directly into the accumulation storage.
5152 const auto *OutDRE = cast<DeclRefExpr>(
5153 cast<UnaryOperator>(CE->getArg(0)->IgnoreParenImpCasts())
5154 ->getSubExpr());
5155 const VarDecl *OutVD = cast<VarDecl>(OutDRE->getDecl());
5156
5157 CodeGenFunction::OMPPrivateScope LocalScope(CGF);
5158 LocalScope.addPrivate(OutVD, SharedResult);
5159
5160 (void)LocalScope.Privatize();
5161 if (const auto *OVE = dyn_cast<OpaqueValueExpr>(
5162 CE->getCallee()->IgnoreParenImpCasts())) {
 // (Line 5163, presumably the OpaqueValueMapping binding
 // InitializerFn to the callee, is missing from this extract.)
5164 CGF, OVE, RValue::get(InitializerFn));
5165 CGF.EmitIgnoredExpr(CE);
5166 } else {
5167 CGF.EmitAnyExprToMem(UDRInitExpr, SharedResult,
5168 PrivateType.getQualifiers(),
5169 /*IsInitializer=*/true);
5170 }
5171 } else {
5172 CGF.EmitAnyExprToMem(UDRInitExpr, SharedResult,
5173 PrivateType.getQualifiers(),
5174 /*IsInitializer=*/true);
5175 }
5176 } else {
5177 CGF.EmitAnyExprToMem(UDRInitExpr, SharedResult,
5178 PrivateType.getQualifiers(),
5179 /*IsInitializer=*/true);
5180 }
5181 } else {
5182 // EmitNullInitialization handles default construction for C++ classes
5183 // and zeroing for scalars, which is a reasonable default.
5184 CGF.EmitNullInitialization(SharedResult, PrivateType);
5185 }
5186 return; // UDR initialization handled
5187 }
 // Non-UDR case: reuse the private variable's own initializer when there is
 // one; otherwise fall back to null/zero initialization.
5188 if (const auto *DRE = dyn_cast<DeclRefExpr>(Privates)) {
5189 if (const auto *VD = dyn_cast<VarDecl>(DRE->getDecl())) {
5190 if (const Expr *InitExpr = VD->getInit()) {
5191 CGF.EmitAnyExprToMem(InitExpr, SharedResult,
5192 PrivateType.getQualifiers(), true);
5193 return;
5194 }
5195 }
5196 }
5197 CGF.EmitNullInitialization(SharedResult, PrivateType);
5198 };
5199 EmitSharedInit();
5200 CGF.Builder.CreateBr(InitEndBB);
5201 CGF.EmitBlock(InitEndBB);
5202
 // Barrier #1: everyone waits until thread 0 finished initializing the
 // shared accumulator.
5203 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
5204 CGM.getModule(), OMPRTL___kmpc_barrier),
5205 BarrierArgs);
5206
5207 const Expr *ReductionOp = ReductionOps;
5208 const OMPDeclareReductionDecl *CurrentUDR = getReductionInit(ReductionOp);
5209 LValue SharedLV = CGF.MakeAddrLValue(SharedResult, PrivateType);
5210 LValue LHSLV = CGF.EmitLValue(Privates);
5211
5212 auto EmitCriticalReduction = [&](auto ReductionGen) {
5213 std::string CriticalName = getName({"reduction_critical"});
5214 emitCriticalRegion(CGF, CriticalName, ReductionGen, Loc);
5215 };
5216
5217 if (CurrentUDR) {
5218 // Handle user-defined reduction.
5219 auto ReductionGen = [&](CodeGenFunction &CGF, PrePostActionTy &Action) {
5220 Action.Enter(CGF);
5221 std::pair<llvm::Function *, llvm::Function *> FnPair =
5222 getUserDefinedReduction(CurrentUDR);
5223 if (FnPair.first) {
 // Remap omp_out -> shared accumulator, omp_in -> this thread's
 // private value, then emit the UDR combiner call.
5224 if (const auto *CE = dyn_cast<CallExpr>(ReductionOp)) {
5225 const auto *OutDRE = cast<DeclRefExpr>(
5226 cast<UnaryOperator>(CE->getArg(0)->IgnoreParenImpCasts())
5227 ->getSubExpr());
5228 const auto *InDRE = cast<DeclRefExpr>(
5229 cast<UnaryOperator>(CE->getArg(1)->IgnoreParenImpCasts())
5230 ->getSubExpr());
5231 CodeGenFunction::OMPPrivateScope LocalScope(CGF);
5232 LocalScope.addPrivate(cast<VarDecl>(OutDRE->getDecl()),
5233 SharedLV.getAddress());
5234 LocalScope.addPrivate(cast<VarDecl>(InDRE->getDecl()),
5235 LHSLV.getAddress());
5236 (void)LocalScope.Privatize();
5237 emitReductionCombiner(CGF, ReductionOp);
5238 }
5239 }
5240 };
5241 EmitCriticalReduction(ReductionGen);
5242 } else {
5243 // Handle built-in reduction operations.
 // Debug-only sanity check: the reduction op must be an assignment
 // "LHS = LHS <op> RHS" (BinaryOperator or overloaded operator=).
5244#ifndef NDEBUG
5245 const Expr *ReductionClauseExpr = ReductionOp->IgnoreParenCasts();
5246 if (const auto *Cleanup = dyn_cast<ExprWithCleanups>(ReductionClauseExpr))
5247 ReductionClauseExpr = Cleanup->getSubExpr()->IgnoreParenCasts();
5248
5249 const Expr *AssignRHS = nullptr;
5250 if (const auto *BinOp = dyn_cast<BinaryOperator>(ReductionClauseExpr)) {
5251 if (BinOp->getOpcode() == BO_Assign)
5252 AssignRHS = BinOp->getRHS();
5253 } else if (const auto *OpCall =
5254 dyn_cast<CXXOperatorCallExpr>(ReductionClauseExpr)) {
5255 if (OpCall->getOperator() == OO_Equal)
5256 AssignRHS = OpCall->getArg(1);
5257 }
5258
5259 assert(AssignRHS &&
5260 "Private Variable Reduction : Invalid ReductionOp expression");
5261#endif
5262
5263 auto ReductionGen = [&](CodeGenFunction &CGF, PrePostActionTy &Action) {
5264 Action.Enter(CGF);
5265 const auto *OmpOutDRE =
5266 dyn_cast<DeclRefExpr>(LHSExprs->IgnoreParenImpCasts());
5267 const auto *OmpInDRE =
5268 dyn_cast<DeclRefExpr>(RHSExprs->IgnoreParenImpCasts());
5269 assert(
5270 OmpOutDRE && OmpInDRE &&
5271 "Private Variable Reduction : LHSExpr/RHSExpr must be DeclRefExprs");
5272 const VarDecl *OmpOutVD = cast<VarDecl>(OmpOutDRE->getDecl());
5273 const VarDecl *OmpInVD = cast<VarDecl>(OmpInDRE->getDecl());
5274 CodeGenFunction::OMPPrivateScope LocalScope(CGF);
5275 LocalScope.addPrivate(OmpOutVD, SharedLV.getAddress());
5276 LocalScope.addPrivate(OmpInVD, LHSLV.getAddress());
5277 (void)LocalScope.Privatize();
5278 // Emit the actual reduction operation
5279 CGF.EmitIgnoredExpr(ReductionOp);
5280 };
5281 EmitCriticalReduction(ReductionGen);
5282 }
5283
 // Barrier #2: all threads have folded their private values into the shared
 // accumulator before anyone reads the final result.
5284 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
5285 CGM.getModule(), OMPRTL___kmpc_barrier),
5286 BarrierArgs);
5287
5288 // Broadcast final result
5289 bool IsAggregate = PrivateType->isAggregateType();
5290 LValue SharedLV1 = CGF.MakeAddrLValue(SharedResult, PrivateType);
5291 llvm::Value *FinalResultVal = nullptr;
5292 Address FinalResultAddr = Address::invalid();
5293
5294 if (IsAggregate)
5295 FinalResultAddr = SharedResult;
5296 else
5297 FinalResultVal = CGF.EmitLoadOfScalar(SharedLV1, Loc);
5298
5299 LValue TargetLHSLV = CGF.EmitLValue(RHSExprs);
5300 if (IsAggregate) {
5301 CGF.EmitAggregateCopy(TargetLHSLV,
5302 CGF.MakeAddrLValue(FinalResultAddr, PrivateType),
5303 PrivateType, AggValueSlot::DoesNotOverlap, false);
5304 } else {
5305 CGF.EmitStoreOfScalar(FinalResultVal, TargetLHSLV);
5306 }
5307 // Final synchronization barrier
5308 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
5309 CGM.getModule(), OMPRTL___kmpc_barrier),
5310 BarrierArgs);
5311
5312 // Combiner with original list item
5313 auto OriginalListCombiner = [&](CodeGenFunction &CGF,
5314 PrePostActionTy &Action) {
5315 Action.Enter(CGF);
5316 emitSingleReductionCombiner(CGF, ReductionOps, Privates,
5317 cast<DeclRefExpr>(LHSExprs),
5318 cast<DeclRefExpr>(RHSExprs));
5319 };
5320 EmitCriticalReduction(OriginalListCombiner);
5321}
5322
// NOTE(review): doxygen line 5323 (start of this signature) is missing; it
// presumably reads
// "void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc," -- confirm.
5324 ArrayRef<const Expr *> OrgPrivates,
5325 ArrayRef<const Expr *> OrgLHSExprs,
5326 ArrayRef<const Expr *> OrgRHSExprs,
5327 ArrayRef<const Expr *> OrgReductionOps,
5328 ReductionOptionsTy Options) {
5329 if (!CGF.HaveInsertPoint())
5330 return;
5331
5332 bool WithNowait = Options.WithNowait;
5333 bool SimpleReduction = Options.SimpleReduction;
5334
5335 // Next code should be emitted for reduction:
5336 //
5337 // static kmp_critical_name lock = { 0 };
5338 //
5339 // void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
5340 // *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
5341 // ...
5342 // *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1],
5343 // *(Type<n>-1*)rhs[<n>-1]);
5344 // }
5345 //
5346 // ...
5347 // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
5348 // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
5349 // RedList, reduce_func, &<lock>)) {
5350 // case 1:
5351 // ...
5352 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
5353 // ...
5354 // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
5355 // break;
5356 // case 2:
5357 // ...
5358 // Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
5359 // ...
5360 // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);]
5361 // break;
5362 // default:;
5363 // }
5364 //
5365 // if SimpleReduction is true, only the next code is generated:
5366 // ...
5367 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
5368 // ...
5369
5370 ASTContext &C = CGM.getContext();
5371
5372 if (SimpleReduction) {
 // (Line 5373, presumably a CodeGenFunction::RunCleanupsScope, is missing
 // from this extract.) Combine each item in-place without runtime calls.
5374 const auto *IPriv = OrgPrivates.begin();
5375 const auto *ILHS = OrgLHSExprs.begin();
5376 const auto *IRHS = OrgRHSExprs.begin();
5377 for (const Expr *E : OrgReductionOps) {
5378 emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
5379 cast<DeclRefExpr>(*IRHS));
5380 ++IPriv;
5381 ++ILHS;
5382 ++IRHS;
5383 }
5384 return;
5385 }
5386
5387 // Filter out shared reduction variables based on IsPrivateVarReduction flag.
5388 // Only keep entries where the corresponding variable is not private.
 // Private-variable reductions are handled separately at the end of this
 // function via emitPrivateReduction.
5389 SmallVector<const Expr *> FilteredPrivates, FilteredLHSExprs,
5390 FilteredRHSExprs, FilteredReductionOps;
5391 for (unsigned I : llvm::seq<unsigned>(
5392 std::min(OrgReductionOps.size(), OrgLHSExprs.size()))) {
5393 if (!Options.IsPrivateVarReduction[I]) {
5394 FilteredPrivates.emplace_back(OrgPrivates[I]);
5395 FilteredLHSExprs.emplace_back(OrgLHSExprs[I]);
5396 FilteredRHSExprs.emplace_back(OrgRHSExprs[I]);
5397 FilteredReductionOps.emplace_back(OrgReductionOps[I]);
5398 }
5399 }
5400 // Wrap filtered vectors in ArrayRef for downstream shared reduction
5401 // processing.
5402 ArrayRef<const Expr *> Privates = FilteredPrivates;
5403 ArrayRef<const Expr *> LHSExprs = FilteredLHSExprs;
5404 ArrayRef<const Expr *> RHSExprs = FilteredRHSExprs;
5405 ArrayRef<const Expr *> ReductionOps = FilteredReductionOps;
5406
5407 // 1. Build a list of reduction variables.
5408 // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
5409 auto Size = RHSExprs.size();
5410 for (const Expr *E : Privates) {
5411 if (E->getType()->isVariablyModifiedType())
5412 // Reserve place for array size.
5413 ++Size;
5414 }
5415 llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size);
5416 QualType ReductionArrayTy = C.getConstantArrayType(
5417 C.VoidPtrTy, ArraySize, nullptr, ArraySizeModifier::Normal,
5418 /*IndexTypeQuals=*/0);
5419 RawAddress ReductionList =
5420 CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
5421 const auto *IPriv = Privates.begin();
5422 unsigned Idx = 0;
5423 for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) {
5424 Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
5425 CGF.Builder.CreateStore(
 // (Line 5426, the pointer-cast call opening, is missing from this
 // extract.)
5427 CGF.EmitLValue(RHSExprs[I]).getPointer(CGF), CGF.VoidPtrTy),
5428 Elem);
5429 if ((*IPriv)->getType()->isVariablyModifiedType()) {
5430 // Store array size.
 // VLA items take a second slot holding the element count; the matching
 // unpacking happens in emitReductionFunction.
5431 ++Idx;
5432 Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
5433 llvm::Value *Size = CGF.Builder.CreateIntCast(
5434 CGF.getVLASize(
5435 CGF.getContext().getAsVariableArrayType((*IPriv)->getType()))
5436 .NumElts,
5437 CGF.SizeTy, /*isSigned=*/false);
5438 CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy),
5439 Elem);
5440 }
5441 }
5442
5443 // 2. Emit reduce_func().
5444 llvm::Function *ReductionFn = emitReductionFunction(
5445 CGF.CurFn->getName(), Loc, CGF.ConvertTypeForMem(ReductionArrayTy),
5446 Privates, LHSExprs, RHSExprs, ReductionOps);
5447
5448 // 3. Create static kmp_critical_name lock = { 0 };
5449 std::string Name = getName({"reduction"});
5450 llvm::Value *Lock = getCriticalRegionLock(Name);
5451
5452 // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
5453 // RedList, reduce_func, &<lock>);
5454 llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
5455 llvm::Value *ThreadId = getThreadID(CGF, Loc);
5456 llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy);
5457 llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5458 ReductionList.getPointer(), CGF.VoidPtrTy);
5459 llvm::Value *Args[] = {
5460 IdentTLoc, // ident_t *<loc>
5461 ThreadId, // i32 <gtid>
5462 CGF.Builder.getInt32(RHSExprs.size()), // i32 <n>
5463 ReductionArrayTySize, // size_type sizeof(RedList)
5464 RL, // void *RedList
5465 ReductionFn, // void (*) (void *, void *) <reduce_func>
5466 Lock // kmp_critical_name *&<lock>
5467 };
5468 llvm::Value *Res = CGF.EmitRuntimeCall(
5469 OMPBuilder.getOrCreateRuntimeFunction(
5470 CGM.getModule(),
5471 WithNowait ? OMPRTL___kmpc_reduce_nowait : OMPRTL___kmpc_reduce),
5472 Args);
5473
5474 // 5. Build switch(res)
5475 llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(".omp.reduction.default");
5476 llvm::SwitchInst *SwInst =
5477 CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2);
5478
5479 // 6. Build case 1:
5480 // ...
5481 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
5482 // ...
5483 // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
5484 // break;
5485 llvm::BasicBlock *Case1BB = CGF.createBasicBlock(".omp.reduction.case1");
5486 SwInst->addCase(CGF.Builder.getInt32(1), Case1BB);
5487 CGF.EmitBlock(Case1BB);
5488
5489 // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
5490 llvm::Value *EndArgs[] = {
5491 IdentTLoc, // ident_t *<loc>
5492 ThreadId, // i32 <gtid>
5493 Lock // kmp_critical_name *&<lock>
5494 };
5495 auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps](
5496 CodeGenFunction &CGF, PrePostActionTy &Action) {
 // (Line 5497, presumably fetching CGOpenMPRuntime &RT from CGF, is
 // missing from this extract; RT is used in the loop below.)
5498 const auto *IPriv = Privates.begin();
5499 const auto *ILHS = LHSExprs.begin();
5500 const auto *IRHS = RHSExprs.begin();
5501 for (const Expr *E : ReductionOps) {
5502 RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
5503 cast<DeclRefExpr>(*IRHS));
5504 ++IPriv;
5505 ++ILHS;
5506 ++IRHS;
5507 }
5508 };
 // (Line 5509, presumably the RegionCodeGenTy RCG(CodeGen) declaration, is
 // missing from this extract.)
5510 CommonActionTy Action(
5511 nullptr, {},
5512 OMPBuilder.getOrCreateRuntimeFunction(
5513 CGM.getModule(), WithNowait ? OMPRTL___kmpc_end_reduce_nowait
5514 : OMPRTL___kmpc_end_reduce),
5515 EndArgs);
5516 RCG.setAction(Action);
5517 RCG(CGF);
5518
5519 CGF.EmitBranch(DefaultBB);
5520
5521 // 7. Build case 2:
5522 // ...
5523 // Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
5524 // ...
5525 // break;
5526 llvm::BasicBlock *Case2BB = CGF.createBasicBlock(".omp.reduction.case2");
5527 SwInst->addCase(CGF.Builder.getInt32(2), Case2BB);
5528 CGF.EmitBlock(Case2BB);
5529
5530 auto &&AtomicCodeGen = [Loc, Privates, LHSExprs, RHSExprs, ReductionOps](
5531 CodeGenFunction &CGF, PrePostActionTy &Action) {
5532 const auto *ILHS = LHSExprs.begin();
5533 const auto *IRHS = RHSExprs.begin();
5534 const auto *IPriv = Privates.begin();
5535 for (const Expr *E : ReductionOps) {
5536 const Expr *XExpr = nullptr;
5537 const Expr *EExpr = nullptr;
5538 const Expr *UpExpr = nullptr;
5539 BinaryOperatorKind BO = BO_Comma;
5540 if (const auto *BO = dyn_cast<BinaryOperator>(E)) {
5541 if (BO->getOpcode() == BO_Assign) {
5542 XExpr = BO->getLHS();
5543 UpExpr = BO->getRHS();
5544 }
5545 }
5546 // Try to emit update expression as a simple atomic.
5547 const Expr *RHSExpr = UpExpr;
5548 if (RHSExpr) {
5549 // Analyze RHS part of the whole expression.
5550 if (const auto *ACO = dyn_cast<AbstractConditionalOperator>(
5551 RHSExpr->IgnoreParenImpCasts())) {
5552 // If this is a conditional operator, analyze its condition for
5553 // min/max reduction operator.
5554 RHSExpr = ACO->getCond();
5555 }
5556 if (const auto *BORHS =
5557 dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) {
5558 EExpr = BORHS->getRHS();
5559 BO = BORHS->getOpcode();
5560 }
5561 }
5562 if (XExpr) {
5563 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
5564 auto &&AtomicRedGen = [BO, VD,
5565 Loc](CodeGenFunction &CGF, const Expr *XExpr,
5566 const Expr *EExpr, const Expr *UpExpr) {
5567 LValue X = CGF.EmitLValue(XExpr);
5568 RValue E;
5569 if (EExpr)
5570 E = CGF.EmitAnyExpr(EExpr);
5571 CGF.EmitOMPAtomicSimpleUpdateExpr(
5572 X, E, BO, /*IsXLHSInRHSPart=*/true,
5573 llvm::AtomicOrdering::Monotonic, Loc,
5574 [&CGF, UpExpr, VD, Loc](RValue XRValue) {
 // The update callback re-evaluates UpExpr with the LHS
 // variable remapped to a temporary holding the loaded value.
5575 CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
5576 Address LHSTemp = CGF.CreateMemTemp(VD->getType());
5577 CGF.emitOMPSimpleStore(
5578 CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue,
5579 VD->getType().getNonReferenceType(), Loc);
5580 PrivateScope.addPrivate(VD, LHSTemp);
5581 (void)PrivateScope.Privatize();
5582 return CGF.EmitAnyExpr(UpExpr);
5583 });
5584 };
5585 if ((*IPriv)->getType()->isArrayType()) {
5586 // Emit atomic reduction for array section.
5587 const auto *RHSVar =
5588 cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
5589 EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar,
5590 AtomicRedGen, XExpr, EExpr, UpExpr);
5591 } else {
5592 // Emit atomic reduction for array subscript or single variable.
5593 AtomicRedGen(CGF, XExpr, EExpr, UpExpr);
5594 }
5595 } else {
5596 // Emit as a critical region.
5597 auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *,
5598 const Expr *, const Expr *) {
5599 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
5600 std::string Name = RT.getName({"atomic_reduction"});
 // (Line 5601, presumably the RT.emitCriticalRegion( call opening,
 // is missing from this extract.)
5602 CGF, Name,
5603 [=](CodeGenFunction &CGF, PrePostActionTy &Action) {
5604 Action.Enter(CGF);
5605 emitReductionCombiner(CGF, E);
5606 },
5607 Loc);
5608 };
5609 if ((*IPriv)->getType()->isArrayType()) {
5610 const auto *LHSVar =
5611 cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
5612 const auto *RHSVar =
5613 cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
5614 EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar,
5615 CritRedGen);
5616 } else {
5617 CritRedGen(CGF, nullptr, nullptr, nullptr);
5618 }
5619 }
5620 ++ILHS;
5621 ++IRHS;
5622 ++IPriv;
5623 }
5624 };
5625 RegionCodeGenTy AtomicRCG(AtomicCodeGen);
5626 if (!WithNowait) {
5627 // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>);
5628 llvm::Value *EndArgs[] = {
5629 IdentTLoc, // ident_t *<loc>
5630 ThreadId, // i32 <gtid>
5631 Lock // kmp_critical_name *&<lock>
5632 };
5633 CommonActionTy Action(nullptr, {},
5634 OMPBuilder.getOrCreateRuntimeFunction(
5635 CGM.getModule(), OMPRTL___kmpc_end_reduce),
5636 EndArgs);
5637 AtomicRCG.setAction(Action);
5638 AtomicRCG(CGF);
5639 } else {
5640 AtomicRCG(CGF);
5641 }
5642
5643 CGF.EmitBranch(DefaultBB);
5644 CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
 // Finally, handle the entries that were filtered out above: each
 // private-variable reduction item is combined via the shared-global +
 // barrier scheme in emitPrivateReduction.
5645 assert(OrgLHSExprs.size() == OrgPrivates.size() &&
5646 "PrivateVarReduction: Privates size mismatch");
5647 assert(OrgLHSExprs.size() == OrgReductionOps.size() &&
5648 "PrivateVarReduction: ReductionOps size mismatch");
5649 for (unsigned I : llvm::seq<unsigned>(
5650 std::min(OrgReductionOps.size(), OrgLHSExprs.size()))) {
5651 if (Options.IsPrivateVarReduction[I])
5652 emitPrivateReduction(CGF, Loc, OrgPrivates[I], OrgLHSExprs[I],
5653 OrgRHSExprs[I], OrgReductionOps[I]);
5654 }
5655}
5656
5657/// Generates unique name for artificial threadprivate variables.
5658/// Format is: <Prefix> "." <Decl_mangled_name> "_" "<Decl_start_loc_raw_enc>"
5659static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix,
5660 const Expr *Ref) {
5661 SmallString<256> Buffer;
5662 llvm::raw_svector_ostream Out(Buffer);
 // Prefer the base declaration of the (possibly compound) reference
 // expression; fall back to a direct DeclRefExpr when none is found.
5663 const clang::DeclRefExpr *DE;
5664 const VarDecl *D = ::getBaseDecl(Ref, DE);
5665 if (!D)
5666 D = cast<VarDecl>(cast<DeclRefExpr>(Ref)->getDecl());
5667 D = D->getCanonicalDecl();
 // Locals/params use the plain name; globals use the mangled name to keep
 // the generated symbol unique across translation units.
5668 std::string Name = CGM.getOpenMPRuntime().getName(
5669 {D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(D)});
5670 Out << Prefix << Name << "_"
 // (Line 5671, presumably streaming the declaration's begin-location raw
 // encoding, is missing from this extract.)
5672 return std::string(Out.str());
5673}
5674
5675/// Emits reduction initializer function:
5676/// \code
5677/// void @.red_init(void* %arg, void* %orig) {
5678/// %0 = bitcast void* %arg to <type>*
5679/// store <type> <init>, <type>* %0
5680/// ret void
5681/// }
5682/// \endcode
5683static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM,
5684 SourceLocation Loc,
5685 ReductionCodeGen &RCG, unsigned N) {
5686 ASTContext &C = CGM.getContext();
 // Arguments are restrict-qualified void*: %arg (private copy) and %orig
 // (original item, used only when the UDR initializer needs it).
5687 QualType VoidPtrTy = C.VoidPtrTy;
5688 VoidPtrTy.addRestrict();
5689 FunctionArgList Args;
5690 ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
5692 ImplicitParamDecl ParamOrig(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
5694 Args.emplace_back(&Param);
5695 Args.emplace_back(&ParamOrig);
5696 const auto &FnInfo =
5697 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5698 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
5699 std::string Name = CGM.getOpenMPRuntime().getName({"red_init", ""});
5700 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
5701 Name, &CGM.getModule());
5702 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
5703 if (!CGM.getCodeGenOpts().SampleProfileFile.empty())
5704 Fn->addFnAttr("sample-profile-suffix-elision-policy", "selected");
5705 Fn->setDoesNotRecurse();
5706 CodeGenFunction CGF(CGM);
5707 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
5708 QualType PrivateType = RCG.getPrivateType(N);
5709 Address PrivateAddr = CGF.EmitLoadOfPointer(
5710 CGF.GetAddrOfLocalVar(&Param).withElementType(CGF.Builder.getPtrTy(0)),
5711 C.getPointerType(PrivateType)->castAs<PointerType>());
5712 llvm::Value *Size = nullptr;
5713 // If the size of the reduction item is non-constant, load it from global
5714 // threadprivate variable.
5715 if (RCG.getSizes(N).second) {
 // (Line 5716, presumably the getAddrOfArtificialThreadPrivate call
 // opening that produces SizeAddr, is missing from this extract.)
5717 CGF, CGM.getContext().getSizeType(),
5718 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
5719 Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
5720 CGM.getContext().getSizeType(), Loc);
5721 }
5722 RCG.emitAggregateType(CGF, N, Size);
5723 Address OrigAddr = Address::invalid();
5724 // If initializer uses initializer from declare reduction construct, emit a
5725 // pointer to the address of the original reduction item (required by
5726 // reduction initializer)
5727 if (RCG.usesReductionInitializer(N)) {
5728 Address SharedAddr = CGF.GetAddrOfLocalVar(&ParamOrig);
5729 OrigAddr = CGF.EmitLoadOfPointer(
5730 SharedAddr,
5731 CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr());
5732 }
5733 // Emit the initializer:
5734 // %0 = bitcast void* %arg to <type>*
5735 // store <type> <init>, <type>* %0
5736 RCG.emitInitialization(CGF, N, PrivateAddr, OrigAddr,
5737 [](CodeGenFunction &) { return false; });
5738 CGF.FinishFunction();
5739 return Fn;
5740}
5741
5742/// Emits reduction combiner function:
5743/// \code
5744/// void @.red_comb(void* %arg0, void* %arg1) {
5745/// %lhs = bitcast void* %arg0 to <type>*
5746/// %rhs = bitcast void* %arg1 to <type>*
5747/// %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs)
5748/// store <type> %2, <type>* %lhs
5749/// ret void
5750/// }
5751/// \endcode
5752static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM,
5753 SourceLocation Loc,
5754 ReductionCodeGen &RCG, unsigned N,
5755 const Expr *ReductionOp,
5756 const Expr *LHS, const Expr *RHS,
5757 const Expr *PrivateRef) {
5758 ASTContext &C = CGM.getContext();
5759 const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl());
5760 const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl());
5761 FunctionArgList Args;
5762 ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
5763 C.VoidPtrTy, ImplicitParamKind::Other);
5764 ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
 // (Line 5765, presumably the trailing ImplicitParamKind argument of the
 // ParamIn declaration, is missing from this extract.)
5766 Args.emplace_back(&ParamInOut);
5767 Args.emplace_back(&ParamIn);
5768 const auto &FnInfo =
5769 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5770 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
5771 std::string Name = CGM.getOpenMPRuntime().getName({"red_comb", ""});
5772 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
5773 Name, &CGM.getModule());
5774 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
5775 if (!CGM.getCodeGenOpts().SampleProfileFile.empty())
5776 Fn->addFnAttr("sample-profile-suffix-elision-policy", "selected");
5777 Fn->setDoesNotRecurse();
5778 CodeGenFunction CGF(CGM);
5779 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
5780 llvm::Value *Size = nullptr;
5781 // If the size of the reduction item is non-constant, load it from global
5782 // threadprivate variable.
5783 if (RCG.getSizes(N).second) {
 // (Line 5784, presumably the getAddrOfArtificialThreadPrivate call
 // opening that produces SizeAddr, is missing from this extract.)
5785 CGF, CGM.getContext().getSizeType(),
5786 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
5787 Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
5788 CGM.getContext().getSizeType(), Loc);
5789 }
5790 RCG.emitAggregateType(CGF, N, Size);
5791 // Remap lhs and rhs variables to the addresses of the function arguments.
5792 // %lhs = bitcast void* %arg0 to <type>*
5793 // %rhs = bitcast void* %arg1 to <type>*
5794 CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
5795 PrivateScope.addPrivate(
5796 LHSVD,
5797 // Pull out the pointer to the variable.
 // (Line 5798, presumably the CGF.EmitLoadOfPointer call opening, is
 // missing from this extract.)
5799 CGF.GetAddrOfLocalVar(&ParamInOut)
5800 .withElementType(CGF.Builder.getPtrTy(0)),
5801 C.getPointerType(LHSVD->getType())->castAs<PointerType>()));
5802 PrivateScope.addPrivate(
5803 RHSVD,
5804 // Pull out the pointer to the variable.
 // (Line 5805, presumably the CGF.EmitLoadOfPointer call opening, is
 // missing from this extract.)
5806 CGF.GetAddrOfLocalVar(&ParamIn).withElementType(
5807 CGF.Builder.getPtrTy(0)),
5808 C.getPointerType(RHSVD->getType())->castAs<PointerType>()));
5809 PrivateScope.Privatize();
5810 // Emit the combiner body:
5811 // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs)
5812 // store <type> %2, <type>* %lhs
 // (Line 5813, presumably the emitSingleReductionCombiner call opening via
 // CGM.getOpenMPRuntime(), is missing from this extract.)
5814 CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS),
5815 cast<DeclRefExpr>(RHS));
5816 CGF.FinishFunction();
5817 return Fn;
5818}
5819
5820/// Emits reduction finalizer function:
5821/// \code
5822/// void @.red_fini(void* %arg) {
5823/// %0 = bitcast void* %arg to <type>*
5824/// <destroy>(<type>* %0)
5825/// ret void
5826/// }
5827/// \endcode
 // Returns nullptr when the reduction item needs no cleanups, so the caller
 // can pass a null finalizer to the runtime.
5828static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM,
5829 SourceLocation Loc,
5830 ReductionCodeGen &RCG, unsigned N) {
5831 if (!RCG.needCleanups(N))
5832 return nullptr;
5833 ASTContext &C = CGM.getContext();
5834 FunctionArgList Args;
5835 ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
 // (Line 5836, presumably the trailing ImplicitParamKind argument of the
 // Param declaration, is missing from this extract.)
5837 Args.emplace_back(&Param);
5838 const auto &FnInfo =
5839 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5840 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
5841 std::string Name = CGM.getOpenMPRuntime().getName({"red_fini", ""});
5842 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
5843 Name, &CGM.getModule());
5844 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
5845 if (!CGM.getCodeGenOpts().SampleProfileFile.empty())
5846 Fn->addFnAttr("sample-profile-suffix-elision-policy", "selected");
5847 Fn->setDoesNotRecurse();
5848 CodeGenFunction CGF(CGM);
5849 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
5850 Address PrivateAddr = CGF.EmitLoadOfPointer(
5851 CGF.GetAddrOfLocalVar(&Param), C.VoidPtrTy.castAs<PointerType>());
5852 llvm::Value *Size = nullptr;
5853 // If the size of the reduction item is non-constant, load it from global
5854 // threadprivate variable.
5855 if (RCG.getSizes(N).second) {
 // (Line 5856, presumably the getAddrOfArtificialThreadPrivate call
 // opening that produces SizeAddr, is missing from this extract.)
5857 CGF, CGM.getContext().getSizeType(),
5858 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
5859 Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
5860 CGM.getContext().getSizeType(), Loc);
5861 }
5862 RCG.emitAggregateType(CGF, N, Size);
5863 // Emit the finalizer body:
5864 // <destroy>(<type>* %0)
5865 RCG.emitCleanups(CGF, N, PrivateAddr);
5866 CGF.FinishFunction(Loc);
5867 return Fn;
5868}
5869
// NOTE(review): the opening of this definition (return type, qualified name
// CGOpenMPRuntime::emitTaskReductionInit, and the leading parameters) was
// dropped by extraction — the lines below start mid-signature. Verify upstream.
// Emits the per-item kmp_taskred_input_t array describing each task reduction
// variable, then calls __kmpc_taskred_modifier_init (reduction with task
// modifier) or __kmpc_taskred_init and returns the runtime's taskgroup handle.
5872 ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) &#123;
5873 if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty())
5874 return nullptr;
5875
5876 // Build typedef struct:
5877 // kmp_taskred_input {
5878 // void *reduce_shar; // shared reduction item
5879 // void *reduce_orig; // original reduction item used for initialization
5880 // size_t reduce_size; // size of data item
5881 // void *reduce_init; // data initialization routine
5882 // void *reduce_fini; // data finalization routine
5883 // void *reduce_comb; // data combiner routine
5884 // kmp_task_red_flags_t flags; // flags for additional info from compiler
5885 // } kmp_taskred_input_t;
5886 ASTContext &C = CGM.getContext();
5887 RecordDecl *RD = C.buildImplicitRecord("kmp_taskred_input_t");
5888 RD->startDefinition();
5889 const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
5890 const FieldDecl *OrigFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
5891 const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType());
5892 const FieldDecl *InitFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
5893 const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
5894 const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
5895 const FieldDecl *FlagsFD = addFieldToRecordDecl(
5896 C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false))&#59;
5897 RD->completeDefinition();
5898 CanQualType RDType = C.getCanonicalTagType(RD);
5899 unsigned Size = Data.ReductionVars.size();
5900 llvm::APInt ArraySize(/*numBits=*/64, Size);
5901 QualType ArrayRDType =
5902 C.getConstantArrayType(RDType, ArraySize, nullptr,
5903 ArraySizeModifier::Normal, /*IndexTypeQuals=*/0);
5904 // kmp_task_red_input_t .rd_input.[Size];
5905 RawAddress TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input.");
5906 ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionOrigs,
5907 Data.ReductionCopies, Data.ReductionOps);
// Fill one kmp_taskred_input_t element per reduction item.
5908 for (unsigned Cnt = 0; Cnt < Size; ++Cnt) &#123;
5909 // kmp_task_red_input_t &ElemLVal = .rd_input.[Cnt];
5910 llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0),
5911 llvm::ConstantInt::get(CGM.SizeTy, Cnt)};
5912 llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP(
5913 TaskRedInput.getElementType(), TaskRedInput.getPointer(), Idxs,
5914 /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc,
5915 ".rd_input.gep.");
5916 LValue ElemLVal = CGF.MakeNaturalAlignRawAddrLValue(GEP, RDType);
5917 // ElemLVal.reduce_shar = &Shareds[Cnt];
5918 LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD);
5919 RCG.emitSharedOrigLValue(CGF, Cnt);
5920 llvm::Value *Shared = RCG.getSharedLValue(Cnt).getPointer(CGF);
5921 CGF.EmitStoreOfScalar(Shared, SharedLVal);
5922 // ElemLVal.reduce_orig = &Origs[Cnt];
5923 LValue OrigLVal = CGF.EmitLValueForField(ElemLVal, OrigFD);
5924 llvm::Value *Orig = RCG.getOrigLValue(Cnt).getPointer(CGF);
5925 CGF.EmitStoreOfScalar(Orig, OrigLVal);
5926 RCG.emitAggregateType(CGF, Cnt);
5927 llvm::Value *SizeValInChars;
5928 llvm::Value *SizeVal;
5929 std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt);
5930 // We use delayed creation/initialization for VLAs and array sections. It is
5931 // required because runtime does not provide the way to pass the sizes of
5932 // VLAs/array sections to initializer/combiner/finalizer functions. Instead
5933 // threadprivate global variables are used to store these values and use
5934 // them in the functions.
5935 bool DelayedCreation = !!SizeVal;
5936 SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy,
5937 /*isSigned=*/false);
5938 LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD);
5939 CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal);
5940 // ElemLVal.reduce_init = init;
5941 LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD);
5942 llvm::Value *InitAddr = emitReduceInitFunction(CGM, Loc, RCG, Cnt);
5943 CGF.EmitStoreOfScalar(InitAddr, InitLVal);
5944 // ElemLVal.reduce_fini = fini;
5945 LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD);
5946 llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt);
// A null reduce_fini tells the runtime no finalization is required.
5947 llvm::Value *FiniAddr =
5948 Fini ? Fini : llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
5949 CGF.EmitStoreOfScalar(FiniAddr, FiniLVal);
5950 // ElemLVal.reduce_comb = comb;
5951 LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD);
5952 llvm::Value *CombAddr = emitReduceCombFunction(
5953 CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt],
5954 RHSExprs[Cnt], Data.ReductionCopies[Cnt]);
5955 CGF.EmitStoreOfScalar(CombAddr, CombLVal);
5956 // ElemLVal.flags = 0;
5957 LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD);
// flags = 1 marks lazy (delayed) creation for VLA/array-section items.
5958 if (DelayedCreation) &#123;
// NOTE(review): a source line appears to be missing here (the
// CGF.EmitStoreOfScalar( opener for the flags store) — verify upstream.
5960 llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*isSigned=*/true),
5961 FlagsLVal);
5962 } else
5963 CGF.EmitNullInitialization(FlagsLVal.getAddress(), FlagsLVal.getType());
5964 }
5965 if (Data.IsReductionWithTaskMod) &#123;
5966 // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int
5967 // is_ws, int num, void *data);
5968 llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
5969 llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
5970 CGM.IntTy, /*isSigned=*/true);
5971 llvm::Value *Args[] = {
5972 IdentTLoc, GTid,
5973 llvm::ConstantInt::get(CGM.IntTy, Data.IsWorksharingReduction ? 1 : 0,
5974 /*isSigned=*/true),
5975 llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
// NOTE(review): a source line appears to be missing here (the pointer-cast
// opener wrapping TaskRedInput.getPointer()) — verify upstream.
5977 TaskRedInput.getPointer(), CGM.VoidPtrTy)};
5978 return CGF.EmitRuntimeCall(
5979 OMPBuilder.getOrCreateRuntimeFunction(
5980 CGM.getModule(), OMPRTL___kmpc_taskred_modifier_init),
5981 Args);
5982 }
5983 // Build call void *__kmpc_taskred_init(int gtid, int num_data, void *data);
5984 llvm::Value *Args[] = {
5985 CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
5986 /*isSigned=*/true),
5987 llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
// NOTE(review): a source line appears to be missing here (the pointer-cast
// opener wrapping TaskRedInput.getPointer()) — verify upstream.
5989 CGM.VoidPtrTy)};
5990 return CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
5991 CGM.getModule(), OMPRTL___kmpc_taskred_init),
5992 Args);
5993}
5994
5996 SourceLocation Loc,
5997 bool IsWorksharingReduction) {
5998 // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int
5999 // is_ws, int num, void *data);
6000 llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
6001 llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
6002 CGM.IntTy, /*isSigned=*/true);
6003 llvm::Value *Args[] = {IdentTLoc, GTid,
6004 llvm::ConstantInt::get(CGM.IntTy,
6005 IsWorksharingReduction ? 1 : 0,
6006 /*isSigned=*/true)};
6007 (void)CGF.EmitRuntimeCall(
6008 OMPBuilder.getOrCreateRuntimeFunction(
6009 CGM.getModule(), OMPRTL___kmpc_task_reduction_modifier_fini),
6010 Args);
6011}
6012
6014 SourceLocation Loc,
6015 ReductionCodeGen &RCG,
6016 unsigned N) {
6017 auto Sizes = RCG.getSizes(N);
6018 // Emit threadprivate global variable if the type is non-constant
6019 // (Sizes.second = nullptr).
6020 if (Sizes.second) {
6021 llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy,
6022 /*isSigned=*/false);
6024 CGF, CGM.getContext().getSizeType(),
6025 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
6026 CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false);
6027 }
6028}
6029
6031 SourceLocation Loc,
6032 llvm::Value *ReductionsPtr,
6033 LValue SharedLVal) {
6034 // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
6035 // *d);
6036 llvm::Value *Args[] = {CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
6037 CGM.IntTy,
6038 /*isSigned=*/true),
6039 ReductionsPtr,
6041 SharedLVal.getPointer(CGF), CGM.VoidPtrTy)};
6042 return Address(
6043 CGF.EmitRuntimeCall(
6044 OMPBuilder.getOrCreateRuntimeFunction(
6045 CGM.getModule(), OMPRTL___kmpc_task_reduction_get_th_data),
6046 Args),
6047 CGF.Int8Ty, SharedLVal.getAlignment());
6048}
6049
6051 const OMPTaskDataTy &Data) {
6052 if (!CGF.HaveInsertPoint())
6053 return;
6054
6055 if (CGF.CGM.getLangOpts().OpenMPIRBuilder && Data.Dependences.empty()) {
6056 // TODO: Need to support taskwait with dependences in the OpenMPIRBuilder.
6057 OMPBuilder.createTaskwait(CGF.Builder);
6058 } else {
6059 llvm::Value *ThreadID = getThreadID(CGF, Loc);
6060 llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
6061 auto &M = CGM.getModule();
6062 Address DependenciesArray = Address::invalid();
6063 llvm::Value *NumOfElements;
6064 std::tie(NumOfElements, DependenciesArray) =
6065 emitDependClause(CGF, Data.Dependences, Loc);
6066 if (!Data.Dependences.empty()) {
6067 llvm::Value *DepWaitTaskArgs[7];
6068 DepWaitTaskArgs[0] = UpLoc;
6069 DepWaitTaskArgs[1] = ThreadID;
6070 DepWaitTaskArgs[2] = NumOfElements;
6071 DepWaitTaskArgs[3] = DependenciesArray.emitRawPointer(CGF);
6072 DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
6073 DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
6074 DepWaitTaskArgs[6] =
6075 llvm::ConstantInt::get(CGF.Int32Ty, Data.HasNowaitClause);
6076
6077 CodeGenFunction::RunCleanupsScope LocalScope(CGF);
6078
6079 // Build void __kmpc_omp_taskwait_deps_51(ident_t *, kmp_int32 gtid,
6080 // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
6081 // ndeps_noalias, kmp_depend_info_t *noalias_dep_list,
6082 // kmp_int32 has_no_wait); if dependence info is specified.
6083 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
6084 M, OMPRTL___kmpc_omp_taskwait_deps_51),
6085 DepWaitTaskArgs);
6086
6087 } else {
6088
6089 // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
6090 // global_tid);
6091 llvm::Value *Args[] = {UpLoc, ThreadID};
6092 // Ignore return result until untied tasks are supported.
6093 CGF.EmitRuntimeCall(
6094 OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_taskwait),
6095 Args);
6096 }
6097 }
6098
6099 if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
6100 Region->emitUntiedSwitch(CGF);
6101}
6102
6104 OpenMPDirectiveKind InnerKind,
6105 const RegionCodeGenTy &CodeGen,
6106 bool HasCancel) {
6107 if (!CGF.HaveInsertPoint())
6108 return;
6109 InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel,
6110 InnerKind != OMPD_critical &&
6111 InnerKind != OMPD_master &&
6112 InnerKind != OMPD_masked);
6113 CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);
6114}
6115
6116namespace &#123;
// Cancellation-kind constants passed to __kmpc_cancel /
// __kmpc_cancellationpoint. Values must match kmp_int32 cncl_kind in the
// OpenMP runtime (libomp); do not renumber.
6117enum RTCancelKind &#123;
6118 CancelNoreq = 0,
6119 CancelParallel = 1,
6120 CancelLoop = 2,
6121 CancelSections = 3,
6122 CancelTaskgroup = 4
6123};
6124} // anonymous namespace
6125
6126static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) {
6127 RTCancelKind CancelKind = CancelNoreq;
6128 if (CancelRegion == OMPD_parallel)
6129 CancelKind = CancelParallel;
6130 else if (CancelRegion == OMPD_for)
6131 CancelKind = CancelLoop;
6132 else if (CancelRegion == OMPD_sections)
6133 CancelKind = CancelSections;
6134 else {
6135 assert(CancelRegion == OMPD_taskgroup);
6136 CancelKind = CancelTaskgroup;
6137 }
6138 return CancelKind;
6139}
6140
6143 OpenMPDirectiveKind CancelRegion) {
6144 if (!CGF.HaveInsertPoint())
6145 return;
6146 // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
6147 // global_tid, kmp_int32 cncl_kind);
6148 if (auto *OMPRegionInfo =
6149 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
6150 // For 'cancellation point taskgroup', the task region info may not have a
6151 // cancel. This may instead happen in another adjacent task.
6152 if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) {
6153 llvm::Value *Args[] = {
6154 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
6155 CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
6156 // Ignore return result until untied tasks are supported.
6157 llvm::Value *Result = CGF.EmitRuntimeCall(
6158 OMPBuilder.getOrCreateRuntimeFunction(
6159 CGM.getModule(), OMPRTL___kmpc_cancellationpoint),
6160 Args);
6161 // if (__kmpc_cancellationpoint()) {
6162 // call i32 @__kmpc_cancel_barrier( // for parallel cancellation only
6163 // exit from construct;
6164 // }
6165 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
6166 llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
6167 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
6168 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
6169 CGF.EmitBlock(ExitBB);
6170 if (CancelRegion == OMPD_parallel)
6171 emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
6172 // exit from construct;
6173 CodeGenFunction::JumpDest CancelDest =
6174 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
6175 CGF.EmitBranchThroughCleanup(CancelDest);
6176 CGF.EmitBlock(ContBB, /*IsFinished=*/true);
6177 }
6178 }
6179}
6180
6182 const Expr *IfCond,
6183 OpenMPDirectiveKind CancelRegion) {
6184 if (!CGF.HaveInsertPoint())
6185 return;
6186 // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
6187 // kmp_int32 cncl_kind);
6188 auto &M = CGM.getModule();
6189 if (auto *OMPRegionInfo =
6190 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
6191 auto &&ThenGen = [this, &M, Loc, CancelRegion,
6192 OMPRegionInfo](CodeGenFunction &CGF, PrePostActionTy &) {
6193 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
6194 llvm::Value *Args[] = {
6195 RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc),
6196 CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
6197 // Ignore return result until untied tasks are supported.
6198 llvm::Value *Result = CGF.EmitRuntimeCall(
6199 OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_cancel), Args);
6200 // if (__kmpc_cancel()) {
6201 // call i32 @__kmpc_cancel_barrier( // for parallel cancellation only
6202 // exit from construct;
6203 // }
6204 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
6205 llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
6206 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
6207 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
6208 CGF.EmitBlock(ExitBB);
6209 if (CancelRegion == OMPD_parallel)
6210 RT.emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
6211 // exit from construct;
6212 CodeGenFunction::JumpDest CancelDest =
6213 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
6214 CGF.EmitBranchThroughCleanup(CancelDest);
6215 CGF.EmitBlock(ContBB, /*IsFinished=*/true);
6216 };
6217 if (IfCond) {
6218 emitIfClause(CGF, IfCond, ThenGen,
6219 [](CodeGenFunction &, PrePostActionTy &) {});
6220 } else {
6221 RegionCodeGenTy ThenRCG(ThenGen);
6222 ThenRCG(CGF);
6223 }
6224 }
6225}
6226
6227namespace &#123;
6228/// Cleanup action for uses_allocators support.
// Pre/post action that initializes each (allocator, traits) pair from a
// 'uses_allocators' clause on region entry and destroys it on region exit.
6229class OMPUsesAllocatorsActionTy final : public PrePostActionTy &#123;
// NOTE(review): a source line appears to be missing here (the member holding
// the ArrayRef of (allocator, traits) pairs) — verify against upstream.
6231
6232public:
6233 OMPUsesAllocatorsActionTy(
6234 ArrayRef<std::pair<const Expr *, const Expr *>> Allocators)
6235 : Allocators(Allocators) &#123;}
// Called before the region body: create each allocator via the runtime.
6236 void Enter(CodeGenFunction &CGF) override &#123;
6237 if (!CGF.HaveInsertPoint())
6238 return;
6239 for (const auto &AllocatorData : Allocators) &#123;
// NOTE(review): a source line appears to be missing here (the call opener,
// presumably CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsInit() — verify.
6241 CGF, AllocatorData.first, AllocatorData.second);
6242 }
6243 }
// Called after the region body: destroy each allocator via the runtime.
6244 void Exit(CodeGenFunction &CGF) override &#123;
6245 if (!CGF.HaveInsertPoint())
6246 return;
6247 for (const auto &AllocatorData : Allocators) &#123;
// NOTE(review): a source line appears to be missing here (the call opener,
// presumably CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsFini() — verify.
6249 AllocatorData.first);
6250 }
6251 }
6252};
6253} // namespace
6254
6256 const OMPExecutableDirective &D, StringRef ParentName,
6257 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
6258 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
6259 assert(!ParentName.empty() && "Invalid target entry parent name!");
6262 for (const auto *C : D.getClausesOfKind<OMPUsesAllocatorsClause>()) {
6263 for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
6264 const OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
6265 if (!D.AllocatorTraits)
6266 continue;
6267 Allocators.emplace_back(D.Allocator, D.AllocatorTraits);
6268 }
6269 }
6270 OMPUsesAllocatorsActionTy UsesAllocatorAction(Allocators);
6271 CodeGen.setAction(UsesAllocatorAction);
6272 emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
6273 IsOffloadEntry, CodeGen);
6274}
6275
6277 const Expr *Allocator,
6278 const Expr *AllocatorTraits) {
6279 llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
6280 ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
6281 // Use default memspace handle.
6282 llvm::Value *MemSpaceHandle = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
6283 llvm::Value *NumTraits = llvm::ConstantInt::get(
6285 AllocatorTraits->getType()->getAsArrayTypeUnsafe())
6286 ->getSize()
6287 .getLimitedValue());
6288 LValue AllocatorTraitsLVal = CGF.EmitLValue(AllocatorTraits);
6290 AllocatorTraitsLVal.getAddress(), CGF.VoidPtrPtrTy, CGF.VoidPtrTy);
6291 AllocatorTraitsLVal = CGF.MakeAddrLValue(Addr, CGF.getContext().VoidPtrTy,
6292 AllocatorTraitsLVal.getBaseInfo(),
6293 AllocatorTraitsLVal.getTBAAInfo());
6294 llvm::Value *Traits = Addr.emitRawPointer(CGF);
6295
6296 llvm::Value *AllocatorVal =
6297 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
6298 CGM.getModule(), OMPRTL___kmpc_init_allocator),
6299 {ThreadId, MemSpaceHandle, NumTraits, Traits});
6300 // Store to allocator.
6302 cast<DeclRefExpr>(Allocator->IgnoreParenImpCasts())->getDecl()));
6303 LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
6304 AllocatorVal =
6305 CGF.EmitScalarConversion(AllocatorVal, CGF.getContext().VoidPtrTy,
6306 Allocator->getType(), Allocator->getExprLoc());
6307 CGF.EmitStoreOfScalar(AllocatorVal, AllocatorLVal);
6308}
6309
6311 const Expr *Allocator) {
6312 llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
6313 ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
6314 LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
6315 llvm::Value *AllocatorVal =
6316 CGF.EmitLoadOfScalar(AllocatorLVal, Allocator->getExprLoc());
6317 AllocatorVal = CGF.EmitScalarConversion(AllocatorVal, Allocator->getType(),
6318 CGF.getContext().VoidPtrTy,
6319 Allocator->getExprLoc());
6320 (void)CGF.EmitRuntimeCall(
6321 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
6322 OMPRTL___kmpc_destroy_allocator),
6323 {ThreadId, AllocatorVal});
6324}
6325
6328 llvm::OpenMPIRBuilder::TargetKernelDefaultAttrs &Attrs) {
6329 assert(Attrs.MaxTeams.size() == 1 && Attrs.MaxThreads.size() == 1 &&
6330 "invalid default attrs structure");
6331 int32_t &MaxTeamsVal = Attrs.MaxTeams.front();
6332 int32_t &MaxThreadsVal = Attrs.MaxThreads.front();
6333
6334 getNumTeamsExprForTargetDirective(CGF, D, Attrs.MinTeams, MaxTeamsVal);
6335 getNumThreadsExprForTargetDirective(CGF, D, MaxThreadsVal,
6336 /*UpperBoundOnly=*/true);
6337
6338 for (auto *C : D.getClausesOfKind<OMPXAttributeClause>()) {
6339 for (auto *A : C->getAttrs()) {
6340 int32_t AttrMinThreadsVal = 1, AttrMaxThreadsVal = -1;
6341 int32_t AttrMinBlocksVal = 1, AttrMaxBlocksVal = -1;
6342 if (auto *Attr = dyn_cast<CUDALaunchBoundsAttr>(A))
6343 CGM.handleCUDALaunchBoundsAttr(nullptr, Attr, &AttrMaxThreadsVal,
6344 &AttrMinBlocksVal, &AttrMaxBlocksVal);
6345 else if (auto *Attr = dyn_cast<AMDGPUFlatWorkGroupSizeAttr>(A))
6346 CGM.handleAMDGPUFlatWorkGroupSizeAttr(
6347 nullptr, Attr, /*ReqdWGS=*/nullptr, &AttrMinThreadsVal,
6348 &AttrMaxThreadsVal);
6349 else
6350 continue;
6351
6352 Attrs.MinThreads = std::max(Attrs.MinThreads, AttrMinThreadsVal);
6353 if (AttrMaxThreadsVal > 0)
6354 MaxThreadsVal = MaxThreadsVal > 0
6355 ? std::min(MaxThreadsVal, AttrMaxThreadsVal)
6356 : AttrMaxThreadsVal;
6357 Attrs.MinTeams = std::max(Attrs.MinTeams, AttrMinBlocksVal);
6358 if (AttrMaxBlocksVal > 0)
6359 MaxTeamsVal = MaxTeamsVal > 0 ? std::min(MaxTeamsVal, AttrMaxBlocksVal)
6360 : AttrMaxBlocksVal;
6361 }
6362 }
6363}
6364
6366 const OMPExecutableDirective &D, StringRef ParentName,
6367 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
6368 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
6369
6370 llvm::TargetRegionEntryInfo EntryInfo =
6371 getEntryInfoFromPresumedLoc(CGM, OMPBuilder, D.getBeginLoc(), ParentName);
6372
6373 CodeGenFunction CGF(CGM, true);
6374 llvm::OpenMPIRBuilder::FunctionGenCallback &&GenerateOutlinedFunction =
6375 [&CGF, &D, &CodeGen, this](StringRef EntryFnName) {
6376 const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
6377
6378 CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName);
6379 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6380 if (CGM.getLangOpts().OpenMPIsTargetDevice && !isGPU())
6382 return CGF.GenerateOpenMPCapturedStmtFunction(CS, D);
6383 };
6384
6385 cantFail(OMPBuilder.emitTargetRegionFunction(
6386 EntryInfo, GenerateOutlinedFunction, IsOffloadEntry, OutlinedFn,
6387 OutlinedFnID));
6388
6389 if (!OutlinedFn)
6390 return;
6391
6392 CGM.getTargetCodeGenInfo().setTargetAttributes(nullptr, OutlinedFn, CGM);
6393
6394 for (auto *C : D.getClausesOfKind<OMPXAttributeClause>()) {
6395 for (auto *A : C->getAttrs()) {
6396 if (auto *Attr = dyn_cast<AMDGPUWavesPerEUAttr>(A))
6397 CGM.handleAMDGPUWavesPerEUAttr(OutlinedFn, Attr);
6398 }
6399 }
6400 registerVTable(D);
6401}
6402
6403/// Checks if the expression is constant or does not have non-trivial function
6404/// calls.
6405static bool isTrivial(ASTContext &Ctx, const Expr * E) &#123;
6406 // We can skip constant expressions.
6407 // We can skip expressions with trivial calls or simple expressions.
// NOTE(review): a source line appears to be missing here (the first operand
// of the returned conjunction, presumably an isEvaluatable() check ORed with
// the hasNonTrivialCall test) — verify against upstream.
6409 !E->hasNonTrivialCall(Ctx)) &&
6410 !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true);
6411}
6412
6414 const Stmt *Body) {
6415 const Stmt *Child = Body->IgnoreContainers();
6416 while (const auto *C = dyn_cast_or_null<CompoundStmt>(Child)) {
6417 Child = nullptr;
6418 for (const Stmt *S : C->body()) {
6419 if (const auto *E = dyn_cast<Expr>(S)) {
6420 if (isTrivial(Ctx, E))
6421 continue;
6422 }
6423 // Some of the statements can be ignored.
6426 continue;
6427 // Analyze declarations.
6428 if (const auto *DS = dyn_cast<DeclStmt>(S)) {
6429 if (llvm::all_of(DS->decls(), [](const Decl *D) {
6430 if (isa<EmptyDecl>(D) || isa<DeclContext>(D) ||
6431 isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) ||
6432 isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) ||
6433 isa<UsingDirectiveDecl>(D) ||
6434 isa<OMPDeclareReductionDecl>(D) ||
6435 isa<OMPThreadPrivateDecl>(D) || isa<OMPAllocateDecl>(D))
6436 return true;
6437 const auto *VD = dyn_cast<VarDecl>(D);
6438 if (!VD)
6439 return false;
6440 return VD->hasGlobalStorage() || !VD->isUsed();
6441 }))
6442 continue;
6443 }
6444 // Found multiple children - cannot get the one child only.
6445 if (Child)
6446 return nullptr;
6447 Child = S;
6448 }
6449 if (Child)
6450 Child = Child->IgnoreContainers();
6451 }
6452 return Child;
6453}
6454
// NOTE(review): the opening line of this definition (presumably
// const Expr *CGOpenMPRuntime::getNumTeamsExprForTargetDirective() was
// dropped by extraction — verify against upstream.
// For a target-based directive, returns the num_teams expression (if any) and
// sets Min/MaxTeamsVal: equal constants when known, 0 when a runtime
// expression must be evaluated, 1 when exactly one team is implied, and -1
// for a bare 'target' whose nested construct leaves teams unspecified.
6456 CodeGenFunction &CGF, const OMPExecutableDirective &D, int32_t &MinTeamsVal,
6457 int32_t &MaxTeamsVal) &#123;
6458
6459 OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
6460 assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
6461 "Expected target-based executable directive.");
6462 switch (DirectiveKind) &#123;
// Bare 'target': inspect the (single) nested directive to find teams info.
6463 case OMPD_target: &#123;
6464 const auto *CS = D.getInnermostCapturedStmt();
6465 const auto *Body =
6466 CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
6467 const Stmt *ChildStmt =
// NOTE(review): a source line appears to be missing here (the initializer of
// ChildStmt, presumably a getSingleCompoundChild call) — verify upstream.
6469 if (const auto *NestedDir =
6470 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) &#123;
6471 if (isOpenMPTeamsDirective(NestedDir->getDirectiveKind())) &#123;
6472 if (NestedDir->hasClausesOfKind<OMPNumTeamsClause>()) &#123;
6473 const Expr *NumTeams = NestedDir->getSingleClause<OMPNumTeamsClause>()
6474 ->getNumTeams()
6475 .front();
6476 if (NumTeams->isIntegerConstantExpr(CGF.getContext()))
6477 if (auto Constant =
6478 NumTeams->getIntegerConstantExpr(CGF.getContext()))
6479 MinTeamsVal = MaxTeamsVal = Constant->getExtValue();
6480 return NumTeams;
6481 }
6482 MinTeamsVal = MaxTeamsVal = 0;
6483 return nullptr;
6484 }
6485 MinTeamsVal = MaxTeamsVal = 1;
6486 return nullptr;
6487 }
6488 // A value of -1 is used to check if we need to emit no teams region
6489 MinTeamsVal = MaxTeamsVal = -1;
6490 return nullptr;
6491 }
// Combined target+teams forms: num_teams is on this directive itself.
6492 case OMPD_target_teams_loop:
6493 case OMPD_target_teams:
6494 case OMPD_target_teams_distribute:
6495 case OMPD_target_teams_distribute_simd:
6496 case OMPD_target_teams_distribute_parallel_for:
6497 case OMPD_target_teams_distribute_parallel_for_simd: &#123;
6498 if (D.hasClausesOfKind<OMPNumTeamsClause>()) &#123;
6499 const Expr *NumTeams =
6500 D.getSingleClause<OMPNumTeamsClause>()->getNumTeams().front();
6501 if (NumTeams->isIntegerConstantExpr(CGF.getContext()))
6502 if (auto Constant = NumTeams->getIntegerConstantExpr(CGF.getContext()))
6503 MinTeamsVal = MaxTeamsVal = Constant->getExtValue();
6504 return NumTeams;
6505 }
6506 MinTeamsVal = MaxTeamsVal = 0;
6507 return nullptr;
6508 }
// target+parallel / target simd forms imply exactly one team.
6509 case OMPD_target_parallel:
6510 case OMPD_target_parallel_for:
6511 case OMPD_target_parallel_for_simd:
6512 case OMPD_target_parallel_loop:
6513 case OMPD_target_simd:
6514 MinTeamsVal = MaxTeamsVal = 1;
6515 return nullptr;
// All non-target directives fall through to the unreachable below — the
// assert at function entry already rejects them.
6516 case OMPD_parallel:
6517 case OMPD_for:
6518 case OMPD_parallel_for:
6519 case OMPD_parallel_loop:
6520 case OMPD_parallel_master:
6521 case OMPD_parallel_sections:
6522 case OMPD_for_simd:
6523 case OMPD_parallel_for_simd:
6524 case OMPD_cancel:
6525 case OMPD_cancellation_point:
6526 case OMPD_ordered:
6527 case OMPD_threadprivate:
6528 case OMPD_allocate:
6529 case OMPD_task:
6530 case OMPD_simd:
6531 case OMPD_tile:
6532 case OMPD_unroll:
6533 case OMPD_sections:
6534 case OMPD_section:
6535 case OMPD_single:
6536 case OMPD_master:
6537 case OMPD_critical:
6538 case OMPD_taskyield:
6539 case OMPD_barrier:
6540 case OMPD_taskwait:
6541 case OMPD_taskgroup:
6542 case OMPD_atomic:
6543 case OMPD_flush:
6544 case OMPD_depobj:
6545 case OMPD_scan:
6546 case OMPD_teams:
6547 case OMPD_target_data:
6548 case OMPD_target_exit_data:
6549 case OMPD_target_enter_data:
6550 case OMPD_distribute:
6551 case OMPD_distribute_simd:
6552 case OMPD_distribute_parallel_for:
6553 case OMPD_distribute_parallel_for_simd:
6554 case OMPD_teams_distribute:
6555 case OMPD_teams_distribute_simd:
6556 case OMPD_teams_distribute_parallel_for:
6557 case OMPD_teams_distribute_parallel_for_simd:
6558 case OMPD_target_update:
6559 case OMPD_declare_simd:
6560 case OMPD_declare_variant:
6561 case OMPD_begin_declare_variant:
6562 case OMPD_end_declare_variant:
6563 case OMPD_declare_target:
6564 case OMPD_end_declare_target:
6565 case OMPD_declare_reduction:
6566 case OMPD_declare_mapper:
6567 case OMPD_taskloop:
6568 case OMPD_taskloop_simd:
6569 case OMPD_master_taskloop:
6570 case OMPD_master_taskloop_simd:
6571 case OMPD_parallel_master_taskloop:
6572 case OMPD_parallel_master_taskloop_simd:
6573 case OMPD_requires:
6574 case OMPD_metadirective:
6575 case OMPD_unknown:
6576 break;
6577 default:
6578 break;
6579 }
6580 llvm_unreachable("Unexpected directive kind.");
6581}
6582
6584 CodeGenFunction &CGF, const OMPExecutableDirective &D) {
6585 assert(!CGF.getLangOpts().OpenMPIsTargetDevice &&
6586 "Clauses associated with the teams directive expected to be emitted "
6587 "only for the host!");
6588 CGBuilderTy &Bld = CGF.Builder;
6589 int32_t MinNT = -1, MaxNT = -1;
6590 const Expr *NumTeams =
6591 getNumTeamsExprForTargetDirective(CGF, D, MinNT, MaxNT);
6592 if (NumTeams != nullptr) {
6593 OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
6594
6595 switch (DirectiveKind) {
6596 case OMPD_target: {
6597 const auto *CS = D.getInnermostCapturedStmt();
6598 CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6599 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6600 llvm::Value *NumTeamsVal = CGF.EmitScalarExpr(NumTeams,
6601 /*IgnoreResultAssign*/ true);
6602 return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
6603 /*isSigned=*/true);
6604 }
6605 case OMPD_target_teams:
6606 case OMPD_target_teams_distribute:
6607 case OMPD_target_teams_distribute_simd:
6608 case OMPD_target_teams_distribute_parallel_for:
6609 case OMPD_target_teams_distribute_parallel_for_simd: {
6610 CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF);
6611 llvm::Value *NumTeamsVal = CGF.EmitScalarExpr(NumTeams,
6612 /*IgnoreResultAssign*/ true);
6613 return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
6614 /*isSigned=*/true);
6615 }
6616 default:
6617 break;
6618 }
6619 }
6620
6621 assert(MinNT == MaxNT && "Num threads ranges require handling here.");
6622 return llvm::ConstantInt::getSigned(CGF.Int32Ty, MinNT);
6623}
6624
6625/// Check for a num threads constant value (stored in \p DefaultVal), or
6626/// expression (stored in \p E). If the value is conditional (via an if-clause),
6627/// store the condition in \p CondVal. If \p E, and \p CondVal respectively, are
6628/// nullptr, no expression evaluation is perfomed.
6629static void getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS,
6630 const Expr **E, int32_t &UpperBound,
6631 bool UpperBoundOnly, llvm::Value **CondVal) &#123;
// NOTE(review): a source line appears to be missing here (the declaration of
// 'Child', presumably via CGOpenMPRuntime::getSingleCompoundChild) — verify.
6632
6633 CGF.getContext(), CS->getCapturedStmt());
// Only a directly nested executable directive can constrain num_threads.
6634 const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child);
6635 if (!Dir)
6636 return;
6637
6638 if (isOpenMPParallelDirective(Dir->getDirectiveKind())) &#123;
6639 // Handle if clause. If if clause present, the number of threads is
6640 // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
6641 if (CondVal && Dir->hasClausesOfKind<OMPIfClause>()) &#123;
6642 CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6643 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
// Pick the if-clause that applies to 'parallel' (or has no name modifier).
6644 const OMPIfClause *IfClause = nullptr;
6645 for (const auto *C : Dir->getClausesOfKind<OMPIfClause>()) &#123;
6646 if (C->getNameModifier() == OMPD_unknown ||
6647 C->getNameModifier() == OMPD_parallel) &#123;
6648 IfClause = C;
6649 break;
6650 }
6651 }
6652 if (IfClause) &#123;
6653 const Expr *CondExpr = IfClause->getCondition();
6654 bool Result;
// A condition that constant-folds to false forces a single thread.
6655 if (CondExpr->EvaluateAsBooleanCondition(Result, CGF.getContext())) &#123;
6656 if (!Result) &#123;
6657 UpperBound = 1;
6658 return;
6659 }
6660 } else &#123;
// NOTE(review): a source line appears to be missing here (presumably a
// CodeGenFunction::LexicalScope over the condition's source range) — verify.
6661
// Emit the clause's pre-init declarations before evaluating the condition.
6662 if (const auto *PreInit =
6663 cast_or_null<DeclStmt>(IfClause->getPreInitStmt())) &#123;
6664 for (const auto *I : PreInit->decls()) &#123;
6665 if (!I->hasAttr<OMPCaptureNoInitAttr>()) &#123;
6666 CGF.EmitVarDecl(cast<VarDecl>(*I));
6667 } else &#123;
// NOTE(review): source lines appear to be missing here (the
// EmitAutoVarAlloca/Init calls producing 'Emission') — verify upstream.
6670 CGF.EmitAutoVarCleanups(Emission);
6671 }
6672 }
6673 *CondVal = CGF.EvaluateExprAsBool(CondExpr);
6674 }
6675 }
6676 }
6677 }
6678 // Check the value of num_threads clause iff if clause was not specified
6679 // or is not evaluated to false.
6680 if (Dir->hasClausesOfKind<OMPNumThreadsClause>()) &#123;
6681 CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6682 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6683 const auto *NumThreadsClause =
6684 Dir->getSingleClause<OMPNumThreadsClause>();
6685 const Expr *NTExpr = NumThreadsClause->getNumThreads();
// Tighten the upper bound when num_threads is an integer constant.
6686 if (NTExpr->isIntegerConstantExpr(CGF.getContext()))
6687 if (auto Constant = NTExpr->getIntegerConstantExpr(CGF.getContext()))
6688 UpperBound =
6689 UpperBound
6690 ? Constant->getZExtValue()
6691 : std::min(UpperBound,
6692 static_cast<int32_t>(Constant->getZExtValue()));
6693 // If we haven't found a upper bound, remember we saw a thread limiting
6694 // clause.
6695 if (UpperBound == -1)
6696 UpperBound = 0;
6697 if (!E)
6698 return;
6699 CodeGenFunction::LexicalScope Scope(CGF, NTExpr->getSourceRange());
6700 if (const auto *PreInit =
6701 cast_or_null<DeclStmt>(NumThreadsClause->getPreInitStmt())) &#123;
6702 for (const auto *I : PreInit->decls()) &#123;
6703 if (!I->hasAttr<OMPCaptureNoInitAttr>()) &#123;
6704 CGF.EmitVarDecl(cast<VarDecl>(*I));
6705 } else &#123;
// NOTE(review): source lines appear to be missing here (the
// EmitAutoVarAlloca/Init calls producing 'Emission') — verify upstream.
6708 CGF.EmitAutoVarCleanups(Emission);
6709 }
6710 }
6711 }
6712 *E = NTExpr;
6713 }
6714 return;
6715 }
// A nested simd region executes with a single thread.
6716 if (isOpenMPSimdDirective(Dir->getDirectiveKind()))
6717 UpperBound = 1;
6718}
6719
// NOTE(review): the declarator line was lost in extraction (presumably
// `const Expr *CGOpenMPRuntime::getNumThreadsExprForTargetDirective(`) --
// restore from upstream.
    CodeGenFunction &CGF, const OMPExecutableDirective &D, int32_t &UpperBound,
    bool UpperBoundOnly, llvm::Value **CondVal, const Expr **ThreadLimitExpr) {
  assert((!CGF.getLangOpts().OpenMPIsTargetDevice || UpperBoundOnly) &&
         "Clauses associated with the teams directive expected to be emitted "
         "only for the host!");
  OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
  assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
         "Expected target-based executable directive.");

  // NT collects the num_threads expression (when one exists); callers that
  // only want the numeric bound pass UpperBoundOnly and get no expression.
  const Expr *NT = nullptr;
  const Expr **NTPtr = UpperBoundOnly ? nullptr : &NT;

  // Fold a constant clause argument into UpperBound and optionally hand the
  // expression back through EPtr.
  auto CheckForConstExpr = [&](const Expr *E, const Expr **EPtr) {
    if (E->isIntegerConstantExpr(CGF.getContext())) {
      if (auto Constant = E->getIntegerConstantExpr(CGF.getContext()))
        // NOTE(review): same swapped-looking ternary as in getNumThreads();
        // the std::min branch is only reached when UpperBound == 0. Confirm
        // intent upstream.
        UpperBound = UpperBound ? Constant->getZExtValue()
                                : std::min(UpperBound,
                                           int32_t(Constant->getZExtValue()));
    }
    // If we haven't found a upper bound, remember we saw a thread limiting
    // clause.
    if (UpperBound == -1)
      UpperBound = 0;
    if (EPtr)
      *EPtr = E;
  };

  // The region is known sequential: bound it to one thread.
  auto ReturnSequential = [&]() {
    UpperBound = 1;
    return NT;
  };

  switch (DirectiveKind) {
  case OMPD_target: {
    const CapturedStmt *CS = D.getInnermostCapturedStmt();
    getNumThreads(CGF, CS, NTPtr, UpperBound, UpperBoundOnly, CondVal);
    // NOTE(review): declarator line lost in extraction (presumably
    // `const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(`).
        CGF.getContext(), CS->getCapturedStmt());
    // TODO: The standard is not clear how to resolve two thread limit clauses,
    // let's pick the teams one if it's present, otherwise the target one.
    const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
      if (const auto *TLC = Dir->getSingleClause<OMPThreadLimitClause>()) {
        ThreadLimitClause = TLC;
        if (ThreadLimitExpr) {
          CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
          CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
          // NOTE(review): declarator line lost in extraction (presumably a
          // CodeGenFunction::LexicalScope).
              CGF,
              ThreadLimitClause->getThreadLimit().front()->getSourceRange());
          if (const auto *PreInit =
                  cast_or_null<DeclStmt>(ThreadLimitClause->getPreInitStmt())) {
            for (const auto *I : PreInit->decls()) {
              if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
                CGF.EmitVarDecl(cast<VarDecl>(*I));
              } else {
                // NOTE(review): lines lost in extraction (presumably the
                // EmitAutoVarAlloca call producing `Emission`).
                CGF.EmitAutoVarCleanups(Emission);
              }
            }
          }
        }
      }
    }
    if (ThreadLimitClause)
      CheckForConstExpr(ThreadLimitClause->getThreadLimit().front(),
                        ThreadLimitExpr);
    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
      // Descend through a teams (non-distribute) region to find the nested
      // parallel/simd that actually determines the thread count.
      if (isOpenMPTeamsDirective(Dir->getDirectiveKind()) &&
          !isOpenMPDistributeDirective(Dir->getDirectiveKind())) {
        CS = Dir->getInnermostCapturedStmt();
        // NOTE(review): declarator line lost in extraction (presumably
        // `const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(`).
            CGF.getContext(), CS->getCapturedStmt());
        Dir = dyn_cast_or_null<OMPExecutableDirective>(Child);
      }
      if (Dir && isOpenMPParallelDirective(Dir->getDirectiveKind())) {
        CS = Dir->getInnermostCapturedStmt();
        getNumThreads(CGF, CS, NTPtr, UpperBound, UpperBoundOnly, CondVal);
      } else if (Dir && isOpenMPSimdDirective(Dir->getDirectiveKind()))
        return ReturnSequential();
    }
    return NT;
  }
  case OMPD_target_teams: {
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      CheckForConstExpr(ThreadLimitClause->getThreadLimit().front(),
                        ThreadLimitExpr);
    }
    const CapturedStmt *CS = D.getInnermostCapturedStmt();
    getNumThreads(CGF, CS, NTPtr, UpperBound, UpperBoundOnly, CondVal);
    // NOTE(review): declarator line lost in extraction (presumably
    // `const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(`).
        CGF.getContext(), CS->getCapturedStmt());
    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
      if (Dir->getDirectiveKind() == OMPD_distribute) {
        CS = Dir->getInnermostCapturedStmt();
        getNumThreads(CGF, CS, NTPtr, UpperBound, UpperBoundOnly, CondVal);
      }
    }
    return NT;
  }
  case OMPD_target_teams_distribute:
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      CheckForConstExpr(ThreadLimitClause->getThreadLimit().front(),
                        ThreadLimitExpr);
    }
    getNumThreads(CGF, D.getInnermostCapturedStmt(), NTPtr, UpperBound,
                  UpperBoundOnly, CondVal);
    return NT;
  case OMPD_target_teams_loop:
  case OMPD_target_parallel_loop:
  case OMPD_target_parallel:
  case OMPD_target_parallel_for:
  case OMPD_target_parallel_for_simd:
  case OMPD_target_teams_distribute_parallel_for:
  case OMPD_target_teams_distribute_parallel_for_simd: {
    // Combined parallel directives: the clauses sit on the directive itself.
    if (CondVal && D.hasClausesOfKind<OMPIfClause>()) {
      const OMPIfClause *IfClause = nullptr;
      for (const auto *C : D.getClausesOfKind<OMPIfClause>()) {
        if (C->getNameModifier() == OMPD_unknown ||
            C->getNameModifier() == OMPD_parallel) {
          IfClause = C;
          break;
        }
      }
      if (IfClause) {
        const Expr *Cond = IfClause->getCondition();
        bool Result;
        if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
          if (!Result)
            return ReturnSequential();
        } else {
          // NOTE(review): a line was lost in extraction here (presumably a
          // CodeGenFunction::RunCleanupsScope) -- restore from upstream.
          *CondVal = CGF.EvaluateExprAsBool(Cond);
        }
      }
    }
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      CheckForConstExpr(ThreadLimitClause->getThreadLimit().front(),
                        ThreadLimitExpr);
    }
    if (D.hasClausesOfKind<OMPNumThreadsClause>()) {
      CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
      const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>();
      CheckForConstExpr(NumThreadsClause->getNumThreads(), nullptr);
      return NumThreadsClause->getNumThreads();
    }
    return NT;
  }
  case OMPD_target_teams_distribute_simd:
  case OMPD_target_simd:
    return ReturnSequential();
  default:
    break;
  }
  llvm_unreachable("Unsupported directive kind.");
}
6884
// NOTE(review): the declarator line was lost in extraction (presumably
// `llvm::Value *CGOpenMPRuntime::emitNumThreadsForTargetDirective(`) --
// restore from upstream.
    CodeGenFunction &CGF, const OMPExecutableDirective &D) {
  llvm::Value *NumThreadsVal = nullptr;
  llvm::Value *CondVal = nullptr;
  llvm::Value *ThreadLimitVal = nullptr;
  const Expr *ThreadLimitExpr = nullptr;
  int32_t UpperBound = -1;

  // NOTE(review): the declarator line of this call was lost in extraction
  // (presumably `const Expr *NT = getNumThreadsExprForTargetDirective(`).
      CGF, D, UpperBound, /* UpperBoundOnly */ false, &CondVal,
      &ThreadLimitExpr);

  // Thread limit expressions are used below, emit them.
  if (ThreadLimitExpr) {
    ThreadLimitVal =
        CGF.EmitScalarExpr(ThreadLimitExpr, /*IgnoreResultAssign=*/true);
    ThreadLimitVal = CGF.Builder.CreateIntCast(ThreadLimitVal, CGF.Int32Ty,
                                               /*isSigned=*/false);
  }

  // Generate the num threads expression.
  if (UpperBound == 1) {
    // Known sequential: emit the constant directly.
    NumThreadsVal = CGF.Builder.getInt32(UpperBound);
  } else if (NT) {
    NumThreadsVal = CGF.EmitScalarExpr(NT, /*IgnoreResultAssign=*/true);
    NumThreadsVal = CGF.Builder.CreateIntCast(NumThreadsVal, CGF.Int32Ty,
                                              /*isSigned=*/false);
  } else if (ThreadLimitVal) {
    // If we do not have a num threads value but a thread limit, replace the
    // former with the latter. We already handled the thread limit expression.
    NumThreadsVal = ThreadLimitVal;
    ThreadLimitVal = nullptr;
  } else {
    // Default to "0" which means runtime choice.
    assert(!ThreadLimitVal && "Default not applicable with thread limit value");
    NumThreadsVal = CGF.Builder.getInt32(0);
  }

  // Handle if clause. If if clause present, the number of threads is
  // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
  if (CondVal) {
    // NOTE(review): a line was lost in extraction here (presumably a
    // CodeGenFunction::RunCleanupsScope) -- restore from upstream.
    NumThreadsVal = CGF.Builder.CreateSelect(CondVal, NumThreadsVal,
                                             CGF.Builder.getInt32(1));
  }

  // If the thread limit and num threads expression were present, take the
  // minimum (unsigned compare, matching the unsigned casts above).
  if (ThreadLimitVal) {
    NumThreadsVal = CGF.Builder.CreateSelect(
        CGF.Builder.CreateICmpULT(ThreadLimitVal, NumThreadsVal),
        ThreadLimitVal, NumThreadsVal);
  }

  return NumThreadsVal;
}
6941
6942namespace {
6944
6945// Utility to handle information from clauses associated with a given
6946// construct that use mappable expressions (e.g. 'map' clause, 'to' clause).
6947// It provides a convenient interface to obtain the information and generate
6948// code for that information.
6949class MappableExprsHandler {
6950public:
  /// Custom comparator for attach-pointer expressions that compares them by
  /// complexity (i.e. their component-depth) first, then by the order in which
  /// they were computed by collectAttachPtrExprInfo(), if they are semantically
  /// different.
  struct AttachPtrExprComparator {
    const MappableExprsHandler &Handler;
    // Cache of previous equality comparison results.
    mutable llvm::DenseMap<std::pair<const Expr *, const Expr *>, bool>
        CachedEqualityComparisons;

    AttachPtrExprComparator(const MappableExprsHandler &H) : Handler(H) {}
    AttachPtrExprComparator() = delete;

    // Return true iff LHS is "less than" RHS.
    // Ordering: missing depth (no attach pointer) sorts first, then lower
    // component-depth; equal-depth expressions tie unless semantically
    // different, in which case earlier computation order wins.
    bool operator()(const Expr *LHS, const Expr *RHS) const {
      if (LHS == RHS)
        return false;

      // First, compare by complexity (depth)
      const auto ItLHS = Handler.AttachPtrComponentDepthMap.find(LHS);
      const auto ItRHS = Handler.AttachPtrComponentDepthMap.find(RHS);

      std::optional<size_t> DepthLHS =
          (ItLHS != Handler.AttachPtrComponentDepthMap.end()) ? ItLHS->second
                                                              : std::nullopt;
      std::optional<size_t> DepthRHS =
          (ItRHS != Handler.AttachPtrComponentDepthMap.end()) ? ItRHS->second
                                                              : std::nullopt;

      // std::nullopt (no attach pointer) has lowest complexity
      if (!DepthLHS.has_value() && !DepthRHS.has_value()) {
        // Both have same complexity, now check semantic equality
        if (areEqual(LHS, RHS))
          return false;
        // Different semantically, compare by computation order
        return wasComputedBefore(LHS, RHS);
      }
      if (!DepthLHS.has_value())
        return true; // LHS has lower complexity
      if (!DepthRHS.has_value())
        return false; // RHS has lower complexity

      // Both have values, compare by depth (lower depth = lower complexity)
      if (DepthLHS.value() != DepthRHS.value())
        return DepthLHS.value() < DepthRHS.value();

      // Same complexity, now check semantic equality
      if (areEqual(LHS, RHS))
        return false;
      // Different semantically, compare by computation order
      return wasComputedBefore(LHS, RHS);
    }

  public:
    /// Return true if \p LHS and \p RHS are semantically equal. Uses pre-cached
    /// results, if available, otherwise does a recursive semantic comparison.
    bool areEqual(const Expr *LHS, const Expr *RHS) const {
      // Check cache first for faster lookup
      const auto CachedResultIt = CachedEqualityComparisons.find({LHS, RHS});
      if (CachedResultIt != CachedEqualityComparisons.end())
        return CachedResultIt->second;

      bool ComparisonResult = areSemanticallyEqual(LHS, RHS);

      // Cache the result for future lookups (both orders since semantic
      // equality is commutative)
      CachedEqualityComparisons[{LHS, RHS}] = ComparisonResult;
      CachedEqualityComparisons[{RHS, LHS}] = ComparisonResult;
      return ComparisonResult;
    }

    /// Compare the two attach-ptr expressions by their computation order.
    /// Returns true iff LHS was computed before RHS by
    /// collectAttachPtrExprInfo().
    /// Note: uses DenseMap::at(), so both expressions must already be present
    /// in AttachPtrComputationOrderMap.
    bool wasComputedBefore(const Expr *LHS, const Expr *RHS) const {
      const size_t &OrderLHS = Handler.AttachPtrComputationOrderMap.at(LHS);
      const size_t &OrderRHS = Handler.AttachPtrComputationOrderMap.at(RHS);

      return OrderLHS < OrderRHS;
    }

  private:
    /// Helper function to compare attach-pointer expressions semantically.
    /// This function handles various expression types that can be part of an
    /// attach-pointer.
    /// TODO: Not urgent, but we should ideally return true when comparing
    /// `p[10]`, `*(p + 10)`, `*(p + 5 + 5)`, `p[10:1]` etc.
    bool areSemanticallyEqual(const Expr *LHS, const Expr *RHS) const {
      if (LHS == RHS)
        return true;

      // If only one is null, they aren't equal
      if (!LHS || !RHS)
        return false;

      ASTContext &Ctx = Handler.CGF.getContext();
      // Strip away parentheses and no-op casts to get to the core expression
      LHS = LHS->IgnoreParenNoopCasts(Ctx);
      RHS = RHS->IgnoreParenNoopCasts(Ctx);

      // Direct pointer comparison of the underlying expressions
      if (LHS == RHS)
        return true;

      // Check if the expression classes match
      if (LHS->getStmtClass() != RHS->getStmtClass())
        return false;

      // Handle DeclRefExpr (variable references)
      if (const auto *LD = dyn_cast<DeclRefExpr>(LHS)) {
        const auto *RD = dyn_cast<DeclRefExpr>(RHS);
        if (!RD)
          return false;
        return LD->getDecl()->getCanonicalDecl() ==
               RD->getDecl()->getCanonicalDecl();
      }

      // Handle ArraySubscriptExpr (array indexing like a[i])
      if (const auto *LA = dyn_cast<ArraySubscriptExpr>(LHS)) {
        const auto *RA = dyn_cast<ArraySubscriptExpr>(RHS);
        if (!RA)
          return false;
        return areSemanticallyEqual(LA->getBase(), RA->getBase()) &&
               areSemanticallyEqual(LA->getIdx(), RA->getIdx());
      }

      // Handle MemberExpr (member access like s.m or p->m)
      if (const auto *LM = dyn_cast<MemberExpr>(LHS)) {
        const auto *RM = dyn_cast<MemberExpr>(RHS);
        if (!RM)
          return false;
        if (LM->getMemberDecl()->getCanonicalDecl() !=
            RM->getMemberDecl()->getCanonicalDecl())
          return false;
        return areSemanticallyEqual(LM->getBase(), RM->getBase());
      }

      // Handle UnaryOperator (unary operations like *p, &x, etc.)
      if (const auto *LU = dyn_cast<UnaryOperator>(LHS)) {
        const auto *RU = dyn_cast<UnaryOperator>(RHS);
        if (!RU)
          return false;
        if (LU->getOpcode() != RU->getOpcode())
          return false;
        return areSemanticallyEqual(LU->getSubExpr(), RU->getSubExpr());
      }

      // Handle BinaryOperator (binary operations like p + offset)
      if (const auto *LB = dyn_cast<BinaryOperator>(LHS)) {
        const auto *RB = dyn_cast<BinaryOperator>(RHS);
        if (!RB)
          return false;
        if (LB->getOpcode() != RB->getOpcode())
          return false;
        return areSemanticallyEqual(LB->getLHS(), RB->getLHS()) &&
               areSemanticallyEqual(LB->getRHS(), RB->getRHS());
      }

      // Handle ArraySectionExpr (array sections like a[0:1])
      // Attach pointers should not contain array-sections, but currently we
      // don't emit an error.
      if (const auto *LAS = dyn_cast<ArraySectionExpr>(LHS)) {
        const auto *RAS = dyn_cast<ArraySectionExpr>(RHS);
        if (!RAS)
          return false;
        return areSemanticallyEqual(LAS->getBase(), RAS->getBase()) &&
               areSemanticallyEqual(LAS->getLowerBound(),
                                    RAS->getLowerBound()) &&
               areSemanticallyEqual(LAS->getLength(), RAS->getLength());
      }

      // Handle CastExpr (explicit casts)
      if (const auto *LC = dyn_cast<CastExpr>(LHS)) {
        const auto *RC = dyn_cast<CastExpr>(RHS);
        if (!RC)
          return false;
        if (LC->getCastKind() != RC->getCastKind())
          return false;
        return areSemanticallyEqual(LC->getSubExpr(), RC->getSubExpr());
      }

      // Handle CXXThisExpr (this pointer)
      if (isa<CXXThisExpr>(LHS) && isa<CXXThisExpr>(RHS))
        return true;

      // Handle IntegerLiteral (integer constants)
      if (const auto *LI = dyn_cast<IntegerLiteral>(LHS)) {
        const auto *RI = dyn_cast<IntegerLiteral>(RHS);
        if (!RI)
          return false;
        return LI->getValue() == RI->getValue();
      }

      // Handle CharacterLiteral (character constants)
      if (const auto *LC = dyn_cast<CharacterLiteral>(LHS)) {
        const auto *RC = dyn_cast<CharacterLiteral>(RHS);
        if (!RC)
          return false;
        return LC->getValue() == RC->getValue();
      }

      // Handle FloatingLiteral (floating point constants)
      if (const auto *LF = dyn_cast<FloatingLiteral>(LHS)) {
        const auto *RF = dyn_cast<FloatingLiteral>(RHS);
        if (!RF)
          return false;
        // Use bitwise comparison for floating point literals
        return LF->getValue().bitwiseIsEqual(RF->getValue());
      }

      // Handle StringLiteral (string constants)
      if (const auto *LS = dyn_cast<StringLiteral>(LHS)) {
        const auto *RS = dyn_cast<StringLiteral>(RHS);
        if (!RS)
          return false;
        return LS->getString() == RS->getString();
      }

      // Handle CXXNullPtrLiteralExpr (nullptr)
      // NOTE(review): the guard line (presumably `if
      // (isa<CXXNullPtrLiteralExpr>(LHS) && isa<CXXNullPtrLiteralExpr>(RHS))`)
      // was lost in extraction; without it this `return true` is
      // unconditional -- restore from upstream.
        return true;

      // Handle CXXBoolLiteralExpr (true/false)
      if (const auto *LB = dyn_cast<CXXBoolLiteralExpr>(LHS)) {
        const auto *RB = dyn_cast<CXXBoolLiteralExpr>(RHS);
        if (!RB)
          return false;
        return LB->getValue() == RB->getValue();
      }

      // Fallback for other forms - use the existing comparison method
      return Expr::isSameComparisonOperand(LHS, RHS);
    }
  };
7185
7186 /// Get the offset of the OMP_MAP_MEMBER_OF field.
7187 static unsigned getFlagMemberOffset() {
7188 unsigned Offset = 0;
7189 for (uint64_t Remain =
7190 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
7191 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF);
7192 !(Remain & 1); Remain = Remain >> 1)
7193 Offset++;
7194 return Offset;
7195 }
7196
7197 /// Class that holds debugging information for a data mapping to be passed to
7198 /// the runtime library.
7199 class MappingExprInfo {
7200 /// The variable declaration used for the data mapping.
7201 const ValueDecl *MapDecl = nullptr;
7202 /// The original expression used in the map clause, or null if there is
7203 /// none.
7204 const Expr *MapExpr = nullptr;
7205
7206 public:
7207 MappingExprInfo(const ValueDecl *MapDecl, const Expr *MapExpr = nullptr)
7208 : MapDecl(MapDecl), MapExpr(MapExpr) {}
7209
7210 const ValueDecl *getMapDecl() const { return MapDecl; }
7211 const Expr *getMapExpr() const { return MapExpr; }
7212 };
7213
7214 using DeviceInfoTy = llvm::OpenMPIRBuilder::DeviceInfoTy;
7215 using MapBaseValuesArrayTy = llvm::OpenMPIRBuilder::MapValuesArrayTy;
7216 using MapValuesArrayTy = llvm::OpenMPIRBuilder::MapValuesArrayTy;
7217 using MapFlagsArrayTy = llvm::OpenMPIRBuilder::MapFlagsArrayTy;
7218 using MapDimArrayTy = llvm::OpenMPIRBuilder::MapDimArrayTy;
7219 using MapNonContiguousArrayTy =
7220 llvm::OpenMPIRBuilder::MapNonContiguousArrayTy;
7221 using MapExprsArrayTy = SmallVector<MappingExprInfo, 4>;
7222 using MapValueDeclsArrayTy = SmallVector<const ValueDecl *, 4>;
7223 using MapData =
7225 OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>,
7226 bool /*IsImplicit*/, const ValueDecl *, const Expr *>;
7227 using MapDataArrayTy = SmallVector<MapData, 4>;
7228
7229 /// This structure contains combined information generated for mappable
7230 /// clauses, including base pointers, pointers, sizes, map types, user-defined
7231 /// mappers, and non-contiguous information.
7232 struct MapCombinedInfoTy : llvm::OpenMPIRBuilder::MapInfosTy {
7233 MapExprsArrayTy Exprs;
7234 MapValueDeclsArrayTy Mappers;
7235 MapValueDeclsArrayTy DevicePtrDecls;
7236
7237 /// Append arrays in \a CurInfo.
7238 void append(MapCombinedInfoTy &CurInfo) {
7239 Exprs.append(CurInfo.Exprs.begin(), CurInfo.Exprs.end());
7240 DevicePtrDecls.append(CurInfo.DevicePtrDecls.begin(),
7241 CurInfo.DevicePtrDecls.end());
7242 Mappers.append(CurInfo.Mappers.begin(), CurInfo.Mappers.end());
7243 llvm::OpenMPIRBuilder::MapInfosTy::append(CurInfo);
7244 }
7245 };
7246
  /// Map between a struct and its lowest & highest elements which have been
  /// mapped.
  /// [ValueDecl *] --> {LE(FieldIndex, Pointer),
  ///                    HE(FieldIndex, Pointer)}
  struct StructRangeInfoTy {
    /// Combined map information collected for this struct ahead of the field
    /// range below.
    MapCombinedInfoTy PreliminaryMapData;
    /// Lowest mapped element: {field index, field address}.
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> LowestElem = {
        0, Address::invalid()};
    /// Highest mapped element: {field index, field address}.
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> HighestElem = {
        0, Address::invalid()};
    /// Base address of the struct.
    Address Base = Address::invalid();
    /// Lower-bound address of the mapped range.
    Address LB = Address::invalid();
    bool IsArraySection = false;
    bool HasCompleteRecord = false;
  };
7262
  /// A struct to store the attach pointer and pointee information, to be used
  /// when emitting an attach entry.
  struct AttachInfoTy {
    Address AttachPtrAddr = Address::invalid();  // address of the pointer
    Address AttachPteeAddr = Address::invalid(); // address of the pointee
    const ValueDecl *AttachPtrDecl = nullptr;    // decl behind the pointer
    const Expr *AttachMapExpr = nullptr;         // originating map expression

    /// The entry is usable only once both addresses have been filled in.
    bool isValid() const {
      return AttachPtrAddr.isValid() && AttachPteeAddr.isValid();
    }
  };
7275
7276 /// Check if there's any component list where the attach pointer expression
7277 /// matches the given captured variable.
7278 bool hasAttachEntryForCapturedVar(const ValueDecl *VD) const {
7279 for (const auto &AttachEntry : AttachPtrExprMap) {
7280 if (AttachEntry.second) {
7281 // Check if the attach pointer expression is a DeclRefExpr that
7282 // references the captured variable
7283 if (const auto *DRE = dyn_cast<DeclRefExpr>(AttachEntry.second))
7284 if (DRE->getDecl() == VD)
7285 return true;
7286 }
7287 }
7288 return false;
7289 }
7290
  /// Get the previously-cached attach pointer for a component list, if-any.
  /// Returns nullptr when no attach pointer was recorded for \p Components.
  const Expr *getAttachPtrExpr(
      // NOTE(review): the parameter line was lost in extraction (presumably
      // `OMPClauseMappableExprCommon::MappableExprComponentListRef
      // Components)`) -- restore from upstream.
      const {
    const auto It = AttachPtrExprMap.find(Components);
    if (It != AttachPtrExprMap.end())
      return It->second;

    return nullptr;
  }
7301
private:
  /// Kind that defines how a device pointer has to be returned.
  struct MapInfo {
    // NOTE(review): two member lines were lost in extraction here
    // (presumably the component-list ref `Components` and
    // `OpenMPMapClauseKind MapType`) -- restore from upstream.
    ArrayRef<OpenMPMapModifierKind> MapModifiers;
    ArrayRef<OpenMPMotionModifierKind> MotionModifiers;
    bool ReturnDevicePointer = false;
    bool IsImplicit = false;
    const ValueDecl *Mapper = nullptr;
    const Expr *VarRef = nullptr;
    bool ForDeviceAddr = false;
    bool HasUdpFbNullify = false;

    MapInfo() = default;
    MapInfo(
        // NOTE(review): the component-list parameter line was lost in
        // extraction -- restore from upstream.
        OpenMPMapClauseKind MapType,
        ArrayRef<OpenMPMapModifierKind> MapModifiers,
        ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
        bool ReturnDevicePointer, bool IsImplicit,
        const ValueDecl *Mapper = nullptr, const Expr *VarRef = nullptr,
        bool ForDeviceAddr = false, bool HasUdpFbNullify = false)
        : Components(Components), MapType(MapType), MapModifiers(MapModifiers),
          MotionModifiers(MotionModifiers),
          ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit),
          Mapper(Mapper), VarRef(VarRef), ForDeviceAddr(ForDeviceAddr),
          HasUdpFbNullify(HasUdpFbNullify) {}
  };
7331
  /// The target directive from where the mappable clauses were extracted. It
  /// is either a executable directive or a user-defined mapper directive.
  llvm::PointerUnion<const OMPExecutableDirective *,
                     const OMPDeclareMapperDecl *>
      CurDir;

  /// Function the directive is being generated for.
  CodeGenFunction &CGF;

  /// Set of all first private variables in the current directive.
  /// bool data is set to true if the variable is implicitly marked as
  /// firstprivate, false otherwise.
  llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, bool> FirstPrivateDecls;

  /// Set of defaultmap clause kinds that use firstprivate behavior.
  llvm::SmallSet<OpenMPDefaultmapClauseKind, 4> DefaultmapFirstprivateKinds;

  /// Map between device pointer declarations and their expression components.
  /// The key value for declarations in 'this' is null.
  llvm::DenseMap<
      const ValueDecl *,
      SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
      DevPointersMap;

  /// Map between device addr declarations and their expression components.
  /// The key value for declarations in 'this' is null.
  llvm::DenseMap<
      const ValueDecl *,
      SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
      HasDevAddrsMap;

  /// Map between lambda declarations and their map type.
  llvm::DenseMap<const ValueDecl *, const OMPMapClause *> LambdasMap;

  /// Map from component lists to their attach pointer expressions.
  // NOTE(review): the first line of this declaration was lost in extraction
  // (presumably `llvm::DenseMap<
  //     OMPClauseMappableExprCommon::MappableExprComponentListRef,`) --
  // restore from upstream.
             const Expr *>
      AttachPtrExprMap;

  /// Map from attach pointer expressions to their component depth.
  /// nullptr key has std::nullopt depth. This can be used to order attach-ptr
  /// expressions with increasing/decreasing depth.
  /// The component-depth of `nullptr` (i.e. no attach-ptr) is `std::nullopt`.
  /// TODO: Not urgent, but we should ideally use the number of pointer
  /// dereferences in an expr as an indicator of its complexity, instead of the
  /// component-depth. That would be needed for us to treat `p[1]`, `*(p + 10)`,
  /// `*(p + 5 + 5)` together.
  llvm::DenseMap<const Expr *, std::optional<size_t>>
      AttachPtrComponentDepthMap = {{nullptr, std::nullopt}};

  /// Map from attach pointer expressions to the order they were computed in,
  /// in collectAttachPtrExprInfo().
  llvm::DenseMap<const Expr *, size_t> AttachPtrComputationOrderMap = {
      {nullptr, 0}};

  /// An instance of attach-ptr-expr comparator that can be used throughout the
  /// lifetime of this handler.
  AttachPtrExprComparator AttachPtrComparator;
7390
  /// Emit IR computing the size in bytes of the storage mapped by \p E:
  /// handles array-shaping expressions, array sections (with or without
  /// length/lower-bound), and plain typed expressions.
  llvm::Value *getExprTypeSize(const Expr *E) const {
    QualType ExprTy = E->getType().getCanonicalType();

    // Calculate the size for array shaping expression.
    if (const auto *OAE = dyn_cast<OMPArrayShapingExpr>(E)) {
      // Size = element size * product of all dimensions.
      llvm::Value *Size =
          CGF.getTypeSize(OAE->getBase()->getType()->getPointeeType());
      for (const Expr *SE : OAE->getDimensions()) {
        llvm::Value *Sz = CGF.EmitScalarExpr(SE);
        Sz = CGF.EmitScalarConversion(Sz, SE->getType(),
                                      CGF.getContext().getSizeType(),
                                      SE->getExprLoc());
        Size = CGF.Builder.CreateNUWMul(Size, Sz);
      }
      return Size;
    }

    // Reference types are ignored for mapping purposes.
    if (const auto *RefTy = ExprTy->getAs<ReferenceType>())
      ExprTy = RefTy->getPointeeType().getCanonicalType();

    // Given that an array section is considered a built-in type, we need to
    // do the calculation based on the length of the section instead of relying
    // on CGF.getTypeSize(E->getType()).
    if (const auto *OAE = dyn_cast<ArraySectionExpr>(E)) {
      QualType BaseTy = ArraySectionExpr::getBaseOriginalType(
                            OAE->getBase()->IgnoreParenImpCasts())
      // NOTE(review): a continuation line was lost in extraction here
      // (presumably `.getCanonicalType();`) -- restore from upstream.

      // If there is no length associated with the expression and lower bound is
      // not specified too, that means we are using the whole length of the
      // base.
      if (!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
          !OAE->getLowerBound())
        return CGF.getTypeSize(BaseTy);

      llvm::Value *ElemSize;
      if (const auto *PTy = BaseTy->getAs<PointerType>()) {
        ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType());
      } else {
        const auto *ATy = cast<ArrayType>(BaseTy.getTypePtr());
        assert(ATy && "Expecting array type if not a pointer type.");
        ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType());
      }

      // If we don't have a length at this point, that is because we have an
      // array section with a single element.
      if (!OAE->getLength() && OAE->getColonLocFirst().isInvalid())
        return ElemSize;

      if (const Expr *LenExpr = OAE->getLength()) {
        llvm::Value *LengthVal = CGF.EmitScalarExpr(LenExpr);
        LengthVal = CGF.EmitScalarConversion(LengthVal, LenExpr->getType(),
                                             CGF.getContext().getSizeType(),
                                             LenExpr->getExprLoc());
        return CGF.Builder.CreateNUWMul(LengthVal, ElemSize);
      }
      assert(!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
             OAE->getLowerBound() && "expected array_section[lb:].");
      // Size = sizetype - lb * elemtype;
      llvm::Value *LengthVal = CGF.getTypeSize(BaseTy);
      llvm::Value *LBVal = CGF.EmitScalarExpr(OAE->getLowerBound());
      LBVal = CGF.EmitScalarConversion(LBVal, OAE->getLowerBound()->getType(),
                                       CGF.getContext().getSizeType(),
                                       OAE->getLowerBound()->getExprLoc());
      LBVal = CGF.Builder.CreateNUWMul(LBVal, ElemSize);
      llvm::Value *Cmp = CGF.Builder.CreateICmpUGT(LengthVal, LBVal);
      llvm::Value *TrueVal = CGF.Builder.CreateNUWSub(LengthVal, LBVal);
      // Clamp to zero when the lower bound is past the end of the base.
      LengthVal = CGF.Builder.CreateSelect(
          Cmp, TrueVal, llvm::ConstantInt::get(CGF.SizeTy, 0));
      return LengthVal;
    }
    return CGF.getTypeSize(ExprTy);
  }
7465
7466 /// Return the corresponding bits for a given map clause modifier. Add
7467 /// a flag marking the map as a pointer if requested. Add a flag marking the
7468 /// map as the first one of a series of maps that relate to the same map
7469 /// expression.
7470 OpenMPOffloadMappingFlags getMapTypeBits(
7471 OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
7472 ArrayRef<OpenMPMotionModifierKind> MotionModifiers, bool IsImplicit,
7473 bool AddPtrFlag, bool AddIsTargetParamFlag, bool IsNonContiguous) const {
7474 OpenMPOffloadMappingFlags Bits =
7475 IsImplicit ? OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT
7476 : OpenMPOffloadMappingFlags::OMP_MAP_NONE;
7477 switch (MapType) {
7478 case OMPC_MAP_alloc:
7479 case OMPC_MAP_release:
7480 // alloc and release is the default behavior in the runtime library, i.e.
7481 // if we don't pass any bits alloc/release that is what the runtime is
7482 // going to do. Therefore, we don't need to signal anything for these two
7483 // type modifiers.
7484 break;
7485 case OMPC_MAP_to:
7486 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_TO;
7487 break;
7488 case OMPC_MAP_from:
7489 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_FROM;
7490 break;
7491 case OMPC_MAP_tofrom:
7492 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_TO |
7493 OpenMPOffloadMappingFlags::OMP_MAP_FROM;
7494 break;
7495 case OMPC_MAP_delete:
7496 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_DELETE;
7497 break;
7498 case OMPC_MAP_unknown:
7499 llvm_unreachable("Unexpected map type!");
7500 }
7501 if (AddPtrFlag)
7502 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ;
7503 if (AddIsTargetParamFlag)
7504 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;
7505 if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_always))
7506 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_ALWAYS;
7507 if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_close))
7508 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_CLOSE;
7509 if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_present) ||
7510 llvm::is_contained(MotionModifiers, OMPC_MOTION_MODIFIER_present))
7511 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_PRESENT;
7512 if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_ompx_hold))
7513 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD;
7514 if (IsNonContiguous)
7515 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_NON_CONTIG;
7516 return Bits;
7517 }
7518
7519 /// Return true if the provided expression is a final array section. A
7520 /// final array section, is one whose length can't be proved to be one.
7521 bool isFinalArraySectionExpression(const Expr *E) const {
7522 const auto *OASE = dyn_cast<ArraySectionExpr>(E);
7523
7524 // It is not an array section and therefore not a unity-size one.
7525 if (!OASE)
7526 return false;
7527
7528 // An array section with no colon always refer to a single element.
7529 if (OASE->getColonLocFirst().isInvalid())
7530 return false;
7531
7532 const Expr *Length = OASE->getLength();
7533
7534 // If we don't have a length we have to check if the array has size 1
7535 // for this dimension. Also, we should always expect a length if the
7536 // base type is pointer.
7537 if (!Length) {
7538 QualType BaseQTy = ArraySectionExpr::getBaseOriginalType(
7539 OASE->getBase()->IgnoreParenImpCasts())
7541 if (const auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr()))
7542 return ATy->getSExtSize() != 1;
7543 // If we don't have a constant dimension length, we have to consider
7544 // the current section as having any size, so it is not necessarily
7545 // unitary. If it happen to be unity size, that's user fault.
7546 return true;
7547 }
7548
7549 // Check if the length evaluates to 1.
7550 Expr::EvalResult Result;
7551 if (!Length->EvaluateAsInt(Result, CGF.getContext()))
7552 return true; // Can have more that size 1.
7553
7554 llvm::APSInt ConstLength = Result.Val.getInt();
7555 return ConstLength.getSExtValue() != 1;
7556 }
7557
7558 /// Emit an attach entry into \p CombinedInfo, using the information from \p
7559 /// AttachInfo. For example, for a map of form `int *p; ... map(p[1:10])`,
7560 /// an attach entry has the following form:
7561 /// &p, &p[1], sizeof(void*), ATTACH
7562 void emitAttachEntry(CodeGenFunction &CGF, MapCombinedInfoTy &CombinedInfo,
7563 const AttachInfoTy &AttachInfo) const {
7564 assert(AttachInfo.isValid() &&
7565 "Expected valid attach pointer/pointee information!");
7566
7567 // Size is the size of the pointer itself - use pointer size, not BaseDecl
7568 // size
7569 llvm::Value *PointerSize = CGF.Builder.CreateIntCast(
7570 llvm::ConstantInt::get(
7571 CGF.CGM.SizeTy, CGF.getContext()
7573 .getQuantity()),
7574 CGF.Int64Ty, /*isSigned=*/true);
7575
7576 CombinedInfo.Exprs.emplace_back(AttachInfo.AttachPtrDecl,
7577 AttachInfo.AttachMapExpr);
7578 CombinedInfo.BasePointers.push_back(
7579 AttachInfo.AttachPtrAddr.emitRawPointer(CGF));
7580 CombinedInfo.DevicePtrDecls.push_back(nullptr);
7581 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
7582 CombinedInfo.Pointers.push_back(
7583 AttachInfo.AttachPteeAddr.emitRawPointer(CGF));
7584 CombinedInfo.Sizes.push_back(PointerSize);
7585 CombinedInfo.Types.push_back(OpenMPOffloadMappingFlags::OMP_MAP_ATTACH);
7586 CombinedInfo.Mappers.push_back(nullptr);
7587 CombinedInfo.NonContigInfo.Dims.push_back(1);
7588 }
7589
7590 /// A helper class to copy structures with overlapped elements, i.e. those
7591 /// which have mappings of both "s" and "s.mem". Consecutive elements that
7592 /// are not explicitly copied have mapping nodes synthesized for them,
7593 /// taking care to avoid generating zero-sized copies.
7594 class CopyOverlappedEntryGaps {
7595 CodeGenFunction &CGF;
7596 MapCombinedInfoTy &CombinedInfo;
7597 OpenMPOffloadMappingFlags Flags = OpenMPOffloadMappingFlags::OMP_MAP_NONE;
7598 const ValueDecl *MapDecl = nullptr;
7599 const Expr *MapExpr = nullptr;
7600 Address BP = Address::invalid();
7601 bool IsNonContiguous = false;
7602 uint64_t DimSize = 0;
7603 // These elements track the position as the struct is iterated over
7604 // (in order of increasing element address).
7605 const RecordDecl *LastParent = nullptr;
7606 uint64_t Cursor = 0;
7607 unsigned LastIndex = -1u;
7608 Address LB = Address::invalid();
7609
7610 public:
7611 CopyOverlappedEntryGaps(CodeGenFunction &CGF,
7612 MapCombinedInfoTy &CombinedInfo,
7613 OpenMPOffloadMappingFlags Flags,
7614 const ValueDecl *MapDecl, const Expr *MapExpr,
7615 Address BP, Address LB, bool IsNonContiguous,
7616 uint64_t DimSize)
7617 : CGF(CGF), CombinedInfo(CombinedInfo), Flags(Flags), MapDecl(MapDecl),
7618 MapExpr(MapExpr), BP(BP), IsNonContiguous(IsNonContiguous),
7619 DimSize(DimSize), LB(LB) {}
7620
7621 void processField(
7622 const OMPClauseMappableExprCommon::MappableComponent &MC,
7623 const FieldDecl *FD,
7624 llvm::function_ref<LValue(CodeGenFunction &, const MemberExpr *)>
7625 EmitMemberExprBase) {
7626 const RecordDecl *RD = FD->getParent();
7627 const ASTRecordLayout &RL = CGF.getContext().getASTRecordLayout(RD);
7628 uint64_t FieldOffset = RL.getFieldOffset(FD->getFieldIndex());
7629 uint64_t FieldSize =
7631 Address ComponentLB = Address::invalid();
7632
7633 if (FD->getType()->isLValueReferenceType()) {
7634 const auto *ME = cast<MemberExpr>(MC.getAssociatedExpression());
7635 LValue BaseLVal = EmitMemberExprBase(CGF, ME);
7636 ComponentLB =
7637 CGF.EmitLValueForFieldInitialization(BaseLVal, FD).getAddress();
7638 } else {
7639 ComponentLB =
7641 }
7642
7643 if (!LastParent)
7644 LastParent = RD;
7645 if (FD->getParent() == LastParent) {
7646 if (FD->getFieldIndex() != LastIndex + 1)
7647 copyUntilField(FD, ComponentLB);
7648 } else {
7649 LastParent = FD->getParent();
7650 if (((int64_t)FieldOffset - (int64_t)Cursor) > 0)
7651 copyUntilField(FD, ComponentLB);
7652 }
7653 Cursor = FieldOffset + FieldSize;
7654 LastIndex = FD->getFieldIndex();
7655 LB = CGF.Builder.CreateConstGEP(ComponentLB, 1);
7656 }
7657
7658 void copyUntilField(const FieldDecl *FD, Address ComponentLB) {
7659 llvm::Value *ComponentLBPtr = ComponentLB.emitRawPointer(CGF);
7660 llvm::Value *LBPtr = LB.emitRawPointer(CGF);
7661 llvm::Value *Size = CGF.Builder.CreatePtrDiff(ComponentLBPtr, LBPtr);
7662 copySizedChunk(LBPtr, Size);
7663 }
7664
7665 void copyUntilEnd(Address HB) {
7666 if (LastParent) {
7667 const ASTRecordLayout &RL =
7668 CGF.getContext().getASTRecordLayout(LastParent);
7669 if ((uint64_t)CGF.getContext().toBits(RL.getSize()) <= Cursor)
7670 return;
7671 }
7672 llvm::Value *LBPtr = LB.emitRawPointer(CGF);
7673 llvm::Value *Size = CGF.Builder.CreatePtrDiff(
7674 CGF.Builder.CreateConstGEP(HB, 1).emitRawPointer(CGF), LBPtr);
7675 copySizedChunk(LBPtr, Size);
7676 }
7677
7678 void copySizedChunk(llvm::Value *Base, llvm::Value *Size) {
7679 CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
7680 CombinedInfo.BasePointers.push_back(BP.emitRawPointer(CGF));
7681 CombinedInfo.DevicePtrDecls.push_back(nullptr);
7682 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
7683 CombinedInfo.Pointers.push_back(Base);
7684 CombinedInfo.Sizes.push_back(
7685 CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/false));
7686 CombinedInfo.Types.push_back(Flags);
7687 CombinedInfo.Mappers.push_back(nullptr);
7688 CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize : 1);
7689 }
7690 };
7691
7692 /// Generate the base pointers, section pointers, sizes, map type bits, and
7693 /// user-defined mappers (all included in \a CombinedInfo) for the provided
7694 /// map type, map or motion modifiers, and expression components.
7695 /// \a IsFirstComponent should be set to true if the provided set of
7696 /// components is the first associated with a capture.
7697 void generateInfoForComponentList(
7698 OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
7699 ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
7701 MapCombinedInfoTy &CombinedInfo,
7702 MapCombinedInfoTy &StructBaseCombinedInfo,
7703 StructRangeInfoTy &PartialStruct, AttachInfoTy &AttachInfo,
7704 bool IsFirstComponentList, bool IsImplicit,
7705 bool GenerateAllInfoForClauses, const ValueDecl *Mapper = nullptr,
7706 bool ForDeviceAddr = false, const ValueDecl *BaseDecl = nullptr,
7707 const Expr *MapExpr = nullptr,
7708 ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
7709 OverlappedElements = {}) const {
7710
7711 // The following summarizes what has to be generated for each map and the
7712 // types below. The generated information is expressed in this order:
7713 // base pointer, section pointer, size, flags
7714 // (to add to the ones that come from the map type and modifier).
7715 // Entries annotated with (+) are only generated for "target" constructs,
7716 // and only if the variable at the beginning of the expression is used in
7717 // the region.
7718 //
7719 // double d;
7720 // int i[100];
7721 // float *p;
7722 // int **a = &i;
7723 //
7724 // struct S1 {
7725 // int i;
7726 // float f[50];
7727 // }
7728 // struct S2 {
7729 // int i;
7730 // float f[50];
7731 // S1 s;
7732 // double *p;
7733 // double *&pref;
7734 // struct S2 *ps;
7735 // int &ref;
7736 // }
7737 // S2 s;
7738 // S2 *ps;
7739 //
7740 // map(d)
7741 // &d, &d, sizeof(double), TARGET_PARAM | TO | FROM
7742 //
7743 // map(i)
7744 // &i, &i, 100*sizeof(int), TARGET_PARAM | TO | FROM
7745 //
7746 // map(i[1:23])
7747 // &i(=&i[0]), &i[1], 23*sizeof(int), TARGET_PARAM | TO | FROM
7748 //
7749 // map(p)
7750 // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM
7751 //
7752 // map(p[1:24])
7753 // p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM // map pointee
7754 // &p, &p[1], sizeof(void*), ATTACH // attach pointer/pointee, if both
7755 // // are present, and either is new
7756 //
7757 // map(([22])p)
7758 // p, p, 22*sizeof(float), TARGET_PARAM | TO | FROM
7759 // &p, p, sizeof(void*), ATTACH
7760 //
7761 // map((*a)[0:3])
7762 // a, a, 0, TARGET_PARAM | IMPLICIT // (+)
7763 // (*a)[0], &(*a)[0], 3 * sizeof(int), TO | FROM
7764 // &(*a), &(*a)[0], sizeof(void*), ATTACH
7765 // (+) Only on target, if a is used in the region
7766 // Note: Since the attach base-pointer is `*a`, which is not a scalar
7767 // variable, it doesn't determine the clause on `a`. `a` is mapped using
7768 // a zero-length-array-section map by generateDefaultMapInfo, if it is
7769 // referenced in the target region, because it is a pointer.
7770 //
7771 // map(**a)
7772 // a, a, 0, TARGET_PARAM | IMPLICIT // (+)
7773 // &(*a)[0], &(*a)[0], sizeof(int), TO | FROM
7774 // &(*a), &(*a)[0], sizeof(void*), ATTACH
7775 // (+) Only on target, if a is used in the region
7776 //
7777 // map(s)
7778 // FIXME: This needs to also imply map(ref_ptr_ptee: s.ref), since the
7779 // effect is supposed to be same as if the user had a map for every element
7780 // of the struct. We currently do a shallow-map of s.
7781 // &s, &s, sizeof(S2), TARGET_PARAM | TO | FROM
7782 //
7783 // map(s.i)
7784 // &s, &(s.i), sizeof(int), TARGET_PARAM | TO | FROM
7785 //
7786 // map(s.s.f)
7787 // &s, &(s.s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
7788 //
7789 // map(s.p)
7790 // &s, &(s.p), sizeof(double*), TARGET_PARAM | TO | FROM
7791 //
7792 // map(to: s.p[:22])
7793 // &s, &(s.p), sizeof(double*), TARGET_PARAM | IMPLICIT // (+)
7794 // &(s.p[0]), &(s.p[0]), 22 * sizeof(double*), TO | FROM
7795 // &(s.p), &(s.p[0]), sizeof(void*), ATTACH
7796 //
7797 // map(to: s.ref)
7798 // &s, &(ptr(s.ref)), sizeof(int*), TARGET_PARAM (*)
7799 // &s, &(ptee(s.ref)), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
7800 // (*) alloc space for struct members, only this is a target parameter.
7801 // (**) map the pointer (nothing to be mapped in this example) (the compiler
7802 // optimizes this entry out, same in the examples below)
7803 // (***) map the pointee (map: to)
7804 // Note: ptr(s.ref) represents the referring pointer of s.ref
7805 // ptee(s.ref) represents the referenced pointee of s.ref
7806 //
7807 // map(to: s.pref)
7808 // &s, &(ptr(s.pref)), sizeof(double**), TARGET_PARAM
7809 // &s, &(ptee(s.pref)), sizeof(double*), MEMBER_OF(1) | PTR_AND_OBJ | TO
7810 //
7811 // map(to: s.pref[:22])
7812 // &s, &(ptr(s.pref)), sizeof(double**), TARGET_PARAM | IMPLICIT // (+)
7813 // &s, &(ptee(s.pref)), sizeof(double*), MEMBER_OF(1) | PTR_AND_OBJ | TO |
7814 // FROM | IMPLICIT // (+)
7815 // &(ptee(s.pref)[0]), &(ptee(s.pref)[0]), 22 * sizeof(double), TO
7816 // &(ptee(s.pref)), &(ptee(s.pref)[0]), sizeof(void*), ATTACH
7817 //
7818 // map(s.ps)
7819 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM
7820 //
7821 // map(from: s.ps->s.i)
7822 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM | IMPLICIT // (+)
7823 // &(s.ps[0]), &(s.ps->s.i), sizeof(int), FROM
7824 // &(s.ps), &(s.ps->s.i), sizeof(void*), ATTACH
7825 //
7826 // map(to: s.ps->ps)
7827 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM | IMPLICIT // (+)
7828 // &(s.ps[0]), &(s.ps->ps), sizeof(S2*), TO
7829 // &(s.ps), &(s.ps->ps), sizeof(void*), ATTACH
7830 //
7831 // map(s.ps->ps->ps)
7832 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM | IMPLICIT // (+)
7833 // &(s.ps->ps[0]), &(s.ps->ps->ps), sizeof(S2*), TO
7834 // &(s.ps->ps), &(s.ps->ps->ps), sizeof(void*), ATTACH
7835 //
7836 // map(to: s.ps->ps->s.f[:22])
7837 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM | IMPLICIT // (+)
7838 // &(s.ps->ps[0]), &(s.ps->ps->s.f[0]), 22*sizeof(float), TO
7839 // &(s.ps->ps), &(s.ps->ps->s.f[0]), sizeof(void*), ATTACH
7840 //
7841 // map(ps)
7842 // &ps, &ps, sizeof(S2*), TARGET_PARAM | TO | FROM
7843 //
7844 // map(ps->i)
7845 // ps, &(ps->i), sizeof(int), TARGET_PARAM | TO | FROM
7846 // &ps, &(ps->i), sizeof(void*), ATTACH
7847 //
7848 // map(ps->s.f)
7849 // ps, &(ps->s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
7850 // &ps, &(ps->s.f[0]), sizeof(ps), ATTACH
7851 //
7852 // map(from: ps->p)
7853 // ps, &(ps->p), sizeof(double*), TARGET_PARAM | FROM
7854 // &ps, &(ps->p), sizeof(ps), ATTACH
7855 //
7856 // map(to: ps->p[:22])
7857 // ps, &(ps[0]), 0, TARGET_PARAM | IMPLICIT // (+)
7858 // &(ps->p[0]), &(ps->p[0]), 22*sizeof(double), TO
7859 // &(ps->p), &(ps->p[0]), sizeof(void*), ATTACH
7860 //
7861 // map(ps->ps)
7862 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM | TO | FROM
7863 // &ps, &(ps->ps), sizeof(ps), ATTACH
7864 //
7865 // map(from: ps->ps->s.i)
7866 // ps, &(ps[0]), 0, TARGET_PARAM | IMPLICIT // (+)
7867 // &(ps->ps[0]), &(ps->ps->s.i), sizeof(int), FROM
7868 // &(ps->ps), &(ps->ps->s.i), sizeof(void*), ATTACH
7869 //
7870 // map(from: ps->ps->ps)
7871 // ps, &ps[0], 0, TARGET_PARAM | IMPLICIT // (+)
7872 // &(ps->ps[0]), &(ps->ps->ps), sizeof(S2*), FROM
7873 // &(ps->ps), &(ps->ps->ps), sizeof(void*), ATTACH
7874 //
7875 // map(ps->ps->ps->ps)
7876 // ps, &ps[0], 0, TARGET_PARAM | IMPLICIT // (+)
7877 // &(ps->ps->ps[0]), &(ps->ps->ps->ps), sizeof(S2*), FROM
7878 // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(void*), ATTACH
7879 //
7880 // map(to: ps->ps->ps->s.f[:22])
7881 // ps, &ps[0], 0, TARGET_PARAM | IMPLICIT // (+)
7882 // &(ps->ps->ps[0]), &(ps->ps->ps->s.f[0]), 22*sizeof(float), TO
7883 // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), sizeof(void*), ATTACH
7884 //
7885 // map(to: s.f[:22]) map(from: s.p[:33])
7886 // On target, and if s is used in the region:
7887 //
7888 // &s, &(s.f[0]), 50*sizeof(float) +
7889 // sizeof(struct S1) +
7890 // sizeof(double*) (**), TARGET_PARAM
7891 // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | TO
7892 // &s, &(s.p), sizeof(double*), MEMBER_OF(1) | TO |
7893 // FROM | IMPLICIT
7894 // &(s.p[0]), &(s.p[0]), 33*sizeof(double), FROM
7895 // &(s.p), &(s.p[0]), sizeof(void*), ATTACH
7896 // (**) allocate contiguous space needed to fit all mapped members even if
7897 // we allocate space for members not mapped (in this example,
7898 // s.f[22..49] and s.s are not mapped, yet we must allocate space for
7899 // them as well because they fall between &s.f[0] and &s.p)
7900 //
7901 // On other constructs, and, if s is not used in the region, on target:
7902 // &s, &(s.f[0]), 22*sizeof(float), TO
7903 // &(s.p[0]), &(s.p[0]), 33*sizeof(double), FROM
7904 // &(s.p), &(s.p[0]), sizeof(void*), ATTACH
7905 //
7906 // map(from: s.f[:22]) map(to: ps->p[:33])
7907 // &s, &(s.f[0]), 22*sizeof(float), TARGET_PARAM | FROM
7908 // &ps[0], &ps[0], 0, TARGET_PARAM | IMPLICIT // (+)
7909 // &(ps->p[0]), &(ps->p[0]), 33*sizeof(double), TO
7910 // &(ps->p), &(ps->p[0]), sizeof(void*), ATTACH
7911 //
7912 // map(from: s.f[:22], s.s) map(to: ps->p[:33])
7913 // &s, &(s.f[0]), 50*sizeof(float) +
7914 // sizeof(struct S1), TARGET_PARAM
7915 // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | FROM
7916 // &s, &(s.s), sizeof(struct S1), MEMBER_OF(1) | FROM
7917 // ps, &ps[0], 0, TARGET_PARAM | IMPLICIT // (+)
7918 // &(ps->p[0]), &(ps->p[0]), 33*sizeof(double), TO
7919 // &(ps->p), &(ps->p[0]), sizeof(void*), ATTACH
7920 //
7921 // map(p[:100], p)
7922 // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM
7923 // p, &p[0], 100*sizeof(float), TO | FROM
7924 // &p, &p[0], sizeof(float*), ATTACH
7925
7926 // Track if the map information being generated is the first for a capture.
7927 bool IsCaptureFirstInfo = IsFirstComponentList;
7928 // When the variable is on a declare target link or in a to clause with
7929 // unified memory, a reference is needed to hold the host/device address
7930 // of the variable.
7931 bool RequiresReference = false;
7932
7933 // Scan the components from the base to the complete expression.
7934 auto CI = Components.rbegin();
7935 auto CE = Components.rend();
7936 auto I = CI;
7937
7938 // Track if the map information being generated is the first for a list of
7939 // components.
7940 bool IsExpressionFirstInfo = true;
7941 bool FirstPointerInComplexData = false;
7942 Address BP = Address::invalid();
7943 Address FinalLowestElem = Address::invalid();
7944 const Expr *AssocExpr = I->getAssociatedExpression();
7945 const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr);
7946 const auto *OASE = dyn_cast<ArraySectionExpr>(AssocExpr);
7947 const auto *OAShE = dyn_cast<OMPArrayShapingExpr>(AssocExpr);
7948
7949 // Get the pointer-attachment base-pointer for the given list, if any.
7950 const Expr *AttachPtrExpr = getAttachPtrExpr(Components);
7951 auto [AttachPtrAddr, AttachPteeBaseAddr] =
7952 getAttachPtrAddrAndPteeBaseAddr(AttachPtrExpr, CGF);
7953
7954 bool HasAttachPtr = AttachPtrExpr != nullptr;
7955 bool FirstComponentIsForAttachPtr = AssocExpr == AttachPtrExpr;
7956 bool SeenAttachPtr = FirstComponentIsForAttachPtr;
7957
7958 if (FirstComponentIsForAttachPtr) {
7959 // No need to process AttachPtr here. It will be processed at the end
7960 // after we have computed the pointee's address.
7961 ++I;
7962 } else if (isa<MemberExpr>(AssocExpr)) {
7963 // The base is the 'this' pointer. The content of the pointer is going
7964 // to be the base of the field being mapped.
7965 BP = CGF.LoadCXXThisAddress();
7966 } else if ((AE && isa<CXXThisExpr>(AE->getBase()->IgnoreParenImpCasts())) ||
7967 (OASE &&
7968 isa<CXXThisExpr>(OASE->getBase()->IgnoreParenImpCasts()))) {
7969 BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress();
7970 } else if (OAShE &&
7971 isa<CXXThisExpr>(OAShE->getBase()->IgnoreParenCasts())) {
7972 BP = Address(
7973 CGF.EmitScalarExpr(OAShE->getBase()),
7974 CGF.ConvertTypeForMem(OAShE->getBase()->getType()->getPointeeType()),
7975 CGF.getContext().getTypeAlignInChars(OAShE->getBase()->getType()));
7976 } else {
7977 // The base is the reference to the variable.
7978 // BP = &Var.
7979 BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress();
7980 if (const auto *VD =
7981 dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) {
7982 if (std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
7983 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) {
7984 if ((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
7985 ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
7986 *Res == OMPDeclareTargetDeclAttr::MT_Enter ||
7987 *Res == OMPDeclareTargetDeclAttr::MT_Local) &&
7989 RequiresReference = true;
7991 }
7992 }
7993 }
7994
7995 // If the variable is a pointer and is being dereferenced (i.e. is not
7996 // the last component), the base has to be the pointer itself, not its
7997 // reference. References are ignored for mapping purposes.
7998 QualType Ty =
7999 I->getAssociatedDeclaration()->getType().getNonReferenceType();
8000 if (Ty->isAnyPointerType() && std::next(I) != CE) {
8001 // No need to generate individual map information for the pointer, it
8002 // can be associated with the combined storage if shared memory mode is
8003 // active or the base declaration is not global variable.
8004 const auto *VD = dyn_cast<VarDecl>(I->getAssociatedDeclaration());
8006 !VD || VD->hasLocalStorage() || HasAttachPtr)
8007 BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
8008 else
8009 FirstPointerInComplexData = true;
8010 ++I;
8011 }
8012 }
8013
8014 // Track whether a component of the list should be marked as MEMBER_OF some
8015 // combined entry (for partial structs). Only the first PTR_AND_OBJ entry
8016 // in a component list should be marked as MEMBER_OF, all subsequent entries
8017 // do not belong to the base struct. E.g.
8018 // struct S2 s;
8019 // s.ps->ps->ps->f[:]
8020 // (1) (2) (3) (4)
8021 // ps(1) is a member pointer, ps(2) is a pointee of ps(1), so it is a
8022 // PTR_AND_OBJ entry; the PTR is ps(1), so MEMBER_OF the base struct. ps(3)
8023 // is the pointee of ps(2) which is not member of struct s, so it should not
8024 // be marked as such (it is still PTR_AND_OBJ).
8025 // The variable is initialized to false so that PTR_AND_OBJ entries which
8026 // are not struct members are not considered (e.g. array of pointers to
8027 // data).
8028 bool ShouldBeMemberOf = false;
8029
8030 // Variable keeping track of whether or not we have encountered a component
8031 // in the component list which is a member expression. Useful when we have a
8032 // pointer or a final array section, in which case it is the previous
8033 // component in the list which tells us whether we have a member expression.
8034 // E.g. X.f[:]
8035 // While processing the final array section "[:]" it is "f" which tells us
8036 // whether we are dealing with a member of a declared struct.
8037 const MemberExpr *EncounteredME = nullptr;
8038
8039 // Track for the total number of dimension. Start from one for the dummy
8040 // dimension.
8041 uint64_t DimSize = 1;
8042
8043 // Detects non-contiguous updates due to strided accesses.
8044 // Sets the 'IsNonContiguous' flag so that the 'MapType' bits are set
8045 // correctly when generating information to be passed to the runtime. The
8046 // flag is set to true if any array section has a stride not equal to 1, or
8047 // if the stride is not a constant expression (conservatively assumed
8048 // non-contiguous).
8049 bool IsNonContiguous =
8050 CombinedInfo.NonContigInfo.IsNonContiguous ||
8051 any_of(Components, [&](const auto &Component) {
8052 const auto *OASE =
8053 dyn_cast<ArraySectionExpr>(Component.getAssociatedExpression());
8054 if (!OASE)
8055 return false;
8056
8057 const Expr *StrideExpr = OASE->getStride();
8058 if (!StrideExpr)
8059 return false;
8060
8061 assert(StrideExpr->getType()->isIntegerType() &&
8062 "Stride expression must be of integer type");
8063
8064 // If stride is not evaluatable as a constant, treat as
8065 // non-contiguous.
8066 const auto Constant =
8067 StrideExpr->getIntegerConstantExpr(CGF.getContext());
8068 if (!Constant)
8069 return true;
8070
8071 // Treat non-unitary strides as non-contiguous.
8072 return !Constant->isOne();
8073 });
8074
8075 bool IsPrevMemberReference = false;
8076
8077 bool IsPartialMapped =
8078 !PartialStruct.PreliminaryMapData.BasePointers.empty();
8079
8080 // We need to check if we will be encountering any MEs. If we do not
8081 // encounter any ME expression it means we will be mapping the whole struct.
8082 // In that case we need to skip adding an entry for the struct to the
8083 // CombinedInfo list and instead add an entry to the StructBaseCombinedInfo
8084 // list only when generating all info for clauses.
8085 bool IsMappingWholeStruct = true;
8086 if (!GenerateAllInfoForClauses) {
8087 IsMappingWholeStruct = false;
8088 } else {
8089 for (auto TempI = I; TempI != CE; ++TempI) {
8090 const MemberExpr *PossibleME =
8091 dyn_cast<MemberExpr>(TempI->getAssociatedExpression());
8092 if (PossibleME) {
8093 IsMappingWholeStruct = false;
8094 break;
8095 }
8096 }
8097 }
8098
8099 bool SeenFirstNonBinOpExprAfterAttachPtr = false;
8100 for (; I != CE; ++I) {
8101 // If we have a valid attach-ptr, we skip processing all components until
8102 // after the attach-ptr.
8103 if (HasAttachPtr && !SeenAttachPtr) {
8104 SeenAttachPtr = I->getAssociatedExpression() == AttachPtrExpr;
8105 continue;
8106 }
8107
8108 // After finding the attach pointer, skip binary-ops, to skip past
8109 // expressions like (p + 10), for a map like map(*(p + 10)), where p is
8110 // the attach-ptr.
8111 if (HasAttachPtr && !SeenFirstNonBinOpExprAfterAttachPtr) {
8112 const auto *BO = dyn_cast<BinaryOperator>(I->getAssociatedExpression());
8113 if (BO)
8114 continue;
8115
8116 // Found the first non-binary-operator component after attach
8117 SeenFirstNonBinOpExprAfterAttachPtr = true;
8118 BP = AttachPteeBaseAddr;
8119 }
8120
8121 // If the current component is member of a struct (parent struct) mark it.
8122 if (!EncounteredME) {
8123 EncounteredME = dyn_cast<MemberExpr>(I->getAssociatedExpression());
8124 // If we encounter a PTR_AND_OBJ entry from now on it should be marked
8125 // as MEMBER_OF the parent struct.
8126 if (EncounteredME) {
8127 ShouldBeMemberOf = true;
8128 // Do not emit as complex pointer if this is actually not array-like
8129 // expression.
8130 if (FirstPointerInComplexData) {
8131 QualType Ty = std::prev(I)
8132 ->getAssociatedDeclaration()
8133 ->getType()
8134 .getNonReferenceType();
8135 BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
8136 FirstPointerInComplexData = false;
8137 }
8138 }
8139 }
8140
8141 auto Next = std::next(I);
8142
8143 // We need to generate the addresses and sizes if this is the last
8144 // component, if the component is a pointer or if it is an array section
8145 // whose length can't be proved to be one. If this is a pointer, it
8146 // becomes the base address for the following components.
8147
8148 // A final array section, is one whose length can't be proved to be one.
8149 // If the map item is non-contiguous then we don't treat any array section
8150 // as final array section.
8151 bool IsFinalArraySection =
8152 !IsNonContiguous &&
8153 isFinalArraySectionExpression(I->getAssociatedExpression());
8154
8155 // If we have a declaration for the mapping use that, otherwise use
8156 // the base declaration of the map clause.
8157 const ValueDecl *MapDecl = (I->getAssociatedDeclaration())
8158 ? I->getAssociatedDeclaration()
8159 : BaseDecl;
8160 MapExpr = (I->getAssociatedExpression()) ? I->getAssociatedExpression()
8161 : MapExpr;
8162
8163 // Get information on whether the element is a pointer. Have to do a
8164 // special treatment for array sections given that they are built-in
8165 // types.
8166 const auto *OASE =
8167 dyn_cast<ArraySectionExpr>(I->getAssociatedExpression());
8168 const auto *OAShE =
8169 dyn_cast<OMPArrayShapingExpr>(I->getAssociatedExpression());
8170 const auto *UO = dyn_cast<UnaryOperator>(I->getAssociatedExpression());
8171 const auto *BO = dyn_cast<BinaryOperator>(I->getAssociatedExpression());
8172 bool IsPointer =
8173 OAShE ||
8176 ->isAnyPointerType()) ||
8177 I->getAssociatedExpression()->getType()->isAnyPointerType();
8178 bool IsMemberReference = isa<MemberExpr>(I->getAssociatedExpression()) &&
8179 MapDecl &&
8180 MapDecl->getType()->isLValueReferenceType();
8181 bool IsNonDerefPointer = IsPointer &&
8182 !(UO && UO->getOpcode() != UO_Deref) && !BO &&
8183 !IsNonContiguous;
8184
8185 if (OASE)
8186 ++DimSize;
8187
8188 if (Next == CE || IsMemberReference || IsNonDerefPointer ||
8189 IsFinalArraySection) {
8190 // If this is not the last component, we expect the pointer to be
8191 // associated with an array expression or member expression.
8192 assert((Next == CE ||
8193 isa<MemberExpr>(Next->getAssociatedExpression()) ||
8194 isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) ||
8195 isa<ArraySectionExpr>(Next->getAssociatedExpression()) ||
8196 isa<OMPArrayShapingExpr>(Next->getAssociatedExpression()) ||
8197 isa<UnaryOperator>(Next->getAssociatedExpression()) ||
8198 isa<BinaryOperator>(Next->getAssociatedExpression())) &&
8199 "Unexpected expression");
8200
8201 Address LB = Address::invalid();
8202 Address LowestElem = Address::invalid();
8203 auto &&EmitMemberExprBase = [](CodeGenFunction &CGF,
8204 const MemberExpr *E) {
8205 const Expr *BaseExpr = E->getBase();
8206 // If this is s.x, emit s as an lvalue. If it is s->x, emit s as a
8207 // scalar.
8208 LValue BaseLV;
8209 if (E->isArrow()) {
8210 LValueBaseInfo BaseInfo;
8211 TBAAAccessInfo TBAAInfo;
8212 Address Addr =
8213 CGF.EmitPointerWithAlignment(BaseExpr, &BaseInfo, &TBAAInfo);
8214 QualType PtrTy = BaseExpr->getType()->getPointeeType();
8215 BaseLV = CGF.MakeAddrLValue(Addr, PtrTy, BaseInfo, TBAAInfo);
8216 } else {
8217 BaseLV = CGF.EmitOMPSharedLValue(BaseExpr);
8218 }
8219 return BaseLV;
8220 };
8221 if (OAShE) {
8222 LowestElem = LB =
8223 Address(CGF.EmitScalarExpr(OAShE->getBase()),
8225 OAShE->getBase()->getType()->getPointeeType()),
8227 OAShE->getBase()->getType()));
8228 } else if (IsMemberReference) {
8229 const auto *ME = cast<MemberExpr>(I->getAssociatedExpression());
8230 LValue BaseLVal = EmitMemberExprBase(CGF, ME);
8231 LowestElem = CGF.EmitLValueForFieldInitialization(
8232 BaseLVal, cast<FieldDecl>(MapDecl))
8233 .getAddress();
8234 LB = CGF.EmitLoadOfReferenceLValue(LowestElem, MapDecl->getType())
8235 .getAddress();
8236 } else {
8237 LowestElem = LB =
8238 CGF.EmitOMPSharedLValue(I->getAssociatedExpression())
8239 .getAddress();
8240 }
8241
8242 // Save the final LowestElem, to use it as the pointee in attach maps,
8243 // if emitted.
8244 if (Next == CE)
8245 FinalLowestElem = LowestElem;
8246
8247 // If this component is a pointer inside the base struct then we don't
8248 // need to create any entry for it - it will be combined with the object
8249 // it is pointing to into a single PTR_AND_OBJ entry.
8250 bool IsMemberPointerOrAddr =
8251 EncounteredME &&
8252 (((IsPointer || ForDeviceAddr) &&
8253 I->getAssociatedExpression() == EncounteredME) ||
8254 (IsPrevMemberReference && !IsPointer) ||
8255 (IsMemberReference && Next != CE &&
8256 !Next->getAssociatedExpression()->getType()->isPointerType()));
8257 if (!OverlappedElements.empty() && Next == CE) {
8258 // Handle base element with the info for overlapped elements.
8259 assert(!PartialStruct.Base.isValid() && "The base element is set.");
8260 assert(!IsPointer &&
8261 "Unexpected base element with the pointer type.");
8262 // Mark the whole struct as the struct that requires allocation on the
8263 // device.
8264 PartialStruct.LowestElem = {0, LowestElem};
8265 CharUnits TypeSize = CGF.getContext().getTypeSizeInChars(
8266 I->getAssociatedExpression()->getType());
8267 Address HB = CGF.Builder.CreateConstGEP(
8269 LowestElem, CGF.VoidPtrTy, CGF.Int8Ty),
8270 TypeSize.getQuantity() - 1);
8271 PartialStruct.HighestElem = {
8272 std::numeric_limits<decltype(
8273 PartialStruct.HighestElem.first)>::max(),
8274 HB};
8275 PartialStruct.Base = BP;
8276 PartialStruct.LB = LB;
8277 assert(
8278 PartialStruct.PreliminaryMapData.BasePointers.empty() &&
8279 "Overlapped elements must be used only once for the variable.");
8280 std::swap(PartialStruct.PreliminaryMapData, CombinedInfo);
8281 // Emit data for non-overlapped data.
8282 OpenMPOffloadMappingFlags Flags =
8283 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF |
8284 getMapTypeBits(MapType, MapModifiers, MotionModifiers, IsImplicit,
8285 /*AddPtrFlag=*/false,
8286 /*AddIsTargetParamFlag=*/false, IsNonContiguous);
8287 CopyOverlappedEntryGaps CopyGaps(CGF, CombinedInfo, Flags, MapDecl,
8288 MapExpr, BP, LB, IsNonContiguous,
8289 DimSize);
8290 // Do bitcopy of all non-overlapped structure elements.
8292 Component : OverlappedElements) {
8293 for (const OMPClauseMappableExprCommon::MappableComponent &MC :
8294 Component) {
8295 if (const ValueDecl *VD = MC.getAssociatedDeclaration()) {
8296 if (const auto *FD = dyn_cast<FieldDecl>(VD)) {
8297 CopyGaps.processField(MC, FD, EmitMemberExprBase);
8298 }
8299 }
8300 }
8301 }
8302 CopyGaps.copyUntilEnd(HB);
8303 break;
8304 }
8305 llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression());
8306 // Skip adding an entry in the CurInfo of this combined entry if the
8307 // whole struct is currently being mapped. The struct needs to be added
8308 // in the first position before any data internal to the struct is being
8309 // mapped.
8310 // Skip adding an entry in the CurInfo of this combined entry if the
8311 // PartialStruct.PreliminaryMapData.BasePointers has been mapped.
8312 if ((!IsMemberPointerOrAddr && !IsPartialMapped) ||
8313 (Next == CE && MapType != OMPC_MAP_unknown)) {
8314 if (!IsMappingWholeStruct) {
8315 CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
8316 CombinedInfo.BasePointers.push_back(BP.emitRawPointer(CGF));
8317 CombinedInfo.DevicePtrDecls.push_back(nullptr);
8318 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
8319 CombinedInfo.Pointers.push_back(LB.emitRawPointer(CGF));
8320 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
8321 Size, CGF.Int64Ty, /*isSigned=*/true));
8322 CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
8323 : 1);
8324 } else {
8325 StructBaseCombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
8326 StructBaseCombinedInfo.BasePointers.push_back(
8327 BP.emitRawPointer(CGF));
8328 StructBaseCombinedInfo.DevicePtrDecls.push_back(nullptr);
8329 StructBaseCombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
8330 StructBaseCombinedInfo.Pointers.push_back(LB.emitRawPointer(CGF));
8331 StructBaseCombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
8332 Size, CGF.Int64Ty, /*isSigned=*/true));
8333 StructBaseCombinedInfo.NonContigInfo.Dims.push_back(
8334 IsNonContiguous ? DimSize : 1);
8335 }
8336
8337 // If Mapper is valid, the last component inherits the mapper.
8338 bool HasMapper = Mapper && Next == CE;
8339 if (!IsMappingWholeStruct)
8340 CombinedInfo.Mappers.push_back(HasMapper ? Mapper : nullptr);
8341 else
8342 StructBaseCombinedInfo.Mappers.push_back(HasMapper ? Mapper
8343 : nullptr);
8344
8345 // We need to add a pointer flag for each map that comes from the
8346 // same expression except for the first one. We also need to signal
8347 // this map is the first one that relates with the current capture
8348 // (there is a set of entries for each capture).
8349 OpenMPOffloadMappingFlags Flags = getMapTypeBits(
8350 MapType, MapModifiers, MotionModifiers, IsImplicit,
8351 !IsExpressionFirstInfo || RequiresReference ||
8352 FirstPointerInComplexData || IsMemberReference,
8353 IsCaptureFirstInfo && !RequiresReference, IsNonContiguous);
8354
8355 if (!IsExpressionFirstInfo || IsMemberReference) {
8356 // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as well,
8357 // then we reset the TO/FROM/ALWAYS/DELETE/CLOSE flags.
8358 if (IsPointer || (IsMemberReference && Next != CE))
8359 Flags &= ~(OpenMPOffloadMappingFlags::OMP_MAP_TO |
8360 OpenMPOffloadMappingFlags::OMP_MAP_FROM |
8361 OpenMPOffloadMappingFlags::OMP_MAP_ALWAYS |
8362 OpenMPOffloadMappingFlags::OMP_MAP_DELETE |
8363 OpenMPOffloadMappingFlags::OMP_MAP_CLOSE);
8364
8365 if (ShouldBeMemberOf) {
8366 // Set placeholder value MEMBER_OF=FFFF to indicate that the flag
8367 // should be later updated with the correct value of MEMBER_OF.
8368 Flags |= OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF;
8369 // From now on, all subsequent PTR_AND_OBJ entries should not be
8370 // marked as MEMBER_OF.
8371 ShouldBeMemberOf = false;
8372 }
8373 }
8374
8375 if (!IsMappingWholeStruct)
8376 CombinedInfo.Types.push_back(Flags);
8377 else
8378 StructBaseCombinedInfo.Types.push_back(Flags);
8379 }
8380
8381 // If we have encountered a member expression so far, keep track of the
8382 // mapped member. If the parent is "*this", then the value declaration
8383 // is nullptr.
8384 if (EncounteredME) {
8385 const auto *FD = cast<FieldDecl>(EncounteredME->getMemberDecl());
8386 unsigned FieldIndex = FD->getFieldIndex();
8387
8388 // Update info about the lowest and highest elements for this struct
8389 if (!PartialStruct.Base.isValid()) {
8390 PartialStruct.LowestElem = {FieldIndex, LowestElem};
8391 if (IsFinalArraySection && OASE) {
8392 Address HB =
8393 CGF.EmitArraySectionExpr(OASE, /*IsLowerBound=*/false)
8394 .getAddress();
8395 PartialStruct.HighestElem = {FieldIndex, HB};
8396 } else {
8397 PartialStruct.HighestElem = {FieldIndex, LowestElem};
8398 }
8399 PartialStruct.Base = BP;
8400 PartialStruct.LB = BP;
8401 } else if (FieldIndex < PartialStruct.LowestElem.first) {
8402 PartialStruct.LowestElem = {FieldIndex, LowestElem};
8403 } else if (FieldIndex > PartialStruct.HighestElem.first) {
8404 if (IsFinalArraySection && OASE) {
8405 Address HB =
8406 CGF.EmitArraySectionExpr(OASE, /*IsLowerBound=*/false)
8407 .getAddress();
8408 PartialStruct.HighestElem = {FieldIndex, HB};
8409 } else {
8410 PartialStruct.HighestElem = {FieldIndex, LowestElem};
8411 }
8412 }
8413 }
8414
8415 // Need to emit combined struct for array sections.
8416 if (IsFinalArraySection || IsNonContiguous)
8417 PartialStruct.IsArraySection = true;
8418
8419 // If we have a final array section, we are done with this expression.
8420 if (IsFinalArraySection)
8421 break;
8422
8423 // The pointer becomes the base for the next element.
8424 if (Next != CE)
8425 BP = IsMemberReference ? LowestElem : LB;
8426 if (!IsPartialMapped)
8427 IsExpressionFirstInfo = false;
8428 IsCaptureFirstInfo = false;
8429 FirstPointerInComplexData = false;
8430 IsPrevMemberReference = IsMemberReference;
8431 } else if (FirstPointerInComplexData) {
8432 QualType Ty = Components.rbegin()
8433 ->getAssociatedDeclaration()
8434 ->getType()
8435 .getNonReferenceType();
8436 BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
8437 FirstPointerInComplexData = false;
8438 }
8439 }
8440 // If ran into the whole component - allocate the space for the whole
8441 // record.
8442 if (!EncounteredME)
8443 PartialStruct.HasCompleteRecord = true;
8444
8445 // Populate ATTACH information for later processing by emitAttachEntry.
8446 if (shouldEmitAttachEntry(AttachPtrExpr, BaseDecl, CGF, CurDir)) {
8447 AttachInfo.AttachPtrAddr = AttachPtrAddr;
8448 AttachInfo.AttachPteeAddr = FinalLowestElem;
8449 AttachInfo.AttachPtrDecl = BaseDecl;
8450 AttachInfo.AttachMapExpr = MapExpr;
8451 }
8452
8453 if (!IsNonContiguous)
8454 return;
8455
8456 const ASTContext &Context = CGF.getContext();
8457
8458 // For supporting stride in array section, we need to initialize the first
8459 // dimension size as 1, first offset as 0, and first count as 1
8460 MapValuesArrayTy CurOffsets = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 0)};
8461 MapValuesArrayTy CurCounts;
8462 MapValuesArrayTy CurStrides = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)};
8463 MapValuesArrayTy DimSizes{llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)};
8464 uint64_t ElementTypeSize;
8465
8466 // Collect Size information for each dimension and get the element size as
8467 // the first Stride. For example, for `int arr[10][10]`, the DimSizes
8468 // should be [10, 10] and the first stride is 4 btyes.
8469 for (const OMPClauseMappableExprCommon::MappableComponent &Component :
8470 Components) {
8471 const Expr *AssocExpr = Component.getAssociatedExpression();
8472 const auto *OASE = dyn_cast<ArraySectionExpr>(AssocExpr);
8473
8474 if (!OASE)
8475 continue;
8476
8477 QualType Ty = ArraySectionExpr::getBaseOriginalType(OASE->getBase());
8478 auto *CAT = Context.getAsConstantArrayType(Ty);
8479 auto *VAT = Context.getAsVariableArrayType(Ty);
8480
8481 // We need all the dimension size except for the last dimension.
8482 assert((VAT || CAT || &Component == &*Components.begin()) &&
8483 "Should be either ConstantArray or VariableArray if not the "
8484 "first Component");
8485
8486 // Get element size if CurCounts is empty.
8487 if (CurCounts.empty()) {
8488 const Type *ElementType = nullptr;
8489 if (CAT)
8490 ElementType = CAT->getElementType().getTypePtr();
8491 else if (VAT)
8492 ElementType = VAT->getElementType().getTypePtr();
8493 else if (&Component == &*Components.begin()) {
8494 // If the base is a raw pointer (e.g. T *data with data[a:b:c]),
8495 // there was no earlier CAT/VAT/array handling to establish
8496 // ElementType. Capture the pointee type now so that subsequent
8497 // components (offset/length/stride) have a concrete element type to
8498 // work with. This makes pointer-backed sections behave consistently
8499 // with CAT/VAT/array bases.
8500 if (const auto *PtrType = Ty->getAs<PointerType>())
8501 ElementType = PtrType->getPointeeType().getTypePtr();
8502 } else {
8503 // Any component after the first should never have a raw pointer type;
8504 // by this point. ElementType must already be known (set above or in
8505 // prior array / CAT / VAT handling).
8506 assert(!Ty->isPointerType() &&
8507 "Non-first components should not be raw pointers");
8508 }
8509
8510 // At this stage, if ElementType was a base pointer and we are in the
8511 // first iteration, it has been computed.
8512 if (ElementType) {
8513 // For the case that having pointer as base, we need to remove one
8514 // level of indirection.
8515 if (&Component != &*Components.begin())
8516 ElementType = ElementType->getPointeeOrArrayElementType();
8517 ElementTypeSize =
8518 Context.getTypeSizeInChars(ElementType).getQuantity();
8519 CurCounts.push_back(
8520 llvm::ConstantInt::get(CGF.Int64Ty, ElementTypeSize));
8521 }
8522 }
8523 // Get dimension value except for the last dimension since we don't need
8524 // it.
8525 if (DimSizes.size() < Components.size() - 1) {
8526 if (CAT)
8527 DimSizes.push_back(
8528 llvm::ConstantInt::get(CGF.Int64Ty, CAT->getZExtSize()));
8529 else if (VAT)
8530 DimSizes.push_back(CGF.Builder.CreateIntCast(
8531 CGF.EmitScalarExpr(VAT->getSizeExpr()), CGF.Int64Ty,
8532 /*IsSigned=*/false));
8533 }
8534 }
8535
8536 // Skip the dummy dimension since we have already have its information.
8537 auto *DI = DimSizes.begin() + 1;
8538 // Product of dimension.
8539 llvm::Value *DimProd =
8540 llvm::ConstantInt::get(CGF.CGM.Int64Ty, ElementTypeSize);
8541
8542 // Collect info for non-contiguous. Notice that offset, count, and stride
8543 // are only meaningful for array-section, so we insert a null for anything
8544 // other than array-section.
8545 // Also, the size of offset, count, and stride are not the same as
8546 // pointers, base_pointers, sizes, or dims. Instead, the size of offset,
8547 // count, and stride are the same as the number of non-contiguous
8548 // declaration in target update to/from clause.
8549 for (const OMPClauseMappableExprCommon::MappableComponent &Component :
8550 Components) {
8551 const Expr *AssocExpr = Component.getAssociatedExpression();
8552
8553 if (const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr)) {
8554 llvm::Value *Offset = CGF.Builder.CreateIntCast(
8555 CGF.EmitScalarExpr(AE->getIdx()), CGF.Int64Ty,
8556 /*isSigned=*/false);
8557 CurOffsets.push_back(Offset);
8558 CurCounts.push_back(llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/1));
8559 CurStrides.push_back(CurStrides.back());
8560 continue;
8561 }
8562
8563 const auto *OASE = dyn_cast<ArraySectionExpr>(AssocExpr);
8564
8565 if (!OASE)
8566 continue;
8567
8568 // Offset
8569 const Expr *OffsetExpr = OASE->getLowerBound();
8570 llvm::Value *Offset = nullptr;
8571 if (!OffsetExpr) {
8572 // If offset is absent, then we just set it to zero.
8573 Offset = llvm::ConstantInt::get(CGF.Int64Ty, 0);
8574 } else {
8575 Offset = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(OffsetExpr),
8576 CGF.Int64Ty,
8577 /*isSigned=*/false);
8578 }
8579
8580 // Count
8581 const Expr *CountExpr = OASE->getLength();
8582 llvm::Value *Count = nullptr;
8583 if (!CountExpr) {
8584 // In Clang, once a high dimension is an array section, we construct all
8585 // the lower dimension as array section, however, for case like
8586 // arr[0:2][2], Clang construct the inner dimension as an array section
8587 // but it actually is not in an array section form according to spec.
8588 if (!OASE->getColonLocFirst().isValid() &&
8589 !OASE->getColonLocSecond().isValid()) {
8590 Count = llvm::ConstantInt::get(CGF.Int64Ty, 1);
8591 } else {
8592 // OpenMP 5.0, 2.1.5 Array Sections, Description.
8593 // When the length is absent it defaults to ⌈(size −
8594 // lower-bound)/stride⌉, where size is the size of the array
8595 // dimension.
8596 const Expr *StrideExpr = OASE->getStride();
8597 llvm::Value *Stride =
8598 StrideExpr
8599 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr),
8600 CGF.Int64Ty, /*isSigned=*/false)
8601 : nullptr;
8602 if (Stride)
8603 Count = CGF.Builder.CreateUDiv(
8604 CGF.Builder.CreateNUWSub(*DI, Offset), Stride);
8605 else
8606 Count = CGF.Builder.CreateNUWSub(*DI, Offset);
8607 }
8608 } else {
8609 Count = CGF.EmitScalarExpr(CountExpr);
8610 }
8611 Count = CGF.Builder.CreateIntCast(Count, CGF.Int64Ty, /*isSigned=*/false);
8612 CurCounts.push_back(Count);
8613
8614 // Stride_n' = Stride_n * (D_0 * D_1 ... * D_n-1) * Unit size
8615 // Offset_n' = Offset_n * (D_0 * D_1 ... * D_n-1) * Unit size
8616 // Take `int arr[5][5][5]` and `arr[0:2:2][1:2:1][0:2:2]` as an example:
8617 // Offset Count Stride
8618 // D0 0 4 1 (int) <- dummy dimension
8619 // D1 0 2 8 (2 * (1) * 4)
8620 // D2 100 2 20 (1 * (1 * 5) * 4)
8621 // D3 0 2 200 (2 * (1 * 5 * 4) * 4)
8622 const Expr *StrideExpr = OASE->getStride();
8623 llvm::Value *Stride =
8624 StrideExpr
8625 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr),
8626 CGF.Int64Ty, /*isSigned=*/false)
8627 : nullptr;
8628 DimProd = CGF.Builder.CreateNUWMul(DimProd, *(DI - 1));
8629 if (Stride)
8630 CurStrides.push_back(CGF.Builder.CreateNUWMul(DimProd, Stride));
8631 else
8632 CurStrides.push_back(DimProd);
8633
8634 Offset = CGF.Builder.CreateNUWMul(DimProd, Offset);
8635 CurOffsets.push_back(Offset);
8636
8637 if (DI != DimSizes.end())
8638 ++DI;
8639 }
8640
8641 CombinedInfo.NonContigInfo.Offsets.push_back(CurOffsets);
8642 CombinedInfo.NonContigInfo.Counts.push_back(CurCounts);
8643 CombinedInfo.NonContigInfo.Strides.push_back(CurStrides);
8644 }
8645
8646 /// Return the adjusted map modifiers if the declaration a capture refers to
8647 /// appears in a first-private clause. This is expected to be used only with
8648 /// directives that start with 'target'.
8649 OpenMPOffloadMappingFlags
8650 getMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap) const {
8651 assert(Cap.capturesVariable() && "Expected capture by reference only!");
8652
8653 // A first private variable captured by reference will use only the
8654 // 'private ptr' and 'map to' flag. Return the right flags if the captured
8655 // declaration is known as first-private in this handler.
8656 if (FirstPrivateDecls.count(Cap.getCapturedVar())) {
8657 if (Cap.getCapturedVar()->getType()->isAnyPointerType())
8658 return OpenMPOffloadMappingFlags::OMP_MAP_TO |
8659 OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ;
8660 return OpenMPOffloadMappingFlags::OMP_MAP_PRIVATE |
8661 OpenMPOffloadMappingFlags::OMP_MAP_TO;
8662 }
8663 auto I = LambdasMap.find(Cap.getCapturedVar()->getCanonicalDecl());
8664 if (I != LambdasMap.end())
8665 // for map(to: lambda): using user specified map type.
8666 return getMapTypeBits(
8667 I->getSecond()->getMapType(), I->getSecond()->getMapTypeModifiers(),
8668 /*MotionModifiers=*/{}, I->getSecond()->isImplicit(),
8669 /*AddPtrFlag=*/false,
8670 /*AddIsTargetParamFlag=*/false,
8671 /*isNonContiguous=*/false);
8672 return OpenMPOffloadMappingFlags::OMP_MAP_TO |
8673 OpenMPOffloadMappingFlags::OMP_MAP_FROM;
8674 }
8675
  /// Flatten the LLVM struct layout of \p RD into an ordered list of fields.
  ///
  /// Walks the non-virtual bases, virtual bases, and named fields of \p RD,
  /// recursing into base classes, and appends each non-empty, non-bitfield
  /// FieldDecl to \p Layout in LLVM field-index order. \p AsBase selects the
  /// base-subobject LLVM type instead of the complete-object type.
  void getPlainLayout(const CXXRecordDecl *RD,
                      llvm::SmallVectorImpl<const FieldDecl *> &Layout,
                      bool AsBase) const {
    const CGRecordLayout &RL = CGF.getTypes().getCGRecordLayout(RD);

    // The base-subobject type excludes tail padding that derived classes may
    // reuse, so use it when RD is being laid out as a base.
    llvm::StructType *St =
        AsBase ? RL.getBaseSubobjectLLVMType() : RL.getLLVMType();

    unsigned NumElements = St->getNumElements();
    // One slot per LLVM struct element; each occupied slot holds either a
    // base class (flattened recursively below) or a field.
    llvm::SmallVector<
        llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>, 4>
        RecordLayout(NumElements);

    // Fill bases.
    for (const auto &I : RD->bases()) {
      if (I.isVirtual())
        continue;

      QualType BaseTy = I.getType();
      const auto *Base = BaseTy->getAsCXXRecordDecl();
      // Ignore empty bases.
      // NOTE(review): one chained call appears elided from this view between
      // getASTRecordLayout(Base) and isZero() (presumably a size query such
      // as .getNonVirtualSize()) — confirm against the upstream sources.
      if (isEmptyRecordForLayout(CGF.getContext(), BaseTy) ||
          CGF.getContext()
              .getASTRecordLayout(Base)
              .isZero())
        continue;

      unsigned FieldIndex = RL.getNonVirtualBaseLLVMFieldNo(Base);
      RecordLayout[FieldIndex] = Base;
    }
    // Fill in virtual bases.
    for (const auto &I : RD->vbases()) {
      QualType BaseTy = I.getType();
      // Ignore empty bases.
      if (isEmptyRecordForLayout(CGF.getContext(), BaseTy))
        continue;

      const auto *Base = BaseTy->getAsCXXRecordDecl();
      unsigned FieldIndex = RL.getVirtualBaseIndex(Base);
      // A slot may already be claimed by a non-virtual base recorded above;
      // keep the first occupant.
      if (RecordLayout[FieldIndex])
        continue;
      RecordLayout[FieldIndex] = Base;
    }
    // Fill in all the fields.
    assert(!RD->isUnion() && "Unexpected union.");
    for (const auto *Field : RD->fields()) {
      // Fill in non-bitfields. (Bitfields always use a zero pattern, which we
      // will fill in later.)
      if (!Field->isBitField() &&
          !isEmptyFieldForLayout(CGF.getContext(), Field)) {
        unsigned FieldIndex = RL.getLLVMFieldNo(Field);
        RecordLayout[FieldIndex] = Field;
      }
    }
    // Emit in slot order: recurse into base classes, append plain fields.
    for (const llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>
             &Data : RecordLayout) {
      if (Data.isNull())
        continue;
      if (const auto *Base = dyn_cast<const CXXRecordDecl *>(Data))
        getPlainLayout(Base, Layout, /*AsBase=*/true);
      else
        Layout.push_back(cast<const FieldDecl *>(Data));
    }
  }
8741
8742 /// Returns the address corresponding to \p PointerExpr.
8743 static Address getAttachPtrAddr(const Expr *PointerExpr,
8744 CodeGenFunction &CGF) {
8745 assert(PointerExpr && "Cannot get addr from null attach-ptr expr");
8746 Address AttachPtrAddr = Address::invalid();
8747
8748 if (auto *DRE = dyn_cast<DeclRefExpr>(PointerExpr)) {
8749 // If the pointer is a variable, we can use its address directly.
8750 AttachPtrAddr = CGF.EmitLValue(DRE).getAddress();
8751 } else if (auto *OASE = dyn_cast<ArraySectionExpr>(PointerExpr)) {
8752 AttachPtrAddr =
8753 CGF.EmitArraySectionExpr(OASE, /*IsLowerBound=*/true).getAddress();
8754 } else if (auto *ASE = dyn_cast<ArraySubscriptExpr>(PointerExpr)) {
8755 AttachPtrAddr = CGF.EmitLValue(ASE).getAddress();
8756 } else if (auto *ME = dyn_cast<MemberExpr>(PointerExpr)) {
8757 AttachPtrAddr = CGF.EmitMemberExpr(ME).getAddress();
8758 } else if (auto *UO = dyn_cast<UnaryOperator>(PointerExpr)) {
8759 assert(UO->getOpcode() == UO_Deref &&
8760 "Unexpected unary-operator on attach-ptr-expr");
8761 AttachPtrAddr = CGF.EmitLValue(UO).getAddress();
8762 }
8763 assert(AttachPtrAddr.isValid() &&
8764 "Failed to get address for attach pointer expression");
8765 return AttachPtrAddr;
8766 }
8767
  /// Get the address of the attach pointer, and a load from it, to get the
  /// pointee base address.
  /// \return A pair containing AttachPtrAddr and AttachPteeBaseAddr. The pair
  /// contains invalid addresses if \p AttachPtrExpr is null.
  static std::pair<Address, Address>
  getAttachPtrAddrAndPteeBaseAddr(const Expr *AttachPtrExpr,
                                  CodeGenFunction &CGF) {

    // Without an attach pointer there is nothing to compute.
    if (!AttachPtrExpr)
      return {Address::invalid(), Address::invalid()};

    Address AttachPtrAddr = getAttachPtrAddr(AttachPtrExpr, CGF);
    assert(AttachPtrAddr.isValid() && "Invalid attach pointer addr");

    // NOTE(review): the initializer of AttachPtrType is elided from this view
    // (presumably derived from AttachPtrExpr's type, reduced to a
    // non-reference pointer type, given the castAs<PointerType>() below) —
    // confirm against the upstream sources.
    QualType AttachPtrType =

    // Load through the attach pointer to obtain the base address of its
    // pointee.
    Address AttachPteeBaseAddr = CGF.EmitLoadOfPointer(
        AttachPtrAddr, AttachPtrType->castAs<PointerType>());
    assert(AttachPteeBaseAddr.isValid() && "Invalid attach pointee base addr");

    return {AttachPtrAddr, AttachPteeBaseAddr};
  }
8792
  /// Returns whether an attach entry should be emitted for a map on
  /// \p MapBaseDecl on the directive \p CurDir.
  static bool
  shouldEmitAttachEntry(const Expr *PointerExpr, const ValueDecl *MapBaseDecl,
                        CodeGenFunction &CGF,
                        llvm::PointerUnion<const OMPExecutableDirective *,
                                           const OMPDeclareMapperDecl *>
                            CurDir) {
    // No attach pointer means there is nothing to attach to.
    if (!PointerExpr)
      return false;

    // Pointer attachment is needed at map-entering time or for declare
    // mappers.
    // NOTE(review): part of the second operand of this condition is elided
    // from this view — it presumably queries the executable directive's kind
    // (via cast<const OMPExecutableDirective *>(CurDir)) for a map-entering
    // directive; confirm against the upstream sources.
    return isa<const OMPDeclareMapperDecl *>(CurDir) ||
                                 ->getDirectiveKind());
  }
8811
  /// Computes the attach-ptr expr for \p Components, and updates various maps
  /// with the information.
  /// It internally calls OMPClauseMappableExprCommon::findAttachPtrExpr()
  /// with the OpenMPDirectiveKind extracted from \p CurDir.
  /// It updates AttachPtrComputationOrderMap, AttachPtrComponentDepthMap, and
  /// AttachPtrExprMap.
  // NOTE(review): the Components parameter declaration is elided from this
  // view (it is referenced below and documented above) — confirm its exact
  // type against the upstream sources.
  void collectAttachPtrExprInfo(
      llvm::PointerUnion<const OMPExecutableDirective *,
                         const OMPDeclareMapperDecl *>
          CurDir) {

    // Determine the directive kind that findAttachPtrExpr() should use.
    // NOTE(review): the condition of this conditional expression is elided
    // from this view (presumably isa<const OMPDeclareMapperDecl *>(CurDir))
    // — confirm against the upstream sources.
    OpenMPDirectiveKind CurDirectiveID =
            ? OMPD_declare_mapper
            : cast<const OMPExecutableDirective *>(CurDir)->getDirectiveKind();

    // NOTE(review): the call producing this pair is elided from this view
    // (per the doc comment, OMPClauseMappableExprCommon::findAttachPtrExpr
    // over Components) — confirm against the upstream sources.
    const auto &[AttachPtrExpr, Depth] =
        CurDirectiveID);

    // try_emplace keeps the first-seen computation order and depth for a
    // given attach-ptr expr; later duplicates do not overwrite earlier
    // entries.
    AttachPtrComputationOrderMap.try_emplace(
        AttachPtrExpr, AttachPtrComputationOrderMap.size());
    AttachPtrComponentDepthMap.try_emplace(AttachPtrExpr, Depth);
    AttachPtrExprMap.try_emplace(Components, AttachPtrExpr);
  }
8838
8839 /// Generate all the base pointers, section pointers, sizes, map types, and
8840 /// mappers for the extracted mappable expressions (all included in \a
8841 /// CombinedInfo). Also, for each item that relates with a device pointer, a
8842 /// pair of the relevant declaration and index where it occurs is appended to
8843 /// the device pointers info array.
8844 void generateAllInfoForClauses(
8845 ArrayRef<const OMPClause *> Clauses, MapCombinedInfoTy &CombinedInfo,
8846 llvm::OpenMPIRBuilder &OMPBuilder,
8847 const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
8848 llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
8849 // We have to process the component lists that relate with the same
8850 // declaration in a single chunk so that we can generate the map flags
8851 // correctly. Therefore, we organize all lists in a map.
8852 enum MapKind { Present, Allocs, Other, Total };
8853 llvm::MapVector<CanonicalDeclPtr<const Decl>,
8854 SmallVector<SmallVector<MapInfo, 8>, 4>>
8855 Info;
8856
8857 // Helper function to fill the information map for the different supported
8858 // clauses.
8859 auto &&InfoGen =
8860 [&Info, &SkipVarSet](
8861 const ValueDecl *D, MapKind Kind,
8863 OpenMPMapClauseKind MapType,
8864 ArrayRef<OpenMPMapModifierKind> MapModifiers,
8865 ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
8866 bool ReturnDevicePointer, bool IsImplicit, const ValueDecl *Mapper,
8867 const Expr *VarRef = nullptr, bool ForDeviceAddr = false) {
8868 if (SkipVarSet.contains(D))
8869 return;
8870 auto It = Info.try_emplace(D, Total).first;
8871 It->second[Kind].emplace_back(
8872 L, MapType, MapModifiers, MotionModifiers, ReturnDevicePointer,
8873 IsImplicit, Mapper, VarRef, ForDeviceAddr);
8874 };
8875
8876 for (const auto *Cl : Clauses) {
8877 const auto *C = dyn_cast<OMPMapClause>(Cl);
8878 if (!C)
8879 continue;
8880 MapKind Kind = Other;
8881 if (llvm::is_contained(C->getMapTypeModifiers(),
8882 OMPC_MAP_MODIFIER_present))
8883 Kind = Present;
8884 else if (C->getMapType() == OMPC_MAP_alloc)
8885 Kind = Allocs;
8886 const auto *EI = C->getVarRefs().begin();
8887 for (const auto L : C->component_lists()) {
8888 const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
8889 InfoGen(std::get<0>(L), Kind, std::get<1>(L), C->getMapType(),
8890 C->getMapTypeModifiers(), {},
8891 /*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(L),
8892 E);
8893 ++EI;
8894 }
8895 }
8896 for (const auto *Cl : Clauses) {
8897 const auto *C = dyn_cast<OMPToClause>(Cl);
8898 if (!C)
8899 continue;
8900 MapKind Kind = Other;
8901 if (llvm::is_contained(C->getMotionModifiers(),
8902 OMPC_MOTION_MODIFIER_present))
8903 Kind = Present;
8904 if (llvm::is_contained(C->getMotionModifiers(),
8905 OMPC_MOTION_MODIFIER_iterator)) {
8906 if (auto *IteratorExpr = dyn_cast<OMPIteratorExpr>(
8907 C->getIteratorModifier()->IgnoreParenImpCasts())) {
8908 const auto *VD = cast<VarDecl>(IteratorExpr->getIteratorDecl(0));
8909 CGF.EmitVarDecl(*VD);
8910 }
8911 }
8912
8913 const auto *EI = C->getVarRefs().begin();
8914 for (const auto L : C->component_lists()) {
8915 InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_to, {},
8916 C->getMotionModifiers(), /*ReturnDevicePointer=*/false,
8917 C->isImplicit(), std::get<2>(L), *EI);
8918 ++EI;
8919 }
8920 }
8921 for (const auto *Cl : Clauses) {
8922 const auto *C = dyn_cast<OMPFromClause>(Cl);
8923 if (!C)
8924 continue;
8925 MapKind Kind = Other;
8926 if (llvm::is_contained(C->getMotionModifiers(),
8927 OMPC_MOTION_MODIFIER_present))
8928 Kind = Present;
8929 if (llvm::is_contained(C->getMotionModifiers(),
8930 OMPC_MOTION_MODIFIER_iterator)) {
8931 if (auto *IteratorExpr = dyn_cast<OMPIteratorExpr>(
8932 C->getIteratorModifier()->IgnoreParenImpCasts())) {
8933 const auto *VD = cast<VarDecl>(IteratorExpr->getIteratorDecl(0));
8934 CGF.EmitVarDecl(*VD);
8935 }
8936 }
8937
8938 const auto *EI = C->getVarRefs().begin();
8939 for (const auto L : C->component_lists()) {
8940 InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_from, {},
8941 C->getMotionModifiers(),
8942 /*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(L),
8943 *EI);
8944 ++EI;
8945 }
8946 }
8947
8948 // Look at the use_device_ptr and use_device_addr clauses information and
8949 // mark the existing map entries as such. If there is no map information for
8950 // an entry in the use_device_ptr and use_device_addr list, we create one
8951 // with map type 'return_param' and zero size section. It is the user's
8952 // fault if that was not mapped before. If there is no map information, then
8953 // we defer the emission of that entry until all the maps for the same VD
8954 // have been handled.
8955 MapCombinedInfoTy UseDeviceDataCombinedInfo;
8956
8957 auto &&UseDeviceDataCombinedInfoGen =
8958 [&UseDeviceDataCombinedInfo](const ValueDecl *VD, llvm::Value *Ptr,
8959 CodeGenFunction &CGF, bool IsDevAddr,
8960 bool HasUdpFbNullify = false) {
8961 UseDeviceDataCombinedInfo.Exprs.push_back(VD);
8962 UseDeviceDataCombinedInfo.BasePointers.emplace_back(Ptr);
8963 UseDeviceDataCombinedInfo.DevicePtrDecls.emplace_back(VD);
8964 UseDeviceDataCombinedInfo.DevicePointers.emplace_back(
8965 IsDevAddr ? DeviceInfoTy::Address : DeviceInfoTy::Pointer);
8966 // FIXME: For use_device_addr on array-sections, this should
8967 // be the starting address of the section.
8968 // e.g. int *p;
8969 // ... use_device_addr(p[3])
8970 // &p[0], &p[3], /*size=*/0, RETURN_PARAM
8971 UseDeviceDataCombinedInfo.Pointers.push_back(Ptr);
8972 UseDeviceDataCombinedInfo.Sizes.push_back(
8973 llvm::Constant::getNullValue(CGF.Int64Ty));
8974 OpenMPOffloadMappingFlags Flags =
8975 OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM;
8976 if (HasUdpFbNullify)
8977 Flags |= OpenMPOffloadMappingFlags::OMP_MAP_FB_NULLIFY;
8978 UseDeviceDataCombinedInfo.Types.push_back(Flags);
8979 UseDeviceDataCombinedInfo.Mappers.push_back(nullptr);
8980 };
8981
8982 auto &&MapInfoGen =
8983 [&UseDeviceDataCombinedInfoGen](
8984 CodeGenFunction &CGF, const Expr *IE, const ValueDecl *VD,
8986 Components,
8987 bool IsDevAddr, bool IEIsAttachPtrForDevAddr = false,
8988 bool HasUdpFbNullify = false) {
8989 // We didn't find any match in our map information - generate a zero
8990 // size array section.
8991 llvm::Value *Ptr;
8992 if (IsDevAddr && !IEIsAttachPtrForDevAddr) {
8993 if (IE->isGLValue())
8994 Ptr = CGF.EmitLValue(IE).getPointer(CGF);
8995 else
8996 Ptr = CGF.EmitScalarExpr(IE);
8997 } else {
8998 Ptr = CGF.EmitLoadOfScalar(CGF.EmitLValue(IE), IE->getExprLoc());
8999 }
9000 bool TreatDevAddrAsDevPtr = IEIsAttachPtrForDevAddr;
9001 // For the purpose of address-translation, treat something like the
9002 // following:
9003 // int *p;
9004 // ... use_device_addr(p[1])
9005 // equivalent to
9006 // ... use_device_ptr(p)
9007 UseDeviceDataCombinedInfoGen(VD, Ptr, CGF, /*IsDevAddr=*/IsDevAddr &&
9008 !TreatDevAddrAsDevPtr,
9009 HasUdpFbNullify);
9010 };
9011
9012 auto &&IsMapInfoExist =
9013 [&Info, this](CodeGenFunction &CGF, const ValueDecl *VD, const Expr *IE,
9014 const Expr *DesiredAttachPtrExpr, bool IsDevAddr,
9015 bool HasUdpFbNullify = false) -> bool {
9016 // We potentially have map information for this declaration already.
9017 // Look for the first set of components that refer to it. If found,
9018 // return true.
9019 // If the first component is a member expression, we have to look into
9020 // 'this', which maps to null in the map of map information. Otherwise
9021 // look directly for the information.
9022 auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);
9023 if (It != Info.end()) {
9024 bool Found = false;
9025 for (auto &Data : It->second) {
9026 MapInfo *CI = nullptr;
9027 // We potentially have multiple maps for the same decl. We need to
9028 // only consider those for which the attach-ptr matches the desired
9029 // attach-ptr.
9030 auto *It = llvm::find_if(Data, [&](const MapInfo &MI) {
9031 if (MI.Components.back().getAssociatedDeclaration() != VD)
9032 return false;
9033
9034 const Expr *MapAttachPtr = getAttachPtrExpr(MI.Components);
9035 bool Match = AttachPtrComparator.areEqual(MapAttachPtr,
9036 DesiredAttachPtrExpr);
9037 return Match;
9038 });
9039
9040 if (It != Data.end())
9041 CI = &*It;
9042
9043 if (CI) {
9044 if (IsDevAddr) {
9045 CI->ForDeviceAddr = true;
9046 CI->ReturnDevicePointer = true;
9047 CI->HasUdpFbNullify = HasUdpFbNullify;
9048 Found = true;
9049 break;
9050 } else {
9051 auto PrevCI = std::next(CI->Components.rbegin());
9052 const auto *VarD = dyn_cast<VarDecl>(VD);
9053 const Expr *AttachPtrExpr = getAttachPtrExpr(CI->Components);
9054 if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
9055 isa<MemberExpr>(IE) ||
9056 !VD->getType().getNonReferenceType()->isPointerType() ||
9057 PrevCI == CI->Components.rend() ||
9058 isa<MemberExpr>(PrevCI->getAssociatedExpression()) || !VarD ||
9059 VarD->hasLocalStorage() ||
9060 (isa_and_nonnull<DeclRefExpr>(AttachPtrExpr) &&
9061 VD == cast<DeclRefExpr>(AttachPtrExpr)->getDecl())) {
9062 CI->ForDeviceAddr = IsDevAddr;
9063 CI->ReturnDevicePointer = true;
9064 CI->HasUdpFbNullify = HasUdpFbNullify;
9065 Found = true;
9066 break;
9067 }
9068 }
9069 }
9070 }
9071 return Found;
9072 }
9073 return false;
9074 };
9075
9076 // Look at the use_device_ptr clause information and mark the existing map
9077 // entries as such. If there is no map information for an entry in the
9078 // use_device_ptr list, we create one with map type 'alloc' and zero size
9079 // section. It is the user fault if that was not mapped before. If there is
9080 // no map information and the pointer is a struct member, then we defer the
9081 // emission of that entry until the whole struct has been processed.
9082 for (const auto *Cl : Clauses) {
9083 const auto *C = dyn_cast<OMPUseDevicePtrClause>(Cl);
9084 if (!C)
9085 continue;
9086 bool HasUdpFbNullify =
9087 C->getFallbackModifier() == OMPC_USE_DEVICE_PTR_FALLBACK_fb_nullify;
9088 for (const auto L : C->component_lists()) {
9090 std::get<1>(L);
9091 assert(!Components.empty() &&
9092 "Not expecting empty list of components!");
9093 const ValueDecl *VD = Components.back().getAssociatedDeclaration();
9095 const Expr *IE = Components.back().getAssociatedExpression();
9096 // For use_device_ptr, we match an existing map clause if its attach-ptr
9097 // is same as the use_device_ptr operand. e.g.
9098 // map expr | use_device_ptr expr | current behavior
9099 // ---------|---------------------|-----------------
9100 // p[1] | p | match
9101 // ps->a | ps | match
9102 // p | p | no match
9103 const Expr *UDPOperandExpr =
9104 Components.front().getAssociatedExpression();
9105 if (IsMapInfoExist(CGF, VD, IE,
9106 /*DesiredAttachPtrExpr=*/UDPOperandExpr,
9107 /*IsDevAddr=*/false, HasUdpFbNullify))
9108 continue;
9109 MapInfoGen(CGF, IE, VD, Components, /*IsDevAddr=*/false,
9110 /*IEIsAttachPtrForDevAddr=*/false, HasUdpFbNullify);
9111 }
9112 }
9113
9114 llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>, 4> Processed;
9115 for (const auto *Cl : Clauses) {
9116 const auto *C = dyn_cast<OMPUseDeviceAddrClause>(Cl);
9117 if (!C)
9118 continue;
9119 for (const auto L : C->component_lists()) {
9121 std::get<1>(L);
9122 assert(!std::get<1>(L).empty() &&
9123 "Not expecting empty list of components!");
9124 const ValueDecl *VD = std::get<1>(L).back().getAssociatedDeclaration();
9125 if (!Processed.insert(VD).second)
9126 continue;
9128 // For use_device_addr, we match an existing map clause if the
9129 // use_device_addr operand's attach-ptr matches the map operand's
9130 // attach-ptr.
9131 // We chould also restrict to only match cases when there is a full
9132 // match between the map/use_device_addr clause exprs, but that may be
9133 // unnecessary.
9134 //
9135 // map expr | use_device_addr expr | current | possible restrictive/
9136 // | | behavior | safer behavior
9137 // ---------|----------------------|-----------|-----------------------
9138 // p | p | match | match
9139 // p[0] | p[0] | match | match
9140 // p[0:1] | p[0] | match | no match
9141 // p[0:1] | p[2:1] | match | no match
9142 // p[1] | p[0] | match | no match
9143 // ps->a | ps->b | match | no match
9144 // p | p[0] | no match | no match
9145 // pp | pp[0][0] | no match | no match
9146 const Expr *UDAAttachPtrExpr = getAttachPtrExpr(Components);
9147 const Expr *IE = std::get<1>(L).back().getAssociatedExpression();
9148 assert((!UDAAttachPtrExpr || UDAAttachPtrExpr == IE) &&
9149 "use_device_addr operand has an attach-ptr, but does not match "
9150 "last component's expr.");
9151 if (IsMapInfoExist(CGF, VD, IE,
9152 /*DesiredAttachPtrExpr=*/UDAAttachPtrExpr,
9153 /*IsDevAddr=*/true))
9154 continue;
9155 MapInfoGen(CGF, IE, VD, Components,
9156 /*IsDevAddr=*/true,
9157 /*IEIsAttachPtrForDevAddr=*/UDAAttachPtrExpr != nullptr);
9158 }
9159 }
9160
9161 for (const auto &Data : Info) {
9162 MapCombinedInfoTy CurInfo;
9163 const Decl *D = Data.first;
9164 const ValueDecl *VD = cast_or_null<ValueDecl>(D);
9165 // Group component lists by their AttachPtrExpr and process them in order
9166 // of increasing complexity (nullptr first, then simple expressions like
9167 // p, then more complex ones like p[0], etc.)
9168 //
9169 // This is similar to how generateInfoForCaptureFromClauseInfo handles
9170 // grouping for target constructs.
9171 SmallVector<std::pair<const Expr *, MapInfo>, 16> AttachPtrMapInfoPairs;
9172
9173 // First, collect all MapData entries with their attach-ptr exprs.
9174 for (const auto &M : Data.second) {
9175 for (const MapInfo &L : M) {
9176 assert(!L.Components.empty() &&
9177 "Not expecting declaration with no component lists.");
9178
9179 const Expr *AttachPtrExpr = getAttachPtrExpr(L.Components);
9180 AttachPtrMapInfoPairs.emplace_back(AttachPtrExpr, L);
9181 }
9182 }
9183
9184 // Next, sort by increasing order of their complexity.
9185 llvm::stable_sort(AttachPtrMapInfoPairs,
9186 [this](const auto &LHS, const auto &RHS) {
9187 return AttachPtrComparator(LHS.first, RHS.first);
9188 });
9189
9190 // And finally, process them all in order, grouping those with
9191 // equivalent attach-ptr exprs together.
9192 auto *It = AttachPtrMapInfoPairs.begin();
9193 while (It != AttachPtrMapInfoPairs.end()) {
9194 const Expr *AttachPtrExpr = It->first;
9195
9196 SmallVector<MapInfo, 8> GroupLists;
9197 while (It != AttachPtrMapInfoPairs.end() &&
9198 (It->first == AttachPtrExpr ||
9199 AttachPtrComparator.areEqual(It->first, AttachPtrExpr))) {
9200 GroupLists.push_back(It->second);
9201 ++It;
9202 }
9203 assert(!GroupLists.empty() && "GroupLists should not be empty");
9204
9205 StructRangeInfoTy PartialStruct;
9206 AttachInfoTy AttachInfo;
9207 MapCombinedInfoTy GroupCurInfo;
9208 // Current group's struct base information:
9209 MapCombinedInfoTy GroupStructBaseCurInfo;
9210 for (const MapInfo &L : GroupLists) {
9211 // Remember the current base pointer index.
9212 unsigned CurrentBasePointersIdx = GroupCurInfo.BasePointers.size();
9213 unsigned StructBasePointersIdx =
9214 GroupStructBaseCurInfo.BasePointers.size();
9215
9216 GroupCurInfo.NonContigInfo.IsNonContiguous =
9217 L.Components.back().isNonContiguous();
9218 generateInfoForComponentList(
9219 L.MapType, L.MapModifiers, L.MotionModifiers, L.Components,
9220 GroupCurInfo, GroupStructBaseCurInfo, PartialStruct, AttachInfo,
9221 /*IsFirstComponentList=*/false, L.IsImplicit,
9222 /*GenerateAllInfoForClauses*/ true, L.Mapper, L.ForDeviceAddr, VD,
9223 L.VarRef, /*OverlappedElements*/ {});
9224
9225 // If this entry relates to a device pointer, set the relevant
9226 // declaration and add the 'return pointer' flag.
9227 if (L.ReturnDevicePointer) {
9228 // Check whether a value was added to either GroupCurInfo or
9229 // GroupStructBaseCurInfo and error if no value was added to either
9230 // of them:
9231 assert((CurrentBasePointersIdx < GroupCurInfo.BasePointers.size() ||
9232 StructBasePointersIdx <
9233 GroupStructBaseCurInfo.BasePointers.size()) &&
9234 "Unexpected number of mapped base pointers.");
9235
9236 // Choose a base pointer index which is always valid:
9237 const ValueDecl *RelevantVD =
9238 L.Components.back().getAssociatedDeclaration();
9239 assert(RelevantVD &&
9240 "No relevant declaration related with device pointer??");
9241
9242 // If GroupStructBaseCurInfo has been updated this iteration then
9243 // work on the first new entry added to it i.e. make sure that when
9244 // multiple values are added to any of the lists, the first value
9245 // added is being modified by the assignments below (not the last
9246 // value added).
9247 auto SetDevicePointerInfo = [&](MapCombinedInfoTy &Info,
9248 unsigned Idx) {
9249 Info.DevicePtrDecls[Idx] = RelevantVD;
9250 Info.DevicePointers[Idx] = L.ForDeviceAddr
9251 ? DeviceInfoTy::Address
9252 : DeviceInfoTy::Pointer;
9253 Info.Types[Idx] |=
9254 OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM;
9255 if (L.HasUdpFbNullify)
9256 Info.Types[Idx] |=
9257 OpenMPOffloadMappingFlags::OMP_MAP_FB_NULLIFY;
9258 };
9259
9260 if (StructBasePointersIdx <
9261 GroupStructBaseCurInfo.BasePointers.size())
9262 SetDevicePointerInfo(GroupStructBaseCurInfo,
9263 StructBasePointersIdx);
9264 else
9265 SetDevicePointerInfo(GroupCurInfo, CurrentBasePointersIdx);
9266 }
9267 }
9268
9269 // Unify entries in one list making sure the struct mapping precedes the
9270 // individual fields:
9271 MapCombinedInfoTy GroupUnionCurInfo;
9272 GroupUnionCurInfo.append(GroupStructBaseCurInfo);
9273 GroupUnionCurInfo.append(GroupCurInfo);
9274
9275 // If there is an entry in PartialStruct it means we have a struct with
9276 // individual members mapped. Emit an extra combined entry.
9277 if (PartialStruct.Base.isValid()) {
9278 // Prepend a synthetic dimension of length 1 to represent the
9279 // aggregated struct object. Using 1 (not 0, as 0 produced an
9280 // incorrect non-contiguous descriptor (DimSize==1), causing the
9281 // non-contiguous motion clause path to be skipped.) is important:
9282 // * It preserves the correct rank so targetDataUpdate() computes
9283 // DimSize == 2 for cases like strided array sections originating
9284 // from user-defined mappers (e.g. test with s.data[0:8:2]).
9285 GroupUnionCurInfo.NonContigInfo.Dims.insert(
9286 GroupUnionCurInfo.NonContigInfo.Dims.begin(), 1);
9287 emitCombinedEntry(
9288 CurInfo, GroupUnionCurInfo.Types, PartialStruct, AttachInfo,
9289 /*IsMapThis=*/!VD, OMPBuilder, VD,
9290 /*OffsetForMemberOfFlag=*/CombinedInfo.BasePointers.size(),
9291 /*NotTargetParams=*/true);
9292 }
9293
9294 // Append this group's results to the overall CurInfo in the correct
9295 // order: combined-entry -> original-field-entries -> attach-entry
9296 CurInfo.append(GroupUnionCurInfo);
9297 if (AttachInfo.isValid())
9298 emitAttachEntry(CGF, CurInfo, AttachInfo);
9299 }
9300
9301 // We need to append the results of this capture to what we already have.
9302 CombinedInfo.append(CurInfo);
9303 }
9304 // Append data for use_device_ptr/addr clauses.
9305 CombinedInfo.append(UseDeviceDataCombinedInfo);
9306 }
9307
9308public:
9309 MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF)
9310 : CurDir(&Dir), CGF(CGF), AttachPtrComparator(*this) {
9311 // Extract firstprivate clause information.
9312 for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>())
9313 for (const auto *D : C->varlist())
9314 FirstPrivateDecls.try_emplace(
9315 cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl()), C->isImplicit());
9316 // Extract implicit firstprivates from uses_allocators clauses.
9317 for (const auto *C : Dir.getClausesOfKind<OMPUsesAllocatorsClause>()) {
9318 for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
9319 OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
9320 if (const auto *DRE = dyn_cast_or_null<DeclRefExpr>(D.AllocatorTraits))
9321 FirstPrivateDecls.try_emplace(cast<VarDecl>(DRE->getDecl()),
9322 /*Implicit=*/true);
9323 else if (const auto *VD = dyn_cast<VarDecl>(
9324 cast<DeclRefExpr>(D.Allocator->IgnoreParenImpCasts())
9325 ->getDecl()))
9326 FirstPrivateDecls.try_emplace(VD, /*Implicit=*/true);
9327 }
9328 }
9329 // Extract defaultmap clause information.
9330 for (const auto *C : Dir.getClausesOfKind<OMPDefaultmapClause>())
9331 if (C->getDefaultmapModifier() == OMPC_DEFAULTMAP_MODIFIER_firstprivate)
9332 DefaultmapFirstprivateKinds.insert(C->getDefaultmapKind());
9333 // Extract device pointer clause information.
9334 for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>())
9335 for (auto L : C->component_lists())
9336 DevPointersMap[std::get<0>(L)].push_back(std::get<1>(L));
9337 // Extract device addr clause information.
9338 for (const auto *C : Dir.getClausesOfKind<OMPHasDeviceAddrClause>())
9339 for (auto L : C->component_lists())
9340 HasDevAddrsMap[std::get<0>(L)].push_back(std::get<1>(L));
9341 // Extract map information.
9342 for (const auto *C : Dir.getClausesOfKind<OMPMapClause>()) {
9343 if (C->getMapType() != OMPC_MAP_to)
9344 continue;
9345 for (auto L : C->component_lists()) {
9346 const ValueDecl *VD = std::get<0>(L);
9347 const auto *RD = VD ? VD->getType()
9348 .getCanonicalType()
9349 .getNonReferenceType()
9350 ->getAsCXXRecordDecl()
9351 : nullptr;
9352 if (RD && RD->isLambda())
9353 LambdasMap.try_emplace(std::get<0>(L), C);
9354 }
9355 }
9356
9357 auto CollectAttachPtrExprsForClauseComponents = [this](const auto *C) {
9358 for (auto L : C->component_lists()) {
9360 std::get<1>(L);
9361 if (!Components.empty())
9362 collectAttachPtrExprInfo(Components, CurDir);
9363 }
9364 };
9365
9366 // Populate the AttachPtrExprMap for all component lists from map-related
9367 // clauses.
9368 for (const auto *C : Dir.getClausesOfKind<OMPMapClause>())
9369 CollectAttachPtrExprsForClauseComponents(C);
9370 for (const auto *C : Dir.getClausesOfKind<OMPToClause>())
9371 CollectAttachPtrExprsForClauseComponents(C);
9372 for (const auto *C : Dir.getClausesOfKind<OMPFromClause>())
9373 CollectAttachPtrExprsForClauseComponents(C);
9374 for (const auto *C : Dir.getClausesOfKind<OMPUseDevicePtrClause>())
9375 CollectAttachPtrExprsForClauseComponents(C);
9376 for (const auto *C : Dir.getClausesOfKind<OMPUseDeviceAddrClause>())
9377 CollectAttachPtrExprsForClauseComponents(C);
9378 for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>())
9379 CollectAttachPtrExprsForClauseComponents(C);
9380 for (const auto *C : Dir.getClausesOfKind<OMPHasDeviceAddrClause>())
9381 CollectAttachPtrExprsForClauseComponents(C);
9382 }
9383
  /// Constructor for the declare mapper directive. Unlike the
  /// executable-directive constructor, no clause information is pre-extracted
  /// here; only the directive and CodeGenFunction are recorded.
  MappableExprsHandler(const OMPDeclareMapperDecl &Dir, CodeGenFunction &CGF)
      : CurDir(&Dir), CGF(CGF), AttachPtrComparator(*this) {}
9387
  /// Generate code for the combined entry if we have a partially mapped struct
  /// and take care of the mapping flags of the arguments corresponding to
  /// individual struct members.
  /// If a valid \p AttachInfo exists, its pointee addr will be updated to point
  /// to the combined-entry's begin address, if emitted.
  /// \p PartialStruct contains attach base-pointer information.
  /// \p OffsetForMemberOfFlag is added to the combined entry's position when
  /// computing the MEMBER_OF index for the individual member entries.
  void emitCombinedEntry(MapCombinedInfoTy &CombinedInfo,
                         MapFlagsArrayTy &CurTypes,
                         const StructRangeInfoTy &PartialStruct,
                         AttachInfoTy &AttachInfo, bool IsMapThis,
                         llvm::OpenMPIRBuilder &OMPBuilder, const ValueDecl *VD,
                         unsigned OffsetForMemberOfFlag,
                         bool NotTargetParams) const {
    // A single non-MEMBER_OF entry that is not an array section needs no
    // combined entry.
    if (CurTypes.size() == 1 &&
        ((CurTypes.back() & OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF) !=
         OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF) &&
        !PartialStruct.IsArraySection)
      return;
    Address LBAddr = PartialStruct.LowestElem.second;
    Address HBAddr = PartialStruct.HighestElem.second;
    // A complete record is covered by a single [LB, LB) range.
    if (PartialStruct.HasCompleteRecord) {
      LBAddr = PartialStruct.LB;
      HBAddr = PartialStruct.LB;
    }
    CombinedInfo.Exprs.push_back(VD);
    // Base is the base of the struct
    CombinedInfo.BasePointers.push_back(PartialStruct.Base.emitRawPointer(CGF));
    CombinedInfo.DevicePtrDecls.push_back(nullptr);
    CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
    // Pointer is the address of the lowest element
    llvm::Value *LB = LBAddr.emitRawPointer(CGF);
    const CXXMethodDecl *MD =
        CGF.CurFuncDecl ? dyn_cast<CXXMethodDecl>(CGF.CurFuncDecl) : nullptr;
    const CXXRecordDecl *RD = MD ? MD->getParent() : nullptr;
    bool HasBaseClass = RD && IsMapThis ? RD->getNumBases() > 0 : false;
    // There should not be a mapper for a combined entry.
    if (HasBaseClass) {
      // OpenMP 5.2 148:21:
      // If the target construct is within a class non-static member function,
      // and a variable is an accessible data member of the object for which the
      // non-static data member function is invoked, the variable is treated as
      // if the this[:1] expression had appeared in a map clause with a map-type
      // of tofrom.
      // Emit this[:1]
      CombinedInfo.Pointers.push_back(PartialStruct.Base.emitRawPointer(CGF));
      QualType Ty = MD->getFunctionObjectParameterType();
      llvm::Value *Size =
          CGF.Builder.CreateIntCast(CGF.getTypeSize(Ty), CGF.Int64Ty,
                                    /*isSigned=*/true);
      CombinedInfo.Sizes.push_back(Size);
    } else {
      CombinedInfo.Pointers.push_back(LB);
      // Size is (addr of {highest+1} element) - (addr of lowest element)
      llvm::Value *HB = HBAddr.emitRawPointer(CGF);
      llvm::Value *HAddr = CGF.Builder.CreateConstGEP1_32(
          HBAddr.getElementType(), HB, /*Idx0=*/1);
      llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(LB, CGF.VoidPtrTy);
      llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy);
      // NOTE(review): this uses the two-argument CreatePtrDiff form; newer
      // IRBuilder variants take an explicit element type first — confirm
      // against the in-tree IRBuilder signature.
      llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CHAddr, CLAddr);
      llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.Int64Ty,
                                                    /*isSigned=*/false);
      CombinedInfo.Sizes.push_back(Size);
    }
    CombinedInfo.Mappers.push_back(nullptr);
    // Map type is always TARGET_PARAM, if generate info for captures.
    CombinedInfo.Types.push_back(
        NotTargetParams ? OpenMPOffloadMappingFlags::OMP_MAP_NONE
        : !PartialStruct.PreliminaryMapData.BasePointers.empty()
            ? OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ
            : OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM);
    // If any element has the present modifier, then make sure the runtime
    // doesn't attempt to allocate the struct.
    if (CurTypes.end() !=
        llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) {
          return static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
              Type & OpenMPOffloadMappingFlags::OMP_MAP_PRESENT);
        }))
      CombinedInfo.Types.back() |= OpenMPOffloadMappingFlags::OMP_MAP_PRESENT;
    // Remove TARGET_PARAM flag from the first element
    (*CurTypes.begin()) &= ~OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;
    // If any element has the ompx_hold modifier, then make sure the runtime
    // uses the hold reference count for the struct as a whole so that it won't
    // be unmapped by an extra dynamic reference count decrement. Add it to all
    // elements as well so the runtime knows which reference count to check
    // when determining whether it's time for device-to-host transfers of
    // individual elements.
    if (CurTypes.end() !=
        llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) {
          return static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
              Type & OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD);
        })) {
      CombinedInfo.Types.back() |= OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD;
      for (auto &M : CurTypes)
        M |= OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD;
    }

    // All other current entries will be MEMBER_OF the combined entry
    // (except for PTR_AND_OBJ entries which do not have a placeholder value
    // 0xFFFF in the MEMBER_OF field, or ATTACH entries since they are expected
    // to be handled by themselves, after all other maps).
    OpenMPOffloadMappingFlags MemberOfFlag = OMPBuilder.getMemberOfFlag(
        OffsetForMemberOfFlag + CombinedInfo.BasePointers.size() - 1);
    for (auto &M : CurTypes)
      OMPBuilder.setCorrectMemberOfFlag(M, MemberOfFlag);

    // If there were any pending attachments, redirect them to the begin
    // address of the combined entry. Only one attachment per combined entry
    // is done, so for e.g.
    //   S *ps;
    //   ... map(ps->a, ps->b)
    // we still get a single ATTACH entry:
    //
    //   &ps[0], &ps->a, sizeof(ps->a to ps->b), ALLOC   // combined-entry
    //   &ps[0], &ps->a, sizeof(ps->a), TO | FROM
    //   &ps[0], &ps->b, sizeof(ps->b), TO | FROM
    //   &ps, &ps->a, sizeof(void*), ATTACH              // combined-entry's LB
    if (AttachInfo.isValid())
      AttachInfo.AttachPteeAddr = LBAddr;
  }
9513
9514 /// Generate all the base pointers, section pointers, sizes, map types, and
9515 /// mappers for the extracted mappable expressions (all included in \a
9516 /// CombinedInfo). Also, for each item that relates with a device pointer, a
9517 /// pair of the relevant declaration and index where it occurs is appended to
9518 /// the device pointers info array.
9519 void generateAllInfo(
9520 MapCombinedInfoTy &CombinedInfo, llvm::OpenMPIRBuilder &OMPBuilder,
9521 const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
9522 llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
9523 assert(isa<const OMPExecutableDirective *>(CurDir) &&
9524 "Expect a executable directive");
9525 const auto *CurExecDir = cast<const OMPExecutableDirective *>(CurDir);
9526 generateAllInfoForClauses(CurExecDir->clauses(), CombinedInfo, OMPBuilder,
9527 SkipVarSet);
9528 }
9529
9530 /// Generate all the base pointers, section pointers, sizes, map types, and
9531 /// mappers for the extracted map clauses of user-defined mapper (all included
9532 /// in \a CombinedInfo).
9533 void generateAllInfoForMapper(MapCombinedInfoTy &CombinedInfo,
9534 llvm::OpenMPIRBuilder &OMPBuilder) const {
9535 assert(isa<const OMPDeclareMapperDecl *>(CurDir) &&
9536 "Expect a declare mapper directive");
9537 const auto *CurMapperDir = cast<const OMPDeclareMapperDecl *>(CurDir);
9538 generateAllInfoForClauses(CurMapperDir->clauses(), CombinedInfo,
9539 OMPBuilder);
9540 }
9541
9542 /// Emit capture info for lambdas for variables captured by reference.
9543 void generateInfoForLambdaCaptures(
9544 const ValueDecl *VD, llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
9545 llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers) const {
9546 QualType VDType = VD->getType().getCanonicalType().getNonReferenceType();
9547 const auto *RD = VDType->getAsCXXRecordDecl();
9548 if (!RD || !RD->isLambda())
9549 return;
9550 Address VDAddr(Arg, CGF.ConvertTypeForMem(VDType),
9551 CGF.getContext().getDeclAlign(VD));
9552 LValue VDLVal = CGF.MakeAddrLValue(VDAddr, VDType);
9553 llvm::DenseMap<const ValueDecl *, FieldDecl *> Captures;
9554 FieldDecl *ThisCapture = nullptr;
9555 RD->getCaptureFields(Captures, ThisCapture);
9556 if (ThisCapture) {
9557 LValue ThisLVal =
9558 CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture);
9559 LValue ThisLValVal = CGF.EmitLValueForField(VDLVal, ThisCapture);
9560 LambdaPointers.try_emplace(ThisLVal.getPointer(CGF),
9561 VDLVal.getPointer(CGF));
9562 CombinedInfo.Exprs.push_back(VD);
9563 CombinedInfo.BasePointers.push_back(ThisLVal.getPointer(CGF));
9564 CombinedInfo.DevicePtrDecls.push_back(nullptr);
9565 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
9566 CombinedInfo.Pointers.push_back(ThisLValVal.getPointer(CGF));
9567 CombinedInfo.Sizes.push_back(
9568 CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy),
9569 CGF.Int64Ty, /*isSigned=*/true));
9570 CombinedInfo.Types.push_back(
9571 OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ |
9572 OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
9573 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF |
9574 OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT);
9575 CombinedInfo.Mappers.push_back(nullptr);
9576 }
9577 for (const LambdaCapture &LC : RD->captures()) {
9578 if (!LC.capturesVariable())
9579 continue;
9580 const VarDecl *VD = cast<VarDecl>(LC.getCapturedVar());
9581 if (LC.getCaptureKind() != LCK_ByRef && !VD->getType()->isPointerType())
9582 continue;
9583 auto It = Captures.find(VD);
9584 assert(It != Captures.end() && "Found lambda capture without field.");
9585 LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second);
9586 if (LC.getCaptureKind() == LCK_ByRef) {
9587 LValue VarLValVal = CGF.EmitLValueForField(VDLVal, It->second);
9588 LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
9589 VDLVal.getPointer(CGF));
9590 CombinedInfo.Exprs.push_back(VD);
9591 CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
9592 CombinedInfo.DevicePtrDecls.push_back(nullptr);
9593 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
9594 CombinedInfo.Pointers.push_back(VarLValVal.getPointer(CGF));
9595 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
9596 CGF.getTypeSize(
9598 CGF.Int64Ty, /*isSigned=*/true));
9599 } else {
9600 RValue VarRVal = CGF.EmitLoadOfLValue(VarLVal, RD->getLocation());
9601 LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
9602 VDLVal.getPointer(CGF));
9603 CombinedInfo.Exprs.push_back(VD);
9604 CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
9605 CombinedInfo.DevicePtrDecls.push_back(nullptr);
9606 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
9607 CombinedInfo.Pointers.push_back(VarRVal.getScalarVal());
9608 CombinedInfo.Sizes.push_back(llvm::ConstantInt::get(CGF.Int64Ty, 0));
9609 }
9610 CombinedInfo.Types.push_back(
9611 OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ |
9612 OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
9613 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF |
9614 OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT);
9615 CombinedInfo.Mappers.push_back(nullptr);
9616 }
9617 }
9618
9619 /// Set correct indices for lambdas captures.
9620 void adjustMemberOfForLambdaCaptures(
9621 llvm::OpenMPIRBuilder &OMPBuilder,
9622 const llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers,
9623 MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
9624 MapFlagsArrayTy &Types) const {
9625 for (unsigned I = 0, E = Types.size(); I < E; ++I) {
9626 // Set correct member_of idx for all implicit lambda captures.
9627 if (Types[I] != (OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ |
9628 OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
9629 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF |
9630 OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT))
9631 continue;
9632 llvm::Value *BasePtr = LambdaPointers.lookup(BasePointers[I]);
9633 assert(BasePtr && "Unable to find base lambda address.");
9634 int TgtIdx = -1;
9635 for (unsigned J = I; J > 0; --J) {
9636 unsigned Idx = J - 1;
9637 if (Pointers[Idx] != BasePtr)
9638 continue;
9639 TgtIdx = Idx;
9640 break;
9641 }
9642 assert(TgtIdx != -1 && "Unable to find parent lambda.");
9643 // All other current entries will be MEMBER_OF the combined entry
9644 // (except for PTR_AND_OBJ entries which do not have a placeholder value
9645 // 0xFFFF in the MEMBER_OF field).
9646 OpenMPOffloadMappingFlags MemberOfFlag =
9647 OMPBuilder.getMemberOfFlag(TgtIdx);
9648 OMPBuilder.setCorrectMemberOfFlag(Types[I], MemberOfFlag);
9649 }
9650 }
9651
9652 /// Populate component lists for non-lambda captured variables from map,
9653 /// is_device_ptr and has_device_addr clause info.
9654 void populateComponentListsForNonLambdaCaptureFromClauses(
9655 const ValueDecl *VD, MapDataArrayTy &DeclComponentLists,
9656 SmallVectorImpl<
9657 SmallVector<OMPClauseMappableExprCommon::MappableComponent, 8>>
9658 &StorageForImplicitlyAddedComponentLists) const {
9659 if (VD && LambdasMap.count(VD))
9660 return;
9661
9662 // For member fields list in is_device_ptr, store it in
9663 // DeclComponentLists for generating components info.
9665 auto It = DevPointersMap.find(VD);
9666 if (It != DevPointersMap.end())
9667 for (const auto &MCL : It->second)
9668 DeclComponentLists.emplace_back(MCL, OMPC_MAP_to, Unknown,
9669 /*IsImpicit = */ true, nullptr,
9670 nullptr);
9671 auto I = HasDevAddrsMap.find(VD);
9672 if (I != HasDevAddrsMap.end())
9673 for (const auto &MCL : I->second)
9674 DeclComponentLists.emplace_back(MCL, OMPC_MAP_tofrom, Unknown,
9675 /*IsImpicit = */ true, nullptr,
9676 nullptr);
9677 assert(isa<const OMPExecutableDirective *>(CurDir) &&
9678 "Expect a executable directive");
9679 const auto *CurExecDir = cast<const OMPExecutableDirective *>(CurDir);
9680 for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
9681 const auto *EI = C->getVarRefs().begin();
9682 for (const auto L : C->decl_component_lists(VD)) {
9683 const ValueDecl *VDecl, *Mapper;
9684 // The Expression is not correct if the mapping is implicit
9685 const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
9687 std::tie(VDecl, Components, Mapper) = L;
9688 assert(VDecl == VD && "We got information for the wrong declaration??");
9689 assert(!Components.empty() &&
9690 "Not expecting declaration with no component lists.");
9691 DeclComponentLists.emplace_back(Components, C->getMapType(),
9692 C->getMapTypeModifiers(),
9693 C->isImplicit(), Mapper, E);
9694 ++EI;
9695 }
9696 }
9697
9698 // For the target construct, if there's a map with a base-pointer that's
9699 // a member of an implicitly captured struct, of the current class,
9700 // we need to emit an implicit map on the pointer.
9701 if (isOpenMPTargetExecutionDirective(CurExecDir->getDirectiveKind()))
9702 addImplicitMapForAttachPtrBaseIfMemberOfCapturedVD(
9703 VD, DeclComponentLists, StorageForImplicitlyAddedComponentLists);
9704
9705 llvm::stable_sort(DeclComponentLists, [](const MapData &LHS,
9706 const MapData &RHS) {
9707 ArrayRef<OpenMPMapModifierKind> MapModifiers = std::get<2>(LHS);
9708 OpenMPMapClauseKind MapType = std::get<1>(RHS);
9709 bool HasPresent =
9710 llvm::is_contained(MapModifiers, clang::OMPC_MAP_MODIFIER_present);
9711 bool HasAllocs = MapType == OMPC_MAP_alloc;
9712 MapModifiers = std::get<2>(RHS);
9713 MapType = std::get<1>(LHS);
9714 bool HasPresentR =
9715 llvm::is_contained(MapModifiers, clang::OMPC_MAP_MODIFIER_present);
9716 bool HasAllocsR = MapType == OMPC_MAP_alloc;
9717 return (HasPresent && !HasPresentR) || (HasAllocs && !HasAllocsR);
9718 });
9719 }
9720
9721 /// On a target construct, if there's an implicit map on a struct, or that of
9722 /// this[:], and an explicit map with a member of that struct/class as the
9723 /// base-pointer, we need to make sure that base-pointer is implicitly mapped,
9724 /// to make sure we don't map the full struct/class. For example:
9725 ///
9726 /// \code
9727 /// struct S {
9728 /// int dummy[10000];
9729 /// int *p;
9730 /// void f1() {
9731 /// #pragma omp target map(p[0:1])
9732 /// (void)this;
9733 /// }
9734 /// }; S s;
9735 ///
9736 /// void f2() {
9737 /// #pragma omp target map(s.p[0:10])
9738 /// (void)s;
9739 /// }
9740 /// \endcode
9741 ///
9742 /// Only `this-p` and `s.p` should be mapped in the two cases above.
9743 //
9744 // OpenMP 6.0: 7.9.6 map clause, pg 285
9745 // If a list item with an implicitly determined data-mapping attribute does
9746 // not have any corresponding storage in the device data environment prior to
9747 // a task encountering the construct associated with the map clause, and one
9748 // or more contiguous parts of the original storage are either list items or
9749 // base pointers to list items that are explicitly mapped on the construct,
9750 // only those parts of the original storage will have corresponding storage in
9751 // the device data environment as a result of the map clauses on the
9752 // construct.
  // Adds an implicit 'tofrom' map for a member attach-ptr (`s.p` / `this->p`)
  // when that pointer is the attachable base pointer of some mapped list item
  // but neither the pointer member itself nor its enclosing object is
  // explicitly mapped on the construct.
  void addImplicitMapForAttachPtrBaseIfMemberOfCapturedVD(
      const ValueDecl *CapturedVD, MapDataArrayTy &DeclComponentLists,
      SmallVectorImpl<
          SmallVector<OMPClauseMappableExprCommon::MappableComponent, 8>>
          &ComponentVectorStorage) const {
    // A null captured decl denotes the `this` capture.
    bool IsThisCapture = CapturedVD == nullptr;

    for (const auto &ComponentsAndAttachPtr : AttachPtrExprMap) {
      // NOTE(review): the declaration line for ComponentsWithAttachPtr is not
      // visible in this chunk (extraction gap) — verify against upstream.
      ComponentsWithAttachPtr = ComponentsAndAttachPtr.first;
      const Expr *AttachPtrExpr = ComponentsAndAttachPtr.second;
      if (!AttachPtrExpr)
        continue;

      // Only member attach-ptrs (`x.p` / `x->p`) are of interest here.
      const auto *ME = dyn_cast<MemberExpr>(AttachPtrExpr);
      if (!ME)
        continue;

      const Expr *Base = ME->getBase()->IgnoreParenImpCasts();

      // If we are handling a "this" capture, then we are looking for
      // attach-ptrs of form `this->p`, either explicitly or implicitly.
      if (IsThisCapture && !ME->isImplicitCXXThis() && !isa<CXXThisExpr>(Base))
        continue;

      if (!IsThisCapture && (!isa<DeclRefExpr>(Base) ||
                             cast<DeclRefExpr>(Base)->getDecl() != CapturedVD))
        continue;

      // For non-this captures, we are looking for attach-ptrs of form
      // `s.p`.
      // For non-this captures, we are looking for attach-ptrs like `s.p`.
      // NOTE(review): this check repeats the previous one, differing only in
      // the additional isArrow() test — looks like a leftover duplicate;
      // confirm whether the two can be merged.
      if (!IsThisCapture && (ME->isArrow() || !isa<DeclRefExpr>(Base) ||
                             cast<DeclRefExpr>(Base)->getDecl() != CapturedVD))
        continue;

      // Check if we have an existing map on either:
      // this[:], s, this->p, or s.p, in which case, we don't need to add
      // an implicit one for the attach-ptr s.p/this->p.
      bool FoundExistingMap = false;
      for (const MapData &ExistingL : DeclComponentLists) {
        // NOTE(review): declaration line for ExistingComponents not visible in
        // this chunk (extraction gap).
        ExistingComponents = std::get<0>(ExistingL);

        if (ExistingComponents.empty())
          continue;

        // First check if we have a map like map(this->p) or map(s.p).
        const auto &FirstComponent = ExistingComponents.front();
        const Expr *FirstExpr = FirstComponent.getAssociatedExpression();

        if (!FirstExpr)
          continue;

        // First check if we have a map like map(this->p) or map(s.p).
        if (AttachPtrComparator.areEqual(FirstExpr, AttachPtrExpr)) {
          FoundExistingMap = true;
          break;
        }

        // Check if we have a map like this[0:1]
        if (IsThisCapture) {
          if (const auto *OASE = dyn_cast<ArraySectionExpr>(FirstExpr)) {
            if (isa<CXXThisExpr>(OASE->getBase()->IgnoreParenImpCasts())) {
              FoundExistingMap = true;
              break;
            }
          }
          continue;
        }

        // When the attach-ptr is something like `s.p`, check if
        // `s` itself is mapped explicitly.
        if (const auto *DRE = dyn_cast<DeclRefExpr>(FirstExpr)) {
          if (DRE->getDecl() == CapturedVD) {
            FoundExistingMap = true;
            break;
          }
        }
      }

      if (FoundExistingMap)
        continue;

      // If no base map is found, we need to create an implicit map for the
      // attach-pointer expr.

      // Storage must outlive DeclComponentLists, hence the caller-provided
      // ComponentVectorStorage.
      ComponentVectorStorage.emplace_back();
      auto &AttachPtrComponents = ComponentVectorStorage.back();

      bool SeenAttachPtrComponent = false;
      // For creating a map on the attach-ptr `s.p/this->p`, we copy all
      // components from the component-list which has `s.p/this->p`
      // as the attach-ptr, starting from the component which matches
      // `s.p/this->p`. This way, we'll have component-lists of
      // `s.p` -> `s`, and `this->p` -> `this`.
      for (size_t i = 0; i < ComponentsWithAttachPtr.size(); ++i) {
        const auto &Component = ComponentsWithAttachPtr[i];
        const Expr *ComponentExpr = Component.getAssociatedExpression();

        if (!SeenAttachPtrComponent && ComponentExpr != AttachPtrExpr)
          continue;
        SeenAttachPtrComponent = true;

        AttachPtrComponents.emplace_back(Component.getAssociatedExpression(),
                                         Component.getAssociatedDeclaration(),
                                         Component.isNonContiguous());
      }
      assert(!AttachPtrComponents.empty() &&
             "Could not populate component-lists for mapping attach-ptr");

      // Record the new implicit tofrom map for the attach-ptr.
      DeclComponentLists.emplace_back(
          AttachPtrComponents, OMPC_MAP_tofrom, Unknown,
          /*IsImplicit=*/true, /*mapper=*/nullptr, AttachPtrExpr);
    }
  }
9870
  /// For a capture that has an associated clause, generate the base pointers,
  /// section pointers, sizes, map types, and mappers (all included in
  /// \a CurCaptureVarInfo).
  void generateInfoForCaptureFromClauseInfo(
      const MapDataArrayTy &DeclComponentListsFromClauses,
      const CapturedStmt::Capture *Cap, llvm::Value *Arg,
      MapCombinedInfoTy &CurCaptureVarInfo, llvm::OpenMPIRBuilder &OMPBuilder,
      unsigned OffsetForMemberOfFlag) const {
    assert(!Cap->capturesVariableArrayType() &&
           "Not expecting to generate map info for a variable array type!");

    // We need to know when we generating information for the first component
    // `this` captures are keyed by a null decl.
    const ValueDecl *VD = Cap->capturesThis()
                              ? nullptr
                              : Cap->getCapturedVar()->getCanonicalDecl();

    // for map(to: lambda): skip here, processing it in
    // generateDefaultMapInfo
    if (LambdasMap.count(VD))
      return;

    // If this declaration appears in a is_device_ptr clause we just have to
    // pass the pointer by value. If it is a reference to a declaration, we just
    // pass its value.
    if (VD && (DevPointersMap.count(VD) || HasDevAddrsMap.count(VD))) {
      CurCaptureVarInfo.Exprs.push_back(VD);
      CurCaptureVarInfo.BasePointers.emplace_back(Arg);
      CurCaptureVarInfo.DevicePtrDecls.emplace_back(VD);
      CurCaptureVarInfo.DevicePointers.emplace_back(DeviceInfoTy::Pointer);
      CurCaptureVarInfo.Pointers.push_back(Arg);
      // The entry's size is that of a pointer, passed as a literal.
      CurCaptureVarInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
          CGF.getTypeSize(CGF.getContext().VoidPtrTy), CGF.Int64Ty,
          /*isSigned=*/true));
      CurCaptureVarInfo.Types.push_back(
          OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
          OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM);
      CurCaptureVarInfo.Mappers.push_back(nullptr);
      return;
    }

    // Emits the map info for one group of component lists sharing an
    // attach-ptr, appending (in order) the combined entry, the individual
    // field entries, and the attach entry to CurCaptureVarInfo.
    auto GenerateInfoForComponentLists =
        [&](ArrayRef<MapData> DeclComponentListsFromClauses,
            bool IsEligibleForTargetParamFlag) {
          MapCombinedInfoTy CurInfoForComponentLists;
          StructRangeInfoTy PartialStruct;
          AttachInfoTy AttachInfo;

          if (DeclComponentListsFromClauses.empty())
            return;

          generateInfoForCaptureFromComponentLists(
              VD, DeclComponentListsFromClauses, CurInfoForComponentLists,
              PartialStruct, AttachInfo, IsEligibleForTargetParamFlag);

          // If there is an entry in PartialStruct it means we have a
          // struct with individual members mapped. Emit an extra combined
          // entry.
          if (PartialStruct.Base.isValid()) {
            CurCaptureVarInfo.append(PartialStruct.PreliminaryMapData);
            emitCombinedEntry(
                CurCaptureVarInfo, CurInfoForComponentLists.Types,
                PartialStruct, AttachInfo, Cap->capturesThis(), OMPBuilder,
                /*VD=*/nullptr, OffsetForMemberOfFlag,
                /*NotTargetParams*/ !IsEligibleForTargetParamFlag);
          }

          // We do the appends to get the entries in the following order:
          // combined-entry -> individual-field-entries -> attach-entry,
          CurCaptureVarInfo.append(CurInfoForComponentLists);
          if (AttachInfo.isValid())
            emitAttachEntry(CGF, CurCaptureVarInfo, AttachInfo);
        };

    // Group component lists by their AttachPtrExpr and process them in order
    // of increasing complexity (nullptr first, then simple expressions like p,
    // then more complex ones like p[0], etc.)
    //
    // This ensure that we:
    // * handle maps that can contribute towards setting the kernel argument,
    //   (e.g. map(ps), or map(ps[0])), before any that cannot (e.g. ps->pt->d).
    // * allocate a single contiguous storage for all exprs with the same
    //   captured var and having the same attach-ptr.
    //
    // Example: The map clauses below should be handled grouped together based
    // on their attachable-base-pointers:
    // map-clause                | attachable-base-pointer
    // --------------------------+------------------------
    // map(p, ps)                | nullptr
    // map(p[0])                 | p
    // map(p[0]->b, p[0]->c)     | p[0]
    // map(ps->d, ps->e, ps->pt) | ps
    // map(ps->pt->d, ps->pt->e) | ps->pt

    // First, collect all MapData entries with their attach-ptr exprs.
    SmallVector<std::pair<const Expr *, MapData>, 16> AttachPtrMapDataPairs;

    for (const MapData &L : DeclComponentListsFromClauses) {
      // NOTE(review): the declaration line for Components is not visible in
      // this chunk (extraction gap) — verify against upstream.
      std::get<0>(L);
      const Expr *AttachPtrExpr = getAttachPtrExpr(Components);
      AttachPtrMapDataPairs.emplace_back(AttachPtrExpr, L);
    }

    // Next, sort by increasing order of their complexity.
    llvm::stable_sort(AttachPtrMapDataPairs,
                      [this](const auto &LHS, const auto &RHS) {
                        return AttachPtrComparator(LHS.first, RHS.first);
                      });

    bool NoDefaultMappingDoneForVD = CurCaptureVarInfo.BasePointers.empty();
    bool IsFirstGroup = true;

    // And finally, process them all in order, grouping those with
    // equivalent attach-ptr exprs together.
    auto *It = AttachPtrMapDataPairs.begin();
    while (It != AttachPtrMapDataPairs.end()) {
      const Expr *AttachPtrExpr = It->first;

      // Gather the run of entries whose attach-ptr equals this one (sorting
      // above has made equal attach-ptrs adjacent).
      MapDataArrayTy GroupLists;
      while (It != AttachPtrMapDataPairs.end() &&
             (It->first == AttachPtrExpr ||
              AttachPtrComparator.areEqual(It->first, AttachPtrExpr))) {
        GroupLists.push_back(It->second);
        ++It;
      }
      assert(!GroupLists.empty() && "GroupLists should not be empty");

      // Determine if this group of component-lists is eligible for TARGET_PARAM
      // flag. Only the first group processed should be eligible, and only if no
      // default mapping was done.
      bool IsEligibleForTargetParamFlag =
          IsFirstGroup && NoDefaultMappingDoneForVD;

      GenerateInfoForComponentLists(GroupLists, IsEligibleForTargetParamFlag);
      IsFirstGroup = false;
    }
  }
10008
  /// Generate the base pointers, section pointers, sizes, map types, and
  /// mappers associated to \a DeclComponentLists for a given capture
  /// \a VD (all included in \a CurComponentListInfo).
  void generateInfoForCaptureFromComponentLists(
      const ValueDecl *VD, ArrayRef<MapData> DeclComponentLists,
      MapCombinedInfoTy &CurComponentListInfo, StructRangeInfoTy &PartialStruct,
      AttachInfoTy &AttachInfo, bool IsListEligibleForTargetParamFlag) const {
    // Find overlapping elements (including the offset from the base element).
    // NOTE(review): a template-argument line of this map type is not visible
    // in this chunk (extraction gap) — verify against upstream.
    llvm::SmallDenseMap<
        const MapData *,
        llvm::SmallVector<
            4>
        OverlappedData;
    size_t Count = 0;
    // Pairwise compare each component list against all lists after it to
    // detect pairs that describe overlapping storage of the same base.
    for (const MapData &L : DeclComponentLists) {
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      const ValueDecl *Mapper;
      const Expr *VarRef;
      std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
          L;
      ++Count;
      for (const MapData &L1 : ArrayRef(DeclComponentLists).slice(Count)) {
        std::tie(Components1, MapType, MapModifiers, IsImplicit, Mapper,
                 VarRef) = L1;
        // Walk both lists in reverse, i.e. from the base towards the leaf.
        auto CI = Components.rbegin();
        auto CE = Components.rend();
        auto SI = Components1.rbegin();
        auto SE = Components1.rend();
        for (; CI != CE && SI != SE; ++CI, ++SI) {
          if (CI->getAssociatedExpression()->getStmtClass() !=
              SI->getAssociatedExpression()->getStmtClass())
            break;
          // Are we dealing with different variables/fields?
          if (CI->getAssociatedDeclaration() != SI->getAssociatedDeclaration())
            break;
        }
        // Found overlapping if, at least for one component, reached the head
        // of the components list.
        if (CI == CE || SI == SE) {
          // Ignore it if it is the same component.
          if (CI == CE && SI == SE)
            continue;
          const auto It = (SI == SE) ? CI : SI;
          // If one component is a pointer and another one is a kind of
          // dereference of this pointer (array subscript, section, dereference,
          // etc.), it is not an overlapping.
          // Same, if one component is a base and another component is a
          // dereferenced pointer memberexpr with the same base.
          if (!isa<MemberExpr>(It->getAssociatedExpression()) ||
              (std::prev(It)->getAssociatedDeclaration() &&
               std::prev(It)
                   ->getAssociatedDeclaration()
                   ->getType()
                   ->isPointerType()) ||
              (It->getAssociatedDeclaration() &&
               It->getAssociatedDeclaration()->getType()->isPointerType() &&
               std::next(It) != CE && std::next(It) != SE))
            continue;
          const MapData &BaseData = CI == CE ? L : L1;
          // NOTE(review): declaration line for SubData is not visible in this
          // chunk (extraction gap).
          SI == SE ? Components : Components1;
          OverlappedData[&BaseData].push_back(SubData);
        }
      }
    }
    // Sort the overlapped elements for each item.
    llvm::SmallVector<const FieldDecl *, 4> Layout;
    if (!OverlappedData.empty()) {
      // Strip pointer/array layers down to the underlying element type.
      const Type *BaseType = VD->getType().getCanonicalType().getTypePtr();
      const Type *OrigType = BaseType->getPointeeOrArrayElementType();
      while (BaseType != OrigType) {
        BaseType = OrigType->getCanonicalTypeInternal().getTypePtr();
        OrigType = BaseType->getPointeeOrArrayElementType();
      }

      if (const auto *CRD = BaseType->getAsCXXRecordDecl())
        getPlainLayout(CRD, Layout, /*AsBase=*/false);
      else {
        const auto *RD = BaseType->getAsRecordDecl();
        Layout.append(RD->field_begin(), RD->field_end());
      }
    }
    // Order each overlap set by field-declaration order within the record.
    for (auto &Pair : OverlappedData) {
      llvm::stable_sort(
          Pair.getSecond(),
          [&Layout](
              Second) {
            auto CI = First.rbegin();
            auto CE = First.rend();
            auto SI = Second.rbegin();
            auto SE = Second.rend();
            for (; CI != CE && SI != SE; ++CI, ++SI) {
              if (CI->getAssociatedExpression()->getStmtClass() !=
                  SI->getAssociatedExpression()->getStmtClass())
                break;
              // Are we dealing with different variables/fields?
              if (CI->getAssociatedDeclaration() !=
                  SI->getAssociatedDeclaration())
                break;
            }

            // Lists contain the same elements.
            if (CI == CE && SI == SE)
              return false;

            // List with less elements is less than list with more elements.
            if (CI == CE || SI == SE)
              return CI == CE;

            const auto *FD1 = cast<FieldDecl>(CI->getAssociatedDeclaration());
            const auto *FD2 = cast<FieldDecl>(SI->getAssociatedDeclaration());
            if (FD1->getParent() == FD2->getParent())
              return FD1->getFieldIndex() < FD2->getFieldIndex();
            const auto *It =
                llvm::find_if(Layout, [FD1, FD2](const FieldDecl *FD) {
                  return FD == FD1 || FD == FD2;
                });
            return *It == FD1;
          });
    }

    // Associated with a capture, because the mapping flags depend on it.
    // Go through all of the elements with the overlapped elements.
    bool AddTargetParamFlag = IsListEligibleForTargetParamFlag;
    MapCombinedInfoTy StructBaseCombinedInfo;
    for (const auto &Pair : OverlappedData) {
      const MapData &L = *Pair.getFirst();
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      const ValueDecl *Mapper;
      const Expr *VarRef;
      std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
          L;
      ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
          OverlappedComponents = Pair.getSecond();
      generateInfoForComponentList(
          MapType, MapModifiers, {}, Components, CurComponentListInfo,
          StructBaseCombinedInfo, PartialStruct, AttachInfo, AddTargetParamFlag,
          IsImplicit, /*GenerateAllInfoForClauses*/ false, Mapper,
          /*ForDeviceAddr=*/false, VD, VarRef, OverlappedComponents);
      // Only the first emitted list may carry the TARGET_PARAM flag.
      AddTargetParamFlag = false;
    }
    // Go through other elements without overlapped elements.
    for (const MapData &L : DeclComponentLists) {
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      const ValueDecl *Mapper;
      const Expr *VarRef;
      std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
          L;
      auto It = OverlappedData.find(&L);
      if (It == OverlappedData.end())
        generateInfoForComponentList(
            MapType, MapModifiers, {}, Components, CurComponentListInfo,
            StructBaseCombinedInfo, PartialStruct, AttachInfo,
            AddTargetParamFlag, IsImplicit, /*GenerateAllInfoForClauses*/ false,
            Mapper, /*ForDeviceAddr=*/false, VD, VarRef,
            /*OverlappedElements*/ {});
      AddTargetParamFlag = false;
    }
  }
10181
10182 /// Check if a variable should be treated as firstprivate due to explicit
10183 /// firstprivate clause or defaultmap(firstprivate:...).
10184 bool isEffectivelyFirstprivate(const VarDecl *VD, QualType Type) const {
10185 // Check explicit firstprivate clauses (not implicit from defaultmap)
10186 auto I = FirstPrivateDecls.find(VD);
10187 if (I != FirstPrivateDecls.end() && !I->getSecond())
10188 return true; // Explicit firstprivate only
10189
10190 // Check defaultmap(firstprivate:scalar) for scalar types
10191 if (DefaultmapFirstprivateKinds.count(OMPC_DEFAULTMAP_scalar)) {
10192 if (Type->isScalarType())
10193 return true;
10194 }
10195
10196 // Check defaultmap(firstprivate:pointer) for pointer types
10197 if (DefaultmapFirstprivateKinds.count(OMPC_DEFAULTMAP_pointer)) {
10198 if (Type->isAnyPointerType())
10199 return true;
10200 }
10201
10202 // Check defaultmap(firstprivate:aggregate) for aggregate types
10203 if (DefaultmapFirstprivateKinds.count(OMPC_DEFAULTMAP_aggregate)) {
10204 if (Type->isAggregateType())
10205 return true;
10206 }
10207
10208 // Check defaultmap(firstprivate:all) for all types
10209 return DefaultmapFirstprivateKinds.count(OMPC_DEFAULTMAP_all);
10210 }
10211
  /// Generate the default map information for a given capture \a CI,
  /// record field declaration \a RI and captured value \a CV.
  void generateDefaultMapInfo(const CapturedStmt::Capture &CI,
                              const FieldDecl &RI, llvm::Value *CV,
                              MapCombinedInfoTy &CombinedInfo) const {
    bool IsImplicit = true;
    // Do the default mapping.
    if (CI.capturesThis()) {
      // `this` capture: map the pointee object, tofrom, sized by its type.
      CombinedInfo.Exprs.push_back(nullptr);
      CombinedInfo.BasePointers.push_back(CV);
      CombinedInfo.DevicePtrDecls.push_back(nullptr);
      CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
      CombinedInfo.Pointers.push_back(CV);
      const auto *PtrTy = cast<PointerType>(RI.getType().getTypePtr());
      CombinedInfo.Sizes.push_back(
          CGF.Builder.CreateIntCast(CGF.getTypeSize(PtrTy->getPointeeType()),
                                    CGF.Int64Ty, /*isSigned=*/true));
      // Default map type.
      CombinedInfo.Types.push_back(OpenMPOffloadMappingFlags::OMP_MAP_TO |
                                   OpenMPOffloadMappingFlags::OMP_MAP_FROM);
    } else if (CI.capturesVariableByCopy()) {
      // By-copy capture: the value itself travels as the kernel argument.
      const VarDecl *VD = CI.getCapturedVar();
      CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
      CombinedInfo.BasePointers.push_back(CV);
      CombinedInfo.DevicePtrDecls.push_back(nullptr);
      CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
      CombinedInfo.Pointers.push_back(CV);
      bool IsFirstprivate =
          isEffectivelyFirstprivate(VD, RI.getType().getNonReferenceType());

      if (!RI.getType()->isAnyPointerType()) {
        // We have to signal to the runtime captures passed by value that are
        // not pointers.
        CombinedInfo.Types.push_back(
            OpenMPOffloadMappingFlags::OMP_MAP_LITERAL);
        CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
            CGF.getTypeSize(RI.getType()), CGF.Int64Ty, /*isSigned=*/true));
      } else if (IsFirstprivate) {
        // Firstprivate pointers should be passed by value (as literals)
        // without performing a present table lookup at runtime.
        CombinedInfo.Types.push_back(
            OpenMPOffloadMappingFlags::OMP_MAP_LITERAL);
        // Use zero size for pointer literals (just passing the pointer value)
        CombinedInfo.Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
      } else {
        // Pointers are implicitly mapped with a zero size and no flags
        // (other than first map that is added for all implicit maps).
        CombinedInfo.Types.push_back(OpenMPOffloadMappingFlags::OMP_MAP_NONE);
        CombinedInfo.Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
      }
      // An explicit firstprivate clause overrides the implicit-map flag.
      auto I = FirstPrivateDecls.find(VD);
      if (I != FirstPrivateDecls.end())
        IsImplicit = I->getSecond();
    } else {
      assert(CI.capturesVariable() && "Expected captured reference.");
      // By-reference capture: map the referenced storage.
      const auto *PtrTy = cast<ReferenceType>(RI.getType().getTypePtr());
      QualType ElementType = PtrTy->getPointeeType();
      const VarDecl *VD = CI.getCapturedVar();
      bool IsFirstprivate = isEffectivelyFirstprivate(VD, ElementType);
      CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
      CombinedInfo.BasePointers.push_back(CV);
      CombinedInfo.DevicePtrDecls.push_back(nullptr);
      CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);

      // For firstprivate pointers, pass by value instead of dereferencing
      if (IsFirstprivate && ElementType->isAnyPointerType()) {
        // Treat as a literal value (pass the pointer value itself)
        CombinedInfo.Pointers.push_back(CV);
        // Use zero size for pointer literals
        CombinedInfo.Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
        CombinedInfo.Types.push_back(
            OpenMPOffloadMappingFlags::OMP_MAP_LITERAL);
      } else {
        CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
            CGF.getTypeSize(ElementType), CGF.Int64Ty, /*isSigned=*/true));
        // The default map type for a scalar/complex type is 'to' because by
        // default the value doesn't have to be retrieved. For an aggregate
        // type, the default is 'tofrom'.
        CombinedInfo.Types.push_back(getMapModifiersForPrivateClauses(CI));
        CombinedInfo.Pointers.push_back(CV);
      }
      // An explicit firstprivate clause overrides the implicit-map flag.
      auto I = FirstPrivateDecls.find(VD);
      if (I != FirstPrivateDecls.end())
        IsImplicit = I->getSecond();
    }
    // Every default map produces a single argument which is a target parameter.
    CombinedInfo.Types.back() |=
        OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;

    // Add flag stating this is an implicit map.
    if (IsImplicit)
      CombinedInfo.Types.back() |= OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT;

    // No user-defined mapper for default mapping.
    CombinedInfo.Mappers.push_back(nullptr);
  }
10308};
10309} // anonymous namespace
10310
// Try to extract the base declaration from a `this->x` expression if possible.
// NOTE(review): the function signature line is not visible in this chunk
// (extraction gap); the call site uses it as getDeclFromThisExpr(const Expr*).
  if (!E)
    return nullptr;

  // Only array sections whose base is a member expression (e.g.
  // `this->x[0:n]`) yield a decl; everything else returns nullptr.
  if (const auto *OASE = dyn_cast<ArraySectionExpr>(E->IgnoreParenCasts()))
    if (const MemberExpr *ME =
            dyn_cast<MemberExpr>(OASE->getBase()->IgnoreParenImpCasts()))
      return ME->getMemberDecl();
  return nullptr;
}
10322
/// Emit a string constant containing the names of the values mapped to the
/// offloading runtime library.
static llvm::Constant *
emitMappingInformation(CodeGenFunction &CGF, llvm::OpenMPIRBuilder &OMPBuilder,
                       MappableExprsHandler::MappingExprInfo &MapExprs) {

  uint32_t SrcLocStrSize;
  // Neither a decl nor an expr to name: fall back to the default string.
  if (!MapExprs.getMapDecl() && !MapExprs.getMapExpr())
    return OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize);

  SourceLocation Loc;
  if (!MapExprs.getMapDecl() && MapExprs.getMapExpr()) {
    // Prefer the member decl's location for `this->x`-style expressions.
    if (const ValueDecl *VD = getDeclFromThisExpr(MapExprs.getMapExpr()))
      Loc = VD->getLocation();
    else
      Loc = MapExprs.getMapExpr()->getExprLoc();
  } else {
    Loc = MapExprs.getMapDecl()->getLocation();
  }

  // Pretty-print the mapped expression, or fall back to the decl's name.
  std::string ExprName;
  if (MapExprs.getMapExpr()) {
    // NOTE(review): the declaration line of the printing policy `P` is not
    // visible in this chunk (extraction gap).
    llvm::raw_string_ostream OS(ExprName);
    MapExprs.getMapExpr()->printPretty(OS, nullptr, P);
  } else {
    ExprName = MapExprs.getMapDecl()->getNameAsString();
  }

  std::string FileName;
  // NOTE(review): the declaration line of `PLoc` is not visible in this
  // chunk (extraction gap).
  // With debug info, remap the path through any debug prefix maps.
  if (auto *DbgInfo = CGF.getDebugInfo())
    FileName = DbgInfo->remapDIPath(PLoc.getFilename());
  else
    FileName = PLoc.getFilename();
  return OMPBuilder.getOrCreateSrcLocStr(FileName, ExprName, PLoc.getLine(),
                                         PLoc.getColumn(), SrcLocStrSize);
}
/// Emit the arrays used to pass the captures and map information to the
/// offloading runtime library. If there is no map or capture information,
/// return nullptr by reference.
/// NOTE(review): the function signature line is not visible in this chunk
/// (extraction gap); only the parameter list continuation is shown.
    CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
    CGOpenMPRuntime::TargetDataInfo &Info, llvm::OpenMPIRBuilder &OMPBuilder,
    bool IsNonContiguous = false, bool ForEndCall = false) {
  CodeGenModule &CGM = CGF.CGM;

  // Insertion points: allocas go to the function's alloca block, the
  // runtime-argument setup goes at the current insertion point.
  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
  InsertPointTy AllocaIP(CGF.AllocaInsertPt->getParent(),
                         CGF.AllocaInsertPt->getIterator());
  InsertPointTy CodeGenIP(CGF.Builder.GetInsertBlock(),
                          CGF.Builder.GetInsertPoint());

  // Record the device-side address the builder picked for each device decl.
  auto DeviceAddrCB = [&](unsigned int I, llvm::Value *NewDecl) {
    if (const ValueDecl *DevVD = CombinedInfo.DevicePtrDecls[I]) {
      Info.CaptureDeviceAddrMap.try_emplace(DevVD, NewDecl);
    }
  };

  // Resolve the user-defined mapper function for entry I, if any.
  auto CustomMapperCB = [&](unsigned int I) {
    llvm::Function *MFunc = nullptr;
    if (CombinedInfo.Mappers[I]) {
      Info.HasMapper = true;
      // NOTE(review): the assignment line for MFunc is not visible in this
      // chunk (extraction gap).
          cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I]));
    }
    return MFunc;
  };
  cantFail(OMPBuilder.emitOffloadingArraysAndArgs(
      AllocaIP, CodeGenIP, Info, Info.RTArgs, CombinedInfo, CustomMapperCB,
      IsNonContiguous, ForEndCall, DeviceAddrCB));
}
10395
/// Check for inner distribute directive.
/// NOTE(review): the signature line is not visible in this chunk
/// (extraction gap).
static const OMPExecutableDirective *
  const auto *CS = D.getInnermostCapturedStmt();
  const auto *Body =
      CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
  // NOTE(review): the initializer line for ChildStmt is not visible in this
  // chunk (extraction gap).
  const Stmt *ChildStmt =

  if (const auto *NestedDir =
          dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
    OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind();
    switch (D.getDirectiveKind()) {
    case OMPD_target:
      // For now, treat 'target' with nested 'teams loop' as if it's
      // distributed (target teams distribute).
      if (isOpenMPDistributeDirective(DKind) || DKind == OMPD_teams_loop)
        return NestedDir;
      if (DKind == OMPD_teams) {
        // Look one level deeper: target -> teams -> distribute.
        Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(
            /*IgnoreCaptured=*/true);
        if (!Body)
          return nullptr;
        ChildStmt = CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);
        if (const auto *NND =
                dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
          DKind = NND->getDirectiveKind();
          if (isOpenMPDistributeDirective(DKind))
            return NND;
        }
      }
      return nullptr;
    case OMPD_target_teams:
      if (isOpenMPDistributeDirective(DKind))
        return NestedDir;
      return nullptr;
    case OMPD_target_parallel:
    case OMPD_target_simd:
    case OMPD_target_parallel_for:
    case OMPD_target_parallel_for_simd:
      // These target forms cannot contain a nested distribute to report.
      return nullptr;
    // Any other outer directive kind is not expected by this helper.
    case OMPD_target_teams_distribute:
    case OMPD_target_teams_distribute_simd:
    case OMPD_target_teams_distribute_parallel_for:
    case OMPD_target_teams_distribute_parallel_for_simd:
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_master:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_tile:
    case OMPD_unroll:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_depobj:
    case OMPD_scan:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_target_exit_data:
    case OMPD_target_enter_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_target_update:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_begin_declare_variant:
    case OMPD_end_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_requires:
    case OMPD_metadirective:
    case OMPD_unknown:
    default:
      llvm_unreachable("Unexpected directive.");
    }
  }

  return nullptr;
}
10507
/// Emit the user-defined mapper function. The code generation follows the
/// pattern in the example below.
/// \code
/// void .omp_mapper.<type_name>.<mapper_id>.(void *rt_mapper_handle,
///                                           void *base, void *begin,
///                                           int64_t size, int64_t type,
///                                           void *name = nullptr) {
///   // Allocate space for an array section first.
///   if ((size > 1 || (base != begin)) && !maptype.IsDelete)
///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
///                                 size*sizeof(Ty), clearToFromMember(type));
///   // Map members.
///   for (unsigned i = 0; i < size; i++) {
///     // For each component specified by this mapper:
///     for (auto c : begin[i]->all_components) {
///       if (c.hasMapper())
///         (*c.Mapper())(rt_mapper_handle, c.arg_base, c.arg_begin, c.arg_size,
///                       c.arg_type, c.arg_name);
///       else
///         __tgt_push_mapper_component(rt_mapper_handle, c.arg_base,
///                                     c.arg_begin, c.arg_size, c.arg_type,
///                                     c.arg_name);
///     }
///   }
///   // Delete the array section.
///   if (size > 1 && maptype.IsDelete)
///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
///                                 size*sizeof(Ty), clearToFromMember(type));
/// }
/// \endcode
/// NOTE(review): the signature line is not visible in this chunk
/// (extraction gap); only the trailing parameter is shown.
                                         CodeGenFunction *CGF) {
  // Each declare-mapper is emitted at most once per module.
  if (UDMMap.count(D) > 0)
    return;
  ASTContext &C = CGM.getContext();
  QualType Ty = D->getType();
  // NOTE(review): the initializer line of MapperVarDecl is not visible in
  // this chunk (extraction gap).
  auto *MapperVarDecl =
  CharUnits ElementSize = C.getTypeSizeInChars(Ty);
  llvm::Type *ElemTy = CGM.getTypes().ConvertTypeForMem(Ty);

  CodeGenFunction MapperCGF(CGM);
  MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
  // Callback: privatize the mapper's declared variable as the current array
  // element, then generate the map info for the mapper's clauses.
  auto PrivatizeAndGenMapInfoCB =
      [&](llvm::OpenMPIRBuilder::InsertPointTy CodeGenIP, llvm::Value *PtrPHI,
          llvm::Value *BeginArg) -> llvm::OpenMPIRBuilder::MapInfosTy & {
    MapperCGF.Builder.restoreIP(CodeGenIP);

    // Privatize the declared variable of mapper to be the current array
    // element.
    Address PtrCurrent(
        PtrPHI, ElemTy,
        Address(BeginArg, MapperCGF.VoidPtrTy, CGM.getPointerAlign())
            .getAlignment()
            .alignmentOfArrayElement(ElementSize));
    // NOTE(review): the declaration line of Scope is not visible in this
    // chunk (extraction gap).
    Scope.addPrivate(MapperVarDecl, PtrCurrent);
    (void)Scope.Privatize();

    // Get map clause information.
    MappableExprsHandler MEHandler(*D, MapperCGF);
    MEHandler.generateAllInfoForMapper(CombinedInfo, OMPBuilder);

    // With debug info enabled, attach pretty-printed names to each entry.
    auto FillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) {
      return emitMappingInformation(MapperCGF, OMPBuilder, MapExpr);
    };
    if (CGM.getCodeGenOpts().getDebugInfo() !=
        llvm::codegenoptions::NoDebugInfo) {
      CombinedInfo.Names.resize(CombinedInfo.Exprs.size());
      llvm::transform(CombinedInfo.Exprs, CombinedInfo.Names.begin(),
                      FillInfoMap);
    }

    return CombinedInfo;
  };

  // Callback: resolve a nested user-defined mapper for entry I, if attached.
  auto CustomMapperCB = [&](unsigned I) {
    llvm::Function *MapperFunc = nullptr;
    if (CombinedInfo.Mappers[I]) {
      // Call the corresponding mapper function.
      // NOTE(review): the assignment line of MapperFunc is not visible in
      // this chunk (extraction gap).
          cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I]));
      assert(MapperFunc && "Expect a valid mapper function is available.");
    }
    return MapperFunc;
  };

  // Name the mapper after the mangled canonical type and the mapper's name.
  SmallString<64> TyStr;
  llvm::raw_svector_ostream Out(TyStr);
  CGM.getCXXABI().getMangleContext().mangleCanonicalTypeName(Ty, Out);
  std::string Name = getName({"omp_mapper", TyStr, D->getName()});

  llvm::Function *NewFn = cantFail(OMPBuilder.emitUserDefinedMapper(
      PrivatizeAndGenMapInfoCB, ElemTy, Name, CustomMapperCB));
  // Cache the emitted function, and track it per enclosing function.
  UDMMap.try_emplace(D, NewFn);
  if (CGF)
    FunctionUDMMap[CGF->CurFn].push_back(D);
}
10606
// NOTE(review): the signature line is not visible in this chunk (extraction
// gap); only the trailing parameter of this lookup function is shown.
    const OMPDeclareMapperDecl *D) {
  // Return the cached mapper function when it was already emitted.
  auto I = UDMMap.find(D);
  if (I != UDMMap.end())
    return I->second;
  // NOTE(review): a line between the early return and this lookup is not
  // visible in this chunk (extraction gap).
  return UDMMap.lookup(D);
}
10615
// NOTE(review): the signature lines of this function are not visible in this
// chunk (extraction gap); only the SizeEmitter parameter is shown.
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter) {
  OpenMPDirectiveKind Kind = D.getDirectiveKind();
  const OMPExecutableDirective *TD = &D;
  // Get nested teams distribute kind directive, if any. For now, treat
  // 'target_teams_loop' as if it's really a target_teams_distribute.
  if ((!isOpenMPDistributeDirective(Kind) || !isOpenMPTeamsDirective(Kind)) &&
      Kind != OMPD_target_teams_loop)
    TD = getNestedDistributeDirective(CGM.getContext(), D);
  // No distribute directive found: emit 0 as the trip count.
  if (!TD)
    return llvm::ConstantInt::get(CGF.Int64Ty, 0);

  // Let the caller-provided emitter compute the iteration count; fall back
  // to 0 when it cannot.
  const auto *LD = cast<OMPLoopDirective>(TD);
  if (llvm::Value *NumIterations = SizeEmitter(CGF, *LD))
    return NumIterations;
  return llvm::ConstantInt::get(CGF.Int64Ty, 0);
}
10636
// Emits the host fallback for a target region: either a trap (when
// offloading is mandatory and falling back is not allowed) or a direct
// call to the host version of the outlined function.
// NOTE(review): one parameter line (rendered 10640) is elided in this
// listing — presumably the 'CapturedVars' SmallVectorImpl reference used
// below.
10637static void
10638emitTargetCallFallback(CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn,
10639 const OMPExecutableDirective &D,
10641 bool RequiresOuterTask, const CapturedStmt &CS,
10642 bool OffloadingMandatory, CodeGenFunction &CGF) {
10643 if (OffloadingMandatory) {
// Host execution is not permitted: make the fallback path unreachable.
10644 CGF.Builder.CreateUnreachable();
10645 } else {
// Inside an outer task the captures must be re-generated in the task
// context before calling the outlined function.
10646 if (RequiresOuterTask) {
10647 CapturedVars.clear();
10648 CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
10649 }
10650 llvm::SmallVector<llvm::Value *, 16> Args(CapturedVars.begin(),
10651 CapturedVars.end());
// Trailing null stands in for the implicit dyn_ptr kernel argument.
10652 Args.push_back(llvm::Constant::getNullValue(CGF.Builder.getPtrTy()));
10653 OMPRuntime->emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn,
10654 Args);
10655 }
10656}
10657
10658static llvm::Value *emitDeviceID(
10659 llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
10660 CodeGenFunction &CGF) {
10661 // Emit device ID if any.
10662 llvm::Value *DeviceID;
10663 if (Device.getPointer()) {
10664 assert((Device.getInt() == OMPC_DEVICE_unknown ||
10665 Device.getInt() == OMPC_DEVICE_device_num) &&
10666 "Expected device_num modifier.");
10667 llvm::Value *DevVal = CGF.EmitScalarExpr(Device.getPointer());
10668 DeviceID =
10669 CGF.Builder.CreateIntCast(DevVal, CGF.Int64Ty, /*isSigned=*/true);
10670 } else {
10671 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
10672 }
10673 return DeviceID;
10674}
10675
// Computes the dynamic cgroup-memory size (Int32) and the fallback policy
// for a target directive, from either a dyn_groupprivate clause or the
// extension ompx_dyn_cgroup_mem clause.  Defaults to {0, Abort}.
// NOTE(review): the function name/parameter line (rendered 10677) and one
// line inside the default_mem case (rendered 10696) are elided in this
// listing — confirm against the original file.
10676static std::pair<llvm::Value *, OMPDynGroupprivateFallbackType>
10678 llvm::Value *DynGP = CGF.Builder.getInt32(0);
10679 auto DynGPFallback = OMPDynGroupprivateFallbackType::Abort;
10680
10681 if (auto *DynGPClause = D.getSingleClause<OMPDynGroupprivateClause>()) {
10682 CodeGenFunction::RunCleanupsScope DynGPScope(CGF);
10683 llvm::Value *DynGPVal =
10684 CGF.EmitScalarExpr(DynGPClause->getSize(), /*IgnoreResultAssign=*/true);
// The runtime expects an unsigned 32-bit size.
10685 DynGP = CGF.Builder.CreateIntCast(DynGPVal, CGF.Int32Ty,
10686 /*isSigned=*/false);
10687 auto FallbackModifier = DynGPClause->getDynGroupprivateFallbackModifier();
10688 switch (FallbackModifier) {
10689 case OMPC_DYN_GROUPPRIVATE_FALLBACK_abort:
10690 DynGPFallback = OMPDynGroupprivateFallbackType::Abort;
10691 break;
10692 case OMPC_DYN_GROUPPRIVATE_FALLBACK_null:
10693 DynGPFallback = OMPDynGroupprivateFallbackType::Null;
10694 break;
10695 case OMPC_DYN_GROUPPRIVATE_FALLBACK_default_mem:
10697 // This is the default for dyn_groupprivate.
10698 DynGPFallback = OMPDynGroupprivateFallbackType::DefaultMem;
10699 break;
10700 default:
10701 llvm_unreachable("Unknown fallback modifier for OpenMP dyn_groupprivate");
10702 }
// Legacy extension clause: size only, fallback stays at the Abort default.
10703 } else if (auto *OMPXDynCGClause =
10704 D.getSingleClause<OMPXDynCGroupMemClause>()) {
10705 CodeGenFunction::RunCleanupsScope DynCGMemScope(CGF);
10706 llvm::Value *DynCGMemVal = CGF.EmitScalarExpr(OMPXDynCGClause->getSize(),
10707 /*IgnoreResultAssign=*/true);
10708 DynGP = CGF.Builder.CreateIntCast(DynCGMemVal, CGF.Int32Ty,
10709 /*isSigned=*/false);
10710 }
10711 return {DynGP, DynGPFallback};
10712}
10713
// Builds map information (base pointers, pointers, sizes, map types,
// mappers) for every capture of the target region's CapturedStmt, and
// appends it to CombinedInfo.  Records every mapped declaration in
// MappedVarSet so later passes can skip them.
// NOTE(review): the function name line (rendered 10714) and several other
// lines (rendered 10724, 10748, 10756-10757, 10774) are elided in this
// listing — confirm details against the original file.
10715 MappableExprsHandler &MEHandler, CodeGenFunction &CGF,
10716 const CapturedStmt &CS, llvm::SmallVectorImpl<llvm::Value *> &CapturedVars,
10717 llvm::OpenMPIRBuilder &OMPBuilder,
10718 llvm::DenseSet<CanonicalDeclPtr<const Decl>> &MappedVarSet,
10719 MappableExprsHandler::MapCombinedInfoTy &CombinedInfo) {
10720
10721 llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers;
// Walk captures, captured-record fields and captured values in lockstep.
10722 auto RI = CS.getCapturedRecordDecl()->field_begin();
10723 auto *CV = CapturedVars.begin();
10725 CE = CS.capture_end();
10726 CI != CE; ++CI, ++RI, ++CV) {
10727 MappableExprsHandler::MapCombinedInfoTy CurInfo;
10728
10729 // VLA sizes are passed to the outlined region by copy and do not have map
10730 // information associated.
10731 if (CI->capturesVariableArrayType()) {
10732 CurInfo.Exprs.push_back(nullptr);
10733 CurInfo.BasePointers.push_back(*CV);
10734 CurInfo.DevicePtrDecls.push_back(nullptr);
10735 CurInfo.DevicePointers.push_back(
10736 MappableExprsHandler::DeviceInfoTy::None);
10737 CurInfo.Pointers.push_back(*CV);
10738 CurInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
10739 CGF.getTypeSize(RI->getType()), CGF.Int64Ty, /*isSigned=*/true));
10740 // Copy to the device as an argument. No need to retrieve it.
10741 CurInfo.Types.push_back(OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
10742 OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM |
10743 OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT);
10744 CurInfo.Mappers.push_back(nullptr);
10745 } else {
// For 'this' captures there is no named declaration to look up.
10746 const ValueDecl *CapturedVD =
10747 CI->capturesThis() ? nullptr
10749 bool HasEntryWithCVAsAttachPtr = false;
10750 if (CapturedVD)
10751 HasEntryWithCVAsAttachPtr =
10752 MEHandler.hasAttachEntryForCapturedVar(CapturedVD);
10753
10754 // Populate component lists for the captured variable from clauses.
10755 MappableExprsHandler::MapDataArrayTy DeclComponentLists;
10758 StorageForImplicitlyAddedComponentLists;
10759 MEHandler.populateComponentListsForNonLambdaCaptureFromClauses(
10760 CapturedVD, DeclComponentLists,
10761 StorageForImplicitlyAddedComponentLists);
10762
10763 // OpenMP 6.0, 15.8, target construct, restrictions:
10764 // * A list item in a map clause that is specified on a target construct
10765 // must have a base variable or base pointer.
10766 //
10767 // Map clauses on a target construct must either have a base pointer, or a
10768 // base-variable. So, if we don't have a base-pointer, that means that it
10769 // must have a base-variable, i.e. we have a map like `map(s)`, `map(s.x)`
10770 // etc. In such cases, we do not need to handle default map generation
10771 // for `s`.
10772 bool HasEntryWithoutAttachPtr =
10773 llvm::any_of(DeclComponentLists, [&](const auto &MapData) {
10775 Components = std::get<0>(MapData);
10776 return !MEHandler.getAttachPtrExpr(Components);
10777 });
10778
10779 // Generate default map info first if there's no direct map with CV as
10780 // the base-variable, or attach pointer.
10781 if (DeclComponentLists.empty() ||
10782 (!HasEntryWithCVAsAttachPtr && !HasEntryWithoutAttachPtr))
10783 MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurInfo);
10784
10785 // If we have any information in the map clause, we use it, otherwise we
10786 // just do a default mapping.
10787 MEHandler.generateInfoForCaptureFromClauseInfo(
10788 DeclComponentLists, CI, *CV, CurInfo, OMPBuilder,
10789 /*OffsetForMemberOfFlag=*/CombinedInfo.BasePointers.size());
10790
// 'this' captures are recorded under the null key.
10791 if (!CI->capturesThis())
10792 MappedVarSet.insert(CI->getCapturedVar());
10793 else
10794 MappedVarSet.insert(nullptr);
10795
10796 // Generate correct mapping for variables captured by reference in
10797 // lambdas.
10798 if (CI->capturesVariable())
10799 MEHandler.generateInfoForLambdaCaptures(CI->getCapturedVar(), *CV,
10800 CurInfo, LambdaPointers);
10801 }
10802 // We expect to have at least an element of information for this capture.
10803 assert(!CurInfo.BasePointers.empty() &&
10804 "Non-existing map pointer for capture!");
10805 assert(CurInfo.BasePointers.size() == CurInfo.Pointers.size() &&
10806 CurInfo.BasePointers.size() == CurInfo.Sizes.size() &&
10807 CurInfo.BasePointers.size() == CurInfo.Types.size() &&
10808 CurInfo.BasePointers.size() == CurInfo.Mappers.size() &&
10809 "Inconsistent map information sizes!");
10810
10811 // We need to append the results of this capture to what we already have.
10812 CombinedInfo.append(CurInfo);
10813 }
10814 // Adjust MEMBER_OF flags for the lambdas captures.
10815 MEHandler.adjustMemberOfForLambdaCaptures(
10816 OMPBuilder, LambdaPointers, CombinedInfo.BasePointers,
10817 CombinedInfo.Pointers, CombinedInfo.Types);
10818}
10819static void
10820genMapInfo(MappableExprsHandler &MEHandler, CodeGenFunction &CGF,
10821 MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
10822 llvm::OpenMPIRBuilder &OMPBuilder,
10823 const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkippedVarSet =
10824 llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) {
10825
10826 CodeGenModule &CGM = CGF.CGM;
10827 // Map any list items in a map clause that were not captures because they
10828 // weren't referenced within the construct.
10829 MEHandler.generateAllInfo(CombinedInfo, OMPBuilder, SkippedVarSet);
10830
10831 auto FillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) {
10832 return emitMappingInformation(CGF, OMPBuilder, MapExpr);
10833 };
10834 if (CGM.getCodeGenOpts().getDebugInfo() !=
10835 llvm::codegenoptions::NoDebugInfo) {
10836 CombinedInfo.Names.resize(CombinedInfo.Exprs.size());
10837 llvm::transform(CombinedInfo.Exprs, CombinedInfo.Names.begin(),
10838 FillInfoMap);
10839 }
10840}
10841
// Convenience overload: builds the full CombinedInfo for a directive by
// first mapping all captures, then all non-captured map-clause items.
// NOTE(review): the leading signature lines (rendered 10842 and 10844) are
// elided in this listing — presumably 'static void genMapInfo(const
// OMPExecutableDirective &D, CodeGenFunction &CGF, ... CapturedVars,' —
// confirm against the original file.
10843 const CapturedStmt &CS,
10845 llvm::OpenMPIRBuilder &OMPBuilder,
10846 MappableExprsHandler::MapCombinedInfoTy &CombinedInfo) {
10847 // Get mappable expression information.
10848 MappableExprsHandler MEHandler(D, CGF);
10849 llvm::DenseSet<CanonicalDeclPtr<const Decl>> MappedVarSet;
10850
10851 genMapInfoForCaptures(MEHandler, CGF, CS, CapturedVars, OMPBuilder,
10852 MappedVarSet, CombinedInfo);
// Captured declarations are skipped to avoid duplicate map entries.
10853 genMapInfo(MEHandler, CGF, CombinedInfo, OMPBuilder, MappedVarSet);
10854}
10855
// Evaluates every expression of the single ClauseTy clause on D (required
// to be present for ompx_bare kernels) and appends the Int32-cast results
// to the output vector.
// NOTE(review): the function name line (rendered 10858) and two lines
// (rendered 10860, 10864) are elided in this listing — the elided lines
// presumably declare the output SmallVectorImpl<llvm::Value *> parameter
// named 'Values' — confirm against the original file.
10856template <typename ClauseTy>
10857static void
10859 const OMPExecutableDirective &D,
10861 const auto *C = D.getSingleClause<ClauseTy>();
10862 assert(!C->varlist_empty() &&
10863 "ompx_bare requires explicit num_teams and thread_limit");
10865 for (auto *E : C->varlist()) {
10866 llvm::Value *V = CGF.EmitScalarExpr(E);
10867 Values.push_back(
10868 CGF.Builder.CreateIntCast(V, CGF.Int32Ty, /*isSigned=*/true));
10869 }
10870}
10871
// Emits the actual kernel launch for a target region: builds the offload
// argument arrays from the map info, then generates a guarded call to the
// device kernel with a host fallback.  Left byte-identical: the control
// flow depends on exact statement order and several rendered lines are
// elided (10872 header, 10888, 10958-10963).
10873 CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn,
10874 const OMPExecutableDirective &D,
10875 llvm::SmallVectorImpl<llvm::Value *> &CapturedVars, bool RequiresOuterTask,
10876 const CapturedStmt &CS, bool OffloadingMandatory,
10877 llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
10878 llvm::Value *OutlinedFnID, CodeGenFunction::OMPTargetDataInfo &InputInfo,
10879 llvm::Value *&MapTypesArray, llvm::Value *&MapNamesArray,
10880 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
10881 const OMPLoopDirective &D)>
10882 SizeEmitter,
10883 CodeGenFunction &CGF, CodeGenModule &CGM) {
10884 llvm::OpenMPIRBuilder &OMPBuilder = OMPRuntime->getOMPBuilder();
10885
10886 // Fill up the arrays with all the captured variables.
10887 MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
10889 genMapInfo(D, CGF, CS, CapturedVars, OMPBuilder, CombinedInfo);
10890
10891 // Append a null entry for the implicit dyn_ptr argument.
10892 using OpenMPOffloadMappingFlags = llvm::omp::OpenMPOffloadMappingFlags;
10893 auto *NullPtr = llvm::Constant::getNullValue(CGF.Builder.getPtrTy());
10894 CombinedInfo.BasePointers.push_back(NullPtr);
10895 CombinedInfo.Pointers.push_back(NullPtr);
10896 CombinedInfo.DevicePointers.push_back(
10897 llvm::OpenMPIRBuilder::DeviceInfoTy::None);
10898 CombinedInfo.Sizes.push_back(CGF.Builder.getInt64(0));
10899 CombinedInfo.Types.push_back(OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM |
10900 OpenMPOffloadMappingFlags::OMP_MAP_LITERAL);
// Names is only populated in debug builds; keep it in sync if present.
10901 if (!CombinedInfo.Names.empty())
10902 CombinedInfo.Names.push_back(NullPtr);
10903 CombinedInfo.Exprs.push_back(nullptr);
10904 CombinedInfo.Mappers.push_back(nullptr);
10905 CombinedInfo.DevicePtrDecls.push_back(nullptr);
10906
10907 emitOffloadingArraysAndArgs(CGF, CombinedInfo, Info, OMPBuilder,
10908 /*IsNonContiguous=*/true, /*ForEndCall=*/false);
10909
// Publish the generated runtime arrays through InputInfo / out-params so
// an enclosing task-based directive can reference them.
10910 InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
10911 InputInfo.BasePointersArray = Address(Info.RTArgs.BasePointersArray,
10912 CGF.VoidPtrTy, CGM.getPointerAlign());
10913 InputInfo.PointersArray =
10914 Address(Info.RTArgs.PointersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
10915 InputInfo.SizesArray =
10916 Address(Info.RTArgs.SizesArray, CGF.Int64Ty, CGM.getPointerAlign());
10917 InputInfo.MappersArray =
10918 Address(Info.RTArgs.MappersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
10919 MapTypesArray = Info.RTArgs.MapTypesArray;
10920 MapNamesArray = Info.RTArgs.MapNamesArray;
10921
10922 auto &&ThenGen = [&OMPRuntime, OutlinedFn, &D, &CapturedVars,
10923 RequiresOuterTask, &CS, OffloadingMandatory, Device,
10924 OutlinedFnID, &InputInfo, &MapTypesArray, &MapNamesArray,
10925 SizeEmitter](CodeGenFunction &CGF, PrePostActionTy &) {
10926 bool IsReverseOffloading = Device.getInt() == OMPC_DEVICE_ancestor;
10927
10928 if (IsReverseOffloading) {
10929 // Reverse offloading is not supported, so just execute on the host.
10930 // FIXME: This fallback solution is incorrect since it ignores the
10931 // OMP_TARGET_OFFLOAD environment variable. Instead it would be better to
10932 // assert here and ensure SEMA emits an error.
10933 emitTargetCallFallback(OMPRuntime, OutlinedFn, D, CapturedVars,
10934 RequiresOuterTask, CS, OffloadingMandatory, CGF);
10935 return;
10936 }
10937
10938 bool HasNoWait = D.hasClausesOfKind<OMPNowaitClause>();
10939 unsigned NumTargetItems = InputInfo.NumberOfTargetItems;
10940
10941 llvm::Value *BasePointersArray =
10942 InputInfo.BasePointersArray.emitRawPointer(CGF);
10943 llvm::Value *PointersArray = InputInfo.PointersArray.emitRawPointer(CGF);
10944 llvm::Value *SizesArray = InputInfo.SizesArray.emitRawPointer(CGF);
10945 llvm::Value *MappersArray = InputInfo.MappersArray.emitRawPointer(CGF);
10946
// Host-fallback callback invoked by the IR builder when the launch fails.
10947 auto &&EmitTargetCallFallbackCB =
10948 [&OMPRuntime, OutlinedFn, &D, &CapturedVars, RequiresOuterTask, &CS,
10949 OffloadingMandatory, &CGF](llvm::OpenMPIRBuilder::InsertPointTy IP)
10950 -> llvm::OpenMPIRBuilder::InsertPointTy {
10951 CGF.Builder.restoreIP(IP);
10952 emitTargetCallFallback(OMPRuntime, OutlinedFn, D, CapturedVars,
10953 RequiresOuterTask, CS, OffloadingMandatory, CGF);
10954 return CGF.Builder.saveIP();
10955 };
10956
// ompx_bare kernels take explicit num_teams/thread_limit lists; the
// surrounding declarations (rendered 10958-10963) are elided here.
10957 bool IsBare = D.hasClausesOfKind<OMPXBareClause>();
10960 if (IsBare) {
10963 NumThreads);
10964 } else {
10965 NumTeams.push_back(OMPRuntime->emitNumTeamsForTargetDirective(CGF, D));
10966 NumThreads.push_back(
10967 OMPRuntime->emitNumThreadsForTargetDirective(CGF, D));
10968 }
10969
10970 llvm::Value *DeviceID = emitDeviceID(Device, CGF);
10971 llvm::Value *RTLoc = OMPRuntime->emitUpdateLocation(CGF, D.getBeginLoc());
10972 llvm::Value *NumIterations =
10973 OMPRuntime->emitTargetNumIterationsCall(CGF, D, SizeEmitter);
10974 auto [DynCGroupMem, DynCGroupMemFallback] = emitDynCGroupMem(D, CGF);
10975 llvm::OpenMPIRBuilder::InsertPointTy AllocaIP(
10976 CGF.AllocaInsertPt->getParent(), CGF.AllocaInsertPt->getIterator());
10977
10978 llvm::OpenMPIRBuilder::TargetDataRTArgs RTArgs(
10979 BasePointersArray, PointersArray, SizesArray, MapTypesArray,
10980 nullptr /* MapTypesArrayEnd */, MappersArray, MapNamesArray);
10981
10982 llvm::OpenMPIRBuilder::TargetKernelArgs Args(
10983 NumTargetItems, RTArgs, NumIterations, NumTeams, NumThreads,
10984 DynCGroupMem, HasNoWait, DynCGroupMemFallback);
10985
10986 llvm::OpenMPIRBuilder::InsertPointTy AfterIP =
10987 cantFail(OMPRuntime->getOMPBuilder().emitKernelLaunch(
10988 CGF.Builder, OutlinedFnID, EmitTargetCallFallbackCB, Args, DeviceID,
10989 RTLoc, AllocaIP));
10990 CGF.Builder.restoreIP(AfterIP);
10991 };
10992
// With depend/nowait-style clauses the launch must run inside a task.
10993 if (RequiresOuterTask)
10994 CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
10995 else
10996 OMPRuntime->emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
10997}
10998
// Emits the 'else' branch of a target call: the host version of the
// outlined function is executed unconditionally (optionally wrapped in a
// task when the directive requires one).
// NOTE(review): one parameter line (rendered 11002, presumably the
// 'CapturedVars' reference) and one declaration (rendered 11015,
// presumably a local OMPTargetDataInfo 'InputInfo') are elided in this
// listing — confirm against the original file.
10999static void
11000emitTargetCallElse(CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn,
11001 const OMPExecutableDirective &D,
11003 bool RequiresOuterTask, const CapturedStmt &CS,
11004 bool OffloadingMandatory, CodeGenFunction &CGF) {
11005
11006 // Notify that the host version must be executed.
11007 auto &&ElseGen =
11008 [&OMPRuntime, OutlinedFn, &D, &CapturedVars, RequiresOuterTask, &CS,
11009 OffloadingMandatory](CodeGenFunction &CGF, PrePostActionTy &) {
11010 emitTargetCallFallback(OMPRuntime, OutlinedFn, D, CapturedVars,
11011 RequiresOuterTask, CS, OffloadingMandatory, CGF);
11012 };
11013
11014 if (RequiresOuterTask) {
11016 CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo);
11017 } else {
11018 OMPRuntime->emitInlinedDirective(CGF, D.getDirectiveKind(), ElseGen);
11019 }
11020}
11021
// Top-level emission of an OpenMP 'target' call: generates the captures,
// then selects between device kernel launch and host fallback, honoring
// an optional if-clause condition.
// NOTE(review): the member-function signature lines (rendered 11022-11023)
// and two declarations (rendered 11044 'CapturedVars', 11052 'InputInfo')
// are elided in this listing — confirm against the original file.
11024 llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
11025 llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
11026 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
11027 const OMPLoopDirective &D)>
11028 SizeEmitter) {
11029 if (!CGF.HaveInsertPoint())
11030 return;
11031
// Falling back to host execution is forbidden only in host compilation
// with -fopenmp-offload-mandatory.
11032 const bool OffloadingMandatory = !CGM.getLangOpts().OpenMPIsTargetDevice &&
11033 CGM.getLangOpts().OpenMPOffloadMandatory;
11034
11035 assert((OffloadingMandatory || OutlinedFn) && "Invalid outlined function!");
11036
// depend/nowait/in_reduction (and, from 5.1, thread_limit on some
// directives) force the launch to run inside an outer task.
11037 const bool RequiresOuterTask =
11038 D.hasClausesOfKind<OMPDependClause>() ||
11039 D.hasClausesOfKind<OMPNowaitClause>() ||
11040 D.hasClausesOfKind<OMPInReductionClause>() ||
11041 (CGM.getLangOpts().OpenMP >= 51 &&
11042 needsTaskBasedThreadLimit(D.getDirectiveKind()) &&
11043 D.hasClausesOfKind<OMPThreadLimitClause>());
11045 const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
11046 auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF,
11047 PrePostActionTy &) {
11048 CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
11049 };
11050 emitInlinedDirective(CGF, OMPD_unknown, ArgsCodegen);
11051
11053 llvm::Value *MapTypesArray = nullptr;
11054 llvm::Value *MapNamesArray = nullptr;
11055
11056 auto &&TargetThenGen = [this, OutlinedFn, &D, &CapturedVars,
11057 RequiresOuterTask, &CS, OffloadingMandatory, Device,
11058 OutlinedFnID, &InputInfo, &MapTypesArray,
11059 &MapNamesArray, SizeEmitter](CodeGenFunction &CGF,
11060 PrePostActionTy &) {
11061 emitTargetCallKernelLaunch(this, OutlinedFn, D, CapturedVars,
11062 RequiresOuterTask, CS, OffloadingMandatory,
11063 Device, OutlinedFnID, InputInfo, MapTypesArray,
11064 MapNamesArray, SizeEmitter, CGF, CGM);
11065 };
11066
11067 auto &&TargetElseGen =
11068 [this, OutlinedFn, &D, &CapturedVars, RequiresOuterTask, &CS,
11069 OffloadingMandatory](CodeGenFunction &CGF, PrePostActionTy &) {
11070 emitTargetCallElse(this, OutlinedFn, D, CapturedVars, RequiresOuterTask,
11071 CS, OffloadingMandatory, CGF);
11072 };
11073
11074 // If we have a target function ID it means that we need to support
11075 // offloading, otherwise, just execute on the host. We need to execute on host
11076 // regardless of the conditional in the if clause if, e.g., the user do not
11077 // specify target triples.
11078 if (OutlinedFnID) {
11079 if (IfCond) {
11080 emitIfClause(CGF, IfCond, TargetThenGen, TargetElseGen);
11081 } else {
11082 RegionCodeGenTy ThenRCG(TargetThenGen);
11083 ThenRCG(CGF);
11084 }
11085 } else {
11086 RegionCodeGenTy ElseRCG(TargetElseGen);
11087 ElseRCG(CGF);
11088 }
11089}
11090
// Recursively scans a statement tree for target regions and emits device
// code for each one found, dispatching on the directive kind.
// NOTE(review): the member-function signature line (rendered 11091) and
// the bodies of most case labels (various rendered lines, e.g.
// 11126-11127, 11130, 11134, ...) are elided in this listing — each
// presumably calls the corresponding
// CodeGenFunction::EmitOMPTarget*DeviceFunction helper; confirm against
// the original file.
11092 StringRef ParentName) {
11093 if (!S)
11094 return;
11095
11096 // Register vtable from device for target data and target directives.
11097 // Add this block here since scanForTargetRegionsFunctions ignores
11098 // target data by checking if S is a executable directive (target).
11099 if (auto *E = dyn_cast<OMPExecutableDirective>(S);
11100 E && isOpenMPTargetDataManagementDirective(E->getDirectiveKind())) {
11101 // Don't need to check if it's device compile
11102 // since scanForTargetRegionsFunctions currently only called
11103 // in device compilation.
11104 registerVTable(*E);
11105 }
11106
11107 // Codegen OMP target directives that offload compute to the device.
11108 bool RequiresDeviceCodegen =
11111 cast<OMPExecutableDirective>(S)->getDirectiveKind());
11112
11113 if (RequiresDeviceCodegen) {
11114 const auto &E = *cast<OMPExecutableDirective>(S);
11115
11116 llvm::TargetRegionEntryInfo EntryInfo = getEntryInfoFromPresumedLoc(
11117 CGM, OMPBuilder, E.getBeginLoc(), ParentName);
11118
11119 // Is this a target region that should not be emitted as an entry point? If
11120 // so just signal we are done with this target region.
11121 if (!OMPBuilder.OffloadInfoManager.hasTargetRegionEntryInfo(EntryInfo))
11122 return;
11123
11124 switch (E.getDirectiveKind()) {
11125 case OMPD_target:
11128 break;
11129 case OMPD_target_parallel:
11131 CGM, ParentName, cast<OMPTargetParallelDirective>(E));
11132 break;
11133 case OMPD_target_teams:
11135 CGM, ParentName, cast<OMPTargetTeamsDirective>(E));
11136 break;
11137 case OMPD_target_teams_distribute:
11140 break;
11141 case OMPD_target_teams_distribute_simd:
11144 break;
11145 case OMPD_target_parallel_for:
11148 break;
11149 case OMPD_target_parallel_for_simd:
11152 break;
11153 case OMPD_target_simd:
11155 CGM, ParentName, cast<OMPTargetSimdDirective>(E));
11156 break;
11157 case OMPD_target_teams_distribute_parallel_for:
11159 CGM, ParentName,
11161 break;
11162 case OMPD_target_teams_distribute_parallel_for_simd:
11165 CGM, ParentName,
11167 break;
11168 case OMPD_target_teams_loop:
11171 break;
11172 case OMPD_target_parallel_loop:
11175 break;
// All remaining directive kinds are not target entry points and must
// never reach this switch.
11176 case OMPD_parallel:
11177 case OMPD_for:
11178 case OMPD_parallel_for:
11179 case OMPD_parallel_master:
11180 case OMPD_parallel_sections:
11181 case OMPD_for_simd:
11182 case OMPD_parallel_for_simd:
11183 case OMPD_cancel:
11184 case OMPD_cancellation_point:
11185 case OMPD_ordered:
11186 case OMPD_threadprivate:
11187 case OMPD_allocate:
11188 case OMPD_task:
11189 case OMPD_simd:
11190 case OMPD_tile:
11191 case OMPD_unroll:
11192 case OMPD_sections:
11193 case OMPD_section:
11194 case OMPD_single:
11195 case OMPD_master:
11196 case OMPD_critical:
11197 case OMPD_taskyield:
11198 case OMPD_barrier:
11199 case OMPD_taskwait:
11200 case OMPD_taskgroup:
11201 case OMPD_atomic:
11202 case OMPD_flush:
11203 case OMPD_depobj:
11204 case OMPD_scan:
11205 case OMPD_teams:
11206 case OMPD_target_data:
11207 case OMPD_target_exit_data:
11208 case OMPD_target_enter_data:
11209 case OMPD_distribute:
11210 case OMPD_distribute_simd:
11211 case OMPD_distribute_parallel_for:
11212 case OMPD_distribute_parallel_for_simd:
11213 case OMPD_teams_distribute:
11214 case OMPD_teams_distribute_simd:
11215 case OMPD_teams_distribute_parallel_for:
11216 case OMPD_teams_distribute_parallel_for_simd:
11217 case OMPD_target_update:
11218 case OMPD_declare_simd:
11219 case OMPD_declare_variant:
11220 case OMPD_begin_declare_variant:
11221 case OMPD_end_declare_variant:
11222 case OMPD_declare_target:
11223 case OMPD_end_declare_target:
11224 case OMPD_declare_reduction:
11225 case OMPD_declare_mapper:
11226 case OMPD_taskloop:
11227 case OMPD_taskloop_simd:
11228 case OMPD_master_taskloop:
11229 case OMPD_master_taskloop_simd:
11230 case OMPD_parallel_master_taskloop:
11231 case OMPD_parallel_master_taskloop_simd:
11232 case OMPD_requires:
11233 case OMPD_metadirective:
11234 case OMPD_unknown:
11235 default:
11236 llvm_unreachable("Unknown target directive for OpenMP device codegen.");
11237 }
11238 return;
11239 }
11240
// Non-target executable directives: recurse only into the raw associated
// statement, if any.
11241 if (const auto *E = dyn_cast<OMPExecutableDirective>(S)) {
11242 if (!E->hasAssociatedStmt() || !E->getAssociatedStmt())
11243 return;
11244
11245 scanForTargetRegionsFunctions(E->getRawStmt(), ParentName);
11246 return;
11247 }
11248
11249 // If this is a lambda function, look into its body.
11250 if (const auto *L = dyn_cast<LambdaExpr>(S))
11251 S = L->getBody();
11252
11253 // Keep looking for target regions recursively.
11254 for (const Stmt *II : S->children())
11255 scanForTargetRegionsFunctions(II, ParentName);
11256}
11257
11258static bool isAssumedToBeNotEmitted(const ValueDecl *VD, bool IsDevice) {
11259 std::optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
11260 OMPDeclareTargetDeclAttr::getDeviceType(VD);
11261 if (!DevTy)
11262 return false;
11263 // Do not emit device_type(nohost) functions for the host.
11264 if (!IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_NoHost)
11265 return true;
11266 // Do not emit device_type(host) functions for the device.
11267 if (IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_Host)
11268 return true;
11269 return false;
11270}
11271
// Decides whether code generation for a function declaration should be
// skipped here (returns true) or proceed normally (returns false), for
// both host and device compilations.
// NOTE(review): the member-function signature line (rendered 11272) and
// two lines (rendered 11277, 11287-11288) are elided in this listing; the
// elided lines presumably call isAssumedToBeNotEmitted and
// scanForTargetRegionsFunctions respectively — confirm against the
// original file.
11273 // If emitting code for the host, we do not process FD here. Instead we do
11274 // the normal code generation.
11275 if (!CGM.getLangOpts().OpenMPIsTargetDevice) {
11276 if (const auto *FD = dyn_cast<FunctionDecl>(GD.getDecl()))
11278 CGM.getLangOpts().OpenMPIsTargetDevice))
11279 return true;
11280 return false;
11281 }
11282
11283 const ValueDecl *VD = cast<ValueDecl>(GD.getDecl());
11284 // Try to detect target regions in the function.
11285 if (const auto *FD = dyn_cast<FunctionDecl>(VD)) {
11286 StringRef Name = CGM.getMangledName(GD);
11289 CGM.getLangOpts().OpenMPIsTargetDevice))
11290 return true;
11291 }
11292
11293 // Do not to emit function if it is not marked as declare target.
11294 return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) &&
11295 AlreadyEmittedTargetDecls.count(VD) == 0;
11296}
11297
// Decides whether code generation for a global variable should be skipped
// here (returns true); on the device side also scans any ctors/dtors of
// the variable's record type for target regions.
// NOTE(review): the member-function signature lines (rendered 11298-11299)
// and parts of the final condition (rendered 11331-11332) are elided in
// this listing — confirm against the original file.
11300 CGM.getLangOpts().OpenMPIsTargetDevice))
11301 return true;
11302
11303 if (!CGM.getLangOpts().OpenMPIsTargetDevice)
11304 return false;
11305
11306 // Check if there are Ctors/Dtors in this declaration and look for target
11307 // regions in it. We use the complete variant to produce the kernel name
11308 // mangling.
11309 QualType RDTy = cast<VarDecl>(GD.getDecl())->getType();
11310 if (const auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) {
11311 for (const CXXConstructorDecl *Ctor : RD->ctors()) {
11312 StringRef ParentName =
11313 CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete));
11314 scanForTargetRegionsFunctions(Ctor->getBody(), ParentName);
11315 }
11316 if (const CXXDestructorDecl *Dtor = RD->getDestructor()) {
11317 StringRef ParentName =
11318 CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete));
11319 scanForTargetRegionsFunctions(Dtor->getBody(), ParentName);
11320 }
11321 }
11322
11323 // Do not to emit variable if it is not marked as declare target.
11324 std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
11325 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(
11326 cast<VarDecl>(GD.getDecl()));
11327 if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
11328 ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
11329 *Res == OMPDeclareTargetDeclAttr::MT_Enter ||
11330 *Res == OMPDeclareTargetDeclAttr::MT_Local) &&
11333 return true;
11334 }
11335 return false;
11336}
11337
// Registers a declare-target global variable with the OpenMPIRBuilder so
// an offloading entry can be generated for it; also records non-target
// variables that get emitted in device code anyway (e.g. for debug info).
// NOTE(review): the member-function signature line (rendered 11338) and
// two argument lines of registerTargetGlobalVariable (rendered 11370,
// 11373) are elided in this listing — confirm against the original file.
11339 llvm::Constant *Addr) {
// Nothing to register when no offloading targets exist at all.
11340 if (CGM.getLangOpts().OMPTargetTriples.empty() &&
11341 !CGM.getLangOpts().OpenMPIsTargetDevice)
11342 return;
11343
11344 std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
11345 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
11346
11347 // If this is an 'extern' declaration we defer to the canonical definition and
11348 // do not emit an offloading entry.
11349 if (Res && *Res != OMPDeclareTargetDeclAttr::MT_Link &&
11350 VD->hasExternalStorage())
11351 return;
11352
11353 if (!Res) {
11354 if (CGM.getLangOpts().OpenMPIsTargetDevice) {
11355 // Register non-target variables being emitted in device code (debug info
11356 // may cause this).
11357 StringRef VarName = CGM.getMangledName(VD);
11358 EmittedNonTargetVariables.try_emplace(VarName, Addr);
11359 }
11360 return;
11361 }
11362
// Lazily-evaluated callbacks: the builder queries address/linkage only
// when it actually needs them.
11363 auto AddrOfGlobal = [&VD, this]() { return CGM.GetAddrOfGlobal(VD); };
11364 auto LinkageForVariable = [&VD, this]() {
11365 return CGM.getLLVMLinkageVarDefinition(VD);
11366 };
11367
11368 std::vector<llvm::GlobalVariable *> GeneratedRefs;
11369 OMPBuilder.registerTargetGlobalVariable(
11371 VD->hasDefinition(CGM.getContext()) == VarDecl::DeclarationOnly,
11372 VD->isExternallyVisible(),
11374 VD->getCanonicalDecl()->getBeginLoc()),
11375 CGM.getMangledName(VD), GeneratedRefs, CGM.getLangOpts().OpenMPSimd,
11376 CGM.getLangOpts().OMPTargetTriples, AddrOfGlobal, LinkageForVariable,
11377 CGM.getTypes().ConvertTypeForMem(
11378 CGM.getContext().getPointerType(VD->getType())),
11379 Addr);
11380
// Any reference globals the builder created must survive optimization.
11381 for (auto *ref : GeneratedRefs)
11382 CGM.addCompilerUsedGlobal(ref);
11383}
11384
// Dispatches target-emission handling of a global to the function or the
// variable path depending on the declaration kind.
// NOTE(review): the member-function signature line (rendered 11385) and
// part of the condition (rendered 11387) are elided in this listing —
// confirm against the original file.
11386 if (isa<FunctionDecl>(GD.getDecl()) ||
11388 return emitTargetFunctions(GD);
11389
11390 return emitTargetGlobalVariable(GD);
11391}
11392
// Emits the declare-target global variables whose emission was deferred:
// to/enter/local entries are emitted directly, while link entries (and
// unified-memory to-entries) get a declare-target reference instead.
// NOTE(review): the member-function signature line (rendered 11393) and
// the unified-shared-memory condition lines (rendered 11402, 11409) are
// elided in this listing — confirm against the original file.
11394 for (const VarDecl *VD : DeferredGlobalVariables) {
11395 std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
11396 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
11397 if (!Res)
11398 continue;
11399 if ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
11400 *Res == OMPDeclareTargetDeclAttr::MT_Enter ||
11401 *Res == OMPDeclareTargetDeclAttr::MT_Local) &&
11403 CGM.EmitGlobal(VD);
11404 } else {
11405 assert((*Res == OMPDeclareTargetDeclAttr::MT_Link ||
11406 ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
11407 *Res == OMPDeclareTargetDeclAttr::MT_Enter ||
11408 *Res == OMPDeclareTargetDeclAttr::MT_Local) &&
11410 "Expected link clause or to clause with unified memory.");
11411 (void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
11412 }
11413 }
11414}
11415
11417 CodeGenFunction &CGF, const OMPExecutableDirective &D) const {
11418 assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) &&
11419 " Expected target-based directive.");
11420}
11421
// Processes an 'omp requires' declaration: records unified shared memory
// support and the requested default atomic memory ordering.
// NOTE(review): the member-function signature line (rendered 11422), one
// line in the unified_shared_memory branch (rendered 11425), and the
// default case label of the switch (rendered 11439) are elided in this
// listing — confirm against the original file.
11423 for (const OMPClause *Clause : D->clauselists()) {
11424 if (Clause->getClauseKind() == OMPC_unified_shared_memory) {
11426 OMPBuilder.Config.setHasRequiresUnifiedSharedMemory(true);
11427 } else if (const auto *AC =
11428 dyn_cast<OMPAtomicDefaultMemOrderClause>(Clause)) {
11429 switch (AC->getAtomicDefaultMemOrderKind()) {
11430 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_acq_rel:
11431 RequiresAtomicOrdering = llvm::AtomicOrdering::AcquireRelease;
11432 break;
11433 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_seq_cst:
11434 RequiresAtomicOrdering = llvm::AtomicOrdering::SequentiallyConsistent;
11435 break;
11436 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_relaxed:
11437 RequiresAtomicOrdering = llvm::AtomicOrdering::Monotonic;
11438 break;
11440 break;
11441 }
11442 }
11443 }
11444}
11445
// Returns the default atomic ordering selected by a 'requires
// atomic_default_mem_order' clause.
// NOTE(review): the body line (rendered 11447) is elided in this listing —
// presumably 'return RequiresAtomicOrdering;' given the getter name and
// the member written by processRequiresDecl; confirm against the original
// file.
11446llvm::AtomicOrdering CGOpenMPRuntime::getDefaultMemoryOrdering() const {
11448}
11449
// Checks whether VD carries an OMPAllocateDeclAttr and, if so, reports the
// language address space the predefined allocator maps to via AS.
// NOTE(review): the function signature line (rendered 11450) is elided in
// this listing — presumably 'bool
// CGOpenMPRuntime::hasAllocateAttributeForGlobalVar(const VarDecl *VD,' —
// confirm against the original file.
11451 LangAS &AS) {
11452 if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())
11453 return false;
11454 const auto *A = VD->getAttr<OMPAllocateDeclAttr>();
11455 switch(A->getAllocatorType()) {
11456 case OMPAllocateDeclAttr::OMPNullMemAlloc:
11457 case OMPAllocateDeclAttr::OMPDefaultMemAlloc:
11458 // Not supported, fallback to the default mem space.
11459 case OMPAllocateDeclAttr::OMPLargeCapMemAlloc:
11460 case OMPAllocateDeclAttr::OMPCGroupMemAlloc:
11461 case OMPAllocateDeclAttr::OMPHighBWMemAlloc:
11462 case OMPAllocateDeclAttr::OMPLowLatMemAlloc:
11463 case OMPAllocateDeclAttr::OMPThreadMemAlloc:
11464 case OMPAllocateDeclAttr::OMPConstMemAlloc:
11465 case OMPAllocateDeclAttr::OMPPTeamMemAlloc:
11466 AS = LangAS::Default;
11467 return true;
11468 case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc:
11469 llvm_unreachable("Expected predefined allocator for the variables with the "
11470 "static storage.");
11471 }
11472 return false;
11473}
11474
11478
// RAII constructor: on the device side, saves the current
// ShouldMarkAsGlobal flag and disables automatic declare-target marking
// until the matching destructor restores it.
// NOTE(review): the constructor name line (rendered 11479) is elided in
// this listing — confirm against the original file.
11480 CodeGenModule &CGM)
11481 : CGM(CGM) {
11482 if (CGM.getLangOpts().OpenMPIsTargetDevice) {
11483 SavedShouldMarkAsGlobal = CGM.getOpenMPRuntime().ShouldMarkAsGlobal;
11484 CGM.getOpenMPRuntime().ShouldMarkAsGlobal = false;
11485 }
11486}
11487
// RAII destructor: restores the ShouldMarkAsGlobal flag saved by the
// constructor (device compilation only).
// NOTE(review): the destructor name line (rendered 11488) is elided in
// this listing — confirm against the original file.
11489 if (CGM.getLangOpts().OpenMPIsTargetDevice)
11490 CGM.getOpenMPRuntime().ShouldMarkAsGlobal = SavedShouldMarkAsGlobal;
11491}
11492
// Decides whether a declaration may be emitted as a regular global during
// device compilation; also records first-time emissions in
// AlreadyEmittedTargetDecls (returns true when emission should NOT happen
// here again).
// NOTE(review): the member-function signature line (rendered 11493) is
// elided in this listing — confirm against the original file.
11494 if (!CGM.getLangOpts().OpenMPIsTargetDevice || !ShouldMarkAsGlobal)
11495 return true;
11496
11497 const auto *D = cast<FunctionDecl>(GD.getDecl());
11498 // Do not to emit function if it is marked as declare target as it was already
11499 // emitted.
11500 if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(D)) {
11501 if (D->hasBody() && AlreadyEmittedTargetDecls.count(D) == 0) {
// Already materialized as a definition: nothing more to emit.
11502 if (auto *F = dyn_cast_or_null<llvm::Function>(
11503 CGM.GetGlobalValue(CGM.getMangledName(GD))))
11504 return !F->isDeclaration();
11505 return false;
11506 }
11507 return true;
11508 }
11509
// insert() returns false on second insertion, i.e. already emitted.
11510 return !AlreadyEmittedTargetDecls.insert(D).second;
11511}
11512
// Emits the host-side call for a 'teams' construct: packs the source
// location, the number of captured variables, and the outlined microtask
// (plus the captured values) into a __kmpc_fork_teams runtime call.
11514 const OMPExecutableDirective &D,
11515 SourceLocation Loc,
11516 llvm::Function *OutlinedFn,
11517 ArrayRef<llvm::Value *> CapturedVars) {
11518 if (!CGF.HaveInsertPoint())
11519 return;
11520
11521 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
11523
11524 // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn);
11525 llvm::Value *Args[] = {
11526 RTLoc,
11527 CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
11528 OutlinedFn};
 // NOTE(review): RealArgs is declared on a line not visible in this
 // listing — presumably a SmallVector<llvm::Value *> accumulating the
 // fixed args followed by the captured variables.
11530 RealArgs.append(std::begin(Args), std::end(Args));
11531 RealArgs.append(CapturedVars.begin(), CapturedVars.end());
11532
11533 llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
11534 CGM.getModule(), OMPRTL___kmpc_fork_teams);
11535 CGF.EmitRuntimeCall(RTLFn, RealArgs);
11536}
11537
// Emits a __kmpc_push_num_teams call for the num_teams/thread_limit clauses.
// An absent clause expression is encoded as 0 for the runtime.
11539 const Expr *NumTeams,
11540 const Expr *ThreadLimit,
11541 SourceLocation Loc) {
11542 if (!CGF.HaveInsertPoint())
11543 return;
11544
11545 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
11546
11547 llvm::Value *NumTeamsVal =
11548 NumTeams
11549 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams),
11550 CGF.CGM.Int32Ty, /* isSigned = */ true)
11551 : CGF.Builder.getInt32(0);
11552
11553 llvm::Value *ThreadLimitVal =
11554 ThreadLimit
11555 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
11556 CGF.CGM.Int32Ty, /* isSigned = */ true)
11557 : CGF.Builder.getInt32(0);
11558
11559 // Build call __kmpc_push_num_teams(&loc, global_tid, num_teams, thread_limit)
11560 llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal,
11561 ThreadLimitVal};
11562 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
11563 CGM.getModule(), OMPRTL___kmpc_push_num_teams),
11564 PushNumTeamsArgs);
11565}
11566
// Emits a __kmpc_set_thread_limit call; a missing thread_limit expression is
// encoded as 0. Unlike the teams variant above, no insert-point guard is
// present in the visible code.
11568 const Expr *ThreadLimit,
11569 SourceLocation Loc) {
11570 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
11571 llvm::Value *ThreadLimitVal =
11572 ThreadLimit
11573 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
11574 CGF.CGM.Int32Ty, /* isSigned = */ true)
11575 : CGF.Builder.getInt32(0);
11576
11577 // Build call __kmpc_set_thread_limit(&loc, global_tid, thread_limit)
11578 llvm::Value *ThreadLimitArgs[] = {RTLoc, getThreadID(CGF, Loc),
11579 ThreadLimitVal};
11580 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
11581 CGM.getModule(), OMPRTL___kmpc_set_thread_limit),
11582 ThreadLimitArgs);
11583}
11584
// Emits a 'target data' region through OMPBuilder.createTargetData: collects
// map-clause info, provides body/mapper/device-address callbacks, and lets
// the IR builder produce begin/end mapper calls around the region body.
11586 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
11587 const Expr *Device, const RegionCodeGenTy &CodeGen,
11589 if (!CGF.HaveInsertPoint())
11590 return;
11591
11592 // Action used to replace the default codegen action and turn privatization
11593 // off.
11594 PrePostActionTy NoPrivAction;
11595
11596 using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
11597
11598 llvm::Value *IfCondVal = nullptr;
11599 if (IfCond)
11600 IfCondVal = CGF.EvaluateExprAsBool(IfCond);
11601
11602 // Emit device ID if any.
11603 llvm::Value *DeviceID = nullptr;
11604 if (Device) {
11605 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
11606 CGF.Int64Ty, /*isSigned=*/true);
11607 } else {
11608 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
11609 }
11610
11611 // Fill up the arrays with all the mapped variables.
11612 MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
 // Callback: compute map-clause info at the builder-chosen insert point;
 // names are only materialized when debug info is enabled.
11613 auto GenMapInfoCB =
11614 [&](InsertPointTy CodeGenIP) -> llvm::OpenMPIRBuilder::MapInfosTy & {
11615 CGF.Builder.restoreIP(CodeGenIP);
11616 // Get map clause information.
11617 MappableExprsHandler MEHandler(D, CGF);
11618 MEHandler.generateAllInfo(CombinedInfo, OMPBuilder);
11619
11620 auto FillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) {
11621 return emitMappingInformation(CGF, OMPBuilder, MapExpr);
11622 };
11623 if (CGM.getCodeGenOpts().getDebugInfo() !=
11624 llvm::codegenoptions::NoDebugInfo) {
11625 CombinedInfo.Names.resize(CombinedInfo.Exprs.size());
11626 llvm::transform(CombinedInfo.Exprs, CombinedInfo.Names.begin(),
11627 FillInfoMap);
11628 }
11629
11630 return CombinedInfo;
11631 };
11632 using BodyGenTy = llvm::OpenMPIRBuilder::BodyGenTy;
 // Callback: emit the region body. Which phase runs the user code depends
 // on whether any use_device_ptr/addr captures were recorded
 // (CaptureDeviceAddrMap non-empty => privatizing path).
11633 auto BodyCB = [&](InsertPointTy CodeGenIP, BodyGenTy BodyGenType) {
11634 CGF.Builder.restoreIP(CodeGenIP);
11635 switch (BodyGenType) {
11636 case BodyGenTy::Priv:
11637 if (!Info.CaptureDeviceAddrMap.empty())
11638 CodeGen(CGF);
11639 break;
11640 case BodyGenTy::DupNoPriv:
11641 if (!Info.CaptureDeviceAddrMap.empty()) {
11642 CodeGen.setAction(NoPrivAction);
11643 CodeGen(CGF);
11644 }
11645 break;
11646 case BodyGenTy::NoPriv:
11647 if (Info.CaptureDeviceAddrMap.empty()) {
11648 CodeGen.setAction(NoPrivAction);
11649 CodeGen(CGF);
11650 }
11651 break;
11652 }
11653 return InsertPointTy(CGF.Builder.GetInsertBlock(),
11654 CGF.Builder.GetInsertPoint());
11655 };
11656
 // Callback: record the device-side replacement value for a mapped decl.
11657 auto DeviceAddrCB = [&](unsigned int I, llvm::Value *NewDecl) {
11658 if (const ValueDecl *DevVD = CombinedInfo.DevicePtrDecls[I]) {
11659 Info.CaptureDeviceAddrMap.try_emplace(DevVD, NewDecl);
11660 }
11661 };
11662
 // Callback: materialize a user-defined mapper function when present.
 // NOTE(review): the assignment to MFunc is on a line not visible in this
 // listing (presumably a call that emits the declare-mapper function).
11663 auto CustomMapperCB = [&](unsigned int I) {
11664 llvm::Function *MFunc = nullptr;
11665 if (CombinedInfo.Mappers[I]) {
11666 Info.HasMapper = true;
11668 cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I]));
11669 }
11670 return MFunc;
11671 };
11672
11673 // Source location for the ident struct
11674 llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
11675
11676 InsertPointTy AllocaIP(CGF.AllocaInsertPt->getParent(),
11677 CGF.AllocaInsertPt->getIterator());
11678 InsertPointTy CodeGenIP(CGF.Builder.GetInsertBlock(),
11679 CGF.Builder.GetInsertPoint());
11680 llvm::OpenMPIRBuilder::LocationDescription OmpLoc(CodeGenIP);
11681 llvm::OpenMPIRBuilder::InsertPointTy AfterIP =
11682 cantFail(OMPBuilder.createTargetData(
11683 OmpLoc, AllocaIP, CodeGenIP, DeviceID, IfCondVal, Info, GenMapInfoCB,
11684 CustomMapperCB,
11685 /*MapperFunc=*/nullptr, BodyCB, DeviceAddrCB, RTLoc));
11686 CGF.Builder.restoreIP(AfterIP);
11687}
11688
// Emits a standalone target-data directive (target enter data / exit data /
// update): builds the offloading arrays, selects the matching
// __tgt_target_data_* runtime entry (nowait variant when a 'nowait' clause
// is present), and wraps everything in an outer task when depend/nowait
// clauses require it.
11690 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
11691 const Expr *Device) {
11692 if (!CGF.HaveInsertPoint())
11693 return;
11694
11698 "Expecting either target enter, exit data, or update directives.");
11699
11701 llvm::Value *MapTypesArray = nullptr;
11702 llvm::Value *MapNamesArray = nullptr;
11703 // Generate the code for the opening of the data environment.
11704 auto &&ThenGen = [this, &D, Device, &InputInfo, &MapTypesArray,
11705 &MapNamesArray](CodeGenFunction &CGF, PrePostActionTy &) {
11706 // Emit device ID if any.
11707 llvm::Value *DeviceID = nullptr;
11708 if (Device) {
11709 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
11710 CGF.Int64Ty, /*isSigned=*/true);
11711 } else {
11712 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
11713 }
11714
11715 // Emit the number of elements in the offloading arrays.
11716 llvm::Constant *PointerNum =
11717 CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);
11718
11719 // Source location for the ident struct
11720 llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
11721
11722 SmallVector<llvm::Value *, 13> OffloadingArgs(
11723 {RTLoc, DeviceID, PointerNum,
11724 InputInfo.BasePointersArray.emitRawPointer(CGF),
11725 InputInfo.PointersArray.emitRawPointer(CGF),
11726 InputInfo.SizesArray.emitRawPointer(CGF), MapTypesArray, MapNamesArray,
11727 InputInfo.MappersArray.emitRawPointer(CGF)});
11728
11729 // Select the right runtime function call for each standalone
11730 // directive.
11731 const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
11732 RuntimeFunction RTLFn;
11733 switch (D.getDirectiveKind()) {
11734 case OMPD_target_enter_data:
11735 RTLFn = HasNowait ? OMPRTL___tgt_target_data_begin_nowait_mapper
11736 : OMPRTL___tgt_target_data_begin_mapper;
11737 break;
11738 case OMPD_target_exit_data:
11739 RTLFn = HasNowait ? OMPRTL___tgt_target_data_end_nowait_mapper
11740 : OMPRTL___tgt_target_data_end_mapper;
11741 break;
11742 case OMPD_target_update:
11743 RTLFn = HasNowait ? OMPRTL___tgt_target_data_update_nowait_mapper
11744 : OMPRTL___tgt_target_data_update_mapper;
11745 break;
 // Every other directive kind is a caller bug; enumerate them so new
 // directives trip a compiler warning here.
11746 case OMPD_parallel:
11747 case OMPD_for:
11748 case OMPD_parallel_for:
11749 case OMPD_parallel_master:
11750 case OMPD_parallel_sections:
11751 case OMPD_for_simd:
11752 case OMPD_parallel_for_simd:
11753 case OMPD_cancel:
11754 case OMPD_cancellation_point:
11755 case OMPD_ordered:
11756 case OMPD_threadprivate:
11757 case OMPD_allocate:
11758 case OMPD_task:
11759 case OMPD_simd:
11760 case OMPD_tile:
11761 case OMPD_unroll:
11762 case OMPD_sections:
11763 case OMPD_section:
11764 case OMPD_single:
11765 case OMPD_master:
11766 case OMPD_critical:
11767 case OMPD_taskyield:
11768 case OMPD_barrier:
11769 case OMPD_taskwait:
11770 case OMPD_taskgroup:
11771 case OMPD_atomic:
11772 case OMPD_flush:
11773 case OMPD_depobj:
11774 case OMPD_scan:
11775 case OMPD_teams:
11776 case OMPD_target_data:
11777 case OMPD_distribute:
11778 case OMPD_distribute_simd:
11779 case OMPD_distribute_parallel_for:
11780 case OMPD_distribute_parallel_for_simd:
11781 case OMPD_teams_distribute:
11782 case OMPD_teams_distribute_simd:
11783 case OMPD_teams_distribute_parallel_for:
11784 case OMPD_teams_distribute_parallel_for_simd:
11785 case OMPD_declare_simd:
11786 case OMPD_declare_variant:
11787 case OMPD_begin_declare_variant:
11788 case OMPD_end_declare_variant:
11789 case OMPD_declare_target:
11790 case OMPD_end_declare_target:
11791 case OMPD_declare_reduction:
11792 case OMPD_declare_mapper:
11793 case OMPD_taskloop:
11794 case OMPD_taskloop_simd:
11795 case OMPD_master_taskloop:
11796 case OMPD_master_taskloop_simd:
11797 case OMPD_parallel_master_taskloop:
11798 case OMPD_parallel_master_taskloop_simd:
11799 case OMPD_target:
11800 case OMPD_target_simd:
11801 case OMPD_target_teams_distribute:
11802 case OMPD_target_teams_distribute_simd:
11803 case OMPD_target_teams_distribute_parallel_for:
11804 case OMPD_target_teams_distribute_parallel_for_simd:
11805 case OMPD_target_teams:
11806 case OMPD_target_parallel:
11807 case OMPD_target_parallel_for:
11808 case OMPD_target_parallel_for_simd:
11809 case OMPD_requires:
11810 case OMPD_metadirective:
11811 case OMPD_unknown:
11812 default:
11813 llvm_unreachable("Unexpected standalone target data directive.");
11814 break;
11815 }
 // Nowait entry points take four extra (here all-null) dependence args.
11816 if (HasNowait) {
11817 OffloadingArgs.push_back(llvm::Constant::getNullValue(CGF.Int32Ty));
11818 OffloadingArgs.push_back(llvm::Constant::getNullValue(CGF.VoidPtrTy));
11819 OffloadingArgs.push_back(llvm::Constant::getNullValue(CGF.Int32Ty));
11820 OffloadingArgs.push_back(llvm::Constant::getNullValue(CGF.VoidPtrTy));
11821 }
11822 CGF.EmitRuntimeCall(
11823 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), RTLFn),
11824 OffloadingArgs);
11825 };
11826
 // Builds the offload arrays first, then runs ThenGen either inline or
 // inside an outer task (for depend/nowait).
11827 auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
11828 &MapNamesArray](CodeGenFunction &CGF,
11829 PrePostActionTy &) {
11830 // Fill up the arrays with all the mapped variables.
11831 MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
11833 MappableExprsHandler MEHandler(D, CGF);
11834 genMapInfo(MEHandler, CGF, CombinedInfo, OMPBuilder);
11835 emitOffloadingArraysAndArgs(CGF, CombinedInfo, Info, OMPBuilder,
11836 /*IsNonContiguous=*/true, /*ForEndCall=*/false);
11837
11838 bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() ||
11839 D.hasClausesOfKind<OMPNowaitClause>();
11840
11841 InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
11842 InputInfo.BasePointersArray = Address(Info.RTArgs.BasePointersArray,
11843 CGF.VoidPtrTy, CGM.getPointerAlign());
11844 InputInfo.PointersArray = Address(Info.RTArgs.PointersArray, CGF.VoidPtrTy,
11845 CGM.getPointerAlign());
11846 InputInfo.SizesArray =
11847 Address(Info.RTArgs.SizesArray, CGF.Int64Ty, CGM.getPointerAlign());
11848 InputInfo.MappersArray =
11849 Address(Info.RTArgs.MappersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
11850 MapTypesArray = Info.RTArgs.MapTypesArray;
11851 MapNamesArray = Info.RTArgs.MapNamesArray;
11852 if (RequiresOuterTask)
11853 CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
11854 else
11855 emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
11856 };
11857
 // With an if clause the data calls are emitted only on the true branch;
 // the false branch intentionally does nothing.
11858 if (IfCond) {
11859 emitIfClause(CGF, IfCond, TargetThenGen,
11860 [](CodeGenFunction &CGF, PrePostActionTy &) {});
11861 } else {
11862 RegionCodeGenTy ThenRCG(TargetThenGen);
11863 ThenRCG(CGF);
11864 }
11865}
11866
11867static unsigned
 // (Signature continues on lines not visible here; the function is invoked
 // below as evaluateCDTSize(FD, ParamAttrs).)
11870 // Every vector variant of a SIMD-enabled function has a vector length (VLEN).
11871 // If OpenMP clause "simdlen" is used, the VLEN is the value of the argument
11872 // of that clause. The VLEN value must be power of 2.
11873 // In other case the notion of the function`s "characteristic data type" (CDT)
11874 // is used to compute the vector length.
11875 // CDT is defined in the following order:
11876 // a) For non-void function, the CDT is the return type.
11877 // b) If the function has any non-uniform, non-linear parameters, then the
11878 // CDT is the type of the first such parameter.
11879 // c) If the CDT determined by a) or b) above is struct, union, or class
11880 // type which is pass-by-value (except for the type that maps to the
11881 // built-in complex data type), the characteristic data type is int.
11882 // d) If none of the above three cases is applicable, the CDT is int.
11883 // The VLEN is then determined based on the CDT and the size of vector
11884 // register of that ISA for which current vector version is generated. The
11885 // VLEN is computed using the formula below:
11886 // VLEN = sizeof(vector_register) / sizeof(CDT),
11887 // where vector register size specified in section 3.2.1 Registers and the
11888 // Stack Frame of original AMD64 ABI document.
11889 QualType RetType = FD->getReturnType();
11890 if (RetType.isNull())
11891 return 0;
11892 ASTContext &C = FD->getASTContext();
11893 QualType CDT;
11894 if (!RetType.isNull() && !RetType->isVoidType()) {
11895 CDT = RetType;
11896 } else {
11897 unsigned Offset = 0;
 // For methods, slot 0 of ParamAttrs describes the implicit 'this'.
11898 if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) {
11899 if (ParamAttrs[Offset].Kind ==
11900 llvm::OpenMPIRBuilder::DeclareSimdKindTy::Vector)
11901 CDT = C.getPointerType(C.getCanonicalTagType(MD->getParent()));
11902 ++Offset;
11903 }
11904 if (CDT.isNull()) {
11905 for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
11906 if (ParamAttrs[I + Offset].Kind ==
11907 llvm::OpenMPIRBuilder::DeclareSimdKindTy::Vector) {
11908 CDT = FD->getParamDecl(I)->getType();
11909 break;
11910 }
11911 }
11912 }
11913 }
11914 if (CDT.isNull())
11915 CDT = C.IntTy;
11916 CDT = CDT->getCanonicalTypeUnqualified();
11917 if (CDT->isRecordType() || CDT->isUnionType())
11918 CDT = C.IntTy;
 // Result is the CDT size in bits.
11919 return C.getTypeSize(CDT);
11920}
11921
11922// These are the functions that are needed to mangle the name of the
11923// vector functions generated by the compiler, according to the rules
11924// defined in the "Vector Function ABI specifications for AArch64",
11925// available at
11926// https://developer.arm.com/products/software-development-tools/hpc/arm-compiler-for-hpc/vector-function-abi.
11927
11928/// Maps To Vector (MTV), as defined in 4.1.1 of the AAVFABI (2021Q1).
/// Returns true when a parameter of type \p QT with declare-simd kind
/// \p Kind becomes a vector lane in the vector variant; uniform and
/// by-value linear parameters (and void) do not.
11930 llvm::OpenMPIRBuilder::DeclareSimdKindTy Kind) {
11931 QT = QT.getCanonicalType();
11932
11933 if (QT->isVoidType())
11934 return false;
11935
 // Uniform parameters are broadcast, never vectorized.
11936 if (Kind == llvm::OpenMPIRBuilder::DeclareSimdKindTy::Uniform)
11937 return false;
11938
11939 if (Kind == llvm::OpenMPIRBuilder::DeclareSimdKindTy::LinearUVal ||
11940 Kind == llvm::OpenMPIRBuilder::DeclareSimdKindTy::LinearRef)
11941 return false;
11942
 // Plain/val linear params map to vector only when passed by reference.
11943 if ((Kind == llvm::OpenMPIRBuilder::DeclareSimdKindTy::Linear ||
11944 Kind == llvm::OpenMPIRBuilder::DeclareSimdKindTy::LinearVal) &&
11945 !QT->isReferenceType())
11946 return false;
11947
11948 return true;
11949}
11950
11951/// Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI.
/// True for floating, integer, and pointer types of 8..128 bits whose
/// width is a power of two.
11953 QT = QT.getCanonicalType();
11954 unsigned Size = C.getTypeSize(QT);
11955
11956 // Only scalars and complex within 16 bytes wide set PBV to true.
11957 if (Size != 8 && Size != 16 && Size != 32 && Size != 64 && Size != 128)
11958 return false;
11959
11960 if (QT->isFloatingType())
11961 return true;
11962
11963 if (QT->isIntegerType())
11964 return true;
11965
11966 if (QT->isPointerType())
11967 return true;
11968
11969 // TODO: Add support for complex types (section 3.1.2, item 2).
11970
11971 return false;
11972}
11973
11974/// Computes the lane size (LS) of a return type or of an input parameter,
11975/// as defined by `LS(P)` in 3.2.1 of the AAVFABI.
11976/// TODO: Add support for references, section 3.2.1, item 1.
11977static unsigned getAArch64LS(QualType QT,
11978 llvm::OpenMPIRBuilder::DeclareSimdKindTy Kind,
11979 ASTContext &C) {
 // Non-MTV pointers: lane size is the pointee's size when the pointee is
 // itself pass-by-value.
 // NOTE(review): PTy's declaration is on a line not visible in this
 // listing — presumably the canonical pointee type of QT.
11980 if (!getAArch64MTV(QT, Kind) && QT.getCanonicalType()->isPointerType()) {
11982 if (getAArch64PBV(PTy, C))
11983 return C.getTypeSize(PTy);
11984 }
11985 if (getAArch64PBV(QT, C))
11986 return C.getTypeSize(QT);
11987
 // Fallback: pointer-sized lane (uintptr_t width).
11988 return C.getTypeSize(C.getUIntPtrType());
11989}
11990
11991// Get Narrowest Data Size (NDS) and Widest Data Size (WDS) from the
11992// signature of the scalar function, as defined in 3.2.2 of the
11993// AAVFABI.
// Returns the tuple (NDS, WDS, OutputBecomesInput): min and max lane size
// over return type and parameters, and whether a non-PBV vector return is
// treated as an extra input.
11994static std::tuple<unsigned, unsigned, bool>
11997 QualType RetType = FD->getReturnType().getCanonicalType();
11998
11999 ASTContext &C = FD->getASTContext();
12000
12001 bool OutputBecomesInput = false;
12002
12004 if (!RetType->isVoidType()) {
12005 Sizes.push_back(getAArch64LS(
12006 RetType, llvm::OpenMPIRBuilder::DeclareSimdKindTy::Vector, C));
12007 if (!getAArch64PBV(RetType, C) && getAArch64MTV(RetType, {}))
12008 OutputBecomesInput = true;
12009 }
12010 for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
12012 Sizes.push_back(getAArch64LS(QT, ParamAttrs[I].Kind, C));
12013 }
12014
12015 assert(!Sizes.empty() && "Unable to determine NDS and WDS.");
12016 // The LS of a function parameter / return value can only be a power
12017 // of 2, starting from 8 bits, up to 128.
12018 assert(llvm::all_of(Sizes,
12019 [](unsigned Size) {
12020 return Size == 8 || Size == 16 || Size == 32 ||
12021 Size == 64 || Size == 128;
12022 }) &&
12023 "Invalid size");
12024
12025 return std::make_tuple(*llvm::min_element(Sizes), *llvm::max_element(Sizes),
12026 OutputBecomesInput);
12027}
12028
12029static llvm::OpenMPIRBuilder::DeclareSimdBranch
12030convertDeclareSimdBranch(OMPDeclareSimdDeclAttr::BranchStateTy State) {
12031 switch (State) {
12032 case OMPDeclareSimdDeclAttr::BS_Undefined:
12033 return llvm::OpenMPIRBuilder::DeclareSimdBranch::Undefined;
12034 case OMPDeclareSimdDeclAttr::BS_Inbranch:
12035 return llvm::OpenMPIRBuilder::DeclareSimdBranch::Inbranch;
12036 case OMPDeclareSimdDeclAttr::BS_Notinbranch:
12037 return llvm::OpenMPIRBuilder::DeclareSimdBranch::Notinbranch;
12038 }
12039 llvm_unreachable("unexpected declare simd branch state");
12040}
12041
12042// Check the values provided via `simdlen` by the user.
// Returns true when the user-supplied VLEN is usable for generating vector
// variants on AArch64; otherwise emits a warning diagnostic and returns
// false. ISA is 'n' for Advanced SIMD (NEON) and 's' for SVE.
12044 unsigned UserVLEN, unsigned WDS, char ISA) {
12045 // 1. A `simdlen(1)` doesn't produce vector signatures.
12046 if (UserVLEN == 1) {
12047 CGM.getDiags().Report(SLoc, diag::warn_simdlen_1_no_effect);
12048 return false;
12049 }
12050
12051 // 2. Section 3.3.1, item 1: user input must be a power of 2 for Advanced
12052 // SIMD.
12053 if (ISA == 'n' && UserVLEN && !llvm::isPowerOf2_32(UserVLEN)) {
12054 CGM.getDiags().Report(SLoc, diag::warn_simdlen_requires_power_of_2);
12055 return false;
12056 }
12057
12058 // 3. Section 3.4.1: SVE fixed length must obey the architectural limits.
12059 if (ISA == 's' && UserVLEN != 0 &&
12060 ((UserVLEN * WDS > 2048) || (UserVLEN * WDS % 128 != 0))) {
12061 CGM.getDiags().Report(SLoc, diag::warn_simdlen_must_fit_lanes) << WDS;
12062 return false;
12063 }
12064
12065 return true;
12066}
12067
// Processes every 'declare simd' attribute on FD (walking the redeclaration
// chain): collects per-parameter kinds (uniform/linear/aligned), alignments,
// and strides into ParamAttrs, then asks the OpenMPIRBuilder to attach the
// target-specific vector-variant mangling to Fn (x86 or AArch64 SVE/NEON).
12069 llvm::Function *Fn) {
12070 ASTContext &C = CGM.getContext();
12071 FD = FD->getMostRecentDecl();
12072 while (FD) {
12073 // Map params to their positions in function decl.
 // For methods, position 0 stands for the implicit 'this' parameter.
12074 llvm::DenseMap<const Decl *, unsigned> ParamPositions;
12075 if (isa<CXXMethodDecl>(FD))
12076 ParamPositions.try_emplace(FD, 0);
12077 unsigned ParamPos = ParamPositions.size();
12078 for (const ParmVarDecl *P : FD->parameters()) {
12079 ParamPositions.try_emplace(P->getCanonicalDecl(), ParamPos);
12080 ++ParamPos;
12081 }
12082 for (const auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) {
 // NOTE(review): the declaration of ParamAttrs is on a line not visible
 // in this listing (presumably a SmallVector sized to ParamPositions).
12084 ParamPositions.size());
12085 // Mark uniform parameters.
12086 for (const Expr *E : Attr->uniforms()) {
12087 E = E->IgnoreParenImpCasts();
12088 unsigned Pos;
12089 if (isa<CXXThisExpr>(E)) {
12090 Pos = ParamPositions[FD];
12091 } else {
12092 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
12093 ->getCanonicalDecl();
12094 auto It = ParamPositions.find(PVD);
12095 assert(It != ParamPositions.end() && "Function parameter not found");
12096 Pos = It->second;
12097 }
12098 ParamAttrs[Pos].Kind =
12099 llvm::OpenMPIRBuilder::DeclareSimdKindTy::Uniform;
12100 }
12101 // Get alignment info.
12102 auto *NI = Attr->alignments_begin();
12103 for (const Expr *E : Attr->aligneds()) {
12104 E = E->IgnoreParenImpCasts();
12105 unsigned Pos;
12106 QualType ParmTy;
12107 if (isa<CXXThisExpr>(E)) {
12108 Pos = ParamPositions[FD];
12109 ParmTy = E->getType();
12110 } else {
12111 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
12112 ->getCanonicalDecl();
12113 auto It = ParamPositions.find(PVD);
12114 assert(It != ParamPositions.end() && "Function parameter not found");
12115 Pos = It->second;
12116 ParmTy = PVD->getType();
12117 }
 // Explicit align expression wins; otherwise use the OpenMP default
 // SIMD alignment for the parameter's type.
12118 ParamAttrs[Pos].Alignment =
12119 (*NI)
12120 ? (*NI)->EvaluateKnownConstInt(C)
12121 : llvm::APSInt::getUnsigned(
12122 C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy))
12123 .getQuantity());
12124 ++NI;
12125 }
12126 // Mark linear parameters.
12127 auto *SI = Attr->steps_begin();
12128 auto *MI = Attr->modifiers_begin();
12129 for (const Expr *E : Attr->linears()) {
12130 E = E->IgnoreParenImpCasts();
12131 unsigned Pos;
12132 bool IsReferenceType = false;
12133 // Rescaling factor needed to compute the linear parameter
12134 // value in the mangled name.
12135 unsigned PtrRescalingFactor = 1;
12136 if (isa<CXXThisExpr>(E)) {
12137 Pos = ParamPositions[FD];
12138 auto *P = cast<PointerType>(E->getType());
12139 PtrRescalingFactor = CGM.getContext()
12140 .getTypeSizeInChars(P->getPointeeType())
12141 .getQuantity();
12142 } else {
12143 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
12144 ->getCanonicalDecl();
12145 auto It = ParamPositions.find(PVD);
12146 assert(It != ParamPositions.end() && "Function parameter not found");
12147 Pos = It->second;
12148 if (auto *P = dyn_cast<PointerType>(PVD->getType()))
12149 PtrRescalingFactor = CGM.getContext()
12150 .getTypeSizeInChars(P->getPointeeType())
12151 .getQuantity();
12152 else if (PVD->getType()->isReferenceType()) {
12153 IsReferenceType = true;
12154 PtrRescalingFactor =
12155 CGM.getContext()
12156 .getTypeSizeInChars(PVD->getType().getNonReferenceType())
12157 .getQuantity();
12158 }
12159 }
12160 llvm::OpenMPIRBuilder::DeclareSimdAttrTy &ParamAttr = ParamAttrs[Pos];
12161 if (*MI == OMPC_LINEAR_ref)
12162 ParamAttr.Kind = llvm::OpenMPIRBuilder::DeclareSimdKindTy::LinearRef;
12163 else if (*MI == OMPC_LINEAR_uval)
12164 ParamAttr.Kind = llvm::OpenMPIRBuilder::DeclareSimdKindTy::LinearUVal;
12165 else if (IsReferenceType)
12166 ParamAttr.Kind = llvm::OpenMPIRBuilder::DeclareSimdKindTy::LinearVal;
12167 else
12168 ParamAttr.Kind = llvm::OpenMPIRBuilder::DeclareSimdKindTy::Linear;
12169 // Assuming a stride of 1, for `linear` without modifiers.
12170 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(1);
12171 if (*SI) {
 // NOTE(review): the declaration of Result (Expr::EvalResult) is on a
 // line not visible in this listing.
12173 if (!(*SI)->EvaluateAsInt(Result, C, Expr::SE_AllowSideEffects)) {
 // Non-constant step: if it names another parameter, encode that
 // parameter's position as a variable stride.
12174 if (const auto *DRE =
12175 cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) {
12176 if (const auto *StridePVD =
12177 dyn_cast<ParmVarDecl>(DRE->getDecl())) {
12178 ParamAttr.HasVarStride = true;
12179 auto It = ParamPositions.find(StridePVD->getCanonicalDecl());
12180 assert(It != ParamPositions.end() &&
12181 "Function parameter not found");
12182 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(It->second);
12183 }
12184 }
12185 } else {
12186 ParamAttr.StrideOrArg = Result.Val.getInt();
12187 }
12188 }
12189 // If we are using a linear clause on a pointer, we need to
12190 // rescale the value of linear_step with the byte size of the
12191 // pointee type.
12192 if (!ParamAttr.HasVarStride &&
12193 (ParamAttr.Kind ==
12194 llvm::OpenMPIRBuilder::DeclareSimdKindTy::Linear ||
12195 ParamAttr.Kind ==
12196 llvm::OpenMPIRBuilder::DeclareSimdKindTy::LinearRef))
12197 ParamAttr.StrideOrArg = ParamAttr.StrideOrArg * PtrRescalingFactor;
12198 ++SI;
12199 ++MI;
12200 }
12201 llvm::APSInt VLENVal;
12202 SourceLocation ExprLoc;
12203 const Expr *VLENExpr = Attr->getSimdlen();
12204 if (VLENExpr) {
12205 VLENVal = VLENExpr->EvaluateKnownConstInt(C);
12206 ExprLoc = VLENExpr->getExprLoc();
12207 }
12208 llvm::OpenMPIRBuilder::DeclareSimdBranch State =
12209 convertDeclareSimdBranch(Attr->getBranchState());
12210 if (CGM.getTriple().isX86()) {
12211 unsigned NumElts = evaluateCDTSize(FD, ParamAttrs);
12212 assert(NumElts && "Non-zero simdlen/cdtsize expected");
12213 OMPBuilder.emitX86DeclareSimdFunction(Fn, NumElts, VLENVal, ParamAttrs,
12214 State);
12215 } else if (CGM.getTriple().getArch() == llvm::Triple::aarch64) {
12216 unsigned VLEN = VLENVal.getExtValue();
12217 // Get basic data for building the vector signature.
12218 const auto Data = getNDSWDS(FD, ParamAttrs);
12219 const unsigned NDS = std::get<0>(Data);
12220 const unsigned WDS = std::get<1>(Data);
12221 const bool OutputBecomesInput = std::get<2>(Data);
 // SVE takes precedence over NEON when both features are available.
12222 if (CGM.getTarget().hasFeature("sve")) {
12223 if (validateAArch64Simdlen(CGM, ExprLoc, VLEN, WDS, 's'))
12224 OMPBuilder.emitAArch64DeclareSimdFunction(
12225 Fn, VLEN, ParamAttrs, State, 's', NDS, OutputBecomesInput);
12226 } else if (CGM.getTarget().hasFeature("neon")) {
12227 if (validateAArch64Simdlen(CGM, ExprLoc, VLEN, WDS, 'n'))
12228 OMPBuilder.emitAArch64DeclareSimdFunction(
12229 Fn, VLEN, ParamAttrs, State, 'n', NDS, OutputBecomesInput);
12230 }
12231 }
12232 }
12233 FD = FD->getPreviousDecl();
12234 }
12235}
12236
12237namespace {
12238/// Cleanup action for doacross support.
12239class DoacrossCleanupTy final : public EHScopeStack::Cleanup {
12240public:
12241 static const int DoacrossFinArgs = 2;
12242
12243private:
12244 llvm::FunctionCallee RTLFn;
12245 llvm::Value *Args[DoacrossFinArgs];
12246
12247public:
12248 DoacrossCleanupTy(llvm::FunctionCallee RTLFn,
12249 ArrayRef<llvm::Value *> CallArgs)
12250 : RTLFn(RTLFn) {
12251 assert(CallArgs.size() == DoacrossFinArgs);
12252 std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
12253 }
12254 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
12255 if (!CGF.HaveInsertPoint())
12256 return;
12257 CGF.EmitRuntimeCall(RTLFn, Args);
12258 }
12259};
12260} // namespace
12261
// Emits __kmpc_doacross_init for an ordered(n) loop nest: builds (and
// caches) the kmp_dim record type, fills a dims array with per-dimension
// {lower, upper, stride} data, calls the init entry, and pushes a cleanup
// that emits __kmpc_doacross_fini on scope exit.
12263 const OMPLoopDirective &D,
12264 ArrayRef<Expr *> NumIterations) {
12265 if (!CGF.HaveInsertPoint())
12266 return;
12267
12268 ASTContext &C = CGM.getContext();
12269 QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
12270 RecordDecl *RD;
 // Build kmp_dim lazily on first use and cache it in KmpDimTy.
12271 if (KmpDimTy.isNull()) {
12272 // Build struct kmp_dim { // loop bounds info casted to kmp_int64
12273 // kmp_int64 lo; // lower
12274 // kmp_int64 up; // upper
12275 // kmp_int64 st; // stride
12276 // };
12277 RD = C.buildImplicitRecord("kmp_dim");
12278 RD->startDefinition();
12279 addFieldToRecordDecl(C, RD, Int64Ty);
12280 addFieldToRecordDecl(C, RD, Int64Ty);
12281 addFieldToRecordDecl(C, RD, Int64Ty);
12282 RD->completeDefinition();
12283 KmpDimTy = C.getCanonicalTagType(RD);
12284 } else {
12285 RD = KmpDimTy->castAsRecordDecl();
12286 }
12287 llvm::APInt Size(/*numBits=*/32, NumIterations.size());
12288 QualType ArrayTy = C.getConstantArrayType(KmpDimTy, Size, nullptr,
12290
12291 Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
 // Zero-init covers 'lo' (lower bound 0) for every dimension.
12292 CGF.EmitNullInitialization(DimsAddr, ArrayTy);
12293 enum { LowerFD = 0, UpperFD, StrideFD };
12294 // Fill dims with data.
12295 for (unsigned I = 0, E = NumIterations.size(); I < E; ++I) {
12296 LValue DimsLVal = CGF.MakeAddrLValue(
12297 CGF.Builder.CreateConstArrayGEP(DimsAddr, I), KmpDimTy);
12298 // dims.upper = num_iterations;
12299 LValue UpperLVal = CGF.EmitLValueForField(
12300 DimsLVal, *std::next(RD->field_begin(), UpperFD));
12301 llvm::Value *NumIterVal = CGF.EmitScalarConversion(
12302 CGF.EmitScalarExpr(NumIterations[I]), NumIterations[I]->getType(),
12303 Int64Ty, NumIterations[I]->getExprLoc());
12304 CGF.EmitStoreOfScalar(NumIterVal, UpperLVal);
12305 // dims.stride = 1;
12306 LValue StrideLVal = CGF.EmitLValueForField(
12307 DimsLVal, *std::next(RD->field_begin(), StrideFD));
12308 CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1),
12309 StrideLVal);
12310 }
12311
12312 // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid,
12313 // kmp_int32 num_dims, struct kmp_dim * dims);
12314 llvm::Value *Args[] = {
12315 emitUpdateLocation(CGF, D.getBeginLoc()),
12316 getThreadID(CGF, D.getBeginLoc()),
12317 llvm::ConstantInt::getSigned(CGM.Int32Ty, NumIterations.size()),
12319 CGF.Builder.CreateConstArrayGEP(DimsAddr, 0).emitRawPointer(CGF),
12320 CGM.VoidPtrTy)};
12321
12322 llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
12323 CGM.getModule(), OMPRTL___kmpc_doacross_init);
12324 CGF.EmitRuntimeCall(RTLFn, Args);
 // Ensure __kmpc_doacross_fini runs on both normal and EH exit paths.
12325 llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = {
12326 emitUpdateLocation(CGF, D.getEndLoc()), getThreadID(CGF, D.getEndLoc())};
12327 llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
12328 CGM.getModule(), OMPRTL___kmpc_doacross_fini);
12329 CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
12330 llvm::ArrayRef(FiniArgs));
12331}
12332
12333template <typename T>
// Shared implementation for ordered depend/doacross clauses: stores the
// per-loop iteration values into an int64 counter array and calls
// __kmpc_doacross_post (source) or __kmpc_doacross_wait (sink).
12335 const T *C, llvm::Value *ULoc,
12336 llvm::Value *ThreadID) {
12337 QualType Int64Ty =
12338 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
12339 llvm::APInt Size(/*numBits=*/32, C->getNumLoops());
 // NOTE(review): the declaration introducing ArrayTy is split across a
 // line not visible in this listing (a getConstantArrayType call).
12341 Int64Ty, Size, nullptr, ArraySizeModifier::Normal, 0);
12342 Address CntAddr = CGF.CreateMemTemp(ArrayTy, ".cnt.addr");
12343 for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) {
12344 const Expr *CounterVal = C->getLoopData(I);
12345 assert(CounterVal);
12346 llvm::Value *CntVal = CGF.EmitScalarConversion(
12347 CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty,
12348 CounterVal->getExprLoc());
12349 CGF.EmitStoreOfScalar(CntVal, CGF.Builder.CreateConstArrayGEP(CntAddr, I),
12350 /*Volatile=*/false, Int64Ty);
12351 }
12352 llvm::Value *Args[] = {
12353 ULoc, ThreadID,
12354 CGF.Builder.CreateConstArrayGEP(CntAddr, 0).emitRawPointer(CGF)};
12355 llvm::FunctionCallee RTLFn;
12356 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
 // OMPDoacrossKind abstracts over depend(source/sink) vs doacross clauses.
12357 OMPDoacrossKind<T> ODK;
12358 if (ODK.isSource(C)) {
12359 RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
12360 OMPRTL___kmpc_doacross_post);
12361 } else {
12362 assert(ODK.isSink(C) && "Expect sink modifier.");
12363 RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
12364 OMPRTL___kmpc_doacross_wait);
12365 }
12366 CGF.EmitRuntimeCall(RTLFn, Args);
12367}
12368
// Thin wrapper: forwards an 'ordered depend' clause to the templated
// doacross helper, supplying the update-location and thread-id values.
12370 const OMPDependClause *C) {
12372 CGF, CGM, C, emitUpdateLocation(CGF, C->getBeginLoc()),
12373 getThreadID(CGF, C->getBeginLoc()));
12374}
12375
// Thin wrapper: forwards an OpenMP 5.2 'doacross' clause to the templated
// doacross helper, supplying the update-location and thread-id values.
12377 const OMPDoacrossClause *C) {
12379 CGF, CGM, C, emitUpdateLocation(CGF, C->getBeginLoc()),
12380 getThreadID(CGF, C->getBeginLoc()));
12381}
12382
12384 llvm::FunctionCallee Callee,
12385 ArrayRef<llvm::Value *> Args) const {
12386 assert(Loc.isValid() && "Outlined function call location must be valid.");
12388
12389 if (auto *Fn = dyn_cast<llvm::Function>(Callee.getCallee())) {
12390 if (Fn->doesNotThrow()) {
12391 CGF.EmitNounwindRuntimeCall(Fn, Args);
12392 return;
12393 }
12394 }
12395 CGF.EmitRuntimeCall(Callee, Args);
12396}
12397
12399 CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn,
12400 ArrayRef<llvm::Value *> Args) const {
12401 emitCall(CGF, Loc, OutlinedFn, Args);
12402}
12403
// NOTE(review): the extraction dropped this function's signature (orig line
// 12404) and the statement guarded by the two ifs (orig line 12407). What is
// visible only checks that D is a FunctionDecl carrying a declare-target
// attribute; the action taken when it is cannot be determined from this dump —
// confirm against upstream CGOpenMPRuntime.cpp before relying on it.
12405 if (const auto *FD = dyn_cast<FunctionDecl>(D))
12406 if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(FD))
12408}
12409
12411 const VarDecl *NativeParam,
12412 const VarDecl *TargetParam) const {
12413 return CGF.GetAddrOfLocalVar(NativeParam);
12414}
12415
12416/// Return allocator value from expression, or return a null allocator (default
12417/// when no allocator specified).
12418static llvm::Value *getAllocatorVal(CodeGenFunction &CGF,
12419 const Expr *Allocator) {
12420 llvm::Value *AllocVal;
12421 if (Allocator) {
12422 AllocVal = CGF.EmitScalarExpr(Allocator);
12423 // According to the standard, the original allocator type is a enum
12424 // (integer). Convert to pointer type, if required.
12425 AllocVal = CGF.EmitScalarConversion(AllocVal, Allocator->getType(),
12426 CGF.getContext().VoidPtrTy,
12427 Allocator->getExprLoc());
12428 } else {
12429 // If no allocator specified, it defaults to the null allocator.
12430 AllocVal = llvm::Constant::getNullValue(
12432 }
12433 return AllocVal;
12434}
12435
12436/// Return the alignment from an allocate directive if present.
12437static llvm::Value *getAlignmentValue(CodeGenModule &CGM, const VarDecl *VD) {
12438 std::optional<CharUnits> AllocateAlignment = CGM.getOMPAllocateAlignment(VD);
12439
12440 if (!AllocateAlignment)
12441 return nullptr;
12442
12443 return llvm::ConstantInt::get(CGM.SizeTy, AllocateAlignment->getQuantity());
12444}
12445
12447 const VarDecl *VD) {
12448 if (!VD)
12449 return Address::invalid();
12450 Address UntiedAddr = Address::invalid();
12451 Address UntiedRealAddr = Address::invalid();
12452 auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
12453 if (It != FunctionToUntiedTaskStackMap.end()) {
12454 const UntiedLocalVarsAddressesMap &UntiedData =
12455 UntiedLocalVarsStack[It->second];
12456 auto I = UntiedData.find(VD);
12457 if (I != UntiedData.end()) {
12458 UntiedAddr = I->second.first;
12459 UntiedRealAddr = I->second.second;
12460 }
12461 }
12462 const VarDecl *CVD = VD->getCanonicalDecl();
12463 if (CVD->hasAttr<OMPAllocateDeclAttr>()) {
12464 // Use the default allocation.
12465 if (!isAllocatableDecl(VD))
12466 return UntiedAddr;
12467 llvm::Value *Size;
12468 CharUnits Align = CGM.getContext().getDeclAlign(CVD);
12469 if (CVD->getType()->isVariablyModifiedType()) {
12470 Size = CGF.getTypeSize(CVD->getType());
12471 // Align the size: ((size + align - 1) / align) * align
12472 Size = CGF.Builder.CreateNUWAdd(
12473 Size, CGM.getSize(Align - CharUnits::fromQuantity(1)));
12474 Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align));
12475 Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align));
12476 } else {
12477 CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType());
12478 Size = CGM.getSize(Sz.alignTo(Align));
12479 }
12480 llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc());
12481 const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
12482 const Expr *Allocator = AA->getAllocator();
12483 llvm::Value *AllocVal = getAllocatorVal(CGF, Allocator);
12484 llvm::Value *Alignment = getAlignmentValue(CGM, CVD);
12486 Args.push_back(ThreadID);
12487 if (Alignment)
12488 Args.push_back(Alignment);
12489 Args.push_back(Size);
12490 Args.push_back(AllocVal);
12491 llvm::omp::RuntimeFunction FnID =
12492 Alignment ? OMPRTL___kmpc_aligned_alloc : OMPRTL___kmpc_alloc;
12493 llvm::Value *Addr = CGF.EmitRuntimeCall(
12494 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), FnID), Args,
12495 getName({CVD->getName(), ".void.addr"}));
12496 llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
12497 CGM.getModule(), OMPRTL___kmpc_free);
12498 QualType Ty = CGM.getContext().getPointerType(CVD->getType());
12500 Addr, CGF.ConvertTypeForMem(Ty), getName({CVD->getName(), ".addr"}));
12501 if (UntiedAddr.isValid())
12502 CGF.EmitStoreOfScalar(Addr, UntiedAddr, /*Volatile=*/false, Ty);
12503
12504 // Cleanup action for allocate support.
12505 class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup {
12506 llvm::FunctionCallee RTLFn;
12507 SourceLocation::UIntTy LocEncoding;
12508 Address Addr;
12509 const Expr *AllocExpr;
12510
12511 public:
12512 OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn,
12513 SourceLocation::UIntTy LocEncoding, Address Addr,
12514 const Expr *AllocExpr)
12515 : RTLFn(RTLFn), LocEncoding(LocEncoding), Addr(Addr),
12516 AllocExpr(AllocExpr) {}
12517 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
12518 if (!CGF.HaveInsertPoint())
12519 return;
12520 llvm::Value *Args[3];
12521 Args[0] = CGF.CGM.getOpenMPRuntime().getThreadID(
12522 CGF, SourceLocation::getFromRawEncoding(LocEncoding));
12524 Addr.emitRawPointer(CGF), CGF.VoidPtrTy);
12525 llvm::Value *AllocVal = getAllocatorVal(CGF, AllocExpr);
12526 Args[2] = AllocVal;
12527 CGF.EmitRuntimeCall(RTLFn, Args);
12528 }
12529 };
12530 Address VDAddr =
12531 UntiedRealAddr.isValid()
12532 ? UntiedRealAddr
12533 : Address(Addr, CGF.ConvertTypeForMem(CVD->getType()), Align);
12534 CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(
12535 NormalAndEHCleanup, FiniRTLFn, CVD->getLocation().getRawEncoding(),
12536 VDAddr, Allocator);
12537 if (UntiedRealAddr.isValid())
12538 if (auto *Region =
12539 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
12540 Region->emitUntiedSwitch(CGF);
12541 return VDAddr;
12542 }
12543 return UntiedAddr;
12544}
12545
12547 const VarDecl *VD) const {
12548 auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
12549 if (It == FunctionToUntiedTaskStackMap.end())
12550 return false;
12551 return UntiedLocalVarsStack[It->second].count(VD) > 0;
12552}
12553
12555 CodeGenModule &CGM, const OMPLoopDirective &S)
12556 : CGM(CGM), NeedToPush(S.hasClausesOfKind<OMPNontemporalClause>()) {
12557 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
12558 if (!NeedToPush)
12559 return;
12561 CGM.getOpenMPRuntime().NontemporalDeclsStack.emplace_back();
12562 for (const auto *C : S.getClausesOfKind<OMPNontemporalClause>()) {
12563 for (const Stmt *Ref : C->private_refs()) {
12564 const auto *SimpleRefExpr = cast<Expr>(Ref)->IgnoreParenImpCasts();
12565 const ValueDecl *VD;
12566 if (const auto *DRE = dyn_cast<DeclRefExpr>(SimpleRefExpr)) {
12567 VD = DRE->getDecl();
12568 } else {
12569 const auto *ME = cast<MemberExpr>(SimpleRefExpr);
12570 assert((ME->isImplicitCXXThis() ||
12571 isa<CXXThisExpr>(ME->getBase()->IgnoreParenImpCasts())) &&
12572 "Expected member of current class.");
12573 VD = ME->getMemberDecl();
12574 }
12575 DS.insert(VD);
12576 }
12577 }
12578}
12579
12581 if (!NeedToPush)
12582 return;
12583 CGM.getOpenMPRuntime().NontemporalDeclsStack.pop_back();
12584}
12585
12587 CodeGenFunction &CGF,
12588 const llvm::MapVector<CanonicalDeclPtr<const VarDecl>,
12589 std::pair<Address, Address>> &LocalVars)
12590 : CGM(CGF.CGM), NeedToPush(!LocalVars.empty()) {
12591 if (!NeedToPush)
12592 return;
12593 CGM.getOpenMPRuntime().FunctionToUntiedTaskStackMap.try_emplace(
12594 CGF.CurFn, CGM.getOpenMPRuntime().UntiedLocalVarsStack.size());
12595 CGM.getOpenMPRuntime().UntiedLocalVarsStack.push_back(LocalVars);
12596}
12597
12599 if (!NeedToPush)
12600 return;
12601 CGM.getOpenMPRuntime().UntiedLocalVarsStack.pop_back();
12602}
12603
12605 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
12606
12607 return llvm::any_of(
12608 CGM.getOpenMPRuntime().NontemporalDeclsStack,
12609 [VD](const NontemporalDeclsSet &Set) { return Set.contains(VD); });
12610}
12611
12612void CGOpenMPRuntime::LastprivateConditionalRAII::tryToDisableInnerAnalysis(
12613 const OMPExecutableDirective &S,
12614 llvm::DenseSet<CanonicalDeclPtr<const Decl>> &NeedToAddForLPCsAsDisabled)
12615 const {
12616 llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToCheckForLPCs;
12617 // Vars in target/task regions must be excluded completely.
12618 if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()) ||
12619 isOpenMPTaskingDirective(S.getDirectiveKind())) {
12621 getOpenMPCaptureRegions(CaptureRegions, S.getDirectiveKind());
12622 const CapturedStmt *CS = S.getCapturedStmt(CaptureRegions.front());
12623 for (const CapturedStmt::Capture &Cap : CS->captures()) {
12624 if (Cap.capturesVariable() || Cap.capturesVariableByCopy())
12625 NeedToCheckForLPCs.insert(Cap.getCapturedVar());
12626 }
12627 }
12628 // Exclude vars in private clauses.
12629 for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
12630 for (const Expr *Ref : C->varlist()) {
12631 if (!Ref->getType()->isScalarType())
12632 continue;
12633 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12634 if (!DRE)
12635 continue;
12636 NeedToCheckForLPCs.insert(DRE->getDecl());
12637 }
12638 }
12639 for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
12640 for (const Expr *Ref : C->varlist()) {
12641 if (!Ref->getType()->isScalarType())
12642 continue;
12643 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12644 if (!DRE)
12645 continue;
12646 NeedToCheckForLPCs.insert(DRE->getDecl());
12647 }
12648 }
12649 for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
12650 for (const Expr *Ref : C->varlist()) {
12651 if (!Ref->getType()->isScalarType())
12652 continue;
12653 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12654 if (!DRE)
12655 continue;
12656 NeedToCheckForLPCs.insert(DRE->getDecl());
12657 }
12658 }
12659 for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
12660 for (const Expr *Ref : C->varlist()) {
12661 if (!Ref->getType()->isScalarType())
12662 continue;
12663 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12664 if (!DRE)
12665 continue;
12666 NeedToCheckForLPCs.insert(DRE->getDecl());
12667 }
12668 }
12669 for (const auto *C : S.getClausesOfKind<OMPLinearClause>()) {
12670 for (const Expr *Ref : C->varlist()) {
12671 if (!Ref->getType()->isScalarType())
12672 continue;
12673 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12674 if (!DRE)
12675 continue;
12676 NeedToCheckForLPCs.insert(DRE->getDecl());
12677 }
12678 }
12679 for (const Decl *VD : NeedToCheckForLPCs) {
12680 for (const LastprivateConditionalData &Data :
12681 llvm::reverse(CGM.getOpenMPRuntime().LastprivateConditionalStack)) {
12682 if (Data.DeclToUniqueName.count(VD) > 0) {
12683 if (!Data.Disabled)
12684 NeedToAddForLPCsAsDisabled.insert(VD);
12685 break;
12686 }
12687 }
12688 }
12689}
12690
12691CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
12692 CodeGenFunction &CGF, const OMPExecutableDirective &S, LValue IVLVal)
12693 : CGM(CGF.CGM),
12694 Action((CGM.getLangOpts().OpenMP >= 50 &&
12695 llvm::any_of(S.getClausesOfKind<OMPLastprivateClause>(),
12696 [](const OMPLastprivateClause *C) {
12697 return C->getKind() ==
12698 OMPC_LASTPRIVATE_conditional;
12699 }))
12700 ? ActionToDo::PushAsLastprivateConditional
12701 : ActionToDo::DoNotPush) {
12702 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
12703 if (CGM.getLangOpts().OpenMP < 50 || Action == ActionToDo::DoNotPush)
12704 return;
12705 assert(Action == ActionToDo::PushAsLastprivateConditional &&
12706 "Expected a push action.");
12708 CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
12709 for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
12710 if (C->getKind() != OMPC_LASTPRIVATE_conditional)
12711 continue;
12712
12713 for (const Expr *Ref : C->varlist()) {
12714 Data.DeclToUniqueName.insert(std::make_pair(
12715 cast<DeclRefExpr>(Ref->IgnoreParenImpCasts())->getDecl(),
12716 SmallString<16>(generateUniqueName(CGM, "pl_cond", Ref))));
12717 }
12718 }
12719 Data.IVLVal = IVLVal;
12720 Data.Fn = CGF.CurFn;
12721}
12722
12723CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
12725 : CGM(CGF.CGM), Action(ActionToDo::DoNotPush) {
12726 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
12727 if (CGM.getLangOpts().OpenMP < 50)
12728 return;
12729 llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToAddForLPCsAsDisabled;
12730 tryToDisableInnerAnalysis(S, NeedToAddForLPCsAsDisabled);
12731 if (!NeedToAddForLPCsAsDisabled.empty()) {
12732 Action = ActionToDo::DisableLastprivateConditional;
12733 LastprivateConditionalData &Data =
12735 for (const Decl *VD : NeedToAddForLPCsAsDisabled)
12736 Data.DeclToUniqueName.try_emplace(VD);
12737 Data.Fn = CGF.CurFn;
12738 Data.Disabled = true;
12739 }
12740}
12741
12742CGOpenMPRuntime::LastprivateConditionalRAII
12744 CodeGenFunction &CGF, const OMPExecutableDirective &S) {
12745 return LastprivateConditionalRAII(CGF, S);
12746}
12747
12749 if (CGM.getLangOpts().OpenMP < 50)
12750 return;
12751 if (Action == ActionToDo::DisableLastprivateConditional) {
12752 assert(CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
12753 "Expected list of disabled private vars.");
12754 CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
12755 }
12756 if (Action == ActionToDo::PushAsLastprivateConditional) {
12757 assert(
12758 !CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
12759 "Expected list of lastprivate conditional vars.");
12760 CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
12761 }
12762}
12763
12765 const VarDecl *VD) {
12766 ASTContext &C = CGM.getContext();
12767 auto I = LastprivateConditionalToTypes.try_emplace(CGF.CurFn).first;
12768 QualType NewType;
12769 const FieldDecl *VDField;
12770 const FieldDecl *FiredField;
12771 LValue BaseLVal;
12772 auto VI = I->getSecond().find(VD);
12773 if (VI == I->getSecond().end()) {
12774 RecordDecl *RD = C.buildImplicitRecord("lasprivate.conditional");
12775 RD->startDefinition();
12776 VDField = addFieldToRecordDecl(C, RD, VD->getType().getNonReferenceType());
12777 FiredField = addFieldToRecordDecl(C, RD, C.CharTy);
12778 RD->completeDefinition();
12779 NewType = C.getCanonicalTagType(RD);
12780 Address Addr = CGF.CreateMemTemp(NewType, C.getDeclAlign(VD), VD->getName());
12781 BaseLVal = CGF.MakeAddrLValue(Addr, NewType, AlignmentSource::Decl);
12782 I->getSecond().try_emplace(VD, NewType, VDField, FiredField, BaseLVal);
12783 } else {
12784 NewType = std::get<0>(VI->getSecond());
12785 VDField = std::get<1>(VI->getSecond());
12786 FiredField = std::get<2>(VI->getSecond());
12787 BaseLVal = std::get<3>(VI->getSecond());
12788 }
12789 LValue FiredLVal =
12790 CGF.EmitLValueForField(BaseLVal, FiredField);
12792 llvm::ConstantInt::getNullValue(CGF.ConvertTypeForMem(C.CharTy)),
12793 FiredLVal);
12794 return CGF.EmitLValueForField(BaseLVal, VDField).getAddress();
12795}
12796
12797namespace {
12798/// Checks if the lastprivate conditional variable is referenced in LHS.
12799class LastprivateConditionalRefChecker final
12800 : public ConstStmtVisitor<LastprivateConditionalRefChecker, bool> {
12802 const Expr *FoundE = nullptr;
12803 const Decl *FoundD = nullptr;
12804 StringRef UniqueDeclName;
12805 LValue IVLVal;
12806 llvm::Function *FoundFn = nullptr;
12807 SourceLocation Loc;
12808
12809public:
12810 bool VisitDeclRefExpr(const DeclRefExpr *E) {
12812 llvm::reverse(LPM)) {
12813 auto It = D.DeclToUniqueName.find(E->getDecl());
12814 if (It == D.DeclToUniqueName.end())
12815 continue;
12816 if (D.Disabled)
12817 return false;
12818 FoundE = E;
12819 FoundD = E->getDecl()->getCanonicalDecl();
12820 UniqueDeclName = It->second;
12821 IVLVal = D.IVLVal;
12822 FoundFn = D.Fn;
12823 break;
12824 }
12825 return FoundE == E;
12826 }
12827 bool VisitMemberExpr(const MemberExpr *E) {
12829 return false;
12831 llvm::reverse(LPM)) {
12832 auto It = D.DeclToUniqueName.find(E->getMemberDecl());
12833 if (It == D.DeclToUniqueName.end())
12834 continue;
12835 if (D.Disabled)
12836 return false;
12837 FoundE = E;
12838 FoundD = E->getMemberDecl()->getCanonicalDecl();
12839 UniqueDeclName = It->second;
12840 IVLVal = D.IVLVal;
12841 FoundFn = D.Fn;
12842 break;
12843 }
12844 return FoundE == E;
12845 }
12846 bool VisitStmt(const Stmt *S) {
12847 for (const Stmt *Child : S->children()) {
12848 if (!Child)
12849 continue;
12850 if (const auto *E = dyn_cast<Expr>(Child))
12851 if (!E->isGLValue())
12852 continue;
12853 if (Visit(Child))
12854 return true;
12855 }
12856 return false;
12857 }
12858 explicit LastprivateConditionalRefChecker(
12859 ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM)
12860 : LPM(LPM) {}
12861 std::tuple<const Expr *, const Decl *, StringRef, LValue, llvm::Function *>
12862 getFoundData() const {
12863 return std::make_tuple(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn);
12864 }
12865};
12866} // namespace
12867
12869 LValue IVLVal,
12870 StringRef UniqueDeclName,
12871 LValue LVal,
12872 SourceLocation Loc) {
12873 // Last updated loop counter for the lastprivate conditional var.
12874 // int<xx> last_iv = 0;
12875 llvm::Type *LLIVTy = CGF.ConvertTypeForMem(IVLVal.getType());
12876 llvm::Constant *LastIV = OMPBuilder.getOrCreateInternalVariable(
12877 LLIVTy, getName({UniqueDeclName, "iv"}));
12878 cast<llvm::GlobalVariable>(LastIV)->setAlignment(
12879 IVLVal.getAlignment().getAsAlign());
12880 LValue LastIVLVal =
12881 CGF.MakeNaturalAlignRawAddrLValue(LastIV, IVLVal.getType());
12882
12883 // Last value of the lastprivate conditional.
12884 // decltype(priv_a) last_a;
12885 llvm::GlobalVariable *Last = OMPBuilder.getOrCreateInternalVariable(
12886 CGF.ConvertTypeForMem(LVal.getType()), UniqueDeclName);
12887 cast<llvm::GlobalVariable>(Last)->setAlignment(
12888 LVal.getAlignment().getAsAlign());
12889 LValue LastLVal =
12890 CGF.MakeRawAddrLValue(Last, LVal.getType(), LVal.getAlignment());
12891
12892 // Global loop counter. Required to handle inner parallel-for regions.
12893 // iv
12894 llvm::Value *IVVal = CGF.EmitLoadOfScalar(IVLVal, Loc);
12895
12896 // #pragma omp critical(a)
12897 // if (last_iv <= iv) {
12898 // last_iv = iv;
12899 // last_a = priv_a;
12900 // }
12901 auto &&CodeGen = [&LastIVLVal, &IVLVal, IVVal, &LVal, &LastLVal,
12902 Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
12903 Action.Enter(CGF);
12904 llvm::Value *LastIVVal = CGF.EmitLoadOfScalar(LastIVLVal, Loc);
12905 // (last_iv <= iv) ? Check if the variable is updated and store new
12906 // value in global var.
12907 llvm::Value *CmpRes;
12908 if (IVLVal.getType()->isSignedIntegerType()) {
12909 CmpRes = CGF.Builder.CreateICmpSLE(LastIVVal, IVVal);
12910 } else {
12911 assert(IVLVal.getType()->isUnsignedIntegerType() &&
12912 "Loop iteration variable must be integer.");
12913 CmpRes = CGF.Builder.CreateICmpULE(LastIVVal, IVVal);
12914 }
12915 llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lp_cond_then");
12916 llvm::BasicBlock *ExitBB = CGF.createBasicBlock("lp_cond_exit");
12917 CGF.Builder.CreateCondBr(CmpRes, ThenBB, ExitBB);
12918 // {
12919 CGF.EmitBlock(ThenBB);
12920
12921 // last_iv = iv;
12922 CGF.EmitStoreOfScalar(IVVal, LastIVLVal);
12923
12924 // last_a = priv_a;
12925 switch (CGF.getEvaluationKind(LVal.getType())) {
12926 case TEK_Scalar: {
12927 llvm::Value *PrivVal = CGF.EmitLoadOfScalar(LVal, Loc);
12928 CGF.EmitStoreOfScalar(PrivVal, LastLVal);
12929 break;
12930 }
12931 case TEK_Complex: {
12932 CodeGenFunction::ComplexPairTy PrivVal = CGF.EmitLoadOfComplex(LVal, Loc);
12933 CGF.EmitStoreOfComplex(PrivVal, LastLVal, /*isInit=*/false);
12934 break;
12935 }
12936 case TEK_Aggregate:
12937 llvm_unreachable(
12938 "Aggregates are not supported in lastprivate conditional.");
12939 }
12940 // }
12941 CGF.EmitBranch(ExitBB);
12942 // There is no need to emit line number for unconditional branch.
12944 CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
12945 };
12946
12947 if (CGM.getLangOpts().OpenMPSimd) {
12948 // Do not emit as a critical region as no parallel region could be emitted.
12949 RegionCodeGenTy ThenRCG(CodeGen);
12950 ThenRCG(CGF);
12951 } else {
12952 emitCriticalRegion(CGF, UniqueDeclName, CodeGen, Loc);
12953 }
12954}
12955
12957 const Expr *LHS) {
12958 if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
12959 return;
12960 LastprivateConditionalRefChecker Checker(LastprivateConditionalStack);
12961 if (!Checker.Visit(LHS))
12962 return;
12963 const Expr *FoundE;
12964 const Decl *FoundD;
12965 StringRef UniqueDeclName;
12966 LValue IVLVal;
12967 llvm::Function *FoundFn;
12968 std::tie(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn) =
12969 Checker.getFoundData();
12970 if (FoundFn != CGF.CurFn) {
12971 // Special codegen for inner parallel regions.
12972 // ((struct.lastprivate.conditional*)&priv_a)->Fired = 1;
12973 auto It = LastprivateConditionalToTypes[FoundFn].find(FoundD);
12974 assert(It != LastprivateConditionalToTypes[FoundFn].end() &&
12975 "Lastprivate conditional is not found in outer region.");
12976 QualType StructTy = std::get<0>(It->getSecond());
12977 const FieldDecl* FiredDecl = std::get<2>(It->getSecond());
12978 LValue PrivLVal = CGF.EmitLValue(FoundE);
12980 PrivLVal.getAddress(),
12981 CGF.ConvertTypeForMem(CGF.getContext().getPointerType(StructTy)),
12982 CGF.ConvertTypeForMem(StructTy));
12983 LValue BaseLVal =
12984 CGF.MakeAddrLValue(StructAddr, StructTy, AlignmentSource::Decl);
12985 LValue FiredLVal = CGF.EmitLValueForField(BaseLVal, FiredDecl);
12986 CGF.EmitAtomicStore(RValue::get(llvm::ConstantInt::get(
12987 CGF.ConvertTypeForMem(FiredDecl->getType()), 1)),
12988 FiredLVal, llvm::AtomicOrdering::Unordered,
12989 /*IsVolatile=*/true, /*isInit=*/false);
12990 return;
12991 }
12992
12993 // Private address of the lastprivate conditional in the current context.
12994 // priv_a
12995 LValue LVal = CGF.EmitLValue(FoundE);
12996 emitLastprivateConditionalUpdate(CGF, IVLVal, UniqueDeclName, LVal,
12997 FoundE->getExprLoc());
12998}
12999
13002 const llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> &IgnoredDecls) {
13003 if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
13004 return;
13005 auto Range = llvm::reverse(LastprivateConditionalStack);
13006 auto It = llvm::find_if(
13007 Range, [](const LastprivateConditionalData &D) { return !D.Disabled; });
13008 if (It == Range.end() || It->Fn != CGF.CurFn)
13009 return;
13010 auto LPCI = LastprivateConditionalToTypes.find(It->Fn);
13011 assert(LPCI != LastprivateConditionalToTypes.end() &&
13012 "Lastprivates must be registered already.");
13014 getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind());
13015 const CapturedStmt *CS = D.getCapturedStmt(CaptureRegions.back());
13016 for (const auto &Pair : It->DeclToUniqueName) {
13017 const auto *VD = cast<VarDecl>(Pair.first->getCanonicalDecl());
13018 if (!CS->capturesVariable(VD) || IgnoredDecls.contains(VD))
13019 continue;
13020 auto I = LPCI->getSecond().find(Pair.first);
13021 assert(I != LPCI->getSecond().end() &&
13022 "Lastprivate must be rehistered already.");
13023 // bool Cmp = priv_a.Fired != 0;
13024 LValue BaseLVal = std::get<3>(I->getSecond());
13025 LValue FiredLVal =
13026 CGF.EmitLValueForField(BaseLVal, std::get<2>(I->getSecond()));
13027 llvm::Value *Res = CGF.EmitLoadOfScalar(FiredLVal, D.getBeginLoc());
13028 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Res);
13029 llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lpc.then");
13030 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("lpc.done");
13031 // if (Cmp) {
13032 CGF.Builder.CreateCondBr(Cmp, ThenBB, DoneBB);
13033 CGF.EmitBlock(ThenBB);
13034 Address Addr = CGF.GetAddrOfLocalVar(VD);
13035 LValue LVal;
13036 if (VD->getType()->isReferenceType())
13037 LVal = CGF.EmitLoadOfReferenceLValue(Addr, VD->getType(),
13039 else
13040 LVal = CGF.MakeAddrLValue(Addr, VD->getType().getNonReferenceType(),
13042 emitLastprivateConditionalUpdate(CGF, It->IVLVal, Pair.second, LVal,
13043 D.getBeginLoc());
13045 CGF.EmitBlock(DoneBB, /*IsFinal=*/true);
13046 // }
13047 }
13048}
13049
13051 CodeGenFunction &CGF, LValue PrivLVal, const VarDecl *VD,
13052 SourceLocation Loc) {
13053 if (CGF.getLangOpts().OpenMP < 50)
13054 return;
13055 auto It = LastprivateConditionalStack.back().DeclToUniqueName.find(VD);
13056 assert(It != LastprivateConditionalStack.back().DeclToUniqueName.end() &&
13057 "Unknown lastprivate conditional variable.");
13058 StringRef UniqueName = It->second;
13059 llvm::GlobalVariable *GV = CGM.getModule().getNamedGlobal(UniqueName);
13060 // The variable was not updated in the region - exit.
13061 if (!GV)
13062 return;
13063 LValue LPLVal = CGF.MakeRawAddrLValue(
13064 GV, PrivLVal.getType().getNonReferenceType(), PrivLVal.getAlignment());
13065 llvm::Value *Res = CGF.EmitLoadOfScalar(LPLVal, Loc);
13066 CGF.EmitStoreOfScalar(Res, PrivLVal);
13067}
13068
13071 const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
13072 const RegionCodeGenTy &CodeGen) {
13073 llvm_unreachable("Not supported in SIMD-only mode");
13074}
13075
13078 const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
13079 const RegionCodeGenTy &CodeGen) {
13080 llvm_unreachable("Not supported in SIMD-only mode");
13081}
13082
13084 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
13085 const VarDecl *PartIDVar, const VarDecl *TaskTVar,
13086 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
13087 bool Tied, unsigned &NumberOfParts) {
13088 llvm_unreachable("Not supported in SIMD-only mode");
13089}
13090
13092 CodeGenFunction &CGF, SourceLocation Loc, llvm::Function *OutlinedFn,
13093 ArrayRef<llvm::Value *> CapturedVars, const Expr *IfCond,
13094 llvm::Value *NumThreads, OpenMPNumThreadsClauseModifier NumThreadsModifier,
13095 OpenMPSeverityClauseKind Severity, const Expr *Message) {
13096 llvm_unreachable("Not supported in SIMD-only mode");
13097}
13098
13100 CodeGenFunction &CGF, StringRef CriticalName,
13101 const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc,
13102 const Expr *Hint) {
13103 llvm_unreachable("Not supported in SIMD-only mode");
13104}
13105
13107 const RegionCodeGenTy &MasterOpGen,
13108 SourceLocation Loc) {
13109 llvm_unreachable("Not supported in SIMD-only mode");
13110}
13111
13113 const RegionCodeGenTy &MasterOpGen,
13114 SourceLocation Loc,
13115 const Expr *Filter) {
13116 llvm_unreachable("Not supported in SIMD-only mode");
13117}
13118
13120 SourceLocation Loc) {
13121 llvm_unreachable("Not supported in SIMD-only mode");
13122}
13123
13125 CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen,
13126 SourceLocation Loc) {
13127 llvm_unreachable("Not supported in SIMD-only mode");
13128}
13129
13131 CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen,
13132 SourceLocation Loc, ArrayRef<const Expr *> CopyprivateVars,
13134 ArrayRef<const Expr *> AssignmentOps) {
13135 llvm_unreachable("Not supported in SIMD-only mode");
13136}
13137
13139 const RegionCodeGenTy &OrderedOpGen,
13140 SourceLocation Loc,
13141 bool IsThreads) {
13142 llvm_unreachable("Not supported in SIMD-only mode");
13143}
13144
13146 SourceLocation Loc,
13148 bool EmitChecks,
13149 bool ForceSimpleCall) {
13150 llvm_unreachable("Not supported in SIMD-only mode");
13151}
13152
13155 const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
13156 bool Ordered, const DispatchRTInput &DispatchValues) {
13157 llvm_unreachable("Not supported in SIMD-only mode");
13158}
13159
13161 SourceLocation Loc) {
13162 llvm_unreachable("Not supported in SIMD-only mode");
13163}
13164
13167 const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) {
13168 llvm_unreachable("Not supported in SIMD-only mode");
13169}
13170
13173 OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) {
13174 llvm_unreachable("Not supported in SIMD-only mode");
13175}
13176
13178 SourceLocation Loc,
13179 unsigned IVSize,
13180 bool IVSigned) {
13181 llvm_unreachable("Not supported in SIMD-only mode");
13182}
13183
13185 SourceLocation Loc,
13186 OpenMPDirectiveKind DKind) {
13187 llvm_unreachable("Not supported in SIMD-only mode");
13188}
13189
13191 SourceLocation Loc,
13192 unsigned IVSize, bool IVSigned,
13193 Address IL, Address LB,
13194 Address UB, Address ST) {
13195 llvm_unreachable("Not supported in SIMD-only mode");
13196}
13197
13199 CodeGenFunction &CGF, llvm::Value *NumThreads, SourceLocation Loc,
13201 SourceLocation SeverityLoc, const Expr *Message,
13202 SourceLocation MessageLoc) {
13203 llvm_unreachable("Not supported in SIMD-only mode");
13204}
13205
13207 ProcBindKind ProcBind,
13208 SourceLocation Loc) {
13209 llvm_unreachable("Not supported in SIMD-only mode");
13210}
13211
13213 const VarDecl *VD,
13214 Address VDAddr,
13215 SourceLocation Loc) {
13216 llvm_unreachable("Not supported in SIMD-only mode");
13217}
13218
13220 const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit,
13221 CodeGenFunction *CGF) {
13222 llvm_unreachable("Not supported in SIMD-only mode");
13223}
13224
13226 CodeGenFunction &CGF, QualType VarType, StringRef Name) {
13227 llvm_unreachable("Not supported in SIMD-only mode");
13228}
13229
13232 SourceLocation Loc,
13233 llvm::AtomicOrdering AO) {
13234 llvm_unreachable("Not supported in SIMD-only mode");
13235}
13236
13238 const OMPExecutableDirective &D,
13239 llvm::Function *TaskFunction,
13240 QualType SharedsTy, Address Shareds,
13241 const Expr *IfCond,
13242 const OMPTaskDataTy &Data) {
13243 llvm_unreachable("Not supported in SIMD-only mode");
13244}
13245
13248 llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds,
13249 const Expr *IfCond, const OMPTaskDataTy &Data) {
13250 llvm_unreachable("Not supported in SIMD-only mode");
13251}
13252
13256 ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) {
13257 assert(Options.SimpleReduction && "Only simple reduction is expected.");
13258 CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs,
13259 ReductionOps, Options);
13260}
13261
13264 ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
13265 llvm_unreachable("Not supported in SIMD-only mode");
13266}
13267
13269 SourceLocation Loc,
13270 bool IsWorksharingReduction) {
13271 llvm_unreachable("Not supported in SIMD-only mode");
13272}
13273
13275 SourceLocation Loc,
13276 ReductionCodeGen &RCG,
13277 unsigned N) {
13278 llvm_unreachable("Not supported in SIMD-only mode");
13279}
13280
13282 SourceLocation Loc,
13283 llvm::Value *ReductionsPtr,
13284 LValue SharedLVal) {
13285 llvm_unreachable("Not supported in SIMD-only mode");
13286}
13287
13289 SourceLocation Loc,
13290 const OMPTaskDataTy &Data) {
13291 llvm_unreachable("Not supported in SIMD-only mode");
13292}
13293
13296 OpenMPDirectiveKind CancelRegion) {
13297 llvm_unreachable("Not supported in SIMD-only mode");
13298}
13299
13301 SourceLocation Loc, const Expr *IfCond,
13302 OpenMPDirectiveKind CancelRegion) {
13303 llvm_unreachable("Not supported in SIMD-only mode");
13304}
13305
13307 const OMPExecutableDirective &D, StringRef ParentName,
13308 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
13309 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
13310 llvm_unreachable("Not supported in SIMD-only mode");
13311}
13312
13315 llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
13316 llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
13317 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
13318 const OMPLoopDirective &D)>
13319 SizeEmitter) {
13320 llvm_unreachable("Not supported in SIMD-only mode");
13321}
13322
13324 llvm_unreachable("Not supported in SIMD-only mode");
13325}
13326
13328 llvm_unreachable("Not supported in SIMD-only mode");
13329}
13330
13332 return false;
13333}
13334
13336 const OMPExecutableDirective &D,
13337 SourceLocation Loc,
13338 llvm::Function *OutlinedFn,
13339 ArrayRef<llvm::Value *> CapturedVars) {
13340 llvm_unreachable("Not supported in SIMD-only mode");
13341}
13342
13344 const Expr *NumTeams,
13345 const Expr *ThreadLimit,
13346 SourceLocation Loc) {
13347 llvm_unreachable("Not supported in SIMD-only mode");
13348}
13349
13351 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
13352 const Expr *Device, const RegionCodeGenTy &CodeGen,
13354 llvm_unreachable("Not supported in SIMD-only mode");
13355}
13356
13358 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
13359 const Expr *Device) {
13360 llvm_unreachable("Not supported in SIMD-only mode");
13361}
13362
13364 const OMPLoopDirective &D,
13365 ArrayRef<Expr *> NumIterations) {
13366 llvm_unreachable("Not supported in SIMD-only mode");
13367}
13368
13370 const OMPDependClause *C) {
13371 llvm_unreachable("Not supported in SIMD-only mode");
13372}
13373
13375 const OMPDoacrossClause *C) {
13376 llvm_unreachable("Not supported in SIMD-only mode");
13377}
13378
13379const VarDecl *
13381 const VarDecl *NativeParam) const {
13382 llvm_unreachable("Not supported in SIMD-only mode");
13383}
13384
13385Address
13387 const VarDecl *NativeParam,
13388 const VarDecl *TargetParam) const {
13389 llvm_unreachable("Not supported in SIMD-only mode");
13390}
#define V(N, I)
static llvm::Value * emitCopyprivateCopyFunction(CodeGenModule &CGM, llvm::Type *ArgsElemType, ArrayRef< const Expr * > CopyprivateVars, ArrayRef< const Expr * > DestExprs, ArrayRef< const Expr * > SrcExprs, ArrayRef< const Expr * > AssignmentOps, SourceLocation Loc)
static StringRef getIdentStringFromSourceLocation(CodeGenFunction &CGF, SourceLocation Loc, SmallString< 128 > &Buffer)
static void emitOffloadingArraysAndArgs(CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo, CGOpenMPRuntime::TargetDataInfo &Info, llvm::OpenMPIRBuilder &OMPBuilder, bool IsNonContiguous=false, bool ForEndCall=false)
Emit the arrays used to pass the captures and map information to the offloading runtime library.
static RecordDecl * createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy, ArrayRef< PrivateDataTy > Privates)
static void emitInitWithReductionInitializer(CodeGenFunction &CGF, const OMPDeclareReductionDecl *DRD, const Expr *InitOp, Address Private, Address Original, QualType Ty)
static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy, Address OriginalBaseAddress, llvm::Value *Addr)
static void emitPrivatesInit(CodeGenFunction &CGF, const OMPExecutableDirective &D, Address KmpTaskSharedsPtr, LValue TDBase, const RecordDecl *KmpTaskTWithPrivatesQTyRD, QualType SharedsTy, QualType SharedsPtrTy, const OMPTaskDataTy &Data, ArrayRef< PrivateDataTy > Privates, bool ForDup)
Emit initialization for private variables in task-based directives.
static void emitClauseForBareTargetDirective(CodeGenFunction &CGF, const OMPExecutableDirective &D, llvm::SmallVectorImpl< llvm::Value * > &Values)
static llvm::Value * emitDestructorsFunction(CodeGenModule &CGM, SourceLocation Loc, QualType KmpInt32Ty, QualType KmpTaskTWithPrivatesPtrQTy, QualType KmpTaskTWithPrivatesQTy)
static void EmitOMPAggregateReduction(CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar, const VarDecl *RHSVar, const llvm::function_ref< void(CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *)> &RedOpGen, const Expr *XExpr=nullptr, const Expr *EExpr=nullptr, const Expr *UpExpr=nullptr)
Emit reduction operation for each element of array (required for array sections) LHS op = RHS.
static void emitTargetCallFallback(CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn, const OMPExecutableDirective &D, llvm::SmallVectorImpl< llvm::Value * > &CapturedVars, bool RequiresOuterTask, const CapturedStmt &CS, bool OffloadingMandatory, CodeGenFunction &CGF)
static llvm::Value * emitReduceInitFunction(CodeGenModule &CGM, SourceLocation Loc, ReductionCodeGen &RCG, unsigned N)
Emits reduction initializer function:
static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion)
static void emitDependData(CodeGenFunction &CGF, QualType &KmpDependInfoTy, llvm::PointerUnion< unsigned *, LValue * > Pos, const OMPTaskDataTy::DependData &Data, Address DependenciesArray)
static llvm::Value * emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc, const OMPTaskDataTy &Data, QualType PrivatesQTy, ArrayRef< PrivateDataTy > Privates)
Emit a privates mapping function for correct handling of private and firstprivate variables.
static llvm::Value * emitReduceCombFunction(CodeGenModule &CGM, SourceLocation Loc, ReductionCodeGen &RCG, unsigned N, const Expr *ReductionOp, const Expr *LHS, const Expr *RHS, const Expr *PrivateRef)
Emits reduction combiner function:
static RecordDecl * createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef< PrivateDataTy > Privates)
static llvm::Value * getAllocatorVal(CodeGenFunction &CGF, const Expr *Allocator)
Return allocator value from expression, or return a null allocator (default when no allocator specifi...
static llvm::Function * emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc, OpenMPDirectiveKind Kind, QualType KmpInt32Ty, QualType KmpTaskTWithPrivatesPtrQTy, QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy, QualType SharedsPtrTy, llvm::Function *TaskFunction, llvm::Value *TaskPrivatesMap)
Emit a proxy function which accepts kmp_task_t as the second argument.
static bool isAllocatableDecl(const VarDecl *VD)
static llvm::Value * getAlignmentValue(CodeGenModule &CGM, const VarDecl *VD)
Return the alignment from an allocate directive if present.
static void emitTargetCallKernelLaunch(CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn, const OMPExecutableDirective &D, llvm::SmallVectorImpl< llvm::Value * > &CapturedVars, bool RequiresOuterTask, const CapturedStmt &CS, bool OffloadingMandatory, llvm::PointerIntPair< const Expr *, 2, OpenMPDeviceClauseModifier > Device, llvm::Value *OutlinedFnID, CodeGenFunction::OMPTargetDataInfo &InputInfo, llvm::Value *&MapTypesArray, llvm::Value *&MapNamesArray, llvm::function_ref< llvm::Value *(CodeGenFunction &CGF, const OMPLoopDirective &D)> SizeEmitter, CodeGenFunction &CGF, CodeGenModule &CGM)
static llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind convertCaptureClause(const VarDecl *VD)
static const OMPExecutableDirective * getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D)
Check for inner distribute directive.
static std::pair< llvm::Value *, llvm::Value * > getPointerAndSize(CodeGenFunction &CGF, const Expr *E)
static const VarDecl * getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE)
static bool getAArch64MTV(QualType QT, llvm::OpenMPIRBuilder::DeclareSimdKindTy Kind)
Maps To Vector (MTV), as defined in 4.1.1 of the AAVFABI (2021Q1).
static bool isTrivial(ASTContext &Ctx, const Expr *E)
Checks if the expression is constant or does not have non-trivial function calls.
static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind, bool Chunked, bool Ordered)
Map the OpenMP loop schedule to the runtime enumeration.
static void getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS, const Expr **E, int32_t &UpperBound, bool UpperBoundOnly, llvm::Value **CondVal)
Check for a num threads constant value (stored in DefaultVal), or expression (stored in E).
static llvm::Value * emitDeviceID(llvm::PointerIntPair< const Expr *, 2, OpenMPDeviceClauseModifier > Device, CodeGenFunction &CGF)
static const OMPDeclareReductionDecl * getReductionInit(const Expr *ReductionOp)
Check if the combiner is a call to UDR combiner and if it is so return the UDR decl used for reductio...
static bool checkInitIsRequired(CodeGenFunction &CGF, ArrayRef< PrivateDataTy > Privates)
Check if duplication function is required for taskloops.
static bool validateAArch64Simdlen(CodeGenModule &CGM, SourceLocation SLoc, unsigned UserVLEN, unsigned WDS, char ISA)
static bool checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD, ArrayRef< PrivateDataTy > Privates)
Checks if destructor function is required to be generated.
static llvm::TargetRegionEntryInfo getEntryInfoFromPresumedLoc(CodeGenModule &CGM, llvm::OpenMPIRBuilder &OMPBuilder, SourceLocation BeginLoc, llvm::StringRef ParentName="")
static void genMapInfo(MappableExprsHandler &MEHandler, CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo, llvm::OpenMPIRBuilder &OMPBuilder, const llvm::DenseSet< CanonicalDeclPtr< const Decl > > &SkippedVarSet=llvm::DenseSet< CanonicalDeclPtr< const Decl > >())
static unsigned getAArch64LS(QualType QT, llvm::OpenMPIRBuilder::DeclareSimdKindTy Kind, ASTContext &C)
Computes the lane size (LS) of a return type or of an input parameter, as defined by LS(P) in 3....
static llvm::OpenMPIRBuilder::DeclareSimdBranch convertDeclareSimdBranch(OMPDeclareSimdDeclAttr::BranchStateTy State)
static void emitForStaticInitCall(CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId, llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule, OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2, const CGOpenMPRuntime::StaticRTInput &Values)
static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy, LValue BaseLV)
static void getKmpAffinityType(ASTContext &C, QualType &KmpTaskAffinityInfoTy)
Builds kmp_task_affinity_info, if it is not built yet, and builds flags type.
static llvm::Constant * emitMappingInformation(CodeGenFunction &CGF, llvm::OpenMPIRBuilder &OMPBuilder, MappableExprsHandler::MappingExprInfo &MapExprs)
Emit a string constant containing the names of the values mapped to the offloading runtime library.
static void getDependTypes(ASTContext &C, QualType &KmpDependInfoTy, QualType &FlagsTy)
Builds kmp_depend_info, if it is not built yet, and builds flags type.
static llvm::Value * emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc, const OMPExecutableDirective &D, QualType KmpTaskTWithPrivatesPtrQTy, const RecordDecl *KmpTaskTWithPrivatesQTyRD, const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy, QualType SharedsPtrTy, const OMPTaskDataTy &Data, ArrayRef< PrivateDataTy > Privates, bool WithLastIter)
Emit task_dup function (for initialization of private/firstprivate/lastprivate vars and last_iter fla...
static std::pair< llvm::Value *, OMPDynGroupprivateFallbackType > emitDynCGroupMem(const OMPExecutableDirective &D, CodeGenFunction &CGF)
static llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseKind convertDeviceClause(const VarDecl *VD)
static llvm::Value * emitReduceFiniFunction(CodeGenModule &CGM, SourceLocation Loc, ReductionCodeGen &RCG, unsigned N)
Emits reduction finalizer function:
static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr, QualType Type, bool EmitDeclareReductionInit, const Expr *Init, const OMPDeclareReductionDecl *DRD, Address SrcAddr=Address::invalid())
Emit initialization of arrays of complex types.
static bool getAArch64PBV(QualType QT, ASTContext &C)
Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI.
static void EmitDoacrossOrdered(CodeGenFunction &CGF, CodeGenModule &CGM, const T *C, llvm::Value *ULoc, llvm::Value *ThreadID)
static RTLDependenceKindTy translateDependencyKind(OpenMPDependClauseKind K)
Translates internal dependency kind into the runtime kind.
static void emitTargetCallElse(CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn, const OMPExecutableDirective &D, llvm::SmallVectorImpl< llvm::Value * > &CapturedVars, bool RequiresOuterTask, const CapturedStmt &CS, bool OffloadingMandatory, CodeGenFunction &CGF)
static llvm::Function * emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty, const Expr *CombinerInitializer, const VarDecl *In, const VarDecl *Out, bool IsCombiner)
static void emitReductionCombiner(CodeGenFunction &CGF, const Expr *ReductionOp)
Emit reduction combiner.
static std::tuple< unsigned, unsigned, bool > getNDSWDS(const FunctionDecl *FD, ArrayRef< llvm::OpenMPIRBuilder::DeclareSimdAttrTy > ParamAttrs)
static std::string generateUniqueName(CodeGenModule &CGM, llvm::StringRef Prefix, const Expr *Ref)
static llvm::Function * emitParallelOrTeamsOutlinedFunction(CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS, const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen)
static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array, unsigned Index, const VarDecl *Var)
Given an array of pointers to variables, project the address of a given variable.
static bool isAssumedToBeNotEmitted(const ValueDecl *VD, bool IsDevice)
static FieldDecl * addFieldToRecordDecl(ASTContext &C, DeclContext *DC, QualType FieldTy)
static unsigned evaluateCDTSize(const FunctionDecl *FD, ArrayRef< llvm::OpenMPIRBuilder::DeclareSimdAttrTy > ParamAttrs)
static ValueDecl * getDeclFromThisExpr(const Expr *E)
static void genMapInfoForCaptures(MappableExprsHandler &MEHandler, CodeGenFunction &CGF, const CapturedStmt &CS, llvm::SmallVectorImpl< llvm::Value * > &CapturedVars, llvm::OpenMPIRBuilder &OMPBuilder, llvm::DenseSet< CanonicalDeclPtr< const Decl > > &MappedVarSet, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo)
static RecordDecl * createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind, QualType KmpInt32Ty, QualType KmpRoutineEntryPointerQTy)
static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule, OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2)
Expr::Classification Cl
TokenType getType() const
Returns the token's type, e.g.
FormatToken * Next
The next token in the unwrapped line.
#define X(type, name)
Definition Value.h:97
#define SM(sm)
This file defines OpenMP AST classes for clauses.
Defines some OpenMP-specific enums and functions.
llvm::json::Array Array
Defines the SourceManager interface.
This file defines OpenMP AST classes for executable directives and clauses.
__DEVICE__ int max(int __a, int __b)
This represents clause 'affinity' in the 'pragma omp task'-based directives.
static std::pair< const Expr *, std::optional< size_t > > findAttachPtrExpr(MappableExprComponentListRef Components, OpenMPDirectiveKind CurDirKind)
Find the attach pointer expression from a list of mappable expression components.
static QualType getComponentExprElementType(const Expr *Exp)
Get the type of an element of a ComponentList Expr Exp.
ArrayRef< MappableComponent > MappableExprComponentListRef
This represents implicit clause 'depend' for the 'pragma omp task' directive.
This represents 'detach' clause in the 'pragma omp task' directive.
This represents 'device' clause in the 'pragma omp ...' directive.
This represents the 'doacross' clause for the 'pragma omp ordered' directive.
This represents 'dyn_groupprivate' clause in 'pragma omp target ...' and 'pragma omp teams ....
This represents clause 'map' in the 'pragma omp ...' directives.
This represents clause 'nontemporal' in the 'pragma omp ...' directives.
This represents 'num_teams' clause in the 'pragma omp ...' directive.
This represents 'thread_limit' clause in the 'pragma omp ...' directive.
This represents clause 'uses_allocators' in the 'pragma omp target'-based directives.
This represents 'ompx_attribute' clause in a directive that might generate an outlined function.
This represents 'ompx_bare' clause in the 'pragma omp target teams ...' directive.
This represents 'ompx_dyn_cgroup_mem' clause in the 'pragma omp target ...' directive.
Holds long-lived AST nodes (such as types and decls) that can be referred to throughout the semantic ...
Definition ASTContext.h:226
SourceManager & getSourceManager()
Definition ASTContext.h:859
const ConstantArrayType * getAsConstantArrayType(QualType T) const
CharUnits getTypeAlignInChars(QualType T) const
Return the ABI-specified alignment of a (complete) type T, in characters.
const ASTRecordLayout & getASTRecordLayout(const RecordDecl *D) const
Get or compute information about the layout of the specified record (struct/union/class) D,...
QualType getPointerType(QualType T) const
Return the uniqued reference to the type for a pointer to the specified type.
CanQualType VoidPtrTy
QualType getConstantArrayType(QualType EltTy, const llvm::APInt &ArySize, const Expr *SizeExpr, ArraySizeModifier ASM, unsigned IndexTypeQuals) const
Return the unique reference to the type for a constant array of the specified element type.
const LangOptions & getLangOpts() const
Definition ASTContext.h:952
CanQualType BoolTy
QualType getIntTypeForBitwidth(unsigned DestWidth, unsigned Signed) const
getIntTypeForBitwidth - sets integer QualTy according to specified details: bitwidth,...
CharUnits getDeclAlign(const Decl *D, bool ForAlignof=false) const
Return a conservative estimate of the alignment of the specified decl D.
int64_t toBits(CharUnits CharSize) const
Convert a size in characters to a size in bits.
const ArrayType * getAsArrayType(QualType T) const
Type Query functions.
uint64_t getTypeSize(QualType T) const
Return the size of the specified (complete) type T, in bits.
CharUnits getTypeSizeInChars(QualType T) const
Return the size of the specified (complete) type T, in characters.
static bool hasSameType(QualType T1, QualType T2)
Determine whether the given types T1 and T2 are equivalent.
const VariableArrayType * getAsVariableArrayType(QualType T) const
QualType getSizeType() const
Return the unique type for "size_t" (C99 7.17), defined in <stddef.h>.
unsigned getTypeAlign(QualType T) const
Return the ABI-specified alignment of a (complete) type T, in bits.
CharUnits getSize() const
getSize - Get the record size in characters.
uint64_t getFieldOffset(unsigned FieldNo) const
getFieldOffset - Get the offset of the given field index, in bits.
CharUnits getNonVirtualSize() const
getNonVirtualSize - Get the non-virtual size (in chars) of an object, which is the size of the object...
static QualType getBaseOriginalType(const Expr *Base)
Return original type of the base expression for array section.
Definition Expr.cpp:5384
Represents an array type, per C99 6.7.5.2 - Array Declarators.
Definition TypeBase.h:3772
Attr - This represents one attribute.
Definition Attr.h:46
Represents a base class of a C++ class.
Definition DeclCXX.h:146
Represents a C++ constructor within a class.
Definition DeclCXX.h:2624
Represents a C++ destructor within a class.
Definition DeclCXX.h:2889
const CXXRecordDecl * getParent() const
Return the parent of this method declaration, which is the class in which this method is defined.
Definition DeclCXX.h:2275
QualType getFunctionObjectParameterType() const
Definition DeclCXX.h:2299
Represents a C++ struct/union/class.
Definition DeclCXX.h:258
base_class_range bases()
Definition DeclCXX.h:608
bool isLambda() const
Determine whether this class describes a lambda function object.
Definition DeclCXX.h:1018
void getCaptureFields(llvm::DenseMap< const ValueDecl *, FieldDecl * > &Captures, FieldDecl *&ThisCapture) const
For a closure type, retrieve the mapping from captured variables and this to the non-static data memb...
Definition DeclCXX.cpp:1790
unsigned getNumBases() const
Retrieves the number of base classes of this class.
Definition DeclCXX.h:602
base_class_range vbases()
Definition DeclCXX.h:625
capture_const_range captures() const
Definition DeclCXX.h:1097
ctor_range ctors() const
Definition DeclCXX.h:670
CXXDestructorDecl * getDestructor() const
Returns the destructor decl for this class.
Definition DeclCXX.cpp:2131
CanProxy< U > castAs() const
A wrapper class around a pointer that always points to its canonical declaration.
Describes the capture of either a variable, or 'this', or variable-length array type.
Definition Stmt.h:3951
bool capturesVariableByCopy() const
Determine whether this capture handles a variable by copy.
Definition Stmt.h:3985
VarDecl * getCapturedVar() const
Retrieve the declaration of the variable being captured.
Definition Stmt.cpp:1391
bool capturesVariableArrayType() const
Determine whether this capture handles a variable-length array type.
Definition Stmt.h:3991
bool capturesThis() const
Determine whether this capture handles the C++ 'this' pointer.
Definition Stmt.h:3979
bool capturesVariable() const
Determine whether this capture handles a variable (by reference).
Definition Stmt.h:3982
This captures a statement into a function.
Definition Stmt.h:3938
const Capture * const_capture_iterator
Definition Stmt.h:4072
capture_iterator capture_end() const
Retrieve an iterator pointing past the end of the sequence of captures.
Definition Stmt.h:4089
const RecordDecl * getCapturedRecordDecl() const
Retrieve the record declaration for captured variables.
Definition Stmt.h:4059
Stmt * getCapturedStmt()
Retrieve the statement being captured.
Definition Stmt.h:4042
bool capturesVariable(const VarDecl *Var) const
True if this variable has been captured.
Definition Stmt.cpp:1517
capture_iterator capture_begin()
Retrieve an iterator pointing to the first capture.
Definition Stmt.h:4084
capture_range captures()
Definition Stmt.h:4076
CharUnits - This is an opaque type for sizes expressed in character units.
Definition CharUnits.h:38
bool isZero() const
isZero - Test whether the quantity equals zero.
Definition CharUnits.h:122
llvm::Align getAsAlign() const
getAsAlign - Returns Quantity as a valid llvm::Align, Beware llvm::Align assumes power of two 8-bit b...
Definition CharUnits.h:189
QuantityType getQuantity() const
getQuantity - Get the raw integer representation of this quantity.
Definition CharUnits.h:185
CharUnits alignmentOfArrayElement(CharUnits elementSize) const
Given that this is the alignment of the first element of an array, return the minimum alignment of an...
Definition CharUnits.h:214
static CharUnits fromQuantity(QuantityType Quantity)
fromQuantity - Construct a CharUnits quantity from a raw integer type.
Definition CharUnits.h:63
CharUnits alignTo(const CharUnits &Align) const
alignTo - Returns the next integer (mod 2**64) that is greater than or equal to this quantity and is ...
Definition CharUnits.h:201
std::string SampleProfileFile
Name of the profile file to use with -fprofile-sample-use.
Like RawAddress, an abstract representation of an aligned address, but the pointer contained in this ...
Definition Address.h:128
static Address invalid()
Definition Address.h:176
llvm::Value * emitRawPointer(CodeGenFunction &CGF) const
Return the pointer contained in this class after authenticating it and adding offset to it if necessa...
Definition Address.h:253
CharUnits getAlignment() const
Definition Address.h:194
llvm::Type * getElementType() const
Return the type of the values stored in this address.
Definition Address.h:209
Address withPointer(llvm::Value *NewPointer, KnownNonNull_t IsKnownNonNull) const
Return address with different pointer, but same element type and alignment.
Definition Address.h:261
Address withElementType(llvm::Type *ElemTy) const
Return address with different element type, but same pointer and alignment.
Definition Address.h:276
bool isValid() const
Definition Address.h:177
llvm::PointerType * getType() const
Return the type of the pointer value.
Definition Address.h:204
static ApplyDebugLocation CreateArtificial(CodeGenFunction &CGF)
Apply TemporaryLocation if it is valid.
static ApplyDebugLocation CreateDefaultArtificial(CodeGenFunction &CGF, SourceLocation TemporaryLocation)
Apply TemporaryLocation if it is valid.
static ApplyDebugLocation CreateEmpty(CodeGenFunction &CGF)
Set the IRBuilder to not attach debug locations.
llvm::StoreInst * CreateStore(llvm::Value *Val, Address Addr, bool IsVolatile=false)
Definition CGBuilder.h:146
Address CreateGEP(CodeGenFunction &CGF, Address Addr, llvm::Value *Index, const llvm::Twine &Name="")
Definition CGBuilder.h:302
Address CreatePointerBitCastOrAddrSpaceCast(Address Addr, llvm::Type *Ty, llvm::Type *ElementTy, const llvm::Twine &Name="")
Definition CGBuilder.h:213
Address CreateConstArrayGEP(Address Addr, uint64_t Index, const llvm::Twine &Name="")
Given addr = [n x T]* ... produce name = getelementptr inbounds addr, i64 0, i64 index where i64 is a...
Definition CGBuilder.h:251
llvm::LoadInst * CreateLoad(Address Addr, const llvm::Twine &Name="")
Definition CGBuilder.h:118
llvm::CallInst * CreateMemCpy(Address Dest, Address Src, llvm::Value *Size, bool IsVolatile=false)
Definition CGBuilder.h:375
Address CreateConstGEP(Address Addr, uint64_t Index, const llvm::Twine &Name="")
Given addr = T* ... produce name = getelementptr inbounds addr, i64 index where i64 is actually the t...
Definition CGBuilder.h:288
Address CreateAddrSpaceCast(Address Addr, llvm::Type *Ty, llvm::Type *ElementTy, const llvm::Twine &Name="")
Definition CGBuilder.h:199
CGFunctionInfo - Class to encapsulate the information about a function definition.
static LastprivateConditionalRAII disable(CodeGenFunction &CGF, const OMPExecutableDirective &S)
NontemporalDeclsRAII(CodeGenModule &CGM, const OMPLoopDirective &S)
Struct that keeps all the relevant information that should be kept throughout a 'target data' region.
llvm::DenseMap< const ValueDecl *, llvm::Value * > CaptureDeviceAddrMap
Map between the a declaration of a capture and the corresponding new llvm address where the runtime r...
UntiedTaskLocalDeclsRAII(CodeGenFunction &CGF, const llvm::MapVector< CanonicalDeclPtr< const VarDecl >, std::pair< Address, Address > > &LocalVars)
virtual Address emitThreadIDAddress(CodeGenFunction &CGF, SourceLocation Loc)
Emits address of the word in a memory where current thread id is stored.
llvm::StringSet ThreadPrivateWithDefinition
Set of threadprivate variables with the generated initializer.
virtual void emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, const OMPExecutableDirective &D, llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds, const Expr *IfCond, const OMPTaskDataTy &Data)
Emit task region for the task directive.
void createOffloadEntriesAndInfoMetadata()
Creates all the offload entries in the current compilation unit along with the associated metadata.
const Expr * getNumTeamsExprForTargetDirective(CodeGenFunction &CGF, const OMPExecutableDirective &D, int32_t &MinTeamsVal, int32_t &MaxTeamsVal)
Emit the number of teams for a target directive.
virtual Address getAddrOfThreadPrivate(CodeGenFunction &CGF, const VarDecl *VD, Address VDAddr, SourceLocation Loc)
Returns address of the threadprivate variable for the current thread.
void emitDeferredTargetDecls() const
Emit deferred declare target variables marked for deferred emission.
virtual llvm::Value * emitForNext(CodeGenFunction &CGF, SourceLocation Loc, unsigned IVSize, bool IVSigned, Address IL, Address LB, Address UB, Address ST)
Call __kmpc_dispatch_next( ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter, kmp_int[32|64] *p_lowe...
bool markAsGlobalTarget(GlobalDecl GD)
Marks the declaration as already emitted for the device code and returns true, if it was marked alrea...
virtual void emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc, llvm::Function *OutlinedFn, ArrayRef< llvm::Value * > CapturedVars, const Expr *IfCond, llvm::Value *NumThreads, OpenMPNumThreadsClauseModifier NumThreadsModifier=OMPC_NUMTHREADS_unknown, OpenMPSeverityClauseKind Severity=OMPC_SEVERITY_fatal, const Expr *Message=nullptr)
Emits code for parallel or serial call of the OutlinedFn with variables captured in a record which ad...
llvm::SmallDenseSet< CanonicalDeclPtr< const Decl > > NontemporalDeclsSet
virtual void emitTargetDataStandAloneCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, const Expr *Device)
Emit the data mapping/movement code associated with the directive D that should be of the form 'targe...
virtual void emitNumThreadsClause(CodeGenFunction &CGF, llvm::Value *NumThreads, SourceLocation Loc, OpenMPNumThreadsClauseModifier Modifier=OMPC_NUMTHREADS_unknown, OpenMPSeverityClauseKind Severity=OMPC_SEVERITY_fatal, SourceLocation SeverityLoc=SourceLocation(), const Expr *Message=nullptr, SourceLocation MessageLoc=SourceLocation())
Emits call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_threads) ...
QualType SavedKmpTaskloopTQTy
Saved kmp_task_t for taskloop-based directive.
virtual void emitSingleRegion(CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen, SourceLocation Loc, ArrayRef< const Expr * > CopyprivateVars, ArrayRef< const Expr * > DestExprs, ArrayRef< const Expr * > SrcExprs, ArrayRef< const Expr * > AssignmentOps)
Emits a single region.
virtual bool emitTargetGlobal(GlobalDecl GD)
Emit the global GD if it is meaningful for the target.
void setLocThreadIdInsertPt(CodeGenFunction &CGF, bool AtCurrentPoint=false)
std::string getOutlinedHelperName(StringRef Name) const
Get the function name of an outlined region.
bool HasEmittedDeclareTargetRegion
Flag for keeping track of whether a device routine has been emitted.
llvm::Constant * getOrCreateThreadPrivateCache(const VarDecl *VD)
If the specified mangled name is not in the module, create and return threadprivate cache object.
virtual Address getTaskReductionItem(CodeGenFunction &CGF, SourceLocation Loc, llvm::Value *ReductionsPtr, LValue SharedLVal)
Get the address of void * type of the private copy of the reduction item specified by the SharedLVal...
virtual void emitForDispatchDeinit(CodeGenFunction &CGF, SourceLocation Loc)
This is used for non static scheduled types and when the ordered clause is present on the loop constr...
void emitCall(CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee Callee, ArrayRef< llvm::Value * > Args={}) const
Emits Callee function call with arguments Args with location Loc.
virtual void getDefaultScheduleAndChunk(CodeGenFunction &CGF, const OMPLoopDirective &S, OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const
Choose default schedule type and chunk value for the schedule clause.
virtual std::pair< llvm::Function *, llvm::Function * > getUserDefinedReduction(const OMPDeclareReductionDecl *D)
Get combiner/initializer for the specified user-defined reduction, if any.
virtual bool isGPU() const
Returns true if the current target is a GPU.
static const Stmt * getSingleCompoundChild(ASTContext &Ctx, const Stmt *Body)
Checks if the Body is the CompoundStmt and returns its child statement iff there is only one that is ...
virtual void emitDeclareTargetFunction(const FunctionDecl *FD, llvm::GlobalValue *GV)
Emit code for handling declare target functions in the runtime.
bool HasRequiresUnifiedSharedMemory
Flag for keeping track of whether a requires unified_shared_memory directive is present.
llvm::Value * emitUpdateLocation(CodeGenFunction &CGF, SourceLocation Loc, unsigned Flags=0, bool EmitLoc=false)
Emits object of ident_t type with info for source location.
bool isLocalVarInUntiedTask(CodeGenFunction &CGF, const VarDecl *VD) const
Returns true if the variable is a local variable in untied task.
virtual void emitTeamsCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, SourceLocation Loc, llvm::Function *OutlinedFn, ArrayRef< llvm::Value * > CapturedVars)
Emits code for teams call of the OutlinedFn with variables captured in a record which address is stor...
virtual void emitCancellationPointCall(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind CancelRegion)
Emit code for 'cancellation point' construct.
virtual llvm::Function * emitThreadPrivateVarDefinition(const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit, CodeGenFunction *CGF=nullptr)
Emit a code for initialization of threadprivate variable.
virtual ConstantAddress getAddrOfDeclareTargetVar(const VarDecl *VD)
Returns the address of the variable marked as declare target with link clause OR as declare target wi...
llvm::Function * getOrCreateUserDefinedMapperFunc(const OMPDeclareMapperDecl *D)
Get the function for the specified user-defined mapper.
OpenMPLocThreadIDMapTy OpenMPLocThreadIDMap
virtual void functionFinished(CodeGenFunction &CGF)
Cleans up references to the objects in finished function.
virtual llvm::Function * emitTeamsOutlinedFunction(CodeGenFunction &CGF, const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen)
Emits outlined function for the specified OpenMP teams directive D.
QualType KmpTaskTQTy
Type typedef struct kmp_task { void * shareds; /‍**< pointer to block of pointers to shared vars ‍/ k...
llvm::OpenMPIRBuilder OMPBuilder
An OpenMP-IR-Builder instance.
virtual void emitDoacrossInit(CodeGenFunction &CGF, const OMPLoopDirective &D, ArrayRef< Expr * > NumIterations)
Emit initialization for doacross loop nesting support.
virtual void adjustTargetSpecificDataForLambdas(CodeGenFunction &CGF, const OMPExecutableDirective &D) const
Adjust some parameters for the target-based directives, like addresses of the variables captured by r...
virtual void emitTargetDataCalls(CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, const Expr *Device, const RegionCodeGenTy &CodeGen, CGOpenMPRuntime::TargetDataInfo &Info)
Emit the target data mapping code associated with D.
virtual unsigned getDefaultLocationReserved2Flags() const
Returns additional flags that can be stored in reserved_2 field of the default location.
virtual Address getParameterAddress(CodeGenFunction &CGF, const VarDecl *NativeParam, const VarDecl *TargetParam) const
Gets the address of the native argument basing on the address of the target-specific parameter.
void emitUsesAllocatorsFini(CodeGenFunction &CGF, const Expr *Allocator)
Destroys user defined allocators specified in the uses_allocators clause.
QualType KmpTaskAffinityInfoTy
Type typedef struct kmp_task_affinity_info { kmp_intptr_t base_addr; size_t len; struct { bool flag1 ...
void emitPrivateReduction(CodeGenFunction &CGF, SourceLocation Loc, const Expr *Privates, const Expr *LHSExprs, const Expr *RHSExprs, const Expr *ReductionOps)
Emits code for private variable reduction.
llvm::Value * emitNumTeamsForTargetDirective(CodeGenFunction &CGF, const OMPExecutableDirective &D)
virtual void emitTargetOutlinedFunctionHelper(const OMPExecutableDirective &D, StringRef ParentName, llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, bool IsOffloadEntry, const RegionCodeGenTy &CodeGen)
Helper to emit outlined function for 'target' directive.
void scanForTargetRegionsFunctions(const Stmt *S, StringRef ParentName)
Start scanning from statement S and emit all target regions found along the way.
SmallVector< llvm::Value *, 4 > emitDepobjElementsSizes(CodeGenFunction &CGF, QualType &KmpDependInfoTy, const OMPTaskDataTy::DependData &Data)
virtual llvm::Value * emitMessageClause(CodeGenFunction &CGF, const Expr *Message, SourceLocation Loc)
virtual void emitTaskgroupRegion(CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen, SourceLocation Loc)
Emit a taskgroup region.
llvm::DenseMap< llvm::Function *, llvm::DenseMap< CanonicalDeclPtr< const Decl >, std::tuple< QualType, const FieldDecl *, const FieldDecl *, LValue > > > LastprivateConditionalToTypes
Maps local variables marked as lastprivate conditional to their internal types.
virtual bool emitTargetGlobalVariable(GlobalDecl GD)
Emit the global variable if it is a valid device global variable.
virtual void emitNumTeamsClause(CodeGenFunction &CGF, const Expr *NumTeams, const Expr *ThreadLimit, SourceLocation Loc)
Emits call to void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_teams,...
bool hasRequiresUnifiedSharedMemory() const
Return whether the unified_shared_memory has been specified.
virtual Address getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF, QualType VarType, StringRef Name)
Creates artificial threadprivate variable with name Name and type VarType.
void emitUserDefinedMapper(const OMPDeclareMapperDecl *D, CodeGenFunction *CGF=nullptr)
Emit the function for the user defined mapper construct.
bool HasEmittedTargetRegion
Flag for keeping track of whether a target region has been emitted.
void emitDepobjElements(CodeGenFunction &CGF, QualType &KmpDependInfoTy, LValue PosLVal, const OMPTaskDataTy::DependData &Data, Address DependenciesArray)
std::string getReductionFuncName(StringRef Name) const
Get the function name of a reduction function.
virtual void processRequiresDirective(const OMPRequiresDecl *D)
Perform check on requires decl to ensure that target architecture supports unified addressing.
llvm::DenseSet< CanonicalDeclPtr< const Decl > > AlreadyEmittedTargetDecls
List of the emitted declarations.
virtual llvm::Value * emitTaskReductionInit(CodeGenFunction &CGF, SourceLocation Loc, ArrayRef< const Expr * > LHSExprs, ArrayRef< const Expr * > RHSExprs, const OMPTaskDataTy &Data)
Emit a code for initialization of task reduction clause.
llvm::Value * getThreadID(CodeGenFunction &CGF, SourceLocation Loc)
Gets thread id value for the current thread.
void emitUpdateClause(CodeGenFunction &CGF, LValue DepobjLVal, OpenMPDependClauseKind NewDepKind, SourceLocation Loc)
Updates the dependency kind in the specified depobj object.
virtual void emitLastprivateConditionalFinalUpdate(CodeGenFunction &CGF, LValue PrivLVal, const VarDecl *VD, SourceLocation Loc)
Gets the address of the global copy used for lastprivate conditional update, if any.
llvm::MapVector< CanonicalDeclPtr< const VarDecl >, std::pair< Address, Address > > UntiedLocalVarsAddressesMap
virtual void emitErrorCall(CodeGenFunction &CGF, SourceLocation Loc, Expr *ME, bool IsFatal)
Emit __kmpc_error call for error directive extern void __kmpc_error(ident_t *loc, int severity,...
void clearLocThreadIdInsertPt(CodeGenFunction &CGF)
virtual void emitTaskyieldCall(CodeGenFunction &CGF, SourceLocation Loc)
Emits code for a taskyield directive.
std::string getName(ArrayRef< StringRef > Parts) const
Get the platform-specific name separator.
void computeMinAndMaxThreadsAndTeams(const OMPExecutableDirective &D, CodeGenFunction &CGF, llvm::OpenMPIRBuilder::TargetKernelDefaultAttrs &Attrs)
Helper to determine the min/max number of threads/teams for D.
virtual void emitFlush(CodeGenFunction &CGF, ArrayRef< const Expr * > Vars, SourceLocation Loc, llvm::AtomicOrdering AO)
Emit flush of the variables specified in 'omp flush' directive.
virtual void emitTaskwaitCall(CodeGenFunction &CGF, SourceLocation Loc, const OMPTaskDataTy &Data)
Emit code for 'taskwait' directive.
virtual void emitProcBindClause(CodeGenFunction &CGF, llvm::omp::ProcBindKind ProcBind, SourceLocation Loc)
Emit call to void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid, int proc_bind) to generate...
void emitLastprivateConditionalUpdate(CodeGenFunction &CGF, LValue IVLVal, StringRef UniqueDeclName, LValue LVal, SourceLocation Loc)
Emit update for lastprivate conditional data.
virtual void emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D, llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds, const Expr *IfCond, const OMPTaskDataTy &Data)
Emit task region for the taskloop directive.
virtual void emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind Kind, bool EmitChecks=true, bool ForceSimpleCall=false)
Emit an implicit/explicit barrier for OpenMP threads.
static unsigned getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind)
Returns default flags for the barriers depending on the directive, for which this barrier is going to ...
virtual bool emitTargetFunctions(GlobalDecl GD)
Emit the target regions enclosed in GD function definition or the function itself in case it is a val...
TaskResultTy emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc, const OMPExecutableDirective &D, llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds, const OMPTaskDataTy &Data)
Emit task region for the task directive.
llvm::Value * emitTargetNumIterationsCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, llvm::function_ref< llvm::Value *(CodeGenFunction &CGF, const OMPLoopDirective &D)> SizeEmitter)
Return the trip count of loops associated with constructs / 'target teams distribute' and 'teams dist...
llvm::StringMap< llvm::AssertingVH< llvm::GlobalVariable >, llvm::BumpPtrAllocator > InternalVars
An ordered map of auto-generated variables to their unique names.
virtual void emitDistributeStaticInit(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values)
llvm::SmallVector< UntiedLocalVarsAddressesMap, 4 > UntiedLocalVarsStack
virtual void emitForStaticFinish(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind)
Call the appropriate runtime routine to notify that we finished all the work with current loop.
virtual void emitThreadLimitClause(CodeGenFunction &CGF, const Expr *ThreadLimit, SourceLocation Loc)
Emits call to void __kmpc_set_thread_limit(ident_t *loc, kmp_int32 global_tid, kmp_int32 thread_limit)...
void emitIfClause(CodeGenFunction &CGF, const Expr *Cond, const RegionCodeGenTy &ThenGen, const RegionCodeGenTy &ElseGen)
Emits code for OpenMP 'if' clause using specified CodeGen function.
Address emitDepobjDependClause(CodeGenFunction &CGF, const OMPTaskDataTy::DependData &Dependencies, SourceLocation Loc)
Emits list of dependencies based on the provided data (array of dependence/expression pairs) for depob...
bool isNontemporalDecl(const ValueDecl *VD) const
Checks if the VD variable is marked as nontemporal declaration in current context.
virtual llvm::Function * emitParallelOutlinedFunction(CodeGenFunction &CGF, const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen)
Emits outlined function for the specified OpenMP parallel directive D.
const Expr * getNumThreadsExprForTargetDirective(CodeGenFunction &CGF, const OMPExecutableDirective &D, int32_t &UpperBound, bool UpperBoundOnly, llvm::Value **CondExpr=nullptr, const Expr **ThreadLimitExpr=nullptr)
Check for a number of threads upper bound constant value (stored in UpperBound), or expression (retur...
virtual void registerVTableOffloadEntry(llvm::GlobalVariable *VTable, const VarDecl *VD)
Register VTable to OpenMP offload entry.
virtual llvm::Value * emitSeverityClause(OpenMPSeverityClauseKind Severity, SourceLocation Loc)
llvm::SmallVector< LastprivateConditionalData, 4 > LastprivateConditionalStack
Stack for list of addresses of declarations in current context marked as lastprivate conditional.
virtual void emitForStaticInit(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind, const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values)
Call the appropriate runtime routine to initialize it before start of loop.
virtual void emitDeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn)
Marks function Fn with properly mangled versions of vector functions.
llvm::AtomicOrdering getDefaultMemoryOrdering() const
Gets default memory ordering as specified in requires directive.
virtual bool isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind, bool Chunked) const
Check if the specified ScheduleKind is static non-chunked.
virtual void emitAndRegisterVTable(CodeGenModule &CGM, CXXRecordDecl *CXXRecord, const VarDecl *VD)
Emit and register VTable for the C++ class in OpenMP offload entry.
llvm::Value * getCriticalRegionLock(StringRef CriticalName)
Returns corresponding lock object for the specified critical region name.
virtual void emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc, const Expr *IfCond, OpenMPDirectiveKind CancelRegion)
Emit code for 'cancel' construct.
QualType SavedKmpTaskTQTy
Saved kmp_task_t for task directive.
virtual void emitMasterRegion(CodeGenFunction &CGF, const RegionCodeGenTy &MasterOpGen, SourceLocation Loc)
Emits a master region.
virtual llvm::Function * emitTaskOutlinedFunction(const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, const VarDecl *PartIDVar, const VarDecl *TaskTVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen, bool Tied, unsigned &NumberOfParts)
Emits outlined function for the OpenMP task directive D.
llvm::DenseMap< llvm::Function *, unsigned > FunctionToUntiedTaskStackMap
Maps function to the position of the untied task locals stack.
void emitDestroyClause(CodeGenFunction &CGF, LValue DepobjLVal, SourceLocation Loc)
Emits the code to destroy the dependency object provided in depobj directive.
virtual void emitTaskReductionFixups(CodeGenFunction &CGF, SourceLocation Loc, ReductionCodeGen &RCG, unsigned N)
Required to resolve existing problems in the runtime.
llvm::ArrayType * KmpCriticalNameTy
Type kmp_critical_name, originally defined as typedef kmp_int32 kmp_critical_name[8];.
virtual void emitDoacrossOrdered(CodeGenFunction &CGF, const OMPDependClause *C)
Emit code for doacross ordered directive with 'depend' clause.
llvm::DenseMap< const OMPDeclareMapperDecl *, llvm::Function * > UDMMap
Map from the user-defined mapper declaration to its corresponding functions.
virtual void checkAndEmitLastprivateConditional(CodeGenFunction &CGF, const Expr *LHS)
Checks if the provided LVal is lastprivate conditional and emits the code to update the value of the ...
std::pair< llvm::Value *, LValue > getDepobjElements(CodeGenFunction &CGF, LValue DepobjLVal, SourceLocation Loc)
Returns the number of the elements and the address of the depobj dependency array.
llvm::SmallDenseSet< const VarDecl * > DeferredGlobalVariables
List of variables that can become declare target implicitly and, thus, must be emitted.
void emitUsesAllocatorsInit(CodeGenFunction &CGF, const Expr *Allocator, const Expr *AllocatorTraits)
Initializes user defined allocators specified in the uses_allocators clauses.
virtual void registerVTable(const OMPExecutableDirective &D)
Emit code for registering vtable by scanning through map clause in OpenMP target region.
llvm::Type * KmpRoutineEntryPtrTy
Type typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *);.
llvm::Type * getIdentTyPointerTy()
Returns pointer to ident_t type.
void emitSingleReductionCombiner(CodeGenFunction &CGF, const Expr *ReductionOp, const Expr *PrivateRef, const DeclRefExpr *LHS, const DeclRefExpr *RHS)
Emits single reduction combiner.
llvm::OpenMPIRBuilder & getOMPBuilder()
virtual void emitTargetOutlinedFunction(const OMPExecutableDirective &D, StringRef ParentName, llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, bool IsOffloadEntry, const RegionCodeGenTy &CodeGen)
Emit outlined function for 'target' directive.
virtual void emitCriticalRegion(CodeGenFunction &CGF, StringRef CriticalName, const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc, const Expr *Hint=nullptr)
Emits a critical region.
virtual void emitForOrderedIterationEnd(CodeGenFunction &CGF, SourceLocation Loc, unsigned IVSize, bool IVSigned)
Call the appropriate runtime routine to notify that we finished iteration of the ordered loop with th...
virtual void emitOutlinedFunctionCall(CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn, ArrayRef< llvm::Value * > Args={}) const
Emits call of the outlined function with the provided arguments, translating these arguments to corre...
llvm::Value * emitNumThreadsForTargetDirective(CodeGenFunction &CGF, const OMPExecutableDirective &D)
Emit an expression that denotes the number of threads a target region shall use.
void emitThreadPrivateVarInit(CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor, llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc)
Emits initialization code for the threadprivate variables.
virtual void emitUserDefinedReduction(CodeGenFunction *CGF, const OMPDeclareReductionDecl *D)
Emit code for the specified user defined reduction construct.
virtual void checkAndEmitSharedLastprivateConditional(CodeGenFunction &CGF, const OMPExecutableDirective &D, const llvm::DenseSet< CanonicalDeclPtr< const VarDecl > > &IgnoredDecls)
Checks if the lastprivate conditional was updated in inner region and writes the value.
QualType KmpDimTy
struct kmp_dim { // loop bounds info casted to kmp_int64 kmp_int64 lo; // lower kmp_int64 up; // uppe...
virtual void emitInlinedDirective(CodeGenFunction &CGF, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen, bool HasCancel=false)
Emit code for the directive that does not require outlining.
virtual void registerTargetGlobalVariable(const VarDecl *VD, llvm::Constant *Addr)
Checks if the provided global decl GD is a declare target variable and registers it when emitting cod...
virtual void emitFunctionProlog(CodeGenFunction &CGF, const Decl *D)
Emits OpenMP-specific function prolog.
void emitKmpRoutineEntryT(QualType KmpInt32Ty)
Build type kmp_routine_entry_t (if not built yet).
virtual bool isStaticChunked(OpenMPScheduleClauseKind ScheduleKind, bool Chunked) const
Check if the specified ScheduleKind is static chunked.
virtual void emitTargetCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond, llvm::PointerIntPair< const Expr *, 2, OpenMPDeviceClauseModifier > Device, llvm::function_ref< llvm::Value *(CodeGenFunction &CGF, const OMPLoopDirective &D)> SizeEmitter)
Emit the target offloading code associated with D.
virtual bool hasAllocateAttributeForGlobalVar(const VarDecl *VD, LangAS &AS)
Checks if the variable has associated OMPAllocateDeclAttr attribute with the predefined allocator and...
llvm::AtomicOrdering RequiresAtomicOrdering
Atomic ordering from the omp requires directive.
virtual void emitReduction(CodeGenFunction &CGF, SourceLocation Loc, ArrayRef< const Expr * > Privates, ArrayRef< const Expr * > LHSExprs, ArrayRef< const Expr * > RHSExprs, ArrayRef< const Expr * > ReductionOps, ReductionOptionsTy Options)
Emit a code for reduction clause.
std::pair< llvm::Value *, Address > emitDependClause(CodeGenFunction &CGF, ArrayRef< OMPTaskDataTy::DependData > Dependencies, SourceLocation Loc)
Emits list of dependencies based on the provided data (array of dependence/expression pairs).
llvm::StringMap< llvm::WeakTrackingVH > EmittedNonTargetVariables
List of the global variables with their addresses that should not be emitted for the target.
virtual bool isDynamic(OpenMPScheduleClauseKind ScheduleKind) const
Check if the specified ScheduleKind is dynamic.
Address emitLastprivateConditionalInit(CodeGenFunction &CGF, const VarDecl *VD)
Create specialized alloca to handle lastprivate conditionals.
virtual void emitOrderedRegion(CodeGenFunction &CGF, const RegionCodeGenTy &OrderedOpGen, SourceLocation Loc, bool IsThreads)
Emit an ordered region.
virtual Address getAddressOfLocalVariable(CodeGenFunction &CGF, const VarDecl *VD)
Gets the OpenMP-specific address of the local variable.
virtual void emitTaskReductionFini(CodeGenFunction &CGF, SourceLocation Loc, bool IsWorksharingReduction)
Emits the following code for reduction clause with task modifier:
virtual void emitMaskedRegion(CodeGenFunction &CGF, const RegionCodeGenTy &MaskedOpGen, SourceLocation Loc, const Expr *Filter=nullptr)
Emits a masked region.
QualType KmpDependInfoTy
Type typedef struct kmp_depend_info { kmp_intptr_t base_addr; size_t len; struct { bool in:1; bool ou...
llvm::Function * emitReductionFunction(StringRef ReducerName, SourceLocation Loc, llvm::Type *ArgsElemType, ArrayRef< const Expr * > Privates, ArrayRef< const Expr * > LHSExprs, ArrayRef< const Expr * > RHSExprs, ArrayRef< const Expr * > ReductionOps)
Emits reduction function.
virtual void emitForDispatchInit(CodeGenFunction &CGF, SourceLocation Loc, const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned, bool Ordered, const DispatchRTInput &DispatchValues)
Call the appropriate runtime routine to initialize it before start of loop.
Address getTaskReductionItem(CodeGenFunction &CGF, SourceLocation Loc, llvm::Value *ReductionsPtr, LValue SharedLVal) override
Get the address of void * type of the private copy of the reduction item specified by the SharedLVal...
void emitCriticalRegion(CodeGenFunction &CGF, StringRef CriticalName, const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc, const Expr *Hint=nullptr) override
Emits a critical region.
void emitDistributeStaticInit(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) override
void emitForStaticInit(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind, const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) override
Call the appropriate runtime routine to initialize it before start of loop.
bool emitTargetGlobalVariable(GlobalDecl GD) override
Emit the global variable if it is a valid device global variable.
llvm::Value * emitForNext(CodeGenFunction &CGF, SourceLocation Loc, unsigned IVSize, bool IVSigned, Address IL, Address LB, Address UB, Address ST) override
Call __kmpc_dispatch_next( ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter, kmp_int[32|64] *p_lowe...
llvm::Function * emitThreadPrivateVarDefinition(const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit, CodeGenFunction *CGF=nullptr) override
Emit a code for initialization of threadprivate variable.
void emitTargetDataStandAloneCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, const Expr *Device) override
Emit the data mapping/movement code associated with the directive D that should be of the form 'targe...
llvm::Function * emitTeamsOutlinedFunction(CodeGenFunction &CGF, const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) override
Emits outlined function for the specified OpenMP teams directive D.
void emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc, llvm::Function *OutlinedFn, ArrayRef< llvm::Value * > CapturedVars, const Expr *IfCond, llvm::Value *NumThreads, OpenMPNumThreadsClauseModifier NumThreadsModifier=OMPC_NUMTHREADS_unknown, OpenMPSeverityClauseKind Severity=OMPC_SEVERITY_fatal, const Expr *Message=nullptr) override
Emits code for parallel or serial call of the OutlinedFn with variables captured in a record which ad...
void emitReduction(CodeGenFunction &CGF, SourceLocation Loc, ArrayRef< const Expr * > Privates, ArrayRef< const Expr * > LHSExprs, ArrayRef< const Expr * > RHSExprs, ArrayRef< const Expr * > ReductionOps, ReductionOptionsTy Options) override
Emit a code for reduction clause.
void emitFlush(CodeGenFunction &CGF, ArrayRef< const Expr * > Vars, SourceLocation Loc, llvm::AtomicOrdering AO) override
Emit flush of the variables specified in 'omp flush' directive.
void emitDoacrossOrdered(CodeGenFunction &CGF, const OMPDependClause *C) override
Emit code for doacross ordered directive with 'depend' clause.
void emitTaskyieldCall(CodeGenFunction &CGF, SourceLocation Loc) override
Emits code for a taskyield directive.
Address getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF, QualType VarType, StringRef Name) override
Creates artificial threadprivate variable with name Name and type VarType.
Address getAddrOfThreadPrivate(CodeGenFunction &CGF, const VarDecl *VD, Address VDAddr, SourceLocation Loc) override
Returns address of the threadprivate variable for the current thread.
void emitSingleRegion(CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen, SourceLocation Loc, ArrayRef< const Expr * > CopyprivateVars, ArrayRef< const Expr * > DestExprs, ArrayRef< const Expr * > SrcExprs, ArrayRef< const Expr * > AssignmentOps) override
Emits a single region.
void emitTaskReductionFixups(CodeGenFunction &CGF, SourceLocation Loc, ReductionCodeGen &RCG, unsigned N) override
Required to resolve existing problems in the runtime.
llvm::Function * emitParallelOutlinedFunction(CodeGenFunction &CGF, const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) override
Emits outlined function for the specified OpenMP parallel directive D.
void emitCancellationPointCall(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind CancelRegion) override
Emit code for 'cancellation point' construct.
void emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind Kind, bool EmitChecks=true, bool ForceSimpleCall=false) override
Emit an implicit/explicit barrier for OpenMP threads.
Address getParameterAddress(CodeGenFunction &CGF, const VarDecl *NativeParam, const VarDecl *TargetParam) const override
Gets the address of the native argument basing on the address of the target-specific parameter.
void emitTeamsCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, SourceLocation Loc, llvm::Function *OutlinedFn, ArrayRef< llvm::Value * > CapturedVars) override
Emits code for teams call of the OutlinedFn with variables captured in a record which address is stor...
void emitForOrderedIterationEnd(CodeGenFunction &CGF, SourceLocation Loc, unsigned IVSize, bool IVSigned) override
Call the appropriate runtime routine to notify that we finished iteration of the ordered loop with th...
bool emitTargetGlobal(GlobalDecl GD) override
Emit the global GD if it is meaningful for the target.
void emitTaskReductionFini(CodeGenFunction &CGF, SourceLocation Loc, bool IsWorksharingReduction) override
Emits the following code for reduction clause with task modifier:
void emitOrderedRegion(CodeGenFunction &CGF, const RegionCodeGenTy &OrderedOpGen, SourceLocation Loc, bool IsThreads) override
Emit an ordered region.
void emitForStaticFinish(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind) override
Call the appropriate runtime routine to notify that we finished all the work with current loop.
llvm::Value * emitTaskReductionInit(CodeGenFunction &CGF, SourceLocation Loc, ArrayRef< const Expr * > LHSExprs, ArrayRef< const Expr * > RHSExprs, const OMPTaskDataTy &Data) override
Emit a code for initialization of task reduction clause.
void emitProcBindClause(CodeGenFunction &CGF, llvm::omp::ProcBindKind ProcBind, SourceLocation Loc) override
Emit call to void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid, int proc_bind) to generate...
void emitTargetOutlinedFunction(const OMPExecutableDirective &D, StringRef ParentName, llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) override
Emit outlined function for 'target' directive.
void emitMasterRegion(CodeGenFunction &CGF, const RegionCodeGenTy &MasterOpGen, SourceLocation Loc) override
Emits a master region.
void emitNumTeamsClause(CodeGenFunction &CGF, const Expr *NumTeams, const Expr *ThreadLimit, SourceLocation Loc) override
Emits call to void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_teams,...
void emitForDispatchDeinit(CodeGenFunction &CGF, SourceLocation Loc) override
This is used for non static scheduled types and when the ordered clause is present on the loop constr...
const VarDecl * translateParameter(const FieldDecl *FD, const VarDecl *NativeParam) const override
Translates the native parameter of outlined function if this is required for target.
void emitNumThreadsClause(CodeGenFunction &CGF, llvm::Value *NumThreads, SourceLocation Loc, OpenMPNumThreadsClauseModifier Modifier=OMPC_NUMTHREADS_unknown, OpenMPSeverityClauseKind Severity=OMPC_SEVERITY_fatal, SourceLocation SeverityLoc=SourceLocation(), const Expr *Message=nullptr, SourceLocation MessageLoc=SourceLocation()) override
Emits call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_threads) ...
void emitMaskedRegion(CodeGenFunction &CGF, const RegionCodeGenTy &MaskedOpGen, SourceLocation Loc, const Expr *Filter=nullptr) override
Emits a masked region.
void emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, const OMPExecutableDirective &D, llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds, const Expr *IfCond, const OMPTaskDataTy &Data) override
Emit task region for the task directive.
void emitTargetCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond, llvm::PointerIntPair< const Expr *, 2, OpenMPDeviceClauseModifier > Device, llvm::function_ref< llvm::Value *(CodeGenFunction &CGF, const OMPLoopDirective &D)> SizeEmitter) override
Emit the target offloading code associated with D.
bool emitTargetFunctions(GlobalDecl GD) override
Emit the target regions enclosed in GD function definition or the function itself in case it is a val...
void emitDoacrossInit(CodeGenFunction &CGF, const OMPLoopDirective &D, ArrayRef< Expr * > NumIterations) override
Emit initialization for doacross loop nesting support.
void emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc, const Expr *IfCond, OpenMPDirectiveKind CancelRegion) override
Emit code for 'cancel' construct.
void emitTaskwaitCall(CodeGenFunction &CGF, SourceLocation Loc, const OMPTaskDataTy &Data) override
Emit code for 'taskwait' directive.
void emitTaskgroupRegion(CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen, SourceLocation Loc) override
Emit a taskgroup region.
void emitTargetDataCalls(CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, const Expr *Device, const RegionCodeGenTy &CodeGen, CGOpenMPRuntime::TargetDataInfo &Info) override
Emit the target data mapping code associated with D.
void emitForDispatchInit(CodeGenFunction &CGF, SourceLocation Loc, const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned, bool Ordered, const DispatchRTInput &DispatchValues) override
This is used for non static scheduled types and when the ordered clause is present on the loop constr...
llvm::Function * emitTaskOutlinedFunction(const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, const VarDecl *PartIDVar, const VarDecl *TaskTVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen, bool Tied, unsigned &NumberOfParts) override
Emits outlined function for the OpenMP task directive D.
void emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D, llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds, const Expr *IfCond, const OMPTaskDataTy &Data) override
Emit task region for the taskloop directive.
unsigned getNonVirtualBaseLLVMFieldNo(const CXXRecordDecl *RD) const
llvm::StructType * getLLVMType() const
Return the "complete object" LLVM type associated with this record.
llvm::StructType * getBaseSubobjectLLVMType() const
Return the "base subobject" LLVM type associated with this record.
unsigned getLLVMFieldNo(const FieldDecl *FD) const
Return llvm::StructType element number that corresponds to the field FD.
unsigned getVirtualBaseIndex(const CXXRecordDecl *base) const
Return the LLVM field index corresponding to the given virtual base.
API for captured statement code generation.
virtual void EmitBody(CodeGenFunction &CGF, const Stmt *S)
Emit the captured statement body.
virtual const FieldDecl * lookup(const VarDecl *VD) const
Lookup the captured field decl for a variable.
RAII for correct setting/restoring of CapturedStmtInfo.
The scope used to remap some variables as private in the OpenMP loop body (or other captured region e...
bool Privatize()
Privatizes local variables previously registered as private.
bool addPrivate(const VarDecl *LocalVD, Address Addr)
Registers LocalVD variable as a private with Addr as the address of the corresponding private variabl...
An RAII object to set (and then clear) a mapping for an OpaqueValueExpr.
Enters a new scope for capturing cleanups, all of which will be executed once the scope is exited.
CodeGenFunction - This class organizes the per-function state that is used while generating LLVM code...
LValue EmitLoadOfReferenceLValue(LValue RefLVal)
Definition CGExpr.cpp:3383
void EmitBranchOnBoolExpr(const Expr *Cond, llvm::BasicBlock *TrueBlock, llvm::BasicBlock *FalseBlock, uint64_t TrueCount, Stmt::Likelihood LH=Stmt::LH_None, const Expr *ConditionalOp=nullptr, const VarDecl *ConditionalDecl=nullptr)
EmitBranchOnBoolExpr - Emit a branch on a boolean condition (e.g.
void emitDestroy(Address addr, QualType type, Destroyer *destroyer, bool useEHCleanupForArray)
emitDestroy - Immediately perform the destruction of the given object.
Definition CGDecl.cpp:2416
JumpDest getJumpDestInCurrentScope(llvm::BasicBlock *Target)
The given basic block lies in the current EH scope, but may be a target of a potentially scope-crossi...
static void EmitOMPTargetParallelDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetParallelDirective &S)
void EmitNullInitialization(Address DestPtr, QualType Ty)
EmitNullInitialization - Generate code to set a value of the given type to null, If the type contains...
CGCapturedStmtInfo * CapturedStmtInfo
ComplexPairTy EmitLoadOfComplex(LValue src, SourceLocation loc)
EmitLoadOfComplex - Load a complex number from the specified l-value.
static void EmitOMPTargetDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetDirective &S)
Emit device code for the target directive.
static void EmitOMPTargetTeamsDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetTeamsDirective &S)
Emit device code for the target teams directive.
static void EmitOMPTargetTeamsDistributeDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetTeamsDistributeDirective &S)
Emit device code for the target teams distribute directive.
llvm::Function * GenerateOpenMPCapturedStmtFunctionAggregate(const CapturedStmt &S, const OMPExecutableDirective &D)
llvm::BasicBlock * createBasicBlock(const Twine &name="", llvm::Function *parent=nullptr, llvm::BasicBlock *before=nullptr)
createBasicBlock - Create an LLVM basic block.
const LangOptions & getLangOpts() const
AutoVarEmission EmitAutoVarAlloca(const VarDecl &var)
EmitAutoVarAlloca - Emit the alloca and debug information for a local variable.
Definition CGDecl.cpp:1482
void pushDestroy(QualType::DestructionKind dtorKind, Address addr, QualType type)
pushDestroy - Push the standard destructor for the given type as at least a normal cleanup.
Definition CGDecl.cpp:2300
Address EmitLoadOfPointer(Address Ptr, const PointerType *PtrTy, LValueBaseInfo *BaseInfo=nullptr, TBAAAccessInfo *TBAAInfo=nullptr)
Load a pointer with type PtrTy stored at address Ptr.
Definition CGExpr.cpp:3392
void EmitBranchThroughCleanup(JumpDest Dest)
EmitBranchThroughCleanup - Emit a branch from the current insert block through the normal cleanup han...
const Decl * CurCodeDecl
CurCodeDecl - This is the inner-most code context, which includes blocks.
Destroyer * getDestroyer(QualType::DestructionKind destructionKind)
Definition CGDecl.cpp:2273
llvm::AssertingVH< llvm::Instruction > AllocaInsertPt
AllocaInsertPoint - This is an instruction in the entry block before which we prefer to insert alloca...
void EmitAggregateAssign(LValue Dest, LValue Src, QualType EltTy)
Emit an aggregate assignment.
JumpDest ReturnBlock
ReturnBlock - Unified return block.
void EmitAggregateCopy(LValue Dest, LValue Src, QualType EltTy, AggValueSlot::Overlap_t MayOverlap, bool isVolatile=false)
EmitAggregateCopy - Emit an aggregate copy.
LValue EmitLValueForField(LValue Base, const FieldDecl *Field, bool IsInBounds=true)
Definition CGExpr.cpp:5723
RawAddress CreateDefaultAlignTempAlloca(llvm::Type *Ty, const Twine &Name="tmp")
CreateDefaultAlignTempAlloca - This creates an alloca with the default ABI alignment of the given L...
Definition CGExpr.cpp:176
void GenerateOpenMPCapturedVars(const CapturedStmt &S, SmallVectorImpl< llvm::Value * > &CapturedVars)
void EmitIgnoredExpr(const Expr *E)
EmitIgnoredExpr - Emit an expression in a context which ignores the result.
Definition CGExpr.cpp:251
RValue EmitLoadOfLValue(LValue V, SourceLocation Loc)
EmitLoadOfLValue - Given an expression that represents a value lvalue, this method emits the address ...
Definition CGExpr.cpp:2497
LValue EmitArraySectionExpr(const ArraySectionExpr *E, bool IsLowerBound=true)
Definition CGExpr.cpp:5240
LValue EmitOMPSharedLValue(const Expr *E)
Emits the lvalue for the expression with possibly captured variable.
void StartFunction(GlobalDecl GD, QualType RetTy, llvm::Function *Fn, const CGFunctionInfo &FnInfo, const FunctionArgList &Args, SourceLocation Loc=SourceLocation(), SourceLocation StartLoc=SourceLocation())
Emit code for the start of a function.
void EmitOMPCopy(QualType OriginalType, Address DestAddr, Address SrcAddr, const VarDecl *DestVD, const VarDecl *SrcVD, const Expr *Copy)
Emit proper copying of data from one variable to another.
llvm::Value * EvaluateExprAsBool(const Expr *E)
EvaluateExprAsBool - Perform the usual unary conversions on the specified expression and compare the ...
Definition CGExpr.cpp:232
JumpDest getOMPCancelDestination(OpenMPDirectiveKind Kind)
llvm::Value * emitArrayLength(const ArrayType *arrayType, QualType &baseType, Address &addr)
emitArrayLength - Compute the length of an array, even if it's a VLA, and drill down to the base elem...
void EmitOMPAggregateAssign(Address DestAddr, Address SrcAddr, QualType OriginalType, const llvm::function_ref< void(Address, Address)> CopyGen)
Perform element by element copying of arrays with type OriginalType from SrcAddr to DestAddr using co...
bool HaveInsertPoint() const
HaveInsertPoint - True if an insertion point is defined.
llvm::Value * getTypeSize(QualType Ty)
Returns calculated size of the specified type.
LValue MakeRawAddrLValue(llvm::Value *V, QualType T, CharUnits Alignment, AlignmentSource Source=AlignmentSource::Type)
Same as MakeAddrLValue above except that the pointer is known to be unsigned.
LValue EmitLValueForFieldInitialization(LValue Base, const FieldDecl *Field)
EmitLValueForFieldInitialization - Like EmitLValueForField, except that if the Field is a reference,...
Definition CGExpr.cpp:5897
VlaSizePair getVLASize(const VariableArrayType *vla)
Returns an LLVM value that corresponds to the size, in non-variably-sized elements,...
llvm::CallInst * EmitNounwindRuntimeCall(llvm::FunctionCallee callee, const Twine &name="")
llvm::Value * EmitLoadOfScalar(Address Addr, bool Volatile, QualType Ty, SourceLocation Loc, AlignmentSource Source=AlignmentSource::Type, bool isNontemporal=false)
EmitLoadOfScalar - Load a scalar value from an address, taking care to appropriately convert from the...
void EmitStoreOfComplex(ComplexPairTy V, LValue dest, bool isInit)
EmitStoreOfComplex - Store a complex number into the specified l-value.
const Decl * CurFuncDecl
CurFuncDecl - Holds the Decl for the current outermost non-closure context.
void EmitAutoVarCleanups(const AutoVarEmission &emission)
Definition CGDecl.cpp:2217
void EmitStoreThroughLValue(RValue Src, LValue Dst, bool isInit=false)
EmitStoreThroughLValue - Store the specified rvalue into the specified lvalue, where both are guarant...
Definition CGExpr.cpp:2738
LValue EmitLoadOfPointerLValue(Address Ptr, const PointerType *PtrTy)
Definition CGExpr.cpp:3402
void EmitAnyExprToMem(const Expr *E, Address Location, Qualifiers Quals, bool IsInitializer)
EmitAnyExprToMem - Emits the code necessary to evaluate an arbitrary expression into the given memory...
Definition CGExpr.cpp:302
bool needsEHCleanup(QualType::DestructionKind kind)
Determines whether an EH cleanup is required to destroy a type with the given destruction kind.
llvm::DenseMap< const ValueDecl *, FieldDecl * > LambdaCaptureFields
llvm::CallInst * EmitRuntimeCall(llvm::FunctionCallee callee, const Twine &name="")
llvm::Type * ConvertTypeForMem(QualType T)
static void EmitOMPTargetTeamsDistributeParallelForDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetTeamsDistributeParallelForDirective &S)
static void EmitOMPTargetParallelForSimdDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetParallelForSimdDirective &S)
Emit device code for the target parallel for simd directive.
CodeGenTypes & getTypes() const
static TypeEvaluationKind getEvaluationKind(QualType T)
getEvaluationKind - Return the TypeEvaluationKind of QualType T.
void EmitOMPTargetTaskBasedDirective(const OMPExecutableDirective &S, const RegionCodeGenTy &BodyGen, OMPTargetDataInfo &InputInfo)
Address EmitPointerWithAlignment(const Expr *Addr, LValueBaseInfo *BaseInfo=nullptr, TBAAAccessInfo *TBAAInfo=nullptr, KnownNonNull_t IsKnownNonNull=NotKnownNonNull)
EmitPointerWithAlignment - Given an expression with a pointer type, emit the value and compute our be...
Definition CGExpr.cpp:1591
static void EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetTeamsDistributeParallelForSimdDirective &S)
Emit device code for the target teams distribute parallel for simd directive.
void EmitBranch(llvm::BasicBlock *Block)
EmitBranch - Emit a branch to the specified basic block from the current insert block,...
Definition CGStmt.cpp:660
llvm::Function * GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S, const OMPExecutableDirective &D)
RawAddress CreateMemTemp(QualType T, const Twine &Name="tmp", RawAddress *Alloca=nullptr)
CreateMemTemp - Create a temporary memory object of the given type, with appropriate alignment and cas...
Definition CGExpr.cpp:189
void EmitVarDecl(const VarDecl &D)
EmitVarDecl - Emit a local variable declaration.
Definition CGDecl.cpp:203
llvm::Value * EmitCheckedInBoundsGEP(llvm::Type *ElemTy, llvm::Value *Ptr, ArrayRef< llvm::Value * > IdxList, bool SignedIndices, bool IsSubtraction, SourceLocation Loc, const Twine &Name="")
Same as IRBuilder::CreateInBoundsGEP, but additionally emits a check to detect undefined behavior whe...
static void EmitOMPTargetParallelGenericLoopDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetParallelGenericLoopDirective &S)
Emit device code for the target parallel loop directive.
llvm::Value * EmitScalarExpr(const Expr *E, bool IgnoreResultAssign=false)
EmitScalarExpr - Emit the computation of the specified expression of LLVM scalar type,...
static bool IsWrappedCXXThis(const Expr *E)
Check if E is a C++ "this" pointer wrapped in value-preserving casts.
Definition CGExpr.cpp:1649
LValue MakeAddrLValue(Address Addr, QualType T, AlignmentSource Source=AlignmentSource::Type)
void FinishFunction(SourceLocation EndLoc=SourceLocation())
FinishFunction - Complete IR generation of the current function.
void EmitAtomicStore(RValue rvalue, LValue lvalue, bool isInit)
static void EmitOMPTargetSimdDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetSimdDirective &S)
Emit device code for the target simd directive.
static void EmitOMPTargetParallelForDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetParallelForDirective &S)
Emit device code for the target parallel for directive.
Address GetAddrOfLocalVar(const VarDecl *VD)
GetAddrOfLocalVar - Return the address of a local variable.
bool ConstantFoldsToSimpleInteger(const Expr *Cond, bool &Result, bool AllowLabels=false)
ConstantFoldsToSimpleInteger - If the specified expression does not fold to a constant,...
static void EmitOMPTargetTeamsGenericLoopDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetTeamsGenericLoopDirective &S)
Emit device code for the target teams loop directive.
LValue EmitMemberExpr(const MemberExpr *E)
Definition CGExpr.cpp:5509
std::pair< llvm::Value *, llvm::Value * > ComplexPairTy
Address ReturnValue
ReturnValue - The temporary alloca to hold the return value.
LValue EmitLValue(const Expr *E, KnownNonNull_t IsKnownNonNull=NotKnownNonNull)
EmitLValue - Emit code to compute a designator that specifies the location of the expression.
Definition CGExpr.cpp:1707
void incrementProfileCounter(const Stmt *S, llvm::Value *StepV=nullptr)
Increment the profiler's counter for the given statement by StepV.
static void EmitOMPTargetTeamsDistributeSimdDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetTeamsDistributeSimdDirective &S)
Emit device code for the target teams distribute simd directive.
llvm::Value * EmitScalarConversion(llvm::Value *Src, QualType SrcTy, QualType DstTy, SourceLocation Loc)
Emit a conversion from the specified type to the specified destination type, both of which are LLVM s...
void EmitVariablyModifiedType(QualType Ty)
EmitVLASize - Capture all the sizes for the VLA expressions in the given variably-modified type and s...
bool isTrivialInitializer(const Expr *Init)
Determine whether the given initializer is trivial in the sense that it requires no code to be genera...
Definition CGDecl.cpp:1822
void EmitStoreOfScalar(llvm::Value *Value, Address Addr, bool Volatile, QualType Ty, AlignmentSource Source=AlignmentSource::Type, bool isInit=false, bool isNontemporal=false)
EmitStoreOfScalar - Store a scalar value to an address, taking care to appropriately convert from the...
void EmitBlock(llvm::BasicBlock *BB, bool IsFinished=false)
EmitBlock - Emit the given block.
Definition CGStmt.cpp:640
void EmitExprAsInit(const Expr *init, const ValueDecl *D, LValue lvalue, bool capturedByInit)
EmitExprAsInit - Emits the code necessary to initialize a location in memory with the given initializ...
Definition CGDecl.cpp:2107
LValue MakeNaturalAlignRawAddrLValue(llvm::Value *V, QualType T)
This class organizes the cross-function state that is used while generating LLVM code.
void SetInternalFunctionAttributes(GlobalDecl GD, llvm::Function *F, const CGFunctionInfo &FI)
Set the attributes on the LLVM function for the given decl and function info.
llvm::Module & getModule() const
const IntrusiveRefCntPtr< llvm::vfs::FileSystem > & getFileSystem() const
DiagnosticsEngine & getDiags() const
const LangOptions & getLangOpts() const
CharUnits getNaturalTypeAlignment(QualType T, LValueBaseInfo *BaseInfo=nullptr, TBAAAccessInfo *TBAAInfo=nullptr, bool forPointeeType=false)
CGOpenMPRuntime & getOpenMPRuntime()
Return a reference to the configured OpenMP runtime.
TBAAAccessInfo getTBAAInfoForSubobject(LValue Base, QualType AccessType)
getTBAAInfoForSubobject - Get TBAA information for an access with a given base lvalue.
ASTContext & getContext() const
const CodeGenOptions & getCodeGenOpts() const
StringRef getMangledName(GlobalDecl GD)
std::optional< CharUnits > getOMPAllocateAlignment(const VarDecl *VD)
Return the alignment specified in an allocate directive, if present.
Definition CGDecl.cpp:2969
llvm::Constant * EmitNullConstant(QualType T)
Return the result of value-initializing the given type, i.e.
llvm::Type * ConvertType(QualType T)
ConvertType - Convert type T into a llvm::Type.
llvm::FunctionType * GetFunctionType(const CGFunctionInfo &Info)
GetFunctionType - Get the LLVM function type for.
Definition CGCall.cpp:1801
const CGFunctionInfo & arrangeBuiltinFunctionDeclaration(QualType resultType, const FunctionArgList &args)
A builtin function is a freestanding function using the default C conventions.
Definition CGCall.cpp:742
const CGRecordLayout & getCGRecordLayout(const RecordDecl *)
getCGRecordLayout - Return record layout info for the given record decl.
llvm::GlobalVariable * GetAddrOfVTable(const CXXRecordDecl *RD)
GetAddrOfVTable - Get the address of the VTable for the given record decl.
Definition CGVTables.cpp:41
A specialization of Address that requires the address to be an LLVM Constant.
Definition Address.h:296
static ConstantAddress invalid()
Definition Address.h:304
void pushTerminate()
Push a terminate handler on the stack.
void popTerminate()
Pops a terminate handler off the stack.
Definition CGCleanup.h:639
FunctionArgList - Type for representing both the decl and type of parameters to a function.
Definition CGCall.h:375
LValue - This represents an lvalue reference.
Definition CGValue.h:183
CharUnits getAlignment() const
Definition CGValue.h:355
llvm::Value * getPointer(CodeGenFunction &CGF) const
const Qualifiers & getQuals() const
Definition CGValue.h:350
Address getAddress() const
Definition CGValue.h:373
LValueBaseInfo getBaseInfo() const
Definition CGValue.h:358
QualType getType() const
Definition CGValue.h:303
TBAAAccessInfo getTBAAInfo() const
Definition CGValue.h:347
A basic class for pre|post-action for advanced codegen sequence for OpenMP region.
virtual void Enter(CodeGenFunction &CGF)
RValue - This trivial value class is used to represent the result of an expression that is evaluated.
Definition CGValue.h:42
static RValue get(llvm::Value *V)
Definition CGValue.h:99
static RValue getComplex(llvm::Value *V1, llvm::Value *V2)
Definition CGValue.h:109
llvm::Value * getScalarVal() const
getScalarVal() - Return the Value* of this scalar value.
Definition CGValue.h:72
An abstract representation of an aligned address.
Definition Address.h:42
llvm::Type * getElementType() const
Return the type of the values stored in this address.
Definition Address.h:77
llvm::Value * getPointer() const
Definition Address.h:66
static RawAddress invalid()
Definition Address.h:61
Class intended to support codegen of all kind of the reduction clauses.
LValue getSharedLValue(unsigned N) const
Returns LValue for the reduction item.
const Expr * getRefExpr(unsigned N) const
Returns the base declaration of the reduction item.
LValue getOrigLValue(unsigned N) const
Returns LValue for the original reduction item.
bool needCleanups(unsigned N)
Returns true if the private copy requires cleanups.
void emitAggregateType(CodeGenFunction &CGF, unsigned N)
Emits the code for the variable-modified type, if required.
const VarDecl * getBaseDecl(unsigned N) const
Returns the base declaration of the reduction item.
QualType getPrivateType(unsigned N) const
Return the type of the private item.
bool usesReductionInitializer(unsigned N) const
Returns true if the initialization of the reduction item uses initializer from declare reduction cons...
void emitSharedOrigLValue(CodeGenFunction &CGF, unsigned N)
Emits lvalue for the shared and original reduction item.
void emitInitialization(CodeGenFunction &CGF, unsigned N, Address PrivateAddr, Address SharedAddr, llvm::function_ref< bool(CodeGenFunction &)> DefaultInit)
Performs initialization of the private copy for the reduction item.
std::pair< llvm::Value *, llvm::Value * > getSizes(unsigned N) const
Returns the size of the reduction item (in chars and total number of elements in the item),...
ReductionCodeGen(ArrayRef< const Expr * > Shareds, ArrayRef< const Expr * > Origs, ArrayRef< const Expr * > Privates, ArrayRef< const Expr * > ReductionOps)
void emitCleanups(CodeGenFunction &CGF, unsigned N, Address PrivateAddr)
Emits cleanup code for the reduction item.
Address adjustPrivateAddress(CodeGenFunction &CGF, unsigned N, Address PrivateAddr)
Adjusts PrivateAddr for use instead of the original variable address in normal operations.
Class provides a way to call simple version of codegen for OpenMP region, or an advanced with possibl...
void operator()(CodeGenFunction &CGF) const
void setAction(PrePostActionTy &Action) const
ConstStmtVisitor - This class implements a simple visitor for Stmt subclasses.
DeclContext - This is used only as base class of specific decl types that can act as declaration cont...
Definition DeclBase.h:1462
void addDecl(Decl *D)
Add the declaration D into this context.
A reference to a declared variable, function, enum, etc.
Definition Expr.h:1273
ValueDecl * getDecl()
Definition Expr.h:1341
Decl - This represents one declaration (or definition), e.g.
Definition DeclBase.h:86
T * getAttr() const
Definition DeclBase.h:581
bool hasAttrs() const
Definition DeclBase.h:526
ASTContext & getASTContext() const LLVM_READONLY
Definition DeclBase.cpp:547
void addAttr(Attr *A)
virtual Stmt * getBody() const
getBody - If this Decl represents a declaration for a body of code, such as a function or method defi...
Definition DeclBase.h:1100
llvm::iterator_range< specific_attr_iterator< T > > specific_attrs() const
Definition DeclBase.h:567
SourceLocation getLocation() const
Definition DeclBase.h:447
DeclContext * getDeclContext()
Definition DeclBase.h:456
AttrVec & getAttrs()
Definition DeclBase.h:532
bool hasAttr() const
Definition DeclBase.h:585
virtual Decl * getCanonicalDecl()
Retrieves the "canonical" declaration of the given declaration.
Definition DeclBase.h:991
SourceLocation getBeginLoc() const LLVM_READONLY
Definition Decl.h:831
DiagnosticBuilder Report(SourceLocation Loc, unsigned DiagID)
Issue the message to the client.
This represents one expression.
Definition Expr.h:112
bool isIntegerConstantExpr(const ASTContext &Ctx) const
bool isGLValue() const
Definition Expr.h:287
Expr * IgnoreParenNoopCasts(const ASTContext &Ctx) LLVM_READONLY
Skip past any parentheses and casts which do not change the value (including ptr->int casts of the sa...
Definition Expr.cpp:3117
@ SE_AllowSideEffects
Allow any unmodeled side effect.
Definition Expr.h:677
@ SE_AllowUndefinedBehavior
Allow UB that we can give a value, but not arbitrary unmodeled side effects.
Definition Expr.h:675
Expr * IgnoreParenCasts() LLVM_READONLY
Skip past any parentheses and casts which might surround this expression until reaching a fixed point...
Definition Expr.cpp:3095
llvm::APSInt EvaluateKnownConstInt(const ASTContext &Ctx) const
EvaluateKnownConstInt - Call EvaluateAsRValue and return the folded integer.
Expr * IgnoreParenImpCasts() LLVM_READONLY
Skip past any parentheses and implicit casts which might surround this expression until reaching a fi...
Definition Expr.cpp:3090
bool isEvaluatable(const ASTContext &Ctx, SideEffectsKind AllowSideEffects=SE_NoSideEffects) const
isEvaluatable - Call EvaluateAsRValue to see if this expression can be constant folded without side-e...
std::optional< llvm::APSInt > getIntegerConstantExpr(const ASTContext &Ctx) const
isIntegerConstantExpr - Return the value if this expression is a valid integer constant expression.
bool HasSideEffects(const ASTContext &Ctx, bool IncludePossibleEffects=true) const
HasSideEffects - This routine returns true for all those expressions which have any effect other than...
Definition Expr.cpp:3688
bool EvaluateAsBooleanCondition(bool &Result, const ASTContext &Ctx, bool InConstantContext=false) const
EvaluateAsBooleanCondition - Return true if this is a constant which we can fold and convert to a boo...
SourceLocation getExprLoc() const LLVM_READONLY
getExprLoc - Return the preferred location for the arrow when diagnosing a problem with a generic exp...
Definition Expr.cpp:277
static bool isSameComparisonOperand(const Expr *E1, const Expr *E2)
Checks that the two Expr's will refer to the same value as a comparison operand.
Definition Expr.cpp:4320
QualType getType() const
Definition Expr.h:144
bool hasNonTrivialCall(const ASTContext &Ctx) const
Determine whether this expression involves a call to any function that is not trivial.
Definition Expr.cpp:4056
Represents a member of a struct/union/class.
Definition Decl.h:3175
unsigned getFieldIndex() const
Returns the index of this field within its record, as appropriate for passing to ASTRecordLayout::get...
Definition Decl.h:3260
const RecordDecl * getParent() const
Returns the parent of this field declaration, which is the struct in which this field is defined.
Definition Decl.h:3411
static FieldDecl * Create(const ASTContext &C, DeclContext *DC, SourceLocation StartLoc, SourceLocation IdLoc, const IdentifierInfo *Id, QualType T, TypeSourceInfo *TInfo, Expr *BW, bool Mutable, InClassInitStyle InitStyle)
Definition Decl.cpp:4702
Represents a function declaration or definition.
Definition Decl.h:2015
const ParmVarDecl * getParamDecl(unsigned i) const
Definition Decl.h:2812
QualType getReturnType() const
Definition Decl.h:2860
ArrayRef< ParmVarDecl * > parameters() const
Definition Decl.h:2789
FunctionDecl * getCanonicalDecl() override
Retrieves the "canonical" declaration of the given declaration.
Definition Decl.cpp:3749
FunctionDecl * getMostRecentDecl()
Returns the most recent (re)declaration of this declaration.
unsigned getNumParams() const
Return the number of parameters this function must have based on its FunctionType.
Definition Decl.cpp:3828
FunctionDecl * getPreviousDecl()
Return the previous declaration of this declaration or NULL if this is the first declaration.
GlobalDecl - represents a global declaration.
Definition GlobalDecl.h:57
const Decl * getDecl() const
Definition GlobalDecl.h:106
static ImplicitParamDecl * Create(ASTContext &C, DeclContext *DC, SourceLocation IdLoc, IdentifierInfo *Id, QualType T, ImplicitParamKind ParamKind)
Create implicit parameter.
Definition Decl.cpp:5604
static IntegerLiteral * Create(const ASTContext &C, const llvm::APInt &V, QualType type, SourceLocation l)
Returns a new integer literal with value 'V' and type 'type'.
Definition Expr.cpp:975
An lvalue reference type, per C++11 [dcl.ref].
Definition TypeBase.h:3667
MemberExpr - [C99 6.5.2.3] Structure and Union Members.
Definition Expr.h:3367
ValueDecl * getMemberDecl() const
Retrieve the member declaration to which this expression refers.
Definition Expr.h:3450
Expr * getBase() const
Definition Expr.h:3444
StringRef getName() const
Get the name of identifier for this declaration as a StringRef.
Definition Decl.h:301
bool isExternallyVisible() const
Definition Decl.h:433
const Stmt * getPreInitStmt() const
Get pre-initialization statement for the clause.
This is a basic class for representing single OpenMP clause.
This represents 'pragma omp declare mapper ...' directive.
Definition DeclOpenMP.h:349
Expr * getMapperVarRef()
Get the variable declared in the mapper.
Definition DeclOpenMP.h:411
This represents 'pragma omp declare reduction ...' directive.
Definition DeclOpenMP.h:239
Expr * getInitializer()
Get initializer expression (if specified) of the declare reduction construct.
Definition DeclOpenMP.h:300
Expr * getInitPriv()
Get Priv variable of the initializer.
Definition DeclOpenMP.h:311
Expr * getCombinerOut()
Get Out variable of the combiner.
Definition DeclOpenMP.h:288
Expr * getCombinerIn()
Get In variable of the combiner.
Definition DeclOpenMP.h:285
Expr * getCombiner()
Get combiner expression of the declare reduction construct.
Definition DeclOpenMP.h:282
Expr * getInitOrig()
Get Orig variable of the initializer.
Definition DeclOpenMP.h:308
OMPDeclareReductionInitKind getInitializerKind() const
Get initializer kind.
Definition DeclOpenMP.h:303
This represents 'if' clause in the 'pragma omp ...' directive.
Expr * getCondition() const
Returns condition.
OMPIteratorHelperData & getHelper(unsigned I)
Fetches helper data for the specified iteration space.
Definition Expr.cpp:5592
unsigned numOfIterators() const
Returns number of iterator definitions.
Definition ExprOpenMP.h:275
This represents 'num_threads' clause in the 'pragma omp ...' directive.
This represents 'pragma omp requires...' directive.
Definition DeclOpenMP.h:479
clauselist_range clauselists()
Definition DeclOpenMP.h:504
This represents 'threadset' clause in the 'pragma omp task ...' directive.
OpaqueValueExpr - An expression referring to an opaque object of a fixed type and value class.
Definition Expr.h:1181
Represents a parameter to a function.
Definition Decl.h:1805
PointerType - C99 6.7.5.1 - Pointer Declarators.
Definition TypeBase.h:3378
Represents an unpacked "presumed" location which can be presented to the user.
unsigned getColumn() const
Return the presumed column number of this location.
const char * getFilename() const
Return the presumed filename of this location.
unsigned getLine() const
Return the presumed line number of this location.
A (possibly-)qualified type.
Definition TypeBase.h:937
void addRestrict()
Add the restrict qualifier to this QualType.
Definition TypeBase.h:1178
QualType withRestrict() const
Definition TypeBase.h:1181
bool isNull() const
Return true if this QualType doesn't point to a type yet.
Definition TypeBase.h:1004
const Type * getTypePtr() const
Retrieves a pointer to the underlying (unqualified) type.
Definition TypeBase.h:8431
Qualifiers getQualifiers() const
Retrieve the set of qualifiers applied to this type.
Definition TypeBase.h:8471
QualType getNonReferenceType() const
If Type is a reference type (e.g., const int&), returns the type that the reference refers to ("const...
Definition TypeBase.h:8616
QualType getCanonicalType() const
Definition TypeBase.h:8483
DestructionKind isDestructedType() const
Returns a nonzero value if objects of this type require non-trivial work to clean up after.
Definition TypeBase.h:1551
Represents a struct/union/class.
Definition Decl.h:4342
field_iterator field_end() const
Definition Decl.h:4548
field_range fields() const
Definition Decl.h:4545
virtual void completeDefinition()
Note that the definition of this type is now complete.
Definition Decl.cpp:5293
bool field_empty() const
Definition Decl.h:4553
field_iterator field_begin() const
Definition Decl.cpp:5277
Scope - A scope is a transient data structure that is used while parsing the program.
Definition Scope.h:41
Encodes a location in the source.
static SourceLocation getFromRawEncoding(UIntTy Encoding)
Turn a raw encoding of a SourceLocation object into a real SourceLocation.
bool isValid() const
Return true if this is a valid SourceLocation object.
UIntTy getRawEncoding() const
When a SourceLocation itself cannot be used, this returns an (opaque) 32-bit integer encoding for it.
This class handles loading and caching of source files into memory.
PresumedLoc getPresumedLoc(SourceLocation Loc, bool UseLineDirectives=true) const
Returns the "presumed" location of the specified SourceLocation.
Stmt - This represents one statement.
Definition Stmt.h:86
child_range children()
Definition Stmt.cpp:304
StmtClass getStmtClass() const
Definition Stmt.h:1494
SourceRange getSourceRange() const LLVM_READONLY
SourceLocation tokens are not useful in isolation - they are low level value objects created/interpre...
Definition Stmt.cpp:343
Stmt * IgnoreContainers(bool IgnoreCaptured=false)
Skip no-op (attributed, compound) container stmts and skip captured stmt at the top,...
Definition Stmt.cpp:210
SourceLocation getBeginLoc() const LLVM_READONLY
Definition Stmt.cpp:355
void startDefinition()
Starts the definition of this tag declaration.
Definition Decl.cpp:4908
bool isUnion() const
Definition Decl.h:3943
The base class of the type hierarchy.
Definition TypeBase.h:1866
bool isVoidType() const
Definition TypeBase.h:9034
const Type * getPointeeOrArrayElementType() const
If this is a pointer type, return the pointee type.
Definition TypeBase.h:9221
bool isSignedIntegerType() const
Return true if this is an integer type that is signed, according to C99 6.2.5p4 [char,...
Definition Type.cpp:2231
CXXRecordDecl * getAsCXXRecordDecl() const
Retrieves the CXXRecordDecl that this type refers to, either because the type is a RecordType or beca...
Definition Type.h:26
RecordDecl * getAsRecordDecl() const
Retrieves the RecordDecl this type refers to.
Definition Type.h:41
bool isArrayType() const
Definition TypeBase.h:8767
bool isPointerType() const
Definition TypeBase.h:8668
CanQualType getCanonicalTypeUnqualified() const
bool isIntegerType() const
isIntegerType() does not include complex integers (a GCC extension).
Definition TypeBase.h:9078
const T * castAs() const
Member-template castAs<specific type>.
Definition TypeBase.h:9328
bool isReferenceType() const
Definition TypeBase.h:8692
QualType getPointeeType() const
If this is a pointer, ObjC object pointer, or block pointer, this returns the respective pointee.
Definition Type.cpp:754
bool isLValueReferenceType() const
Definition TypeBase.h:8696
bool isAggregateType() const
Determines whether the type is a C++ aggregate type or C aggregate or union type.
Definition Type.cpp:2456
RecordDecl * castAsRecordDecl() const
Definition Type.h:48
QualType getCanonicalTypeInternal() const
Definition TypeBase.h:3169
const Type * getBaseElementTypeUnsafe() const
Get the base element type of this type, potentially discarding type qualifiers.
Definition TypeBase.h:9214
bool isVariablyModifiedType() const
Whether this type is a variably-modified type (C99 6.7.5).
Definition TypeBase.h:2850
const ArrayType * getAsArrayTypeUnsafe() const
A variant of getAs<> for array types which silently discards qualifiers from the outermost type.
Definition TypeBase.h:9314
bool isFloatingType() const
Definition Type.cpp:2342
bool isUnsignedIntegerType() const
Return true if this is an integer type that is unsigned, according to C99 6.2.5p6 [which returns true...
Definition Type.cpp:2285
bool isAnyPointerType() const
Definition TypeBase.h:8676
const T * getAs() const
Member-template getAs<specific type>'.
Definition TypeBase.h:9261
bool isRecordType() const
Definition TypeBase.h:8795
bool isUnionType() const
Definition Type.cpp:720
Represent the declaration of a variable (in which case it is an lvalue) a function (in which case it ...
Definition Decl.h:712
QualType getType() const
Definition Decl.h:723
Represents a variable declaration or definition.
Definition Decl.h:926
VarDecl * getCanonicalDecl() override
Retrieves the "canonical" declaration of the given declaration.
Definition Decl.cpp:2270
VarDecl * getDefinition(ASTContext &)
Get the real (not just tentative) definition for this declaration.
Definition Decl.cpp:2379
const Expr * getInit() const
Definition Decl.h:1383
bool hasExternalStorage() const
Returns true if a variable has extern or private_extern storage.
Definition Decl.h:1232
@ DeclarationOnly
This declaration is only a declaration.
Definition Decl.h:1310
DefinitionKind hasDefinition(ASTContext &) const
Check whether this variable is defined in this translation unit.
Definition Decl.cpp:2388
bool isLocalVarDeclOrParm() const
Similar to isLocalVarDecl but also includes parameters.
Definition Decl.h:1277
const Expr * getAnyInitializer() const
Get the initializer for this variable, no matter which declaration it is attached to.
Definition Decl.h:1373
Represents a C array with a specified size that is not an integer-constant-expression.
Definition TypeBase.h:4016
Expr * getSizeExpr() const
Definition TypeBase.h:4030
specific_attr_iterator - Iterates over a subrange of an AttrVec, only providing attributes that are o...
Definition SPIR.cpp:35
bool isEmptyRecordForLayout(const ASTContext &Context, QualType T)
isEmptyRecordForLayout - Return true iff a structure contains only empty base classes (per isEmptyRec...
@ Type
The l-value was considered opaque, so the alignment was determined from a type.
Definition CGValue.h:155
@ Decl
The l-value was an access to a declared entity or something equivalently strong, like the address of ...
Definition CGValue.h:146
bool isEmptyFieldForLayout(const ASTContext &Context, const FieldDecl *FD)
isEmptyFieldForLayout - Return true iff the field is "empty", that is, either a zero-width bit-field ...
ComparisonResult
Indicates the result of a tentative comparison.
The JSON file list parser is used to communicate input to InstallAPI.
bool isOpenMPWorksharingDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a worksharing directive.
CanQual< Type > CanQualType
Represents a canonical, potentially-qualified type.
bool needsTaskBasedThreadLimit(OpenMPDirectiveKind DKind)
Checks if the specified target directive, combined or not, needs task based thread_limit.
@ Match
This is not an overload because the signature exactly matches an existing declaration.
Definition Sema.h:830
@ Ctor_Complete
Complete object ctor.
Definition ABI.h:25
Privates[]
This class represents the 'transparent' clause in the 'pragma omp task' directive.
bool isa(CodeGen::Address addr)
Definition Address.h:330
if(T->getSizeExpr()) TRY_TO(TraverseStmt(const_cast< Expr * >(T -> getSizeExpr())))
bool isOpenMPTargetDataManagementDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a target data offload directive.
static bool classof(const OMPClause *T)
@ Conditional
A conditional (?:) operator.
Definition Sema.h:669
@ ICIS_NoInit
No in-class initializer.
Definition Specifiers.h:272
bool isOpenMPDistributeDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a distribute directive.
@ LCK_ByRef
Capturing by reference.
Definition Lambda.h:37
LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE()
@ Private
'private' clause, allowed on 'parallel', 'serial', 'loop', 'parallel loop', and 'serial loop' constru...
@ Reduction
'reduction' clause, allowed on Parallel, Serial, Loop, and the combined constructs.
@ Present
'present' clause, allowed on Compute and Combined constructs, plus 'data' and 'declare'.
OpenMPScheduleClauseModifier
OpenMP modifiers for 'schedule' clause.
Definition OpenMPKinds.h:39
@ OMPC_SCHEDULE_MODIFIER_last
Definition OpenMPKinds.h:44
@ OMPC_SCHEDULE_MODIFIER_unknown
Definition OpenMPKinds.h:40
@ AS_public
Definition Specifiers.h:124
nullptr
This class represents a compute construct, representing a 'Kind' of ‘parallel’, 'serial',...
@ CR_OpenMP
bool isOpenMPParallelDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a parallel-kind directive.
OpenMPDistScheduleClauseKind
OpenMP attributes for 'dist_schedule' clause.
Expr * Cond
};
bool isOpenMPTaskingDirective(OpenMPDirectiveKind Kind)
Checks if the specified directive kind is one of tasking directives - task, taskloop,...
bool isOpenMPTargetExecutionDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a target code offload directive.
@ OMPC_DYN_GROUPPRIVATE_FALLBACK_unknown
@ Result
The result type of a method or function.
Definition TypeBase.h:905
bool isOpenMPTeamsDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a teams-kind directive.
OpenMPDependClauseKind
OpenMP attributes for 'depend' clause.
Definition OpenMPKinds.h:55
@ OMPC_DEPEND_unknown
Definition OpenMPKinds.h:59
@ Dtor_Complete
Complete object dtor.
Definition ABI.h:36
@ Union
The "union" keyword.
Definition TypeBase.h:5989
bool isOpenMPTargetMapEnteringDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a map-entering target directive.
@ Type
The name was classified as a type.
Definition Sema.h:564
bool isOpenMPLoopDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a directive with an associated loop construct.
OpenMPSeverityClauseKind
OpenMP attributes for 'severity' clause.
LangAS
Defines the address space values used by the address space qualifier of QualType.
llvm::omp::Directive OpenMPDirectiveKind
OpenMP directives.
Definition OpenMPKinds.h:25
bool isOpenMPSimdDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a simd directive.
@ VK_PRValue
A pr-value expression (in the C++11 taxonomy) produces a temporary value.
Definition Specifiers.h:135
@ VK_LValue
An l-value expression is a reference to an object with independent storage.
Definition Specifiers.h:139
for(const auto &A :T->param_types())
void getOpenMPCaptureRegions(llvm::SmallVectorImpl< OpenMPDirectiveKind > &CaptureRegions, OpenMPDirectiveKind DKind)
Return the captured regions of an OpenMP directive.
OpenMPNumThreadsClauseModifier
@ OMPC_ATOMIC_DEFAULT_MEM_ORDER_unknown
U cast(CodeGen::Address addr)
Definition Address.h:327
@ OMPC_DEVICE_unknown
Definition OpenMPKinds.h:51
OpenMPMapModifierKind
OpenMP modifier kind for 'map' clause.
Definition OpenMPKinds.h:79
@ OMPC_MAP_MODIFIER_unknown
Definition OpenMPKinds.h:80
@ Other
Other implicit parameter.
Definition Decl.h:1761
OpenMPScheduleClauseKind
OpenMP attributes for 'schedule' clause.
Definition OpenMPKinds.h:31
@ OMPC_SCHEDULE_unknown
Definition OpenMPKinds.h:35
bool isOpenMPTaskLoopDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a taskloop directive.
OpenMPThreadsetKind
OpenMP modifiers for 'threadset' clause.
OpenMPMapClauseKind
OpenMP mapping kind for 'map' clause.
Definition OpenMPKinds.h:71
@ OMPC_MAP_unknown
Definition OpenMPKinds.h:75
unsigned long uint64_t
Diagnostic wrappers for TextAPI types for error reporting.
Definition Dominators.h:30
#define false
Definition stdbool.h:26
Data for list of allocators.
Expr * AllocatorTraits
Allocator traits.
struct with the values to be passed to the dispatch runtime function
llvm::Value * Chunk
Chunk size specified using 'schedule' clause (nullptr if chunk was not specified)
Maps the expression for the lastprivate variable to the global copy used to store new value because o...
Struct with the values to be passed to the static runtime function.
bool IVSigned
Sign of the iteration variable.
Address UB
Address of the output variable in which the upper iteration number is returned.
Address IL
Address of the output variable in which the flag of the last iteration is returned.
llvm::Value * Chunk
Value of the chunk for the static_chunked scheduled loop.
unsigned IVSize
Size of the iteration variable in bits.
Address ST
Address of the output variable in which the stride value is returned necessary to generated the stati...
bool Ordered
true if loop is ordered, false otherwise.
Address LB
Address of the output variable in which the lower iteration number is returned.
A jump destination is an abstract label, branching to which may require a jump out through normal cle...
llvm::IntegerType * Int8Ty
i8, i16, i32, and i64
llvm::CallingConv::ID getRuntimeCC() const
SmallVector< const Expr *, 4 > DepExprs
EvalResult is a struct with detailed info about an evaluated expression.
Definition Expr.h:648
Extra information about a function prototype.
Definition TypeBase.h:5442
Expr * CounterUpdate
Updater for the internal counter: ++CounterVD;.
Definition ExprOpenMP.h:121
Scheduling data for loop-based OpenMP directives.
OpenMPScheduleClauseModifier M2
OpenMPScheduleClauseModifier M1
OpenMPScheduleClauseKind Schedule
Describes how types, statements, expressions, and declarations should be printed.