clang 23.0.0git
CGOpenMPRuntime.cpp
Go to the documentation of this file.
1//===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This provides a class for OpenMP runtime code generation.
10//
11//===----------------------------------------------------------------------===//
12
13#include "CGOpenMPRuntime.h"
14#include "ABIInfoImpl.h"
15#include "CGCXXABI.h"
16#include "CGCleanup.h"
17#include "CGDebugInfo.h"
18#include "CGRecordLayout.h"
19#include "CodeGenFunction.h"
20#include "TargetInfo.h"
21#include "clang/AST/APValue.h"
22#include "clang/AST/Attr.h"
23#include "clang/AST/Decl.h"
31#include "llvm/ADT/ArrayRef.h"
32#include "llvm/ADT/SmallSet.h"
33#include "llvm/ADT/SmallVector.h"
34#include "llvm/ADT/StringExtras.h"
35#include "llvm/Bitcode/BitcodeReader.h"
36#include "llvm/IR/Constants.h"
37#include "llvm/IR/DerivedTypes.h"
38#include "llvm/IR/GlobalValue.h"
39#include "llvm/IR/InstrTypes.h"
40#include "llvm/IR/Value.h"
41#include "llvm/Support/AtomicOrdering.h"
42#include "llvm/Support/raw_ostream.h"
43#include <cassert>
44#include <cstdint>
45#include <numeric>
46#include <optional>
47
48using namespace clang;
49using namespace CodeGen;
50using namespace llvm::omp;
51
52namespace {
53/// Base class for handling code generation inside OpenMP regions.
54class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
55public:
56 /// Kinds of OpenMP regions used in codegen.
57 enum CGOpenMPRegionKind {
58 /// Region with outlined function for standalone 'parallel'
59 /// directive.
60 ParallelOutlinedRegion,
61 /// Region with outlined function for standalone 'task' directive.
62 TaskOutlinedRegion,
63 /// Region for constructs that do not require function outlining,
64 /// like 'for', 'sections', 'atomic' etc. directives.
65 InlinedRegion,
66 /// Region with outlined function for standalone 'target' directive.
67 TargetRegion,
68 };
69
70 CGOpenMPRegionInfo(const CapturedStmt &CS,
71 const CGOpenMPRegionKind RegionKind,
72 const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
73 bool HasCancel)
74 : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
75 CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}
76
77 CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
78 const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
79 bool HasCancel)
80 : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
81 Kind(Kind), HasCancel(HasCancel) {}
82
83 /// Get a variable or parameter for storing global thread id
84 /// inside OpenMP construct.
85 virtual const VarDecl *getThreadIDVariable() const = 0;
86
87 /// Emit the captured statement body.
88 void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;
89
90 /// Get an LValue for the current ThreadID variable.
91 /// \return LValue for thread id variable. This LValue always has type int32*.
92 virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);
93
94 virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}
95
96 CGOpenMPRegionKind getRegionKind() const { return RegionKind; }
97
98 OpenMPDirectiveKind getDirectiveKind() const { return Kind; }
99
100 bool hasCancel() const { return HasCancel; }
101
102 static bool classof(const CGCapturedStmtInfo *Info) {
103 return Info->getKind() == CR_OpenMP;
104 }
105
106 ~CGOpenMPRegionInfo() override = default;
107
108protected:
109 CGOpenMPRegionKind RegionKind;
110 RegionCodeGenTy CodeGen;
112 bool HasCancel;
113};
114
115/// API for captured statement code generation in OpenMP constructs.
116class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo {
117public:
118 CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
119 const RegionCodeGenTy &CodeGen,
120 OpenMPDirectiveKind Kind, bool HasCancel,
121 StringRef HelperName)
122 : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
123 HasCancel),
124 ThreadIDVar(ThreadIDVar), HelperName(HelperName) {
125 assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
126 }
127
128 /// Get a variable or parameter for storing global thread id
129 /// inside OpenMP construct.
130 const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }
131
132 /// Get the name of the capture helper.
133 StringRef getHelperName() const override { return HelperName; }
134
135 static bool classof(const CGCapturedStmtInfo *Info) {
136 return CGOpenMPRegionInfo::classof(Info) &&
137 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
138 ParallelOutlinedRegion;
139 }
140
141private:
142 /// A variable or parameter storing global thread id for OpenMP
143 /// constructs.
144 const VarDecl *ThreadIDVar;
145 StringRef HelperName;
146};
147
148/// API for captured statement code generation in OpenMP constructs.
/// This variant is always created with the TaskOutlinedRegion kind and
/// forwards untied-switch emission to a caller-supplied UntiedTaskActionTy.
149class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo {
150public:
 /// Pre/post action that builds a switch over the part id loaded through
 /// PartIDVar. Both Enter() and emitUntiedSwitch() do nothing when the action
 /// was constructed with Tied == true.
151 class UntiedTaskActionTy final : public PrePostActionTy {
 // True when the action was constructed with Tied == false.
152 bool Untied;
 // Pointer-typed variable through which the part id is loaded and stored.
153 const VarDecl *PartIDVar;
 // Callback invoked by emitUntiedSwitch() after the part id has been stored.
154 const RegionCodeGenTy UntiedCodeGen;
 // Switch created by Enter(); stays null until Enter() runs with Untied set.
155 llvm::SwitchInst *UntiedSwitch = nullptr;
156
157 public:
158 UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar,
159 const RegionCodeGenTy &UntiedCodeGen)
160 : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {}
 /// Load the part id, create the switch on it with ".untied.done." as the
 /// default destination, register case 0 and emit the first switch point.
161 void Enter(CodeGenFunction &CGF) override {
162 if (Untied) {
163 // Emit task switching point.
164 LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
165 CGF.GetAddrOfLocalVar(PartIDVar),
166 PartIDVar->getType()->castAs<PointerType>());
167 llvm::Value *Res =
168 CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation());
169 llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done.");
170 UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB);
171 CGF.EmitBlock(DoneBB);
 // NOTE(review): the listing's numbering skips 172 here, so a statement may
 // be missing from this view; confirm against the upstream file.
173 CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
 // Case 0 targets the block that was just emitted.
174 UntiedSwitch->addCase(CGF.Builder.getInt32(0),
175 CGF.Builder.GetInsertBlock());
176 emitUntiedSwitch(CGF);
177 }
178 }
 /// Store the current number of switch cases into the part id, run
 /// UntiedCodeGen, then add a new ".untied.jmp." case and continue at
 /// ".untied.next.". Dereferences UntiedSwitch, so Enter() must have run.
179 void emitUntiedSwitch(CodeGenFunction &CGF) const {
180 if (Untied) {
181 LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
182 CGF.GetAddrOfLocalVar(PartIDVar),
183 PartIDVar->getType()->castAs<PointerType>());
184 CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
185 PartIdLVal);
186 UntiedCodeGen(CGF);
187 CodeGenFunction::JumpDest CurPoint =
188 CGF.getJumpDestInCurrentScope(".untied.next.");
 // NOTE(review): the listing's numbering skips 189 here, so a statement may
 // be missing from this view; confirm against the upstream file.
190 CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
 // The new case value equals the case count before insertion, i.e. the
 // value stored into the part id above.
191 UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
192 CGF.Builder.GetInsertBlock());
193 CGF.EmitBranchThroughCleanup(CurPoint);
194 CGF.EmitBlock(CurPoint.getBlock());
195 }
196 }
 /// Number of cases on the switch. Dereferences UntiedSwitch unconditionally,
 /// so it is only valid after Enter() has created the switch.
197 unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); }
198 };
 /// \param ThreadIDVar Variable or parameter holding the global thread id;
 /// must not be null.
 /// \param Action Stored by reference, so it must outlive this object.
199 CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
200 const VarDecl *ThreadIDVar,
201 const RegionCodeGenTy &CodeGen,
202 OpenMPDirectiveKind Kind, bool HasCancel,
203 const UntiedTaskActionTy &Action)
204 : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
205 ThreadIDVar(ThreadIDVar), Action(Action) {
206 assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
207 }
208
209 /// Get a variable or parameter for storing global thread id
210 /// inside OpenMP construct.
211 const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }
212
213 /// Get an LValue for the current ThreadID variable.
214 LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;
215
216 /// Get the name of the capture helper.
217 StringRef getHelperName() const override { return ".omp_outlined."; }
218
 /// Forward to the stored action's emitUntiedSwitch().
219 void emitUntiedSwitch(CodeGenFunction &CGF) override {
220 Action.emitUntiedSwitch(CGF);
221 }
222
 /// LLVM-style RTTI: an OpenMP region info whose kind is TaskOutlinedRegion.
223 static bool classof(const CGCapturedStmtInfo *Info) {
224 return CGOpenMPRegionInfo::classof(Info) &&
225 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
226 TaskOutlinedRegion;
227 }
228
229private:
230 /// A variable or parameter storing global thread id for OpenMP
231 /// constructs.
232 const VarDecl *ThreadIDVar;
233 /// Action for emitting code for untied tasks.
234 const UntiedTaskActionTy &Action;
235};
236
237/// API for inlined captured statement code generation in OpenMP
238/// constructs.
239class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
240public:
241 CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
242 const RegionCodeGenTy &CodeGen,
243 OpenMPDirectiveKind Kind, bool HasCancel)
244 : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
245 OldCSI(OldCSI),
246 OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}
247
248 // Retrieve the value of the context parameter.
249 llvm::Value *getContextValue() const override {
250 if (OuterRegionInfo)
251 return OuterRegionInfo->getContextValue();
252 llvm_unreachable("No context value for inlined OpenMP region");
253 }
254
255 void setContextValue(llvm::Value *V) override {
256 if (OuterRegionInfo) {
257 OuterRegionInfo->setContextValue(V);
258 return;
259 }
260 llvm_unreachable("No context value for inlined OpenMP region");
261 }
262
263 /// Lookup the captured field decl for a variable.
264 const FieldDecl *lookup(const VarDecl *VD) const override {
265 if (OuterRegionInfo)
266 return OuterRegionInfo->lookup(VD);
267 // If there is no outer outlined region,no need to lookup in a list of
268 // captured variables, we can use the original one.
269 return nullptr;
270 }
271
272 FieldDecl *getThisFieldDecl() const override {
273 if (OuterRegionInfo)
274 return OuterRegionInfo->getThisFieldDecl();
275 return nullptr;
276 }
277
278 /// Get a variable or parameter for storing global thread id
279 /// inside OpenMP construct.
280 const VarDecl *getThreadIDVariable() const override {
281 if (OuterRegionInfo)
282 return OuterRegionInfo->getThreadIDVariable();
283 return nullptr;
284 }
285
286 /// Get an LValue for the current ThreadID variable.
287 LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override {
288 if (OuterRegionInfo)
289 return OuterRegionInfo->getThreadIDVariableLValue(CGF);
290 llvm_unreachable("No LValue for inlined OpenMP construct");
291 }
292
293 /// Get the name of the capture helper.
294 StringRef getHelperName() const override {
295 if (auto *OuterRegionInfo = getOldCSI())
296 return OuterRegionInfo->getHelperName();
297 llvm_unreachable("No helper name for inlined OpenMP construct");
298 }
299
300 void emitUntiedSwitch(CodeGenFunction &CGF) override {
301 if (OuterRegionInfo)
302 OuterRegionInfo->emitUntiedSwitch(CGF);
303 }
304
305 CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }
306
307 static bool classof(const CGCapturedStmtInfo *Info) {
308 return CGOpenMPRegionInfo::classof(Info) &&
309 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
310 }
311
312 ~CGOpenMPInlinedRegionInfo() override = default;
313
314private:
315 /// CodeGen info about outer OpenMP region.
316 CodeGenFunction::CGCapturedStmtInfo *OldCSI;
317 CGOpenMPRegionInfo *OuterRegionInfo;
318};
319
320/// API for captured statement code generation in OpenMP target
321/// constructs. For this captures, implicit parameters are used instead of the
322/// captured fields. The name of the target region has to be unique in a given
323/// application so it is provided by the client, because only the client has
324/// the information to generate that.
325class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo {
326public:
327 CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
328 const RegionCodeGenTy &CodeGen, StringRef HelperName)
329 : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
330 /*HasCancel=*/false),
331 HelperName(HelperName) {}
332
333 /// This is unused for target regions because each starts executing
334 /// with a single thread.
335 const VarDecl *getThreadIDVariable() const override { return nullptr; }
336
337 /// Get the name of the capture helper.
338 StringRef getHelperName() const override { return HelperName; }
339
340 static bool classof(const CGCapturedStmtInfo *Info) {
341 return CGOpenMPRegionInfo::classof(Info) &&
342 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;
343 }
344
345private:
346 StringRef HelperName;
347};
348
/// Placeholder code generation callback that must never be invoked; it is
/// passed as the CodeGen argument where a RegionCodeGenTy is required but no
/// region body exists.
349static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) {
350 llvm_unreachable("No codegen for expressions");
351}
352/// API for generation of expressions captured in a innermost OpenMP
353/// region.
354class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo {
355public:
356 CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS)
357 : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen,
358 OMPD_unknown,
359 /*HasCancel=*/false),
360 PrivScope(CGF) {
361 // Make sure the globals captured in the provided statement are local by
362 // using the privatization logic. We assume the same variable is not
363 // captured more than once.
364 for (const auto &C : CS.captures()) {
365 if (!C.capturesVariable() && !C.capturesVariableByCopy())
366 continue;
367
368 const VarDecl *VD = C.getCapturedVar();
369 if (VD->isLocalVarDeclOrParm())
370 continue;
371
372 DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
373 /*RefersToEnclosingVariableOrCapture=*/false,
374 VD->getType().getNonReferenceType(), VK_LValue,
375 C.getLocation());
376 PrivScope.addPrivate(VD, CGF.EmitLValue(&DRE).getAddress());
377 }
378 (void)PrivScope.Privatize();
379 }
380
381 /// Lookup the captured field decl for a variable.
382 const FieldDecl *lookup(const VarDecl *VD) const override {
383 if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD))
384 return FD;
385 return nullptr;
386 }
387
388 /// Emit the captured statement body.
389 void EmitBody(CodeGenFunction &CGF, const Stmt *S) override {
390 llvm_unreachable("No body for expressions");
391 }
392
393 /// Get a variable or parameter for storing global thread id
394 /// inside OpenMP construct.
395 const VarDecl *getThreadIDVariable() const override {
396 llvm_unreachable("No thread id for expressions");
397 }
398
399 /// Get the name of the capture helper.
400 StringRef getHelperName() const override {
401 llvm_unreachable("No helper name for expressions");
402 }
403
404 static bool classof(const CGCapturedStmtInfo *Info) { return false; }
405
406private:
407 /// Private scope to capture global variables.
408 CodeGenFunction::OMPPrivateScope PrivScope;
409};
410
411/// RAII for emitting code of OpenMP constructs.
412class InlinedOpenMPRegionRAII {
413 CodeGenFunction &CGF;
414 llvm::DenseMap<const ValueDecl *, FieldDecl *> LambdaCaptureFields;
415 FieldDecl *LambdaThisCaptureField = nullptr;
416 const CodeGen::CGBlockInfo *BlockInfo = nullptr;
417 bool NoInheritance = false;
418
419public:
420 /// Constructs region for combined constructs.
421 /// \param CodeGen Code generation sequence for combined directives. Includes
422 /// a list of functions used for code generation of implicitly inlined
423 /// regions.
424 InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
425 OpenMPDirectiveKind Kind, bool HasCancel,
426 bool NoInheritance = true)
427 : CGF(CGF), NoInheritance(NoInheritance) {
428 // Start emission for the construct.
429 CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
430 CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
431 if (NoInheritance) {
432 std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
433 LambdaThisCaptureField = CGF.LambdaThisCaptureField;
434 CGF.LambdaThisCaptureField = nullptr;
435 BlockInfo = CGF.BlockInfo;
436 CGF.BlockInfo = nullptr;
437 }
438 }
439
440 ~InlinedOpenMPRegionRAII() {
441 // Restore original CapturedStmtInfo only if we're done with code emission.
442 auto *OldCSI =
443 cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
444 delete CGF.CapturedStmtInfo;
445 CGF.CapturedStmtInfo = OldCSI;
446 if (NoInheritance) {
447 std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
448 CGF.LambdaThisCaptureField = LambdaThisCaptureField;
449 CGF.BlockInfo = BlockInfo;
450 }
451 }
452};
453
454/// Values for bit flags used in the ident_t to describe the fields.
455/// All enumerators are named and described in accordance with the code
456/// from https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
457enum OpenMPLocationFlags : unsigned {
458 /// Use trampoline for internal microtask.
459 OMP_IDENT_IMD = 0x01,
460 /// Use c-style ident structure.
461 OMP_IDENT_KMPC = 0x02,
462 /// Atomic reduction option for kmpc_reduce.
463 OMP_ATOMIC_REDUCE = 0x10,
464 /// Explicit 'barrier' directive.
465 OMP_IDENT_BARRIER_EXPL = 0x20,
466 /// Implicit barrier in code.
467 OMP_IDENT_BARRIER_IMPL = 0x40,
468 /// Implicit barrier in 'for' directive.
 /// Shares the value 0x40 with OMP_IDENT_BARRIER_IMPL.
469 OMP_IDENT_BARRIER_IMPL_FOR = 0x40,
470 /// Implicit barrier in 'sections' directive.
471 OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0,
472 /// Implicit barrier in 'single' directive.
473 OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140,
474 /// Call of __kmp_for_static_init for static loop.
475 OMP_IDENT_WORK_LOOP = 0x200,
476 /// Call of __kmp_for_static_init for sections.
477 OMP_IDENT_WORK_SECTIONS = 0x400,
478 /// Call of __kmp_for_static_init for distribute.
479 OMP_IDENT_WORK_DISTRIBUTE = 0x800,
 // Largest value used for the bitmask-enum marking; update it if a larger
 // flag is added.
480 LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE)
481};
482
483/// Describes ident structure that describes a source location.
/// The enumerators are listed in the same order as the members of ident_t
/// shown below.
484/// All descriptions are taken from
485/// https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
486/// Original structure:
487/// typedef struct ident {
488/// kmp_int32 reserved_1; /**< might be used in Fortran;
489/// see above */
490/// kmp_int32 flags; /**< also f.flags; KMP_IDENT_xxx flags;
491/// KMP_IDENT_KMPC identifies this union
492/// member */
493/// kmp_int32 reserved_2; /**< not really used in Fortran any more;
494/// see above */
495///#if USE_ITT_BUILD
496/// /* but currently used for storing
497/// region-specific ITT */
498/// /* contextual information. */
499///#endif /* USE_ITT_BUILD */
500/// kmp_int32 reserved_3; /**< source[4] in Fortran, do not use for
501/// C++ */
502/// char const *psource; /**< String describing the source location.
503/// The string is composed of semi-colon separated
504/// fields which describe the source file,
505/// the function and a pair of line numbers that
506/// delimit the construct.
507/// */
508/// } ident_t;
509enum IdentFieldIndex {
510 /// might be used in Fortran
511 IdentField_Reserved_1,
512 /// OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
513 IdentField_Flags,
514 /// Not really used in Fortran any more
515 IdentField_Reserved_2,
516 /// Source[4] in Fortran, do not use for C++
517 IdentField_Reserved_3,
518 /// String describing the source location. The string is composed of
519 /// semi-colon separated fields which describe the source file, the function
520 /// and a pair of line numbers that delimit the construct.
521 IdentField_PSource
522};
523
524/// Schedule types for 'omp for' loops (these enumerators are taken from
525/// the enum sched_type in kmp.h).
/// Each 'ordered' value below equals the corresponding unordered value
/// plus 32.
526enum OpenMPSchedType {
527 /// Lower bound for default (unordered) versions.
528 OMP_sch_lower = 32,
529 OMP_sch_static_chunked = 33,
530 OMP_sch_static = 34,
531 OMP_sch_dynamic_chunked = 35,
532 OMP_sch_guided_chunked = 36,
533 OMP_sch_runtime = 37,
534 OMP_sch_auto = 38,
535 /// static with chunk adjustment (e.g., simd)
536 OMP_sch_static_balanced_chunked = 45,
537 /// Lower bound for 'ordered' versions.
538 OMP_ord_lower = 64,
539 OMP_ord_static_chunked = 65,
540 OMP_ord_static = 66,
541 OMP_ord_dynamic_chunked = 67,
542 OMP_ord_guided_chunked = 68,
543 OMP_ord_runtime = 69,
544 OMP_ord_auto = 70,
 /// Default schedule: an alias for OMP_sch_static.
545 OMP_sch_default = OMP_sch_static,
546 /// dist_schedule types
547 OMP_dist_sch_static_chunked = 91,
548 OMP_dist_sch_static = 92,
549 /// Fused distribute+for static schedule (entityId = team*nthreads + tid,
550 /// num_entities = nteams*nthreads). One for_static_init call, no
551 /// surrounding distribute_static_init. Matches
552 /// kmp_sched_distr_static_chunk_sched_static_chunkone in the device RTL
553 /// (openmp/device/include/DeviceTypes.h).
554 OMP_dist_sch_static_chunked_sch_static_chunkone = 93,
555 /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
 /// Both modifiers are single high bits (29 and 30), distinct from every
 /// schedule value above.
556 /// Set if the monotonic schedule modifier was present.
557 OMP_sch_modifier_monotonic = (1 << 29),
558 /// Set if the nonmonotonic schedule modifier was present.
559 OMP_sch_modifier_nonmonotonic = (1 << 30),
560};
561
562/// A basic class for pre|post-action for advanced codegen sequence for OpenMP
563/// region.
564class CleanupTy final : public EHScopeStack::Cleanup {
565 PrePostActionTy *Action;
566
567public:
568 explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {}
569 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
570 if (!CGF.HaveInsertPoint())
571 return;
572 Action->Exit(CGF);
573 }
574};
575
576} // anonymous namespace
577
580 if (PrePostAction) {
581 CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction);
582 Callback(CodeGen, CGF, *PrePostAction);
583 } else {
584 PrePostActionTy Action;
585 Callback(CodeGen, CGF, Action);
586 }
587}
588
589/// Check if the combiner is a call to UDR combiner and if it is so return the
590/// UDR decl used for reduction.
591static const OMPDeclareReductionDecl *
592getReductionInit(const Expr *ReductionOp) {
593 if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
594 if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
595 if (const auto *DRE =
596 dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
597 if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl()))
598 return DRD;
599 return nullptr;
600}
601
603 const OMPDeclareReductionDecl *DRD,
604 const Expr *InitOp,
605 Address Private, Address Original,
606 QualType Ty) {
607 if (DRD->getInitializer()) {
608 std::pair<llvm::Function *, llvm::Function *> Reduction =
610 const auto *CE = cast<CallExpr>(InitOp);
611 const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee());
612 const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
613 const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
614 const auto *LHSDRE =
615 cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr());
616 const auto *RHSDRE =
617 cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr());
618 CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
619 PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()), Private);
620 PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()), Original);
621 (void)PrivateScope.Privatize();
624 CGF.EmitIgnoredExpr(InitOp);
625 } else {
626 llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty);
627 std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"});
628 auto *GV = new llvm::GlobalVariable(
629 CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
630 llvm::GlobalValue::PrivateLinkage, Init, Name);
631 LValue LV = CGF.MakeNaturalAlignRawAddrLValue(GV, Ty);
632 RValue InitRVal;
633 switch (CGF.getEvaluationKind(Ty)) {
634 case TEK_Scalar:
635 InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation());
636 break;
637 case TEK_Complex:
638 InitRVal =
640 break;
641 case TEK_Aggregate: {
642 OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_LValue);
643 CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, LV);
644 CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
645 /*IsInitializer=*/false);
646 return;
647 }
648 }
649 OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_PRValue);
650 CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
651 CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
652 /*IsInitializer=*/false);
653 }
654}
655
656/// Emit initialization of arrays of complex types.
657/// \param DestAddr Address of the array.
658/// \param Type Type of array.
659/// \param Init Initial expression of array.
660/// \param SrcAddr Address of the original array.
662 QualType Type, bool EmitDeclareReductionInit,
663 const Expr *Init,
664 const OMPDeclareReductionDecl *DRD,
665 Address SrcAddr = Address::invalid()) {
666 // Perform element-by-element initialization.
667 QualType ElementTy;
668
669 // Drill down to the base element type on both arrays.
670 const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
671 llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
672 if (DRD)
673 SrcAddr = SrcAddr.withElementType(DestAddr.getElementType());
674
675 llvm::Value *SrcBegin = nullptr;
676 if (DRD)
677 SrcBegin = SrcAddr.emitRawPointer(CGF);
678 llvm::Value *DestBegin = DestAddr.emitRawPointer(CGF);
679 // Cast from pointer to array type to pointer to single element.
680 llvm::Value *DestEnd =
681 CGF.Builder.CreateGEP(DestAddr.getElementType(), DestBegin, NumElements);
682 // The basic structure here is a while-do loop.
683 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
684 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
685 llvm::Value *IsEmpty =
686 CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
687 CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
688
689 // Enter the loop body, making that address the current address.
690 llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
691 CGF.EmitBlock(BodyBB);
692
693 CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);
694
695 llvm::PHINode *SrcElementPHI = nullptr;
696 Address SrcElementCurrent = Address::invalid();
697 if (DRD) {
698 SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2,
699 "omp.arraycpy.srcElementPast");
700 SrcElementPHI->addIncoming(SrcBegin, EntryBB);
701 SrcElementCurrent =
702 Address(SrcElementPHI, SrcAddr.getElementType(),
703 SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
704 }
705 llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
706 DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
707 DestElementPHI->addIncoming(DestBegin, EntryBB);
708 Address DestElementCurrent =
709 Address(DestElementPHI, DestAddr.getElementType(),
710 DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));
711
712 // Emit copy.
713 {
715 if (EmitDeclareReductionInit) {
716 emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent,
717 SrcElementCurrent, ElementTy);
718 } else
719 CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
720 /*IsInitializer=*/false);
721 }
722
723 if (DRD) {
724 // Shift the address forward by one element.
725 llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32(
726 SrcAddr.getElementType(), SrcElementPHI, /*Idx0=*/1,
727 "omp.arraycpy.dest.element");
728 SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock());
729 }
730
731 // Shift the address forward by one element.
732 llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32(
733 DestAddr.getElementType(), DestElementPHI, /*Idx0=*/1,
734 "omp.arraycpy.dest.element");
735 // Check whether we've reached the end.
736 llvm::Value *Done =
737 CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
738 CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
739 DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());
740
741 // Done.
742 CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
743}
744
/// Emit the lvalue of the shared expression \p E by delegating to
/// CodeGenFunction::EmitOMPSharedLValue.
745LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) {
746 return CGF.EmitOMPSharedLValue(E);
747}
748
749LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF,
750 const Expr *E) {
751 if (const auto *OASE = dyn_cast<ArraySectionExpr>(E))
752 return CGF.EmitArraySectionExpr(OASE, /*IsLowerBound=*/false);
753 return LValue();
754}
755
756void ReductionCodeGen::emitAggregateInitialization(
757 CodeGenFunction &CGF, unsigned N, Address PrivateAddr, Address SharedAddr,
758 const OMPDeclareReductionDecl *DRD) {
759 // Emit VarDecl with copy init for arrays.
760 // Get the address of the original variable captured in current
761 // captured region.
762 const auto *PrivateVD =
763 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
764 bool EmitDeclareReductionInit =
765 DRD && (DRD->getInitializer() || !PrivateVD->hasInit());
766 EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(),
767 EmitDeclareReductionInit,
768 EmitDeclareReductionInit ? ClausesData[N].ReductionOp
769 : PrivateVD->getInit(),
770 DRD, SharedAddr);
771}
772
776 ArrayRef<const Expr *> ReductionOps) {
777 ClausesData.reserve(Shareds.size());
778 SharedAddresses.reserve(Shareds.size());
779 Sizes.reserve(Shareds.size());
780 BaseDecls.reserve(Shareds.size());
781 const auto *IOrig = Origs.begin();
782 const auto *IPriv = Privates.begin();
783 const auto *IRed = ReductionOps.begin();
784 for (const Expr *Ref : Shareds) {
785 ClausesData.emplace_back(Ref, *IOrig, *IPriv, *IRed);
786 std::advance(IOrig, 1);
787 std::advance(IPriv, 1);
788 std::advance(IRed, 1);
789 }
790}
791
793 assert(SharedAddresses.size() == N && OrigAddresses.size() == N &&
794 "Number of generated lvalues must be exactly N.");
795 LValue First = emitSharedLValue(CGF, ClausesData[N].Shared);
796 LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Shared);
797 SharedAddresses.emplace_back(First, Second);
798 if (ClausesData[N].Shared == ClausesData[N].Ref) {
799 OrigAddresses.emplace_back(First, Second);
800 } else {
801 LValue First = emitSharedLValue(CGF, ClausesData[N].Ref);
802 LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref);
803 OrigAddresses.emplace_back(First, Second);
804 }
805}
806
808 QualType PrivateType = getPrivateType(N);
809 bool AsArraySection = isa<ArraySectionExpr>(ClausesData[N].Ref);
810 if (!PrivateType->isVariablyModifiedType()) {
811 Sizes.emplace_back(
812 CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType()),
813 nullptr);
814 return;
815 }
816 llvm::Value *Size;
817 llvm::Value *SizeInChars;
818 auto *ElemType = OrigAddresses[N].first.getAddress().getElementType();
819 auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType);
820 if (AsArraySection) {
821 Size = CGF.Builder.CreatePtrDiff(ElemType,
822 OrigAddresses[N].second.getPointer(CGF),
823 OrigAddresses[N].first.getPointer(CGF));
824 Size = CGF.Builder.CreateZExtOrTrunc(Size, ElemSizeOf->getType());
825 Size = CGF.Builder.CreateNUWAdd(
826 Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
827 SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf);
828 } else {
829 SizeInChars =
830 CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType());
831 Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf);
832 }
833 Sizes.emplace_back(SizeInChars, Size);
835 CGF,
837 CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
838 RValue::get(Size));
839 CGF.EmitVariablyModifiedType(PrivateType);
840}
841
843 llvm::Value *Size) {
844 QualType PrivateType = getPrivateType(N);
845 if (!PrivateType->isVariablyModifiedType()) {
846 assert(!Size && !Sizes[N].second &&
847 "Size should be nullptr for non-variably modified reduction "
848 "items.");
849 return;
850 }
852 CGF,
854 CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
855 RValue::get(Size));
856 CGF.EmitVariablyModifiedType(PrivateType);
857}
858
/// Initializes the private copy of reduction item N located at PrivateAddr.
///
/// Three cases are handled, in this order:
///  - the private variable has an array type: DefaultInit is invoked only when
///    the declare-reduction has an initializer, then the elements are
///    initialized through emitAggregateInitialization;
///  - a declare-reduction exists and either provides an initializer or the
///    private variable has no init expression: DefaultInit is invoked and the
///    reduction initializer is emitted;
///  - otherwise, if DefaultInit returns false and the private variable has a
///    non-trivial init expression, that expression is emitted into PrivateAddr.
// NOTE(review): listing line 859 (the head of the signature) is absent from
// this extract.
860 CodeGenFunction &CGF, unsigned N, Address PrivateAddr, Address SharedAddr,
861 llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) {
862 assert(SharedAddresses.size() > N && "No variable was generated");
863 const auto *PrivateVD =
864 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
865 const OMPDeclareReductionDecl *DRD =
866 getReductionInit(ClausesData[N].ReductionOp);
867 if (CGF.getContext().getAsArrayType(PrivateVD->getType())) {
 // The result of DefaultInit is deliberately ignored on this path.
868 if (DRD && DRD->getInitializer())
869 (void)DefaultInit(CGF);
870 emitAggregateInitialization(CGF, N, PrivateAddr, SharedAddr, DRD);
871 } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
872 (void)DefaultInit(CGF);
873 QualType SharedType = SharedAddresses[N].first.getType();
874 emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp,
875 PrivateAddr, SharedAddr, SharedType);
 // DefaultInit is evaluated first here; the explicit init expression is only
 // emitted when it reports false.
876 } else if (!DefaultInit(CGF) && PrivateVD->hasInit() &&
877 !CGF.isTrivialInitializer(PrivateVD->getInit())) {
878 CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr,
879 PrivateVD->getType().getQualifiers(),
880 /*IsInitializer=*/false);
881 }
882}
883
/// Returns true when the private type of reduction item N has a destruction
/// kind other than QualType::DK_none.
// NOTE(review): listing line 884 (the signature) is absent from this extract.
885 QualType PrivateType = getPrivateType(N);
886 QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
887 return DTorKind != QualType::DK_none;
888}
889
/// Pushes a destroy cleanup for the private copy of reduction item N when
/// needCleanups(N) reports that one is required.
// NOTE(review): listing line 890 (the head of the signature) is absent from
// this extract.
891 Address PrivateAddr) {
892 QualType PrivateType = getPrivateType(N);
893 QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
894 if (needCleanups(N)) {
 // Re-type the address to the in-memory type of the private type before
 // registering the cleanup.
895 PrivateAddr =
896 PrivateAddr.withElementType(CGF.ConvertTypeForMem(PrivateType));
897 CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType);
898 }
899}
900
901static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
902 LValue BaseLV) {
903 BaseTy = BaseTy.getNonReferenceType();
904 while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
905 !CGF.getContext().hasSameType(BaseTy, ElTy)) {
906 if (const auto *PtrTy = BaseTy->getAs<PointerType>()) {
907 BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(), PtrTy);
908 } else {
909 LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(), BaseTy);
910 BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal);
911 }
912 BaseTy = BaseTy->getPointeeType();
913 }
914 return CGF.MakeAddrLValue(
915 BaseLV.getAddress().withElementType(CGF.ConvertTypeForMem(ElTy)),
916 BaseLV.getType(), BaseLV.getBaseInfo(),
917 CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType()));
918}
919
/// Rebuilds, for the raw pointer Addr, the pointer/reference indirection that
/// separates BaseTy from ElTy.
///
/// One memory temporary is created per pointer/reference level; the pointer of
/// each newly created temporary is stored into the one created before it, and
/// Addr is stored into the last one. The first temporary created is returned.
/// When there is no level to rebuild, OriginalBaseAddress is returned with its
/// pointer replaced by Addr.
// NOTE(review): listing lines 920, 922, 938 and 944 are absent from this
// extract: the head of the signature, the declaration of Tmp, and the heads of
// the two statements whose argument lists appear below are not visible.
921 Address OriginalBaseAddress, llvm::Value *Addr) {
923 Address TopTmp = Address::invalid();
924 Address MostTopTmp = Address::invalid();
925 BaseTy = BaseTy.getNonReferenceType();
926 while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
927 !CGF.getContext().hasSameType(BaseTy, ElTy)) {
928 Tmp = CGF.CreateMemTempWithoutCast(BaseTy);
 // Chain the new temporary below the previous one; remember the very first
 // temporary so it can be returned.
929 if (TopTmp.isValid())
930 CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
931 else
932 MostTopTmp = Tmp;
933 TopTmp = Tmp;
934 BaseTy = BaseTy->getPointeeType();
935 }
936
937 if (Tmp.isValid()) {
939 Addr, Tmp.getElementType());
940 CGF.Builder.CreateStore(Addr, Tmp);
941 return MostTopTmp;
942 }
943
945 Addr, OriginalBaseAddress.getType());
946 return OriginalBaseAddress.withPointer(Addr, NotKnownNonNull);
947}
948
949static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) {
950 const VarDecl *OrigVD = nullptr;
951 if (const auto *OASE = dyn_cast<ArraySectionExpr>(Ref)) {
952 const Expr *Base = OASE->getBase()->IgnoreParenImpCasts();
953 while (const auto *TempOASE = dyn_cast<ArraySectionExpr>(Base))
954 Base = TempOASE->getBase()->IgnoreParenImpCasts();
955 while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
956 Base = TempASE->getBase()->IgnoreParenImpCasts();
958 OrigVD = cast<VarDecl>(DE->getDecl());
959 } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) {
960 const Expr *Base = ASE->getBase()->IgnoreParenImpCasts();
961 while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
962 Base = TempASE->getBase()->IgnoreParenImpCasts();
964 OrigVD = cast<VarDecl>(DE->getDecl());
965 }
966 return OrigVD;
967}
968
/// Records the base declaration of reduction item N in BaseDecls and returns
/// the address to use for its private copy.
///
/// For array-section / array-subscript items (getBaseDecl returns a variable)
/// the distance, in elements, between the start of the original base and the
/// shared item is applied to the private pointer, and the result is wrapped by
/// castToBase. For any other item PrivateAddr is returned unchanged.
// NOTE(review): listing lines 969 and 983 are absent from this extract: the
// head of the signature and the head of the call that produces PrivatePointer
// are not visible.
970 Address PrivateAddr) {
971 const DeclRefExpr *DE;
972 if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) {
973 BaseDecls.emplace_back(OrigVD);
974 LValue OriginalBaseLValue = CGF.EmitLValue(DE);
975 LValue BaseLValue =
976 loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(),
977 OriginalBaseLValue);
978 Address SharedAddr = SharedAddresses[N].first.getAddress();
 // Adjustment = (base start - shared item), counted in elements of the
 // shared address' element type.
979 llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff(
980 SharedAddr.getElementType(), BaseLValue.getPointer(CGF),
981 SharedAddr.emitRawPointer(CGF));
982 llvm::Value *PrivatePointer =
984 PrivateAddr.emitRawPointer(CGF), SharedAddr.getType());
 // Apply the same element offset to the private pointer.
985 llvm::Value *Ptr = CGF.Builder.CreateGEP(
986 SharedAddr.getElementType(), PrivatePointer, Adjustment);
987 return castToBase(CGF, OrigVD->getType(),
988 SharedAddresses[N].first.getType(),
989 OriginalBaseLValue.getAddress(), Ptr);
990 }
 // Plain variable reference: the item itself is the base declaration.
991 BaseDecls.emplace_back(
992 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl()));
993 return PrivateAddr;
994}
995
/// Returns true when the reduction operation of item N resolves to a
/// declare-reduction declaration that has an initializer.
// NOTE(review): listing line 996 (the signature) is absent from this extract.
997 const OMPDeclareReductionDecl *DRD =
998 getReductionInit(ClausesData[N].ReductionOp);
999 return DRD && DRD->getInitializer();
1000}
1001
1002LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
1003 return CGF.EmitLoadOfPointerLValue(
1004 CGF.GetAddrOfLocalVar(getThreadIDVariable()),
1005 getThreadIDVariable()->getType()->castAs<PointerType>());
1006}
1007
/// Emits the body of the region through the stored CodeGen callback, inside a
/// terminate scope pushed on the EH stack. Does nothing when CGF has no insert
/// point.
1008void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt *S) {
1009 if (!CGF.HaveInsertPoint())
1010 return;
1011 // 1.2.2 OpenMP Language Terminology
1012 // Structured block - An executable statement with a single entry at the
1013 // top and a single exit at the bottom.
1014 // The point of exit cannot be a branch out of the structured block.
1015 // longjmp() and throw() must not violate the entry/exit criteria.
1016 CGF.EHStack.pushTerminate();
1017 if (S)
 // NOTE(review): listing line 1018 is absent from this extract; presumably it
 // held the statement controlled by `if (S)`, so the call below should not be
 // read as conditional without checking the full file.
1019 CodeGen(CGF);
1020 CGF.EHStack.popTerminate();
1021}
1022
/// Returns an lvalue for the thread-id variable of a task region. Unlike the
/// CGOpenMPRegionInfo version, the local variable's own address is used
/// directly; no pointer is loaded.
// NOTE(review): listing line 1027 (the last argument of the call and its
// closing parenthesis) is absent from this extract.
1023LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
1024 CodeGenFunction &CGF) {
1025 return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
1026 getThreadIDVariable()->getType(),
1028}
1029
/// Creates an unnamed, public, non-mutable field of type FieldTy without a
/// bit-width or in-class initializer, adds it to DC and returns it.
// NOTE(review): listing line 1030 (the head of the signature) is absent from
// this extract.
1031 QualType FieldTy) {
1032 auto *Field = FieldDecl::Create(
1033 C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
1034 C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
1035 /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
1036 Field->setAccess(AS_public);
1037 DC->addDecl(Field);
1038 return Field;
1039}
1040
/// Constructor body: configures and initializes the OpenMPIRBuilder for this
/// module and loads offload-entry metadata.
// NOTE(review): listing lines 1041 and 1063 are absent from this extract: the
// head of the constructor and the first statement of the OpenMPForceUSM branch
// are not visible.
1042 : CGM(CGM), OMPBuilder(CGM.getModule()) {
 // Type used for critical-section names: an array of eight 32-bit integers.
1043 KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);
1044 llvm::OpenMPIRBuilderConfig Config(
1045 CGM.getLangOpts().OpenMPIsTargetDevice, isGPU(),
1046 CGM.getLangOpts().OpenMPOffloadMandatory,
1047 /*HasRequiresReverseOffload*/ false, /*HasRequiresUnifiedAddress*/ false,
1048 hasRequiresUnifiedSharedMemory(), /*HasRequiresDynamicAllocators*/ false);
1049 Config.setDefaultTargetAS(
1050 CGM.getContext().getTargetInfo().getTargetAddressSpace(LangAS::Default));
1051 Config.setRuntimeCC(CGM.getRuntimeCC());
1052
1053 OMPBuilder.setConfig(Config);
1054 OMPBuilder.initialize();
 // The host IR file is only passed when compiling for the target device;
 // otherwise an empty path is passed.
1055 OMPBuilder.loadOffloadInfoMetadata(*CGM.getFileSystem(),
1056 CGM.getLangOpts().OpenMPIsTargetDevice
1057 ? CGM.getLangOpts().OMPHostIRFile
1058 : StringRef{});
1059
1060 // The user forces the compiler to behave as if omp requires
1061 // unified_shared_memory was given.
1062 if (CGM.getLangOpts().OpenMPForceUSM) {
1064 OMPBuilder.Config.setHasRequiresUnifiedSharedMemory(true);
1065 }
1066}
1067
/// Clears InternalVars and erases every entry of EmittedNonTargetVariables
/// that is still alive, is a global variable, is only a declaration and has no
/// uses.
// NOTE(review): listing line 1068 (the signature) is absent from this extract.
1069 InternalVars.clear();
1070 // Clean non-target variable declarations possibly used only in debug info.
1071 for (const auto &Data : EmittedNonTargetVariables) {
1072 if (!Data.getValue().pointsToAliveValue())
1073 continue;
1074 auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue());
1075 if (!GV)
1076 continue;
 // Keep definitions and anything that is still referenced.
1077 if (!GV->isDeclaration() || GV->getNumUses() > 0)
1078 continue;
1079 GV->eraseFromParent();
1080 }
1081}
1082
/// Forwards Parts to OMPBuilder.createPlatformSpecificName and returns the
/// resulting name.
// NOTE(review): listing line 1083 (the signature) is absent from this extract.
1084 return OMPBuilder.createPlatformSpecificName(Parts);
1085}
1086
/// Emits an internal helper function for a user-defined reduction: either its
/// combiner (IsCombiner == true) or its initializer.
///
/// The helper takes two restrict-qualified pointers to Ty. The variables In
/// and Out are privatized to the pointees of those parameters, the initializer
/// of Out is emitted first when building an initializer helper, and then
/// CombinerInitializer (if any) is emitted as an ignored expression.
/// \returns the generated function.
// NOTE(review): listing lines 1088 and 1121 are absent from this extract: the
// head of the parameter list and the declaration of `Scope` are not visible.
1087static llvm::Function *
1089 const Expr *CombinerInitializer, const VarDecl *In,
1090 const VarDecl *Out, bool IsCombiner) {
1091 // void .omp_combiner.(Ty *out, Ty *in); -- parameter order follows Args below.
1092 ASTContext &C = CGM.getContext();
1093 QualType PtrTy = C.getPointerType(Ty).withRestrict();
1094 auto *OmpOutParm = ImplicitParamDecl::Create(
1095 C, /*DC=*/nullptr, Out->getLocation(),
1096 /*Id=*/nullptr, PtrTy, ImplicitParamKind::Other);
1097 auto *OmpInParm = ImplicitParamDecl::Create(
1098 C, /*DC=*/nullptr, In->getLocation(),
1099 /*Id=*/nullptr, PtrTy, ImplicitParamKind::Other);
1100 FunctionArgList Args{OmpOutParm, OmpInParm};
1101 const CGFunctionInfo &FnInfo =
1102 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
1103 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
1104 std::string Name = CGM.getOpenMPRuntime().getName(
1105 {IsCombiner ? "omp_combiner" : "omp_initializer", ""});
1106 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
1107 Name, &CGM.getModule());
1108 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
1109 if (!CGM.getCodeGenOpts().SampleProfileFile.empty())
1110 Fn->addFnAttr("sample-profile-suffix-elision-policy", "selected");
 // When optimizing, force the helper to be inlined.
1111 if (CGM.getCodeGenOpts().OptimizationLevel != 0) {
1112 Fn->removeFnAttr(llvm::Attribute::NoInline);
1113 Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
1114 Fn->addFnAttr(llvm::Attribute::AlwaysInline);
1115 }
1116 CodeGenFunction CGF(CGM);
1117 // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
1118 // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
1119 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(),
1120 Out->getLocation());
1122 Address AddrIn = CGF.GetAddrOfLocalVar(OmpInParm);
1123 Scope.addPrivate(
1124 In, CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>())
1125 .getAddress());
1126 Address AddrOut = CGF.GetAddrOfLocalVar(OmpOutParm);
1127 Scope.addPrivate(
1128 Out, CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>())
1129 .getAddress());
1130 (void)Scope.Privatize();
 // Initializer helpers first run the non-trivial initializer of Out, if any.
1131 if (!IsCombiner && Out->hasInit() &&
1132 !CGF.isTrivialInitializer(Out->getInit())) {
1133 CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out),
1134 Out->getType().getQualifiers(),
1135 /*IsInitializer=*/true);
1136 }
1137 if (CombinerInitializer)
1138 CGF.EmitIgnoredExpr(CombinerInitializer);
1139 Scope.ForceCleanup();
1140 CGF.FinishFunction();
1141 return Fn;
1142}
1143
/// Emits the combiner and, when D has an initializer, the initializer helper
/// for the declare-reduction D and caches the pair in UDRMap. Returns early
/// when D is already cached. When CGF is non-null, D is also recorded under
/// the current function in FunctionUDRMap.
// NOTE(review): listing lines 1144-1145, 1150-1151, 1155, 1157 and 1159-1160
// are absent from this extract, so the signature and several arguments of the
// two emitCombinerOrInitializer calls are not visible.
1146 if (UDRMap.count(D) > 0)
1147 return;
1148 llvm::Function *Combiner = emitCombinerOrInitializer(
1149 CGM, D->getType(), D->getCombiner(),
1152 /*IsCombiner=*/true);
 // Stays null when D has no initializer.
1153 llvm::Function *Initializer = nullptr;
1154 if (const Expr *Init = D->getInitializer()) {
1156 CGM, D->getType(),
1158 : nullptr,
1161 /*IsCombiner=*/false);
1162 }
1163 UDRMap.try_emplace(D, Combiner, Initializer);
1164 if (CGF)
1165 FunctionUDRMap[CGF->CurFn].push_back(D);
1166}
1167
/// Returns the (combiner, initializer) function pair cached for D in UDRMap,
/// emitting it first (with a null CodeGenFunction) when it is not cached yet.
// NOTE(review): listing line 1169 (the rest of the signature) is absent from
// this extract.
1168std::pair<llvm::Function *, llvm::Function *>
1170 auto I = UDRMap.find(D);
1171 if (I != UDRMap.end())
1172 return I->second;
1173 emitUserDefinedReduction(/*CGF=*/nullptr, D);
1174 return UDRMap.lookup(D);
1175}
1176
1177namespace {
1178// Temporary RAII solution to perform a push/pop stack event on the OpenMP IR
1179// Builder if one is present.
// The constructor pushes a FinalizationInfo (callback, directive kind,
// cancel flag) onto the builder; the destructor pops it again. Both are no-ops
// when the builder pointer is null.
1180struct PushAndPopStackRAII {
1181 PushAndPopStackRAII(llvm::OpenMPIRBuilder *OMPBuilder, CodeGenFunction &CGF,
1182 bool HasCancel, llvm::omp::Directive Kind)
1183 : OMPBuilder(OMPBuilder) {
1184 if (!OMPBuilder)
1185 return;
1186
1187 // The following callback is the crucial part of clang's cleanup process.
1188 //
1189 // NOTE:
1190 // Once the OpenMPIRBuilder is used to create parallel regions (and
1191 // similar), the cancellation destination (Dest below) is determined via
1192 // IP. That means if we have variables to finalize we split the block at IP,
1193 // use the new block (=BB) as destination to build a JumpDest (via
1194 // getJumpDestInCurrentScope(BB)) which then is fed to
1195 // EmitBranchThroughCleanup. Furthermore, there will not be the need
1196 // to push & pop a FinalizationInfo object.
1197 // The FiniCB will still be needed but at the point where the
1198 // OpenMPIRBuilder is asked to construct a parallel (or similar) construct.
 // The callback captures CGF by reference, so it must not be invoked after
 // CGF is gone.
1199 auto FiniCB = [&CGF](llvm::OpenMPIRBuilder::InsertPointTy IP) {
1200 assert(IP.getBlock()->end() == IP.getPoint() &&
1201 "Clang CG should cause non-terminated block!");
 // Temporarily move the builder to IP; the guard restores the previous
 // insert point on scope exit.
1202 CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
1203 CGF.Builder.restoreIP(IP);
 // NOTE(review): listing line 1204 (the declaration that the next line
 // initializes, i.e. `Dest`) is absent from this extract.
1205 CGF.getOMPCancelDestination(OMPD_parallel);
1206 CGF.EmitBranchThroughCleanup(Dest);
1207 return llvm::Error::success();
1208 };
1209
1210 // TODO: Remove this once we emit parallel regions through the
1211 // OpenMPIRBuilder as it can do this setup internally.
1212 llvm::OpenMPIRBuilder::FinalizationInfo FI({FiniCB, Kind, HasCancel});
1213 OMPBuilder->pushFinalizationCB(std::move(FI));
1214 }
1215 ~PushAndPopStackRAII() {
1216 if (OMPBuilder)
1217 OMPBuilder->popFinalizationCB();
1218 }
 // Builder whose finalization stack is managed; may be null.
1219 llvm::OpenMPIRBuilder *OMPBuilder;
1220};
1221} // namespace
1222
/// Generates the outlined function for the captured statement CS of directive
/// D and returns it.
///
/// The cancel flag is taken from D when it is one of the parallel-based
/// directive classes tested below and is false otherwise. A finalization
/// callback is pushed on the OpenMPIRBuilder for the duration of the emission.
/// ThreadIDVar must have pointer type.
// NOTE(review): listing line 1223 (the head of the signature) is absent from
// this extract.
1224 CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS,
1225 const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
1226 const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) {
1227 assert(ThreadIDVar->getType()->isPointerType() &&
1228 "thread id variable must be of type kmp_int32 *");
1229 CodeGenFunction CGF(CGM, true);
1230 bool HasCancel = false;
1231 if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D))
1232 HasCancel = OPD->hasCancel();
1233 else if (const auto *OPD = dyn_cast<OMPTargetParallelDirective>(&D))
1234 HasCancel = OPD->hasCancel();
1235 else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D))
1236 HasCancel = OPSD->hasCancel();
1237 else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
1238 HasCancel = OPFD->hasCancel();
1239 else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D))
1240 HasCancel = OPFD->hasCancel();
1241 else if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D))
1242 HasCancel = OPFD->hasCancel();
1243 else if (const auto *OPFD =
1244 dyn_cast<OMPTeamsDistributeParallelForDirective>(&D))
1245 HasCancel = OPFD->hasCancel();
1246 else if (const auto *OPFD =
1247 dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D))
1248 HasCancel = OPFD->hasCancel();
1249
1250 // TODO: Temporarily inform the OpenMPIRBuilder, if any, about the new
1251 // parallel region to make cancellation barriers work properly.
1252 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
1253 PushAndPopStackRAII PSR(&OMPBuilder, CGF, HasCancel, InnermostKind);
1254 CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
1255 HasCancel, OutlinedHelperName);
 // Install CGInfo as the captured-statement info of CGF for the emission.
1256 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
1257 return CGF.GenerateOpenMPCapturedStmtFunction(*CS, D);
1258}
1259
1260std::string CGOpenMPRuntime::getOutlinedHelperName(StringRef Name) const {
1261 std::string Suffix = getName({"omp_outlined"});
1262 return (Name + Suffix).str();
1263}
1264
/// Convenience overload: derives the outlined helper name from the name of the
/// function currently being emitted by CGF.
// NOTE(review): listing line 1265 (the signature) is absent from this extract.
1266 return getOutlinedHelperName(CGF.CurFn->getName());
1267}
1268
1269std::string CGOpenMPRuntime::getReductionFuncName(StringRef Name) const {
1270 std::string Suffix = getName({"omp", "reduction", "reduction_func"});
1271 return (Name + Suffix).str();
1272}
1273
/// Outlines the OMPD_parallel captured statement of D; the helper name is
/// derived from the function currently emitted by CGF.
// NOTE(review): listing lines 1274-1275 and 1279 are absent from this extract:
// the head of the signature and the head of the call whose arguments appear
// below are not visible.
1276 const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
1277 const RegionCodeGenTy &CodeGen) {
1278 const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel);
1280 CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(CGF),
1281 CodeGen);
1282}
1283
/// Outlines the OMPD_teams captured statement of D; the helper name is derived
/// from the function currently emitted by CGF.
// NOTE(review): listing lines 1284-1285 and 1289 are absent from this extract:
// the head of the signature and the head of the call whose arguments appear
// below are not visible.
1286 const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
1287 const RegionCodeGenTy &CodeGen) {
1288 const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams);
1290 CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(CGF),
1291 CodeGen);
1292}
1293
/// Generates the outlined function for a task or taskloop directive D and
/// returns it.
///
/// An untied-task action is attached to CodeGen; for untied tasks
/// (Tied == false) NumberOfParts receives the number of parts reported by that
/// action, otherwise it is left unchanged. ThreadIDVar must NOT have pointer
/// type.
// NOTE(review): listing line 1294 (the head of the signature) is absent from
// this extract.
1295 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1296 const VarDecl *PartIDVar, const VarDecl *TaskTVar,
1297 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
1298 bool Tied, unsigned &NumberOfParts) {
 // Code generator handed to the untied-task action: it emits a call to
 // __kmpc_omp_task with the update location, the thread id and the pointer
 // loaded from TaskTVar.
1299 auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF,
1300 PrePostActionTy &) {
1301 llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc());
1302 llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getBeginLoc());
1303 llvm::Value *TaskArgs[] = {
1304 UpLoc, ThreadID,
1305 CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar),
1306 TaskTVar->getType()->castAs<PointerType>())
1307 .getPointer(CGF)};
1308 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
1309 CGM.getModule(), OMPRTL___kmpc_omp_task),
1310 TaskArgs);
1311 };
1312 CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar,
1313 UntiedCodeGen);
1314 CodeGen.setAction(Action);
1315 assert(!ThreadIDVar->getType()->isPointerType() &&
1316 "thread id variable must be of type kmp_int32 for tasks");
 // Taskloop directives capture under OMPD_taskloop, all others under
 // OMPD_task.
1317 const OpenMPDirectiveKind Region =
1318 isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop
1319 : OMPD_task;
1320 const CapturedStmt *CS = D.getCapturedStmt(Region);
 // The cancel flag is only taken from the directive classes tested below.
1321 bool HasCancel = false;
1322 if (const auto *TD = dyn_cast<OMPTaskDirective>(&D))
1323 HasCancel = TD->hasCancel();
1324 else if (const auto *TD = dyn_cast<OMPTaskLoopDirective>(&D))
1325 HasCancel = TD->hasCancel();
1326 else if (const auto *TD = dyn_cast<OMPMasterTaskLoopDirective>(&D))
1327 HasCancel = TD->hasCancel();
1328 else if (const auto *TD = dyn_cast<OMPParallelMasterTaskLoopDirective>(&D))
1329 HasCancel = TD->hasCancel();
1330
1331 CodeGenFunction CGF(CGM, true);
1332 CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
1333 InnermostKind, HasCancel, Action);
1334 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
1335 llvm::Function *Res = CGF.GenerateCapturedStmtFunction(*CS);
1336 if (!Tied)
1337 NumberOfParts = Action.getNumberOfParts();
1338 return Res;
1339}
1340
/// Creates the "svcpt" service insert point for the current function of CGF
/// and stores it in OpenMPLocThreadIDMap. The insert point must not be set
/// already.
///
/// The marker is a bitcast of an undef i32 to i32. With AtCurrentPoint it is
/// created in the builder's current insert block; otherwise it is inserted
/// right after CGF.AllocaInsertPt.
// NOTE(review): listing line 1341 (the head of the signature) is absent from
// this extract.
1342 bool AtCurrentPoint) {
1343 auto &Elem = OpenMPLocThreadIDMap[CGF.CurFn];
1344 assert(!Elem.ServiceInsertPt && "Insert point is set already.");
1345
1346 llvm::Value *Undef = llvm::UndefValue::get(CGF.Int32Ty);
1347 if (AtCurrentPoint) {
1348 Elem.ServiceInsertPt = new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt",
1349 CGF.Builder.GetInsertBlock());
1350 } else {
1351 Elem.ServiceInsertPt = new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt");
1352 Elem.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt->getIterator());
1353 }
1354}
1355
/// Removes the service insert point of the current function of CGF, if one is
/// set, and erases the marker instruction from its parent.
// NOTE(review): listing line 1356 (the signature) is absent from this extract.
1357 auto &Elem = OpenMPLocThreadIDMap[CGF.CurFn];
1358 if (Elem.ServiceInsertPt) {
 // Reset the map entry before erasing the instruction.
1359 llvm::Instruction *Ptr = Elem.ServiceInsertPt;
1360 Elem.ServiceInsertPt = nullptr;
1361 Ptr->eraseFromParent();
1362 }
1363}
1364
/// Writes an ident string of the form ";file;function;line;column;;" into
/// Buffer and returns a reference to the written text.
///
/// The file name is remapped through the debug info's remapDIPath when CGF has
/// debug info; the function part is empty when the current declaration is not
/// a FunctionDecl.
// NOTE(review): listing lines 1365 and 1370 are absent from this extract: the
// head of the signature and the declaration of `PLoc` are not visible.
1366 SourceLocation Loc,
1367 SmallString<128> &Buffer) {
1368 llvm::raw_svector_ostream OS(Buffer);
1369 // Build debug location
1371 OS << ";";
1372 if (auto *DbgInfo = CGF.getDebugInfo())
1373 OS << DbgInfo->remapDIPath(PLoc.getFilename());
1374 else
1375 OS << PLoc.getFilename();
1376 OS << ";";
1377 if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
1378 OS << FD->getQualifiedNameAsString();
1379 OS << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
1380 return OS.str();
1381}
1382
/// Returns the ident value for Loc obtained from OMPBuilder.getOrCreateIdent.
///
/// The default source-location string is used when Loc is invalid, or when
/// EmitLoc is false and no debug info is generated. Otherwise the string is
/// built from the current function name, the (possibly remapped) file name and
/// the line/column of the location.
// NOTE(review): listing lines 1383 and 1397 are absent from this extract: the
// head of the signature and the declaration of `PLoc` are not visible.
1384 SourceLocation Loc,
1385 unsigned Flags, bool EmitLoc) {
1386 uint32_t SrcLocStrSize;
1387 llvm::Constant *SrcLocStr;
1388 if ((!EmitLoc && CGM.getCodeGenOpts().getDebugInfo() ==
1389 llvm::codegenoptions::NoDebugInfo) ||
1390 Loc.isInvalid()) {
1391 SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize);
1392 } else {
1393 std::string FunctionName;
1394 std::string FileName;
 // FunctionName stays empty outside of a FunctionDecl.
1395 if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
1396 FunctionName = FD->getQualifiedNameAsString();
1398 if (auto *DbgInfo = CGF.getDebugInfo())
1399 FileName = DbgInfo->remapDIPath(PLoc.getFilename());
1400 else
1401 FileName = PLoc.getFilename();
1402 unsigned Line = PLoc.getLine();
1403 unsigned Column = PLoc.getColumn();
1404 SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(FunctionName, FileName, Line,
1405 Column, SrcLocStrSize);
1406 }
1407 unsigned Reserved2Flags = getDefaultLocationReserved2Flags();
1408 return OMPBuilder.getOrCreateIdent(
1409 SrcLocStr, SrcLocStrSize, llvm::omp::IdentFlag(Flags), Reserved2Flags);
1410}
1411
/// Returns the OpenMP thread id value to use at the current point of CGF.
///
/// Sources, in order of preference:
///  1. the OpenMPIRBuilder, when LangOpts.OpenMPIRBuilder is set;
///  2. the value cached for the current function in OpenMPLocThreadIDMap;
///  3. a load of the region's thread-id variable, when the conditions checked
///     below allow it (cached only if emitted in the alloca block);
///  4. a call to __kmpc_global_thread_num emitted at the function's service
///     insert point, which is then cached.
// NOTE(review): listing lines 1412, 1467 and 1470 are absent from this
// extract: the head of the signature, the statement controlled by
// `if (!Elem.ServiceInsertPt)` and one statement before the call creation are
// not visible.
1413 SourceLocation Loc) {
1414 assert(CGF.CurFn && "No function in current CodeGenFunction.");
1415 // If the OpenMPIRBuilder is used we need to use it for all thread id calls as
1416 // the clang invariants used below might be broken.
1417 if (CGM.getLangOpts().OpenMPIRBuilder) {
1418 SmallString<128> Buffer;
1419 OMPBuilder.updateToLocation(CGF.Builder.saveIP());
1420 uint32_t SrcLocStrSize;
1421 auto *SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(
1422 getIdentStringFromSourceLocation(CGF, Loc, Buffer), SrcLocStrSize);
1423 return OMPBuilder.getOrCreateThreadID(
1424 OMPBuilder.getOrCreateIdent(SrcLocStr, SrcLocStrSize));
1425 }
1426
1427 llvm::Value *ThreadID = nullptr;
1428 // Check whether we've already cached a load of the thread id in this
1429 // function.
1430 auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
1431 if (I != OpenMPLocThreadIDMap.end()) {
1432 ThreadID = I->second.ThreadID;
1433 if (ThreadID != nullptr)
1434 return ThreadID;
1435 }
1436 // If exceptions are enabled, do not use parameter to avoid possible crash.
1437 if (auto *OMPRegionInfo =
1438 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
1439 if (OMPRegionInfo->getThreadIDVariable()) {
1440 // Check if this is an outlined function with thread id passed as argument.
1441 LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
1442 llvm::BasicBlock *TopBlock = CGF.AllocaInsertPt->getParent();
 // Load from the variable when no landing pad is required, C++ exceptions
 // are off, we are emitting into the alloca block, or the pointer is not
 // an instruction or is defined in the alloca block or the current block.
1443 if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions ||
1444 !CGF.getLangOpts().CXXExceptions ||
1445 CGF.Builder.GetInsertBlock() == TopBlock ||
1446 !isa<llvm::Instruction>(LVal.getPointer(CGF)) ||
1447 cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
1448 TopBlock ||
1449 cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
1450 CGF.Builder.GetInsertBlock()) {
1451 ThreadID = CGF.EmitLoadOfScalar(LVal, Loc);
1452 // If value loaded in entry block, cache it and use it everywhere in
1453 // function.
1454 if (CGF.Builder.GetInsertBlock() == TopBlock)
1455 OpenMPLocThreadIDMap[CGF.CurFn].ThreadID = ThreadID;
1456 return ThreadID;
1457 }
1458 }
1459 }
1460
1461 // This is not an outlined function region - need to call
1462 // kmp_int32 __kmpc_global_thread_num(ident_t *loc).
1463 // Generate thread id value and cache this value for use across the
1464 // function.
1465 auto &Elem = OpenMPLocThreadIDMap[CGF.CurFn];
1466 if (!Elem.ServiceInsertPt)
 // Emit the runtime call at the service insert point; the guard restores the
 // builder's previous insert point on scope exit.
1468 CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
1469 CGF.Builder.SetInsertPoint(Elem.ServiceInsertPt);
1471 llvm::CallInst *Call = CGF.Builder.CreateCall(
1472 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
1473 OMPRTL___kmpc_global_thread_num),
1474 emitUpdateLocation(CGF, Loc));
1475 Call->setCallingConv(CGF.getRuntimeCC());
1476 Elem.ThreadID = Call;
1477 return Call;
1478}
1479
/// Drops the per-function state kept for the current function of CGF: its
/// OpenMPLocThreadIDMap entry, and the user-defined reductions and mappers
/// recorded for it (these are also removed from UDRMap / UDMMap).
// NOTE(review): listing lines 1480, 1483 and 1496-1497 are absent from this
// extract: the signature, one statement before the map entry is erased and two
// statements at the end of the function are not visible.
1481 assert(CGF.CurFn && "No function in current CodeGenFunction.");
1482 if (OpenMPLocThreadIDMap.count(CGF.CurFn)) {
1484 OpenMPLocThreadIDMap.erase(CGF.CurFn);
1485 }
1486 if (auto I = FunctionUDRMap.find(CGF.CurFn); I != FunctionUDRMap.end()) {
1487 for (const auto *D : I->second)
1488 UDRMap.erase(D);
1489 FunctionUDRMap.erase(I);
1490 }
1491 if (auto I = FunctionUDMMap.find(CGF.CurFn); I != FunctionUDMMap.end()) {
1492 for (const auto *D : I->second)
1493 UDMMap.erase(D);
1494 FunctionUDMMap.erase(I);
1495 }
1498}
1499
/// Returns the IdentPtr type held by the OpenMPIRBuilder.
// NOTE(review): listing line 1500 (the signature) is absent from this extract.
1501 return OMPBuilder.IdentPtr;
1502}
1503
/// Translates the declare-target device type of VD into the corresponding
/// OffloadEntriesInfoManager device-clause kind. OMPTargetDeviceClauseNone is
/// returned when VD has no device type or the value is not one of the three
/// handled below.
// NOTE(review): listing line 1505 (the function name and parameter list) is
// absent from this extract.
1504static llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseKind
1506 std::optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
1507 OMPDeclareTargetDeclAttr::getDeviceType(VD);
1508 if (!DevTy)
1509 return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseNone;
1510
1511 switch ((int)*DevTy) { // Avoid -Wcovered-switch-default
1512 case OMPDeclareTargetDeclAttr::DT_Host:
1513 return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseHost;
1514 break;
1515 case OMPDeclareTargetDeclAttr::DT_NoHost:
1516 return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseNoHost;
1517 break;
1518 case OMPDeclareTargetDeclAttr::DT_Any:
1519 return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseAny;
1520 break;
1521 default:
1522 return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseNone;
1523 break;
1524 }
1525}
1526
/// Translates the declare-target map type of VD into the corresponding
/// OffloadEntriesInfoManager global-variable entry kind.
/// OMPTargetGlobalVarEntryNone is returned when VD is not a declare-target
/// declaration or the map type is not handled below. MT_Local must never reach
/// this function.
// NOTE(review): listing line 1528 (the function name and parameter list) is
// absent from this extract.
1527static llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind
1529 std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> MapType =
1530 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
1531 if (!MapType)
1532 return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryNone;
1533 switch ((int)*MapType) { // Avoid -Wcovered-switch-default
1534 case OMPDeclareTargetDeclAttr::MapTypeTy::MT_To:
1535 return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryTo;
1536 break;
1537 case OMPDeclareTargetDeclAttr::MapTypeTy::MT_Enter:
1538 return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryEnter;
1539 case OMPDeclareTargetDeclAttr::MapTypeTy::MT_Link:
1540 return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryLink;
1541 break;
1542 case OMPDeclareTargetDeclAttr::MapTypeTy::MT_Local:
1543 // MT_Local variables don't need offload entry (device-local).
1544 llvm_unreachable("MT_Local should not reach convertCaptureClause");
1545 break;
1546 default:
1547 return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryNone;
1548 break;
1549 }
1550}
1551
/// Builds the unique target-region entry info for BeginLoc by handing a
/// (file name, line) callback, the module's file system and ParentName to
/// OMPBuilder.getTargetEntryUniqueInfo.
// NOTE(review): listing line 1557 (the declaration of `SM` used inside the
// callback) is absent from this extract.
1552static llvm::TargetRegionEntryInfo getEntryInfoFromPresumedLoc(
1553 CodeGenModule &CGM, llvm::OpenMPIRBuilder &OMPBuilder,
1554 SourceLocation BeginLoc, llvm::StringRef ParentName = "") {
1555
1556 auto FileInfoCallBack = [&]() {
1558 PresumedLoc PLoc = SM.getPresumedLoc(BeginLoc);
1559
 // If the presumed file does not exist in the file system, recompute the
 // location without honoring line directives.
1560 if (!CGM.getFileSystem()->exists(PLoc.getFilename()))
1561 PLoc = SM.getPresumedLoc(BeginLoc, /*UseLineDirectives=*/false);
1562
1563 return std::pair<std::string, uint64_t>(PLoc.getFilename(), PLoc.getLine());
1564 };
1565
1566 return OMPBuilder.getTargetEntryUniqueInfo(FileInfoCallBack,
1567 *CGM.getFileSystem(), ParentName);
1568}
1569
/// Asks the OpenMPIRBuilder for the address to use for the declare-target
/// variable VD and wraps it in a ConstantAddress with VD's declared alignment.
/// Returns ConstantAddress::invalid() when the builder yields no address.
// NOTE(review): listing lines 1570, 1582 and 1585 are absent from this
// extract: the signature and some arguments of the getAddrOfDeclareTargetVar
// call are not visible.
 // Callbacks used by the builder to obtain VD's global address and linkage.
1571 auto AddrOfGlobal = [&VD, this]() { return CGM.GetAddrOfGlobal(VD); };
1572
1573 auto LinkageForVariable = [&VD, this]() {
1574 return CGM.getLLVMLinkageVarDefinition(VD);
1575 };
1576
 // Filled by the builder call below; not read afterwards in this function.
1577 std::vector<llvm::GlobalVariable *> GeneratedRefs;
1578
1579 llvm::Type *LlvmPtrTy = CGM.getTypes().ConvertTypeForMem(
1580 CGM.getContext().getPointerType(VD->getType()));
1581 llvm::Constant *addr = OMPBuilder.getAddrOfDeclareTargetVar(
1583 VD->hasDefinition(CGM.getContext()) == VarDecl::DeclarationOnly,
1584 VD->isExternallyVisible(),
1586 VD->getCanonicalDecl()->getBeginLoc()),
1587 CGM.getMangledName(VD), GeneratedRefs, CGM.getLangOpts().OpenMPSimd,
1588 CGM.getLangOpts().OMPTargetTriples, LlvmPtrTy, AddrOfGlobal,
1589 LinkageForVariable);
1590
1591 if (!addr)
1592 return ConstantAddress::invalid();
1593 return ConstantAddress(addr, LlvmPtrTy, CGM.getContext().getDeclAlign(VD));
1594}
1595
/// Returns the internal variable that serves as the threadprivate cache of VD,
/// creating it on first use. Its name is the mangled name of VD followed by
/// the platform-specific name built from the parts "cache" and "". Must not be
/// called when TLS is both requested and supported by the target.
// NOTE(review): listing line 1597 (the function name and parameter list) is
// absent from this extract.
1596llvm::Constant *
1598 assert(!CGM.getLangOpts().OpenMPUseTLS ||
1599 !CGM.getContext().getTargetInfo().isTLSSupported());
1600 // Lookup the entry, lazily creating it if necessary.
1601 std::string Suffix = getName({"cache", ""});
1602 return OMPBuilder.getOrCreateInternalVariable(
1603 CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix).str());
1604}
1605
/// Returns the address of the calling thread's copy of the threadprivate
/// variable VD.
///
/// With TLS requested and supported, VDAddr is returned as is. Otherwise a
/// call to __kmpc_threadprivate_cached is emitted and its result is returned
/// as an i8-typed address with the alignment of VDAddr.
// NOTE(review): listing lines 1606 and 1619 are absent from this extract: the
// head of the signature and the last element of `Args` are not visible.
1607 const VarDecl *VD,
1608 Address VDAddr,
1609 SourceLocation Loc) {
1610 if (CGM.getLangOpts().OpenMPUseTLS &&
1611 CGM.getContext().getTargetInfo().isTLSSupported())
1612 return VDAddr;
1613
1614 llvm::Type *VarTy = VDAddr.getElementType();
 // Visible arguments: location, thread id, the variable's address as i8*,
 // and its store size.
1615 llvm::Value *Args[] = {
1616 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
1617 CGF.Builder.CreatePointerCast(VDAddr.emitRawPointer(CGF), CGM.Int8PtrTy),
1618 CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)),
1620 return Address(
1621 CGF.EmitRuntimeCall(
1622 OMPBuilder.getOrCreateRuntimeFunction(
1623 CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
1624 Args),
1625 CGF.Int8Ty, VDAddr.getAlignment());
1626}
1627
/// Emits the runtime calls that register the constructor, copy constructor and
/// destructor of the threadprivate variable at VDAddr: first
/// __kmpc_global_thread_num, then __kmpc_threadprivate_register.
// NOTE(review): listing line 1628 (the head of the signature) is absent from
// this extract.
1629 CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor,
1630 llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
1631 // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime
1632 // library.
1633 llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc);
1634 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
1635 CGM.getModule(), OMPRTL___kmpc_global_thread_num),
1636 OMPLoc);
1637 // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
1638 // to register constructor/destructor for variable.
1639 llvm::Value *Args[] = {
1640 OMPLoc,
1641 CGF.Builder.CreatePointerCast(VDAddr.emitRawPointer(CGF), CGM.VoidPtrTy),
1642 Ctor, CopyCtor, Dtor};
1643 CGF.EmitRuntimeCall(
1644 OMPBuilder.getOrCreateRuntimeFunction(
1645 CGM.getModule(), OMPRTL___kmpc_threadprivate_register),
1646 Args);
1647}
1648
/// Emits the constructor/destructor helpers of the threadprivate variable VD
/// and registers them with the runtime.
///
/// Nothing is emitted (nullptr is returned) when TLS is requested and
/// supported, when VD has no definition, when the definition was already
/// handled (tracked by mangled name in ThreadPrivateWithDefinition), or when
/// neither helper is needed. Without a CGF, a standalone init function that
/// performs the registration is created and returned; with a CGF the
/// registration is emitted into it and nullptr is returned.
// NOTE(review): listing lines 1649 and 1693 are absent from this extract: the
// head of the signature and the line opening the block that builds the
// destructor helper are not visible.
1650 const VarDecl *VD, Address VDAddr, SourceLocation Loc,
1651 bool PerformInit, CodeGenFunction *CGF) {
1652 if (CGM.getLangOpts().OpenMPUseTLS &&
1653 CGM.getContext().getTargetInfo().isTLSSupported())
1654 return nullptr;
1655
1656 VD = VD->getDefinition(CGM.getContext());
1657 if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) {
1658 QualType ASTTy = VD->getType();
1659
1660 llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
1661 const Expr *Init = VD->getAnyInitializer();
1662 if (CGM.getLangOpts().CPlusPlus && PerformInit) {
1663 // Generate function that re-emits the declaration's initializer into the
1664 // threadprivate copy of the variable VD
1665 CodeGenFunction CtorCGF(CGM);
1666 auto *Dst = ImplicitParamDecl::Create(
1667 CGM.getContext(), /*DC=*/nullptr, Loc,
1668 /*Id=*/nullptr, CGM.getContext().VoidPtrTy, ImplicitParamKind::Other);
1669
1670 FunctionArgList Args{Dst};
1671 const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
1672 CGM.getContext().VoidPtrTy, Args);
1673 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
1674 std::string Name = getName({"__kmpc_global_ctor_", ""});
1675 llvm::Function *Fn =
1676 CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
1677 CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
1678 Args, Loc, Loc);
1679 llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar(
1680 CtorCGF.GetAddrOfLocalVar(Dst), /*Volatile=*/false,
1681 CGM.getContext().VoidPtrTy, Dst->getLocation());
1682 Address Arg(ArgVal, CtorCGF.ConvertTypeForMem(ASTTy),
1683 VDAddr.getAlignment());
1684 CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
1685 /*IsInitializer=*/true);
 // The helper returns its pointer argument: reload it and store it into
 // the return slot.
1686 ArgVal = CtorCGF.EmitLoadOfScalar(
1687 CtorCGF.GetAddrOfLocalVar(Dst), /*Volatile=*/false,
1688 CGM.getContext().VoidPtrTy, Dst->getLocation());
1689 CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
1690 CtorCGF.FinishFunction();
1691 Ctor = Fn;
1692 }
1694 // Generate function that emits destructor call for the threadprivate copy
1695 // of the variable VD
1696 CodeGenFunction DtorCGF(CGM);
1697 auto *Dst = ImplicitParamDecl::Create(
1698 CGM.getContext(), /*DC=*/nullptr, Loc,
1699 /*Id=*/nullptr, CGM.getContext().VoidPtrTy, ImplicitParamKind::Other);
1700
1701 FunctionArgList Args{Dst};
1702 const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
1703 CGM.getContext().VoidTy, Args);
1704 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
1705 std::string Name = getName({"__kmpc_global_dtor_", ""});
1706 llvm::Function *Fn =
1707 CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
1708 auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
1709 DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
1710 Loc, Loc);
1711 // Create a scope with an artificial location for the body of this function.
1712 auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
1713 llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar(
1714 DtorCGF.GetAddrOfLocalVar(Dst),
1715 /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst->getLocation());
1716 DtorCGF.emitDestroy(
1717 Address(ArgVal, DtorCGF.Int8Ty, VDAddr.getAlignment()), ASTTy,
1718 DtorCGF.getDestroyer(ASTTy.isDestructedType()),
1719 DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
1720 DtorCGF.FinishFunction();
1721 Dtor = Fn;
1722 }
1723 // Do not emit init function if it is not required.
1724 if (!Ctor && !Dtor)
1725 return nullptr;
1726
1727 // Copying constructor for the threadprivate variable.
1728 // Must be NULL - reserved by runtime, but currently it requires that this
1729 // parameter is always NULL. Otherwise it fires assertion.
1730 CopyCtor = llvm::Constant::getNullValue(CGM.DefaultPtrTy);
 // A helper that was not generated is passed to the runtime as a null
 // pointer.
1731 if (Ctor == nullptr) {
1732 Ctor = llvm::Constant::getNullValue(CGM.DefaultPtrTy);
1733 }
1734 if (Dtor == nullptr) {
1735 Dtor = llvm::Constant::getNullValue(CGM.DefaultPtrTy);
1736 }
1737 if (!CGF) {
 // No function to emit into: create a nullary init function that performs
 // the registration and hand it back to the caller.
1738 auto *InitFunctionTy =
1739 llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
1740 std::string Name = getName({"__omp_threadprivate_init_", ""});
1741 llvm::Function *InitFunction = CGM.CreateGlobalInitOrCleanUpFunction(
1742 InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction());
1743 CodeGenFunction InitCGF(CGM);
1744 FunctionArgList ArgList;
1745 InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
1746 CGM.getTypes().arrangeNullaryFunction(), ArgList,
1747 Loc, Loc);
1748 emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
1749 InitCGF.FinishFunction();
1750 return InitFunction;
1751 }
1752 emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
1753 }
1754 return nullptr;
1755}
1756
// Registers an offload entry for an active 'indirect' declare-target function.
// Returns early unless FD has an active declare-target attribute whose
// indirect flag is set. When compiling for the target device, a new constant,
// externally linked, protected global initialized with GV is created and
// registered; otherwise GV itself is registered.
// NOTE(review): the first line of this signature and one argument line of the
// getEntryInfoFromPresumedLoc call are absent from this listing.
1758 llvm::GlobalValue *GV) {
1759 std::optional<OMPDeclareTargetDeclAttr *> ActiveAttr =
1760 OMPDeclareTargetDeclAttr::getActiveAttr(FD);
1761
1762 // We only need to handle active 'indirect' declare target functions.
1763 if (!ActiveAttr || !(*ActiveAttr)->getIndirect())
1764 return;
1765
1766 // Get a mangled name to store the new device global in.
1767 llvm::TargetRegionEntryInfo EntryInfo = getEntryInfoFromPresumedLoc(
1769 SmallString<128> Name;
1770 OMPBuilder.OffloadInfoManager.getTargetRegionEntryFnName(Name, EntryInfo);
1771
1772 // We need to generate a new global to hold the address of the indirectly
1773 // called device function. Doing this allows us to keep the visibility and
1774 // linkage of the associated function unchanged while allowing the runtime to
1775 // access its value.
1776 llvm::GlobalValue *Addr = GV;
1777 if (CGM.getLangOpts().OpenMPIsTargetDevice) {
     // The pointer type lives in the program address space; the new global
     // itself is placed in the default globals address space.
1778 llvm::PointerType *FnPtrTy = llvm::PointerType::get(
1779 CGM.getLLVMContext(),
1780 CGM.getModule().getDataLayout().getProgramAddressSpace());
1781 Addr = new llvm::GlobalVariable(
1782 CGM.getModule(), FnPtrTy,
1783 /*isConstant=*/true, llvm::GlobalValue::ExternalLinkage, GV, Name,
1784 nullptr, llvm::GlobalValue::NotThreadLocal,
1785 CGM.getModule().getDataLayout().getDefaultGlobalsAddressSpace());
1786 Addr->setVisibility(llvm::GlobalValue::ProtectedVisibility);
1787 }
1788
1789 // Register Addr (the new device global, or GV itself when not compiling
1790 // for the target device) as an OMPTargetGlobalVarEntryIndirect entry with
1791 // weak ODR linkage. The size passed is the target store size of a void
1792 // pointer.
1793 OMPBuilder.OffloadInfoManager.registerDeviceGlobalVarEntryInfo(
1794 Name, Addr, CGM.GetTargetTypeStoreSize(CGM.VoidPtrTy).getQuantity(),
1795 llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryIndirect,
1796 llvm::GlobalValue::WeakODRLinkage);
1797}
1798
// Registers an offload entry of kind OMPTargetGlobalVarEntryIndirectVTable for
// the given vtable. The entry name is the target-region entry name derived
// from EntryInfo with "addr" appended; the registered size is the alloc size
// of the vtable initializer's type. When compiling for the target device an
// additional constant, externally linked, protected global named by that entry
// name and initialized with VTable is also created.
// NOTE(review): one argument line of the getEntryInfoFromPresumedLoc call is
// absent from this listing, so how VD is used cannot be seen here.
1799void CGOpenMPRuntime::registerVTableOffloadEntry(llvm::GlobalVariable *VTable,
1800 const VarDecl *VD) {
1801 // TODO: add logic to avoid duplicate vtable registrations per
1802 // translation unit; though for external linkage, this should no
1803 // longer be an issue - or at least we can avoid the issue by
1804 // checking for an existing offloading entry. But, perhaps the
1805 // better approach is to defer emission of the vtables and offload
1806 // entries until later (by tracking a list of items that need to be
1807 // emitted).
1808
1809 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
1810
1811 // Generate a new externally visible global to point to the
1812 // internally visible vtable. Doing this allows us to keep the
1813 // visibility and linkage of the associated vtable unchanged while
1814 // allowing the runtime to access its value. The externally
1815 // visible global var needs to be emitted with a unique mangled
1816 // name that won't conflict with similarly named (internal)
1817 // vtables in other translation units.
1818
1819 // Register vtable with source location of dynamic object in map
1820 // clause.
1821 llvm::TargetRegionEntryInfo EntryInfo = getEntryInfoFromPresumedLoc(
1823 VTable->getName());
1824
1825 llvm::GlobalVariable *Addr = VTable;
1826 SmallString<128> AddrName;
1827 OMPBuilder.OffloadInfoManager.getTargetRegionEntryFnName(AddrName, EntryInfo);
1828 AddrName.append("addr");
1829
1830 if (CGM.getLangOpts().OpenMPIsTargetDevice) {
1831 Addr = new llvm::GlobalVariable(
1832 CGM.getModule(), VTable->getType(),
1833 /*isConstant=*/true, llvm::GlobalValue::ExternalLinkage, VTable,
1834 AddrName,
1835 /*InsertBefore=*/nullptr, llvm::GlobalValue::NotThreadLocal,
1836 CGM.getModule().getDataLayout().getDefaultGlobalsAddressSpace());
1837 Addr->setVisibility(llvm::GlobalValue::ProtectedVisibility);
1838 }
     // NOTE(review): the entry is registered with VTable, not with Addr; Addr
     // is not read after this point. Confirm this is intended.
1839 OMPBuilder.OffloadInfoManager.registerDeviceGlobalVarEntryInfo(
1840 AddrName, VTable,
1841 CGM.getDataLayout().getTypeAllocSize(VTable->getInitializer()->getType()),
1842 llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryIndirectVTable,
1843 llvm::GlobalValue::WeakODRLinkage);
1844}
1845
// Emits the vtable of CXXRecord and registers it as an offload entry, the
// first time a dynamic class is seen (tracked in VTableDeclMap). It then
// visits the class's fields and recurses into its base classes.
// NOTE(review): the leading signature lines and the statement controlled by
// the 'if' in the field loop are absent from this listing.
1848 const VarDecl *VD) {
1849 // Register C++ VTable to OpenMP Offload Entry if it's a new
1850 // CXXRecordDecl.
1851 if (CXXRecord && CXXRecord->isDynamicClass() &&
1852 !CGM.getOpenMPRuntime().VTableDeclMap.contains(CXXRecord)) {
     // The map is keyed by the record; Res.second reports whether the insert
     // actually happened.
1853 auto Res = CGM.getOpenMPRuntime().VTableDeclMap.try_emplace(CXXRecord, VD);
1854 if (Res.second) {
1855 CGM.EmitVTable(CXXRecord);
     // NOTE(review): VTables is declared by value here - confirm a copy of
     // the object returned by getVTables() is intended.
1856 CodeGenVTables VTables = CGM.getVTables();
1857 llvm::GlobalVariable *VTablesAddr = VTables.GetAddrOfVTable(CXXRecord);
1858 assert(VTablesAddr && "Expected non-null VTable address");
1859 CGM.getOpenMPRuntime().registerVTableOffloadEntry(VTablesAddr, VD);
1860 // Emit VTable for all the fields containing dynamic CXXRecord
1861 for (const FieldDecl *Field : CXXRecord->fields()) {
1862 if (CXXRecordDecl *RecordDecl = Field->getType()->getAsCXXRecordDecl())
1864 }
1865 // Emit VTable for all dynamic parent class
1866 for (CXXBaseSpecifier &Base : CXXRecord->bases()) {
1867 if (CXXRecordDecl *BaseDecl = Base.getType()->getAsCXXRecordDecl())
1868 emitAndRegisterVTable(CGM, BaseDecl, VD);
1869 }
1870 }
1871 }
1872}
1873
// Walks every expression in the map clauses of D and, for each one that can be
// tied to a variable declaration, calls emitAndRegisterVTable with the class
// declaration derived from the expression's type.
// NOTE(review): the signature line and the statement for the pointer-type case
// inside GetVTableDecl are absent from this listing.
1875 // Register VTable by scanning through the map clause of OpenMP target region.
1876 // Get CXXRecordDecl and VarDecl from Expr.
1877 auto GetVTableDecl = [](const Expr *E) {
1878 QualType VDTy = E->getType();
1879 CXXRecordDecl *CXXRecord = nullptr;
     // Look through an lvalue reference before inspecting the type.
1880 if (const auto *RefType = VDTy->getAs<LValueReferenceType>())
1881 VDTy = RefType->getPointeeType();
1882 if (VDTy->isPointerType())
1884 else
1885 CXXRecord = VDTy->getAsCXXRecordDecl();
1886
     // Only a direct variable reference, or a member access whose base is a
     // direct variable reference, yields a VarDecl; anything else leaves VD
     // null.
1887 const VarDecl *VD = nullptr;
1888 if (auto *DRE = dyn_cast<DeclRefExpr>(E)) {
1889 VD = cast<VarDecl>(DRE->getDecl());
1890 } else if (auto *MRE = dyn_cast<MemberExpr>(E)) {
1891 if (auto *BaseDRE = dyn_cast<DeclRefExpr>(MRE->getBase())) {
1892 if (auto *BaseVD = dyn_cast<VarDecl>(BaseDRE->getDecl()))
1893 VD = BaseVD;
1894 }
1895 }
1896 return std::pair<CXXRecordDecl *, const VarDecl *>(CXXRecord, VD);
1897 };
1898 // Collect VTable from OpenMP map clause.
1899 for (const auto *C : D.getClausesOfKind<OMPMapClause>()) {
1900 for (const auto *E : C->varlist()) {
1901 auto DeclPair = GetVTableDecl(E);
1902 // Ensure VD is not null
1903 if (DeclPair.second)
1904 emitAndRegisterVTable(CGM, DeclPair.first, DeclPair.second);
1905 }
1906 }
1907}
1908
// Returns the address of a per-thread copy of an internal variable of type
// VarType whose name is Name followed by an "artificial" suffix. With OpenMP
// TLS enabled and supported by the target, the internal global is marked
// thread-local and its address is returned directly; otherwise the address
// comes from a call to __kmpc_threadprivate_cached, which is also given a
// second internal variable carrying an additional "cache" suffix.
// NOTE(review): the signature line, the first runtime-call arguments and part
// of the final Address expression are absent from this listing.
1910 QualType VarType,
1911 StringRef Name) {
1912 std::string Suffix = getName({"artificial", ""});
1913 llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType);
1914 llvm::GlobalVariable *GAddr = OMPBuilder.getOrCreateInternalVariable(
1915 VarLVType, Twine(Name).concat(Suffix).str());
1916 if (CGM.getLangOpts().OpenMP && CGM.getLangOpts().OpenMPUseTLS &&
1917 CGM.getTarget().isTLSSupported()) {
1918 GAddr->setThreadLocal(/*Val=*/true);
1919 return Address(GAddr, GAddr->getValueType(),
1920 CGM.getContext().getTypeAlignInChars(VarType));
1921 }
1922 std::string CacheSuffix = getName({"cache", ""});
1923 llvm::Value *Args[] = {
1926 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy),
1927 CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy,
1928 /*isSigned=*/false),
1929 OMPBuilder.getOrCreateInternalVariable(
1930 CGM.VoidPtrPtrTy,
1931 Twine(Name).concat(Suffix).concat(CacheSuffix).str())};
1932 return Address(
1934 CGF.EmitRuntimeCall(
1935 OMPBuilder.getOrCreateRuntimeFunction(
1936 CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
1937 Args),
1938 CGF.Builder.getPtrTy(0)),
1939 VarLVType, CGM.getContext().getTypeAlignInChars(VarType));
1940}
1941
// Emits code for a condition with two code generators: ThenGen runs when Cond
// is true and ElseGen when it is false. If Cond constant-folds to a simple
// integer, only the selected generator is invoked and no branch is emitted;
// otherwise a conditional branch over the blocks "omp_if.then", "omp_if.else"
// and "omp_if.end" is generated.
// NOTE(review): the signature line and two statements near the 'else' branch
// are absent from this listing.
1943 const RegionCodeGenTy &ThenGen,
1944 const RegionCodeGenTy &ElseGen) {
1945 CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());
1946
1947 // If the condition constant folds and can be elided, try to avoid emitting
1948 // the condition and the dead arm of the if/else.
1949 bool CondConstant;
1950 if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
1951 if (CondConstant)
1952 ThenGen(CGF);
1953 else
1954 ElseGen(CGF);
1955 return;
1956 }
1957
1958 // Otherwise, the condition did not fold, or we couldn't elide it. Just
1959 // emit the conditional branch.
1960 llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then");
1961 llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else");
1962 llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end");
1963 CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0);
1964
1965 // Emit the 'then' code.
1966 CGF.EmitBlock(ThenBlock);
1967 ThenGen(CGF);
1968 CGF.EmitBranch(ContBlock);
1969 // Emit the 'else' code if present.
1970 // There is no need to emit line number for unconditional branch.
1972 CGF.EmitBlock(ElseBlock);
1973 ElseGen(CGF);
1974 // There is no need to emit line number for unconditional branch.
1976 CGF.EmitBranch(ContBlock);
1977 // Emit the continuation block for code after the if.
1978 CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
1979}
1980
// Emits the invocation of an outlined parallel region. Without IfCond, or on
// its true arm, this is a __kmpc_fork_call passing OutlinedFn and
// CapturedVars. On the false arm OutlinedFn is called directly, bracketed by
// __kmpc_serialized_parallel and __kmpc_end_serialized_parallel. Does nothing
// when CGF has no insertion point.
// NOTE(review): the signature's first line and the declarations of RealArgs,
// RT and OutlinedFnArgs are absent from this listing. NumThreads,
// NumThreadsModifier, Severity and Message are not referenced in the lines
// that are visible.
1982 CodeGenFunction &CGF, SourceLocation Loc, llvm::Function *OutlinedFn,
1983 ArrayRef<llvm::Value *> CapturedVars, const Expr *IfCond,
1984 llvm::Value *NumThreads, OpenMPNumThreadsClauseModifier NumThreadsModifier,
1985 OpenMPSeverityClauseKind Severity, const Expr *Message) {
1986 if (!CGF.HaveInsertPoint())
1987 return;
1988 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
1989 auto &M = CGM.getModule();
1990 auto &&ThenGen = [&M, OutlinedFn, CapturedVars, RTLoc,
1991 this](CodeGenFunction &CGF, PrePostActionTy &) {
1992 // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
1993 llvm::Value *Args[] = {
1994 RTLoc,
1995 CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
1996 OutlinedFn};
1998 RealArgs.append(std::begin(Args), std::end(Args));
1999 RealArgs.append(CapturedVars.begin(), CapturedVars.end());
2000
2001 llvm::FunctionCallee RTLFn =
2002 OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_fork_call);
2003 CGF.EmitRuntimeCall(RTLFn, RealArgs);
2004 };
2005 auto &&ElseGen = [&M, OutlinedFn, CapturedVars, RTLoc, Loc,
2006 this](CodeGenFunction &CGF, PrePostActionTy &) {
2008 llvm::Value *ThreadID = RT.getThreadID(CGF, Loc);
2009 // Build calls:
2010 // __kmpc_serialized_parallel(&Loc, GTid);
2011 llvm::Value *Args[] = {RTLoc, ThreadID};
2012 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2013 M, OMPRTL___kmpc_serialized_parallel),
2014 Args);
2015
2016 // OutlinedFn(&GTid, &zero_bound, CapturedStruct);
2017 Address ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc);
2018 RawAddress ZeroAddrBound =
2020 /*Name=*/".bound.zero.addr");
2021 CGF.Builder.CreateStore(CGF.Builder.getInt32(/*C*/ 0), ZeroAddrBound);
2023 // ThreadId for serialized parallels is 0.
2024 OutlinedFnArgs.push_back(ThreadIDAddr.emitRawPointer(CGF));
2025 OutlinedFnArgs.push_back(ZeroAddrBound.getPointer());
2026 OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
2027
2028 // Ensure we do not inline the function. This is trivially true for the ones
2029 // passed to __kmpc_fork_call but the ones called in serialized regions
2030 // could be inlined. This is not perfect, but it is closer to the invariant
2031 // we want, namely, every data environment starts with a new function.
2032 // TODO: We should pass the if condition to the runtime function and do the
2033 // handling there. Much cleaner code.
2034 OutlinedFn->removeFnAttr(llvm::Attribute::AlwaysInline);
2035 OutlinedFn->addFnAttr(llvm::Attribute::NoInline);
2036 RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);
2037
2038 // __kmpc_end_serialized_parallel(&Loc, GTid);
2039 llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID};
2040 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2041 M, OMPRTL___kmpc_end_serialized_parallel),
2042 EndArgs);
2043 };
     // With an 'if' condition both arms are needed; without one the fork call
     // is emitted unconditionally.
2044 if (IfCond) {
2045 emitIfClause(CGF, IfCond, ThenGen, ElseGen);
2046 } else {
2047 RegionCodeGenTy ThenRCG(ThenGen);
2048 ThenRCG(CGF);
2049 }
2050}
2051
2052// If we're inside an (outlined) parallel region, use the region info's
2053// thread-ID variable (it is passed in a first argument of the outlined function
2054// as "kmp_int32 *gtid"). Otherwise, if we're not inside parallel region, but in
2055// regular serial code region, get thread ID by calling kmp_int32
2056// kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary and
2057// return the address of that temp.
// NOTE(review): the first line of this signature is absent from this listing.
2059 SourceLocation Loc) {
     // Fast path: the enclosing OpenMP region already carries a thread-ID
     // variable.
2060 if (auto *OMPRegionInfo =
2061 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
2062 if (OMPRegionInfo->getThreadIDVariable())
2063 return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress();
2064
     // Otherwise store the thread ID into a fresh signed 32-bit temporary
     // named ".threadid_temp." and return its address.
2065 llvm::Value *ThreadID = getThreadID(CGF, Loc);
2066 QualType Int32Ty =
2067 CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
2068 Address ThreadIDTemp =
2069 CGF.CreateMemTempWithoutCast(Int32Ty, /*Name*/ ".threadid_temp.");
2070 CGF.EmitStoreOfScalar(ThreadID,
2071 CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty));
2072
2073 return ThreadIDTemp;
2074}
2075
2076llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
2077 std::string Prefix = Twine("gomp_critical_user_", CriticalName).str();
2078 std::string Name = getName({Prefix, "var"});
2079 llvm::GlobalVariable *GV =
2080 OMPBuilder.getOrCreateInternalVariable(KmpCriticalNameTy, Name);
2081 CGM.setDSOLocal(GV);
2082 return GV;
2083}
2084
2085namespace {
2086/// Common pre(post)-action for different OpenMP constructs.
2087class CommonActionTy final : public PrePostActionTy {
2088 llvm::FunctionCallee EnterCallee;
2089 ArrayRef<llvm::Value *> EnterArgs;
2090 llvm::FunctionCallee ExitCallee;
2091 ArrayRef<llvm::Value *> ExitArgs;
2092 bool Conditional;
2093 llvm::BasicBlock *ContBlock = nullptr;
2094
2095public:
2096 CommonActionTy(llvm::FunctionCallee EnterCallee,
2097 ArrayRef<llvm::Value *> EnterArgs,
2098 llvm::FunctionCallee ExitCallee,
2099 ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false)
2100 : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),
2101 ExitArgs(ExitArgs), Conditional(Conditional) {}
2102 void Enter(CodeGenFunction &CGF) override {
2103 llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs);
2104 if (Conditional) {
2105 llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes);
2106 auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
2107 ContBlock = CGF.createBasicBlock("omp_if.end");
2108 // Generate the branch (If-stmt)
2109 CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
2110 CGF.EmitBlock(ThenBlock);
2111 }
2112 }
2113 void Done(CodeGenFunction &CGF) {
2114 // Emit the rest of blocks/branches
2115 CGF.EmitBranch(ContBlock);
2116 CGF.EmitBlock(ContBlock, true);
2117 }
2118 void Exit(CodeGenFunction &CGF) override {
2119 CGF.EmitRuntimeCall(ExitCallee, ExitArgs);
2120 }
2121};
2122} // anonymous namespace
2123
// Emits a critical region: the code produced by CriticalOpGen is wrapped in a
// call to __kmpc_critical (or __kmpc_critical_with_hint when Hint is given)
// and a call to __kmpc_end_critical, all using the lock variable obtained for
// CriticalName. Does nothing when CGF has no insertion point.
// NOTE(review): the first line of this signature is absent from this listing.
2125 StringRef CriticalName,
2126 const RegionCodeGenTy &CriticalOpGen,
2127 SourceLocation Loc, const Expr *Hint) {
2128 // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]);
2129 // CriticalOpGen();
2130 // __kmpc_end_critical(ident_t *, gtid, Lock);
2131 // Prepare arguments and build a call to __kmpc_critical
2132 if (!CGF.HaveInsertPoint())
2133 return;
2134 llvm::FunctionCallee RuntimeFcn = OMPBuilder.getOrCreateRuntimeFunction(
2135 CGM.getModule(),
2136 Hint ? OMPRTL___kmpc_critical_with_hint : OMPRTL___kmpc_critical);
2137 llvm::Value *LockVar = getCriticalRegionLock(CriticalName);
     // The lock is the third runtime argument; cast it when its address space
     // differs from that of the runtime function's parameter.
2138 unsigned LockVarArgIdx = 2;
2139 if (cast<llvm::GlobalVariable>(LockVar)->getAddressSpace() !=
2140 RuntimeFcn.getFunctionType()
2141 ->getParamType(LockVarArgIdx)
2142 ->getPointerAddressSpace())
2143 LockVar = CGF.Builder.CreateAddrSpaceCast(
2144 LockVar, RuntimeFcn.getFunctionType()->getParamType(LockVarArgIdx));
2145 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2146 LockVar};
     // The enter call takes the same arguments plus the hint, converted to a
     // 32-bit integer; the exit call uses Args unchanged.
2147 llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args),
2148 std::end(Args));
2149 if (Hint) {
2150 EnterArgs.push_back(CGF.Builder.CreateIntCast(
2151 CGF.EmitScalarExpr(Hint), CGM.Int32Ty, /*isSigned=*/false));
2152 }
2153 CommonActionTy Action(RuntimeFcn, EnterArgs,
2154 OMPBuilder.getOrCreateRuntimeFunction(
2155 CGM.getModule(), OMPRTL___kmpc_end_critical),
2156 Args);
2157 CriticalOpGen.setAction(Action);
2158 emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
2159}
2160
// Emits a master region: the code produced by MasterOpGen runs only when
// __kmpc_master returns a non-null value, and is followed by __kmpc_end_master.
// Does nothing when CGF has no insertion point.
// NOTE(review): the first line of this signature is absent from this listing.
2162 const RegionCodeGenTy &MasterOpGen,
2163 SourceLocation Loc) {
2164 if (!CGF.HaveInsertPoint())
2165 return;
2166 // if(__kmpc_master(ident_t *, gtid)) {
2167 // MasterOpGen();
2168 // __kmpc_end_master(ident_t *, gtid);
2169 // }
2170 // Prepare arguments and build a call to __kmpc_master
2171 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2172 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2173 CGM.getModule(), OMPRTL___kmpc_master),
2174 Args,
2175 OMPBuilder.getOrCreateRuntimeFunction(
2176 CGM.getModule(), OMPRTL___kmpc_end_master),
2177 Args,
2178 /*Conditional=*/true);
2179 MasterOpGen.setAction(Action);
2180 emitInlinedDirective(CGF, OMPD_master, MasterOpGen);
     // Close the conditional block opened by the action.
2181 Action.Done(CGF);
2182}
2183
// Emits a masked region: the code produced by MaskedOpGen runs only when
// __kmpc_masked returns a non-null value, and is followed by
// __kmpc_end_masked. The filter argument is Filter evaluated as a 32-bit
// integer, or constant 0 when Filter is null. Does nothing when CGF has no
// insertion point.
// NOTE(review): the first line of this signature is absent from this listing.
2185 const RegionCodeGenTy &MaskedOpGen,
2186 SourceLocation Loc, const Expr *Filter) {
2187 if (!CGF.HaveInsertPoint())
2188 return;
2189 // if(__kmpc_masked(ident_t *, gtid, filter)) {
2190 // MaskedOpGen();
2191 // __kmpc_end_masked(ident_t *, gtid);
2192 // }
2193 // Prepare arguments and build a call to __kmpc_masked
2194 llvm::Value *FilterVal = Filter
2195 ? CGF.EmitScalarExpr(Filter, CGF.Int32Ty)
2196 : llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/0);
2197 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2198 FilterVal};
     // The end call takes no filter, so it gets its own argument list.
2199 llvm::Value *ArgsEnd[] = {emitUpdateLocation(CGF, Loc),
2200 getThreadID(CGF, Loc)};
2201 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2202 CGM.getModule(), OMPRTL___kmpc_masked),
2203 Args,
2204 OMPBuilder.getOrCreateRuntimeFunction(
2205 CGM.getModule(), OMPRTL___kmpc_end_masked),
2206 ArgsEnd,
2207 /*Conditional=*/true);
2208 MaskedOpGen.setAction(Action);
2209 emitInlinedDirective(CGF, OMPD_masked, MaskedOpGen);
     // Close the conditional block opened by the action.
2210 Action.Done(CGF);
2211}
2212
// Emits a taskyield: through OMPBuilder.createTaskyield when the
// OpenMPIRBuilder language option is set, otherwise as a direct call to
// __kmpc_omp_taskyield with a trailing constant 0. Afterwards, if code is
// being generated inside an OpenMP region, the region's emitUntiedSwitch hook
// is invoked. Does nothing when CGF has no insertion point.
// NOTE(review): the first line of this signature is absent from this listing.
2214 SourceLocation Loc) {
2215 if (!CGF.HaveInsertPoint())
2216 return;
2217 if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2218 OMPBuilder.createTaskyield(CGF.Builder);
2219 } else {
2220 // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
2221 llvm::Value *Args[] = {
2222 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2223 llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
2224 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2225 CGM.getModule(), OMPRTL___kmpc_omp_taskyield),
2226 Args);
2227 }
2228
2229 if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
2230 Region->emitUntiedSwitch(CGF);
2231}
2232
// Emits a taskgroup region: the code produced by TaskgroupOpGen is wrapped in
// unconditional calls to __kmpc_taskgroup and __kmpc_end_taskgroup, which
// share one argument list. Does nothing when CGF has no insertion point.
// NOTE(review): the first line of this signature is absent from this listing.
2234 const RegionCodeGenTy &TaskgroupOpGen,
2235 SourceLocation Loc) {
2236 if (!CGF.HaveInsertPoint())
2237 return;
2238 // __kmpc_taskgroup(ident_t *, gtid);
2239 // TaskgroupOpGen();
2240 // __kmpc_end_taskgroup(ident_t *, gtid);
2241 // Prepare arguments and build a call to __kmpc_taskgroup
2242 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2243 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2244 CGM.getModule(), OMPRTL___kmpc_taskgroup),
2245 Args,
2246 OMPBuilder.getOrCreateRuntimeFunction(
2247 CGM.getModule(), OMPRTL___kmpc_end_taskgroup),
2248 Args);
2249 TaskgroupOpGen.setAction(Action);
2250 emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen);
2251}
2252
2253/// Given an array of pointers to variables, project the address of a
2254/// given variable.
/// The pointer stored at position \p Index of the array is loaded and returned
/// as an Address whose element type is the memory type of \p Var and whose
/// alignment is the declared alignment of \p Var.
// NOTE(review): the first line of this signature is absent from this listing.
2256 unsigned Index, const VarDecl *Var) {
2257 // Pull out the pointer to the variable.
2258 Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index);
2259 llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr);
2260
2261 llvm::Type *ElemTy = CGF.ConvertTypeForMem(Var->getType());
2262 return Address(Ptr, ElemTy, CGF.getContext().getDeclAlign(Var));
2263}
2264
// Builds and returns an internal-linkage helper "void copy_func(void *LHSArg,
// void *RHSArg)". Both arguments are treated as arrays of pointers with
// element layout ArgsElemType; for each index I the helper copies the variable
// addressed by the I-th source pointer into the one addressed by the I-th
// destination pointer, using AssignmentOps[I].
// NOTE(review): the first line of this signature, one statement after the
// function is created, and the lines that open the LHS and RHS declarations
// are absent from this listing.
2266 CodeGenModule &CGM, llvm::Type *ArgsElemType,
2267 ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
2268 ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps,
2269 SourceLocation Loc) {
2270 ASTContext &C = CGM.getContext();
2271 // void copy_func(void *LHSArg, void *RHSArg);
2272
2273 auto *LHSArg =
2274 ImplicitParamDecl::Create(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
2275 C.VoidPtrTy, ImplicitParamKind::Other);
2276 auto *RHSArg =
2277 ImplicitParamDecl::Create(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
2278 C.VoidPtrTy, ImplicitParamKind::Other);
2279 FunctionArgList Args{LHSArg, RHSArg};
2280 const auto &CGFI =
2281 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
2282 std::string Name =
2283 CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"});
2284 auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
2285 llvm::GlobalValue::InternalLinkage, Name,
2286 &CGM.getModule());
     // Only tag the function when a sample-profile file is configured.
2288 if (!CGM.getCodeGenOpts().SampleProfileFile.empty())
2289 Fn->addFnAttr("sample-profile-suffix-elision-policy", "selected");
2290 Fn->setDoesNotRecurse();
2291 CodeGenFunction CGF(CGM);
2292 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
2293 // Dest = (void*[n])(LHSArg);
2294 // Src = (void*[n])(RHSArg);
2296 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(LHSArg)),
2297 CGF.Builder.getPtrTy(0)),
2298 ArgsElemType, CGF.getPointerAlign());
2300 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(RHSArg)),
2301 CGF.Builder.getPtrTy(0)),
2302 ArgsElemType, CGF.getPointerAlign());
2303 // *(Type0*)Dst[0] = *(Type0*)Src[0];
2304 // *(Type1*)Dst[1] = *(Type1*)Src[1];
2305 // ...
2306 // *(Typen*)Dst[n] = *(Typen*)Src[n];
2307 for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
2308 const auto *DestVar =
2309 cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
2310 Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);
2311
2312 const auto *SrcVar =
2313 cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
2314 Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);
2315
     // The copied type comes from the original copyprivate variable, not from
     // the Dest/Src helper variables.
2316 const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
2317 QualType Type = VD->getType();
2318 CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);
2319 }
2320 CGF.FinishFunction();
2321 return Fn;
2322}
2323
// Emits a single region: the code produced by SingleOpGen runs only when
// __kmpc_single returns a non-null value, and is followed by
// __kmpc_end_single. When CopyprivateVars is non-empty, a 32-bit "did_it" flag
// records whether the body was entered, and a __kmpc_copyprivate call is then
// emitted with the list of variable addresses and a generated copy function.
// Does nothing when CGF has no insertion point.
// NOTE(review): the first line of this signature and two lines in the
// copyprivate part (inside the store loop and the declaration of CL) are
// absent from this listing.
2325 const RegionCodeGenTy &SingleOpGen,
2326 SourceLocation Loc,
2327 ArrayRef<const Expr *> CopyprivateVars,
2328 ArrayRef<const Expr *> SrcExprs,
2329 ArrayRef<const Expr *> DstExprs,
2330 ArrayRef<const Expr *> AssignmentOps) {
2331 if (!CGF.HaveInsertPoint())
2332 return;
2333 assert(CopyprivateVars.size() == SrcExprs.size() &&
2334 CopyprivateVars.size() == DstExprs.size() &&
2335 CopyprivateVars.size() == AssignmentOps.size());
2336 ASTContext &C = CGM.getContext();
2337 // int32 did_it = 0;
2338 // if(__kmpc_single(ident_t *, gtid)) {
2339 // SingleOpGen();
2340 // __kmpc_end_single(ident_t *, gtid);
2341 // did_it = 1;
2342 // }
2343 // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
2344 // <copy_func>, did_it);
2345
     // DidIt stays invalid when there is nothing to copy; its validity is what
     // gates all copyprivate handling below.
2346 Address DidIt = Address::invalid();
2347 if (!CopyprivateVars.empty()) {
2348 // int32 did_it = 0;
2349 QualType KmpInt32Ty =
2350 C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
2351 DidIt = CGF.CreateMemTempWithoutCast(KmpInt32Ty, ".omp.copyprivate.did_it");
2352 CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
2353 }
2354 // Prepare arguments and build a call to __kmpc_single
2355 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2356 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2357 CGM.getModule(), OMPRTL___kmpc_single),
2358 Args,
2359 OMPBuilder.getOrCreateRuntimeFunction(
2360 CGM.getModule(), OMPRTL___kmpc_end_single),
2361 Args,
2362 /*Conditional=*/true);
2363 SingleOpGen.setAction(Action);
2364 emitInlinedDirective(CGF, OMPD_single, SingleOpGen);
2365 if (DidIt.isValid()) {
2366 // did_it = 1;
2367 CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
2368 }
     // Close the conditional block opened by the action; the store above is
     // emitted before this point, so it sits inside the guarded block.
2369 Action.Done(CGF);
2370 // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
2371 // <copy_func>, did_it);
2372 if (DidIt.isValid()) {
2373 llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
2374 QualType CopyprivateArrayTy = C.getConstantArrayType(
2375 C.VoidPtrTy, ArraySize, nullptr, ArraySizeModifier::Normal,
2376 /*IndexTypeQuals=*/0);
2377 // Create a list of all private variables for copyprivate.
2378 Address CopyprivateList = CGF.CreateMemTempWithoutCast(
2379 CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
2380 for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
2381 Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I);
2382 CGF.Builder.CreateStore(
2384 CGF.EmitLValue(CopyprivateVars[I]).getPointer(CGF),
2385 CGF.VoidPtrTy),
2386 Elem);
2387 }
2388 // Build function that copies private values from single region to all other
2389 // threads in the corresponding parallel region.
     // NOTE(review): SrcExprs and DstExprs are passed in that order, while the
     // parameter list visible at listing lines 2267-2268 (presumably this
     // callee) names those positions DestExprs then SrcExprs. Confirm the
     // naming at the call sites.
2390 llvm::Value *CpyFn = emitCopyprivateCopyFunction(
2391 CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy), CopyprivateVars,
2392 SrcExprs, DstExprs, AssignmentOps, Loc);
2393 llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy);
2395 CopyprivateList, CGF.VoidPtrTy, CGF.Int8Ty);
2396 llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt);
2397 llvm::Value *Args[] = {
2398 emitUpdateLocation(CGF, Loc), // ident_t *<loc>
2399 getThreadID(CGF, Loc), // i32 <gtid>
2400 BufSize, // size_t <buf_size>
2401 CL.emitRawPointer(CGF), // void *<copyprivate list>
2402 CpyFn, // void (*) (void *, void *) <copy_func>
2403 DidItVal // i32 did_it
2404 };
2405 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2406 CGM.getModule(), OMPRTL___kmpc_copyprivate),
2407 Args);
2408 }
2409}
2410
// Emits an ordered region. When IsThreads is true the code produced by
// OrderedOpGen is wrapped in calls to __kmpc_ordered and __kmpc_end_ordered;
// otherwise it is emitted as an inlined directive without runtime calls. Does
// nothing when CGF has no insertion point.
// NOTE(review): the first line of this signature is absent from this listing.
2412 const RegionCodeGenTy &OrderedOpGen,
2413 SourceLocation Loc, bool IsThreads) {
2414 if (!CGF.HaveInsertPoint())
2415 return;
2416 // __kmpc_ordered(ident_t *, gtid);
2417 // OrderedOpGen();
2418 // __kmpc_end_ordered(ident_t *, gtid);
2419 // Prepare arguments and build a call to __kmpc_ordered
2420 if (IsThreads) {
2421 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2422 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2423 CGM.getModule(), OMPRTL___kmpc_ordered),
2424 Args,
2425 OMPBuilder.getOrCreateRuntimeFunction(
2426 CGM.getModule(), OMPRTL___kmpc_end_ordered),
2427 Args);
2428 OrderedOpGen.setAction(Action);
2429 emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
2430 return;
2431 }
2432 emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
2433}
2434
// Maps a directive kind to the barrier flag value: dedicated flags for
// OMPD_for, OMPD_sections, OMPD_single and OMPD_barrier, and
// OMP_IDENT_BARRIER_IMPL for every other kind.
// NOTE(review): the signature line is absent from this listing.
2436 unsigned Flags;
2437 if (Kind == OMPD_for)
2438 Flags = OMP_IDENT_BARRIER_IMPL_FOR;
2439 else if (Kind == OMPD_sections)
2440 Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS;
2441 else if (Kind == OMPD_single)
2442 Flags = OMP_IDENT_BARRIER_IMPL_SINGLE;
2443 else if (Kind == OMPD_barrier)
2444 Flags = OMP_IDENT_BARRIER_EXPL;
2445 else
2446 Flags = OMP_IDENT_BARRIER_IMPL;
2447 return Flags;
2448}
2449
// Overrides the schedule for doacross loops: if any 'ordered' clause of S
// reports a non-zero loop count, ScheduleKind is set to static and ChunkExpr
// to an integer literal 1 of 32-bit type. Otherwise both outputs are left
// untouched.
// NOTE(review): the first line of this signature is absent from this listing.
2451 CodeGenFunction &CGF, const OMPLoopDirective &S,
2452 OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const {
2453 // Check if the loop directive is actually a doacross loop directive. In this
2454 // case choose static, 1 schedule.
2455 if (llvm::any_of(
2456 S.getClausesOfKind<OMPOrderedClause>(),
2457 [](const OMPOrderedClause *C) { return C->getNumForLoops(); })) {
2458 ScheduleKind = OMPC_SCHEDULE_static;
2459 // Chunk size is 1 in this case.
2460 llvm::APInt ChunkSize(32, 1);
2461 ChunkExpr = IntegerLiteral::Create(
2462 CGF.getContext(), ChunkSize,
2463 CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
2464 SourceLocation());
2465 }
2466}
2467
// Emits a barrier. With the OpenMPIRBuilder language option set, the barrier
// is created through OMPBuilder. Otherwise, inside a region that has cancel
// and unless ForceSimpleCall is set, __kmpc_cancel_barrier is called and, when
// EmitChecks is set, a non-null result branches through cleanups to the
// region's cancel destination. In every remaining case a plain __kmpc_barrier
// call is emitted.
// NOTE(review): the first line of this signature is absent from this listing.
2469 OpenMPDirectiveKind Kind, bool EmitChecks,
2470 bool ForceSimpleCall) {
2471 // Check if we should use the OMPBuilder
2472 auto *OMPRegionInfo =
2473 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo);
     // NOTE(review): this path runs before the HaveInsertPoint() check below.
2474 if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2475 llvm::OpenMPIRBuilder::InsertPointTy AfterIP =
2476 cantFail(OMPBuilder.createBarrier(CGF.Builder, Kind, ForceSimpleCall,
2477 EmitChecks));
2478 CGF.Builder.restoreIP(AfterIP);
2479 return;
2480 }
2481
2482 if (!CGF.HaveInsertPoint())
2483 return;
2484 // Build call __kmpc_cancel_barrier(loc, thread_id);
2485 // Build call __kmpc_barrier(loc, thread_id);
2486 unsigned Flags = getDefaultFlagsForBarriers(Kind);
2487 // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc,
2488 // thread_id);
2489 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
2490 getThreadID(CGF, Loc)};
2491 if (OMPRegionInfo) {
2492 if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) {
2493 llvm::Value *Result = CGF.EmitRuntimeCall(
2494 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
2495 OMPRTL___kmpc_cancel_barrier),
2496 Args);
2497 if (EmitChecks) {
2498 // if (__kmpc_cancel_barrier()) {
2499 // exit from construct;
2500 // }
2501 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
2502 llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
2503 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
2504 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
2505 CGF.EmitBlock(ExitBB);
2506 // exit from construct;
2507 CodeGenFunction::JumpDest CancelDestination =
2508 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
2509 CGF.EmitBranchThroughCleanup(CancelDestination);
2510 CGF.EmitBlock(ContBB, /*IsFinished=*/true);
2511 }
2512 return;
2513 }
2514 }
2515 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2516 CGM.getModule(), OMPRTL___kmpc_barrier),
2517 Args);
2518}
2519
// Emits a call to __kmpc_error. The severity argument is 2 when IsFatal is set
// and 1 otherwise; the message argument is ME evaluated as a scalar, or a null
// pointer when ME is null, cast to an i8 pointer.
// NOTE(review): the first line of this signature is absent from this listing.
2521 Expr *ME, bool IsFatal) {
2522 llvm::Value *MVL = ME ? CGF.EmitScalarExpr(ME)
2523 : llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
2524 // Build call void __kmpc_error(ident_t *loc, int severity, const char
2525 // *message)
2526 llvm::Value *Args[] = {
2527 emitUpdateLocation(CGF, Loc, /*Flags=*/0, /*GenLoc=*/true),
2528 llvm::ConstantInt::get(CGM.Int32Ty, IsFatal ? 2 : 1),
2529 CGF.Builder.CreatePointerCast(MVL, CGM.Int8PtrTy)};
2530 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2531 CGM.getModule(), OMPRTL___kmpc_error),
2532 Args);
2533}
2534
2535/// Map the OpenMP loop schedule to the runtime enumeration.
/// \p Ordered selects the OMP_ord_* value over the OMP_sch_* value for every
/// kind. \p Chunked is consulted only for the static schedule and in the
/// trailing assert.
// NOTE(review): the case label that precedes the assert near the end of the
// switch is absent from this listing.
2536static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
2537 bool Chunked, bool Ordered) {
2538 switch (ScheduleKind) {
2539 case OMPC_SCHEDULE_static:
2540 return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked)
2541 : (Ordered ? OMP_ord_static : OMP_sch_static);
2542 case OMPC_SCHEDULE_dynamic:
2543 return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked;
2544 case OMPC_SCHEDULE_guided:
2545 return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked;
2546 case OMPC_SCHEDULE_runtime:
2547 return Ordered ? OMP_ord_runtime : OMP_sch_runtime;
2548 case OMPC_SCHEDULE_auto:
2549 return Ordered ? OMP_ord_auto : OMP_sch_auto;
2551 assert(!Chunked && "chunk was specified but schedule kind not known");
2552 return Ordered ? OMP_ord_static : OMP_sch_static;
2553 }
2554 llvm_unreachable("Unexpected runtime schedule");
2555}
2556
2557/// Map the OpenMP distribute schedule to the runtime enumeration.
///
/// \returns OMP_dist_sch_static_chunked when Chunked is true, otherwise
/// OMP_dist_sch_static.
// NOTE(review): the declarator line (name and parameters) is missing from this
// listing; callers in this file pass (ScheduleKind, Chunked).
2558static OpenMPSchedType
2560 // only static is allowed for dist_schedule
2561 return Chunked ? OMP_dist_sch_static_chunked : OMP_dist_sch_static;
2562}
2563
// Returns true when (ScheduleKind, Chunked), queried with ordering disabled,
// maps to OMP_sch_static.
// NOTE(review): the first line of this signature is missing from this listing.
2565 bool Chunked) const {
2566 OpenMPSchedType Schedule =
2567 getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2568 return Schedule == OMP_sch_static;
2569}
2570
// dist_schedule variant: returns true when (ScheduleKind, Chunked) maps to
// OMP_dist_sch_static.
// NOTE(review): the first line of this signature is missing from this listing.
2572 OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
2573 OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2574 return Schedule == OMP_dist_sch_static;
2575}
2576
// Returns true when (ScheduleKind, Chunked), queried with ordering disabled,
// maps to OMP_sch_static_chunked.
// NOTE(review): the first line of this signature is missing from this listing.
2578 bool Chunked) const {
2579 OpenMPSchedType Schedule =
2580 getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2581 return Schedule == OMP_sch_static_chunked;
2582}
2583
// dist_schedule variant: returns true when (ScheduleKind, Chunked) maps to
// OMP_dist_sch_static_chunked.
// NOTE(review): the first line of this signature is missing from this listing.
2585 OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
2586 OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2587 return Schedule == OMP_dist_sch_static_chunked;
2588}
2589
// Returns true when ScheduleKind, queried as unchunked and unordered, maps to
// anything other than OMP_sch_static.
// NOTE(review): the signature line is missing from this listing.
2591 OpenMPSchedType Schedule =
2592 getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
// Chunked=false was passed above, so the chunked static kind is not expected.
2593 assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
2594 return Schedule != OMP_sch_static;
2595}
2596
/// Combine a runtime schedule kind with the monotonic/nonmonotonic modifier bit
/// derived from the two schedule modifiers M1 and M2.
///
/// M1 is examined first and M2 second, so a monotonic/nonmonotonic value in M2
/// overrides one set from M1. A 'simd' modifier turns OMP_sch_static_chunked
/// into OMP_sch_static_balanced_chunked. If neither modifier set a bit and the
/// OpenMP version is at least 50, nonmonotonic is applied to every schedule
/// that is not one of the static kinds listed at the end of the function.
///
/// \returns Schedule | Modifier.
// NOTE(review): the remainder of the parameter list (declaring M1 and M2) and
// the trailing case labels of both switches are missing from this listing.
2597static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule,
2600 int Modifier = 0;
2601 switch (M1) {
2602 case OMPC_SCHEDULE_MODIFIER_monotonic:
2603 Modifier = OMP_sch_modifier_monotonic;
2604 break;
2605 case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2606 Modifier = OMP_sch_modifier_nonmonotonic;
2607 break;
2608 case OMPC_SCHEDULE_MODIFIER_simd:
2609 if (Schedule == OMP_sch_static_chunked)
2610 Schedule = OMP_sch_static_balanced_chunked;
2611 break;
2614 break;
2615 }
// Second modifier: same handling as the first one.
2616 switch (M2) {
2617 case OMPC_SCHEDULE_MODIFIER_monotonic:
2618 Modifier = OMP_sch_modifier_monotonic;
2619 break;
2620 case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2621 Modifier = OMP_sch_modifier_nonmonotonic;
2622 break;
2623 case OMPC_SCHEDULE_MODIFIER_simd:
2624 if (Schedule == OMP_sch_static_chunked)
2625 Schedule = OMP_sch_static_balanced_chunked;
2626 break;
2629 break;
2630 }
2631 // OpenMP 5.0, 2.9.2 Worksharing-Loop Construct, Description.
2632 // If the static schedule kind is specified or if the ordered clause is
2633 // specified, and if the nonmonotonic modifier is not specified, the effect is
2634 // as if the monotonic modifier is specified. Otherwise, unless the monotonic
2635 // modifier is specified, the effect is as if the nonmonotonic modifier is
2636 // specified.
2637 if (CGM.getLangOpts().OpenMP >= 50 && Modifier == 0) {
2638 if (!(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static ||
2639 Schedule == OMP_sch_static_balanced_chunked ||
2640 Schedule == OMP_ord_static_chunked || Schedule == OMP_ord_static ||
2641 Schedule == OMP_dist_sch_static_chunked ||
2642 Schedule == OMP_dist_sch_static ||
2643 Schedule == OMP_dist_sch_static_chunked_sch_static_chunkone))
2644 Modifier = OMP_sch_modifier_nonmonotonic;
2645 }
2646 return Schedule | Modifier;
2647}
2648
// Emits the dispatch-init runtime call for a dynamically scheduled loop.
// The schedule argument is the runtime schedule kind combined with the
// monotonic/nonmonotonic modifier; the stride is always the constant 1.
// Does nothing when CGF has no insert point.
// NOTE(review): the first lines of this signature are missing from this
// listing.
2651 const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
2652 bool Ordered, const DispatchRTInput &DispatchValues) {
2653 if (!CGF.HaveInsertPoint())
2654 return;
2655 OpenMPSchedType Schedule = getRuntimeSchedule(
2656 ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered);
// Unless the loop is ordered, none of the static schedule kinds is expected
// here.
2657 assert(Ordered ||
2658 (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
2659 Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked &&
2660 Schedule != OMP_sch_static_balanced_chunked));
2661 // Call __kmpc_dispatch_init(
2662 // ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
2663 // kmp_int[32|64] lower, kmp_int[32|64] upper,
2664 // kmp_int[32|64] stride, kmp_int[32|64] chunk);
2665
2666 // If the Chunk was not specified in the clause - use default value 1.
2667 llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk
2668 : CGF.Builder.getIntN(IVSize, 1);
2669 llvm::Value *Args[] = {
2670 emitUpdateLocation(CGF, Loc),
2671 getThreadID(CGF, Loc),
2672 CGF.Builder.getInt32(addMonoNonMonoModifier(
2673 CGM, Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type
2674 DispatchValues.LB, // Lower
2675 DispatchValues.UB, // Upper
2676 CGF.Builder.getIntN(IVSize, 1), // Stride
2677 Chunk // Chunk
2678 };
// The callee is chosen by induction-variable width and signedness.
2679 CGF.EmitRuntimeCall(OMPBuilder.createDispatchInitFunction(IVSize, IVSigned),
2680 Args);
2681}
2682
// Emits the dispatch-deinit runtime call with the location and thread id as
// its only arguments. Does nothing when CGF has no insert point.
// NOTE(review): the first line of this signature is missing from this listing.
2684 SourceLocation Loc) {
2685 if (!CGF.HaveInsertPoint())
2686 return;
2687 // Call __kmpc_dispatch_deinit(ident_t *loc, kmp_int32 tid);
2688 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2689 CGF.EmitRuntimeCall(OMPBuilder.createDispatchDeinitFunction(), Args);
2690}
2691
// Shared helper that emits the static-init runtime call through the supplied
// ForStaticInitFunction callee. The caller provides the already-built location
// and thread-id values, the runtime schedule kind, and the loop values.
// Does nothing when CGF has no insert point.
// NOTE(review): the declarator line and the parameter line declaring M1 and M2
// are missing from this listing.
2693 CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId,
2694 llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule,
2696 const CGOpenMPRuntime::StaticRTInput &Values) {
2697 if (!CGF.HaveInsertPoint())
2698 return;
2699
// Ordered loops are not accepted here, and only static schedule kinds are.
2700 assert(!Values.Ordered);
2701 assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
2702 Schedule == OMP_sch_static_balanced_chunked ||
2703 Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked ||
2704 Schedule == OMP_dist_sch_static ||
2705 Schedule == OMP_dist_sch_static_chunked ||
2706 Schedule == OMP_dist_sch_static_chunked_sch_static_chunkone);
2707
2708 // Call __kmpc_for_static_init(
2709 // ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
2710 // kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
2711 // kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
2712 // kmp_int[32|64] incr, kmp_int[32|64] chunk);
2713 llvm::Value *Chunk = Values.Chunk;
// The presence of a chunk value must agree with the schedule kind: no chunk
// for the non-chunked static kinds, a chunk for the chunked ones.
2714 if (Chunk == nullptr) {
2715 assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static ||
2716 Schedule == OMP_dist_sch_static) &&
2717 "expected static non-chunked schedule");
2718 // If the Chunk was not specified in the clause - use default value 1.
2719 Chunk = CGF.Builder.getIntN(Values.IVSize, 1);
2720 } else {
2721 assert((Schedule == OMP_sch_static_chunked ||
2722 Schedule == OMP_sch_static_balanced_chunked ||
2723 Schedule == OMP_ord_static_chunked ||
2724 Schedule == OMP_dist_sch_static_chunked ||
2725 Schedule == OMP_dist_sch_static_chunked_sch_static_chunkone) &&
2726 "expected static chunked schedule");
2727 }
2728 llvm::Value *Args[] = {
2729 UpdateLocation,
2730 ThreadId,
2731 CGF.Builder.getInt32(addMonoNonMonoModifier(CGF.CGM, Schedule, M1,
2732 M2)), // Schedule type
2733 Values.IL.emitRawPointer(CGF), // &isLastIter
2734 Values.LB.emitRawPointer(CGF), // &LB
2735 Values.UB.emitRawPointer(CGF), // &UB
2736 Values.ST.emitRawPointer(CGF), // &Stride
2737 CGF.Builder.getIntN(Values.IVSize, 1), // Incr
2738 Chunk // Chunk
2739 };
2740 CGF.EmitRuntimeCall(ForStaticInitFunction, Args);
2741}
2742
// Emits static-schedule initialization for a worksharing (or 'loop')
// directive: picks the runtime schedule kind, builds the location and thread
// id, obtains the static-init callee and forwards to emitForStaticInitCall.
// NOTE(review): the first line of this signature and two interior lines (the
// condition selecting the ident flag, and the line before the final call) are
// missing from this listing.
2744 SourceLocation Loc,
2745 OpenMPDirectiveKind DKind,
2746 const OpenMPScheduleTy &ScheduleKind,
2747 const StaticRTInput &Values) {
// The fused distribute/chunk-one schedule takes precedence over the kind
// derived from the schedule clause.
2748 OpenMPSchedType ScheduleNum =
2749 ScheduleKind.UseFusedDistChunkSchedule
2750 ? OMP_dist_sch_static_chunked_sch_static_chunkone
2751 : getRuntimeSchedule(ScheduleKind.Schedule, Values.Chunk != nullptr,
2752 Values.Ordered);
2753 assert((isOpenMPWorksharingDirective(DKind) || (DKind == OMPD_loop)) &&
2754 "Expected loop-based or sections-based directive.");
// The location is tagged as either loop work or sections work.
2755 llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc,
2757 ? OMP_IDENT_WORK_LOOP
2758 : OMP_IDENT_WORK_SECTIONS);
2759 llvm::Value *ThreadId = getThreadID(CGF, Loc);
2760 llvm::FunctionCallee StaticInitFunction =
2761 OMPBuilder.createForStaticInitFunction(Values.IVSize, Values.IVSigned,
2762 false);
2764 emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
2765 ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values);
2766}
2767
// Emits static-schedule initialization for a 'distribute' loop: the location
// is tagged OMP_IDENT_WORK_DISTRIBUTE and no monotonic/nonmonotonic modifier
// is supplied for the first modifier slot.
// NOTE(review): the first lines of this signature and the last argument line
// of the final call are missing from this listing.
2771 const CGOpenMPRuntime::StaticRTInput &Values) {
2772 OpenMPSchedType ScheduleNum =
2773 getRuntimeSchedule(SchedKind, Values.Chunk != nullptr);
2774 llvm::Value *UpdatedLocation =
2775 emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE);
2776 llvm::Value *ThreadId = getThreadID(CGF, Loc);
2777 llvm::FunctionCallee StaticInitFunction;
// The third argument of createForStaticInitFunction is true only when
// compiling for a target device whose triple is a GPU.
2778 bool isGPUDistribute =
2779 CGM.getLangOpts().OpenMPIsTargetDevice && CGM.getTriple().isGPU();
2780 StaticInitFunction = OMPBuilder.createForStaticInitFunction(
2781 Values.IVSize, Values.IVSigned, isGPUDistribute);
2782
2783 emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
2784 ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown,
2786}
2787
// Emits the static-fini runtime call that ends a statically scheduled region.
// On a GPU target device, distribute directives use
// __kmpc_distribute_static_fini; everything else uses __kmpc_for_static_fini.
// Does nothing when CGF has no insert point.
// NOTE(review): the first line of this signature and two interior lines are
// missing from this listing.
2789 SourceLocation Loc,
2790 OpenMPDirectiveKind DKind) {
// NOTE(review): the assert admits only distribute/for/sections, while the flag
// selection below also tests OMPD_target_teams_loop and isOpenMPLoopDirective;
// confirm which directive kinds callers really pass.
2791 assert((DKind == OMPD_distribute || DKind == OMPD_for ||
2792 DKind == OMPD_sections) &&
2793 "Expected distribute, for, or sections directive kind");
2794 if (!CGF.HaveInsertPoint())
2795 return;
2796 // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
2797 llvm::Value *Args[] = {
2798 emitUpdateLocation(CGF, Loc,
2800 (DKind == OMPD_target_teams_loop)
2801 ? OMP_IDENT_WORK_DISTRIBUTE
2802 : isOpenMPLoopDirective(DKind)
2803 ? OMP_IDENT_WORK_LOOP
2804 : OMP_IDENT_WORK_SECTIONS),
2805 getThreadID(CGF, Loc)};
2807 if (isOpenMPDistributeDirective(DKind) &&
2808 CGM.getLangOpts().OpenMPIsTargetDevice && CGM.getTriple().isGPU())
2809 CGF.EmitRuntimeCall(
2810 OMPBuilder.getOrCreateRuntimeFunction(
2811 CGM.getModule(), OMPRTL___kmpc_distribute_static_fini),
2812 Args);
2813 else
2814 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2815 CGM.getModule(), OMPRTL___kmpc_for_static_fini),
2816 Args);
2817}
2818
// Emits the dispatch-fini runtime call, selected by induction-variable width
// and signedness, with the location and thread id as arguments.
// Does nothing when CGF has no insert point.
// NOTE(review): the first line of this signature is missing from this listing.
2820 SourceLocation Loc,
2821 unsigned IVSize,
2822 bool IVSigned) {
2823 if (!CGF.HaveInsertPoint())
2824 return;
// NOTE(review): the comment below names __kmpc_for_dynamic_fini_*, but the
// callee comes from createDispatchFiniFunction; confirm the runtime entry name.
2825 // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
2826 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2827 CGF.EmitRuntimeCall(OMPBuilder.createDispatchFiniFunction(IVSize, IVSigned),
2828 Args);
2829}
2830
// Emits the dispatch-next runtime call and returns its result converted to a
// boolean value. IL, LB, UB and ST are passed by address so the runtime can
// write the last-iteration flag, bounds and stride.
// NOTE(review): the first line of this signature is missing from this listing.
// Unlike its neighbours, this function does not check HaveInsertPoint().
2832 SourceLocation Loc, unsigned IVSize,
2833 bool IVSigned, Address IL,
2834 Address LB, Address UB,
2835 Address ST) {
2836 // Call __kmpc_dispatch_next(
2837 // ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
2838 // kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
2839 // kmp_int[32|64] *p_stride);
2840 llvm::Value *Args[] = {
2841 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2842 IL.emitRawPointer(CGF), // &isLastIter
2843 LB.emitRawPointer(CGF), // &Lower
2844 UB.emitRawPointer(CGF), // &Upper
2845 ST.emitRawPointer(CGF) // &Stride
2846 };
2847 llvm::Value *Call = CGF.EmitRuntimeCall(
2848 OMPBuilder.createDispatchNextFunction(IVSize, IVSigned), Args);
// The call result is treated as a signed 32-bit integer and converted to bool.
2849 return CGF.EmitScalarConversion(
2850 Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1),
2851 CGF.getContext().BoolTy, Loc);
2852}
2853
// Produces the value for a message argument: a null void pointer when Message
// is null, otherwise the scalar value of the Message expression.
// NOTE(review): the first line of this signature is missing from this listing.
// Loc is not read in the visible body.
2855 const Expr *Message,
2856 SourceLocation Loc) {
2857 if (!Message)
2858 return llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
2859 return CGF.EmitScalarExpr(Message);
2860}
2861
// Produces the 32-bit integer constant for a severity argument:
// 1 for OMPC_SEVERITY_warning and 2 for every other value.
// NOTE(review): the declarator line is missing from this listing. Loc is not
// read in the visible body.
2862llvm::Value *
2864 SourceLocation Loc) {
2865 // OpenMP 6.0, 10.4: "If no severity clause is specified then the effect is
2866 // as if sev-level is fatal."
2867 return llvm::ConstantInt::get(CGM.Int32Ty,
2868 Severity == OMPC_SEVERITY_warning ? 1 : 2);
2869}
2870
// Emits the runtime call that pushes the requested number of threads.
// With the 'strict' modifier the strict entry point is used and the severity
// and message values are appended to the argument list.
// Does nothing when CGF has no insert point.
// NOTE(review): the declarator line, one parameter line and the line declaring
// `Args` are missing from this listing.
2872 CodeGenFunction &CGF, llvm::Value *NumThreads, SourceLocation Loc,
2874 SourceLocation SeverityLoc, const Expr *Message,
2875 SourceLocation MessageLoc) {
2876 if (!CGF.HaveInsertPoint())
2877 return;
// NumThreads is cast to a signed 32-bit integer for the runtime.
2879 {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2880 CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)});
2881 // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
2882 // or __kmpc_push_num_threads_strict(&loc, global_tid, num_threads, severity,
2883 // message) if strict modifier is used.
2884 RuntimeFunction FnID = OMPRTL___kmpc_push_num_threads;
2885 if (Modifier == OMPC_NUMTHREADS_strict) {
2886 FnID = OMPRTL___kmpc_push_num_threads_strict;
2887 Args.push_back(emitSeverityClause(Severity, SeverityLoc));
2888 Args.push_back(emitMessageClause(CGF, Message, MessageLoc));
2889 }
2890 CGF.EmitRuntimeCall(
2891 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), FnID), Args);
2892}
2893
// Emits the __kmpc_push_proc_bind runtime call with the numeric value of
// ProcBind. ProcBind must not be OMP_PROC_BIND_unknown.
// Does nothing when CGF has no insert point.
// NOTE(review): the first line of this signature is missing from this listing.
2895 ProcBindKind ProcBind,
2896 SourceLocation Loc) {
2897 if (!CGF.HaveInsertPoint())
2898 return;
2899 assert(ProcBind != OMP_PROC_BIND_unknown && "Unsupported proc_bind value.");
2900 // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind)
2901 llvm::Value *Args[] = {
2902 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2903 llvm::ConstantInt::get(CGM.IntTy, unsigned(ProcBind), /*isSigned=*/true)};
2904 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2905 CGM.getModule(), OMPRTL___kmpc_push_proc_bind),
2906 Args);
2907}
2908
// Emits a flush. With the OpenMPIRBuilder language option the flush is created
// through OMPBuilder; otherwise a __kmpc_flush runtime call is emitted, and
// only if CGF has an insert point.
// NOTE(review): the first line of this signature is missing from this listing.
// The atomic ordering AO is not read in the visible body.
2910 SourceLocation Loc, llvm::AtomicOrdering AO) {
2911 if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2912 OMPBuilder.createFlush(CGF.Builder);
2913 } else {
2914 if (!CGF.HaveInsertPoint())
2915 return;
2916 // Build call void __kmpc_flush(ident_t *loc)
2917 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2918 CGM.getModule(), OMPRTL___kmpc_flush),
2919 emitUpdateLocation(CGF, Loc));
2920 }
2921}
2922
namespace {
/// Positional indexes of the fields of type kmp_task_t. The enumerator order
/// must match the order in which the fields are added to the record.
enum KmpTaskTFields {
  KmpTaskTShareds = 0, ///< List of shared variables.
  KmpTaskTRoutine,     ///< Task routine.
  KmpTaskTPartId,      ///< Partition id for the untied tasks.
  Data1,               ///< Function with call of destructors for private
                       ///< variables.
  Data2,               ///< Task priority.
  KmpTaskTLowerBound,  ///< (Taskloops only) Lower bound.
  KmpTaskTUpperBound,  ///< (Taskloops only) Upper bound.
  KmpTaskTStride,      ///< (Taskloops only) Stride.
  KmpTaskTLastIter,    ///< (Taskloops only) Is last iteration flag.
  KmpTaskTReductions,  ///< (Taskloops only) Reduction data.
};
} // anonymous namespace
2948
// Asks OMPBuilder to create the offload entries and info metadata, supplying a
// callback that turns the builder's error kinds into clang diagnostics.
// NOTE(review): the signature line is missing from this listing.
2950 // If we are in simd mode or there are no entries, we don't need to do
2951 // anything.
2952 if (CGM.getLangOpts().OpenMPSimd || OMPBuilder.OffloadInfoManager.empty())
2953 return;
2954
2955 llvm::OpenMPIRBuilder::EmitMetadataErrorReportFunctionTy &&ErrorReportFn =
2956 [this](llvm::OpenMPIRBuilder::EmitMetadataErrorKind Kind,
2957 const llvm::TargetRegionEntryInfo &EntryInfo) -> void {
// Loc stays default-constructed unless a file matching the entry is found.
2958 SourceLocation Loc;
// For every kind except the global-variable link error, scan the source
// manager's file entries for one whose device and file ids match the entry,
// and build a location at column 1 of the entry's line in that file.
2959 if (Kind != llvm::OpenMPIRBuilder::EMIT_MD_GLOBAL_VAR_LINK_ERROR) {
2960 for (auto I = CGM.getContext().getSourceManager().fileinfo_begin(),
2961 E = CGM.getContext().getSourceManager().fileinfo_end();
2962 I != E; ++I) {
2963 if (I->getFirst().getUniqueID().getDevice() == EntryInfo.DeviceID &&
2964 I->getFirst().getUniqueID().getFile() == EntryInfo.FileID) {
2965 Loc = CGM.getContext().getSourceManager().translateFileLineCol(
2966 I->getFirst(), EntryInfo.Line, 1);
2967 break;
2968 }
2969 }
2970 }
// Map each error kind to its diagnostic; only the first two are reported at
// Loc, and they carry the entry's parent name.
2971 switch (Kind) {
2972 case llvm::OpenMPIRBuilder::EMIT_MD_TARGET_REGION_ERROR: {
2973 CGM.getDiags().Report(Loc,
2974 diag::err_target_region_offloading_entry_incorrect)
2975 << EntryInfo.ParentName;
2976 } break;
2977 case llvm::OpenMPIRBuilder::EMIT_MD_DECLARE_TARGET_ERROR: {
2978 CGM.getDiags().Report(
2979 Loc, diag::err_target_var_offloading_entry_incorrect_with_parent)
2980 << EntryInfo.ParentName;
2981 } break;
2982 case llvm::OpenMPIRBuilder::EMIT_MD_GLOBAL_VAR_LINK_ERROR: {
2983 CGM.getDiags().Report(diag::err_target_var_offloading_entry_incorrect);
2984 } break;
2985 case llvm::OpenMPIRBuilder::EMIT_MD_GLOBAL_VAR_INDIRECT_ERROR: {
// This kind uses a custom diagnostic id instead of a predefined one.
2986 unsigned DiagID = CGM.getDiags().getCustomDiagID(
2987 DiagnosticsEngine::Error, "Offloading entry for indirect declare "
2988 "target variable is incorrect: the "
2989 "address is invalid.");
2990 CGM.getDiags().Report(DiagID);
2991 } break;
2992 }
2993 };
2994
2995 OMPBuilder.createOffloadEntriesAndInfoMetadata(ErrorReportFn);
2996}
2997
// Lazily builds the kmp_routine_entry_t function-pointer type: the work is
// done only while KmpRoutineEntryPtrTy is still unset, and both the AST type
// (KmpRoutineEntryPtrQTy) and the converted type (KmpRoutineEntryPtrTy) are
// stored for later use.
// NOTE(review): the signature line and the line declaring `EPI` are missing
// from this listing.
2999 if (!KmpRoutineEntryPtrTy) {
3000 // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.
3001 ASTContext &C = CGM.getContext();
3002 QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};
3004 KmpRoutineEntryPtrQTy = C.getPointerType(
3005 C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI));
3006 KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy);
3007 }
3008}
3009
3010namespace {
3011struct PrivateHelpersTy {
3012 PrivateHelpersTy(const Expr *OriginalRef, const VarDecl *Original,
3013 const VarDecl *PrivateCopy, const VarDecl *PrivateElemInit)
3014 : OriginalRef(OriginalRef), Original(Original), PrivateCopy(PrivateCopy),
3015 PrivateElemInit(PrivateElemInit) {}
3016 PrivateHelpersTy(const VarDecl *Original) : Original(Original) {}
3017 const Expr *OriginalRef = nullptr;
3018 const VarDecl *Original = nullptr;
3019 const VarDecl *PrivateCopy = nullptr;
3020 const VarDecl *PrivateElemInit = nullptr;
3021 bool isLocalPrivate() const {
3022 return !OriginalRef && !PrivateCopy && !PrivateElemInit;
3023 }
3024};
3025typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy;
3026} // anonymous namespace
3027
3028static bool isAllocatableDecl(const VarDecl *VD) {
3029 const VarDecl *CVD = VD->getCanonicalDecl();
3030 if (!CVD->hasAttr<OMPAllocateDeclAttr>())
3031 return false;
3032 const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
3033 // Use the default allocation.
3034 return !(AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc &&
3035 !AA->getAllocator());
3036}
3037
// Builds the implicit record ".kmp_privates.t" with one field per entry of
// Privates, copying AlignedAttr attributes from each original variable to its
// field. Returns nullptr when Privates is empty.
// NOTE(review): the declarator line and the lines declaring `Type` and `FD`
// are missing from this listing.
3038static RecordDecl *
3040 if (!Privates.empty()) {
3041 ASTContext &C = CGM.getContext();
3042 // Build struct .kmp_privates_t. {
3043 // /* private vars */
3044 // };
3045 RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t");
3046 RD->startDefinition();
3047 for (const auto &Pair : Privates) {
3048 const VarDecl *VD = Pair.second.Original;
3050 // If the private variable is a local variable with lvalue ref type,
3051 // allocate the pointer instead of the pointee type.
3052 if (Pair.second.isLocalPrivate()) {
3053 if (VD->getType()->isLValueReferenceType())
3054 Type = C.getPointerType(Type);
// An allocatable declaration gets one more level of pointer.
3055 if (isAllocatableDecl(VD))
3056 Type = C.getPointerType(Type);
3057 }
// Carry over any alignment attributes of the original variable to the field.
3059 if (VD->hasAttrs()) {
3060 for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()),
3061 E(VD->getAttrs().end());
3062 I != E; ++I)
3063 FD->addAttr(*I);
3064 }
3065 }
3066 RD->completeDefinition();
3067 return RD;
3068 }
3069 return nullptr;
3070}
3071
// Builds the implicit record "kmp_task_t" (and the union "kmp_cmplrdata_t"
// used for its data1/data2 fields). Five fields are always added; five more
// are appended when Kind is a taskloop directive. The field order corresponds
// to the KmpTaskTFields enumerators.
// NOTE(review): the declarator line (name and leading parameters) is missing
// from this listing.
3072static RecordDecl *
3074 QualType KmpInt32Ty,
3075 QualType KmpRoutineEntryPointerQTy) {
3076 ASTContext &C = CGM.getContext();
3077 // Build struct kmp_task_t {
3078 // void * shareds;
3079 // kmp_routine_entry_t routine;
3080 // kmp_int32 part_id;
3081 // kmp_cmplrdata_t data1;
3082 // kmp_cmplrdata_t data2;
3083 // For taskloops additional fields:
3084 // kmp_uint64 lb;
3085 // kmp_uint64 ub;
3086 // kmp_int64 st;
3087 // kmp_int32 liter;
3088 // void * reductions;
3089 // };
// kmp_cmplrdata_t is a union of a kmp_int32 and a routine-entry pointer.
3090 RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TagTypeKind::Union);
3091 UD->startDefinition();
3092 addFieldToRecordDecl(C, UD, KmpInt32Ty);
3093 addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy);
3094 UD->completeDefinition();
3095 CanQualType KmpCmplrdataTy = C.getCanonicalTagType(UD);
3096 RecordDecl *RD = C.buildImplicitRecord("kmp_task_t");
3097 RD->startDefinition();
3098 addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3099 addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
3100 addFieldToRecordDecl(C, RD, KmpInt32Ty);
3101 addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
3102 addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
3103 if (isOpenMPTaskLoopDirective(Kind)) {
3104 QualType KmpUInt64Ty =
3105 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
3106 QualType KmpInt64Ty =
3107 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
3108 addFieldToRecordDecl(C, RD, KmpUInt64Ty);
3109 addFieldToRecordDecl(C, RD, KmpUInt64Ty);
3110 addFieldToRecordDecl(C, RD, KmpInt64Ty);
3111 addFieldToRecordDecl(C, RD, KmpInt32Ty);
3112 addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3113 }
3114 RD->completeDefinition();
3115 return RD;
3116}
3117
// Builds the implicit record "kmp_task_t_with_privates": the first field has
// type KmpTaskTQTy, and a second field holding the privates record is added
// only when createPrivatesRecordDecl returns a record (it returns nullptr for
// an empty Privates list).
// NOTE(review): the declarator lines (name and parameters) are missing from
// this listing.
3118static RecordDecl *
3121 ASTContext &C = CGM.getContext();
3122 // Build struct kmp_task_t_with_privates {
3123 // kmp_task_t task_data;
3124 // .kmp_privates_t. privates;
3125 // };
3126 RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates");
3127 RD->startDefinition();
3128 addFieldToRecordDecl(C, RD, KmpTaskTQTy);
3129 if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates))
3130 addFieldToRecordDecl(C, RD, C.getCanonicalTagType(PrivateRD));
3131 RD->completeDefinition();
3132 return RD;
3133}
3134
3135/// Emit a proxy function which accepts kmp_task_t as the second
3136/// argument.
3137/// \code
3138/// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
3139/// TaskFunction(gtid, &tt->task_data.part_id, &tt->privates, task_privates_map, tt,
3140/// For taskloops:
3141/// tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
3142/// tt->task_data.reductions, tt->task_data.shareds);
3143/// return 0;
3144/// }
3145/// \endcode
/// The taskloop-only arguments are appended only when \p Kind is a taskloop
/// directive; shareds is always the last argument. When the task record has no
/// privates field, a null pointer is passed in place of &tt->privates.
// NOTE(review): the line carrying the function name and leading parameters is
// missing from this listing.
3146static llvm::Function *
3148 OpenMPDirectiveKind Kind, QualType KmpInt32Ty,
3149 QualType KmpTaskTWithPrivatesPtrQTy,
3150 QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
3151 QualType SharedsPtrTy, llvm::Function *TaskFunction,
3152 llvm::Value *TaskPrivatesMap) {
3153 ASTContext &C = CGM.getContext();
// The proxy takes two implicit parameters: the thread id and a restrict
// pointer to the task record with privates.
3154 auto *GtidArg =
3155 ImplicitParamDecl::Create(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3156 KmpInt32Ty, ImplicitParamKind::Other);
3157 auto *TaskTypeArg = ImplicitParamDecl::Create(
3158 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3159 KmpTaskTWithPrivatesPtrQTy.withRestrict(), ImplicitParamKind::Other);
3160 FunctionArgList Args{GtidArg, TaskTypeArg};
3161 const auto &TaskEntryFnInfo =
3162 CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
3163 llvm::FunctionType *TaskEntryTy =
3164 CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
3165 std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""});
// The proxy is an internal-linkage function marked as non-recursive.
3166 auto *TaskEntry = llvm::Function::Create(
3167 TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
3168 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo);
3169 if (!CGM.getCodeGenOpts().SampleProfileFile.empty())
3170 TaskEntry->addFnAttr("sample-profile-suffix-elision-policy", "selected");
3171 TaskEntry->setDoesNotRecurse();
3172 CodeGenFunction CGF(CGM);
3173 CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args,
3174 Loc, Loc);
3175
3176 // TaskFunction(gtid, &tt->task_data.part_id, &tt->privates, task_privates_map,
3177 // tt,
3178 // For taskloops:
3179 // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
3180 // tt->task_data.reductions, tt->task_data.shareds);
3181 llvm::Value *GtidParam = CGF.EmitLoadOfScalar(
3182 CGF.GetAddrOfLocalVar(GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc);
// TDBase is the whole task-with-privates record; Base is its first field (the
// kmp_task_t part).
3183 LValue TDBase = CGF.EmitLoadOfPointerLValue(
3184 CGF.GetAddrOfLocalVar(TaskTypeArg),
3185 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3186 const auto *KmpTaskTWithPrivatesQTyRD =
3187 KmpTaskTWithPrivatesQTy->castAsRecordDecl();
3188 LValue Base =
3189 CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
3190 const auto *KmpTaskTQTyRD = KmpTaskTQTy->castAsRecordDecl();
// part_id is passed by address, not by value.
3191 auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
3192 LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
3193 llvm::Value *PartidParam = PartIdLVal.getPointer(CGF);
3194
// The shareds pointer is loaded and cast to the memory type of SharedsPtrTy.
3195 auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
3196 LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
3197 llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3198 CGF.EmitLoadOfScalar(SharedsLVal, Loc),
3199 CGF.ConvertTypeForMem(SharedsPtrTy));
3200
// The privates field is the second field of the outer record and may be
// absent; pass its address as a void pointer, or null when it does not exist.
3201 auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
3202 llvm::Value *PrivatesParam;
3203 if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
3204 LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
3205 PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3206 PrivatesLVal.getPointer(CGF), CGF.VoidPtrTy);
3207 } else {
3208 PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
3209 }
3210
// Arguments common to tasks and taskloops; the last one is the task record
// address cast to a void pointer.
3211 llvm::Value *CommonArgs[] = {
3212 GtidParam, PartidParam, PrivatesParam, TaskPrivatesMap,
3213 CGF.Builder
3214 .CreatePointerBitCastOrAddrSpaceCast(TDBase.getAddress(),
3215 CGF.VoidPtrTy, CGF.Int8Ty)
3216 .emitRawPointer(CGF)};
3217 SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs),
3218 std::end(CommonArgs));
// Taskloops additionally pass the loaded lower bound, upper bound, stride,
// last-iteration flag and reductions pointer, in that order.
3219 if (isOpenMPTaskLoopDirective(Kind)) {
3220 auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound);
3221 LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI);
3222 llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc);
3223 auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound);
3224 LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI);
3225 llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc);
3226 auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride);
3227 LValue StLVal = CGF.EmitLValueForField(Base, *StFI);
3228 llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc);
3229 auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
3230 LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
3231 llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc);
3232 auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions);
3233 LValue RLVal = CGF.EmitLValueForField(Base, *RFI);
3234 llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc);
3235 CallArgs.push_back(LBParam);
3236 CallArgs.push_back(UBParam);
3237 CallArgs.push_back(StParam);
3238 CallArgs.push_back(LIParam);
3239 CallArgs.push_back(RParam);
3240 }
3241 CallArgs.push_back(SharedsParam);
3242
3243 CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction,
3244 CallArgs);
// The proxy always returns 0.
3245 CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)),
3246 CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty));
3247 CGF.FinishFunction();
3248 return TaskEntry;
3249}
3250
// Emits an internal function, with the same two implicit parameters as the
// task entry proxy, that pushes a destroy cleanup for every field of the
// privates record whose type needs destruction. Returns the new function.
// NOTE(review): the first line of this signature is missing from this listing.
// The body reads the second field of the outer record without checking that
// it exists, so it presumably must only be called when the task has privates;
// verify against the caller.
3252 SourceLocation Loc,
3253 QualType KmpInt32Ty,
3254 QualType KmpTaskTWithPrivatesPtrQTy,
3255 QualType KmpTaskTWithPrivatesQTy) {
3256 ASTContext &C = CGM.getContext();
3257 auto *GtidArg =
3258 ImplicitParamDecl::Create(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3259 KmpInt32Ty, ImplicitParamKind::Other);
3260 auto *TaskTypeArg = ImplicitParamDecl::Create(
3261 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3262 KmpTaskTWithPrivatesPtrQTy.withRestrict(), ImplicitParamKind::Other);
3263 FunctionArgList Args{GtidArg, TaskTypeArg};
3264 const auto &DestructorFnInfo =
3265 CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
3266 llvm::FunctionType *DestructorFnTy =
3267 CGM.getTypes().GetFunctionType(DestructorFnInfo);
3268 std::string Name =
3269 CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""});
// The destructor function has internal linkage and is marked non-recursive.
3270 auto *DestructorFn =
3271 llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage,
3272 Name, &CGM.getModule());
3273 CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn,
3274 DestructorFnInfo);
3275 if (!CGM.getCodeGenOpts().SampleProfileFile.empty())
3276 DestructorFn->addFnAttr("sample-profile-suffix-elision-policy", "selected");
3277 DestructorFn->setDoesNotRecurse();
3278 CodeGenFunction CGF(CGM);
3279 CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo,
3280 Args, Loc, Loc);
3281
3282 LValue Base = CGF.EmitLoadOfPointerLValue(
3283 CGF.GetAddrOfLocalVar(TaskTypeArg),
3284 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3285 const auto *KmpTaskTWithPrivatesQTyRD =
3286 KmpTaskTWithPrivatesQTy->castAsRecordDecl();
// Step to the second field of the outer record (the privates record) and
// rebase onto it.
3287 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
3288 Base = CGF.EmitLValueForField(Base, *FI);
// Only fields with a non-trivial destruction kind get a cleanup.
3289 for (const auto *Field : FI->getType()->castAsRecordDecl()->fields()) {
3290 if (QualType::DestructionKind DtorKind =
3291 Field->getType().isDestructedType()) {
3292 LValue FieldLValue = CGF.EmitLValueForField(Base, Field);
3293 CGF.pushDestroy(DtorKind, FieldLValue.getAddress(), Field->getType());
3294 }
3295 }
3296 CGF.FinishFunction();
3297 return DestructorFn;
3298}
3299
3300/// Emit a privates mapping function for correct handling of private and
3301/// firstprivate variables.
3302/// \code
3303/// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1>
3304/// **noalias priv1,..., <tyn> **noalias privn) {
3305/// *priv1 = &.privates.priv1;
3306/// ...;
3307/// *privn = &.privates.privn;
3308/// }
3309/// \endcode
/// Output parameters are created, in this order, for the private,
/// firstprivate, and lastprivate variables and for the private locals of
/// \p Data.
// NOTE(review): the line carrying the function name and leading parameters,
// the parameter line declaring `Privates`, the trailing argument line of each
// ImplicitParamDecl::Create call, and the line declaring `Ty` are missing from
// this listing.
3310static llvm::Value *
3312 const OMPTaskDataTy &Data, QualType PrivatesQTy,
3314 ASTContext &C = CGM.getContext();
3315 FunctionArgList Args;
// Args[0] is the const restrict pointer to the privates record.
3316 auto *TaskPrivatesArg = ImplicitParamDecl::Create(
3317 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3318 C.getPointerType(PrivatesQTy).withConst().withRestrict(),
3320 Args.push_back(TaskPrivatesArg);
// Maps each variable to the index of its output parameter in Args. Counting
// starts at 1 because index 0 is the privates-record parameter.
3321 llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, unsigned> PrivateVarsPos;
3322 unsigned Counter = 1;
3323 for (const Expr *E : Data.PrivateVars) {
3324 Args.push_back(ImplicitParamDecl::Create(
3325 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3326 C.getPointerType(C.getPointerType(E->getType()))
3327 .withConst()
3328 .withRestrict(),
3330 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3331 PrivateVarsPos[VD] = Counter;
3332 ++Counter;
3333 }
3334 for (const Expr *E : Data.FirstprivateVars) {
3335 Args.push_back(ImplicitParamDecl::Create(
3336 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3337 C.getPointerType(C.getPointerType(E->getType()))
3338 .withConst()
3339 .withRestrict(),
3341 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3342 PrivateVarsPos[VD] = Counter;
3343 ++Counter;
3344 }
3345 for (const Expr *E : Data.LastprivateVars) {
3346 Args.push_back(ImplicitParamDecl::Create(
3347 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3348 C.getPointerType(C.getPointerType(E->getType()))
3349 .withConst()
3350 .withRestrict(),
3352 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3353 PrivateVarsPos[VD] = Counter;
3354 ++Counter;
3355 }
// Private locals: reference-typed and allocatable variables each add one
// pointer level to the parameter's pointee type.
3356 for (const VarDecl *VD : Data.PrivateLocals) {
3358 if (VD->getType()->isLValueReferenceType())
3359 Ty = C.getPointerType(Ty);
3360 if (isAllocatableDecl(VD))
3361 Ty = C.getPointerType(Ty);
3362 Args.push_back(ImplicitParamDecl::Create(
3363 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3364 C.getPointerType(C.getPointerType(Ty)).withConst().withRestrict(),
3366 PrivateVarsPos[VD] = Counter;
3367 ++Counter;
3368 }
3369 const auto &TaskPrivatesMapFnInfo =
3370 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
3371 llvm::FunctionType *TaskPrivatesMapTy =
3372 CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo);
3373 std::string Name =
3374 CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""});
3375 auto *TaskPrivatesMap = llvm::Function::Create(
3376 TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name,
3377 &CGM.getModule());
3378 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap,
3379 TaskPrivatesMapFnInfo);
3380 if (!CGM.getCodeGenOpts().SampleProfileFile.empty())
3381 TaskPrivatesMap->addFnAttr("sample-profile-suffix-elision-policy",
3382 "selected");
// When optimizing, replace noinline/optnone with always-inline on the mapping
// function.
3383 if (CGM.getCodeGenOpts().OptimizationLevel != 0) {
3384 TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline);
3385 TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone);
3386 TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline);
3387 }
3388 CodeGenFunction CGF(CGM);
3389 CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap,
3390 TaskPrivatesMapFnInfo, Args, Loc, Loc);
3391
3392 // *privi = &.privates.privi;
3393 LValue Base = CGF.EmitLoadOfPointerLValue(
3394 CGF.GetAddrOfLocalVar(TaskPrivatesArg),
3395 TaskPrivatesArg->getType()->castAs<PointerType>());
3396 const auto *PrivatesQTyRD = PrivatesQTy->castAsRecordDecl();
// Counter is reused as the index into Privates. This assumes the record's
// fields are in the same order as Privates -- TODO confirm against the code
// that builds the record.
3397 Counter = 0;
3398 for (const FieldDecl *Field : PrivatesQTyRD->fields()) {
3399 LValue FieldLVal = CGF.EmitLValueForField(Base, Field);
// Look up the output parameter that belongs to this field's original variable
// and store the field's address through it.
3400 const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]];
3401 LValue RefLVal =
3402 CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType());
3403 LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue(
3404 RefLVal.getAddress(), RefLVal.getType()->castAs<PointerType>());
3405 CGF.EmitStoreOfScalar(FieldLVal.getPointer(CGF), RefLoadLVal);
3406 ++Counter;
3407 }
3408 CGF.FinishFunction();
3409 return TaskPrivatesMap;
3410}
3411
3412/// Emit initialization for private variables in task-based directives.
/// Iterates \p Privates in lock-step with the fields of the privates record
/// (the second field of \p KmpTaskTWithPrivatesQTyRD) and emits each private
/// copy's initializer into its field; local privates only advance the field
/// iterator. A copy is initialized only if it has an initializer and, when
/// \p ForDup is true, only if that initializer is a non-trivial
/// CXXConstructExpr.
/// NOTE(review): the line carrying the function name and its first parameter
/// is not visible in this excerpt; callers invoke it as
/// emitPrivatesInit(CGF, ...).
3414 const OMPExecutableDirective &D,
3415 Address KmpTaskSharedsPtr, LValue TDBase,
3416 const RecordDecl *KmpTaskTWithPrivatesQTyRD,
3417 QualType SharedsTy, QualType SharedsPtrTy,
3418 const OMPTaskDataTy &Data,
3419 ArrayRef<PrivateDataTy> Privates, bool ForDup) {
3420 ASTContext &C = CGF.getContext();
 // The privates record is the second field of the task-with-privates record.
3421 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
3422 LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI);
3423 OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind())
3424 ? OMPD_taskloop
3425 : OMPD_task;
3426 const CapturedStmt &CS = *D.getCapturedStmt(Kind);
3427 CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS);
3428 LValue SrcBase;
3429 bool IsTargetTask =
3430 isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) ||
3431 isOpenMPTargetExecutionDirective(D.getDirectiveKind());
3432 // For target-based directives skip 4 firstprivate arrays BasePointersArray,
3433 // PointersArray, SizesArray, and MappersArray. The original variables for
3434 // these arrays are not captured and we get their addresses explicitly.
3435 if ((!IsTargetTask && !Data.FirstprivateVars.empty() && ForDup) ||
3436 (IsTargetTask && KmpTaskSharedsPtr.isValid())) {
3437 SrcBase = CGF.MakeAddrLValue(
3439 KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy),
3440 CGF.ConvertTypeForMem(SharedsTy)),
3441 SharedsTy);
3442 }
 // From here on FI walks the fields of the privates record itself, one field
 // per entry of Privates.
3443 FI = FI->getType()->castAsRecordDecl()->field_begin();
3444 for (const PrivateDataTy &Pair : Privates) {
3445 // Do not initialize private locals.
3446 if (Pair.second.isLocalPrivate()) {
3447 ++FI;
3448 continue;
3449 }
3450 const VarDecl *VD = Pair.second.PrivateCopy;
3451 const Expr *Init = VD->getAnyInitializer();
 // On the dup path only non-trivial constructor initializers are re-emitted.
3452 if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) &&
3453 !CGF.isTrivialInitializer(Init)))) {
3454 LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
 // A non-null PrivateElemInit means the copy is initialized from the
 // original variable, whose lvalue is computed into SharedRefLValue below.
3455 if (const VarDecl *Elem = Pair.second.PrivateElemInit) {
3456 const VarDecl *OriginalVD = Pair.second.Original;
3457 // Check if the variable is the target-based BasePointersArray,
3458 // PointersArray, SizesArray, or MappersArray.
3459 LValue SharedRefLValue;
3460 QualType Type = PrivateLValue.getType();
3461 const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD);
3462 if (IsTargetTask && !SharedField) {
3463 assert(isa<ImplicitParamDecl>(OriginalVD) &&
3464 isa<CapturedDecl>(OriginalVD->getDeclContext()) &&
3465 cast<CapturedDecl>(OriginalVD->getDeclContext())
3466 ->getNumParams() == 0 &&
3468 cast<CapturedDecl>(OriginalVD->getDeclContext())
3469 ->getDeclContext()) &&
3470 "Expected artificial target data variable.");
3471 SharedRefLValue =
3472 CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type);
3473 } else if (ForDup) {
 // Read the original through the shareds record of the source task,
 // re-tagged with the declared alignment of the original variable.
3474 SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField);
3475 SharedRefLValue = CGF.MakeAddrLValue(
3476 SharedRefLValue.getAddress().withAlignment(
3477 C.getDeclAlign(OriginalVD)),
3478 SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl),
3479 SharedRefLValue.getTBAAInfo());
3480 } else if (CGF.LambdaCaptureFields.count(
3481 Pair.second.Original->getCanonicalDecl()) > 0 ||
3482 isa_and_nonnull<BlockDecl>(CGF.CurCodeDecl)) {
3483 SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
3484 } else {
3485 // Processing for implicitly captured variables.
3486 InlinedOpenMPRegionRAII Region(
3487 CGF, [](CodeGenFunction &, PrePostActionTy &) {}, OMPD_unknown,
3488 /*HasCancel=*/false, /*NoInheritance=*/true);
3489 SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
3490 }
3491 if (Type->isArrayType()) {
3492 // Initialize firstprivate array.
3494 // Perform simple memcpy.
3495 CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type);
3496 } else {
3497 // Initialize firstprivate array using element-by-element
3498 // initialization.
3500 PrivateLValue.getAddress(), SharedRefLValue.getAddress(), Type,
3501 [&CGF, Elem, Init, &CapturesInfo](Address DestElement,
3502 Address SrcElement) {
3503 // Clean up any temporaries needed by the initialization.
3504 CodeGenFunction::OMPPrivateScope InitScope(CGF);
3505 InitScope.addPrivate(Elem, SrcElement);
3506 (void)InitScope.Privatize();
3507 // Emit initialization for single element.
3508 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(
3509 CGF, &CapturesInfo);
3510 CGF.EmitAnyExprToMem(Init, DestElement,
3511 Init->getType().getQualifiers(),
3512 /*IsInitializer=*/false);
3513 });
3514 }
3515 } else {
 // Non-array copy: map Elem to the original's address and emit Init
 // directly into the private field.
3516 CodeGenFunction::OMPPrivateScope InitScope(CGF);
3517 InitScope.addPrivate(Elem, SharedRefLValue.getAddress());
3518 (void)InitScope.Privatize();
3519 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
3520 CGF.EmitExprAsInit(Init, VD, PrivateLValue,
3521 /*capturedByInit=*/false);
3522 }
3523 } else {
3524 CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
3525 }
3526 }
3527 ++FI;
3528 }
3529}
3530
3531/// Check if duplication function is required for taskloops.
/// Scans the non-local entries of \c Privates and stops at the first private
/// copy whose initializer is a CXXConstructExpr satisfying the rest of the
/// test.
/// NOTE(review): the signature and the second half of the condition are on
/// lines not visible in this excerpt; the caller passes (CGF, Privates).
/// \return true if such a private copy exists, false otherwise.
3534 bool InitRequired = false;
3535 for (const PrivateDataTy &Pair : Privates) {
 // Local privates are never initialized here, so they cannot require it.
3536 if (Pair.second.isLocalPrivate())
3537 continue;
3538 const VarDecl *VD = Pair.second.PrivateCopy;
3539 const Expr *Init = VD->getAnyInitializer();
3540 InitRequired = InitRequired || (isa_and_nonnull<CXXConstructExpr>(Init) &&
3542 if (InitRequired)
3543 break;
3544 }
3545 return InitRequired;
3546}
3547
3548
3549/// Emit task_dup function (for initialization of
3550/// private/firstprivate/lastprivate vars and last_iter flag)
3551/// \code
3552/// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int
3553/// lastpriv) {
3554/// // setup lastprivate flag
3555/// task_dst->last = lastpriv;
3556/// // could be constructor calls here...
3557/// }
3558/// \endcode
/// The function is created with internal linkage and marked as non-recursing.
/// \param WithLastIter if true, the \c lastpriv argument is stored into the
/// destination task's last-iteration field (index \c KmpTaskTLastIter).
/// \return the emitted function.
3559static llvm::Value *
3561 const OMPExecutableDirective &D,
3562 QualType KmpTaskTWithPrivatesPtrQTy,
3563 const RecordDecl *KmpTaskTWithPrivatesQTyRD,
3564 const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy,
3565 QualType SharedsPtrTy, const OMPTaskDataTy &Data,
3566 ArrayRef<PrivateDataTy> Privates, bool WithLastIter) {
3567 ASTContext &C = CGM.getContext();
 // Three implicit parameters: destination task, source task, lastpriv flag.
3568 auto *DstArg = ImplicitParamDecl::Create(
3569 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpTaskTWithPrivatesPtrQTy,
3571 auto *SrcArg = ImplicitParamDecl::Create(
3572 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpTaskTWithPrivatesPtrQTy,
3574 auto *LastprivArg =
3575 ImplicitParamDecl::Create(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy,
3577 FunctionArgList Args{DstArg, SrcArg, LastprivArg};
3578 const auto &TaskDupFnInfo =
3579 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
3580 llvm::FunctionType *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo);
3581 std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_dup", ""});
3582 auto *TaskDup = llvm::Function::Create(
3583 TaskDupTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
3584 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo);
3585 if (!CGM.getCodeGenOpts().SampleProfileFile.empty())
3586 TaskDup->addFnAttr("sample-profile-suffix-elision-policy", "selected");
3587 TaskDup->setDoesNotRecurse();
3588 CodeGenFunction CGF(CGM);
3589 CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc,
3590 Loc);
3591
 // TDBase addresses the destination task (*task_dst).
3592 LValue TDBase = CGF.EmitLoadOfPointerLValue(
3593 CGF.GetAddrOfLocalVar(DstArg),
3594 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3595 // task_dst->liter = lastpriv;
3596 if (WithLastIter) {
3597 auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
3598 LValue Base = CGF.EmitLValueForField(
3599 TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
3600 LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
3601 llvm::Value *Lastpriv = CGF.EmitLoadOfScalar(
3602 CGF.GetAddrOfLocalVar(LastprivArg), /*Volatile=*/false, C.IntTy, Loc);
3603 CGF.EmitStoreOfScalar(Lastpriv, LILVal);
3604 }
3605
3606 // Emit initial values for private copies (if any).
3607 assert(!Privates.empty());
3608 Address KmpTaskSharedsPtr = Address::invalid();
 // With firstprivates, form the shareds address from the source task's
 // KmpTaskTShareds field; emitPrivatesInit uses it as the copy source on the
 // dup path. The inner TDBase below refers to *task_src and shadows the outer.
3609 if (!Data.FirstprivateVars.empty()) {
3610 LValue TDBase = CGF.EmitLoadOfPointerLValue(
3611 CGF.GetAddrOfLocalVar(SrcArg),
3612 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3613 LValue Base = CGF.EmitLValueForField(
3614 TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
3615 KmpTaskSharedsPtr = Address(
3617 Base, *std::next(KmpTaskTQTyRD->field_begin(),
3618 KmpTaskTShareds)),
3619 Loc),
3620 CGF.Int8Ty, CGM.getNaturalTypeAlignment(SharedsTy));
3621 }
3622 emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD,
3623 SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true);
3624 CGF.FinishFunction();
3625 return TaskDup;
3626}
3627
3628/// Checks if destructor function is required to be generated.
/// Local privates are ignored; for every other entry the non-reference type of
/// the original variable is tested with isDestructedType().
/// NOTE(review): the parameter line declaring \c Privates is not visible in
/// this excerpt; \p KmpTaskTWithPrivatesQTyRD is not used in the visible body.
3629/// \return true if cleanups are required, false otherwise.
3630static bool
3631checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD,
3633 for (const PrivateDataTy &P : Privates) {
3634 if (P.second.isLocalPrivate())
3635 continue;
3636 QualType Ty = P.second.Original->getType().getNonReferenceType();
3637 if (Ty.isDestructedType())
3638 return true;
3639 }
3640 return false;
3641}
3642
3643namespace {
3644/// Loop generator for OpenMP iterator expression.
/// The constructor privatizes every iterator variable and its counter, then
/// opens one nested loop per iterator (an "iter.cont" test block followed by an
/// "iter.body" block). The destructor closes the loops in reverse order by
/// emitting the counter update, the branch back to "iter.cont" and the
/// "iter.exit" block. When the iterator expression is null, neither the
/// constructor nor the destructor emits any loop code.
/// NOTE(review): the base-class clause is on a line not visible in this
/// excerpt; the constructor initializes CodeGenFunction::OMPPrivateScope.
3645class OMPIteratorGeneratorScope final
3647 CodeGenFunction &CGF;
3648 const OMPIteratorExpr *E = nullptr;
 // Per-iterator jump destinations, indexed like the iterators of E.
3649 SmallVector<CodeGenFunction::JumpDest, 4> ContDests;
3650 SmallVector<CodeGenFunction::JumpDest, 4> ExitDests;
3651 OMPIteratorGeneratorScope() = delete;
3652 OMPIteratorGeneratorScope(OMPIteratorGeneratorScope &) = delete;
3653
3654public:
3655 OMPIteratorGeneratorScope(CodeGenFunction &CGF, const OMPIteratorExpr *E)
3656 : CodeGenFunction::OMPPrivateScope(CGF), CGF(CGF), E(E) {
3657 if (!E)
3658 return;
 // First pass: evaluate all upper bounds and register private storage for
 // each iterator variable and its counter before any loop block is opened.
3659 SmallVector<llvm::Value *, 4> Uppers;
3660 for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
3661 Uppers.push_back(CGF.EmitScalarExpr(E->getHelper(I).Upper));
3662 const auto *VD = cast<VarDecl>(E->getIteratorDecl(I));
3663 addPrivate(VD, CGF.CreateMemTemp(VD->getType(), VD->getName()));
3664 const OMPIteratorHelperData &HelperData = E->getHelper(I);
3665 addPrivate(
3666 HelperData.CounterVD,
3667 CGF.CreateMemTemp(HelperData.CounterVD->getType(), "counter.addr"));
3668 }
3669 Privatize();
3670
 // Second pass: emit the header of each loop, outermost iterator first.
3671 for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
3672 const OMPIteratorHelperData &HelperData = E->getHelper(I);
3673 LValue CLVal =
3674 CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(HelperData.CounterVD),
3675 HelperData.CounterVD->getType());
3676 // Counter = 0;
3677 CGF.EmitStoreOfScalar(
3678 llvm::ConstantInt::get(CLVal.getAddress().getElementType(), 0),
3679 CLVal);
3680 CodeGenFunction::JumpDest &ContDest =
3681 ContDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.cont"));
3682 CodeGenFunction::JumpDest &ExitDest =
3683 ExitDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.exit"));
3684 // N = <number-of_iterations>;
3685 llvm::Value *N = Uppers[I];
3686 // cont:
3687 // if (Counter < N) goto body; else goto exit;
3688 CGF.EmitBlock(ContDest.getBlock());
3689 auto *CVal =
3690 CGF.EmitLoadOfScalar(CLVal, HelperData.CounterVD->getLocation());
 // The comparison signedness follows the counter's declared type.
3691 llvm::Value *Cmp =
3692 HelperData.CounterVD->getType()->isSignedIntegerOrEnumerationType()
3693 ? CGF.Builder.CreateICmpSLT(CVal, N)
3694 : CGF.Builder.CreateICmpULT(CVal, N);
3695 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("iter.body");
3696 CGF.Builder.CreateCondBr(Cmp, BodyBB, ExitDest.getBlock());
3697 // body:
3698 CGF.EmitBlock(BodyBB);
3699 // Iteri = Begini + Counter * Stepi;
3700 CGF.EmitIgnoredExpr(HelperData.Update);
3701 }
3702 }
3703 ~OMPIteratorGeneratorScope() {
3704 if (!E)
3705 return;
 // Close the loops innermost-first; only the last emitted exit block (I == 1)
 // is passed IsFinished=true.
3706 for (unsigned I = E->numOfIterators(); I > 0; --I) {
3707 // Counter = Counter + 1;
3708 const OMPIteratorHelperData &HelperData = E->getHelper(I - 1);
3709 CGF.EmitIgnoredExpr(HelperData.CounterUpdate);
3710 // goto cont;
3711 CGF.EmitBranchThroughCleanup(ContDests[I - 1]);
3712 // exit:
3713 CGF.EmitBlock(ExitDests[I - 1].getBlock(), /*IsFinished=*/I == 1);
3714 }
3715 }
3716};
3717} // namespace
3718
/// Computes the address and the size of the object denoted by expression \c E.
///  - Array shaping expression: the address is the value of the base pointer
///    expression; the size is the size of the base's pointee type multiplied
///    by every dimension (each converted to size_t).
///  - Array section: the address is the lvalue address of \c E; the size is
///    the pointer difference between one element past the section's upper
///    bound and that address.
///  - Anything else: the address is the lvalue address of \c E and the size is
///    the size of its type.
/// NOTE(review): the line with the function name and parameters is not visible
/// in this excerpt; callers invoke it as getPointerAndSize(CGF, E).
/// \return the pair (address, size).
3719static std::pair<llvm::Value *, llvm::Value *>
3721 const auto *OASE = dyn_cast<OMPArrayShapingExpr>(E);
3722 llvm::Value *Addr;
3723 if (OASE) {
3724 const Expr *Base = OASE->getBase();
3725 Addr = CGF.EmitScalarExpr(Base);
3726 } else {
3727 Addr = CGF.EmitLValue(E).getPointer(CGF);
3728 }
3729 llvm::Value *SizeVal;
3730 QualType Ty = E->getType();
3731 if (OASE) {
3732 SizeVal = CGF.getTypeSize(OASE->getBase()->getType()->getPointeeType());
3733 for (const Expr *SE : OASE->getDimensions()) {
3734 llvm::Value *Sz = CGF.EmitScalarExpr(SE);
3735 Sz = CGF.EmitScalarConversion(
3736 Sz, SE->getType(), CGF.getContext().getSizeType(), SE->getExprLoc());
3737 SizeVal = CGF.Builder.CreateNUWMul(SizeVal, Sz);
3738 }
3739 } else if (const auto *ASE =
3740 dyn_cast<ArraySectionExpr>(E->IgnoreParenImpCasts())) {
 // UpAddr points one element past the upper bound of the section.
3741 LValue UpAddrLVal = CGF.EmitArraySectionExpr(ASE, /*IsLowerBound=*/false);
3742 Address UpAddrAddress = UpAddrLVal.getAddress();
3743 llvm::Value *UpAddr = CGF.Builder.CreateConstGEP1_32(
3744 UpAddrAddress.getElementType(), UpAddrAddress.emitRawPointer(CGF),
3745 /*Idx0=*/1);
3746 SizeVal = CGF.Builder.CreatePtrDiff(UpAddr, Addr, "", /*IsNUW=*/true);
3747 } else {
3748 SizeVal = CGF.getTypeSize(Ty);
3749 }
3750 return std::make_pair(Addr, SizeVal);
3751}
3752
3753/// Builds kmp_task_affinity_info_t, if it is not built yet.
/// The implicit record gets three fields, in this order: an intptr-typed
/// field, a size_t-typed field and a 32-bit unsigned flags field.
/// \param KmpTaskAffinityInfoTy in/out: left untouched when already set,
/// otherwise assigned the canonical type of the newly built record.
3754static void getKmpAffinityType(ASTContext &C, QualType &KmpTaskAffinityInfoTy) {
3755 QualType FlagsTy = C.getIntTypeForBitwidth(32, /*Signed=*/false);
3756 if (KmpTaskAffinityInfoTy.isNull()) {
3757 RecordDecl *KmpAffinityInfoRD =
3758 C.buildImplicitRecord("kmp_task_affinity_info_t");
3759 KmpAffinityInfoRD->startDefinition();
3760 addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getIntPtrType());
3761 addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getSizeType());
3762 addFieldToRecordDecl(C, KmpAffinityInfoRD, FlagsTy);
3763 KmpAffinityInfoRD->completeDefinition();
3764 KmpTaskAffinityInfoTy = C.getCanonicalTagType(KmpAffinityInfoRD);
3765 }
3766}
3767
/// Builds and initializes the runtime task object for directive \p D: gathers
/// the private copies, emits the privates-map and task-entry helpers, emits the
/// task allocation call, handles detach and affinity clauses, copies the
/// shareds, initializes the private copies and fills in the destructor and
/// priority data.
/// NOTE(review): the opening line(s) of this definition (return type, name and
/// leading parameters) and a few interior lines are not visible in this
/// excerpt.
/// \return the \c Result record populated at the end of the function.
3770 const OMPExecutableDirective &D,
3771 llvm::Function *TaskFunction, QualType SharedsTy,
3772 Address Shareds, const OMPTaskDataTy &Data) {
3773 ASTContext &C = CGM.getContext();
3775 // Aggregate privates and sort them by the alignment.
3776 const auto *I = Data.PrivateCopies.begin();
3777 for (const Expr *E : Data.PrivateVars) {
3778 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3779 Privates.emplace_back(
3780 C.getDeclAlign(VD),
3781 PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
3782 /*PrivateElemInit=*/nullptr));
3783 ++I;
3784 }
3785 I = Data.FirstprivateCopies.begin();
3786 const auto *IElemInitRef = Data.FirstprivateInits.begin();
3787 for (const Expr *E : Data.FirstprivateVars) {
3788 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3789 Privates.emplace_back(
3790 C.getDeclAlign(VD),
3791 PrivateHelpersTy(
3792 E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
3793 cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl())));
3794 ++I;
3795 ++IElemInitRef;
3796 }
3797 I = Data.LastprivateCopies.begin();
3798 for (const Expr *E : Data.LastprivateVars) {
3799 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3800 Privates.emplace_back(
3801 C.getDeclAlign(VD),
3802 PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
3803 /*PrivateElemInit=*/nullptr));
3804 ++I;
3805 }
3806 for (const VarDecl *VD : Data.PrivateLocals) {
3807 if (isAllocatableDecl(VD))
3808 Privates.emplace_back(CGM.getPointerAlign(), PrivateHelpersTy(VD));
3809 else
3810 Privates.emplace_back(C.getDeclAlign(VD), PrivateHelpersTy(VD));
3811 }
 // Descending alignment; stable_sort keeps the insertion order among entries
 // of equal alignment.
3812 llvm::stable_sort(Privates,
3813 [](const PrivateDataTy &L, const PrivateDataTy &R) {
3814 return L.first > R.first;
3815 });
3816 QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
3817 // Build type kmp_routine_entry_t (if not built yet).
3818 emitKmpRoutineEntryT(KmpInt32Ty);
3819 // Build type kmp_task_t (if not built yet).
3820 if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) {
3821 if (SavedKmpTaskloopTQTy.isNull()) {
3822 SavedKmpTaskloopTQTy = C.getCanonicalTagType(createKmpTaskTRecordDecl(
3823 CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
3824 }
3826 } else {
3827 assert((D.getDirectiveKind() == OMPD_task ||
3828 isOpenMPTargetExecutionDirective(D.getDirectiveKind()) ||
3829 isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) &&
3830 "Expected taskloop, task or target directive");
3831 if (SavedKmpTaskTQTy.isNull()) {
3832 SavedKmpTaskTQTy = C.getCanonicalTagType(createKmpTaskTRecordDecl(
3833 CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
3834 }
3836 }
3837 const auto *KmpTaskTQTyRD = KmpTaskTQTy->castAsRecordDecl();
3838 // Build particular struct kmp_task_t for the given task.
3839 const RecordDecl *KmpTaskTWithPrivatesQTyRD =
3841 CanQualType KmpTaskTWithPrivatesQTy =
3842 C.getCanonicalTagType(KmpTaskTWithPrivatesQTyRD);
3843 QualType KmpTaskTWithPrivatesPtrQTy =
3844 C.getPointerType(KmpTaskTWithPrivatesQTy);
3845 llvm::Type *KmpTaskTWithPrivatesPtrTy = CGF.Builder.getPtrTy(0);
3846 llvm::Value *KmpTaskTWithPrivatesTySize =
3847 CGF.getTypeSize(KmpTaskTWithPrivatesQTy);
3848 QualType SharedsPtrTy = C.getPointerType(SharedsTy);
3849
3850 // Emit initial values for private copies (if any).
3851 llvm::Value *TaskPrivatesMap = nullptr;
 // The privates-map pointer type is taken from the fourth parameter of the
 // outlined task function.
3852 llvm::Type *TaskPrivatesMapTy =
3853 std::next(TaskFunction->arg_begin(), 3)->getType();
3854 if (!Privates.empty()) {
3855 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
3856 TaskPrivatesMap =
3857 emitTaskPrivateMappingFunction(CGM, Loc, Data, FI->getType(), Privates);
3858 TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3859 TaskPrivatesMap, TaskPrivatesMapTy);
3860 } else {
3861 TaskPrivatesMap = llvm::ConstantPointerNull::get(
3862 cast<llvm::PointerType>(TaskPrivatesMapTy));
3863 }
3864 // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
3865 // kmp_task_t *tt);
3866 llvm::Function *TaskEntry = emitProxyTaskFunction(
3867 CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
3868 KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction,
3869 TaskPrivatesMap);
3870
3871 // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
3872 // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
3873 // kmp_routine_entry_t *task_entry);
3874 // Task flags. Format is taken from
3875 // https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h,
3876 // description of kmp_tasking_flags struct.
3877 enum {
3878 TiedFlag = 0x1,
3879 FinalFlag = 0x2,
3880 DestructorsFlag = 0x8,
3881 PriorityFlag = 0x20,
3882 DetachableFlag = 0x40,
3883 FreeAgentFlag = 0x80,
3884 TransparentFlag = 0x100,
3885 };
3886 unsigned Flags = Data.Tied ? TiedFlag : 0;
3887 bool NeedsCleanup = false;
3888 if (!Privates.empty()) {
3889 NeedsCleanup =
3890 checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD, Privates);
3891 if (NeedsCleanup)
3892 Flags = Flags | DestructorsFlag;
3893 }
3894 if (const auto *Clause = D.getSingleClause<OMPThreadsetClause>()) {
3895 OpenMPThreadsetKind Kind = Clause->getThreadsetKind();
3896 if (Kind == OMPC_THREADSET_omp_pool)
3897 Flags = Flags | FreeAgentFlag;
3898 }
3899 if (D.getSingleClause<OMPTransparentClause>())
3900 Flags |= TransparentFlag;
3901
3902 if (Data.Priority.getInt())
3903 Flags = Flags | PriorityFlag;
3904 if (D.hasClausesOfKind<OMPDetachClause>())
3905 Flags = Flags | DetachableFlag;
 // The final flag is selected at run time when Data.Final carries a value
 // and folded to a constant otherwise; the static flags are OR-ed in after.
3906 llvm::Value *TaskFlags =
3907 Data.Final.getPointer()
3908 ? CGF.Builder.CreateSelect(Data.Final.getPointer(),
3909 CGF.Builder.getInt32(FinalFlag),
3910 CGF.Builder.getInt32(/*C=*/0))
3911 : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0);
3912 TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
3913 llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy));
3915 getThreadID(CGF, Loc), TaskFlags, KmpTaskTWithPrivatesTySize,
3917 TaskEntry, KmpRoutineEntryPtrTy)};
3918 llvm::Value *NewTask;
 // With a nowait clause the target-task allocation entry point is used; it
 // takes the device ID as an extra trailing argument.
3919 if (D.hasClausesOfKind<OMPNowaitClause>()) {
3920 // Check if we have any device clause associated with the directive.
3921 const Expr *Device = nullptr;
3922 if (auto *C = D.getSingleClause<OMPDeviceClause>())
3923 Device = C->getDevice();
3924 // Emit device ID if any otherwise use default value.
3925 llvm::Value *DeviceID;
3926 if (Device)
3927 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
3928 CGF.Int64Ty, /*isSigned=*/true);
3929 else
3930 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
3931 AllocArgs.push_back(DeviceID);
3932 NewTask = CGF.EmitRuntimeCall(
3933 OMPBuilder.getOrCreateRuntimeFunction(
3934 CGM.getModule(), OMPRTL___kmpc_omp_target_task_alloc),
3935 AllocArgs);
3936 } else {
3937 NewTask =
3938 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
3939 CGM.getModule(), OMPRTL___kmpc_omp_task_alloc),
3940 AllocArgs);
3941 }
3942 // Emit detach clause initialization.
3943 // evt = (typeof(evt))__kmpc_task_allow_completion_event(loc, tid,
3944 // task_descriptor);
3945 if (const auto *DC = D.getSingleClause<OMPDetachClause>()) {
3946 const Expr *Evt = DC->getEventHandler()->IgnoreParenImpCasts();
3947 LValue EvtLVal = CGF.EmitLValue(Evt);
3948
3949 // Build kmp_event_t *__kmpc_task_allow_completion_event(ident_t *loc_ref,
3950 // int gtid, kmp_task_t *task);
3951 llvm::Value *Loc = emitUpdateLocation(CGF, DC->getBeginLoc());
3952 llvm::Value *Tid = getThreadID(CGF, DC->getBeginLoc());
3953 Tid = CGF.Builder.CreateIntCast(Tid, CGF.IntTy, /*isSigned=*/false);
3954 llvm::Value *EvtVal = CGF.EmitRuntimeCall(
3955 OMPBuilder.getOrCreateRuntimeFunction(
3956 CGM.getModule(), OMPRTL___kmpc_task_allow_completion_event),
3957 {Loc, Tid, NewTask});
3958 EvtVal = CGF.EmitScalarConversion(EvtVal, C.VoidPtrTy, Evt->getType(),
3959 Evt->getExprLoc());
3960 CGF.EmitStoreOfScalar(EvtVal, EvtLVal);
3961 }
3962 // Process affinity clauses.
3963 if (D.hasClausesOfKind<OMPAffinityClause>()) {
3964 // Process list of affinity data.
3965 ASTContext &C = CGM.getContext();
3966 Address AffinitiesArray = Address::invalid();
3967 // Calculate number of elements to form the array of affinity data.
3968 llvm::Value *NumOfElements = nullptr;
3969 unsigned NumAffinities = 0;
3970 for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
3971 if (const Expr *Modifier = C->getModifier()) {
3972 const auto *IE = cast<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts());
3973 for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
3974 llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
3975 Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
3976 NumOfElements =
3977 NumOfElements ? CGF.Builder.CreateNUWMul(NumOfElements, Sz) : Sz;
3978 }
3979 } else {
3980 NumAffinities += C->varlist_size();
3981 }
3982 }
3984 // Fields ids in kmp_task_affinity_info record.
3985 enum RTLAffinityInfoFieldsTy { BaseAddr, Len, Flags };
3986
3987 QualType KmpTaskAffinityInfoArrayTy;
 // A non-null NumOfElements means at least one clause has an iterator
 // modifier, so the array size is only known at run time.
3988 if (NumOfElements) {
3989 NumOfElements = CGF.Builder.CreateNUWAdd(
3990 llvm::ConstantInt::get(CGF.SizeTy, NumAffinities), NumOfElements);
3991 auto *OVE = new (C) OpaqueValueExpr(
3992 Loc,
3993 C.getIntTypeForBitwidth(C.getTypeSize(C.getSizeType()), /*Signed=*/0),
3994 VK_PRValue);
3995 CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE,
3996 RValue::get(NumOfElements));
3997 KmpTaskAffinityInfoArrayTy = C.getVariableArrayType(
3999 /*IndexTypeQuals=*/0);
4000 // Properly emit variable-sized array.
4001 auto *PD = ImplicitParamDecl::Create(C, KmpTaskAffinityInfoArrayTy,
4003 CGF.EmitVarDecl(*PD);
4004 AffinitiesArray = CGF.GetAddrOfLocalVar(PD);
4005 NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
4006 /*isSigned=*/false);
4007 } else {
4008 KmpTaskAffinityInfoArrayTy = C.getConstantArrayType(
4010 llvm::APInt(C.getTypeSize(C.getSizeType()), NumAffinities), nullptr,
4011 ArraySizeModifier::Normal, /*IndexTypeQuals=*/0);
4012 AffinitiesArray = CGF.CreateMemTempWithoutCast(KmpTaskAffinityInfoArrayTy,
4013 ".affs.arr.addr");
4014 AffinitiesArray = CGF.Builder.CreateConstArrayGEP(AffinitiesArray, 0);
4015 NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumAffinities,
4016 /*isSigned=*/false);
4017 }
4018
4019 const auto *KmpAffinityInfoRD = KmpTaskAffinityInfoTy->getAsRecordDecl();
4020 // Fill array by elements without iterators.
4021 unsigned Pos = 0;
4022 bool HasIterator = false;
4023 for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
4024 if (C->getModifier()) {
4025 HasIterator = true;
4026 continue;
4027 }
4028 for (const Expr *E : C->varlist()) {
4029 llvm::Value *Addr;
4030 llvm::Value *Size;
4031 std::tie(Addr, Size) = getPointerAndSize(CGF, E);
4032 LValue Base =
4033 CGF.MakeAddrLValue(CGF.Builder.CreateConstGEP(AffinitiesArray, Pos),
4035 // affs[i].base_addr = &<Affinities[i].second>;
4036 LValue BaseAddrLVal = CGF.EmitLValueForField(
4037 Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
4038 CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
4039 BaseAddrLVal);
4040 // affs[i].len = sizeof(<Affinities[i].second>);
4041 LValue LenLVal = CGF.EmitLValueForField(
4042 Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
4043 CGF.EmitStoreOfScalar(Size, LenLVal);
4044 ++Pos;
4045 }
4046 }
 // Iterator-generated entries are appended after the static ones, so the
 // write position becomes a run-time counter starting at Pos.
4047 LValue PosLVal;
4048 if (HasIterator) {
4049 PosLVal = CGF.MakeAddrLValue(
4050 CGF.CreateMemTempWithoutCast(C.getSizeType(), "affs.counter.addr"),
4051 C.getSizeType());
4052 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
4053 }
4054 // Process elements with iterators.
4055 for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
4056 const Expr *Modifier = C->getModifier();
4057 if (!Modifier)
4058 continue;
4059 OMPIteratorGeneratorScope IteratorScope(
4060 CGF, cast_or_null<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts()));
4061 for (const Expr *E : C->varlist()) {
4062 llvm::Value *Addr;
4063 llvm::Value *Size;
4064 std::tie(Addr, Size) = getPointerAndSize(CGF, E);
4065 llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4066 LValue Base =
4067 CGF.MakeAddrLValue(CGF.Builder.CreateGEP(CGF, AffinitiesArray, Idx),
4069 // affs[i].base_addr = &<Affinities[i].second>;
4070 LValue BaseAddrLVal = CGF.EmitLValueForField(
4071 Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
4072 CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
4073 BaseAddrLVal);
4074 // affs[i].len = sizeof(<Affinities[i].second>);
4075 LValue LenLVal = CGF.EmitLValueForField(
4076 Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
4077 CGF.EmitStoreOfScalar(Size, LenLVal);
4078 Idx = CGF.Builder.CreateNUWAdd(
4079 Idx, llvm::ConstantInt::get(Idx->getType(), 1));
4080 CGF.EmitStoreOfScalar(Idx, PosLVal);
4081 }
4082 }
4083 // Call to kmp_int32 __kmpc_omp_reg_task_with_affinity(ident_t *loc_ref,
4084 // kmp_int32 gtid, kmp_task_t *new_task, kmp_int32
4085 // naffins, kmp_task_affinity_info_t *affin_list);
4086 llvm::Value *LocRef = emitUpdateLocation(CGF, Loc);
4087 llvm::Value *GTid = getThreadID(CGF, Loc);
4088 llvm::Value *AffinListPtr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4089 AffinitiesArray.emitRawPointer(CGF), CGM.VoidPtrTy);
4090 // FIXME: Emit the function and ignore its result for now unless the
4091 // runtime function is properly implemented.
4092 (void)CGF.EmitRuntimeCall(
4093 OMPBuilder.getOrCreateRuntimeFunction(
4094 CGM.getModule(), OMPRTL___kmpc_omp_reg_task_with_affinity),
4095 {LocRef, GTid, NewTask, NumOfElements, AffinListPtr});
4096 }
4097 llvm::Value *NewTaskNewTaskTTy =
4099 NewTask, KmpTaskTWithPrivatesPtrTy);
4100 LValue Base = CGF.MakeNaturalAlignRawAddrLValue(NewTaskNewTaskTTy,
4101 KmpTaskTWithPrivatesQTy);
 // TDBase is the kmp_task_t part, i.e. the first field of the
 // task-with-privates record.
4102 LValue TDBase =
4103 CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin());
4104 // Fill the data in the resulting kmp_task_t record.
4105 // Copy shareds if there are any.
4106 Address KmpTaskSharedsPtr = Address::invalid();
4107 if (!SharedsTy->castAsRecordDecl()->field_empty()) {
4108 KmpTaskSharedsPtr = Address(
4109 CGF.EmitLoadOfScalar(
4111 TDBase,
4112 *std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds)),
4113 Loc),
4114 CGF.Int8Ty, CGM.getNaturalTypeAlignment(SharedsTy));
4115 LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy);
4116 LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy);
4117 CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap);
4118 }
4119 // Emit initial values for private copies (if any).
4121 if (!Privates.empty()) {
4122 emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD,
4123 SharedsTy, SharedsPtrTy, Data, Privates,
4124 /*ForDup=*/false);
 // Taskloops additionally get a task_dup function when there are
 // lastprivates or checkInitIsRequired reports that some private copy
 // needs initialization.
4125 if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
4126 (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) {
4127 Result.TaskDupFn = emitTaskDupFunction(
4128 CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD,
4129 KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates,
4130 /*WithLastIter=*/!Data.LastprivateVars.empty());
4131 }
4132 }
4133 // Fields of union "kmp_cmplrdata_t" for destructors and priority.
4134 enum { Priority = 0, Destructors = 1 };
4135 // Provide pointer to function with destructors for privates.
4136 auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1);
4137 const auto *KmpCmplrdataUD = (*FI)->getType()->castAsRecordDecl();
4138 assert(KmpCmplrdataUD->isUnion());
4139 if (NeedsCleanup) {
4140 llvm::Value *DestructorFn = emitDestructorsFunction(
4141 CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
4142 KmpTaskTWithPrivatesQTy);
4143 LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI);
4144 LValue DestructorsLV = CGF.EmitLValueForField(
4145 Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors));
4147 DestructorFn, KmpRoutineEntryPtrTy),
4148 DestructorsLV);
4149 }
4150 // Set priority.
4151 if (Data.Priority.getInt()) {
4152 LValue Data2LV = CGF.EmitLValueForField(
4153 TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2));
4154 LValue PriorityLV = CGF.EmitLValueForField(
4155 Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority));
4156 CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV);
4157 }
4158 Result.NewTask = NewTask;
4159 Result.TaskEntry = TaskEntry;
4160 Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy;
4161 Result.TDBase = TDBase;
4162 Result.KmpTaskTQTyRD = KmpTaskTQTyRD;
4163 return Result;
4164}
4165
4166/// Translates internal dependency kind into the runtime kind.
/// Mapping in the visible cases: in -> DepIn; out and inout -> DepInOut;
/// mutexinoutset -> DepMutexInOutSet; inoutset -> DepInOutSet;
/// outallmemory -> DepOmpAllMem. The source, sink, depobj and inoutallmemory
/// kinds reach llvm_unreachable.
/// NOTE(review): the signature line and one line just before the
/// llvm_unreachable call are not visible in this excerpt.
4168 RTLDependenceKindTy DepKind;
4169 switch (K) {
4170 case OMPC_DEPEND_in:
4171 DepKind = RTLDependenceKindTy::DepIn;
4172 break;
4173 // Out and InOut dependencies must use the same code.
4174 case OMPC_DEPEND_out:
4175 case OMPC_DEPEND_inout:
4176 DepKind = RTLDependenceKindTy::DepInOut;
4177 break;
4178 case OMPC_DEPEND_mutexinoutset:
4179 DepKind = RTLDependenceKindTy::DepMutexInOutSet;
4180 break;
4181 case OMPC_DEPEND_inoutset:
4182 DepKind = RTLDependenceKindTy::DepInOutSet;
4183 break;
4184 case OMPC_DEPEND_outallmemory:
4185 DepKind = RTLDependenceKindTy::DepOmpAllMem;
4186 break;
 // The remaining kinds are not expected to be translated here.
4187 case OMPC_DEPEND_source:
4188 case OMPC_DEPEND_sink:
4189 case OMPC_DEPEND_depobj:
4190 case OMPC_DEPEND_inoutallmemory:
4192 llvm_unreachable("Unknown task dependence type");
4193 }
4194 return DepKind;
4195}
4196
4197/// Builds kmp_depend_info, if it is not built yet, and builds flags type.
4198static void getDependTypes(ASTContext &C, QualType &KmpDependInfoTy,
4199 QualType &FlagsTy) {
4200 FlagsTy = C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false);
4201 if (KmpDependInfoTy.isNull()) {
4202 RecordDecl *KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info");
4203 KmpDependInfoRD->startDefinition();
4204 addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType());
4205 addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType());
4206 addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy);
4207 KmpDependInfoRD->completeDefinition();
4208 KmpDependInfoTy = C.getCanonicalTagType(KmpDependInfoRD);
4209 }
4210}
4211
/// Reads the element count of a depobj's dependency array.
///
/// The count is loaded from the base_addr field of the kmp_depend_info record
/// located one element before the address held by 'Base' (GEP index -1).
/// \returns the pair (loaded count, 'Base').
// NOTE(review): listing lines 4213 and 4220 are not present in this view;
// presumably they hold the rest of the signature and the start of the
// statement that defines 'Base' -- confirm against the original file.
4212std::pair<llvm::Value *, LValue>
4214 SourceLocation Loc) {
4215 ASTContext &C = CGM.getContext();
4216 QualType FlagsTy;
4217 getDependTypes(C, KmpDependInfoTy, FlagsTy);
4218 auto *KmpDependInfoRD = KmpDependInfoTy->castAsRecordDecl();
4219 QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
4221 DepobjLVal.getAddress().withElementType(
4222 CGF.ConvertTypeForMem(KmpDependInfoPtrTy)),
4223 KmpDependInfoPtrTy->castAs<PointerType>());
 // Step back one record: the slot in front of the array carries the count.
4224 Address DepObjAddr = CGF.Builder.CreateGEP(
4225 CGF, Base.getAddress(),
4226 llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
4227 LValue NumDepsBase = CGF.MakeAddrLValue(
4228 DepObjAddr, KmpDependInfoTy, Base.getBaseInfo(), Base.getTBAAInfo());
4229 // NumDeps = deps[-1].base_addr;
4230 LValue BaseAddrLVal = CGF.EmitLValueForField(
4231 NumDepsBase,
4232 *std::next(KmpDependInfoRD->field_begin(),
4233 static_cast<unsigned int>(RTLDependInfoFields::BaseAddr)));
4234 llvm::Value *NumDeps = CGF.EmitLoadOfScalar(BaseAddrLVal, Loc);
4235 return std::make_pair(NumDeps, Base);
4236}
4237
/// Fills kmp_depend_info records in \p DependenciesArray, one per entry of
/// Data.DepExprs, writing the base_addr, len and flags fields of each record.
///
/// \param Pos Selects the slot to write. It is either a pointer to an index
/// known while generating code, or a pointer to an lvalue holding an index
/// that is loaded in the emitted code. It is advanced by one after each record.
// NOTE(review): listing lines 4240 and 4293 are not present in this view
// (presumably the 'Data' parameter and the start of the flags store) --
// confirm against the original file.
4238static void emitDependData(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
4239 llvm::PointerUnion<unsigned *, LValue *> Pos,
4241 Address DependenciesArray) {
4242 CodeGenModule &CGM = CGF.CGM;
4243 ASTContext &C = CGM.getContext();
4244 QualType FlagsTy;
4245 getDependTypes(C, KmpDependInfoTy, FlagsTy);
4246 auto *KmpDependInfoRD = KmpDependInfoTy->castAsRecordDecl();
4247 llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
4248
 // The scope is constructed from the clause's iterator expression, or from
 // nullptr when the clause has none.
4249 OMPIteratorGeneratorScope IteratorScope(
4250 CGF, cast_or_null<OMPIteratorExpr>(
4251 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
4252 : nullptr));
4253 for (const Expr *E : Data.DepExprs) {
4254 llvm::Value *Addr;
4255 llvm::Value *Size;
4256
4257 // The expression will be a nullptr in the 'omp_all_memory' case.
4258 if (E) {
4259 std::tie(Addr, Size) = getPointerAndSize(CGF, E);
4260 Addr = CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy);
4261 } else {
 // No expression: store a zero address and a zero length.
4262 Addr = llvm::ConstantInt::get(CGF.IntPtrTy, 0);
4263 Size = llvm::ConstantInt::get(CGF.SizeTy, 0);
4264 }
4265 LValue Base;
4266 if (unsigned *P = dyn_cast<unsigned *>(Pos)) {
 // Constant index into the array.
4267 Base = CGF.MakeAddrLValue(
4268 CGF.Builder.CreateConstGEP(DependenciesArray, *P), KmpDependInfoTy);
4269 } else {
 // Index is loaded from the counter lvalue in the emitted code.
4270 assert(E && "Expected a non-null expression");
4271 LValue &PosLVal = *cast<LValue *>(Pos);
4272 llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4273 Base = CGF.MakeAddrLValue(
4274 CGF.Builder.CreateGEP(CGF, DependenciesArray, Idx), KmpDependInfoTy);
4275 }
4276 // deps[i].base_addr = &<Dependencies[i].second>;
4277 LValue BaseAddrLVal = CGF.EmitLValueForField(
4278 Base,
4279 *std::next(KmpDependInfoRD->field_begin(),
4280 static_cast<unsigned int>(RTLDependInfoFields::BaseAddr)));
4281 CGF.EmitStoreOfScalar(Addr, BaseAddrLVal);
4282 // deps[i].len = sizeof(<Dependencies[i].second>);
4283 LValue LenLVal = CGF.EmitLValueForField(
4284 Base, *std::next(KmpDependInfoRD->field_begin(),
4285 static_cast<unsigned int>(RTLDependInfoFields::Len)));
4286 CGF.EmitStoreOfScalar(Size, LenLVal);
4287 // deps[i].flags = <Dependencies[i].first>;
4288 RTLDependenceKindTy DepKind = translateDependencyKind(Data.DepKind);
4289 LValue FlagsLVal = CGF.EmitLValueForField(
4290 Base,
4291 *std::next(KmpDependInfoRD->field_begin(),
4292 static_cast<unsigned int>(RTLDependInfoFields::Flags)));
4294 llvm::ConstantInt::get(LLVMFlagsTy, static_cast<unsigned int>(DepKind)),
4295 FlagsLVal);
 // Advance to the next slot, either directly or through the counter lvalue.
4296 if (unsigned *P = dyn_cast<unsigned *>(Pos)) {
4297 ++(*P);
4298 } else {
4299 LValue &PosLVal = *cast<LValue *>(Pos);
4300 llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4301 Idx = CGF.Builder.CreateNUWAdd(Idx,
4302 llvm::ConstantInt::get(Idx->getType(), 1));
4303 CGF.EmitStoreOfScalar(Idx, PosLVal);
4304 }
4305 }
4306}
4307
/// For a depobj dependency clause, emits code that obtains the element count
/// of each depobj listed in Data.DepExprs and returns one loaded value per
/// depobj.
// NOTE(review): listing lines 4308-4310 and 4313 are not present in this
// view; presumably they hold the signature and the declaration of 'Sizes' --
// confirm against the original file.
4311 assert(Data.DepKind == OMPC_DEPEND_depobj &&
4312 "Expected depobj dependency kind.");
4314 SmallVector<LValue, 4> SizeLVals;
4315 ASTContext &C = CGF.getContext();
4316 {
4317 OMPIteratorGeneratorScope IteratorScope(
4318 CGF, cast_or_null<OMPIteratorExpr>(
4319 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
4320 : nullptr));
4321 for (const Expr *E : Data.DepExprs) {
4322 llvm::Value *NumDeps;
4323 LValue Base;
4324 LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
4325 std::tie(NumDeps, Base) =
4326 getDepobjElements(CGF, DepobjLVal, E->getExprLoc());
 // A temporary is zero-initialized and the count is then added into it.
4327 LValue NumLVal = CGF.MakeAddrLValue(
4328 CGF.CreateMemTempWithoutCast(C.getUIntPtrType(), "depobj.size.addr"),
4329 C.getUIntPtrType());
4330 CGF.Builder.CreateStore(llvm::ConstantInt::get(CGF.IntPtrTy, 0),
4331 NumLVal.getAddress());
4332 llvm::Value *PrevVal = CGF.EmitLoadOfScalar(NumLVal, E->getExprLoc());
4333 llvm::Value *Add = CGF.Builder.CreateNUWAdd(PrevVal, NumDeps);
4334 CGF.EmitStoreOfScalar(Add, NumLVal);
4335 SizeLVals.push_back(NumLVal);
4336 }
4337 }
 // The temporaries are read back only after the iterator scope above has
 // been closed.
4338 for (unsigned I = 0, E = SizeLVals.size(); I < E; ++I) {
4339 llvm::Value *Size =
4340 CGF.EmitLoadOfScalar(SizeLVals[I], Data.DepExprs[I]->getExprLoc());
4341 Sizes.push_back(Size);
4342 }
4343 return Sizes;
4344}
4345
/// Copies the dependency records of every depobj listed in Data.DepExprs into
/// \p DependenciesArray, starting at the index held in \p PosLVal, and advances
/// that index by the number of records copied.
// NOTE(review): listing lines 4346, 4347 and 4349 are not present in this
// view; presumably they hold the rest of the signature -- confirm against the
// original file.
4348 LValue PosLVal,
4350 Address DependenciesArray) {
4351 assert(Data.DepKind == OMPC_DEPEND_depobj &&
4352 "Expected depobj dependency kind.");
4353 llvm::Value *ElSize = CGF.getTypeSize(KmpDependInfoTy);
4354 {
4355 OMPIteratorGeneratorScope IteratorScope(
4356 CGF, cast_or_null<OMPIteratorExpr>(
4357 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
4358 : nullptr));
4359 for (const Expr *E : Data.DepExprs) {
4360 llvm::Value *NumDeps;
4361 LValue Base;
4362 LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
4363 std::tie(NumDeps, Base) =
4364 getDepobjElements(CGF, DepobjLVal, E->getExprLoc());
4365
4366 // memcopy dependency data.
 // Byte count = record size * number of records in this depobj.
4367 llvm::Value *Size = CGF.Builder.CreateNUWMul(
4368 ElSize,
4369 CGF.Builder.CreateIntCast(NumDeps, CGF.SizeTy, /*isSigned=*/false));
4370 llvm::Value *Pos = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4371 Address DepAddr = CGF.Builder.CreateGEP(CGF, DependenciesArray, Pos);
4372 CGF.Builder.CreateMemCpy(DepAddr, Base.getAddress(), Size);
4373
4374 // Increase pos.
4375 // pos += size;
 // The increment is the record count (NumDeps), not the byte size.
4376 llvm::Value *Add = CGF.Builder.CreateNUWAdd(Pos, NumDeps);
4377 CGF.EmitStoreOfScalar(Add, PosLVal);
4378 }
4379 }
4380}
4381
/// Builds the kmp_depend_info array for a list of dependency clauses.
///
/// \returns the pair (element count as a 32-bit value, array address cast to
/// void*), or (nullptr, invalid address) when no clause has any expression.
///
/// The array is filled in three passes: clauses that are neither depobj nor
/// iterator-based, then iterator-based clauses, then the contents of depobj
/// arrays. When depobj or iterator-based clauses are present the element count
/// is only known in the emitted code and the array is emitted as a
/// variable-length local; otherwise a constant-size temporary is used.
// NOTE(review): listing lines 4383, 4411-4412 and 4463 are not present in this
// view (presumably the parameter list, the definition of 'Sizes' and part of
// the ImplicitParamDecl::Create call) -- confirm against the original file.
4382std::pair<llvm::Value *, Address> CGOpenMPRuntime::emitDependClause(
4384 SourceLocation Loc) {
4385 if (llvm::all_of(Dependencies, [](const OMPTaskDataTy::DependData &D) {
4386 return D.DepExprs.empty();
4387 }))
4388 return std::make_pair(nullptr, Address::invalid());
4389 // Process list of dependencies.
4390 ASTContext &C = CGM.getContext();
4391 Address DependenciesArray = Address::invalid();
4392 llvm::Value *NumOfElements = nullptr;
 // Count only the entries whose number is known here: depobj clauses and
 // iterator-based clauses contribute 0 and are accounted for separately below.
4393 unsigned NumDependencies = std::accumulate(
4394 Dependencies.begin(), Dependencies.end(), 0,
4395 [](unsigned V, const OMPTaskDataTy::DependData &D) {
4396 return D.DepKind == OMPC_DEPEND_depobj
4397 ? V
4398 : (V + (D.IteratorExpr ? 0 : D.DepExprs.size()));
4399 });
4400 QualType FlagsTy;
4401 getDependTypes(C, KmpDependInfoTy, FlagsTy);
4402 bool HasDepobjDeps = false;
4403 bool HasRegularWithIterators = false;
4404 llvm::Value *NumOfDepobjElements = llvm::ConstantInt::get(CGF.IntPtrTy, 0);
4405 llvm::Value *NumOfRegularWithIterators =
4406 llvm::ConstantInt::get(CGF.IntPtrTy, 0);
4407 // Calculate number of depobj dependencies and regular deps with the
4408 // iterators.
4409 for (const OMPTaskDataTy::DependData &D : Dependencies) {
4410 if (D.DepKind == OMPC_DEPEND_depobj) {
4413 for (llvm::Value *Size : Sizes) {
4414 NumOfDepobjElements =
4415 CGF.Builder.CreateNUWAdd(NumOfDepobjElements, Size);
4416 }
4417 HasDepobjDeps = true;
4418 continue;
4419 }
4420 // Include number of iterations, if any.
4421
4422 if (const auto *IE = cast_or_null<OMPIteratorExpr>(D.IteratorExpr)) {
 // Iteration space = product of the 'Upper' helper values of all iterators.
4423 llvm::Value *ClauseIteratorSpace =
4424 llvm::ConstantInt::get(CGF.IntPtrTy, 1);
4425 for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
4426 llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
4427 Sz = CGF.Builder.CreateIntCast(Sz, CGF.IntPtrTy, /*isSigned=*/false);
4428 ClauseIteratorSpace = CGF.Builder.CreateNUWMul(Sz, ClauseIteratorSpace);
4429 }
 // Each point of the iteration space yields one record per listed expression.
4430 llvm::Value *NumClauseDeps = CGF.Builder.CreateNUWMul(
4431 ClauseIteratorSpace,
4432 llvm::ConstantInt::get(CGF.IntPtrTy, D.DepExprs.size()));
4433 NumOfRegularWithIterators =
4434 CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumClauseDeps);
4435 HasRegularWithIterators = true;
4436 continue;
4437 }
4438 }
4439
4440 QualType KmpDependInfoArrayTy;
4441 if (HasDepobjDeps || HasRegularWithIterators) {
 // Total element count is computed in the emitted code.
4442 NumOfElements = llvm::ConstantInt::get(CGM.IntPtrTy, NumDependencies,
4443 /*isSigned=*/false);
4444 if (HasDepobjDeps) {
4445 NumOfElements =
4446 CGF.Builder.CreateNUWAdd(NumOfDepobjElements, NumOfElements);
4447 }
4448 if (HasRegularWithIterators) {
4449 NumOfElements =
4450 CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumOfElements);
4451 }
 // Bind the computed count to an opaque expression so it can serve as the
 // size expression of a variable array type.
4452 auto *OVE = new (C) OpaqueValueExpr(
4453 Loc, C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0),
4454 VK_PRValue);
4455 CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE,
4456 RValue::get(NumOfElements));
4457 KmpDependInfoArrayTy =
4458 C.getVariableArrayType(KmpDependInfoTy, OVE, ArraySizeModifier::Normal,
4459 /*IndexTypeQuals=*/0);
4460 // CGF.EmitVariablyModifiedType(KmpDependInfoArrayTy);
4461 // Properly emit variable-sized array.
4462 auto *PD = ImplicitParamDecl::Create(C, KmpDependInfoArrayTy,
4464 CGF.EmitVarDecl(*PD);
4465 DependenciesArray = CGF.GetAddrOfLocalVar(PD);
4466 NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
4467 /*isSigned=*/false);
4468 } else {
 // Element count is a constant: use a fixed-size temporary array.
4469 KmpDependInfoArrayTy = C.getConstantArrayType(
4470 KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies), nullptr,
4471 ArraySizeModifier::Normal, /*IndexTypeQuals=*/0);
4472 DependenciesArray =
4473 CGF.CreateMemTempWithoutCast(KmpDependInfoArrayTy, ".dep.arr.addr");
4474 DependenciesArray = CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0);
4475 NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumDependencies,
4476 /*isSigned=*/false);
4477 }
 // Pass 1: clauses that are neither depobj nor iterator-based, at constant
 // indices.
4478 unsigned Pos = 0;
4479 for (const OMPTaskDataTy::DependData &Dep : Dependencies) {
4480 if (Dep.DepKind == OMPC_DEPEND_depobj || Dep.IteratorExpr)
4481 continue;
4482 emitDependData(CGF, KmpDependInfoTy, &Pos, Dep, DependenciesArray);
4483 }
4484 // Copy regular dependencies with iterators.
 // Pass 2 continues from the index reached in pass 1, now kept in a counter
 // lvalue.
4485 LValue PosLVal = CGF.MakeAddrLValue(
4486 CGF.CreateMemTempWithoutCast(C.getSizeType(), "dep.counter.addr"),
4487 C.getSizeType());
4488 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
4489 for (const OMPTaskDataTy::DependData &Dep : Dependencies) {
4490 if (Dep.DepKind == OMPC_DEPEND_depobj || !Dep.IteratorExpr)
4491 continue;
4492 emitDependData(CGF, KmpDependInfoTy, &PosLVal, Dep, DependenciesArray);
4493 }
4494 // Copy final depobj arrays without iterators.
4495 if (HasDepobjDeps) {
4496 for (const OMPTaskDataTy::DependData &Dep : Dependencies) {
4497 if (Dep.DepKind != OMPC_DEPEND_depobj)
4498 continue;
4499 emitDepobjElements(CGF, KmpDependInfoTy, PosLVal, Dep, DependenciesArray);
4500 }
4501 }
4502 DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4503 DependenciesArray, CGF.VoidPtrTy, CGF.Int8Ty);
4504 return std::make_pair(NumOfElements, DependenciesArray);
4505}
4506
/// Emits the dependency array backing a depobj.
///
/// The storage is obtained with __kmpc_alloc (null allocator argument) and has
/// one more kmp_depend_info element than there are dependencies. The leading
/// element stores the dependency count in its base_addr field; the
/// dependencies are written from index 1 on.
/// \returns the address of element 1 cast to void*, or an invalid address when
/// \p Dependencies has no expressions.
// NOTE(review): listing lines 4507 and 4562 are not present in this view
// (presumably the start of the signature and the start of the statement that
// casts 'Addr') -- confirm against the original file.
4508 CodeGenFunction &CGF, const OMPTaskDataTy::DependData &Dependencies,
4509 SourceLocation Loc) {
4510 if (Dependencies.DepExprs.empty())
4511 return Address::invalid();
4512 // Process list of dependencies.
4513 ASTContext &C = CGM.getContext();
4514 Address DependenciesArray = Address::invalid();
4515 unsigned NumDependencies = Dependencies.DepExprs.size();
4516 QualType FlagsTy;
4517 getDependTypes(C, KmpDependInfoTy, FlagsTy);
4518 auto *KmpDependInfoRD = KmpDependInfoTy->castAsRecordDecl();
4519
4520 llvm::Value *Size;
4521 // Define type kmp_depend_info[<Dependencies.size()>];
4522 // For depobj reserve one extra element to store the number of elements.
4523 // It is required to handle depobj(x) update(in) construct.
4524 // kmp_depend_info[<Dependencies.size()>] deps;
4525 llvm::Value *NumDepsVal;
4526 CharUnits Align = C.getTypeAlignInChars(KmpDependInfoTy);
4527 if (const auto *IE =
4528 cast_or_null<OMPIteratorExpr>(Dependencies.IteratorExpr)) {
 // With iterators the count is computed in the emitted code as the product of
 // the iterators' 'Upper' helper values.
4529 NumDepsVal = llvm::ConstantInt::get(CGF.SizeTy, 1);
4530 for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
4531 llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
4532 Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
4533 NumDepsVal = CGF.Builder.CreateNUWMul(NumDepsVal, Sz);
4534 }
 // Allocation size in bytes = (count + 1) * aligned record size.
4535 Size = CGF.Builder.CreateNUWAdd(llvm::ConstantInt::get(CGF.SizeTy, 1),
4536 NumDepsVal);
4537 CharUnits SizeInBytes =
4538 C.getTypeSizeInChars(KmpDependInfoTy).alignTo(Align);
4539 llvm::Value *RecSize = CGM.getSize(SizeInBytes);
4540 Size = CGF.Builder.CreateNUWMul(Size, RecSize);
4541 NumDepsVal =
4542 CGF.Builder.CreateIntCast(NumDepsVal, CGF.IntPtrTy, /*isSigned=*/false);
4543 } else {
 // Without iterators the size is that of a constant array of
 // NumDependencies + 1 records.
4544 QualType KmpDependInfoArrayTy = C.getConstantArrayType(
4545 KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies + 1),
4546 nullptr, ArraySizeModifier::Normal, /*IndexTypeQuals=*/0);
4547 CharUnits Sz = C.getTypeSizeInChars(KmpDependInfoArrayTy);
4548 Size = CGM.getSize(Sz.alignTo(Align));
4549 NumDepsVal = llvm::ConstantInt::get(CGF.IntPtrTy, NumDependencies);
4550 }
4551 // Need to allocate on the dynamic memory.
4552 llvm::Value *ThreadID = getThreadID(CGF, Loc);
4553 // Use default allocator.
4554 llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
4555 llvm::Value *Args[] = {ThreadID, Size, Allocator};
4556
4557 llvm::Value *Addr =
4558 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
4559 CGM.getModule(), OMPRTL___kmpc_alloc),
4560 Args, ".dep.arr.addr");
4561 llvm::Type *KmpDependInfoLlvmTy = CGF.ConvertTypeForMem(KmpDependInfoTy);
4563 Addr, CGF.Builder.getPtrTy(0));
4564 DependenciesArray = Address(Addr, KmpDependInfoLlvmTy, Align);
4565 // Write number of elements in the first element of array for depobj.
4566 LValue Base = CGF.MakeAddrLValue(DependenciesArray, KmpDependInfoTy);
4567 // deps[0].base_addr = NumDependencies;
4568 LValue BaseAddrLVal = CGF.EmitLValueForField(
4569 Base,
4570 *std::next(KmpDependInfoRD->field_begin(),
4571 static_cast<unsigned int>(RTLDependInfoFields::BaseAddr)));
4572 CGF.EmitStoreOfScalar(NumDepsVal, BaseAddrLVal);
 // Dependencies start at index 1; index 0 holds the count written above.
4573 llvm::PointerUnion<unsigned *, LValue *> Pos;
4574 unsigned Idx = 1;
4575 LValue PosLVal;
4576 if (Dependencies.IteratorExpr) {
4577 PosLVal = CGF.MakeAddrLValue(
4578 CGF.CreateMemTempWithoutCast(C.getSizeType(), "iterator.counter.addr"),
4579 C.getSizeType());
4580 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Idx), PosLVal,
4581 /*IsInit=*/true);
4582 Pos = &PosLVal;
4583 } else {
4584 Pos = &Idx;
4585 }
4586 emitDependData(CGF, KmpDependInfoTy, Pos, Dependencies, DependenciesArray);
 // Hand back the address of element 1, i.e. the slot after the count record.
4587 DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4588 CGF.Builder.CreateConstGEP(DependenciesArray, 1), CGF.VoidPtrTy,
4589 CGF.Int8Ty);
4590 return DependenciesArray;
4591}
4592
/// Releases the storage behind a depobj: loads the pointer stored in the
/// depobj, steps back by one element (GEP index -1) and passes the resulting
/// address to __kmpc_free with a null allocator argument.
// NOTE(review): listing lines 4593, 4601 and 4603 are not present in this view
// (presumably the start of the signature and parts of the statement that
// defines 'Addr') -- confirm against the original file.
4594 SourceLocation Loc) {
4595 ASTContext &C = CGM.getContext();
4596 QualType FlagsTy;
4597 getDependTypes(C, KmpDependInfoTy, FlagsTy);
4598 LValue Base = CGF.EmitLoadOfPointerLValue(DepobjLVal.getAddress(),
4599 C.VoidPtrTy.castAs<PointerType>());
4600 QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
4602 Base.getAddress(), CGF.ConvertTypeForMem(KmpDependInfoPtrTy),
 // Index -1 moves one element of Addr's element type towards lower addresses.
4604 llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
4605 Addr.getElementType(), Addr.emitRawPointer(CGF),
4606 llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
4607 DepObjAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(DepObjAddr,
4608 CGF.VoidPtrTy);
4609 llvm::Value *ThreadID = getThreadID(CGF, Loc);
4610 // Use default allocator.
4611 llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
4612 llvm::Value *Args[] = {ThreadID, DepObjAddr, Allocator};
4613
4614 // __kmpc_free(gtid, addr, nullptr);
4615 (void)CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
4616 CGM.getModule(), OMPRTL___kmpc_free),
4617 Args);
4618}
4619
/// Emits a loop that overwrites the flags field of every record in a depobj's
/// dependency array with the runtime value for \p NewDepKind.
// NOTE(review): listing lines 4620 and 4652 are not present in this view
// (presumably the start of the signature and the start of the flags store) --
// confirm against the original file.
4621 OpenMPDependClauseKind NewDepKind,
4622 SourceLocation Loc) {
4623 ASTContext &C = CGM.getContext();
4624 QualType FlagsTy;
4625 getDependTypes(C, KmpDependInfoTy, FlagsTy);
4626 auto *KmpDependInfoRD = KmpDependInfoTy->castAsRecordDecl();
4627 llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
4628 llvm::Value *NumDeps;
4629 LValue Base;
4630 std::tie(NumDeps, Base) = getDepobjElements(CGF, DepobjLVal, Loc);
4631
4632 Address Begin = Base.getAddress();
4633 // Cast from pointer to array type to pointer to single element.
 // End = Begin + NumDeps elements.
4634 llvm::Value *End = CGF.Builder.CreateGEP(Begin.getElementType(),
4635 Begin.emitRawPointer(CGF), NumDeps);
4636 // The loop is emitted in do-while form: the body block is entered without
 // a preceding test and the exit condition is checked after the update.
 // NOTE(review): a depobj with zero records would therefore still run the
 // body once -- confirm callers guarantee at least one record.
4637 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.body");
4638 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.done");
4639 llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
4640 CGF.EmitBlock(BodyBB);
 // The PHI carries the current element pointer: Begin on entry, the next
 // element on the back edge.
4641 llvm::PHINode *ElementPHI =
4642 CGF.Builder.CreatePHI(Begin.getType(), 2, "omp.elementPast");
4643 ElementPHI->addIncoming(Begin.emitRawPointer(CGF), EntryBB);
4644 Begin = Begin.withPointer(ElementPHI, KnownNonNull);
4645 Base = CGF.MakeAddrLValue(Begin, KmpDependInfoTy, Base.getBaseInfo(),
4646 Base.getTBAAInfo());
4647 // deps[i].flags = NewDepKind;
4648 RTLDependenceKindTy DepKind = translateDependencyKind(NewDepKind);
4649 LValue FlagsLVal = CGF.EmitLValueForField(
4650 Base, *std::next(KmpDependInfoRD->field_begin(),
4651 static_cast<unsigned int>(RTLDependInfoFields::Flags)));
4653 llvm::ConstantInt::get(LLVMFlagsTy, static_cast<unsigned int>(DepKind)),
4654 FlagsLVal);
4655
4656 // Shift the address forward by one element.
4657 llvm::Value *ElementNext =
4658 CGF.Builder.CreateConstGEP(Begin, /*Index=*/1, "omp.elementNext")
4659 .emitRawPointer(CGF);
4660 ElementPHI->addIncoming(ElementNext, CGF.Builder.GetInsertBlock());
 // Leave the loop once the next element pointer equals End.
4661 llvm::Value *IsEmpty =
4662 CGF.Builder.CreateICmpEQ(ElementNext, End, "omp.isempty");
4663 CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
4664 // Done.
4665 CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
4666}
4667
/// Emits the runtime calls that launch a task.
///
/// After emitTaskInit and emitDependClause, two code paths are prepared:
///  - "then": calls __kmpc_omp_task_with_deps when Data.Dependences is
///    non-empty, otherwise __kmpc_omp_task;
///  - "else": waits on the dependences (if any) and then calls the task entry
///    directly, with __kmpc_omp_task_begin_if0 / __kmpc_omp_task_complete_if0
///    supplied as the actions of the region.
/// With an \p IfCond both paths are emitted through emitIfClause; without one
/// only the "then" path is emitted. Does nothing when there is no insert point.
// NOTE(review): listing lines 4668, 4677 and 4769 are not present in this view
// (presumably the start of the signature, the declaration of 'Result' and the
// declaration of 'RCG') -- confirm against the original file.
4669 const OMPExecutableDirective &D,
4670 llvm::Function *TaskFunction,
4671 QualType SharedsTy, Address Shareds,
4672 const Expr *IfCond,
4673 const OMPTaskDataTy &Data) {
4674 if (!CGF.HaveInsertPoint())
4675 return;
4676
4678 emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
4679 llvm::Value *NewTask = Result.NewTask;
4680 llvm::Function *TaskEntry = Result.TaskEntry;
4681 llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy;
4682 LValue TDBase = Result.TDBase;
4683 const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD;
4684 // Process list of dependences.
4685 Address DependenciesArray = Address::invalid();
4686 llvm::Value *NumOfElements;
4687 std::tie(NumOfElements, DependenciesArray) =
4688 emitDependClause(CGF, Data.Dependences, Loc);
4689
4690 // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
4691 // libcall.
4692 // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
4693 // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
4694 // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence
4695 // list is not empty
4696 llvm::Value *ThreadID = getThreadID(CGF, Loc);
4697 llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
4698 llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask };
 // DepTaskArgs is only filled in, and only read by the lambda below, when
 // Data.Dependences is non-empty. The noalias list is always passed as
 // (0, null).
4699 llvm::Value *DepTaskArgs[7];
4700 if (!Data.Dependences.empty()) {
4701 DepTaskArgs[0] = UpLoc;
4702 DepTaskArgs[1] = ThreadID;
4703 DepTaskArgs[2] = NewTask;
4704 DepTaskArgs[3] = NumOfElements;
4705 DepTaskArgs[4] = DependenciesArray.emitRawPointer(CGF);
4706 DepTaskArgs[5] = CGF.Builder.getInt32(0);
4707 DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
4708 }
4709 auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, &TaskArgs,
4710 &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) {
 // For an untied task, store 0 into the part_id field before enqueueing.
4711 if (!Data.Tied) {
4712 auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
4713 LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI);
4714 CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal);
4715 }
4716 if (!Data.Dependences.empty()) {
4717 CGF.EmitRuntimeCall(
4718 OMPBuilder.getOrCreateRuntimeFunction(
4719 CGM.getModule(), OMPRTL___kmpc_omp_task_with_deps),
4720 DepTaskArgs);
4721 } else {
4722 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
4723 CGM.getModule(), OMPRTL___kmpc_omp_task),
4724 TaskArgs);
4725 }
4726 // Check if parent region is untied and build return for untied task;
4727 if (auto *Region =
4728 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
4729 Region->emitUntiedSwitch(CGF);
4730 };
4731
 // Arguments for the dependence wait on the "else" path; the last entry is
 // Data.HasNowaitClause as a 32-bit integer.
4732 llvm::Value *DepWaitTaskArgs[7];
4733 if (!Data.Dependences.empty()) {
4734 DepWaitTaskArgs[0] = UpLoc;
4735 DepWaitTaskArgs[1] = ThreadID;
4736 DepWaitTaskArgs[2] = NumOfElements;
4737 DepWaitTaskArgs[3] = DependenciesArray.emitRawPointer(CGF);
4738 DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
4739 DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
4740 DepWaitTaskArgs[6] =
4741 llvm::ConstantInt::get(CGF.Int32Ty, Data.HasNowaitClause);
4742 }
4743 auto &M = CGM.getModule();
4744 auto &&ElseCodeGen = [this, &M, &TaskArgs, ThreadID, NewTaskNewTaskTTy,
4745 TaskEntry, &Data, &DepWaitTaskArgs,
4746 Loc](CodeGenFunction &CGF, PrePostActionTy &) {
4747 CodeGenFunction::RunCleanupsScope LocalScope(CGF);
4748 // Call __kmpc_omp_taskwait_deps_51(ident_t *, kmp_int32 gtid,
4749 // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
4750 // ndeps_noalias, kmp_depend_info_t *noalias_dep_list, <nowait flag>) if
4751 // dependence info is specified.
4752 if (!Data.Dependences.empty())
4753 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
4754 M, OMPRTL___kmpc_omp_taskwait_deps_51),
4755 DepWaitTaskArgs);
4756 // Call proxy_task_entry(gtid, new_task);
4757 auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy,
4758 Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
4759 Action.Enter(CGF);
4760 llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
4761 CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry,
4762 OutlinedFnArgs);
4763 };
4764
4765 // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
4766 // kmp_task_t *new_task);
4767 // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
4768 // kmp_task_t *new_task);
4770 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
4771 M, OMPRTL___kmpc_omp_task_begin_if0),
4772 TaskArgs,
4773 OMPBuilder.getOrCreateRuntimeFunction(
4774 M, OMPRTL___kmpc_omp_task_complete_if0),
4775 TaskArgs);
4776 RCG.setAction(Action);
4777 RCG(CGF);
4778 };
4779
4780 if (IfCond) {
4781 emitIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen);
4782 } else {
 // No if clause: only the "then" path is emitted.
4783 RegionCodeGenTy ThenRCG(ThenCodeGen);
4784 ThenRCG(CGF);
4785 }
4786}
4787
/// Emits the runtime call that launches a taskloop.
///
/// After emitTaskInit, the lower bound, upper bound and stride fields of the
/// task descriptor are initialized from the initializers of the directive's
/// corresponding variables, the reductions field is set from Data.Reductions
/// (or null-initialized), and __kmpc_taskloop is called -- or
/// __kmpc_taskloop_5, with one extra argument, when Data.HasModifier is set.
/// Does nothing when there is no insert point.
// NOTE(review): listing lines 4788, 4796, 4845 and 4867 are not present in
// this view (presumably the start of the signature, the declaration of
// 'Result', the declaration of 'TaskArgs' and the cast applied to
// Result.TaskDupFn) -- confirm against the original file.
4789 const OMPLoopDirective &D,
4790 llvm::Function *TaskFunction,
4791 QualType SharedsTy, Address Shareds,
4792 const Expr *IfCond,
4793 const OMPTaskDataTy &Data) {
4794 if (!CGF.HaveInsertPoint())
4795 return;
4797 emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
4798 // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
4799 // libcall.
4800 // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
4801 // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
4802 // sched, kmp_uint64 grainsize, void *task_dup);
4803 llvm::Value *ThreadID = getThreadID(CGF, Loc);
4804 llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
 // if_val: the evaluated condition widened to int, or constant 1 without an
 // if clause.
4805 llvm::Value *IfVal;
4806 if (IfCond) {
4807 IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy,
4808 /*isSigned=*/true);
4809 } else {
4810 IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1);
4811 }
4812
 // Initialize the lower bound, upper bound and stride fields of the task
 // descriptor.
4813 LValue LBLVal = CGF.EmitLValueForField(
4814 Result.TDBase,
4815 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound));
4816 const auto *LBVar =
4817 cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl());
4818 CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(), LBLVal.getQuals(),
4819 /*IsInitializer=*/true);
4820 LValue UBLVal = CGF.EmitLValueForField(
4821 Result.TDBase,
4822 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound));
4823 const auto *UBVar =
4824 cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl());
4825 CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(), UBLVal.getQuals(),
4826 /*IsInitializer=*/true);
4827 LValue StLVal = CGF.EmitLValueForField(
4828 Result.TDBase,
4829 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride));
4830 const auto *StVar =
4831 cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl());
4832 CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(), StLVal.getQuals(),
4833 /*IsInitializer=*/true);
4834 // Store reductions address.
4835 LValue RedLVal = CGF.EmitLValueForField(
4836 Result.TDBase,
4837 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions));
4838 if (Data.Reductions) {
4839 CGF.EmitStoreOfScalar(Data.Reductions, RedLVal);
4840 } else {
4841 CGF.EmitNullInitialization(RedLVal.getAddress(),
4842 CGF.getContext().VoidPtrTy);
4843 }
 // Values passed as the 'sched' argument: NoSchedule when Data.Schedule has
 // no pointer, otherwise NumTasks if its int flag is set, else Grainsize.
4844 enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 };
4846 UpLoc,
4847 ThreadID,
4848 Result.NewTask,
4849 IfVal,
4850 LBLVal.getPointer(CGF),
4851 UBLVal.getPointer(CGF),
4852 CGF.EmitLoadOfScalar(StLVal, Loc),
4853 llvm::ConstantInt::getSigned(
4854 CGF.IntTy, 1), // Always 1 because taskgroup emitted by the compiler
4855 llvm::ConstantInt::getSigned(
4856 CGF.IntTy, Data.Schedule.getPointer()
4857 ? Data.Schedule.getInt() ? NumTasks : Grainsize
4858 : NoSchedule),
4859 Data.Schedule.getPointer()
4860 ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty,
4861 /*isSigned=*/false)
4862 : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0)};
 // __kmpc_taskloop_5 receives one extra i32 argument (constant 1) placed
 // before task_dup.
4863 if (Data.HasModifier)
4864 TaskArgs.push_back(llvm::ConstantInt::get(CGF.Int32Ty, 1));
4865
 // task_dup: the duplication function when one was generated, else null.
4866 TaskArgs.push_back(Result.TaskDupFn
4868 Result.TaskDupFn, CGF.VoidPtrTy)
4869 : llvm::ConstantPointerNull::get(CGF.VoidPtrTy));
4870 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
4871 CGM.getModule(), Data.HasModifier
4872 ? OMPRTL___kmpc_taskloop_5
4873 : OMPRTL___kmpc_taskloop),
4874 TaskArgs);
4875}
4876
4877/// Emit reduction operation for each element of array (required for
4878/// array sections) LHS op = RHS.
4879/// \param Type Type of array.
4880/// \param LHSVar Variable on the left side of the reduction operation
4881/// (references element of array in original variable).
4882/// \param RHSVar Variable on the right side of the reduction operation
4883/// (references element of array in original variable).
4884/// \param RedOpGen Generator of reduction operation with use of LHSVar and
4885/// RHSVar.
/// \param XExpr, EExpr, UpExpr Optional expressions forwarded unchanged to
/// \p RedOpGen for every element.
// NOTE(review): listing lines 4886 and 4935 are not present in this view
// (presumably the start of the signature and the declaration of 'Scope') --
// confirm against the original file.
4887 CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar,
4888 const VarDecl *RHSVar,
4889 const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *,
4890 const Expr *, const Expr *)> &RedOpGen,
4891 const Expr *XExpr = nullptr, const Expr *EExpr = nullptr,
4892 const Expr *UpExpr = nullptr) {
4893 // Perform element-by-element initialization.
4894 QualType ElementTy;
4895 Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar);
4896 Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar);
4897
4898 // Drill down to the base element type on both arrays.
4899 const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
4900 llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr);
4901
4902 llvm::Value *RHSBegin = RHSAddr.emitRawPointer(CGF);
4903 llvm::Value *LHSBegin = LHSAddr.emitRawPointer(CGF);
4904 // Cast from pointer to array type to pointer to single element.
4905 llvm::Value *LHSEnd =
4906 CGF.Builder.CreateGEP(LHSAddr.getElementType(), LHSBegin, NumElements);
4907 // The basic structure here is a while-do loop.
 // An empty array (begin == end) branches straight to the done block.
4908 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arraycpy.body");
4909 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arraycpy.done");
4910 llvm::Value *IsEmpty =
4911 CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty");
4912 CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
4913
4914 // Enter the loop body, making that address the current address.
4915 llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
4916 CGF.EmitBlock(BodyBB);
4917
4918 CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);
4919
 // One PHI per side tracks the current element pointer across iterations.
4920 llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI(
4921 RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast");
4922 RHSElementPHI->addIncoming(RHSBegin, EntryBB);
4923 Address RHSElementCurrent(
4924 RHSElementPHI, RHSAddr.getElementType(),
4925 RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));
4926
4927 llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI(
4928 LHSBegin->getType(), 2, "omp.arraycpy.destElementPast");
4929 LHSElementPHI->addIncoming(LHSBegin, EntryBB);
4930 Address LHSElementCurrent(
4931 LHSElementPHI, LHSAddr.getElementType(),
4932 LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));
4933
4934 // Emit copy.
 // LHSVar/RHSVar are remapped to the current elements while the reduction
 // operation is generated.
4936 Scope.addPrivate(LHSVar, LHSElementCurrent);
4937 Scope.addPrivate(RHSVar, RHSElementCurrent);
4938 Scope.Privatize();
4939 RedOpGen(CGF, XExpr, EExpr, UpExpr);
4940 Scope.ForceCleanup();
4941
4942 // Shift the address forward by one element.
4943 llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32(
4944 LHSAddr.getElementType(), LHSElementPHI, /*Idx0=*/1,
4945 "omp.arraycpy.dest.element");
4946 llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32(
4947 RHSAddr.getElementType(), RHSElementPHI, /*Idx0=*/1,
4948 "omp.arraycpy.src.element");
4949 // Check whether we've reached the end.
4950 llvm::Value *Done =
4951 CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done");
4952 CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
 // Back-edge values are added from the block that ends the body, which may
 // differ from BodyBB if RedOpGen created further blocks.
4953 LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock());
4954 RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock());
4955
4956 // Done.
4957 CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
4958}
4959
4960/// Emit reduction combiner. If the combiner is a simple expression emit it as
4961/// is, otherwise consider it as combiner of UDR decl and emit it as a call of
4962/// UDR combiner function.
// NOTE(review): the embedded listing numbers jump from 4962 to 4964, so the
// line that opens this definition (return type, name, leading parameter) is
// absent from this copy. The body uses an object named `CGF`, and call sites
// elsewhere in this listing spell the call `emitReductionCombiner(CGF, E)`.
4964 const Expr *ReductionOp) {
// Recognise the user-defined-reduction shape: a CallExpr whose callee is an
// OpaqueValueExpr, whose source expression (implicit casts ignored) is a
// DeclRefExpr that names an OMPDeclareReductionDecl.
4965 if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
4966 if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
4967 if (const auto *DRE =
4968 dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
4969 if (const auto *DRD =
4970 dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) {
4971 std::pair<llvm::Function *, llvm::Function *> Reduction =
// NOTE(review): listing lines 4972-4974 are absent here. The initializer of
// `Reduction`, and whatever is set up before the emission below, cannot be
// seen in this copy.
4975 CGF.EmitIgnoredExpr(ReductionOp);
4976 return;
4977 }
// Every other expression shape is emitted directly; its value is discarded.
4978 CGF.EmitIgnoredExpr(ReductionOp);
4979}
4980
// Builds an internal-linkage helper of the form
//   void reduction_func(void *LHSArg, void *RHSArg)
// in which both arguments are treated as arrays of pointers to reduction
// items; the helper combines every item and is returned as an llvm::Function.
// NOTE(review): listing lines 4981 and 4983 are absent, so the opening of the
// definition and part of the parameter list are not visible. The body reads
// `Privates` and `LHSExprs`, which are not declared on any visible line, and
// a call elsewhere in this listing passes
// (name, Loc, element type, Privates, LHSExprs, RHSExprs, ReductionOps) to
// `emitReductionFunction`.
4982 StringRef ReducerName, SourceLocation Loc, llvm::Type *ArgsElemType,
4984 ArrayRef<const Expr *> RHSExprs, ArrayRef<const Expr *> ReductionOps) {
4985 ASTContext &C = CGM.getContext();
4986
4987 // void reduction_func(void *LHSArg, void *RHSArg);
4988 auto *LHSArg =
4989 ImplicitParamDecl::Create(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4990 C.VoidPtrTy, ImplicitParamKind::Other);
4991 auto *RHSArg =
4992 ImplicitParamDecl::Create(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4993 C.VoidPtrTy, ImplicitParamKind::Other);
4994 FunctionArgList Args{LHSArg, RHSArg};
4995 const auto &CGFI =
4996 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
// The symbol name is derived from the caller-supplied reducer name.
4997 std::string Name = getReductionFuncName(ReducerName);
4998 auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
4999 llvm::GlobalValue::InternalLinkage, Name,
5000 &CGM.getModule());
5001 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
// The extra attribute is attached only when a sample profile file is set.
5002 if (!CGM.getCodeGenOpts().SampleProfileFile.empty())
5003 Fn->addFnAttr("sample-profile-suffix-elision-policy", "selected");
5004 Fn->setDoesNotRecurse();
// A fresh CodeGenFunction is used to emit the helper's body.
5005 CodeGenFunction CGF(CGM);
5006 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
5007
5008 // Dst = (void*[n])(LHSArg);
5009 // Src = (void*[n])(RHSArg);
// NOTE(review): listing lines 5010 and 5014 are absent. They open the two
// statements whose trailing arguments follow; the objects `LHS` and `RHS`
// used further down are presumably what those statements declare - confirm.
5011 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(LHSArg)),
5012 CGF.Builder.getPtrTy(0)),
5013 ArgsElemType, CGF.getPointerAlign());
5015 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(RHSArg)),
5016 CGF.Builder.getPtrTy(0)),
5017 ArgsElemType, CGF.getPointerAlign());
5018
5019 // ...
5020 // *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
5021 // ...
// NOTE(review): listing line 5022 is absent; `Scope`, used below, has no
// visible declaration.
5023 const auto *IPriv = Privates.begin();
// `I` counts reduction items while `Idx` counts array slots: a variably
// modified item takes one extra slot (see `++Idx` below), so they can differ.
5024 unsigned Idx = 0;
5025 for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) {
// Remap the RHS and LHS helper variables of this item to the addresses
// obtained from slot `Idx` of the corresponding argument array.
5026 const auto *RHSVar =
5027 cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl());
5028 Scope.addPrivate(RHSVar, emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar));
5029 const auto *LHSVar =
5030 cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl());
5031 Scope.addPrivate(LHSVar, emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar));
5032 QualType PrivTy = (*IPriv)->getType();
5033 if (PrivTy->isVariablyModifiedType()) {
5034 // Get array size and emit VLA type.
// The slot after the item pointer is loaded from the LHS array and converted
// from pointer to SizeTy before the VLA type is emitted.
5035 ++Idx;
5036 Address Elem = CGF.Builder.CreateConstArrayGEP(LHS, Idx);
5037 llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem);
5038 const VariableArrayType *VLA =
5039 CGF.getContext().getAsVariableArrayType(PrivTy);
5040 const auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr());
// NOTE(review): listing line 5041 is absent; it opens the statement that
// receives (CGF, OVE, <size value>) on the next line.
5042 CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy)));
5043 CGF.EmitVariablyModifiedType(PrivTy);
5044 }
5045 }
5046 Scope.Privatize();
// Second pass: emit the combiner for each item, walking Privates, LHSExprs
// and RHSExprs in lock-step with ReductionOps.
5047 IPriv = Privates.begin();
5048 const auto *ILHS = LHSExprs.begin();
5049 const auto *IRHS = RHSExprs.begin();
5050 for (const Expr *E : ReductionOps) {
5051 if ((*IPriv)->getType()->isArrayType()) {
5052 // Emit reduction for array section.
5053 const auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
5054 const auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
// NOTE(review): listing line 5055 is absent; it names the callee that takes
// the arguments below, including the per-element lambda.
5056 CGF, (*IPriv)->getType(), LHSVar, RHSVar,
5057 [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
5058 emitReductionCombiner(CGF, E);
5059 });
5060 } else {
5061 // Emit reduction for array subscript or single variable.
5062 emitReductionCombiner(CGF, E);
5063 }
5064 ++IPriv;
5065 ++ILHS;
5066 ++IRHS;
5067 }
5068 Scope.ForceCleanup();
5069 CGF.FinishFunction();
5070 return Fn;
5071}
5072
// Emits the combiner for a single reduction item. An item whose private
// reference has array type takes the array branch, where the combiner is
// passed as a callback; anything else emits the combiner once.
// NOTE(review): listing line 5073 is absent, so the opening of the definition
// (return type, name, leading parameter) is not visible. The body uses `CGF`,
// and callers in this listing invoke
// `emitSingleReductionCombiner(CGF, <op>, <private>, <lhs>, <rhs>)`.
5074 const Expr *ReductionOp,
5075 const Expr *PrivateRef,
5076 const DeclRefExpr *LHS,
5077 const DeclRefExpr *RHS) {
5078 if (PrivateRef->getType()->isArrayType()) {
5079 // Emit reduction for array section.
5080 const auto *LHSVar = cast<VarDecl>(LHS->getDecl());
5081 const auto *RHSVar = cast<VarDecl>(RHS->getDecl());
// NOTE(review): listing line 5082 is absent; it names the callee that takes
// the arguments below. The lambda ignores its three Expr arguments and always
// emits the combiner for `ReductionOp`.
5083 CGF, PrivateRef->getType(), LHSVar, RHSVar,
5084 [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
5085 emitReductionCombiner(CGF, ReductionOp);
5086 });
5087 } else {
5088 // Emit reduction for array subscript or single variable.
5089 emitReductionCombiner(CGF, ReductionOp);
5090 }
5091}
5092
// Forward declaration: the definition appears further down in this file, and
// the function is called before that point.
5093static std::string generateUniqueName(CodeGenModule &CGM,
5094 llvm::StringRef Prefix, const Expr *Ref);
5095
// Emits the code for ONE reduction item that is combined through a
// module-level internal variable: initialise it, combine each thread's value
// into it inside a critical region, copy the result out, then combine with the
// original list item. The four Expr parameters each describe that single item.
// NOTE(review): listing line 5096 is absent, so the opening of the definition
// (return type and name) is not visible. A call elsewhere in this listing
// spells it `emitPrivateReduction(CGF, Loc, <private>, <lhs>, <rhs>, <op>)`.
5097 CodeGenFunction &CGF, SourceLocation Loc, const Expr *Privates,
5098 const Expr *LHSExprs, const Expr *RHSExprs, const Expr *ReductionOps) {
5099
5100 // Create a shared global variable (__shared_reduction_var) to accumulate the
5101 // final result.
5102 //
5103 // Call __kmpc_barrier to synchronize threads before initialization.
5104 //
5105 // The master thread (thread_id == 0) initializes __shared_reduction_var
5106 // with the identity value or initializer.
5107 //
5108 // Call __kmpc_barrier to synchronize before combining.
5109 // For each i:
5110 // - Thread enters critical section.
5111 // - Reads its private value from LHSExprs[i].
5112 // - Updates __shared_reduction_var[i] = RedOp_i(__shared_reduction_var[i],
5113 // Privates[i]).
5114 // - Exits critical section.
5115 //
5116 // Call __kmpc_barrier after combining.
5117 //
5118 // Each thread copies __shared_reduction_var[i] back to RHSExprs[i].
5119 //
5120 // Final __kmpc_barrier to synchronize after broadcasting
// NOTE(review): the outline above lists four barriers, but the visible code
// emits three __kmpc_barrier calls (after the init block, after the combine,
// after the copy-out); none precedes the init branch. Confirm which is
// intended.
5121 QualType PrivateType = Privates->getType();
5122 llvm::Type *LLVMType = CGF.ConvertTypeForMem(PrivateType);
5123
5124 const OMPDeclareReductionDecl *UDR = getReductionInit(ReductionOps);
// Name the shared variable after the referenced declaration when `Privates`
// is a DeclRefExpr (parens/casts ignored); otherwise use one fixed name.
5125 std::string ReductionVarNameStr;
5126 if (const auto *DRE = dyn_cast<DeclRefExpr>(Privates->IgnoreParenCasts()))
5127 ReductionVarNameStr =
5128 generateUniqueName(CGM, DRE->getDecl()->getNameAsString(), Privates);
5129 else
5130 ReductionVarNameStr = "unnamed_priv_var";
5131
5132 // Create an internal shared variable
// NOTE(review): "internal_pivate_" looks like a misspelling of "private". It
// is part of the generated variable name, so it is left untouched here.
5133 std::string SharedName =
5134 CGM.getOpenMPRuntime().getName({"internal_pivate_", ReductionVarNameStr});
5135 llvm::GlobalVariable *SharedVar = OMPBuilder.getOrCreateInternalVariable(
5136 LLVMType, ".omp.reduction." + SharedName);
5137
// getTypeAlign() is divided by 8 before being used as the alignment value.
5138 SharedVar->setAlignment(
5139 llvm::MaybeAlign(CGF.getContext().getTypeAlign(PrivateType) / 8));
5140
5141 Address SharedResult =
5142 CGF.MakeNaturalAlignRawAddrLValue(SharedVar, PrivateType).getAddress();
5143
// The same {location, thread id} pair is reused for every barrier call below.
5144 llvm::Value *ThreadId = getThreadID(CGF, Loc);
5145 llvm::Value *BarrierLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
5146 llvm::Value *BarrierArgs[] = {BarrierLoc, ThreadId};
5147
5148 llvm::BasicBlock *InitBB = CGF.createBasicBlock("init");
5149 llvm::BasicBlock *InitEndBB = CGF.createBasicBlock("init.end");
5150
// NOTE(review): despite its name, `IsWorker` is true when the thread id
// equals 0, i.e. for the thread the outline above calls the master thread.
// Only that thread runs the "init" block.
5151 llvm::Value *IsWorker = CGF.Builder.CreateICmpEQ(
5152 ThreadId, llvm::ConstantInt::get(ThreadId->getType(), 0));
5153 CGF.Builder.CreateCondBr(IsWorker, InitBB, InitEndBB);
5154
5155 CGF.EmitBlock(InitBB);
5156
// Initialises the shared variable. With a UDR: use its initializer expression
// when there is one, else null-initialise. Without a UDR: use the private
// variable's own initializer when `Privates` is a DeclRefExpr to a VarDecl
// that has one, else null-initialise.
5157 auto EmitSharedInit = [&]() {
5158 if (UDR) { // Check if it's a User-Defined Reduction
5159 if (const Expr *UDRInitExpr = UDR->getInitializer()) {
5160 std::pair<llvm::Function *, llvm::Function *> FnPair =
// NOTE(review): listing line 5161 is absent; the initializer of `FnPair` is
// not visible in this copy.
5162 llvm::Function *InitializerFn = FnPair.second;
5163 if (InitializerFn) {
5164 if (const auto *CE =
5165 dyn_cast<CallExpr>(UDRInitExpr->IgnoreParenImpCasts())) {
// The initializer call's first argument is expected to be a unary operator
// applied to a DeclRefExpr; that variable is remapped to the shared storage.
// The cast<> calls assert this shape.
5166 const auto *OutDRE = cast<DeclRefExpr>(
5167 cast<UnaryOperator>(CE->getArg(0)->IgnoreParenImpCasts())
5168 ->getSubExpr());
5169 const VarDecl *OutVD = cast<VarDecl>(OutDRE->getDecl());
5170
5171 CodeGenFunction::OMPPrivateScope LocalScope(CGF);
5172 LocalScope.addPrivate(OutVD, SharedResult);
5173
5174 (void)LocalScope.Privatize();
5175 if (const auto *OVE = dyn_cast<OpaqueValueExpr>(
5176 CE->getCallee()->IgnoreParenImpCasts())) {
// NOTE(review): listing line 5177 is absent; it opens the statement that
// receives (CGF, OVE, RValue::get(InitializerFn)) on the next line.
5178 CGF, OVE, RValue::get(InitializerFn));
5179 CGF.EmitIgnoredExpr(CE);
5180 } else {
5181 CGF.EmitAnyExprToMem(UDRInitExpr, SharedResult,
5182 PrivateType.getQualifiers(),
5183 /*IsInitializer=*/true);
5184 }
5185 } else {
5186 CGF.EmitAnyExprToMem(UDRInitExpr, SharedResult,
5187 PrivateType.getQualifiers(),
5188 /*IsInitializer=*/true);
5189 }
5190 } else {
5191 CGF.EmitAnyExprToMem(UDRInitExpr, SharedResult,
5192 PrivateType.getQualifiers(),
5193 /*IsInitializer=*/true);
5194 }
5195 } else {
5196 // EmitNullInitialization handles default construction for C++ classes
5197 // and zeroing for scalars, which is a reasonable default.
5198 CGF.EmitNullInitialization(SharedResult, PrivateType);
5199 }
5200 return; // UDR initialization handled
5201 }
// Unlike the name computation above, this dyn_cast is applied to `Privates`
// directly, without stripping parens or casts.
5202 if (const auto *DRE = dyn_cast<DeclRefExpr>(Privates)) {
5203 if (const auto *VD = dyn_cast<VarDecl>(DRE->getDecl())) {
5204 if (const Expr *InitExpr = VD->getInit()) {
5205 CGF.EmitAnyExprToMem(InitExpr, SharedResult,
5206 PrivateType.getQualifiers(), true);
5207 return;
5208 }
5209 }
5210 }
5211 CGF.EmitNullInitialization(SharedResult, PrivateType);
5212 };
5213 EmitSharedInit();
5214 CGF.Builder.CreateBr(InitEndBB);
5215 CGF.EmitBlock(InitEndBB);
5216
// First barrier: emitted after the init block, before any combining.
5217 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
5218 CGM.getModule(), OMPRTL___kmpc_barrier),
5219 BarrierArgs);
5220
// `CurrentUDR` is computed from the same expression as `UDR` above.
5221 const Expr *ReductionOp = ReductionOps;
5222 const OMPDeclareReductionDecl *CurrentUDR = getReductionInit(ReductionOp);
5223 LValue SharedLV = CGF.MakeAddrLValue(SharedResult, PrivateType);
5224 LValue LHSLV = CGF.EmitLValue(Privates);
5225
// Wraps a region generator in a critical region; every use in this function
// shares the same name, built from {"reduction_critical"}.
5226 auto EmitCriticalReduction = [&](auto ReductionGen) {
5227 std::string CriticalName = getName({"reduction_critical"});
5228 emitCriticalRegion(CGF, CriticalName, ReductionGen, Loc);
5229 };
5230
5231 if (CurrentUDR) {
5232 // Handle user-defined reduction.
// Nothing is emitted inside the critical region when the combiner function is
// null or `ReductionOp` is not a CallExpr.
5233 auto ReductionGen = [&](CodeGenFunction &CGF, PrePostActionTy &Action) {
5234 Action.Enter(CGF);
5235 std::pair<llvm::Function *, llvm::Function *> FnPair =
5236 getUserDefinedReduction(CurrentUDR);
5237 if (FnPair.first) {
5238 if (const auto *CE = dyn_cast<CallExpr>(ReductionOp)) {
// Argument 0's variable is remapped to the shared storage and argument 1's
// variable to the address obtained from `Privates`.
5239 const auto *OutDRE = cast<DeclRefExpr>(
5240 cast<UnaryOperator>(CE->getArg(0)->IgnoreParenImpCasts())
5241 ->getSubExpr());
5242 const auto *InDRE = cast<DeclRefExpr>(
5243 cast<UnaryOperator>(CE->getArg(1)->IgnoreParenImpCasts())
5244 ->getSubExpr());
5245 CodeGenFunction::OMPPrivateScope LocalScope(CGF);
5246 LocalScope.addPrivate(cast<VarDecl>(OutDRE->getDecl()),
5247 SharedLV.getAddress());
5248 LocalScope.addPrivate(cast<VarDecl>(InDRE->getDecl()),
5249 LHSLV.getAddress());
5250 (void)LocalScope.Privatize();
5251 emitReductionCombiner(CGF, ReductionOp);
5252 }
5253 }
5254 };
5255 EmitCriticalReduction(ReductionGen);
5256 } else {
5257 // Handle built-in reduction operations.
// Assert-enabled builds only: check that the reduction expression is an
// assignment (builtin `=` or an overloaded operator=), looking through
// parens, casts and one ExprWithCleanups wrapper.
5258#ifndef NDEBUG
5259 const Expr *ReductionClauseExpr = ReductionOp->IgnoreParenCasts();
5260 if (const auto *Cleanup = dyn_cast<ExprWithCleanups>(ReductionClauseExpr))
5261 ReductionClauseExpr = Cleanup->getSubExpr()->IgnoreParenCasts();
5262
5263 const Expr *AssignRHS = nullptr;
5264 if (const auto *BinOp = dyn_cast<BinaryOperator>(ReductionClauseExpr)) {
5265 if (BinOp->getOpcode() == BO_Assign)
5266 AssignRHS = BinOp->getRHS();
5267 } else if (const auto *OpCall =
5268 dyn_cast<CXXOperatorCallExpr>(ReductionClauseExpr)) {
5269 if (OpCall->getOperator() == OO_Equal)
5270 AssignRHS = OpCall->getArg(1);
5271 }
5272
5273 assert(AssignRHS &&
5274 "Private Variable Reduction : Invalid ReductionOp expression");
5275#endif
5276
5277 auto ReductionGen = [&](CodeGenFunction &CGF, PrePostActionTy &Action) {
5278 Action.Enter(CGF);
// The dyn_cast results are checked only by the assert; in builds without
// asserts a non-DeclRefExpr would be dereferenced as null below.
5279 const auto *OmpOutDRE =
5280 dyn_cast<DeclRefExpr>(LHSExprs->IgnoreParenImpCasts());
5281 const auto *OmpInDRE =
5282 dyn_cast<DeclRefExpr>(RHSExprs->IgnoreParenImpCasts());
5283 assert(
5284 OmpOutDRE && OmpInDRE &&
5285 "Private Variable Reduction : LHSExpr/RHSExpr must be DeclRefExprs");
5286 const VarDecl *OmpOutVD = cast<VarDecl>(OmpOutDRE->getDecl());
5287 const VarDecl *OmpInVD = cast<VarDecl>(OmpInDRE->getDecl());
// The LHS variable is remapped to the shared storage and the RHS variable to
// the address obtained from `Privates`.
5288 CodeGenFunction::OMPPrivateScope LocalScope(CGF);
5289 LocalScope.addPrivate(OmpOutVD, SharedLV.getAddress());
5290 LocalScope.addPrivate(OmpInVD, LHSLV.getAddress());
5291 (void)LocalScope.Privatize();
5292 // Emit the actual reduction operation
5293 CGF.EmitIgnoredExpr(ReductionOp);
5294 };
5295 EmitCriticalReduction(ReductionGen);
5296 }
5297
// Second barrier: emitted after the combine step.
5298 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
5299 CGM.getModule(), OMPRTL___kmpc_barrier),
5300 BarrierArgs);
5301
5302 // Broadcast final result
// Aggregates are copied from the shared address; other types are loaded as a
// scalar first and stored afterwards. The destination is the lvalue of
// `RHSExprs`.
5303 bool IsAggregate = PrivateType->isAggregateType();
5304 LValue SharedLV1 = CGF.MakeAddrLValue(SharedResult, PrivateType);
5305 llvm::Value *FinalResultVal = nullptr;
5306 Address FinalResultAddr = Address::invalid();
5307
5308 if (IsAggregate)
5309 FinalResultAddr = SharedResult;
5310 else
5311 FinalResultVal = CGF.EmitLoadOfScalar(SharedLV1, Loc);
5312
5313 LValue TargetLHSLV = CGF.EmitLValue(RHSExprs);
5314 if (IsAggregate) {
5315 CGF.EmitAggregateCopy(TargetLHSLV,
5316 CGF.MakeAddrLValue(FinalResultAddr, PrivateType),
5317 PrivateType, AggValueSlot::DoesNotOverlap, false);
5318 } else {
5319 CGF.EmitStoreOfScalar(FinalResultVal, TargetLHSLV);
5320 }
5321 // Final synchronization barrier
5322 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
5323 CGM.getModule(), OMPRTL___kmpc_barrier),
5324 BarrierArgs);
5325
5326 // Combiner with original list item
// Emitted inside the same-named critical region as the combine step. The
// cast<> calls require LHSExprs and RHSExprs to be DeclRefExprs themselves,
// with no surrounding parens or casts.
5327 auto OriginalListCombiner = [&](CodeGenFunction &CGF,
5328 PrePostActionTy &Action) {
5329 Action.Enter(CGF);
5330 emitSingleReductionCombiner(CGF, ReductionOps, Privates,
5331 cast<DeclRefExpr>(LHSExprs),
5332 cast<DeclRefExpr>(RHSExprs));
5333 };
5334 EmitCriticalReduction(OriginalListCombiner);
5335}
5336
// Emits the reduction sequence for a list of reduction items: either the bare
// combiners (Options.SimpleReduction), or a __kmpc_reduce{_nowait} call
// followed by a switch with a combine case (1) and an atomic/critical case
// (2). Items flagged in Options.IsPrivateVarReduction are left out of that
// sequence and handled one by one at the end.
// NOTE(review): listing line 5337 is absent, so the opening of the definition
// (return type, name, leading parameters) is not visible. The body uses `CGF`
// and `Loc`, neither of which is declared on a visible line. The name is
// presumably CGOpenMPRuntime::emitReduction - TODO confirm.
5338 ArrayRef<const Expr *> OrgPrivates,
5339 ArrayRef<const Expr *> OrgLHSExprs,
5340 ArrayRef<const Expr *> OrgRHSExprs,
5341 ArrayRef<const Expr *> OrgReductionOps,
5342 ReductionOptionsTy Options) {
// Nothing to emit when there is no current insertion point.
5343 if (!CGF.HaveInsertPoint())
5344 return;
5345
5346 bool WithNowait = Options.WithNowait;
5347 bool SimpleReduction = Options.SimpleReduction;
5348
5349 // Next code should be emitted for reduction:
5350 //
5351 // static kmp_critical_name lock = { 0 };
5352 //
5353 // void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
5354 // *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
5355 // ...
5356 // *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1],
5357 // *(Type<n>-1*)rhs[<n>-1]);
5358 // }
5359 //
5360 // ...
5361 // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
5362 // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
5363 // RedList, reduce_func, &<lock>)) {
5364 // case 1:
5365 // ...
5366 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
5367 // ...
5368 // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
5369 // break;
5370 // case 2:
5371 // ...
5372 // Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
5373 // ...
5374 // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);]
5375 // break;
5376 // default:;
5377 // }
5378 //
5379 // if SimpleReduction is true, only the next code is generated:
5380 // ...
5381 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
5382 // ...
5383
5384 ASTContext &C = CGM.getContext();
5385
// Simple path: emit each combiner in order and return. It walks the
// unfiltered Org* arrays and does not consult IsPrivateVarReduction.
5386 if (SimpleReduction) {
// NOTE(review): listing line 5387 is absent; whatever statement opens this
// branch is not visible in this copy.
5388 const auto *IPriv = OrgPrivates.begin();
5389 const auto *ILHS = OrgLHSExprs.begin();
5390 const auto *IRHS = OrgRHSExprs.begin();
5391 for (const Expr *E : OrgReductionOps) {
5392 emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
5393 cast<DeclRefExpr>(*IRHS));
5394 ++IPriv;
5395 ++ILHS;
5396 ++IRHS;
5397 }
5398 return;
5399 }
5400
5401 // Filter out shared reduction variables based on IsPrivateVarReduction flag.
5402 // Only keep entries where the corresponding variable is not private.
// The loop is bounded by the smaller of the ReductionOps and LHSExprs sizes.
// OrgPrivates, OrgRHSExprs and IsPrivateVarReduction are indexed with the
// same `I` without a separate bound of their own.
5403 SmallVector<const Expr *> FilteredPrivates, FilteredLHSExprs,
5404 FilteredRHSExprs, FilteredReductionOps;
5405 for (unsigned I : llvm::seq<unsigned>(
5406 std::min(OrgReductionOps.size(), OrgLHSExprs.size()))) {
5407 if (!Options.IsPrivateVarReduction[I]) {
5408 FilteredPrivates.emplace_back(OrgPrivates[I]);
5409 FilteredLHSExprs.emplace_back(OrgLHSExprs[I]);
5410 FilteredRHSExprs.emplace_back(OrgRHSExprs[I]);
5411 FilteredReductionOps.emplace_back(OrgReductionOps[I]);
5412 }
5413 }
5414 // Wrap filtered vectors in ArrayRef for downstream shared reduction
5415 // processing.
// These ArrayRefs are non-owning views of the SmallVectors declared above.
5416 ArrayRef<const Expr *> Privates = FilteredPrivates;
5417 ArrayRef<const Expr *> LHSExprs = FilteredLHSExprs;
5418 ArrayRef<const Expr *> RHSExprs = FilteredRHSExprs;
5419 ArrayRef<const Expr *> ReductionOps = FilteredReductionOps;
5420
5421 // 1. Build a list of reduction variables.
5422 // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
// One slot per item, plus one extra slot per variably modified item.
5423 auto Size = RHSExprs.size();
5424 for (const Expr *E : Privates) {
5425 if (E->getType()->isVariablyModifiedType())
5426 // Reserve place for array size.
5427 ++Size;
5428 }
5429 llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size);
5430 QualType ReductionArrayTy = C.getConstantArrayType(
5431 C.VoidPtrTy, ArraySize, nullptr, ArraySizeModifier::Normal,
5432 /*IndexTypeQuals=*/0);
5433 RawAddress ReductionList =
5434 CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
5435 const auto *IPriv = Privates.begin();
// `I` counts items, `Idx` counts slots in the list.
5436 unsigned Idx = 0;
5437 for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) {
5438 Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
5439 CGF.Builder.CreateStore(
// NOTE(review): listing line 5440 is absent; it opens the expression whose
// arguments (item pointer, CGF.VoidPtrTy) follow.
5441 CGF.EmitLValue(RHSExprs[I]).getPointer(CGF), CGF.VoidPtrTy),
5442 Elem);
5443 if ((*IPriv)->getType()->isVariablyModifiedType()) {
5444 // Store array size.
// The element count is cast to SizeTy and stored in the next slot as an
// integer-to-pointer value.
5445 ++Idx;
5446 Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
5447 llvm::Value *Size = CGF.Builder.CreateIntCast(
5448 CGF.getVLASize(
5449 CGF.getContext().getAsVariableArrayType((*IPriv)->getType()))
5450 .NumElts,
5451 CGF.SizeTy, /*isSigned=*/false);
5452 CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy),
5453 Elem);
5454 }
5455 }
5456
5457 // 2. Emit reduce_func().
5458 llvm::Function *ReductionFn = emitReductionFunction(
5459 CGF.CurFn->getName(), Loc, CGF.ConvertTypeForMem(ReductionArrayTy),
5460 Privates, LHSExprs, RHSExprs, ReductionOps);
5461
5462 // 3. Create static kmp_critical_name lock = { 0 };
5463 std::string Name = getName({"reduction"});
5464 llvm::Value *Lock = getCriticalRegionLock(Name);
5465
5466 // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
5467 // RedList, reduce_func, &<lock>);
5468 llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
5469 llvm::Value *ThreadId = getThreadID(CGF, Loc);
5470 llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy);
5471 llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5472 ReductionList.getPointer(), CGF.VoidPtrTy);
// <n> is the number of filtered items (RHSExprs.size()), not the slot count
// `Size` computed above; the byte size comes from the full array type.
5473 llvm::Value *Args[] = {
5474 IdentTLoc, // ident_t *<loc>
5475 ThreadId, // i32 <gtid>
5476 CGF.Builder.getInt32(RHSExprs.size()), // i32 <n>
5477 ReductionArrayTySize, // size_type sizeof(RedList)
5478 RL, // void *RedList
5479 ReductionFn, // void (*) (void *, void *) <reduce_func>
5480 Lock // kmp_critical_name *&<lock>
5481 };
5482 llvm::Value *Res = CGF.EmitRuntimeCall(
5483 OMPBuilder.getOrCreateRuntimeFunction(
5484 CGM.getModule(),
5485 WithNowait ? OMPRTL___kmpc_reduce_nowait : OMPRTL___kmpc_reduce),
5486 Args);
5487
5488 // 5. Build switch(res)
5489 llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(".omp.reduction.default");
5490 llvm::SwitchInst *SwInst =
5491 CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2);
5492
5493 // 6. Build case 1:
5494 // ...
5495 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
5496 // ...
5497 // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
5498 // break;
5499 llvm::BasicBlock *Case1BB = CGF.createBasicBlock(".omp.reduction.case1");
5500 SwInst->addCase(CGF.Builder.getInt32(1), Case1BB);
5501 CGF.EmitBlock(Case1BB);
5502
5503 // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
5504 llvm::Value *EndArgs[] = {
5505 IdentTLoc, // ident_t *<loc>
5506 ThreadId, // i32 <gtid>
5507 Lock // kmp_critical_name *&<lock>
5508 };
// The lambda copies the four ArrayRef views by value.
5509 auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps](
5510 CodeGenFunction &CGF, PrePostActionTy &Action) {
// NOTE(review): listing line 5511 is absent; `RT`, used below, has no visible
// declaration inside this lambda.
5512 const auto *IPriv = Privates.begin();
5513 const auto *ILHS = LHSExprs.begin();
5514 const auto *IRHS = RHSExprs.begin();
5515 for (const Expr *E : ReductionOps) {
5516 RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
5517 cast<DeclRefExpr>(*IRHS));
5518 ++IPriv;
5519 ++ILHS;
5520 ++IRHS;
5521 }
5522 };
// NOTE(review): listing line 5523 is absent; `RCG`, used below, has no
// visible declaration.
// The action is built with a null first callee and empty argument list, plus
// the matching end-reduce runtime function and EndArgs.
5524 CommonActionTy Action(
5525 nullptr, {},
5526 OMPBuilder.getOrCreateRuntimeFunction(
5527 CGM.getModule(), WithNowait ? OMPRTL___kmpc_end_reduce_nowait
5528 : OMPRTL___kmpc_end_reduce),
5529 EndArgs);
5530 RCG.setAction(Action);
5531 RCG(CGF);
5532
5533 CGF.EmitBranch(DefaultBB);
5534
5535 // 7. Build case 2:
5536 // ...
5537 // Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
5538 // ...
5539 // break;
5540 llvm::BasicBlock *Case2BB = CGF.createBasicBlock(".omp.reduction.case2");
5541 SwInst->addCase(CGF.Builder.getInt32(2), Case2BB);
5542 CGF.EmitBlock(Case2BB);
5543
5544 auto &&AtomicCodeGen = [Loc, Privates, LHSExprs, RHSExprs, ReductionOps](
5545 CodeGenFunction &CGF, PrePostActionTy &Action) {
5546 const auto *ILHS = LHSExprs.begin();
5547 const auto *IRHS = RHSExprs.begin();
5548 const auto *IPriv = Privates.begin();
5549 for (const Expr *E : ReductionOps) {
// Split `x = <update>` into its parts. XExpr stays null when E is not a plain
// BinaryOperator assignment, which selects the critical-region path below.
5550 const Expr *XExpr = nullptr;
5551 const Expr *EExpr = nullptr;
5552 const Expr *UpExpr = nullptr;
5553 BinaryOperatorKind BO = BO_Comma;
// The `BO` declared in this `if` is a BinaryOperator pointer that shadows the
// opcode variable `BO` above for the extent of the `if` statement.
5554 if (const auto *BO = dyn_cast<BinaryOperator>(E)) {
5555 if (BO->getOpcode() == BO_Assign) {
5556 XExpr = BO->getLHS();
5557 UpExpr = BO->getRHS();
5558 }
5559 }
5560 // Try to emit update expression as a simple atomic.
5561 const Expr *RHSExpr = UpExpr;
5562 if (RHSExpr) {
5563 // Analyze RHS part of the whole expression.
5564 if (const auto *ACO = dyn_cast<AbstractConditionalOperator>(
5565 RHSExpr->IgnoreParenImpCasts())) {
5566 // If this is a conditional operator, analyze its condition for
5567 // min/max reduction operator.
5568 RHSExpr = ACO->getCond();
5569 }
// When the (possibly replaced) RHS is a binary operator, record its right
// operand and opcode; otherwise EExpr stays null and BO stays BO_Comma.
5570 if (const auto *BORHS =
5571 dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) {
5572 EExpr = BORHS->getRHS();
5573 BO = BORHS->getOpcode();
5574 }
5575 }
5576 if (XExpr) {
5577 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
// Atomic path. The callback handed to EmitOMPAtomicSimpleUpdateExpr stores
// the value it receives into a temporary, remaps `VD` to that temporary, and
// evaluates UpExpr against it.
5578 auto &&AtomicRedGen = [BO, VD,
5579 Loc](CodeGenFunction &CGF, const Expr *XExpr,
5580 const Expr *EExpr, const Expr *UpExpr) {
5581 LValue X = CGF.EmitLValue(XExpr);
5582 RValue E;
5583 if (EExpr)
5584 E = CGF.EmitAnyExpr(EExpr);
5585 CGF.EmitOMPAtomicSimpleUpdateExpr(
5586 X, E, BO, /*IsXLHSInRHSPart=*/true,
5587 llvm::AtomicOrdering::Monotonic, Loc,
5588 [&CGF, UpExpr, VD, Loc](RValue XRValue) {
5589 CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
5590 Address LHSTemp = CGF.CreateMemTemp(VD->getType());
5591 CGF.emitOMPSimpleStore(
5592 CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue,
5593 VD->getType().getNonReferenceType(), Loc);
5594 PrivateScope.addPrivate(VD, LHSTemp);
5595 (void)PrivateScope.Privatize();
5596 return CGF.EmitAnyExpr(UpExpr);
5597 });
5598 };
5599 if ((*IPriv)->getType()->isArrayType()) {
5600 // Emit atomic reduction for array section.
5601 const auto *RHSVar =
5602 cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
5603 EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar,
5604 AtomicRedGen, XExpr, EExpr, UpExpr);
5605 } else {
5606 // Emit atomic reduction for array subscript or single variable.
5607 AtomicRedGen(CGF, XExpr, EExpr, UpExpr);
5608 }
5609 } else {
5610 // Emit as a critical region.
5611 auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *,
5612 const Expr *, const Expr *) {
5613 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
5614 std::string Name = RT.getName({"atomic_reduction"});
// NOTE(review): listing line 5615 is absent; it names the callee that takes
// (CGF, Name, <region lambda>, Loc) below.
5616 CGF, Name,
5617 [=](CodeGenFunction &CGF, PrePostActionTy &Action) {
5618 Action.Enter(CGF);
5619 emitReductionCombiner(CGF, E);
5620 },
5621 Loc);
5622 };
5623 if ((*IPriv)->getType()->isArrayType()) {
5624 const auto *LHSVar =
5625 cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
5626 const auto *RHSVar =
5627 cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
5628 EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar,
5629 CritRedGen);
5630 } else {
5631 CritRedGen(CGF, nullptr, nullptr, nullptr);
5632 }
5633 }
5634 ++ILHS;
5635 ++IRHS;
5636 ++IPriv;
5637 }
5638 };
5639 RegionCodeGenTy AtomicRCG(AtomicCodeGen);
// Without nowait, an action carrying __kmpc_end_reduce is attached to the
// region; with nowait the region runs with no action set here.
5640 if (!WithNowait) {
5641 // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>);
5642 llvm::Value *EndArgs[] = {
5643 IdentTLoc, // ident_t *<loc>
5644 ThreadId, // i32 <gtid>
5645 Lock // kmp_critical_name *&<lock>
5646 };
5647 CommonActionTy Action(nullptr, {},
5648 OMPBuilder.getOrCreateRuntimeFunction(
5649 CGM.getModule(), OMPRTL___kmpc_end_reduce),
5650 EndArgs);
5651 AtomicRCG.setAction(Action);
5652 AtomicRCG(CGF);
5653 } else {
5654 AtomicRCG(CGF);
5655 }
5656
5657 CGF.EmitBranch(DefaultBB);
5658 CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
// NOTE(review): these size checks run only here, after the filtering loop
// near the top has already indexed the same arrays, and OrgRHSExprs is not
// covered by either assert.
5659 assert(OrgLHSExprs.size() == OrgPrivates.size() &&
5660 "PrivateVarReduction: Privates size mismatch");
5661 assert(OrgLHSExprs.size() == OrgReductionOps.size() &&
5662 "PrivateVarReduction: ReductionOps size mismatch");
// Finally handle the items that were filtered out above, one call per item.
5663 for (unsigned I : llvm::seq<unsigned>(
5664 std::min(OrgReductionOps.size(), OrgLHSExprs.size()))) {
5665 if (Options.IsPrivateVarReduction[I])
5666 emitPrivateReduction(CGF, Loc, OrgPrivates[I], OrgLHSExprs[I],
5667 OrgRHSExprs[I], OrgReductionOps[I]);
5668 }
5669}
5670
5671/// Generates unique name for artificial threadprivate variables.
5672/// Format is: <Prefix> "." <Decl_mangled_name> "_" "<Decl_start_loc_raw_enc>"
5673static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix,
5674 const Expr *Ref) {
5675 SmallString<256> Buffer;
5676 llvm::raw_svector_ostream Out(Buffer);
// Resolve the declaration behind `Ref`: try getBaseDecl first, and fall back
// to treating `Ref` itself as a DeclRefExpr to a VarDecl. The cast<> calls
// assert that shape.
5677 const clang::DeclRefExpr *DE;
5678 const VarDecl *D = ::getBaseDecl(Ref, DE);
5679 if (!D)
5680 D = cast<VarDecl>(cast<DeclRefExpr>(Ref)->getDecl());
5681 D = D->getCanonicalDecl();
// Local variables and parameters contribute their plain name; every other
// declaration contributes its mangled name.
5682 std::string Name = CGM.getOpenMPRuntime().getName(
5683 {D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(D)});
5684 Out << Prefix << Name << "_"
// NOTE(review): listing line 5685 is absent; the stream statement above
// continues on a line not present in this copy. The header comment says the
// last component is the declaration's start-location raw encoding.
5686 return std::string(Out.str());
5687}
5688
5689/// Emits reduction initializer function:
5690/// \code
5691/// void @.red_init(void* %arg, void* %orig) {
5692/// %0 = bitcast void* %arg to <type>*
5693/// store <type> <init>, <type>* %0
5694/// ret void
5695/// }
5696/// \endcode
///
/// \param CGM Module-level code generator that receives the new function.
/// \param Loc Source location used for the implicit parameters and the
///        function start/end.
/// \param RCG Reduction code generator queried for item \p N.
/// \param N   Index of the reduction item.
/// \returns The newly created internal-linkage function.
5697static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM,
5698 SourceLocation Loc,
5699 ReductionCodeGen &RCG, unsigned N) {
5700 ASTContext &C = CGM.getContext();
// Both parameters are declared as restrict-qualified void pointers.
5701 QualType VoidPtrTy = C.VoidPtrTy;
5702 VoidPtrTy.addRestrict();
5703 FunctionArgList Args;
5704 auto *Param =
5705 ImplicitParamDecl::Create(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
5706 VoidPtrTy, ImplicitParamKind::Other);
5707 auto *ParamOrig =
5708 ImplicitParamDecl::Create(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
5709 VoidPtrTy, ImplicitParamKind::Other);
5710 Args.emplace_back(Param);
5711 Args.emplace_back(ParamOrig);
5712 const auto &FnInfo =
5713 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5714 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
5715 std::string Name = CGM.getOpenMPRuntime().getName({"red_init", ""});
5716 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
5717 Name, &CGM.getModule());
5718 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
// The extra attribute is attached only when a sample profile file is set.
5719 if (!CGM.getCodeGenOpts().SampleProfileFile.empty())
5720 Fn->addFnAttr("sample-profile-suffix-elision-policy", "selected");
5721 Fn->setDoesNotRecurse();
5722 CodeGenFunction CGF(CGM);
5723 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
// Load the first argument and treat it as a pointer to the private type.
5724 QualType PrivateType = RCG.getPrivateType(N);
5725 Address PrivateAddr = CGF.EmitLoadOfPointer(
5726 CGF.GetAddrOfLocalVar(Param).withElementType(CGF.Builder.getPtrTy(0)),
5727 C.getPointerType(PrivateType)->castAs<PointerType>());
5728 llvm::Value *Size = nullptr;
5729 // If the size of the reduction item is non-constant, load it from global
5730 // threadprivate variable.
5731 if (RCG.getSizes(N).second) {
// NOTE(review): listing line 5732 is absent; it opens the statement that
// produces `SizeAddr`, which is loaded from just below.
5733 CGF, CGM.getContext().getSizeType(),
5734 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
5735 Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
5736 CGM.getContext().getSizeType(), Loc);
5737 }
5738 RCG.emitAggregateType(CGF, N, Size);
5739 Address OrigAddr = Address::invalid();
5740 // If initializer uses initializer from declare reduction construct, emit a
5741 // pointer to the address of the original reduction item (required by reduction
5742 // initializer)
5743 if (RCG.usesReductionInitializer(N)) {
5744 Address SharedAddr = CGF.GetAddrOfLocalVar(ParamOrig);
5745 OrigAddr = CGF.EmitLoadOfPointer(
5746 SharedAddr,
5747 CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr());
5748 }
5749 // Emit the initializer:
5750 // %0 = bitcast void* %arg to <type>*
5751 // store <type> <init>, <type>* %0
// The callback passed as the last argument always returns false.
5752 RCG.emitInitialization(CGF, N, PrivateAddr, OrigAddr,
5753 [](CodeGenFunction &) { return false; });
5754 CGF.FinishFunction();
5755 return Fn;
5756}
5757
5758/// Emits reduction combiner function:
5759/// \code
5760/// void @.red_comb(void* %arg0, void* %arg1) {
5761/// %lhs = bitcast void* %arg0 to <type>*
5762/// %rhs = bitcast void* %arg1 to <type>*
5763/// %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs)
5764/// store <type> %2, <type>* %lhs
5765/// ret void
5766/// }
5767/// \endcode
///
/// \param CGM         Module-level code generator that receives the function.
/// \param Loc         Source location used for the implicit parameters and
///                    the function start/end.
/// \param RCG         Reduction code generator queried for item \p N.
/// \param N           Index of the reduction item.
/// \param ReductionOp Combiner expression for the item.
/// \param LHS         Must be a DeclRefExpr to a VarDecl (enforced by cast<>);
///                    remapped to the first argument.
/// \param RHS         Must be a DeclRefExpr to a VarDecl (enforced by cast<>);
///                    remapped to the second argument.
/// \param PrivateRef  Private copy reference forwarded to the combiner call.
/// \returns The newly created internal-linkage function.
5768static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM,
5769 SourceLocation Loc,
5770 ReductionCodeGen &RCG, unsigned N,
5771 const Expr *ReductionOp,
5772 const Expr *LHS, const Expr *RHS,
5773 const Expr *PrivateRef) {
5774 ASTContext &C = CGM.getContext();
5775 const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl());
5776 const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl());
5777 FunctionArgList Args;
5778 auto *ParamInOut =
5779 ImplicitParamDecl::Create(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
5780 C.VoidPtrTy, ImplicitParamKind::Other);
5781 auto *ParamIn =
5782 ImplicitParamDecl::Create(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
5783 C.VoidPtrTy, ImplicitParamKind::Other);
5784 Args.emplace_back(ParamInOut);
5785 Args.emplace_back(ParamIn);
5786 const auto &FnInfo =
5787 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5788 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
5789 std::string Name = CGM.getOpenMPRuntime().getName({"red_comb", ""});
5790 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
5791 Name, &CGM.getModule());
5792 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
// The extra attribute is attached only when a sample profile file is set.
5793 if (!CGM.getCodeGenOpts().SampleProfileFile.empty())
5794 Fn->addFnAttr("sample-profile-suffix-elision-policy", "selected");
5795 Fn->setDoesNotRecurse();
5796 CodeGenFunction CGF(CGM);
5797 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
5798 llvm::Value *Size = nullptr;
5799 // If the size of the reduction item is non-constant, load it from global
5800 // threadprivate variable.
5801 if (RCG.getSizes(N).second) {
// NOTE(review): listing line 5802 is absent; it opens the statement that
// produces `SizeAddr`, which is loaded from just below.
5803 CGF, CGM.getContext().getSizeType(),
5804 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
5805 Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
5806 CGM.getContext().getSizeType(), Loc);
5807 }
5808 RCG.emitAggregateType(CGF, N, Size);
5809 // Remap lhs and rhs variables to the addresses of the function arguments.
5810 // %lhs = bitcast void* %arg0 to <type>*
5811 // %rhs = bitcast void* %arg1 to <type>*
5812 CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
5813 PrivateScope.addPrivate(
5814 LHSVD,
5815 // Pull out the pointer to the variable.
// NOTE(review): listing line 5816 is absent; it opens the call whose
// arguments (address of ParamInOut, pointer-to-LHS type) follow.
5817 CGF.GetAddrOfLocalVar(ParamInOut)
5818 .withElementType(CGF.Builder.getPtrTy(0)),
5819 C.getPointerType(LHSVD->getType())->castAs<PointerType>()));
5820 PrivateScope.addPrivate(
5821 RHSVD,
5822 // Pull out the pointer to the variable.
// NOTE(review): listing lines 5823-5824 are absent; only the tail of the
// second address computation is visible below. It presumably mirrors the LHS
// case using ParamIn - confirm.
5825 CGF.Builder.getPtrTy(0)),
5826 C.getPointerType(RHSVD->getType())->castAs<PointerType>()));
5827 PrivateScope.Privatize();
5828 // Emit the combiner body:
5829 // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs)
5830 // store <type> %2, <type>* %lhs
// NOTE(review): listing line 5831 is absent; it names the callee that takes
// (CGF, ReductionOp, PrivateRef, LHS, RHS) below.
5832 CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS),
5833 cast<DeclRefExpr>(RHS));
5834 CGF.FinishFunction();
5835 return Fn;
5836}
5837
5838/// Emits reduction finalizer function:
5839/// \code
5840/// void @.red_fini(void* %arg) {
5841/// %0 = bitcast void* %arg to <type>*
5842/// <destroy>(<type>* %0)
5843/// ret void
5844/// }
5845/// \endcode
/// \param N index of the reduction item inside \p RCG.
/// \returns the generated function, or nullptr when \p RCG reports that item
/// \p N needs no cleanups.
5846static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM,
5847 SourceLocation Loc,
5848 ReductionCodeGen &RCG, unsigned N) {
// NOTE(review): listing line 5875 is absent from this extract, so the
// statement that declares SizeAddr appears without its first line below.
// No finalizer is generated for items that need no cleanups.
5849 if (!RCG.needCleanups(N))
5850 return nullptr;
5851 ASTContext &C = CGM.getContext();
// The generated function takes a single implicit void* parameter.
5852 FunctionArgList Args;
5853 auto *Param =
5854 ImplicitParamDecl::Create(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
5855 C.VoidPtrTy, ImplicitParamKind::Other);
5856 Args.emplace_back(Param);
// Create an internal-linkage 'void (void*)' function whose name is built from
// the "red_fini" components.
5857 const auto &FnInfo =
5858 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5859 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
5860 std::string Name = CGM.getOpenMPRuntime().getName({"red_fini", ""});
5861 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
5862 Name, &CGM.getModule());
5863 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
// The extra attribute is only attached when a sample profile file is in use.
5864 if (!CGM.getCodeGenOpts().SampleProfileFile.empty())
5865 Fn->addFnAttr("sample-profile-suffix-elision-policy", "selected");
5866 Fn->setDoesNotRecurse();
5867 CodeGenFunction CGF(CGM);
5868 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
// Load the pointer passed in the parameter; it is the address handed to the
// cleanup emission below.
5869 Address PrivateAddr = CGF.EmitLoadOfPointer(
5870 CGF.GetAddrOfLocalVar(Param), C.VoidPtrTy.castAs<PointerType>());
5871 llvm::Value *Size = nullptr;
5872 // If the size of the reduction item is non-constant, load it from global
5873 // threadprivate variable.
5874 if (RCG.getSizes(N).second) {
5876 CGF, CGM.getContext().getSizeType(),
5877 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
5878 Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
5879 CGM.getContext().getSizeType(), Loc);
5880 }
// Size stays null when the item size is constant.
5881 RCG.emitAggregateType(CGF, N, Size);
5882 // Emit the finalizer body:
5883 // <destroy>(<type>* %0)
5884 RCG.emitCleanups(CGF, N, PrivateAddr);
5885 CGF.FinishFunction(Loc);
5886 return Fn;
5887}
5888
5891 ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
// NOTE(review): the first lines of this definition's signature (listing lines
// 5889-5890) and listing lines 5978, 5995 and 6007 are absent from this
// extract; the affected statements appear without their first line below.
//
// Fills an on-stack array with one descriptor per entry of
// Data.ReductionVars and passes it to the task-reduction init runtime call,
// whose result is returned. Returns nullptr without emitting anything when
// there is no insertion point or there are no reduction variables.
5892 if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty())
5893 return nullptr;
5894
5895 // Build typedef struct:
5896 // kmp_taskred_input {
5897 // void *reduce_shar; // shared reduction item
5898 // void *reduce_orig; // original reduction item used for initialization
5899 // size_t reduce_size; // size of data item
5900 // void *reduce_init; // data initialization routine
5901 // void *reduce_fini; // data finalization routine
5902 // void *reduce_comb; // data combiner routine
5903 // kmp_task_red_flags_t flags; // flags for additional info from compiler
5904 // } kmp_taskred_input_t;
5905 ASTContext &C = CGM.getContext();
5906 RecordDecl *RD = C.buildImplicitRecord("kmp_taskred_input_t");
5907 RD->startDefinition();
// Fields are added in the same order as in the struct sketched above.
5908 const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
5909 const FieldDecl *OrigFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
5910 const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType());
5911 const FieldDecl *InitFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
5912 const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
5913 const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
// The flags field is an unsigned 32-bit integer.
5914 const FieldDecl *FlagsFD = addFieldToRecordDecl(
5915 C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false));
5916 RD->completeDefinition();
5917 CanQualType RDType = C.getCanonicalTagType(RD);
5918 unsigned Size = Data.ReductionVars.size();
5919 llvm::APInt ArraySize(/*numBits=*/64, Size);
5920 QualType ArrayRDType =
5921 C.getConstantArrayType(RDType, ArraySize, nullptr,
5922 ArraySizeModifier::Normal, /*IndexTypeQuals=*/0);
5923 // kmp_taskred_input_t .rd_input.[Size];
5924 RawAddress TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input.");
5925 ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionOrigs,
5926 Data.ReductionCopies, Data.ReductionOps);
// Fill in one descriptor per reduction item.
5927 for (unsigned Cnt = 0; Cnt < Size; ++Cnt) {
5928 // kmp_taskred_input_t &ElemLVal = .rd_input.[Cnt];
5929 llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0),
5930 llvm::ConstantInt::get(CGM.SizeTy, Cnt)};
5931 llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP(
5932 TaskRedInput.getElementType(), TaskRedInput.getPointer(), Idxs,
5933 /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc,
5934 ".rd_input.gep.");
5935 LValue ElemLVal = CGF.MakeNaturalAlignRawAddrLValue(GEP, RDType);
5936 // ElemLVal.reduce_shar = &Shareds[Cnt];
5937 LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD);
5938 RCG.emitSharedOrigLValue(CGF, Cnt);
5939 llvm::Value *Shared = RCG.getSharedLValue(Cnt).getPointer(CGF);
5940 CGF.EmitStoreOfScalar(Shared, SharedLVal);
5941 // ElemLVal.reduce_orig = &Origs[Cnt];
5942 LValue OrigLVal = CGF.EmitLValueForField(ElemLVal, OrigFD);
5943 llvm::Value *Orig = RCG.getOrigLValue(Cnt).getPointer(CGF);
5944 CGF.EmitStoreOfScalar(Orig, OrigLVal);
5945 RCG.emitAggregateType(CGF, Cnt);
// getSizes() yields a pair; the first element is stored as reduce_size and a
// non-null second element selects delayed creation (see below).
5946 llvm::Value *SizeValInChars;
5947 llvm::Value *SizeVal;
5948 std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt);
5949 // We use delayed creation/initialization for VLAs and array sections. It is
5950 // required because runtime does not provide the way to pass the sizes of
5951 // VLAs/array sections to initializer/combiner/finalizer functions. Instead
5952 // threadprivate global variables are used to store these values and use
5953 // them in the functions.
5954 bool DelayedCreation = !!SizeVal;
// ElemLVal.reduce_size = (size_t)SizeValInChars;
5955 SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy,
5956 /*isSigned=*/false);
5957 LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD);
5958 CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal);
5959 // ElemLVal.reduce_init = init;
5960 LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD);
5961 llvm::Value *InitAddr = emitReduceInitFunction(CGM, Loc, RCG, Cnt);
5962 CGF.EmitStoreOfScalar(InitAddr, InitLVal);
5963 // ElemLVal.reduce_fini = fini;
5964 LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD);
// emitReduceFiniFunction returns nullptr when the item needs no cleanups; a
// null pointer constant is stored in that case.
5965 llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt);
5966 llvm::Value *FiniAddr =
5967 Fini ? Fini : llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
5968 CGF.EmitStoreOfScalar(FiniAddr, FiniLVal);
5969 // ElemLVal.reduce_comb = comb;
5970 LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD);
5971 llvm::Value *CombAddr = emitReduceCombFunction(
5972 CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt],
5973 RHSExprs[Cnt], Data.ReductionCopies[Cnt]);
5974 CGF.EmitStoreOfScalar(CombAddr, CombLVal);
5975 // ElemLVal.flags = DelayedCreation ? 1 : 0;
5976 LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD);
5977 if (DelayedCreation) {
5979 llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*isSigned=*/true),
5980 FlagsLVal);
5981 } else
5982 CGF.EmitNullInitialization(FlagsLVal.getAddress(), FlagsLVal.getType());
5983 }
// Reductions with a task modifier use the modifier-aware entry point, which
// additionally receives the source location and the worksharing flag.
5984 if (Data.IsReductionWithTaskMod) {
5985 // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int
5986 // is_ws, int num, void *data);
5987 llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
5988 llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
5989 CGM.IntTy, /*isSigned=*/true);
5990 llvm::Value *Args[] = {
5991 IdentTLoc, GTid,
5992 llvm::ConstantInt::get(CGM.IntTy, Data.IsWorksharingReduction ? 1 : 0,
5993 /*isSigned=*/true),
5994 llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
5996 TaskRedInput.getPointer(), CGM.VoidPtrTy)};
5997 return CGF.EmitRuntimeCall(
5998 OMPBuilder.getOrCreateRuntimeFunction(
5999 CGM.getModule(), OMPRTL___kmpc_taskred_modifier_init),
6000 Args);
6001 }
6002 // Build call void *__kmpc_taskred_init(int gtid, int num_data, void *data);
6003 llvm::Value *Args[] = {
6004 CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
6005 /*isSigned=*/true),
6006 llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
6008 CGM.VoidPtrTy)};
6009 return CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
6010 CGM.getModule(), OMPRTL___kmpc_taskred_init),
6011 Args);
6012}
6013
6015 SourceLocation Loc,
6016 bool IsWorksharingReduction) {
// NOTE(review): the first line of this definition's signature (listing line
// 6014) is absent from this extract.
6017 // Build call __kmpc_task_reduction_modifier_fini(ident_t *loc, int gtid,
6018 // int is_ws); the call's result is discarded.
6019 llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
6020 llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
6021 CGM.IntTy, /*isSigned=*/true);
// is_ws is 1 for a worksharing reduction and 0 otherwise.
6022 llvm::Value *Args[] = {IdentTLoc, GTid,
6023 llvm::ConstantInt::get(CGM.IntTy,
6024 IsWorksharingReduction ? 1 : 0,
6025 /*isSigned=*/true)};
6026 (void)CGF.EmitRuntimeCall(
6027 OMPBuilder.getOrCreateRuntimeFunction(
6028 CGM.getModule(), OMPRTL___kmpc_task_reduction_modifier_fini),
6029 Args);
6030}
6031
6033 SourceLocation Loc,
6034 ReductionCodeGen &RCG,
6035 unsigned N) {
// NOTE(review): the first line of this definition's signature (listing line
// 6032) and listing line 6042 (which declares SizeAddr) are absent from this
// extract.
6036 auto Sizes = RCG.getSizes(N);
6037 // Emit threadprivate global variable if the type is non-constant
6038 // (Sizes.second != nullptr).
6039 if (Sizes.second) {
// The size is converted to size_t and stored under the same "reduction_size"
// unique name that the generated combiner/finalizer functions load from.
6040 llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy,
6041 /*isSigned=*/false);
6043 CGF, CGM.getContext().getSizeType(),
6044 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
6045 CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false);
6046 }
6047}
6048
6050 SourceLocation Loc,
6051 llvm::Value *ReductionsPtr,
6052 LValue SharedLVal) {
// NOTE(review): the first line of this definition's signature (listing line
// 6049) and listing line 6059 (the start of the third argument expression)
// are absent from this extract.
6053 // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
6054 // *d);
6055 llvm::Value *Args[] = {CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
6056 CGM.IntTy,
6057 /*isSigned=*/true),
6058 ReductionsPtr,
6060 SharedLVal.getPointer(CGF), CGM.VoidPtrTy)};
// The pointer returned by the runtime is wrapped as an i8-typed Address that
// reuses the alignment of SharedLVal.
6061 return Address(
6062 CGF.EmitRuntimeCall(
6063 OMPBuilder.getOrCreateRuntimeFunction(
6064 CGM.getModule(), OMPRTL___kmpc_task_reduction_get_th_data),
6065 Args),
6066 CGF.Int8Ty, SharedLVal.getAlignment());
6067}
6068
6070 const OMPTaskDataTy &Data) {
// NOTE(review): the first line of this definition's signature (listing line
// 6069) is absent from this extract.
//
// Emits a taskwait, choosing between the OpenMPIRBuilder path, the
// dependence-aware runtime call and the plain runtime call.
6071 if (!CGF.HaveInsertPoint())
6072 return;
6073
// The OpenMPIRBuilder path is only taken when there are no dependences.
6074 if (CGF.CGM.getLangOpts().OpenMPIRBuilder && Data.Dependences.empty()) {
6075 // TODO: Need to support taskwait with dependences in the OpenMPIRBuilder.
6076 OMPBuilder.createTaskwait(CGF.Builder);
6077 } else {
6078 llvm::Value *ThreadID = getThreadID(CGF, Loc);
6079 llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
6080 auto &M = CGM.getModule();
6081 Address DependenciesArray = Address::invalid();
6082 llvm::Value *NumOfElements;
// emitDependClause is called even when the dependence list is empty; its
// results are only used in the non-empty case below.
6083 std::tie(NumOfElements, DependenciesArray) =
6084 emitDependClause(CGF, Data.Dependences, Loc);
6085 if (!Data.Dependences.empty()) {
// Argument order follows the __kmpc_omp_taskwait_deps_51 prototype quoted
// below; the noalias dependence list is always passed as (0, null).
6086 llvm::Value *DepWaitTaskArgs[7];
6087 DepWaitTaskArgs[0] = UpLoc;
6088 DepWaitTaskArgs[1] = ThreadID;
6089 DepWaitTaskArgs[2] = NumOfElements;
6090 DepWaitTaskArgs[3] = DependenciesArray.emitRawPointer(CGF);
6091 DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
6092 DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
6093 DepWaitTaskArgs[6] =
6094 llvm::ConstantInt::get(CGF.Int32Ty, Data.HasNowaitClause);
6095
6096 CodeGenFunction::RunCleanupsScope LocalScope(CGF);
6097
6098 // Build void __kmpc_omp_taskwait_deps_51(ident_t *, kmp_int32 gtid,
6099 // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
6100 // ndeps_noalias, kmp_depend_info_t *noalias_dep_list,
6101 // kmp_int32 has_no_wait); if dependence info is specified.
6102 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
6103 M, OMPRTL___kmpc_omp_taskwait_deps_51),
6104 DepWaitTaskArgs);
6105
6106 } else {
6107
6108 // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
6109 // global_tid);
6110 llvm::Value *Args[] = {UpLoc, ThreadID};
6111 // Ignore return result until untied tasks are supported.
6112 CGF.EmitRuntimeCall(
6113 OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_taskwait),
6114 Args);
6115 }
6116 }
6117
// When code is being emitted inside an OpenMP region, let the region emit its
// untied-task switch after the taskwait.
6118 if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
6119 Region->emitUntiedSwitch(CGF);
6120}
6121
6123 OpenMPDirectiveKind InnerKind,
6124 const RegionCodeGenTy &CodeGen,
6125 bool HasCancel) {
// NOTE(review): the first line of this definition's signature (listing line
// 6122) is absent from this extract.
6126 if (!CGF.HaveInsertPoint())
6127 return;
// The RAII object sets up an inlined region for the duration of this call.
// Its last argument is true for every InnerKind except critical, master and
// masked; the meaning of that flag is defined by InlinedOpenMPRegionRAII,
// which is not visible here.
6128 InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel,
6129 InnerKind != OMPD_critical &&
6130 InnerKind != OMPD_master &&
6131 InnerKind != OMPD_masked);
// Emit the region body through the captured-statement info currently
// installed on CGF.
6132 CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);
6133}
6134
6135namespace {
/// Integer codes for the kind of region being cancelled. The value produced
/// by getCancellationKind() is passed as a 32-bit constant to the
/// cancellation runtime calls.
6136enum RTCancelKind {
6137 CancelNoreq = 0,
6138 CancelParallel = 1,
6139 CancelLoop = 2,
6140 CancelSections = 3,
6141 CancelTaskgroup = 4
6142};
6143} // anonymous namespace
6144
/// Translates the directive kind named in a cancel/cancellation point
/// construct into the matching RTCancelKind code.
///
/// parallel, for and sections map to their own codes; every other kind is
/// asserted to be taskgroup and maps to CancelTaskgroup.
6145static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) {
6146 RTCancelKind CancelKind = CancelNoreq;
6147 if (CancelRegion == OMPD_parallel)
6148 CancelKind = CancelParallel;
6149 else if (CancelRegion == OMPD_for)
6150 CancelKind = CancelLoop;
6151 else if (CancelRegion == OMPD_sections)
6152 CancelKind = CancelSections;
6153 else {
// CancelNoreq is never returned: the fall-through case is always taskgroup.
6154 assert(CancelRegion == OMPD_taskgroup);
6155 CancelKind = CancelTaskgroup;
6156 }
6157 return CancelKind;
6158}
6159
6162 OpenMPDirectiveKind CancelRegion) {
// NOTE(review): the first lines of this definition's signature (listing lines
// 6160-6161) are absent from this extract.
6163 if (!CGF.HaveInsertPoint())
6164 return;
6165 // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
6166 // global_tid, kmp_int32 cncl_kind);
// Nothing is emitted unless code is currently being generated inside an
// OpenMP region.
6167 if (auto *OMPRegionInfo =
6168 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
6169 // For 'cancellation point taskgroup', the task region info may not have a
6170 // cancel. This may instead happen in another adjacent task.
6171 if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) {
6172 llvm::Value *Args[] = {
6173 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
6174 CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
6175 // The call's result is tested below: a non-null value takes the exit path.
6176 llvm::Value *Result = CGF.EmitRuntimeCall(
6177 OMPBuilder.getOrCreateRuntimeFunction(
6178 CGM.getModule(), OMPRTL___kmpc_cancellationpoint),
6179 Args);
6180 // if (__kmpc_cancellationpoint()) {
6181 // call i32 @__kmpc_cancel_barrier( // for parallel cancellation only
6182 // exit from construct;
6183 // }
6184 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
6185 llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
6186 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
6187 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
6188 CGF.EmitBlock(ExitBB);
6189 if (CancelRegion == OMPD_parallel)
6190 emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
6191 // exit from construct;
// The jump target is looked up from the enclosing region's directive kind,
// and the branch runs any pending cleanups on the way out.
6192 CodeGenFunction::JumpDest CancelDest =
6193 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
6194 CGF.EmitBranchThroughCleanup(CancelDest);
6195 CGF.EmitBlock(ContBB, /*IsFinished=*/true);
6196 }
6197 }
6198}
6199
6201 const Expr *IfCond,
6202 OpenMPDirectiveKind CancelRegion) {
// NOTE(review): the first line of this definition's signature (listing line
// 6200) is absent from this extract.
6203 if (!CGF.HaveInsertPoint())
6204 return;
6205 // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
6206 // kmp_int32 cncl_kind);
6207 auto &M = CGM.getModule();
// Nothing is emitted unless code is currently being generated inside an
// OpenMP region.
6208 if (auto *OMPRegionInfo =
6209 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
// ThenGen emits the cancel call plus the conditional exit; it is invoked
// either under the if-clause or unconditionally (see below).
6210 auto &&ThenGen = [this, &M, Loc, CancelRegion,
6211 OMPRegionInfo](CodeGenFunction &CGF, PrePostActionTy &) {
6212 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
6213 llvm::Value *Args[] = {
6214 RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc),
6215 CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
6216 // The call's result is tested below: a non-null value takes the exit path.
6217 llvm::Value *Result = CGF.EmitRuntimeCall(
6218 OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_cancel), Args);
6219 // if (__kmpc_cancel()) {
6220 // call i32 @__kmpc_cancel_barrier( // for parallel cancellation only
6221 // exit from construct;
6222 // }
6223 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
6224 llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
6225 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
6226 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
6227 CGF.EmitBlock(ExitBB);
6228 if (CancelRegion == OMPD_parallel)
6229 RT.emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
6230 // exit from construct;
6231 CodeGenFunction::JumpDest CancelDest =
6232 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
6233 CGF.EmitBranchThroughCleanup(CancelDest);
6234 CGF.EmitBlock(ContBB, /*IsFinished=*/true);
6235 };
// With an if-clause the cancel code is guarded by the condition and the else
// branch emits nothing; without one it is emitted unconditionally.
6236 if (IfCond) {
6237 emitIfClause(CGF, IfCond, ThenGen,
6238 [](CodeGenFunction &, PrePostActionTy &) {});
6239 } else {
6240 RegionCodeGenTy ThenRCG(ThenGen);
6241 ThenRCG(CGF);
6242 }
6243 }
6244}
6245
6246namespace {
6247/// Cleanup action for uses_allocators support.
///
/// Holds a list of (allocator, allocator traits) expression pairs. Enter()
/// performs one call per pair passing both expressions; Exit() performs one
/// call per pair passing only the allocator expression.
///
/// NOTE(review): listing lines 6249, 6259 and 6267 are absent from this
/// extract, so the declaration of the Allocators member and the names of the
/// functions called from Enter()/Exit() are not visible here.
6248class OMPUsesAllocatorsActionTy final : public PrePostActionTy {
6250
6251public:
6252 OMPUsesAllocatorsActionTy(
6253 ArrayRef<std::pair<const Expr *, const Expr *>> Allocators)
6254 : Allocators(Allocators) {}
6255 void Enter(CodeGenFunction &CGF) override {
// Nothing to do when there is no insertion point.
6256 if (!CGF.HaveInsertPoint())
6257 return;
6258 for (const auto &AllocatorData : Allocators) {
6260 CGF, AllocatorData.first, AllocatorData.second);
6261 }
6262 }
6263 void Exit(CodeGenFunction &CGF) override {
// Nothing to do when there is no insertion point.
6264 if (!CGF.HaveInsertPoint())
6265 return;
6266 for (const auto &AllocatorData : Allocators) {
6268 AllocatorData.first);
6269 }
6270 }
6271};
6272} // namespace
6273
6275 const OMPExecutableDirective &D, StringRef ParentName,
6276 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
6277 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
// NOTE(review): the first line of this definition's signature (listing line
// 6274) and listing lines 6279-6280 (presumably the declaration of the
// Allocators container) are absent from this extract.
6278 assert(!ParentName.empty() && "Invalid target entry parent name!");
// Collect every uses_allocators entry that specifies allocator traits;
// entries without traits are skipped.
6281 for (const auto *C : D.getClausesOfKind<OMPUsesAllocatorsClause>()) {
6282 for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
// Note: this local D shadows the directive parameter D inside the loop body.
6283 const OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
6284 if (!D.AllocatorTraits)
6285 continue;
6286 Allocators.emplace_back(D.Allocator, D.AllocatorTraits);
6287 }
6288 }
// Attach the allocator action to the region code generator before delegating
// the actual outlining to the helper.
6289 OMPUsesAllocatorsActionTy UsesAllocatorAction(Allocators);
6290 CodeGen.setAction(UsesAllocatorAction);
6291 emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
6292 IsOffloadEntry, CodeGen);
6293}
6294
6296 const Expr *Allocator,
6297 const Expr *AllocatorTraits) {
// NOTE(review): the first line of this definition's signature (listing line
// 6295) and listing lines 6303, 6308 and 6320 are absent from this extract;
// the NumTraits initializer, the declaration of Addr and the statement after
// "Store to allocator." appear without their first line below.
//
// Calls __kmpc_init_allocator(thread id, memspace, number of traits, traits)
// and stores the converted result into the allocator variable.
6298 llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
6299 ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
6300 // Use default memspace handle.
6301 llvm::Value *MemSpaceHandle = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
// The number of traits is read from the size of the traits expression's
// array type.
6302 llvm::Value *NumTraits = llvm::ConstantInt::get(
6304 AllocatorTraits->getType()->getAsArrayTypeUnsafe())
6305 ->getSize()
6306 .getLimitedValue());
6307 LValue AllocatorTraitsLVal = CGF.EmitLValue(AllocatorTraits);
6309 AllocatorTraitsLVal.getAddress(), CGF.VoidPtrPtrTy, CGF.VoidPtrTy);
6310 AllocatorTraitsLVal = CGF.MakeAddrLValue(Addr, CGF.getContext().VoidPtrTy,
6311 AllocatorTraitsLVal.getBaseInfo(),
6312 AllocatorTraitsLVal.getTBAAInfo());
// The raw pointer of the converted traits address is what the runtime gets.
6313 llvm::Value *Traits = Addr.emitRawPointer(CGF);
6314
6315 llvm::Value *AllocatorVal =
6316 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
6317 CGM.getModule(), OMPRTL___kmpc_init_allocator),
6318 {ThreadId, MemSpaceHandle, NumTraits, Traits});
6319 // Store to allocator.
6321 cast<DeclRefExpr>(Allocator->IgnoreParenImpCasts())->getDecl()));
6322 LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
// Convert the void* returned by the runtime to the allocator's declared type
// before storing it.
6323 AllocatorVal =
6324 CGF.EmitScalarConversion(AllocatorVal, CGF.getContext().VoidPtrTy,
6325 Allocator->getType(), Allocator->getExprLoc());
6326 CGF.EmitStoreOfScalar(AllocatorVal, AllocatorLVal);
6327}
6328
6330 const Expr *Allocator) {
// NOTE(review): the first line of this definition's signature (listing line
// 6329) is absent from this extract.
//
// Loads the allocator value, converts it to void* and passes it to
// __kmpc_destroy_allocator; the call's result is discarded.
6331 llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
6332 ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
6333 LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
6334 llvm::Value *AllocatorVal =
6335 CGF.EmitLoadOfScalar(AllocatorLVal, Allocator->getExprLoc());
// Conversion goes from the allocator's declared type to void*.
6336 AllocatorVal = CGF.EmitScalarConversion(AllocatorVal, Allocator->getType(),
6337 CGF.getContext().VoidPtrTy,
6338 Allocator->getExprLoc());
6339 (void)CGF.EmitRuntimeCall(
6340 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
6341 OMPRTL___kmpc_destroy_allocator),
6342 {ThreadId, AllocatorVal});
6343}
6344
6347 llvm::OpenMPIRBuilder::TargetKernelDefaultAttrs &Attrs) {
// NOTE(review): the first lines of this definition's signature (listing lines
// 6345-6346), which declare CGF and D, are absent from this extract.
//
// Fills Attrs with team/thread bounds: first from the directive itself, then
// narrowed by launch-bound style attributes attached through
// OMPXAttributeClause.
// Exactly one MaxTeams and one MaxThreads slot is expected; both are updated
// in place through the references below.
6348 assert(Attrs.MaxTeams.size() == 1 && Attrs.MaxThreads.size() == 1 &&
6349 "invalid default attrs structure");
6350 int32_t &MaxTeamsVal = Attrs.MaxTeams.front();
6351 int32_t &MaxThreadsVal = Attrs.MaxThreads.front();
6352
// Only the out-parameters of these two calls are used here; the returned
// expressions are ignored.
6353 getNumTeamsExprForTargetDirective(CGF, D, Attrs.MinTeams, MaxTeamsVal);
6354 getNumThreadsExprForTargetDirective(CGF, D, MaxThreadsVal,
6355 /*UpperBoundOnly=*/true);
6356
6357 for (auto *C : D.getClausesOfKind<OMPXAttributeClause>()) {
6358 for (auto *A : C->getAttrs()) {
// Per-attribute defaults: minimum 1, maximum -1 (treated as "not set" by the
// '> 0' checks below).
6359 int32_t AttrMinThreadsVal = 1, AttrMaxThreadsVal = -1;
6360 int32_t AttrMinBlocksVal = 1, AttrMaxBlocksVal = -1;
6361 if (auto *Attr = dyn_cast<CUDALaunchBoundsAttr>(A))
6362 CGM.handleCUDALaunchBoundsAttr(nullptr, Attr, &AttrMaxThreadsVal,
6363 &AttrMinBlocksVal, &AttrMaxBlocksVal);
6364 else if (auto *Attr = dyn_cast<AMDGPUFlatWorkGroupSizeAttr>(A))
6365 CGM.handleAMDGPUFlatWorkGroupSizeAttr(
6366 nullptr, Attr, /*ReqdWGS=*/nullptr, &AttrMinThreadsVal,
6367 &AttrMaxThreadsVal);
6368 else
6369 continue;
6370
// Minimums are raised to the attribute's value; maximums are lowered to it,
// or adopted outright when no positive maximum was set before.
6371 Attrs.MinThreads = std::max(Attrs.MinThreads, AttrMinThreadsVal);
6372 if (AttrMaxThreadsVal > 0)
6373 MaxThreadsVal = MaxThreadsVal > 0
6374 ? std::min(MaxThreadsVal, AttrMaxThreadsVal)
6375 : AttrMaxThreadsVal;
6376 Attrs.MinTeams = std::max(Attrs.MinTeams, AttrMinBlocksVal);
6377 if (AttrMaxBlocksVal > 0)
6378 MaxTeamsVal = MaxTeamsVal > 0 ? std::min(MaxTeamsVal, AttrMaxBlocksVal)
6379 : AttrMaxBlocksVal;
6380 }
6381 }
6382}
6383
6385 const OMPExecutableDirective &D, StringRef ParentName,
6386 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
6387 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
// NOTE(review): the first line of this definition's signature (listing line
// 6384) and listing line 6400 (the statement controlled by the 'if' inside
// the callback) are absent from this extract. As a result the 'return' in the
// callback only looks like the body of that 'if'.
6388
// The entry info is derived from the directive's begin location and the
// parent name.
6389 llvm::TargetRegionEntryInfo EntryInfo =
6390 getEntryInfoFromPresumedLoc(CGM, OMPBuilder, D.getBeginLoc(), ParentName);
6391
6392 CodeGenFunction CGF(CGM, true);
// Callback handed to the OpenMPIRBuilder: given the entry function name, it
// generates the outlined function for the 'target' captured statement.
6393 llvm::OpenMPIRBuilder::FunctionGenCallback &&GenerateOutlinedFunction =
6394 [&CGF, &D, &CodeGen, this](StringRef EntryFnName) {
6395 const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
6396
6397 CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName);
6398 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6399 if (CGM.getLangOpts().OpenMPIsTargetDevice && !isGPU())
6401 return CGF.GenerateOpenMPCapturedStmtFunction(CS, D);
6402 };
6403
6404 cantFail(OMPBuilder.emitTargetRegionFunction(
6405 EntryInfo, GenerateOutlinedFunction, IsOffloadEntry, OutlinedFn,
6406 OutlinedFnID));
6407
// No function was produced, so there is nothing to annotate.
6408 if (!OutlinedFn)
6409 return;
6410
6411 CGM.getTargetCodeGenInfo().setTargetAttributes(nullptr, OutlinedFn, CGM);
6412
// Apply any AMDGPUWavesPerEUAttr attached through OMPXAttributeClause to the
// outlined function.
6413 for (auto *C : D.getClausesOfKind<OMPXAttributeClause>()) {
6414 for (auto *A : C->getAttrs()) {
6415 if (auto *Attr = dyn_cast<AMDGPUWavesPerEUAttr>(A))
6416 CGM.handleAMDGPUWavesPerEUAttr(OutlinedFn, Attr);
6417 }
6418 }
6419 registerVTable(D);
6420}
6421
6422/// Checks if the expression is constant or does not have non-trivial function
6423/// calls.
///
/// In addition to the condition above, the expression must have no side
/// effects, possible side effects included.
///
/// NOTE(review): listing line 6427 (the start of the return expression) is
/// absent from this extract.
6424static bool isTrivial(ASTContext &Ctx, const Expr * E) {
6425 // We can skip constant expressions.
6426 // We can skip expressions with trivial calls or simple expressions.
6428 !E->hasNonTrivialCall(Ctx)) &&
6429 !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true);
6430}
6431
6433 const Stmt *Body) {
// NOTE(review): the first line of this definition's signature (listing line
// 6432) and listing lines 6443-6444 (the condition of the "can be ignored"
// check) are absent from this extract.
//
// Looks through nested compound statements for the one statement that is not
// ignorable. Returns nullptr as soon as a compound statement contains more
// than one such statement.
6434 const Stmt *Child = Body->IgnoreContainers();
// Each iteration descends one level into a compound statement.
6435 while (const auto *C = dyn_cast_or_null<CompoundStmt>(Child)) {
6436 Child = nullptr;
6437 for (const Stmt *S : C->body()) {
// Trivial expressions (see isTrivial) do not count as children.
6438 if (const auto *E = dyn_cast<Expr>(S)) {
6439 if (isTrivial(Ctx, E))
6440 continue;
6441 }
6442 // Some of the statements can be ignored.
6445 continue;
6446 // Analyze declarations.
// A declaration statement is skipped when every declaration in it is one of
// the listed kinds, or is a variable that has global storage or is unused.
6447 if (const auto *DS = dyn_cast<DeclStmt>(S)) {
6448 if (llvm::all_of(DS->decls(), [](const Decl *D) {
6449 if (isa<EmptyDecl>(D) || isa<DeclContext>(D) ||
6450 isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) ||
6451 isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) ||
6452 isa<UsingDirectiveDecl>(D) ||
6453 isa<OMPDeclareReductionDecl>(D) ||
6454 isa<OMPThreadPrivateDecl>(D) || isa<OMPAllocateDecl>(D))
6455 return true;
6456 const auto *VD = dyn_cast<VarDecl>(D);
6457 if (!VD)
6458 return false;
6459 return VD->hasGlobalStorage() || !VD->isUsed();
6460 }))
6461 continue;
6462 }
6463 // Found multiple children - cannot get the one child only.
6464 if (Child)
6465 return nullptr;
6466 Child = S;
6467 }
// Strip containers from the candidate so the loop can descend into it if it
// is itself a compound statement.
6468 if (Child)
6469 Child = Child->IgnoreContainers();
6470 }
6471 return Child;
6472}
6473
6475 CodeGenFunction &CGF, const OMPExecutableDirective &D, int32_t &MinTeamsVal,
6476 int32_t &MaxTeamsVal) {
// NOTE(review): the first line of this definition's signature (listing line
// 6474) and listing line 6487 (the call that initializes ChildStmt) are
// absent from this extract.
//
// Returns the num_teams expression that applies to target directive D, or
// nullptr when there is none. MinTeamsVal/MaxTeamsVal are both set to:
//   - the constant value, when the expression is an integer constant;
//   - 0 for a teams construct without a num_teams clause;
//   - 1 when no teams construct is involved;
//   - -1 for a plain 'target' whose body has no single nested directive.
// They are left untouched when the expression is not a constant.
6477
6478 OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
6479 assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
6480 "Expected target-based executable directive.");
6481 switch (DirectiveKind) {
6482 case OMPD_target: {
// Plain 'target': inspect the single statement nested in the region, if any.
6483 const auto *CS = D.getInnermostCapturedStmt();
6484 const auto *Body =
6485 CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
6486 const Stmt *ChildStmt =
6488 if (const auto *NestedDir =
6489 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
6490 if (isOpenMPTeamsDirective(NestedDir->getDirectiveKind())) {
6491 if (NestedDir->hasClausesOfKind<OMPNumTeamsClause>()) {
// Only the first num_teams expression of the clause is considered.
6492 const Expr *NumTeams = NestedDir->getSingleClause<OMPNumTeamsClause>()
6493 ->getNumTeams()
6494 .front();
6495 if (NumTeams->isIntegerConstantExpr(CGF.getContext()))
6496 if (auto Constant =
6497 NumTeams->getIntegerConstantExpr(CGF.getContext()))
6498 MinTeamsVal = MaxTeamsVal = Constant->getExtValue();
6499 return NumTeams;
6500 }
// Nested teams directive without a num_teams clause.
6501 MinTeamsVal = MaxTeamsVal = 0;
6502 return nullptr;
6503 }
// The nested directive is not a teams directive.
6504 MinTeamsVal = MaxTeamsVal = 1;
6505 return nullptr;
6506 }
6507 // A value of -1 is used to check if we need to emit no teams region
6508 MinTeamsVal = MaxTeamsVal = -1;
6509 return nullptr;
6510 }
6511 case OMPD_target_teams_loop:
6512 case OMPD_target_teams:
6513 case OMPD_target_teams_distribute:
6514 case OMPD_target_teams_distribute_simd:
6515 case OMPD_target_teams_distribute_parallel_for:
6516 case OMPD_target_teams_distribute_parallel_for_simd: {
// Combined target teams directives: the clause, if any, is on D itself.
6517 if (D.hasClausesOfKind<OMPNumTeamsClause>()) {
6518 const Expr *NumTeams =
6519 D.getSingleClause<OMPNumTeamsClause>()->getNumTeams().front();
6520 if (NumTeams->isIntegerConstantExpr(CGF.getContext()))
6521 if (auto Constant = NumTeams->getIntegerConstantExpr(CGF.getContext()))
6522 MinTeamsVal = MaxTeamsVal = Constant->getExtValue();
6523 return NumTeams;
6524 }
6525 MinTeamsVal = MaxTeamsVal = 0;
6526 return nullptr;
6527 }
6528 case OMPD_target_parallel:
6529 case OMPD_target_parallel_for:
6530 case OMPD_target_parallel_for_simd:
6531 case OMPD_target_parallel_loop:
6532 case OMPD_target_simd:
// Target directives without a teams construct use exactly one team.
6533 MinTeamsVal = MaxTeamsVal = 1;
6534 return nullptr;
// Every remaining kind breaks out of the switch and reaches the
// llvm_unreachable after it.
6535 case OMPD_parallel:
6536 case OMPD_for:
6537 case OMPD_parallel_for:
6538 case OMPD_parallel_loop:
6539 case OMPD_parallel_master:
6540 case OMPD_parallel_sections:
6541 case OMPD_for_simd:
6542 case OMPD_parallel_for_simd:
6543 case OMPD_cancel:
6544 case OMPD_cancellation_point:
6545 case OMPD_ordered:
6546 case OMPD_threadprivate:
6547 case OMPD_allocate:
6548 case OMPD_task:
6549 case OMPD_simd:
6550 case OMPD_tile:
6551 case OMPD_unroll:
6552 case OMPD_sections:
6553 case OMPD_section:
6554 case OMPD_single:
6555 case OMPD_master:
6556 case OMPD_critical:
6557 case OMPD_taskyield:
6558 case OMPD_barrier:
6559 case OMPD_taskwait:
6560 case OMPD_taskgroup:
6561 case OMPD_atomic:
6562 case OMPD_flush:
6563 case OMPD_depobj:
6564 case OMPD_scan:
6565 case OMPD_teams:
6566 case OMPD_target_data:
6567 case OMPD_target_exit_data:
6568 case OMPD_target_enter_data:
6569 case OMPD_distribute:
6570 case OMPD_distribute_simd:
6571 case OMPD_distribute_parallel_for:
6572 case OMPD_distribute_parallel_for_simd:
6573 case OMPD_teams_distribute:
6574 case OMPD_teams_distribute_simd:
6575 case OMPD_teams_distribute_parallel_for:
6576 case OMPD_teams_distribute_parallel_for_simd:
6577 case OMPD_target_update:
6578 case OMPD_declare_simd:
6579 case OMPD_declare_variant:
6580 case OMPD_begin_declare_variant:
6581 case OMPD_end_declare_variant:
6582 case OMPD_declare_target:
6583 case OMPD_end_declare_target:
6584 case OMPD_declare_reduction:
6585 case OMPD_declare_mapper:
6586 case OMPD_taskloop:
6587 case OMPD_taskloop_simd:
6588 case OMPD_master_taskloop:
6589 case OMPD_master_taskloop_simd:
6590 case OMPD_parallel_master_taskloop:
6591 case OMPD_parallel_master_taskloop_simd:
6592 case OMPD_requires:
6593 case OMPD_metadirective:
6594 case OMPD_unknown:
6595 break;
6596 default:
6597 break;
6598 }
6599 llvm_unreachable("Unexpected directive kind.");
6600}
6601
6603 CodeGenFunction &CGF, const OMPExecutableDirective &D) {
// NOTE(review): the first line of this definition's signature (listing line
// 6602) is absent from this extract.
//
// Produces the number-of-teams value for target directive D as a 32-bit
// integer: the emitted num_teams expression when there is one and the
// directive kind is handled below, otherwise the constant computed by
// getNumTeamsExprForTargetDirective.
6604 assert(!CGF.getLangOpts().OpenMPIsTargetDevice &&
6605 "Clauses associated with the teams directive expected to be emitted "
6606 "only for the host!");
6607 CGBuilderTy &Bld = CGF.Builder;
6608 int32_t MinNT = -1, MaxNT = -1;
6609 const Expr *NumTeams =
6610 getNumTeamsExprForTargetDirective(CGF, D, MinNT, MaxNT);
6611 if (NumTeams != nullptr) {
6612 OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
6613
6614 switch (DirectiveKind) {
6615 case OMPD_target: {
// For a plain 'target' the expression belongs to a nested directive, so it is
// evaluated with the innermost captured statement's info installed.
6616 const auto *CS = D.getInnermostCapturedStmt();
6617 CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6618 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6619 llvm::Value *NumTeamsVal = CGF.EmitScalarExpr(NumTeams,
6620 /*IgnoreResultAssign*/ true);
6621 return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
6622 /*isSigned=*/true);
6623 }
// NOTE(review): OMPD_target_teams_loop is handled by
// getNumTeamsExprForTargetDirective but has no case here - confirm whether it
// can reach this switch.
6624 case OMPD_target_teams:
6625 case OMPD_target_teams_distribute:
6626 case OMPD_target_teams_distribute_simd:
6627 case OMPD_target_teams_distribute_parallel_for:
6628 case OMPD_target_teams_distribute_parallel_for_simd: {
// The expression is evaluated inside its own cleanup scope.
6629 CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF);
6630 llvm::Value *NumTeamsVal = CGF.EmitScalarExpr(NumTeams,
6631 /*IgnoreResultAssign*/ true);
6632 return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
6633 /*isSigned=*/true);
6634 }
6635 default:
6636 break;
6637 }
6638 }
6639
// NOTE(review): the assertion message says "Num threads" although the values
// checked here are the team counts - confirm the intended wording.
6640 assert(MinNT == MaxNT && "Num threads ranges require handling here.");
6641 return llvm::ConstantInt::getSigned(CGF.Int32Ty, MinNT);
6642}
6643
6644/// Check for a num threads constant value (stored in \p DefaultVal), or
6645/// expression (stored in \p E). If the value is conditional (via an if-clause),
6646/// store the condition in \p CondVal. If \p E, and \p CondVal respectively, are
6647/// nullptr, no expression evaluation is performed.
6648static void getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS,
6649 const Expr **E, int32_t &UpperBound,
6650 bool UpperBoundOnly, llvm::Value **CondVal) {
6652 CGF.getContext(), CS->getCapturedStmt());
6653 const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child);
6654 if (!Dir)
6655 return;
6656
6657 if (isOpenMPParallelDirective(Dir->getDirectiveKind())) {
6658 // Handle if clause. If if clause present, the number of threads is
6659 // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
6660 if (CondVal && Dir->hasClausesOfKind<OMPIfClause>()) {
6661 CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6662 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6663 const OMPIfClause *IfClause = nullptr;
6664 for (const auto *C : Dir->getClausesOfKind<OMPIfClause>()) {
6665 if (C->getNameModifier() == OMPD_unknown ||
6666 C->getNameModifier() == OMPD_parallel) {
6667 IfClause = C;
6668 break;
6669 }
6670 }
6671 if (IfClause) {
6672 const Expr *CondExpr = IfClause->getCondition();
6673 bool Result;
6674 if (CondExpr->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
6675 if (!Result) {
6676 UpperBound = 1;
6677 return;
6678 }
6679 } else {
6681 if (const auto *PreInit =
6682 cast_or_null<DeclStmt>(IfClause->getPreInitStmt())) {
6683 for (const auto *I : PreInit->decls()) {
6684 if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
6685 CGF.EmitVarDecl(cast<VarDecl>(*I));
6686 } else {
6689 CGF.EmitAutoVarCleanups(Emission);
6690 }
6691 }
6692 *CondVal = CGF.EvaluateExprAsBool(CondExpr);
6693 }
6694 }
6695 }
6696 }
6697 // Check the value of num_threads clause iff if clause was not specified
6698 // or is not evaluated to false.
6699 if (Dir->hasClausesOfKind<OMPNumThreadsClause>()) {
6700 CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6701 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6702 const auto *NumThreadsClause =
6703 Dir->getSingleClause<OMPNumThreadsClause>();
6704 const Expr *NTExpr = NumThreadsClause->getNumThreads();
6705 if (NTExpr->isIntegerConstantExpr(CGF.getContext()))
6706 if (auto Constant = NTExpr->getIntegerConstantExpr(CGF.getContext()))
6707 UpperBound =
6708 UpperBound
6709 ? Constant->getZExtValue()
6710 : std::min(UpperBound,
6711 static_cast<int32_t>(Constant->getZExtValue()));
6712 // If we haven't found an upper bound, remember we saw a thread limiting
6713 // clause.
6714 if (UpperBound == -1)
6715 UpperBound = 0;
6716 if (!E)
6717 return;
6718 CodeGenFunction::LexicalScope Scope(CGF, NTExpr->getSourceRange());
6719 if (const auto *PreInit =
6720 cast_or_null<DeclStmt>(NumThreadsClause->getPreInitStmt())) {
6721 for (const auto *I : PreInit->decls()) {
6722 if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
6723 CGF.EmitVarDecl(cast<VarDecl>(*I));
6724 } else {
6727 CGF.EmitAutoVarCleanups(Emission);
6728 }
6729 }
6730 }
6731 *E = NTExpr;
6732 }
6733 return;
6734 }
6735 if (isOpenMPSimdDirective(Dir->getDirectiveKind()))
6736 UpperBound = 1;
6737}
6738
6740 CodeGenFunction &CGF, const OMPExecutableDirective &D, int32_t &UpperBound,
6741 bool UpperBoundOnly, llvm::Value **CondVal, const Expr **ThreadLimitExpr) {
6742 assert((!CGF.getLangOpts().OpenMPIsTargetDevice || UpperBoundOnly) &&
6743 "Clauses associated with the teams directive expected to be emitted "
6744 "only for the host!");
6745 OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
6746 assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
6747 "Expected target-based executable directive.");
6748
6749 const Expr *NT = nullptr;
6750 const Expr **NTPtr = UpperBoundOnly ? nullptr : &NT;
6751
6752 auto CheckForConstExpr = [&](const Expr *E, const Expr **EPtr) {
6753 if (E->isIntegerConstantExpr(CGF.getContext())) {
6754 if (auto Constant = E->getIntegerConstantExpr(CGF.getContext()))
6755 UpperBound = UpperBound ? Constant->getZExtValue()
6756 : std::min(UpperBound,
6757 int32_t(Constant->getZExtValue()));
6758 }
6759 // If we haven't found an upper bound, remember we saw a thread limiting
6760 // clause.
6761 if (UpperBound == -1)
6762 UpperBound = 0;
6763 if (EPtr)
6764 *EPtr = E;
6765 };
6766
6767 auto ReturnSequential = [&]() {
6768 UpperBound = 1;
6769 return NT;
6770 };
6771
6772 switch (DirectiveKind) {
6773 case OMPD_target: {
6774 const CapturedStmt *CS = D.getInnermostCapturedStmt();
6775 getNumThreads(CGF, CS, NTPtr, UpperBound, UpperBoundOnly, CondVal);
6777 CGF.getContext(), CS->getCapturedStmt());
6778 // TODO: The standard is not clear how to resolve two thread limit clauses,
6779 // let's pick the teams one if it's present, otherwise the target one.
6780 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6781 if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
6782 if (const auto *TLC = Dir->getSingleClause<OMPThreadLimitClause>()) {
6783 ThreadLimitClause = TLC;
6784 if (ThreadLimitExpr) {
6785 CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6786 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6788 CGF,
6789 ThreadLimitClause->getThreadLimit().front()->getSourceRange());
6790 if (const auto *PreInit =
6791 cast_or_null<DeclStmt>(ThreadLimitClause->getPreInitStmt())) {
6792 for (const auto *I : PreInit->decls()) {
6793 if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
6794 CGF.EmitVarDecl(cast<VarDecl>(*I));
6795 } else {
6798 CGF.EmitAutoVarCleanups(Emission);
6799 }
6800 }
6801 }
6802 }
6803 }
6804 }
6805 if (ThreadLimitClause)
6806 CheckForConstExpr(ThreadLimitClause->getThreadLimit().front(),
6807 ThreadLimitExpr);
6808 if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
6809 if (isOpenMPTeamsDirective(Dir->getDirectiveKind()) &&
6810 !isOpenMPDistributeDirective(Dir->getDirectiveKind())) {
6811 CS = Dir->getInnermostCapturedStmt();
6813 CGF.getContext(), CS->getCapturedStmt());
6814 Dir = dyn_cast_or_null<OMPExecutableDirective>(Child);
6815 }
6816 if (Dir && isOpenMPParallelDirective(Dir->getDirectiveKind())) {
6817 CS = Dir->getInnermostCapturedStmt();
6818 getNumThreads(CGF, CS, NTPtr, UpperBound, UpperBoundOnly, CondVal);
6819 } else if (Dir && isOpenMPSimdDirective(Dir->getDirectiveKind()))
6820 return ReturnSequential();
6821 }
6822 return NT;
6823 }
6824 case OMPD_target_teams: {
6825 if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
6826 CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
6827 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6828 CheckForConstExpr(ThreadLimitClause->getThreadLimit().front(),
6829 ThreadLimitExpr);
6830 }
6831 const CapturedStmt *CS = D.getInnermostCapturedStmt();
6832 getNumThreads(CGF, CS, NTPtr, UpperBound, UpperBoundOnly, CondVal);
6834 CGF.getContext(), CS->getCapturedStmt());
6835 if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
6836 if (Dir->getDirectiveKind() == OMPD_distribute) {
6837 CS = Dir->getInnermostCapturedStmt();
6838 getNumThreads(CGF, CS, NTPtr, UpperBound, UpperBoundOnly, CondVal);
6839 }
6840 }
6841 return NT;
6842 }
6843 case OMPD_target_teams_distribute:
6844 if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
6845 CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
6846 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6847 CheckForConstExpr(ThreadLimitClause->getThreadLimit().front(),
6848 ThreadLimitExpr);
6849 }
6850 getNumThreads(CGF, D.getInnermostCapturedStmt(), NTPtr, UpperBound,
6851 UpperBoundOnly, CondVal);
6852 return NT;
6853 case OMPD_target_teams_loop:
6854 case OMPD_target_parallel_loop:
6855 case OMPD_target_parallel:
6856 case OMPD_target_parallel_for:
6857 case OMPD_target_parallel_for_simd:
6858 case OMPD_target_teams_distribute_parallel_for:
6859 case OMPD_target_teams_distribute_parallel_for_simd: {
6860 if (CondVal && D.hasClausesOfKind<OMPIfClause>()) {
6861 const OMPIfClause *IfClause = nullptr;
6862 for (const auto *C : D.getClausesOfKind<OMPIfClause>()) {
6863 if (C->getNameModifier() == OMPD_unknown ||
6864 C->getNameModifier() == OMPD_parallel) {
6865 IfClause = C;
6866 break;
6867 }
6868 }
6869 if (IfClause) {
6870 const Expr *Cond = IfClause->getCondition();
6871 bool Result;
6872 if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
6873 if (!Result)
6874 return ReturnSequential();
6875 } else {
6877 *CondVal = CGF.EvaluateExprAsBool(Cond);
6878 }
6879 }
6880 }
6881 if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
6882 CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
6883 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6884 CheckForConstExpr(ThreadLimitClause->getThreadLimit().front(),
6885 ThreadLimitExpr);
6886 }
6887 if (D.hasClausesOfKind<OMPNumThreadsClause>()) {
6888 CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
6889 const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>();
6890 CheckForConstExpr(NumThreadsClause->getNumThreads(), nullptr);
6891 return NumThreadsClause->getNumThreads();
6892 }
6893 return NT;
6894 }
6895 case OMPD_target_teams_distribute_simd:
6896 case OMPD_target_simd:
6897 return ReturnSequential();
6898 default:
6899 break;
6900 }
6901 llvm_unreachable("Unsupported directive kind.");
6902}
6903
6905 CodeGenFunction &CGF, const OMPExecutableDirective &D) {
6906 llvm::Value *NumThreadsVal = nullptr;
6907 llvm::Value *CondVal = nullptr;
6908 llvm::Value *ThreadLimitVal = nullptr;
6909 const Expr *ThreadLimitExpr = nullptr;
6910 int32_t UpperBound = -1;
6911
6913 CGF, D, UpperBound, /* UpperBoundOnly */ false, &CondVal,
6914 &ThreadLimitExpr);
6915
6916 // Thread limit expressions are used below, emit them.
6917 if (ThreadLimitExpr) {
6918 ThreadLimitVal =
6919 CGF.EmitScalarExpr(ThreadLimitExpr, /*IgnoreResultAssign=*/true);
6920 ThreadLimitVal = CGF.Builder.CreateIntCast(ThreadLimitVal, CGF.Int32Ty,
6921 /*isSigned=*/false);
6922 }
6923
6924 // Generate the num threads expression.
6925 if (UpperBound == 1) {
6926 NumThreadsVal = CGF.Builder.getInt32(UpperBound);
6927 } else if (NT) {
6928 NumThreadsVal = CGF.EmitScalarExpr(NT, /*IgnoreResultAssign=*/true);
6929 NumThreadsVal = CGF.Builder.CreateIntCast(NumThreadsVal, CGF.Int32Ty,
6930 /*isSigned=*/false);
6931 } else if (ThreadLimitVal) {
6932 // If we do not have a num threads value but a thread limit, replace the
6933 // former with the latter. We have now handled the thread limit expression.
6934 NumThreadsVal = ThreadLimitVal;
6935 ThreadLimitVal = nullptr;
6936 } else {
6937 // Default to "0" which means runtime choice.
6938 assert(!ThreadLimitVal && "Default not applicable with thread limit value");
6939 NumThreadsVal = CGF.Builder.getInt32(0);
6940 }
6941
6942 // Handle if clause. If if clause present, the number of threads is
6943 // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
6944 if (CondVal) {
6946 NumThreadsVal = CGF.Builder.CreateSelect(CondVal, NumThreadsVal,
6947 CGF.Builder.getInt32(1));
6948 }
6949
6950 // If the thread limit and num threads expression were present, take the
6951 // minimum.
6952 if (ThreadLimitVal) {
6953 NumThreadsVal = CGF.Builder.CreateSelect(
6954 CGF.Builder.CreateICmpULT(ThreadLimitVal, NumThreadsVal),
6955 ThreadLimitVal, NumThreadsVal);
6956 }
6957
6958 return NumThreadsVal;
6959}
6960
6961namespace {
6963
6964// Utility to handle information from clauses associated with a given
6965// construct that use mappable expressions (e.g. 'map' clause, 'to' clause).
6966// It provides a convenient interface to obtain the information and generate
6967// code for that information.
6968class MappableExprsHandler {
6969public:
6970 /// Custom comparator for attach-pointer expressions that compares them by
6971 /// complexity (i.e. their component-depth) first, then by the order in which
6972 /// they were computed by collectAttachPtrExprInfo(), if they are semantically
6973 /// different.
6974 struct AttachPtrExprComparator {
6975 const MappableExprsHandler &Handler;
6976 // Cache of previous equality comparison results.
6977 mutable llvm::DenseMap<std::pair<const Expr *, const Expr *>, bool>
6978 CachedEqualityComparisons;
6979
6980 AttachPtrExprComparator(const MappableExprsHandler &H) : Handler(H) {}
6981 AttachPtrExprComparator() = delete;
6982
6983 // Return true iff LHS is "less than" RHS.
6984 bool operator()(const Expr *LHS, const Expr *RHS) const {
6985 if (LHS == RHS)
6986 return false;
6987
6988 // First, compare by complexity (depth)
6989 const auto ItLHS = Handler.AttachPtrComponentDepthMap.find(LHS);
6990 const auto ItRHS = Handler.AttachPtrComponentDepthMap.find(RHS);
6991
6992 std::optional<size_t> DepthLHS =
6993 (ItLHS != Handler.AttachPtrComponentDepthMap.end()) ? ItLHS->second
6994 : std::nullopt;
6995 std::optional<size_t> DepthRHS =
6996 (ItRHS != Handler.AttachPtrComponentDepthMap.end()) ? ItRHS->second
6997 : std::nullopt;
6998
6999 // std::nullopt (no attach pointer) has lowest complexity
7000 if (!DepthLHS.has_value() && !DepthRHS.has_value()) {
7001 // Both have same complexity, now check semantic equality
7002 if (areEqual(LHS, RHS))
7003 return false;
7004 // Different semantically, compare by computation order
7005 return wasComputedBefore(LHS, RHS);
7006 }
7007 if (!DepthLHS.has_value())
7008 return true; // LHS has lower complexity
7009 if (!DepthRHS.has_value())
7010 return false; // RHS has lower complexity
7011
7012 // Both have values, compare by depth (lower depth = lower complexity)
7013 if (DepthLHS.value() != DepthRHS.value())
7014 return DepthLHS.value() < DepthRHS.value();
7015
7016 // Same complexity, now check semantic equality
7017 if (areEqual(LHS, RHS))
7018 return false;
7019 // Different semantically, compare by computation order
7020 return wasComputedBefore(LHS, RHS);
7021 }
7022
7023 public:
7024 /// Return true if \p LHS and \p RHS are semantically equal. Uses pre-cached
7025 /// results, if available, otherwise does a recursive semantic comparison.
7026 bool areEqual(const Expr *LHS, const Expr *RHS) const {
7027 // Check cache first for faster lookup
7028 const auto CachedResultIt = CachedEqualityComparisons.find({LHS, RHS});
7029 if (CachedResultIt != CachedEqualityComparisons.end())
7030 return CachedResultIt->second;
7031
7032 bool ComparisonResult = areSemanticallyEqual(LHS, RHS);
7033
7034 // Cache the result for future lookups (both orders since semantic
7035 // equality is commutative)
7036 CachedEqualityComparisons[{LHS, RHS}] = ComparisonResult;
7037 CachedEqualityComparisons[{RHS, LHS}] = ComparisonResult;
7038 return ComparisonResult;
7039 }
7040
7041 /// Compare the two attach-ptr expressions by their computation order.
7042 /// Returns true iff LHS was computed before RHS by
7043 /// collectAttachPtrExprInfo().
7044 bool wasComputedBefore(const Expr *LHS, const Expr *RHS) const {
7045 const size_t &OrderLHS = Handler.AttachPtrComputationOrderMap.at(LHS);
7046 const size_t &OrderRHS = Handler.AttachPtrComputationOrderMap.at(RHS);
7047
7048 return OrderLHS < OrderRHS;
7049 }
7050
7051 private:
7052 /// Helper function to compare attach-pointer expressions semantically.
7053 /// This function handles various expression types that can be part of an
7054 /// attach-pointer.
7055 /// TODO: Not urgent, but we should ideally return true when comparing
7056 /// `p[10]`, `*(p + 10)`, `*(p + 5 + 5)`, `p[10:1]` etc.
7057 bool areSemanticallyEqual(const Expr *LHS, const Expr *RHS) const {
7058 if (LHS == RHS)
7059 return true;
7060
7061 // If only one is null, they aren't equal
7062 if (!LHS || !RHS)
7063 return false;
7064
7065 ASTContext &Ctx = Handler.CGF.getContext();
7066 // Strip away parentheses and no-op casts to get to the core expression
7067 LHS = LHS->IgnoreParenNoopCasts(Ctx);
7068 RHS = RHS->IgnoreParenNoopCasts(Ctx);
7069
7070 // Direct pointer comparison of the underlying expressions
7071 if (LHS == RHS)
7072 return true;
7073
7074 // Check if the expression classes match
7075 if (LHS->getStmtClass() != RHS->getStmtClass())
7076 return false;
7077
7078 // Handle DeclRefExpr (variable references)
7079 if (const auto *LD = dyn_cast<DeclRefExpr>(LHS)) {
7080 const auto *RD = dyn_cast<DeclRefExpr>(RHS);
7081 if (!RD)
7082 return false;
7083 return LD->getDecl()->getCanonicalDecl() ==
7084 RD->getDecl()->getCanonicalDecl();
7085 }
7086
7087 // Handle ArraySubscriptExpr (array indexing like a[i])
7088 if (const auto *LA = dyn_cast<ArraySubscriptExpr>(LHS)) {
7089 const auto *RA = dyn_cast<ArraySubscriptExpr>(RHS);
7090 if (!RA)
7091 return false;
7092 return areSemanticallyEqual(LA->getBase(), RA->getBase()) &&
7093 areSemanticallyEqual(LA->getIdx(), RA->getIdx());
7094 }
7095
7096 // Handle MemberExpr (member access like s.m or p->m)
7097 if (const auto *LM = dyn_cast<MemberExpr>(LHS)) {
7098 const auto *RM = dyn_cast<MemberExpr>(RHS);
7099 if (!RM)
7100 return false;
7101 if (LM->getMemberDecl()->getCanonicalDecl() !=
7102 RM->getMemberDecl()->getCanonicalDecl())
7103 return false;
7104 return areSemanticallyEqual(LM->getBase(), RM->getBase());
7105 }
7106
7107 // Handle UnaryOperator (unary operations like *p, &x, etc.)
7108 if (const auto *LU = dyn_cast<UnaryOperator>(LHS)) {
7109 const auto *RU = dyn_cast<UnaryOperator>(RHS);
7110 if (!RU)
7111 return false;
7112 if (LU->getOpcode() != RU->getOpcode())
7113 return false;
7114 return areSemanticallyEqual(LU->getSubExpr(), RU->getSubExpr());
7115 }
7116
7117 // Handle BinaryOperator (binary operations like p + offset)
7118 if (const auto *LB = dyn_cast<BinaryOperator>(LHS)) {
7119 const auto *RB = dyn_cast<BinaryOperator>(RHS);
7120 if (!RB)
7121 return false;
7122 if (LB->getOpcode() != RB->getOpcode())
7123 return false;
7124 return areSemanticallyEqual(LB->getLHS(), RB->getLHS()) &&
7125 areSemanticallyEqual(LB->getRHS(), RB->getRHS());
7126 }
7127
7128 // Handle ArraySectionExpr (array sections like a[0:1])
7129 // Attach pointers should not contain array-sections, but currently we
7130 // don't emit an error.
7131 if (const auto *LAS = dyn_cast<ArraySectionExpr>(LHS)) {
7132 const auto *RAS = dyn_cast<ArraySectionExpr>(RHS);
7133 if (!RAS)
7134 return false;
7135 return areSemanticallyEqual(LAS->getBase(), RAS->getBase()) &&
7136 areSemanticallyEqual(LAS->getLowerBound(),
7137 RAS->getLowerBound()) &&
7138 areSemanticallyEqual(LAS->getLength(), RAS->getLength());
7139 }
7140
7141 // Handle CastExpr (explicit casts)
7142 if (const auto *LC = dyn_cast<CastExpr>(LHS)) {
7143 const auto *RC = dyn_cast<CastExpr>(RHS);
7144 if (!RC)
7145 return false;
7146 if (LC->getCastKind() != RC->getCastKind())
7147 return false;
7148 return areSemanticallyEqual(LC->getSubExpr(), RC->getSubExpr());
7149 }
7150
7151 // Handle CXXThisExpr (this pointer)
7152 if (isa<CXXThisExpr>(LHS) && isa<CXXThisExpr>(RHS))
7153 return true;
7154
7155 // Handle IntegerLiteral (integer constants)
7156 if (const auto *LI = dyn_cast<IntegerLiteral>(LHS)) {
7157 const auto *RI = dyn_cast<IntegerLiteral>(RHS);
7158 if (!RI)
7159 return false;
7160 return LI->getValue() == RI->getValue();
7161 }
7162
7163 // Handle CharacterLiteral (character constants)
7164 if (const auto *LC = dyn_cast<CharacterLiteral>(LHS)) {
7165 const auto *RC = dyn_cast<CharacterLiteral>(RHS);
7166 if (!RC)
7167 return false;
7168 return LC->getValue() == RC->getValue();
7169 }
7170
7171 // Handle FloatingLiteral (floating point constants)
7172 if (const auto *LF = dyn_cast<FloatingLiteral>(LHS)) {
7173 const auto *RF = dyn_cast<FloatingLiteral>(RHS);
7174 if (!RF)
7175 return false;
7176 // Use bitwise comparison for floating point literals
7177 return LF->getValue().bitwiseIsEqual(RF->getValue());
7178 }
7179
7180 // Handle StringLiteral (string constants)
7181 if (const auto *LS = dyn_cast<StringLiteral>(LHS)) {
7182 const auto *RS = dyn_cast<StringLiteral>(RHS);
7183 if (!RS)
7184 return false;
7185 return LS->getString() == RS->getString();
7186 }
7187
7188 // Handle CXXNullPtrLiteralExpr (nullptr)
7190 return true;
7191
7192 // Handle CXXBoolLiteralExpr (true/false)
7193 if (const auto *LB = dyn_cast<CXXBoolLiteralExpr>(LHS)) {
7194 const auto *RB = dyn_cast<CXXBoolLiteralExpr>(RHS);
7195 if (!RB)
7196 return false;
7197 return LB->getValue() == RB->getValue();
7198 }
7199
7200 // Fallback for other forms - use the existing comparison method
7201 return Expr::isSameComparisonOperand(LHS, RHS);
7202 }
7203 };
7204
7205 /// Get the offset of the OMP_MAP_MEMBER_OF field.
7206 static unsigned getFlagMemberOffset() {
7207 unsigned Offset = 0;
7208 for (uint64_t Remain =
7209 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
7210 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF);
7211 !(Remain & 1); Remain = Remain >> 1)
7212 Offset++;
7213 return Offset;
7214 }
7215
7216 /// Class that holds debugging information for a data mapping to be passed to
7217 /// the runtime library.
7218 class MappingExprInfo {
7219 /// The variable declaration used for the data mapping.
7220 const ValueDecl *MapDecl = nullptr;
7221 /// The original expression used in the map clause, or null if there is
7222 /// none.
7223 const Expr *MapExpr = nullptr;
7224
7225 public:
7226 MappingExprInfo(const ValueDecl *MapDecl, const Expr *MapExpr = nullptr)
7227 : MapDecl(MapDecl), MapExpr(MapExpr) {}
7228
7229 const ValueDecl *getMapDecl() const { return MapDecl; }
7230 const Expr *getMapExpr() const { return MapExpr; }
7231 };
7232
7233 using DeviceInfoTy = llvm::OpenMPIRBuilder::DeviceInfoTy;
7234 using MapBaseValuesArrayTy = llvm::OpenMPIRBuilder::MapValuesArrayTy;
7235 using MapValuesArrayTy = llvm::OpenMPIRBuilder::MapValuesArrayTy;
7236 using MapFlagsArrayTy = llvm::OpenMPIRBuilder::MapFlagsArrayTy;
7237 using MapDimArrayTy = llvm::OpenMPIRBuilder::MapDimArrayTy;
7238 using MapNonContiguousArrayTy =
7239 llvm::OpenMPIRBuilder::MapNonContiguousArrayTy;
7240 using MapExprsArrayTy = SmallVector<MappingExprInfo, 4>;
7241 using MapValueDeclsArrayTy = SmallVector<const ValueDecl *, 4>;
7242 using MapData =
7244 OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>,
7245 bool /*IsImplicit*/, const ValueDecl *, const Expr *>;
7246 using MapDataArrayTy = SmallVector<MapData, 4>;
7247
7248 /// This structure contains combined information generated for mappable
7249 /// clauses, including base pointers, pointers, sizes, map types, user-defined
7250 /// mappers, and non-contiguous information.
7251 struct MapCombinedInfoTy : llvm::OpenMPIRBuilder::MapInfosTy {
7252 MapExprsArrayTy Exprs;
7253 MapValueDeclsArrayTy Mappers;
7254 MapValueDeclsArrayTy DevicePtrDecls;
7255
7256 /// Append arrays in \a CurInfo.
7257 void append(MapCombinedInfoTy &CurInfo) {
7258 Exprs.append(CurInfo.Exprs.begin(), CurInfo.Exprs.end());
7259 DevicePtrDecls.append(CurInfo.DevicePtrDecls.begin(),
7260 CurInfo.DevicePtrDecls.end());
7261 Mappers.append(CurInfo.Mappers.begin(), CurInfo.Mappers.end());
7262 llvm::OpenMPIRBuilder::MapInfosTy::append(CurInfo);
7263 }
7264 };
7265
7266 /// Map between a struct and its lowest & highest elements which have been
7267 /// mapped.
7268 /// [ValueDecl *] --> {LE(FieldIndex, Pointer),
7269 /// HE(FieldIndex, Pointer)}
7270 struct StructRangeInfoTy {
7271 MapCombinedInfoTy PreliminaryMapData;
7272 std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> LowestElem = {
7273 0, Address::invalid()};
7274 std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> HighestElem = {
7275 0, Address::invalid()};
7278 bool IsArraySection = false;
7279 bool HasCompleteRecord = false;
7280 };
7281
7282 /// A struct to store the attach pointer and pointee information, to be used
7283 /// when emitting an attach entry.
7284 struct AttachInfoTy {
7285 Address AttachPtrAddr = Address::invalid();
7286 Address AttachPteeAddr = Address::invalid();
7287 const ValueDecl *AttachPtrDecl = nullptr;
7288 const Expr *AttachMapExpr = nullptr;
7289
7290 bool isValid() const {
7291 return AttachPtrAddr.isValid() && AttachPteeAddr.isValid();
7292 }
7293 };
7294
7295 /// Check if there's any component list where the attach pointer expression
7296 /// matches the given captured variable.
7297 bool hasAttachEntryForCapturedVar(const ValueDecl *VD) const {
7298 for (const auto &AttachEntry : AttachPtrExprMap) {
7299 if (AttachEntry.second) {
7300 // Check if the attach pointer expression is a DeclRefExpr that
7301 // references the captured variable
7302 if (const auto *DRE = dyn_cast<DeclRefExpr>(AttachEntry.second))
7303 if (DRE->getDecl() == VD)
7304 return true;
7305 }
7306 }
7307 return false;
7308 }
7309
7310 /// Get the previously-cached attach pointer for a component list, if-any.
7311 const Expr *getAttachPtrExpr(
7313 const {
7314 const auto It = AttachPtrExprMap.find(Components);
7315 if (It != AttachPtrExprMap.end())
7316 return It->second;
7317
7318 return nullptr;
7319 }
7320
7321private:
7322 /// Kind that defines how a device pointer has to be returned.
7323 struct MapInfo {
7326 ArrayRef<OpenMPMapModifierKind> MapModifiers;
7327 ArrayRef<OpenMPMotionModifierKind> MotionModifiers;
7328 bool ReturnDevicePointer = false;
7329 bool IsImplicit = false;
7330 const ValueDecl *Mapper = nullptr;
7331 const Expr *VarRef = nullptr;
7332 bool ForDeviceAddr = false;
7333 bool HasUdpFbNullify = false;
7334
7335 MapInfo() = default;
7336 MapInfo(
7338 OpenMPMapClauseKind MapType,
7339 ArrayRef<OpenMPMapModifierKind> MapModifiers,
7340 ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
7341 bool ReturnDevicePointer, bool IsImplicit,
7342 const ValueDecl *Mapper = nullptr, const Expr *VarRef = nullptr,
7343 bool ForDeviceAddr = false, bool HasUdpFbNullify = false)
7344 : Components(Components), MapType(MapType), MapModifiers(MapModifiers),
7345 MotionModifiers(MotionModifiers),
7346 ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit),
7347 Mapper(Mapper), VarRef(VarRef), ForDeviceAddr(ForDeviceAddr),
7348 HasUdpFbNullify(HasUdpFbNullify) {}
7349 };
7350
7351 /// The target directive from where the mappable clauses were extracted. It
7352 /// is either an executable directive or a user-defined mapper directive.
7353 llvm::PointerUnion<const OMPExecutableDirective *,
7354 const OMPDeclareMapperDecl *>
7355 CurDir;
7356
7357 /// Function the directive is being generated for.
7358 CodeGenFunction &CGF;
7359
7360 /// Set of all first private variables in the current directive.
7361 /// bool data is set to true if the variable is implicitly marked as
7362 /// firstprivate, false otherwise.
7363 llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, bool> FirstPrivateDecls;
7364
7365 /// Set of defaultmap clause kinds that use firstprivate behavior.
7366 llvm::SmallSet<OpenMPDefaultmapClauseKind, 4> DefaultmapFirstprivateKinds;
7367
7368 /// Map between device pointer declarations and their expression components.
7369 /// The key value for declarations in 'this' is null.
7370 llvm::DenseMap<
7371 const ValueDecl *,
7372 SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
7373 DevPointersMap;
7374
7375 /// Map between device addr declarations and their expression components.
7376 /// The key value for declarations in 'this' is null.
7377 llvm::DenseMap<
7378 const ValueDecl *,
7379 SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
7380 HasDevAddrsMap;
7381
7382 /// Map between lambda declarations and their map type.
7383 llvm::DenseMap<const ValueDecl *, const OMPMapClause *> LambdasMap;
7384
7385 /// Map from component lists to their attach pointer expressions.
7387 const Expr *>
7388 AttachPtrExprMap;
7389
7390 /// Map from attach pointer expressions to their component depth.
7391 /// nullptr key has std::nullopt depth. This can be used to order attach-ptr
7392 /// expressions with increasing/decreasing depth.
7393 /// The component-depth of `nullptr` (i.e. no attach-ptr) is `std::nullopt`.
7394 /// TODO: Not urgent, but we should ideally use the number of pointer
7395 /// dereferences in an expr as an indicator of its complexity, instead of the
7396 /// component-depth. That would be needed for us to treat `p[1]`, `*(p + 10)`,
7397 /// `*(p + 5 + 5)` together.
7398 llvm::DenseMap<const Expr *, std::optional<size_t>>
7399 AttachPtrComponentDepthMap = {{nullptr, std::nullopt}};
7400
7401 /// Map from attach pointer expressions to the order they were computed in, in
7402 /// collectAttachPtrExprInfo().
7403 llvm::DenseMap<const Expr *, size_t> AttachPtrComputationOrderMap = {
7404 {nullptr, 0}};
7405
7406 /// An instance of attach-ptr-expr comparator that can be used throughout the
7407 /// lifetime of this handler.
7408 AttachPtrExprComparator AttachPtrComparator;
7409
7410 llvm::Value *getExprTypeSize(const Expr *E) const {
7411 QualType ExprTy = E->getType().getCanonicalType();
7412
7413 // Calculate the size for array shaping expression.
7414 if (const auto *OAE = dyn_cast<OMPArrayShapingExpr>(E)) {
7415 llvm::Value *Size =
7416 CGF.getTypeSize(OAE->getBase()->getType()->getPointeeType());
7417 for (const Expr *SE : OAE->getDimensions()) {
7418 llvm::Value *Sz = CGF.EmitScalarExpr(SE);
7419 Sz = CGF.EmitScalarConversion(Sz, SE->getType(),
7420 CGF.getContext().getSizeType(),
7421 SE->getExprLoc());
7422 Size = CGF.Builder.CreateNUWMul(Size, Sz);
7423 }
7424 return Size;
7425 }
7426
7427 // Reference types are ignored for mapping purposes.
7428 if (const auto *RefTy = ExprTy->getAs<ReferenceType>())
7429 ExprTy = RefTy->getPointeeType().getCanonicalType();
7430
7431 // Given that an array section is considered a built-in type, we need to
7432 // do the calculation based on the length of the section instead of relying
7433 // on CGF.getTypeSize(E->getType()).
7434 if (const auto *OAE = dyn_cast<ArraySectionExpr>(E)) {
7435 QualType BaseTy = ArraySectionExpr::getBaseOriginalType(
7436 OAE->getBase()->IgnoreParenImpCasts())
7438
7439 // If there is no length associated with the expression and lower bound is
7440 // not specified too, that means we are using the whole length of the
7441 // base.
7442 if (!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
7443 !OAE->getLowerBound())
7444 return CGF.getTypeSize(BaseTy);
7445
7446 llvm::Value *ElemSize;
7447 if (const auto *PTy = BaseTy->getAs<PointerType>()) {
7448 ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType());
7449 } else {
7450 const auto *ATy = cast<ArrayType>(BaseTy.getTypePtr());
7451 assert(ATy && "Expecting array type if not a pointer type.");
7452 ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType());
7453 }
7454
7455 // If we don't have a length at this point, that is because we have an
7456 // array section with a single element.
7457 if (!OAE->getLength() && OAE->getColonLocFirst().isInvalid())
7458 return ElemSize;
7459
7460 if (const Expr *LenExpr = OAE->getLength()) {
7461 llvm::Value *LengthVal = CGF.EmitScalarExpr(LenExpr);
7462 LengthVal = CGF.EmitScalarConversion(LengthVal, LenExpr->getType(),
7463 CGF.getContext().getSizeType(),
7464 LenExpr->getExprLoc());
7465 return CGF.Builder.CreateNUWMul(LengthVal, ElemSize);
7466 }
7467 assert(!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
7468 OAE->getLowerBound() && "expected array_section[lb:].");
7469 // Size = sizetype - lb * elemtype;
7470 llvm::Value *LengthVal = CGF.getTypeSize(BaseTy);
7471 llvm::Value *LBVal = CGF.EmitScalarExpr(OAE->getLowerBound());
7472 LBVal = CGF.EmitScalarConversion(LBVal, OAE->getLowerBound()->getType(),
7473 CGF.getContext().getSizeType(),
7474 OAE->getLowerBound()->getExprLoc());
7475 LBVal = CGF.Builder.CreateNUWMul(LBVal, ElemSize);
7476 llvm::Value *Cmp = CGF.Builder.CreateICmpUGT(LengthVal, LBVal);
7477 llvm::Value *TrueVal = CGF.Builder.CreateNUWSub(LengthVal, LBVal);
7478 LengthVal = CGF.Builder.CreateSelect(
7479 Cmp, TrueVal, llvm::ConstantInt::get(CGF.SizeTy, 0));
7480 return LengthVal;
7481 }
7482 return CGF.getTypeSize(ExprTy);
7483 }
7484
7485 /// Return the corresponding bits for a given map clause modifier. Add
7486 /// a flag marking the map as a pointer if requested. Add a flag marking the
7487 /// map as the first one of a series of maps that relate to the same map
7488 /// expression.
7489 OpenMPOffloadMappingFlags getMapTypeBits(
7490 OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
7491 ArrayRef<OpenMPMotionModifierKind> MotionModifiers, bool IsImplicit,
7492 bool AddPtrFlag, bool AddIsTargetParamFlag, bool IsNonContiguous) const {
7493 OpenMPOffloadMappingFlags Bits =
7494 IsImplicit ? OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT
7495 : OpenMPOffloadMappingFlags::OMP_MAP_NONE;
7496 switch (MapType) {
7497 case OMPC_MAP_alloc:
7498 case OMPC_MAP_release:
7499 // alloc and release is the default behavior in the runtime library, i.e.
7500 // if we don't pass any bits alloc/release that is what the runtime is
7501 // going to do. Therefore, we don't need to signal anything for these two
7502 // type modifiers.
7503 break;
7504 case OMPC_MAP_to:
7505 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_TO;
7506 break;
7507 case OMPC_MAP_from:
7508 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_FROM;
7509 break;
7510 case OMPC_MAP_tofrom:
7511 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_TO |
7512 OpenMPOffloadMappingFlags::OMP_MAP_FROM;
7513 break;
7514 case OMPC_MAP_delete:
7515 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_DELETE;
7516 break;
7517 case OMPC_MAP_unknown:
7518 llvm_unreachable("Unexpected map type!");
7519 }
7520 if (AddPtrFlag)
7521 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ;
7522 if (AddIsTargetParamFlag)
7523 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;
7524 if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_always))
7525 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_ALWAYS;
7526 if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_close))
7527 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_CLOSE;
7528 if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_present) ||
7529 llvm::is_contained(MotionModifiers, OMPC_MOTION_MODIFIER_present))
7530 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_PRESENT;
7531 if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_ompx_hold))
7532 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD;
7533 if (IsNonContiguous)
7534 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_NON_CONTIG;
7535 return Bits;
7536 }
7537
7538 /// Return true if the provided expression is a final array section. A
7539 /// final array section, is one whose length can't be proved to be one.
7540 bool isFinalArraySectionExpression(const Expr *E) const {
7541 const auto *OASE = dyn_cast<ArraySectionExpr>(E);
7542
7543 // It is not an array section and therefore not a unity-size one.
7544 if (!OASE)
7545 return false;
7546
7547 // An array section with no colon always refer to a single element.
7548 if (OASE->getColonLocFirst().isInvalid())
7549 return false;
7550
7551 const Expr *Length = OASE->getLength();
7552
7553 // If we don't have a length we have to check if the array has size 1
7554 // for this dimension. Also, we should always expect a length if the
7555 // base type is pointer.
7556 if (!Length) {
7557 QualType BaseQTy = ArraySectionExpr::getBaseOriginalType(
7558 OASE->getBase()->IgnoreParenImpCasts())
7560 if (const auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr()))
7561 return ATy->getSExtSize() != 1;
7562 // If we don't have a constant dimension length, we have to consider
7563 // the current section as having any size, so it is not necessarily
7564 // unitary. If it happen to be unity size, that's user fault.
7565 return true;
7566 }
7567
7568 // Check if the length evaluates to 1.
7569 Expr::EvalResult Result;
7570 if (!Length->EvaluateAsInt(Result, CGF.getContext()))
7571 return true; // Can have more that size 1.
7572
7573 llvm::APSInt ConstLength = Result.Val.getInt();
7574 return ConstLength.getSExtValue() != 1;
7575 }
7576
7577 /// Emit an attach entry into \p CombinedInfo, using the information from \p
7578 /// AttachInfo. For example, for a map of form `int *p; ... map(p[1:10])`,
7579 /// an attach entry has the following form:
7580 /// &p, &p[1], sizeof(void*), ATTACH
7581 void emitAttachEntry(CodeGenFunction &CGF, MapCombinedInfoTy &CombinedInfo,
7582 const AttachInfoTy &AttachInfo) const {
7583 assert(AttachInfo.isValid() &&
7584 "Expected valid attach pointer/pointee information!");
7585
7586 // Size is the size of the pointer itself - use pointer size, not BaseDecl
7587 // size
7588 llvm::Value *PointerSize = CGF.Builder.CreateIntCast(
7589 llvm::ConstantInt::get(
7590 CGF.CGM.SizeTy, CGF.getContext()
7592 .getQuantity()),
7593 CGF.Int64Ty, /*isSigned=*/true);
7594
7595 CombinedInfo.Exprs.emplace_back(AttachInfo.AttachPtrDecl,
7596 AttachInfo.AttachMapExpr);
7597 CombinedInfo.BasePointers.push_back(
7598 AttachInfo.AttachPtrAddr.emitRawPointer(CGF));
7599 CombinedInfo.DevicePtrDecls.push_back(nullptr);
7600 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
7601 CombinedInfo.Pointers.push_back(
7602 AttachInfo.AttachPteeAddr.emitRawPointer(CGF));
7603 CombinedInfo.Sizes.push_back(PointerSize);
7604 CombinedInfo.Types.push_back(OpenMPOffloadMappingFlags::OMP_MAP_ATTACH);
7605 CombinedInfo.Mappers.push_back(nullptr);
7606 CombinedInfo.NonContigInfo.Dims.push_back(1);
7607 }
7608
7609 /// A helper class to copy structures with overlapped elements, i.e. those
7610 /// which have mappings of both "s" and "s.mem". Consecutive elements that
7611 /// are not explicitly copied have mapping nodes synthesized for them,
7612 /// taking care to avoid generating zero-sized copies.
7613 class CopyOverlappedEntryGaps {
7614 CodeGenFunction &CGF;
7615 MapCombinedInfoTy &CombinedInfo;
7616 OpenMPOffloadMappingFlags Flags = OpenMPOffloadMappingFlags::OMP_MAP_NONE;
7617 const ValueDecl *MapDecl = nullptr;
7618 const Expr *MapExpr = nullptr;
7620 bool IsNonContiguous = false;
7621 uint64_t DimSize = 0;
7622 // These elements track the position as the struct is iterated over
7623 // (in order of increasing element address).
7624 const RecordDecl *LastParent = nullptr;
7625 uint64_t Cursor = 0;
7626 unsigned LastIndex = -1u;
7628
7629 public:
7630 CopyOverlappedEntryGaps(CodeGenFunction &CGF,
7631 MapCombinedInfoTy &CombinedInfo,
7632 OpenMPOffloadMappingFlags Flags,
7633 const ValueDecl *MapDecl, const Expr *MapExpr,
7634 Address BP, Address LB, bool IsNonContiguous,
7635 uint64_t DimSize)
7636 : CGF(CGF), CombinedInfo(CombinedInfo), Flags(Flags), MapDecl(MapDecl),
7637 MapExpr(MapExpr), BP(BP), IsNonContiguous(IsNonContiguous),
7638 DimSize(DimSize), LB(LB) {}
7639
7640 void processField(
7641 const OMPClauseMappableExprCommon::MappableComponent &MC,
7642 const FieldDecl *FD,
7643 llvm::function_ref<LValue(CodeGenFunction &, const MemberExpr *)>
7644 EmitMemberExprBase) {
7645 const RecordDecl *RD = FD->getParent();
7646 const ASTRecordLayout &RL = CGF.getContext().getASTRecordLayout(RD);
7647 uint64_t FieldOffset = RL.getFieldOffset(FD->getFieldIndex());
7648 uint64_t FieldSize =
7650 Address ComponentLB = Address::invalid();
7651
7652 if (FD->getType()->isLValueReferenceType()) {
7653 const auto *ME = cast<MemberExpr>(MC.getAssociatedExpression());
7654 LValue BaseLVal = EmitMemberExprBase(CGF, ME);
7655 ComponentLB =
7656 CGF.EmitLValueForFieldInitialization(BaseLVal, FD).getAddress();
7657 } else {
7658 ComponentLB =
7660 }
7661
7662 if (!LastParent)
7663 LastParent = RD;
7664 if (FD->getParent() == LastParent) {
7665 if (FD->getFieldIndex() != LastIndex + 1)
7666 copyUntilField(FD, ComponentLB);
7667 } else {
7668 LastParent = FD->getParent();
7669 if (((int64_t)FieldOffset - (int64_t)Cursor) > 0)
7670 copyUntilField(FD, ComponentLB);
7671 }
7672 Cursor = FieldOffset + FieldSize;
7673 LastIndex = FD->getFieldIndex();
7674 LB = CGF.Builder.CreateConstGEP(ComponentLB, 1);
7675 }
7676
7677 void copyUntilField(const FieldDecl *FD, Address ComponentLB) {
7678 llvm::Value *ComponentLBPtr = ComponentLB.emitRawPointer(CGF);
7679 llvm::Value *LBPtr = LB.emitRawPointer(CGF);
7680 llvm::Value *Size = CGF.Builder.CreatePtrDiff(ComponentLBPtr, LBPtr);
7681 copySizedChunk(LBPtr, Size);
7682 }
7683
7684 void copyUntilEnd(Address HB) {
7685 if (LastParent) {
7686 const ASTRecordLayout &RL =
7687 CGF.getContext().getASTRecordLayout(LastParent);
7688 if ((uint64_t)CGF.getContext().toBits(RL.getSize()) <= Cursor)
7689 return;
7690 }
7691 llvm::Value *LBPtr = LB.emitRawPointer(CGF);
7692 llvm::Value *Size = CGF.Builder.CreatePtrDiff(
7693 CGF.Builder.CreateConstGEP(HB, 1).emitRawPointer(CGF), LBPtr);
7694 copySizedChunk(LBPtr, Size);
7695 }
7696
7697 void copySizedChunk(llvm::Value *Base, llvm::Value *Size) {
7698 CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
7699 CombinedInfo.BasePointers.push_back(BP.emitRawPointer(CGF));
7700 CombinedInfo.DevicePtrDecls.push_back(nullptr);
7701 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
7702 CombinedInfo.Pointers.push_back(Base);
7703 CombinedInfo.Sizes.push_back(
7704 CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/false));
7705 CombinedInfo.Types.push_back(Flags);
7706 CombinedInfo.Mappers.push_back(nullptr);
7707 CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize : 1);
7708 }
7709 };
7710
7711 /// Generate the base pointers, section pointers, sizes, map type bits, and
7712 /// user-defined mappers (all included in \a CombinedInfo) for the provided
7713 /// map type, map or motion modifiers, and expression components.
7714 /// \a IsFirstComponent should be set to true if the provided set of
7715 /// components is the first associated with a capture.
7716 void generateInfoForComponentList(
7717 OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
7718 ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
7720 MapCombinedInfoTy &CombinedInfo,
7721 MapCombinedInfoTy &StructBaseCombinedInfo,
7722 StructRangeInfoTy &PartialStruct, AttachInfoTy &AttachInfo,
7723 bool IsFirstComponentList, bool IsImplicit,
7724 bool GenerateAllInfoForClauses, const ValueDecl *Mapper = nullptr,
7725 bool ForDeviceAddr = false, const ValueDecl *BaseDecl = nullptr,
7726 const Expr *MapExpr = nullptr,
7727 ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
7728 OverlappedElements = {}) const {
7729
7730 // The following summarizes what has to be generated for each map and the
7731 // types below. The generated information is expressed in this order:
7732 // base pointer, section pointer, size, flags
7733 // (to add to the ones that come from the map type and modifier).
7734 // Entries annotated with (+) are only generated for "target" constructs,
7735 // and only if the variable at the beginning of the expression is used in
7736 // the region.
7737 //
7738 // double d;
7739 // int i[100];
7740 // float *p;
7741 // int **a = &i;
7742 //
7743 // struct S1 {
7744 // int i;
7745 // float f[50];
7746 // }
7747 // struct S2 {
7748 // int i;
7749 // float f[50];
7750 // S1 s;
7751 // double *p;
7752 // double *&pref;
7753 // struct S2 *ps;
7754 // int &ref;
7755 // }
7756 // S2 s;
7757 // S2 *ps;
7758 //
7759 // map(d)
7760 // &d, &d, sizeof(double), TARGET_PARAM | TO | FROM
7761 //
7762 // map(i)
7763 // &i, &i, 100*sizeof(int), TARGET_PARAM | TO | FROM
7764 //
7765 // map(i[1:23])
7766 // &i(=&i[0]), &i[1], 23*sizeof(int), TARGET_PARAM | TO | FROM
7767 //
7768 // map(p)
7769 // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM
7770 //
7771 // map(p[1:24])
7772 // p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM // map pointee
7773 // &p, &p[1], sizeof(void*), ATTACH // attach pointer/pointee, if both
7774 // // are present, and either is new
7775 //
7776 // map(([22])p)
7777 // p, p, 22*sizeof(float), TARGET_PARAM | TO | FROM
7778 // &p, p, sizeof(void*), ATTACH
7779 //
7780 // map((*a)[0:3])
7781 // a, a, 0, TARGET_PARAM | IMPLICIT // (+)
7782 // (*a)[0], &(*a)[0], 3 * sizeof(int), TO | FROM
7783 // &(*a), &(*a)[0], sizeof(void*), ATTACH
7784 // (+) Only on target, if a is used in the region
7785 // Note: Since the attach base-pointer is `*a`, which is not a scalar
7786 // variable, it doesn't determine the clause on `a`. `a` is mapped using
7787 // a zero-length-array-section map by generateDefaultMapInfo, if it is
7788 // referenced in the target region, because it is a pointer.
7789 //
7790 // map(**a)
7791 // a, a, 0, TARGET_PARAM | IMPLICIT // (+)
7792 // &(*a)[0], &(*a)[0], sizeof(int), TO | FROM
7793 // &(*a), &(*a)[0], sizeof(void*), ATTACH
7794 // (+) Only on target, if a is used in the region
7795 //
7796 // map(s)
7797 // FIXME: This needs to also imply map(ref_ptr_ptee: s.ref), since the
7798 // effect is supposed to be same as if the user had a map for every element
7799 // of the struct. We currently do a shallow-map of s.
7800 // &s, &s, sizeof(S2), TARGET_PARAM | TO | FROM
7801 //
7802 // map(s.i)
7803 // &s, &(s.i), sizeof(int), TARGET_PARAM | TO | FROM
7804 //
7805 // map(s.s.f)
7806 // &s, &(s.s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
7807 //
7808 // map(s.p)
7809 // &s, &(s.p), sizeof(double*), TARGET_PARAM | TO | FROM
7810 //
7811 // map(to: s.p[:22])
7812 // &s, &(s.p), sizeof(double*), TARGET_PARAM | IMPLICIT // (+)
7813 // &(s.p[0]), &(s.p[0]), 22 * sizeof(double*), TO | FROM
7814 // &(s.p), &(s.p[0]), sizeof(void*), ATTACH
7815 //
7816 // map(to: s.ref)
7817 // &s, &(ptr(s.ref)), sizeof(int*), TARGET_PARAM (*)
7818 // &s, &(ptee(s.ref)), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
7819 // (*) alloc space for struct members, only this is a target parameter.
7820 // (**) map the pointer (nothing to be mapped in this example) (the compiler
7821 // optimizes this entry out, same in the examples below)
7822 // (***) map the pointee (map: to)
7823 // Note: ptr(s.ref) represents the referring pointer of s.ref
7824 // ptee(s.ref) represents the referenced pointee of s.ref
7825 //
7826 // map(to: s.pref)
7827 // &s, &(ptr(s.pref)), sizeof(double**), TARGET_PARAM
7828 // &s, &(ptee(s.pref)), sizeof(double*), MEMBER_OF(1) | PTR_AND_OBJ | TO
7829 //
7830 // map(to: s.pref[:22])
7831 // &s, &(ptr(s.pref)), sizeof(double**), TARGET_PARAM | IMPLICIT // (+)
7832 // &s, &(ptee(s.pref)), sizeof(double*), MEMBER_OF(1) | PTR_AND_OBJ | TO |
7833 // FROM | IMPLICIT // (+)
7834 // &(ptee(s.pref)[0]), &(ptee(s.pref)[0]), 22 * sizeof(double), TO
7835 // &(ptee(s.pref)), &(ptee(s.pref)[0]), sizeof(void*), ATTACH
7836 //
7837 // map(s.ps)
7838 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM
7839 //
7840 // map(from: s.ps->s.i)
7841 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM | IMPLICIT // (+)
7842 // &(s.ps[0]), &(s.ps->s.i), sizeof(int), FROM
7843 // &(s.ps), &(s.ps->s.i), sizeof(void*), ATTACH
7844 //
7845 // map(to: s.ps->ps)
7846 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM | IMPLICIT // (+)
7847 // &(s.ps[0]), &(s.ps->ps), sizeof(S2*), TO
7848 // &(s.ps), &(s.ps->ps), sizeof(void*), ATTACH
7849 //
7850 // map(s.ps->ps->ps)
7851 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM | IMPLICIT // (+)
7852 // &(s.ps->ps[0]), &(s.ps->ps->ps), sizeof(S2*), TO
7853 // &(s.ps->ps), &(s.ps->ps->ps), sizeof(void*), ATTACH
7854 //
7855 // map(to: s.ps->ps->s.f[:22])
7856 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM | IMPLICIT // (+)
7857 // &(s.ps->ps[0]), &(s.ps->ps->s.f[0]), 22*sizeof(float), TO
7858 // &(s.ps->ps), &(s.ps->ps->s.f[0]), sizeof(void*), ATTACH
7859 //
7860 // map(ps)
7861 // &ps, &ps, sizeof(S2*), TARGET_PARAM | TO | FROM
7862 //
7863 // map(ps->i)
7864 // ps, &(ps->i), sizeof(int), TARGET_PARAM | TO | FROM
7865 // &ps, &(ps->i), sizeof(void*), ATTACH
7866 //
7867 // map(ps->s.f)
7868 // ps, &(ps->s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
7869 // &ps, &(ps->s.f[0]), sizeof(ps), ATTACH
7870 //
7871 // map(from: ps->p)
7872 // ps, &(ps->p), sizeof(double*), TARGET_PARAM | FROM
7873 // &ps, &(ps->p), sizeof(ps), ATTACH
7874 //
7875 // map(to: ps->p[:22])
7876 // ps, &(ps[0]), 0, TARGET_PARAM | IMPLICIT // (+)
7877 // &(ps->p[0]), &(ps->p[0]), 22*sizeof(double), TO
7878 // &(ps->p), &(ps->p[0]), sizeof(void*), ATTACH
7879 //
7880 // map(ps->ps)
7881 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM | TO | FROM
7882 // &ps, &(ps->ps), sizeof(ps), ATTACH
7883 //
7884 // map(from: ps->ps->s.i)
7885 // ps, &(ps[0]), 0, TARGET_PARAM | IMPLICIT // (+)
7886 // &(ps->ps[0]), &(ps->ps->s.i), sizeof(int), FROM
7887 // &(ps->ps), &(ps->ps->s.i), sizeof(void*), ATTACH
7888 //
7889 // map(from: ps->ps->ps)
7890 // ps, &ps[0], 0, TARGET_PARAM | IMPLICIT // (+)
7891 // &(ps->ps[0]), &(ps->ps->ps), sizeof(S2*), FROM
7892 // &(ps->ps), &(ps->ps->ps), sizeof(void*), ATTACH
7893 //
7894 // map(ps->ps->ps->ps)
7895 // ps, &ps[0], 0, TARGET_PARAM | IMPLICIT // (+)
7896 // &(ps->ps->ps[0]), &(ps->ps->ps->ps), sizeof(S2*), FROM
7897 // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(void*), ATTACH
7898 //
7899 // map(to: ps->ps->ps->s.f[:22])
7900 // ps, &ps[0], 0, TARGET_PARAM | IMPLICIT // (+)
7901 // &(ps->ps->ps[0]), &(ps->ps->ps->s.f[0]), 22*sizeof(float), TO
7902 // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), sizeof(void*), ATTACH
7903 //
7904 // map(to: s.f[:22]) map(from: s.p[:33])
7905 // On target, and if s is used in the region:
7906 //
7907 // &s, &(s.f[0]), 50*sizeof(float) +
7908 // sizeof(struct S1) +
7909 // sizeof(double*) (**), TARGET_PARAM
7910 // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | TO
7911 // &s, &(s.p), sizeof(double*), MEMBER_OF(1) | TO |
7912 // FROM | IMPLICIT
7913 // &(s.p[0]), &(s.p[0]), 33*sizeof(double), FROM
7914 // &(s.p), &(s.p[0]), sizeof(void*), ATTACH
7915 // (**) allocate contiguous space needed to fit all mapped members even if
7916 // we allocate space for members not mapped (in this example,
7917 // s.f[22..49] and s.s are not mapped, yet we must allocate space for
7918 // them as well because they fall between &s.f[0] and &s.p)
7919 //
7920 // On other constructs, and, if s is not used in the region, on target:
7921 // &s, &(s.f[0]), 22*sizeof(float), TO
7922 // &(s.p[0]), &(s.p[0]), 33*sizeof(double), FROM
7923 // &(s.p), &(s.p[0]), sizeof(void*), ATTACH
7924 //
7925 // map(from: s.f[:22]) map(to: ps->p[:33])
7926 // &s, &(s.f[0]), 22*sizeof(float), TARGET_PARAM | FROM
7927 // &ps[0], &ps[0], 0, TARGET_PARAM | IMPLICIT // (+)
7928 // &(ps->p[0]), &(ps->p[0]), 33*sizeof(double), TO
7929 // &(ps->p), &(ps->p[0]), sizeof(void*), ATTACH
7930 //
7931 // map(from: s.f[:22], s.s) map(to: ps->p[:33])
7932 // &s, &(s.f[0]), 50*sizeof(float) +
7933 // sizeof(struct S1), TARGET_PARAM
7934 // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | FROM
7935 // &s, &(s.s), sizeof(struct S1), MEMBER_OF(1) | FROM
7936 // ps, &ps[0], 0, TARGET_PARAM | IMPLICIT // (+)
7937 // &(ps->p[0]), &(ps->p[0]), 33*sizeof(double), TO
7938 // &(ps->p), &(ps->p[0]), sizeof(void*), ATTACH
7939 //
7940 // map(p[:100], p)
7941 // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM
7942 // p, &p[0], 100*sizeof(float), TO | FROM
7943 // &p, &p[0], sizeof(float*), ATTACH
7944
7945 // Track if the map information being generated is the first for a capture.
7946 bool IsCaptureFirstInfo = IsFirstComponentList;
7947 // When the variable is on a declare target link or in a to clause with
7948 // unified memory, a reference is needed to hold the host/device address
7949 // of the variable.
7950 bool RequiresReference = false;
7951
7952 // Scan the components from the base to the complete expression.
7953 auto CI = Components.rbegin();
7954 auto CE = Components.rend();
7955 auto I = CI;
7956
7957 // Track if the map information being generated is the first for a list of
7958 // components.
7959 bool IsExpressionFirstInfo = true;
7960 bool FirstPointerInComplexData = false;
7962 Address FinalLowestElem = Address::invalid();
7963 const Expr *AssocExpr = I->getAssociatedExpression();
7964 const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr);
7965 const auto *OASE = dyn_cast<ArraySectionExpr>(AssocExpr);
7966 const auto *OAShE = dyn_cast<OMPArrayShapingExpr>(AssocExpr);
7967
7968 // Get the pointer-attachment base-pointer for the given list, if any.
7969 const Expr *AttachPtrExpr = getAttachPtrExpr(Components);
7970 auto [AttachPtrAddr, AttachPteeBaseAddr] =
7971 getAttachPtrAddrAndPteeBaseAddr(AttachPtrExpr, CGF);
7972
7973 bool HasAttachPtr = AttachPtrExpr != nullptr;
7974 bool FirstComponentIsForAttachPtr = AssocExpr == AttachPtrExpr;
7975 bool SeenAttachPtr = FirstComponentIsForAttachPtr;
7976
7977 if (FirstComponentIsForAttachPtr) {
7978 // No need to process AttachPtr here. It will be processed at the end
7979 // after we have computed the pointee's address.
7980 ++I;
7981 } else if (isa<MemberExpr>(AssocExpr)) {
7982 // The base is the 'this' pointer. The content of the pointer is going
7983 // to be the base of the field being mapped.
7984 BP = CGF.LoadCXXThisAddress();
7985 } else if ((AE && isa<CXXThisExpr>(AE->getBase()->IgnoreParenImpCasts())) ||
7986 (OASE &&
7987 isa<CXXThisExpr>(OASE->getBase()->IgnoreParenImpCasts()))) {
7988 BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress();
7989 } else if (OAShE &&
7990 isa<CXXThisExpr>(OAShE->getBase()->IgnoreParenCasts())) {
7991 BP = Address(
7992 CGF.EmitScalarExpr(OAShE->getBase()),
7993 CGF.ConvertTypeForMem(OAShE->getBase()->getType()->getPointeeType()),
7994 CGF.getContext().getTypeAlignInChars(OAShE->getBase()->getType()));
7995 } else {
7996 // The base is the reference to the variable.
7997 // BP = &Var.
7998 BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress();
7999 if (const auto *VD =
8000 dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) {
8001 if (std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
8002 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) {
8003 if ((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
8004 ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
8005 *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
8007 RequiresReference = true;
8009 }
8010 }
8011 }
8012
8013 // If the variable is a pointer and is being dereferenced (i.e. is not
8014 // the last component), the base has to be the pointer itself, not its
8015 // reference. References are ignored for mapping purposes.
8016 QualType Ty =
8017 I->getAssociatedDeclaration()->getType().getNonReferenceType();
8018 if (Ty->isAnyPointerType() && std::next(I) != CE) {
8019 // No need to generate individual map information for the pointer, it
8020 // can be associated with the combined storage if shared memory mode is
8021 // active or the base declaration is not global variable.
8022 const auto *VD = dyn_cast<VarDecl>(I->getAssociatedDeclaration());
8024 !VD || VD->hasLocalStorage() || HasAttachPtr)
8025 BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
8026 else
8027 FirstPointerInComplexData = true;
8028 ++I;
8029 }
8030 }
8031
8032 // Track whether a component of the list should be marked as MEMBER_OF some
8033 // combined entry (for partial structs). Only the first PTR_AND_OBJ entry
8034 // in a component list should be marked as MEMBER_OF, all subsequent entries
8035 // do not belong to the base struct. E.g.
8036 // struct S2 s;
8037 // s.ps->ps->ps->f[:]
8038 // (1) (2) (3) (4)
8039 // ps(1) is a member pointer, ps(2) is a pointee of ps(1), so it is a
8040 // PTR_AND_OBJ entry; the PTR is ps(1), so MEMBER_OF the base struct. ps(3)
8041 // is the pointee of ps(2) which is not member of struct s, so it should not
8042 // be marked as such (it is still PTR_AND_OBJ).
8043 // The variable is initialized to false so that PTR_AND_OBJ entries which
8044 // are not struct members are not considered (e.g. array of pointers to
8045 // data).
8046 bool ShouldBeMemberOf = false;
8047
8048 // Variable keeping track of whether or not we have encountered a component
8049 // in the component list which is a member expression. Useful when we have a
8050 // pointer or a final array section, in which case it is the previous
8051 // component in the list which tells us whether we have a member expression.
8052 // E.g. X.f[:]
8053 // While processing the final array section "[:]" it is "f" which tells us
8054 // whether we are dealing with a member of a declared struct.
8055 const MemberExpr *EncounteredME = nullptr;
8056
8057 // Track for the total number of dimension. Start from one for the dummy
8058 // dimension.
8059 uint64_t DimSize = 1;
8060
8061 // Detects non-contiguous updates due to strided accesses.
8062 // Sets the 'IsNonContiguous' flag so that the 'MapType' bits are set
8063 // correctly when generating information to be passed to the runtime. The
8064 // flag is set to true if any array section has a stride not equal to 1, or
8065 // if the stride is not a constant expression (conservatively assumed
8066 // non-contiguous).
8067 bool IsNonContiguous =
8068 CombinedInfo.NonContigInfo.IsNonContiguous ||
8069 any_of(Components, [&](const auto &Component) {
8070 const auto *OASE =
8071 dyn_cast<ArraySectionExpr>(Component.getAssociatedExpression());
8072 if (!OASE)
8073 return false;
8074
8075 const Expr *StrideExpr = OASE->getStride();
8076 if (!StrideExpr)
8077 return false;
8078
8079 assert(StrideExpr->getType()->isIntegerType() &&
8080 "Stride expression must be of integer type");
8081
8082 // If stride is not evaluatable as a constant, treat as
8083 // non-contiguous.
8084 const auto Constant =
8085 StrideExpr->getIntegerConstantExpr(CGF.getContext());
8086 if (!Constant)
8087 return true;
8088
8089 // Treat non-unitary strides as non-contiguous.
8090 return !Constant->isOne();
8091 });
8092
8093 bool IsPrevMemberReference = false;
8094
8095 bool IsPartialMapped =
8096 !PartialStruct.PreliminaryMapData.BasePointers.empty();
8097
8098 // We need to check if we will be encountering any MEs. If we do not
8099 // encounter any ME expression it means we will be mapping the whole struct.
8100 // In that case we need to skip adding an entry for the struct to the
8101 // CombinedInfo list and instead add an entry to the StructBaseCombinedInfo
8102 // list only when generating all info for clauses.
8103 bool IsMappingWholeStruct = true;
8104 if (!GenerateAllInfoForClauses) {
8105 IsMappingWholeStruct = false;
8106 } else {
8107 for (auto TempI = I; TempI != CE; ++TempI) {
8108 const MemberExpr *PossibleME =
8109 dyn_cast<MemberExpr>(TempI->getAssociatedExpression());
8110 if (PossibleME) {
8111 IsMappingWholeStruct = false;
8112 break;
8113 }
8114 }
8115 }
8116
8117 bool SeenFirstNonBinOpExprAfterAttachPtr = false;
8118 for (; I != CE; ++I) {
8119 // If we have a valid attach-ptr, we skip processing all components until
8120 // after the attach-ptr.
8121 if (HasAttachPtr && !SeenAttachPtr) {
8122 SeenAttachPtr = I->getAssociatedExpression() == AttachPtrExpr;
8123 continue;
8124 }
8125
8126 // After finding the attach pointer, skip binary-ops, to skip past
8127 // expressions like (p + 10), for a map like map(*(p + 10)), where p is
8128 // the attach-ptr.
8129 if (HasAttachPtr && !SeenFirstNonBinOpExprAfterAttachPtr) {
8130 const auto *BO = dyn_cast<BinaryOperator>(I->getAssociatedExpression());
8131 if (BO)
8132 continue;
8133
8134 // Found the first non-binary-operator component after attach
8135 SeenFirstNonBinOpExprAfterAttachPtr = true;
8136 BP = AttachPteeBaseAddr;
8137 }
8138
8139 // If the current component is member of a struct (parent struct) mark it.
8140 if (!EncounteredME) {
8141 EncounteredME = dyn_cast<MemberExpr>(I->getAssociatedExpression());
8142 // If we encounter a PTR_AND_OBJ entry from now on it should be marked
8143 // as MEMBER_OF the parent struct.
8144 if (EncounteredME) {
8145 ShouldBeMemberOf = true;
8146 // Do not emit as complex pointer if this is actually not array-like
8147 // expression.
8148 if (FirstPointerInComplexData) {
8149 QualType Ty = std::prev(I)
8150 ->getAssociatedDeclaration()
8151 ->getType()
8152 .getNonReferenceType();
8153 BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
8154 FirstPointerInComplexData = false;
8155 }
8156 }
8157 }
8158
8159 auto Next = std::next(I);
8160
8161 // We need to generate the addresses and sizes if this is the last
8162 // component, if the component is a pointer or if it is an array section
8163 // whose length can't be proved to be one. If this is a pointer, it
8164 // becomes the base address for the following components.
8165
8166 // A final array section, is one whose length can't be proved to be one.
8167 // If the map item is non-contiguous then we don't treat any array section
8168 // as final array section.
8169 bool IsFinalArraySection =
8170 !IsNonContiguous &&
8171 isFinalArraySectionExpression(I->getAssociatedExpression());
8172
8173 // If we have a declaration for the mapping use that, otherwise use
8174 // the base declaration of the map clause.
8175 const ValueDecl *MapDecl = (I->getAssociatedDeclaration())
8176 ? I->getAssociatedDeclaration()
8177 : BaseDecl;
8178 MapExpr = (I->getAssociatedExpression()) ? I->getAssociatedExpression()
8179 : MapExpr;
8180
8181 // Get information on whether the element is a pointer. Have to do a
8182 // special treatment for array sections given that they are built-in
8183 // types.
8184 const auto *OASE =
8185 dyn_cast<ArraySectionExpr>(I->getAssociatedExpression());
8186 const auto *OAShE =
8187 dyn_cast<OMPArrayShapingExpr>(I->getAssociatedExpression());
8188 const auto *UO = dyn_cast<UnaryOperator>(I->getAssociatedExpression());
8189 const auto *BO = dyn_cast<BinaryOperator>(I->getAssociatedExpression());
8190 bool IsPointer =
8191 OAShE ||
8194 ->isAnyPointerType()) ||
8195 I->getAssociatedExpression()->getType()->isAnyPointerType();
8196 bool IsMemberReference = isa<MemberExpr>(I->getAssociatedExpression()) &&
8197 MapDecl &&
8198 MapDecl->getType()->isLValueReferenceType();
8199 bool IsNonDerefPointer = IsPointer &&
8200 !(UO && UO->getOpcode() != UO_Deref) && !BO &&
8201 !IsNonContiguous;
8202
8203 if (OASE)
8204 ++DimSize;
8205
8206 if (Next == CE || IsMemberReference || IsNonDerefPointer ||
8207 IsFinalArraySection) {
8208 // If this is not the last component, we expect the pointer to be
8209 // associated with an array expression or member expression.
8210 assert((Next == CE ||
8211 isa<MemberExpr>(Next->getAssociatedExpression()) ||
8212 isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) ||
8213 isa<ArraySectionExpr>(Next->getAssociatedExpression()) ||
8214 isa<OMPArrayShapingExpr>(Next->getAssociatedExpression()) ||
8215 isa<UnaryOperator>(Next->getAssociatedExpression()) ||
8216 isa<BinaryOperator>(Next->getAssociatedExpression())) &&
8217 "Unexpected expression");
8218
8220 Address LowestElem = Address::invalid();
8221 auto &&EmitMemberExprBase = [](CodeGenFunction &CGF,
8222 const MemberExpr *E) {
8223 const Expr *BaseExpr = E->getBase();
8224 // If this is s.x, emit s as an lvalue. If it is s->x, emit s as a
8225 // scalar.
8226 LValue BaseLV;
8227 if (E->isArrow()) {
8228 LValueBaseInfo BaseInfo;
8229 TBAAAccessInfo TBAAInfo;
8230 Address Addr =
8231 CGF.EmitPointerWithAlignment(BaseExpr, &BaseInfo, &TBAAInfo);
8232 QualType PtrTy = BaseExpr->getType()->getPointeeType();
8233 BaseLV = CGF.MakeAddrLValue(Addr, PtrTy, BaseInfo, TBAAInfo);
8234 } else {
8235 BaseLV = CGF.EmitOMPSharedLValue(BaseExpr);
8236 }
8237 return BaseLV;
8238 };
8239 if (OAShE) {
8240 LowestElem = LB =
8241 Address(CGF.EmitScalarExpr(OAShE->getBase()),
8243 OAShE->getBase()->getType()->getPointeeType()),
8245 OAShE->getBase()->getType()));
8246 } else if (IsMemberReference) {
8247 const auto *ME = cast<MemberExpr>(I->getAssociatedExpression());
8248 LValue BaseLVal = EmitMemberExprBase(CGF, ME);
8249 LowestElem = CGF.EmitLValueForFieldInitialization(
8250 BaseLVal, cast<FieldDecl>(MapDecl))
8251 .getAddress();
8252 LB = CGF.EmitLoadOfReferenceLValue(LowestElem, MapDecl->getType())
8253 .getAddress();
8254 } else {
8255 LowestElem = LB =
8256 CGF.EmitOMPSharedLValue(I->getAssociatedExpression())
8257 .getAddress();
8258 }
8259
8260 // Save the final LowestElem, to use it as the pointee in attach maps,
8261 // if emitted.
8262 if (Next == CE)
8263 FinalLowestElem = LowestElem;
8264
8265 // If this component is a pointer inside the base struct then we don't
8266 // need to create any entry for it - it will be combined with the object
8267 // it is pointing to into a single PTR_AND_OBJ entry.
8268 bool IsMemberPointerOrAddr =
8269 EncounteredME &&
8270 (((IsPointer || ForDeviceAddr) &&
8271 I->getAssociatedExpression() == EncounteredME) ||
8272 (IsPrevMemberReference && !IsPointer) ||
8273 (IsMemberReference && Next != CE &&
8274 !Next->getAssociatedExpression()->getType()->isPointerType()));
8275 if (!OverlappedElements.empty() && Next == CE) {
8276 // Handle base element with the info for overlapped elements.
8277 assert(!PartialStruct.Base.isValid() && "The base element is set.");
8278 assert(!IsPointer &&
8279 "Unexpected base element with the pointer type.");
8280 // Mark the whole struct as the struct that requires allocation on the
8281 // device.
8282 PartialStruct.LowestElem = {0, LowestElem};
8283 CharUnits TypeSize = CGF.getContext().getTypeSizeInChars(
8284 I->getAssociatedExpression()->getType());
8287 LowestElem, CGF.VoidPtrTy, CGF.Int8Ty),
8288 TypeSize.getQuantity() - 1);
8289 PartialStruct.HighestElem = {
8290 std::numeric_limits<decltype(
8291 PartialStruct.HighestElem.first)>::max(),
8292 HB};
8293 PartialStruct.Base = BP;
8294 PartialStruct.LB = LB;
8295 assert(
8296 PartialStruct.PreliminaryMapData.BasePointers.empty() &&
8297 "Overlapped elements must be used only once for the variable.");
8298 std::swap(PartialStruct.PreliminaryMapData, CombinedInfo);
8299 // Emit data for non-overlapped data.
8300 OpenMPOffloadMappingFlags Flags =
8301 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF |
8302 getMapTypeBits(MapType, MapModifiers, MotionModifiers, IsImplicit,
8303 /*AddPtrFlag=*/false,
8304 /*AddIsTargetParamFlag=*/false, IsNonContiguous);
8305 CopyOverlappedEntryGaps CopyGaps(CGF, CombinedInfo, Flags, MapDecl,
8306 MapExpr, BP, LB, IsNonContiguous,
8307 DimSize);
8308 // Do bitcopy of all non-overlapped structure elements.
8310 Component : OverlappedElements) {
8311 for (const OMPClauseMappableExprCommon::MappableComponent &MC :
8312 Component) {
8313 if (const ValueDecl *VD = MC.getAssociatedDeclaration()) {
8314 if (const auto *FD = dyn_cast<FieldDecl>(VD)) {
8315 CopyGaps.processField(MC, FD, EmitMemberExprBase);
8316 }
8317 }
8318 }
8319 }
8320 CopyGaps.copyUntilEnd(HB);
8321 break;
8322 }
8323 llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression());
8324 // Skip adding an entry in the CurInfo of this combined entry if the
8325 // whole struct is currently being mapped. The struct needs to be added
8326 // in the first position before any data internal to the struct is being
8327 // mapped.
8328 // Skip adding an entry in the CurInfo of this combined entry if the
8329 // PartialStruct.PreliminaryMapData.BasePointers has been mapped.
8330 if ((!IsMemberPointerOrAddr && !IsPartialMapped) ||
8331 (Next == CE && MapType != OMPC_MAP_unknown)) {
8332 if (!IsMappingWholeStruct) {
8333 CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
8334 CombinedInfo.BasePointers.push_back(BP.emitRawPointer(CGF));
8335 CombinedInfo.DevicePtrDecls.push_back(nullptr);
8336 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
8337 CombinedInfo.Pointers.push_back(LB.emitRawPointer(CGF));
8338 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
8339 Size, CGF.Int64Ty, /*isSigned=*/true));
8340 CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
8341 : 1);
8342 } else {
8343 StructBaseCombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
8344 StructBaseCombinedInfo.BasePointers.push_back(
8345 BP.emitRawPointer(CGF));
8346 StructBaseCombinedInfo.DevicePtrDecls.push_back(nullptr);
8347 StructBaseCombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
8348 StructBaseCombinedInfo.Pointers.push_back(LB.emitRawPointer(CGF));
8349 StructBaseCombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
8350 Size, CGF.Int64Ty, /*isSigned=*/true));
8351 StructBaseCombinedInfo.NonContigInfo.Dims.push_back(
8352 IsNonContiguous ? DimSize : 1);
8353 }
8354
8355 // If Mapper is valid, the last component inherits the mapper.
8356 bool HasMapper = Mapper && Next == CE;
8357 if (!IsMappingWholeStruct)
8358 CombinedInfo.Mappers.push_back(HasMapper ? Mapper : nullptr);
8359 else
8360 StructBaseCombinedInfo.Mappers.push_back(HasMapper ? Mapper
8361 : nullptr);
8362
8363 // We need to add a pointer flag for each map that comes from the
8364 // same expression except for the first one. We also need to signal
8365 // this map is the first one that relates with the current capture
8366 // (there is a set of entries for each capture).
8367 OpenMPOffloadMappingFlags Flags = getMapTypeBits(
8368 MapType, MapModifiers, MotionModifiers, IsImplicit,
8369 !IsExpressionFirstInfo || RequiresReference ||
8370 FirstPointerInComplexData || IsMemberReference,
8371 IsCaptureFirstInfo && !RequiresReference, IsNonContiguous);
8372
8373 if (!IsExpressionFirstInfo || IsMemberReference) {
8374 // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as well,
8375 // then we reset the TO/FROM/ALWAYS/DELETE/CLOSE flags.
8376 if (IsPointer || (IsMemberReference && Next != CE))
8377 Flags &= ~(OpenMPOffloadMappingFlags::OMP_MAP_TO |
8378 OpenMPOffloadMappingFlags::OMP_MAP_FROM |
8379 OpenMPOffloadMappingFlags::OMP_MAP_ALWAYS |
8380 OpenMPOffloadMappingFlags::OMP_MAP_DELETE |
8381 OpenMPOffloadMappingFlags::OMP_MAP_CLOSE);
8382
8383 if (ShouldBeMemberOf) {
8384 // Set placeholder value MEMBER_OF=FFFF to indicate that the flag
8385 // should be later updated with the correct value of MEMBER_OF.
8386 Flags |= OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF;
8387 // From now on, all subsequent PTR_AND_OBJ entries should not be
8388 // marked as MEMBER_OF.
8389 ShouldBeMemberOf = false;
8390 }
8391 }
8392
8393 if (!IsMappingWholeStruct)
8394 CombinedInfo.Types.push_back(Flags);
8395 else
8396 StructBaseCombinedInfo.Types.push_back(Flags);
8397 }
8398
8399 // If we have encountered a member expression so far, keep track of the
8400 // mapped member. If the parent is "*this", then the value declaration
8401 // is nullptr.
8402 if (EncounteredME) {
8403 const auto *FD = cast<FieldDecl>(EncounteredME->getMemberDecl());
8404 unsigned FieldIndex = FD->getFieldIndex();
8405
8406 // Update info about the lowest and highest elements for this struct
8407 if (!PartialStruct.Base.isValid()) {
8408 PartialStruct.LowestElem = {FieldIndex, LowestElem};
8409 if (IsFinalArraySection && OASE) {
8410 Address HB =
8411 CGF.EmitArraySectionExpr(OASE, /*IsLowerBound=*/false)
8412 .getAddress();
8413 PartialStruct.HighestElem = {FieldIndex, HB};
8414 } else {
8415 PartialStruct.HighestElem = {FieldIndex, LowestElem};
8416 }
8417 PartialStruct.Base = BP;
8418 PartialStruct.LB = BP;
8419 } else if (FieldIndex < PartialStruct.LowestElem.first) {
8420 PartialStruct.LowestElem = {FieldIndex, LowestElem};
8421 } else if (FieldIndex > PartialStruct.HighestElem.first) {
8422 if (IsFinalArraySection && OASE) {
8423 Address HB =
8424 CGF.EmitArraySectionExpr(OASE, /*IsLowerBound=*/false)
8425 .getAddress();
8426 PartialStruct.HighestElem = {FieldIndex, HB};
8427 } else {
8428 PartialStruct.HighestElem = {FieldIndex, LowestElem};
8429 }
8430 }
8431 }
8432
8433 // Need to emit combined struct for array sections.
8434 if (IsFinalArraySection || IsNonContiguous)
8435 PartialStruct.IsArraySection = true;
8436
8437 // If we have a final array section, we are done with this expression.
8438 if (IsFinalArraySection)
8439 break;
8440
8441 // The pointer becomes the base for the next element.
8442 if (Next != CE)
8443 BP = IsMemberReference ? LowestElem : LB;
8444 if (!IsPartialMapped)
8445 IsExpressionFirstInfo = false;
8446 IsCaptureFirstInfo = false;
8447 FirstPointerInComplexData = false;
8448 IsPrevMemberReference = IsMemberReference;
8449 } else if (FirstPointerInComplexData) {
8450 QualType Ty = Components.rbegin()
8451 ->getAssociatedDeclaration()
8452 ->getType()
8453 .getNonReferenceType();
8454 BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
8455 FirstPointerInComplexData = false;
8456 }
8457 }
8458 // If we ran into the whole component, allocate the space for the whole
8459 // record.
8460 if (!EncounteredME)
8461 PartialStruct.HasCompleteRecord = true;
8462
8463 // Populate ATTACH information for later processing by emitAttachEntry.
8464 if (shouldEmitAttachEntry(AttachPtrExpr, BaseDecl, CGF, CurDir)) {
8465 AttachInfo.AttachPtrAddr = AttachPtrAddr;
8466 AttachInfo.AttachPteeAddr = FinalLowestElem;
8467 AttachInfo.AttachPtrDecl = BaseDecl;
8468 AttachInfo.AttachMapExpr = MapExpr;
8469 }
8470
8471 if (!IsNonContiguous)
8472 return;
8473
8474 const ASTContext &Context = CGF.getContext();
8475
8476 // For supporting stride in array section, we need to initialize the first
8477 // dimension size as 1, first offset as 0, and first count as 1
8478 MapValuesArrayTy CurOffsets = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 0)};
8479 MapValuesArrayTy CurCounts;
8480 MapValuesArrayTy CurStrides = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)};
8481 MapValuesArrayTy DimSizes{llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)};
8482 uint64_t ElementTypeSize;
8483
8484 // Collect Size information for each dimension and get the element size as
8485 // the first Stride. For example, for `int arr[10][10]`, the DimSizes
8486 // should be [10, 10] and the first stride is 4 bytes.
8487 for (const OMPClauseMappableExprCommon::MappableComponent &Component :
8488 Components) {
8489 const Expr *AssocExpr = Component.getAssociatedExpression();
8490 const auto *OASE = dyn_cast<ArraySectionExpr>(AssocExpr);
8491
8492 if (!OASE)
8493 continue;
8494
8495 QualType Ty = ArraySectionExpr::getBaseOriginalType(OASE->getBase());
8496 auto *CAT = Context.getAsConstantArrayType(Ty);
8497 auto *VAT = Context.getAsVariableArrayType(Ty);
8498
8499 // We need all the dimension size except for the last dimension.
8500 assert((VAT || CAT || &Component == &*Components.begin()) &&
8501 "Should be either ConstantArray or VariableArray if not the "
8502 "first Component");
8503
8504 // Get element size if CurCounts is empty.
8505 if (CurCounts.empty()) {
8506 const Type *ElementType = nullptr;
8507 if (CAT)
8508 ElementType = CAT->getElementType().getTypePtr();
8509 else if (VAT)
8510 ElementType = VAT->getElementType().getTypePtr();
8511 else if (&Component == &*Components.begin()) {
8512 // If the base is a raw pointer (e.g. T *data with data[a:b:c]),
8513 // there was no earlier CAT/VAT/array handling to establish
8514 // ElementType. Capture the pointee type now so that subsequent
8515 // components (offset/length/stride) have a concrete element type to
8516 // work with. This makes pointer-backed sections behave consistently
8517 // with CAT/VAT/array bases.
8518 if (const auto *PtrType = Ty->getAs<PointerType>())
8519 ElementType = PtrType->getPointeeType().getTypePtr();
8520 } else {
8521 // Any component after the first should never have a raw pointer type;
8522 // by this point. ElementType must already be known (set above or in
8523 // prior array / CAT / VAT handling).
8524 assert(!Ty->isPointerType() &&
8525 "Non-first components should not be raw pointers");
8526 }
8527
8528 // At this stage, if ElementType was a base pointer and we are in the
8529 // first iteration, it has been computed.
8530 if (ElementType) {
8531 // For the case that having pointer as base, we need to remove one
8532 // level of indirection.
8533 if (&Component != &*Components.begin())
8534 ElementType = ElementType->getPointeeOrArrayElementType();
8535 ElementTypeSize =
8536 Context.getTypeSizeInChars(ElementType).getQuantity();
8537 CurCounts.push_back(
8538 llvm::ConstantInt::get(CGF.Int64Ty, ElementTypeSize));
8539 }
8540 }
8541 // Get dimension value except for the last dimension since we don't need
8542 // it.
8543 if (DimSizes.size() < Components.size() - 1) {
8544 if (CAT)
8545 DimSizes.push_back(
8546 llvm::ConstantInt::get(CGF.Int64Ty, CAT->getZExtSize()));
8547 else if (VAT)
8548 DimSizes.push_back(CGF.Builder.CreateIntCast(
8549 CGF.EmitScalarExpr(VAT->getSizeExpr()), CGF.Int64Ty,
8550 /*IsSigned=*/false));
8551 }
8552 }
8553
8554 // Skip the dummy dimension since we already have its information.
8555 auto *DI = DimSizes.begin() + 1;
8556 // Product of dimension.
8557 llvm::Value *DimProd =
8558 llvm::ConstantInt::get(CGF.CGM.Int64Ty, ElementTypeSize);
8559
8560 // Collect info for non-contiguous. Notice that offset, count, and stride
8561 // are only meaningful for array-section, so we insert a null for anything
8562 // other than array-section.
8563 // Also, the size of offset, count, and stride are not the same as
8564 // pointers, base_pointers, sizes, or dims. Instead, the size of offset,
8565 // count, and stride are the same as the number of non-contiguous
8566 // declaration in target update to/from clause.
8567 for (const OMPClauseMappableExprCommon::MappableComponent &Component :
8568 Components) {
8569 const Expr *AssocExpr = Component.getAssociatedExpression();
8570
8571 if (const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr)) {
8572 llvm::Value *Offset = CGF.Builder.CreateIntCast(
8573 CGF.EmitScalarExpr(AE->getIdx()), CGF.Int64Ty,
8574 /*isSigned=*/false);
8575 CurOffsets.push_back(Offset);
8576 CurCounts.push_back(llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/1));
8577 CurStrides.push_back(CurStrides.back());
8578 continue;
8579 }
8580
8581 const auto *OASE = dyn_cast<ArraySectionExpr>(AssocExpr);
8582
8583 if (!OASE)
8584 continue;
8585
8586 // Offset
8587 const Expr *OffsetExpr = OASE->getLowerBound();
8588 llvm::Value *Offset = nullptr;
8589 if (!OffsetExpr) {
8590 // If offset is absent, then we just set it to zero.
8591 Offset = llvm::ConstantInt::get(CGF.Int64Ty, 0);
8592 } else {
8593 Offset = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(OffsetExpr),
8594 CGF.Int64Ty,
8595 /*isSigned=*/false);
8596 }
8597
8598 // Count
8599 const Expr *CountExpr = OASE->getLength();
8600 llvm::Value *Count = nullptr;
8601 if (!CountExpr) {
8602 // In Clang, once a high dimension is an array section, we construct all
8603 // the lower dimension as array section, however, for case like
8604 // arr[0:2][2], Clang construct the inner dimension as an array section
8605 // but it actually is not in an array section form according to spec.
8606 if (!OASE->getColonLocFirst().isValid() &&
8607 !OASE->getColonLocSecond().isValid()) {
8608 Count = llvm::ConstantInt::get(CGF.Int64Ty, 1);
8609 } else {
8610 // OpenMP 5.0, 2.1.5 Array Sections, Description.
8611 // When the length is absent it defaults to ⌈(size −
8612 // lower-bound)/stride⌉, where size is the size of the array
8613 // dimension.
8614 const Expr *StrideExpr = OASE->getStride();
8615 llvm::Value *Stride =
8616 StrideExpr
8617 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr),
8618 CGF.Int64Ty, /*isSigned=*/false)
8619 : nullptr;
8620 if (Stride)
8621 Count = CGF.Builder.CreateUDiv(
8622 CGF.Builder.CreateNUWSub(*DI, Offset), Stride);
8623 else
8624 Count = CGF.Builder.CreateNUWSub(*DI, Offset);
8625 }
8626 } else {
8627 Count = CGF.EmitScalarExpr(CountExpr);
8628 }
8629 Count = CGF.Builder.CreateIntCast(Count, CGF.Int64Ty, /*isSigned=*/false);
8630 CurCounts.push_back(Count);
8631
8632 // Stride_n' = Stride_n * (D_0 * D_1 ... * D_n-1) * Unit size
8633 // Offset_n' = Offset_n * (D_0 * D_1 ... * D_n-1) * Unit size
8634 // Take `int arr[5][5][5]` and `arr[0:2:2][1:2:1][0:2:2]` as an example:
8635 // Offset Count Stride
8636 // D0 0 4 1 (int) <- dummy dimension
8637 // D1 0 2 8 (2 * (1) * 4)
8638 // D2 100 2 20 (1 * (1 * 5) * 4)
8639 // D3 0 2 200 (2 * (1 * 5 * 4) * 4)
8640 const Expr *StrideExpr = OASE->getStride();
8641 llvm::Value *Stride =
8642 StrideExpr
8643 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr),
8644 CGF.Int64Ty, /*isSigned=*/false)
8645 : nullptr;
8646 DimProd = CGF.Builder.CreateNUWMul(DimProd, *(DI - 1));
8647 if (Stride)
8648 CurStrides.push_back(CGF.Builder.CreateNUWMul(DimProd, Stride));
8649 else
8650 CurStrides.push_back(DimProd);
8651
8652 Offset = CGF.Builder.CreateNUWMul(DimProd, Offset);
8653 CurOffsets.push_back(Offset);
8654
8655 if (DI != DimSizes.end())
8656 ++DI;
8657 }
8658
8659 CombinedInfo.NonContigInfo.Offsets.push_back(CurOffsets);
8660 CombinedInfo.NonContigInfo.Counts.push_back(CurCounts);
8661 CombinedInfo.NonContigInfo.Strides.push_back(CurStrides);
8662 }
8663
8664 /// Return the adjusted map modifiers if the declaration a capture refers to
8665 /// appears in a first-private clause. This is expected to be used only with
8666 /// directives that start with 'target'.
8667 OpenMPOffloadMappingFlags
8668 getMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap) const {
8669 assert(Cap.capturesVariable() && "Expected capture by reference only!");
8670
8671 // A first private variable captured by reference will use only the
8672 // 'private ptr' and 'map to' flag. Return the right flags if the captured
8673 // declaration is known as first-private in this handler.
8674 if (FirstPrivateDecls.count(Cap.getCapturedVar())) {
8675 if (Cap.getCapturedVar()->getType()->isAnyPointerType())
8676 return OpenMPOffloadMappingFlags::OMP_MAP_TO |
8677 OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ;
8678 return OpenMPOffloadMappingFlags::OMP_MAP_PRIVATE |
8679 OpenMPOffloadMappingFlags::OMP_MAP_TO;
8680 }
8681 auto I = LambdasMap.find(Cap.getCapturedVar()->getCanonicalDecl());
8682 if (I != LambdasMap.end())
8683 // for map(to: lambda): using user specified map type.
8684 return getMapTypeBits(
8685 I->getSecond()->getMapType(), I->getSecond()->getMapTypeModifiers(),
8686 /*MotionModifiers=*/{}, I->getSecond()->isImplicit(),
8687 /*AddPtrFlag=*/false,
8688 /*AddIsTargetParamFlag=*/false,
8689 /*isNonContiguous=*/false);
8690 return OpenMPOffloadMappingFlags::OMP_MAP_TO |
8691 OpenMPOffloadMappingFlags::OMP_MAP_FROM;
8692 }
8693
8694 void getPlainLayout(const CXXRecordDecl *RD,
8695 llvm::SmallVectorImpl<const FieldDecl *> &Layout,
8696 bool AsBase) const {
8697 const CGRecordLayout &RL = CGF.getTypes().getCGRecordLayout(RD);
8698
8699 llvm::StructType *St =
8700 AsBase ? RL.getBaseSubobjectLLVMType() : RL.getLLVMType();
8701
8702 unsigned NumElements = St->getNumElements();
8703 llvm::SmallVector<
8704 llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>, 4>
8705 RecordLayout(NumElements);
8706
8707 // Fill bases.
8708 for (const auto &I : RD->bases()) {
8709 if (I.isVirtual())
8710 continue;
8711
8712 QualType BaseTy = I.getType();
8713 const auto *Base = BaseTy->getAsCXXRecordDecl();
8714 // Ignore empty bases.
8715 if (isEmptyRecordForLayout(CGF.getContext(), BaseTy) ||
8716 CGF.getContext()
8717 .getASTRecordLayout(Base)
8719 .isZero())
8720 continue;
8721
8722 unsigned FieldIndex = RL.getNonVirtualBaseLLVMFieldNo(Base);
8723 RecordLayout[FieldIndex] = Base;
8724 }
8725 // Fill in virtual bases.
8726 for (const auto &I : RD->vbases()) {
8727 QualType BaseTy = I.getType();
8728 // Ignore empty bases.
8729 if (isEmptyRecordForLayout(CGF.getContext(), BaseTy))
8730 continue;
8731
8732 const auto *Base = BaseTy->getAsCXXRecordDecl();
8733 unsigned FieldIndex = RL.getVirtualBaseIndex(Base);
8734 if (RecordLayout[FieldIndex])
8735 continue;
8736 RecordLayout[FieldIndex] = Base;
8737 }
8738 // Fill in all the fields.
8739 assert(!RD->isUnion() && "Unexpected union.");
8740 for (const auto *Field : RD->fields()) {
8741 // Fill in non-bitfields. (Bitfields always use a zero pattern, which we
8742 // will fill in later.)
8743 if (!Field->isBitField() &&
8744 !isEmptyFieldForLayout(CGF.getContext(), Field)) {
8745 unsigned FieldIndex = RL.getLLVMFieldNo(Field);
8746 RecordLayout[FieldIndex] = Field;
8747 }
8748 }
8749 for (const llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>
8750 &Data : RecordLayout) {
8751 if (Data.isNull())
8752 continue;
8753 if (const auto *Base = dyn_cast<const CXXRecordDecl *>(Data))
8754 getPlainLayout(Base, Layout, /*AsBase=*/true);
8755 else
8756 Layout.push_back(cast<const FieldDecl *>(Data));
8757 }
8758 }
8759
8760 /// Returns the address corresponding to \p PointerExpr.
8761 static Address getAttachPtrAddr(const Expr *PointerExpr,
8762 CodeGenFunction &CGF) {
8763 assert(PointerExpr && "Cannot get addr from null attach-ptr expr");
8764 Address AttachPtrAddr = Address::invalid();
8765
8766 if (auto *DRE = dyn_cast<DeclRefExpr>(PointerExpr)) {
8767 // If the pointer is a variable, we can use its address directly.
8768 AttachPtrAddr = CGF.EmitLValue(DRE).getAddress();
8769 } else if (auto *OASE = dyn_cast<ArraySectionExpr>(PointerExpr)) {
8770 AttachPtrAddr =
8771 CGF.EmitArraySectionExpr(OASE, /*IsLowerBound=*/true).getAddress();
8772 } else if (auto *ASE = dyn_cast<ArraySubscriptExpr>(PointerExpr)) {
8773 AttachPtrAddr = CGF.EmitLValue(ASE).getAddress();
8774 } else if (auto *ME = dyn_cast<MemberExpr>(PointerExpr)) {
8775 AttachPtrAddr = CGF.EmitMemberExpr(ME).getAddress();
8776 } else if (auto *UO = dyn_cast<UnaryOperator>(PointerExpr)) {
8777 assert(UO->getOpcode() == UO_Deref &&
8778 "Unexpected unary-operator on attach-ptr-expr");
8779 AttachPtrAddr = CGF.EmitLValue(UO).getAddress();
8780 }
8781 assert(AttachPtrAddr.isValid() &&
8782 "Failed to get address for attach pointer expression");
8783 return AttachPtrAddr;
8784 }
8785
8786 /// Get the address of the attach pointer, and a load from it, to get the
8787 /// pointee base address.
8788 /// \return A pair containing AttachPtrAddr and AttachPteeBaseAddr. The pair
8789 /// contains invalid addresses if \p AttachPtrExpr is null.
8790 static std::pair<Address, Address>
8791 getAttachPtrAddrAndPteeBaseAddr(const Expr *AttachPtrExpr,
8792 CodeGenFunction &CGF) {
8793
8794 if (!AttachPtrExpr)
8795 return {Address::invalid(), Address::invalid()};
8796
8797 Address AttachPtrAddr = getAttachPtrAddr(AttachPtrExpr, CGF);
8798 assert(AttachPtrAddr.isValid() && "Invalid attach pointer addr");
8799
8800 QualType AttachPtrType =
8803
8804 Address AttachPteeBaseAddr = CGF.EmitLoadOfPointer(
8805 AttachPtrAddr, AttachPtrType->castAs<PointerType>());
8806 assert(AttachPteeBaseAddr.isValid() && "Invalid attach pointee base addr");
8807
8808 return {AttachPtrAddr, AttachPteeBaseAddr};
8809 }
8810
8811 /// Returns whether an attach entry should be emitted for a map on
8812 /// \p MapBaseDecl on the directive \p CurDir.
8813 static bool
8814 shouldEmitAttachEntry(const Expr *PointerExpr, const ValueDecl *MapBaseDecl,
8815 CodeGenFunction &CGF,
8816 llvm::PointerUnion<const OMPExecutableDirective *,
8817 const OMPDeclareMapperDecl *>
8818 CurDir) {
8819 if (!PointerExpr)
8820 return false;
8821
8822 // Pointer attachment is needed at map-entering time or for declare
8823 // mappers.
8824 return isa<const OMPDeclareMapperDecl *>(CurDir) ||
8827 ->getDirectiveKind());
8828 }
8829
8830 /// Computes the attach-ptr expr for \p Components, and updates various maps
8831 /// with the information.
8832 /// It internally calls OMPClauseMappableExprCommon::findAttachPtrExpr()
8833 /// with the OpenMPDirectiveKind extracted from \p CurDir.
8834 /// It updates AttachPtrComputationOrderMap, AttachPtrComponentDepthMap, and
8835 /// AttachPtrExprMap.
8836 void collectAttachPtrExprInfo(
8838 llvm::PointerUnion<const OMPExecutableDirective *,
8839 const OMPDeclareMapperDecl *>
8840 CurDir) {
8841
8842 OpenMPDirectiveKind CurDirectiveID =
8844 ? OMPD_declare_mapper
8845 : cast<const OMPExecutableDirective *>(CurDir)->getDirectiveKind();
8846
8847 const auto &[AttachPtrExpr, Depth] =
8849 CurDirectiveID);
8850
8851 AttachPtrComputationOrderMap.try_emplace(
8852 AttachPtrExpr, AttachPtrComputationOrderMap.size());
8853 AttachPtrComponentDepthMap.try_emplace(AttachPtrExpr, Depth);
8854 AttachPtrExprMap.try_emplace(Components, AttachPtrExpr);
8855 }
8856
8857 /// Generate all the base pointers, section pointers, sizes, map types, and
8858 /// mappers for the extracted mappable expressions (all included in \a
8859 /// CombinedInfo). Also, for each item that relates with a device pointer, a
8860 /// pair of the relevant declaration and index where it occurs is appended to
8861 /// the device pointers info array.
8862 void generateAllInfoForClauses(
8863 ArrayRef<const OMPClause *> Clauses, MapCombinedInfoTy &CombinedInfo,
8864 llvm::OpenMPIRBuilder &OMPBuilder,
8865 const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
8866 llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
8867 // We have to process the component lists that relate with the same
8868 // declaration in a single chunk so that we can generate the map flags
8869 // correctly. Therefore, we organize all lists in a map.
8870 enum MapKind { Present, Allocs, Other, Total };
8871 llvm::MapVector<CanonicalDeclPtr<const Decl>,
8872 SmallVector<SmallVector<MapInfo, 8>, 4>>
8873 Info;
8874
8875 // Helper function to fill the information map for the different supported
8876 // clauses.
8877 auto &&InfoGen =
8878 [&Info, &SkipVarSet](
8879 const ValueDecl *D, MapKind Kind,
8881 OpenMPMapClauseKind MapType,
8882 ArrayRef<OpenMPMapModifierKind> MapModifiers,
8883 ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
8884 bool ReturnDevicePointer, bool IsImplicit, const ValueDecl *Mapper,
8885 const Expr *VarRef = nullptr, bool ForDeviceAddr = false) {
8886 if (SkipVarSet.contains(D))
8887 return;
8888 auto It = Info.try_emplace(D, Total).first;
8889 It->second[Kind].emplace_back(
8890 L, MapType, MapModifiers, MotionModifiers, ReturnDevicePointer,
8891 IsImplicit, Mapper, VarRef, ForDeviceAddr);
8892 };
8893
8894 for (const auto *Cl : Clauses) {
8895 const auto *C = dyn_cast<OMPMapClause>(Cl);
8896 if (!C)
8897 continue;
8898 MapKind Kind = Other;
8899 if (llvm::is_contained(C->getMapTypeModifiers(),
8900 OMPC_MAP_MODIFIER_present))
8901 Kind = Present;
8902 else if (C->getMapType() == OMPC_MAP_alloc)
8903 Kind = Allocs;
8904 const auto *EI = C->getVarRefs().begin();
8905 for (const auto L : C->component_lists()) {
8906 const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
8907 InfoGen(std::get<0>(L), Kind, std::get<1>(L), C->getMapType(),
8908 C->getMapTypeModifiers(), {},
8909 /*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(L),
8910 E);
8911 ++EI;
8912 }
8913 }
8914 for (const auto *Cl : Clauses) {
8915 const auto *C = dyn_cast<OMPToClause>(Cl);
8916 if (!C)
8917 continue;
8918 MapKind Kind = Other;
8919 if (llvm::is_contained(C->getMotionModifiers(),
8920 OMPC_MOTION_MODIFIER_present))
8921 Kind = Present;
8922 if (llvm::is_contained(C->getMotionModifiers(),
8923 OMPC_MOTION_MODIFIER_iterator)) {
8924 if (auto *IteratorExpr = dyn_cast<OMPIteratorExpr>(
8925 C->getIteratorModifier()->IgnoreParenImpCasts())) {
8926 const auto *VD = cast<VarDecl>(IteratorExpr->getIteratorDecl(0));
8927 CGF.EmitVarDecl(*VD);
8928 }
8929 }
8930
8931 const auto *EI = C->getVarRefs().begin();
8932 for (const auto L : C->component_lists()) {
8933 InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_to, {},
8934 C->getMotionModifiers(), /*ReturnDevicePointer=*/false,
8935 C->isImplicit(), std::get<2>(L), *EI);
8936 ++EI;
8937 }
8938 }
8939 for (const auto *Cl : Clauses) {
8940 const auto *C = dyn_cast<OMPFromClause>(Cl);
8941 if (!C)
8942 continue;
8943 MapKind Kind = Other;
8944 if (llvm::is_contained(C->getMotionModifiers(),
8945 OMPC_MOTION_MODIFIER_present))
8946 Kind = Present;
8947 if (llvm::is_contained(C->getMotionModifiers(),
8948 OMPC_MOTION_MODIFIER_iterator)) {
8949 if (auto *IteratorExpr = dyn_cast<OMPIteratorExpr>(
8950 C->getIteratorModifier()->IgnoreParenImpCasts())) {
8951 const auto *VD = cast<VarDecl>(IteratorExpr->getIteratorDecl(0));
8952 CGF.EmitVarDecl(*VD);
8953 }
8954 }
8955
8956 const auto *EI = C->getVarRefs().begin();
8957 for (const auto L : C->component_lists()) {
8958 InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_from, {},
8959 C->getMotionModifiers(),
8960 /*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(L),
8961 *EI);
8962 ++EI;
8963 }
8964 }
8965
8966 // Look at the use_device_ptr and use_device_addr clauses information and
8967 // mark the existing map entries as such. If there is no map information for
8968 // an entry in the use_device_ptr and use_device_addr list, we create one
8969 // with map type 'return_param' and zero size section. It is the user's
8970 // fault if that was not mapped before. If there is no map information, then
8971 // we defer the emission of that entry until all the maps for the same VD
8972 // have been handled.
8973 MapCombinedInfoTy UseDeviceDataCombinedInfo;
8974
8975 auto &&UseDeviceDataCombinedInfoGen =
8976 [&UseDeviceDataCombinedInfo](const ValueDecl *VD, llvm::Value *Ptr,
8977 CodeGenFunction &CGF, bool IsDevAddr,
8978 bool HasUdpFbNullify = false) {
8979 UseDeviceDataCombinedInfo.Exprs.push_back(VD);
8980 UseDeviceDataCombinedInfo.BasePointers.emplace_back(Ptr);
8981 UseDeviceDataCombinedInfo.DevicePtrDecls.emplace_back(VD);
8982 UseDeviceDataCombinedInfo.DevicePointers.emplace_back(
8983 IsDevAddr ? DeviceInfoTy::Address : DeviceInfoTy::Pointer);
8984 // FIXME: For use_device_addr on array-sections, this should
8985 // be the starting address of the section.
8986 // e.g. int *p;
8987 // ... use_device_addr(p[3])
8988 // &p[0], &p[3], /*size=*/0, RETURN_PARAM
8989 UseDeviceDataCombinedInfo.Pointers.push_back(Ptr);
8990 UseDeviceDataCombinedInfo.Sizes.push_back(
8991 llvm::Constant::getNullValue(CGF.Int64Ty));
8992 OpenMPOffloadMappingFlags Flags =
8993 OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM;
8994 if (HasUdpFbNullify)
8995 Flags |= OpenMPOffloadMappingFlags::OMP_MAP_FB_NULLIFY;
8996 UseDeviceDataCombinedInfo.Types.push_back(Flags);
8997 UseDeviceDataCombinedInfo.Mappers.push_back(nullptr);
8998 };
8999
9000 auto &&MapInfoGen =
9001 [&UseDeviceDataCombinedInfoGen](
9002 CodeGenFunction &CGF, const Expr *IE, const ValueDecl *VD,
9004 Components,
9005 bool IsDevAddr, bool IEIsAttachPtrForDevAddr = false,
9006 bool HasUdpFbNullify = false) {
9007 // We didn't find any match in our map information - generate a zero
9008 // size array section.
9009 llvm::Value *Ptr;
9010 if (IsDevAddr && !IEIsAttachPtrForDevAddr) {
9011 if (IE->isGLValue())
9012 Ptr = CGF.EmitLValue(IE).getPointer(CGF);
9013 else
9014 Ptr = CGF.EmitScalarExpr(IE);
9015 } else {
9016 Ptr = CGF.EmitLoadOfScalar(CGF.EmitLValue(IE), IE->getExprLoc());
9017 }
9018 bool TreatDevAddrAsDevPtr = IEIsAttachPtrForDevAddr;
9019 // For the purpose of address-translation, treat something like the
9020 // following:
9021 // int *p;
9022 // ... use_device_addr(p[1])
9023 // equivalent to
9024 // ... use_device_ptr(p)
9025 UseDeviceDataCombinedInfoGen(VD, Ptr, CGF, /*IsDevAddr=*/IsDevAddr &&
9026 !TreatDevAddrAsDevPtr,
9027 HasUdpFbNullify);
9028 };
9029
9030 auto &&IsMapInfoExist =
9031 [&Info, this](CodeGenFunction &CGF, const ValueDecl *VD, const Expr *IE,
9032 const Expr *DesiredAttachPtrExpr, bool IsDevAddr,
9033 bool HasUdpFbNullify = false) -> bool {
9034 // We potentially have map information for this declaration already.
9035 // Look for the first set of components that refer to it. If found,
9036 // return true.
9037 // If the first component is a member expression, we have to look into
9038 // 'this', which maps to null in the map of map information. Otherwise
9039 // look directly for the information.
9040 auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);
9041 if (It != Info.end()) {
9042 bool Found = false;
9043 for (auto &Data : It->second) {
9044 MapInfo *CI = nullptr;
9045 // We potentially have multiple maps for the same decl. We need to
9046 // only consider those for which the attach-ptr matches the desired
9047 // attach-ptr.
9048 auto *It = llvm::find_if(Data, [&](const MapInfo &MI) {
9049 if (MI.Components.back().getAssociatedDeclaration() != VD)
9050 return false;
9051
9052 const Expr *MapAttachPtr = getAttachPtrExpr(MI.Components);
9053 bool Match = AttachPtrComparator.areEqual(MapAttachPtr,
9054 DesiredAttachPtrExpr);
9055 return Match;
9056 });
9057
9058 if (It != Data.end())
9059 CI = &*It;
9060
9061 if (CI) {
9062 if (IsDevAddr) {
9063 CI->ForDeviceAddr = true;
9064 CI->ReturnDevicePointer = true;
9065 CI->HasUdpFbNullify = HasUdpFbNullify;
9066 Found = true;
9067 break;
9068 } else {
9069 auto PrevCI = std::next(CI->Components.rbegin());
9070 const auto *VarD = dyn_cast<VarDecl>(VD);
9071 const Expr *AttachPtrExpr = getAttachPtrExpr(CI->Components);
9072 if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
9073 isa<MemberExpr>(IE) ||
9074 !VD->getType().getNonReferenceType()->isPointerType() ||
9075 PrevCI == CI->Components.rend() ||
9076 isa<MemberExpr>(PrevCI->getAssociatedExpression()) || !VarD ||
9077 VarD->hasLocalStorage() ||
9078 (isa_and_nonnull<DeclRefExpr>(AttachPtrExpr) &&
9079 VD == cast<DeclRefExpr>(AttachPtrExpr)->getDecl())) {
9080 CI->ForDeviceAddr = IsDevAddr;
9081 CI->ReturnDevicePointer = true;
9082 CI->HasUdpFbNullify = HasUdpFbNullify;
9083 Found = true;
9084 break;
9085 }
9086 }
9087 }
9088 }
9089 return Found;
9090 }
9091 return false;
9092 };
9093
9094 // Look at the use_device_ptr clause information and mark the existing map
9095 // entries as such. If there is no map information for an entry in the
9096 // use_device_ptr list, we create one with map type 'alloc' and zero size
9097 // section. It is the user's fault if that was not mapped before. If there is
9098 // no map information and the pointer is a struct member, then we defer the
9099 // emission of that entry until the whole struct has been processed.
9100 for (const auto *Cl : Clauses) {
9101 const auto *C = dyn_cast<OMPUseDevicePtrClause>(Cl);
9102 if (!C)
9103 continue;
9104 bool HasUdpFbNullify =
9105 C->getFallbackModifier() == OMPC_USE_DEVICE_PTR_FALLBACK_fb_nullify;
9106 for (const auto L : C->component_lists()) {
9108 std::get<1>(L);
9109 assert(!Components.empty() &&
9110 "Not expecting empty list of components!");
9111 const ValueDecl *VD = Components.back().getAssociatedDeclaration();
9113 const Expr *IE = Components.back().getAssociatedExpression();
9114 // For use_device_ptr, we match an existing map clause if its attach-ptr
9115 // is same as the use_device_ptr operand. e.g.
9116 // map expr | use_device_ptr expr | current behavior
9117 // ---------|---------------------|-----------------
9118 // p[1] | p | match
9119 // ps->a | ps | match
9120 // p | p | no match
9121 const Expr *UDPOperandExpr =
9122 Components.front().getAssociatedExpression();
9123 if (IsMapInfoExist(CGF, VD, IE,
9124 /*DesiredAttachPtrExpr=*/UDPOperandExpr,
9125 /*IsDevAddr=*/false, HasUdpFbNullify))
9126 continue;
9127 MapInfoGen(CGF, IE, VD, Components, /*IsDevAddr=*/false,
9128 /*IEIsAttachPtrForDevAddr=*/false, HasUdpFbNullify);
9129 }
9130 }
9131
9132 llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>, 4> Processed;
9133 for (const auto *Cl : Clauses) {
9134 const auto *C = dyn_cast<OMPUseDeviceAddrClause>(Cl);
9135 if (!C)
9136 continue;
9137 for (const auto L : C->component_lists()) {
9139 std::get<1>(L);
9140 assert(!std::get<1>(L).empty() &&
9141 "Not expecting empty list of components!");
9142 const ValueDecl *VD = std::get<1>(L).back().getAssociatedDeclaration();
9143 if (!Processed.insert(VD).second)
9144 continue;
9146 // For use_device_addr, we match an existing map clause if the
9147 // use_device_addr operand's attach-ptr matches the map operand's
9148 // attach-ptr.
9149 // We could also restrict to only match cases when there is a full
9150 // match between the map/use_device_addr clause exprs, but that may be
9151 // unnecessary.
9152 //
9153 // map expr | use_device_addr expr | current | possible restrictive/
9154 // | | behavior | safer behavior
9155 // ---------|----------------------|-----------|-----------------------
9156 // p | p | match | match
9157 // p[0] | p[0] | match | match
9158 // p[0:1] | p[0] | match | no match
9159 // p[0:1] | p[2:1] | match | no match
9160 // p[1] | p[0] | match | no match
9161 // ps->a | ps->b | match | no match
9162 // p | p[0] | no match | no match
9163 // pp | pp[0][0] | no match | no match
9164 const Expr *UDAAttachPtrExpr = getAttachPtrExpr(Components);
9165 const Expr *IE = std::get<1>(L).back().getAssociatedExpression();
9166 assert((!UDAAttachPtrExpr || UDAAttachPtrExpr == IE) &&
9167 "use_device_addr operand has an attach-ptr, but does not match "
9168 "last component's expr.");
9169 if (IsMapInfoExist(CGF, VD, IE,
9170 /*DesiredAttachPtrExpr=*/UDAAttachPtrExpr,
9171 /*IsDevAddr=*/true))
9172 continue;
9173 MapInfoGen(CGF, IE, VD, Components,
9174 /*IsDevAddr=*/true,
9175 /*IEIsAttachPtrForDevAddr=*/UDAAttachPtrExpr != nullptr);
9176 }
9177 }
9178
9179 for (const auto &Data : Info) {
9180 MapCombinedInfoTy CurInfo;
9181 const Decl *D = Data.first;
9182 const ValueDecl *VD = cast_or_null<ValueDecl>(D);
9183 // Group component lists by their AttachPtrExpr and process them in order
9184 // of increasing complexity (nullptr first, then simple expressions like
9185 // p, then more complex ones like p[0], etc.)
9186 //
9187 // This is similar to how generateInfoForCaptureFromClauseInfo handles
9188 // grouping for target constructs.
9189 SmallVector<std::pair<const Expr *, MapInfo>, 16> AttachPtrMapInfoPairs;
9190
9191 // First, collect all MapData entries with their attach-ptr exprs.
9192 for (const auto &M : Data.second) {
9193 for (const MapInfo &L : M) {
9194 assert(!L.Components.empty() &&
9195 "Not expecting declaration with no component lists.");
9196
9197 const Expr *AttachPtrExpr = getAttachPtrExpr(L.Components);
9198 AttachPtrMapInfoPairs.emplace_back(AttachPtrExpr, L);
9199 }
9200 }
9201
9202 // Next, sort by increasing order of their complexity.
9203 llvm::stable_sort(AttachPtrMapInfoPairs,
9204 [this](const auto &LHS, const auto &RHS) {
9205 return AttachPtrComparator(LHS.first, RHS.first);
9206 });
9207
9208 // And finally, process them all in order, grouping those with
9209 // equivalent attach-ptr exprs together.
9210 auto *It = AttachPtrMapInfoPairs.begin();
9211 while (It != AttachPtrMapInfoPairs.end()) {
9212 const Expr *AttachPtrExpr = It->first;
9213
9214 SmallVector<MapInfo, 8> GroupLists;
9215 while (It != AttachPtrMapInfoPairs.end() &&
9216 (It->first == AttachPtrExpr ||
9217 AttachPtrComparator.areEqual(It->first, AttachPtrExpr))) {
9218 GroupLists.push_back(It->second);
9219 ++It;
9220 }
9221 assert(!GroupLists.empty() && "GroupLists should not be empty");
9222
9223 StructRangeInfoTy PartialStruct;
9224 AttachInfoTy AttachInfo;
9225 MapCombinedInfoTy GroupCurInfo;
9226 // Current group's struct base information:
9227 MapCombinedInfoTy GroupStructBaseCurInfo;
9228 for (const MapInfo &L : GroupLists) {
9229 // Remember the current base pointer index.
9230 unsigned CurrentBasePointersIdx = GroupCurInfo.BasePointers.size();
9231 unsigned StructBasePointersIdx =
9232 GroupStructBaseCurInfo.BasePointers.size();
9233
9234 GroupCurInfo.NonContigInfo.IsNonContiguous =
9235 L.Components.back().isNonContiguous();
9236 generateInfoForComponentList(
9237 L.MapType, L.MapModifiers, L.MotionModifiers, L.Components,
9238 GroupCurInfo, GroupStructBaseCurInfo, PartialStruct, AttachInfo,
9239 /*IsFirstComponentList=*/false, L.IsImplicit,
9240 /*GenerateAllInfoForClauses*/ true, L.Mapper, L.ForDeviceAddr, VD,
9241 L.VarRef, /*OverlappedElements*/ {});
9242
9243 // If this entry relates to a device pointer, set the relevant
9244 // declaration and add the 'return pointer' flag.
9245 if (L.ReturnDevicePointer) {
9246 // Check whether a value was added to either GroupCurInfo or
9247 // GroupStructBaseCurInfo and error if no value was added to either
9248 // of them:
9249 assert((CurrentBasePointersIdx < GroupCurInfo.BasePointers.size() ||
9250 StructBasePointersIdx <
9251 GroupStructBaseCurInfo.BasePointers.size()) &&
9252 "Unexpected number of mapped base pointers.");
9253
9254 // Choose a base pointer index which is always valid:
9255 const ValueDecl *RelevantVD =
9256 L.Components.back().getAssociatedDeclaration();
9257 assert(RelevantVD &&
9258 "No relevant declaration related with device pointer??");
9259
9260 // If GroupStructBaseCurInfo has been updated this iteration then
9261 // work on the first new entry added to it i.e. make sure that when
9262 // multiple values are added to any of the lists, the first value
9263 // added is being modified by the assignments below (not the last
9264 // value added).
9265 auto SetDevicePointerInfo = [&](MapCombinedInfoTy &Info,
9266 unsigned Idx) {
9267 Info.DevicePtrDecls[Idx] = RelevantVD;
9268 Info.DevicePointers[Idx] = L.ForDeviceAddr
9269 ? DeviceInfoTy::Address
9270 : DeviceInfoTy::Pointer;
9271 Info.Types[Idx] |=
9272 OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM;
9273 if (L.HasUdpFbNullify)
9274 Info.Types[Idx] |=
9275 OpenMPOffloadMappingFlags::OMP_MAP_FB_NULLIFY;
9276 };
9277
9278 if (StructBasePointersIdx <
9279 GroupStructBaseCurInfo.BasePointers.size())
9280 SetDevicePointerInfo(GroupStructBaseCurInfo,
9281 StructBasePointersIdx);
9282 else
9283 SetDevicePointerInfo(GroupCurInfo, CurrentBasePointersIdx);
9284 }
9285 }
9286
9287 // Unify entries in one list making sure the struct mapping precedes the
9288 // individual fields:
9289 MapCombinedInfoTy GroupUnionCurInfo;
9290 GroupUnionCurInfo.append(GroupStructBaseCurInfo);
9291 GroupUnionCurInfo.append(GroupCurInfo);
9292
9293 // If there is an entry in PartialStruct it means we have a struct with
9294 // individual members mapped. Emit an extra combined entry.
9295 if (PartialStruct.Base.isValid()) {
9296 // Prepend a synthetic dimension of length 1 to represent the
9297 // aggregated struct object. Using 1 (not 0, as 0 produced an
9298 // incorrect non-contiguous descriptor (DimSize==1), causing the
9299 // non-contiguous motion clause path to be skipped.) is important:
9300 // * It preserves the correct rank so targetDataUpdate() computes
9301 // DimSize == 2 for cases like strided array sections originating
9302 // from user-defined mappers (e.g. test with s.data[0:8:2]).
9303 GroupUnionCurInfo.NonContigInfo.Dims.insert(
9304 GroupUnionCurInfo.NonContigInfo.Dims.begin(), 1);
9305 emitCombinedEntry(
9306 CurInfo, GroupUnionCurInfo.Types, PartialStruct, AttachInfo,
9307 /*IsMapThis=*/!VD, OMPBuilder, VD,
9308 /*OffsetForMemberOfFlag=*/CombinedInfo.BasePointers.size(),
9309 /*NotTargetParams=*/true);
9310 }
9311
9312 // Append this group's results to the overall CurInfo in the correct
9313 // order: combined-entry -> original-field-entries -> attach-entry
9314 CurInfo.append(GroupUnionCurInfo);
9315 if (AttachInfo.isValid())
9316 emitAttachEntry(CGF, CurInfo, AttachInfo);
9317 }
9318
9319 // We need to append the results of this capture to what we already have.
9320 CombinedInfo.append(CurInfo);
9321 }
9322 // Append data for use_device_ptr/addr clauses.
9323 CombinedInfo.append(UseDeviceDataCombinedInfo);
9324 }
9325
9326public:
9327 MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF)
9328 : CurDir(&Dir), CGF(CGF), AttachPtrComparator(*this) {
9329 // Extract firstprivate clause information.
9330 for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>())
9331 for (const auto *D : C->varlist())
9332 FirstPrivateDecls.try_emplace(
9333 cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl()), C->isImplicit());
9334 // Extract implicit firstprivates from uses_allocators clauses.
9335 for (const auto *C : Dir.getClausesOfKind<OMPUsesAllocatorsClause>()) {
9336 for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
9337 OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
9338 if (const auto *DRE = dyn_cast_or_null<DeclRefExpr>(D.AllocatorTraits))
9339 FirstPrivateDecls.try_emplace(cast<VarDecl>(DRE->getDecl()),
9340 /*Implicit=*/true);
9341 else if (const auto *VD = dyn_cast<VarDecl>(
9342 cast<DeclRefExpr>(D.Allocator->IgnoreParenImpCasts())
9343 ->getDecl()))
9344 FirstPrivateDecls.try_emplace(VD, /*Implicit=*/true);
9345 }
9346 }
9347 // Extract defaultmap clause information.
9348 for (const auto *C : Dir.getClausesOfKind<OMPDefaultmapClause>())
9349 if (C->getDefaultmapModifier() == OMPC_DEFAULTMAP_MODIFIER_firstprivate)
9350 DefaultmapFirstprivateKinds.insert(C->getDefaultmapKind());
9351 // Extract device pointer clause information.
9352 for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>())
9353 for (auto L : C->component_lists())
9354 DevPointersMap[std::get<0>(L)].push_back(std::get<1>(L));
9355 // Extract device addr clause information.
9356 for (const auto *C : Dir.getClausesOfKind<OMPHasDeviceAddrClause>())
9357 for (auto L : C->component_lists())
9358 HasDevAddrsMap[std::get<0>(L)].push_back(std::get<1>(L));
9359 // Extract map information.
9360 for (const auto *C : Dir.getClausesOfKind<OMPMapClause>()) {
9361 if (C->getMapType() != OMPC_MAP_to)
9362 continue;
9363 for (auto L : C->component_lists()) {
9364 const ValueDecl *VD = std::get<0>(L);
9365 const auto *RD = VD ? VD->getType()
9366 .getCanonicalType()
9367 .getNonReferenceType()
9368 ->getAsCXXRecordDecl()
9369 : nullptr;
9370 if (RD && RD->isLambda())
9371 LambdasMap.try_emplace(std::get<0>(L), C);
9372 }
9373 }
9374
9375 auto CollectAttachPtrExprsForClauseComponents = [this](const auto *C) {
9376 for (auto L : C->component_lists()) {
9378 std::get<1>(L);
9379 if (!Components.empty())
9380 collectAttachPtrExprInfo(Components, CurDir);
9381 }
9382 };
9383
9384 // Populate the AttachPtrExprMap for all component lists from map-related
9385 // clauses.
9386 for (const auto *C : Dir.getClausesOfKind<OMPMapClause>())
9387 CollectAttachPtrExprsForClauseComponents(C);
9388 for (const auto *C : Dir.getClausesOfKind<OMPToClause>())
9389 CollectAttachPtrExprsForClauseComponents(C);
9390 for (const auto *C : Dir.getClausesOfKind<OMPFromClause>())
9391 CollectAttachPtrExprsForClauseComponents(C);
9392 for (const auto *C : Dir.getClausesOfKind<OMPUseDevicePtrClause>())
9393 CollectAttachPtrExprsForClauseComponents(C);
9394 for (const auto *C : Dir.getClausesOfKind<OMPUseDeviceAddrClause>())
9395 CollectAttachPtrExprsForClauseComponents(C);
9396 for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>())
9397 CollectAttachPtrExprsForClauseComponents(C);
9398 for (const auto *C : Dir.getClausesOfKind<OMPHasDeviceAddrClause>())
9399 CollectAttachPtrExprsForClauseComponents(C);
9400 }
9401
9402 /// Constructor for the declare mapper directive.
9403 MappableExprsHandler(const OMPDeclareMapperDecl &Dir, CodeGenFunction &CGF)
9404 : CurDir(&Dir), CGF(CGF), AttachPtrComparator(*this) {}
9405
9406 /// Generate code for the combined entry if we have a partially mapped struct
9407 /// and take care of the mapping flags of the arguments corresponding to
9408 /// individual struct members.
9409 /// If a valid \p AttachInfo exists, its pointee addr will be updated to point
9410 /// to the combined-entry's begin address, if emitted.
9411 /// \p PartialStruct contains attach base-pointer information.
9412 /// \returns The index of the combined entry if one was added, std::nullopt
9413 /// otherwise.
9414 void emitCombinedEntry(MapCombinedInfoTy &CombinedInfo,
9415 MapFlagsArrayTy &CurTypes,
9416 const StructRangeInfoTy &PartialStruct,
9417 AttachInfoTy &AttachInfo, bool IsMapThis,
9418 llvm::OpenMPIRBuilder &OMPBuilder, const ValueDecl *VD,
9419 unsigned OffsetForMemberOfFlag,
9420 bool NotTargetParams) const {
9421 if (CurTypes.size() == 1 &&
9422 ((CurTypes.back() & OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF) !=
9423 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF) &&
9424 !PartialStruct.IsArraySection)
9425 return;
9426 Address LBAddr = PartialStruct.LowestElem.second;
9427 Address HBAddr = PartialStruct.HighestElem.second;
9428 if (PartialStruct.HasCompleteRecord) {
9429 LBAddr = PartialStruct.LB;
9430 HBAddr = PartialStruct.LB;
9431 }
9432 CombinedInfo.Exprs.push_back(VD);
9433 // Base is the base of the struct
9434 CombinedInfo.BasePointers.push_back(PartialStruct.Base.emitRawPointer(CGF));
9435 CombinedInfo.DevicePtrDecls.push_back(nullptr);
9436 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
9437 // Pointer is the address of the lowest element
9438 llvm::Value *LB = LBAddr.emitRawPointer(CGF);
9439 const CXXMethodDecl *MD =
9440 CGF.CurFuncDecl ? dyn_cast<CXXMethodDecl>(CGF.CurFuncDecl) : nullptr;
9441 const CXXRecordDecl *RD = MD ? MD->getParent() : nullptr;
9442 bool HasBaseClass = RD && IsMapThis ? RD->getNumBases() > 0 : false;
9443 // There should not be a mapper for a combined entry.
9444 if (HasBaseClass) {
9445 // OpenMP 5.2 148:21:
9446 // If the target construct is within a class non-static member function,
9447 // and a variable is an accessible data member of the object for which the
9448 // non-static data member function is invoked, the variable is treated as
9449 // if the this[:1] expression had appeared in a map clause with a map-type
9450 // of tofrom.
9451 // Emit this[:1]
9452 CombinedInfo.Pointers.push_back(PartialStruct.Base.emitRawPointer(CGF));
9453 QualType Ty = MD->getFunctionObjectParameterType();
9454 llvm::Value *Size =
9455 CGF.Builder.CreateIntCast(CGF.getTypeSize(Ty), CGF.Int64Ty,
9456 /*isSigned=*/true);
9457 CombinedInfo.Sizes.push_back(Size);
9458 } else {
9459 CombinedInfo.Pointers.push_back(LB);
9460 // Size is (addr of {highest+1} element) - (addr of lowest element)
9461 llvm::Value *HB = HBAddr.emitRawPointer(CGF);
9462 llvm::Value *HAddr = CGF.Builder.CreateConstGEP1_32(
9463 HBAddr.getElementType(), HB, /*Idx0=*/1);
9464 llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(LB, CGF.VoidPtrTy);
9465 llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy);
9466 llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CHAddr, CLAddr);
9467 llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.Int64Ty,
9468 /*isSigned=*/false);
9469 CombinedInfo.Sizes.push_back(Size);
9470 }
9471 CombinedInfo.Mappers.push_back(nullptr);
9472 // Map type is always TARGET_PARAM, if generate info for captures.
9473 CombinedInfo.Types.push_back(
9474 NotTargetParams ? OpenMPOffloadMappingFlags::OMP_MAP_NONE
9475 : !PartialStruct.PreliminaryMapData.BasePointers.empty()
9476 ? OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ
9477 : OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM);
9478 // If any element has the present modifier, then make sure the runtime
9479 // doesn't attempt to allocate the struct.
9480 if (CurTypes.end() !=
9481 llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) {
9482 return static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
9483 Type & OpenMPOffloadMappingFlags::OMP_MAP_PRESENT);
9484 }))
9485 CombinedInfo.Types.back() |= OpenMPOffloadMappingFlags::OMP_MAP_PRESENT;
9486 // Remove TARGET_PARAM flag from the first element
9487 (*CurTypes.begin()) &= ~OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;
9488 // If any element has the ompx_hold modifier, then make sure the runtime
9489 // uses the hold reference count for the struct as a whole so that it won't
9490 // be unmapped by an extra dynamic reference count decrement. Add it to all
9491 // elements as well so the runtime knows which reference count to check
9492 // when determining whether it's time for device-to-host transfers of
9493 // individual elements.
9494 if (CurTypes.end() !=
9495 llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) {
9496 return static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
9497 Type & OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD);
9498 })) {
9499 CombinedInfo.Types.back() |= OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD;
9500 for (auto &M : CurTypes)
9501 M |= OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD;
9502 }
9503
9504 // All other current entries will be MEMBER_OF the combined entry
9505 // (except for PTR_AND_OBJ entries which do not have a placeholder value
9506 // 0xFFFF in the MEMBER_OF field, or ATTACH entries since they are expected
9507 // to be handled by themselves, after all other maps).
9508 OpenMPOffloadMappingFlags MemberOfFlag = OMPBuilder.getMemberOfFlag(
9509 OffsetForMemberOfFlag + CombinedInfo.BasePointers.size() - 1);
9510 for (auto &M : CurTypes)
9511 OMPBuilder.setCorrectMemberOfFlag(M, MemberOfFlag);
9512
9513 // When we are emitting a combined entry. If there were any pending
9514 // attachments to be done, we do them to the begin address of the combined
9515 // entry. Note that this means only one attachment per combined-entry will
9516 // be done. So, for instance, if we have:
9517 // S *ps;
9518 // ... map(ps->a, ps->b)
9519 // When we are emitting a combined entry. If AttachInfo is valid,
9520 // update the pointee address to point to the begin address of the combined
9521 // entry. This ensures that if we have multiple maps like:
9522 // `map(ps->a, ps->b)`, we still get a single ATTACH entry, like:
9523 //
9524 // &ps[0], &ps->a, sizeof(ps->a to ps->b), ALLOC // combined-entry
9525 // &ps[0], &ps->a, sizeof(ps->a), TO | FROM
9526 // &ps[0], &ps->b, sizeof(ps->b), TO | FROM
9527 // &ps, &ps->a, sizeof(void*), ATTACH // Use combined-entry's LB
9528 if (AttachInfo.isValid())
9529 AttachInfo.AttachPteeAddr = LBAddr;
9530 }
9531
9532 /// Generate all the base pointers, section pointers, sizes, map types, and
9533 /// mappers for the extracted mappable expressions (all included in \a
9534 /// CombinedInfo). Also, for each item that relates with a device pointer, a
9535 /// pair of the relevant declaration and index where it occurs is appended to
9536 /// the device pointers info array.
9537 void generateAllInfo(
9538 MapCombinedInfoTy &CombinedInfo, llvm::OpenMPIRBuilder &OMPBuilder,
9539 const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
9540 llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
9541 assert(isa<const OMPExecutableDirective *>(CurDir) &&
9542 "Expect a executable directive");
9543 const auto *CurExecDir = cast<const OMPExecutableDirective *>(CurDir);
9544 generateAllInfoForClauses(CurExecDir->clauses(), CombinedInfo, OMPBuilder,
9545 SkipVarSet);
9546 }
9547
9548 /// Generate all the base pointers, section pointers, sizes, map types, and
9549 /// mappers for the extracted map clauses of user-defined mapper (all included
9550 /// in \a CombinedInfo).
9551 void generateAllInfoForMapper(MapCombinedInfoTy &CombinedInfo,
9552 llvm::OpenMPIRBuilder &OMPBuilder) const {
9553 assert(isa<const OMPDeclareMapperDecl *>(CurDir) &&
9554 "Expect a declare mapper directive");
9555 const auto *CurMapperDir = cast<const OMPDeclareMapperDecl *>(CurDir);
9556 generateAllInfoForClauses(CurMapperDir->clauses(), CombinedInfo,
9557 OMPBuilder);
9558 }
9559
9560 /// Emit capture info for lambdas for variables captured by reference.
9561 void generateInfoForLambdaCaptures(
9562 const ValueDecl *VD, llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
9563 llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers) const {
9564 QualType VDType = VD->getType().getCanonicalType().getNonReferenceType();
9565 const auto *RD = VDType->getAsCXXRecordDecl();
9566 if (!RD || !RD->isLambda())
9567 return;
9568 Address VDAddr(Arg, CGF.ConvertTypeForMem(VDType),
9569 CGF.getContext().getDeclAlign(VD));
9570 LValue VDLVal = CGF.MakeAddrLValue(VDAddr, VDType);
9571 llvm::DenseMap<const ValueDecl *, FieldDecl *> Captures;
9572 FieldDecl *ThisCapture = nullptr;
9573 RD->getCaptureFields(Captures, ThisCapture);
9574 if (ThisCapture) {
9575 LValue ThisLVal =
9576 CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture);
9577 LValue ThisLValVal = CGF.EmitLValueForField(VDLVal, ThisCapture);
9578 LambdaPointers.try_emplace(ThisLVal.getPointer(CGF),
9579 VDLVal.getPointer(CGF));
9580 CombinedInfo.Exprs.push_back(VD);
9581 CombinedInfo.BasePointers.push_back(ThisLVal.getPointer(CGF));
9582 CombinedInfo.DevicePtrDecls.push_back(nullptr);
9583 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
9584 CombinedInfo.Pointers.push_back(ThisLValVal.getPointer(CGF));
9585 CombinedInfo.Sizes.push_back(
9586 CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy),
9587 CGF.Int64Ty, /*isSigned=*/true));
9588 CombinedInfo.Types.push_back(
9589 OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ |
9590 OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
9591 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF |
9592 OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT);
9593 CombinedInfo.Mappers.push_back(nullptr);
9594 }
9595 for (const LambdaCapture &LC : RD->captures()) {
9596 if (!LC.capturesVariable())
9597 continue;
9598 const VarDecl *VD = cast<VarDecl>(LC.getCapturedVar());
9599 if (LC.getCaptureKind() != LCK_ByRef && !VD->getType()->isPointerType())
9600 continue;
9601 auto It = Captures.find(VD);
9602 assert(It != Captures.end() && "Found lambda capture without field.");
9603 LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second);
9604 if (LC.getCaptureKind() == LCK_ByRef) {
9605 LValue VarLValVal = CGF.EmitLValueForField(VDLVal, It->second);
9606 LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
9607 VDLVal.getPointer(CGF));
9608 CombinedInfo.Exprs.push_back(VD);
9609 CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
9610 CombinedInfo.DevicePtrDecls.push_back(nullptr);
9611 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
9612 CombinedInfo.Pointers.push_back(VarLValVal.getPointer(CGF));
9613 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
9614 CGF.getTypeSize(
9616 CGF.Int64Ty, /*isSigned=*/true));
9617 } else {
9618 RValue VarRVal = CGF.EmitLoadOfLValue(VarLVal, RD->getLocation());
9619 LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
9620 VDLVal.getPointer(CGF));
9621 CombinedInfo.Exprs.push_back(VD);
9622 CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
9623 CombinedInfo.DevicePtrDecls.push_back(nullptr);
9624 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
9625 CombinedInfo.Pointers.push_back(VarRVal.getScalarVal());
9626 CombinedInfo.Sizes.push_back(llvm::ConstantInt::get(CGF.Int64Ty, 0));
9627 }
9628 CombinedInfo.Types.push_back(
9629 OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ |
9630 OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
9631 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF |
9632 OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT);
9633 CombinedInfo.Mappers.push_back(nullptr);
9634 }
9635 }
9636
9637 /// Set correct indices for lambdas captures.
9638 void adjustMemberOfForLambdaCaptures(
9639 llvm::OpenMPIRBuilder &OMPBuilder,
9640 const llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers,
9641 MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
9642 MapFlagsArrayTy &Types) const {
9643 for (unsigned I = 0, E = Types.size(); I < E; ++I) {
9644 // Set correct member_of idx for all implicit lambda captures.
9645 if (Types[I] != (OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ |
9646 OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
9647 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF |
9648 OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT))
9649 continue;
9650 llvm::Value *BasePtr = LambdaPointers.lookup(BasePointers[I]);
9651 assert(BasePtr && "Unable to find base lambda address.");
9652 int TgtIdx = -1;
9653 for (unsigned J = I; J > 0; --J) {
9654 unsigned Idx = J - 1;
9655 if (Pointers[Idx] != BasePtr)
9656 continue;
9657 TgtIdx = Idx;
9658 break;
9659 }
9660 assert(TgtIdx != -1 && "Unable to find parent lambda.");
9661 // All other current entries will be MEMBER_OF the combined entry
9662 // (except for PTR_AND_OBJ entries which do not have a placeholder value
9663 // 0xFFFF in the MEMBER_OF field).
9664 OpenMPOffloadMappingFlags MemberOfFlag =
9665 OMPBuilder.getMemberOfFlag(TgtIdx);
9666 OMPBuilder.setCorrectMemberOfFlag(Types[I], MemberOfFlag);
9667 }
9668 }
9669
9670 /// Populate component lists for non-lambda captured variables from map,
9671 /// is_device_ptr and has_device_addr clause info.
9672 void populateComponentListsForNonLambdaCaptureFromClauses(
9673 const ValueDecl *VD, MapDataArrayTy &DeclComponentLists,
9674 SmallVectorImpl<
9675 SmallVector<OMPClauseMappableExprCommon::MappableComponent, 8>>
9676 &StorageForImplicitlyAddedComponentLists) const {
9677 if (VD && LambdasMap.count(VD))
9678 return;
9679
9680 // For member fields list in is_device_ptr, store it in
9681 // DeclComponentLists for generating components info.
9683 auto It = DevPointersMap.find(VD);
9684 if (It != DevPointersMap.end())
9685 for (const auto &MCL : It->second)
9686 DeclComponentLists.emplace_back(MCL, OMPC_MAP_to, Unknown,
9687 /*IsImpicit = */ true, nullptr,
9688 nullptr);
9689 auto I = HasDevAddrsMap.find(VD);
9690 if (I != HasDevAddrsMap.end())
9691 for (const auto &MCL : I->second)
9692 DeclComponentLists.emplace_back(MCL, OMPC_MAP_tofrom, Unknown,
9693 /*IsImpicit = */ true, nullptr,
9694 nullptr);
9695 assert(isa<const OMPExecutableDirective *>(CurDir) &&
9696 "Expect a executable directive");
9697 const auto *CurExecDir = cast<const OMPExecutableDirective *>(CurDir);
9698 for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
9699 const auto *EI = C->getVarRefs().begin();
9700 for (const auto L : C->decl_component_lists(VD)) {
9701 const ValueDecl *VDecl, *Mapper;
9702 // The Expression is not correct if the mapping is implicit
9703 const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
9705 std::tie(VDecl, Components, Mapper) = L;
9706 assert(VDecl == VD && "We got information for the wrong declaration??");
9707 assert(!Components.empty() &&
9708 "Not expecting declaration with no component lists.");
9709 DeclComponentLists.emplace_back(Components, C->getMapType(),
9710 C->getMapTypeModifiers(),
9711 C->isImplicit(), Mapper, E);
9712 ++EI;
9713 }
9714 }
9715
9716 // For the target construct, if there's a map with a base-pointer that's
9717 // a member of an implicitly captured struct, of the current class,
9718 // we need to emit an implicit map on the pointer.
9719 if (isOpenMPTargetExecutionDirective(CurExecDir->getDirectiveKind()))
9720 addImplicitMapForAttachPtrBaseIfMemberOfCapturedVD(
9721 VD, DeclComponentLists, StorageForImplicitlyAddedComponentLists);
9722
9723 llvm::stable_sort(DeclComponentLists, [](const MapData &LHS,
9724 const MapData &RHS) {
9725 ArrayRef<OpenMPMapModifierKind> MapModifiers = std::get<2>(LHS);
9726 OpenMPMapClauseKind MapType = std::get<1>(RHS);
9727 bool HasPresent =
9728 llvm::is_contained(MapModifiers, clang::OMPC_MAP_MODIFIER_present);
9729 bool HasAllocs = MapType == OMPC_MAP_alloc;
9730 MapModifiers = std::get<2>(RHS);
9731 MapType = std::get<1>(LHS);
9732 bool HasPresentR =
9733 llvm::is_contained(MapModifiers, clang::OMPC_MAP_MODIFIER_present);
9734 bool HasAllocsR = MapType == OMPC_MAP_alloc;
9735 return (HasPresent && !HasPresentR) || (HasAllocs && !HasAllocsR);
9736 });
9737 }
9738
9739 /// On a target construct, if there's an implicit map on a struct, or that of
9740 /// this[:], and an explicit map with a member of that struct/class as the
9741 /// base-pointer, we need to make sure that base-pointer is implicitly mapped,
9742 /// to make sure we don't map the full struct/class. For example:
9743 ///
9744 /// \code
9745 /// struct S {
9746 /// int dummy[10000];
9747 /// int *p;
9748 /// void f1() {
9749 /// #pragma omp target map(p[0:1])
9750 /// (void)this;
9751 /// }
9752 /// }; S s;
9753 ///
9754 /// void f2() {
9755 /// #pragma omp target map(s.p[0:10])
9756 /// (void)s;
9757 /// }
9758 /// \endcode
9759 ///
9760 /// Only `this->p` and `s.p` should be mapped in the two cases above.
9761 //
9762 // OpenMP 6.0: 7.9.6 map clause, pg 285
9763 // If a list item with an implicitly determined data-mapping attribute does
9764 // not have any corresponding storage in the device data environment prior to
9765 // a task encountering the construct associated with the map clause, and one
9766 // or more contiguous parts of the original storage are either list items or
9767 // base pointers to list items that are explicitly mapped on the construct,
9768 // only those parts of the original storage will have corresponding storage in
9769 // the device data environment as a result of the map clauses on the
9770 // construct.
// \p CapturedVD is the captured variable, or nullptr when the capture is
// `this`. Every implicit map created here is appended to
// \p DeclComponentLists, and the component vector backing it is appended to
// \p ComponentVectorStorage.
9771 void addImplicitMapForAttachPtrBaseIfMemberOfCapturedVD(
9772 const ValueDecl *CapturedVD, MapDataArrayTy &DeclComponentLists,
9773 SmallVectorImpl<
9774 SmallVector<OMPClauseMappableExprCommon::MappableComponent, 8>>
9775 &ComponentVectorStorage) const {
// A null captured declaration denotes the `this` capture.
9776 bool IsThisCapture = CapturedVD == nullptr;
9777
// Visit every recorded (component-list, attach-ptr expression) pair.
9778 for (const auto &ComponentsAndAttachPtr : AttachPtrExprMap) {
9780 ComponentsWithAttachPtr = ComponentsAndAttachPtr.first;
9781 const Expr *AttachPtrExpr = ComponentsAndAttachPtr.second;
// Component lists without an attach-ptr need no implicit base map.
9782 if (!AttachPtrExpr)
9783 continue;
9784
// Only attach-ptrs that are member accesses are of interest here.
9785 const auto *ME = dyn_cast<MemberExpr>(AttachPtrExpr);
9786 if (!ME)
9787 continue;
9788
9789 const Expr *Base = ME->getBase()->IgnoreParenImpCasts();
9790
9791 // If we are handling a "this" capture, then we are looking for
9792 // attach-ptrs of form `this->p`, either explicitly or implicitly.
9793 if (IsThisCapture && !ME->isImplicitCXXThis() && !isa<CXXThisExpr>(Base))
9794 continue;
9795
// NOTE(review): this filter is subsumed by the stricter check that follows
// (same conditions plus the arrow test), so it looks redundant.
9796 if (!IsThisCapture && (!isa<DeclRefExpr>(Base) ||
9797 cast<DeclRefExpr>(Base)->getDecl() != CapturedVD))
9798 continue;
9799
9800 // For non-this captures, we are looking for attach-ptrs of form `s.p`:
9801 // a dot (not arrow) member access whose base is a direct reference to
9802 // the captured variable.
9803 if (!IsThisCapture && (ME->isArrow() || !isa<DeclRefExpr>(Base) ||
9804 cast<DeclRefExpr>(Base)->getDecl() != CapturedVD))
9805 continue;
9806
9807 // Check if we have an existing map on either:
9808 // this[:], s, this->p, or s.p, in which case, we don't need to add
9809 // an implicit one for the attach-ptr s.p/this->p.
9810 bool FoundExistingMap = false;
9811 for (const MapData &ExistingL : DeclComponentLists) {
9813 ExistingComponents = std::get<0>(ExistingL);
9814
9815 if (ExistingComponents.empty())
9816 continue;
9817
9818 // Inspect the expression of the list's first component.
9819 const auto &FirstComponent = ExistingComponents.front();
9820 const Expr *FirstExpr = FirstComponent.getAssociatedExpression();
9821
9822 if (!FirstExpr)
9823 continue;
9824
9825 // First check if we have a map like map(this->p) or map(s.p).
9826 if (AttachPtrComparator.areEqual(FirstExpr, AttachPtrExpr)) {
9827 FoundExistingMap = true;
9828 break;
9829 }
9830
9831 // Check if we have a map like this[0:1]
9832 if (IsThisCapture) {
9833 if (const auto *OASE = dyn_cast<ArraySectionExpr>(FirstExpr)) {
9834 if (isa<CXXThisExpr>(OASE->getBase()->IgnoreParenImpCasts())) {
9835 FoundExistingMap = true;
9836 break;
9837 }
9838 }
// The DeclRefExpr check below only applies to non-this captures.
9839 continue;
9840 }
9841
9842 // When the attach-ptr is something like `s.p`, check if
9843 // `s` itself is mapped explicitly.
9844 if (const auto *DRE = dyn_cast<DeclRefExpr>(FirstExpr)) {
9845 if (DRE->getDecl() == CapturedVD) {
9846 FoundExistingMap = true;
9847 break;
9848 }
9849 }
9850 }
9851
9852 if (FoundExistingMap)
9853 continue;
9854
9855 // If no base map is found, we need to create an implicit map for the
9856 // attach-pointer expr.
9857
// NOTE(review): the entry added to DeclComponentLists below is built from
// AttachPtrComponents, which lives inside ComponentVectorStorage. If MapData
// keeps a non-owning view of it (presumably it does; verify), growing the
// storage on a later iteration could move the inner vector. Confirm that the
// caller's storage keeps element addresses stable.
9858 ComponentVectorStorage.emplace_back();
9859 auto &AttachPtrComponents = ComponentVectorStorage.back();
9860
9862 bool SeenAttachPtrComponent = false;
9863 // For creating a map on the attach-ptr `s.p/this->p`, we copy all
9864 // components from the component-list which has `s.p/this->p`
9865 // as the attach-ptr, starting from the component which matches
9866 // `s.p/this->p`. This way, we'll have component-lists of
9867 // `s.p` -> `s`, and `this->p` -> `this`.
9868 for (size_t i = 0; i < ComponentsWithAttachPtr.size(); ++i) {
9869 const auto &Component = ComponentsWithAttachPtr[i];
9870 const Expr *ComponentExpr = Component.getAssociatedExpression();
9871
// Skip components until the one whose expression is the attach-ptr itself
// (compared by pointer identity); copy that one and everything after it.
9872 if (!SeenAttachPtrComponent && ComponentExpr != AttachPtrExpr)
9873 continue;
9874 SeenAttachPtrComponent = true;
9875
9876 AttachPtrComponents.emplace_back(Component.getAssociatedExpression(),
9877 Component.getAssociatedDeclaration(),
9878 Component.isNonContiguous());
9879 }
9880 assert(!AttachPtrComponents.empty() &&
9881 "Could not populate component-lists for mapping attach-ptr");
9882
// Record the new map as an implicit `tofrom` map with no mapper; the
// attach-ptr expression is passed in the last (variable reference) slot.
9883 DeclComponentLists.emplace_back(
9884 AttachPtrComponents, OMPC_MAP_tofrom, Unknown,
9885 /*IsImplicit=*/true, /*mapper=*/nullptr, AttachPtrExpr);
9886 }
9887 }
9888
9889 /// For a capture that has an associated clause, generate the base pointers,
9890 /// section pointers, sizes, map types, and mappers (all included in
9891 /// \a CurCaptureVarInfo).
///
/// \param DeclComponentListsFromClauses map data collected for the capture.
/// \param Cap the capture being processed; must not capture a variable array
///        type.
/// \param Arg value used as base pointer and pointer when the captured
///        variable is found in DevPointersMap or HasDevAddrsMap.
/// \param CurCaptureVarInfo receives the generated entries. Whether it is
///        empty on entry decides if the first processed group may carry the
///        TARGET_PARAM flag.
/// \param OMPBuilder forwarded to emitCombinedEntry.
/// \param OffsetForMemberOfFlag forwarded to emitCombinedEntry.
9892 void generateInfoForCaptureFromClauseInfo(
9893 const MapDataArrayTy &DeclComponentListsFromClauses,
9894 const CapturedStmt::Capture *Cap, llvm::Value *Arg,
9895 MapCombinedInfoTy &CurCaptureVarInfo, llvm::OpenMPIRBuilder &OMPBuilder,
9896 unsigned OffsetForMemberOfFlag) const {
9897 assert(!Cap->capturesVariableArrayType() &&
9898 "Not expecting to generate map info for a variable array type!");
9899
9900 // We need to know when we are generating information for the first component.
// VD is the canonical captured declaration, or null for a `this` capture.
9901 const ValueDecl *VD = Cap->capturesThis()
9902 ? nullptr
9903 : Cap->getCapturedVar()->getCanonicalDecl();
9904
9905 // for map(to: lambda): skip here, processing it in
9906 // generateDefaultMapInfo
9907 if (LambdasMap.count(VD))
9908 return;
9909
9910 // If this declaration appears in an is_device_ptr clause we just have to
9911 // pass the pointer by value. If it is a reference to a declaration, we just
9912 // pass its value.
9913 if (VD && (DevPointersMap.count(VD) || HasDevAddrsMap.count(VD))) {
9914 CurCaptureVarInfo.Exprs.push_back(VD);
9915 CurCaptureVarInfo.BasePointers.emplace_back(Arg);
9916 CurCaptureVarInfo.DevicePtrDecls.emplace_back(VD);
9917 CurCaptureVarInfo.DevicePointers.emplace_back(DeviceInfoTy::Pointer);
9918 CurCaptureVarInfo.Pointers.push_back(Arg);
// The entry's size is that of a `void *`.
9919 CurCaptureVarInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
9920 CGF.getTypeSize(CGF.getContext().VoidPtrTy), CGF.Int64Ty,
9921 /*isSigned=*/true));
9922 CurCaptureVarInfo.Types.push_back(
9923 OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
9924 OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM);
9925 CurCaptureVarInfo.Mappers.push_back(nullptr);
9926 return;
9927 }
9928
// Helper that generates the entries for one group of component lists and
// appends them to CurCaptureVarInfo. Its first parameter shadows the
// enclosing function's parameter of the same name.
9929 auto GenerateInfoForComponentLists =
9930 [&](ArrayRef<MapData> DeclComponentListsFromClauses,
9931 bool IsEligibleForTargetParamFlag) {
9932 MapCombinedInfoTy CurInfoForComponentLists;
9933 StructRangeInfoTy PartialStruct;
9934 AttachInfoTy AttachInfo;
9935
9936 if (DeclComponentListsFromClauses.empty())
9937 return;
9938
9939 generateInfoForCaptureFromComponentLists(
9940 VD, DeclComponentListsFromClauses, CurInfoForComponentLists,
9941 PartialStruct, AttachInfo, IsEligibleForTargetParamFlag);
9942
9943 // If there is an entry in PartialStruct it means we have a
9944 // struct with individual members mapped. Emit an extra combined
9945 // entry.
9946 if (PartialStruct.Base.isValid()) {
9947 CurCaptureVarInfo.append(PartialStruct.PreliminaryMapData);
9948 emitCombinedEntry(
9949 CurCaptureVarInfo, CurInfoForComponentLists.Types,
9950 PartialStruct, AttachInfo, Cap->capturesThis(), OMPBuilder,
9951 /*VD=*/nullptr, OffsetForMemberOfFlag,
9952 /*NotTargetParams*/ !IsEligibleForTargetParamFlag);
9953 }
9954
9955 // We do the appends to get the entries in the following order:
9956 // combined-entry -> individual-field-entries -> attach-entry,
9957 CurCaptureVarInfo.append(CurInfoForComponentLists);
9958 if (AttachInfo.isValid())
9959 emitAttachEntry(CGF, CurCaptureVarInfo, AttachInfo);
9960 };
9961
9962 // Group component lists by their AttachPtrExpr and process them in order
9963 // of increasing complexity (nullptr first, then simple expressions like p,
9964 // then more complex ones like p[0], etc.)
9965 //
9966 // This ensures that we:
9967 // * handle maps that can contribute towards setting the kernel argument,
9968 // (e.g. map(ps), or map(ps[0])), before any that cannot (e.g. ps->pt->d).
9969 // * allocate a single contiguous storage for all exprs with the same
9970 // captured var and having the same attach-ptr.
9971 //
9972 // Example: The map clauses below should be handled grouped together based
9973 // on their attachable-base-pointers:
9974 // map-clause | attachable-base-pointer
9975 // --------------------------+------------------------
9976 // map(p, ps) | nullptr
9977 // map(p[0]) | p
9978 // map(p[0]->b, p[0]->c) | p[0]
9979 // map(ps->d, ps->e, ps->pt) | ps
9980 // map(ps->pt->d, ps->pt->e) | ps->pt
9981
9982 // First, collect all MapData entries with their attach-ptr exprs.
9983 SmallVector<std::pair<const Expr *, MapData>, 16> AttachPtrMapDataPairs;
9984
9985 for (const MapData &L : DeclComponentListsFromClauses) {
9987 std::get<0>(L);
9988 const Expr *AttachPtrExpr = getAttachPtrExpr(Components);
9989 AttachPtrMapDataPairs.emplace_back(AttachPtrExpr, L);
9990 }
9991
9992 // Next, sort by increasing order of their complexity.
// The sort is stable, so entries that compare equivalent keep their
// original relative order.
9993 llvm::stable_sort(AttachPtrMapDataPairs,
9994 [this](const auto &LHS, const auto &RHS) {
9995 return AttachPtrComparator(LHS.first, RHS.first);
9996 });
9997
// True when no entry has been recorded for this capture before this point.
9998 bool NoDefaultMappingDoneForVD = CurCaptureVarInfo.BasePointers.empty();
9999 bool IsFirstGroup = true;
10000
10001 // And finally, process them all in order, grouping those with
10002 // equivalent attach-ptr exprs together.
10003 auto *It = AttachPtrMapDataPairs.begin();
10004 while (It != AttachPtrMapDataPairs.end()) {
10005 const Expr *AttachPtrExpr = It->first;
10006
// Collect the run of consecutive entries whose attach-ptr is identical or
// equal according to AttachPtrComparator.
10007 MapDataArrayTy GroupLists;
10008 while (It != AttachPtrMapDataPairs.end() &&
10009 (It->first == AttachPtrExpr ||
10010 AttachPtrComparator.areEqual(It->first, AttachPtrExpr))) {
10011 GroupLists.push_back(It->second);
10012 ++It;
10013 }
10014 assert(!GroupLists.empty() && "GroupLists should not be empty");
10015
10016 // Determine if this group of component-lists is eligible for TARGET_PARAM
10017 // flag. Only the first group processed should be eligible, and only if no
10018 // default mapping was done.
10019 bool IsEligibleForTargetParamFlag =
10020 IsFirstGroup && NoDefaultMappingDoneForVD;
10021
10022 GenerateInfoForComponentLists(GroupLists, IsEligibleForTargetParamFlag);
10023 IsFirstGroup = false;
10024 }
10025 }
10026
10027 /// Generate the base pointers, section pointers, sizes, map types, and
10028 /// mappers associated to \a DeclComponentLists for a given capture
10029 /// \a VD (all included in \a CurComponentListInfo).
///
/// The work is done in three phases: (1) find pairs of component lists where
/// one overlaps the other, (2) order the overlapped lists of each base list,
/// (3) call generateInfoForComponentList for the base lists that have
/// overlapped lists and then for the remaining lists.
///
/// \param PartialStruct forwarded to generateInfoForComponentList.
/// \param AttachInfo forwarded to generateInfoForComponentList.
/// \param IsListEligibleForTargetParamFlag initial value of the flag passed
///        for the first component list that is processed; it is cleared
///        afterwards.
10030 void generateInfoForCaptureFromComponentLists(
10031 const ValueDecl *VD, ArrayRef<MapData> DeclComponentLists,
10032 MapCombinedInfoTy &CurComponentListInfo, StructRangeInfoTy &PartialStruct,
10033 AttachInfoTy &AttachInfo, bool IsListEligibleForTargetParamFlag) const {
10034 // Find overlapping elements (including the offset from the base element).
// Keyed by the address of the base MapData inside DeclComponentLists.
10035 llvm::SmallDenseMap<
10036 const MapData *,
10037 llvm::SmallVector<
10039 4>
10040 OverlappedData;
// Count is the index of the first list after L, so each unordered pair of
// lists is compared exactly once.
10041 size_t Count = 0;
10042 for (const MapData &L : DeclComponentLists) {
10044 OpenMPMapClauseKind MapType;
10045 ArrayRef<OpenMPMapModifierKind> MapModifiers;
10046 bool IsImplicit;
10047 const ValueDecl *Mapper;
10048 const Expr *VarRef;
10049 std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
10050 L;
10051 ++Count;
10052 for (const MapData &L1 : ArrayRef(DeclComponentLists).slice(Count)) {
// Only Components1 is used from this unpacking; the other variables are
// overwritten with L1's values as a side effect.
10054 std::tie(Components1, MapType, MapModifiers, IsImplicit, Mapper,
10055 VarRef) = L1;
// Walk both lists in reverse, in lock-step, while the components have the
// same statement class and the same associated declaration.
10056 auto CI = Components.rbegin();
10057 auto CE = Components.rend();
10058 auto SI = Components1.rbegin();
10059 auto SE = Components1.rend();
10060 for (; CI != CE && SI != SE; ++CI, ++SI) {
10061 if (CI->getAssociatedExpression()->getStmtClass() !=
10062 SI->getAssociatedExpression()->getStmtClass())
10063 break;
10064 // Are we dealing with different variables/fields?
10065 if (CI->getAssociatedDeclaration() != SI->getAssociatedDeclaration())
10066 break;
10067 }
10068 // Found overlapping if, at least for one component, reached the head
10069 // of the components list.
10070 if (CI == CE || SI == SE) {
10071 // Ignore it if it is the same component.
10072 if (CI == CE && SI == SE)
10073 continue;
// It refers to the first unmatched component of the list that was not
// exhausted.
10074 const auto It = (SI == SE) ? CI : SI;
10075 // If one component is a pointer and another one is a kind of
10076 // dereference of this pointer (array subscript, section, dereference,
10077 // etc.), it is not an overlapping.
10078 // Same, if one component is a base and another component is a
10079 // dereferenced pointer memberexpr with the same base.
10080 if (!isa<MemberExpr>(It->getAssociatedExpression()) ||
10081 (std::prev(It)->getAssociatedDeclaration() &&
10082 std::prev(It)
10083 ->getAssociatedDeclaration()
10084 ->getType()
10085 ->isPointerType()) ||
10086 (It->getAssociatedDeclaration() &&
10087 It->getAssociatedDeclaration()->getType()->isPointerType() &&
10088 std::next(It) != CE && std::next(It) != SE))
10089 continue;
// The exhausted list is the base; the other list is recorded as overlapping
// it.
10090 const MapData &BaseData = CI == CE ? L : L1;
10092 SI == SE ? Components : Components1;
10093 OverlappedData[&BaseData].push_back(SubData);
10094 }
10095 }
10096 }
10097 // Sort the overlapped elements for each item.
// Layout is only filled in when there is overlapped data; it is consulted by
// the comparator below for fields that have different parents.
10098 llvm::SmallVector<const FieldDecl *, 4> Layout;
10099 if (!OverlappedData.empty()) {
// NOTE(review): VD is dereferenced here. The caller in this listing passes a
// null VD for `this` captures; confirm overlapped data cannot occur then.
10100 const Type *BaseType = VD->getType().getCanonicalType().getTypePtr();
// Strip pointer/array levels until the type no longer changes.
10101 const Type *OrigType = BaseType->getPointeeOrArrayElementType();
10102 while (BaseType != OrigType) {
10103 BaseType = OrigType->getCanonicalTypeInternal().getTypePtr();
10104 OrigType = BaseType->getPointeeOrArrayElementType();
10105 }
10106
10107 if (const auto *CRD = BaseType->getAsCXXRecordDecl())
10108 getPlainLayout(CRD, Layout, /*AsBase=*/false);
10109 else {
10110 const auto *RD = BaseType->getAsRecordDecl();
10111 Layout.append(RD->field_begin(), RD->field_end());
10112 }
10113 }
10114 for (auto &Pair : OverlappedData) {
10115 llvm::stable_sort(
10116 Pair.getSecond(),
10117 [&Layout](
10120 Second) {
// Skip the part that both lists have in common (same walk as above).
10121 auto CI = First.rbegin();
10122 auto CE = First.rend();
10123 auto SI = Second.rbegin();
10124 auto SE = Second.rend();
10125 for (; CI != CE && SI != SE; ++CI, ++SI) {
10126 if (CI->getAssociatedExpression()->getStmtClass() !=
10127 SI->getAssociatedExpression()->getStmtClass())
10128 break;
10129 // Are we dealing with different variables/fields?
10130 if (CI->getAssociatedDeclaration() !=
10131 SI->getAssociatedDeclaration())
10132 break;
10133 }
10134
10135 // Lists contain the same elements.
10136 if (CI == CE && SI == SE)
10137 return false;
10138
10139 // List with less elements is less than list with more elements.
10140 if (CI == CE || SI == SE)
10141 return CI == CE;
10142
// Both lists diverge at a field: order by field index when the fields share
// a parent, otherwise by which of the two appears first in Layout.
10143 const auto *FD1 = cast<FieldDecl>(CI->getAssociatedDeclaration());
10144 const auto *FD2 = cast<FieldDecl>(SI->getAssociatedDeclaration());
10145 if (FD1->getParent() == FD2->getParent())
10146 return FD1->getFieldIndex() < FD2->getFieldIndex();
// NOTE(review): the result is dereferenced without checking for the end
// iterator; this assumes one of the two fields is always found in Layout.
10147 const auto *It =
10148 llvm::find_if(Layout, [FD1, FD2](const FieldDecl *FD) {
10149 return FD == FD1 || FD == FD2;
10150 });
10151 return *It == FD1;
10152 });
10153 }
10154
10155 // Associated with a capture, because the mapping flags depend on it.
10156 // Go through all of the elements with the overlapped elements.
10157 bool AddTargetParamFlag = IsListEligibleForTargetParamFlag;
10158 MapCombinedInfoTy StructBaseCombinedInfo;
10159 for (const auto &Pair : OverlappedData) {
10160 const MapData &L = *Pair.getFirst();
10162 OpenMPMapClauseKind MapType;
10163 ArrayRef<OpenMPMapModifierKind> MapModifiers;
10164 bool IsImplicit;
10165 const ValueDecl *Mapper;
10166 const Expr *VarRef;
10167 std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
10168 L;
10169 ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
10170 OverlappedComponents = Pair.getSecond();
10171 generateInfoForComponentList(
10172 MapType, MapModifiers, {}, Components, CurComponentListInfo,
10173 StructBaseCombinedInfo, PartialStruct, AttachInfo, AddTargetParamFlag,
10174 IsImplicit, /*GenerateAllInfoForClauses*/ false, Mapper,
10175 /*ForDeviceAddr=*/false, VD, VarRef, OverlappedComponents);
// Only the first processed list may carry the flag.
10176 AddTargetParamFlag = false;
10177 }
10178 // Go through other elements without overlapped elements.
10179 for (const MapData &L : DeclComponentLists) {
10181 OpenMPMapClauseKind MapType;
10182 ArrayRef<OpenMPMapModifierKind> MapModifiers;
10183 bool IsImplicit;
10184 const ValueDecl *Mapper;
10185 const Expr *VarRef;
10186 std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
10187 L;
// Lists that served as a base above were already handled there.
10188 auto It = OverlappedData.find(&L);
10189 if (It == OverlappedData.end())
10190 generateInfoForComponentList(
10191 MapType, MapModifiers, {}, Components, CurComponentListInfo,
10192 StructBaseCombinedInfo, PartialStruct, AttachInfo,
10193 AddTargetParamFlag, IsImplicit, /*GenerateAllInfoForClauses*/ false,
10194 Mapper, /*ForDeviceAddr=*/false, VD, VarRef,
10195 /*OverlappedElements*/ {});
// Cleared after every visited list, whether or not it was processed here.
10196 AddTargetParamFlag = false;
10197 }
10198 }
10199
10200 /// Check if a variable should be treated as firstprivate due to explicit
10201 /// firstprivate clause or defaultmap(firstprivate:...).
10202 bool isEffectivelyFirstprivate(const VarDecl *VD, QualType Type) const {
10203 // Check explicit firstprivate clauses (not implicit from defaultmap)
10204 auto I = FirstPrivateDecls.find(VD);
10205 if (I != FirstPrivateDecls.end() && !I->getSecond())
10206 return true; // Explicit firstprivate only
10207
10208 // Check defaultmap(firstprivate:scalar) for scalar types
10209 if (DefaultmapFirstprivateKinds.count(OMPC_DEFAULTMAP_scalar)) {
10210 if (Type->isScalarType())
10211 return true;
10212 }
10213
10214 // Check defaultmap(firstprivate:pointer) for pointer types
10215 if (DefaultmapFirstprivateKinds.count(OMPC_DEFAULTMAP_pointer)) {
10216 if (Type->isAnyPointerType())
10217 return true;
10218 }
10219
10220 // Check defaultmap(firstprivate:aggregate) for aggregate types
10221 if (DefaultmapFirstprivateKinds.count(OMPC_DEFAULTMAP_aggregate)) {
10222 if (Type->isAggregateType())
10223 return true;
10224 }
10225
10226 // Check defaultmap(firstprivate:all) for all types
10227 return DefaultmapFirstprivateKinds.count(OMPC_DEFAULTMAP_all);
10228 }
10229
10230 /// Generate the default map information for a given capture \a CI,
10231 /// record field declaration \a RI and captured value \a CV.
10232 void generateDefaultMapInfo(const CapturedStmt::Capture &CI,
10233 const FieldDecl &RI, llvm::Value *CV,
10234 MapCombinedInfoTy &CombinedInfo) const {
10235 bool IsImplicit = true;
10236 // Do the default mapping.
10237 if (CI.capturesThis()) {
10238 CombinedInfo.Exprs.push_back(nullptr);
10239 CombinedInfo.BasePointers.push_back(CV);
10240 CombinedInfo.DevicePtrDecls.push_back(nullptr);
10241 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
10242 CombinedInfo.Pointers.push_back(CV);
10243 const auto *PtrTy = cast<PointerType>(RI.getType().getTypePtr());
10244 CombinedInfo.Sizes.push_back(
10245 CGF.Builder.CreateIntCast(CGF.getTypeSize(PtrTy->getPointeeType()),
10246 CGF.Int64Ty, /*isSigned=*/true));
10247 // Default map type.
10248 CombinedInfo.Types.push_back(OpenMPOffloadMappingFlags::OMP_MAP_TO |
10249 OpenMPOffloadMappingFlags::OMP_MAP_FROM);
10250 } else if (CI.capturesVariableByCopy()) {
10251 const VarDecl *VD = CI.getCapturedVar();
10252 CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
10253 CombinedInfo.BasePointers.push_back(CV);
10254 CombinedInfo.DevicePtrDecls.push_back(nullptr);
10255 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
10256 CombinedInfo.Pointers.push_back(CV);
10257 bool IsFirstprivate =
10258 isEffectivelyFirstprivate(VD, RI.getType().getNonReferenceType());
10259
10260 if (!RI.getType()->isAnyPointerType()) {
10261 // We have to signal to the runtime captures passed by value that are
10262 // not pointers.
10263 CombinedInfo.Types.push_back(
10264 OpenMPOffloadMappingFlags::OMP_MAP_LITERAL);
10265 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
10266 CGF.getTypeSize(RI.getType()), CGF.Int64Ty, /*isSigned=*/true));
10267 } else if (IsFirstprivate) {
10268 // Firstprivate pointers should be passed by value (as literals)
10269 // without performing a present table lookup at runtime.
10270 CombinedInfo.Types.push_back(
10271 OpenMPOffloadMappingFlags::OMP_MAP_LITERAL);
10272 // Use zero size for pointer literals (just passing the pointer value)
10273 CombinedInfo.Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
10274 } else {
10275 // Pointers are implicitly mapped with a zero size and no flags
10276 // (other than first map that is added for all implicit maps).
10277 CombinedInfo.Types.push_back(OpenMPOffloadMappingFlags::OMP_MAP_NONE);
10278 CombinedInfo.Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
10279 }
10280 auto I = FirstPrivateDecls.find(VD);
10281 if (I != FirstPrivateDecls.end())
10282 IsImplicit = I->getSecond();
10283 } else {
10284 assert(CI.capturesVariable() && "Expected captured reference.");
10285 const auto *PtrTy = cast<ReferenceType>(RI.getType().getTypePtr());
10286 QualType ElementType = PtrTy->getPointeeType();
10287 const VarDecl *VD = CI.getCapturedVar();
10288 bool IsFirstprivate = isEffectivelyFirstprivate(VD, ElementType);
10289 CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
10290 CombinedInfo.BasePointers.push_back(CV);
10291 CombinedInfo.DevicePtrDecls.push_back(nullptr);
10292 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
10293
10294 // For firstprivate pointers, pass by value instead of dereferencing
10295 if (IsFirstprivate && ElementType->isAnyPointerType()) {
10296 // Treat as a literal value (pass the pointer value itself)
10297 CombinedInfo.Pointers.push_back(CV);
10298 // Use zero size for pointer literals
10299 CombinedInfo.Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
10300 CombinedInfo.Types.push_back(
10301 OpenMPOffloadMappingFlags::OMP_MAP_LITERAL);
10302 } else {
10303 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
10304 CGF.getTypeSize(ElementType), CGF.Int64Ty, /*isSigned=*/true));
10305 // The default map type for a scalar/complex type is 'to' because by
10306 // default the value doesn't have to be retrieved. For an aggregate
10307 // type, the default is 'tofrom'.
10308 CombinedInfo.Types.push_back(getMapModifiersForPrivateClauses(CI));
10309 CombinedInfo.Pointers.push_back(CV);
10310 }
10311 auto I = FirstPrivateDecls.find(VD);
10312 if (I != FirstPrivateDecls.end())
10313 IsImplicit = I->getSecond();
10314 }
10315 // Every default map produces a single argument which is a target parameter.
10316 CombinedInfo.Types.back() |=
10317 OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;
10318
10319 // Add flag stating this is an implicit map.
10320 if (IsImplicit)
10321 CombinedInfo.Types.back() |= OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT;
10322
10323 // No user-defined mapper for default mapping.
10324 CombinedInfo.Mappers.push_back(nullptr);
10325 }
10326};
10327} // anonymous namespace
10328
10329 // Try to extract the base declaration from a `this->x` expression if possible.
// Returns the member declaration when E (ignoring parentheses and casts) is an
// array section whose base (ignoring parentheses and implicit casts) is a
// member expression. Returns nullptr for a null E and for any other
// expression form.
10331 if (!E)
10332 return nullptr;
10333
10334 if (const auto *OASE = dyn_cast<ArraySectionExpr>(E->IgnoreParenCasts()))
10335 if (const MemberExpr *ME =
10336 dyn_cast<MemberExpr>(OASE->getBase()->IgnoreParenImpCasts()))
10337 return ME->getMemberDecl();
10338 return nullptr;
10339 }
10340
10341 /// Emit a string constant containing the names of the values mapped to the
10342 /// offloading runtime library.
///
/// \param CGF used to query debug info for remapping the file name.
/// \param OMPBuilder builder that creates (or reuses) the source-location
///        string.
/// \param MapExprs the mapped declaration and/or expression to describe.
/// \returns the default source-location string when \p MapExprs has neither a
///          declaration nor an expression; otherwise a source-location string
///          built from the file name, the printed name, and the line and
///          column of the chosen location.
10343 static llvm::Constant *
10344 emitMappingInformation(CodeGenFunction &CGF, llvm::OpenMPIRBuilder &OMPBuilder,
10345 MappableExprsHandler::MappingExprInfo &MapExprs) {
10346
10347 uint32_t SrcLocStrSize;
10348 if (!MapExprs.getMapDecl() && !MapExprs.getMapExpr())
10349 return OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize);
10350
// Pick the location: with only an expression, prefer the location of the
// member declaration found by getDeclFromThisExpr and fall back to the
// expression's own location; otherwise use the declaration's location.
10351 SourceLocation Loc;
10352 if (!MapExprs.getMapDecl() && MapExprs.getMapExpr()) {
10353 if (const ValueDecl *VD = getDeclFromThisExpr(MapExprs.getMapExpr()))
10354 Loc = VD->getLocation();
10355 else
10356 Loc = MapExprs.getMapExpr()->getExprLoc();
10357 } else {
10358 Loc = MapExprs.getMapDecl()->getLocation();
10359 }
10360
// Pick the name: the pretty-printed expression when there is one, otherwise
// the declaration's name.
10361 std::string ExprName;
10362 if (MapExprs.getMapExpr()) {
10364 llvm::raw_string_ostream OS(ExprName);
10365 MapExprs.getMapExpr()->printPretty(OS, nullptr, P);
10366 } else {
10367 ExprName = MapExprs.getMapDecl()->getNameAsString();
10368 }
10369
// When debug info is available, the file name goes through its path
// remapping.
10370 std::string FileName;
10372 if (auto *DbgInfo = CGF.getDebugInfo())
10373 FileName = DbgInfo->remapDIPath(PLoc.getFilename());
10374 else
10375 FileName = PLoc.getFilename();
10376 return OMPBuilder.getOrCreateSrcLocStr(FileName, ExprName, PLoc.getLine(),
10377 PLoc.getColumn(), SrcLocStrSize);
10378 }
10379 /// Emit the arrays used to pass the captures and map information to the
10380 /// offloading runtime library. If there is no map or capture information,
10381 /// return nullptr by reference.
///
/// \param CombinedInfo the map information to emit; its DevicePtrDecls and
///        Mappers arrays are read by the callbacks below.
/// \param Info receives the results: Info.RTArgs is passed to the builder,
///        Info.CaptureDeviceAddrMap and Info.HasMapper are updated by the
///        callbacks.
/// \param IsNonContiguous forwarded to the builder (default false).
/// \param ForEndCall forwarded to the builder (default false).
10383 CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
10384 CGOpenMPRuntime::TargetDataInfo &Info, llvm::OpenMPIRBuilder &OMPBuilder,
10385 bool IsNonContiguous = false, bool ForEndCall = false) {
10386 CodeGenModule &CGM = CGF.CGM;
10387
// Insertion points handed to the builder: the function's alloca insertion
// point, and the current position of CGF's IR builder.
10388 using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
10389 InsertPointTy AllocaIP(CGF.AllocaInsertPt->getParent(),
10390 CGF.AllocaInsertPt->getIterator());
10391 InsertPointTy CodeGenIP(CGF.Builder.GetInsertBlock(),
10392 CGF.Builder.GetInsertPoint());
10393
// Called with an entry index and a value; when the entry has a device
// pointer declaration, records the value for it (an already recorded value
// is kept, since try_emplace does not overwrite).
10394 auto DeviceAddrCB = [&](unsigned int I, llvm::Value *NewDecl) {
10395 if (const ValueDecl *DevVD = CombinedInfo.DevicePtrDecls[I]) {
10396 Info.CaptureDeviceAddrMap.try_emplace(DevVD, NewDecl);
10397 }
10398 };
10399
// Called with an entry index; yields the mapper function for entries that
// have a user-defined mapper (and notes that in Info.HasMapper), or nullptr.
10400 auto CustomMapperCB = [&](unsigned int I) {
10401 llvm::Function *MFunc = nullptr;
10402 if (CombinedInfo.Mappers[I]) {
10403 Info.HasMapper = true;
10405 cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I]));
10406 }
10407 return MFunc;
10408 };
// cantFail: an error from the builder is treated as a fatal internal error
// rather than being propagated.
10409 cantFail(OMPBuilder.emitOffloadingArraysAndArgs(
10410 AllocaIP, CodeGenIP, Info, Info.RTArgs, CombinedInfo, CustomMapperCB,
10411 IsNonContiguous, ForEndCall, DeviceAddrCB));
10412 }
10413
10414 /// Check for inner distribute directive.
///
/// Inspects the body of \c D's innermost captured statement. If the examined
/// child statement is an OpenMP executable directive, the result depends on
/// the kind of \c D:
///  - target: the nested directive if it is a distribute directive or
///    'teams loop'; for a nested 'teams', the distribute directive nested one
///    level deeper, if any; otherwise nullptr.
///  - target teams: the nested directive if it is a distribute directive,
///    otherwise nullptr.
///  - target parallel, target simd, target parallel for[ simd]: nullptr.
///  - any other kind: not expected (llvm_unreachable).
/// Returns nullptr when the examined child is not an executable directive.
10415 static const OMPExecutableDirective *
10417 const auto *CS = D.getInnermostCapturedStmt();
10418 const auto *Body =
10419 CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
10420 const Stmt *ChildStmt =
10422
10423 if (const auto *NestedDir =
10424 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
10425 OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind();
10426 switch (D.getDirectiveKind()) {
10427 case OMPD_target:
10428 // For now, treat 'target' with nested 'teams loop' as if it's
10429 // distributed (target teams distribute).
10430 if (isOpenMPDistributeDirective(DKind) || DKind == OMPD_teams_loop)
10431 return NestedDir;
10432 if (DKind == OMPD_teams) {
// Look one level deeper: the single child of the nested 'teams' region.
10433 Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(
10434 /*IgnoreCaptured=*/true);
10435 if (!Body)
10436 return nullptr;
10437 ChildStmt = CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);
10438 if (const auto *NND =
10439 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
10440 DKind = NND->getDirectiveKind();
10441 if (isOpenMPDistributeDirective(DKind))
10442 return NND;
10443 }
10444 }
10445 return nullptr;
10446 case OMPD_target_teams:
10447 if (isOpenMPDistributeDirective(DKind))
10448 return NestedDir;
10449 return nullptr;
10450 case OMPD_target_parallel:
10451 case OMPD_target_simd:
10452 case OMPD_target_parallel_for:
10453 case OMPD_target_parallel_for_simd:
10454 return nullptr;
// All remaining directive kinds are not expected as the outer directive
// here.
10455 case OMPD_target_teams_distribute:
10456 case OMPD_target_teams_distribute_simd:
10457 case OMPD_target_teams_distribute_parallel_for:
10458 case OMPD_target_teams_distribute_parallel_for_simd:
10459 case OMPD_parallel:
10460 case OMPD_for:
10461 case OMPD_parallel_for:
10462 case OMPD_parallel_master:
10463 case OMPD_parallel_sections:
10464 case OMPD_for_simd:
10465 case OMPD_parallel_for_simd:
10466 case OMPD_cancel:
10467 case OMPD_cancellation_point:
10468 case OMPD_ordered:
10469 case OMPD_threadprivate:
10470 case OMPD_allocate:
10471 case OMPD_task:
10472 case OMPD_simd:
10473 case OMPD_tile:
10474 case OMPD_unroll:
10475 case OMPD_sections:
10476 case OMPD_section:
10477 case OMPD_single:
10478 case OMPD_master:
10479 case OMPD_critical:
10480 case OMPD_taskyield:
10481 case OMPD_barrier:
10482 case OMPD_taskwait:
10483 case OMPD_taskgroup:
10484 case OMPD_atomic:
10485 case OMPD_flush:
10486 case OMPD_depobj:
10487 case OMPD_scan:
10488 case OMPD_teams:
10489 case OMPD_target_data:
10490 case OMPD_target_exit_data:
10491 case OMPD_target_enter_data:
10492 case OMPD_distribute:
10493 case OMPD_distribute_simd:
10494 case OMPD_distribute_parallel_for:
10495 case OMPD_distribute_parallel_for_simd:
10496 case OMPD_teams_distribute:
10497 case OMPD_teams_distribute_simd:
10498 case OMPD_teams_distribute_parallel_for:
10499 case OMPD_teams_distribute_parallel_for_simd:
10500 case OMPD_target_update:
10501 case OMPD_declare_simd:
10502 case OMPD_declare_variant:
10503 case OMPD_begin_declare_variant:
10504 case OMPD_end_declare_variant:
10505 case OMPD_declare_target:
10506 case OMPD_end_declare_target:
10507 case OMPD_declare_reduction:
10508 case OMPD_declare_mapper:
10509 case OMPD_taskloop:
10510 case OMPD_taskloop_simd:
10511 case OMPD_master_taskloop:
10512 case OMPD_master_taskloop_simd:
10513 case OMPD_parallel_master_taskloop:
10514 case OMPD_parallel_master_taskloop_simd:
10515 case OMPD_requires:
10516 case OMPD_metadirective:
10517 case OMPD_unknown:
10518 default:
10519 llvm_unreachable("Unexpected directive.");
10520 }
10521 }
10522
10523 return nullptr;
10524 }
10525
10526/// Emit the user-defined mapper function. The code generation follows the
10527/// pattern in the example below.
10528/// \code
10529/// void .omp_mapper.<type_name>.<mapper_id>.(void *rt_mapper_handle,
10530/// void *base, void *begin,
10531/// int64_t size, int64_t type,
10532/// void *name = nullptr) {
10533/// // Allocate space for an array section first.
10534/// if ((size > 1 || (base != begin)) && !maptype.IsDelete)
10535/// __tgt_push_mapper_component(rt_mapper_handle, base, begin,
10536/// size*sizeof(Ty), clearToFromMember(type));
10537/// // Map members.
10538/// for (unsigned i = 0; i < size; i++) {
10539/// // For each component specified by this mapper:
10540/// for (auto c : begin[i]->all_components) {
10541/// if (c.hasMapper())
10542/// (*c.Mapper())(rt_mapper_handle, c.arg_base, c.arg_begin, c.arg_size,
10543/// c.arg_type, c.arg_name);
10544/// else
10545/// __tgt_push_mapper_component(rt_mapper_handle, c.arg_base,
10546/// c.arg_begin, c.arg_size, c.arg_type,
10547/// c.arg_name);
10548/// }
10549/// }
10550/// // Delete the array section.
10551/// if (size > 1 && maptype.IsDelete)
10552/// __tgt_push_mapper_component(rt_mapper_handle, base, begin,
10553/// size*sizeof(Ty), clearToFromMember(type));
10554/// }
10555/// \endcode
10557 CodeGenFunction *CGF) {
// Nothing to do when a mapper function is already recorded for D.
10558 if (UDMMap.count(D) > 0)
10559 return;
10560 ASTContext &C = CGM.getContext();
10561 QualType Ty = D->getType();
10562 auto *MapperVarDecl =
10564 CharUnits ElementSize = C.getTypeSizeInChars(Ty);
10565 llvm::Type *ElemTy = CGM.getTypes().ConvertTypeForMem(Ty);
10566
// A separate CodeGenFunction is used for the mapper body. CombinedInfo is
// filled by the first callback and read by the second one.
10567 CodeGenFunction MapperCGF(CGM);
10568 MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
// Callback for the builder: makes the mapper's declared variable refer to
// the current element (PtrPHI), generates the map information for that
// element, and returns it.
10569 auto PrivatizeAndGenMapInfoCB =
10570 [&](llvm::OpenMPIRBuilder::InsertPointTy CodeGenIP, llvm::Value *PtrPHI,
10571 llvm::Value *BeginArg) -> llvm::OpenMPIRBuilder::MapInfosTy & {
10572 MapperCGF.Builder.restoreIP(CodeGenIP);
10573
10574 // Privatize the declared variable of mapper to be the current array
10575 // element.
// The element's alignment is derived from pointer alignment at BeginArg and
// the element size.
10576 Address PtrCurrent(
10577 PtrPHI, ElemTy,
10578 Address(BeginArg, MapperCGF.VoidPtrTy, CGM.getPointerAlign())
10579 .getAlignment()
10580 .alignmentOfArrayElement(ElementSize));
10582 Scope.addPrivate(MapperVarDecl, PtrCurrent);
10583 (void)Scope.Privatize();
10584
10585 // Get map clause information.
10586 MappableExprsHandler MEHandler(*D, MapperCGF);
10587 MEHandler.generateAllInfoForMapper(CombinedInfo, OMPBuilder);
10588
// Name strings for the mapped expressions are only produced when debug info
// is enabled.
10589 auto FillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) {
10590 return emitMappingInformation(MapperCGF, OMPBuilder, MapExpr);
10591 };
10592 if (CGM.getCodeGenOpts().getDebugInfo() !=
10593 llvm::codegenoptions::NoDebugInfo) {
10594 CombinedInfo.Names.resize(CombinedInfo.Exprs.size());
10595 llvm::transform(CombinedInfo.Exprs, CombinedInfo.Names.begin(),
10596 FillInfoMap);
10597 }
10598
10599 return CombinedInfo;
10600 };
10601
// Callback for the builder: yields the mapper function to call for entry I,
// or nullptr when that entry has no user-defined mapper.
10602 auto CustomMapperCB = [&](unsigned I) {
10603 llvm::Function *MapperFunc = nullptr;
10604 if (CombinedInfo.Mappers[I]) {
10605 // Call the corresponding mapper function.
10607 cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I]));
10608 assert(MapperFunc && "Expect a valid mapper function is available.");
10609 }
10610 return MapperFunc;
10611 };
10612
// The function name is built from "omp_mapper", the mangled canonical type
// name, and the mapper's own name.
10613 SmallString<64> TyStr;
10614 llvm::raw_svector_ostream Out(TyStr);
10615 CGM.getCXXABI().getMangleContext().mangleCanonicalTypeName(Ty, Out);
10616 std::string Name = getName({"omp_mapper", TyStr, D->getName()});
10617
// cantFail: an error from the builder is treated as a fatal internal error.
10618 llvm::Function *NewFn = cantFail(OMPBuilder.emitUserDefinedMapper(
10619 PrivatizeAndGenMapInfoCB, ElemTy, Name, CustomMapperCB));
10620 UDMMap.try_emplace(D, NewFn);
// When a CodeGenFunction is given, also record D under its current function.
10621 if (CGF)
10622 FunctionUDMMap[CGF->CurFn].push_back(D);
10623 }
10624
// NOTE(review): the line carrying this definition's name and return type is
// missing from this listing, as is one statement between the early return and
// the final lookup (presumably the call that creates the mapper on a miss --
// confirm against the full file).
//
// Returns the entry recorded in UDMMap for the declare-mapper declaration D.
10626 const OMPDeclareMapperDecl *D) {
10627 auto I = UDMMap.find(D);
// Fast path: an entry for D is already recorded.
10628 if (I != UDMMap.end())
10629 return I->second;
10631 return UDMMap.lookup(D);
10632}
10633
// NOTE(review): the lines carrying this definition's name, return type and
// leading parameters are missing from this listing; a call elsewhere in this
// file passes (CGF, D, SizeEmitter) to emitTargetNumIterationsCall.
//
// Selects the loop directive whose iteration count should be emitted and hands
// it to SizeEmitter. Yields an i64 zero when no suitable directive is found or
// when SizeEmitter returns null.
10636 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
10637 const OMPLoopDirective &D)>
10638 SizeEmitter) {
10639 OpenMPDirectiveKind Kind = D.getDirectiveKind();
// Start from D itself; it is kept when D is both a teams and a distribute
// directive, or is 'target_teams_loop'.
10640 const OMPExecutableDirective *TD = &D;
10641 // Get nested teams distribute kind directive, if any. For now, treat
10642 // 'target_teams_loop' as if it's really a target_teams_distribute.
10643 if ((!isOpenMPDistributeDirective(Kind) || !isOpenMPTeamsDirective(Kind)) &&
10644 Kind != OMPD_target_teams_loop)
10645 TD = getNestedDistributeDirective(CGM.getContext(), D);
// No nested distribute directive was found: report zero iterations.
10646 if (!TD)
10647 return llvm::ConstantInt::get(CGF.Int64Ty, 0);
10648
10649 const auto *LD = cast<OMPLoopDirective>(TD);
10650 if (llvm::Value *NumIterations = SizeEmitter(CGF, *LD))
10651 return NumIterations;
// SizeEmitter produced no value: fall back to zero.
10652 return llvm::ConstantInt::get(CGF.Int64Ty, 0);
10653}
10654
/// Emit the host-side fallback for a target region.
///
/// When \p OffloadingMandatory is set, an 'unreachable' instruction is emitted
/// instead of a host call. Otherwise \p OutlinedFn is called with the captured
/// variables followed by one trailing null pointer argument.
// NOTE(review): the parameter line declaring CapturedVars is missing from this
// listing.
10655static void
10656emitTargetCallFallback(CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn,
10657 const OMPExecutableDirective &D,
10659 bool RequiresOuterTask, const CapturedStmt &CS,
10660 bool OffloadingMandatory, CodeGenFunction &CGF) {
10661 if (OffloadingMandatory) {
10662 CGF.Builder.CreateUnreachable();
10663 } else {
10664 if (RequiresOuterTask) {
// Rebuild the captured-variable list from CS before forming the call.
10665 CapturedVars.clear();
10666 CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
10667 }
10668 llvm::SmallVector<llvm::Value *, 16> Args(CapturedVars.begin(),
10669 CapturedVars.end());
// The argument list ends with a null pointer.
10670 Args.push_back(llvm::Constant::getNullValue(CGF.Builder.getPtrTy()));
10671 OMPRuntime->emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn,
10672 Args);
10673 }
10674}
10675
10676static llvm::Value *emitDeviceID(
10677 llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
10678 CodeGenFunction &CGF) {
10679 // Emit device ID if any.
10680 llvm::Value *DeviceID;
10681 if (Device.getPointer()) {
10682 assert((Device.getInt() == OMPC_DEVICE_unknown ||
10683 Device.getInt() == OMPC_DEVICE_device_num) &&
10684 "Expected device_num modifier.");
10685 llvm::Value *DevVal = CGF.EmitScalarExpr(Device.getPointer());
10686 DeviceID =
10687 CGF.Builder.CreateIntCast(DevVal, CGF.Int64Ty, /*isSigned=*/true);
10688 } else {
10689 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
10690 }
10691 return DeviceID;
10692}
10693
/// Compute the dynamic group-private memory size for D together with the
/// fallback policy to use for it.
///
/// The size comes from an OMPDynGroupprivateClause if D has one, otherwise
/// from an OMPXDynCGroupMemClause, and is cast (as unsigned) to i32; with
/// neither clause it is the i32 constant 0. The fallback starts as Abort and is
/// only changed by the OMPDynGroupprivateClause's fallback modifier.
// NOTE(review): the line carrying this definition's name and parameters is
// missing from this listing (a call elsewhere in this file invokes
// emitDynCGroupMem(D, CGF) and unpacks a pair), as is one case label directly
// before the "default for dyn_groupprivate" comment below.
10694static std::pair<llvm::Value *, OMPDynGroupprivateFallbackType>
10696 llvm::Value *DynGP = CGF.Builder.getInt32(0);
10697 auto DynGPFallback = OMPDynGroupprivateFallbackType::Abort;
10698
10699 if (auto *DynGPClause = D.getSingleClause<OMPDynGroupprivateClause>()) {
// Cleanups created while evaluating the size expression are run when this
// scope ends.
10700 CodeGenFunction::RunCleanupsScope DynGPScope(CGF);
10701 llvm::Value *DynGPVal =
10702 CGF.EmitScalarExpr(DynGPClause->getSize(), /*IgnoreResultAssign=*/true);
10703 DynGP = CGF.Builder.CreateIntCast(DynGPVal, CGF.Int32Ty,
10704 /*isSigned=*/false);
10705 auto FallbackModifier = DynGPClause->getDynGroupprivateFallbackModifier();
// Translate the clause's fallback modifier into the runtime fallback type.
10706 switch (FallbackModifier) {
10707 case OMPC_DYN_GROUPPRIVATE_FALLBACK_abort:
10708 DynGPFallback = OMPDynGroupprivateFallbackType::Abort;
10709 break;
10710 case OMPC_DYN_GROUPPRIVATE_FALLBACK_null:
10711 DynGPFallback = OMPDynGroupprivateFallbackType::Null;
10712 break;
10713 case OMPC_DYN_GROUPPRIVATE_FALLBACK_default_mem:
10715 // This is the default for dyn_groupprivate.
10716 DynGPFallback = OMPDynGroupprivateFallbackType::DefaultMem;
10717 break;
10718 default:
10719 llvm_unreachable("Unknown fallback modifier for OpenMP dyn_groupprivate");
10720 }
10721 } else if (auto *OMPXDynCGClause =
10722 D.getSingleClause<OMPXDynCGroupMemClause>()) {
// This clause supplies only a size; the fallback stays at its initial value.
10723 CodeGenFunction::RunCleanupsScope DynCGMemScope(CGF);
10724 llvm::Value *DynCGMemVal = CGF.EmitScalarExpr(OMPXDynCGClause->getSize(),
10725 /*IgnoreResultAssign=*/true);
10726 DynGP = CGF.Builder.CreateIntCast(DynCGMemVal, CGF.Int32Ty,
10727 /*isSigned=*/false);
10728 }
10729 return {DynGP, DynGPFallback};
10730}
10731
// NOTE(review): several lines of this definition are missing from this
// listing, including the one carrying its name (a call elsewhere in this file
// passes this parameter list to genMapInfoForCaptures), the head of the
// capture loop, and parts of a few declarations.
//
// Walks the captures of CS in step with the fields of the captured record and
// the entries of CapturedVars, builds the map information for each capture in
// a temporary, and appends it to CombinedInfo. Each visited capture is also
// recorded in MappedVarSet (nullptr stands for a 'this' capture). After the
// loop, the MEMBER_OF flags of lambda captures are adjusted.
10733 MappableExprsHandler &MEHandler, CodeGenFunction &CGF,
10734 const CapturedStmt &CS, llvm::SmallVectorImpl<llvm::Value *> &CapturedVars,
10735 llvm::OpenMPIRBuilder &OMPBuilder,
10736 llvm::DenseSet<CanonicalDeclPtr<const Decl>> &MappedVarSet,
10737 MappableExprsHandler::MapCombinedInfoTy &CombinedInfo) {
10738
10739 llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers;
10740 auto RI = CS.getCapturedRecordDecl()->field_begin();
10741 auto *CV = CapturedVars.begin();
10743 CE = CS.capture_end();
10744 CI != CE; ++CI, ++RI, ++CV) {
// Map information for this capture only; merged into CombinedInfo at the end
// of the iteration.
10745 MappableExprsHandler::MapCombinedInfoTy CurInfo;
10746
10747 // VLA sizes are passed to the outlined region by copy and do not have map
10748 // information associated.
10749 if (CI->capturesVariableArrayType()) {
10750 CurInfo.Exprs.push_back(nullptr);
10751 CurInfo.BasePointers.push_back(*CV);
10752 CurInfo.DevicePtrDecls.push_back(nullptr);
10753 CurInfo.DevicePointers.push_back(
10754 MappableExprsHandler::DeviceInfoTy::None);
10755 CurInfo.Pointers.push_back(*CV);
10756 CurInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
10757 CGF.getTypeSize(RI->getType()), CGF.Int64Ty, /*isSigned=*/true));
10758 // Copy to the device as an argument. No need to retrieve it.
10759 CurInfo.Types.push_back(OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
10760 OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM |
10761 OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT);
10762 CurInfo.Mappers.push_back(nullptr);
10763 } else {
// CapturedVD is null for a 'this' capture; the non-'this' arm of this
// conditional is on a line missing from this listing.
10764 const ValueDecl *CapturedVD =
10765 CI->capturesThis() ? nullptr
10767 bool HasEntryWithCVAsAttachPtr = false;
10768 if (CapturedVD)
10769 HasEntryWithCVAsAttachPtr =
10770 MEHandler.hasAttachEntryForCapturedVar(CapturedVD);
10771
10772 // Populate component lists for the captured variable from clauses.
10773 MappableExprsHandler::MapDataArrayTy DeclComponentLists;
10776 StorageForImplicitlyAddedComponentLists;
10777 MEHandler.populateComponentListsForNonLambdaCaptureFromClauses(
10778 CapturedVD, DeclComponentLists,
10779 StorageForImplicitlyAddedComponentLists);
10780
10781 // OpenMP 6.0, 15.8, target construct, restrictions:
10782 // * A list item in a map clause that is specified on a target construct
10783 // must have a base variable or base pointer.
10784 //
10785 // Map clauses on a target construct must either have a base pointer, or a
10786 // base-variable. So, if we don't have a base-pointer, that means that it
10787 // must have a base-variable, i.e. we have a map like `map(s)`, `map(s.x)`
10788 // etc. In such cases, we do not need to handle default map generation
10789 // for `s`.
10790 bool HasEntryWithoutAttachPtr =
10791 llvm::any_of(DeclComponentLists, [&](const auto &MapData) {
10793 Components = std::get<0>(MapData);
10794 return !MEHandler.getAttachPtrExpr(Components);
10795 });
10796
10797 // Generate default map info first if there's no direct map with CV as
10798 // the base-variable, or attach pointer.
10799 if (DeclComponentLists.empty() ||
10800 (!HasEntryWithCVAsAttachPtr && !HasEntryWithoutAttachPtr))
10801 MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurInfo);
10802
10803 // If we have any information in the map clause, we use it, otherwise we
10804 // just do a default mapping.
10805 MEHandler.generateInfoForCaptureFromClauseInfo(
10806 DeclComponentLists, CI, *CV, CurInfo, OMPBuilder,
10807 /*OffsetForMemberOfFlag=*/CombinedInfo.BasePointers.size());
10808
// Record the capture as handled; nullptr represents the 'this' capture.
10809 if (!CI->capturesThis())
10810 MappedVarSet.insert(CI->getCapturedVar());
10811 else
10812 MappedVarSet.insert(nullptr);
10813
10814 // Generate correct mapping for variables captured by reference in
10815 // lambdas.
10816 if (CI->capturesVariable())
10817 MEHandler.generateInfoForLambdaCaptures(CI->getCapturedVar(), *CV,
10818 CurInfo, LambdaPointers);
10819 }
10820 // We expect to have at least an element of information for this capture.
10821 assert(!CurInfo.BasePointers.empty() &&
10822 "Non-existing map pointer for capture!");
10823 assert(CurInfo.BasePointers.size() == CurInfo.Pointers.size() &&
10824 CurInfo.BasePointers.size() == CurInfo.Sizes.size() &&
10825 CurInfo.BasePointers.size() == CurInfo.Types.size() &&
10826 CurInfo.BasePointers.size() == CurInfo.Mappers.size() &&
10827 "Inconsistent map information sizes!");
10828
10829 // We need to append the results of this capture to what we already have.
10830 CombinedInfo.append(CurInfo);
10831 }
10832 // Adjust MEMBER_OF flags for the lambdas captures.
10833 MEHandler.adjustMemberOfForLambdaCaptures(
10834 OMPBuilder, LambdaPointers, CombinedInfo.BasePointers,
10835 CombinedInfo.Pointers, CombinedInfo.Types);
10836}
10837static void
10838genMapInfo(MappableExprsHandler &MEHandler, CodeGenFunction &CGF,
10839 MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
10840 llvm::OpenMPIRBuilder &OMPBuilder,
10841 const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkippedVarSet =
10842 llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) {
10843
10844 CodeGenModule &CGM = CGF.CGM;
10845 // Map any list items in a map clause that were not captures because they
10846 // weren't referenced within the construct.
10847 MEHandler.generateAllInfo(CombinedInfo, OMPBuilder, SkippedVarSet);
10848
10849 auto FillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) {
10850 return emitMappingInformation(CGF, OMPBuilder, MapExpr);
10851 };
10852 if (CGM.getCodeGenOpts().getDebugInfo() !=
10853 llvm::codegenoptions::NoDebugInfo) {
10854 CombinedInfo.Names.resize(CombinedInfo.Exprs.size());
10855 llvm::transform(CombinedInfo.Exprs, CombinedInfo.Names.begin(),
10856 FillInfoMap);
10857 }
10858}
10859
// NOTE(review): the lines carrying this definition's name and its D, CGF and
// CapturedVars parameters are missing from this listing; a call elsewhere in
// this file invokes genMapInfo(D, CGF, CS, CapturedVars, OMPBuilder,
// CombinedInfo).
//
// Fills CombinedInfo for directive D in two steps: first the entries for the
// captures of CS, then the remaining entries, skipping the declarations that
// the first step recorded in MappedVarSet.
10861 const CapturedStmt &CS,
10863 llvm::OpenMPIRBuilder &OMPBuilder,
10864 MappableExprsHandler::MapCombinedInfoTy &CombinedInfo) {
10865 // Get mappable expression information.
10866 MappableExprsHandler MEHandler(D, CGF);
10867 llvm::DenseSet<CanonicalDeclPtr<const Decl>> MappedVarSet;
10868
10869 genMapInfoForCaptures(MEHandler, CGF, CS, CapturedVars, OMPBuilder,
10870 MappedVarSet, CombinedInfo);
10871 genMapInfo(MEHandler, CGF, CombinedInfo, OMPBuilder, MappedVarSet);
10872}
10873
// NOTE(review): the lines carrying this template's name and its CGF and Values
// parameters are missing from this listing, as is one line just before the
// loop.
//
// Evaluates every expression in the variable list of D's single ClauseTy
// clause and appends each result, cast (as signed) to i32, to Values. The
// clause's list is asserted to be non-empty.
10874template <typename ClauseTy>
10875static void
10877 const OMPExecutableDirective &D,
10879 const auto *C = D.getSingleClause<ClauseTy>();
10880 assert(!C->varlist_empty() &&
10881 "ompx_bare requires explicit num_teams and thread_limit");
10883 for (auto *E : C->varlist()) {
10884 llvm::Value *V = CGF.EmitScalarExpr(E);
10885 Values.push_back(
10886 CGF.Builder.CreateIntCast(V, CGF.Int32Ty, /*isSigned=*/true));
10887 }
10888}
10889
// NOTE(review): several lines of this definition are missing from this
// listing: the one carrying its name (a call elsewhere in this file passes
// this parameter list to emitTargetCallKernelLaunch), the declaration of
// Info, the declarations of NumTeams and NumThreads, and most of the IsBare
// branch.
//
// Emits the offloading path of a target region:
//  1. builds the combined map information for D and appends one extra null
//     entry for the implicit dyn_ptr argument;
//  2. materializes the offloading arrays and publishes them through
//     InputInfo, MapTypesArray and MapNamesArray;
//  3. emits the kernel launch (ThenGen), either wrapped in a target task when
//     RequiresOuterTask is set or as an inlined directive otherwise.
10891 CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn,
10892 const OMPExecutableDirective &D,
10893 llvm::SmallVectorImpl<llvm::Value *> &CapturedVars, bool RequiresOuterTask,
10894 const CapturedStmt &CS, bool OffloadingMandatory,
10895 llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
10896 llvm::Value *OutlinedFnID, CodeGenFunction::OMPTargetDataInfo &InputInfo,
10897 llvm::Value *&MapTypesArray, llvm::Value *&MapNamesArray,
10898 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
10899 const OMPLoopDirective &D)>
10900 SizeEmitter,
10901 CodeGenFunction &CGF, CodeGenModule &CGM) {
10902 llvm::OpenMPIRBuilder &OMPBuilder = OMPRuntime->getOMPBuilder();
10903
10904 // Fill up the arrays with all the captured variables.
10905 MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
10907 genMapInfo(D, CGF, CS, CapturedVars, OMPBuilder, CombinedInfo);
10908
10909 // Append a null entry for the implicit dyn_ptr argument.
10910 using OpenMPOffloadMappingFlags = llvm::omp::OpenMPOffloadMappingFlags;
10911 auto *NullPtr = llvm::Constant::getNullValue(CGF.Builder.getPtrTy());
10912 CombinedInfo.BasePointers.push_back(NullPtr);
10913 CombinedInfo.Pointers.push_back(NullPtr);
10914 CombinedInfo.DevicePointers.push_back(
10915 llvm::OpenMPIRBuilder::DeviceInfoTy::None);
10916 CombinedInfo.Sizes.push_back(CGF.Builder.getInt64(0));
10917 CombinedInfo.Types.push_back(OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM |
10918 OpenMPOffloadMappingFlags::OMP_MAP_LITERAL);
// Names is only extended when it is already populated.
10919 if (!CombinedInfo.Names.empty())
10920 CombinedInfo.Names.push_back(NullPtr);
10921 CombinedInfo.Exprs.push_back(nullptr);
10922 CombinedInfo.Mappers.push_back(nullptr);
10923 CombinedInfo.DevicePtrDecls.push_back(nullptr);
10924
10925 emitOffloadingArraysAndArgs(CGF, CombinedInfo, Info, OMPBuilder,
10926 /*IsNonContiguous=*/true, /*ForEndCall=*/false);
10927
// Publish the generated arrays to the caller-provided outputs.
10928 InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
10929 InputInfo.BasePointersArray = Address(Info.RTArgs.BasePointersArray,
10930 CGF.VoidPtrTy, CGM.getPointerAlign());
10931 InputInfo.PointersArray =
10932 Address(Info.RTArgs.PointersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
10933 InputInfo.SizesArray =
10934 Address(Info.RTArgs.SizesArray, CGF.Int64Ty, CGM.getPointerAlign());
10935 InputInfo.MappersArray =
10936 Address(Info.RTArgs.MappersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
10937 MapTypesArray = Info.RTArgs.MapTypesArray;
10938 MapNamesArray = Info.RTArgs.MapNamesArray;
10939
// Region body that emits the actual kernel launch.
10940 auto &&ThenGen = [&OMPRuntime, OutlinedFn, &D, &CapturedVars,
10941 RequiresOuterTask, &CS, OffloadingMandatory, Device,
10942 OutlinedFnID, &InputInfo, &MapTypesArray, &MapNamesArray,
10943 SizeEmitter](CodeGenFunction &CGF, PrePostActionTy &) {
10944 bool IsReverseOffloading = Device.getInt() == OMPC_DEVICE_ancestor;
10945
10946 if (IsReverseOffloading) {
10947 // Reverse offloading is not supported, so just execute on the host.
10948 // FIXME: This fallback solution is incorrect since it ignores the
10949 // OMP_TARGET_OFFLOAD environment variable. Instead it would be better to
10950 // assert here and ensure SEMA emits an error.
10951 emitTargetCallFallback(OMPRuntime, OutlinedFn, D, CapturedVars,
10952 RequiresOuterTask, CS, OffloadingMandatory, CGF);
10953 return;
10954 }
10955
10956 bool HasNoWait = D.hasClausesOfKind<OMPNowaitClause>();
10957 unsigned NumTargetItems = InputInfo.NumberOfTargetItems;
10958
10959 llvm::Value *BasePointersArray =
10960 InputInfo.BasePointersArray.emitRawPointer(CGF);
10961 llvm::Value *PointersArray = InputInfo.PointersArray.emitRawPointer(CGF);
10962 llvm::Value *SizesArray = InputInfo.SizesArray.emitRawPointer(CGF);
10963 llvm::Value *MappersArray = InputInfo.MappersArray.emitRawPointer(CGF);
10964
// Callback handed to the kernel-launch emitter: emits the host fallback at
// the given insertion point and returns the insertion point that follows it.
10965 auto &&EmitTargetCallFallbackCB =
10966 [&OMPRuntime, OutlinedFn, &D, &CapturedVars, RequiresOuterTask, &CS,
10967 OffloadingMandatory, &CGF](llvm::OpenMPIRBuilder::InsertPointTy IP)
10968 -> llvm::OpenMPIRBuilder::InsertPointTy {
10969 CGF.Builder.restoreIP(IP);
10970 emitTargetCallFallback(OMPRuntime, OutlinedFn, D, CapturedVars,
10971 RequiresOuterTask, CS, OffloadingMandatory, CGF);
10972 return CGF.Builder.saveIP();
10973 };
10974
10975 bool IsBare = D.hasClausesOfKind<OMPXBareClause>();
10978 if (IsBare) {
10981 NumThreads);
10982 } else {
10983 NumTeams.push_back(OMPRuntime->emitNumTeamsForTargetDirective(CGF, D));
10984 NumThreads.push_back(
10985 OMPRuntime->emitNumThreadsForTargetDirective(CGF, D));
10986 }
10987
10988 llvm::Value *DeviceID = emitDeviceID(Device, CGF);
10989 llvm::Value *RTLoc = OMPRuntime->emitUpdateLocation(CGF, D.getBeginLoc());
10990 llvm::Value *NumIterations =
10991 OMPRuntime->emitTargetNumIterationsCall(CGF, D, SizeEmitter);
10992 auto [DynCGroupMem, DynCGroupMemFallback] = emitDynCGroupMem(D, CGF);
10993 llvm::OpenMPIRBuilder::InsertPointTy AllocaIP(
10994 CGF.AllocaInsertPt->getParent(), CGF.AllocaInsertPt->getIterator());
10995
10996 llvm::OpenMPIRBuilder::TargetDataRTArgs RTArgs(
10997 BasePointersArray, PointersArray, SizesArray, MapTypesArray,
10998 nullptr /* MapTypesArrayEnd */, MappersArray, MapNamesArray);
10999
11000 llvm::OpenMPIRBuilder::TargetKernelArgs Args(
11001 NumTargetItems, RTArgs, NumIterations, NumTeams, NumThreads,
11002 DynCGroupMem, HasNoWait, /*StrictBlocksAndThreads=*/IsBare,
11003 DynCGroupMemFallback);
11004
11005 llvm::OpenMPIRBuilder::InsertPointTy AfterIP =
11006 cantFail(OMPRuntime->getOMPBuilder().emitKernelLaunch(
11007 CGF.Builder, OutlinedFnID, EmitTargetCallFallbackCB, Args, DeviceID,
11008 RTLoc, AllocaIP));
// Continue emitting after the launch sequence.
11009 CGF.Builder.restoreIP(AfterIP);
11010 };
11011
11012 if (RequiresOuterTask)
11013 CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
11014 else
11015 OMPRuntime->emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
11016}
11017
/// Emit the host-only execution path of a target region.
///
/// The host fallback is emitted either inside a target task (when
/// \p RequiresOuterTask is set) or as an inlined directive.
// NOTE(review): two lines are missing from this listing: the parameter line
// declaring CapturedVars and, inside the RequiresOuterTask branch, the line
// that presumably declares InputInfo -- confirm against the full file.
11018static void
11019emitTargetCallElse(CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn,
11020 const OMPExecutableDirective &D,
11022 bool RequiresOuterTask, const CapturedStmt &CS,
11023 bool OffloadingMandatory, CodeGenFunction &CGF) {
11024
11025 // Notify that the host version must be executed.
11026 auto &&ElseGen =
11027 [&OMPRuntime, OutlinedFn, &D, &CapturedVars, RequiresOuterTask, &CS,
11028 OffloadingMandatory](CodeGenFunction &CGF, PrePostActionTy &) {
11029 emitTargetCallFallback(OMPRuntime, OutlinedFn, D, CapturedVars,
11030 RequiresOuterTask, CS, OffloadingMandatory, CGF);
11031 };
11032
11033 if (RequiresOuterTask) {
11035 CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo);
11036 } else {
11037 OMPRuntime->emitInlinedDirective(CGF, D.getDirectiveKind(), ElseGen);
11038 }
11039}
11040
// NOTE(review): several lines of this definition are missing from this
// listing: the ones carrying its name and leading parameters, and the
// declarations of CapturedVars and InputInfo.
//
// Emits the code for a target region on the host side. The captured variables
// are generated first; then either the offloading path (kernel launch), the
// host path, or an if-clause selecting between the two is emitted, depending
// on OutlinedFnID and IfCond. Does nothing when CGF has no insertion point.
11043 llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
11044 llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
11045 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
11046 const OMPLoopDirective &D)>
11047 SizeEmitter) {
11048 if (!CGF.HaveInsertPoint())
11049 return;
11050
// Offloading is mandatory only on the host side and only when the language
// option requests it.
11051 const bool OffloadingMandatory = !CGM.getLangOpts().OpenMPIsTargetDevice &&
11052 CGM.getLangOpts().OpenMPOffloadMandatory;
11053
11054 assert((OffloadingMandatory || OutlinedFn) && "Invalid outlined function!");
11055
// An outer task is used for depend, nowait and in_reduction clauses, and
// (OpenMP >= 5.1) for thread_limit on directives that need the task-based
// thread limit.
11056 const bool RequiresOuterTask =
11057 D.hasClausesOfKind<OMPDependClause>() ||
11058 D.hasClausesOfKind<OMPNowaitClause>() ||
11059 D.hasClausesOfKind<OMPInReductionClause>() ||
11060 (CGM.getLangOpts().OpenMP >= 51 &&
11061 needsTaskBasedThreadLimit(D.getDirectiveKind()) &&
11062 D.hasClausesOfKind<OMPThreadLimitClause>());
11064 const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
11065 auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF,
11066 PrePostActionTy &) {
11067 CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
11068 };
11069 emitInlinedDirective(CGF, OMPD_unknown, ArgsCodegen);
11070
11072 llvm::Value *MapTypesArray = nullptr;
11073 llvm::Value *MapNamesArray = nullptr;
11074
// Offloading path.
11075 auto &&TargetThenGen = [this, OutlinedFn, &D, &CapturedVars,
11076 RequiresOuterTask, &CS, OffloadingMandatory, Device,
11077 OutlinedFnID, &InputInfo, &MapTypesArray,
11078 &MapNamesArray, SizeEmitter](CodeGenFunction &CGF,
11079 PrePostActionTy &) {
11080 emitTargetCallKernelLaunch(this, OutlinedFn, D, CapturedVars,
11081 RequiresOuterTask, CS, OffloadingMandatory,
11082 Device, OutlinedFnID, InputInfo, MapTypesArray,
11083 MapNamesArray, SizeEmitter, CGF, CGM);
11084 };
11085
// Host path.
11086 auto &&TargetElseGen =
11087 [this, OutlinedFn, &D, &CapturedVars, RequiresOuterTask, &CS,
11088 OffloadingMandatory](CodeGenFunction &CGF, PrePostActionTy &) {
11089 emitTargetCallElse(this, OutlinedFn, D, CapturedVars, RequiresOuterTask,
11090 CS, OffloadingMandatory, CGF);
11091 };
11092
11093 // If we have a target function ID it means that we need to support
11094 // offloading, otherwise, just execute on the host. We need to execute on host
11095 // regardless of the conditional in the if clause if, e.g., the user does not
11096 // specify target triples.
11097 if (OutlinedFnID) {
11098 if (IfCond) {
11099 emitIfClause(CGF, IfCond, TargetThenGen, TargetElseGen);
11100 } else {
11101 RegionCodeGenTy ThenRCG(TargetThenGen);
11102 ThenRCG(CGF);
11103 }
11104 } else {
11105 RegionCodeGenTy ElseRCG(TargetElseGen);
11106 ElseRCG(CGF);
11107 }
11108}
11109
// NOTE(review): many lines of this definition are missing from this listing:
// the one carrying its name and first parameter (the recursive calls below
// pass a statement and ParentName to scanForTargetRegionsFunctions), part of
// the RequiresDeviceCodegen initializer, and the calls made in most of the
// target-directive cases of the switch.
//
// Recursively walks statement S looking for target regions. For a directive
// that requires device codegen, the switch below dispatches on its kind, but
// only if the offload info manager has an entry for its location. For any
// other executable directive the walk continues into its raw associated
// statement; for other statements it continues into the children (a lambda's
// body is used in place of the lambda expression).
11111 StringRef ParentName) {
11112 if (!S)
11113 return;
11114
11115 // Register vtable from device for target data and target directives.
11116 // Add this block here since scanForTargetRegionsFunctions ignores
11117 // target data by checking if S is an executable directive (target).
11118 if (auto *E = dyn_cast<OMPExecutableDirective>(S);
11119 E && isOpenMPTargetDataManagementDirective(E->getDirectiveKind())) {
11120 // Don't need to check if it's device compile
11121 // since scanForTargetRegionsFunctions is currently only called
11122 // in device compilation.
11123 registerVTable(*E);
11124 }
11125
11126 // Codegen OMP target directives that offload compute to the device.
11127 bool RequiresDeviceCodegen =
11130 cast<OMPExecutableDirective>(S)->getDirectiveKind());
11131
11132 if (RequiresDeviceCodegen) {
11133 const auto &E = *cast<OMPExecutableDirective>(S);
11134
11135 llvm::TargetRegionEntryInfo EntryInfo = getEntryInfoFromPresumedLoc(
11136 CGM, OMPBuilder, E.getBeginLoc(), ParentName);
11137
11138 // Is this a target region that should not be emitted as an entry point? If
11139 // so just signal we are done with this target region.
11140 if (!OMPBuilder.OffloadInfoManager.hasTargetRegionEntryInfo(EntryInfo))
11141 return;
11142
11143 switch (E.getDirectiveKind()) {
11144 case OMPD_target:
11147 break;
11148 case OMPD_target_parallel:
11150 CGM, ParentName, cast<OMPTargetParallelDirective>(E));
11151 break;
11152 case OMPD_target_teams:
11154 CGM, ParentName, cast<OMPTargetTeamsDirective>(E));
11155 break;
11156 case OMPD_target_teams_distribute:
11159 break;
11160 case OMPD_target_teams_distribute_simd:
11163 break;
11164 case OMPD_target_parallel_for:
11167 break;
11168 case OMPD_target_parallel_for_simd:
11171 break;
11172 case OMPD_target_simd:
11174 CGM, ParentName, cast<OMPTargetSimdDirective>(E));
11175 break;
11176 case OMPD_target_teams_distribute_parallel_for:
11178 CGM, ParentName,
11180 break;
11181 case OMPD_target_teams_distribute_parallel_for_simd:
11184 CGM, ParentName,
11186 break;
11187 case OMPD_target_teams_loop:
11190 break;
11191 case OMPD_target_parallel_loop:
11194 break;
// Every remaining directive kind falls through to the unreachable below.
11195 case OMPD_parallel:
11196 case OMPD_for:
11197 case OMPD_parallel_for:
11198 case OMPD_parallel_master:
11199 case OMPD_parallel_sections:
11200 case OMPD_for_simd:
11201 case OMPD_parallel_for_simd:
11202 case OMPD_cancel:
11203 case OMPD_cancellation_point:
11204 case OMPD_ordered:
11205 case OMPD_threadprivate:
11206 case OMPD_allocate:
11207 case OMPD_task:
11208 case OMPD_simd:
11209 case OMPD_tile:
11210 case OMPD_unroll:
11211 case OMPD_sections:
11212 case OMPD_section:
11213 case OMPD_single:
11214 case OMPD_master:
11215 case OMPD_critical:
11216 case OMPD_taskyield:
11217 case OMPD_barrier:
11218 case OMPD_taskwait:
11219 case OMPD_taskgroup:
11220 case OMPD_atomic:
11221 case OMPD_flush:
11222 case OMPD_depobj:
11223 case OMPD_scan:
11224 case OMPD_teams:
11225 case OMPD_target_data:
11226 case OMPD_target_exit_data:
11227 case OMPD_target_enter_data:
11228 case OMPD_distribute:
11229 case OMPD_distribute_simd:
11230 case OMPD_distribute_parallel_for:
11231 case OMPD_distribute_parallel_for_simd:
11232 case OMPD_teams_distribute:
11233 case OMPD_teams_distribute_simd:
11234 case OMPD_teams_distribute_parallel_for:
11235 case OMPD_teams_distribute_parallel_for_simd:
11236 case OMPD_target_update:
11237 case OMPD_declare_simd:
11238 case OMPD_declare_variant:
11239 case OMPD_begin_declare_variant:
11240 case OMPD_end_declare_variant:
11241 case OMPD_declare_target:
11242 case OMPD_end_declare_target:
11243 case OMPD_declare_reduction:
11244 case OMPD_declare_mapper:
11245 case OMPD_taskloop:
11246 case OMPD_taskloop_simd:
11247 case OMPD_master_taskloop:
11248 case OMPD_master_taskloop_simd:
11249 case OMPD_parallel_master_taskloop:
11250 case OMPD_parallel_master_taskloop_simd:
11251 case OMPD_requires:
11252 case OMPD_metadirective:
11253 case OMPD_unknown:
11254 default:
11255 llvm_unreachable("Unknown target directive for OpenMP device codegen.");
11256 }
11257 return;
11258 }
11259
// Other executable directives: continue the scan in the associated statement,
// if there is one.
11260 if (const auto *E = dyn_cast<OMPExecutableDirective>(S)) {
11261 if (!E->hasAssociatedStmt() || !E->getAssociatedStmt())
11262 return;
11263
11264 scanForTargetRegionsFunctions(E->getRawStmt(), ParentName);
11265 return;
11266 }
11267
11268 // If this is a lambda function, look into its body.
11269 if (const auto *L = dyn_cast<LambdaExpr>(S))
11270 S = L->getBody();
11271
11272 // Keep looking for target regions recursively.
11273 for (const Stmt *II : S->children())
11274 scanForTargetRegionsFunctions(II, ParentName);
11275}
11276
11277static bool isAssumedToBeNotEmitted(const ValueDecl *VD, bool IsDevice) {
11278 std::optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
11279 OMPDeclareTargetDeclAttr::getDeviceType(VD);
11280 if (!DevTy)
11281 return false;
11282 // Do not emit device_type(nohost) functions for the host.
11283 if (!IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_NoHost)
11284 return true;
11285 // Do not emit device_type(host) functions for the device.
11286 if (IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_Host)
11287 return true;
11288 return false;
11289}
11290
// NOTE(review): the line carrying this definition's name and parameter is
// missing from this listing, as are the heads of both conditions that end in
// "CGM.getLangOpts().OpenMPIsTargetDevice))" and a line after the mangled-name
// lookup (presumably the scan of the function body -- confirm against the
// full file).
//
// The visible code returns a bool: on the host it is true only under the first
// partially visible condition; on the device it is true under the second one,
// or when VD is neither a declare-target declaration nor present in
// AlreadyEmittedTargetDecls.
11292 // If emitting code for the host, we do not process FD here. Instead we do
11293 // the normal code generation.
11294 if (!CGM.getLangOpts().OpenMPIsTargetDevice) {
11295 if (const auto *FD = dyn_cast<FunctionDecl>(GD.getDecl()))
11297 CGM.getLangOpts().OpenMPIsTargetDevice))
11298 return true;
11299 return false;
11300 }
11301
11302 const ValueDecl *VD = cast<ValueDecl>(GD.getDecl());
11303 // Try to detect target regions in the function.
11304 if (const auto *FD = dyn_cast<FunctionDecl>(VD)) {
11305 StringRef Name = CGM.getMangledName(GD);
11308 CGM.getLangOpts().OpenMPIsTargetDevice))
11309 return true;
11310 }
11311
11312 // Do not emit function if it is not marked as declare target.
11313 return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) &&
11314 AlreadyEmittedTargetDecls.count(VD) == 0;
11315}
11316
// NOTE(review): the lines carrying this definition's name and the head of the
// first condition are missing from this listing, as are two lines inside the
// final condition (between the MT_Enter test and "return true").
//
// The visible code returns a bool. On the host, after the first check, it
// returns false. On the device it first scans the constructors and destructor
// of the variable's record type (if any) for target regions, then returns
// true under the final condition on the declare-target map type, and false
// otherwise.
11319 CGM.getLangOpts().OpenMPIsTargetDevice))
11320 return true;
11321
11322 if (!CGM.getLangOpts().OpenMPIsTargetDevice)
11323 return false;
11324
11325 // Check if there are Ctors/Dtors in this declaration and look for target
11326 // regions in it. We use the complete variant to produce the kernel name
11327 // mangling.
11328 QualType RDTy = cast<VarDecl>(GD.getDecl())->getType();
11329 if (const auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) {
11330 for (const CXXConstructorDecl *Ctor : RD->ctors()) {
11331 StringRef ParentName =
11332 CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete));
11333 scanForTargetRegionsFunctions(Ctor->getBody(), ParentName);
11334 }
11335 if (const CXXDestructorDecl *Dtor = RD->getDestructor()) {
11336 StringRef ParentName =
11337 CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete));
11338 scanForTargetRegionsFunctions(Dtor->getBody(), ParentName);
11339 }
11340 }
11341
11342 // Do not emit variable if it is not marked as declare target.
11343 std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
11344 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(
11345 cast<VarDecl>(GD.getDecl()));
11346 if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
11347 ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
11348 *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
11351 return true;
11352 }
11353 return false;
11354}
11355
// NOTE(review): the line carrying this definition's name and its VD parameter
// is missing from this listing, as are two argument lines of the
// OMPBuilder.registerTargetGlobalVariable call below.
//
// Registers the global variable VD, whose address constant is Addr, with the
// OpenMP IR builder for offloading. Returns early, registering nothing, when:
//  - there are no offload target triples and this is not a device compile;
//  - VD is declare-target (other than 'link') but has external storage;
//  - VD is declare-target 'local';
//  - VD is not declare-target at all (on the device its address is then
//    recorded in EmittedNonTargetVariables under its mangled name).
// Any globals the builder generates are added to the compiler-used list.
11357 llvm::Constant *Addr) {
11358 if (CGM.getLangOpts().OMPTargetTriples.empty() &&
11359 !CGM.getLangOpts().OpenMPIsTargetDevice)
11360 return;
11361
11362 std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
11363 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
11364
11365 // If this is an 'extern' declaration we defer to the canonical definition and
11366 // do not emit an offloading entry.
11367 if (Res && *Res != OMPDeclareTargetDeclAttr::MT_Link &&
11368 VD->hasExternalStorage())
11369 return;
11370
11371 // MT_Local variables use direct access with no host-device mapping.
11372 // No offload entry needed — the device global keeps its own initializer.
11373 if (Res && *Res == OMPDeclareTargetDeclAttr::MT_Local)
11374 return;
11375
11376 if (!Res) {
11377 if (CGM.getLangOpts().OpenMPIsTargetDevice) {
11378 // Register non-target variables being emitted in device code (debug info
11379 // may cause this).
11380 StringRef VarName = CGM.getMangledName(VD);
11381 EmittedNonTargetVariables.try_emplace(VarName, Addr);
11382 }
11383 return;
11384 }
11385
// Callbacks passed to the builder so it can query the address and linkage of
// VD.
11386 auto AddrOfGlobal = [&VD, this]() { return CGM.GetAddrOfGlobal(VD); };
11387 auto LinkageForVariable = [&VD, this]() {
11388 return CGM.getLLVMLinkageVarDefinition(VD);
11389 };
11390
11391 std::vector<llvm::GlobalVariable *> GeneratedRefs;
11392 OMPBuilder.registerTargetGlobalVariable(
11394 VD->hasDefinition(CGM.getContext()) == VarDecl::DeclarationOnly,
11395 VD->isExternallyVisible(),
11397 VD->getCanonicalDecl()->getBeginLoc()),
11398 CGM.getMangledName(VD), GeneratedRefs, CGM.getLangOpts().OpenMPSimd,
11399 CGM.getLangOpts().OMPTargetTriples, AddrOfGlobal, LinkageForVariable,
11400 CGM.getTypes().ConvertTypeForMem(
11401 CGM.getContext().getPointerType(VD->getType())),
11402 Addr);
11403
11404 for (auto *ref : GeneratedRefs)
11405 CGM.addCompilerUsedGlobal(ref);
11406}
11407
// NOTE(review): the line carrying this definition's name and parameter is
// missing from this listing, as is the second operand of the condition below.
//
// Dispatches on the kind of declaration in GD: function declarations (and
// whatever the missing operand matches) go to emitTargetFunctions, everything
// else to emitTargetGlobalVariable.
11409 if (isa<FunctionDecl>(GD.getDecl()) ||
11411 return emitTargetFunctions(GD);
11412
11413 return emitTargetGlobalVariable(GD);
11414}
11415
// NOTE(review): the line carrying this definition's name is missing from this
// listing, as are the last operand of the if-condition and of the assert
// condition below (both immediately follow an MT_* test).
//
// Processes each variable in DeferredGlobalVariables that is a declare-target
// declaration: it is either emitted through CGM.EmitGlobal, or its
// declare-target address is requested from the OpenMP runtime. Variables that
// are not declare-target are skipped.
11417 for (const VarDecl *VD : DeferredGlobalVariables) {
11418 std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
11419 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
11420 if (!Res)
11421 continue;
11422 // MT_Local and MT_To/MT_Enter without USM are always emitted.
11423 if (*Res == OMPDeclareTargetDeclAttr::MT_Local ||
11424 ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
11425 *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
11427 CGM.EmitGlobal(VD);
11428 } else {
11429 assert((*Res == OMPDeclareTargetDeclAttr::MT_Link ||
11430 ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
11431 *Res == OMPDeclareTargetDeclAttr::MT_Enter ||
11432 *Res == OMPDeclareTargetDeclAttr::MT_Local) &&
11434 "Expected link clause or to clause with unified memory.");
// The result is deliberately discarded; only the call's effect is wanted.
11435 (void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
11436 }
11437 }
11438}
11439
// NOTE(review): the line carrying this definition's name is missing from this
// listing.
//
// The body only asserts that D is a target execution directive; it emits no
// code.
11441 CodeGenFunction &CGF, const OMPExecutableDirective &D) const {
11442 assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) &&
11443 " Expected target-based directive.");
11444}
11445
// NOTE(review): the line carrying this definition's name and its D parameter
// is missing from this listing, as are one statement at the top of the
// unified_shared_memory branch and one case label before the final "break".
//
// Scans the clauses of D and records their effect:
//  - unified_shared_memory sets the matching flag on OMPBuilder.Config;
//  - atomic_default_mem_order stores the corresponding llvm::AtomicOrdering in
//    RequiresAtomicOrdering (acq_rel -> AcquireRelease, seq_cst ->
//    SequentiallyConsistent, relaxed -> Monotonic).
11447 for (const OMPClause *Clause : D->clauselists()) {
11448 if (Clause->getClauseKind() == OMPC_unified_shared_memory) {
11450 OMPBuilder.Config.setHasRequiresUnifiedSharedMemory(true);
11451 } else if (const auto *AC =
11452 dyn_cast<OMPAtomicDefaultMemOrderClause>(Clause)) {
11453 switch (AC->getAtomicDefaultMemOrderKind()) {
11454 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_acq_rel:
11455 RequiresAtomicOrdering = llvm::AtomicOrdering::AcquireRelease;
11456 break;
11457 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_seq_cst:
11458 RequiresAtomicOrdering = llvm::AtomicOrdering::SequentiallyConsistent;
11459 break;
11460 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_relaxed:
11461 RequiresAtomicOrdering = llvm::AtomicOrdering::Monotonic;
11462 break;
11464 break;
11465 }
11466 }
11467 }
11468}
11469
/// Accessor for the default atomic memory ordering.
// NOTE(review): the body (the return statement) is missing from this listing;
// presumably it returns RequiresAtomicOrdering -- confirm against the full
// file.
11470llvm::AtomicOrdering CGOpenMPRuntime::getDefaultMemoryOrdering() const {
11472}
11473
// NOTE(review): the line carrying this definition's name and its VD parameter
// is missing from this listing.
//
// Reports whether VD carries an OMPAllocateDeclAttr and, if so, which language
// address space to use for it. Returns false (leaving AS untouched) when VD is
// null or has no such attribute. For every predefined allocator AS is set to
// LangAS::Default and true is returned; a user-defined allocator is treated as
// unreachable here.
11475 LangAS &AS) {
11476 if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())
11477 return false;
11478 const auto *A = VD->getAttr<OMPAllocateDeclAttr>();
11479 switch(A->getAllocatorType()) {
11480 case OMPAllocateDeclAttr::OMPNullMemAlloc:
11481 case OMPAllocateDeclAttr::OMPDefaultMemAlloc:
11482 // Not supported, fallback to the default mem space.
11483 case OMPAllocateDeclAttr::OMPLargeCapMemAlloc:
11484 case OMPAllocateDeclAttr::OMPCGroupMemAlloc:
11485 case OMPAllocateDeclAttr::OMPHighBWMemAlloc:
11486 case OMPAllocateDeclAttr::OMPLowLatMemAlloc:
11487 case OMPAllocateDeclAttr::OMPThreadMemAlloc:
11488 case OMPAllocateDeclAttr::OMPConstMemAlloc:
11489 case OMPAllocateDeclAttr::OMPPTeamMemAlloc:
11490 AS = LangAS::Default;
11491 return true;
11492 case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc:
11493 llvm_unreachable("Expected predefined allocator for the variables with the "
11494 "static storage.");
11495 }
11496 return false;
11497}
11498
11502
// NOTE(review): the first line of this constructor's signature is not present
// in this extract.
// When compiling for an OpenMP target device, remembers the runtime's current
// ShouldMarkAsGlobal flag and clears it. Does nothing otherwise.
11504 CodeGenModule &CGM)
11505 : CGM(CGM) {
11506 if (CGM.getLangOpts().OpenMPIsTargetDevice) {
11507 SavedShouldMarkAsGlobal = CGM.getOpenMPRuntime().ShouldMarkAsGlobal;
11508 CGM.getOpenMPRuntime().ShouldMarkAsGlobal = false;
11509 }
11510}
11511
// NOTE(review): the signature line of this definition is not present in this
// extract.
// When compiling for an OpenMP target device, writes SavedShouldMarkAsGlobal
// back into the runtime's ShouldMarkAsGlobal flag.
11513 if (CGM.getLangOpts().OpenMPIsTargetDevice)
11514 CGM.getOpenMPRuntime().ShouldMarkAsGlobal = SavedShouldMarkAsGlobal;
11515}
11516
// NOTE(review): the signature line of this definition is not present in this
// extract.
// Returns true immediately when not compiling for a target device or when
// ShouldMarkAsGlobal is false. Otherwise the result depends on whether the
// function named by GD is a declare target declaration and on the
// AlreadyEmittedTargetDecls set.
11518 if (!CGM.getLangOpts().OpenMPIsTargetDevice || !ShouldMarkAsGlobal)
11519 return true;
11520
11521 const auto *D = cast<FunctionDecl>(GD.getDecl());
11522 // Do not emit function if it is marked as declare target as it was already
11523 // emitted.
11524 if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(D)) {
11525 if (D->hasBody() && AlreadyEmittedTargetDecls.count(D) == 0) {
// Look the function up by mangled name: the result is true only if it already
// has a definition in the module, and false if no such function exists.
11526 if (auto *F = dyn_cast_or_null<llvm::Function>(
11527 CGM.GetGlobalValue(CGM.getMangledName(GD))))
11528 return !F->isDeclaration();
11529 return false;
11530 }
11531 return true;
11532 }
11533
// Record D; the result is true only when D had already been recorded.
11534 return !AlreadyEmittedTargetDecls.insert(D).second;
11535}
11536
// NOTE(review): the first signature line and two body lines (including the
// declaration of RealArgs) are not present in this extract.
// Emits a call to __kmpc_fork_teams passing the source location, the number of
// captured variables, the outlined function, and then each captured variable.
// Does nothing when CGF has no insertion point.
11538 const OMPExecutableDirective &D,
11539 SourceLocation Loc,
11540 llvm::Function *OutlinedFn,
11541 ArrayRef<llvm::Value *> CapturedVars) {
11542 if (!CGF.HaveInsertPoint())
11543 return;
11544
11545 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
11547
11548 // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn);
11549 llvm::Value *Args[] = {
11550 RTLoc,
11551 CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
11552 OutlinedFn};
// Fixed leading arguments first, followed by the captured variables.
11554 RealArgs.append(std::begin(Args), std::end(Args));
11555 RealArgs.append(CapturedVars.begin(), CapturedVars.end());
11556
11557 llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
11558 CGM.getModule(), OMPRTL___kmpc_fork_teams);
11559 CGF.EmitRuntimeCall(RTLFn, RealArgs);
11560}
11561
// NOTE(review): the first line of this definition's signature is not present in
// this extract.
// Emits a call to __kmpc_push_num_teams. NumTeams and ThreadLimit are each
// evaluated and cast to a signed 32-bit integer; a null expression is passed as
// the constant 0. Does nothing when CGF has no insertion point.
11563 const Expr *NumTeams,
11564 const Expr *ThreadLimit,
11565 SourceLocation Loc) {
11566 if (!CGF.HaveInsertPoint())
11567 return;
11568
11569 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
11570
11571 llvm::Value *NumTeamsVal =
11572 NumTeams
11573 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams),
11574 CGF.CGM.Int32Ty, /* isSigned = */ true)
11575 : CGF.Builder.getInt32(0);
11576
11577 llvm::Value *ThreadLimitVal =
11578 ThreadLimit
11579 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
11580 CGF.CGM.Int32Ty, /* isSigned = */ true)
11581 : CGF.Builder.getInt32(0);
11582
11583 // Build call __kmpc_push_num_teams(&loc, global_tid, num_teams, thread_limit)
11584 llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal,
11585 ThreadLimitVal};
11586 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
11587 CGM.getModule(), OMPRTL___kmpc_push_num_teams),
11588 PushNumTeamsArgs);
11589}
11590
// NOTE(review): the first line of this definition's signature is not present in
// this extract.
// Emits a call to __kmpc_set_thread_limit. ThreadLimit is evaluated and cast to
// a signed 32-bit integer; a null expression is passed as the constant 0.
// NOTE(review): no HaveInsertPoint() guard is visible here, unlike the
// neighbouring emit* helpers -- confirm whether that is intentional.
11592 const Expr *ThreadLimit,
11593 SourceLocation Loc) {
11594 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
11595 llvm::Value *ThreadLimitVal =
11596 ThreadLimit
11597 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
11598 CGF.CGM.Int32Ty, /* isSigned = */ true)
11599 : CGF.Builder.getInt32(0);
11600
11601 // Build call __kmpc_set_thread_limit(&loc, global_tid, thread_limit)
11602 llvm::Value *ThreadLimitArgs[] = {RTLoc, getThreadID(CGF, Loc),
11603 ThreadLimitVal};
11604 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
11605 CGM.getModule(), OMPRTL___kmpc_set_thread_limit),
11606 ThreadLimitArgs);
11607}
11608
// NOTE(review): the first signature line, the last parameter line (which
// presumably declares Info) and one line inside CustomMapperCB are not present
// in this extract.
// Builds the callbacks needed by OpenMPIRBuilder::createTargetData (map-info
// generation, body generation, device-address capture, custom mapper lookup),
// invokes it, and continues code generation at the insertion point it returns.
// Does nothing when CGF has no insertion point.
11610 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
11611 const Expr *Device, const RegionCodeGenTy &CodeGen,
11613 if (!CGF.HaveInsertPoint())
11614 return;
11615
11616 // Action used to replace the default codegen action and turn privatization
11617 // off.
11618 PrePostActionTy NoPrivAction;
11619
11620 using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
11621
// The 'if' condition stays null when the directive has no if clause.
11622 llvm::Value *IfCondVal = nullptr;
11623 if (IfCond)
11624 IfCondVal = CGF.EvaluateExprAsBool(IfCond);
11625
11626 // Emit device ID if any.
11627 llvm::Value *DeviceID = nullptr;
11628 if (Device) {
11629 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
11630 CGF.Int64Ty, /*isSigned=*/true);
11631 } else {
11632 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
11633 }
11634
11635 // Fill up the arrays with all the mapped variables.
11636 MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
// Callback: populate CombinedInfo at the insertion point supplied by the
// builder and return it.
11637 auto GenMapInfoCB =
11638 [&](InsertPointTy CodeGenIP) -> llvm::OpenMPIRBuilder::MapInfosTy & {
11639 CGF.Builder.restoreIP(CodeGenIP);
11640 // Get map clause information.
11641 MappableExprsHandler MEHandler(D, CGF);
11642 MEHandler.generateAllInfo(CombinedInfo, OMPBuilder);
11643
11644 auto FillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) {
11645 return emitMappingInformation(CGF, OMPBuilder, MapExpr);
11646 };
// Names are only filled in when debug info is enabled.
11647 if (CGM.getCodeGenOpts().getDebugInfo() !=
11648 llvm::codegenoptions::NoDebugInfo) {
11649 CombinedInfo.Names.resize(CombinedInfo.Exprs.size());
11650 llvm::transform(CombinedInfo.Exprs, CombinedInfo.Names.begin(),
11651 FillInfoMap);
11652 }
11653
11654 return CombinedInfo;
11655 };
11656 using BodyGenTy = llvm::OpenMPIRBuilder::BodyGenTy;
// Callback: emit the region body. Priv and DupNoPriv emit only when device
// addresses were captured; NoPriv emits only when none were. The two NoPriv
// variants install NoPrivAction before emitting.
11657 auto BodyCB = [&](InsertPointTy CodeGenIP, BodyGenTy BodyGenType) {
11658 CGF.Builder.restoreIP(CodeGenIP);
11659 switch (BodyGenType) {
11660 case BodyGenTy::Priv:
11661 if (!Info.CaptureDeviceAddrMap.empty())
11662 CodeGen(CGF);
11663 break;
11664 case BodyGenTy::DupNoPriv:
11665 if (!Info.CaptureDeviceAddrMap.empty()) {
11666 CodeGen.setAction(NoPrivAction);
11667 CodeGen(CGF);
11668 }
11669 break;
11670 case BodyGenTy::NoPriv:
11671 if (Info.CaptureDeviceAddrMap.empty()) {
11672 CodeGen.setAction(NoPrivAction);
11673 CodeGen(CGF);
11674 }
11675 break;
11676 }
11677 return InsertPointTy(CGF.Builder.GetInsertBlock(),
11678 CGF.Builder.GetInsertPoint());
11679 };
11680
// Callback: record NewDecl as the captured device address for the I-th entry
// when that entry has an associated declaration.
11681 auto DeviceAddrCB = [&](unsigned int I, llvm::Value *NewDecl) {
11682 if (const ValueDecl *DevVD = CombinedInfo.DevicePtrDecls[I]) {
11683 Info.CaptureDeviceAddrMap.try_emplace(DevVD, NewDecl);
11684 }
11685 };
11686
// Callback: return the mapper function for the I-th entry, or null when the
// entry has no user-defined mapper.
11687 auto CustomMapperCB = [&](unsigned int I) {
11688 llvm::Function *MFunc = nullptr;
11689 if (CombinedInfo.Mappers[I]) {
11690 Info.HasMapper = true;
11692 cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I]));
11693 }
11694 return MFunc;
11695 };
11696
11697 // Source location for the ident struct
11698 llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
11699
11700 InsertPointTy AllocaIP(CGF.AllocaInsertPt->getParent(),
11701 CGF.AllocaInsertPt->getIterator());
11702 InsertPointTy CodeGenIP(CGF.Builder.GetInsertBlock(),
11703 CGF.Builder.GetInsertPoint());
11704 llvm::OpenMPIRBuilder::LocationDescription OmpLoc(CodeGenIP);
11705 llvm::OpenMPIRBuilder::InsertPointTy AfterIP =
11706 cantFail(OMPBuilder.createTargetData(
11707 OmpLoc, AllocaIP, CodeGenIP, /*DeallocBlocks=*/{}, DeviceID,
11708 IfCondVal, Info, GenMapInfoCB, CustomMapperCB,
11709 /*MapperFunc=*/nullptr, BodyCB, DeviceAddrCB, RTLoc));
// Continue emitting after the construct created by the builder.
11710 CGF.Builder.restoreIP(AfterIP);
11711}
11712
// NOTE(review): the first signature line, the head of the assert, and the
// declarations of InputInfo and Info are not present in this extract.
// Emits the runtime call for a standalone target data directive (target enter
// data, target exit data, or target update). TargetThenGen builds the
// offloading arrays, then runs ThenGen either inlined or wrapped in a task.
// ThenGen emits the actual runtime call. Does nothing when CGF has no
// insertion point.
11714 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
11715 const Expr *Device) {
11716 if (!CGF.HaveInsertPoint())
11717 return;
11718
11722 "Expecting either target enter, exit data, or update directives.");
11723
// Filled in by TargetThenGen and read by ThenGen, which captures them by
// reference.
11725 llvm::Value *MapTypesArray = nullptr;
11726 llvm::Value *MapNamesArray = nullptr;
11727 // Generate the code for the opening of the data environment.
11728 auto &&ThenGen = [this, &D, Device, &InputInfo, &MapTypesArray,
11729 &MapNamesArray](CodeGenFunction &CGF, PrePostActionTy &) {
11730 // Emit device ID if any.
11731 llvm::Value *DeviceID = nullptr;
11732 if (Device) {
11733 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
11734 CGF.Int64Ty, /*isSigned=*/true);
11735 } else {
11736 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
11737 }
11738
11739 // Emit the number of elements in the offloading arrays.
11740 llvm::Constant *PointerNum =
11741 CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);
11742
11743 // Source location for the ident struct
11744 llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
11745
11746 SmallVector<llvm::Value *, 13> OffloadingArgs(
11747 {RTLoc, DeviceID, PointerNum,
11748 InputInfo.BasePointersArray.emitRawPointer(CGF),
11749 InputInfo.PointersArray.emitRawPointer(CGF),
11750 InputInfo.SizesArray.emitRawPointer(CGF), MapTypesArray, MapNamesArray,
11751 InputInfo.MappersArray.emitRawPointer(CGF)});
11752
11753 // Select the right runtime function call for each standalone
11754 // directive.
11755 const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
11756 RuntimeFunction RTLFn;
11757 switch (D.getDirectiveKind()) {
11758 case OMPD_target_enter_data:
11759 RTLFn = HasNowait ? OMPRTL___tgt_target_data_begin_nowait_mapper
11760 : OMPRTL___tgt_target_data_begin_mapper;
11761 break;
11762 case OMPD_target_exit_data:
11763 RTLFn = HasNowait ? OMPRTL___tgt_target_data_end_nowait_mapper
11764 : OMPRTL___tgt_target_data_end_mapper;
11765 break;
11766 case OMPD_target_update:
11767 RTLFn = HasNowait ? OMPRTL___tgt_target_data_update_nowait_mapper
11768 : OMPRTL___tgt_target_data_update_mapper;
11769 break;
// Every directive kind listed below, and any kind not listed, is rejected as
// unreachable.
11770 case OMPD_parallel:
11771 case OMPD_for:
11772 case OMPD_parallel_for:
11773 case OMPD_parallel_master:
11774 case OMPD_parallel_sections:
11775 case OMPD_for_simd:
11776 case OMPD_parallel_for_simd:
11777 case OMPD_cancel:
11778 case OMPD_cancellation_point:
11779 case OMPD_ordered:
11780 case OMPD_threadprivate:
11781 case OMPD_allocate:
11782 case OMPD_task:
11783 case OMPD_simd:
11784 case OMPD_tile:
11785 case OMPD_unroll:
11786 case OMPD_sections:
11787 case OMPD_section:
11788 case OMPD_single:
11789 case OMPD_master:
11790 case OMPD_critical:
11791 case OMPD_taskyield:
11792 case OMPD_barrier:
11793 case OMPD_taskwait:
11794 case OMPD_taskgroup:
11795 case OMPD_atomic:
11796 case OMPD_flush:
11797 case OMPD_depobj:
11798 case OMPD_scan:
11799 case OMPD_teams:
11800 case OMPD_target_data:
11801 case OMPD_distribute:
11802 case OMPD_distribute_simd:
11803 case OMPD_distribute_parallel_for:
11804 case OMPD_distribute_parallel_for_simd:
11805 case OMPD_teams_distribute:
11806 case OMPD_teams_distribute_simd:
11807 case OMPD_teams_distribute_parallel_for:
11808 case OMPD_teams_distribute_parallel_for_simd:
11809 case OMPD_declare_simd:
11810 case OMPD_declare_variant:
11811 case OMPD_begin_declare_variant:
11812 case OMPD_end_declare_variant:
11813 case OMPD_declare_target:
11814 case OMPD_end_declare_target:
11815 case OMPD_declare_reduction:
11816 case OMPD_declare_mapper:
11817 case OMPD_taskloop:
11818 case OMPD_taskloop_simd:
11819 case OMPD_master_taskloop:
11820 case OMPD_master_taskloop_simd:
11821 case OMPD_parallel_master_taskloop:
11822 case OMPD_parallel_master_taskloop_simd:
11823 case OMPD_target:
11824 case OMPD_target_simd:
11825 case OMPD_target_teams_distribute:
11826 case OMPD_target_teams_distribute_simd:
11827 case OMPD_target_teams_distribute_parallel_for:
11828 case OMPD_target_teams_distribute_parallel_for_simd:
11829 case OMPD_target_teams:
11830 case OMPD_target_parallel:
11831 case OMPD_target_parallel_for:
11832 case OMPD_target_parallel_for_simd:
11833 case OMPD_requires:
11834 case OMPD_metadirective:
11835 case OMPD_unknown:
11836 default:
11837 llvm_unreachable("Unexpected standalone target data directive.");
11838 break;
11839 }
// The *_nowait_mapper entry points receive four extra trailing arguments, all
// passed as null/zero here (presumably dependence counts and lists -- confirm
// against the runtime interface).
11840 if (HasNowait) {
11841 OffloadingArgs.push_back(llvm::Constant::getNullValue(CGF.Int32Ty));
11842 OffloadingArgs.push_back(llvm::Constant::getNullValue(CGF.VoidPtrTy));
11843 OffloadingArgs.push_back(llvm::Constant::getNullValue(CGF.Int32Ty));
11844 OffloadingArgs.push_back(llvm::Constant::getNullValue(CGF.VoidPtrTy));
11845 }
11846 CGF.EmitRuntimeCall(
11847 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), RTLFn),
11848 OffloadingArgs);
11849 };
11850
11851 auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
11852 &MapNamesArray](CodeGenFunction &CGF,
11853 PrePostActionTy &) {
11854 // Fill up the arrays with all the mapped variables.
11855 MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
11857 MappableExprsHandler MEHandler(D, CGF);
11858 genMapInfo(MEHandler, CGF, CombinedInfo, OMPBuilder);
11859 emitOffloadingArraysAndArgs(CGF, CombinedInfo, Info, OMPBuilder,
11860 /*IsNonContiguous=*/true, /*ForEndCall=*/false);
11861
// A depend or nowait clause means the call is emitted through the task-based
// path below.
11862 bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() ||
11863 D.hasClausesOfKind<OMPNowaitClause>();
11864
// Publish the generated arrays to ThenGen through InputInfo and the two
// captured array pointers.
11865 InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
11866 InputInfo.BasePointersArray = Address(Info.RTArgs.BasePointersArray,
11867 CGF.VoidPtrTy, CGM.getPointerAlign());
11868 InputInfo.PointersArray = Address(Info.RTArgs.PointersArray, CGF.VoidPtrTy,
11869 CGM.getPointerAlign());
11870 InputInfo.SizesArray =
11871 Address(Info.RTArgs.SizesArray, CGF.Int64Ty, CGM.getPointerAlign());
11872 InputInfo.MappersArray =
11873 Address(Info.RTArgs.MappersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
11874 MapTypesArray = Info.RTArgs.MapTypesArray;
11875 MapNamesArray = Info.RTArgs.MapNamesArray;
11876 if (RequiresOuterTask)
11877 CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
11878 else
11879 emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
11880 };
11881
// With an if clause, the else branch emits nothing.
11882 if (IfCond) {
11883 emitIfClause(CGF, IfCond, TargetThenGen,
11884 [](CodeGenFunction &CGF, PrePostActionTy &) {});
11885 } else {
11886 RegionCodeGenTy ThenRCG(TargetThenGen);
11887 ThenRCG(CGF);
11888 }
11889}
11890
// NOTE(review): the line(s) carrying this function's name and parameter list
// are not present in this extract.
// Determines the "characteristic data type" (CDT) of FD as described below and
// returns C.getTypeSize() of it, or 0 when the return type is null.
11891static unsigned
11894 // Every vector variant of a SIMD-enabled function has a vector length (VLEN).
11895 // If OpenMP clause "simdlen" is used, the VLEN is the value of the argument
11896 // of that clause. The VLEN value must be a power of 2.
11897 // Otherwise the notion of the function's "characteristic data type" (CDT)
11898 // is used to compute the vector length.
11899 // CDT is defined in the following order:
11900 // a) For non-void function, the CDT is the return type.
11901 // b) If the function has any non-uniform, non-linear parameters, then the
11902 // CDT is the type of the first such parameter.
11903 // c) If the CDT determined by a) or b) above is struct, union, or class
11904 // type which is pass-by-value (except for the type that maps to the
11905 // built-in complex data type), the characteristic data type is int.
11906 // d) If none of the above three cases is applicable, the CDT is int.
11907 // The VLEN is then determined based on the CDT and the size of vector
11908 // register of that ISA for which current vector version is generated. The
11909 // VLEN is computed using the formula below:
11910 // VLEN = sizeof(vector_register) / sizeof(CDT),
11911 // where vector register size specified in section 3.2.1 Registers and the
11912 // Stack Frame of original AMD64 ABI document.
11913 QualType RetType = FD->getReturnType();
11914 if (RetType.isNull())
11915 return 0;
11916 ASTContext &C = FD->getASTContext();
11917 QualType CDT;
// Rule a). The isNull() test is redundant after the early return above.
11918 if (!RetType.isNull() && !RetType->isVoidType()) {
11919 CDT = RetType;
11920 } else {
// Rule b). For a C++ method, slot 0 of ParamAttrs describes the object itself,
// so the explicit parameters start at Offset 1.
11921 unsigned Offset = 0;
11922 if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) {
11923 if (ParamAttrs[Offset].Kind ==
11924 llvm::OpenMPIRBuilder::DeclareSimdKindTy::Vector)
11925 CDT = C.getPointerType(C.getCanonicalTagType(MD->getParent()));
11926 ++Offset;
11927 }
11928 if (CDT.isNull()) {
// Use the type of the first parameter whose kind is Vector.
11929 for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
11930 if (ParamAttrs[I + Offset].Kind ==
11931 llvm::OpenMPIRBuilder::DeclareSimdKindTy::Vector) {
11932 CDT = FD->getParamDecl(I)->getType();
11933 break;
11934 }
11935 }
11936 }
11937 }
// Rule d): fall back to int when nothing was selected.
11938 if (CDT.isNull())
11939 CDT = C.IntTy;
11940 CDT = CDT->getCanonicalTypeUnqualified();
// Rule c): record and union types are replaced by int.
11941 if (CDT->isRecordType() || CDT->isUnionType())
11942 CDT = C.IntTy;
11943 return C.getTypeSize(CDT);
11944}
11945
11946// These are the functions that are needed to mangle the names of the
11947// vector functions generated by the compiler, according to the rules
11948// defined in the "Vector Function ABI specifications for AArch64",
11949// available at
11950// https://developer.arm.com/products/software-development-tools/hpc/arm-compiler-for-hpc/vector-function-abi.
11951
11952/// Maps To Vector (MTV), as defined in 4.1.1 of the AAVFABI (2021Q1).
// NOTE(review): the first line of this definition's signature is not present in
// this extract.
// Returns false for void types, for Uniform, LinearUVal and LinearRef
// parameters, and for Linear/LinearVal parameters that are not references.
// Returns true in every other case.
11954 llvm::OpenMPIRBuilder::DeclareSimdKindTy Kind) {
11955 QT = QT.getCanonicalType();
11956
11957 if (QT->isVoidType())
11958 return false;
11959
11960 if (Kind == llvm::OpenMPIRBuilder::DeclareSimdKindTy::Uniform)
11961 return false;
11962
11963 if (Kind == llvm::OpenMPIRBuilder::DeclareSimdKindTy::LinearUVal ||
11964 Kind == llvm::OpenMPIRBuilder::DeclareSimdKindTy::LinearRef)
11965 return false;
11966
11967 if ((Kind == llvm::OpenMPIRBuilder::DeclareSimdKindTy::Linear ||
11968 Kind == llvm::OpenMPIRBuilder::DeclareSimdKindTy::LinearVal) &&
11969 !QT->isReferenceType())
11970 return false;
11971
11972 return true;
11973}
11974
11975/// Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI.
// NOTE(review): the signature line of this definition is not present in this
// extract.
// Returns true only for floating-point, integer and pointer types whose
// C.getTypeSize() is one of 8, 16, 32, 64 or 128.
11977 QT = QT.getCanonicalType();
11978 unsigned Size = C.getTypeSize(QT);
11979
11980 // Only scalars and complex within 16 bytes wide set PBV to true.
11981 if (Size != 8 && Size != 16 && Size != 32 && Size != 64 && Size != 128)
11982 return false;
11983
11984 if (QT->isFloatingType())
11985 return true;
11986
11987 if (QT->isIntegerType())
11988 return true;
11989
11990 if (QT->isPointerType())
11991 return true;
11992
11993 // TODO: Add support for complex types (section 3.1.2, item 2).
11994
11995 return false;
11996}
11997
11998/// Computes the lane size (LS) of a return type or of an input parameter,
11999/// as defined by `LS(P)` in 3.2.1 of the AAVFABI.
12000/// TODO: Add support for references, section 3.2.1, item 1.
// NOTE(review): the line declaring PTy is not present in this extract;
// presumably it is the pointee type of QT -- confirm against the full file.
12001static unsigned getAArch64LS(QualType QT,
12002 llvm::OpenMPIRBuilder::DeclareSimdKindTy Kind,
12003 ASTContext &C) {
// A pointer that does not map to a vector uses the size of PTy when PTy is
// pass-by-value.
12004 if (!getAArch64MTV(QT, Kind) && QT.getCanonicalType()->isPointerType()) {
12006 if (getAArch64PBV(PTy, C))
12007 return C.getTypeSize(PTy);
12008 }
12009 if (getAArch64PBV(QT, C))
12010 return C.getTypeSize(QT);
12011
// Everything else falls back to the size of uintptr_t.
12012 return C.getTypeSize(C.getUIntPtrType());
12013}
12014
12015// Get Narrowest Data Size (NDS) and Widest Data Size (WDS) from the
12016// signature of the scalar function, as defined in 3.2.2 of the
12017// AAVFABI.
// NOTE(review): the line(s) with the function name and parameters, and the
// declarations of Sizes and QT, are not present in this extract.
// Returns a tuple (smallest lane size, largest lane size, OutputBecomesInput)
// computed over the return type and every parameter of FD.
12018static std::tuple<unsigned, unsigned, bool>
12021 QualType RetType = FD->getReturnType().getCanonicalType();
12022
12023 ASTContext &C = FD->getASTContext();
12024
12025 bool OutputBecomesInput = false;
12026
12028 if (!RetType->isVoidType()) {
12029 Sizes.push_back(getAArch64LS(
12030 RetType, llvm::OpenMPIRBuilder::DeclareSimdKindTy::Vector, C));
// The flag is set when the return value maps to a vector but is not
// pass-by-value.
12031 if (!getAArch64PBV(RetType, C) && getAArch64MTV(RetType, {}))
12032 OutputBecomesInput = true;
12033 }
12034 for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
12036 Sizes.push_back(getAArch64LS(QT, ParamAttrs[I].Kind, C));
12037 }
12038
12039 assert(!Sizes.empty() && "Unable to determine NDS and WDS.");
12040 // The LS of a function parameter / return value can only be a power
12041 // of 2, starting from 8 bits, up to 128.
12042 assert(llvm::all_of(Sizes,
12043 [](unsigned Size) {
12044 return Size == 8 || Size == 16 || Size == 32 ||
12045 Size == 64 || Size == 128;
12046 }) &&
12047 "Invalid size");
12048
12049 return std::make_tuple(*llvm::min_element(Sizes), *llvm::max_element(Sizes),
12050 OutputBecomesInput);
12051}
12052
12053static llvm::OpenMPIRBuilder::DeclareSimdBranch
12054convertDeclareSimdBranch(OMPDeclareSimdDeclAttr::BranchStateTy State) {
12055 switch (State) {
12056 case OMPDeclareSimdDeclAttr::BS_Undefined:
12057 return llvm::OpenMPIRBuilder::DeclareSimdBranch::Undefined;
12058 case OMPDeclareSimdDeclAttr::BS_Inbranch:
12059 return llvm::OpenMPIRBuilder::DeclareSimdBranch::Inbranch;
12060 case OMPDeclareSimdDeclAttr::BS_Notinbranch:
12061 return llvm::OpenMPIRBuilder::DeclareSimdBranch::Notinbranch;
12062 }
12063 llvm_unreachable("unexpected declare simd branch state");
12064}
12065
12066// Check the values provided via `simdlen` by the user.
// NOTE(review): the first line of this definition's signature is not present in
// this extract.
// Returns true when UserVLEN is acceptable for the given ISA character ('n' or
// 's', as tested below). Otherwise reports a warning at SLoc and returns false.
// A UserVLEN of 0 passes every check.
12068 unsigned UserVLEN, unsigned WDS, char ISA) {
12069 // 1. A `simdlen(1)` doesn't produce vector signatures.
12070 if (UserVLEN == 1) {
12071 CGM.getDiags().Report(SLoc, diag::warn_simdlen_1_no_effect);
12072 return false;
12073 }
12074
12075 // 2. Section 3.3.1, item 1: user input must be a power of 2 for Advanced
12076 // SIMD.
12077 if (ISA == 'n' && UserVLEN && !llvm::isPowerOf2_32(UserVLEN)) {
12078 CGM.getDiags().Report(SLoc, diag::warn_simdlen_requires_power_of_2);
12079 return false;
12080 }
12081
12082 // 3. Section 3.4.1: SVE fixed length must obey the architectural limits.
// UserVLEN * WDS must not exceed 2048 and must be a multiple of 128.
12083 if (ISA == 's' && UserVLEN != 0 &&
12084 ((UserVLEN * WDS > 2048) || (UserVLEN * WDS % 128 != 0))) {
12085 CGM.getDiags().Report(SLoc, diag::warn_simdlen_must_fit_lanes) << WDS;
12086 return false;
12087 }
12088
12089 return true;
12090}
12091
// NOTE(review): the first signature line and the lines declaring ParamAttrs
// and Result are not present in this extract.
// For every redeclaration of FD (most recent first) and every
// OMPDeclareSimdDeclAttr on it, collects per-parameter attributes (uniform,
// aligned, linear), evaluates simdlen and the branch state, and hands them to
// the X86 or AArch64 declare-simd emitter in OMPBuilder.
12093 llvm::Function *Fn) {
12094 ASTContext &C = CGM.getContext();
12095 FD = FD->getMostRecentDecl();
12096 while (FD) {
12097 // Map params to their positions in function decl.
12098 llvm::DenseMap<const Decl *, unsigned> ParamPositions;
// A C++ method reserves position 0 for the method itself, which stands in for
// 'this' in the clause handling below.
12099 if (isa<CXXMethodDecl>(FD))
12100 ParamPositions.try_emplace(FD, 0);
12101 unsigned ParamPos = ParamPositions.size();
12102 for (const ParmVarDecl *P : FD->parameters()) {
12103 ParamPositions.try_emplace(P->getCanonicalDecl(), ParamPos);
12104 ++ParamPos;
12105 }
12106 for (const auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) {
12108 ParamPositions.size());
12109 // Mark uniform parameters.
12110 for (const Expr *E : Attr->uniforms()) {
12111 E = E->IgnoreParenImpCasts();
12112 unsigned Pos;
12113 if (isa<CXXThisExpr>(E)) {
12114 Pos = ParamPositions[FD];
12115 } else {
12116 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
12117 ->getCanonicalDecl();
12118 auto It = ParamPositions.find(PVD);
12119 assert(It != ParamPositions.end() && "Function parameter not found");
12120 Pos = It->second;
12121 }
12122 ParamAttrs[Pos].Kind =
12123 llvm::OpenMPIRBuilder::DeclareSimdKindTy::Uniform;
12124 }
12125 // Get alignment info.
// NI walks the alignment expressions in step with aligneds().
12126 auto *NI = Attr->alignments_begin();
12127 for (const Expr *E : Attr->aligneds()) {
12128 E = E->IgnoreParenImpCasts();
12129 unsigned Pos;
12130 QualType ParmTy;
12131 if (isa<CXXThisExpr>(E)) {
12132 Pos = ParamPositions[FD];
12133 ParmTy = E->getType();
12134 } else {
12135 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
12136 ->getCanonicalDecl();
12137 auto It = ParamPositions.find(PVD);
12138 assert(It != ParamPositions.end() && "Function parameter not found");
12139 Pos = It->second;
12140 ParmTy = PVD->getType();
12141 }
// Use the explicit alignment when given; otherwise derive one from the
// default SIMD alignment for the parameter type.
12142 ParamAttrs[Pos].Alignment =
12143 (*NI)
12144 ? (*NI)->EvaluateKnownConstInt(C)
12145 : llvm::APSInt::getUnsigned(
12146 C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy))
12147 .getQuantity());
12148 ++NI;
12149 }
12150 // Mark linear parameters.
// SI (steps) and MI (modifiers) advance in step with linears().
12151 auto *SI = Attr->steps_begin();
12152 auto *MI = Attr->modifiers_begin();
12153 for (const Expr *E : Attr->linears()) {
12154 E = E->IgnoreParenImpCasts();
12155 unsigned Pos;
12156 bool IsReferenceType = false;
12157 // Rescaling factor needed to compute the linear parameter
12158 // value in the mangled name.
12159 unsigned PtrRescalingFactor = 1;
12160 if (isa<CXXThisExpr>(E)) {
12161 Pos = ParamPositions[FD];
12162 auto *P = cast<PointerType>(E->getType());
12163 PtrRescalingFactor = CGM.getContext()
12164 .getTypeSizeInChars(P->getPointeeType())
12165 .getQuantity();
12166 } else {
12167 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
12168 ->getCanonicalDecl();
12169 auto It = ParamPositions.find(PVD);
12170 assert(It != ParamPositions.end() && "Function parameter not found");
12171 Pos = It->second;
12172 if (auto *P = dyn_cast<PointerType>(PVD->getType()))
12173 PtrRescalingFactor = CGM.getContext()
12174 .getTypeSizeInChars(P->getPointeeType())
12175 .getQuantity();
12176 else if (PVD->getType()->isReferenceType()) {
12177 IsReferenceType = true;
12178 PtrRescalingFactor =
12179 CGM.getContext()
12180 .getTypeSizeInChars(PVD->getType().getNonReferenceType())
12181 .getQuantity();
12182 }
12183 }
// An explicit ref/uval modifier takes precedence; otherwise a reference
// parameter becomes LinearVal and anything else plain Linear.
12184 llvm::OpenMPIRBuilder::DeclareSimdAttrTy &ParamAttr = ParamAttrs[Pos];
12185 if (*MI == OMPC_LINEAR_ref)
12186 ParamAttr.Kind = llvm::OpenMPIRBuilder::DeclareSimdKindTy::LinearRef;
12187 else if (*MI == OMPC_LINEAR_uval)
12188 ParamAttr.Kind = llvm::OpenMPIRBuilder::DeclareSimdKindTy::LinearUVal;
12189 else if (IsReferenceType)
12190 ParamAttr.Kind = llvm::OpenMPIRBuilder::DeclareSimdKindTy::LinearVal;
12191 else
12192 ParamAttr.Kind = llvm::OpenMPIRBuilder::DeclareSimdKindTy::Linear;
12193 // Assuming a stride of 1, for `linear` without modifiers.
12194 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(1);
12195 if (*SI) {
// A step that does not evaluate to an integer constant is treated as a
// reference to another parameter, and that parameter's position is stored
// instead of a stride value.
// NOTE(review): cast<> is used inside an 'if' condition below; if it asserts
// rather than yielding null, the condition is always true -- confirm whether
// dyn_cast<> was intended.
12197 if (!(*SI)->EvaluateAsInt(Result, C, Expr::SE_AllowSideEffects)) {
12198 if (const auto *DRE =
12199 cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) {
12200 if (const auto *StridePVD =
12201 dyn_cast<ParmVarDecl>(DRE->getDecl())) {
12202 ParamAttr.HasVarStride = true;
12203 auto It = ParamPositions.find(StridePVD->getCanonicalDecl());
12204 assert(It != ParamPositions.end() &&
12205 "Function parameter not found");
12206 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(It->second);
12207 }
12208 }
12209 } else {
12210 ParamAttr.StrideOrArg = Result.Val.getInt();
12211 }
12212 }
12213 // If we are using a linear clause on a pointer, we need to
12214 // rescale the value of linear_step with the byte size of the
12215 // pointee type.
12216 if (!ParamAttr.HasVarStride &&
12217 (ParamAttr.Kind ==
12218 llvm::OpenMPIRBuilder::DeclareSimdKindTy::Linear ||
12219 ParamAttr.Kind ==
12220 llvm::OpenMPIRBuilder::DeclareSimdKindTy::LinearRef))
12221 ParamAttr.StrideOrArg = ParamAttr.StrideOrArg * PtrRescalingFactor;
12222 ++SI;
12223 ++MI;
12224 }
// VLENVal keeps its default-constructed value when no simdlen was given.
12225 llvm::APSInt VLENVal;
12226 SourceLocation ExprLoc;
12227 const Expr *VLENExpr = Attr->getSimdlen();
12228 if (VLENExpr) {
12229 VLENVal = VLENExpr->EvaluateKnownConstInt(C);
12230 ExprLoc = VLENExpr->getExprLoc();
12231 }
12232 llvm::OpenMPIRBuilder::DeclareSimdBranch State =
12233 convertDeclareSimdBranch(Attr->getBranchState());
// Dispatch on the target: X86 uses the CDT size; AArch64 uses NDS/WDS and
// validates simdlen for SVE ('s') or, failing that, NEON ('n').
12234 if (CGM.getTriple().isX86()) {
12235 unsigned NumElts = evaluateCDTSize(FD, ParamAttrs);
12236 assert(NumElts && "Non-zero simdlen/cdtsize expected");
12237 OMPBuilder.emitX86DeclareSimdFunction(Fn, NumElts, VLENVal, ParamAttrs,
12238 State);
12239 } else if (CGM.getTriple().getArch() == llvm::Triple::aarch64) {
12240 unsigned VLEN = VLENVal.getExtValue();
12241 // Get basic data for building the vector signature.
12242 const auto Data = getNDSWDS(FD, ParamAttrs);
12243 const unsigned NDS = std::get<0>(Data);
12244 const unsigned WDS = std::get<1>(Data);
12245 const bool OutputBecomesInput = std::get<2>(Data);
12246 if (CGM.getTarget().hasFeature("sve")) {
12247 if (validateAArch64Simdlen(CGM, ExprLoc, VLEN, WDS, 's'))
12248 OMPBuilder.emitAArch64DeclareSimdFunction(
12249 Fn, VLEN, ParamAttrs, State, 's', NDS, OutputBecomesInput);
12250 } else if (CGM.getTarget().hasFeature("neon")) {
12251 if (validateAArch64Simdlen(CGM, ExprLoc, VLEN, WDS, 'n'))
12252 OMPBuilder.emitAArch64DeclareSimdFunction(
12253 Fn, VLEN, ParamAttrs, State, 'n', NDS, OutputBecomesInput);
12254 }
12255 }
12256 }
12257 FD = FD->getPreviousDecl();
12258 }
12259}
12260
12261namespace {
12262/// Cleanup action for doacross support.
12263class DoacrossCleanupTy final : public EHScopeStack::Cleanup {
12264public:
12265 static const int DoacrossFinArgs = 2;
12266
12267private:
12268 llvm::FunctionCallee RTLFn;
12269 llvm::Value *Args[DoacrossFinArgs];
12270
12271public:
12272 DoacrossCleanupTy(llvm::FunctionCallee RTLFn,
12273 ArrayRef<llvm::Value *> CallArgs)
12274 : RTLFn(RTLFn) {
12275 assert(CallArgs.size() == DoacrossFinArgs);
12276 std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
12277 }
12278 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
12279 if (!CGF.HaveInsertPoint())
12280 return;
12281 CGF.EmitRuntimeCall(RTLFn, Args);
12282 }
12283};
12284} // namespace
12285
// NOTE(review): the first signature line, the tail of the getConstantArrayType
// call, and the head of the pointer cast in Args are not present in this
// extract.
// Builds an array of kmp_dim records, one per entry in NumIterations, with
// 'upper' set to the iteration count and 'stride' set to 1. Emits a call to
// __kmpc_doacross_init on that array and pushes a cleanup that calls
// __kmpc_doacross_fini. Does nothing when CGF has no insertion point.
12287 const OMPLoopDirective &D,
12288 ArrayRef<Expr *> NumIterations) {
12289 if (!CGF.HaveInsertPoint())
12290 return;
12291
12292 ASTContext &C = CGM.getContext();
12293 QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
12294 RecordDecl *RD;
// The kmp_dim record type is built once and cached in KmpDimTy.
12295 if (KmpDimTy.isNull()) {
12296 // Build struct kmp_dim { // loop bounds info casted to kmp_int64
12297 // kmp_int64 lo; // lower
12298 // kmp_int64 up; // upper
12299 // kmp_int64 st; // stride
12300 // };
12301 RD = C.buildImplicitRecord("kmp_dim");
12302 RD->startDefinition();
12303 addFieldToRecordDecl(C, RD, Int64Ty);
12304 addFieldToRecordDecl(C, RD, Int64Ty);
12305 addFieldToRecordDecl(C, RD, Int64Ty);
12306 RD->completeDefinition();
12307 KmpDimTy = C.getCanonicalTagType(RD);
12308 } else {
12309 RD = KmpDimTy->castAsRecordDecl();
12310 }
12311 llvm::APInt Size(/*numBits=*/32, NumIterations.size());
12312 QualType ArrayTy = C.getConstantArrayType(KmpDimTy, Size, nullptr,
12314
// Null-initialize the whole array; only 'upper' and 'stride' are stored
// explicitly below.
12315 Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
12316 CGF.EmitNullInitialization(DimsAddr, ArrayTy);
// Field indices within kmp_dim, in declaration order.
12317 enum { LowerFD = 0, UpperFD, StrideFD };
12318 // Fill dims with data.
12319 for (unsigned I = 0, E = NumIterations.size(); I < E; ++I) {
12320 LValue DimsLVal = CGF.MakeAddrLValue(
12321 CGF.Builder.CreateConstArrayGEP(DimsAddr, I), KmpDimTy);
12322 // dims.upper = num_iterations;
12323 LValue UpperLVal = CGF.EmitLValueForField(
12324 DimsLVal, *std::next(RD->field_begin(), UpperFD));
12325 llvm::Value *NumIterVal = CGF.EmitScalarConversion(
12326 CGF.EmitScalarExpr(NumIterations[I]), NumIterations[I]->getType(),
12327 Int64Ty, NumIterations[I]->getExprLoc());
12328 CGF.EmitStoreOfScalar(NumIterVal, UpperLVal);
12329 // dims.stride = 1;
12330 LValue StrideLVal = CGF.EmitLValueForField(
12331 DimsLVal, *std::next(RD->field_begin(), StrideFD));
12332 CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1),
12333 StrideLVal);
12334 }
12335
12336 // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid,
12337 // kmp_int32 num_dims, struct kmp_dim * dims);
12338 llvm::Value *Args[] = {
12339 emitUpdateLocation(CGF, D.getBeginLoc()),
12340 getThreadID(CGF, D.getBeginLoc()),
12341 llvm::ConstantInt::getSigned(CGM.Int32Ty, NumIterations.size()),
12343 CGF.Builder.CreateConstArrayGEP(DimsAddr, 0).emitRawPointer(CGF),
12344 CGM.VoidPtrTy)};
12345
12346 llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
12347 CGM.getModule(), OMPRTL___kmpc_doacross_init);
12348 CGF.EmitRuntimeCall(RTLFn, Args);
// Register the matching finalization call as a cleanup for both normal and
// exceptional exits; its arguments use the directive's end location.
12349 llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = {
12350 emitUpdateLocation(CGF, D.getEndLoc()), getThreadID(CGF, D.getEndLoc())};
12351 llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
12352 CGM.getModule(), OMPRTL___kmpc_doacross_fini);
12353 CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
12354 llvm::ArrayRef(FiniArgs));
12355}
12356
// NOTE(review): the line with the function name and first parameters, and the
// line that begins the declaration of ArrayTy, are not present in this extract.
// Stores each loop counter of clause C, converted to a 64-bit integer, into a
// temporary array. Then calls __kmpc_doacross_post when C is a source clause,
// or __kmpc_doacross_wait when it is a sink clause, passing ULoc, ThreadID and
// the array.
12357template <typename T>
12359 const T *C, llvm::Value *ULoc,
12360 llvm::Value *ThreadID) {
12361 QualType Int64Ty =
12362 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
12363 llvm::APInt Size(/*numBits=*/32, C->getNumLoops());
12365 Int64Ty, Size, nullptr, ArraySizeModifier::Normal, 0);
12366 Address CntAddr = CGF.CreateMemTemp(ArrayTy, ".cnt.addr");
// One array slot per loop, each holding the converted counter value.
12367 for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) {
12368 const Expr *CounterVal = C->getLoopData(I);
12369 assert(CounterVal);
12370 llvm::Value *CntVal = CGF.EmitScalarConversion(
12371 CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty,
12372 CounterVal->getExprLoc());
12373 CGF.EmitStoreOfScalar(CntVal, CGF.Builder.CreateConstArrayGEP(CntAddr, I),
12374 /*Volatile=*/false, Int64Ty);
12375 }
12376 llvm::Value *Args[] = {
12377 ULoc, ThreadID,
12378 CGF.Builder.CreateConstArrayGEP(CntAddr, 0).emitRawPointer(CGF)};
12379 llvm::FunctionCallee RTLFn;
12380 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
// ODK answers whether C is a source or a sink for clause type T.
12381 OMPDoacrossKind<T> ODK;
12382 if (ODK.isSource(C)) {
12383 RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
12384 OMPRTL___kmpc_doacross_post);
12385 } else {
12386 assert(ODK.isSink(C) && "Expect sink modifier.");
12387 RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
12388 OMPRTL___kmpc_doacross_wait);
12389 }
12390 CGF.EmitRuntimeCall(RTLFn, Args);
12391}
12392
12394 const OMPDependClause *C) {
12396 CGF, CGM, C, emitUpdateLocation(CGF, C->getBeginLoc()),
12397 getThreadID(CGF, C->getBeginLoc()));
12398}
12399
12401 const OMPDoacrossClause *C) {
12403 CGF, CGM, C, emitUpdateLocation(CGF, C->getBeginLoc()),
12404 getThreadID(CGF, C->getBeginLoc()));
12405}
12406
12408 llvm::FunctionCallee Callee,
12409 ArrayRef<llvm::Value *> Args) const {
12410 assert(Loc.isValid() && "Outlined function call location must be valid.");
12412
12413 if (auto *Fn = dyn_cast<llvm::Function>(Callee.getCallee())) {
12414 if (Fn->doesNotThrow()) {
12415 CGF.EmitNounwindRuntimeCall(Fn, Args);
12416 return;
12417 }
12418 }
12419 CGF.EmitRuntimeCall(Callee, Args);
12420}
12421
12423 CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn,
12424 ArrayRef<llvm::Value *> Args) const {
12425 emitCall(CGF, Loc, OutlinedFn, Args);
12426}
12427
12429 if (const auto *FD = dyn_cast<FunctionDecl>(D))
12430 if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(FD))
12432}
12433
12435 const VarDecl *NativeParam,
12436 const VarDecl *TargetParam) const {
12437 return CGF.GetAddrOfLocalVar(NativeParam);
12438}
12439
12440/// Return the allocator value emitted from \p Allocator, or a null constant
12441/// when \p Allocator is null (no allocator specified).
///
/// \param CGF        Function being emitted; used to evaluate the allocator
///                   expression and to convert the result.
/// \param Allocator  Allocator expression; may be null.
/// \returns the allocator scalar converted to the AST 'void *' type, or a null
///          constant when no allocator expression was supplied.
12442static llvm::Value *getAllocatorVal(CodeGenFunction &CGF,
12443 const Expr *Allocator) {
12444 llvm::Value *AllocVal;
12445 if (Allocator) {
12446 AllocVal = CGF.EmitScalarExpr(Allocator);
12447 // According to the standard, the original allocator type is an enum
12448 // (integer). Convert to pointer type, if required.
12449 AllocVal = CGF.EmitScalarConversion(AllocVal, Allocator->getType(),
12450 CGF.getContext().VoidPtrTy,
12451 Allocator->getExprLoc());
12452 } else {
12453 // If no allocator specified, it defaults to the null allocator.
12454 AllocVal = llvm::Constant::getNullValue(
 // NOTE(review): the type argument of getNullValue( is not visible in this
 // copy of the file (embedded numbering jumps from 12454 to 12456) — confirm
 // against the upstream source before relying on this block.
12456 }
12457 return AllocVal;
12458}
12459
12460/// Return the alignment from an allocate directive if present.
12461static llvm::Value *getAlignmentValue(CodeGenModule &CGM, const VarDecl *VD) {
12462 std::optional<CharUnits> AllocateAlignment = CGM.getOMPAllocateAlignment(VD);
12463
12464 if (!AllocateAlignment)
12465 return nullptr;
12466
12467 return llvm::ConstantInt::get(CGM.SizeTy, AllocateAlignment->getQuantity());
12468}
12469
12471 const VarDecl *VD) {
12472 if (!VD)
12473 return Address::invalid();
12474 Address UntiedAddr = Address::invalid();
12475 Address UntiedRealAddr = Address::invalid();
12476 auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
12477 if (It != FunctionToUntiedTaskStackMap.end()) {
12478 const UntiedLocalVarsAddressesMap &UntiedData =
12479 UntiedLocalVarsStack[It->second];
12480 auto I = UntiedData.find(VD);
12481 if (I != UntiedData.end()) {
12482 UntiedAddr = I->second.first;
12483 UntiedRealAddr = I->second.second;
12484 }
12485 }
12486 const VarDecl *CVD = VD->getCanonicalDecl();
12487 if (CVD->hasAttr<OMPAllocateDeclAttr>()) {
12488 // Use the default allocation.
12489 if (!isAllocatableDecl(VD))
12490 return UntiedAddr;
12491 llvm::Value *Size;
12492 CharUnits Align = CGM.getContext().getDeclAlign(CVD);
12493 if (CVD->getType()->isVariablyModifiedType()) {
12494 Size = CGF.getTypeSize(CVD->getType());
12495 // Align the size: ((size + align - 1) / align) * align
12496 Size = CGF.Builder.CreateNUWAdd(
12497 Size, CGM.getSize(Align - CharUnits::fromQuantity(1)));
12498 Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align));
12499 Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align));
12500 } else {
12501 CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType());
12502 Size = CGM.getSize(Sz.alignTo(Align));
12503 }
12504 llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc());
12505 const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
12506 const Expr *Allocator = AA->getAllocator();
12507 llvm::Value *AllocVal = getAllocatorVal(CGF, Allocator);
12508 llvm::Value *Alignment = getAlignmentValue(CGM, CVD);
12510 Args.push_back(ThreadID);
12511 if (Alignment)
12512 Args.push_back(Alignment);
12513 Args.push_back(Size);
12514 Args.push_back(AllocVal);
12515 llvm::omp::RuntimeFunction FnID =
12516 Alignment ? OMPRTL___kmpc_aligned_alloc : OMPRTL___kmpc_alloc;
12517 llvm::Value *Addr = CGF.EmitRuntimeCall(
12518 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), FnID), Args,
12519 getName({CVD->getName(), ".void.addr"}));
12520 llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
12521 CGM.getModule(), OMPRTL___kmpc_free);
12522 QualType Ty = CGM.getContext().getPointerType(CVD->getType());
12524 Addr, CGF.ConvertTypeForMem(Ty), getName({CVD->getName(), ".addr"}));
12525 if (UntiedAddr.isValid())
12526 CGF.EmitStoreOfScalar(Addr, UntiedAddr, /*Volatile=*/false, Ty);
12527
12528 // Cleanup action for allocate support.
12529 class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup {
12530 llvm::FunctionCallee RTLFn;
12531 SourceLocation::UIntTy LocEncoding;
12532 Address Addr;
12533 const Expr *AllocExpr;
12534
12535 public:
12536 OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn,
12537 SourceLocation::UIntTy LocEncoding, Address Addr,
12538 const Expr *AllocExpr)
12539 : RTLFn(RTLFn), LocEncoding(LocEncoding), Addr(Addr),
12540 AllocExpr(AllocExpr) {}
12541 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
12542 if (!CGF.HaveInsertPoint())
12543 return;
12544 llvm::Value *Args[3];
12545 Args[0] = CGF.CGM.getOpenMPRuntime().getThreadID(
12546 CGF, SourceLocation::getFromRawEncoding(LocEncoding));
12548 Addr.emitRawPointer(CGF), CGF.VoidPtrTy);
12549 llvm::Value *AllocVal = getAllocatorVal(CGF, AllocExpr);
12550 Args[2] = AllocVal;
12551 CGF.EmitRuntimeCall(RTLFn, Args);
12552 }
12553 };
12554 Address VDAddr =
12555 UntiedRealAddr.isValid()
12556 ? UntiedRealAddr
12557 : Address(Addr, CGF.ConvertTypeForMem(CVD->getType()), Align);
12558 CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(
12559 NormalAndEHCleanup, FiniRTLFn, CVD->getLocation().getRawEncoding(),
12560 VDAddr, Allocator);
12561 if (UntiedRealAddr.isValid())
12562 if (auto *Region =
12563 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
12564 Region->emitUntiedSwitch(CGF);
12565 return VDAddr;
12566 }
12567 return UntiedAddr;
12568}
12569
12571 const VarDecl *VD) const {
12572 auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
12573 if (It == FunctionToUntiedTaskStackMap.end())
12574 return false;
12575 return UntiedLocalVarsStack[It->second].count(VD) > 0;
12576}
12577
12579 CodeGenModule &CGM, const OMPLoopDirective &S)
12580 : CGM(CGM), NeedToPush(S.hasClausesOfKind<OMPNontemporalClause>()) {
12581 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
12582 if (!NeedToPush)
12583 return;
12585 CGM.getOpenMPRuntime().NontemporalDeclsStack.emplace_back();
12586 for (const auto *C : S.getClausesOfKind<OMPNontemporalClause>()) {
12587 for (const Stmt *Ref : C->private_refs()) {
12588 const auto *SimpleRefExpr = cast<Expr>(Ref)->IgnoreParenImpCasts();
12589 const ValueDecl *VD;
12590 if (const auto *DRE = dyn_cast<DeclRefExpr>(SimpleRefExpr)) {
12591 VD = DRE->getDecl();
12592 } else {
12593 const auto *ME = cast<MemberExpr>(SimpleRefExpr);
12594 assert((ME->isImplicitCXXThis() ||
12595 isa<CXXThisExpr>(ME->getBase()->IgnoreParenImpCasts())) &&
12596 "Expected member of current class.");
12597 VD = ME->getMemberDecl();
12598 }
12599 DS.insert(VD);
12600 }
12601 }
12602}
12603
12605 if (!NeedToPush)
12606 return;
12607 CGM.getOpenMPRuntime().NontemporalDeclsStack.pop_back();
12608}
12609
12611 CodeGenFunction &CGF,
12612 const llvm::MapVector<CanonicalDeclPtr<const VarDecl>,
12613 std::pair<Address, Address>> &LocalVars)
12614 : CGM(CGF.CGM), NeedToPush(!LocalVars.empty()) {
12615 if (!NeedToPush)
12616 return;
12617 CGM.getOpenMPRuntime().FunctionToUntiedTaskStackMap.try_emplace(
12618 CGF.CurFn, CGM.getOpenMPRuntime().UntiedLocalVarsStack.size());
12619 CGM.getOpenMPRuntime().UntiedLocalVarsStack.push_back(LocalVars);
12620}
12621
12623 if (!NeedToPush)
12624 return;
12625 CGM.getOpenMPRuntime().UntiedLocalVarsStack.pop_back();
12626}
12627
12629 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
12630
12631 return llvm::any_of(
12632 CGM.getOpenMPRuntime().NontemporalDeclsStack,
12633 [VD](const NontemporalDeclsSet &Set) { return Set.contains(VD); });
12634}
12635
/// Collect the declarations for which lastprivate-conditional analysis has to
/// be disabled inside directive \p S.
///
/// Candidates are gathered from two sources: variables captured (by reference
/// or by copy) by the first capture region of a target-execution or tasking
/// directive, and scalar-typed variables named through a DeclRefExpr in the
/// private, firstprivate, lastprivate, reduction and linear clauses of \p S.
/// Each candidate is then looked up in LastprivateConditionalStack from the
/// innermost entry outwards; the first entry that knows the declaration decides
/// the outcome.
///
/// \param S  Directive whose captures and clauses are inspected.
/// \param NeedToAddForLPCsAsDisabled  Output set; receives every candidate
///        whose innermost matching stack entry is not already disabled.
12636void CGOpenMPRuntime::LastprivateConditionalRAII::tryToDisableInnerAnalysis(
12637 const OMPExecutableDirective &S,
12638 llvm::DenseSet<CanonicalDeclPtr<const Decl>> &NeedToAddForLPCsAsDisabled)
12639 const {
12640 llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToCheckForLPCs;
12641 // Vars in target/task regions must be excluded completely.
12642 if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()) ||
12643 isOpenMPTaskingDirective(S.getDirectiveKind())) {
 // NOTE(review): the declaration of CaptureRegions is not visible in this
 // copy of the file (embedded numbering jumps from 12643 to 12645) — confirm
 // against the upstream source.
12645 getOpenMPCaptureRegions(CaptureRegions, S.getDirectiveKind());
12646 const CapturedStmt *CS = S.getCapturedStmt(CaptureRegions.front());
12647 for (const CapturedStmt::Capture &Cap : CS->captures()) {
12648 if (Cap.capturesVariable() || Cap.capturesVariableByCopy())
12649 NeedToCheckForLPCs.insert(Cap.getCapturedVar());
12650 }
12651 }
12652 // Exclude vars in private clauses.
 // The five loops below are identical except for the clause kind: each keeps
 // only scalar-typed list items that are plain DeclRefExprs once parentheses
 // and implicit casts are stripped.
12653 for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
12654 for (const Expr *Ref : C->varlist()) {
12655 if (!Ref->getType()->isScalarType())
12656 continue;
12657 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12658 if (!DRE)
12659 continue;
12660 NeedToCheckForLPCs.insert(DRE->getDecl());
12661 }
12662 }
 // Same filtering for firstprivate clauses.
12663 for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
12664 for (const Expr *Ref : C->varlist()) {
12665 if (!Ref->getType()->isScalarType())
12666 continue;
12667 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12668 if (!DRE)
12669 continue;
12670 NeedToCheckForLPCs.insert(DRE->getDecl());
12671 }
12672 }
 // Same filtering for lastprivate clauses.
12673 for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
12674 for (const Expr *Ref : C->varlist()) {
12675 if (!Ref->getType()->isScalarType())
12676 continue;
12677 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12678 if (!DRE)
12679 continue;
12680 NeedToCheckForLPCs.insert(DRE->getDecl());
12681 }
12682 }
 // Same filtering for reduction clauses.
12683 for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
12684 for (const Expr *Ref : C->varlist()) {
12685 if (!Ref->getType()->isScalarType())
12686 continue;
12687 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12688 if (!DRE)
12689 continue;
12690 NeedToCheckForLPCs.insert(DRE->getDecl());
12691 }
12692 }
 // Same filtering for linear clauses.
12693 for (const auto *C : S.getClausesOfKind<OMPLinearClause>()) {
12694 for (const Expr *Ref : C->varlist()) {
12695 if (!Ref->getType()->isScalarType())
12696 continue;
12697 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12698 if (!DRE)
12699 continue;
12700 NeedToCheckForLPCs.insert(DRE->getDecl());
12701 }
12702 }
 // For every candidate, search the stack innermost-first. Only the first entry
 // that contains the declaration is considered: the declaration is reported
 // when that entry is still enabled, and the search stops either way.
12703 for (const Decl *VD : NeedToCheckForLPCs) {
12704 for (const LastprivateConditionalData &Data :
12705 llvm::reverse(CGM.getOpenMPRuntime().LastprivateConditionalStack)) {
12706 if (Data.DeclToUniqueName.count(VD) > 0) {
12707 if (!Data.Disabled)
12708 NeedToAddForLPCsAsDisabled.insert(VD);
12709 break;
12710 }
12711 }
12712 }
12713}
12714
/// Push a lastprivate-conditional record for directive \p S when required.
///
/// The action is PushAsLastprivateConditional only when the OpenMP language
/// version is at least 50 and \p S carries a lastprivate clause whose kind is
/// OMPC_LASTPRIVATE_conditional; otherwise it is DoNotPush and the constructor
/// returns without touching the stack.
///
/// \param CGF     Function being emitted; supplies the module and the current
///                llvm::Function recorded in the new stack entry.
/// \param S       Directive whose lastprivate clauses are scanned.
/// \param IVLVal  Loop iteration variable lvalue stored in the new stack entry.
12715CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
12716 CodeGenFunction &CGF, const OMPExecutableDirective &S, LValue IVLVal)
12717 : CGM(CGF.CGM),
12718 Action((CGM.getLangOpts().OpenMP >= 50 &&
12719 llvm::any_of(S.getClausesOfKind<OMPLastprivateClause>(),
12720 [](const OMPLastprivateClause *C) {
12721 return C->getKind() ==
12722 OMPC_LASTPRIVATE_conditional;
12723 }))
12724 ? ActionToDo::PushAsLastprivateConditional
12725 : ActionToDo::DoNotPush) {
12726 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
12727 if (CGM.getLangOpts().OpenMP < 50 || Action == ActionToDo::DoNotPush)
12728 return;
12729 assert(Action == ActionToDo::PushAsLastprivateConditional &&
12730 "Expected a push action.");
 // NOTE(review): the line that binds the name Data to the entry created by
 // emplace_back() is not visible in this copy of the file (embedded numbering
 // jumps from 12730 to 12732) — confirm against the upstream source.
12732 CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
 // Register a unique name for every variable listed in a conditional
 // lastprivate clause; non-conditional lastprivate clauses are skipped.
12733 for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
12734 if (C->getKind() != OMPC_LASTPRIVATE_conditional)
12735 continue;
12736
12737 for (const Expr *Ref : C->varlist()) {
12738 Data.DeclToUniqueName.insert(std::make_pair(
12739 cast<DeclRefExpr>(Ref->IgnoreParenImpCasts())->getDecl(),
12740 SmallString<16>(generateUniqueName(CGM, "pl_cond", Ref))));
12741 }
12742 }
 // Remember the iteration variable and the function that owns this record.
12743 Data.IVLVal = IVLVal;
12744 Data.Fn = CGF.CurFn;
12745}
12746
/// Constructor that disables lastprivate-conditional tracking for selected
/// variables instead of pushing a new tracked set.
///
/// The action starts as DoNotPush. For OpenMP versions of at least 50 it calls
/// tryToDisableInnerAnalysis; when that reports any declarations, the action
/// becomes DisableLastprivateConditional and a record marked Disabled is
/// populated with those declarations and the current function.
///
/// NOTE(review): the parameter list is not visible in this copy of the file
/// (embedded numbering jumps from 12747 to 12749); the body uses the names CGF
/// and S — confirm the exact signature against the upstream source.
12747CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
12749 : CGM(CGF.CGM), Action(ActionToDo::DoNotPush) {
12750 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
12751 if (CGM.getLangOpts().OpenMP < 50)
12752 return;
12753 llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToAddForLPCsAsDisabled;
12754 tryToDisableInnerAnalysis(S, NeedToAddForLPCsAsDisabled);
 // Nothing is recorded when no declaration needs to be disabled.
12755 if (!NeedToAddForLPCsAsDisabled.empty()) {
12756 Action = ActionToDo::DisableLastprivateConditional;
12757 LastprivateConditionalData &Data =
 // NOTE(review): the initializer of Data is not visible in this copy of the
 // file (embedded numbering jumps from 12757 to 12759) — confirm against the
 // upstream source.
12759 for (const Decl *VD : NeedToAddForLPCsAsDisabled)
12760 Data.DeclToUniqueName.try_emplace(VD);
12761 Data.Fn = CGF.CurFn;
12762 Data.Disabled = true;
12763 }
12764}
12765
12766CGOpenMPRuntime::LastprivateConditionalRAII
12768 CodeGenFunction &CGF, const OMPExecutableDirective &S) {
12769 return LastprivateConditionalRAII(CGF, S);
12770}
12771
12773 if (CGM.getLangOpts().OpenMP < 50)
12774 return;
12775 if (Action == ActionToDo::DisableLastprivateConditional) {
12776 assert(CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
12777 "Expected list of disabled private vars.");
12778 CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
12779 }
12780 if (Action == ActionToDo::PushAsLastprivateConditional) {
12781 assert(
12782 !CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
12783 "Expected list of lastprivate conditional vars.");
12784 CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
12785 }
12786}
12787
12789 const VarDecl *VD) {
12790 ASTContext &C = CGM.getContext();
12791 auto I = LastprivateConditionalToTypes.try_emplace(CGF.CurFn).first;
12792 QualType NewType;
12793 const FieldDecl *VDField;
12794 const FieldDecl *FiredField;
12795 LValue BaseLVal;
12796 auto VI = I->getSecond().find(VD);
12797 if (VI == I->getSecond().end()) {
12798 RecordDecl *RD = C.buildImplicitRecord("lasprivate.conditional");
12799 RD->startDefinition();
12800 VDField = addFieldToRecordDecl(C, RD, VD->getType().getNonReferenceType());
12801 FiredField = addFieldToRecordDecl(C, RD, C.CharTy);
12802 RD->completeDefinition();
12803 NewType = C.getCanonicalTagType(RD);
12804 Address Addr = CGF.CreateMemTemp(NewType, C.getDeclAlign(VD), VD->getName());
12805 BaseLVal = CGF.MakeAddrLValue(Addr, NewType, AlignmentSource::Decl);
12806 I->getSecond().try_emplace(VD, NewType, VDField, FiredField, BaseLVal);
12807 } else {
12808 NewType = std::get<0>(VI->getSecond());
12809 VDField = std::get<1>(VI->getSecond());
12810 FiredField = std::get<2>(VI->getSecond());
12811 BaseLVal = std::get<3>(VI->getSecond());
12812 }
12813 LValue FiredLVal =
12814 CGF.EmitLValueForField(BaseLVal, FiredField);
12816 llvm::ConstantInt::getNullValue(CGF.ConvertTypeForMem(C.CharTy)),
12817 FiredLVal);
12818 return CGF.EmitLValueForField(BaseLVal, VDField).getAddress();
12819}
12820
12821namespace {
12822/// Checks if the lastprivate conditional variable is referenced in LHS.
///
/// The visitor returns true when the visited expression (or one of its glvalue
/// sub-expressions) names a declaration registered in an enabled entry of the
/// supplied lastprivate-conditional data. Details of the match are retrieved
/// afterwards through getFoundData().
12823class LastprivateConditionalRefChecker final
12824 : public ConstStmtVisitor<LastprivateConditionalRefChecker, bool> {
 // NOTE(review): the declaration of the LPM member (initialized from the
 // constructor argument and iterated below) is not visible in this copy of the
 // file (embedded numbering jumps from 12824 to 12826).
 // Expression that matched, its canonical declaration, the unique name
 // registered for it, and the IV lvalue / function of the matching entry.
12826 const Expr *FoundE = nullptr;
12827 const Decl *FoundD = nullptr;
12828 StringRef UniqueDeclName;
12829 LValue IVLVal;
12830 llvm::Function *FoundFn = nullptr;
12831 SourceLocation Loc;
12832
12833public:
 /// Match a direct variable reference. Entries are searched innermost-first;
 /// a hit in a Disabled entry ends the search with a negative result.
12834 bool VisitDeclRefExpr(const DeclRefExpr *E) {
 // NOTE(review): the head of this range-for loop (declaring D) is not visible
 // in this copy of the file (embedded numbering jumps from 12834 to 12836).
12836 llvm::reverse(LPM)) {
12837 auto It = D.DeclToUniqueName.find(E->getDecl());
12838 if (It == D.DeclToUniqueName.end())
12839 continue;
12840 if (D.Disabled)
12841 return false;
12842 FoundE = E;
12843 FoundD = E->getDecl()->getCanonicalDecl();
12844 UniqueDeclName = It->second;
12845 IVLVal = D.IVLVal;
12846 FoundFn = D.Fn;
12847 break;
12848 }
 // True only when this very expression was recorded as the match.
12849 return FoundE == E;
12850 }
 /// Match a member reference using the same search as VisitDeclRefExpr, keyed
 /// on the member declaration.
12851 bool VisitMemberExpr(const MemberExpr *E) {
 // NOTE(review): the condition guarding the early return below and the head
 // of the range-for loop are not visible in this copy of the file (embedded
 // numbers 12852 and 12854 are absent).
12853 return false;
12855 llvm::reverse(LPM)) {
12856 auto It = D.DeclToUniqueName.find(E->getMemberDecl());
12857 if (It == D.DeclToUniqueName.end())
12858 continue;
12859 if (D.Disabled)
12860 return false;
12861 FoundE = E;
12862 FoundD = E->getMemberDecl()->getCanonicalDecl();
12863 UniqueDeclName = It->second;
12864 IVLVal = D.IVLVal;
12865 FoundFn = D.Fn;
12866 break;
12867 }
12868 return FoundE == E;
12869 }
 /// Generic fallback: visit the children, skipping null children and
 /// expression children that are not glvalues, and stop at the first match.
12870 bool VisitStmt(const Stmt *S) {
12871 for (const Stmt *Child : S->children()) {
12872 if (!Child)
12873 continue;
12874 if (const auto *E = dyn_cast<Expr>(Child))
12875 if (!E->isGLValue())
12876 continue;
12877 if (Visit(Child))
12878 return true;
12879 }
12880 return false;
12881 }
 /// \param LPM  Lastprivate-conditional records to search.
12882 explicit LastprivateConditionalRefChecker(
12883 ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM)
12884 : LPM(LPM) {}
 /// \returns the matched expression, its canonical declaration, the unique
 /// name, the IV lvalue and the owning function, in that order. The values are
 /// the defaults when no match was recorded.
12885 std::tuple<const Expr *, const Decl *, StringRef, LValue, llvm::Function *>
12886 getFoundData() const {
12887 return std::make_tuple(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn);
12888 }
12889};
12890} // namespace
12891
12893 LValue IVLVal,
12894 StringRef UniqueDeclName,
12895 LValue LVal,
12896 SourceLocation Loc) {
12897 // Last updated loop counter for the lastprivate conditional var.
12898 // int<xx> last_iv = 0;
12899 llvm::Type *LLIVTy = CGF.ConvertTypeForMem(IVLVal.getType());
12900 llvm::Constant *LastIV = OMPBuilder.getOrCreateInternalVariable(
12901 LLIVTy, getName({UniqueDeclName, "iv"}));
12902 cast<llvm::GlobalVariable>(LastIV)->setAlignment(
12903 IVLVal.getAlignment().getAsAlign());
12904 LValue LastIVLVal =
12905 CGF.MakeNaturalAlignRawAddrLValue(LastIV, IVLVal.getType());
12906
12907 // Last value of the lastprivate conditional.
12908 // decltype(priv_a) last_a;
12909 llvm::GlobalVariable *Last = OMPBuilder.getOrCreateInternalVariable(
12910 CGF.ConvertTypeForMem(LVal.getType()), UniqueDeclName);
12911 cast<llvm::GlobalVariable>(Last)->setAlignment(
12912 LVal.getAlignment().getAsAlign());
12913 LValue LastLVal =
12914 CGF.MakeRawAddrLValue(Last, LVal.getType(), LVal.getAlignment());
12915
12916 // Global loop counter. Required to handle inner parallel-for regions.
12917 // iv
12918 llvm::Value *IVVal = CGF.EmitLoadOfScalar(IVLVal, Loc);
12919
12920 // #pragma omp critical(a)
12921 // if (last_iv <= iv) {
12922 // last_iv = iv;
12923 // last_a = priv_a;
12924 // }
12925 auto &&CodeGen = [&LastIVLVal, &IVLVal, IVVal, &LVal, &LastLVal,
12926 Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
12927 Action.Enter(CGF);
12928 llvm::Value *LastIVVal = CGF.EmitLoadOfScalar(LastIVLVal, Loc);
12929 // (last_iv <= iv) ? Check if the variable is updated and store new
12930 // value in global var.
12931 llvm::Value *CmpRes;
12932 if (IVLVal.getType()->isSignedIntegerType()) {
12933 CmpRes = CGF.Builder.CreateICmpSLE(LastIVVal, IVVal);
12934 } else {
12935 assert(IVLVal.getType()->isUnsignedIntegerType() &&
12936 "Loop iteration variable must be integer.");
12937 CmpRes = CGF.Builder.CreateICmpULE(LastIVVal, IVVal);
12938 }
12939 llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lp_cond_then");
12940 llvm::BasicBlock *ExitBB = CGF.createBasicBlock("lp_cond_exit");
12941 CGF.Builder.CreateCondBr(CmpRes, ThenBB, ExitBB);
12942 // {
12943 CGF.EmitBlock(ThenBB);
12944
12945 // last_iv = iv;
12946 CGF.EmitStoreOfScalar(IVVal, LastIVLVal);
12947
12948 // last_a = priv_a;
12949 switch (CGF.getEvaluationKind(LVal.getType())) {
12950 case TEK_Scalar: {
12951 llvm::Value *PrivVal = CGF.EmitLoadOfScalar(LVal, Loc);
12952 CGF.EmitStoreOfScalar(PrivVal, LastLVal);
12953 break;
12954 }
12955 case TEK_Complex: {
12956 CodeGenFunction::ComplexPairTy PrivVal = CGF.EmitLoadOfComplex(LVal, Loc);
12957 CGF.EmitStoreOfComplex(PrivVal, LastLVal, /*isInit=*/false);
12958 break;
12959 }
12960 case TEK_Aggregate:
12961 llvm_unreachable(
12962 "Aggregates are not supported in lastprivate conditional.");
12963 }
12964 // }
12965 CGF.EmitBranch(ExitBB);
12966 // There is no need to emit line number for unconditional branch.
12968 CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
12969 };
12970
12971 if (CGM.getLangOpts().OpenMPSimd) {
12972 // Do not emit as a critical region as no parallel region could be emitted.
12973 RegionCodeGenTy ThenRCG(CodeGen);
12974 ThenRCG(CGF);
12975 } else {
12976 emitCriticalRegion(CGF, UniqueDeclName, CodeGen, Loc);
12977 }
12978}
12979
12981 const Expr *LHS) {
12982 if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
12983 return;
12984 LastprivateConditionalRefChecker Checker(LastprivateConditionalStack);
12985 if (!Checker.Visit(LHS))
12986 return;
12987 const Expr *FoundE;
12988 const Decl *FoundD;
12989 StringRef UniqueDeclName;
12990 LValue IVLVal;
12991 llvm::Function *FoundFn;
12992 std::tie(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn) =
12993 Checker.getFoundData();
12994 if (FoundFn != CGF.CurFn) {
12995 // Special codegen for inner parallel regions.
12996 // ((struct.lastprivate.conditional*)&priv_a)->Fired = 1;
12997 auto It = LastprivateConditionalToTypes[FoundFn].find(FoundD);
12998 assert(It != LastprivateConditionalToTypes[FoundFn].end() &&
12999 "Lastprivate conditional is not found in outer region.");
13000 QualType StructTy = std::get<0>(It->getSecond());
13001 const FieldDecl* FiredDecl = std::get<2>(It->getSecond());
13002 LValue PrivLVal = CGF.EmitLValue(FoundE);
13004 PrivLVal.getAddress(),
13005 CGF.ConvertTypeForMem(CGF.getContext().getPointerType(StructTy)),
13006 CGF.ConvertTypeForMem(StructTy));
13007 LValue BaseLVal =
13008 CGF.MakeAddrLValue(StructAddr, StructTy, AlignmentSource::Decl);
13009 LValue FiredLVal = CGF.EmitLValueForField(BaseLVal, FiredDecl);
13010 CGF.EmitAtomicStore(RValue::get(llvm::ConstantInt::get(
13011 CGF.ConvertTypeForMem(FiredDecl->getType()), 1)),
13012 FiredLVal, llvm::AtomicOrdering::Unordered,
13013 /*IsVolatile=*/true, /*isInit=*/false);
13014 return;
13015 }
13016
13017 // Private address of the lastprivate conditional in the current context.
13018 // priv_a
13019 LValue LVal = CGF.EmitLValue(FoundE);
13020 emitLastprivateConditionalUpdate(CGF, IVLVal, UniqueDeclName, LVal,
13021 FoundE->getExprLoc());
13022}
13023
13026 const llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> &IgnoredDecls) {
13027 if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
13028 return;
13029 auto Range = llvm::reverse(LastprivateConditionalStack);
13030 auto It = llvm::find_if(
13031 Range, [](const LastprivateConditionalData &D) { return !D.Disabled; });
13032 if (It == Range.end() || It->Fn != CGF.CurFn)
13033 return;
13034 auto LPCI = LastprivateConditionalToTypes.find(It->Fn);
13035 assert(LPCI != LastprivateConditionalToTypes.end() &&
13036 "Lastprivates must be registered already.");
13038 getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind());
13039 const CapturedStmt *CS = D.getCapturedStmt(CaptureRegions.back());
13040 for (const auto &Pair : It->DeclToUniqueName) {
13041 const auto *VD = cast<VarDecl>(Pair.first->getCanonicalDecl());
13042 if (!CS->capturesVariable(VD) || IgnoredDecls.contains(VD))
13043 continue;
13044 auto I = LPCI->getSecond().find(Pair.first);
13045 assert(I != LPCI->getSecond().end() &&
13046 "Lastprivate must be rehistered already.");
13047 // bool Cmp = priv_a.Fired != 0;
13048 LValue BaseLVal = std::get<3>(I->getSecond());
13049 LValue FiredLVal =
13050 CGF.EmitLValueForField(BaseLVal, std::get<2>(I->getSecond()));
13051 llvm::Value *Res = CGF.EmitLoadOfScalar(FiredLVal, D.getBeginLoc());
13052 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Res);
13053 llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lpc.then");
13054 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("lpc.done");
13055 // if (Cmp) {
13056 CGF.Builder.CreateCondBr(Cmp, ThenBB, DoneBB);
13057 CGF.EmitBlock(ThenBB);
13058 Address Addr = CGF.GetAddrOfLocalVar(VD);
13059 LValue LVal;
13060 if (VD->getType()->isReferenceType())
13061 LVal = CGF.EmitLoadOfReferenceLValue(Addr, VD->getType(),
13063 else
13064 LVal = CGF.MakeAddrLValue(Addr, VD->getType().getNonReferenceType(),
13066 emitLastprivateConditionalUpdate(CGF, It->IVLVal, Pair.second, LVal,
13067 D.getBeginLoc());
13069 CGF.EmitBlock(DoneBB, /*IsFinal=*/true);
13070 // }
13071 }
13072}
13073
13075 CodeGenFunction &CGF, LValue PrivLVal, const VarDecl *VD,
13076 SourceLocation Loc) {
13077 if (CGF.getLangOpts().OpenMP < 50)
13078 return;
13079 auto It = LastprivateConditionalStack.back().DeclToUniqueName.find(VD);
13080 assert(It != LastprivateConditionalStack.back().DeclToUniqueName.end() &&
13081 "Unknown lastprivate conditional variable.");
13082 StringRef UniqueName = It->second;
13083 llvm::GlobalVariable *GV = CGM.getModule().getNamedGlobal(UniqueName);
13084 // The variable was not updated in the region - exit.
13085 if (!GV)
13086 return;
13087 LValue LPLVal = CGF.MakeRawAddrLValue(
13088 GV, PrivLVal.getType().getNonReferenceType(), PrivLVal.getAlignment());
13089 llvm::Value *Res = CGF.EmitLoadOfScalar(LPLVal, Loc);
13090 CGF.EmitStoreOfScalar(Res, PrivLVal);
13091}
13092
13095 const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
13096 const RegionCodeGenTy &CodeGen) {
13097 llvm_unreachable("Not supported in SIMD-only mode");
13098}
13099
13102 const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
13103 const RegionCodeGenTy &CodeGen) {
13104 llvm_unreachable("Not supported in SIMD-only mode");
13105}
13106
13108 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
13109 const VarDecl *PartIDVar, const VarDecl *TaskTVar,
13110 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
13111 bool Tied, unsigned &NumberOfParts) {
13112 llvm_unreachable("Not supported in SIMD-only mode");
13113}
13114
13116 CodeGenFunction &CGF, SourceLocation Loc, llvm::Function *OutlinedFn,
13117 ArrayRef<llvm::Value *> CapturedVars, const Expr *IfCond,
13118 llvm::Value *NumThreads, OpenMPNumThreadsClauseModifier NumThreadsModifier,
13119 OpenMPSeverityClauseKind Severity, const Expr *Message) {
13120 llvm_unreachable("Not supported in SIMD-only mode");
13121}
13122
13124 CodeGenFunction &CGF, StringRef CriticalName,
13125 const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc,
13126 const Expr *Hint) {
13127 llvm_unreachable("Not supported in SIMD-only mode");
13128}
13129
13131 const RegionCodeGenTy &MasterOpGen,
13132 SourceLocation Loc) {
13133 llvm_unreachable("Not supported in SIMD-only mode");
13134}
13135
13137 const RegionCodeGenTy &MasterOpGen,
13138 SourceLocation Loc,
13139 const Expr *Filter) {
13140 llvm_unreachable("Not supported in SIMD-only mode");
13141}
13142
13144 SourceLocation Loc) {
13145 llvm_unreachable("Not supported in SIMD-only mode");
13146}
13147
13149 CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen,
13150 SourceLocation Loc) {
13151 llvm_unreachable("Not supported in SIMD-only mode");
13152}
13153
13155 CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen,
13156 SourceLocation Loc, ArrayRef<const Expr *> CopyprivateVars,
13158 ArrayRef<const Expr *> AssignmentOps) {
13159 llvm_unreachable("Not supported in SIMD-only mode");
13160}
13161
13163 const RegionCodeGenTy &OrderedOpGen,
13164 SourceLocation Loc,
13165 bool IsThreads) {
13166 llvm_unreachable("Not supported in SIMD-only mode");
13167}
13168
13170 SourceLocation Loc,
13172 bool EmitChecks,
13173 bool ForceSimpleCall) {
13174 llvm_unreachable("Not supported in SIMD-only mode");
13175}
13176
13179 const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
13180 bool Ordered, const DispatchRTInput &DispatchValues) {
13181 llvm_unreachable("Not supported in SIMD-only mode");
13182}
13183
13185 SourceLocation Loc) {
13186 llvm_unreachable("Not supported in SIMD-only mode");
13187}
13188
13191 const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) {
13192 llvm_unreachable("Not supported in SIMD-only mode");
13193}
13194
13197 OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) {
13198 llvm_unreachable("Not supported in SIMD-only mode");
13199}
13200
13202 SourceLocation Loc,
13203 unsigned IVSize,
13204 bool IVSigned) {
13205 llvm_unreachable("Not supported in SIMD-only mode");
13206}
13207
13209 SourceLocation Loc,
13210 OpenMPDirectiveKind DKind) {
13211 llvm_unreachable("Not supported in SIMD-only mode");
13212}
13213
13215 SourceLocation Loc,
13216 unsigned IVSize, bool IVSigned,
13217 Address IL, Address LB,
13218 Address UB, Address ST) {
13219 llvm_unreachable("Not supported in SIMD-only mode");
13220}
13221
13223 CodeGenFunction &CGF, llvm::Value *NumThreads, SourceLocation Loc,
13225 SourceLocation SeverityLoc, const Expr *Message,
13226 SourceLocation MessageLoc) {
13227 llvm_unreachable("Not supported in SIMD-only mode");
13228}
13229
13231 ProcBindKind ProcBind,
13232 SourceLocation Loc) {
13233 llvm_unreachable("Not supported in SIMD-only mode");
13234}
13235
13237 const VarDecl *VD,
13238 Address VDAddr,
13239 SourceLocation Loc) {
13240 llvm_unreachable("Not supported in SIMD-only mode");
13241}
13242
13244 const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit,
13245 CodeGenFunction *CGF) {
13246 llvm_unreachable("Not supported in SIMD-only mode");
13247}
13248
13250 CodeGenFunction &CGF, QualType VarType, StringRef Name) {
13251 llvm_unreachable("Not supported in SIMD-only mode");
13252}
13253
13256 SourceLocation Loc,
13257 llvm::AtomicOrdering AO) {
13258 llvm_unreachable("Not supported in SIMD-only mode");
13259}
13260
13262 const OMPExecutableDirective &D,
13263 llvm::Function *TaskFunction,
13264 QualType SharedsTy, Address Shareds,
13265 const Expr *IfCond,
13266 const OMPTaskDataTy &Data) {
13267 llvm_unreachable("Not supported in SIMD-only mode");
13268}
13269
13272 llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds,
13273 const Expr *IfCond, const OMPTaskDataTy &Data) {
13274 llvm_unreachable("Not supported in SIMD-only mode");
13275}
13276
13280 ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) {
13281 assert(Options.SimpleReduction && "Only simple reduction is expected.");
13282 CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs,
13283 ReductionOps, Options);
13284}
13285
13288 ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
13289 llvm_unreachable("Not supported in SIMD-only mode");
13290}
13291
13293 SourceLocation Loc,
13294 bool IsWorksharingReduction) {
13295 llvm_unreachable("Not supported in SIMD-only mode");
13296}
13297
13299 SourceLocation Loc,
13300 ReductionCodeGen &RCG,
13301 unsigned N) {
13302 llvm_unreachable("Not supported in SIMD-only mode");
13303}
13304
13306 SourceLocation Loc,
13307 llvm::Value *ReductionsPtr,
13308 LValue SharedLVal) {
13309 llvm_unreachable("Not supported in SIMD-only mode");
13310}
13311
13313 SourceLocation Loc,
13314 const OMPTaskDataTy &Data) {
13315 llvm_unreachable("Not supported in SIMD-only mode");
13316}
13317
13320 OpenMPDirectiveKind CancelRegion) {
13321 llvm_unreachable("Not supported in SIMD-only mode");
13322}
13323
13325 SourceLocation Loc, const Expr *IfCond,
13326 OpenMPDirectiveKind CancelRegion) {
13327 llvm_unreachable("Not supported in SIMD-only mode");
13328}
13329
13331 const OMPExecutableDirective &D, StringRef ParentName,
13332 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
13333 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
13334 llvm_unreachable("Not supported in SIMD-only mode");
13335}
13336
13339 llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
13340 llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
13341 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
13342 const OMPLoopDirective &D)>
13343 SizeEmitter) {
13344 llvm_unreachable("Not supported in SIMD-only mode");
13345}
13346
13348 llvm_unreachable("Not supported in SIMD-only mode");
13349}
13350
13352 llvm_unreachable("Not supported in SIMD-only mode");
13353}
13354
13356 return false;
13357}
13358
13360 const OMPExecutableDirective &D,
13361 SourceLocation Loc,
13362 llvm::Function *OutlinedFn,
13363 ArrayRef<llvm::Value *> CapturedVars) {
13364 llvm_unreachable("Not supported in SIMD-only mode");
13365}
13366
13368 const Expr *NumTeams,
13369 const Expr *ThreadLimit,
13370 SourceLocation Loc) {
13371 llvm_unreachable("Not supported in SIMD-only mode");
13372}
13373
13375 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
13376 const Expr *Device, const RegionCodeGenTy &CodeGen,
13378 llvm_unreachable("Not supported in SIMD-only mode");
13379}
13380
13382 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
13383 const Expr *Device) {
13384 llvm_unreachable("Not supported in SIMD-only mode");
13385}
13386
13388 const OMPLoopDirective &D,
13389 ArrayRef<Expr *> NumIterations) {
13390 llvm_unreachable("Not supported in SIMD-only mode");
13391}
13392
13394 const OMPDependClause *C) {
13395 llvm_unreachable("Not supported in SIMD-only mode");
13396}
13397
13399 const OMPDoacrossClause *C) {
13400 llvm_unreachable("Not supported in SIMD-only mode");
13401}
13402
13403const VarDecl *
13405 const VarDecl *NativeParam) const {
13406 llvm_unreachable("Not supported in SIMD-only mode");
13407}
13408
13409Address
13411 const VarDecl *NativeParam,
13412 const VarDecl *TargetParam) const {
13413 llvm_unreachable("Not supported in SIMD-only mode");
13414}
#define V(N, I)
static llvm::Value * emitCopyprivateCopyFunction(CodeGenModule &CGM, llvm::Type *ArgsElemType, ArrayRef< const Expr * > CopyprivateVars, ArrayRef< const Expr * > DestExprs, ArrayRef< const Expr * > SrcExprs, ArrayRef< const Expr * > AssignmentOps, SourceLocation Loc)
static StringRef getIdentStringFromSourceLocation(CodeGenFunction &CGF, SourceLocation Loc, SmallString< 128 > &Buffer)
static void emitOffloadingArraysAndArgs(CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo, CGOpenMPRuntime::TargetDataInfo &Info, llvm::OpenMPIRBuilder &OMPBuilder, bool IsNonContiguous=false, bool ForEndCall=false)
Emit the arrays used to pass the captures and map information to the offloading runtime library.
static RecordDecl * createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy, ArrayRef< PrivateDataTy > Privates)
static void emitInitWithReductionInitializer(CodeGenFunction &CGF, const OMPDeclareReductionDecl *DRD, const Expr *InitOp, Address Private, Address Original, QualType Ty)
static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy, Address OriginalBaseAddress, llvm::Value *Addr)
static void emitPrivatesInit(CodeGenFunction &CGF, const OMPExecutableDirective &D, Address KmpTaskSharedsPtr, LValue TDBase, const RecordDecl *KmpTaskTWithPrivatesQTyRD, QualType SharedsTy, QualType SharedsPtrTy, const OMPTaskDataTy &Data, ArrayRef< PrivateDataTy > Privates, bool ForDup)
Emit initialization for private variables in task-based directives.
static void emitClauseForBareTargetDirective(CodeGenFunction &CGF, const OMPExecutableDirective &D, llvm::SmallVectorImpl< llvm::Value * > &Values)
static llvm::Value * emitDestructorsFunction(CodeGenModule &CGM, SourceLocation Loc, QualType KmpInt32Ty, QualType KmpTaskTWithPrivatesPtrQTy, QualType KmpTaskTWithPrivatesQTy)
static void EmitOMPAggregateReduction(CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar, const VarDecl *RHSVar, const llvm::function_ref< void(CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *)> &RedOpGen, const Expr *XExpr=nullptr, const Expr *EExpr=nullptr, const Expr *UpExpr=nullptr)
Emit reduction operation for each element of array (required for array sections) LHS op = RHS.
static void emitTargetCallFallback(CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn, const OMPExecutableDirective &D, llvm::SmallVectorImpl< llvm::Value * > &CapturedVars, bool RequiresOuterTask, const CapturedStmt &CS, bool OffloadingMandatory, CodeGenFunction &CGF)
static llvm::Value * emitReduceInitFunction(CodeGenModule &CGM, SourceLocation Loc, ReductionCodeGen &RCG, unsigned N)
Emits reduction initializer function:
static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion)
static void emitDependData(CodeGenFunction &CGF, QualType &KmpDependInfoTy, llvm::PointerUnion< unsigned *, LValue * > Pos, const OMPTaskDataTy::DependData &Data, Address DependenciesArray)
static llvm::Value * emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc, const OMPTaskDataTy &Data, QualType PrivatesQTy, ArrayRef< PrivateDataTy > Privates)
Emit a privates mapping function for correct handling of private and firstprivate variables.
static llvm::Value * emitReduceCombFunction(CodeGenModule &CGM, SourceLocation Loc, ReductionCodeGen &RCG, unsigned N, const Expr *ReductionOp, const Expr *LHS, const Expr *RHS, const Expr *PrivateRef)
Emits reduction combiner function:
static RecordDecl * createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef< PrivateDataTy > Privates)
static llvm::Value * getAllocatorVal(CodeGenFunction &CGF, const Expr *Allocator)
Return allocator value from expression, or return a null allocator (default when no allocator specifi...
static llvm::Function * emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc, OpenMPDirectiveKind Kind, QualType KmpInt32Ty, QualType KmpTaskTWithPrivatesPtrQTy, QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy, QualType SharedsPtrTy, llvm::Function *TaskFunction, llvm::Value *TaskPrivatesMap)
Emit a proxy function which accepts kmp_task_t as the second argument.
static bool isAllocatableDecl(const VarDecl *VD)
static llvm::Value * getAlignmentValue(CodeGenModule &CGM, const VarDecl *VD)
Return the alignment from an allocate directive if present.
static void emitTargetCallKernelLaunch(CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn, const OMPExecutableDirective &D, llvm::SmallVectorImpl< llvm::Value * > &CapturedVars, bool RequiresOuterTask, const CapturedStmt &CS, bool OffloadingMandatory, llvm::PointerIntPair< const Expr *, 2, OpenMPDeviceClauseModifier > Device, llvm::Value *OutlinedFnID, CodeGenFunction::OMPTargetDataInfo &InputInfo, llvm::Value *&MapTypesArray, llvm::Value *&MapNamesArray, llvm::function_ref< llvm::Value *(CodeGenFunction &CGF, const OMPLoopDirective &D)> SizeEmitter, CodeGenFunction &CGF, CodeGenModule &CGM)
static const OMPExecutableDirective * getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D)
Check for inner distribute directive.
static std::pair< llvm::Value *, llvm::Value * > getPointerAndSize(CodeGenFunction &CGF, const Expr *E)
static const VarDecl * getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE)
static bool getAArch64MTV(QualType QT, llvm::OpenMPIRBuilder::DeclareSimdKindTy Kind)
Maps To Vector (MTV), as defined in 4.1.1 of the AAVFABI (2021Q1).
static bool isTrivial(ASTContext &Ctx, const Expr *E)
Checks if the expression is constant or does not have non-trivial function calls.
static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind, bool Chunked, bool Ordered)
Map the OpenMP loop schedule to the runtime enumeration.
static void getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS, const Expr **E, int32_t &UpperBound, bool UpperBoundOnly, llvm::Value **CondVal)
Check for a num threads constant value (stored in UpperBound), or expression (stored in E).
static llvm::Value * emitDeviceID(llvm::PointerIntPair< const Expr *, 2, OpenMPDeviceClauseModifier > Device, CodeGenFunction &CGF)
static const OMPDeclareReductionDecl * getReductionInit(const Expr *ReductionOp)
Check if the combiner is a call to UDR combiner and if it is so return the UDR decl used for reductio...
static bool checkInitIsRequired(CodeGenFunction &CGF, ArrayRef< PrivateDataTy > Privates)
Check if duplication function is required for taskloops.
static bool validateAArch64Simdlen(CodeGenModule &CGM, SourceLocation SLoc, unsigned UserVLEN, unsigned WDS, char ISA)
static bool checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD, ArrayRef< PrivateDataTy > Privates)
Checks if destructor function is required to be generated.
static llvm::TargetRegionEntryInfo getEntryInfoFromPresumedLoc(CodeGenModule &CGM, llvm::OpenMPIRBuilder &OMPBuilder, SourceLocation BeginLoc, llvm::StringRef ParentName="")
static void genMapInfo(MappableExprsHandler &MEHandler, CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo, llvm::OpenMPIRBuilder &OMPBuilder, const llvm::DenseSet< CanonicalDeclPtr< const Decl > > &SkippedVarSet=llvm::DenseSet< CanonicalDeclPtr< const Decl > >())
static unsigned getAArch64LS(QualType QT, llvm::OpenMPIRBuilder::DeclareSimdKindTy Kind, ASTContext &C)
Computes the lane size (LS) of a return type or of an input parameter, as defined by LS(P) in 3....
static llvm::OpenMPIRBuilder::DeclareSimdBranch convertDeclareSimdBranch(OMPDeclareSimdDeclAttr::BranchStateTy State)
static void emitForStaticInitCall(CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId, llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule, OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2, const CGOpenMPRuntime::StaticRTInput &Values)
static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy, LValue BaseLV)
static void getKmpAffinityType(ASTContext &C, QualType &KmpTaskAffinityInfoTy)
Builds kmp_task_affinity_info, if it is not built yet.
static llvm::Constant * emitMappingInformation(CodeGenFunction &CGF, llvm::OpenMPIRBuilder &OMPBuilder, MappableExprsHandler::MappingExprInfo &MapExprs)
Emit a string constant containing the names of the values mapped to the offloading runtime library.
static void getDependTypes(ASTContext &C, QualType &KmpDependInfoTy, QualType &FlagsTy)
Builds kmp_depend_info, if it is not built yet, and builds flags type.
static llvm::Value * emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc, const OMPExecutableDirective &D, QualType KmpTaskTWithPrivatesPtrQTy, const RecordDecl *KmpTaskTWithPrivatesQTyRD, const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy, QualType SharedsPtrTy, const OMPTaskDataTy &Data, ArrayRef< PrivateDataTy > Privates, bool WithLastIter)
Emit task_dup function (for initialization of private/firstprivate/lastprivate vars and last_iter fla...
static std::pair< llvm::Value *, OMPDynGroupprivateFallbackType > emitDynCGroupMem(const OMPExecutableDirective &D, CodeGenFunction &CGF)
static llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseKind convertDeviceClause(const VarDecl *VD)
static llvm::Value * emitReduceFiniFunction(CodeGenModule &CGM, SourceLocation Loc, ReductionCodeGen &RCG, unsigned N)
Emits reduction finalizer function:
static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr, QualType Type, bool EmitDeclareReductionInit, const Expr *Init, const OMPDeclareReductionDecl *DRD, Address SrcAddr=Address::invalid())
Emit initialization of arrays of complex types.
static bool getAArch64PBV(QualType QT, ASTContext &C)
Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI.
static void EmitDoacrossOrdered(CodeGenFunction &CGF, CodeGenModule &CGM, const T *C, llvm::Value *ULoc, llvm::Value *ThreadID)
static RTLDependenceKindTy translateDependencyKind(OpenMPDependClauseKind K)
Translates internal dependency kind into the runtime kind.
static void emitTargetCallElse(CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn, const OMPExecutableDirective &D, llvm::SmallVectorImpl< llvm::Value * > &CapturedVars, bool RequiresOuterTask, const CapturedStmt &CS, bool OffloadingMandatory, CodeGenFunction &CGF)
static llvm::Function * emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty, const Expr *CombinerInitializer, const VarDecl *In, const VarDecl *Out, bool IsCombiner)
static void emitReductionCombiner(CodeGenFunction &CGF, const Expr *ReductionOp)
Emit reduction combiner.
static std::tuple< unsigned, unsigned, bool > getNDSWDS(const FunctionDecl *FD, ArrayRef< llvm::OpenMPIRBuilder::DeclareSimdAttrTy > ParamAttrs)
static std::string generateUniqueName(CodeGenModule &CGM, llvm::StringRef Prefix, const Expr *Ref)
static llvm::Function * emitParallelOrTeamsOutlinedFunction(CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS, const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen)
static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array, unsigned Index, const VarDecl *Var)
Given an array of pointers to variables, project the address of a given variable.
static FieldDecl * addFieldToRecordDecl(ASTContext &C, DeclContext *DC, QualType FieldTy)
static unsigned evaluateCDTSize(const FunctionDecl *FD, ArrayRef< llvm::OpenMPIRBuilder::DeclareSimdAttrTy > ParamAttrs)
static ValueDecl * getDeclFromThisExpr(const Expr *E)
static void genMapInfoForCaptures(MappableExprsHandler &MEHandler, CodeGenFunction &CGF, const CapturedStmt &CS, llvm::SmallVectorImpl< llvm::Value * > &CapturedVars, llvm::OpenMPIRBuilder &OMPBuilder, llvm::DenseSet< CanonicalDeclPtr< const Decl > > &MappedVarSet, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo)
static RecordDecl * createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind, QualType KmpInt32Ty, QualType KmpRoutineEntryPointerQTy)
static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule, OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2)
static mlir::omp::DeclareTargetCaptureClause convertCaptureClause(OMPDeclareTargetDeclAttr::MapTypeTy mapTy)
static bool isAssumedToBeNotEmitted(const ValueDecl *vd, bool isDevice)
Returns true if the declaration should be skipped based on its device_type attribute and the current ...
Expr::Classification Cl
TokenType getType() const
Returns the token's type, e.g.
FormatToken * Next
The next token in the unwrapped line.
Result
Implement __builtin_bit_cast and related operations.
#define X(type, name)
Definition Value.h:97
#define SM(sm)
This file defines OpenMP AST classes for clauses.
Defines some OpenMP-specific enums and functions.
llvm::json::Array Array
Defines the SourceManager interface.
This file defines OpenMP AST classes for executable directives and clauses.
__DEVICE__ int max(int __a, int __b)
This represents clause 'affinity' in the 'pragma omp task'-based directives.
static std::pair< const Expr *, std::optional< size_t > > findAttachPtrExpr(MappableExprComponentListRef Components, OpenMPDirectiveKind CurDirKind)
Find the attach pointer expression from a list of mappable expression components.
static QualType getComponentExprElementType(const Expr *Exp)
Get the type of an element of a ComponentList Expr Exp.
ArrayRef< MappableComponent > MappableExprComponentListRef
This represents implicit clause 'depend' for the 'pragma omp task' directive.
This represents 'detach' clause in the 'pragma omp task' directive.
This represents 'device' clause in the 'pragma omp ...' directive.
This represents the 'doacross' clause for the 'pragma omp ordered' directive.
This represents 'dyn_groupprivate' clause in 'pragma omp target ...' and 'pragma omp teams ....
This represents clause 'map' in the 'pragma omp ...' directives.
This represents clause 'nontemporal' in the 'pragma omp ...' directives.
This represents 'num_teams' clause in the 'pragma omp ...' directive.
This represents 'thread_limit' clause in the 'pragma omp ...' directive.
This represents clause 'uses_allocators' in the 'pragma omp target'-based directives.
This represents 'ompx_attribute' clause in a directive that might generate an outlined function.
This represents 'ompx_bare' clause in the 'pragma omp target teams ...' directive.
This represents 'ompx_dyn_cgroup_mem' clause in the 'pragma omp target ...' directive.
Holds long-lived AST nodes (such as types and decls) that can be referred to throughout the semantic ...
Definition ASTContext.h:223
SourceManager & getSourceManager()
Definition ASTContext.h:863
const ConstantArrayType * getAsConstantArrayType(QualType T) const
CharUnits getTypeAlignInChars(QualType T) const
Return the ABI-specified alignment of a (complete) type T, in characters.
const ASTRecordLayout & getASTRecordLayout(const RecordDecl *D) const
Get or compute information about the layout of the specified record (struct/union/class) D,...
QualType getPointerType(QualType T) const
Return the uniqued reference to the type for a pointer to the specified type.
CanQualType VoidPtrTy
QualType getConstantArrayType(QualType EltTy, const llvm::APInt &ArySize, const Expr *SizeExpr, ArraySizeModifier ASM, unsigned IndexTypeQuals) const
Return the unique reference to the type for a constant array of the specified element type.
const LangOptions & getLangOpts() const
Definition ASTContext.h:959
CanQualType BoolTy
QualType getIntTypeForBitwidth(unsigned DestWidth, unsigned Signed) const
getIntTypeForBitwidth - sets integer QualTy according to specified details: bitwidth,...
CharUnits getDeclAlign(const Decl *D, bool ForAlignof=false) const
Return a conservative estimate of the alignment of the specified decl D.
int64_t toBits(CharUnits CharSize) const
Convert a size in characters to a size in bits.
const ArrayType * getAsArrayType(QualType T) const
Type Query functions.
uint64_t getTypeSize(QualType T) const
Return the size of the specified (complete) type T, in bits.
CharUnits getTypeSizeInChars(QualType T) const
Return the size of the specified (complete) type T, in characters.
static bool hasSameType(QualType T1, QualType T2)
Determine whether the given types T1 and T2 are equivalent.
const VariableArrayType * getAsVariableArrayType(QualType T) const
QualType getSizeType() const
Return the unique type for "size_t" (C99 7.17), defined in <stddef.h>.
unsigned getTypeAlign(QualType T) const
Return the ABI-specified alignment of a (complete) type T, in bits.
CharUnits getSize() const
getSize - Get the record size in characters.
uint64_t getFieldOffset(unsigned FieldNo) const
getFieldOffset - Get the offset of the given field index, in bits.
CharUnits getNonVirtualSize() const
getNonVirtualSize - Get the non-virtual size (in chars) of an object, which is the size of the object...
static QualType getBaseOriginalType(const Expr *Base)
Return original type of the base expression for array section.
Definition Expr.cpp:5394
Represents an array type, per C99 6.7.5.2 - Array Declarators.
Definition TypeBase.h:3786
Attr - This represents one attribute.
Definition Attr.h:46
Represents a base class of a C++ class.
Definition DeclCXX.h:146
Represents a C++ constructor within a class.
Definition DeclCXX.h:2633
Represents a C++ destructor within a class.
Definition DeclCXX.h:2895
const CXXRecordDecl * getParent() const
Return the parent of this method declaration, which is the class in which this method is defined.
Definition DeclCXX.h:2284
QualType getFunctionObjectParameterType() const
Definition DeclCXX.h:2308
Represents a C++ struct/union/class.
Definition DeclCXX.h:258
base_class_range bases()
Definition DeclCXX.h:608
bool isLambda() const
Determine whether this class describes a lambda function object.
Definition DeclCXX.h:1023
void getCaptureFields(llvm::DenseMap< const ValueDecl *, FieldDecl * > &Captures, FieldDecl *&ThisCapture) const
For a closure type, retrieve the mapping from captured variables and this to the non-static data memb...
Definition DeclCXX.cpp:1790
unsigned getNumBases() const
Retrieves the number of base classes of this class.
Definition DeclCXX.h:602
base_class_range vbases()
Definition DeclCXX.h:625
capture_const_range captures() const
Definition DeclCXX.h:1102
ctor_range ctors() const
Definition DeclCXX.h:670
CXXDestructorDecl * getDestructor() const
Returns the destructor decl for this class.
Definition DeclCXX.cpp:2127
CanProxy< U > castAs() const
A wrapper class around a pointer that always points to its canonical declaration.
Describes the capture of either a variable, or 'this', or variable-length array type.
Definition Stmt.h:3960
bool capturesVariableByCopy() const
Determine whether this capture handles a variable by copy.
Definition Stmt.h:3994
VarDecl * getCapturedVar() const
Retrieve the declaration of the variable being captured.
Definition Stmt.cpp:1391
bool capturesVariableArrayType() const
Determine whether this capture handles a variable-length array type.
Definition Stmt.h:4000
bool capturesThis() const
Determine whether this capture handles the C++ 'this' pointer.
Definition Stmt.h:3988
bool capturesVariable() const
Determine whether this capture handles a variable (by reference).
Definition Stmt.h:3991
This captures a statement into a function.
Definition Stmt.h:3947
const Capture * const_capture_iterator
Definition Stmt.h:4081
capture_iterator capture_end() const
Retrieve an iterator pointing past the end of the sequence of captures.
Definition Stmt.h:4098
const RecordDecl * getCapturedRecordDecl() const
Retrieve the record declaration for captured variables.
Definition Stmt.h:4068
Stmt * getCapturedStmt()
Retrieve the statement being captured.
Definition Stmt.h:4051
bool capturesVariable(const VarDecl *Var) const
True if this variable has been captured.
Definition Stmt.cpp:1517
capture_iterator capture_begin()
Retrieve an iterator pointing to the first capture.
Definition Stmt.h:4093
capture_range captures()
Definition Stmt.h:4085
CharUnits - This is an opaque type for sizes expressed in character units.
Definition CharUnits.h:38
bool isZero() const
isZero - Test whether the quantity equals zero.
Definition CharUnits.h:122
llvm::Align getAsAlign() const
getAsAlign - Returns Quantity as a valid llvm::Align, Beware llvm::Align assumes power of two 8-bit b...
Definition CharUnits.h:189
QuantityType getQuantity() const
getQuantity - Get the raw integer representation of this quantity.
Definition CharUnits.h:185
CharUnits alignmentOfArrayElement(CharUnits elementSize) const
Given that this is the alignment of the first element of an array, return the minimum alignment of an...
Definition CharUnits.h:214
static CharUnits fromQuantity(QuantityType Quantity)
fromQuantity - Construct a CharUnits quantity from a raw integer type.
Definition CharUnits.h:63
CharUnits alignTo(const CharUnits &Align) const
alignTo - Returns the next integer (mod 2**64) that is greater than or equal to this quantity and is ...
Definition CharUnits.h:201
std::string SampleProfileFile
Name of the profile file to use with -fprofile-sample-use.
Like RawAddress, an abstract representation of an aligned address, but the pointer contained in this ...
Definition Address.h:128
static Address invalid()
Definition Address.h:176
llvm::Value * emitRawPointer(CodeGenFunction &CGF) const
Return the pointer contained in this class after authenticating it and adding offset to it if necessa...
Definition Address.h:253
CharUnits getAlignment() const
Definition Address.h:194
llvm::Type * getElementType() const
Return the type of the values stored in this address.
Definition Address.h:209
Address withPointer(llvm::Value *NewPointer, KnownNonNull_t IsKnownNonNull) const
Return address with different pointer, but same element type and alignment.
Definition Address.h:261
Address withElementType(llvm::Type *ElemTy) const
Return address with different element type, but same pointer and alignment.
Definition Address.h:276
bool isValid() const
Definition Address.h:177
llvm::PointerType * getType() const
Return the type of the pointer value.
Definition Address.h:204
static ApplyDebugLocation CreateArtificial(CodeGenFunction &CGF)
Apply TemporaryLocation if it is valid.
static ApplyDebugLocation CreateDefaultArtificial(CodeGenFunction &CGF, SourceLocation TemporaryLocation)
Apply TemporaryLocation if it is valid.
static ApplyDebugLocation CreateEmpty(CodeGenFunction &CGF)
Set the IRBuilder to not attach debug locations.
llvm::StoreInst * CreateStore(llvm::Value *Val, Address Addr, bool IsVolatile=false)
Definition CGBuilder.h:146
Address CreateGEP(CodeGenFunction &CGF, Address Addr, llvm::Value *Index, const llvm::Twine &Name="")
Definition CGBuilder.h:302
Address CreatePointerBitCastOrAddrSpaceCast(Address Addr, llvm::Type *Ty, llvm::Type *ElementTy, const llvm::Twine &Name="")
Definition CGBuilder.h:213
Address CreateConstArrayGEP(Address Addr, uint64_t Index, const llvm::Twine &Name="")
Given addr = [n x T]* ... produce name = getelementptr inbounds addr, i64 0, i64 index where i64 is a...
Definition CGBuilder.h:251
llvm::LoadInst * CreateLoad(Address Addr, const llvm::Twine &Name="")
Definition CGBuilder.h:118
llvm::CallInst * CreateMemCpy(Address Dest, Address Src, llvm::Value *Size, bool IsVolatile=false)
Definition CGBuilder.h:397
Address CreateConstGEP(Address Addr, uint64_t Index, const llvm::Twine &Name="")
Given addr = T* ... produce name = getelementptr inbounds addr, i64 index where i64 is actually the t...
Definition CGBuilder.h:288
Address CreateAddrSpaceCast(Address Addr, llvm::Type *Ty, llvm::Type *ElementTy, const llvm::Twine &Name="")
Definition CGBuilder.h:199
CGFunctionInfo - Class to encapsulate the information about a function definition.
static LastprivateConditionalRAII disable(CodeGenFunction &CGF, const OMPExecutableDirective &S)
NontemporalDeclsRAII(CodeGenModule &CGM, const OMPLoopDirective &S)
Struct that keeps all the relevant information that should be kept throughout a 'target data' region.
llvm::DenseMap< const ValueDecl *, llvm::Value * > CaptureDeviceAddrMap
Map between the declaration of a capture and the corresponding new llvm address where the runtime r...
UntiedTaskLocalDeclsRAII(CodeGenFunction &CGF, const llvm::MapVector< CanonicalDeclPtr< const VarDecl >, std::pair< Address, Address > > &LocalVars)
virtual Address emitThreadIDAddress(CodeGenFunction &CGF, SourceLocation Loc)
Emits address of the word in a memory where current thread id is stored.
llvm::StringSet ThreadPrivateWithDefinition
Set of threadprivate variables with the generated initializer.
virtual void emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, const OMPExecutableDirective &D, llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds, const Expr *IfCond, const OMPTaskDataTy &Data)
Emit task region for the task directive.
void createOffloadEntriesAndInfoMetadata()
Creates all the offload entries in the current compilation unit along with the associated metadata.
const Expr * getNumTeamsExprForTargetDirective(CodeGenFunction &CGF, const OMPExecutableDirective &D, int32_t &MinTeamsVal, int32_t &MaxTeamsVal)
Emit the number of teams for a target directive.
virtual Address getAddrOfThreadPrivate(CodeGenFunction &CGF, const VarDecl *VD, Address VDAddr, SourceLocation Loc)
Returns address of the threadprivate variable for the current thread.
void emitDeferredTargetDecls() const
Emit deferred declare target variables marked for deferred emission.
virtual llvm::Value * emitForNext(CodeGenFunction &CGF, SourceLocation Loc, unsigned IVSize, bool IVSigned, Address IL, Address LB, Address UB, Address ST)
Call __kmpc_dispatch_next( ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter, kmp_int[32|64] *p_lowe...
bool markAsGlobalTarget(GlobalDecl GD)
Marks the declaration as already emitted for the device code and returns true, if it was marked alrea...
virtual void emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc, llvm::Function *OutlinedFn, ArrayRef< llvm::Value * > CapturedVars, const Expr *IfCond, llvm::Value *NumThreads, OpenMPNumThreadsClauseModifier NumThreadsModifier=OMPC_NUMTHREADS_unknown, OpenMPSeverityClauseKind Severity=OMPC_SEVERITY_fatal, const Expr *Message=nullptr)
Emits code for parallel or serial call of the OutlinedFn with variables captured in a record which ad...
llvm::SmallDenseSet< CanonicalDeclPtr< const Decl > > NontemporalDeclsSet
virtual void emitTargetDataStandAloneCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, const Expr *Device)
Emit the data mapping/movement code associated with the directive D that should be of the form 'targe...
virtual void emitNumThreadsClause(CodeGenFunction &CGF, llvm::Value *NumThreads, SourceLocation Loc, OpenMPNumThreadsClauseModifier Modifier=OMPC_NUMTHREADS_unknown, OpenMPSeverityClauseKind Severity=OMPC_SEVERITY_fatal, SourceLocation SeverityLoc=SourceLocation(), const Expr *Message=nullptr, SourceLocation MessageLoc=SourceLocation())
Emits call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_threads) ...
QualType SavedKmpTaskloopTQTy
Saved kmp_task_t for taskloop-based directive.
virtual void emitSingleRegion(CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen, SourceLocation Loc, ArrayRef< const Expr * > CopyprivateVars, ArrayRef< const Expr * > DestExprs, ArrayRef< const Expr * > SrcExprs, ArrayRef< const Expr * > AssignmentOps)
Emits a single region.
virtual bool emitTargetGlobal(GlobalDecl GD)
Emit the global GD if it is meaningful for the target.
void setLocThreadIdInsertPt(CodeGenFunction &CGF, bool AtCurrentPoint=false)
std::string getOutlinedHelperName(StringRef Name) const
Get the function name of an outlined region.
bool HasEmittedDeclareTargetRegion
Flag for keeping track of whether a device routine has been emitted.
llvm::Constant * getOrCreateThreadPrivateCache(const VarDecl *VD)
If the specified mangled name is not in the module, create and return threadprivate cache object.
virtual Address getTaskReductionItem(CodeGenFunction &CGF, SourceLocation Loc, llvm::Value *ReductionsPtr, LValue SharedLVal)
Get the address of void * type of the private copy of the reduction item specified by the SharedLVal...
virtual void emitForDispatchDeinit(CodeGenFunction &CGF, SourceLocation Loc)
This is used for non static scheduled types and when the ordered clause is present on the loop constr...
void emitCall(CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee Callee, ArrayRef< llvm::Value * > Args={}) const
Emits Callee function call with arguments Args with location Loc.
virtual void getDefaultScheduleAndChunk(CodeGenFunction &CGF, const OMPLoopDirective &S, OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const
Choose default schedule type and chunk value for the schedule clause.
virtual std::pair< llvm::Function *, llvm::Function * > getUserDefinedReduction(const OMPDeclareReductionDecl *D)
Get combiner/initializer for the specified user-defined reduction, if any.
virtual bool isGPU() const
Returns true if the current target is a GPU.
static const Stmt * getSingleCompoundChild(ASTContext &Ctx, const Stmt *Body)
Checks if the Body is the CompoundStmt and returns its child statement iff there is only one that is ...
virtual void emitDeclareTargetFunction(const FunctionDecl *FD, llvm::GlobalValue *GV)
Emit code for handling declare target functions in the runtime.
bool HasRequiresUnifiedSharedMemory
Flag for keeping track of whether a requires unified_shared_memory directive is present.
llvm::Value * emitUpdateLocation(CodeGenFunction &CGF, SourceLocation Loc, unsigned Flags=0, bool EmitLoc=false)
Emits object of ident_t type with info for source location.
bool isLocalVarInUntiedTask(CodeGenFunction &CGF, const VarDecl *VD) const
Returns true if the variable is a local variable in untied task.
virtual void emitTeamsCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, SourceLocation Loc, llvm::Function *OutlinedFn, ArrayRef< llvm::Value * > CapturedVars)
Emits code for teams call of the OutlinedFn with variables captured in a record which address is stor...
virtual void emitCancellationPointCall(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind CancelRegion)
Emit code for 'cancellation point' construct.
virtual llvm::Function * emitThreadPrivateVarDefinition(const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit, CodeGenFunction *CGF=nullptr)
Emit code for initialization of a threadprivate variable.
virtual ConstantAddress getAddrOfDeclareTargetVar(const VarDecl *VD)
Returns the address of the variable marked as declare target with link clause OR as declare target wi...
llvm::Function * getOrCreateUserDefinedMapperFunc(const OMPDeclareMapperDecl *D)
Get the function for the specified user-defined mapper.
OpenMPLocThreadIDMapTy OpenMPLocThreadIDMap
virtual void functionFinished(CodeGenFunction &CGF)
Cleans up references to the objects in finished function.
virtual llvm::Function * emitTeamsOutlinedFunction(CodeGenFunction &CGF, const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen)
Emits outlined function for the specified OpenMP teams directive D.
QualType KmpTaskTQTy
Type typedef struct kmp_task { void * shareds; /**< pointer to block of pointers to shared vars */ k...
llvm::OpenMPIRBuilder OMPBuilder
An OpenMP-IR-Builder instance.
virtual void emitDoacrossInit(CodeGenFunction &CGF, const OMPLoopDirective &D, ArrayRef< Expr * > NumIterations)
Emit initialization for doacross loop nesting support.
virtual void adjustTargetSpecificDataForLambdas(CodeGenFunction &CGF, const OMPExecutableDirective &D) const
Adjust some parameters for the target-based directives, like addresses of the variables captured by r...
virtual void emitTargetDataCalls(CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, const Expr *Device, const RegionCodeGenTy &CodeGen, CGOpenMPRuntime::TargetDataInfo &Info)
Emit the target data mapping code associated with D.
virtual unsigned getDefaultLocationReserved2Flags() const
Returns additional flags that can be stored in reserved_2 field of the default location.
virtual Address getParameterAddress(CodeGenFunction &CGF, const VarDecl *NativeParam, const VarDecl *TargetParam) const
Gets the address of the native argument basing on the address of the target-specific parameter.
void emitUsesAllocatorsFini(CodeGenFunction &CGF, const Expr *Allocator)
Destroys user defined allocators specified in the uses_allocators clause.
QualType KmpTaskAffinityInfoTy
Type typedef struct kmp_task_affinity_info { kmp_intptr_t base_addr; size_t len; struct { bool flag1 ...
void emitPrivateReduction(CodeGenFunction &CGF, SourceLocation Loc, const Expr *Privates, const Expr *LHSExprs, const Expr *RHSExprs, const Expr *ReductionOps)
Emits code for private variable reduction.
llvm::Value * emitNumTeamsForTargetDirective(CodeGenFunction &CGF, const OMPExecutableDirective &D)
virtual void emitTargetOutlinedFunctionHelper(const OMPExecutableDirective &D, StringRef ParentName, llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, bool IsOffloadEntry, const RegionCodeGenTy &CodeGen)
Helper to emit outlined function for 'target' directive.
void scanForTargetRegionsFunctions(const Stmt *S, StringRef ParentName)
Start scanning from statement S and emit all target regions found along the way.
SmallVector< llvm::Value *, 4 > emitDepobjElementsSizes(CodeGenFunction &CGF, QualType &KmpDependInfoTy, const OMPTaskDataTy::DependData &Data)
virtual llvm::Value * emitMessageClause(CodeGenFunction &CGF, const Expr *Message, SourceLocation Loc)
virtual void emitTaskgroupRegion(CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen, SourceLocation Loc)
Emit a taskgroup region.
llvm::DenseMap< llvm::Function *, llvm::DenseMap< CanonicalDeclPtr< const Decl >, std::tuple< QualType, const FieldDecl *, const FieldDecl *, LValue > > > LastprivateConditionalToTypes
Maps local variables marked as lastprivate conditional to their internal types.
virtual bool emitTargetGlobalVariable(GlobalDecl GD)
Emit the global variable if it is a valid device global variable.
virtual void emitNumTeamsClause(CodeGenFunction &CGF, const Expr *NumTeams, const Expr *ThreadLimit, SourceLocation Loc)
Emits call to void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_teams,...
bool hasRequiresUnifiedSharedMemory() const
Return whether the unified_shared_memory has been specified.
virtual Address getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF, QualType VarType, StringRef Name)
Creates artificial threadprivate variable with name Name and type VarType.
void emitUserDefinedMapper(const OMPDeclareMapperDecl *D, CodeGenFunction *CGF=nullptr)
Emit the function for the user defined mapper construct.
bool HasEmittedTargetRegion
Flag for keeping track of whether a target region has been emitted.
void emitDepobjElements(CodeGenFunction &CGF, QualType &KmpDependInfoTy, LValue PosLVal, const OMPTaskDataTy::DependData &Data, Address DependenciesArray)
std::string getReductionFuncName(StringRef Name) const
Get the function name of a reduction function.
virtual void processRequiresDirective(const OMPRequiresDecl *D)
Perform check on requires decl to ensure that target architecture supports unified addressing.
llvm::DenseSet< CanonicalDeclPtr< const Decl > > AlreadyEmittedTargetDecls
List of the emitted declarations.
virtual llvm::Value * emitTaskReductionInit(CodeGenFunction &CGF, SourceLocation Loc, ArrayRef< const Expr * > LHSExprs, ArrayRef< const Expr * > RHSExprs, const OMPTaskDataTy &Data)
Emit code for initialization of a task reduction clause.
llvm::Value * getThreadID(CodeGenFunction &CGF, SourceLocation Loc)
Gets thread id value for the current thread.
void emitUpdateClause(CodeGenFunction &CGF, LValue DepobjLVal, OpenMPDependClauseKind NewDepKind, SourceLocation Loc)
Updates the dependency kind in the specified depobj object.
virtual void emitLastprivateConditionalFinalUpdate(CodeGenFunction &CGF, LValue PrivLVal, const VarDecl *VD, SourceLocation Loc)
Gets the address of the global copy used for lastprivate conditional update, if any.
llvm::MapVector< CanonicalDeclPtr< const VarDecl >, std::pair< Address, Address > > UntiedLocalVarsAddressesMap
virtual void emitErrorCall(CodeGenFunction &CGF, SourceLocation Loc, Expr *ME, bool IsFatal)
Emit __kmpc_error call for error directive extern void __kmpc_error(ident_t *loc, int severity,...
void clearLocThreadIdInsertPt(CodeGenFunction &CGF)
virtual void emitTaskyieldCall(CodeGenFunction &CGF, SourceLocation Loc)
Emits code for a taskyield directive.
std::string getName(ArrayRef< StringRef > Parts) const
Get the platform-specific name separator.
void computeMinAndMaxThreadsAndTeams(const OMPExecutableDirective &D, CodeGenFunction &CGF, llvm::OpenMPIRBuilder::TargetKernelDefaultAttrs &Attrs)
Helper to determine the min/max number of threads/teams for D.
virtual void emitFlush(CodeGenFunction &CGF, ArrayRef< const Expr * > Vars, SourceLocation Loc, llvm::AtomicOrdering AO)
Emit flush of the variables specified in 'omp flush' directive.
virtual void emitTaskwaitCall(CodeGenFunction &CGF, SourceLocation Loc, const OMPTaskDataTy &Data)
Emit code for 'taskwait' directive.
virtual void emitProcBindClause(CodeGenFunction &CGF, llvm::omp::ProcBindKind ProcBind, SourceLocation Loc)
Emit call to void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid, int proc_bind) to generate...
void emitLastprivateConditionalUpdate(CodeGenFunction &CGF, LValue IVLVal, StringRef UniqueDeclName, LValue LVal, SourceLocation Loc)
Emit update for lastprivate conditional data.
virtual void emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D, llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds, const Expr *IfCond, const OMPTaskDataTy &Data)
Emit task region for the taskloop directive.
virtual void emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind Kind, bool EmitChecks=true, bool ForceSimpleCall=false)
Emit an implicit/explicit barrier for OpenMP threads.
static unsigned getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind)
Returns default flags for the barriers depending on the directive, for which this barrier is going to ...
virtual bool emitTargetFunctions(GlobalDecl GD)
Emit the target regions enclosed in GD function definition or the function itself in case it is a val...
TaskResultTy emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc, const OMPExecutableDirective &D, llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds, const OMPTaskDataTy &Data)
Emit task region for the task directive.
llvm::Value * emitTargetNumIterationsCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, llvm::function_ref< llvm::Value *(CodeGenFunction &CGF, const OMPLoopDirective &D)> SizeEmitter)
Return the trip count of loops associated with constructs / 'target teams distribute' and 'teams dist...
llvm::StringMap< llvm::AssertingVH< llvm::GlobalVariable >, llvm::BumpPtrAllocator > InternalVars
An ordered map of auto-generated variables to their unique names.
virtual void emitDistributeStaticInit(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values)
llvm::SmallVector< UntiedLocalVarsAddressesMap, 4 > UntiedLocalVarsStack
virtual void emitForStaticFinish(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind)
Call the appropriate runtime routine to notify that we finished all the work with current loop.
virtual void emitThreadLimitClause(CodeGenFunction &CGF, const Expr *ThreadLimit, SourceLocation Loc)
Emits call to void __kmpc_set_thread_limit(ident_t *loc, kmp_int32 global_tid, kmp_int32 thread_limit)...
void emitIfClause(CodeGenFunction &CGF, const Expr *Cond, const RegionCodeGenTy &ThenGen, const RegionCodeGenTy &ElseGen)
Emits code for OpenMP 'if' clause using specified CodeGen function.
Address emitDepobjDependClause(CodeGenFunction &CGF, const OMPTaskDataTy::DependData &Dependencies, SourceLocation Loc)
Emits list of dependencies based on the provided data (array of dependence/expression pairs) for depob...
bool isNontemporalDecl(const ValueDecl *VD) const
Checks if the VD variable is marked as nontemporal declaration in current context.
virtual llvm::Function * emitParallelOutlinedFunction(CodeGenFunction &CGF, const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen)
Emits outlined function for the specified OpenMP parallel directive D.
const Expr * getNumThreadsExprForTargetDirective(CodeGenFunction &CGF, const OMPExecutableDirective &D, int32_t &UpperBound, bool UpperBoundOnly, llvm::Value **CondExpr=nullptr, const Expr **ThreadLimitExpr=nullptr)
Check for a number of threads upper bound constant value (stored in UpperBound), or expression (retur...
virtual void registerVTableOffloadEntry(llvm::GlobalVariable *VTable, const VarDecl *VD)
Register VTable to OpenMP offload entry.
virtual llvm::Value * emitSeverityClause(OpenMPSeverityClauseKind Severity, SourceLocation Loc)
llvm::SmallVector< LastprivateConditionalData, 4 > LastprivateConditionalStack
Stack for list of addresses of declarations in current context marked as lastprivate conditional.
virtual void emitForStaticInit(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind, const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values)
Call the appropriate runtime routine to initialize it before start of loop.
virtual void emitDeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn)
Marks function Fn with properly mangled versions of vector functions.
llvm::AtomicOrdering getDefaultMemoryOrdering() const
Gets default memory ordering as specified in requires directive.
virtual bool isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind, bool Chunked) const
Check if the specified ScheduleKind is static non-chunked.
virtual void emitAndRegisterVTable(CodeGenModule &CGM, CXXRecordDecl *CXXRecord, const VarDecl *VD)
Emit and register VTable for the C++ class in OpenMP offload entry.
llvm::Value * getCriticalRegionLock(StringRef CriticalName)
Returns corresponding lock object for the specified critical region name.
virtual void emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc, const Expr *IfCond, OpenMPDirectiveKind CancelRegion)
Emit code for 'cancel' construct.
QualType SavedKmpTaskTQTy
Saved kmp_task_t for task directive.
virtual void emitMasterRegion(CodeGenFunction &CGF, const RegionCodeGenTy &MasterOpGen, SourceLocation Loc)
Emits a master region.
virtual llvm::Function * emitTaskOutlinedFunction(const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, const VarDecl *PartIDVar, const VarDecl *TaskTVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen, bool Tied, unsigned &NumberOfParts)
Emits outlined function for the OpenMP task directive D.
llvm::DenseMap< llvm::Function *, unsigned > FunctionToUntiedTaskStackMap
Maps function to the position of the untied task locals stack.
void emitDestroyClause(CodeGenFunction &CGF, LValue DepobjLVal, SourceLocation Loc)
Emits the code to destroy the dependency object provided in depobj directive.
virtual void emitTaskReductionFixups(CodeGenFunction &CGF, SourceLocation Loc, ReductionCodeGen &RCG, unsigned N)
Required to resolve existing problems in the runtime.
llvm::ArrayType * KmpCriticalNameTy
Type kmp_critical_name, originally defined as typedef kmp_int32 kmp_critical_name[8];.
virtual void emitDoacrossOrdered(CodeGenFunction &CGF, const OMPDependClause *C)
Emit code for doacross ordered directive with 'depend' clause.
llvm::DenseMap< const OMPDeclareMapperDecl *, llvm::Function * > UDMMap
Map from the user-defined mapper declaration to its corresponding functions.
virtual void checkAndEmitLastprivateConditional(CodeGenFunction &CGF, const Expr *LHS)
Checks if the provided LVal is lastprivate conditional and emits the code to update the value of the ...
std::pair< llvm::Value *, LValue > getDepobjElements(CodeGenFunction &CGF, LValue DepobjLVal, SourceLocation Loc)
Returns the number of the elements and the address of the depobj dependency array.
llvm::SmallDenseSet< const VarDecl * > DeferredGlobalVariables
List of variables that can become declare target implicitly and, thus, must be emitted.
void emitUsesAllocatorsInit(CodeGenFunction &CGF, const Expr *Allocator, const Expr *AllocatorTraits)
Initializes user defined allocators specified in the uses_allocators clauses.
virtual void registerVTable(const OMPExecutableDirective &D)
Emit code for registering vtable by scanning through map clause in OpenMP target region.
llvm::Type * KmpRoutineEntryPtrTy
Type typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *);.
llvm::Type * getIdentTyPointerTy()
Returns pointer to ident_t type.
void emitSingleReductionCombiner(CodeGenFunction &CGF, const Expr *ReductionOp, const Expr *PrivateRef, const DeclRefExpr *LHS, const DeclRefExpr *RHS)
Emits single reduction combiner.
llvm::OpenMPIRBuilder & getOMPBuilder()
virtual void emitTargetOutlinedFunction(const OMPExecutableDirective &D, StringRef ParentName, llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, bool IsOffloadEntry, const RegionCodeGenTy &CodeGen)
Emit outlined function for 'target' directive.
virtual void emitCriticalRegion(CodeGenFunction &CGF, StringRef CriticalName, const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc, const Expr *Hint=nullptr)
Emits a critical region.
virtual void emitForOrderedIterationEnd(CodeGenFunction &CGF, SourceLocation Loc, unsigned IVSize, bool IVSigned)
Call the appropriate runtime routine to notify that we finished iteration of the ordered loop with th...
virtual void emitOutlinedFunctionCall(CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn, ArrayRef< llvm::Value * > Args={}) const
Emits call of the outlined function with the provided arguments, translating these arguments to corre...
llvm::Value * emitNumThreadsForTargetDirective(CodeGenFunction &CGF, const OMPExecutableDirective &D)
Emit an expression that denotes the number of threads a target region shall use.
void emitThreadPrivateVarInit(CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor, llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc)
Emits initialization code for the threadprivate variables.
virtual void emitUserDefinedReduction(CodeGenFunction *CGF, const OMPDeclareReductionDecl *D)
Emit code for the specified user defined reduction construct.
virtual void checkAndEmitSharedLastprivateConditional(CodeGenFunction &CGF, const OMPExecutableDirective &D, const llvm::DenseSet< CanonicalDeclPtr< const VarDecl > > &IgnoredDecls)
Checks if the lastprivate conditional was updated in inner region and writes the value.
QualType KmpDimTy
struct kmp_dim { // loop bounds info casted to kmp_int64 kmp_int64 lo; // lower kmp_int64 up; // uppe...
virtual void emitInlinedDirective(CodeGenFunction &CGF, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen, bool HasCancel=false)
Emit code for the directive that does not require outlining.
virtual void registerTargetGlobalVariable(const VarDecl *VD, llvm::Constant *Addr)
Checks if the provided global decl GD is a declare target variable and registers it when emitting cod...
virtual void emitFunctionProlog(CodeGenFunction &CGF, const Decl *D)
Emits OpenMP-specific function prolog.
void emitKmpRoutineEntryT(QualType KmpInt32Ty)
Build type kmp_routine_entry_t (if not built yet).
virtual bool isStaticChunked(OpenMPScheduleClauseKind ScheduleKind, bool Chunked) const
Check if the specified ScheduleKind is static chunked.
virtual void emitTargetCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond, llvm::PointerIntPair< const Expr *, 2, OpenMPDeviceClauseModifier > Device, llvm::function_ref< llvm::Value *(CodeGenFunction &CGF, const OMPLoopDirective &D)> SizeEmitter)
Emit the target offloading code associated with D.
virtual bool hasAllocateAttributeForGlobalVar(const VarDecl *VD, LangAS &AS)
Checks if the variable has associated OMPAllocateDeclAttr attribute with the predefined allocator and...
llvm::AtomicOrdering RequiresAtomicOrdering
Atomic ordering from the omp requires directive.
virtual void emitReduction(CodeGenFunction &CGF, SourceLocation Loc, ArrayRef< const Expr * > Privates, ArrayRef< const Expr * > LHSExprs, ArrayRef< const Expr * > RHSExprs, ArrayRef< const Expr * > ReductionOps, ReductionOptionsTy Options)
Emit code for a reduction clause.
std::pair< llvm::Value *, Address > emitDependClause(CodeGenFunction &CGF, ArrayRef< OMPTaskDataTy::DependData > Dependencies, SourceLocation Loc)
Emits list of dependencies based on the provided data (array of dependence/expression pairs).
llvm::StringMap< llvm::WeakTrackingVH > EmittedNonTargetVariables
List of the global variables with their addresses that should not be emitted for the target.
virtual bool isDynamic(OpenMPScheduleClauseKind ScheduleKind) const
Check if the specified ScheduleKind is dynamic.
Address emitLastprivateConditionalInit(CodeGenFunction &CGF, const VarDecl *VD)
Create specialized alloca to handle lastprivate conditionals.
virtual void emitOrderedRegion(CodeGenFunction &CGF, const RegionCodeGenTy &OrderedOpGen, SourceLocation Loc, bool IsThreads)
Emit an ordered region.
virtual Address getAddressOfLocalVariable(CodeGenFunction &CGF, const VarDecl *VD)
Gets the OpenMP-specific address of the local variable.
virtual void emitTaskReductionFini(CodeGenFunction &CGF, SourceLocation Loc, bool IsWorksharingReduction)
Emits the following code for reduction clause with task modifier:
virtual void emitMaskedRegion(CodeGenFunction &CGF, const RegionCodeGenTy &MaskedOpGen, SourceLocation Loc, const Expr *Filter=nullptr)
Emits a masked region.
QualType KmpDependInfoTy
Type typedef struct kmp_depend_info { kmp_intptr_t base_addr; size_t len; struct { bool in:1; bool ou...
llvm::Function * emitReductionFunction(StringRef ReducerName, SourceLocation Loc, llvm::Type *ArgsElemType, ArrayRef< const Expr * > Privates, ArrayRef< const Expr * > LHSExprs, ArrayRef< const Expr * > RHSExprs, ArrayRef< const Expr * > ReductionOps)
Emits reduction function.
virtual void emitForDispatchInit(CodeGenFunction &CGF, SourceLocation Loc, const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned, bool Ordered, const DispatchRTInput &DispatchValues)
Call the appropriate runtime routine to initialize it before start of loop.
Address getTaskReductionItem(CodeGenFunction &CGF, SourceLocation Loc, llvm::Value *ReductionsPtr, LValue SharedLVal) override
Get the address of void * type of the private copy of the reduction item specified by the SharedLVal...
void emitCriticalRegion(CodeGenFunction &CGF, StringRef CriticalName, const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc, const Expr *Hint=nullptr) override
Emits a critical region.
void emitDistributeStaticInit(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) override
void emitForStaticInit(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind, const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) override
Call the appropriate runtime routine to initialize it before start of loop.
bool emitTargetGlobalVariable(GlobalDecl GD) override
Emit the global variable if it is a valid device global variable.
llvm::Value * emitForNext(CodeGenFunction &CGF, SourceLocation Loc, unsigned IVSize, bool IVSigned, Address IL, Address LB, Address UB, Address ST) override
Call __kmpc_dispatch_next( ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter, kmp_int[32|64] *p_lowe...
llvm::Function * emitThreadPrivateVarDefinition(const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit, CodeGenFunction *CGF=nullptr) override
Emit code for initialization of a threadprivate variable.
void emitTargetDataStandAloneCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, const Expr *Device) override
Emit the data mapping/movement code associated with the directive D that should be of the form 'targe...
llvm::Function * emitTeamsOutlinedFunction(CodeGenFunction &CGF, const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) override
Emits outlined function for the specified OpenMP teams directive D.
void emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc, llvm::Function *OutlinedFn, ArrayRef< llvm::Value * > CapturedVars, const Expr *IfCond, llvm::Value *NumThreads, OpenMPNumThreadsClauseModifier NumThreadsModifier=OMPC_NUMTHREADS_unknown, OpenMPSeverityClauseKind Severity=OMPC_SEVERITY_fatal, const Expr *Message=nullptr) override
Emits code for parallel or serial call of the OutlinedFn with variables captured in a record which ad...
void emitReduction(CodeGenFunction &CGF, SourceLocation Loc, ArrayRef< const Expr * > Privates, ArrayRef< const Expr * > LHSExprs, ArrayRef< const Expr * > RHSExprs, ArrayRef< const Expr * > ReductionOps, ReductionOptionsTy Options) override
Emit code for a reduction clause.
void emitFlush(CodeGenFunction &CGF, ArrayRef< const Expr * > Vars, SourceLocation Loc, llvm::AtomicOrdering AO) override
Emit flush of the variables specified in 'omp flush' directive.
void emitDoacrossOrdered(CodeGenFunction &CGF, const OMPDependClause *C) override
Emit code for doacross ordered directive with 'depend' clause.
void emitTaskyieldCall(CodeGenFunction &CGF, SourceLocation Loc) override
Emits code for a taskyield directive.
Address getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF, QualType VarType, StringRef Name) override
Creates artificial threadprivate variable with name Name and type VarType.
Address getAddrOfThreadPrivate(CodeGenFunction &CGF, const VarDecl *VD, Address VDAddr, SourceLocation Loc) override
Returns address of the threadprivate variable for the current thread.
void emitSingleRegion(CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen, SourceLocation Loc, ArrayRef< const Expr * > CopyprivateVars, ArrayRef< const Expr * > DestExprs, ArrayRef< const Expr * > SrcExprs, ArrayRef< const Expr * > AssignmentOps) override
Emits a single region.
void emitTaskReductionFixups(CodeGenFunction &CGF, SourceLocation Loc, ReductionCodeGen &RCG, unsigned N) override
Required to resolve existing problems in the runtime.
llvm::Function * emitParallelOutlinedFunction(CodeGenFunction &CGF, const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) override
Emits outlined function for the specified OpenMP parallel directive D.
void emitCancellationPointCall(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind CancelRegion) override
Emit code for 'cancellation point' construct.
void emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind Kind, bool EmitChecks=true, bool ForceSimpleCall=false) override
Emit an implicit/explicit barrier for OpenMP threads.
Address getParameterAddress(CodeGenFunction &CGF, const VarDecl *NativeParam, const VarDecl *TargetParam) const override
Gets the address of the native argument basing on the address of the target-specific parameter.
void emitTeamsCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, SourceLocation Loc, llvm::Function *OutlinedFn, ArrayRef< llvm::Value * > CapturedVars) override
Emits code for teams call of the OutlinedFn with variables captured in a record which address is stor...
void emitForOrderedIterationEnd(CodeGenFunction &CGF, SourceLocation Loc, unsigned IVSize, bool IVSigned) override
Call the appropriate runtime routine to notify that we finished iteration of the ordered loop with th...
bool emitTargetGlobal(GlobalDecl GD) override
Emit the global GD if it is meaningful for the target.
void emitTaskReductionFini(CodeGenFunction &CGF, SourceLocation Loc, bool IsWorksharingReduction) override
Emits the following code for reduction clause with task modifier:
void emitOrderedRegion(CodeGenFunction &CGF, const RegionCodeGenTy &OrderedOpGen, SourceLocation Loc, bool IsThreads) override
Emit an ordered region.
void emitForStaticFinish(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind) override
Call the appropriate runtime routine to notify that we finished all the work with current loop.
llvm::Value * emitTaskReductionInit(CodeGenFunction &CGF, SourceLocation Loc, ArrayRef< const Expr * > LHSExprs, ArrayRef< const Expr * > RHSExprs, const OMPTaskDataTy &Data) override
Emit code for initialization of a task reduction clause.
void emitProcBindClause(CodeGenFunction &CGF, llvm::omp::ProcBindKind ProcBind, SourceLocation Loc) override
Emit call to void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid, int proc_bind) to generate...
void emitTargetOutlinedFunction(const OMPExecutableDirective &D, StringRef ParentName, llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) override
Emit outlined function for 'target' directive.
void emitMasterRegion(CodeGenFunction &CGF, const RegionCodeGenTy &MasterOpGen, SourceLocation Loc) override
Emits a master region.
void emitNumTeamsClause(CodeGenFunction &CGF, const Expr *NumTeams, const Expr *ThreadLimit, SourceLocation Loc) override
Emits call to void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_teams,...
void emitForDispatchDeinit(CodeGenFunction &CGF, SourceLocation Loc) override
This is used for non static scheduled types and when the ordered clause is present on the loop constr...
const VarDecl * translateParameter(const FieldDecl *FD, const VarDecl *NativeParam) const override
Translates the native parameter of outlined function if this is required for target.
void emitNumThreadsClause(CodeGenFunction &CGF, llvm::Value *NumThreads, SourceLocation Loc, OpenMPNumThreadsClauseModifier Modifier=OMPC_NUMTHREADS_unknown, OpenMPSeverityClauseKind Severity=OMPC_SEVERITY_fatal, SourceLocation SeverityLoc=SourceLocation(), const Expr *Message=nullptr, SourceLocation MessageLoc=SourceLocation()) override
Emits call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_threads) ...
void emitMaskedRegion(CodeGenFunction &CGF, const RegionCodeGenTy &MaskedOpGen, SourceLocation Loc, const Expr *Filter=nullptr) override
Emits a masked region.
void emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, const OMPExecutableDirective &D, llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds, const Expr *IfCond, const OMPTaskDataTy &Data) override
Emit task region for the task directive.
void emitTargetCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond, llvm::PointerIntPair< const Expr *, 2, OpenMPDeviceClauseModifier > Device, llvm::function_ref< llvm::Value *(CodeGenFunction &CGF, const OMPLoopDirective &D)> SizeEmitter) override
Emit the target offloading code associated with D.
bool emitTargetFunctions(GlobalDecl GD) override
Emit the target regions enclosed in GD function definition or the function itself in case it is a val...
void emitDoacrossInit(CodeGenFunction &CGF, const OMPLoopDirective &D, ArrayRef< Expr * > NumIterations) override
Emit initialization for doacross loop nesting support.
void emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc, const Expr *IfCond, OpenMPDirectiveKind CancelRegion) override
Emit code for 'cancel' construct.
void emitTaskwaitCall(CodeGenFunction &CGF, SourceLocation Loc, const OMPTaskDataTy &Data) override
Emit code for 'taskwait' directive.
void emitTaskgroupRegion(CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen, SourceLocation Loc) override
Emit a taskgroup region.
void emitTargetDataCalls(CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, const Expr *Device, const RegionCodeGenTy &CodeGen, CGOpenMPRuntime::TargetDataInfo &Info) override
Emit the target data mapping code associated with D.
void emitForDispatchInit(CodeGenFunction &CGF, SourceLocation Loc, const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned, bool Ordered, const DispatchRTInput &DispatchValues) override
This is used for non static scheduled types and when the ordered clause is present on the loop constr...
llvm::Function * emitTaskOutlinedFunction(const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, const VarDecl *PartIDVar, const VarDecl *TaskTVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen, bool Tied, unsigned &NumberOfParts) override
Emits outlined function for the OpenMP task directive D.
void emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D, llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds, const Expr *IfCond, const OMPTaskDataTy &Data) override
Emit task region for the taskloop directive.
unsigned getNonVirtualBaseLLVMFieldNo(const CXXRecordDecl *RD) const
llvm::StructType * getLLVMType() const
Return the "complete object" LLVM type associated with this record.
llvm::StructType * getBaseSubobjectLLVMType() const
Return the "base subobject" LLVM type associated with this record.
unsigned getLLVMFieldNo(const FieldDecl *FD) const
Return llvm::StructType element number that corresponds to the field FD.
unsigned getVirtualBaseIndex(const CXXRecordDecl *base) const
Return the LLVM field index corresponding to the given virtual base.
API for captured statement code generation.
virtual void EmitBody(CodeGenFunction &CGF, const Stmt *S)
Emit the captured statement body.
virtual const FieldDecl * lookup(const VarDecl *VD) const
Lookup the captured field decl for a variable.
RAII for correct setting/restoring of CapturedStmtInfo.
The scope used to remap some variables as private in the OpenMP loop body (or other captured region e...
bool Privatize()
Privatizes local variables previously registered as private.
bool addPrivate(const VarDecl *LocalVD, Address Addr)
Registers LocalVD variable as a private with Addr as the address of the corresponding private variabl...
An RAII object to set (and then clear) a mapping for an OpaqueValueExpr.
Enters a new scope for capturing cleanups, all of which will be executed once the scope is exited.
CodeGenFunction - This class organizes the per-function state that is used while generating LLVM code...
LValue EmitLoadOfReferenceLValue(LValue RefLVal)
Definition CGExpr.cpp:3413
void EmitBranchOnBoolExpr(const Expr *Cond, llvm::BasicBlock *TrueBlock, llvm::BasicBlock *FalseBlock, uint64_t TrueCount, Stmt::Likelihood LH=Stmt::LH_None, const Expr *ConditionalOp=nullptr, const VarDecl *ConditionalDecl=nullptr)
EmitBranchOnBoolExpr - Emit a branch on a boolean condition (e.g.
void emitDestroy(Address addr, QualType type, Destroyer *destroyer, bool useEHCleanupForArray)
emitDestroy - Immediately perform the destruction of the given object.
Definition CGDecl.cpp:2415
JumpDest getJumpDestInCurrentScope(llvm::BasicBlock *Target)
The given basic block lies in the current EH scope, but may be a target of a potentially scope-crossi...
static void EmitOMPTargetParallelDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetParallelDirective &S)
void EmitNullInitialization(Address DestPtr, QualType Ty)
EmitNullInitialization - Generate code to set a value of the given type to null. If the type contains...
CGCapturedStmtInfo * CapturedStmtInfo
ComplexPairTy EmitLoadOfComplex(LValue src, SourceLocation loc)
EmitLoadOfComplex - Load a complex number from the specified l-value.
static void EmitOMPTargetDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetDirective &S)
Emit device code for the target directive.
static void EmitOMPTargetTeamsDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetTeamsDirective &S)
Emit device code for the target teams directive.
static void EmitOMPTargetTeamsDistributeDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetTeamsDistributeDirective &S)
Emit device code for the target teams distribute directive.
llvm::Function * GenerateOpenMPCapturedStmtFunctionAggregate(const CapturedStmt &S, const OMPExecutableDirective &D)
llvm::BasicBlock * createBasicBlock(const Twine &name="", llvm::Function *parent=nullptr, llvm::BasicBlock *before=nullptr)
createBasicBlock - Create an LLVM basic block.
const LangOptions & getLangOpts() const
AutoVarEmission EmitAutoVarAlloca(const VarDecl &var)
EmitAutoVarAlloca - Emit the alloca and debug information for a local variable.
Definition CGDecl.cpp:1483
void pushDestroy(QualType::DestructionKind dtorKind, Address addr, QualType type)
pushDestroy - Push the standard destructor for the given type as at least a normal cleanup.
Definition CGDecl.cpp:2299
Address EmitLoadOfPointer(Address Ptr, const PointerType *PtrTy, LValueBaseInfo *BaseInfo=nullptr, TBAAAccessInfo *TBAAInfo=nullptr)
Load a pointer with type PtrTy stored at address Ptr.
Definition CGExpr.cpp:3422
void EmitBranchThroughCleanup(JumpDest Dest)
EmitBranchThroughCleanup - Emit a branch from the current insert block through the normal cleanup han...
const Decl * CurCodeDecl
CurCodeDecl - This is the inner-most code context, which includes blocks.
Destroyer * getDestroyer(QualType::DestructionKind destructionKind)
Definition CGDecl.cpp:2272
llvm::AssertingVH< llvm::Instruction > AllocaInsertPt
AllocaInsertPoint - This is an instruction in the entry block before which we prefer to insert alloca...
void EmitAggregateAssign(LValue Dest, LValue Src, QualType EltTy)
Emit an aggregate assignment.
JumpDest ReturnBlock
ReturnBlock - Unified return block.
void EmitAggregateCopy(LValue Dest, LValue Src, QualType EltTy, AggValueSlot::Overlap_t MayOverlap, bool isVolatile=false)
EmitAggregateCopy - Emit an aggregate copy.
LValue EmitLValueForField(LValue Base, const FieldDecl *Field, bool IsInBounds=true)
Definition CGExpr.cpp:5766
RawAddress CreateDefaultAlignTempAlloca(llvm::Type *Ty, const Twine &Name="tmp")
CreateDefaultAlignTempAlloca - This creates an alloca with the default ABI alignment of the given L...
Definition CGExpr.cpp:182
void GenerateOpenMPCapturedVars(const CapturedStmt &S, SmallVectorImpl< llvm::Value * > &CapturedVars)
void EmitIgnoredExpr(const Expr *E)
EmitIgnoredExpr - Emit an expression in a context which ignores the result.
Definition CGExpr.cpp:258
RValue EmitLoadOfLValue(LValue V, SourceLocation Loc)
EmitLoadOfLValue - Given an expression that represents a value lvalue, this method emits the address ...
Definition CGExpr.cpp:2519
LValue EmitArraySectionExpr(const ArraySectionExpr *E, bool IsLowerBound=true)
Definition CGExpr.cpp:5275
LValue EmitOMPSharedLValue(const Expr *E)
Emits the lvalue for the expression with possibly captured variable.
void StartFunction(GlobalDecl GD, QualType RetTy, llvm::Function *Fn, const CGFunctionInfo &FnInfo, const FunctionArgList &Args, SourceLocation Loc=SourceLocation(), SourceLocation StartLoc=SourceLocation())
Emit code for the start of a function.
void EmitOMPCopy(QualType OriginalType, Address DestAddr, Address SrcAddr, const VarDecl *DestVD, const VarDecl *SrcVD, const Expr *Copy)
Emit proper copying of data from one variable to another.
llvm::Value * EvaluateExprAsBool(const Expr *E)
EvaluateExprAsBool - Perform the usual unary conversions on the specified expression and compare the ...
Definition CGExpr.cpp:239
JumpDest getOMPCancelDestination(OpenMPDirectiveKind Kind)
llvm::Value * emitArrayLength(const ArrayType *arrayType, QualType &baseType, Address &addr)
emitArrayLength - Compute the length of an array, even if it's a VLA, and drill down to the base elem...
void EmitOMPAggregateAssign(Address DestAddr, Address SrcAddr, QualType OriginalType, const llvm::function_ref< void(Address, Address)> CopyGen)
Perform element by element copying of arrays with type OriginalType from SrcAddr to DestAddr using co...
bool HaveInsertPoint() const
HaveInsertPoint - True if an insertion point is defined.
llvm::Value * getTypeSize(QualType Ty)
Returns calculated size of the specified type.
LValue MakeRawAddrLValue(llvm::Value *V, QualType T, CharUnits Alignment, AlignmentSource Source=AlignmentSource::Type)
Same as MakeAddrLValue above except that the pointer is known to be unsigned.
LValue EmitLValueForFieldInitialization(LValue Base, const FieldDecl *Field)
EmitLValueForFieldInitialization - Like EmitLValueForField, except that if the Field is a reference,...
Definition CGExpr.cpp:5940
RawAddress CreateMemTempWithoutCast(QualType T, const Twine &Name="tmp")
CreateMemTempWithoutCast - Create a temporary memory object of the given type, with appropriate alignment without...
Definition CGExpr.cpp:231
VlaSizePair getVLASize(const VariableArrayType *vla)
Returns an LLVM value that corresponds to the size, in non-variably-sized elements,...
llvm::CallInst * EmitNounwindRuntimeCall(llvm::FunctionCallee callee, const Twine &name="")
llvm::Value * EmitLoadOfScalar(Address Addr, bool Volatile, QualType Ty, SourceLocation Loc, AlignmentSource Source=AlignmentSource::Type, bool isNontemporal=false)
EmitLoadOfScalar - Load a scalar value from an address, taking care to appropriately convert from the...
void EmitStoreOfComplex(ComplexPairTy V, LValue dest, bool isInit)
EmitStoreOfComplex - Store a complex number into the specified l-value.
const Decl * CurFuncDecl
CurFuncDecl - Holds the Decl for the current outermost non-closure context.
void EmitAutoVarCleanups(const AutoVarEmission &emission)
Definition CGDecl.cpp:2218
void EmitStoreThroughLValue(RValue Src, LValue Dst, bool isInit=false)
EmitStoreThroughLValue - Store the specified rvalue into the specified lvalue, where both are guarant...
Definition CGExpr.cpp:2770
LValue EmitLoadOfPointerLValue(Address Ptr, const PointerType *PtrTy)
Definition CGExpr.cpp:3432
void EmitAnyExprToMem(const Expr *E, Address Location, Qualifiers Quals, bool IsInitializer)
EmitAnyExprToMem - Emits the code necessary to evaluate an arbitrary expression into the given memory...
Definition CGExpr.cpp:309
bool needsEHCleanup(QualType::DestructionKind kind)
Determines whether an EH cleanup is required to destroy a type with the given destruction kind.
llvm::DenseMap< const ValueDecl *, FieldDecl * > LambdaCaptureFields
llvm::CallInst * EmitRuntimeCall(llvm::FunctionCallee callee, const Twine &name="")
llvm::Type * ConvertTypeForMem(QualType T)
static void EmitOMPTargetTeamsDistributeParallelForDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetTeamsDistributeParallelForDirective &S)
static void EmitOMPTargetParallelForSimdDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetParallelForSimdDirective &S)
Emit device code for the target parallel for simd directive.
CodeGenTypes & getTypes() const
static TypeEvaluationKind getEvaluationKind(QualType T)
getEvaluationKind - Return the TypeEvaluationKind of QualType T.
void EmitOMPTargetTaskBasedDirective(const OMPExecutableDirective &S, const RegionCodeGenTy &BodyGen, OMPTargetDataInfo &InputInfo)
Address EmitPointerWithAlignment(const Expr *Addr, LValueBaseInfo *BaseInfo=nullptr, TBAAAccessInfo *TBAAInfo=nullptr, KnownNonNull_t IsKnownNonNull=NotKnownNonNull)
EmitPointerWithAlignment - Given an expression with a pointer type, emit the value and compute our be...
Definition CGExpr.cpp:1598
static void EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetTeamsDistributeParallelForSimdDirective &S)
Emit device code for the target teams distribute parallel for simd directive.
void EmitBranch(llvm::BasicBlock *Block)
EmitBranch - Emit a branch to the specified basic block from the current insert block,...
Definition CGStmt.cpp:663
llvm::Function * GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S, const OMPExecutableDirective &D)
RawAddress CreateMemTemp(QualType T, const Twine &Name="tmp", RawAddress *Alloca=nullptr)
CreateMemTemp - Create a temporary memory object of the given type, with appropriate alignment and cas...
Definition CGExpr.cpp:195
void EmitVarDecl(const VarDecl &D)
EmitVarDecl - Emit a local variable declaration.
Definition CGDecl.cpp:204
llvm::Value * EmitCheckedInBoundsGEP(llvm::Type *ElemTy, llvm::Value *Ptr, ArrayRef< llvm::Value * > IdxList, bool SignedIndices, bool IsSubtraction, SourceLocation Loc, const Twine &Name="")
Same as IRBuilder::CreateInBoundsGEP, but additionally emits a check to detect undefined behavior whe...
static void EmitOMPTargetParallelGenericLoopDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetParallelGenericLoopDirective &S)
Emit device code for the target parallel loop directive.
llvm::Value * EmitScalarExpr(const Expr *E, bool IgnoreResultAssign=false)
EmitScalarExpr - Emit the computation of the specified expression of LLVM scalar type,...
static bool IsWrappedCXXThis(const Expr *E)
Check if E is a C++ "this" pointer wrapped in value-preserving casts.
Definition CGExpr.cpp:1656
LValue MakeAddrLValue(Address Addr, QualType T, AlignmentSource Source=AlignmentSource::Type)
void FinishFunction(SourceLocation EndLoc=SourceLocation())
FinishFunction - Complete IR generation of the current function.
void EmitAtomicStore(RValue rvalue, LValue lvalue, bool isInit)
static void EmitOMPTargetSimdDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetSimdDirective &S)
Emit device code for the target simd directive.
static void EmitOMPTargetParallelForDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetParallelForDirective &S)
Emit device code for the target parallel for directive.
Address GetAddrOfLocalVar(const VarDecl *VD)
GetAddrOfLocalVar - Return the address of a local variable.
bool ConstantFoldsToSimpleInteger(const Expr *Cond, bool &Result, bool AllowLabels=false)
ConstantFoldsToSimpleInteger - If the specified expression does not fold to a constant,...
static void EmitOMPTargetTeamsGenericLoopDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetTeamsGenericLoopDirective &S)
Emit device code for the target teams loop directive.
LValue EmitMemberExpr(const MemberExpr *E)
Definition CGExpr.cpp:5544
std::pair< llvm::Value *, llvm::Value * > ComplexPairTy
Address ReturnValue
ReturnValue - The temporary alloca to hold the return value.
LValue EmitLValue(const Expr *E, KnownNonNull_t IsKnownNonNull=NotKnownNonNull)
EmitLValue - Emit code to compute a designator that specifies the location of the expression.
Definition CGExpr.cpp:1714
void incrementProfileCounter(const Stmt *S, llvm::Value *StepV=nullptr)
Increment the profiler's counter for the given statement by StepV.
static void EmitOMPTargetTeamsDistributeSimdDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetTeamsDistributeSimdDirective &S)
Emit device code for the target teams distribute simd directive.
llvm::Value * EmitScalarConversion(llvm::Value *Src, QualType SrcTy, QualType DstTy, SourceLocation Loc)
Emit a conversion from the specified type to the specified destination type, both of which are LLVM s...
void EmitVariablyModifiedType(QualType Ty)
EmitVariablyModifiedType - Capture all the sizes for the VLA expressions in the given variably-modified type and s...
bool isTrivialInitializer(const Expr *Init)
Determine whether the given initializer is trivial in the sense that it requires no code to be genera...
Definition CGDecl.cpp:1823
void EmitStoreOfScalar(llvm::Value *Value, Address Addr, bool Volatile, QualType Ty, AlignmentSource Source=AlignmentSource::Type, bool isInit=false, bool isNontemporal=false)
EmitStoreOfScalar - Store a scalar value to an address, taking care to appropriately convert from the...
void EmitBlock(llvm::BasicBlock *BB, bool IsFinished=false)
EmitBlock - Emit the given block.
Definition CGStmt.cpp:643
void EmitExprAsInit(const Expr *init, const ValueDecl *D, LValue lvalue, bool capturedByInit)
EmitExprAsInit - Emits the code necessary to initialize a location in memory with the given initializ...
Definition CGDecl.cpp:2108
LValue MakeNaturalAlignRawAddrLValue(llvm::Value *V, QualType T)
This class organizes the cross-function state that is used while generating LLVM code.
void SetInternalFunctionAttributes(GlobalDecl GD, llvm::Function *F, const CGFunctionInfo &FI)
Set the attributes on the LLVM function for the given decl and function info.
llvm::Module & getModule() const
const IntrusiveRefCntPtr< llvm::vfs::FileSystem > & getFileSystem() const
DiagnosticsEngine & getDiags() const
const LangOptions & getLangOpts() const
CharUnits getNaturalTypeAlignment(QualType T, LValueBaseInfo *BaseInfo=nullptr, TBAAAccessInfo *TBAAInfo=nullptr, bool forPointeeType=false)
CGOpenMPRuntime & getOpenMPRuntime()
Return a reference to the configured OpenMP runtime.
TBAAAccessInfo getTBAAInfoForSubobject(LValue Base, QualType AccessType)
getTBAAInfoForSubobject - Get TBAA information for an access with a given base lvalue.
ASTContext & getContext() const
const CodeGenOptions & getCodeGenOpts() const
StringRef getMangledName(GlobalDecl GD)
std::optional< CharUnits > getOMPAllocateAlignment(const VarDecl *VD)
Return the alignment specified in an allocate directive, if present.
Definition CGDecl.cpp:2967
llvm::Constant * EmitNullConstant(QualType T)
Return the result of value-initializing the given type, i.e.
llvm::Type * ConvertType(QualType T)
ConvertType - Convert type T into a llvm::Type.
llvm::FunctionType * GetFunctionType(const CGFunctionInfo &Info)
GetFunctionType - Get the LLVM function type for Info.
Definition CGCall.cpp:1873
const CGFunctionInfo & arrangeBuiltinFunctionDeclaration(QualType resultType, const FunctionArgList &args)
A builtin function is a freestanding function using the default C conventions.
Definition CGCall.cpp:747
const CGRecordLayout & getCGRecordLayout(const RecordDecl *)
getCGRecordLayout - Return record layout info for the given record decl.
llvm::GlobalVariable * GetAddrOfVTable(const CXXRecordDecl *RD)
GetAddrOfVTable - Get the address of the VTable for the given record decl.
Definition CGVTables.cpp:43
A specialization of Address that requires the address to be an LLVM Constant.
Definition Address.h:296
static ConstantAddress invalid()
Definition Address.h:304
void pushTerminate()
Push a terminate handler on the stack.
void popTerminate()
Pops a terminate handler off the stack.
Definition CGCleanup.h:646
FunctionArgList - Type for representing both the decl and type of parameters to a function.
Definition CGCall.h:376
LValue - This represents an lvalue reference.
Definition CGValue.h:183
CharUnits getAlignment() const
Definition CGValue.h:355
llvm::Value * getPointer(CodeGenFunction &CGF) const
const Qualifiers & getQuals() const
Definition CGValue.h:350
Address getAddress() const
Definition CGValue.h:373
LValueBaseInfo getBaseInfo() const
Definition CGValue.h:358
QualType getType() const
Definition CGValue.h:303
TBAAAccessInfo getTBAAInfo() const
Definition CGValue.h:347
A basic class for pre|post-action for advanced codegen sequence for OpenMP region.
virtual void Enter(CodeGenFunction &CGF)
RValue - This trivial value class is used to represent the result of an expression that is evaluated.
Definition CGValue.h:42
static RValue get(llvm::Value *V)
Definition CGValue.h:99
static RValue getComplex(llvm::Value *V1, llvm::Value *V2)
Definition CGValue.h:109
llvm::Value * getScalarVal() const
getScalarVal() - Return the Value* of this scalar value.
Definition CGValue.h:72
An abstract representation of an aligned address.
Definition Address.h:42
llvm::Type * getElementType() const
Return the type of the values stored in this address.
Definition Address.h:77
llvm::Value * getPointer() const
Definition Address.h:66
static RawAddress invalid()
Definition Address.h:61
Class intended to support codegen of all kinds of reduction clauses.
LValue getSharedLValue(unsigned N) const
Returns LValue for the reduction item.
const Expr * getRefExpr(unsigned N) const
Returns the reference expression of the reduction item.
LValue getOrigLValue(unsigned N) const
Returns LValue for the original reduction item.
bool needCleanups(unsigned N)
Returns true if the private copy requires cleanups.
void emitAggregateType(CodeGenFunction &CGF, unsigned N)
Emits the code for the variably-modified type, if required.
const VarDecl * getBaseDecl(unsigned N) const
Returns the base declaration of the reduction item.
QualType getPrivateType(unsigned N) const
Return the type of the private item.
bool usesReductionInitializer(unsigned N) const
Returns true if the initialization of the reduction item uses initializer from declare reduction cons...
void emitSharedOrigLValue(CodeGenFunction &CGF, unsigned N)
Emits lvalue for the shared and original reduction item.
void emitInitialization(CodeGenFunction &CGF, unsigned N, Address PrivateAddr, Address SharedAddr, llvm::function_ref< bool(CodeGenFunction &)> DefaultInit)
Performs initialization of the private copy for the reduction item.
std::pair< llvm::Value *, llvm::Value * > getSizes(unsigned N) const
Returns the size of the reduction item (in chars and total number of elements in the item),...
ReductionCodeGen(ArrayRef< const Expr * > Shareds, ArrayRef< const Expr * > Origs, ArrayRef< const Expr * > Privates, ArrayRef< const Expr * > ReductionOps)
void emitCleanups(CodeGenFunction &CGF, unsigned N, Address PrivateAddr)
Emits cleanup code for the reduction item.
Address adjustPrivateAddress(CodeGenFunction &CGF, unsigned N, Address PrivateAddr)
Adjusts PrivateAddr for use instead of the original variable address in normal operations.
Class provides a way to call simple version of codegen for OpenMP region, or an advanced with possibl...
void operator()(CodeGenFunction &CGF) const
void setAction(PrePostActionTy &Action) const
ConstStmtVisitor - This class implements a simple visitor for Stmt subclasses.
DeclContext - This is used only as base class of specific decl types that can act as declaration cont...
Definition DeclBase.h:1462
void addDecl(Decl *D)
Add the declaration D into this context.
A reference to a declared variable, function, enum, etc.
Definition Expr.h:1273
ValueDecl * getDecl()
Definition Expr.h:1341
Decl - This represents one declaration (or definition), e.g.
Definition DeclBase.h:86
T * getAttr() const
Definition DeclBase.h:581
bool hasAttrs() const
Definition DeclBase.h:526
ASTContext & getASTContext() const LLVM_READONLY
Definition DeclBase.cpp:547
void addAttr(Attr *A)
virtual Stmt * getBody() const
getBody - If this Decl represents a declaration for a body of code, such as a function or method defi...
Definition DeclBase.h:1100
llvm::iterator_range< specific_attr_iterator< T > > specific_attrs() const
Definition DeclBase.h:567
SourceLocation getLocation() const
Definition DeclBase.h:447
DeclContext * getDeclContext()
Definition DeclBase.h:456
AttrVec & getAttrs()
Definition DeclBase.h:532
bool hasAttr() const
Definition DeclBase.h:585
virtual Decl * getCanonicalDecl()
Retrieves the "canonical" declaration of the given declaration.
Definition DeclBase.h:991
SourceLocation getBeginLoc() const LLVM_READONLY
Definition Decl.h:831
DiagnosticBuilder Report(SourceLocation Loc, unsigned DiagID)
Issue the message to the client.
This represents one expression.
Definition Expr.h:112
bool isIntegerConstantExpr(const ASTContext &Ctx) const
bool isGLValue() const
Definition Expr.h:287
Expr * IgnoreParenNoopCasts(const ASTContext &Ctx) LLVM_READONLY
Skip past any parentheses and casts which do not change the value (including ptr->int casts of the sa...
Definition Expr.cpp:3126
@ SE_AllowSideEffects
Allow any unmodeled side effect.
Definition Expr.h:677
@ SE_AllowUndefinedBehavior
Allow UB that we can give a value, but not arbitrary unmodeled side effects.
Definition Expr.h:675
Expr * IgnoreParenCasts() LLVM_READONLY
Skip past any parentheses and casts which might surround this expression until reaching a fixed point...
Definition Expr.cpp:3104
llvm::APSInt EvaluateKnownConstInt(const ASTContext &Ctx) const
EvaluateKnownConstInt - Call EvaluateAsRValue and return the folded integer.
Expr * IgnoreParenImpCasts() LLVM_READONLY
Skip past any parentheses and implicit casts which might surround this expression until reaching a fi...
Definition Expr.cpp:3099
bool isEvaluatable(const ASTContext &Ctx, SideEffectsKind AllowSideEffects=SE_NoSideEffects) const
isEvaluatable - Call EvaluateAsRValue to see if this expression can be constant folded without side-e...
std::optional< llvm::APSInt > getIntegerConstantExpr(const ASTContext &Ctx) const
getIntegerConstantExpr - Return the value if this expression is a valid integer constant expression.
bool HasSideEffects(const ASTContext &Ctx, bool IncludePossibleEffects=true) const
HasSideEffects - This routine returns true for all those expressions which have any effect other than...
Definition Expr.cpp:3697
bool EvaluateAsBooleanCondition(bool &Result, const ASTContext &Ctx, bool InConstantContext=false) const
EvaluateAsBooleanCondition - Return true if this is a constant which we can fold and convert to a boo...
SourceLocation getExprLoc() const LLVM_READONLY
getExprLoc - Return the preferred location for the arrow when diagnosing a problem with a generic exp...
Definition Expr.cpp:283
static bool isSameComparisonOperand(const Expr *E1, const Expr *E2)
Checks that the two Expr's will refer to the same value as a comparison operand.
Definition Expr.cpp:4329
QualType getType() const
Definition Expr.h:144
bool hasNonTrivialCall(const ASTContext &Ctx) const
Determine whether this expression involves a call to any function that is not trivial.
Definition Expr.cpp:4065
Represents a member of a struct/union/class.
Definition Decl.h:3182
unsigned getFieldIndex() const
Returns the index of this field within its record, as appropriate for passing to ASTRecordLayout::get...
Definition Decl.h:3267
const RecordDecl * getParent() const
Returns the parent of this field declaration, which is the struct in which this field is defined.
Definition Decl.h:3418
static FieldDecl * Create(const ASTContext &C, DeclContext *DC, SourceLocation StartLoc, SourceLocation IdLoc, const IdentifierInfo *Id, QualType T, TypeSourceInfo *TInfo, Expr *BW, bool Mutable, InClassInitStyle InitStyle)
Definition Decl.cpp:4694
Represents a function declaration or definition.
Definition Decl.h:2018
const ParmVarDecl * getParamDecl(unsigned i) const
Definition Decl.h:2815
QualType getReturnType() const
Definition Decl.h:2863
ArrayRef< ParmVarDecl * > parameters() const
Definition Decl.h:2792
FunctionDecl * getCanonicalDecl() override
Retrieves the "canonical" declaration of the given declaration.
Definition Decl.cpp:3721
FunctionDecl * getMostRecentDecl()
Returns the most recent (re)declaration of this declaration.
unsigned getNumParams() const
Return the number of parameters this function must have based on its FunctionType.
Definition Decl.cpp:3800
FunctionDecl * getPreviousDecl()
Return the previous declaration of this declaration or NULL if this is the first declaration.
GlobalDecl - represents a global declaration.
Definition GlobalDecl.h:57
const Decl * getDecl() const
Definition GlobalDecl.h:106
static ImplicitParamDecl * Create(ASTContext &C, DeclContext *DC, SourceLocation IdLoc, const IdentifierInfo *Id, QualType T, ImplicitParamKind ParamKind)
Create implicit parameter.
Definition Decl.cpp:5596
static IntegerLiteral * Create(const ASTContext &C, const llvm::APInt &V, QualType type, SourceLocation l)
Returns a new integer literal with value 'V' and type 'type'.
Definition Expr.cpp:981
An lvalue reference type, per C++11 [dcl.ref].
Definition TypeBase.h:3681
MemberExpr - [C99 6.5.2.3] Structure and Union Members.
Definition Expr.h:3367
ValueDecl * getMemberDecl() const
Retrieve the member declaration to which this expression refers.
Definition Expr.h:3450
Expr * getBase() const
Definition Expr.h:3444
StringRef getName() const
Get the name of identifier for this declaration as a StringRef.
Definition Decl.h:301
bool isExternallyVisible() const
Definition Decl.h:433
const Stmt * getPreInitStmt() const
Get pre-initialization statement for the clause.
This is a basic class for representing single OpenMP clause.
This represents 'pragma omp declare mapper ...' directive.
Definition DeclOpenMP.h:349
Expr * getMapperVarRef()
Get the variable declared in the mapper.
Definition DeclOpenMP.h:411
This represents 'pragma omp declare reduction ...' directive.
Definition DeclOpenMP.h:239
Expr * getInitializer()
Get initializer expression (if specified) of the declare reduction construct.
Definition DeclOpenMP.h:300
Expr * getInitPriv()
Get Priv variable of the initializer.
Definition DeclOpenMP.h:311
Expr * getCombinerOut()
Get Out variable of the combiner.
Definition DeclOpenMP.h:288
Expr * getCombinerIn()
Get In variable of the combiner.
Definition DeclOpenMP.h:285
Expr * getCombiner()
Get combiner expression of the declare reduction construct.
Definition DeclOpenMP.h:282
Expr * getInitOrig()
Get Orig variable of the initializer.
Definition DeclOpenMP.h:308
OMPDeclareReductionInitKind getInitializerKind() const
Get initializer kind.
Definition DeclOpenMP.h:303
This represents 'if' clause in the 'pragma omp ...' directive.
Expr * getCondition() const
Returns condition.
OMPIteratorHelperData & getHelper(unsigned I)
Fetches helper data for the specified iteration space.
Definition Expr.cpp:5602
unsigned numOfIterators() const
Returns number of iterator definitions.
Definition ExprOpenMP.h:275
This represents 'num_threads' clause in the 'pragma omp ...' directive.
This represents 'pragma omp requires...' directive.
Definition DeclOpenMP.h:479
clauselist_range clauselists()
Definition DeclOpenMP.h:504
This represents 'threadset' clause in the 'pragma omp task ...' directive.
OpaqueValueExpr - An expression referring to an opaque object of a fixed type and value class.
Definition Expr.h:1181
Represents a parameter to a function.
Definition Decl.h:1808
PointerType - C99 6.7.5.1 - Pointer Declarators.
Definition TypeBase.h:3392
Represents an unpacked "presumed" location which can be presented to the user.
unsigned getColumn() const
Return the presumed column number of this location.
const char * getFilename() const
Return the presumed filename of this location.
unsigned getLine() const
Return the presumed line number of this location.
A (possibly-)qualified type.
Definition TypeBase.h:937
void addRestrict()
Add the restrict qualifier to this QualType.
Definition TypeBase.h:1187
QualType withRestrict() const
Definition TypeBase.h:1190
bool isNull() const
Return true if this QualType doesn't point to a type yet.
Definition TypeBase.h:1004
const Type * getTypePtr() const
Retrieves a pointer to the underlying (unqualified) type.
Definition TypeBase.h:8447
Qualifiers getQualifiers() const
Retrieve the set of qualifiers applied to this type.
Definition TypeBase.h:8487
QualType getNonReferenceType() const
If Type is a reference type (e.g., const int&), returns the type that the reference refers to ("const...
Definition TypeBase.h:8632
QualType getCanonicalType() const
Definition TypeBase.h:8499
DestructionKind isDestructedType() const
Returns a nonzero value if objects of this type require non-trivial work to clean up after.
Definition TypeBase.h:1560
Represents a struct/union/class.
Definition Decl.h:4347
field_iterator field_end() const
Definition Decl.h:4553
field_range fields() const
Definition Decl.h:4550
virtual void completeDefinition()
Note that the definition of this type is now complete.
Definition Decl.cpp:5285
bool field_empty() const
Definition Decl.h:4558
field_iterator field_begin() const
Definition Decl.cpp:5269
Scope - A scope is a transient data structure that is used while parsing the program.
Definition Scope.h:41
Encodes a location in the source.
static SourceLocation getFromRawEncoding(UIntTy Encoding)
Turn a raw encoding of a SourceLocation object into a real SourceLocation.
bool isValid() const
Return true if this is a valid SourceLocation object.
UIntTy getRawEncoding() const
When a SourceLocation itself cannot be used, this returns an (opaque) 32-bit integer encoding for it.
This class handles loading and caching of source files into memory.
PresumedLoc getPresumedLoc(SourceLocation Loc, bool UseLineDirectives=true) const
Returns the "presumed" location that a SourceLocation specifies.
Stmt - This represents one statement.
Definition Stmt.h:86
child_range children()
Definition Stmt.cpp:304
StmtClass getStmtClass() const
Definition Stmt.h:1503
SourceRange getSourceRange() const LLVM_READONLY
SourceLocation tokens are not useful in isolation - they are low level value objects created/interpre...
Definition Stmt.cpp:343
Stmt * IgnoreContainers(bool IgnoreCaptured=false)
Skip no-op (attributed, compound) container stmts and skip captured stmt at the top,...
Definition Stmt.cpp:210
SourceLocation getBeginLoc() const LLVM_READONLY
Definition Stmt.cpp:355
void startDefinition()
Starts the definition of this tag declaration.
Definition Decl.cpp:4900
bool isUnion() const
Definition Decl.h:3950
The base class of the type hierarchy.
Definition TypeBase.h:1875
bool isVoidType() const
Definition TypeBase.h:9050
const Type * getPointeeOrArrayElementType() const
If this is a pointer type, return the pointee type.
Definition TypeBase.h:9237
bool isSignedIntegerType() const
Return true if this is an integer type that is signed, according to C99 6.2.5p4 [char,...
Definition Type.cpp:2267
CXXRecordDecl * getAsCXXRecordDecl() const
Retrieves the CXXRecordDecl that this type refers to, either because the type is a RecordType or beca...
Definition Type.h:26
RecordDecl * getAsRecordDecl() const
Retrieves the RecordDecl this type refers to.
Definition Type.h:41
bool isArrayType() const
Definition TypeBase.h:8783
bool isPointerType() const
Definition TypeBase.h:8684
CanQualType getCanonicalTypeUnqualified() const
bool isIntegerType() const
isIntegerType() does not include complex integers (a GCC extension).
Definition TypeBase.h:9094
const T * castAs() const
Member-template castAs<specific type>.
Definition TypeBase.h:9344
bool isReferenceType() const
Definition TypeBase.h:8708
QualType getPointeeType() const
If this is a pointer, ObjC object pointer, or block pointer, this returns the respective pointee.
Definition Type.cpp:790
bool isLValueReferenceType() const
Definition TypeBase.h:8712
bool isAggregateType() const
Determines whether the type is a C++ aggregate type or C aggregate or union type.
Definition Type.cpp:2504
RecordDecl * castAsRecordDecl() const
Definition Type.h:48
QualType getCanonicalTypeInternal() const
Definition TypeBase.h:3183
const Type * getBaseElementTypeUnsafe() const
Get the base element type of this type, potentially discarding type qualifiers.
Definition TypeBase.h:9230
bool isVariablyModifiedType() const
Whether this type is a variably-modified type (C99 6.7.5).
Definition TypeBase.h:2864
const ArrayType * getAsArrayTypeUnsafe() const
A variant of getAs<> for array types which silently discards qualifiers from the outermost type.
Definition TypeBase.h:9330
bool isFloatingType() const
Definition Type.cpp:2390
bool isUnsignedIntegerType() const
Return true if this is an integer type that is unsigned, according to C99 6.2.5p6 [which returns true...
Definition Type.cpp:2333
bool isAnyPointerType() const
Definition TypeBase.h:8692
const T * getAs() const
Member-template getAs<specific type>.
Definition TypeBase.h:9277
bool isRecordType() const
Definition TypeBase.h:8811
bool isUnionType() const
Definition Type.cpp:756
Represent the declaration of a variable (in which case it is an lvalue) a function (in which case it ...
Definition Decl.h:712
QualType getType() const
Definition Decl.h:723
Represents a variable declaration or definition.
Definition Decl.h:924
VarDecl * getCanonicalDecl() override
Retrieves the "canonical" declaration of the given declaration.
Definition Decl.cpp:2236
VarDecl * getDefinition(ASTContext &)
Get the real (not just tentative) definition for this declaration.
Definition Decl.cpp:2345
const Expr * getInit() const
Definition Decl.h:1381
bool hasExternalStorage() const
Returns true if a variable has extern or private_extern storage.
Definition Decl.h:1230
@ DeclarationOnly
This declaration is only a declaration.
Definition Decl.h:1308
DefinitionKind hasDefinition(ASTContext &) const
Check whether this variable is defined in this translation unit.
Definition Decl.cpp:2354
bool isLocalVarDeclOrParm() const
Similar to isLocalVarDecl but also includes parameters.
Definition Decl.h:1275
const Expr * getAnyInitializer() const
Get the initializer for this variable, no matter which declaration it is attached to.
Definition Decl.h:1371
Represents a C array with a specified size that is not an integer-constant-expression.
Definition TypeBase.h:4030
Expr * getSizeExpr() const
Definition TypeBase.h:4044
specific_attr_iterator - Iterates over a subrange of an AttrVec, only providing attributes that are o...
Definition SPIR.cpp:35
bool isEmptyRecordForLayout(const ASTContext &Context, QualType T)
isEmptyRecordForLayout - Return true iff a structure contains only empty base classes (per isEmptyRec...
@ Type
The l-value was considered opaque, so the alignment was determined from a type.
Definition CGValue.h:155
@ Decl
The l-value was an access to a declared entity or something equivalently strong, like the address of ...
Definition CGValue.h:146
bool isEmptyFieldForLayout(const ASTContext &Context, const FieldDecl *FD)
isEmptyFieldForLayout - Return true iff the field is "empty", that is, either a zero-width bit-field ...
ComparisonResult
Indicates the result of a tentative comparison.
@ Address
A pointer to a ValueDecl.
Definition Primitives.h:28
The JSON file list parser is used to communicate input to InstallAPI.
bool isOpenMPWorksharingDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a worksharing directive.
CanQual< Type > CanQualType
Represents a canonical, potentially-qualified type.
bool needsTaskBasedThreadLimit(OpenMPDirectiveKind DKind)
Checks if the specified target directive, combined or not, needs task based thread_limit.
@ Match
This is not an overload because the signature exactly matches an existing declaration.
Definition Sema.h:830
@ Ctor_Complete
Complete object ctor.
Definition ABI.h:25
Privates[]
This class represents the 'transparent' clause in the '#pragma omp task' directive.
bool isa(CodeGen::Address addr)
Definition Address.h:330
if(T->getSizeExpr()) TRY_TO(TraverseStmt(const_cast< Expr * >(T -> getSizeExpr())))
bool isOpenMPTargetDataManagementDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a target data offload directive.
static bool classof(const OMPClause *T)
@ Conditional
A conditional (?:) operator.
Definition Sema.h:669
@ ICIS_NoInit
No in-class initializer.
Definition Specifiers.h:273
bool isOpenMPDistributeDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a distribute directive.
@ LCK_ByRef
Capturing by reference.
Definition Lambda.h:37
LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE()
@ Private
'private' clause, allowed on 'parallel', 'serial', 'loop', 'parallel loop', and 'serial loop' constru...
@ Reduction
'reduction' clause, allowed on Parallel, Serial, Loop, and the combined constructs.
@ Present
'present' clause, allowed on Compute and Combined constructs, plus 'data' and 'declare'.
OpenMPScheduleClauseModifier
OpenMP modifiers for 'schedule' clause.
Definition OpenMPKinds.h:39
@ OMPC_SCHEDULE_MODIFIER_last
Definition OpenMPKinds.h:44
@ OMPC_SCHEDULE_MODIFIER_unknown
Definition OpenMPKinds.h:40
@ AS_public
Definition Specifiers.h:125
nullptr
This class represents a compute construct, representing a 'Kind' of 'parallel', 'serial',...
@ CR_OpenMP
bool isOpenMPParallelDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a parallel-kind directive.
OpenMPDistScheduleClauseKind
OpenMP attributes for 'dist_schedule' clause.
Expr * Cond
};
bool isOpenMPTaskingDirective(OpenMPDirectiveKind Kind)
Checks if the specified directive kind is one of the tasking directives - task, taskloop,...
bool isOpenMPTargetExecutionDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a target code offload directive.
@ OMPC_DYN_GROUPPRIVATE_FALLBACK_unknown
@ Result
The result type of a method or function.
Definition TypeBase.h:905
bool isOpenMPTeamsDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a teams-kind directive.
OpenMPDependClauseKind
OpenMP attributes for 'depend' clause.
Definition OpenMPKinds.h:55
@ OMPC_DEPEND_unknown
Definition OpenMPKinds.h:59
@ Dtor_Complete
Complete object dtor.
Definition ABI.h:36
@ Union
The "union" keyword.
Definition TypeBase.h:6003
bool isOpenMPTargetMapEnteringDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a map-entering target directive.
@ Type
The name was classified as a type.
Definition Sema.h:564
bool isOpenMPLoopDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a directive with an associated loop construct.
OpenMPSeverityClauseKind
OpenMP attributes for 'severity' clause.
LangAS
Defines the address space values used by the address space qualifier of QualType.
llvm::omp::Directive OpenMPDirectiveKind
OpenMP directives.
Definition OpenMPKinds.h:25
bool isOpenMPSimdDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a simd directive.
@ VK_PRValue
A pr-value expression (in the C++11 taxonomy) produces a temporary value.
Definition Specifiers.h:136
@ VK_LValue
An l-value expression is a reference to an object with independent storage.
Definition Specifiers.h:140
for(const auto &A :T->param_types())
void getOpenMPCaptureRegions(llvm::SmallVectorImpl< OpenMPDirectiveKind > &CaptureRegions, OpenMPDirectiveKind DKind)
Return the captured regions of an OpenMP directive.
OpenMPNumThreadsClauseModifier
@ OMPC_ATOMIC_DEFAULT_MEM_ORDER_unknown
U cast(CodeGen::Address addr)
Definition Address.h:327
@ OMPC_DEVICE_unknown
Definition OpenMPKinds.h:51
OpenMPMapModifierKind
OpenMP modifier kind for 'map' clause.
Definition OpenMPKinds.h:79
@ OMPC_MAP_MODIFIER_unknown
Definition OpenMPKinds.h:80
@ Other
Other implicit parameter.
Definition Decl.h:1763
OpenMPScheduleClauseKind
OpenMP attributes for 'schedule' clause.
Definition OpenMPKinds.h:31
@ OMPC_SCHEDULE_unknown
Definition OpenMPKinds.h:35
bool isOpenMPTaskLoopDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a taskloop directive.
OpenMPThreadsetKind
OpenMP modifiers for 'threadset' clause.
OpenMPMapClauseKind
OpenMP mapping kind for 'map' clause.
Definition OpenMPKinds.h:71
@ OMPC_MAP_unknown
Definition OpenMPKinds.h:75
unsigned long uint64_t
Diagnostic wrappers for TextAPI types for error reporting.
Definition Dominators.h:30
__packed_splat4 __packed_splat2 __packed_splat8 __packed_splat4 int32_t
__packed_splat4 __packed_splat2 __packed_splat8 __packed_splat4 __packed_splat2 __packed_splat4 __packed_splat2 __packed_splat8 __packed_splat4 uint32_t
#define false
Definition stdbool.h:26
Data for list of allocators.
Expr * AllocatorTraits
Allocator traits.
Struct with the values to be passed to the dispatch runtime function.
llvm::Value * Chunk
Chunk size specified using 'schedule' clause (nullptr if chunk was not specified)
Maps the expression for the lastprivate variable to the global copy used to store new value because o...
Struct with the values to be passed to the static runtime function.
bool IVSigned
Sign of the iteration variable.
Address UB
Address of the output variable in which the upper iteration number is returned.
Address IL
Address of the output variable in which the flag of the last iteration is returned.
llvm::Value * Chunk
Value of the chunk for the static_chunked scheduled loop.
unsigned IVSize
Size of the iteration variable in bits.
Address ST
Address of the output variable in which the stride value is returned, necessary to generate the stati...
bool Ordered
true if loop is ordered, false otherwise.
Address LB
Address of the output variable in which the lower iteration number is returned.
A jump destination is an abstract label, branching to which may require a jump out through normal cle...
llvm::IntegerType * Int8Ty
i8, i16, i32, and i64
llvm::CallingConv::ID getRuntimeCC() const
SmallVector< const Expr *, 4 > DepExprs
EvalResult is a struct with detailed info about an evaluated expression.
Definition Expr.h:648
Extra information about a function prototype.
Definition TypeBase.h:5456
Expr * CounterUpdate
Updater for the internal counter: ++CounterVD;.
Definition ExprOpenMP.h:121
Scheduling data for loop-based OpenMP directives.
bool UseFusedDistChunkSchedule
Request the fused distr_static_chunk + static_chunkone runtime schedule in for_static_init.
OpenMPScheduleClauseModifier M2
OpenMPScheduleClauseModifier M1
OpenMPScheduleClauseKind Schedule
Describes how types, statements, expressions, and declarations should be printed.